1#! /usr/bin/env perl 2# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the OpenSSL license (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9# 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16# 17# This module implements support for AES instructions as per PowerISA 18# specification version 2.07, first implemented by POWER8 processor. 19# The module is endian-agnostic in sense that it supports both big- 20# and little-endian cases. Data alignment in parallelizable modes is 21# handled with VSX loads and stores, which implies MSR.VSX flag being 22# set. It should also be noted that ISA specification doesn't prohibit 23# alignment exceptions for these instructions on page boundaries. 24# Initially alignment was handled in pure AltiVec/VMX way [when data 25# is aligned programmatically, which in turn guarantees exception- 26# free execution], but it turned to hamper performance when vcipher 27# instructions are interleaved. It's reckoned that eventual 28# misalignment penalties at page boundaries are in average lower 29# than additional overhead in pure AltiVec approach. 30# 31# May 2016 32# 33# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 34# systems were measured. 35# 36###################################################################### 37# Current large-block performance in cycles per byte processed with 38# 128-bit key (less is better). 39# 40# CBC en-/decrypt CTR XTS 41# POWER8[le] 3.96/0.72 0.74 1.1 42# POWER8[be] 3.75/0.65 0.66 1.0 43# POWER9[le] 4.02/0.86 0.84 1.05 44# POWER9[be] 3.99/0.78 0.79 0.97 45 46$flavour = shift; 47 48if ($flavour =~ /64/) { 49 $SIZE_T =8; 50 $LRSAVE =2*$SIZE_T; 51 $STU ="stdu"; 52 $POP ="ld"; 53 $PUSH ="std"; 54 $UCMP ="cmpld"; 55 $SHL ="sldi"; 56} elsif ($flavour =~ /32/) { 57 $SIZE_T =4; 58 $LRSAVE =$SIZE_T; 59 $STU ="stwu"; 60 $POP ="lwz"; 61 $PUSH ="stw"; 62 $UCMP ="cmplw"; 63 $SHL ="slwi"; 64} else { die "nonsense $flavour"; } 65 66$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 67 68$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 69( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 70( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 71die "can't locate ppc-xlate.pl"; 72 73open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 74 75$FRAME=8*$SIZE_T; 76$prefix="aes_p8"; 77 78$sp="r1"; 79$vrsave="r12"; 80 81######################################################################### 82{{{ # Key setup procedures # 83my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 84my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 85my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 86 87$code.=<<___; 88.machine "any" 89 90.text 91 92.align 7 93rcon: 94.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 95.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 96.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 97.long 0,0,0,0 ?asis 98Lconsts: 99 mflr r0 100 bcl 20,31,\$+4 101 mflr $ptr #vvvvv "distance between . and rcon 102 addi $ptr,$ptr,-0x48 103 mtlr r0 104 blr 105 .long 0 106 .byte 0,12,0x14,0,0,0,0,0 107.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 108 109.globl .${prefix}_set_encrypt_key 110.align 5 111.${prefix}_set_encrypt_key: 112Lset_encrypt_key: 113 mflr r11 114 $PUSH r11,$LRSAVE($sp) 115 116 li $ptr,-1 117 ${UCMP}i $inp,0 118 beq- Lenc_key_abort # if ($inp==0) return -1; 119 ${UCMP}i $out,0 120 beq- Lenc_key_abort # if ($out==0) return -1; 121 li $ptr,-2 122 cmpwi $bits,128 123 blt- Lenc_key_abort 124 cmpwi $bits,256 125 bgt- Lenc_key_abort 126 andi. r0,$bits,0x3f 127 bne- Lenc_key_abort 128 129 lis r0,0xfff0 130 mfspr $vrsave,256 131 mtspr 256,r0 132 133 bl Lconsts 134 mtlr r11 135 136 neg r9,$inp 137 lvx $in0,0,$inp 138 addi $inp,$inp,15 # 15 is not typo 139 lvsr $key,0,r9 # borrow $key 140 li r8,0x20 141 cmpwi $bits,192 142 lvx $in1,0,$inp 143 le?vspltisb $mask,0x0f # borrow $mask 144 lvx $rcon,0,$ptr 145 le?vxor $key,$key,$mask # adjust for byte swap 146 lvx $mask,r8,$ptr 147 addi $ptr,$ptr,0x10 148 vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 149 li $cnt,8 150 vxor $zero,$zero,$zero 151 mtctr $cnt 152 153 ?lvsr $outperm,0,$out 154 vspltisb $outmask,-1 155 lvx $outhead,0,$out 156 ?vperm $outmask,$zero,$outmask,$outperm 157 158 blt Loop128 159 addi $inp,$inp,8 160 beq L192 161 addi $inp,$inp,8 162 b L256 163 164.align 4 165Loop128: 166 vperm $key,$in0,$in0,$mask # rotate-n-splat 167 vsldoi $tmp,$zero,$in0,12 # >>32 168 vperm $outtail,$in0,$in0,$outperm # rotate 169 vsel $stage,$outhead,$outtail,$outmask 170 vmr $outhead,$outtail 171 vcipherlast $key,$key,$rcon 172 stvx $stage,0,$out 173 addi $out,$out,16 174 175 vxor $in0,$in0,$tmp 176 vsldoi $tmp,$zero,$tmp,12 # >>32 177 vxor $in0,$in0,$tmp 178 vsldoi $tmp,$zero,$tmp,12 # >>32 179 vxor $in0,$in0,$tmp 180 vadduwm $rcon,$rcon,$rcon 181 vxor $in0,$in0,$key 182 bdnz Loop128 183 184 lvx $rcon,0,$ptr # last two round keys 185 186 vperm $key,$in0,$in0,$mask # rotate-n-splat 187 vsldoi $tmp,$zero,$in0,12 # >>32 188 vperm $outtail,$in0,$in0,$outperm # rotate 189 vsel $stage,$outhead,$outtail,$outmask 190 vmr $outhead,$outtail 191 vcipherlast $key,$key,$rcon 192 stvx $stage,0,$out 193 addi $out,$out,16 194 195 vxor $in0,$in0,$tmp 196 vsldoi $tmp,$zero,$tmp,12 # >>32 197 vxor $in0,$in0,$tmp 198 vsldoi $tmp,$zero,$tmp,12 # >>32 199 vxor $in0,$in0,$tmp 200 vadduwm $rcon,$rcon,$rcon 201 vxor $in0,$in0,$key 202 203 vperm $key,$in0,$in0,$mask # rotate-n-splat 204 vsldoi $tmp,$zero,$in0,12 # >>32 205 vperm $outtail,$in0,$in0,$outperm # rotate 206 vsel $stage,$outhead,$outtail,$outmask 207 vmr $outhead,$outtail 208 vcipherlast $key,$key,$rcon 209 stvx $stage,0,$out 210 addi $out,$out,16 211 212 vxor $in0,$in0,$tmp 213 vsldoi $tmp,$zero,$tmp,12 # >>32 214 vxor $in0,$in0,$tmp 215 vsldoi $tmp,$zero,$tmp,12 # >>32 216 vxor $in0,$in0,$tmp 217 vxor $in0,$in0,$key 218 vperm $outtail,$in0,$in0,$outperm # rotate 219 vsel $stage,$outhead,$outtail,$outmask 220 vmr $outhead,$outtail 221 stvx $stage,0,$out 222 223 addi $inp,$out,15 # 15 is not typo 224 addi $out,$out,0x50 225 226 li $rounds,10 227 b Ldone 228 229.align 4 230L192: 231 lvx $tmp,0,$inp 232 li $cnt,4 233 vperm $outtail,$in0,$in0,$outperm # rotate 234 vsel $stage,$outhead,$outtail,$outmask 235 vmr $outhead,$outtail 236 stvx $stage,0,$out 237 addi $out,$out,16 238 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 239 vspltisb $key,8 # borrow $key 240 mtctr $cnt 241 vsububm $mask,$mask,$key # adjust the mask 242 243Loop192: 244 vperm $key,$in1,$in1,$mask # roate-n-splat 245 vsldoi $tmp,$zero,$in0,12 # >>32 246 vcipherlast $key,$key,$rcon 247 248 vxor $in0,$in0,$tmp 249 vsldoi $tmp,$zero,$tmp,12 # >>32 250 vxor $in0,$in0,$tmp 251 vsldoi $tmp,$zero,$tmp,12 # >>32 252 vxor $in0,$in0,$tmp 253 254 vsldoi $stage,$zero,$in1,8 255 vspltw $tmp,$in0,3 256 vxor $tmp,$tmp,$in1 257 vsldoi $in1,$zero,$in1,12 # >>32 258 vadduwm $rcon,$rcon,$rcon 259 vxor $in1,$in1,$tmp 260 vxor $in0,$in0,$key 261 vxor $in1,$in1,$key 262 vsldoi $stage,$stage,$in0,8 263 264 vperm $key,$in1,$in1,$mask # rotate-n-splat 265 vsldoi $tmp,$zero,$in0,12 # >>32 266 vperm $outtail,$stage,$stage,$outperm # rotate 267 vsel $stage,$outhead,$outtail,$outmask 268 vmr $outhead,$outtail 269 vcipherlast $key,$key,$rcon 270 stvx $stage,0,$out 271 addi $out,$out,16 272 273 vsldoi $stage,$in0,$in1,8 274 vxor $in0,$in0,$tmp 275 vsldoi $tmp,$zero,$tmp,12 # >>32 276 vperm $outtail,$stage,$stage,$outperm # rotate 277 vsel $stage,$outhead,$outtail,$outmask 278 vmr $outhead,$outtail 279 vxor $in0,$in0,$tmp 280 vsldoi $tmp,$zero,$tmp,12 # >>32 281 vxor $in0,$in0,$tmp 282 stvx $stage,0,$out 283 addi $out,$out,16 284 285 vspltw $tmp,$in0,3 286 vxor $tmp,$tmp,$in1 287 vsldoi $in1,$zero,$in1,12 # >>32 288 vadduwm $rcon,$rcon,$rcon 289 vxor $in1,$in1,$tmp 290 vxor $in0,$in0,$key 291 vxor $in1,$in1,$key 292 vperm $outtail,$in0,$in0,$outperm # rotate 293 vsel $stage,$outhead,$outtail,$outmask 294 vmr $outhead,$outtail 295 stvx $stage,0,$out 296 addi $inp,$out,15 # 15 is not typo 297 addi $out,$out,16 298 bdnz Loop192 299 300 li $rounds,12 301 addi $out,$out,0x20 302 b Ldone 303 304.align 4 305L256: 306 lvx $tmp,0,$inp 307 li $cnt,7 308 li $rounds,14 309 vperm $outtail,$in0,$in0,$outperm # rotate 310 vsel $stage,$outhead,$outtail,$outmask 311 vmr $outhead,$outtail 312 stvx $stage,0,$out 313 addi $out,$out,16 314 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 315 mtctr $cnt 316 317Loop256: 318 vperm $key,$in1,$in1,$mask # rotate-n-splat 319 vsldoi $tmp,$zero,$in0,12 # >>32 320 vperm $outtail,$in1,$in1,$outperm # rotate 321 vsel $stage,$outhead,$outtail,$outmask 322 vmr $outhead,$outtail 323 vcipherlast $key,$key,$rcon 324 stvx $stage,0,$out 325 addi $out,$out,16 326 327 vxor $in0,$in0,$tmp 328 vsldoi $tmp,$zero,$tmp,12 # >>32 329 vxor $in0,$in0,$tmp 330 vsldoi $tmp,$zero,$tmp,12 # >>32 331 vxor $in0,$in0,$tmp 332 vadduwm $rcon,$rcon,$rcon 333 vxor $in0,$in0,$key 334 vperm $outtail,$in0,$in0,$outperm # rotate 335 vsel $stage,$outhead,$outtail,$outmask 336 vmr $outhead,$outtail 337 stvx $stage,0,$out 338 addi $inp,$out,15 # 15 is not typo 339 addi $out,$out,16 340 bdz Ldone 341 342 vspltw $key,$in0,3 # just splat 343 vsldoi $tmp,$zero,$in1,12 # >>32 344 vsbox $key,$key 345 346 vxor $in1,$in1,$tmp 347 vsldoi $tmp,$zero,$tmp,12 # >>32 348 vxor $in1,$in1,$tmp 349 vsldoi $tmp,$zero,$tmp,12 # >>32 350 vxor $in1,$in1,$tmp 351 352 vxor $in1,$in1,$key 353 b Loop256 354 355.align 4 356Ldone: 357 lvx $in1,0,$inp # redundant in aligned case 358 vsel $in1,$outhead,$in1,$outmask 359 stvx $in1,0,$inp 360 li $ptr,0 361 mtspr 256,$vrsave 362 stw $rounds,0($out) 363 364Lenc_key_abort: 365 mr r3,$ptr 366 blr 367 .long 0 368 .byte 0,12,0x14,1,0,0,3,0 369 .long 0 370.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 371 372.globl .${prefix}_set_decrypt_key 373.align 5 374.${prefix}_set_decrypt_key: 375 $STU $sp,-$FRAME($sp) 376 mflr r10 377 $PUSH r10,$FRAME+$LRSAVE($sp) 378 bl Lset_encrypt_key 379 mtlr r10 380 381 cmpwi r3,0 382 bne- Ldec_key_abort 383 384 slwi $cnt,$rounds,4 385 subi $inp,$out,240 # first round key 386 srwi $rounds,$rounds,1 387 add $out,$inp,$cnt # last round key 388 mtctr $rounds 389 390Ldeckey: 391 lwz r0, 0($inp) 392 lwz r6, 4($inp) 393 lwz r7, 8($inp) 394 lwz r8, 12($inp) 395 addi $inp,$inp,16 396 lwz r9, 0($out) 397 lwz r10,4($out) 398 lwz r11,8($out) 399 lwz r12,12($out) 400 stw r0, 0($out) 401 stw r6, 4($out) 402 stw r7, 8($out) 403 stw r8, 12($out) 404 subi $out,$out,16 405 stw r9, -16($inp) 406 stw r10,-12($inp) 407 stw r11,-8($inp) 408 stw r12,-4($inp) 409 bdnz Ldeckey 410 411 xor r3,r3,r3 # return value 412Ldec_key_abort: 413 addi $sp,$sp,$FRAME 414 blr 415 .long 0 416 .byte 0,12,4,1,0x80,0,3,0 417 .long 0 418.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 419___ 420}}} 421######################################################################### 422{{{ # Single block en- and decrypt procedures # 423sub gen_block () { 424my $dir = shift; 425my $n = $dir eq "de" ? "n" : ""; 426my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 427 428$code.=<<___; 429.globl .${prefix}_${dir}crypt 430.align 5 431.${prefix}_${dir}crypt: 432 lwz $rounds,240($key) 433 lis r0,0xfc00 434 mfspr $vrsave,256 435 li $idx,15 # 15 is not typo 436 mtspr 256,r0 437 438 lvx v0,0,$inp 439 neg r11,$out 440 lvx v1,$idx,$inp 441 lvsl v2,0,$inp # inpperm 442 le?vspltisb v4,0x0f 443 ?lvsl v3,0,r11 # outperm 444 le?vxor v2,v2,v4 445 li $idx,16 446 vperm v0,v0,v1,v2 # align [and byte swap in LE] 447 lvx v1,0,$key 448 ?lvsl v5,0,$key # keyperm 449 srwi $rounds,$rounds,1 450 lvx v2,$idx,$key 451 addi $idx,$idx,16 452 subi $rounds,$rounds,1 453 ?vperm v1,v1,v2,v5 # align round key 454 455 vxor v0,v0,v1 456 lvx v1,$idx,$key 457 addi $idx,$idx,16 458 mtctr $rounds 459 460Loop_${dir}c: 461 ?vperm v2,v2,v1,v5 462 v${n}cipher v0,v0,v2 463 lvx v2,$idx,$key 464 addi $idx,$idx,16 465 ?vperm v1,v1,v2,v5 466 v${n}cipher v0,v0,v1 467 lvx v1,$idx,$key 468 addi $idx,$idx,16 469 bdnz Loop_${dir}c 470 471 ?vperm v2,v2,v1,v5 472 v${n}cipher v0,v0,v2 473 lvx v2,$idx,$key 474 ?vperm v1,v1,v2,v5 475 v${n}cipherlast v0,v0,v1 476 477 vspltisb v2,-1 478 vxor v1,v1,v1 479 li $idx,15 # 15 is not typo 480 ?vperm v2,v1,v2,v3 # outmask 481 le?vxor v3,v3,v4 482 lvx v1,0,$out # outhead 483 vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 484 vsel v1,v1,v0,v2 485 lvx v4,$idx,$out 486 stvx v1,0,$out 487 vsel v0,v0,v4,v2 488 stvx v0,$idx,$out 489 490 mtspr 256,$vrsave 491 blr 492 .long 0 493 .byte 0,12,0x14,0,0,0,3,0 494 .long 0 495.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 496___ 497} 498&gen_block("en"); 499&gen_block("de"); 500}}} 501######################################################################### 502{{{ # CBC en- and decrypt procedures # 503my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 504my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 505my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 506 map("v$_",(4..10)); 507$code.=<<___; 508.globl .${prefix}_cbc_encrypt 509.align 5 510.${prefix}_cbc_encrypt: 511 ${UCMP}i $len,16 512 bltlr- 513 514 cmpwi $enc,0 # test direction 515 lis r0,0xffe0 516 mfspr $vrsave,256 517 mtspr 256,r0 518 519 li $idx,15 520 vxor $rndkey0,$rndkey0,$rndkey0 521 le?vspltisb $tmp,0x0f 522 523 lvx $ivec,0,$ivp # load [unaligned] iv 524 lvsl $inpperm,0,$ivp 525 lvx $inptail,$idx,$ivp 526 le?vxor $inpperm,$inpperm,$tmp 527 vperm $ivec,$ivec,$inptail,$inpperm 528 529 neg r11,$inp 530 ?lvsl $keyperm,0,$key # prepare for unaligned key 531 lwz $rounds,240($key) 532 533 lvsr $inpperm,0,r11 # prepare for unaligned load 534 lvx $inptail,0,$inp 535 addi $inp,$inp,15 # 15 is not typo 536 le?vxor $inpperm,$inpperm,$tmp 537 538 ?lvsr $outperm,0,$out # prepare for unaligned store 539 vspltisb $outmask,-1 540 lvx $outhead,0,$out 541 ?vperm $outmask,$rndkey0,$outmask,$outperm 542 le?vxor $outperm,$outperm,$tmp 543 544 srwi $rounds,$rounds,1 545 li $idx,16 546 subi $rounds,$rounds,1 547 beq Lcbc_dec 548 549Lcbc_enc: 550 vmr $inout,$inptail 551 lvx $inptail,0,$inp 552 addi $inp,$inp,16 553 mtctr $rounds 554 subi $len,$len,16 # len-=16 555 556 lvx $rndkey0,0,$key 557 vperm $inout,$inout,$inptail,$inpperm 558 lvx $rndkey1,$idx,$key 559 addi $idx,$idx,16 560 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 561 vxor $inout,$inout,$rndkey0 562 lvx $rndkey0,$idx,$key 563 addi $idx,$idx,16 564 vxor $inout,$inout,$ivec 565 566Loop_cbc_enc: 567 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 568 vcipher $inout,$inout,$rndkey1 569 lvx $rndkey1,$idx,$key 570 addi $idx,$idx,16 571 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 572 vcipher $inout,$inout,$rndkey0 573 lvx $rndkey0,$idx,$key 574 addi $idx,$idx,16 575 bdnz Loop_cbc_enc 576 577 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 578 vcipher $inout,$inout,$rndkey1 579 lvx $rndkey1,$idx,$key 580 li $idx,16 581 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 582 vcipherlast $ivec,$inout,$rndkey0 583 ${UCMP}i $len,16 584 585 vperm $tmp,$ivec,$ivec,$outperm 586 vsel $inout,$outhead,$tmp,$outmask 587 vmr $outhead,$tmp 588 stvx $inout,0,$out 589 addi $out,$out,16 590 bge Lcbc_enc 591 592 b Lcbc_done 593 594.align 4 595Lcbc_dec: 596 ${UCMP}i $len,128 597 bge _aesp8_cbc_decrypt8x 598 vmr $tmp,$inptail 599 lvx $inptail,0,$inp 600 addi $inp,$inp,16 601 mtctr $rounds 602 subi $len,$len,16 # len-=16 603 604 lvx $rndkey0,0,$key 605 vperm $tmp,$tmp,$inptail,$inpperm 606 lvx $rndkey1,$idx,$key 607 addi $idx,$idx,16 608 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 609 vxor $inout,$tmp,$rndkey0 610 lvx $rndkey0,$idx,$key 611 addi $idx,$idx,16 612 613Loop_cbc_dec: 614 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 615 vncipher $inout,$inout,$rndkey1 616 lvx $rndkey1,$idx,$key 617 addi $idx,$idx,16 618 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 619 vncipher $inout,$inout,$rndkey0 620 lvx $rndkey0,$idx,$key 621 addi $idx,$idx,16 622 bdnz Loop_cbc_dec 623 624 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 625 vncipher $inout,$inout,$rndkey1 626 lvx $rndkey1,$idx,$key 627 li $idx,16 628 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 629 vncipherlast $inout,$inout,$rndkey0 630 ${UCMP}i $len,16 631 632 vxor $inout,$inout,$ivec 633 vmr $ivec,$tmp 634 vperm $tmp,$inout,$inout,$outperm 635 vsel $inout,$outhead,$tmp,$outmask 636 vmr $outhead,$tmp 637 stvx $inout,0,$out 638 addi $out,$out,16 639 bge Lcbc_dec 640 641Lcbc_done: 642 addi $out,$out,-1 643 lvx $inout,0,$out # redundant in aligned case 644 vsel $inout,$outhead,$inout,$outmask 645 stvx $inout,0,$out 646 647 neg $enc,$ivp # write [unaligned] iv 648 li $idx,15 # 15 is not typo 649 vxor $rndkey0,$rndkey0,$rndkey0 650 vspltisb $outmask,-1 651 le?vspltisb $tmp,0x0f 652 ?lvsl $outperm,0,$enc 653 ?vperm $outmask,$rndkey0,$outmask,$outperm 654 le?vxor $outperm,$outperm,$tmp 655 lvx $outhead,0,$ivp 656 vperm $ivec,$ivec,$ivec,$outperm 657 vsel $inout,$outhead,$ivec,$outmask 658 lvx $inptail,$idx,$ivp 659 stvx $inout,0,$ivp 660 vsel $inout,$ivec,$inptail,$outmask 661 stvx $inout,$idx,$ivp 662 663 mtspr 256,$vrsave 664 blr 665 .long 0 666 .byte 0,12,0x14,0,0,0,6,0 667 .long 0 668___ 669######################################################################### 670{{ # Optimized CBC decrypt procedure # 671my $key_="r11"; 672my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 673 $x00=0 if ($flavour =~ /osx/); 674my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); 675my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); 676my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 677 # v26-v31 last 6 round keys 678my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 679 680$code.=<<___; 681.align 5 682_aesp8_cbc_decrypt8x: 683 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 684 li r10,`$FRAME+8*16+15` 685 li r11,`$FRAME+8*16+31` 686 stvx v20,r10,$sp # ABI says so 687 addi r10,r10,32 688 stvx v21,r11,$sp 689 addi r11,r11,32 690 stvx v22,r10,$sp 691 addi r10,r10,32 692 stvx v23,r11,$sp 693 addi r11,r11,32 694 stvx v24,r10,$sp 695 addi r10,r10,32 696 stvx v25,r11,$sp 697 addi r11,r11,32 698 stvx v26,r10,$sp 699 addi r10,r10,32 700 stvx v27,r11,$sp 701 addi r11,r11,32 702 stvx v28,r10,$sp 703 addi r10,r10,32 704 stvx v29,r11,$sp 705 addi r11,r11,32 706 stvx v30,r10,$sp 707 stvx v31,r11,$sp 708 li r0,-1 709 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 710 li $x10,0x10 711 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 712 li $x20,0x20 713 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 714 li $x30,0x30 715 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 716 li $x40,0x40 717 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 718 li $x50,0x50 719 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 720 li $x60,0x60 721 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 722 li $x70,0x70 723 mtspr 256,r0 724 725 subi $rounds,$rounds,3 # -4 in total 726 subi $len,$len,128 # bias 727 728 lvx $rndkey0,$x00,$key # load key schedule 729 lvx v30,$x10,$key 730 addi $key,$key,0x20 731 lvx v31,$x00,$key 732 ?vperm $rndkey0,$rndkey0,v30,$keyperm 733 addi $key_,$sp,$FRAME+15 734 mtctr $rounds 735 736Load_cbc_dec_key: 737 ?vperm v24,v30,v31,$keyperm 738 lvx v30,$x10,$key 739 addi $key,$key,0x20 740 stvx v24,$x00,$key_ # off-load round[1] 741 ?vperm v25,v31,v30,$keyperm 742 lvx v31,$x00,$key 743 stvx v25,$x10,$key_ # off-load round[2] 744 addi $key_,$key_,0x20 745 bdnz Load_cbc_dec_key 746 747 lvx v26,$x10,$key 748 ?vperm v24,v30,v31,$keyperm 749 lvx v27,$x20,$key 750 stvx v24,$x00,$key_ # off-load round[3] 751 ?vperm v25,v31,v26,$keyperm 752 lvx v28,$x30,$key 753 stvx v25,$x10,$key_ # off-load round[4] 754 addi $key_,$sp,$FRAME+15 # rewind $key_ 755 ?vperm v26,v26,v27,$keyperm 756 lvx v29,$x40,$key 757 ?vperm v27,v27,v28,$keyperm 758 lvx v30,$x50,$key 759 ?vperm v28,v28,v29,$keyperm 760 lvx v31,$x60,$key 761 ?vperm v29,v29,v30,$keyperm 762 lvx $out0,$x70,$key # borrow $out0 763 ?vperm v30,v30,v31,$keyperm 764 lvx v24,$x00,$key_ # pre-load round[1] 765 ?vperm v31,v31,$out0,$keyperm 766 lvx v25,$x10,$key_ # pre-load round[2] 767 768 #lvx $inptail,0,$inp # "caller" already did this 769 #addi $inp,$inp,15 # 15 is not typo 770 subi $inp,$inp,15 # undo "caller" 771 772 le?li $idx,8 773 lvx_u $in0,$x00,$inp # load first 8 "words" 774 le?lvsl $inpperm,0,$idx 775 le?vspltisb $tmp,0x0f 776 lvx_u $in1,$x10,$inp 777 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 778 lvx_u $in2,$x20,$inp 779 le?vperm $in0,$in0,$in0,$inpperm 780 lvx_u $in3,$x30,$inp 781 le?vperm $in1,$in1,$in1,$inpperm 782 lvx_u $in4,$x40,$inp 783 le?vperm $in2,$in2,$in2,$inpperm 784 vxor $out0,$in0,$rndkey0 785 lvx_u $in5,$x50,$inp 786 le?vperm $in3,$in3,$in3,$inpperm 787 vxor $out1,$in1,$rndkey0 788 lvx_u $in6,$x60,$inp 789 le?vperm $in4,$in4,$in4,$inpperm 790 vxor $out2,$in2,$rndkey0 791 lvx_u $in7,$x70,$inp 792 addi $inp,$inp,0x80 793 le?vperm $in5,$in5,$in5,$inpperm 794 vxor $out3,$in3,$rndkey0 795 le?vperm $in6,$in6,$in6,$inpperm 796 vxor $out4,$in4,$rndkey0 797 le?vperm $in7,$in7,$in7,$inpperm 798 vxor $out5,$in5,$rndkey0 799 vxor $out6,$in6,$rndkey0 800 vxor $out7,$in7,$rndkey0 801 802 mtctr $rounds 803 b Loop_cbc_dec8x 804.align 5 805Loop_cbc_dec8x: 806 vncipher $out0,$out0,v24 807 vncipher $out1,$out1,v24 808 vncipher $out2,$out2,v24 809 vncipher $out3,$out3,v24 810 vncipher $out4,$out4,v24 811 vncipher $out5,$out5,v24 812 vncipher $out6,$out6,v24 813 vncipher $out7,$out7,v24 814 lvx v24,$x20,$key_ # round[3] 815 addi $key_,$key_,0x20 816 817 vncipher $out0,$out0,v25 818 vncipher $out1,$out1,v25 819 vncipher $out2,$out2,v25 820 vncipher $out3,$out3,v25 821 vncipher $out4,$out4,v25 822 vncipher $out5,$out5,v25 823 vncipher $out6,$out6,v25 824 vncipher $out7,$out7,v25 825 lvx v25,$x10,$key_ # round[4] 826 bdnz Loop_cbc_dec8x 827 828 subic $len,$len,128 # $len-=128 829 vncipher $out0,$out0,v24 830 vncipher $out1,$out1,v24 831 vncipher $out2,$out2,v24 832 vncipher $out3,$out3,v24 833 vncipher $out4,$out4,v24 834 vncipher $out5,$out5,v24 835 vncipher $out6,$out6,v24 836 vncipher $out7,$out7,v24 837 838 subfe. r0,r0,r0 # borrow?-1:0 839 vncipher $out0,$out0,v25 840 vncipher $out1,$out1,v25 841 vncipher $out2,$out2,v25 842 vncipher $out3,$out3,v25 843 vncipher $out4,$out4,v25 844 vncipher $out5,$out5,v25 845 vncipher $out6,$out6,v25 846 vncipher $out7,$out7,v25 847 848 and r0,r0,$len 849 vncipher $out0,$out0,v26 850 vncipher $out1,$out1,v26 851 vncipher $out2,$out2,v26 852 vncipher $out3,$out3,v26 853 vncipher $out4,$out4,v26 854 vncipher $out5,$out5,v26 855 vncipher $out6,$out6,v26 856 vncipher $out7,$out7,v26 857 858 add $inp,$inp,r0 # $inp is adjusted in such 859 # way that at exit from the 860 # loop inX-in7 are loaded 861 # with last "words" 862 vncipher $out0,$out0,v27 863 vncipher $out1,$out1,v27 864 vncipher $out2,$out2,v27 865 vncipher $out3,$out3,v27 866 vncipher $out4,$out4,v27 867 vncipher $out5,$out5,v27 868 vncipher $out6,$out6,v27 869 vncipher $out7,$out7,v27 870 871 addi $key_,$sp,$FRAME+15 # rewind $key_ 872 vncipher $out0,$out0,v28 873 vncipher $out1,$out1,v28 874 vncipher $out2,$out2,v28 875 vncipher $out3,$out3,v28 876 vncipher $out4,$out4,v28 877 vncipher $out5,$out5,v28 878 vncipher $out6,$out6,v28 879 vncipher $out7,$out7,v28 880 lvx v24,$x00,$key_ # re-pre-load round[1] 881 882 vncipher $out0,$out0,v29 883 vncipher $out1,$out1,v29 884 vncipher $out2,$out2,v29 885 vncipher $out3,$out3,v29 886 vncipher $out4,$out4,v29 887 vncipher $out5,$out5,v29 888 vncipher $out6,$out6,v29 889 vncipher $out7,$out7,v29 890 lvx v25,$x10,$key_ # re-pre-load round[2] 891 892 vncipher $out0,$out0,v30 893 vxor $ivec,$ivec,v31 # xor with last round key 894 vncipher $out1,$out1,v30 895 vxor $in0,$in0,v31 896 vncipher $out2,$out2,v30 897 vxor $in1,$in1,v31 898 vncipher $out3,$out3,v30 899 vxor $in2,$in2,v31 900 vncipher $out4,$out4,v30 901 vxor $in3,$in3,v31 902 vncipher $out5,$out5,v30 903 vxor $in4,$in4,v31 904 vncipher $out6,$out6,v30 905 vxor $in5,$in5,v31 906 vncipher $out7,$out7,v30 907 vxor $in6,$in6,v31 908 909 vncipherlast $out0,$out0,$ivec 910 vncipherlast $out1,$out1,$in0 911 lvx_u $in0,$x00,$inp # load next input block 912 vncipherlast $out2,$out2,$in1 913 lvx_u $in1,$x10,$inp 914 vncipherlast $out3,$out3,$in2 915 le?vperm $in0,$in0,$in0,$inpperm 916 lvx_u $in2,$x20,$inp 917 vncipherlast $out4,$out4,$in3 918 le?vperm $in1,$in1,$in1,$inpperm 919 lvx_u $in3,$x30,$inp 920 vncipherlast $out5,$out5,$in4 921 le?vperm $in2,$in2,$in2,$inpperm 922 lvx_u $in4,$x40,$inp 923 vncipherlast $out6,$out6,$in5 924 le?vperm $in3,$in3,$in3,$inpperm 925 lvx_u $in5,$x50,$inp 926 vncipherlast $out7,$out7,$in6 927 le?vperm $in4,$in4,$in4,$inpperm 928 lvx_u $in6,$x60,$inp 929 vmr $ivec,$in7 930 le?vperm $in5,$in5,$in5,$inpperm 931 lvx_u $in7,$x70,$inp 932 addi $inp,$inp,0x80 933 934 le?vperm $out0,$out0,$out0,$inpperm 935 le?vperm $out1,$out1,$out1,$inpperm 936 stvx_u $out0,$x00,$out 937 le?vperm $in6,$in6,$in6,$inpperm 938 vxor $out0,$in0,$rndkey0 939 le?vperm $out2,$out2,$out2,$inpperm 940 stvx_u $out1,$x10,$out 941 le?vperm $in7,$in7,$in7,$inpperm 942 vxor $out1,$in1,$rndkey0 943 le?vperm $out3,$out3,$out3,$inpperm 944 stvx_u $out2,$x20,$out 945 vxor $out2,$in2,$rndkey0 946 le?vperm $out4,$out4,$out4,$inpperm 947 stvx_u $out3,$x30,$out 948 vxor $out3,$in3,$rndkey0 949 le?vperm $out5,$out5,$out5,$inpperm 950 stvx_u $out4,$x40,$out 951 vxor $out4,$in4,$rndkey0 952 le?vperm $out6,$out6,$out6,$inpperm 953 stvx_u $out5,$x50,$out 954 vxor $out5,$in5,$rndkey0 955 le?vperm $out7,$out7,$out7,$inpperm 956 stvx_u $out6,$x60,$out 957 vxor $out6,$in6,$rndkey0 958 stvx_u $out7,$x70,$out 959 addi $out,$out,0x80 960 vxor $out7,$in7,$rndkey0 961 962 mtctr $rounds 963 beq Loop_cbc_dec8x # did $len-=128 borrow? 964 965 addic. $len,$len,128 966 beq Lcbc_dec8x_done 967 nop 968 nop 969 970Loop_cbc_dec8x_tail: # up to 7 "words" tail... 971 vncipher $out1,$out1,v24 972 vncipher $out2,$out2,v24 973 vncipher $out3,$out3,v24 974 vncipher $out4,$out4,v24 975 vncipher $out5,$out5,v24 976 vncipher $out6,$out6,v24 977 vncipher $out7,$out7,v24 978 lvx v24,$x20,$key_ # round[3] 979 addi $key_,$key_,0x20 980 981 vncipher $out1,$out1,v25 982 vncipher $out2,$out2,v25 983 vncipher $out3,$out3,v25 984 vncipher $out4,$out4,v25 985 vncipher $out5,$out5,v25 986 vncipher $out6,$out6,v25 987 vncipher $out7,$out7,v25 988 lvx v25,$x10,$key_ # round[4] 989 bdnz Loop_cbc_dec8x_tail 990 991 vncipher $out1,$out1,v24 992 vncipher $out2,$out2,v24 993 vncipher $out3,$out3,v24 994 vncipher $out4,$out4,v24 995 vncipher $out5,$out5,v24 996 vncipher $out6,$out6,v24 997 vncipher $out7,$out7,v24 998 999 vncipher $out1,$out1,v25 1000 vncipher $out2,$out2,v25 1001 vncipher $out3,$out3,v25 1002 vncipher $out4,$out4,v25 1003 vncipher $out5,$out5,v25 1004 vncipher $out6,$out6,v25 1005 vncipher $out7,$out7,v25 1006 1007 vncipher $out1,$out1,v26 1008 vncipher $out2,$out2,v26 1009 vncipher $out3,$out3,v26 1010 vncipher $out4,$out4,v26 1011 vncipher $out5,$out5,v26 1012 vncipher $out6,$out6,v26 1013 vncipher $out7,$out7,v26 1014 1015 vncipher $out1,$out1,v27 1016 vncipher $out2,$out2,v27 1017 vncipher $out3,$out3,v27 1018 vncipher $out4,$out4,v27 1019 vncipher $out5,$out5,v27 1020 vncipher $out6,$out6,v27 1021 vncipher $out7,$out7,v27 1022 1023 vncipher $out1,$out1,v28 1024 vncipher $out2,$out2,v28 1025 vncipher $out3,$out3,v28 1026 vncipher $out4,$out4,v28 1027 vncipher $out5,$out5,v28 1028 vncipher $out6,$out6,v28 1029 vncipher $out7,$out7,v28 1030 1031 vncipher $out1,$out1,v29 1032 vncipher $out2,$out2,v29 1033 vncipher $out3,$out3,v29 1034 vncipher $out4,$out4,v29 1035 vncipher $out5,$out5,v29 1036 vncipher $out6,$out6,v29 1037 vncipher $out7,$out7,v29 1038 1039 vncipher $out1,$out1,v30 1040 vxor $ivec,$ivec,v31 # last round key 1041 vncipher $out2,$out2,v30 1042 vxor $in1,$in1,v31 1043 vncipher $out3,$out3,v30 1044 vxor $in2,$in2,v31 1045 vncipher $out4,$out4,v30 1046 vxor $in3,$in3,v31 1047 vncipher $out5,$out5,v30 1048 vxor $in4,$in4,v31 1049 vncipher $out6,$out6,v30 1050 vxor $in5,$in5,v31 1051 vncipher $out7,$out7,v30 1052 vxor $in6,$in6,v31 1053 1054 cmplwi $len,32 # switch($len) 1055 blt Lcbc_dec8x_one 1056 nop 1057 beq Lcbc_dec8x_two 1058 cmplwi $len,64 1059 blt Lcbc_dec8x_three 1060 nop 1061 beq Lcbc_dec8x_four 1062 cmplwi $len,96 1063 blt Lcbc_dec8x_five 1064 nop 1065 beq Lcbc_dec8x_six 1066 1067Lcbc_dec8x_seven: 1068 vncipherlast $out1,$out1,$ivec 1069 vncipherlast $out2,$out2,$in1 1070 vncipherlast $out3,$out3,$in2 1071 vncipherlast $out4,$out4,$in3 1072 vncipherlast $out5,$out5,$in4 1073 vncipherlast $out6,$out6,$in5 1074 vncipherlast $out7,$out7,$in6 1075 vmr $ivec,$in7 1076 1077 le?vperm $out1,$out1,$out1,$inpperm 1078 le?vperm $out2,$out2,$out2,$inpperm 1079 stvx_u $out1,$x00,$out 1080 le?vperm $out3,$out3,$out3,$inpperm 1081 stvx_u $out2,$x10,$out 1082 le?vperm $out4,$out4,$out4,$inpperm 1083 stvx_u $out3,$x20,$out 1084 le?vperm $out5,$out5,$out5,$inpperm 1085 stvx_u $out4,$x30,$out 1086 le?vperm $out6,$out6,$out6,$inpperm 1087 stvx_u $out5,$x40,$out 1088 le?vperm $out7,$out7,$out7,$inpperm 1089 stvx_u $out6,$x50,$out 1090 stvx_u $out7,$x60,$out 1091 addi $out,$out,0x70 1092 b Lcbc_dec8x_done 1093 1094.align 5 1095Lcbc_dec8x_six: 1096 vncipherlast $out2,$out2,$ivec 1097 vncipherlast $out3,$out3,$in2 1098 vncipherlast $out4,$out4,$in3 1099 vncipherlast $out5,$out5,$in4 1100 vncipherlast $out6,$out6,$in5 1101 vncipherlast $out7,$out7,$in6 1102 vmr $ivec,$in7 1103 1104 le?vperm $out2,$out2,$out2,$inpperm 1105 le?vperm $out3,$out3,$out3,$inpperm 1106 stvx_u $out2,$x00,$out 1107 le?vperm $out4,$out4,$out4,$inpperm 1108 stvx_u $out3,$x10,$out 1109 le?vperm $out5,$out5,$out5,$inpperm 1110 stvx_u $out4,$x20,$out 1111 le?vperm $out6,$out6,$out6,$inpperm 1112 stvx_u $out5,$x30,$out 1113 le?vperm $out7,$out7,$out7,$inpperm 1114 stvx_u $out6,$x40,$out 1115 stvx_u $out7,$x50,$out 1116 addi $out,$out,0x60 1117 b Lcbc_dec8x_done 1118 1119.align 5 1120Lcbc_dec8x_five: 1121 vncipherlast $out3,$out3,$ivec 1122 vncipherlast $out4,$out4,$in3 1123 vncipherlast $out5,$out5,$in4 1124 vncipherlast $out6,$out6,$in5 1125 vncipherlast $out7,$out7,$in6 1126 vmr $ivec,$in7 1127 1128 le?vperm $out3,$out3,$out3,$inpperm 1129 le?vperm $out4,$out4,$out4,$inpperm 1130 stvx_u $out3,$x00,$out 1131 le?vperm $out5,$out5,$out5,$inpperm 1132 stvx_u $out4,$x10,$out 1133 le?vperm $out6,$out6,$out6,$inpperm 1134 stvx_u $out5,$x20,$out 1135 le?vperm $out7,$out7,$out7,$inpperm 1136 stvx_u $out6,$x30,$out 1137 stvx_u $out7,$x40,$out 1138 addi $out,$out,0x50 1139 b Lcbc_dec8x_done 1140 1141.align 5 1142Lcbc_dec8x_four: 1143 vncipherlast $out4,$out4,$ivec 1144 vncipherlast $out5,$out5,$in4 1145 vncipherlast $out6,$out6,$in5 1146 vncipherlast $out7,$out7,$in6 1147 vmr $ivec,$in7 1148 1149 le?vperm $out4,$out4,$out4,$inpperm 1150 le?vperm $out5,$out5,$out5,$inpperm 1151 stvx_u $out4,$x00,$out 1152 le?vperm $out6,$out6,$out6,$inpperm 1153 stvx_u $out5,$x10,$out 1154 le?vperm $out7,$out7,$out7,$inpperm 1155 stvx_u $out6,$x20,$out 1156 stvx_u $out7,$x30,$out 1157 addi $out,$out,0x40 1158 b Lcbc_dec8x_done 1159 1160.align 5 1161Lcbc_dec8x_three: 1162 vncipherlast $out5,$out5,$ivec 1163 vncipherlast $out6,$out6,$in5 1164 vncipherlast $out7,$out7,$in6 1165 vmr $ivec,$in7 1166 1167 le?vperm $out5,$out5,$out5,$inpperm 1168 le?vperm $out6,$out6,$out6,$inpperm 1169 stvx_u $out5,$x00,$out 1170 le?vperm $out7,$out7,$out7,$inpperm 1171 stvx_u $out6,$x10,$out 1172 stvx_u $out7,$x20,$out 1173 addi $out,$out,0x30 1174 b Lcbc_dec8x_done 1175 1176.align 5 1177Lcbc_dec8x_two: 1178 vncipherlast $out6,$out6,$ivec 1179 vncipherlast $out7,$out7,$in6 1180 vmr $ivec,$in7 1181 1182 le?vperm $out6,$out6,$out6,$inpperm 1183 le?vperm $out7,$out7,$out7,$inpperm 1184 stvx_u $out6,$x00,$out 1185 stvx_u $out7,$x10,$out 1186 addi $out,$out,0x20 1187 b Lcbc_dec8x_done 1188 1189.align 5 1190Lcbc_dec8x_one: 1191 vncipherlast $out7,$out7,$ivec 1192 vmr $ivec,$in7 1193 1194 le?vperm $out7,$out7,$out7,$inpperm 1195 stvx_u $out7,0,$out 1196 addi $out,$out,0x10 1197 1198Lcbc_dec8x_done: 1199 le?vperm $ivec,$ivec,$ivec,$inpperm 1200 stvx_u $ivec,0,$ivp # write [unaligned] iv 1201 1202 li r10,`$FRAME+15` 1203 li r11,`$FRAME+31` 1204 stvx $inpperm,r10,$sp # wipe copies of round keys 1205 addi r10,r10,32 1206 stvx $inpperm,r11,$sp 1207 addi r11,r11,32 1208 stvx $inpperm,r10,$sp 1209 addi r10,r10,32 1210 stvx $inpperm,r11,$sp 1211 addi r11,r11,32 1212 stvx $inpperm,r10,$sp 1213 addi r10,r10,32 1214 stvx $inpperm,r11,$sp 1215 addi r11,r11,32 1216 stvx $inpperm,r10,$sp 1217 addi r10,r10,32 1218 stvx $inpperm,r11,$sp 1219 addi r11,r11,32 1220 1221 mtspr 256,$vrsave 1222 lvx v20,r10,$sp # ABI says so 1223 addi r10,r10,32 1224 lvx v21,r11,$sp 1225 addi r11,r11,32 1226 lvx v22,r10,$sp 1227 addi r10,r10,32 1228 lvx v23,r11,$sp 1229 addi r11,r11,32 1230 lvx v24,r10,$sp 1231 addi r10,r10,32 1232 lvx v25,r11,$sp 1233 addi r11,r11,32 1234 lvx v26,r10,$sp 1235 addi r10,r10,32 1236 lvx v27,r11,$sp 1237 addi r11,r11,32 1238 lvx v28,r10,$sp 1239 addi r10,r10,32 1240 lvx v29,r11,$sp 1241 addi r11,r11,32 1242 lvx v30,r10,$sp 1243 lvx v31,r11,$sp 1244 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1245 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1246 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1247 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1248 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1249 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1250 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1251 blr 1252 .long 0 1253 .byte 0,12,0x04,0,0x80,6,6,0 1254 .long 0 1255.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt 1256___ 1257}} }}} 1258 1259######################################################################### 1260{{{ # CTR procedure[s] # 1261my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1262my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1263my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1264 map("v$_",(4..11)); 1265my $dat=$tmp; 1266 1267$code.=<<___; 1268.globl .${prefix}_ctr32_encrypt_blocks 1269.align 5 1270.${prefix}_ctr32_encrypt_blocks: 1271 ${UCMP}i $len,1 1272 bltlr- 1273 1274 lis r0,0xfff0 1275 mfspr $vrsave,256 1276 mtspr 256,r0 1277 1278 li $idx,15 1279 vxor $rndkey0,$rndkey0,$rndkey0 1280 le?vspltisb $tmp,0x0f 1281 1282 lvx $ivec,0,$ivp # load [unaligned] iv 1283 lvsl $inpperm,0,$ivp 1284 lvx $inptail,$idx,$ivp 1285 vspltisb $one,1 1286 le?vxor $inpperm,$inpperm,$tmp 1287 vperm $ivec,$ivec,$inptail,$inpperm 1288 vsldoi $one,$rndkey0,$one,1 1289 1290 neg r11,$inp 1291 ?lvsl $keyperm,0,$key # prepare for unaligned key 1292 lwz $rounds,240($key) 1293 1294 lvsr $inpperm,0,r11 # prepare for unaligned load 1295 lvx $inptail,0,$inp 1296 addi $inp,$inp,15 # 15 is not typo 1297 le?vxor $inpperm,$inpperm,$tmp 1298 1299 srwi $rounds,$rounds,1 1300 li $idx,16 1301 subi $rounds,$rounds,1 1302 1303 ${UCMP}i $len,8 1304 bge _aesp8_ctr32_encrypt8x 1305 1306 ?lvsr $outperm,0,$out # prepare for unaligned store 1307 vspltisb $outmask,-1 1308 lvx $outhead,0,$out 1309 ?vperm $outmask,$rndkey0,$outmask,$outperm 1310 le?vxor $outperm,$outperm,$tmp 1311 1312 lvx $rndkey0,0,$key 1313 mtctr $rounds 1314 lvx $rndkey1,$idx,$key 1315 addi $idx,$idx,16 1316 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1317 vxor $inout,$ivec,$rndkey0 1318 lvx $rndkey0,$idx,$key 1319 addi $idx,$idx,16 1320 b Loop_ctr32_enc 1321 1322.align 5 1323Loop_ctr32_enc: 1324 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1325 vcipher $inout,$inout,$rndkey1 1326 lvx $rndkey1,$idx,$key 1327 addi $idx,$idx,16 1328 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1329 vcipher $inout,$inout,$rndkey0 1330 lvx $rndkey0,$idx,$key 1331 addi $idx,$idx,16 1332 bdnz Loop_ctr32_enc 1333 1334 vadduwm $ivec,$ivec,$one 1335 vmr $dat,$inptail 1336 lvx $inptail,0,$inp 1337 addi $inp,$inp,16 1338 subic. $len,$len,1 # blocks-- 1339 1340 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1341 vcipher $inout,$inout,$rndkey1 1342 lvx $rndkey1,$idx,$key 1343 vperm $dat,$dat,$inptail,$inpperm 1344 li $idx,16 1345 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1346 lvx $rndkey0,0,$key 1347 vxor $dat,$dat,$rndkey1 # last round key 1348 vcipherlast $inout,$inout,$dat 1349 1350 lvx $rndkey1,$idx,$key 1351 addi $idx,$idx,16 1352 vperm $inout,$inout,$inout,$outperm 1353 vsel $dat,$outhead,$inout,$outmask 1354 mtctr $rounds 1355 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1356 vmr $outhead,$inout 1357 vxor $inout,$ivec,$rndkey0 1358 lvx $rndkey0,$idx,$key 1359 addi $idx,$idx,16 1360 stvx $dat,0,$out 1361 addi $out,$out,16 1362 bne Loop_ctr32_enc 1363 1364 addi $out,$out,-1 1365 lvx $inout,0,$out # redundant in aligned case 1366 vsel $inout,$outhead,$inout,$outmask 1367 stvx $inout,0,$out 1368 1369 mtspr 256,$vrsave 1370 blr 1371 .long 0 1372 .byte 0,12,0x14,0,0,0,6,0 1373 .long 0 1374___ 1375######################################################################### 1376{{ # Optimized CTR procedure # 1377my $key_="r11"; 1378my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1379 $x00=0 if ($flavour =~ /osx/); 1380my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1381my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1382my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1383 # v26-v31 last 6 round keys 1384my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1385my ($two,$three,$four)=($outhead,$outperm,$outmask); 1386 1387$code.=<<___; 1388.align 5 1389_aesp8_ctr32_encrypt8x: 1390 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1391 li r10,`$FRAME+8*16+15` 1392 li r11,`$FRAME+8*16+31` 1393 stvx v20,r10,$sp # ABI says so 1394 addi r10,r10,32 1395 stvx v21,r11,$sp 1396 addi r11,r11,32 1397 stvx v22,r10,$sp 1398 addi r10,r10,32 1399 stvx v23,r11,$sp 1400 addi r11,r11,32 1401 stvx v24,r10,$sp 1402 addi r10,r10,32 1403 stvx v25,r11,$sp 1404 addi r11,r11,32 1405 stvx v26,r10,$sp 1406 addi r10,r10,32 1407 stvx v27,r11,$sp 1408 addi r11,r11,32 1409 stvx v28,r10,$sp 1410 addi r10,r10,32 1411 stvx v29,r11,$sp 1412 addi r11,r11,32 1413 stvx v30,r10,$sp 1414 stvx v31,r11,$sp 1415 li r0,-1 1416 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1417 li $x10,0x10 1418 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1419 li $x20,0x20 1420 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1421 li $x30,0x30 1422 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1423 li $x40,0x40 1424 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1425 li $x50,0x50 1426 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1427 li $x60,0x60 1428 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1429 li $x70,0x70 1430 mtspr 256,r0 1431 1432 subi $rounds,$rounds,3 # -4 in total 1433 1434 lvx $rndkey0,$x00,$key # load key schedule 1435 lvx v30,$x10,$key 1436 addi $key,$key,0x20 1437 lvx v31,$x00,$key 1438 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1439 addi $key_,$sp,$FRAME+15 1440 mtctr $rounds 1441 1442Load_ctr32_enc_key: 1443 ?vperm v24,v30,v31,$keyperm 1444 lvx v30,$x10,$key 1445 addi $key,$key,0x20 1446 stvx v24,$x00,$key_ # off-load round[1] 1447 ?vperm v25,v31,v30,$keyperm 1448 lvx v31,$x00,$key 1449 stvx v25,$x10,$key_ # off-load round[2] 1450 addi $key_,$key_,0x20 1451 bdnz Load_ctr32_enc_key 1452 1453 lvx v26,$x10,$key 1454 ?vperm v24,v30,v31,$keyperm 1455 lvx v27,$x20,$key 1456 stvx v24,$x00,$key_ # off-load round[3] 1457 ?vperm v25,v31,v26,$keyperm 1458 lvx v28,$x30,$key 1459 stvx v25,$x10,$key_ # off-load round[4] 1460 addi $key_,$sp,$FRAME+15 # rewind $key_ 1461 ?vperm v26,v26,v27,$keyperm 1462 lvx v29,$x40,$key 1463 ?vperm v27,v27,v28,$keyperm 1464 lvx v30,$x50,$key 1465 ?vperm v28,v28,v29,$keyperm 1466 lvx v31,$x60,$key 1467 ?vperm v29,v29,v30,$keyperm 1468 lvx $out0,$x70,$key # borrow $out0 1469 ?vperm v30,v30,v31,$keyperm 1470 lvx v24,$x00,$key_ # pre-load round[1] 1471 ?vperm v31,v31,$out0,$keyperm 1472 lvx v25,$x10,$key_ # pre-load round[2] 1473 1474 vadduwm $two,$one,$one 1475 subi $inp,$inp,15 # undo "caller" 1476 $SHL $len,$len,4 1477 1478 vadduwm $out1,$ivec,$one # counter values ... 1479 vadduwm $out2,$ivec,$two 1480 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1481 le?li $idx,8 1482 vadduwm $out3,$out1,$two 1483 vxor $out1,$out1,$rndkey0 1484 le?lvsl $inpperm,0,$idx 1485 vadduwm $out4,$out2,$two 1486 vxor $out2,$out2,$rndkey0 1487 le?vspltisb $tmp,0x0f 1488 vadduwm $out5,$out3,$two 1489 vxor $out3,$out3,$rndkey0 1490 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1491 vadduwm $out6,$out4,$two 1492 vxor $out4,$out4,$rndkey0 1493 vadduwm $out7,$out5,$two 1494 vxor $out5,$out5,$rndkey0 1495 vadduwm $ivec,$out6,$two # next counter value 1496 vxor $out6,$out6,$rndkey0 1497 vxor $out7,$out7,$rndkey0 1498 1499 mtctr $rounds 1500 b Loop_ctr32_enc8x 1501.align 5 1502Loop_ctr32_enc8x: 1503 vcipher $out0,$out0,v24 1504 vcipher $out1,$out1,v24 1505 vcipher $out2,$out2,v24 1506 vcipher $out3,$out3,v24 1507 vcipher $out4,$out4,v24 1508 vcipher $out5,$out5,v24 1509 vcipher $out6,$out6,v24 1510 vcipher $out7,$out7,v24 1511Loop_ctr32_enc8x_middle: 1512 lvx v24,$x20,$key_ # round[3] 1513 addi $key_,$key_,0x20 1514 1515 vcipher $out0,$out0,v25 1516 vcipher $out1,$out1,v25 1517 vcipher $out2,$out2,v25 1518 vcipher $out3,$out3,v25 1519 vcipher $out4,$out4,v25 1520 vcipher $out5,$out5,v25 1521 vcipher $out6,$out6,v25 1522 vcipher $out7,$out7,v25 1523 lvx v25,$x10,$key_ # round[4] 1524 bdnz Loop_ctr32_enc8x 1525 1526 subic r11,$len,256 # $len-256, borrow $key_ 1527 vcipher $out0,$out0,v24 1528 vcipher $out1,$out1,v24 1529 vcipher $out2,$out2,v24 1530 vcipher $out3,$out3,v24 1531 vcipher $out4,$out4,v24 1532 vcipher $out5,$out5,v24 1533 vcipher $out6,$out6,v24 1534 vcipher $out7,$out7,v24 1535 1536 subfe r0,r0,r0 # borrow?-1:0 1537 vcipher $out0,$out0,v25 1538 vcipher $out1,$out1,v25 1539 vcipher $out2,$out2,v25 1540 vcipher $out3,$out3,v25 1541 vcipher $out4,$out4,v25 1542 vcipher $out5,$out5,v25 1543 vcipher $out6,$out6,v25 1544 vcipher $out7,$out7,v25 1545 1546 and r0,r0,r11 1547 addi $key_,$sp,$FRAME+15 # rewind $key_ 1548 vcipher $out0,$out0,v26 1549 vcipher $out1,$out1,v26 1550 vcipher $out2,$out2,v26 1551 vcipher $out3,$out3,v26 1552 vcipher $out4,$out4,v26 1553 vcipher $out5,$out5,v26 1554 vcipher $out6,$out6,v26 1555 vcipher $out7,$out7,v26 1556 lvx v24,$x00,$key_ # re-pre-load round[1] 1557 1558 subic $len,$len,129 # $len-=129 1559 vcipher $out0,$out0,v27 1560 addi $len,$len,1 # $len-=128 really 1561 vcipher $out1,$out1,v27 1562 vcipher $out2,$out2,v27 1563 vcipher $out3,$out3,v27 1564 vcipher $out4,$out4,v27 1565 vcipher $out5,$out5,v27 1566 vcipher $out6,$out6,v27 1567 vcipher $out7,$out7,v27 1568 lvx v25,$x10,$key_ # re-pre-load round[2] 1569 1570 vcipher $out0,$out0,v28 1571 lvx_u $in0,$x00,$inp # load input 1572 vcipher $out1,$out1,v28 1573 lvx_u $in1,$x10,$inp 1574 vcipher $out2,$out2,v28 1575 lvx_u $in2,$x20,$inp 1576 vcipher $out3,$out3,v28 1577 lvx_u $in3,$x30,$inp 1578 vcipher $out4,$out4,v28 1579 lvx_u $in4,$x40,$inp 1580 vcipher $out5,$out5,v28 1581 lvx_u $in5,$x50,$inp 1582 vcipher $out6,$out6,v28 1583 lvx_u $in6,$x60,$inp 1584 vcipher $out7,$out7,v28 1585 lvx_u $in7,$x70,$inp 1586 addi $inp,$inp,0x80 1587 1588 vcipher $out0,$out0,v29 1589 le?vperm $in0,$in0,$in0,$inpperm 1590 vcipher $out1,$out1,v29 1591 le?vperm $in1,$in1,$in1,$inpperm 1592 vcipher $out2,$out2,v29 1593 le?vperm $in2,$in2,$in2,$inpperm 1594 vcipher $out3,$out3,v29 1595 le?vperm $in3,$in3,$in3,$inpperm 1596 vcipher $out4,$out4,v29 1597 le?vperm $in4,$in4,$in4,$inpperm 1598 vcipher $out5,$out5,v29 1599 le?vperm $in5,$in5,$in5,$inpperm 1600 vcipher $out6,$out6,v29 1601 le?vperm $in6,$in6,$in6,$inpperm 1602 vcipher $out7,$out7,v29 1603 le?vperm $in7,$in7,$in7,$inpperm 1604 1605 add $inp,$inp,r0 # $inp is adjusted in such 1606 # way that at exit from the 1607 # loop inX-in7 are loaded 1608 # with last "words" 1609 subfe. r0,r0,r0 # borrow?-1:0 1610 vcipher $out0,$out0,v30 1611 vxor $in0,$in0,v31 # xor with last round key 1612 vcipher $out1,$out1,v30 1613 vxor $in1,$in1,v31 1614 vcipher $out2,$out2,v30 1615 vxor $in2,$in2,v31 1616 vcipher $out3,$out3,v30 1617 vxor $in3,$in3,v31 1618 vcipher $out4,$out4,v30 1619 vxor $in4,$in4,v31 1620 vcipher $out5,$out5,v30 1621 vxor $in5,$in5,v31 1622 vcipher $out6,$out6,v30 1623 vxor $in6,$in6,v31 1624 vcipher $out7,$out7,v30 1625 vxor $in7,$in7,v31 1626 1627 bne Lctr32_enc8x_break # did $len-129 borrow? 1628 1629 vcipherlast $in0,$out0,$in0 1630 vcipherlast $in1,$out1,$in1 1631 vadduwm $out1,$ivec,$one # counter values ... 1632 vcipherlast $in2,$out2,$in2 1633 vadduwm $out2,$ivec,$two 1634 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1635 vcipherlast $in3,$out3,$in3 1636 vadduwm $out3,$out1,$two 1637 vxor $out1,$out1,$rndkey0 1638 vcipherlast $in4,$out4,$in4 1639 vadduwm $out4,$out2,$two 1640 vxor $out2,$out2,$rndkey0 1641 vcipherlast $in5,$out5,$in5 1642 vadduwm $out5,$out3,$two 1643 vxor $out3,$out3,$rndkey0 1644 vcipherlast $in6,$out6,$in6 1645 vadduwm $out6,$out4,$two 1646 vxor $out4,$out4,$rndkey0 1647 vcipherlast $in7,$out7,$in7 1648 vadduwm $out7,$out5,$two 1649 vxor $out5,$out5,$rndkey0 1650 le?vperm $in0,$in0,$in0,$inpperm 1651 vadduwm $ivec,$out6,$two # next counter value 1652 vxor $out6,$out6,$rndkey0 1653 le?vperm $in1,$in1,$in1,$inpperm 1654 vxor $out7,$out7,$rndkey0 1655 mtctr $rounds 1656 1657 vcipher $out0,$out0,v24 1658 stvx_u $in0,$x00,$out 1659 le?vperm $in2,$in2,$in2,$inpperm 1660 vcipher $out1,$out1,v24 1661 stvx_u $in1,$x10,$out 1662 le?vperm $in3,$in3,$in3,$inpperm 1663 vcipher $out2,$out2,v24 1664 stvx_u $in2,$x20,$out 1665 le?vperm $in4,$in4,$in4,$inpperm 1666 vcipher $out3,$out3,v24 1667 stvx_u $in3,$x30,$out 1668 le?vperm $in5,$in5,$in5,$inpperm 1669 vcipher $out4,$out4,v24 1670 stvx_u $in4,$x40,$out 1671 le?vperm $in6,$in6,$in6,$inpperm 1672 vcipher $out5,$out5,v24 1673 stvx_u $in5,$x50,$out 1674 le?vperm $in7,$in7,$in7,$inpperm 1675 vcipher $out6,$out6,v24 1676 stvx_u $in6,$x60,$out 1677 vcipher $out7,$out7,v24 1678 stvx_u $in7,$x70,$out 1679 addi $out,$out,0x80 1680 1681 b Loop_ctr32_enc8x_middle 1682 1683.align 5 1684Lctr32_enc8x_break: 1685 cmpwi $len,-0x60 1686 blt Lctr32_enc8x_one 1687 nop 1688 beq Lctr32_enc8x_two 1689 cmpwi $len,-0x40 1690 blt Lctr32_enc8x_three 1691 nop 1692 beq Lctr32_enc8x_four 1693 cmpwi $len,-0x20 1694 blt Lctr32_enc8x_five 1695 nop 1696 beq Lctr32_enc8x_six 1697 cmpwi $len,0x00 1698 blt Lctr32_enc8x_seven 1699 1700Lctr32_enc8x_eight: 1701 vcipherlast $out0,$out0,$in0 1702 vcipherlast $out1,$out1,$in1 1703 vcipherlast $out2,$out2,$in2 1704 vcipherlast $out3,$out3,$in3 1705 vcipherlast $out4,$out4,$in4 1706 vcipherlast $out5,$out5,$in5 1707 vcipherlast $out6,$out6,$in6 1708 vcipherlast $out7,$out7,$in7 1709 1710 le?vperm $out0,$out0,$out0,$inpperm 1711 le?vperm $out1,$out1,$out1,$inpperm 1712 stvx_u $out0,$x00,$out 1713 le?vperm $out2,$out2,$out2,$inpperm 1714 stvx_u $out1,$x10,$out 1715 le?vperm $out3,$out3,$out3,$inpperm 1716 stvx_u $out2,$x20,$out 1717 le?vperm $out4,$out4,$out4,$inpperm 1718 stvx_u $out3,$x30,$out 1719 le?vperm $out5,$out5,$out5,$inpperm 1720 stvx_u $out4,$x40,$out 1721 le?vperm $out6,$out6,$out6,$inpperm 1722 stvx_u $out5,$x50,$out 1723 le?vperm $out7,$out7,$out7,$inpperm 1724 stvx_u $out6,$x60,$out 1725 stvx_u $out7,$x70,$out 1726 addi $out,$out,0x80 1727 b Lctr32_enc8x_done 1728 1729.align 5 1730Lctr32_enc8x_seven: 1731 vcipherlast $out0,$out0,$in1 1732 vcipherlast $out1,$out1,$in2 1733 vcipherlast $out2,$out2,$in3 1734 vcipherlast $out3,$out3,$in4 1735 vcipherlast $out4,$out4,$in5 1736 vcipherlast $out5,$out5,$in6 1737 vcipherlast $out6,$out6,$in7 1738 1739 le?vperm $out0,$out0,$out0,$inpperm 1740 le?vperm $out1,$out1,$out1,$inpperm 1741 stvx_u $out0,$x00,$out 1742 le?vperm $out2,$out2,$out2,$inpperm 1743 stvx_u $out1,$x10,$out 1744 le?vperm $out3,$out3,$out3,$inpperm 1745 stvx_u $out2,$x20,$out 1746 le?vperm $out4,$out4,$out4,$inpperm 1747 stvx_u $out3,$x30,$out 1748 le?vperm $out5,$out5,$out5,$inpperm 1749 stvx_u $out4,$x40,$out 1750 le?vperm $out6,$out6,$out6,$inpperm 1751 stvx_u $out5,$x50,$out 1752 stvx_u $out6,$x60,$out 1753 addi $out,$out,0x70 1754 b Lctr32_enc8x_done 1755 1756.align 5 1757Lctr32_enc8x_six: 1758 vcipherlast $out0,$out0,$in2 1759 vcipherlast $out1,$out1,$in3 1760 vcipherlast $out2,$out2,$in4 1761 vcipherlast $out3,$out3,$in5 1762 vcipherlast $out4,$out4,$in6 1763 vcipherlast $out5,$out5,$in7 1764 1765 le?vperm $out0,$out0,$out0,$inpperm 1766 le?vperm $out1,$out1,$out1,$inpperm 1767 stvx_u $out0,$x00,$out 1768 le?vperm $out2,$out2,$out2,$inpperm 1769 stvx_u $out1,$x10,$out 1770 le?vperm $out3,$out3,$out3,$inpperm 1771 stvx_u $out2,$x20,$out 1772 le?vperm $out4,$out4,$out4,$inpperm 1773 stvx_u $out3,$x30,$out 1774 le?vperm $out5,$out5,$out5,$inpperm 1775 stvx_u $out4,$x40,$out 1776 stvx_u $out5,$x50,$out 1777 addi $out,$out,0x60 1778 b Lctr32_enc8x_done 1779 1780.align 5 1781Lctr32_enc8x_five: 1782 vcipherlast $out0,$out0,$in3 1783 vcipherlast $out1,$out1,$in4 1784 vcipherlast $out2,$out2,$in5 1785 vcipherlast $out3,$out3,$in6 1786 vcipherlast $out4,$out4,$in7 1787 1788 le?vperm $out0,$out0,$out0,$inpperm 1789 le?vperm $out1,$out1,$out1,$inpperm 1790 stvx_u $out0,$x00,$out 1791 le?vperm $out2,$out2,$out2,$inpperm 1792 stvx_u $out1,$x10,$out 1793 le?vperm $out3,$out3,$out3,$inpperm 1794 stvx_u $out2,$x20,$out 1795 le?vperm $out4,$out4,$out4,$inpperm 1796 stvx_u $out3,$x30,$out 1797 stvx_u $out4,$x40,$out 1798 addi $out,$out,0x50 1799 b Lctr32_enc8x_done 1800 1801.align 5 1802Lctr32_enc8x_four: 1803 vcipherlast $out0,$out0,$in4 1804 vcipherlast $out1,$out1,$in5 1805 vcipherlast $out2,$out2,$in6 1806 vcipherlast $out3,$out3,$in7 1807 1808 le?vperm $out0,$out0,$out0,$inpperm 1809 le?vperm $out1,$out1,$out1,$inpperm 1810 stvx_u $out0,$x00,$out 1811 le?vperm $out2,$out2,$out2,$inpperm 1812 stvx_u $out1,$x10,$out 1813 le?vperm $out3,$out3,$out3,$inpperm 1814 stvx_u $out2,$x20,$out 1815 stvx_u $out3,$x30,$out 1816 addi $out,$out,0x40 1817 b Lctr32_enc8x_done 1818 1819.align 5 1820Lctr32_enc8x_three: 1821 vcipherlast $out0,$out0,$in5 1822 vcipherlast $out1,$out1,$in6 1823 vcipherlast $out2,$out2,$in7 1824 1825 le?vperm $out0,$out0,$out0,$inpperm 1826 le?vperm $out1,$out1,$out1,$inpperm 1827 stvx_u $out0,$x00,$out 1828 le?vperm $out2,$out2,$out2,$inpperm 1829 stvx_u $out1,$x10,$out 1830 stvx_u $out2,$x20,$out 1831 addi $out,$out,0x30 1832 b Lctr32_enc8x_done 1833 1834.align 5 1835Lctr32_enc8x_two: 1836 vcipherlast $out0,$out0,$in6 1837 vcipherlast $out1,$out1,$in7 1838 1839 le?vperm $out0,$out0,$out0,$inpperm 1840 le?vperm $out1,$out1,$out1,$inpperm 1841 stvx_u $out0,$x00,$out 1842 stvx_u $out1,$x10,$out 1843 addi $out,$out,0x20 1844 b Lctr32_enc8x_done 1845 1846.align 5 1847Lctr32_enc8x_one: 1848 vcipherlast $out0,$out0,$in7 1849 1850 le?vperm $out0,$out0,$out0,$inpperm 1851 stvx_u $out0,0,$out 1852 addi $out,$out,0x10 1853 1854Lctr32_enc8x_done: 1855 li r10,`$FRAME+15` 1856 li r11,`$FRAME+31` 1857 stvx $inpperm,r10,$sp # wipe copies of round keys 1858 addi r10,r10,32 1859 stvx $inpperm,r11,$sp 1860 addi r11,r11,32 1861 stvx $inpperm,r10,$sp 1862 addi r10,r10,32 1863 stvx $inpperm,r11,$sp 1864 addi r11,r11,32 1865 stvx $inpperm,r10,$sp 1866 addi r10,r10,32 1867 stvx $inpperm,r11,$sp 1868 addi r11,r11,32 1869 stvx $inpperm,r10,$sp 1870 addi r10,r10,32 1871 stvx $inpperm,r11,$sp 1872 addi r11,r11,32 1873 1874 mtspr 256,$vrsave 1875 lvx v20,r10,$sp # ABI says so 1876 addi r10,r10,32 1877 lvx v21,r11,$sp 1878 addi r11,r11,32 1879 lvx v22,r10,$sp 1880 addi r10,r10,32 1881 lvx v23,r11,$sp 1882 addi r11,r11,32 1883 lvx v24,r10,$sp 1884 addi r10,r10,32 1885 lvx v25,r11,$sp 1886 addi r11,r11,32 1887 lvx v26,r10,$sp 1888 addi r10,r10,32 1889 lvx v27,r11,$sp 1890 addi r11,r11,32 1891 lvx v28,r10,$sp 1892 addi r10,r10,32 1893 lvx v29,r11,$sp 1894 addi r11,r11,32 1895 lvx v30,r10,$sp 1896 lvx v31,r11,$sp 1897 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1898 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1899 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1900 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1901 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1902 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1903 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1904 blr 1905 .long 0 1906 .byte 0,12,0x04,0,0x80,6,6,0 1907 .long 0 1908.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1909___ 1910}} }}} 1911 1912######################################################################### 1913{{{ # XTS procedures # 1914# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1915# const AES_KEY *key1, const AES_KEY *key2, # 1916# [const] unsigned char iv[16]); # 1917# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1918# input tweak value is assumed to be encrypted already, and last tweak # 1919# value, one suitable for consecutive call on same chunk of data, is # 1920# written back to original buffer. In addition, in "tweak chaining" # 1921# mode only complete input blocks are processed. # 1922 1923my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1924my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1925my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1926my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1927my $taillen = $key2; 1928 1929 ($inp,$idx) = ($idx,$inp); # reassign 1930 1931$code.=<<___; 1932.globl .${prefix}_xts_encrypt 1933.align 5 1934.${prefix}_xts_encrypt: 1935 mr $inp,r3 # reassign 1936 li r3,-1 1937 ${UCMP}i $len,16 1938 bltlr- 1939 1940 lis r0,0xfff0 1941 mfspr r12,256 # save vrsave 1942 li r11,0 1943 mtspr 256,r0 1944 1945 vspltisb $seven,0x07 # 0x070707..07 1946 le?lvsl $leperm,r11,r11 1947 le?vspltisb $tmp,0x0f 1948 le?vxor $leperm,$leperm,$seven 1949 1950 li $idx,15 1951 lvx $tweak,0,$ivp # load [unaligned] iv 1952 lvsl $inpperm,0,$ivp 1953 lvx $inptail,$idx,$ivp 1954 le?vxor $inpperm,$inpperm,$tmp 1955 vperm $tweak,$tweak,$inptail,$inpperm 1956 1957 neg r11,$inp 1958 lvsr $inpperm,0,r11 # prepare for unaligned load 1959 lvx $inout,0,$inp 1960 addi $inp,$inp,15 # 15 is not typo 1961 le?vxor $inpperm,$inpperm,$tmp 1962 1963 ${UCMP}i $key2,0 # key2==NULL? 1964 beq Lxts_enc_no_key2 1965 1966 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 1967 lwz $rounds,240($key2) 1968 srwi $rounds,$rounds,1 1969 subi $rounds,$rounds,1 1970 li $idx,16 1971 1972 lvx $rndkey0,0,$key2 1973 lvx $rndkey1,$idx,$key2 1974 addi $idx,$idx,16 1975 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1976 vxor $tweak,$tweak,$rndkey0 1977 lvx $rndkey0,$idx,$key2 1978 addi $idx,$idx,16 1979 mtctr $rounds 1980 1981Ltweak_xts_enc: 1982 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1983 vcipher $tweak,$tweak,$rndkey1 1984 lvx $rndkey1,$idx,$key2 1985 addi $idx,$idx,16 1986 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1987 vcipher $tweak,$tweak,$rndkey0 1988 lvx $rndkey0,$idx,$key2 1989 addi $idx,$idx,16 1990 bdnz Ltweak_xts_enc 1991 1992 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1993 vcipher $tweak,$tweak,$rndkey1 1994 lvx $rndkey1,$idx,$key2 1995 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1996 vcipherlast $tweak,$tweak,$rndkey0 1997 1998 li $ivp,0 # don't chain the tweak 1999 b Lxts_enc 2000 2001Lxts_enc_no_key2: 2002 li $idx,-16 2003 and $len,$len,$idx # in "tweak chaining" 2004 # mode only complete 2005 # blocks are processed 2006Lxts_enc: 2007 lvx $inptail,0,$inp 2008 addi $inp,$inp,16 2009 2010 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2011 lwz $rounds,240($key1) 2012 srwi $rounds,$rounds,1 2013 subi $rounds,$rounds,1 2014 li $idx,16 2015 2016 vslb $eighty7,$seven,$seven # 0x808080..80 2017 vor $eighty7,$eighty7,$seven # 0x878787..87 2018 vspltisb $tmp,1 # 0x010101..01 2019 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2020 2021 ${UCMP}i $len,96 2022 bge _aesp8_xts_encrypt6x 2023 2024 andi. $taillen,$len,15 2025 subic r0,$len,32 2026 subi $taillen,$taillen,16 2027 subfe r0,r0,r0 2028 and r0,r0,$taillen 2029 add $inp,$inp,r0 2030 2031 lvx $rndkey0,0,$key1 2032 lvx $rndkey1,$idx,$key1 2033 addi $idx,$idx,16 2034 vperm $inout,$inout,$inptail,$inpperm 2035 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2036 vxor $inout,$inout,$tweak 2037 vxor $inout,$inout,$rndkey0 2038 lvx $rndkey0,$idx,$key1 2039 addi $idx,$idx,16 2040 mtctr $rounds 2041 b Loop_xts_enc 2042 2043.align 5 2044Loop_xts_enc: 2045 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2046 vcipher $inout,$inout,$rndkey1 2047 lvx $rndkey1,$idx,$key1 2048 addi $idx,$idx,16 2049 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2050 vcipher $inout,$inout,$rndkey0 2051 lvx $rndkey0,$idx,$key1 2052 addi $idx,$idx,16 2053 bdnz Loop_xts_enc 2054 2055 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2056 vcipher $inout,$inout,$rndkey1 2057 lvx $rndkey1,$idx,$key1 2058 li $idx,16 2059 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2060 vxor $rndkey0,$rndkey0,$tweak 2061 vcipherlast $output,$inout,$rndkey0 2062 2063 le?vperm $tmp,$output,$output,$leperm 2064 be?nop 2065 le?stvx_u $tmp,0,$out 2066 be?stvx_u $output,0,$out 2067 addi $out,$out,16 2068 2069 subic. $len,$len,16 2070 beq Lxts_enc_done 2071 2072 vmr $inout,$inptail 2073 lvx $inptail,0,$inp 2074 addi $inp,$inp,16 2075 lvx $rndkey0,0,$key1 2076 lvx $rndkey1,$idx,$key1 2077 addi $idx,$idx,16 2078 2079 subic r0,$len,32 2080 subfe r0,r0,r0 2081 and r0,r0,$taillen 2082 add $inp,$inp,r0 2083 2084 vsrab $tmp,$tweak,$seven # next tweak value 2085 vaddubm $tweak,$tweak,$tweak 2086 vsldoi $tmp,$tmp,$tmp,15 2087 vand $tmp,$tmp,$eighty7 2088 vxor $tweak,$tweak,$tmp 2089 2090 vperm $inout,$inout,$inptail,$inpperm 2091 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2092 vxor $inout,$inout,$tweak 2093 vxor $output,$output,$rndkey0 # just in case $len<16 2094 vxor $inout,$inout,$rndkey0 2095 lvx $rndkey0,$idx,$key1 2096 addi $idx,$idx,16 2097 2098 mtctr $rounds 2099 ${UCMP}i $len,16 2100 bge Loop_xts_enc 2101 2102 vxor $output,$output,$tweak 2103 lvsr $inpperm,0,$len # $inpperm is no longer needed 2104 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2105 vspltisb $tmp,-1 2106 vperm $inptail,$inptail,$tmp,$inpperm 2107 vsel $inout,$inout,$output,$inptail 2108 2109 subi r11,$out,17 2110 subi $out,$out,16 2111 mtctr $len 2112 li $len,16 2113Loop_xts_enc_steal: 2114 lbzu r0,1(r11) 2115 stb r0,16(r11) 2116 bdnz Loop_xts_enc_steal 2117 2118 mtctr $rounds 2119 b Loop_xts_enc # one more time... 2120 2121Lxts_enc_done: 2122 ${UCMP}i $ivp,0 2123 beq Lxts_enc_ret 2124 2125 vsrab $tmp,$tweak,$seven # next tweak value 2126 vaddubm $tweak,$tweak,$tweak 2127 vsldoi $tmp,$tmp,$tmp,15 2128 vand $tmp,$tmp,$eighty7 2129 vxor $tweak,$tweak,$tmp 2130 2131 le?vperm $tweak,$tweak,$tweak,$leperm 2132 stvx_u $tweak,0,$ivp 2133 2134Lxts_enc_ret: 2135 mtspr 256,r12 # restore vrsave 2136 li r3,0 2137 blr 2138 .long 0 2139 .byte 0,12,0x04,0,0x80,6,6,0 2140 .long 0 2141.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2142 2143.globl .${prefix}_xts_decrypt 2144.align 5 2145.${prefix}_xts_decrypt: 2146 mr $inp,r3 # reassign 2147 li r3,-1 2148 ${UCMP}i $len,16 2149 bltlr- 2150 2151 lis r0,0xfff8 2152 mfspr r12,256 # save vrsave 2153 li r11,0 2154 mtspr 256,r0 2155 2156 andi. r0,$len,15 2157 neg r0,r0 2158 andi. r0,r0,16 2159 sub $len,$len,r0 2160 2161 vspltisb $seven,0x07 # 0x070707..07 2162 le?lvsl $leperm,r11,r11 2163 le?vspltisb $tmp,0x0f 2164 le?vxor $leperm,$leperm,$seven 2165 2166 li $idx,15 2167 lvx $tweak,0,$ivp # load [unaligned] iv 2168 lvsl $inpperm,0,$ivp 2169 lvx $inptail,$idx,$ivp 2170 le?vxor $inpperm,$inpperm,$tmp 2171 vperm $tweak,$tweak,$inptail,$inpperm 2172 2173 neg r11,$inp 2174 lvsr $inpperm,0,r11 # prepare for unaligned load 2175 lvx $inout,0,$inp 2176 addi $inp,$inp,15 # 15 is not typo 2177 le?vxor $inpperm,$inpperm,$tmp 2178 2179 ${UCMP}i $key2,0 # key2==NULL? 2180 beq Lxts_dec_no_key2 2181 2182 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2183 lwz $rounds,240($key2) 2184 srwi $rounds,$rounds,1 2185 subi $rounds,$rounds,1 2186 li $idx,16 2187 2188 lvx $rndkey0,0,$key2 2189 lvx $rndkey1,$idx,$key2 2190 addi $idx,$idx,16 2191 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2192 vxor $tweak,$tweak,$rndkey0 2193 lvx $rndkey0,$idx,$key2 2194 addi $idx,$idx,16 2195 mtctr $rounds 2196 2197Ltweak_xts_dec: 2198 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2199 vcipher $tweak,$tweak,$rndkey1 2200 lvx $rndkey1,$idx,$key2 2201 addi $idx,$idx,16 2202 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2203 vcipher $tweak,$tweak,$rndkey0 2204 lvx $rndkey0,$idx,$key2 2205 addi $idx,$idx,16 2206 bdnz Ltweak_xts_dec 2207 2208 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2209 vcipher $tweak,$tweak,$rndkey1 2210 lvx $rndkey1,$idx,$key2 2211 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2212 vcipherlast $tweak,$tweak,$rndkey0 2213 2214 li $ivp,0 # don't chain the tweak 2215 b Lxts_dec 2216 2217Lxts_dec_no_key2: 2218 neg $idx,$len 2219 andi. $idx,$idx,15 2220 add $len,$len,$idx # in "tweak chaining" 2221 # mode only complete 2222 # blocks are processed 2223Lxts_dec: 2224 lvx $inptail,0,$inp 2225 addi $inp,$inp,16 2226 2227 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2228 lwz $rounds,240($key1) 2229 srwi $rounds,$rounds,1 2230 subi $rounds,$rounds,1 2231 li $idx,16 2232 2233 vslb $eighty7,$seven,$seven # 0x808080..80 2234 vor $eighty7,$eighty7,$seven # 0x878787..87 2235 vspltisb $tmp,1 # 0x010101..01 2236 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2237 2238 ${UCMP}i $len,96 2239 bge _aesp8_xts_decrypt6x 2240 2241 lvx $rndkey0,0,$key1 2242 lvx $rndkey1,$idx,$key1 2243 addi $idx,$idx,16 2244 vperm $inout,$inout,$inptail,$inpperm 2245 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2246 vxor $inout,$inout,$tweak 2247 vxor $inout,$inout,$rndkey0 2248 lvx $rndkey0,$idx,$key1 2249 addi $idx,$idx,16 2250 mtctr $rounds 2251 2252 ${UCMP}i $len,16 2253 blt Ltail_xts_dec 2254 be?b Loop_xts_dec 2255 2256.align 5 2257Loop_xts_dec: 2258 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2259 vncipher $inout,$inout,$rndkey1 2260 lvx $rndkey1,$idx,$key1 2261 addi $idx,$idx,16 2262 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2263 vncipher $inout,$inout,$rndkey0 2264 lvx $rndkey0,$idx,$key1 2265 addi $idx,$idx,16 2266 bdnz Loop_xts_dec 2267 2268 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2269 vncipher $inout,$inout,$rndkey1 2270 lvx $rndkey1,$idx,$key1 2271 li $idx,16 2272 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2273 vxor $rndkey0,$rndkey0,$tweak 2274 vncipherlast $output,$inout,$rndkey0 2275 2276 le?vperm $tmp,$output,$output,$leperm 2277 be?nop 2278 le?stvx_u $tmp,0,$out 2279 be?stvx_u $output,0,$out 2280 addi $out,$out,16 2281 2282 subic. $len,$len,16 2283 beq Lxts_dec_done 2284 2285 vmr $inout,$inptail 2286 lvx $inptail,0,$inp 2287 addi $inp,$inp,16 2288 lvx $rndkey0,0,$key1 2289 lvx $rndkey1,$idx,$key1 2290 addi $idx,$idx,16 2291 2292 vsrab $tmp,$tweak,$seven # next tweak value 2293 vaddubm $tweak,$tweak,$tweak 2294 vsldoi $tmp,$tmp,$tmp,15 2295 vand $tmp,$tmp,$eighty7 2296 vxor $tweak,$tweak,$tmp 2297 2298 vperm $inout,$inout,$inptail,$inpperm 2299 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2300 vxor $inout,$inout,$tweak 2301 vxor $inout,$inout,$rndkey0 2302 lvx $rndkey0,$idx,$key1 2303 addi $idx,$idx,16 2304 2305 mtctr $rounds 2306 ${UCMP}i $len,16 2307 bge Loop_xts_dec 2308 2309Ltail_xts_dec: 2310 vsrab $tmp,$tweak,$seven # next tweak value 2311 vaddubm $tweak1,$tweak,$tweak 2312 vsldoi $tmp,$tmp,$tmp,15 2313 vand $tmp,$tmp,$eighty7 2314 vxor $tweak1,$tweak1,$tmp 2315 2316 subi $inp,$inp,16 2317 add $inp,$inp,$len 2318 2319 vxor $inout,$inout,$tweak # :-( 2320 vxor $inout,$inout,$tweak1 # :-) 2321 2322Loop_xts_dec_short: 2323 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2324 vncipher $inout,$inout,$rndkey1 2325 lvx $rndkey1,$idx,$key1 2326 addi $idx,$idx,16 2327 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2328 vncipher $inout,$inout,$rndkey0 2329 lvx $rndkey0,$idx,$key1 2330 addi $idx,$idx,16 2331 bdnz Loop_xts_dec_short 2332 2333 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2334 vncipher $inout,$inout,$rndkey1 2335 lvx $rndkey1,$idx,$key1 2336 li $idx,16 2337 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2338 vxor $rndkey0,$rndkey0,$tweak1 2339 vncipherlast $output,$inout,$rndkey0 2340 2341 le?vperm $tmp,$output,$output,$leperm 2342 be?nop 2343 le?stvx_u $tmp,0,$out 2344 be?stvx_u $output,0,$out 2345 2346 vmr $inout,$inptail 2347 lvx $inptail,0,$inp 2348 #addi $inp,$inp,16 2349 lvx $rndkey0,0,$key1 2350 lvx $rndkey1,$idx,$key1 2351 addi $idx,$idx,16 2352 vperm $inout,$inout,$inptail,$inpperm 2353 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2354 2355 lvsr $inpperm,0,$len # $inpperm is no longer needed 2356 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2357 vspltisb $tmp,-1 2358 vperm $inptail,$inptail,$tmp,$inpperm 2359 vsel $inout,$inout,$output,$inptail 2360 2361 vxor $rndkey0,$rndkey0,$tweak 2362 vxor $inout,$inout,$rndkey0 2363 lvx $rndkey0,$idx,$key1 2364 addi $idx,$idx,16 2365 2366 subi r11,$out,1 2367 mtctr $len 2368 li $len,16 2369Loop_xts_dec_steal: 2370 lbzu r0,1(r11) 2371 stb r0,16(r11) 2372 bdnz Loop_xts_dec_steal 2373 2374 mtctr $rounds 2375 b Loop_xts_dec # one more time... 2376 2377Lxts_dec_done: 2378 ${UCMP}i $ivp,0 2379 beq Lxts_dec_ret 2380 2381 vsrab $tmp,$tweak,$seven # next tweak value 2382 vaddubm $tweak,$tweak,$tweak 2383 vsldoi $tmp,$tmp,$tmp,15 2384 vand $tmp,$tmp,$eighty7 2385 vxor $tweak,$tweak,$tmp 2386 2387 le?vperm $tweak,$tweak,$tweak,$leperm 2388 stvx_u $tweak,0,$ivp 2389 2390Lxts_dec_ret: 2391 mtspr 256,r12 # restore vrsave 2392 li r3,0 2393 blr 2394 .long 0 2395 .byte 0,12,0x04,0,0x80,6,6,0 2396 .long 0 2397.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2398___ 2399######################################################################### 2400{{ # Optimized XTS procedures # 2401my $key_=$key2; 2402my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2403 $x00=0 if ($flavour =~ /osx/); 2404my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); 2405my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2406my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2407my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2408 # v26-v31 last 6 round keys 2409my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2410my $taillen=$x70; 2411 2412$code.=<<___; 2413.align 5 2414_aesp8_xts_encrypt6x: 2415 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2416 mflr r11 2417 li r7,`$FRAME+8*16+15` 2418 li r3,`$FRAME+8*16+31` 2419 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2420 stvx v20,r7,$sp # ABI says so 2421 addi r7,r7,32 2422 stvx v21,r3,$sp 2423 addi r3,r3,32 2424 stvx v22,r7,$sp 2425 addi r7,r7,32 2426 stvx v23,r3,$sp 2427 addi r3,r3,32 2428 stvx v24,r7,$sp 2429 addi r7,r7,32 2430 stvx v25,r3,$sp 2431 addi r3,r3,32 2432 stvx v26,r7,$sp 2433 addi r7,r7,32 2434 stvx v27,r3,$sp 2435 addi r3,r3,32 2436 stvx v28,r7,$sp 2437 addi r7,r7,32 2438 stvx v29,r3,$sp 2439 addi r3,r3,32 2440 stvx v30,r7,$sp 2441 stvx v31,r3,$sp 2442 li r0,-1 2443 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2444 li $x10,0x10 2445 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2446 li $x20,0x20 2447 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2448 li $x30,0x30 2449 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2450 li $x40,0x40 2451 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2452 li $x50,0x50 2453 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2454 li $x60,0x60 2455 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2456 li $x70,0x70 2457 mtspr 256,r0 2458 2459 subi $rounds,$rounds,3 # -4 in total 2460 2461 lvx $rndkey0,$x00,$key1 # load key schedule 2462 lvx v30,$x10,$key1 2463 addi $key1,$key1,0x20 2464 lvx v31,$x00,$key1 2465 ?vperm $rndkey0,$rndkey0,v30,$keyperm 2466 addi $key_,$sp,$FRAME+15 2467 mtctr $rounds 2468 2469Load_xts_enc_key: 2470 ?vperm v24,v30,v31,$keyperm 2471 lvx v30,$x10,$key1 2472 addi $key1,$key1,0x20 2473 stvx v24,$x00,$key_ # off-load round[1] 2474 ?vperm v25,v31,v30,$keyperm 2475 lvx v31,$x00,$key1 2476 stvx v25,$x10,$key_ # off-load round[2] 2477 addi $key_,$key_,0x20 2478 bdnz Load_xts_enc_key 2479 2480 lvx v26,$x10,$key1 2481 ?vperm v24,v30,v31,$keyperm 2482 lvx v27,$x20,$key1 2483 stvx v24,$x00,$key_ # off-load round[3] 2484 ?vperm v25,v31,v26,$keyperm 2485 lvx v28,$x30,$key1 2486 stvx v25,$x10,$key_ # off-load round[4] 2487 addi $key_,$sp,$FRAME+15 # rewind $key_ 2488 ?vperm v26,v26,v27,$keyperm 2489 lvx v29,$x40,$key1 2490 ?vperm v27,v27,v28,$keyperm 2491 lvx v30,$x50,$key1 2492 ?vperm v28,v28,v29,$keyperm 2493 lvx v31,$x60,$key1 2494 ?vperm v29,v29,v30,$keyperm 2495 lvx $twk5,$x70,$key1 # borrow $twk5 2496 ?vperm v30,v30,v31,$keyperm 2497 lvx v24,$x00,$key_ # pre-load round[1] 2498 ?vperm v31,v31,$twk5,$keyperm 2499 lvx v25,$x10,$key_ # pre-load round[2] 2500 2501 vperm $in0,$inout,$inptail,$inpperm 2502 subi $inp,$inp,31 # undo "caller" 2503 vxor $twk0,$tweak,$rndkey0 2504 vsrab $tmp,$tweak,$seven # next tweak value 2505 vaddubm $tweak,$tweak,$tweak 2506 vsldoi $tmp,$tmp,$tmp,15 2507 vand $tmp,$tmp,$eighty7 2508 vxor $out0,$in0,$twk0 2509 vxor $tweak,$tweak,$tmp 2510 2511 lvx_u $in1,$x10,$inp 2512 vxor $twk1,$tweak,$rndkey0 2513 vsrab $tmp,$tweak,$seven # next tweak value 2514 vaddubm $tweak,$tweak,$tweak 2515 vsldoi $tmp,$tmp,$tmp,15 2516 le?vperm $in1,$in1,$in1,$leperm 2517 vand $tmp,$tmp,$eighty7 2518 vxor $out1,$in1,$twk1 2519 vxor $tweak,$tweak,$tmp 2520 2521 lvx_u $in2,$x20,$inp 2522 andi. $taillen,$len,15 2523 vxor $twk2,$tweak,$rndkey0 2524 vsrab $tmp,$tweak,$seven # next tweak value 2525 vaddubm $tweak,$tweak,$tweak 2526 vsldoi $tmp,$tmp,$tmp,15 2527 le?vperm $in2,$in2,$in2,$leperm 2528 vand $tmp,$tmp,$eighty7 2529 vxor $out2,$in2,$twk2 2530 vxor $tweak,$tweak,$tmp 2531 2532 lvx_u $in3,$x30,$inp 2533 sub $len,$len,$taillen 2534 vxor $twk3,$tweak,$rndkey0 2535 vsrab $tmp,$tweak,$seven # next tweak value 2536 vaddubm $tweak,$tweak,$tweak 2537 vsldoi $tmp,$tmp,$tmp,15 2538 le?vperm $in3,$in3,$in3,$leperm 2539 vand $tmp,$tmp,$eighty7 2540 vxor $out3,$in3,$twk3 2541 vxor $tweak,$tweak,$tmp 2542 2543 lvx_u $in4,$x40,$inp 2544 subi $len,$len,0x60 2545 vxor $twk4,$tweak,$rndkey0 2546 vsrab $tmp,$tweak,$seven # next tweak value 2547 vaddubm $tweak,$tweak,$tweak 2548 vsldoi $tmp,$tmp,$tmp,15 2549 le?vperm $in4,$in4,$in4,$leperm 2550 vand $tmp,$tmp,$eighty7 2551 vxor $out4,$in4,$twk4 2552 vxor $tweak,$tweak,$tmp 2553 2554 lvx_u $in5,$x50,$inp 2555 addi $inp,$inp,0x60 2556 vxor $twk5,$tweak,$rndkey0 2557 vsrab $tmp,$tweak,$seven # next tweak value 2558 vaddubm $tweak,$tweak,$tweak 2559 vsldoi $tmp,$tmp,$tmp,15 2560 le?vperm $in5,$in5,$in5,$leperm 2561 vand $tmp,$tmp,$eighty7 2562 vxor $out5,$in5,$twk5 2563 vxor $tweak,$tweak,$tmp 2564 2565 vxor v31,v31,$rndkey0 2566 mtctr $rounds 2567 b Loop_xts_enc6x 2568 2569.align 5 2570Loop_xts_enc6x: 2571 vcipher $out0,$out0,v24 2572 vcipher $out1,$out1,v24 2573 vcipher $out2,$out2,v24 2574 vcipher $out3,$out3,v24 2575 vcipher $out4,$out4,v24 2576 vcipher $out5,$out5,v24 2577 lvx v24,$x20,$key_ # round[3] 2578 addi $key_,$key_,0x20 2579 2580 vcipher $out0,$out0,v25 2581 vcipher $out1,$out1,v25 2582 vcipher $out2,$out2,v25 2583 vcipher $out3,$out3,v25 2584 vcipher $out4,$out4,v25 2585 vcipher $out5,$out5,v25 2586 lvx v25,$x10,$key_ # round[4] 2587 bdnz Loop_xts_enc6x 2588 2589 subic $len,$len,96 # $len-=96 2590 vxor $in0,$twk0,v31 # xor with last round key 2591 vcipher $out0,$out0,v24 2592 vcipher $out1,$out1,v24 2593 vsrab $tmp,$tweak,$seven # next tweak value 2594 vxor $twk0,$tweak,$rndkey0 2595 vaddubm $tweak,$tweak,$tweak 2596 vcipher $out2,$out2,v24 2597 vcipher $out3,$out3,v24 2598 vsldoi $tmp,$tmp,$tmp,15 2599 vcipher $out4,$out4,v24 2600 vcipher $out5,$out5,v24 2601 2602 subfe. r0,r0,r0 # borrow?-1:0 2603 vand $tmp,$tmp,$eighty7 2604 vcipher $out0,$out0,v25 2605 vcipher $out1,$out1,v25 2606 vxor $tweak,$tweak,$tmp 2607 vcipher $out2,$out2,v25 2608 vcipher $out3,$out3,v25 2609 vxor $in1,$twk1,v31 2610 vsrab $tmp,$tweak,$seven # next tweak value 2611 vxor $twk1,$tweak,$rndkey0 2612 vcipher $out4,$out4,v25 2613 vcipher $out5,$out5,v25 2614 2615 and r0,r0,$len 2616 vaddubm $tweak,$tweak,$tweak 2617 vsldoi $tmp,$tmp,$tmp,15 2618 vcipher $out0,$out0,v26 2619 vcipher $out1,$out1,v26 2620 vand $tmp,$tmp,$eighty7 2621 vcipher $out2,$out2,v26 2622 vcipher $out3,$out3,v26 2623 vxor $tweak,$tweak,$tmp 2624 vcipher $out4,$out4,v26 2625 vcipher $out5,$out5,v26 2626 2627 add $inp,$inp,r0 # $inp is adjusted in such 2628 # way that at exit from the 2629 # loop inX-in5 are loaded 2630 # with last "words" 2631 vxor $in2,$twk2,v31 2632 vsrab $tmp,$tweak,$seven # next tweak value 2633 vxor $twk2,$tweak,$rndkey0 2634 vaddubm $tweak,$tweak,$tweak 2635 vcipher $out0,$out0,v27 2636 vcipher $out1,$out1,v27 2637 vsldoi $tmp,$tmp,$tmp,15 2638 vcipher $out2,$out2,v27 2639 vcipher $out3,$out3,v27 2640 vand $tmp,$tmp,$eighty7 2641 vcipher $out4,$out4,v27 2642 vcipher $out5,$out5,v27 2643 2644 addi $key_,$sp,$FRAME+15 # rewind $key_ 2645 vxor $tweak,$tweak,$tmp 2646 vcipher $out0,$out0,v28 2647 vcipher $out1,$out1,v28 2648 vxor $in3,$twk3,v31 2649 vsrab $tmp,$tweak,$seven # next tweak value 2650 vxor $twk3,$tweak,$rndkey0 2651 vcipher $out2,$out2,v28 2652 vcipher $out3,$out3,v28 2653 vaddubm $tweak,$tweak,$tweak 2654 vsldoi $tmp,$tmp,$tmp,15 2655 vcipher $out4,$out4,v28 2656 vcipher $out5,$out5,v28 2657 lvx v24,$x00,$key_ # re-pre-load round[1] 2658 vand $tmp,$tmp,$eighty7 2659 2660 vcipher $out0,$out0,v29 2661 vcipher $out1,$out1,v29 2662 vxor $tweak,$tweak,$tmp 2663 vcipher $out2,$out2,v29 2664 vcipher $out3,$out3,v29 2665 vxor $in4,$twk4,v31 2666 vsrab $tmp,$tweak,$seven # next tweak value 2667 vxor $twk4,$tweak,$rndkey0 2668 vcipher $out4,$out4,v29 2669 vcipher $out5,$out5,v29 2670 lvx v25,$x10,$key_ # re-pre-load round[2] 2671 vaddubm $tweak,$tweak,$tweak 2672 vsldoi $tmp,$tmp,$tmp,15 2673 2674 vcipher $out0,$out0,v30 2675 vcipher $out1,$out1,v30 2676 vand $tmp,$tmp,$eighty7 2677 vcipher $out2,$out2,v30 2678 vcipher $out3,$out3,v30 2679 vxor $tweak,$tweak,$tmp 2680 vcipher $out4,$out4,v30 2681 vcipher $out5,$out5,v30 2682 vxor $in5,$twk5,v31 2683 vsrab $tmp,$tweak,$seven # next tweak value 2684 vxor $twk5,$tweak,$rndkey0 2685 2686 vcipherlast $out0,$out0,$in0 2687 lvx_u $in0,$x00,$inp # load next input block 2688 vaddubm $tweak,$tweak,$tweak 2689 vsldoi $tmp,$tmp,$tmp,15 2690 vcipherlast $out1,$out1,$in1 2691 lvx_u $in1,$x10,$inp 2692 vcipherlast $out2,$out2,$in2 2693 le?vperm $in0,$in0,$in0,$leperm 2694 lvx_u $in2,$x20,$inp 2695 vand $tmp,$tmp,$eighty7 2696 vcipherlast $out3,$out3,$in3 2697 le?vperm $in1,$in1,$in1,$leperm 2698 lvx_u $in3,$x30,$inp 2699 vcipherlast $out4,$out4,$in4 2700 le?vperm $in2,$in2,$in2,$leperm 2701 lvx_u $in4,$x40,$inp 2702 vxor $tweak,$tweak,$tmp 2703 vcipherlast $tmp,$out5,$in5 # last block might be needed 2704 # in stealing mode 2705 le?vperm $in3,$in3,$in3,$leperm 2706 lvx_u $in5,$x50,$inp 2707 addi $inp,$inp,0x60 2708 le?vperm $in4,$in4,$in4,$leperm 2709 le?vperm $in5,$in5,$in5,$leperm 2710 2711 le?vperm $out0,$out0,$out0,$leperm 2712 le?vperm $out1,$out1,$out1,$leperm 2713 stvx_u $out0,$x00,$out # store output 2714 vxor $out0,$in0,$twk0 2715 le?vperm $out2,$out2,$out2,$leperm 2716 stvx_u $out1,$x10,$out 2717 vxor $out1,$in1,$twk1 2718 le?vperm $out3,$out3,$out3,$leperm 2719 stvx_u $out2,$x20,$out 2720 vxor $out2,$in2,$twk2 2721 le?vperm $out4,$out4,$out4,$leperm 2722 stvx_u $out3,$x30,$out 2723 vxor $out3,$in3,$twk3 2724 le?vperm $out5,$tmp,$tmp,$leperm 2725 stvx_u $out4,$x40,$out 2726 vxor $out4,$in4,$twk4 2727 le?stvx_u $out5,$x50,$out 2728 be?stvx_u $tmp, $x50,$out 2729 vxor $out5,$in5,$twk5 2730 addi $out,$out,0x60 2731 2732 mtctr $rounds 2733 beq Loop_xts_enc6x # did $len-=96 borrow? 2734 2735 addic. $len,$len,0x60 2736 beq Lxts_enc6x_zero 2737 cmpwi $len,0x20 2738 blt Lxts_enc6x_one 2739 nop 2740 beq Lxts_enc6x_two 2741 cmpwi $len,0x40 2742 blt Lxts_enc6x_three 2743 nop 2744 beq Lxts_enc6x_four 2745 2746Lxts_enc6x_five: 2747 vxor $out0,$in1,$twk0 2748 vxor $out1,$in2,$twk1 2749 vxor $out2,$in3,$twk2 2750 vxor $out3,$in4,$twk3 2751 vxor $out4,$in5,$twk4 2752 2753 bl _aesp8_xts_enc5x 2754 2755 le?vperm $out0,$out0,$out0,$leperm 2756 vmr $twk0,$twk5 # unused tweak 2757 le?vperm $out1,$out1,$out1,$leperm 2758 stvx_u $out0,$x00,$out # store output 2759 le?vperm $out2,$out2,$out2,$leperm 2760 stvx_u $out1,$x10,$out 2761 le?vperm $out3,$out3,$out3,$leperm 2762 stvx_u $out2,$x20,$out 2763 vxor $tmp,$out4,$twk5 # last block prep for stealing 2764 le?vperm $out4,$out4,$out4,$leperm 2765 stvx_u $out3,$x30,$out 2766 stvx_u $out4,$x40,$out 2767 addi $out,$out,0x50 2768 bne Lxts_enc6x_steal 2769 b Lxts_enc6x_done 2770 2771.align 4 2772Lxts_enc6x_four: 2773 vxor $out0,$in2,$twk0 2774 vxor $out1,$in3,$twk1 2775 vxor $out2,$in4,$twk2 2776 vxor $out3,$in5,$twk3 2777 vxor $out4,$out4,$out4 2778 2779 bl _aesp8_xts_enc5x 2780 2781 le?vperm $out0,$out0,$out0,$leperm 2782 vmr $twk0,$twk4 # unused tweak 2783 le?vperm $out1,$out1,$out1,$leperm 2784 stvx_u $out0,$x00,$out # store output 2785 le?vperm $out2,$out2,$out2,$leperm 2786 stvx_u $out1,$x10,$out 2787 vxor $tmp,$out3,$twk4 # last block prep for stealing 2788 le?vperm $out3,$out3,$out3,$leperm 2789 stvx_u $out2,$x20,$out 2790 stvx_u $out3,$x30,$out 2791 addi $out,$out,0x40 2792 bne Lxts_enc6x_steal 2793 b Lxts_enc6x_done 2794 2795.align 4 2796Lxts_enc6x_three: 2797 vxor $out0,$in3,$twk0 2798 vxor $out1,$in4,$twk1 2799 vxor $out2,$in5,$twk2 2800 vxor $out3,$out3,$out3 2801 vxor $out4,$out4,$out4 2802 2803 bl _aesp8_xts_enc5x 2804 2805 le?vperm $out0,$out0,$out0,$leperm 2806 vmr $twk0,$twk3 # unused tweak 2807 le?vperm $out1,$out1,$out1,$leperm 2808 stvx_u $out0,$x00,$out # store output 2809 vxor $tmp,$out2,$twk3 # last block prep for stealing 2810 le?vperm $out2,$out2,$out2,$leperm 2811 stvx_u $out1,$x10,$out 2812 stvx_u $out2,$x20,$out 2813 addi $out,$out,0x30 2814 bne Lxts_enc6x_steal 2815 b Lxts_enc6x_done 2816 2817.align 4 2818Lxts_enc6x_two: 2819 vxor $out0,$in4,$twk0 2820 vxor $out1,$in5,$twk1 2821 vxor $out2,$out2,$out2 2822 vxor $out3,$out3,$out3 2823 vxor $out4,$out4,$out4 2824 2825 bl _aesp8_xts_enc5x 2826 2827 le?vperm $out0,$out0,$out0,$leperm 2828 vmr $twk0,$twk2 # unused tweak 2829 vxor $tmp,$out1,$twk2 # last block prep for stealing 2830 le?vperm $out1,$out1,$out1,$leperm 2831 stvx_u $out0,$x00,$out # store output 2832 stvx_u $out1,$x10,$out 2833 addi $out,$out,0x20 2834 bne Lxts_enc6x_steal 2835 b Lxts_enc6x_done 2836 2837.align 4 2838Lxts_enc6x_one: 2839 vxor $out0,$in5,$twk0 2840 nop 2841Loop_xts_enc1x: 2842 vcipher $out0,$out0,v24 2843 lvx v24,$x20,$key_ # round[3] 2844 addi $key_,$key_,0x20 2845 2846 vcipher $out0,$out0,v25 2847 lvx v25,$x10,$key_ # round[4] 2848 bdnz Loop_xts_enc1x 2849 2850 add $inp,$inp,$taillen 2851 cmpwi $taillen,0 2852 vcipher $out0,$out0,v24 2853 2854 subi $inp,$inp,16 2855 vcipher $out0,$out0,v25 2856 2857 lvsr $inpperm,0,$taillen 2858 vcipher $out0,$out0,v26 2859 2860 lvx_u $in0,0,$inp 2861 vcipher $out0,$out0,v27 2862 2863 addi $key_,$sp,$FRAME+15 # rewind $key_ 2864 vcipher $out0,$out0,v28 2865 lvx v24,$x00,$key_ # re-pre-load round[1] 2866 2867 vcipher $out0,$out0,v29 2868 lvx v25,$x10,$key_ # re-pre-load round[2] 2869 vxor $twk0,$twk0,v31 2870 2871 le?vperm $in0,$in0,$in0,$leperm 2872 vcipher $out0,$out0,v30 2873 2874 vperm $in0,$in0,$in0,$inpperm 2875 vcipherlast $out0,$out0,$twk0 2876 2877 vmr $twk0,$twk1 # unused tweak 2878 vxor $tmp,$out0,$twk1 # last block prep for stealing 2879 le?vperm $out0,$out0,$out0,$leperm 2880 stvx_u $out0,$x00,$out # store output 2881 addi $out,$out,0x10 2882 bne Lxts_enc6x_steal 2883 b Lxts_enc6x_done 2884 2885.align 4 2886Lxts_enc6x_zero: 2887 cmpwi $taillen,0 2888 beq Lxts_enc6x_done 2889 2890 add $inp,$inp,$taillen 2891 subi $inp,$inp,16 2892 lvx_u $in0,0,$inp 2893 lvsr $inpperm,0,$taillen # $in5 is no more 2894 le?vperm $in0,$in0,$in0,$leperm 2895 vperm $in0,$in0,$in0,$inpperm 2896 vxor $tmp,$tmp,$twk0 2897Lxts_enc6x_steal: 2898 vxor $in0,$in0,$twk0 2899 vxor $out0,$out0,$out0 2900 vspltisb $out1,-1 2901 vperm $out0,$out0,$out1,$inpperm 2902 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2903 2904 subi r30,$out,17 2905 subi $out,$out,16 2906 mtctr $taillen 2907Loop_xts_enc6x_steal: 2908 lbzu r0,1(r30) 2909 stb r0,16(r30) 2910 bdnz Loop_xts_enc6x_steal 2911 2912 li $taillen,0 2913 mtctr $rounds 2914 b Loop_xts_enc1x # one more time... 2915 2916.align 4 2917Lxts_enc6x_done: 2918 ${UCMP}i $ivp,0 2919 beq Lxts_enc6x_ret 2920 2921 vxor $tweak,$twk0,$rndkey0 2922 le?vperm $tweak,$tweak,$tweak,$leperm 2923 stvx_u $tweak,0,$ivp 2924 2925Lxts_enc6x_ret: 2926 mtlr r11 2927 li r10,`$FRAME+15` 2928 li r11,`$FRAME+31` 2929 stvx $seven,r10,$sp # wipe copies of round keys 2930 addi r10,r10,32 2931 stvx $seven,r11,$sp 2932 addi r11,r11,32 2933 stvx $seven,r10,$sp 2934 addi r10,r10,32 2935 stvx $seven,r11,$sp 2936 addi r11,r11,32 2937 stvx $seven,r10,$sp 2938 addi r10,r10,32 2939 stvx $seven,r11,$sp 2940 addi r11,r11,32 2941 stvx $seven,r10,$sp 2942 addi r10,r10,32 2943 stvx $seven,r11,$sp 2944 addi r11,r11,32 2945 2946 mtspr 256,$vrsave 2947 lvx v20,r10,$sp # ABI says so 2948 addi r10,r10,32 2949 lvx v21,r11,$sp 2950 addi r11,r11,32 2951 lvx v22,r10,$sp 2952 addi r10,r10,32 2953 lvx v23,r11,$sp 2954 addi r11,r11,32 2955 lvx v24,r10,$sp 2956 addi r10,r10,32 2957 lvx v25,r11,$sp 2958 addi r11,r11,32 2959 lvx v26,r10,$sp 2960 addi r10,r10,32 2961 lvx v27,r11,$sp 2962 addi r11,r11,32 2963 lvx v28,r10,$sp 2964 addi r10,r10,32 2965 lvx v29,r11,$sp 2966 addi r11,r11,32 2967 lvx v30,r10,$sp 2968 lvx v31,r11,$sp 2969 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2970 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2971 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2972 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2973 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2974 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2975 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 2976 blr 2977 .long 0 2978 .byte 0,12,0x04,1,0x80,6,6,0 2979 .long 0 2980 2981.align 5 2982_aesp8_xts_enc5x: 2983 vcipher $out0,$out0,v24 2984 vcipher $out1,$out1,v24 2985 vcipher $out2,$out2,v24 2986 vcipher $out3,$out3,v24 2987 vcipher $out4,$out4,v24 2988 lvx v24,$x20,$key_ # round[3] 2989 addi $key_,$key_,0x20 2990 2991 vcipher $out0,$out0,v25 2992 vcipher $out1,$out1,v25 2993 vcipher $out2,$out2,v25 2994 vcipher $out3,$out3,v25 2995 vcipher $out4,$out4,v25 2996 lvx v25,$x10,$key_ # round[4] 2997 bdnz _aesp8_xts_enc5x 2998 2999 add $inp,$inp,$taillen 3000 cmpwi $taillen,0 3001 vcipher $out0,$out0,v24 3002 vcipher $out1,$out1,v24 3003 vcipher $out2,$out2,v24 3004 vcipher $out3,$out3,v24 3005 vcipher $out4,$out4,v24 3006 3007 subi $inp,$inp,16 3008 vcipher $out0,$out0,v25 3009 vcipher $out1,$out1,v25 3010 vcipher $out2,$out2,v25 3011 vcipher $out3,$out3,v25 3012 vcipher $out4,$out4,v25 3013 vxor $twk0,$twk0,v31 3014 3015 vcipher $out0,$out0,v26 3016 lvsr $inpperm,0,$taillen # $in5 is no more 3017 vcipher $out1,$out1,v26 3018 vcipher $out2,$out2,v26 3019 vcipher $out3,$out3,v26 3020 vcipher $out4,$out4,v26 3021 vxor $in1,$twk1,v31 3022 3023 vcipher $out0,$out0,v27 3024 lvx_u $in0,0,$inp 3025 vcipher $out1,$out1,v27 3026 vcipher $out2,$out2,v27 3027 vcipher $out3,$out3,v27 3028 vcipher $out4,$out4,v27 3029 vxor $in2,$twk2,v31 3030 3031 addi $key_,$sp,$FRAME+15 # rewind $key_ 3032 vcipher $out0,$out0,v28 3033 vcipher $out1,$out1,v28 3034 vcipher $out2,$out2,v28 3035 vcipher $out3,$out3,v28 3036 vcipher $out4,$out4,v28 3037 lvx v24,$x00,$key_ # re-pre-load round[1] 3038 vxor $in3,$twk3,v31 3039 3040 vcipher $out0,$out0,v29 3041 le?vperm $in0,$in0,$in0,$leperm 3042 vcipher $out1,$out1,v29 3043 vcipher $out2,$out2,v29 3044 vcipher $out3,$out3,v29 3045 vcipher $out4,$out4,v29 3046 lvx v25,$x10,$key_ # re-pre-load round[2] 3047 vxor $in4,$twk4,v31 3048 3049 vcipher $out0,$out0,v30 3050 vperm $in0,$in0,$in0,$inpperm 3051 vcipher $out1,$out1,v30 3052 vcipher $out2,$out2,v30 3053 vcipher $out3,$out3,v30 3054 vcipher $out4,$out4,v30 3055 3056 vcipherlast $out0,$out0,$twk0 3057 vcipherlast $out1,$out1,$in1 3058 vcipherlast $out2,$out2,$in2 3059 vcipherlast $out3,$out3,$in3 3060 vcipherlast $out4,$out4,$in4 3061 blr 3062 .long 0 3063 .byte 0,12,0x14,0,0,0,0,0 3064 3065.align 5 3066_aesp8_xts_decrypt6x: 3067 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3068 mflr r11 3069 li r7,`$FRAME+8*16+15` 3070 li r3,`$FRAME+8*16+31` 3071 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3072 stvx v20,r7,$sp # ABI says so 3073 addi r7,r7,32 3074 stvx v21,r3,$sp 3075 addi r3,r3,32 3076 stvx v22,r7,$sp 3077 addi r7,r7,32 3078 stvx v23,r3,$sp 3079 addi r3,r3,32 3080 stvx v24,r7,$sp 3081 addi r7,r7,32 3082 stvx v25,r3,$sp 3083 addi r3,r3,32 3084 stvx v26,r7,$sp 3085 addi r7,r7,32 3086 stvx v27,r3,$sp 3087 addi r3,r3,32 3088 stvx v28,r7,$sp 3089 addi r7,r7,32 3090 stvx v29,r3,$sp 3091 addi r3,r3,32 3092 stvx v30,r7,$sp 3093 stvx v31,r3,$sp 3094 li r0,-1 3095 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3096 li $x10,0x10 3097 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3098 li $x20,0x20 3099 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3100 li $x30,0x30 3101 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3102 li $x40,0x40 3103 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3104 li $x50,0x50 3105 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3106 li $x60,0x60 3107 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3108 li $x70,0x70 3109 mtspr 256,r0 3110 3111 subi $rounds,$rounds,3 # -4 in total 3112 3113 lvx $rndkey0,$x00,$key1 # load key schedule 3114 lvx v30,$x10,$key1 3115 addi $key1,$key1,0x20 3116 lvx v31,$x00,$key1 3117 ?vperm $rndkey0,$rndkey0,v30,$keyperm 3118 addi $key_,$sp,$FRAME+15 3119 mtctr $rounds 3120 3121Load_xts_dec_key: 3122 ?vperm v24,v30,v31,$keyperm 3123 lvx v30,$x10,$key1 3124 addi $key1,$key1,0x20 3125 stvx v24,$x00,$key_ # off-load round[1] 3126 ?vperm v25,v31,v30,$keyperm 3127 lvx v31,$x00,$key1 3128 stvx v25,$x10,$key_ # off-load round[2] 3129 addi $key_,$key_,0x20 3130 bdnz Load_xts_dec_key 3131 3132 lvx v26,$x10,$key1 3133 ?vperm v24,v30,v31,$keyperm 3134 lvx v27,$x20,$key1 3135 stvx v24,$x00,$key_ # off-load round[3] 3136 ?vperm v25,v31,v26,$keyperm 3137 lvx v28,$x30,$key1 3138 stvx v25,$x10,$key_ # off-load round[4] 3139 addi $key_,$sp,$FRAME+15 # rewind $key_ 3140 ?vperm v26,v26,v27,$keyperm 3141 lvx v29,$x40,$key1 3142 ?vperm v27,v27,v28,$keyperm 3143 lvx v30,$x50,$key1 3144 ?vperm v28,v28,v29,$keyperm 3145 lvx v31,$x60,$key1 3146 ?vperm v29,v29,v30,$keyperm 3147 lvx $twk5,$x70,$key1 # borrow $twk5 3148 ?vperm v30,v30,v31,$keyperm 3149 lvx v24,$x00,$key_ # pre-load round[1] 3150 ?vperm v31,v31,$twk5,$keyperm 3151 lvx v25,$x10,$key_ # pre-load round[2] 3152 3153 vperm $in0,$inout,$inptail,$inpperm 3154 subi $inp,$inp,31 # undo "caller" 3155 vxor $twk0,$tweak,$rndkey0 3156 vsrab $tmp,$tweak,$seven # next tweak value 3157 vaddubm $tweak,$tweak,$tweak 3158 vsldoi $tmp,$tmp,$tmp,15 3159 vand $tmp,$tmp,$eighty7 3160 vxor $out0,$in0,$twk0 3161 vxor $tweak,$tweak,$tmp 3162 3163 lvx_u $in1,$x10,$inp 3164 vxor $twk1,$tweak,$rndkey0 3165 vsrab $tmp,$tweak,$seven # next tweak value 3166 vaddubm $tweak,$tweak,$tweak 3167 vsldoi $tmp,$tmp,$tmp,15 3168 le?vperm $in1,$in1,$in1,$leperm 3169 vand $tmp,$tmp,$eighty7 3170 vxor $out1,$in1,$twk1 3171 vxor $tweak,$tweak,$tmp 3172 3173 lvx_u $in2,$x20,$inp 3174 andi. $taillen,$len,15 3175 vxor $twk2,$tweak,$rndkey0 3176 vsrab $tmp,$tweak,$seven # next tweak value 3177 vaddubm $tweak,$tweak,$tweak 3178 vsldoi $tmp,$tmp,$tmp,15 3179 le?vperm $in2,$in2,$in2,$leperm 3180 vand $tmp,$tmp,$eighty7 3181 vxor $out2,$in2,$twk2 3182 vxor $tweak,$tweak,$tmp 3183 3184 lvx_u $in3,$x30,$inp 3185 sub $len,$len,$taillen 3186 vxor $twk3,$tweak,$rndkey0 3187 vsrab $tmp,$tweak,$seven # next tweak value 3188 vaddubm $tweak,$tweak,$tweak 3189 vsldoi $tmp,$tmp,$tmp,15 3190 le?vperm $in3,$in3,$in3,$leperm 3191 vand $tmp,$tmp,$eighty7 3192 vxor $out3,$in3,$twk3 3193 vxor $tweak,$tweak,$tmp 3194 3195 lvx_u $in4,$x40,$inp 3196 subi $len,$len,0x60 3197 vxor $twk4,$tweak,$rndkey0 3198 vsrab $tmp,$tweak,$seven # next tweak value 3199 vaddubm $tweak,$tweak,$tweak 3200 vsldoi $tmp,$tmp,$tmp,15 3201 le?vperm $in4,$in4,$in4,$leperm 3202 vand $tmp,$tmp,$eighty7 3203 vxor $out4,$in4,$twk4 3204 vxor $tweak,$tweak,$tmp 3205 3206 lvx_u $in5,$x50,$inp 3207 addi $inp,$inp,0x60 3208 vxor $twk5,$tweak,$rndkey0 3209 vsrab $tmp,$tweak,$seven # next tweak value 3210 vaddubm $tweak,$tweak,$tweak 3211 vsldoi $tmp,$tmp,$tmp,15 3212 le?vperm $in5,$in5,$in5,$leperm 3213 vand $tmp,$tmp,$eighty7 3214 vxor $out5,$in5,$twk5 3215 vxor $tweak,$tweak,$tmp 3216 3217 vxor v31,v31,$rndkey0 3218 mtctr $rounds 3219 b Loop_xts_dec6x 3220 3221.align 5 3222Loop_xts_dec6x: 3223 vncipher $out0,$out0,v24 3224 vncipher $out1,$out1,v24 3225 vncipher $out2,$out2,v24 3226 vncipher $out3,$out3,v24 3227 vncipher $out4,$out4,v24 3228 vncipher $out5,$out5,v24 3229 lvx v24,$x20,$key_ # round[3] 3230 addi $key_,$key_,0x20 3231 3232 vncipher $out0,$out0,v25 3233 vncipher $out1,$out1,v25 3234 vncipher $out2,$out2,v25 3235 vncipher $out3,$out3,v25 3236 vncipher $out4,$out4,v25 3237 vncipher $out5,$out5,v25 3238 lvx v25,$x10,$key_ # round[4] 3239 bdnz Loop_xts_dec6x 3240 3241 subic $len,$len,96 # $len-=96 3242 vxor $in0,$twk0,v31 # xor with last round key 3243 vncipher $out0,$out0,v24 3244 vncipher $out1,$out1,v24 3245 vsrab $tmp,$tweak,$seven # next tweak value 3246 vxor $twk0,$tweak,$rndkey0 3247 vaddubm $tweak,$tweak,$tweak 3248 vncipher $out2,$out2,v24 3249 vncipher $out3,$out3,v24 3250 vsldoi $tmp,$tmp,$tmp,15 3251 vncipher $out4,$out4,v24 3252 vncipher $out5,$out5,v24 3253 3254 subfe. r0,r0,r0 # borrow?-1:0 3255 vand $tmp,$tmp,$eighty7 3256 vncipher $out0,$out0,v25 3257 vncipher $out1,$out1,v25 3258 vxor $tweak,$tweak,$tmp 3259 vncipher $out2,$out2,v25 3260 vncipher $out3,$out3,v25 3261 vxor $in1,$twk1,v31 3262 vsrab $tmp,$tweak,$seven # next tweak value 3263 vxor $twk1,$tweak,$rndkey0 3264 vncipher $out4,$out4,v25 3265 vncipher $out5,$out5,v25 3266 3267 and r0,r0,$len 3268 vaddubm $tweak,$tweak,$tweak 3269 vsldoi $tmp,$tmp,$tmp,15 3270 vncipher $out0,$out0,v26 3271 vncipher $out1,$out1,v26 3272 vand $tmp,$tmp,$eighty7 3273 vncipher $out2,$out2,v26 3274 vncipher $out3,$out3,v26 3275 vxor $tweak,$tweak,$tmp 3276 vncipher $out4,$out4,v26 3277 vncipher $out5,$out5,v26 3278 3279 add $inp,$inp,r0 # $inp is adjusted in such 3280 # way that at exit from the 3281 # loop inX-in5 are loaded 3282 # with last "words" 3283 vxor $in2,$twk2,v31 3284 vsrab $tmp,$tweak,$seven # next tweak value 3285 vxor $twk2,$tweak,$rndkey0 3286 vaddubm $tweak,$tweak,$tweak 3287 vncipher $out0,$out0,v27 3288 vncipher $out1,$out1,v27 3289 vsldoi $tmp,$tmp,$tmp,15 3290 vncipher $out2,$out2,v27 3291 vncipher $out3,$out3,v27 3292 vand $tmp,$tmp,$eighty7 3293 vncipher $out4,$out4,v27 3294 vncipher $out5,$out5,v27 3295 3296 addi $key_,$sp,$FRAME+15 # rewind $key_ 3297 vxor $tweak,$tweak,$tmp 3298 vncipher $out0,$out0,v28 3299 vncipher $out1,$out1,v28 3300 vxor $in3,$twk3,v31 3301 vsrab $tmp,$tweak,$seven # next tweak value 3302 vxor $twk3,$tweak,$rndkey0 3303 vncipher $out2,$out2,v28 3304 vncipher $out3,$out3,v28 3305 vaddubm $tweak,$tweak,$tweak 3306 vsldoi $tmp,$tmp,$tmp,15 3307 vncipher $out4,$out4,v28 3308 vncipher $out5,$out5,v28 3309 lvx v24,$x00,$key_ # re-pre-load round[1] 3310 vand $tmp,$tmp,$eighty7 3311 3312 vncipher $out0,$out0,v29 3313 vncipher $out1,$out1,v29 3314 vxor $tweak,$tweak,$tmp 3315 vncipher $out2,$out2,v29 3316 vncipher $out3,$out3,v29 3317 vxor $in4,$twk4,v31 3318 vsrab $tmp,$tweak,$seven # next tweak value 3319 vxor $twk4,$tweak,$rndkey0 3320 vncipher $out4,$out4,v29 3321 vncipher $out5,$out5,v29 3322 lvx v25,$x10,$key_ # re-pre-load round[2] 3323 vaddubm $tweak,$tweak,$tweak 3324 vsldoi $tmp,$tmp,$tmp,15 3325 3326 vncipher $out0,$out0,v30 3327 vncipher $out1,$out1,v30 3328 vand $tmp,$tmp,$eighty7 3329 vncipher $out2,$out2,v30 3330 vncipher $out3,$out3,v30 3331 vxor $tweak,$tweak,$tmp 3332 vncipher $out4,$out4,v30 3333 vncipher $out5,$out5,v30 3334 vxor $in5,$twk5,v31 3335 vsrab $tmp,$tweak,$seven # next tweak value 3336 vxor $twk5,$tweak,$rndkey0 3337 3338 vncipherlast $out0,$out0,$in0 3339 lvx_u $in0,$x00,$inp # load next input block 3340 vaddubm $tweak,$tweak,$tweak 3341 vsldoi $tmp,$tmp,$tmp,15 3342 vncipherlast $out1,$out1,$in1 3343 lvx_u $in1,$x10,$inp 3344 vncipherlast $out2,$out2,$in2 3345 le?vperm $in0,$in0,$in0,$leperm 3346 lvx_u $in2,$x20,$inp 3347 vand $tmp,$tmp,$eighty7 3348 vncipherlast $out3,$out3,$in3 3349 le?vperm $in1,$in1,$in1,$leperm 3350 lvx_u $in3,$x30,$inp 3351 vncipherlast $out4,$out4,$in4 3352 le?vperm $in2,$in2,$in2,$leperm 3353 lvx_u $in4,$x40,$inp 3354 vxor $tweak,$tweak,$tmp 3355 vncipherlast $out5,$out5,$in5 3356 le?vperm $in3,$in3,$in3,$leperm 3357 lvx_u $in5,$x50,$inp 3358 addi $inp,$inp,0x60 3359 le?vperm $in4,$in4,$in4,$leperm 3360 le?vperm $in5,$in5,$in5,$leperm 3361 3362 le?vperm $out0,$out0,$out0,$leperm 3363 le?vperm $out1,$out1,$out1,$leperm 3364 stvx_u $out0,$x00,$out # store output 3365 vxor $out0,$in0,$twk0 3366 le?vperm $out2,$out2,$out2,$leperm 3367 stvx_u $out1,$x10,$out 3368 vxor $out1,$in1,$twk1 3369 le?vperm $out3,$out3,$out3,$leperm 3370 stvx_u $out2,$x20,$out 3371 vxor $out2,$in2,$twk2 3372 le?vperm $out4,$out4,$out4,$leperm 3373 stvx_u $out3,$x30,$out 3374 vxor $out3,$in3,$twk3 3375 le?vperm $out5,$out5,$out5,$leperm 3376 stvx_u $out4,$x40,$out 3377 vxor $out4,$in4,$twk4 3378 stvx_u $out5,$x50,$out 3379 vxor $out5,$in5,$twk5 3380 addi $out,$out,0x60 3381 3382 mtctr $rounds 3383 beq Loop_xts_dec6x # did $len-=96 borrow? 3384 3385 addic. $len,$len,0x60 3386 beq Lxts_dec6x_zero 3387 cmpwi $len,0x20 3388 blt Lxts_dec6x_one 3389 nop 3390 beq Lxts_dec6x_two 3391 cmpwi $len,0x40 3392 blt Lxts_dec6x_three 3393 nop 3394 beq Lxts_dec6x_four 3395 3396Lxts_dec6x_five: 3397 vxor $out0,$in1,$twk0 3398 vxor $out1,$in2,$twk1 3399 vxor $out2,$in3,$twk2 3400 vxor $out3,$in4,$twk3 3401 vxor $out4,$in5,$twk4 3402 3403 bl _aesp8_xts_dec5x 3404 3405 le?vperm $out0,$out0,$out0,$leperm 3406 vmr $twk0,$twk5 # unused tweak 3407 vxor $twk1,$tweak,$rndkey0 3408 le?vperm $out1,$out1,$out1,$leperm 3409 stvx_u $out0,$x00,$out # store output 3410 vxor $out0,$in0,$twk1 3411 le?vperm $out2,$out2,$out2,$leperm 3412 stvx_u $out1,$x10,$out 3413 le?vperm $out3,$out3,$out3,$leperm 3414 stvx_u $out2,$x20,$out 3415 le?vperm $out4,$out4,$out4,$leperm 3416 stvx_u $out3,$x30,$out 3417 stvx_u $out4,$x40,$out 3418 addi $out,$out,0x50 3419 bne Lxts_dec6x_steal 3420 b Lxts_dec6x_done 3421 3422.align 4 3423Lxts_dec6x_four: 3424 vxor $out0,$in2,$twk0 3425 vxor $out1,$in3,$twk1 3426 vxor $out2,$in4,$twk2 3427 vxor $out3,$in5,$twk3 3428 vxor $out4,$out4,$out4 3429 3430 bl _aesp8_xts_dec5x 3431 3432 le?vperm $out0,$out0,$out0,$leperm 3433 vmr $twk0,$twk4 # unused tweak 3434 vmr $twk1,$twk5 3435 le?vperm $out1,$out1,$out1,$leperm 3436 stvx_u $out0,$x00,$out # store output 3437 vxor $out0,$in0,$twk5 3438 le?vperm $out2,$out2,$out2,$leperm 3439 stvx_u $out1,$x10,$out 3440 le?vperm $out3,$out3,$out3,$leperm 3441 stvx_u $out2,$x20,$out 3442 stvx_u $out3,$x30,$out 3443 addi $out,$out,0x40 3444 bne Lxts_dec6x_steal 3445 b Lxts_dec6x_done 3446 3447.align 4 3448Lxts_dec6x_three: 3449 vxor $out0,$in3,$twk0 3450 vxor $out1,$in4,$twk1 3451 vxor $out2,$in5,$twk2 3452 vxor $out3,$out3,$out3 3453 vxor $out4,$out4,$out4 3454 3455 bl _aesp8_xts_dec5x 3456 3457 le?vperm $out0,$out0,$out0,$leperm 3458 vmr $twk0,$twk3 # unused tweak 3459 vmr $twk1,$twk4 3460 le?vperm $out1,$out1,$out1,$leperm 3461 stvx_u $out0,$x00,$out # store output 3462 vxor $out0,$in0,$twk4 3463 le?vperm $out2,$out2,$out2,$leperm 3464 stvx_u $out1,$x10,$out 3465 stvx_u $out2,$x20,$out 3466 addi $out,$out,0x30 3467 bne Lxts_dec6x_steal 3468 b Lxts_dec6x_done 3469 3470.align 4 3471Lxts_dec6x_two: 3472 vxor $out0,$in4,$twk0 3473 vxor $out1,$in5,$twk1 3474 vxor $out2,$out2,$out2 3475 vxor $out3,$out3,$out3 3476 vxor $out4,$out4,$out4 3477 3478 bl _aesp8_xts_dec5x 3479 3480 le?vperm $out0,$out0,$out0,$leperm 3481 vmr $twk0,$twk2 # unused tweak 3482 vmr $twk1,$twk3 3483 le?vperm $out1,$out1,$out1,$leperm 3484 stvx_u $out0,$x00,$out # store output 3485 vxor $out0,$in0,$twk3 3486 stvx_u $out1,$x10,$out 3487 addi $out,$out,0x20 3488 bne Lxts_dec6x_steal 3489 b Lxts_dec6x_done 3490 3491.align 4 3492Lxts_dec6x_one: 3493 vxor $out0,$in5,$twk0 3494 nop 3495Loop_xts_dec1x: 3496 vncipher $out0,$out0,v24 3497 lvx v24,$x20,$key_ # round[3] 3498 addi $key_,$key_,0x20 3499 3500 vncipher $out0,$out0,v25 3501 lvx v25,$x10,$key_ # round[4] 3502 bdnz Loop_xts_dec1x 3503 3504 subi r0,$taillen,1 3505 vncipher $out0,$out0,v24 3506 3507 andi. r0,r0,16 3508 cmpwi $taillen,0 3509 vncipher $out0,$out0,v25 3510 3511 sub $inp,$inp,r0 3512 vncipher $out0,$out0,v26 3513 3514 lvx_u $in0,0,$inp 3515 vncipher $out0,$out0,v27 3516 3517 addi $key_,$sp,$FRAME+15 # rewind $key_ 3518 vncipher $out0,$out0,v28 3519 lvx v24,$x00,$key_ # re-pre-load round[1] 3520 3521 vncipher $out0,$out0,v29 3522 lvx v25,$x10,$key_ # re-pre-load round[2] 3523 vxor $twk0,$twk0,v31 3524 3525 le?vperm $in0,$in0,$in0,$leperm 3526 vncipher $out0,$out0,v30 3527 3528 mtctr $rounds 3529 vncipherlast $out0,$out0,$twk0 3530 3531 vmr $twk0,$twk1 # unused tweak 3532 vmr $twk1,$twk2 3533 le?vperm $out0,$out0,$out0,$leperm 3534 stvx_u $out0,$x00,$out # store output 3535 addi $out,$out,0x10 3536 vxor $out0,$in0,$twk2 3537 bne Lxts_dec6x_steal 3538 b Lxts_dec6x_done 3539 3540.align 4 3541Lxts_dec6x_zero: 3542 cmpwi $taillen,0 3543 beq Lxts_dec6x_done 3544 3545 lvx_u $in0,0,$inp 3546 le?vperm $in0,$in0,$in0,$leperm 3547 vxor $out0,$in0,$twk1 3548Lxts_dec6x_steal: 3549 vncipher $out0,$out0,v24 3550 lvx v24,$x20,$key_ # round[3] 3551 addi $key_,$key_,0x20 3552 3553 vncipher $out0,$out0,v25 3554 lvx v25,$x10,$key_ # round[4] 3555 bdnz Lxts_dec6x_steal 3556 3557 add $inp,$inp,$taillen 3558 vncipher $out0,$out0,v24 3559 3560 cmpwi $taillen,0 3561 vncipher $out0,$out0,v25 3562 3563 lvx_u $in0,0,$inp 3564 vncipher $out0,$out0,v26 3565 3566 lvsr $inpperm,0,$taillen # $in5 is no more 3567 vncipher $out0,$out0,v27 3568 3569 addi $key_,$sp,$FRAME+15 # rewind $key_ 3570 vncipher $out0,$out0,v28 3571 lvx v24,$x00,$key_ # re-pre-load round[1] 3572 3573 vncipher $out0,$out0,v29 3574 lvx v25,$x10,$key_ # re-pre-load round[2] 3575 vxor $twk1,$twk1,v31 3576 3577 le?vperm $in0,$in0,$in0,$leperm 3578 vncipher $out0,$out0,v30 3579 3580 vperm $in0,$in0,$in0,$inpperm 3581 vncipherlast $tmp,$out0,$twk1 3582 3583 le?vperm $out0,$tmp,$tmp,$leperm 3584 le?stvx_u $out0,0,$out 3585 be?stvx_u $tmp,0,$out 3586 3587 vxor $out0,$out0,$out0 3588 vspltisb $out1,-1 3589 vperm $out0,$out0,$out1,$inpperm 3590 vsel $out0,$in0,$tmp,$out0 3591 vxor $out0,$out0,$twk0 3592 3593 subi r30,$out,1 3594 mtctr $taillen 3595Loop_xts_dec6x_steal: 3596 lbzu r0,1(r30) 3597 stb r0,16(r30) 3598 bdnz Loop_xts_dec6x_steal 3599 3600 li $taillen,0 3601 mtctr $rounds 3602 b Loop_xts_dec1x # one more time... 3603 3604.align 4 3605Lxts_dec6x_done: 3606 ${UCMP}i $ivp,0 3607 beq Lxts_dec6x_ret 3608 3609 vxor $tweak,$twk0,$rndkey0 3610 le?vperm $tweak,$tweak,$tweak,$leperm 3611 stvx_u $tweak,0,$ivp 3612 3613Lxts_dec6x_ret: 3614 mtlr r11 3615 li r10,`$FRAME+15` 3616 li r11,`$FRAME+31` 3617 stvx $seven,r10,$sp # wipe copies of round keys 3618 addi r10,r10,32 3619 stvx $seven,r11,$sp 3620 addi r11,r11,32 3621 stvx $seven,r10,$sp 3622 addi r10,r10,32 3623 stvx $seven,r11,$sp 3624 addi r11,r11,32 3625 stvx $seven,r10,$sp 3626 addi r10,r10,32 3627 stvx $seven,r11,$sp 3628 addi r11,r11,32 3629 stvx $seven,r10,$sp 3630 addi r10,r10,32 3631 stvx $seven,r11,$sp 3632 addi r11,r11,32 3633 3634 mtspr 256,$vrsave 3635 lvx v20,r10,$sp # ABI says so 3636 addi r10,r10,32 3637 lvx v21,r11,$sp 3638 addi r11,r11,32 3639 lvx v22,r10,$sp 3640 addi r10,r10,32 3641 lvx v23,r11,$sp 3642 addi r11,r11,32 3643 lvx v24,r10,$sp 3644 addi r10,r10,32 3645 lvx v25,r11,$sp 3646 addi r11,r11,32 3647 lvx v26,r10,$sp 3648 addi r10,r10,32 3649 lvx v27,r11,$sp 3650 addi r11,r11,32 3651 lvx v28,r10,$sp 3652 addi r10,r10,32 3653 lvx v29,r11,$sp 3654 addi r11,r11,32 3655 lvx v30,r10,$sp 3656 lvx v31,r11,$sp 3657 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3658 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3659 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3660 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3661 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3662 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3663 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3664 blr 3665 .long 0 3666 .byte 0,12,0x04,1,0x80,6,6,0 3667 .long 0 3668 3669.align 5 3670_aesp8_xts_dec5x: 3671 vncipher $out0,$out0,v24 3672 vncipher $out1,$out1,v24 3673 vncipher $out2,$out2,v24 3674 vncipher $out3,$out3,v24 3675 vncipher $out4,$out4,v24 3676 lvx v24,$x20,$key_ # round[3] 3677 addi $key_,$key_,0x20 3678 3679 vncipher $out0,$out0,v25 3680 vncipher $out1,$out1,v25 3681 vncipher $out2,$out2,v25 3682 vncipher $out3,$out3,v25 3683 vncipher $out4,$out4,v25 3684 lvx v25,$x10,$key_ # round[4] 3685 bdnz _aesp8_xts_dec5x 3686 3687 subi r0,$taillen,1 3688 vncipher $out0,$out0,v24 3689 vncipher $out1,$out1,v24 3690 vncipher $out2,$out2,v24 3691 vncipher $out3,$out3,v24 3692 vncipher $out4,$out4,v24 3693 3694 andi. r0,r0,16 3695 cmpwi $taillen,0 3696 vncipher $out0,$out0,v25 3697 vncipher $out1,$out1,v25 3698 vncipher $out2,$out2,v25 3699 vncipher $out3,$out3,v25 3700 vncipher $out4,$out4,v25 3701 vxor $twk0,$twk0,v31 3702 3703 sub $inp,$inp,r0 3704 vncipher $out0,$out0,v26 3705 vncipher $out1,$out1,v26 3706 vncipher $out2,$out2,v26 3707 vncipher $out3,$out3,v26 3708 vncipher $out4,$out4,v26 3709 vxor $in1,$twk1,v31 3710 3711 vncipher $out0,$out0,v27 3712 lvx_u $in0,0,$inp 3713 vncipher $out1,$out1,v27 3714 vncipher $out2,$out2,v27 3715 vncipher $out3,$out3,v27 3716 vncipher $out4,$out4,v27 3717 vxor $in2,$twk2,v31 3718 3719 addi $key_,$sp,$FRAME+15 # rewind $key_ 3720 vncipher $out0,$out0,v28 3721 vncipher $out1,$out1,v28 3722 vncipher $out2,$out2,v28 3723 vncipher $out3,$out3,v28 3724 vncipher $out4,$out4,v28 3725 lvx v24,$x00,$key_ # re-pre-load round[1] 3726 vxor $in3,$twk3,v31 3727 3728 vncipher $out0,$out0,v29 3729 le?vperm $in0,$in0,$in0,$leperm 3730 vncipher $out1,$out1,v29 3731 vncipher $out2,$out2,v29 3732 vncipher $out3,$out3,v29 3733 vncipher $out4,$out4,v29 3734 lvx v25,$x10,$key_ # re-pre-load round[2] 3735 vxor $in4,$twk4,v31 3736 3737 vncipher $out0,$out0,v30 3738 vncipher $out1,$out1,v30 3739 vncipher $out2,$out2,v30 3740 vncipher $out3,$out3,v30 3741 vncipher $out4,$out4,v30 3742 3743 vncipherlast $out0,$out0,$twk0 3744 vncipherlast $out1,$out1,$in1 3745 vncipherlast $out2,$out2,$in2 3746 vncipherlast $out3,$out3,$in3 3747 vncipherlast $out4,$out4,$in4 3748 mtctr $rounds 3749 blr 3750 .long 0 3751 .byte 0,12,0x14,0,0,0,0,0 3752___ 3753}} }}} 3754 3755my $consts=1; 3756foreach(split("\n",$code)) { 3757 s/\`([^\`]*)\`/eval($1)/geo; 3758 3759 # constants table endian-specific conversion 3760 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 3761 my $conv=$3; 3762 my @bytes=(); 3763 3764 # convert to endian-agnostic format 3765 if ($1 eq "long") { 3766 foreach (split(/,\s*/,$2)) { 3767 my $l = /^0/?oct:int; 3768 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 3769 } 3770 } else { 3771 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 3772 } 3773 3774 # little-endian conversion 3775 if ($flavour =~ /le$/o) { 3776 SWITCH: for($conv) { 3777 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3778 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3779 } 3780 } 3781 3782 #emit 3783 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 3784 next; 3785 } 3786 $consts=0 if (m/Lconsts:/o); # end of table 3787 3788 # instructions prefixed with '?' are endian-specific and need 3789 # to be adjusted accordingly... 3790 if ($flavour =~ /le$/o) { # little-endian 3791 s/le\?//o or 3792 s/be\?/#be#/o or 3793 s/\?lvsr/lvsl/o or 3794 s/\?lvsl/lvsr/o or 3795 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 3796 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 3797 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 3798 } else { # big-endian 3799 s/le\?/#le#/o or 3800 s/be\?//o or 3801 s/\?([a-z]+)/$1/o; 3802 } 3803 3804 print $_,"\n"; 3805} 3806 3807close STDOUT or die "error closing STDOUT: $!"; 3808