#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# NOTE(review): this file was recovered from a copy whose newlines had
# been lost and whose original line numbers were fused into the text;
# the structure below restores it. Instructions and operands are
# unchanged from the recovered text.

# First argument selects the ABI/endianness flavour (e.g. linux64le).
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl perlasm translator next to this script or in
# the shared perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe our output through the translator. NOTE: "or" (not "||") is
# required here; "||" would bind to the concatenated string (always
# true) and a failed open would go undetected.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure			#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	 le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	 le?lvsl	$inpperm,0,$idx
	 le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	 le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	 le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	 le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	 le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	 le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	 le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	 le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	 le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	 vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	 vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	 le?vperm	$in0,$in0,$in0,$inpperm
	 lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	 le?vperm	$in1,$in1,$in1,$inpperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	 le?vperm	$in2,$in2,$in2,$inpperm
	 lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	 le?vperm	$in3,$in3,$in3,$inpperm
	 lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	 le?vperm	$in4,$in4,$in4,$inpperm
	 lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	 le?vperm	$in5,$in5,$in5,$inpperm
	 lvx_u		$in7,$x70,$inp
	 addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	 le?vperm	$in6,$in6,$in6,$inpperm
	 vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	 le?vperm	$in7,$in7,$in7,$inpperm
	 vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	 vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	 vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	 vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	 vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	 vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	 vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	 vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	 vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	 vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	 vmr		$dat,$inptail
	 lvx		$inptail,0,$inp
	 addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	 vperm		$dat,$dat,$inptail,$inpperm
	 li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	 lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	 lvx		$rndkey1,$idx,$key
	 addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	 mtctr		$rounds
	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	 vxor		$inout,$ivec,$rndkey0
	 lvx		$rndkey0,$idx,$key
	 addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my
$rounds 1292 lvx $rndkey1,$idx,$key 1293 addi $idx,$idx,16 1294 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1295 vxor $inout,$ivec,$rndkey0 1296 lvx $rndkey0,$idx,$key 1297 addi $idx,$idx,16 1298 b Loop_ctr32_enc 1299 1300.align 5 1301Loop_ctr32_enc: 1302 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1303 vcipher $inout,$inout,$rndkey1 1304 lvx $rndkey1,$idx,$key 1305 addi $idx,$idx,16 1306 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1307 vcipher $inout,$inout,$rndkey0 1308 lvx $rndkey0,$idx,$key 1309 addi $idx,$idx,16 1310 bdnz Loop_ctr32_enc 1311 1312 vadduwm $ivec,$ivec,$one 1313 vmr $dat,$inptail 1314 lvx $inptail,0,$inp 1315 addi $inp,$inp,16 1316 subic. $len,$len,1 # blocks-- 1317 1318 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1319 vcipher $inout,$inout,$rndkey1 1320 lvx $rndkey1,$idx,$key 1321 vperm $dat,$dat,$inptail,$inpperm 1322 li $idx,16 1323 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1324 lvx $rndkey0,0,$key 1325 vxor $dat,$dat,$rndkey1 # last round key 1326 vcipherlast $inout,$inout,$dat 1327 1328 lvx $rndkey1,$idx,$key 1329 addi $idx,$idx,16 1330 vperm $inout,$inout,$inout,$outperm 1331 vsel $dat,$outhead,$inout,$outmask 1332 mtctr $rounds 1333 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1334 vmr $outhead,$inout 1335 vxor $inout,$ivec,$rndkey0 1336 lvx $rndkey0,$idx,$key 1337 addi $idx,$idx,16 1338 stvx $dat,0,$out 1339 addi $out,$out,16 1340 bne Loop_ctr32_enc 1341 1342 addi $out,$out,-1 1343 lvx $inout,0,$out # redundant in aligned case 1344 vsel $inout,$outhead,$inout,$outmask 1345 stvx $inout,0,$out 1346 1347 mtspr 256,$vrsave 1348 blr 1349 .long 0 1350 .byte 0,12,0x14,0,0,0,6,0 1351 .long 0 1352___ 1353######################################################################### 1354{{ # Optimized CTR procedure # 1355my $key_="r11"; 1356my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1357 $x00=0 if ($flavour =~ /osx/); 1358my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1359my 
($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1360my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1361 # v26-v31 last 6 round keys 1362my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1363my ($two,$three,$four)=($outhead,$outperm,$outmask); 1364 1365$code.=<<___; 1366.align 5 1367_aesp8_ctr32_encrypt8x: 1368 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1369 li r10,`$FRAME+8*16+15` 1370 li r11,`$FRAME+8*16+31` 1371 stvx v20,r10,$sp # ABI says so 1372 addi r10,r10,32 1373 stvx v21,r11,$sp 1374 addi r11,r11,32 1375 stvx v22,r10,$sp 1376 addi r10,r10,32 1377 stvx v23,r11,$sp 1378 addi r11,r11,32 1379 stvx v24,r10,$sp 1380 addi r10,r10,32 1381 stvx v25,r11,$sp 1382 addi r11,r11,32 1383 stvx v26,r10,$sp 1384 addi r10,r10,32 1385 stvx v27,r11,$sp 1386 addi r11,r11,32 1387 stvx v28,r10,$sp 1388 addi r10,r10,32 1389 stvx v29,r11,$sp 1390 addi r11,r11,32 1391 stvx v30,r10,$sp 1392 stvx v31,r11,$sp 1393 li r0,-1 1394 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1395 li $x10,0x10 1396 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1397 li $x20,0x20 1398 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1399 li $x30,0x30 1400 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1401 li $x40,0x40 1402 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1403 li $x50,0x50 1404 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1405 li $x60,0x60 1406 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1407 li $x70,0x70 1408 mtspr 256,r0 1409 1410 subi $rounds,$rounds,3 # -4 in total 1411 1412 lvx $rndkey0,$x00,$key # load key schedule 1413 lvx v30,$x10,$key 1414 addi $key,$key,0x20 1415 lvx v31,$x00,$key 1416 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1417 addi $key_,$sp,$FRAME+15 1418 mtctr $rounds 1419 1420Load_ctr32_enc_key: 1421 ?vperm v24,v30,v31,$keyperm 1422 lvx v30,$x10,$key 1423 addi $key,$key,0x20 1424 stvx v24,$x00,$key_ # off-load round[1] 1425 ?vperm v25,v31,v30,$keyperm 1426 lvx v31,$x00,$key 1427 stvx v25,$x10,$key_ # off-load round[2] 1428 addi $key_,$key_,0x20 
1429 bdnz Load_ctr32_enc_key 1430 1431 lvx v26,$x10,$key 1432 ?vperm v24,v30,v31,$keyperm 1433 lvx v27,$x20,$key 1434 stvx v24,$x00,$key_ # off-load round[3] 1435 ?vperm v25,v31,v26,$keyperm 1436 lvx v28,$x30,$key 1437 stvx v25,$x10,$key_ # off-load round[4] 1438 addi $key_,$sp,$FRAME+15 # rewind $key_ 1439 ?vperm v26,v26,v27,$keyperm 1440 lvx v29,$x40,$key 1441 ?vperm v27,v27,v28,$keyperm 1442 lvx v30,$x50,$key 1443 ?vperm v28,v28,v29,$keyperm 1444 lvx v31,$x60,$key 1445 ?vperm v29,v29,v30,$keyperm 1446 lvx $out0,$x70,$key # borrow $out0 1447 ?vperm v30,v30,v31,$keyperm 1448 lvx v24,$x00,$key_ # pre-load round[1] 1449 ?vperm v31,v31,$out0,$keyperm 1450 lvx v25,$x10,$key_ # pre-load round[2] 1451 1452 vadduwm $two,$one,$one 1453 subi $inp,$inp,15 # undo "caller" 1454 $SHL $len,$len,4 1455 1456 vadduwm $out1,$ivec,$one # counter values ... 1457 vadduwm $out2,$ivec,$two 1458 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1459 le?li $idx,8 1460 vadduwm $out3,$out1,$two 1461 vxor $out1,$out1,$rndkey0 1462 le?lvsl $inpperm,0,$idx 1463 vadduwm $out4,$out2,$two 1464 vxor $out2,$out2,$rndkey0 1465 le?vspltisb $tmp,0x0f 1466 vadduwm $out5,$out3,$two 1467 vxor $out3,$out3,$rndkey0 1468 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1469 vadduwm $out6,$out4,$two 1470 vxor $out4,$out4,$rndkey0 1471 vadduwm $out7,$out5,$two 1472 vxor $out5,$out5,$rndkey0 1473 vadduwm $ivec,$out6,$two # next counter value 1474 vxor $out6,$out6,$rndkey0 1475 vxor $out7,$out7,$rndkey0 1476 1477 mtctr $rounds 1478 b Loop_ctr32_enc8x 1479.align 5 1480Loop_ctr32_enc8x: 1481 vcipher $out0,$out0,v24 1482 vcipher $out1,$out1,v24 1483 vcipher $out2,$out2,v24 1484 vcipher $out3,$out3,v24 1485 vcipher $out4,$out4,v24 1486 vcipher $out5,$out5,v24 1487 vcipher $out6,$out6,v24 1488 vcipher $out7,$out7,v24 1489Loop_ctr32_enc8x_middle: 1490 lvx v24,$x20,$key_ # round[3] 1491 addi $key_,$key_,0x20 1492 1493 vcipher $out0,$out0,v25 1494 vcipher $out1,$out1,v25 1495 vcipher $out2,$out2,v25 1496 
vcipher $out3,$out3,v25 1497 vcipher $out4,$out4,v25 1498 vcipher $out5,$out5,v25 1499 vcipher $out6,$out6,v25 1500 vcipher $out7,$out7,v25 1501 lvx v25,$x10,$key_ # round[4] 1502 bdnz Loop_ctr32_enc8x 1503 1504 subic r11,$len,256 # $len-256, borrow $key_ 1505 vcipher $out0,$out0,v24 1506 vcipher $out1,$out1,v24 1507 vcipher $out2,$out2,v24 1508 vcipher $out3,$out3,v24 1509 vcipher $out4,$out4,v24 1510 vcipher $out5,$out5,v24 1511 vcipher $out6,$out6,v24 1512 vcipher $out7,$out7,v24 1513 1514 subfe r0,r0,r0 # borrow?-1:0 1515 vcipher $out0,$out0,v25 1516 vcipher $out1,$out1,v25 1517 vcipher $out2,$out2,v25 1518 vcipher $out3,$out3,v25 1519 vcipher $out4,$out4,v25 1520 vcipher $out5,$out5,v25 1521 vcipher $out6,$out6,v25 1522 vcipher $out7,$out7,v25 1523 1524 and r0,r0,r11 1525 addi $key_,$sp,$FRAME+15 # rewind $key_ 1526 vcipher $out0,$out0,v26 1527 vcipher $out1,$out1,v26 1528 vcipher $out2,$out2,v26 1529 vcipher $out3,$out3,v26 1530 vcipher $out4,$out4,v26 1531 vcipher $out5,$out5,v26 1532 vcipher $out6,$out6,v26 1533 vcipher $out7,$out7,v26 1534 lvx v24,$x00,$key_ # re-pre-load round[1] 1535 1536 subic $len,$len,129 # $len-=129 1537 vcipher $out0,$out0,v27 1538 addi $len,$len,1 # $len-=128 really 1539 vcipher $out1,$out1,v27 1540 vcipher $out2,$out2,v27 1541 vcipher $out3,$out3,v27 1542 vcipher $out4,$out4,v27 1543 vcipher $out5,$out5,v27 1544 vcipher $out6,$out6,v27 1545 vcipher $out7,$out7,v27 1546 lvx v25,$x10,$key_ # re-pre-load round[2] 1547 1548 vcipher $out0,$out0,v28 1549 lvx_u $in0,$x00,$inp # load input 1550 vcipher $out1,$out1,v28 1551 lvx_u $in1,$x10,$inp 1552 vcipher $out2,$out2,v28 1553 lvx_u $in2,$x20,$inp 1554 vcipher $out3,$out3,v28 1555 lvx_u $in3,$x30,$inp 1556 vcipher $out4,$out4,v28 1557 lvx_u $in4,$x40,$inp 1558 vcipher $out5,$out5,v28 1559 lvx_u $in5,$x50,$inp 1560 vcipher $out6,$out6,v28 1561 lvx_u $in6,$x60,$inp 1562 vcipher $out7,$out7,v28 1563 lvx_u $in7,$x70,$inp 1564 addi $inp,$inp,0x80 1565 1566 vcipher $out0,$out0,v29 1567 le?vperm 
$in0,$in0,$in0,$inpperm 1568 vcipher $out1,$out1,v29 1569 le?vperm $in1,$in1,$in1,$inpperm 1570 vcipher $out2,$out2,v29 1571 le?vperm $in2,$in2,$in2,$inpperm 1572 vcipher $out3,$out3,v29 1573 le?vperm $in3,$in3,$in3,$inpperm 1574 vcipher $out4,$out4,v29 1575 le?vperm $in4,$in4,$in4,$inpperm 1576 vcipher $out5,$out5,v29 1577 le?vperm $in5,$in5,$in5,$inpperm 1578 vcipher $out6,$out6,v29 1579 le?vperm $in6,$in6,$in6,$inpperm 1580 vcipher $out7,$out7,v29 1581 le?vperm $in7,$in7,$in7,$inpperm 1582 1583 add $inp,$inp,r0 # $inp is adjusted in such 1584 # way that at exit from the 1585 # loop inX-in7 are loaded 1586 # with last "words" 1587 subfe. r0,r0,r0 # borrow?-1:0 1588 vcipher $out0,$out0,v30 1589 vxor $in0,$in0,v31 # xor with last round key 1590 vcipher $out1,$out1,v30 1591 vxor $in1,$in1,v31 1592 vcipher $out2,$out2,v30 1593 vxor $in2,$in2,v31 1594 vcipher $out3,$out3,v30 1595 vxor $in3,$in3,v31 1596 vcipher $out4,$out4,v30 1597 vxor $in4,$in4,v31 1598 vcipher $out5,$out5,v30 1599 vxor $in5,$in5,v31 1600 vcipher $out6,$out6,v30 1601 vxor $in6,$in6,v31 1602 vcipher $out7,$out7,v30 1603 vxor $in7,$in7,v31 1604 1605 bne Lctr32_enc8x_break # did $len-129 borrow? 1606 1607 vcipherlast $in0,$out0,$in0 1608 vcipherlast $in1,$out1,$in1 1609 vadduwm $out1,$ivec,$one # counter values ... 1610 vcipherlast $in2,$out2,$in2 1611 vadduwm $out2,$ivec,$two 1612 vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] 1613 vcipherlast $in3,$out3,$in3 1614 vadduwm $out3,$out1,$two 1615 vxor $out1,$out1,$rndkey0 1616 vcipherlast $in4,$out4,$in4 1617 vadduwm $out4,$out2,$two 1618 vxor $out2,$out2,$rndkey0 1619 vcipherlast $in5,$out5,$in5 1620 vadduwm $out5,$out3,$two 1621 vxor $out3,$out3,$rndkey0 1622 vcipherlast $in6,$out6,$in6 1623 vadduwm $out6,$out4,$two 1624 vxor $out4,$out4,$rndkey0 1625 vcipherlast $in7,$out7,$in7 1626 vadduwm $out7,$out5,$two 1627 vxor $out5,$out5,$rndkey0 1628 le?vperm $in0,$in0,$in0,$inpperm 1629 vadduwm $ivec,$out6,$two # next counter value 1630 vxor $out6,$out6,$rndkey0 1631 le?vperm $in1,$in1,$in1,$inpperm 1632 vxor $out7,$out7,$rndkey0 1633 mtctr $rounds 1634 1635 vcipher $out0,$out0,v24 1636 stvx_u $in0,$x00,$out 1637 le?vperm $in2,$in2,$in2,$inpperm 1638 vcipher $out1,$out1,v24 1639 stvx_u $in1,$x10,$out 1640 le?vperm $in3,$in3,$in3,$inpperm 1641 vcipher $out2,$out2,v24 1642 stvx_u $in2,$x20,$out 1643 le?vperm $in4,$in4,$in4,$inpperm 1644 vcipher $out3,$out3,v24 1645 stvx_u $in3,$x30,$out 1646 le?vperm $in5,$in5,$in5,$inpperm 1647 vcipher $out4,$out4,v24 1648 stvx_u $in4,$x40,$out 1649 le?vperm $in6,$in6,$in6,$inpperm 1650 vcipher $out5,$out5,v24 1651 stvx_u $in5,$x50,$out 1652 le?vperm $in7,$in7,$in7,$inpperm 1653 vcipher $out6,$out6,v24 1654 stvx_u $in6,$x60,$out 1655 vcipher $out7,$out7,v24 1656 stvx_u $in7,$x70,$out 1657 addi $out,$out,0x80 1658 1659 b Loop_ctr32_enc8x_middle 1660 1661.align 5 1662Lctr32_enc8x_break: 1663 cmpwi $len,-0x60 1664 blt Lctr32_enc8x_one 1665 nop 1666 beq Lctr32_enc8x_two 1667 cmpwi $len,-0x40 1668 blt Lctr32_enc8x_three 1669 nop 1670 beq Lctr32_enc8x_four 1671 cmpwi $len,-0x20 1672 blt Lctr32_enc8x_five 1673 nop 1674 beq Lctr32_enc8x_six 1675 cmpwi $len,0x00 1676 blt Lctr32_enc8x_seven 1677 1678Lctr32_enc8x_eight: 1679 vcipherlast $out0,$out0,$in0 1680 vcipherlast $out1,$out1,$in1 1681 vcipherlast $out2,$out2,$in2 1682 vcipherlast $out3,$out3,$in3 1683 vcipherlast $out4,$out4,$in4 1684 
vcipherlast $out5,$out5,$in5 1685 vcipherlast $out6,$out6,$in6 1686 vcipherlast $out7,$out7,$in7 1687 1688 le?vperm $out0,$out0,$out0,$inpperm 1689 le?vperm $out1,$out1,$out1,$inpperm 1690 stvx_u $out0,$x00,$out 1691 le?vperm $out2,$out2,$out2,$inpperm 1692 stvx_u $out1,$x10,$out 1693 le?vperm $out3,$out3,$out3,$inpperm 1694 stvx_u $out2,$x20,$out 1695 le?vperm $out4,$out4,$out4,$inpperm 1696 stvx_u $out3,$x30,$out 1697 le?vperm $out5,$out5,$out5,$inpperm 1698 stvx_u $out4,$x40,$out 1699 le?vperm $out6,$out6,$out6,$inpperm 1700 stvx_u $out5,$x50,$out 1701 le?vperm $out7,$out7,$out7,$inpperm 1702 stvx_u $out6,$x60,$out 1703 stvx_u $out7,$x70,$out 1704 addi $out,$out,0x80 1705 b Lctr32_enc8x_done 1706 1707.align 5 1708Lctr32_enc8x_seven: 1709 vcipherlast $out0,$out0,$in1 1710 vcipherlast $out1,$out1,$in2 1711 vcipherlast $out2,$out2,$in3 1712 vcipherlast $out3,$out3,$in4 1713 vcipherlast $out4,$out4,$in5 1714 vcipherlast $out5,$out5,$in6 1715 vcipherlast $out6,$out6,$in7 1716 1717 le?vperm $out0,$out0,$out0,$inpperm 1718 le?vperm $out1,$out1,$out1,$inpperm 1719 stvx_u $out0,$x00,$out 1720 le?vperm $out2,$out2,$out2,$inpperm 1721 stvx_u $out1,$x10,$out 1722 le?vperm $out3,$out3,$out3,$inpperm 1723 stvx_u $out2,$x20,$out 1724 le?vperm $out4,$out4,$out4,$inpperm 1725 stvx_u $out3,$x30,$out 1726 le?vperm $out5,$out5,$out5,$inpperm 1727 stvx_u $out4,$x40,$out 1728 le?vperm $out6,$out6,$out6,$inpperm 1729 stvx_u $out5,$x50,$out 1730 stvx_u $out6,$x60,$out 1731 addi $out,$out,0x70 1732 b Lctr32_enc8x_done 1733 1734.align 5 1735Lctr32_enc8x_six: 1736 vcipherlast $out0,$out0,$in2 1737 vcipherlast $out1,$out1,$in3 1738 vcipherlast $out2,$out2,$in4 1739 vcipherlast $out3,$out3,$in5 1740 vcipherlast $out4,$out4,$in6 1741 vcipherlast $out5,$out5,$in7 1742 1743 le?vperm $out0,$out0,$out0,$inpperm 1744 le?vperm $out1,$out1,$out1,$inpperm 1745 stvx_u $out0,$x00,$out 1746 le?vperm $out2,$out2,$out2,$inpperm 1747 stvx_u $out1,$x10,$out 1748 le?vperm $out3,$out3,$out3,$inpperm 1749 
stvx_u $out2,$x20,$out 1750 le?vperm $out4,$out4,$out4,$inpperm 1751 stvx_u $out3,$x30,$out 1752 le?vperm $out5,$out5,$out5,$inpperm 1753 stvx_u $out4,$x40,$out 1754 stvx_u $out5,$x50,$out 1755 addi $out,$out,0x60 1756 b Lctr32_enc8x_done 1757 1758.align 5 1759Lctr32_enc8x_five: 1760 vcipherlast $out0,$out0,$in3 1761 vcipherlast $out1,$out1,$in4 1762 vcipherlast $out2,$out2,$in5 1763 vcipherlast $out3,$out3,$in6 1764 vcipherlast $out4,$out4,$in7 1765 1766 le?vperm $out0,$out0,$out0,$inpperm 1767 le?vperm $out1,$out1,$out1,$inpperm 1768 stvx_u $out0,$x00,$out 1769 le?vperm $out2,$out2,$out2,$inpperm 1770 stvx_u $out1,$x10,$out 1771 le?vperm $out3,$out3,$out3,$inpperm 1772 stvx_u $out2,$x20,$out 1773 le?vperm $out4,$out4,$out4,$inpperm 1774 stvx_u $out3,$x30,$out 1775 stvx_u $out4,$x40,$out 1776 addi $out,$out,0x50 1777 b Lctr32_enc8x_done 1778 1779.align 5 1780Lctr32_enc8x_four: 1781 vcipherlast $out0,$out0,$in4 1782 vcipherlast $out1,$out1,$in5 1783 vcipherlast $out2,$out2,$in6 1784 vcipherlast $out3,$out3,$in7 1785 1786 le?vperm $out0,$out0,$out0,$inpperm 1787 le?vperm $out1,$out1,$out1,$inpperm 1788 stvx_u $out0,$x00,$out 1789 le?vperm $out2,$out2,$out2,$inpperm 1790 stvx_u $out1,$x10,$out 1791 le?vperm $out3,$out3,$out3,$inpperm 1792 stvx_u $out2,$x20,$out 1793 stvx_u $out3,$x30,$out 1794 addi $out,$out,0x40 1795 b Lctr32_enc8x_done 1796 1797.align 5 1798Lctr32_enc8x_three: 1799 vcipherlast $out0,$out0,$in5 1800 vcipherlast $out1,$out1,$in6 1801 vcipherlast $out2,$out2,$in7 1802 1803 le?vperm $out0,$out0,$out0,$inpperm 1804 le?vperm $out1,$out1,$out1,$inpperm 1805 stvx_u $out0,$x00,$out 1806 le?vperm $out2,$out2,$out2,$inpperm 1807 stvx_u $out1,$x10,$out 1808 stvx_u $out2,$x20,$out 1809 addi $out,$out,0x30 1810 b Lcbc_dec8x_done 1811 1812.align 5 1813Lctr32_enc8x_two: 1814 vcipherlast $out0,$out0,$in6 1815 vcipherlast $out1,$out1,$in7 1816 1817 le?vperm $out0,$out0,$out0,$inpperm 1818 le?vperm $out1,$out1,$out1,$inpperm 1819 stvx_u $out0,$x00,$out 1820 stvx_u 
$out1,$x10,$out 1821 addi $out,$out,0x20 1822 b Lcbc_dec8x_done 1823 1824.align 5 1825Lctr32_enc8x_one: 1826 vcipherlast $out0,$out0,$in7 1827 1828 le?vperm $out0,$out0,$out0,$inpperm 1829 stvx_u $out0,0,$out 1830 addi $out,$out,0x10 1831 1832Lctr32_enc8x_done: 1833 li r10,`$FRAME+15` 1834 li r11,`$FRAME+31` 1835 stvx $inpperm,r10,$sp # wipe copies of round keys 1836 addi r10,r10,32 1837 stvx $inpperm,r11,$sp 1838 addi r11,r11,32 1839 stvx $inpperm,r10,$sp 1840 addi r10,r10,32 1841 stvx $inpperm,r11,$sp 1842 addi r11,r11,32 1843 stvx $inpperm,r10,$sp 1844 addi r10,r10,32 1845 stvx $inpperm,r11,$sp 1846 addi r11,r11,32 1847 stvx $inpperm,r10,$sp 1848 addi r10,r10,32 1849 stvx $inpperm,r11,$sp 1850 addi r11,r11,32 1851 1852 mtspr 256,$vrsave 1853 lvx v20,r10,$sp # ABI says so 1854 addi r10,r10,32 1855 lvx v21,r11,$sp 1856 addi r11,r11,32 1857 lvx v22,r10,$sp 1858 addi r10,r10,32 1859 lvx v23,r11,$sp 1860 addi r11,r11,32 1861 lvx v24,r10,$sp 1862 addi r10,r10,32 1863 lvx v25,r11,$sp 1864 addi r11,r11,32 1865 lvx v26,r10,$sp 1866 addi r10,r10,32 1867 lvx v27,r11,$sp 1868 addi r11,r11,32 1869 lvx v28,r10,$sp 1870 addi r10,r10,32 1871 lvx v29,r11,$sp 1872 addi r11,r11,32 1873 lvx v30,r10,$sp 1874 lvx v31,r11,$sp 1875 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1876 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1877 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1878 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1879 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1880 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1881 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1882 blr 1883 .long 0 1884 .byte 0,12,0x04,0,0x80,6,6,0 1885 .long 0 1886.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1887___ 1888}} }}} 1889 1890my $consts=1; 1891foreach(split("\n",$code)) { 1892 s/\`([^\`]*)\`/eval($1)/geo; 1893 1894 # constants table endian-specific conversion 1895 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 1896 my $conv=$3; 1897 my @bytes=(); 1898 1899 # convert to endian-agnostic format 1900 if ($1 
eq "long") { 1901 foreach (split(/,\s*/,$2)) { 1902 my $l = /^0/?oct:int; 1903 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 1904 } 1905 } else { 1906 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 1907 } 1908 1909 # little-endian conversion 1910 if ($flavour =~ /le$/o) { 1911 SWITCH: for($conv) { 1912 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 1913 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 1914 } 1915 } 1916 1917 #emit 1918 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 1919 next; 1920 } 1921 $consts=0 if (m/Lconsts:/o); # end of table 1922 1923 # instructions prefixed with '?' are endian-specific and need 1924 # to be adjusted accordingly... 1925 if ($flavour =~ /le$/o) { # little-endian 1926 s/le\?//o or 1927 s/be\?/#be#/o or 1928 s/\?lvsr/lvsl/o or 1929 s/\?lvsl/lvsr/o or 1930 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 1931 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 1932 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 1933 } else { # big-endian 1934 s/le\?/#le#/o or 1935 s/be\?//o or 1936 s/\?([a-z]+)/$1/o; 1937 } 1938 1939 print $_,"\n"; 1940} 1941 1942close STDOUT; 1943