1#! /usr/bin/env perl 2# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the Apache License 2.0 (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9# 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16# 17# This module implements support for AES instructions as per PowerISA 18# specification version 2.07, first implemented by POWER8 processor. 19# The module is endian-agnostic in sense that it supports both big- 20# and little-endian cases. Data alignment in parallelizable modes is 21# handled with VSX loads and stores, which implies MSR.VSX flag being 22# set. It should also be noted that ISA specification doesn't prohibit 23# alignment exceptions for these instructions on page boundaries. 24# Initially alignment was handled in pure AltiVec/VMX way [when data 25# is aligned programmatically, which in turn guarantees exception- 26# free execution], but it turned to hamper performance when vcipher 27# instructions are interleaved. It's reckoned that eventual 28# misalignment penalties at page boundaries are in average lower 29# than additional overhead in pure AltiVec approach. 30# 31# May 2016 32# 33# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 34# systems were measured. 35# 36###################################################################### 37# Current large-block performance in cycles per byte processed with 38# 128-bit key (less is better). 39# 40# CBC en-/decrypt CTR XTS 41# POWER8[le] 3.96/0.72 0.74 1.1 42# POWER8[be] 3.75/0.65 0.66 1.0 43# POWER9[le] 4.02/0.86 0.84 1.05 44# POWER9[be] 3.99/0.78 0.79 0.97 45 46# $output is the last argument if it looks like a file (it has an extension) 47# $flavour is the first argument if it doesn't look like a file 48$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; 49$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; 50 51if ($flavour =~ /64/) { 52 $SIZE_T =8; 53 $LRSAVE =2*$SIZE_T; 54 $STU ="stdu"; 55 $POP ="ld"; 56 $PUSH ="std"; 57 $UCMP ="cmpld"; 58 $SHL ="sldi"; 59} elsif ($flavour =~ /32/) { 60 $SIZE_T =4; 61 $LRSAVE =$SIZE_T; 62 $STU ="stwu"; 63 $POP ="lwz"; 64 $PUSH ="stw"; 65 $UCMP ="cmplw"; 66 $SHL ="slwi"; 67} else { die "nonsense $flavour"; } 68 69$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 70 71$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 72( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 73( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 74die "can't locate ppc-xlate.pl"; 75 76open STDOUT,"| $^X $xlate $flavour \"$output\"" 77 or die "can't call $xlate: $!"; 78 79$FRAME=8*$SIZE_T; 80$prefix="aes_p8"; 81 82$sp="r1"; 83$vrsave="r12"; 84 85######################################################################### 86{{{ # Key setup procedures # 87my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 88my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 89my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 90 91$code.=<<___; 92.machine "any" 93 94.text 95 96.align 7 97rcon: 98.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 99.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 100.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 101.long 0,0,0,0 ?asis 102Lconsts: 103 mflr r0 104 bcl 20,31,\$+4 105 mflr $ptr #vvvvv "distance between . and rcon 106 addi $ptr,$ptr,-0x48 107 mtlr r0 108 blr 109 .long 0 110 .byte 0,12,0x14,0,0,0,0,0 111.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 112 113.globl .${prefix}_set_encrypt_key 114.align 5 115.${prefix}_set_encrypt_key: 116Lset_encrypt_key: 117 mflr r11 118 $PUSH r11,$LRSAVE($sp) 119 120 li $ptr,-1 121 ${UCMP}i $inp,0 122 beq- Lenc_key_abort # if ($inp==0) return -1; 123 ${UCMP}i $out,0 124 beq- Lenc_key_abort # if ($out==0) return -1; 125 li $ptr,-2 126 cmpwi $bits,128 127 blt- Lenc_key_abort 128 cmpwi $bits,256 129 bgt- Lenc_key_abort 130 andi. r0,$bits,0x3f 131 bne- Lenc_key_abort 132 133 lis r0,0xfff0 134 mfspr $vrsave,256 135 mtspr 256,r0 136 137 bl Lconsts 138 mtlr r11 139 140 neg r9,$inp 141 lvx $in0,0,$inp 142 addi $inp,$inp,15 # 15 is not typo 143 lvsr $key,0,r9 # borrow $key 144 li r8,0x20 145 cmpwi $bits,192 146 lvx $in1,0,$inp 147 le?vspltisb $mask,0x0f # borrow $mask 148 lvx $rcon,0,$ptr 149 le?vxor $key,$key,$mask # adjust for byte swap 150 lvx $mask,r8,$ptr 151 addi $ptr,$ptr,0x10 152 vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 153 li $cnt,8 154 vxor $zero,$zero,$zero 155 mtctr $cnt 156 157 ?lvsr $outperm,0,$out 158 vspltisb $outmask,-1 159 lvx $outhead,0,$out 160 ?vperm $outmask,$zero,$outmask,$outperm 161 162 blt Loop128 163 addi $inp,$inp,8 164 beq L192 165 addi $inp,$inp,8 166 b L256 167 168.align 4 169Loop128: 170 vperm $key,$in0,$in0,$mask # rotate-n-splat 171 vsldoi $tmp,$zero,$in0,12 # >>32 172 vperm $outtail,$in0,$in0,$outperm # rotate 173 vsel $stage,$outhead,$outtail,$outmask 174 vmr $outhead,$outtail 175 vcipherlast $key,$key,$rcon 176 stvx $stage,0,$out 177 addi $out,$out,16 178 179 vxor $in0,$in0,$tmp 180 vsldoi $tmp,$zero,$tmp,12 # >>32 181 vxor $in0,$in0,$tmp 182 vsldoi $tmp,$zero,$tmp,12 # >>32 183 vxor $in0,$in0,$tmp 184 vadduwm $rcon,$rcon,$rcon 185 vxor $in0,$in0,$key 186 bdnz Loop128 187 188 lvx $rcon,0,$ptr # last two round keys 189 190 vperm $key,$in0,$in0,$mask # rotate-n-splat 191 vsldoi $tmp,$zero,$in0,12 # >>32 192 vperm $outtail,$in0,$in0,$outperm # rotate 193 vsel $stage,$outhead,$outtail,$outmask 194 vmr $outhead,$outtail 195 vcipherlast $key,$key,$rcon 196 stvx $stage,0,$out 197 addi $out,$out,16 198 199 vxor $in0,$in0,$tmp 200 vsldoi $tmp,$zero,$tmp,12 # >>32 201 vxor $in0,$in0,$tmp 202 vsldoi $tmp,$zero,$tmp,12 # >>32 203 vxor $in0,$in0,$tmp 204 vadduwm $rcon,$rcon,$rcon 205 vxor $in0,$in0,$key 206 207 vperm $key,$in0,$in0,$mask # rotate-n-splat 208 vsldoi $tmp,$zero,$in0,12 # >>32 209 vperm $outtail,$in0,$in0,$outperm # rotate 210 vsel $stage,$outhead,$outtail,$outmask 211 vmr $outhead,$outtail 212 vcipherlast $key,$key,$rcon 213 stvx $stage,0,$out 214 addi $out,$out,16 215 216 vxor $in0,$in0,$tmp 217 vsldoi $tmp,$zero,$tmp,12 # >>32 218 vxor $in0,$in0,$tmp 219 vsldoi $tmp,$zero,$tmp,12 # >>32 220 vxor $in0,$in0,$tmp 221 vxor $in0,$in0,$key 222 vperm $outtail,$in0,$in0,$outperm # rotate 223 vsel $stage,$outhead,$outtail,$outmask 224 vmr $outhead,$outtail 225 stvx $stage,0,$out 226 227 addi $inp,$out,15 # 15 is not typo 228 addi $out,$out,0x50 229 230 li $rounds,10 231 b Ldone 232 233.align 4 234L192: 235 lvx $tmp,0,$inp 236 li $cnt,4 237 vperm $outtail,$in0,$in0,$outperm # rotate 238 vsel $stage,$outhead,$outtail,$outmask 239 vmr $outhead,$outtail 240 stvx $stage,0,$out 241 addi $out,$out,16 242 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 243 vspltisb $key,8 # borrow $key 244 mtctr $cnt 245 vsububm $mask,$mask,$key # adjust the mask 246 247Loop192: 248 vperm $key,$in1,$in1,$mask # roate-n-splat 249 vsldoi $tmp,$zero,$in0,12 # >>32 250 vcipherlast $key,$key,$rcon 251 252 vxor $in0,$in0,$tmp 253 vsldoi $tmp,$zero,$tmp,12 # >>32 254 vxor $in0,$in0,$tmp 255 vsldoi $tmp,$zero,$tmp,12 # >>32 256 vxor $in0,$in0,$tmp 257 258 vsldoi $stage,$zero,$in1,8 259 vspltw $tmp,$in0,3 260 vxor $tmp,$tmp,$in1 261 vsldoi $in1,$zero,$in1,12 # >>32 262 vadduwm $rcon,$rcon,$rcon 263 vxor $in1,$in1,$tmp 264 vxor $in0,$in0,$key 265 vxor $in1,$in1,$key 266 vsldoi $stage,$stage,$in0,8 267 268 vperm $key,$in1,$in1,$mask # rotate-n-splat 269 vsldoi $tmp,$zero,$in0,12 # >>32 270 vperm $outtail,$stage,$stage,$outperm # rotate 271 vsel $stage,$outhead,$outtail,$outmask 272 vmr $outhead,$outtail 273 vcipherlast $key,$key,$rcon 274 stvx $stage,0,$out 275 addi $out,$out,16 276 277 vsldoi $stage,$in0,$in1,8 278 vxor $in0,$in0,$tmp 279 vsldoi $tmp,$zero,$tmp,12 # >>32 280 vperm $outtail,$stage,$stage,$outperm # rotate 281 vsel $stage,$outhead,$outtail,$outmask 282 vmr $outhead,$outtail 283 vxor $in0,$in0,$tmp 284 vsldoi $tmp,$zero,$tmp,12 # >>32 285 vxor $in0,$in0,$tmp 286 stvx $stage,0,$out 287 addi $out,$out,16 288 289 vspltw $tmp,$in0,3 290 vxor $tmp,$tmp,$in1 291 vsldoi $in1,$zero,$in1,12 # >>32 292 vadduwm $rcon,$rcon,$rcon 293 vxor $in1,$in1,$tmp 294 vxor $in0,$in0,$key 295 vxor $in1,$in1,$key 296 vperm $outtail,$in0,$in0,$outperm # rotate 297 vsel $stage,$outhead,$outtail,$outmask 298 vmr $outhead,$outtail 299 stvx $stage,0,$out 300 addi $inp,$out,15 # 15 is not typo 301 addi $out,$out,16 302 bdnz Loop192 303 304 li $rounds,12 305 addi $out,$out,0x20 306 b Ldone 307 308.align 4 309L256: 310 lvx $tmp,0,$inp 311 li $cnt,7 312 li $rounds,14 313 vperm $outtail,$in0,$in0,$outperm # rotate 314 vsel $stage,$outhead,$outtail,$outmask 315 vmr $outhead,$outtail 316 stvx $stage,0,$out 317 addi $out,$out,16 318 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 319 mtctr $cnt 320 321Loop256: 322 vperm $key,$in1,$in1,$mask # rotate-n-splat 323 vsldoi $tmp,$zero,$in0,12 # >>32 324 vperm $outtail,$in1,$in1,$outperm # rotate 325 vsel $stage,$outhead,$outtail,$outmask 326 vmr $outhead,$outtail 327 vcipherlast $key,$key,$rcon 328 stvx $stage,0,$out 329 addi $out,$out,16 330 331 vxor $in0,$in0,$tmp 332 vsldoi $tmp,$zero,$tmp,12 # >>32 333 vxor $in0,$in0,$tmp 334 vsldoi $tmp,$zero,$tmp,12 # >>32 335 vxor $in0,$in0,$tmp 336 vadduwm $rcon,$rcon,$rcon 337 vxor $in0,$in0,$key 338 vperm $outtail,$in0,$in0,$outperm # rotate 339 vsel $stage,$outhead,$outtail,$outmask 340 vmr $outhead,$outtail 341 stvx $stage,0,$out 342 addi $inp,$out,15 # 15 is not typo 343 addi $out,$out,16 344 bdz Ldone 345 346 vspltw $key,$in0,3 # just splat 347 vsldoi $tmp,$zero,$in1,12 # >>32 348 vsbox $key,$key 349 350 vxor $in1,$in1,$tmp 351 vsldoi $tmp,$zero,$tmp,12 # >>32 352 vxor $in1,$in1,$tmp 353 vsldoi $tmp,$zero,$tmp,12 # >>32 354 vxor $in1,$in1,$tmp 355 356 vxor $in1,$in1,$key 357 b Loop256 358 359.align 4 360Ldone: 361 lvx $in1,0,$inp # redundant in aligned case 362 vsel $in1,$outhead,$in1,$outmask 363 stvx $in1,0,$inp 364 li $ptr,0 365 mtspr 256,$vrsave 366 stw $rounds,0($out) 367 368Lenc_key_abort: 369 mr r3,$ptr 370 blr 371 .long 0 372 .byte 0,12,0x14,1,0,0,3,0 373 .long 0 374.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 375 376.globl .${prefix}_set_decrypt_key 377.align 5 378.${prefix}_set_decrypt_key: 379 $STU $sp,-$FRAME($sp) 380 mflr r10 381 $PUSH r10,$FRAME+$LRSAVE($sp) 382 bl Lset_encrypt_key 383 mtlr r10 384 385 cmpwi r3,0 386 bne- Ldec_key_abort 387 388 slwi $cnt,$rounds,4 389 subi $inp,$out,240 # first round key 390 srwi $rounds,$rounds,1 391 add $out,$inp,$cnt # last round key 392 mtctr $rounds 393 394Ldeckey: 395 lwz r0, 0($inp) 396 lwz r6, 4($inp) 397 lwz r7, 8($inp) 398 lwz r8, 12($inp) 399 addi $inp,$inp,16 400 lwz r9, 0($out) 401 lwz r10,4($out) 402 lwz r11,8($out) 403 lwz r12,12($out) 404 stw r0, 0($out) 405 stw r6, 4($out) 406 stw r7, 8($out) 407 stw r8, 12($out) 408 subi $out,$out,16 409 stw r9, -16($inp) 410 stw r10,-12($inp) 411 stw r11,-8($inp) 412 stw r12,-4($inp) 413 bdnz Ldeckey 414 415 xor r3,r3,r3 # return value 416Ldec_key_abort: 417 addi $sp,$sp,$FRAME 418 blr 419 .long 0 420 .byte 0,12,4,1,0x80,0,3,0 421 .long 0 422.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 423___ 424}}} 425######################################################################### 426{{{ # Single block en- and decrypt procedures # 427sub gen_block () { 428my $dir = shift; 429my $n = $dir eq "de" ? "n" : ""; 430my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 431 432$code.=<<___; 433.globl .${prefix}_${dir}crypt 434.align 5 435.${prefix}_${dir}crypt: 436 lwz $rounds,240($key) 437 lis r0,0xfc00 438 mfspr $vrsave,256 439 li $idx,15 # 15 is not typo 440 mtspr 256,r0 441 442 lvx v0,0,$inp 443 neg r11,$out 444 lvx v1,$idx,$inp 445 lvsl v2,0,$inp # inpperm 446 le?vspltisb v4,0x0f 447 ?lvsl v3,0,r11 # outperm 448 le?vxor v2,v2,v4 449 li $idx,16 450 vperm v0,v0,v1,v2 # align [and byte swap in LE] 451 lvx v1,0,$key 452 ?lvsl v5,0,$key # keyperm 453 srwi $rounds,$rounds,1 454 lvx v2,$idx,$key 455 addi $idx,$idx,16 456 subi $rounds,$rounds,1 457 ?vperm v1,v1,v2,v5 # align round key 458 459 vxor v0,v0,v1 460 lvx v1,$idx,$key 461 addi $idx,$idx,16 462 mtctr $rounds 463 464Loop_${dir}c: 465 ?vperm v2,v2,v1,v5 466 v${n}cipher v0,v0,v2 467 lvx v2,$idx,$key 468 addi $idx,$idx,16 469 ?vperm v1,v1,v2,v5 470 v${n}cipher v0,v0,v1 471 lvx v1,$idx,$key 472 addi $idx,$idx,16 473 bdnz Loop_${dir}c 474 475 ?vperm v2,v2,v1,v5 476 v${n}cipher v0,v0,v2 477 lvx v2,$idx,$key 478 ?vperm v1,v1,v2,v5 479 v${n}cipherlast v0,v0,v1 480 481 vspltisb v2,-1 482 vxor v1,v1,v1 483 li $idx,15 # 15 is not typo 484 ?vperm v2,v1,v2,v3 # outmask 485 le?vxor v3,v3,v4 486 lvx v1,0,$out # outhead 487 vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 488 vsel v1,v1,v0,v2 489 lvx v4,$idx,$out 490 stvx v1,0,$out 491 vsel v0,v0,v4,v2 492 stvx v0,$idx,$out 493 494 mtspr 256,$vrsave 495 blr 496 .long 0 497 .byte 0,12,0x14,0,0,0,3,0 498 .long 0 499.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 500___ 501} 502&gen_block("en"); 503&gen_block("de"); 504}}} 505######################################################################### 506{{{ # CBC en- and decrypt procedures # 507my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 508my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 509my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 510 map("v$_",(4..10)); 511$code.=<<___; 512.globl .${prefix}_cbc_encrypt 513.align 5 514.${prefix}_cbc_encrypt: 515 ${UCMP}i $len,16 516 bltlr- 517 518 cmpwi $enc,0 # test direction 519 lis r0,0xffe0 520 mfspr $vrsave,256 521 mtspr 256,r0 522 523 li $idx,15 524 vxor $rndkey0,$rndkey0,$rndkey0 525 le?vspltisb $tmp,0x0f 526 527 lvx $ivec,0,$ivp # load [unaligned] iv 528 lvsl $inpperm,0,$ivp 529 lvx $inptail,$idx,$ivp 530 le?vxor $inpperm,$inpperm,$tmp 531 vperm $ivec,$ivec,$inptail,$inpperm 532 533 neg r11,$inp 534 ?lvsl $keyperm,0,$key # prepare for unaligned key 535 lwz $rounds,240($key) 536 537 lvsr $inpperm,0,r11 # prepare for unaligned load 538 lvx $inptail,0,$inp 539 addi $inp,$inp,15 # 15 is not typo 540 le?vxor $inpperm,$inpperm,$tmp 541 542 ?lvsr $outperm,0,$out # prepare for unaligned store 543 vspltisb $outmask,-1 544 lvx $outhead,0,$out 545 ?vperm $outmask,$rndkey0,$outmask,$outperm 546 le?vxor $outperm,$outperm,$tmp 547 548 srwi $rounds,$rounds,1 549 li $idx,16 550 subi $rounds,$rounds,1 551 beq Lcbc_dec 552 553Lcbc_enc: 554 vmr $inout,$inptail 555 lvx $inptail,0,$inp 556 addi $inp,$inp,16 557 mtctr $rounds 558 subi $len,$len,16 # len-=16 559 560 lvx $rndkey0,0,$key 561 vperm $inout,$inout,$inptail,$inpperm 562 lvx $rndkey1,$idx,$key 563 addi $idx,$idx,16 564 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 565 vxor $inout,$inout,$rndkey0 566 lvx $rndkey0,$idx,$key 567 addi $idx,$idx,16 568 vxor $inout,$inout,$ivec 569 570Loop_cbc_enc: 571 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 572 vcipher $inout,$inout,$rndkey1 573 lvx $rndkey1,$idx,$key 574 addi $idx,$idx,16 575 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 576 vcipher $inout,$inout,$rndkey0 577 lvx $rndkey0,$idx,$key 578 addi $idx,$idx,16 579 bdnz Loop_cbc_enc 580 581 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 582 vcipher $inout,$inout,$rndkey1 583 lvx $rndkey1,$idx,$key 584 li $idx,16 585 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 586 vcipherlast $ivec,$inout,$rndkey0 587 ${UCMP}i $len,16 588 589 vperm $tmp,$ivec,$ivec,$outperm 590 vsel $inout,$outhead,$tmp,$outmask 591 vmr $outhead,$tmp 592 stvx $inout,0,$out 593 addi $out,$out,16 594 bge Lcbc_enc 595 596 b Lcbc_done 597 598.align 4 599Lcbc_dec: 600 ${UCMP}i $len,128 601 bge _aesp8_cbc_decrypt8x 602 vmr $tmp,$inptail 603 lvx $inptail,0,$inp 604 addi $inp,$inp,16 605 mtctr $rounds 606 subi $len,$len,16 # len-=16 607 608 lvx $rndkey0,0,$key 609 vperm $tmp,$tmp,$inptail,$inpperm 610 lvx $rndkey1,$idx,$key 611 addi $idx,$idx,16 612 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 613 vxor $inout,$tmp,$rndkey0 614 lvx $rndkey0,$idx,$key 615 addi $idx,$idx,16 616 617Loop_cbc_dec: 618 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 619 vncipher $inout,$inout,$rndkey1 620 lvx $rndkey1,$idx,$key 621 addi $idx,$idx,16 622 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 623 vncipher $inout,$inout,$rndkey0 624 lvx $rndkey0,$idx,$key 625 addi $idx,$idx,16 626 bdnz Loop_cbc_dec 627 628 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 629 vncipher $inout,$inout,$rndkey1 630 lvx $rndkey1,$idx,$key 631 li $idx,16 632 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 633 vncipherlast $inout,$inout,$rndkey0 634 ${UCMP}i $len,16 635 636 vxor $inout,$inout,$ivec 637 vmr $ivec,$tmp 638 vperm $tmp,$inout,$inout,$outperm 639 vsel $inout,$outhead,$tmp,$outmask 640 vmr $outhead,$tmp 641 stvx $inout,0,$out 642 addi $out,$out,16 643 bge Lcbc_dec 644 645Lcbc_done: 646 addi $out,$out,-1 647 lvx $inout,0,$out # redundant in aligned case 648 vsel $inout,$outhead,$inout,$outmask 649 stvx $inout,0,$out 650 651 neg $enc,$ivp # write [unaligned] iv 652 li $idx,15 # 15 is not typo 653 vxor $rndkey0,$rndkey0,$rndkey0 654 vspltisb $outmask,-1 655 le?vspltisb $tmp,0x0f 656 ?lvsl $outperm,0,$enc 657 ?vperm $outmask,$rndkey0,$outmask,$outperm 658 le?vxor $outperm,$outperm,$tmp 659 lvx $outhead,0,$ivp 660 vperm $ivec,$ivec,$ivec,$outperm 661 vsel $inout,$outhead,$ivec,$outmask 662 lvx $inptail,$idx,$ivp 663 stvx $inout,0,$ivp 664 vsel $inout,$ivec,$inptail,$outmask 665 stvx $inout,$idx,$ivp 666 667 mtspr 256,$vrsave 668 blr 669 .long 0 670 .byte 0,12,0x14,0,0,0,6,0 671 .long 0 672___ 673######################################################################### 674{{ # Optimized CBC decrypt procedure # 675my $key_="r11"; 676my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 677 $x00=0 if ($flavour =~ /osx/); 678my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); 679my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); 680my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 681 # v26-v31 last 6 round keys 682my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 683 684$code.=<<___; 685.align 5 686_aesp8_cbc_decrypt8x: 687 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 688 li r10,`$FRAME+8*16+15` 689 li r11,`$FRAME+8*16+31` 690 stvx v20,r10,$sp # ABI says so 691 addi r10,r10,32 692 stvx v21,r11,$sp 693 addi r11,r11,32 694 stvx v22,r10,$sp 695 addi r10,r10,32 696 stvx v23,r11,$sp 697 addi r11,r11,32 698 stvx v24,r10,$sp 699 addi r10,r10,32 700 stvx v25,r11,$sp 701 addi r11,r11,32 702 stvx v26,r10,$sp 703 addi r10,r10,32 704 stvx v27,r11,$sp 705 addi r11,r11,32 706 stvx v28,r10,$sp 707 addi r10,r10,32 708 stvx v29,r11,$sp 709 addi r11,r11,32 710 stvx v30,r10,$sp 711 stvx v31,r11,$sp 712 li r0,-1 713 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 714 li $x10,0x10 715 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 716 li $x20,0x20 717 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 718 li $x30,0x30 719 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 720 li $x40,0x40 721 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 722 li $x50,0x50 723 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 724 li $x60,0x60 725 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 726 li $x70,0x70 727 mtspr 256,r0 728 729 subi $rounds,$rounds,3 # -4 in total 730 subi $len,$len,128 # bias 731 732 lvx $rndkey0,$x00,$key # load key schedule 733 lvx v30,$x10,$key 734 addi $key,$key,0x20 735 lvx v31,$x00,$key 736 ?vperm $rndkey0,$rndkey0,v30,$keyperm 737 addi $key_,$sp,$FRAME+15 738 mtctr $rounds 739 740Load_cbc_dec_key: 741 ?vperm v24,v30,v31,$keyperm 742 lvx v30,$x10,$key 743 addi $key,$key,0x20 744 stvx v24,$x00,$key_ # off-load round[1] 745 ?vperm v25,v31,v30,$keyperm 746 lvx v31,$x00,$key 747 stvx v25,$x10,$key_ # off-load round[2] 748 addi $key_,$key_,0x20 749 bdnz Load_cbc_dec_key 750 751 lvx v26,$x10,$key 752 ?vperm v24,v30,v31,$keyperm 753 lvx v27,$x20,$key 754 stvx v24,$x00,$key_ # off-load round[3] 755 ?vperm v25,v31,v26,$keyperm 756 lvx v28,$x30,$key 757 stvx v25,$x10,$key_ # off-load round[4] 758 addi $key_,$sp,$FRAME+15 # rewind $key_ 759 ?vperm v26,v26,v27,$keyperm 760 lvx v29,$x40,$key 761 ?vperm v27,v27,v28,$keyperm 762 lvx v30,$x50,$key 763 ?vperm v28,v28,v29,$keyperm 764 lvx v31,$x60,$key 765 ?vperm v29,v29,v30,$keyperm 766 lvx $out0,$x70,$key # borrow $out0 767 ?vperm v30,v30,v31,$keyperm 768 lvx v24,$x00,$key_ # pre-load round[1] 769 ?vperm v31,v31,$out0,$keyperm 770 lvx v25,$x10,$key_ # pre-load round[2] 771 772 #lvx $inptail,0,$inp # "caller" already did this 773 #addi $inp,$inp,15 # 15 is not typo 774 subi $inp,$inp,15 # undo "caller" 775 776 le?li $idx,8 777 lvx_u $in0,$x00,$inp # load first 8 "words" 778 le?lvsl $inpperm,0,$idx 779 le?vspltisb $tmp,0x0f 780 lvx_u $in1,$x10,$inp 781 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 782 lvx_u $in2,$x20,$inp 783 le?vperm $in0,$in0,$in0,$inpperm 784 lvx_u $in3,$x30,$inp 785 le?vperm $in1,$in1,$in1,$inpperm 786 lvx_u $in4,$x40,$inp 787 le?vperm $in2,$in2,$in2,$inpperm 788 vxor $out0,$in0,$rndkey0 789 lvx_u $in5,$x50,$inp 790 le?vperm $in3,$in3,$in3,$inpperm 791 vxor $out1,$in1,$rndkey0 792 lvx_u $in6,$x60,$inp 793 le?vperm $in4,$in4,$in4,$inpperm 794 vxor $out2,$in2,$rndkey0 795 lvx_u $in7,$x70,$inp 796 addi $inp,$inp,0x80 797 le?vperm $in5,$in5,$in5,$inpperm 798 vxor $out3,$in3,$rndkey0 799 le?vperm $in6,$in6,$in6,$inpperm 800 vxor $out4,$in4,$rndkey0 801 le?vperm $in7,$in7,$in7,$inpperm 802 vxor $out5,$in5,$rndkey0 803 vxor $out6,$in6,$rndkey0 804 vxor $out7,$in7,$rndkey0 805 806 mtctr $rounds 807 b Loop_cbc_dec8x 808.align 5 809Loop_cbc_dec8x: 810 vncipher $out0,$out0,v24 811 vncipher $out1,$out1,v24 812 vncipher $out2,$out2,v24 813 vncipher $out3,$out3,v24 814 vncipher $out4,$out4,v24 815 vncipher $out5,$out5,v24 816 vncipher $out6,$out6,v24 817 vncipher $out7,$out7,v24 818 lvx v24,$x20,$key_ # round[3] 819 addi $key_,$key_,0x20 820 821 vncipher $out0,$out0,v25 822 vncipher $out1,$out1,v25 823 vncipher $out2,$out2,v25 824 vncipher $out3,$out3,v25 825 vncipher $out4,$out4,v25 826 vncipher $out5,$out5,v25 827 vncipher $out6,$out6,v25 828 vncipher $out7,$out7,v25 829 lvx v25,$x10,$key_ # round[4] 830 bdnz Loop_cbc_dec8x 831 832 subic $len,$len,128 # $len-=128 833 vncipher $out0,$out0,v24 834 vncipher $out1,$out1,v24 835 vncipher $out2,$out2,v24 836 vncipher $out3,$out3,v24 837 vncipher $out4,$out4,v24 838 vncipher $out5,$out5,v24 839 vncipher $out6,$out6,v24 840 vncipher $out7,$out7,v24 841 842 subfe. r0,r0,r0 # borrow?-1:0 843 vncipher $out0,$out0,v25 844 vncipher $out1,$out1,v25 845 vncipher $out2,$out2,v25 846 vncipher $out3,$out3,v25 847 vncipher $out4,$out4,v25 848 vncipher $out5,$out5,v25 849 vncipher $out6,$out6,v25 850 vncipher $out7,$out7,v25 851 852 and r0,r0,$len 853 vncipher $out0,$out0,v26 854 vncipher $out1,$out1,v26 855 vncipher $out2,$out2,v26 856 vncipher $out3,$out3,v26 857 vncipher $out4,$out4,v26 858 vncipher $out5,$out5,v26 859 vncipher $out6,$out6,v26 860 vncipher $out7,$out7,v26 861 862 add $inp,$inp,r0 # $inp is adjusted in such 863 # way that at exit from the 864 # loop inX-in7 are loaded 865 # with last "words" 866 vncipher $out0,$out0,v27 867 vncipher $out1,$out1,v27 868 vncipher $out2,$out2,v27 869 vncipher $out3,$out3,v27 870 vncipher $out4,$out4,v27 871 vncipher $out5,$out5,v27 872 vncipher $out6,$out6,v27 873 vncipher $out7,$out7,v27 874 875 addi $key_,$sp,$FRAME+15 # rewind $key_ 876 vncipher $out0,$out0,v28 877 vncipher $out1,$out1,v28 878 vncipher $out2,$out2,v28 879 vncipher $out3,$out3,v28 880 vncipher $out4,$out4,v28 881 vncipher $out5,$out5,v28 882 vncipher $out6,$out6,v28 883 vncipher $out7,$out7,v28 884 lvx v24,$x00,$key_ # re-pre-load round[1] 885 886 vncipher $out0,$out0,v29 887 vncipher $out1,$out1,v29 888 vncipher $out2,$out2,v29 889 vncipher $out3,$out3,v29 890 vncipher $out4,$out4,v29 891 vncipher $out5,$out5,v29 892 vncipher $out6,$out6,v29 893 vncipher $out7,$out7,v29 894 lvx v25,$x10,$key_ # re-pre-load round[2] 895 896 vncipher $out0,$out0,v30 897 vxor $ivec,$ivec,v31 # xor with last round key 898 vncipher $out1,$out1,v30 899 vxor $in0,$in0,v31 900 vncipher $out2,$out2,v30 901 vxor $in1,$in1,v31 902 vncipher $out3,$out3,v30 903 vxor $in2,$in2,v31 904 vncipher $out4,$out4,v30 905 vxor $in3,$in3,v31 906 vncipher $out5,$out5,v30 907 vxor $in4,$in4,v31 908 vncipher $out6,$out6,v30 909 vxor $in5,$in5,v31 910 vncipher $out7,$out7,v30 911 vxor $in6,$in6,v31 912 913 vncipherlast $out0,$out0,$ivec 914 vncipherlast $out1,$out1,$in0 915 lvx_u $in0,$x00,$inp # load next input block 916 vncipherlast $out2,$out2,$in1 917 lvx_u $in1,$x10,$inp 918 vncipherlast $out3,$out3,$in2 919 le?vperm $in0,$in0,$in0,$inpperm 920 lvx_u $in2,$x20,$inp 921 vncipherlast $out4,$out4,$in3 922 le?vperm $in1,$in1,$in1,$inpperm 923 lvx_u $in3,$x30,$inp 924 vncipherlast $out5,$out5,$in4 925 le?vperm $in2,$in2,$in2,$inpperm 926 lvx_u $in4,$x40,$inp 927 vncipherlast $out6,$out6,$in5 928 le?vperm $in3,$in3,$in3,$inpperm 929 lvx_u $in5,$x50,$inp 930 vncipherlast $out7,$out7,$in6 931 le?vperm $in4,$in4,$in4,$inpperm 932 lvx_u $in6,$x60,$inp 933 vmr $ivec,$in7 934 le?vperm $in5,$in5,$in5,$inpperm 935 lvx_u $in7,$x70,$inp 936 addi $inp,$inp,0x80 937 938 le?vperm $out0,$out0,$out0,$inpperm 939 le?vperm $out1,$out1,$out1,$inpperm 940 stvx_u $out0,$x00,$out 941 le?vperm $in6,$in6,$in6,$inpperm 942 vxor $out0,$in0,$rndkey0 943 le?vperm $out2,$out2,$out2,$inpperm 944 stvx_u $out1,$x10,$out 945 le?vperm $in7,$in7,$in7,$inpperm 946 vxor $out1,$in1,$rndkey0 947 le?vperm $out3,$out3,$out3,$inpperm 948 stvx_u $out2,$x20,$out 949 vxor $out2,$in2,$rndkey0 950 le?vperm $out4,$out4,$out4,$inpperm 951 stvx_u $out3,$x30,$out 952 vxor $out3,$in3,$rndkey0 953 le?vperm $out5,$out5,$out5,$inpperm 954 stvx_u $out4,$x40,$out 955 vxor $out4,$in4,$rndkey0 956 le?vperm $out6,$out6,$out6,$inpperm 957 stvx_u $out5,$x50,$out 958 vxor $out5,$in5,$rndkey0 959 le?vperm $out7,$out7,$out7,$inpperm 960 stvx_u $out6,$x60,$out 961 vxor $out6,$in6,$rndkey0 962 stvx_u $out7,$x70,$out 963 addi $out,$out,0x80 964 vxor $out7,$in7,$rndkey0 965 966 mtctr $rounds 967 beq Loop_cbc_dec8x # did $len-=128 borrow? 968 969 addic. $len,$len,128 970 beq Lcbc_dec8x_done 971 nop 972 nop 973 974Loop_cbc_dec8x_tail: # up to 7 "words" tail... 975 vncipher $out1,$out1,v24 976 vncipher $out2,$out2,v24 977 vncipher $out3,$out3,v24 978 vncipher $out4,$out4,v24 979 vncipher $out5,$out5,v24 980 vncipher $out6,$out6,v24 981 vncipher $out7,$out7,v24 982 lvx v24,$x20,$key_ # round[3] 983 addi $key_,$key_,0x20 984 985 vncipher $out1,$out1,v25 986 vncipher $out2,$out2,v25 987 vncipher $out3,$out3,v25 988 vncipher $out4,$out4,v25 989 vncipher $out5,$out5,v25 990 vncipher $out6,$out6,v25 991 vncipher $out7,$out7,v25 992 lvx v25,$x10,$key_ # round[4] 993 bdnz Loop_cbc_dec8x_tail 994 995 vncipher $out1,$out1,v24 996 vncipher $out2,$out2,v24 997 vncipher $out3,$out3,v24 998 vncipher $out4,$out4,v24 999 vncipher $out5,$out5,v24 1000 vncipher $out6,$out6,v24 1001 vncipher $out7,$out7,v24 1002 1003 vncipher $out1,$out1,v25 1004 vncipher $out2,$out2,v25 1005 vncipher $out3,$out3,v25 1006 vncipher $out4,$out4,v25 1007 vncipher $out5,$out5,v25 1008 vncipher $out6,$out6,v25 1009 vncipher $out7,$out7,v25 1010 1011 vncipher $out1,$out1,v26 1012 vncipher $out2,$out2,v26 1013 vncipher $out3,$out3,v26 1014 vncipher $out4,$out4,v26 1015 vncipher $out5,$out5,v26 1016 vncipher $out6,$out6,v26 1017 vncipher $out7,$out7,v26 1018 1019 vncipher $out1,$out1,v27 1020 vncipher $out2,$out2,v27 1021 vncipher $out3,$out3,v27 1022 vncipher $out4,$out4,v27 1023 vncipher $out5,$out5,v27 1024 vncipher $out6,$out6,v27 1025 vncipher $out7,$out7,v27 1026 1027 vncipher $out1,$out1,v28 1028 vncipher $out2,$out2,v28 1029 vncipher $out3,$out3,v28 1030 vncipher $out4,$out4,v28 1031 vncipher $out5,$out5,v28 1032 vncipher $out6,$out6,v28 1033 vncipher $out7,$out7,v28 1034 1035 vncipher $out1,$out1,v29 1036 vncipher $out2,$out2,v29 1037 vncipher $out3,$out3,v29 1038 vncipher $out4,$out4,v29 1039 vncipher $out5,$out5,v29 1040 vncipher $out6,$out6,v29 1041 vncipher $out7,$out7,v29 1042 1043 vncipher $out1,$out1,v30 1044 vxor $ivec,$ivec,v31 # last round key 1045 vncipher $out2,$out2,v30 1046 vxor $in1,$in1,v31 1047 vncipher $out3,$out3,v30 1048 vxor $in2,$in2,v31 1049 vncipher $out4,$out4,v30 1050 vxor $in3,$in3,v31 1051 vncipher $out5,$out5,v30 1052 vxor $in4,$in4,v31 1053 vncipher $out6,$out6,v30 1054 vxor $in5,$in5,v31 1055 vncipher $out7,$out7,v30 1056 vxor $in6,$in6,v31 1057 1058 cmplwi $len,32 # switch($len) 1059 blt Lcbc_dec8x_one 1060 nop 1061 beq Lcbc_dec8x_two 1062 cmplwi $len,64 1063 blt Lcbc_dec8x_three 1064 nop 1065 beq Lcbc_dec8x_four 1066 cmplwi $len,96 1067 blt Lcbc_dec8x_five 1068 nop 1069 beq Lcbc_dec8x_six 1070 1071Lcbc_dec8x_seven: 1072 vncipherlast $out1,$out1,$ivec 1073 vncipherlast $out2,$out2,$in1 1074 vncipherlast $out3,$out3,$in2 1075 vncipherlast $out4,$out4,$in3 1076 vncipherlast $out5,$out5,$in4 1077 vncipherlast $out6,$out6,$in5 1078 vncipherlast $out7,$out7,$in6 1079 vmr $ivec,$in7 1080 1081 le?vperm $out1,$out1,$out1,$inpperm 1082 le?vperm $out2,$out2,$out2,$inpperm 1083 stvx_u $out1,$x00,$out 1084 le?vperm $out3,$out3,$out3,$inpperm 1085 stvx_u $out2,$x10,$out 1086 le?vperm $out4,$out4,$out4,$inpperm 1087 stvx_u $out3,$x20,$out 1088 le?vperm $out5,$out5,$out5,$inpperm 1089 stvx_u $out4,$x30,$out 1090 le?vperm $out6,$out6,$out6,$inpperm 1091 stvx_u $out5,$x40,$out 1092 le?vperm $out7,$out7,$out7,$inpperm 1093 stvx_u $out6,$x50,$out 1094 stvx_u $out7,$x60,$out 1095 addi $out,$out,0x70 1096 b Lcbc_dec8x_done 1097 1098.align 5 1099Lcbc_dec8x_six: 1100 vncipherlast $out2,$out2,$ivec 1101 vncipherlast $out3,$out3,$in2 1102 vncipherlast $out4,$out4,$in3 1103 vncipherlast $out5,$out5,$in4 1104 vncipherlast $out6,$out6,$in5 1105 vncipherlast $out7,$out7,$in6 1106 vmr $ivec,$in7 1107 1108 le?vperm $out2,$out2,$out2,$inpperm 1109 le?vperm $out3,$out3,$out3,$inpperm 1110 stvx_u $out2,$x00,$out 1111 le?vperm $out4,$out4,$out4,$inpperm 1112 stvx_u $out3,$x10,$out 1113 le?vperm $out5,$out5,$out5,$inpperm 1114 stvx_u $out4,$x20,$out 1115 le?vperm $out6,$out6,$out6,$inpperm 1116 stvx_u $out5,$x30,$out 1117 le?vperm $out7,$out7,$out7,$inpperm 1118 stvx_u $out6,$x40,$out 1119 stvx_u $out7,$x50,$out 1120 addi $out,$out,0x60 1121 b Lcbc_dec8x_done 1122 1123.align 5 1124Lcbc_dec8x_five: 1125 vncipherlast $out3,$out3,$ivec 1126 vncipherlast $out4,$out4,$in3 1127 vncipherlast $out5,$out5,$in4 1128 vncipherlast $out6,$out6,$in5 1129 vncipherlast $out7,$out7,$in6 1130 vmr $ivec,$in7 1131 1132 le?vperm $out3,$out3,$out3,$inpperm 1133 le?vperm $out4,$out4,$out4,$inpperm 1134 stvx_u $out3,$x00,$out 1135 le?vperm $out5,$out5,$out5,$inpperm 1136 stvx_u $out4,$x10,$out 1137 le?vperm $out6,$out6,$out6,$inpperm 1138 stvx_u $out5,$x20,$out 1139 le?vperm $out7,$out7,$out7,$inpperm 1140 stvx_u $out6,$x30,$out 1141 stvx_u $out7,$x40,$out 1142 addi $out,$out,0x50 1143 b Lcbc_dec8x_done 1144 1145.align 5 1146Lcbc_dec8x_four: 1147 vncipherlast $out4,$out4,$ivec 1148 vncipherlast $out5,$out5,$in4 1149 vncipherlast $out6,$out6,$in5 1150 vncipherlast $out7,$out7,$in6 1151 vmr $ivec,$in7 1152 1153 le?vperm $out4,$out4,$out4,$inpperm 1154 le?vperm $out5,$out5,$out5,$inpperm 1155 stvx_u $out4,$x00,$out 1156 le?vperm $out6,$out6,$out6,$inpperm 1157 stvx_u $out5,$x10,$out 1158 le?vperm $out7,$out7,$out7,$inpperm 1159 stvx_u $out6,$x20,$out 1160 stvx_u $out7,$x30,$out 1161 addi $out,$out,0x40 1162 b Lcbc_dec8x_done 1163 1164.align 5 1165Lcbc_dec8x_three: 1166 vncipherlast $out5,$out5,$ivec 1167 vncipherlast $out6,$out6,$in5 1168 vncipherlast $out7,$out7,$in6 1169 vmr $ivec,$in7 1170 1171 le?vperm $out5,$out5,$out5,$inpperm 1172 le?vperm $out6,$out6,$out6,$inpperm 1173 stvx_u $out5,$x00,$out 1174 le?vperm $out7,$out7,$out7,$inpperm 1175 stvx_u $out6,$x10,$out 1176 stvx_u $out7,$x20,$out 1177 addi $out,$out,0x30 1178 b Lcbc_dec8x_done 1179 1180.align 5 1181Lcbc_dec8x_two: 1182 vncipherlast $out6,$out6,$ivec 1183 vncipherlast $out7,$out7,$in6 1184 vmr $ivec,$in7 1185 1186 le?vperm $out6,$out6,$out6,$inpperm 1187 le?vperm $out7,$out7,$out7,$inpperm 1188 stvx_u $out6,$x00,$out 1189 stvx_u $out7,$x10,$out 1190 addi $out,$out,0x20 1191 b Lcbc_dec8x_done 1192 1193.align 5 1194Lcbc_dec8x_one: 1195 vncipherlast $out7,$out7,$ivec 1196 vmr $ivec,$in7 1197 1198 le?vperm $out7,$out7,$out7,$inpperm 1199 stvx_u $out7,0,$out 1200 addi $out,$out,0x10 1201 1202Lcbc_dec8x_done: 1203 le?vperm $ivec,$ivec,$ivec,$inpperm 1204 stvx_u $ivec,0,$ivp # write [unaligned] iv 1205 1206 li r10,`$FRAME+15` 1207 li r11,`$FRAME+31` 1208 stvx $inpperm,r10,$sp # wipe copies of round keys 1209 addi r10,r10,32 1210 stvx $inpperm,r11,$sp 1211 addi r11,r11,32 1212 stvx $inpperm,r10,$sp 1213 addi r10,r10,32 1214 stvx $inpperm,r11,$sp 1215 addi r11,r11,32 1216 stvx $inpperm,r10,$sp 1217 addi r10,r10,32 1218 stvx $inpperm,r11,$sp 1219 addi r11,r11,32 1220 stvx $inpperm,r10,$sp 1221 addi r10,r10,32 1222 stvx $inpperm,r11,$sp 1223 addi r11,r11,32 1224 1225 mtspr 256,$vrsave 1226 lvx v20,r10,$sp # ABI says so 1227 addi r10,r10,32 1228 lvx v21,r11,$sp 1229 addi r11,r11,32 1230 lvx v22,r10,$sp 1231 addi r10,r10,32 1232 lvx v23,r11,$sp 1233 addi r11,r11,32 1234 lvx v24,r10,$sp 1235 addi r10,r10,32 1236 lvx v25,r11,$sp 1237 addi r11,r11,32 1238 lvx v26,r10,$sp 1239 addi r10,r10,32 1240 lvx v27,r11,$sp 1241 addi r11,r11,32 1242 lvx v28,r10,$sp 1243 addi r10,r10,32 1244 lvx v29,r11,$sp 1245 addi r11,r11,32 1246 lvx v30,r10,$sp 1247 lvx v31,r11,$sp 1248 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1249 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1250 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1251 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1252 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1253 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1254 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1255 blr 1256 .long 0 1257 .byte 0,12,0x04,0,0x80,6,6,0 1258 .long 0 1259.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt 1260___ 1261}} }}} 1262 1263######################################################################### 1264{{{ # CTR procedure[s] # 1265my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1266my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1267my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1268 map("v$_",(4..11)); 1269my $dat=$tmp; 1270 1271$code.=<<___; 1272.globl .${prefix}_ctr32_encrypt_blocks 1273.align 5 1274.${prefix}_ctr32_encrypt_blocks: 1275 ${UCMP}i $len,1 1276 bltlr- 1277 1278 lis r0,0xfff0 1279 mfspr $vrsave,256 1280 mtspr 256,r0 1281 1282 li $idx,15 1283 vxor $rndkey0,$rndkey0,$rndkey0 1284 le?vspltisb $tmp,0x0f 1285 1286 lvx $ivec,0,$ivp # load [unaligned] iv 1287 lvsl $inpperm,0,$ivp 1288 lvx $inptail,$idx,$ivp 1289 vspltisb $one,1 1290 le?vxor $inpperm,$inpperm,$tmp 1291 vperm $ivec,$ivec,$inptail,$inpperm 1292 vsldoi $one,$rndkey0,$one,1 1293 1294 neg r11,$inp 1295 ?lvsl $keyperm,0,$key # prepare for unaligned key 1296 lwz $rounds,240($key) 1297 1298 lvsr $inpperm,0,r11 # prepare for unaligned load 1299 lvx $inptail,0,$inp 1300 addi $inp,$inp,15 # 15 is not typo 1301 le?vxor $inpperm,$inpperm,$tmp 1302 1303 srwi $rounds,$rounds,1 1304 li $idx,16 1305 subi $rounds,$rounds,1 1306 1307 ${UCMP}i $len,8 1308 bge _aesp8_ctr32_encrypt8x 1309 1310 ?lvsr $outperm,0,$out # prepare for unaligned store 1311 vspltisb $outmask,-1 1312 lvx $outhead,0,$out 1313 ?vperm $outmask,$rndkey0,$outmask,$outperm 1314 le?vxor $outperm,$outperm,$tmp 1315 1316 lvx $rndkey0,0,$key 1317 mtctr $rounds 1318 lvx $rndkey1,$idx,$key 1319 addi $idx,$idx,16 1320 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1321 vxor $inout,$ivec,$rndkey0 1322 lvx $rndkey0,$idx,$key 1323 addi $idx,$idx,16 1324 b Loop_ctr32_enc 1325 1326.align 5 1327Loop_ctr32_enc: 1328 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1329 vcipher $inout,$inout,$rndkey1 1330 lvx $rndkey1,$idx,$key 1331 addi $idx,$idx,16 1332 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1333 vcipher $inout,$inout,$rndkey0 1334 lvx $rndkey0,$idx,$key 1335 addi $idx,$idx,16 1336 bdnz Loop_ctr32_enc 1337 1338 vadduwm $ivec,$ivec,$one 1339 vmr $dat,$inptail 1340 lvx $inptail,0,$inp 1341 addi $inp,$inp,16 1342 subic. $len,$len,1 # blocks-- 1343 1344 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1345 vcipher $inout,$inout,$rndkey1 1346 lvx $rndkey1,$idx,$key 1347 vperm $dat,$dat,$inptail,$inpperm 1348 li $idx,16 1349 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1350 lvx $rndkey0,0,$key 1351 vxor $dat,$dat,$rndkey1 # last round key 1352 vcipherlast $inout,$inout,$dat 1353 1354 lvx $rndkey1,$idx,$key 1355 addi $idx,$idx,16 1356 vperm $inout,$inout,$inout,$outperm 1357 vsel $dat,$outhead,$inout,$outmask 1358 mtctr $rounds 1359 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1360 vmr $outhead,$inout 1361 vxor $inout,$ivec,$rndkey0 1362 lvx $rndkey0,$idx,$key 1363 addi $idx,$idx,16 1364 stvx $dat,0,$out 1365 addi $out,$out,16 1366 bne Loop_ctr32_enc 1367 1368 addi $out,$out,-1 1369 lvx $inout,0,$out # redundant in aligned case 1370 vsel $inout,$outhead,$inout,$outmask 1371 stvx $inout,0,$out 1372 1373 mtspr 256,$vrsave 1374 blr 1375 .long 0 1376 .byte 0,12,0x14,0,0,0,6,0 1377 .long 0 1378___ 1379######################################################################### 1380{{ # Optimized CTR procedure # 1381my $key_="r11"; 1382my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1383 $x00=0 if ($flavour =~ /osx/); 1384my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1385my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1386my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1387 # v26-v31 last 6 round keys 1388my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1389my ($two,$three,$four)=($outhead,$outperm,$outmask); 1390 1391$code.=<<___; 1392.align 5 1393_aesp8_ctr32_encrypt8x: 1394 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1395 li r10,`$FRAME+8*16+15` 1396 li r11,`$FRAME+8*16+31` 1397 stvx v20,r10,$sp # ABI says so 1398 addi r10,r10,32 1399 stvx v21,r11,$sp 1400 addi r11,r11,32 1401 stvx v22,r10,$sp 1402 addi r10,r10,32 1403 stvx v23,r11,$sp 1404 addi r11,r11,32 1405 stvx v24,r10,$sp 1406 addi r10,r10,32 1407 stvx v25,r11,$sp 1408 addi r11,r11,32 1409 stvx v26,r10,$sp 1410 addi r10,r10,32 1411 stvx v27,r11,$sp 1412 addi r11,r11,32 1413 stvx v28,r10,$sp 1414 addi r10,r10,32 1415 stvx v29,r11,$sp 1416 addi r11,r11,32 1417 stvx v30,r10,$sp 1418 stvx v31,r11,$sp 1419 li r0,-1 1420 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1421 li $x10,0x10 1422 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1423 li $x20,0x20 1424 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1425 li $x30,0x30 1426 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1427 li $x40,0x40 1428 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1429 li $x50,0x50 1430 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1431 li $x60,0x60 1432 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1433 li $x70,0x70 1434 mtspr 256,r0 1435 1436 subi $rounds,$rounds,3 # -4 in total 1437 1438 lvx $rndkey0,$x00,$key # load key schedule 1439 lvx v30,$x10,$key 1440 addi $key,$key,0x20 1441 lvx v31,$x00,$key 1442 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1443 addi $key_,$sp,$FRAME+15 1444 mtctr $rounds 1445 1446Load_ctr32_enc_key: 1447 ?vperm v24,v30,v31,$keyperm 1448 lvx v30,$x10,$key 1449 addi $key,$key,0x20 1450 stvx v24,$x00,$key_ # off-load round[1] 1451 ?vperm v25,v31,v30,$keyperm 1452 lvx v31,$x00,$key 1453 stvx v25,$x10,$key_ # off-load round[2] 1454 addi $key_,$key_,0x20 1455 bdnz Load_ctr32_enc_key 1456 1457 lvx v26,$x10,$key 1458 ?vperm v24,v30,v31,$keyperm 1459 lvx v27,$x20,$key 1460 stvx v24,$x00,$key_ # off-load round[3] 1461 ?vperm v25,v31,v26,$keyperm 1462 lvx v28,$x30,$key 1463 stvx v25,$x10,$key_ # off-load round[4] 1464 addi $key_,$sp,$FRAME+15 # rewind $key_ 1465 ?vperm v26,v26,v27,$keyperm 1466 lvx v29,$x40,$key 1467 ?vperm v27,v27,v28,$keyperm 1468 lvx v30,$x50,$key 1469 ?vperm v28,v28,v29,$keyperm 1470 lvx v31,$x60,$key 1471 ?vperm v29,v29,v30,$keyperm 1472 lvx $out0,$x70,$key # borrow $out0 1473 ?vperm v30,v30,v31,$keyperm 1474 lvx v24,$x00,$key_ # pre-load round[1] 1475 ?vperm v31,v31,$out0,$keyperm 1476 lvx v25,$x10,$key_ # pre-load round[2] 1477 1478 vadduwm $two,$one,$one 1479 subi $inp,$inp,15 # undo "caller" 1480 $SHL $len,$len,4 1481 1482 vadduwm $out1,$ivec,$one # counter values ... 1483 vadduwm $out2,$ivec,$two 1484 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1485 le?li $idx,8 1486 vadduwm $out3,$out1,$two 1487 vxor $out1,$out1,$rndkey0 1488 le?lvsl $inpperm,0,$idx 1489 vadduwm $out4,$out2,$two 1490 vxor $out2,$out2,$rndkey0 1491 le?vspltisb $tmp,0x0f 1492 vadduwm $out5,$out3,$two 1493 vxor $out3,$out3,$rndkey0 1494 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1495 vadduwm $out6,$out4,$two 1496 vxor $out4,$out4,$rndkey0 1497 vadduwm $out7,$out5,$two 1498 vxor $out5,$out5,$rndkey0 1499 vadduwm $ivec,$out6,$two # next counter value 1500 vxor $out6,$out6,$rndkey0 1501 vxor $out7,$out7,$rndkey0 1502 1503 mtctr $rounds 1504 b Loop_ctr32_enc8x 1505.align 5 1506Loop_ctr32_enc8x: 1507 vcipher $out0,$out0,v24 1508 vcipher $out1,$out1,v24 1509 vcipher $out2,$out2,v24 1510 vcipher $out3,$out3,v24 1511 vcipher $out4,$out4,v24 1512 vcipher $out5,$out5,v24 1513 vcipher $out6,$out6,v24 1514 vcipher $out7,$out7,v24 1515Loop_ctr32_enc8x_middle: 1516 lvx v24,$x20,$key_ # round[3] 1517 addi $key_,$key_,0x20 1518 1519 vcipher $out0,$out0,v25 1520 vcipher $out1,$out1,v25 1521 vcipher $out2,$out2,v25 1522 vcipher $out3,$out3,v25 1523 vcipher $out4,$out4,v25 1524 vcipher $out5,$out5,v25 1525 vcipher $out6,$out6,v25 1526 vcipher $out7,$out7,v25 1527 lvx v25,$x10,$key_ # round[4] 1528 bdnz Loop_ctr32_enc8x 1529 1530 subic r11,$len,256 # $len-256, borrow $key_ 1531 vcipher $out0,$out0,v24 1532 vcipher $out1,$out1,v24 1533 vcipher $out2,$out2,v24 1534 vcipher $out3,$out3,v24 1535 vcipher $out4,$out4,v24 1536 vcipher $out5,$out5,v24 1537 vcipher $out6,$out6,v24 1538 vcipher $out7,$out7,v24 1539 1540 subfe r0,r0,r0 # borrow?-1:0 1541 vcipher $out0,$out0,v25 1542 vcipher $out1,$out1,v25 1543 vcipher $out2,$out2,v25 1544 vcipher $out3,$out3,v25 1545 vcipher $out4,$out4,v25 1546 vcipher $out5,$out5,v25 1547 vcipher $out6,$out6,v25 1548 vcipher $out7,$out7,v25 1549 1550 and r0,r0,r11 1551 addi $key_,$sp,$FRAME+15 # rewind $key_ 1552 vcipher $out0,$out0,v26 1553 vcipher $out1,$out1,v26 1554 vcipher $out2,$out2,v26 1555 vcipher $out3,$out3,v26 1556 vcipher $out4,$out4,v26 1557 vcipher $out5,$out5,v26 1558 vcipher $out6,$out6,v26 1559 vcipher $out7,$out7,v26 1560 lvx v24,$x00,$key_ # re-pre-load round[1] 1561 1562 subic $len,$len,129 # $len-=129 1563 vcipher $out0,$out0,v27 1564 addi $len,$len,1 # $len-=128 really 1565 vcipher $out1,$out1,v27 1566 vcipher $out2,$out2,v27 1567 vcipher $out3,$out3,v27 1568 vcipher $out4,$out4,v27 1569 vcipher $out5,$out5,v27 1570 vcipher $out6,$out6,v27 1571 vcipher $out7,$out7,v27 1572 lvx v25,$x10,$key_ # re-pre-load round[2] 1573 1574 vcipher $out0,$out0,v28 1575 lvx_u $in0,$x00,$inp # load input 1576 vcipher $out1,$out1,v28 1577 lvx_u $in1,$x10,$inp 1578 vcipher $out2,$out2,v28 1579 lvx_u $in2,$x20,$inp 1580 vcipher $out3,$out3,v28 1581 lvx_u $in3,$x30,$inp 1582 vcipher $out4,$out4,v28 1583 lvx_u $in4,$x40,$inp 1584 vcipher $out5,$out5,v28 1585 lvx_u $in5,$x50,$inp 1586 vcipher $out6,$out6,v28 1587 lvx_u $in6,$x60,$inp 1588 vcipher $out7,$out7,v28 1589 lvx_u $in7,$x70,$inp 1590 addi $inp,$inp,0x80 1591 1592 vcipher $out0,$out0,v29 1593 le?vperm $in0,$in0,$in0,$inpperm 1594 vcipher $out1,$out1,v29 1595 le?vperm $in1,$in1,$in1,$inpperm 1596 vcipher $out2,$out2,v29 1597 le?vperm $in2,$in2,$in2,$inpperm 1598 vcipher $out3,$out3,v29 1599 le?vperm $in3,$in3,$in3,$inpperm 1600 vcipher $out4,$out4,v29 1601 le?vperm $in4,$in4,$in4,$inpperm 1602 vcipher $out5,$out5,v29 1603 le?vperm $in5,$in5,$in5,$inpperm 1604 vcipher $out6,$out6,v29 1605 le?vperm $in6,$in6,$in6,$inpperm 1606 vcipher $out7,$out7,v29 1607 le?vperm $in7,$in7,$in7,$inpperm 1608 1609 add $inp,$inp,r0 # $inp is adjusted in such 1610 # way that at exit from the 1611 # loop inX-in7 are loaded 1612 # with last "words" 1613 subfe. r0,r0,r0 # borrow?-1:0 1614 vcipher $out0,$out0,v30 1615 vxor $in0,$in0,v31 # xor with last round key 1616 vcipher $out1,$out1,v30 1617 vxor $in1,$in1,v31 1618 vcipher $out2,$out2,v30 1619 vxor $in2,$in2,v31 1620 vcipher $out3,$out3,v30 1621 vxor $in3,$in3,v31 1622 vcipher $out4,$out4,v30 1623 vxor $in4,$in4,v31 1624 vcipher $out5,$out5,v30 1625 vxor $in5,$in5,v31 1626 vcipher $out6,$out6,v30 1627 vxor $in6,$in6,v31 1628 vcipher $out7,$out7,v30 1629 vxor $in7,$in7,v31 1630 1631 bne Lctr32_enc8x_break # did $len-129 borrow? 1632 1633 vcipherlast $in0,$out0,$in0 1634 vcipherlast $in1,$out1,$in1 1635 vadduwm $out1,$ivec,$one # counter values ... 1636 vcipherlast $in2,$out2,$in2 1637 vadduwm $out2,$ivec,$two 1638 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1639 vcipherlast $in3,$out3,$in3 1640 vadduwm $out3,$out1,$two 1641 vxor $out1,$out1,$rndkey0 1642 vcipherlast $in4,$out4,$in4 1643 vadduwm $out4,$out2,$two 1644 vxor $out2,$out2,$rndkey0 1645 vcipherlast $in5,$out5,$in5 1646 vadduwm $out5,$out3,$two 1647 vxor $out3,$out3,$rndkey0 1648 vcipherlast $in6,$out6,$in6 1649 vadduwm $out6,$out4,$two 1650 vxor $out4,$out4,$rndkey0 1651 vcipherlast $in7,$out7,$in7 1652 vadduwm $out7,$out5,$two 1653 vxor $out5,$out5,$rndkey0 1654 le?vperm $in0,$in0,$in0,$inpperm 1655 vadduwm $ivec,$out6,$two # next counter value 1656 vxor $out6,$out6,$rndkey0 1657 le?vperm $in1,$in1,$in1,$inpperm 1658 vxor $out7,$out7,$rndkey0 1659 mtctr $rounds 1660 1661 vcipher $out0,$out0,v24 1662 stvx_u $in0,$x00,$out 1663 le?vperm $in2,$in2,$in2,$inpperm 1664 vcipher $out1,$out1,v24 1665 stvx_u $in1,$x10,$out 1666 le?vperm $in3,$in3,$in3,$inpperm 1667 vcipher $out2,$out2,v24 1668 stvx_u $in2,$x20,$out 1669 le?vperm $in4,$in4,$in4,$inpperm 1670 vcipher $out3,$out3,v24 1671 stvx_u $in3,$x30,$out 1672 le?vperm $in5,$in5,$in5,$inpperm 1673 vcipher $out4,$out4,v24 1674 stvx_u $in4,$x40,$out 1675 le?vperm $in6,$in6,$in6,$inpperm 1676 vcipher $out5,$out5,v24 1677 stvx_u $in5,$x50,$out 1678 le?vperm $in7,$in7,$in7,$inpperm 1679 vcipher $out6,$out6,v24 1680 stvx_u $in6,$x60,$out 1681 vcipher $out7,$out7,v24 1682 stvx_u $in7,$x70,$out 1683 addi $out,$out,0x80 1684 1685 b Loop_ctr32_enc8x_middle 1686 1687.align 5 1688Lctr32_enc8x_break: 1689 cmpwi $len,-0x60 1690 blt Lctr32_enc8x_one 1691 nop 1692 beq Lctr32_enc8x_two 1693 cmpwi $len,-0x40 1694 blt Lctr32_enc8x_three 1695 nop 1696 beq Lctr32_enc8x_four 1697 cmpwi $len,-0x20 1698 blt Lctr32_enc8x_five 1699 nop 1700 beq Lctr32_enc8x_six 1701 cmpwi $len,0x00 1702 blt Lctr32_enc8x_seven 1703 1704Lctr32_enc8x_eight: 1705 vcipherlast $out0,$out0,$in0 1706 vcipherlast $out1,$out1,$in1 1707 vcipherlast $out2,$out2,$in2 1708 vcipherlast $out3,$out3,$in3 1709 vcipherlast $out4,$out4,$in4 1710 vcipherlast $out5,$out5,$in5 1711 vcipherlast $out6,$out6,$in6 1712 vcipherlast $out7,$out7,$in7 1713 1714 le?vperm $out0,$out0,$out0,$inpperm 1715 le?vperm $out1,$out1,$out1,$inpperm 1716 stvx_u $out0,$x00,$out 1717 le?vperm $out2,$out2,$out2,$inpperm 1718 stvx_u $out1,$x10,$out 1719 le?vperm $out3,$out3,$out3,$inpperm 1720 stvx_u $out2,$x20,$out 1721 le?vperm $out4,$out4,$out4,$inpperm 1722 stvx_u $out3,$x30,$out 1723 le?vperm $out5,$out5,$out5,$inpperm 1724 stvx_u $out4,$x40,$out 1725 le?vperm $out6,$out6,$out6,$inpperm 1726 stvx_u $out5,$x50,$out 1727 le?vperm $out7,$out7,$out7,$inpperm 1728 stvx_u $out6,$x60,$out 1729 stvx_u $out7,$x70,$out 1730 addi $out,$out,0x80 1731 b Lctr32_enc8x_done 1732 1733.align 5 1734Lctr32_enc8x_seven: 1735 vcipherlast $out0,$out0,$in1 1736 vcipherlast $out1,$out1,$in2 1737 vcipherlast $out2,$out2,$in3 1738 vcipherlast $out3,$out3,$in4 1739 vcipherlast $out4,$out4,$in5 1740 vcipherlast $out5,$out5,$in6 1741 vcipherlast $out6,$out6,$in7 1742 1743 le?vperm $out0,$out0,$out0,$inpperm 1744 le?vperm $out1,$out1,$out1,$inpperm 1745 stvx_u $out0,$x00,$out 1746 le?vperm $out2,$out2,$out2,$inpperm 1747 stvx_u $out1,$x10,$out 1748 le?vperm $out3,$out3,$out3,$inpperm 1749 stvx_u $out2,$x20,$out 1750 le?vperm $out4,$out4,$out4,$inpperm 1751 stvx_u $out3,$x30,$out 1752 le?vperm $out5,$out5,$out5,$inpperm 1753 stvx_u $out4,$x40,$out 1754 le?vperm $out6,$out6,$out6,$inpperm 1755 stvx_u $out5,$x50,$out 1756 stvx_u $out6,$x60,$out 1757 addi $out,$out,0x70 1758 b Lctr32_enc8x_done 1759 1760.align 5 1761Lctr32_enc8x_six: 1762 vcipherlast $out0,$out0,$in2 1763 vcipherlast $out1,$out1,$in3 1764 vcipherlast $out2,$out2,$in4 1765 vcipherlast $out3,$out3,$in5 1766 vcipherlast $out4,$out4,$in6 1767 vcipherlast $out5,$out5,$in7 1768 1769 le?vperm $out0,$out0,$out0,$inpperm 1770 le?vperm $out1,$out1,$out1,$inpperm 1771 stvx_u $out0,$x00,$out 1772 le?vperm $out2,$out2,$out2,$inpperm 1773 stvx_u $out1,$x10,$out 1774 le?vperm $out3,$out3,$out3,$inpperm 1775 stvx_u $out2,$x20,$out 1776 le?vperm $out4,$out4,$out4,$inpperm 1777 stvx_u $out3,$x30,$out 1778 le?vperm $out5,$out5,$out5,$inpperm 1779 stvx_u $out4,$x40,$out 1780 stvx_u $out5,$x50,$out 1781 addi $out,$out,0x60 1782 b Lctr32_enc8x_done 1783 1784.align 5 1785Lctr32_enc8x_five: 1786 vcipherlast $out0,$out0,$in3 1787 vcipherlast $out1,$out1,$in4 1788 vcipherlast $out2,$out2,$in5 1789 vcipherlast $out3,$out3,$in6 1790 vcipherlast $out4,$out4,$in7 1791 1792 le?vperm $out0,$out0,$out0,$inpperm 1793 le?vperm $out1,$out1,$out1,$inpperm 1794 stvx_u $out0,$x00,$out 1795 le?vperm $out2,$out2,$out2,$inpperm 1796 stvx_u $out1,$x10,$out 1797 le?vperm $out3,$out3,$out3,$inpperm 1798 stvx_u $out2,$x20,$out 1799 le?vperm $out4,$out4,$out4,$inpperm 1800 stvx_u $out3,$x30,$out 1801 stvx_u $out4,$x40,$out 1802 addi $out,$out,0x50 1803 b Lctr32_enc8x_done 1804 1805.align 5 1806Lctr32_enc8x_four: 1807 vcipherlast $out0,$out0,$in4 1808 vcipherlast $out1,$out1,$in5 1809 vcipherlast $out2,$out2,$in6 1810 vcipherlast $out3,$out3,$in7 1811 1812 le?vperm $out0,$out0,$out0,$inpperm 1813 le?vperm $out1,$out1,$out1,$inpperm 1814 stvx_u $out0,$x00,$out 1815 le?vperm $out2,$out2,$out2,$inpperm 1816 stvx_u $out1,$x10,$out 1817 le?vperm $out3,$out3,$out3,$inpperm 1818 stvx_u $out2,$x20,$out 1819 stvx_u $out3,$x30,$out 1820 addi $out,$out,0x40 1821 b Lctr32_enc8x_done 1822 1823.align 5 1824Lctr32_enc8x_three: 1825 vcipherlast $out0,$out0,$in5 1826 vcipherlast $out1,$out1,$in6 1827 vcipherlast $out2,$out2,$in7 1828 1829 le?vperm $out0,$out0,$out0,$inpperm 1830 le?vperm $out1,$out1,$out1,$inpperm 1831 stvx_u $out0,$x00,$out 1832 le?vperm $out2,$out2,$out2,$inpperm 1833 stvx_u $out1,$x10,$out 1834 stvx_u $out2,$x20,$out 1835 addi $out,$out,0x30 1836 b Lctr32_enc8x_done 1837 1838.align 5 1839Lctr32_enc8x_two: 1840 vcipherlast $out0,$out0,$in6 1841 vcipherlast $out1,$out1,$in7 1842 1843 le?vperm $out0,$out0,$out0,$inpperm 1844 le?vperm $out1,$out1,$out1,$inpperm 1845 stvx_u $out0,$x00,$out 1846 stvx_u $out1,$x10,$out 1847 addi $out,$out,0x20 1848 b Lctr32_enc8x_done 1849 1850.align 5 1851Lctr32_enc8x_one: 1852 vcipherlast $out0,$out0,$in7 1853 1854 le?vperm $out0,$out0,$out0,$inpperm 1855 stvx_u $out0,0,$out 1856 addi $out,$out,0x10 1857 1858Lctr32_enc8x_done: 1859 li r10,`$FRAME+15` 1860 li r11,`$FRAME+31` 1861 stvx $inpperm,r10,$sp # wipe copies of round keys 1862 addi r10,r10,32 1863 stvx $inpperm,r11,$sp 1864 addi r11,r11,32 1865 stvx $inpperm,r10,$sp 1866 addi r10,r10,32 1867 stvx $inpperm,r11,$sp 1868 addi r11,r11,32 1869 stvx $inpperm,r10,$sp 1870 addi r10,r10,32 1871 stvx $inpperm,r11,$sp 1872 addi r11,r11,32 1873 stvx $inpperm,r10,$sp 1874 addi r10,r10,32 1875 stvx $inpperm,r11,$sp 1876 addi r11,r11,32 1877 1878 mtspr 256,$vrsave 1879 lvx v20,r10,$sp # ABI says so 1880 addi r10,r10,32 1881 lvx v21,r11,$sp 1882 addi r11,r11,32 1883 lvx v22,r10,$sp 1884 addi r10,r10,32 1885 lvx v23,r11,$sp 1886 addi r11,r11,32 1887 lvx v24,r10,$sp 1888 addi r10,r10,32 1889 lvx v25,r11,$sp 1890 addi r11,r11,32 1891 lvx v26,r10,$sp 1892 addi r10,r10,32 1893 lvx v27,r11,$sp 1894 addi r11,r11,32 1895 lvx v28,r10,$sp 1896 addi r10,r10,32 1897 lvx v29,r11,$sp 1898 addi r11,r11,32 1899 lvx v30,r10,$sp 1900 lvx v31,r11,$sp 1901 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1902 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1903 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1904 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1905 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1906 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1907 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1908 blr 1909 .long 0 1910 .byte 0,12,0x04,0,0x80,6,6,0 1911 .long 0 1912.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1913___ 1914}} }}} 1915 1916######################################################################### 1917{{{ # XTS procedures # 1918# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1919# const AES_KEY *key1, const AES_KEY *key2, # 1920# [const] unsigned char iv[16]); # 1921# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1922# input tweak value is assumed to be encrypted already, and last tweak # 1923# value, one suitable for consecutive call on same chunk of data, is # 1924# written back to original buffer. In addition, in "tweak chaining" # 1925# mode only complete input blocks are processed. # 1926 1927my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1928my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1929my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1930my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1931my $taillen = $key2; 1932 1933 ($inp,$idx) = ($idx,$inp); # reassign 1934 1935$code.=<<___; 1936.globl .${prefix}_xts_encrypt 1937.align 5 1938.${prefix}_xts_encrypt: 1939 mr $inp,r3 # reassign 1940 li r3,-1 1941 ${UCMP}i $len,16 1942 bltlr- 1943 1944 lis r0,0xfff0 1945 mfspr r12,256 # save vrsave 1946 li r11,0 1947 mtspr 256,r0 1948 1949 vspltisb $seven,0x07 # 0x070707..07 1950 le?lvsl $leperm,r11,r11 1951 le?vspltisb $tmp,0x0f 1952 le?vxor $leperm,$leperm,$seven 1953 1954 li $idx,15 1955 lvx $tweak,0,$ivp # load [unaligned] iv 1956 lvsl $inpperm,0,$ivp 1957 lvx $inptail,$idx,$ivp 1958 le?vxor $inpperm,$inpperm,$tmp 1959 vperm $tweak,$tweak,$inptail,$inpperm 1960 1961 neg r11,$inp 1962 lvsr $inpperm,0,r11 # prepare for unaligned load 1963 lvx $inout,0,$inp 1964 addi $inp,$inp,15 # 15 is not typo 1965 le?vxor $inpperm,$inpperm,$tmp 1966 1967 ${UCMP}i $key2,0 # key2==NULL? 1968 beq Lxts_enc_no_key2 1969 1970 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 1971 lwz $rounds,240($key2) 1972 srwi $rounds,$rounds,1 1973 subi $rounds,$rounds,1 1974 li $idx,16 1975 1976 lvx $rndkey0,0,$key2 1977 lvx $rndkey1,$idx,$key2 1978 addi $idx,$idx,16 1979 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1980 vxor $tweak,$tweak,$rndkey0 1981 lvx $rndkey0,$idx,$key2 1982 addi $idx,$idx,16 1983 mtctr $rounds 1984 1985Ltweak_xts_enc: 1986 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1987 vcipher $tweak,$tweak,$rndkey1 1988 lvx $rndkey1,$idx,$key2 1989 addi $idx,$idx,16 1990 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1991 vcipher $tweak,$tweak,$rndkey0 1992 lvx $rndkey0,$idx,$key2 1993 addi $idx,$idx,16 1994 bdnz Ltweak_xts_enc 1995 1996 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1997 vcipher $tweak,$tweak,$rndkey1 1998 lvx $rndkey1,$idx,$key2 1999 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2000 vcipherlast $tweak,$tweak,$rndkey0 2001 2002 li $ivp,0 # don't chain the tweak 2003 b Lxts_enc 2004 2005Lxts_enc_no_key2: 2006 li $idx,-16 2007 and $len,$len,$idx # in "tweak chaining" 2008 # mode only complete 2009 # blocks are processed 2010Lxts_enc: 2011 lvx $inptail,0,$inp 2012 addi $inp,$inp,16 2013 2014 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2015 lwz $rounds,240($key1) 2016 srwi $rounds,$rounds,1 2017 subi $rounds,$rounds,1 2018 li $idx,16 2019 2020 vslb $eighty7,$seven,$seven # 0x808080..80 2021 vor $eighty7,$eighty7,$seven # 0x878787..87 2022 vspltisb $tmp,1 # 0x010101..01 2023 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2024 2025 ${UCMP}i $len,96 2026 bge _aesp8_xts_encrypt6x 2027 2028 andi. $taillen,$len,15 2029 subic r0,$len,32 2030 subi $taillen,$taillen,16 2031 subfe r0,r0,r0 2032 and r0,r0,$taillen 2033 add $inp,$inp,r0 2034 2035 lvx $rndkey0,0,$key1 2036 lvx $rndkey1,$idx,$key1 2037 addi $idx,$idx,16 2038 vperm $inout,$inout,$inptail,$inpperm 2039 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2040 vxor $inout,$inout,$tweak 2041 vxor $inout,$inout,$rndkey0 2042 lvx $rndkey0,$idx,$key1 2043 addi $idx,$idx,16 2044 mtctr $rounds 2045 b Loop_xts_enc 2046 2047.align 5 2048Loop_xts_enc: 2049 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2050 vcipher $inout,$inout,$rndkey1 2051 lvx $rndkey1,$idx,$key1 2052 addi $idx,$idx,16 2053 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2054 vcipher $inout,$inout,$rndkey0 2055 lvx $rndkey0,$idx,$key1 2056 addi $idx,$idx,16 2057 bdnz Loop_xts_enc 2058 2059 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2060 vcipher $inout,$inout,$rndkey1 2061 lvx $rndkey1,$idx,$key1 2062 li $idx,16 2063 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2064 vxor $rndkey0,$rndkey0,$tweak 2065 vcipherlast $output,$inout,$rndkey0 2066 2067 le?vperm $tmp,$output,$output,$leperm 2068 be?nop 2069 le?stvx_u $tmp,0,$out 2070 be?stvx_u $output,0,$out 2071 addi $out,$out,16 2072 2073 subic. $len,$len,16 2074 beq Lxts_enc_done 2075 2076 vmr $inout,$inptail 2077 lvx $inptail,0,$inp 2078 addi $inp,$inp,16 2079 lvx $rndkey0,0,$key1 2080 lvx $rndkey1,$idx,$key1 2081 addi $idx,$idx,16 2082 2083 subic r0,$len,32 2084 subfe r0,r0,r0 2085 and r0,r0,$taillen 2086 add $inp,$inp,r0 2087 2088 vsrab $tmp,$tweak,$seven # next tweak value 2089 vaddubm $tweak,$tweak,$tweak 2090 vsldoi $tmp,$tmp,$tmp,15 2091 vand $tmp,$tmp,$eighty7 2092 vxor $tweak,$tweak,$tmp 2093 2094 vperm $inout,$inout,$inptail,$inpperm 2095 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2096 vxor $inout,$inout,$tweak 2097 vxor $output,$output,$rndkey0 # just in case $len<16 2098 vxor $inout,$inout,$rndkey0 2099 lvx $rndkey0,$idx,$key1 2100 addi $idx,$idx,16 2101 2102 mtctr $rounds 2103 ${UCMP}i $len,16 2104 bge Loop_xts_enc 2105 2106 vxor $output,$output,$tweak 2107 lvsr $inpperm,0,$len # $inpperm is no longer needed 2108 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2109 vspltisb $tmp,-1 2110 vperm $inptail,$inptail,$tmp,$inpperm 2111 vsel $inout,$inout,$output,$inptail 2112 2113 subi r11,$out,17 2114 subi $out,$out,16 2115 mtctr $len 2116 li $len,16 2117Loop_xts_enc_steal: 2118 lbzu r0,1(r11) 2119 stb r0,16(r11) 2120 bdnz Loop_xts_enc_steal 2121 2122 mtctr $rounds 2123 b Loop_xts_enc # one more time... 2124 2125Lxts_enc_done: 2126 ${UCMP}i $ivp,0 2127 beq Lxts_enc_ret 2128 2129 vsrab $tmp,$tweak,$seven # next tweak value 2130 vaddubm $tweak,$tweak,$tweak 2131 vsldoi $tmp,$tmp,$tmp,15 2132 vand $tmp,$tmp,$eighty7 2133 vxor $tweak,$tweak,$tmp 2134 2135 le?vperm $tweak,$tweak,$tweak,$leperm 2136 stvx_u $tweak,0,$ivp 2137 2138Lxts_enc_ret: 2139 mtspr 256,r12 # restore vrsave 2140 li r3,0 2141 blr 2142 .long 0 2143 .byte 0,12,0x04,0,0x80,6,6,0 2144 .long 0 2145.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2146 2147.globl .${prefix}_xts_decrypt 2148.align 5 2149.${prefix}_xts_decrypt: 2150 mr $inp,r3 # reassign 2151 li r3,-1 2152 ${UCMP}i $len,16 2153 bltlr- 2154 2155 lis r0,0xfff8 2156 mfspr r12,256 # save vrsave 2157 li r11,0 2158 mtspr 256,r0 2159 2160 andi. r0,$len,15 2161 neg r0,r0 2162 andi. r0,r0,16 2163 sub $len,$len,r0 2164 2165 vspltisb $seven,0x07 # 0x070707..07 2166 le?lvsl $leperm,r11,r11 2167 le?vspltisb $tmp,0x0f 2168 le?vxor $leperm,$leperm,$seven 2169 2170 li $idx,15 2171 lvx $tweak,0,$ivp # load [unaligned] iv 2172 lvsl $inpperm,0,$ivp 2173 lvx $inptail,$idx,$ivp 2174 le?vxor $inpperm,$inpperm,$tmp 2175 vperm $tweak,$tweak,$inptail,$inpperm 2176 2177 neg r11,$inp 2178 lvsr $inpperm,0,r11 # prepare for unaligned load 2179 lvx $inout,0,$inp 2180 addi $inp,$inp,15 # 15 is not typo 2181 le?vxor $inpperm,$inpperm,$tmp 2182 2183 ${UCMP}i $key2,0 # key2==NULL? 2184 beq Lxts_dec_no_key2 2185 2186 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2187 lwz $rounds,240($key2) 2188 srwi $rounds,$rounds,1 2189 subi $rounds,$rounds,1 2190 li $idx,16 2191 2192 lvx $rndkey0,0,$key2 2193 lvx $rndkey1,$idx,$key2 2194 addi $idx,$idx,16 2195 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2196 vxor $tweak,$tweak,$rndkey0 2197 lvx $rndkey0,$idx,$key2 2198 addi $idx,$idx,16 2199 mtctr $rounds 2200 2201Ltweak_xts_dec: 2202 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2203 vcipher $tweak,$tweak,$rndkey1 2204 lvx $rndkey1,$idx,$key2 2205 addi $idx,$idx,16 2206 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2207 vcipher $tweak,$tweak,$rndkey0 2208 lvx $rndkey0,$idx,$key2 2209 addi $idx,$idx,16 2210 bdnz Ltweak_xts_dec 2211 2212 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2213 vcipher $tweak,$tweak,$rndkey1 2214 lvx $rndkey1,$idx,$key2 2215 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2216 vcipherlast $tweak,$tweak,$rndkey0 2217 2218 li $ivp,0 # don't chain the tweak 2219 b Lxts_dec 2220 2221Lxts_dec_no_key2: 2222 neg $idx,$len 2223 andi. $idx,$idx,15 2224 add $len,$len,$idx # in "tweak chaining" 2225 # mode only complete 2226 # blocks are processed 2227Lxts_dec: 2228 lvx $inptail,0,$inp 2229 addi $inp,$inp,16 2230 2231 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2232 lwz $rounds,240($key1) 2233 srwi $rounds,$rounds,1 2234 subi $rounds,$rounds,1 2235 li $idx,16 2236 2237 vslb $eighty7,$seven,$seven # 0x808080..80 2238 vor $eighty7,$eighty7,$seven # 0x878787..87 2239 vspltisb $tmp,1 # 0x010101..01 2240 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2241 2242 ${UCMP}i $len,96 2243 bge _aesp8_xts_decrypt6x 2244 2245 lvx $rndkey0,0,$key1 2246 lvx $rndkey1,$idx,$key1 2247 addi $idx,$idx,16 2248 vperm $inout,$inout,$inptail,$inpperm 2249 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2250 vxor $inout,$inout,$tweak 2251 vxor $inout,$inout,$rndkey0 2252 lvx $rndkey0,$idx,$key1 2253 addi $idx,$idx,16 2254 mtctr $rounds 2255 2256 ${UCMP}i $len,16 2257 blt Ltail_xts_dec 2258 be?b Loop_xts_dec 2259 2260.align 5 2261Loop_xts_dec: 2262 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2263 vncipher $inout,$inout,$rndkey1 2264 lvx $rndkey1,$idx,$key1 2265 addi $idx,$idx,16 2266 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2267 vncipher $inout,$inout,$rndkey0 2268 lvx $rndkey0,$idx,$key1 2269 addi $idx,$idx,16 2270 bdnz Loop_xts_dec 2271 2272 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2273 vncipher $inout,$inout,$rndkey1 2274 lvx $rndkey1,$idx,$key1 2275 li $idx,16 2276 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2277 vxor $rndkey0,$rndkey0,$tweak 2278 vncipherlast $output,$inout,$rndkey0 2279 2280 le?vperm $tmp,$output,$output,$leperm 2281 be?nop 2282 le?stvx_u $tmp,0,$out 2283 be?stvx_u $output,0,$out 2284 addi $out,$out,16 2285 2286 subic. $len,$len,16 2287 beq Lxts_dec_done 2288 2289 vmr $inout,$inptail 2290 lvx $inptail,0,$inp 2291 addi $inp,$inp,16 2292 lvx $rndkey0,0,$key1 2293 lvx $rndkey1,$idx,$key1 2294 addi $idx,$idx,16 2295 2296 vsrab $tmp,$tweak,$seven # next tweak value 2297 vaddubm $tweak,$tweak,$tweak 2298 vsldoi $tmp,$tmp,$tmp,15 2299 vand $tmp,$tmp,$eighty7 2300 vxor $tweak,$tweak,$tmp 2301 2302 vperm $inout,$inout,$inptail,$inpperm 2303 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2304 vxor $inout,$inout,$tweak 2305 vxor $inout,$inout,$rndkey0 2306 lvx $rndkey0,$idx,$key1 2307 addi $idx,$idx,16 2308 2309 mtctr $rounds 2310 ${UCMP}i $len,16 2311 bge Loop_xts_dec 2312 2313Ltail_xts_dec: 2314 vsrab $tmp,$tweak,$seven # next tweak value 2315 vaddubm $tweak1,$tweak,$tweak 2316 vsldoi $tmp,$tmp,$tmp,15 2317 vand $tmp,$tmp,$eighty7 2318 vxor $tweak1,$tweak1,$tmp 2319 2320 subi $inp,$inp,16 2321 add $inp,$inp,$len 2322 2323 vxor $inout,$inout,$tweak # :-( 2324 vxor $inout,$inout,$tweak1 # :-) 2325 2326Loop_xts_dec_short: 2327 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2328 vncipher $inout,$inout,$rndkey1 2329 lvx $rndkey1,$idx,$key1 2330 addi $idx,$idx,16 2331 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2332 vncipher $inout,$inout,$rndkey0 2333 lvx $rndkey0,$idx,$key1 2334 addi $idx,$idx,16 2335 bdnz Loop_xts_dec_short 2336 2337 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2338 vncipher $inout,$inout,$rndkey1 2339 lvx $rndkey1,$idx,$key1 2340 li $idx,16 2341 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2342 vxor $rndkey0,$rndkey0,$tweak1 2343 vncipherlast $output,$inout,$rndkey0 2344 2345 le?vperm $tmp,$output,$output,$leperm 2346 be?nop 2347 le?stvx_u $tmp,0,$out 2348 be?stvx_u $output,0,$out 2349 2350 vmr $inout,$inptail 2351 lvx $inptail,0,$inp 2352 #addi $inp,$inp,16 2353 lvx $rndkey0,0,$key1 2354 lvx $rndkey1,$idx,$key1 2355 addi $idx,$idx,16 2356 vperm $inout,$inout,$inptail,$inpperm 2357 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2358 2359 lvsr $inpperm,0,$len # $inpperm is no longer needed 2360 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2361 vspltisb $tmp,-1 2362 vperm $inptail,$inptail,$tmp,$inpperm 2363 vsel $inout,$inout,$output,$inptail 2364 2365 vxor $rndkey0,$rndkey0,$tweak 2366 vxor $inout,$inout,$rndkey0 2367 lvx $rndkey0,$idx,$key1 2368 addi $idx,$idx,16 2369 2370 subi r11,$out,1 2371 mtctr $len 2372 li $len,16 2373Loop_xts_dec_steal: 2374 lbzu r0,1(r11) 2375 stb r0,16(r11) 2376 bdnz Loop_xts_dec_steal 2377 2378 mtctr $rounds 2379 b Loop_xts_dec # one more time... 2380 2381Lxts_dec_done: 2382 ${UCMP}i $ivp,0 2383 beq Lxts_dec_ret 2384 2385 vsrab $tmp,$tweak,$seven # next tweak value 2386 vaddubm $tweak,$tweak,$tweak 2387 vsldoi $tmp,$tmp,$tmp,15 2388 vand $tmp,$tmp,$eighty7 2389 vxor $tweak,$tweak,$tmp 2390 2391 le?vperm $tweak,$tweak,$tweak,$leperm 2392 stvx_u $tweak,0,$ivp 2393 2394Lxts_dec_ret: 2395 mtspr 256,r12 # restore vrsave 2396 li r3,0 2397 blr 2398 .long 0 2399 .byte 0,12,0x04,0,0x80,6,6,0 2400 .long 0 2401.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2402___ 2403######################################################################### 2404{{ # Optimized XTS procedures # 2405my $key_=$key2; 2406my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2407 $x00=0 if ($flavour =~ /osx/); 2408my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); 2409my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2410my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2411my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2412 # v26-v31 last 6 round keys 2413my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2414my $taillen=$x70; 2415 2416$code.=<<___; 2417.align 5 2418_aesp8_xts_encrypt6x: 2419 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2420 mflr r11 2421 li r7,`$FRAME+8*16+15` 2422 li r3,`$FRAME+8*16+31` 2423 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2424 stvx v20,r7,$sp # ABI says so 2425 addi r7,r7,32 2426 stvx v21,r3,$sp 2427 addi r3,r3,32 2428 stvx v22,r7,$sp 2429 addi r7,r7,32 2430 stvx v23,r3,$sp 2431 addi r3,r3,32 2432 stvx v24,r7,$sp 2433 addi r7,r7,32 2434 stvx v25,r3,$sp 2435 addi r3,r3,32 2436 stvx v26,r7,$sp 2437 addi r7,r7,32 2438 stvx v27,r3,$sp 2439 addi r3,r3,32 2440 stvx v28,r7,$sp 2441 addi r7,r7,32 2442 stvx v29,r3,$sp 2443 addi r3,r3,32 2444 stvx v30,r7,$sp 2445 stvx v31,r3,$sp 2446 li r0,-1 2447 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2448 li $x10,0x10 2449 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2450 li $x20,0x20 2451 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2452 li $x30,0x30 2453 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2454 li $x40,0x40 2455 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2456 li $x50,0x50 2457 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2458 li $x60,0x60 2459 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2460 li $x70,0x70 2461 mtspr 256,r0 2462 2463 subi $rounds,$rounds,3 # -4 in total 2464 2465 lvx $rndkey0,$x00,$key1 # load key schedule 2466 lvx v30,$x10,$key1 2467 addi $key1,$key1,0x20 2468 lvx v31,$x00,$key1 2469 ?vperm $rndkey0,$rndkey0,v30,$keyperm 2470 addi $key_,$sp,$FRAME+15 2471 mtctr $rounds 2472 2473Load_xts_enc_key: 2474 ?vperm v24,v30,v31,$keyperm 2475 lvx v30,$x10,$key1 2476 addi $key1,$key1,0x20 2477 stvx v24,$x00,$key_ # off-load round[1] 2478 ?vperm v25,v31,v30,$keyperm 2479 lvx v31,$x00,$key1 2480 stvx v25,$x10,$key_ # off-load round[2] 2481 addi $key_,$key_,0x20 2482 bdnz Load_xts_enc_key 2483 2484 lvx v26,$x10,$key1 2485 ?vperm v24,v30,v31,$keyperm 2486 lvx v27,$x20,$key1 2487 stvx v24,$x00,$key_ # off-load round[3] 2488 ?vperm v25,v31,v26,$keyperm 2489 lvx v28,$x30,$key1 2490 stvx v25,$x10,$key_ # off-load round[4] 2491 addi $key_,$sp,$FRAME+15 # rewind $key_ 2492 ?vperm v26,v26,v27,$keyperm 2493 lvx v29,$x40,$key1 2494 ?vperm v27,v27,v28,$keyperm 2495 lvx v30,$x50,$key1 2496 ?vperm v28,v28,v29,$keyperm 2497 lvx v31,$x60,$key1 2498 ?vperm v29,v29,v30,$keyperm 2499 lvx $twk5,$x70,$key1 # borrow $twk5 2500 ?vperm v30,v30,v31,$keyperm 2501 lvx v24,$x00,$key_ # pre-load round[1] 2502 ?vperm v31,v31,$twk5,$keyperm 2503 lvx v25,$x10,$key_ # pre-load round[2] 2504 2505 vperm $in0,$inout,$inptail,$inpperm 2506 subi $inp,$inp,31 # undo "caller" 2507 vxor $twk0,$tweak,$rndkey0 2508 vsrab $tmp,$tweak,$seven # next tweak value 2509 vaddubm $tweak,$tweak,$tweak 2510 vsldoi $tmp,$tmp,$tmp,15 2511 vand $tmp,$tmp,$eighty7 2512 vxor $out0,$in0,$twk0 2513 vxor $tweak,$tweak,$tmp 2514 2515 lvx_u $in1,$x10,$inp 2516 vxor $twk1,$tweak,$rndkey0 2517 vsrab $tmp,$tweak,$seven # next tweak value 2518 vaddubm $tweak,$tweak,$tweak 2519 vsldoi $tmp,$tmp,$tmp,15 2520 le?vperm $in1,$in1,$in1,$leperm 2521 vand $tmp,$tmp,$eighty7 2522 vxor $out1,$in1,$twk1 2523 vxor $tweak,$tweak,$tmp 2524 2525 lvx_u $in2,$x20,$inp 2526 andi. $taillen,$len,15 2527 vxor $twk2,$tweak,$rndkey0 2528 vsrab $tmp,$tweak,$seven # next tweak value 2529 vaddubm $tweak,$tweak,$tweak 2530 vsldoi $tmp,$tmp,$tmp,15 2531 le?vperm $in2,$in2,$in2,$leperm 2532 vand $tmp,$tmp,$eighty7 2533 vxor $out2,$in2,$twk2 2534 vxor $tweak,$tweak,$tmp 2535 2536 lvx_u $in3,$x30,$inp 2537 sub $len,$len,$taillen 2538 vxor $twk3,$tweak,$rndkey0 2539 vsrab $tmp,$tweak,$seven # next tweak value 2540 vaddubm $tweak,$tweak,$tweak 2541 vsldoi $tmp,$tmp,$tmp,15 2542 le?vperm $in3,$in3,$in3,$leperm 2543 vand $tmp,$tmp,$eighty7 2544 vxor $out3,$in3,$twk3 2545 vxor $tweak,$tweak,$tmp 2546 2547 lvx_u $in4,$x40,$inp 2548 subi $len,$len,0x60 2549 vxor $twk4,$tweak,$rndkey0 2550 vsrab $tmp,$tweak,$seven # next tweak value 2551 vaddubm $tweak,$tweak,$tweak 2552 vsldoi $tmp,$tmp,$tmp,15 2553 le?vperm $in4,$in4,$in4,$leperm 2554 vand $tmp,$tmp,$eighty7 2555 vxor $out4,$in4,$twk4 2556 vxor $tweak,$tweak,$tmp 2557 2558 lvx_u $in5,$x50,$inp 2559 addi $inp,$inp,0x60 2560 vxor $twk5,$tweak,$rndkey0 2561 vsrab $tmp,$tweak,$seven # next tweak value 2562 vaddubm $tweak,$tweak,$tweak 2563 vsldoi $tmp,$tmp,$tmp,15 2564 le?vperm $in5,$in5,$in5,$leperm 2565 vand $tmp,$tmp,$eighty7 2566 vxor $out5,$in5,$twk5 2567 vxor $tweak,$tweak,$tmp 2568 2569 vxor v31,v31,$rndkey0 2570 mtctr $rounds 2571 b Loop_xts_enc6x 2572 2573.align 5 2574Loop_xts_enc6x: 2575 vcipher $out0,$out0,v24 2576 vcipher $out1,$out1,v24 2577 vcipher $out2,$out2,v24 2578 vcipher $out3,$out3,v24 2579 vcipher $out4,$out4,v24 2580 vcipher $out5,$out5,v24 2581 lvx v24,$x20,$key_ # round[3] 2582 addi $key_,$key_,0x20 2583 2584 vcipher $out0,$out0,v25 2585 vcipher $out1,$out1,v25 2586 vcipher $out2,$out2,v25 2587 vcipher $out3,$out3,v25 2588 vcipher $out4,$out4,v25 2589 vcipher $out5,$out5,v25 2590 lvx v25,$x10,$key_ # round[4] 2591 bdnz Loop_xts_enc6x 2592 2593 subic $len,$len,96 # $len-=96 2594 vxor $in0,$twk0,v31 # xor with last round key 2595 vcipher $out0,$out0,v24 2596 vcipher $out1,$out1,v24 2597 vsrab $tmp,$tweak,$seven # next tweak value 2598 vxor $twk0,$tweak,$rndkey0 2599 vaddubm $tweak,$tweak,$tweak 2600 vcipher $out2,$out2,v24 2601 vcipher $out3,$out3,v24 2602 vsldoi $tmp,$tmp,$tmp,15 2603 vcipher $out4,$out4,v24 2604 vcipher $out5,$out5,v24 2605 2606 subfe. r0,r0,r0 # borrow?-1:0 2607 vand $tmp,$tmp,$eighty7 2608 vcipher $out0,$out0,v25 2609 vcipher $out1,$out1,v25 2610 vxor $tweak,$tweak,$tmp 2611 vcipher $out2,$out2,v25 2612 vcipher $out3,$out3,v25 2613 vxor $in1,$twk1,v31 2614 vsrab $tmp,$tweak,$seven # next tweak value 2615 vxor $twk1,$tweak,$rndkey0 2616 vcipher $out4,$out4,v25 2617 vcipher $out5,$out5,v25 2618 2619 and r0,r0,$len 2620 vaddubm $tweak,$tweak,$tweak 2621 vsldoi $tmp,$tmp,$tmp,15 2622 vcipher $out0,$out0,v26 2623 vcipher $out1,$out1,v26 2624 vand $tmp,$tmp,$eighty7 2625 vcipher $out2,$out2,v26 2626 vcipher $out3,$out3,v26 2627 vxor $tweak,$tweak,$tmp 2628 vcipher $out4,$out4,v26 2629 vcipher $out5,$out5,v26 2630 2631 add $inp,$inp,r0 # $inp is adjusted in such 2632 # way that at exit from the 2633 # loop inX-in5 are loaded 2634 # with last "words" 2635 vxor $in2,$twk2,v31 2636 vsrab $tmp,$tweak,$seven # next tweak value 2637 vxor $twk2,$tweak,$rndkey0 2638 vaddubm $tweak,$tweak,$tweak 2639 vcipher $out0,$out0,v27 2640 vcipher $out1,$out1,v27 2641 vsldoi $tmp,$tmp,$tmp,15 2642 vcipher $out2,$out2,v27 2643 vcipher $out3,$out3,v27 2644 vand $tmp,$tmp,$eighty7 2645 vcipher $out4,$out4,v27 2646 vcipher $out5,$out5,v27 2647 2648 addi $key_,$sp,$FRAME+15 # rewind $key_ 2649 vxor $tweak,$tweak,$tmp 2650 vcipher $out0,$out0,v28 2651 vcipher $out1,$out1,v28 2652 vxor $in3,$twk3,v31 2653 vsrab $tmp,$tweak,$seven # next tweak value 2654 vxor $twk3,$tweak,$rndkey0 2655 vcipher $out2,$out2,v28 2656 vcipher $out3,$out3,v28 2657 vaddubm $tweak,$tweak,$tweak 2658 vsldoi $tmp,$tmp,$tmp,15 2659 vcipher $out4,$out4,v28 2660 vcipher $out5,$out5,v28 2661 lvx v24,$x00,$key_ # re-pre-load round[1] 2662 vand $tmp,$tmp,$eighty7 2663 2664 vcipher $out0,$out0,v29 2665 vcipher $out1,$out1,v29 2666 vxor $tweak,$tweak,$tmp 2667 vcipher $out2,$out2,v29 2668 vcipher $out3,$out3,v29 2669 vxor $in4,$twk4,v31 2670 vsrab $tmp,$tweak,$seven # next tweak value 2671 vxor $twk4,$tweak,$rndkey0 2672 vcipher $out4,$out4,v29 2673 vcipher $out5,$out5,v29 2674 lvx v25,$x10,$key_ # re-pre-load round[2] 2675 vaddubm $tweak,$tweak,$tweak 2676 vsldoi $tmp,$tmp,$tmp,15 2677 2678 vcipher $out0,$out0,v30 2679 vcipher $out1,$out1,v30 2680 vand $tmp,$tmp,$eighty7 2681 vcipher $out2,$out2,v30 2682 vcipher $out3,$out3,v30 2683 vxor $tweak,$tweak,$tmp 2684 vcipher $out4,$out4,v30 2685 vcipher $out5,$out5,v30 2686 vxor $in5,$twk5,v31 2687 vsrab $tmp,$tweak,$seven # next tweak value 2688 vxor $twk5,$tweak,$rndkey0 2689 2690 vcipherlast $out0,$out0,$in0 2691 lvx_u $in0,$x00,$inp # load next input block 2692 vaddubm $tweak,$tweak,$tweak 2693 vsldoi $tmp,$tmp,$tmp,15 2694 vcipherlast $out1,$out1,$in1 2695 lvx_u $in1,$x10,$inp 2696 vcipherlast $out2,$out2,$in2 2697 le?vperm $in0,$in0,$in0,$leperm 2698 lvx_u $in2,$x20,$inp 2699 vand $tmp,$tmp,$eighty7 2700 vcipherlast $out3,$out3,$in3 2701 le?vperm $in1,$in1,$in1,$leperm 2702 lvx_u $in3,$x30,$inp 2703 vcipherlast $out4,$out4,$in4 2704 le?vperm $in2,$in2,$in2,$leperm 2705 lvx_u $in4,$x40,$inp 2706 vxor $tweak,$tweak,$tmp 2707 vcipherlast $tmp,$out5,$in5 # last block might be needed 2708 # in stealing mode 2709 le?vperm $in3,$in3,$in3,$leperm 2710 lvx_u $in5,$x50,$inp 2711 addi $inp,$inp,0x60 2712 le?vperm $in4,$in4,$in4,$leperm 2713 le?vperm $in5,$in5,$in5,$leperm 2714 2715 le?vperm $out0,$out0,$out0,$leperm 2716 le?vperm $out1,$out1,$out1,$leperm 2717 stvx_u $out0,$x00,$out # store output 2718 vxor $out0,$in0,$twk0 2719 le?vperm $out2,$out2,$out2,$leperm 2720 stvx_u $out1,$x10,$out 2721 vxor $out1,$in1,$twk1 2722 le?vperm $out3,$out3,$out3,$leperm 2723 stvx_u $out2,$x20,$out 2724 vxor $out2,$in2,$twk2 2725 le?vperm $out4,$out4,$out4,$leperm 2726 stvx_u $out3,$x30,$out 2727 vxor $out3,$in3,$twk3 2728 le?vperm $out5,$tmp,$tmp,$leperm 2729 stvx_u $out4,$x40,$out 2730 vxor $out4,$in4,$twk4 2731 le?stvx_u $out5,$x50,$out 2732 be?stvx_u $tmp, $x50,$out 2733 vxor $out5,$in5,$twk5 2734 addi $out,$out,0x60 2735 2736 mtctr $rounds 2737 beq Loop_xts_enc6x # did $len-=96 borrow? 2738 2739 addic. $len,$len,0x60 2740 beq Lxts_enc6x_zero 2741 cmpwi $len,0x20 2742 blt Lxts_enc6x_one 2743 nop 2744 beq Lxts_enc6x_two 2745 cmpwi $len,0x40 2746 blt Lxts_enc6x_three 2747 nop 2748 beq Lxts_enc6x_four 2749 2750Lxts_enc6x_five: 2751 vxor $out0,$in1,$twk0 2752 vxor $out1,$in2,$twk1 2753 vxor $out2,$in3,$twk2 2754 vxor $out3,$in4,$twk3 2755 vxor $out4,$in5,$twk4 2756 2757 bl _aesp8_xts_enc5x 2758 2759 le?vperm $out0,$out0,$out0,$leperm 2760 vmr $twk0,$twk5 # unused tweak 2761 le?vperm $out1,$out1,$out1,$leperm 2762 stvx_u $out0,$x00,$out # store output 2763 le?vperm $out2,$out2,$out2,$leperm 2764 stvx_u $out1,$x10,$out 2765 le?vperm $out3,$out3,$out3,$leperm 2766 stvx_u $out2,$x20,$out 2767 vxor $tmp,$out4,$twk5 # last block prep for stealing 2768 le?vperm $out4,$out4,$out4,$leperm 2769 stvx_u $out3,$x30,$out 2770 stvx_u $out4,$x40,$out 2771 addi $out,$out,0x50 2772 bne Lxts_enc6x_steal 2773 b Lxts_enc6x_done 2774 2775.align 4 2776Lxts_enc6x_four: 2777 vxor $out0,$in2,$twk0 2778 vxor $out1,$in3,$twk1 2779 vxor $out2,$in4,$twk2 2780 vxor $out3,$in5,$twk3 2781 vxor $out4,$out4,$out4 2782 2783 bl _aesp8_xts_enc5x 2784 2785 le?vperm $out0,$out0,$out0,$leperm 2786 vmr $twk0,$twk4 # unused tweak 2787 le?vperm $out1,$out1,$out1,$leperm 2788 stvx_u $out0,$x00,$out # store output 2789 le?vperm $out2,$out2,$out2,$leperm 2790 stvx_u $out1,$x10,$out 2791 vxor $tmp,$out3,$twk4 # last block prep for stealing 2792 le?vperm $out3,$out3,$out3,$leperm 2793 stvx_u $out2,$x20,$out 2794 stvx_u $out3,$x30,$out 2795 addi $out,$out,0x40 2796 bne Lxts_enc6x_steal 2797 b Lxts_enc6x_done 2798 2799.align 4 2800Lxts_enc6x_three: 2801 vxor $out0,$in3,$twk0 2802 vxor $out1,$in4,$twk1 2803 vxor $out2,$in5,$twk2 2804 vxor $out3,$out3,$out3 2805 vxor $out4,$out4,$out4 2806 2807 bl _aesp8_xts_enc5x 2808 2809 le?vperm $out0,$out0,$out0,$leperm 2810 vmr $twk0,$twk3 # unused tweak 2811 le?vperm $out1,$out1,$out1,$leperm 2812 stvx_u $out0,$x00,$out # store output 2813 vxor $tmp,$out2,$twk3 # last block prep for stealing 2814 le?vperm $out2,$out2,$out2,$leperm 2815 stvx_u $out1,$x10,$out 2816 stvx_u $out2,$x20,$out 2817 addi $out,$out,0x30 2818 bne Lxts_enc6x_steal 2819 b Lxts_enc6x_done 2820 2821.align 4 2822Lxts_enc6x_two: 2823 vxor $out0,$in4,$twk0 2824 vxor $out1,$in5,$twk1 2825 vxor $out2,$out2,$out2 2826 vxor $out3,$out3,$out3 2827 vxor $out4,$out4,$out4 2828 2829 bl _aesp8_xts_enc5x 2830 2831 le?vperm $out0,$out0,$out0,$leperm 2832 vmr $twk0,$twk2 # unused tweak 2833 vxor $tmp,$out1,$twk2 # last block prep for stealing 2834 le?vperm $out1,$out1,$out1,$leperm 2835 stvx_u $out0,$x00,$out # store output 2836 stvx_u $out1,$x10,$out 2837 addi $out,$out,0x20 2838 bne Lxts_enc6x_steal 2839 b Lxts_enc6x_done 2840 2841.align 4 2842Lxts_enc6x_one: 2843 vxor $out0,$in5,$twk0 2844 nop 2845Loop_xts_enc1x: 2846 vcipher $out0,$out0,v24 2847 lvx v24,$x20,$key_ # round[3] 2848 addi $key_,$key_,0x20 2849 2850 vcipher $out0,$out0,v25 2851 lvx v25,$x10,$key_ # round[4] 2852 bdnz Loop_xts_enc1x 2853 2854 add $inp,$inp,$taillen 2855 cmpwi $taillen,0 2856 vcipher $out0,$out0,v24 2857 2858 subi $inp,$inp,16 2859 vcipher $out0,$out0,v25 2860 2861 lvsr $inpperm,0,$taillen 2862 vcipher $out0,$out0,v26 2863 2864 lvx_u $in0,0,$inp 2865 vcipher $out0,$out0,v27 2866 2867 addi $key_,$sp,$FRAME+15 # rewind $key_ 2868 vcipher $out0,$out0,v28 2869 lvx v24,$x00,$key_ # re-pre-load round[1] 2870 2871 vcipher $out0,$out0,v29 2872 lvx v25,$x10,$key_ # re-pre-load round[2] 2873 vxor $twk0,$twk0,v31 2874 2875 le?vperm $in0,$in0,$in0,$leperm 2876 vcipher $out0,$out0,v30 2877 2878 vperm $in0,$in0,$in0,$inpperm 2879 vcipherlast $out0,$out0,$twk0 2880 2881 vmr $twk0,$twk1 # unused tweak 2882 vxor $tmp,$out0,$twk1 # last block prep for stealing 2883 le?vperm $out0,$out0,$out0,$leperm 2884 stvx_u $out0,$x00,$out # store output 2885 addi $out,$out,0x10 2886 bne Lxts_enc6x_steal 2887 b Lxts_enc6x_done 2888 2889.align 4 2890Lxts_enc6x_zero: 2891 cmpwi $taillen,0 2892 beq Lxts_enc6x_done 2893 2894 add $inp,$inp,$taillen 2895 subi $inp,$inp,16 2896 lvx_u $in0,0,$inp 2897 lvsr $inpperm,0,$taillen # $in5 is no more 2898 le?vperm $in0,$in0,$in0,$leperm 2899 vperm $in0,$in0,$in0,$inpperm 2900 vxor $tmp,$tmp,$twk0 2901Lxts_enc6x_steal: 2902 vxor $in0,$in0,$twk0 2903 vxor $out0,$out0,$out0 2904 vspltisb $out1,-1 2905 vperm $out0,$out0,$out1,$inpperm 2906 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2907 2908 subi r30,$out,17 2909 subi $out,$out,16 2910 mtctr $taillen 2911Loop_xts_enc6x_steal: 2912 lbzu r0,1(r30) 2913 stb r0,16(r30) 2914 bdnz Loop_xts_enc6x_steal 2915 2916 li $taillen,0 2917 mtctr $rounds 2918 b Loop_xts_enc1x # one more time... 2919 2920.align 4 2921Lxts_enc6x_done: 2922 ${UCMP}i $ivp,0 2923 beq Lxts_enc6x_ret 2924 2925 vxor $tweak,$twk0,$rndkey0 2926 le?vperm $tweak,$tweak,$tweak,$leperm 2927 stvx_u $tweak,0,$ivp 2928 2929Lxts_enc6x_ret: 2930 mtlr r11 2931 li r10,`$FRAME+15` 2932 li r11,`$FRAME+31` 2933 stvx $seven,r10,$sp # wipe copies of round keys 2934 addi r10,r10,32 2935 stvx $seven,r11,$sp 2936 addi r11,r11,32 2937 stvx $seven,r10,$sp 2938 addi r10,r10,32 2939 stvx $seven,r11,$sp 2940 addi r11,r11,32 2941 stvx $seven,r10,$sp 2942 addi r10,r10,32 2943 stvx $seven,r11,$sp 2944 addi r11,r11,32 2945 stvx $seven,r10,$sp 2946 addi r10,r10,32 2947 stvx $seven,r11,$sp 2948 addi r11,r11,32 2949 2950 mtspr 256,$vrsave 2951 lvx v20,r10,$sp # ABI says so 2952 addi r10,r10,32 2953 lvx v21,r11,$sp 2954 addi r11,r11,32 2955 lvx v22,r10,$sp 2956 addi r10,r10,32 2957 lvx v23,r11,$sp 2958 addi r11,r11,32 2959 lvx v24,r10,$sp 2960 addi r10,r10,32 2961 lvx v25,r11,$sp 2962 addi r11,r11,32 2963 lvx v26,r10,$sp 2964 addi r10,r10,32 2965 lvx v27,r11,$sp 2966 addi r11,r11,32 2967 lvx v28,r10,$sp 2968 addi r10,r10,32 2969 lvx v29,r11,$sp 2970 addi r11,r11,32 2971 lvx v30,r10,$sp 2972 lvx v31,r11,$sp 2973 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2974 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2975 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2976 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2977 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2978 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2979 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 2980 blr 2981 .long 0 2982 .byte 0,12,0x04,1,0x80,6,6,0 2983 .long 0 2984 2985.align 5 2986_aesp8_xts_enc5x: 2987 vcipher $out0,$out0,v24 2988 vcipher $out1,$out1,v24 2989 vcipher $out2,$out2,v24 2990 vcipher $out3,$out3,v24 2991 vcipher $out4,$out4,v24 2992 lvx v24,$x20,$key_ # round[3] 2993 addi $key_,$key_,0x20 2994 2995 vcipher $out0,$out0,v25 2996 vcipher $out1,$out1,v25 2997 vcipher $out2,$out2,v25 2998 vcipher $out3,$out3,v25 2999 vcipher $out4,$out4,v25 3000 lvx v25,$x10,$key_ # round[4] 3001 bdnz _aesp8_xts_enc5x 3002 3003 add $inp,$inp,$taillen 3004 cmpwi $taillen,0 3005 vcipher $out0,$out0,v24 3006 vcipher $out1,$out1,v24 3007 vcipher $out2,$out2,v24 3008 vcipher $out3,$out3,v24 3009 vcipher $out4,$out4,v24 3010 3011 subi $inp,$inp,16 3012 vcipher $out0,$out0,v25 3013 vcipher $out1,$out1,v25 3014 vcipher $out2,$out2,v25 3015 vcipher $out3,$out3,v25 3016 vcipher $out4,$out4,v25 3017 vxor $twk0,$twk0,v31 3018 3019 vcipher $out0,$out0,v26 3020 lvsr $inpperm,0,$taillen # $in5 is no more 3021 vcipher $out1,$out1,v26 3022 vcipher $out2,$out2,v26 3023 vcipher $out3,$out3,v26 3024 vcipher $out4,$out4,v26 3025 vxor $in1,$twk1,v31 3026 3027 vcipher $out0,$out0,v27 3028 lvx_u $in0,0,$inp 3029 vcipher $out1,$out1,v27 3030 vcipher $out2,$out2,v27 3031 vcipher $out3,$out3,v27 3032 vcipher $out4,$out4,v27 3033 vxor $in2,$twk2,v31 3034 3035 addi $key_,$sp,$FRAME+15 # rewind $key_ 3036 vcipher $out0,$out0,v28 3037 vcipher $out1,$out1,v28 3038 vcipher $out2,$out2,v28 3039 vcipher $out3,$out3,v28 3040 vcipher $out4,$out4,v28 3041 lvx v24,$x00,$key_ # re-pre-load round[1] 3042 vxor $in3,$twk3,v31 3043 3044 vcipher $out0,$out0,v29 3045 le?vperm $in0,$in0,$in0,$leperm 3046 vcipher $out1,$out1,v29 3047 vcipher $out2,$out2,v29 3048 vcipher $out3,$out3,v29 3049 vcipher $out4,$out4,v29 3050 lvx v25,$x10,$key_ # re-pre-load round[2] 3051 vxor $in4,$twk4,v31 3052 3053 vcipher $out0,$out0,v30 3054 vperm $in0,$in0,$in0,$inpperm 3055 vcipher $out1,$out1,v30 3056 vcipher $out2,$out2,v30 3057 vcipher $out3,$out3,v30 3058 vcipher $out4,$out4,v30 3059 3060 vcipherlast $out0,$out0,$twk0 3061 vcipherlast $out1,$out1,$in1 3062 vcipherlast $out2,$out2,$in2 3063 vcipherlast $out3,$out3,$in3 3064 vcipherlast $out4,$out4,$in4 3065 blr 3066 .long 0 3067 .byte 0,12,0x14,0,0,0,0,0 3068 3069.align 5 3070_aesp8_xts_decrypt6x: 3071 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3072 mflr r11 3073 li r7,`$FRAME+8*16+15` 3074 li r3,`$FRAME+8*16+31` 3075 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3076 stvx v20,r7,$sp # ABI says so 3077 addi r7,r7,32 3078 stvx v21,r3,$sp 3079 addi r3,r3,32 3080 stvx v22,r7,$sp 3081 addi r7,r7,32 3082 stvx v23,r3,$sp 3083 addi r3,r3,32 3084 stvx v24,r7,$sp 3085 addi r7,r7,32 3086 stvx v25,r3,$sp 3087 addi r3,r3,32 3088 stvx v26,r7,$sp 3089 addi r7,r7,32 3090 stvx v27,r3,$sp 3091 addi r3,r3,32 3092 stvx v28,r7,$sp 3093 addi r7,r7,32 3094 stvx v29,r3,$sp 3095 addi r3,r3,32 3096 stvx v30,r7,$sp 3097 stvx v31,r3,$sp 3098 li r0,-1 3099 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3100 li $x10,0x10 3101 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3102 li $x20,0x20 3103 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3104 li $x30,0x30 3105 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3106 li $x40,0x40 3107 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3108 li $x50,0x50 3109 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3110 li $x60,0x60 3111 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3112 li $x70,0x70 3113 mtspr 256,r0 3114 3115 subi $rounds,$rounds,3 # -4 in total 3116 3117 lvx $rndkey0,$x00,$key1 # load key schedule 3118 lvx v30,$x10,$key1 3119 addi $key1,$key1,0x20 3120 lvx v31,$x00,$key1 3121 ?vperm $rndkey0,$rndkey0,v30,$keyperm 3122 addi $key_,$sp,$FRAME+15 3123 mtctr $rounds 3124 3125Load_xts_dec_key: 3126 ?vperm v24,v30,v31,$keyperm 3127 lvx v30,$x10,$key1 3128 addi $key1,$key1,0x20 3129 stvx v24,$x00,$key_ # off-load round[1] 3130 ?vperm v25,v31,v30,$keyperm 3131 lvx v31,$x00,$key1 3132 stvx v25,$x10,$key_ # off-load round[2] 3133 addi $key_,$key_,0x20 3134 bdnz Load_xts_dec_key 3135 3136 lvx v26,$x10,$key1 3137 ?vperm v24,v30,v31,$keyperm 3138 lvx v27,$x20,$key1 3139 stvx v24,$x00,$key_ # off-load round[3] 3140 ?vperm v25,v31,v26,$keyperm 3141 lvx v28,$x30,$key1 3142 stvx v25,$x10,$key_ # off-load round[4] 3143 addi $key_,$sp,$FRAME+15 # rewind $key_ 3144 ?vperm v26,v26,v27,$keyperm 3145 lvx v29,$x40,$key1 3146 ?vperm v27,v27,v28,$keyperm 3147 lvx v30,$x50,$key1 3148 ?vperm v28,v28,v29,$keyperm 3149 lvx v31,$x60,$key1 3150 ?vperm v29,v29,v30,$keyperm 3151 lvx $twk5,$x70,$key1 # borrow $twk5 3152 ?vperm v30,v30,v31,$keyperm 3153 lvx v24,$x00,$key_ # pre-load round[1] 3154 ?vperm v31,v31,$twk5,$keyperm 3155 lvx v25,$x10,$key_ # pre-load round[2] 3156 3157 vperm $in0,$inout,$inptail,$inpperm 3158 subi $inp,$inp,31 # undo "caller" 3159 vxor $twk0,$tweak,$rndkey0 3160 vsrab $tmp,$tweak,$seven # next tweak value 3161 vaddubm $tweak,$tweak,$tweak 3162 vsldoi $tmp,$tmp,$tmp,15 3163 vand $tmp,$tmp,$eighty7 3164 vxor $out0,$in0,$twk0 3165 vxor $tweak,$tweak,$tmp 3166 3167 lvx_u $in1,$x10,$inp 3168 vxor $twk1,$tweak,$rndkey0 3169 vsrab $tmp,$tweak,$seven # next tweak value 3170 vaddubm $tweak,$tweak,$tweak 3171 vsldoi $tmp,$tmp,$tmp,15 3172 le?vperm $in1,$in1,$in1,$leperm 3173 vand $tmp,$tmp,$eighty7 3174 vxor $out1,$in1,$twk1 3175 vxor $tweak,$tweak,$tmp 3176 3177 lvx_u $in2,$x20,$inp 3178 andi. $taillen,$len,15 3179 vxor $twk2,$tweak,$rndkey0 3180 vsrab $tmp,$tweak,$seven # next tweak value 3181 vaddubm $tweak,$tweak,$tweak 3182 vsldoi $tmp,$tmp,$tmp,15 3183 le?vperm $in2,$in2,$in2,$leperm 3184 vand $tmp,$tmp,$eighty7 3185 vxor $out2,$in2,$twk2 3186 vxor $tweak,$tweak,$tmp 3187 3188 lvx_u $in3,$x30,$inp 3189 sub $len,$len,$taillen 3190 vxor $twk3,$tweak,$rndkey0 3191 vsrab $tmp,$tweak,$seven # next tweak value 3192 vaddubm $tweak,$tweak,$tweak 3193 vsldoi $tmp,$tmp,$tmp,15 3194 le?vperm $in3,$in3,$in3,$leperm 3195 vand $tmp,$tmp,$eighty7 3196 vxor $out3,$in3,$twk3 3197 vxor $tweak,$tweak,$tmp 3198 3199 lvx_u $in4,$x40,$inp 3200 subi $len,$len,0x60 3201 vxor $twk4,$tweak,$rndkey0 3202 vsrab $tmp,$tweak,$seven # next tweak value 3203 vaddubm $tweak,$tweak,$tweak 3204 vsldoi $tmp,$tmp,$tmp,15 3205 le?vperm $in4,$in4,$in4,$leperm 3206 vand $tmp,$tmp,$eighty7 3207 vxor $out4,$in4,$twk4 3208 vxor $tweak,$tweak,$tmp 3209 3210 lvx_u $in5,$x50,$inp 3211 addi $inp,$inp,0x60 3212 vxor $twk5,$tweak,$rndkey0 3213 vsrab $tmp,$tweak,$seven # next tweak value 3214 vaddubm $tweak,$tweak,$tweak 3215 vsldoi $tmp,$tmp,$tmp,15 3216 le?vperm $in5,$in5,$in5,$leperm 3217 vand $tmp,$tmp,$eighty7 3218 vxor $out5,$in5,$twk5 3219 vxor $tweak,$tweak,$tmp 3220 3221 vxor v31,v31,$rndkey0 3222 mtctr $rounds 3223 b Loop_xts_dec6x 3224 3225.align 5 3226Loop_xts_dec6x: 3227 vncipher $out0,$out0,v24 3228 vncipher $out1,$out1,v24 3229 vncipher $out2,$out2,v24 3230 vncipher $out3,$out3,v24 3231 vncipher $out4,$out4,v24 3232 vncipher $out5,$out5,v24 3233 lvx v24,$x20,$key_ # round[3] 3234 addi $key_,$key_,0x20 3235 3236 vncipher $out0,$out0,v25 3237 vncipher $out1,$out1,v25 3238 vncipher $out2,$out2,v25 3239 vncipher $out3,$out3,v25 3240 vncipher $out4,$out4,v25 3241 vncipher $out5,$out5,v25 3242 lvx v25,$x10,$key_ # round[4] 3243 bdnz Loop_xts_dec6x 3244 3245 subic $len,$len,96 # $len-=96 3246 vxor $in0,$twk0,v31 # xor with last round key 3247 vncipher $out0,$out0,v24 3248 vncipher $out1,$out1,v24 3249 vsrab $tmp,$tweak,$seven # next tweak value 3250 vxor $twk0,$tweak,$rndkey0 3251 vaddubm $tweak,$tweak,$tweak 3252 vncipher $out2,$out2,v24 3253 vncipher $out3,$out3,v24 3254 vsldoi $tmp,$tmp,$tmp,15 3255 vncipher $out4,$out4,v24 3256 vncipher $out5,$out5,v24 3257 3258 subfe. r0,r0,r0 # borrow?-1:0 3259 vand $tmp,$tmp,$eighty7 3260 vncipher $out0,$out0,v25 3261 vncipher $out1,$out1,v25 3262 vxor $tweak,$tweak,$tmp 3263 vncipher $out2,$out2,v25 3264 vncipher $out3,$out3,v25 3265 vxor $in1,$twk1,v31 3266 vsrab $tmp,$tweak,$seven # next tweak value 3267 vxor $twk1,$tweak,$rndkey0 3268 vncipher $out4,$out4,v25 3269 vncipher $out5,$out5,v25 3270 3271 and r0,r0,$len 3272 vaddubm $tweak,$tweak,$tweak 3273 vsldoi $tmp,$tmp,$tmp,15 3274 vncipher $out0,$out0,v26 3275 vncipher $out1,$out1,v26 3276 vand $tmp,$tmp,$eighty7 3277 vncipher $out2,$out2,v26 3278 vncipher $out3,$out3,v26 3279 vxor $tweak,$tweak,$tmp 3280 vncipher $out4,$out4,v26 3281 vncipher $out5,$out5,v26 3282 3283 add $inp,$inp,r0 # $inp is adjusted in such 3284 # way that at exit from the 3285 # loop inX-in5 are loaded 3286 # with last "words" 3287 vxor $in2,$twk2,v31 3288 vsrab $tmp,$tweak,$seven # next tweak value 3289 vxor $twk2,$tweak,$rndkey0 3290 vaddubm $tweak,$tweak,$tweak 3291 vncipher $out0,$out0,v27 3292 vncipher $out1,$out1,v27 3293 vsldoi $tmp,$tmp,$tmp,15 3294 vncipher $out2,$out2,v27 3295 vncipher $out3,$out3,v27 3296 vand $tmp,$tmp,$eighty7 3297 vncipher $out4,$out4,v27 3298 vncipher $out5,$out5,v27 3299 3300 addi $key_,$sp,$FRAME+15 # rewind $key_ 3301 vxor $tweak,$tweak,$tmp 3302 vncipher $out0,$out0,v28 3303 vncipher $out1,$out1,v28 3304 vxor $in3,$twk3,v31 3305 vsrab $tmp,$tweak,$seven # next tweak value 3306 vxor $twk3,$tweak,$rndkey0 3307 vncipher $out2,$out2,v28 3308 vncipher $out3,$out3,v28 3309 vaddubm $tweak,$tweak,$tweak 3310 vsldoi $tmp,$tmp,$tmp,15 3311 vncipher $out4,$out4,v28 3312 vncipher $out5,$out5,v28 3313 lvx v24,$x00,$key_ # re-pre-load round[1] 3314 vand $tmp,$tmp,$eighty7 3315 3316 vncipher $out0,$out0,v29 3317 vncipher $out1,$out1,v29 3318 vxor $tweak,$tweak,$tmp 3319 vncipher $out2,$out2,v29 3320 vncipher $out3,$out3,v29 3321 vxor $in4,$twk4,v31 3322 vsrab $tmp,$tweak,$seven # next tweak value 3323 vxor $twk4,$tweak,$rndkey0 3324 vncipher $out4,$out4,v29 3325 vncipher $out5,$out5,v29 3326 lvx v25,$x10,$key_ # re-pre-load round[2] 3327 vaddubm $tweak,$tweak,$tweak 3328 vsldoi $tmp,$tmp,$tmp,15 3329 3330 vncipher $out0,$out0,v30 3331 vncipher $out1,$out1,v30 3332 vand $tmp,$tmp,$eighty7 3333 vncipher $out2,$out2,v30 3334 vncipher $out3,$out3,v30 3335 vxor $tweak,$tweak,$tmp 3336 vncipher $out4,$out4,v30 3337 vncipher $out5,$out5,v30 3338 vxor $in5,$twk5,v31 3339 vsrab $tmp,$tweak,$seven # next tweak value 3340 vxor $twk5,$tweak,$rndkey0 3341 3342 vncipherlast $out0,$out0,$in0 3343 lvx_u $in0,$x00,$inp # load next input block 3344 vaddubm $tweak,$tweak,$tweak 3345 vsldoi $tmp,$tmp,$tmp,15 3346 vncipherlast $out1,$out1,$in1 3347 lvx_u $in1,$x10,$inp 3348 vncipherlast $out2,$out2,$in2 3349 le?vperm $in0,$in0,$in0,$leperm 3350 lvx_u $in2,$x20,$inp 3351 vand $tmp,$tmp,$eighty7 3352 vncipherlast $out3,$out3,$in3 3353 le?vperm $in1,$in1,$in1,$leperm 3354 lvx_u $in3,$x30,$inp 3355 vncipherlast $out4,$out4,$in4 3356 le?vperm $in2,$in2,$in2,$leperm 3357 lvx_u $in4,$x40,$inp 3358 vxor $tweak,$tweak,$tmp 3359 vncipherlast $out5,$out5,$in5 3360 le?vperm $in3,$in3,$in3,$leperm 3361 lvx_u $in5,$x50,$inp 3362 addi $inp,$inp,0x60 3363 le?vperm $in4,$in4,$in4,$leperm 3364 le?vperm $in5,$in5,$in5,$leperm 3365 3366 le?vperm $out0,$out0,$out0,$leperm 3367 le?vperm $out1,$out1,$out1,$leperm 3368 stvx_u $out0,$x00,$out # store output 3369 vxor $out0,$in0,$twk0 3370 le?vperm $out2,$out2,$out2,$leperm 3371 stvx_u $out1,$x10,$out 3372 vxor $out1,$in1,$twk1 3373 le?vperm $out3,$out3,$out3,$leperm 3374 stvx_u $out2,$x20,$out 3375 vxor $out2,$in2,$twk2 3376 le?vperm $out4,$out4,$out4,$leperm 3377 stvx_u $out3,$x30,$out 3378 vxor $out3,$in3,$twk3 3379 le?vperm $out5,$out5,$out5,$leperm 3380 stvx_u $out4,$x40,$out 3381 vxor $out4,$in4,$twk4 3382 stvx_u $out5,$x50,$out 3383 vxor $out5,$in5,$twk5 3384 addi $out,$out,0x60 3385 3386 mtctr $rounds 3387 beq Loop_xts_dec6x # did $len-=96 borrow? 3388 3389 addic. $len,$len,0x60 3390 beq Lxts_dec6x_zero 3391 cmpwi $len,0x20 3392 blt Lxts_dec6x_one 3393 nop 3394 beq Lxts_dec6x_two 3395 cmpwi $len,0x40 3396 blt Lxts_dec6x_three 3397 nop 3398 beq Lxts_dec6x_four 3399 3400Lxts_dec6x_five: 3401 vxor $out0,$in1,$twk0 3402 vxor $out1,$in2,$twk1 3403 vxor $out2,$in3,$twk2 3404 vxor $out3,$in4,$twk3 3405 vxor $out4,$in5,$twk4 3406 3407 bl _aesp8_xts_dec5x 3408 3409 le?vperm $out0,$out0,$out0,$leperm 3410 vmr $twk0,$twk5 # unused tweak 3411 vxor $twk1,$tweak,$rndkey0 3412 le?vperm $out1,$out1,$out1,$leperm 3413 stvx_u $out0,$x00,$out # store output 3414 vxor $out0,$in0,$twk1 3415 le?vperm $out2,$out2,$out2,$leperm 3416 stvx_u $out1,$x10,$out 3417 le?vperm $out3,$out3,$out3,$leperm 3418 stvx_u $out2,$x20,$out 3419 le?vperm $out4,$out4,$out4,$leperm 3420 stvx_u $out3,$x30,$out 3421 stvx_u $out4,$x40,$out 3422 addi $out,$out,0x50 3423 bne Lxts_dec6x_steal 3424 b Lxts_dec6x_done 3425 3426.align 4 3427Lxts_dec6x_four: 3428 vxor $out0,$in2,$twk0 3429 vxor $out1,$in3,$twk1 3430 vxor $out2,$in4,$twk2 3431 vxor $out3,$in5,$twk3 3432 vxor $out4,$out4,$out4 3433 3434 bl _aesp8_xts_dec5x 3435 3436 le?vperm $out0,$out0,$out0,$leperm 3437 vmr $twk0,$twk4 # unused tweak 3438 vmr $twk1,$twk5 3439 le?vperm $out1,$out1,$out1,$leperm 3440 stvx_u $out0,$x00,$out # store output 3441 vxor $out0,$in0,$twk5 3442 le?vperm $out2,$out2,$out2,$leperm 3443 stvx_u $out1,$x10,$out 3444 le?vperm $out3,$out3,$out3,$leperm 3445 stvx_u $out2,$x20,$out 3446 stvx_u $out3,$x30,$out 3447 addi $out,$out,0x40 3448 bne Lxts_dec6x_steal 3449 b Lxts_dec6x_done 3450 3451.align 4 3452Lxts_dec6x_three: 3453 vxor $out0,$in3,$twk0 3454 vxor $out1,$in4,$twk1 3455 vxor $out2,$in5,$twk2 3456 vxor $out3,$out3,$out3 3457 vxor $out4,$out4,$out4 3458 3459 bl _aesp8_xts_dec5x 3460 3461 le?vperm $out0,$out0,$out0,$leperm 3462 vmr $twk0,$twk3 # unused tweak 3463 vmr $twk1,$twk4 3464 le?vperm $out1,$out1,$out1,$leperm 3465 stvx_u $out0,$x00,$out # store output 3466 vxor $out0,$in0,$twk4 3467 le?vperm $out2,$out2,$out2,$leperm 3468 stvx_u $out1,$x10,$out 3469 stvx_u $out2,$x20,$out 3470 addi $out,$out,0x30 3471 bne Lxts_dec6x_steal 3472 b Lxts_dec6x_done 3473 3474.align 4 3475Lxts_dec6x_two: 3476 vxor $out0,$in4,$twk0 3477 vxor $out1,$in5,$twk1 3478 vxor $out2,$out2,$out2 3479 vxor $out3,$out3,$out3 3480 vxor $out4,$out4,$out4 3481 3482 bl _aesp8_xts_dec5x 3483 3484 le?vperm $out0,$out0,$out0,$leperm 3485 vmr $twk0,$twk2 # unused tweak 3486 vmr $twk1,$twk3 3487 le?vperm $out1,$out1,$out1,$leperm 3488 stvx_u $out0,$x00,$out # store output 3489 vxor $out0,$in0,$twk3 3490 stvx_u $out1,$x10,$out 3491 addi $out,$out,0x20 3492 bne Lxts_dec6x_steal 3493 b Lxts_dec6x_done 3494 3495.align 4 3496Lxts_dec6x_one: 3497 vxor $out0,$in5,$twk0 3498 nop 3499Loop_xts_dec1x: 3500 vncipher $out0,$out0,v24 3501 lvx v24,$x20,$key_ # round[3] 3502 addi $key_,$key_,0x20 3503 3504 vncipher $out0,$out0,v25 3505 lvx v25,$x10,$key_ # round[4] 3506 bdnz Loop_xts_dec1x 3507 3508 subi r0,$taillen,1 3509 vncipher $out0,$out0,v24 3510 3511 andi. r0,r0,16 3512 cmpwi $taillen,0 3513 vncipher $out0,$out0,v25 3514 3515 sub $inp,$inp,r0 3516 vncipher $out0,$out0,v26 3517 3518 lvx_u $in0,0,$inp 3519 vncipher $out0,$out0,v27 3520 3521 addi $key_,$sp,$FRAME+15 # rewind $key_ 3522 vncipher $out0,$out0,v28 3523 lvx v24,$x00,$key_ # re-pre-load round[1] 3524 3525 vncipher $out0,$out0,v29 3526 lvx v25,$x10,$key_ # re-pre-load round[2] 3527 vxor $twk0,$twk0,v31 3528 3529 le?vperm $in0,$in0,$in0,$leperm 3530 vncipher $out0,$out0,v30 3531 3532 mtctr $rounds 3533 vncipherlast $out0,$out0,$twk0 3534 3535 vmr $twk0,$twk1 # unused tweak 3536 vmr $twk1,$twk2 3537 le?vperm $out0,$out0,$out0,$leperm 3538 stvx_u $out0,$x00,$out # store output 3539 addi $out,$out,0x10 3540 vxor $out0,$in0,$twk2 3541 bne Lxts_dec6x_steal 3542 b Lxts_dec6x_done 3543 3544.align 4 3545Lxts_dec6x_zero: 3546 cmpwi $taillen,0 3547 beq Lxts_dec6x_done 3548 3549 lvx_u $in0,0,$inp 3550 le?vperm $in0,$in0,$in0,$leperm 3551 vxor $out0,$in0,$twk1 3552Lxts_dec6x_steal: 3553 vncipher $out0,$out0,v24 3554 lvx v24,$x20,$key_ # round[3] 3555 addi $key_,$key_,0x20 3556 3557 vncipher $out0,$out0,v25 3558 lvx v25,$x10,$key_ # round[4] 3559 bdnz Lxts_dec6x_steal 3560 3561 add $inp,$inp,$taillen 3562 vncipher $out0,$out0,v24 3563 3564 cmpwi $taillen,0 3565 vncipher $out0,$out0,v25 3566 3567 lvx_u $in0,0,$inp 3568 vncipher $out0,$out0,v26 3569 3570 lvsr $inpperm,0,$taillen # $in5 is no more 3571 vncipher $out0,$out0,v27 3572 3573 addi $key_,$sp,$FRAME+15 # rewind $key_ 3574 vncipher $out0,$out0,v28 3575 lvx v24,$x00,$key_ # re-pre-load round[1] 3576 3577 vncipher $out0,$out0,v29 3578 lvx v25,$x10,$key_ # re-pre-load round[2] 3579 vxor $twk1,$twk1,v31 3580 3581 le?vperm $in0,$in0,$in0,$leperm 3582 vncipher $out0,$out0,v30 3583 3584 vperm $in0,$in0,$in0,$inpperm 3585 vncipherlast $tmp,$out0,$twk1 3586 3587 le?vperm $out0,$tmp,$tmp,$leperm 3588 le?stvx_u $out0,0,$out 3589 be?stvx_u $tmp,0,$out 3590 3591 vxor $out0,$out0,$out0 3592 vspltisb $out1,-1 3593 vperm $out0,$out0,$out1,$inpperm 3594 vsel $out0,$in0,$tmp,$out0 3595 vxor $out0,$out0,$twk0 3596 3597 subi r30,$out,1 3598 mtctr $taillen 3599Loop_xts_dec6x_steal: 3600 lbzu r0,1(r30) 3601 stb r0,16(r30) 3602 bdnz Loop_xts_dec6x_steal 3603 3604 li $taillen,0 3605 mtctr $rounds 3606 b Loop_xts_dec1x # one more time... 3607 3608.align 4 3609Lxts_dec6x_done: 3610 ${UCMP}i $ivp,0 3611 beq Lxts_dec6x_ret 3612 3613 vxor $tweak,$twk0,$rndkey0 3614 le?vperm $tweak,$tweak,$tweak,$leperm 3615 stvx_u $tweak,0,$ivp 3616 3617Lxts_dec6x_ret: 3618 mtlr r11 3619 li r10,`$FRAME+15` 3620 li r11,`$FRAME+31` 3621 stvx $seven,r10,$sp # wipe copies of round keys 3622 addi r10,r10,32 3623 stvx $seven,r11,$sp 3624 addi r11,r11,32 3625 stvx $seven,r10,$sp 3626 addi r10,r10,32 3627 stvx $seven,r11,$sp 3628 addi r11,r11,32 3629 stvx $seven,r10,$sp 3630 addi r10,r10,32 3631 stvx $seven,r11,$sp 3632 addi r11,r11,32 3633 stvx $seven,r10,$sp 3634 addi r10,r10,32 3635 stvx $seven,r11,$sp 3636 addi r11,r11,32 3637 3638 mtspr 256,$vrsave 3639 lvx v20,r10,$sp # ABI says so 3640 addi r10,r10,32 3641 lvx v21,r11,$sp 3642 addi r11,r11,32 3643 lvx v22,r10,$sp 3644 addi r10,r10,32 3645 lvx v23,r11,$sp 3646 addi r11,r11,32 3647 lvx v24,r10,$sp 3648 addi r10,r10,32 3649 lvx v25,r11,$sp 3650 addi r11,r11,32 3651 lvx v26,r10,$sp 3652 addi r10,r10,32 3653 lvx v27,r11,$sp 3654 addi r11,r11,32 3655 lvx v28,r10,$sp 3656 addi r10,r10,32 3657 lvx v29,r11,$sp 3658 addi r11,r11,32 3659 lvx v30,r10,$sp 3660 lvx v31,r11,$sp 3661 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3662 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3663 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3664 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3665 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3666 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3667 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3668 blr 3669 .long 0 3670 .byte 0,12,0x04,1,0x80,6,6,0 3671 .long 0 3672 3673.align 5 3674_aesp8_xts_dec5x: 3675 vncipher $out0,$out0,v24 3676 vncipher $out1,$out1,v24 3677 vncipher $out2,$out2,v24 3678 vncipher $out3,$out3,v24 3679 vncipher $out4,$out4,v24 3680 lvx v24,$x20,$key_ # round[3] 3681 addi $key_,$key_,0x20 3682 3683 vncipher $out0,$out0,v25 3684 vncipher $out1,$out1,v25 3685 vncipher $out2,$out2,v25 3686 vncipher $out3,$out3,v25 3687 vncipher $out4,$out4,v25 3688 lvx v25,$x10,$key_ # round[4] 3689 bdnz _aesp8_xts_dec5x 3690 3691 subi r0,$taillen,1 3692 vncipher $out0,$out0,v24 3693 vncipher $out1,$out1,v24 3694 vncipher $out2,$out2,v24 3695 vncipher $out3,$out3,v24 3696 vncipher $out4,$out4,v24 3697 3698 andi. r0,r0,16 3699 cmpwi $taillen,0 3700 vncipher $out0,$out0,v25 3701 vncipher $out1,$out1,v25 3702 vncipher $out2,$out2,v25 3703 vncipher $out3,$out3,v25 3704 vncipher $out4,$out4,v25 3705 vxor $twk0,$twk0,v31 3706 3707 sub $inp,$inp,r0 3708 vncipher $out0,$out0,v26 3709 vncipher $out1,$out1,v26 3710 vncipher $out2,$out2,v26 3711 vncipher $out3,$out3,v26 3712 vncipher $out4,$out4,v26 3713 vxor $in1,$twk1,v31 3714 3715 vncipher $out0,$out0,v27 3716 lvx_u $in0,0,$inp 3717 vncipher $out1,$out1,v27 3718 vncipher $out2,$out2,v27 3719 vncipher $out3,$out3,v27 3720 vncipher $out4,$out4,v27 3721 vxor $in2,$twk2,v31 3722 3723 addi $key_,$sp,$FRAME+15 # rewind $key_ 3724 vncipher $out0,$out0,v28 3725 vncipher $out1,$out1,v28 3726 vncipher $out2,$out2,v28 3727 vncipher $out3,$out3,v28 3728 vncipher $out4,$out4,v28 3729 lvx v24,$x00,$key_ # re-pre-load round[1] 3730 vxor $in3,$twk3,v31 3731 3732 vncipher $out0,$out0,v29 3733 le?vperm $in0,$in0,$in0,$leperm 3734 vncipher $out1,$out1,v29 3735 vncipher $out2,$out2,v29 3736 vncipher $out3,$out3,v29 3737 vncipher $out4,$out4,v29 3738 lvx v25,$x10,$key_ # re-pre-load round[2] 3739 vxor $in4,$twk4,v31 3740 3741 vncipher $out0,$out0,v30 3742 vncipher $out1,$out1,v30 3743 vncipher $out2,$out2,v30 3744 vncipher $out3,$out3,v30 3745 vncipher $out4,$out4,v30 3746 3747 vncipherlast $out0,$out0,$twk0 3748 vncipherlast $out1,$out1,$in1 3749 vncipherlast $out2,$out2,$in2 3750 vncipherlast $out3,$out3,$in3 3751 vncipherlast $out4,$out4,$in4 3752 mtctr $rounds 3753 blr 3754 .long 0 3755 .byte 0,12,0x14,0,0,0,0,0 3756___ 3757}} }}} 3758 3759my $consts=1; 3760foreach(split("\n",$code)) { 3761 s/\`([^\`]*)\`/eval($1)/geo; 3762 3763 # constants table endian-specific conversion 3764 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 3765 my $conv=$3; 3766 my @bytes=(); 3767 3768 # convert to endian-agnostic format 3769 if ($1 eq "long") { 3770 foreach (split(/,\s*/,$2)) { 3771 my $l = /^0/?oct:int; 3772 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 3773 } 3774 } else { 3775 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 3776 } 3777 3778 # little-endian conversion 3779 if ($flavour =~ /le$/o) { 3780 SWITCH: for($conv) { 3781 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3782 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3783 } 3784 } 3785 3786 #emit 3787 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 3788 next; 3789 } 3790 $consts=0 if (m/Lconsts:/o); # end of table 3791 3792 # instructions prefixed with '?' are endian-specific and need 3793 # to be adjusted accordingly... 3794 if ($flavour =~ /le$/o) { # little-endian 3795 s/le\?//o or 3796 s/be\?/#be#/o or 3797 s/\?lvsr/lvsl/o or 3798 s/\?lvsl/lvsr/o or 3799 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 3800 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 3801 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 3802 } else { # big-endian 3803 s/le\?/#le#/o or 3804 s/be\?//o or 3805 s/\?([a-z]+)/$1/o; 3806 } 3807 3808 print $_,"\n"; 3809} 3810 3811close STDOUT or die "error closing STDOUT: $!"; 3812