1#! /usr/bin/env perl 2# Copyright 2014-2024 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the Apache License 2.0 (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9# 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16# 17# This module implements support for AES instructions as per PowerISA 18# specification version 2.07, first implemented by POWER8 processor. 19# The module is endian-agnostic in sense that it supports both big- 20# and little-endian cases. Data alignment in parallelizable modes is 21# handled with VSX loads and stores, which implies MSR.VSX flag being 22# set. It should also be noted that ISA specification doesn't prohibit 23# alignment exceptions for these instructions on page boundaries. 24# Initially alignment was handled in pure AltiVec/VMX way [when data 25# is aligned programmatically, which in turn guarantees exception- 26# free execution], but it turned to hamper performance when vcipher 27# instructions are interleaved. It's reckoned that eventual 28# misalignment penalties at page boundaries are in average lower 29# than additional overhead in pure AltiVec approach. 30# 31# May 2016 32# 33# Add XTS subroutine, 9x on little- and 12x improvement on big-endian 34# systems were measured. 35# 36###################################################################### 37# Current large-block performance in cycles per byte processed with 38# 128-bit key (less is better). 39# 40# CBC en-/decrypt CTR XTS 41# POWER8[le] 3.96/0.72 0.74 1.1 42# POWER8[be] 3.75/0.65 0.66 1.0 43# POWER9[le] 4.02/0.86 0.84 1.05 44# POWER9[be] 3.99/0.78 0.79 0.97 45 46# $output is the last argument if it looks like a file (it has an extension) 47# $flavour is the first argument if it doesn't look like a file 48$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; 49$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; 50 51if ($flavour =~ /64/) { 52 $SIZE_T =8; 53 $LRSAVE =2*$SIZE_T; 54 $STU ="stdu"; 55 $POP ="ld"; 56 $PUSH ="std"; 57 $UCMP ="cmpld"; 58 $SHL ="sldi"; 59} elsif ($flavour =~ /32/) { 60 $SIZE_T =4; 61 $LRSAVE =$SIZE_T; 62 $STU ="stwu"; 63 $POP ="lwz"; 64 $PUSH ="stw"; 65 $UCMP ="cmplw"; 66 $SHL ="slwi"; 67} else { die "nonsense $flavour"; } 68 69$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 70 71$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 72( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 73( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 74die "can't locate ppc-xlate.pl"; 75 76open STDOUT,"| $^X $xlate $flavour \"$output\"" 77 or die "can't call $xlate: $!"; 78 79$FRAME=8*$SIZE_T; 80$prefix="aes_p8"; 81 82$sp="r1"; 83$vrsave="r12"; 84 85######################################################################### 86{{{ # Key setup procedures # 87my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); 88my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); 89my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); 90 91$code.=<<___; 92.machine "any" 93 94.text 95 96.align 7 97rcon: 98.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev 99.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev 100.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev 101.long 0,0,0,0 ?asis 102.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe 103Lconsts: 104 mflr r0 105 bcl 20,31,\$+4 106 mflr $ptr #vvvvv "distance between . and rcon 107 addi $ptr,$ptr,-0x58 108 mtlr r0 109 blr 110 .long 0 111 .byte 0,12,0x14,0,0,0,0,0 112.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" 113 114.globl .${prefix}_set_encrypt_key 115.align 5 116.${prefix}_set_encrypt_key: 117Lset_encrypt_key: 118 mflr r11 119 $PUSH r11,$LRSAVE($sp) 120 121 li $ptr,-1 122 ${UCMP}i $inp,0 123 beq- Lenc_key_abort # if ($inp==0) return -1; 124 ${UCMP}i $out,0 125 beq- Lenc_key_abort # if ($out==0) return -1; 126 li $ptr,-2 127 cmpwi $bits,128 128 blt- Lenc_key_abort 129 cmpwi $bits,256 130 bgt- Lenc_key_abort 131 andi. r0,$bits,0x3f 132 bne- Lenc_key_abort 133 134 lis r0,0xfff0 135 mfspr $vrsave,256 136 mtspr 256,r0 137 138 bl Lconsts 139 mtlr r11 140 141 neg r9,$inp 142 lvx $in0,0,$inp 143 addi $inp,$inp,15 # 15 is not typo 144 lvsr $key,0,r9 # borrow $key 145 li r8,0x20 146 cmpwi $bits,192 147 lvx $in1,0,$inp 148 le?vspltisb $mask,0x0f # borrow $mask 149 lvx $rcon,0,$ptr 150 le?vxor $key,$key,$mask # adjust for byte swap 151 lvx $mask,r8,$ptr 152 addi $ptr,$ptr,0x10 153 vperm $in0,$in0,$in1,$key # align [and byte swap in LE] 154 li $cnt,8 155 vxor $zero,$zero,$zero 156 mtctr $cnt 157 158 ?lvsr $outperm,0,$out 159 vspltisb $outmask,-1 160 lvx $outhead,0,$out 161 ?vperm $outmask,$zero,$outmask,$outperm 162 163 blt Loop128 164 addi $inp,$inp,8 165 beq L192 166 addi $inp,$inp,8 167 b L256 168 169.align 4 170Loop128: 171 vperm $key,$in0,$in0,$mask # rotate-n-splat 172 vsldoi $tmp,$zero,$in0,12 # >>32 173 vperm $outtail,$in0,$in0,$outperm # rotate 174 vsel $stage,$outhead,$outtail,$outmask 175 vmr $outhead,$outtail 176 vcipherlast $key,$key,$rcon 177 stvx $stage,0,$out 178 addi $out,$out,16 179 180 vxor $in0,$in0,$tmp 181 vsldoi $tmp,$zero,$tmp,12 # >>32 182 vxor $in0,$in0,$tmp 183 vsldoi $tmp,$zero,$tmp,12 # >>32 184 vxor $in0,$in0,$tmp 185 vadduwm $rcon,$rcon,$rcon 186 vxor $in0,$in0,$key 187 bdnz Loop128 188 189 lvx $rcon,0,$ptr # last two round keys 190 191 vperm $key,$in0,$in0,$mask # rotate-n-splat 192 vsldoi $tmp,$zero,$in0,12 # >>32 193 vperm $outtail,$in0,$in0,$outperm # rotate 194 vsel $stage,$outhead,$outtail,$outmask 195 vmr $outhead,$outtail 196 vcipherlast $key,$key,$rcon 197 stvx $stage,0,$out 198 addi $out,$out,16 199 200 vxor $in0,$in0,$tmp 201 vsldoi $tmp,$zero,$tmp,12 # >>32 202 vxor $in0,$in0,$tmp 203 vsldoi $tmp,$zero,$tmp,12 # >>32 204 vxor $in0,$in0,$tmp 205 vadduwm $rcon,$rcon,$rcon 206 vxor $in0,$in0,$key 207 208 vperm $key,$in0,$in0,$mask # rotate-n-splat 209 vsldoi $tmp,$zero,$in0,12 # >>32 210 vperm $outtail,$in0,$in0,$outperm # rotate 211 vsel $stage,$outhead,$outtail,$outmask 212 vmr $outhead,$outtail 213 vcipherlast $key,$key,$rcon 214 stvx $stage,0,$out 215 addi $out,$out,16 216 217 vxor $in0,$in0,$tmp 218 vsldoi $tmp,$zero,$tmp,12 # >>32 219 vxor $in0,$in0,$tmp 220 vsldoi $tmp,$zero,$tmp,12 # >>32 221 vxor $in0,$in0,$tmp 222 vxor $in0,$in0,$key 223 vperm $outtail,$in0,$in0,$outperm # rotate 224 vsel $stage,$outhead,$outtail,$outmask 225 vmr $outhead,$outtail 226 stvx $stage,0,$out 227 228 addi $inp,$out,15 # 15 is not typo 229 addi $out,$out,0x50 230 231 li $rounds,10 232 b Ldone 233 234.align 4 235L192: 236 lvx $tmp,0,$inp 237 li $cnt,4 238 vperm $outtail,$in0,$in0,$outperm # rotate 239 vsel $stage,$outhead,$outtail,$outmask 240 vmr $outhead,$outtail 241 stvx $stage,0,$out 242 addi $out,$out,16 243 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 244 vspltisb $key,8 # borrow $key 245 mtctr $cnt 246 vsububm $mask,$mask,$key # adjust the mask 247 248Loop192: 249 vperm $key,$in1,$in1,$mask # roate-n-splat 250 vsldoi $tmp,$zero,$in0,12 # >>32 251 vcipherlast $key,$key,$rcon 252 253 vxor $in0,$in0,$tmp 254 vsldoi $tmp,$zero,$tmp,12 # >>32 255 vxor $in0,$in0,$tmp 256 vsldoi $tmp,$zero,$tmp,12 # >>32 257 vxor $in0,$in0,$tmp 258 259 vsldoi $stage,$zero,$in1,8 260 vspltw $tmp,$in0,3 261 vxor $tmp,$tmp,$in1 262 vsldoi $in1,$zero,$in1,12 # >>32 263 vadduwm $rcon,$rcon,$rcon 264 vxor $in1,$in1,$tmp 265 vxor $in0,$in0,$key 266 vxor $in1,$in1,$key 267 vsldoi $stage,$stage,$in0,8 268 269 vperm $key,$in1,$in1,$mask # rotate-n-splat 270 vsldoi $tmp,$zero,$in0,12 # >>32 271 vperm $outtail,$stage,$stage,$outperm # rotate 272 vsel $stage,$outhead,$outtail,$outmask 273 vmr $outhead,$outtail 274 vcipherlast $key,$key,$rcon 275 stvx $stage,0,$out 276 addi $out,$out,16 277 278 vsldoi $stage,$in0,$in1,8 279 vxor $in0,$in0,$tmp 280 vsldoi $tmp,$zero,$tmp,12 # >>32 281 vperm $outtail,$stage,$stage,$outperm # rotate 282 vsel $stage,$outhead,$outtail,$outmask 283 vmr $outhead,$outtail 284 vxor $in0,$in0,$tmp 285 vsldoi $tmp,$zero,$tmp,12 # >>32 286 vxor $in0,$in0,$tmp 287 stvx $stage,0,$out 288 addi $out,$out,16 289 290 vspltw $tmp,$in0,3 291 vxor $tmp,$tmp,$in1 292 vsldoi $in1,$zero,$in1,12 # >>32 293 vadduwm $rcon,$rcon,$rcon 294 vxor $in1,$in1,$tmp 295 vxor $in0,$in0,$key 296 vxor $in1,$in1,$key 297 vperm $outtail,$in0,$in0,$outperm # rotate 298 vsel $stage,$outhead,$outtail,$outmask 299 vmr $outhead,$outtail 300 stvx $stage,0,$out 301 addi $inp,$out,15 # 15 is not typo 302 addi $out,$out,16 303 bdnz Loop192 304 305 li $rounds,12 306 addi $out,$out,0x20 307 b Ldone 308 309.align 4 310L256: 311 lvx $tmp,0,$inp 312 li $cnt,7 313 li $rounds,14 314 vperm $outtail,$in0,$in0,$outperm # rotate 315 vsel $stage,$outhead,$outtail,$outmask 316 vmr $outhead,$outtail 317 stvx $stage,0,$out 318 addi $out,$out,16 319 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] 320 mtctr $cnt 321 322Loop256: 323 vperm $key,$in1,$in1,$mask # rotate-n-splat 324 vsldoi $tmp,$zero,$in0,12 # >>32 325 vperm $outtail,$in1,$in1,$outperm # rotate 326 vsel $stage,$outhead,$outtail,$outmask 327 vmr $outhead,$outtail 328 vcipherlast $key,$key,$rcon 329 stvx $stage,0,$out 330 addi $out,$out,16 331 332 vxor $in0,$in0,$tmp 333 vsldoi $tmp,$zero,$tmp,12 # >>32 334 vxor $in0,$in0,$tmp 335 vsldoi $tmp,$zero,$tmp,12 # >>32 336 vxor $in0,$in0,$tmp 337 vadduwm $rcon,$rcon,$rcon 338 vxor $in0,$in0,$key 339 vperm $outtail,$in0,$in0,$outperm # rotate 340 vsel $stage,$outhead,$outtail,$outmask 341 vmr $outhead,$outtail 342 stvx $stage,0,$out 343 addi $inp,$out,15 # 15 is not typo 344 addi $out,$out,16 345 bdz Ldone 346 347 vspltw $key,$in0,3 # just splat 348 vsldoi $tmp,$zero,$in1,12 # >>32 349 vsbox $key,$key 350 351 vxor $in1,$in1,$tmp 352 vsldoi $tmp,$zero,$tmp,12 # >>32 353 vxor $in1,$in1,$tmp 354 vsldoi $tmp,$zero,$tmp,12 # >>32 355 vxor $in1,$in1,$tmp 356 357 vxor $in1,$in1,$key 358 b Loop256 359 360.align 4 361Ldone: 362 lvx $in1,0,$inp # redundant in aligned case 363 vsel $in1,$outhead,$in1,$outmask 364 stvx $in1,0,$inp 365 li $ptr,0 366 mtspr 256,$vrsave 367 stw $rounds,0($out) 368 369Lenc_key_abort: 370 mr r3,$ptr 371 blr 372 .long 0 373 .byte 0,12,0x14,1,0,0,3,0 374 .long 0 375.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key 376 377.globl .${prefix}_set_decrypt_key 378.align 5 379.${prefix}_set_decrypt_key: 380 $STU $sp,-$FRAME($sp) 381 mflr r10 382 $PUSH r10,$FRAME+$LRSAVE($sp) 383 bl Lset_encrypt_key 384 mtlr r10 385 386 cmpwi r3,0 387 bne- Ldec_key_abort 388 389 slwi $cnt,$rounds,4 390 subi $inp,$out,240 # first round key 391 srwi $rounds,$rounds,1 392 add $out,$inp,$cnt # last round key 393 mtctr $rounds 394 395Ldeckey: 396 lwz r0, 0($inp) 397 lwz r6, 4($inp) 398 lwz r7, 8($inp) 399 lwz r8, 12($inp) 400 addi $inp,$inp,16 401 lwz r9, 0($out) 402 lwz r10,4($out) 403 lwz r11,8($out) 404 lwz r12,12($out) 405 stw r0, 0($out) 406 stw r6, 4($out) 407 stw r7, 8($out) 408 stw r8, 12($out) 409 subi $out,$out,16 410 stw r9, -16($inp) 411 stw r10,-12($inp) 412 stw r11,-8($inp) 413 stw r12,-4($inp) 414 bdnz Ldeckey 415 416 xor r3,r3,r3 # return value 417Ldec_key_abort: 418 addi $sp,$sp,$FRAME 419 blr 420 .long 0 421 .byte 0,12,4,1,0x80,0,3,0 422 .long 0 423.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 424___ 425}}} 426######################################################################### 427{{{ # Single block en- and decrypt procedures # 428sub gen_block () { 429my $dir = shift; 430my $n = $dir eq "de" ? "n" : ""; 431my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 432 433$code.=<<___; 434.globl .${prefix}_${dir}crypt 435.align 5 436.${prefix}_${dir}crypt: 437 lwz $rounds,240($key) 438 lis r0,0xfc00 439 mfspr $vrsave,256 440 li $idx,15 # 15 is not typo 441 mtspr 256,r0 442 443 lvx v0,0,$inp 444 neg r11,$out 445 lvx v1,$idx,$inp 446 lvsl v2,0,$inp # inpperm 447 le?vspltisb v4,0x0f 448 ?lvsl v3,0,r11 # outperm 449 le?vxor v2,v2,v4 450 li $idx,16 451 vperm v0,v0,v1,v2 # align [and byte swap in LE] 452 lvx v1,0,$key 453 ?lvsl v5,0,$key # keyperm 454 srwi $rounds,$rounds,1 455 lvx v2,$idx,$key 456 addi $idx,$idx,16 457 subi $rounds,$rounds,1 458 ?vperm v1,v1,v2,v5 # align round key 459 460 vxor v0,v0,v1 461 lvx v1,$idx,$key 462 addi $idx,$idx,16 463 mtctr $rounds 464 465Loop_${dir}c: 466 ?vperm v2,v2,v1,v5 467 v${n}cipher v0,v0,v2 468 lvx v2,$idx,$key 469 addi $idx,$idx,16 470 ?vperm v1,v1,v2,v5 471 v${n}cipher v0,v0,v1 472 lvx v1,$idx,$key 473 addi $idx,$idx,16 474 bdnz Loop_${dir}c 475 476 ?vperm v2,v2,v1,v5 477 v${n}cipher v0,v0,v2 478 lvx v2,$idx,$key 479 ?vperm v1,v1,v2,v5 480 v${n}cipherlast v0,v0,v1 481 482 vspltisb v2,-1 483 vxor v1,v1,v1 484 li $idx,15 # 15 is not typo 485 ?vperm v2,v1,v2,v3 # outmask 486 le?vxor v3,v3,v4 487 lvx v1,0,$out # outhead 488 vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 489 vsel v1,v1,v0,v2 490 lvx v4,$idx,$out 491 stvx v1,0,$out 492 vsel v0,v0,v4,v2 493 stvx v0,$idx,$out 494 495 mtspr 256,$vrsave 496 blr 497 .long 0 498 .byte 0,12,0x14,0,0,0,3,0 499 .long 0 500.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 501___ 502} 503&gen_block("en"); 504&gen_block("de"); 505}}} 506######################################################################### 507{{{ # CBC en- and decrypt procedures # 508my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 509my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 510my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 511 map("v$_",(4..10)); 512$code.=<<___; 513.globl .${prefix}_cbc_encrypt 514.align 5 515.${prefix}_cbc_encrypt: 516 ${UCMP}i $len,16 517 bltlr- 518 519 cmpwi $enc,0 # test direction 520 lis r0,0xffe0 521 mfspr $vrsave,256 522 mtspr 256,r0 523 524 li $idx,15 525 vxor $rndkey0,$rndkey0,$rndkey0 526 le?vspltisb $tmp,0x0f 527 528 lvx $ivec,0,$ivp # load [unaligned] iv 529 lvsl $inpperm,0,$ivp 530 lvx $inptail,$idx,$ivp 531 le?vxor $inpperm,$inpperm,$tmp 532 vperm $ivec,$ivec,$inptail,$inpperm 533 534 neg r11,$inp 535 ?lvsl $keyperm,0,$key # prepare for unaligned key 536 lwz $rounds,240($key) 537 538 lvsr $inpperm,0,r11 # prepare for unaligned load 539 lvx $inptail,0,$inp 540 addi $inp,$inp,15 # 15 is not typo 541 le?vxor $inpperm,$inpperm,$tmp 542 543 ?lvsr $outperm,0,$out # prepare for unaligned store 544 vspltisb $outmask,-1 545 lvx $outhead,0,$out 546 ?vperm $outmask,$rndkey0,$outmask,$outperm 547 le?vxor $outperm,$outperm,$tmp 548 549 srwi $rounds,$rounds,1 550 li $idx,16 551 subi $rounds,$rounds,1 552 beq Lcbc_dec 553 554Lcbc_enc: 555 vmr $inout,$inptail 556 lvx $inptail,0,$inp 557 addi $inp,$inp,16 558 mtctr $rounds 559 subi $len,$len,16 # len-=16 560 561 lvx $rndkey0,0,$key 562 vperm $inout,$inout,$inptail,$inpperm 563 lvx $rndkey1,$idx,$key 564 addi $idx,$idx,16 565 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 566 vxor $inout,$inout,$rndkey0 567 lvx $rndkey0,$idx,$key 568 addi $idx,$idx,16 569 vxor $inout,$inout,$ivec 570 571Loop_cbc_enc: 572 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 573 vcipher $inout,$inout,$rndkey1 574 lvx $rndkey1,$idx,$key 575 addi $idx,$idx,16 576 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 577 vcipher $inout,$inout,$rndkey0 578 lvx $rndkey0,$idx,$key 579 addi $idx,$idx,16 580 bdnz Loop_cbc_enc 581 582 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 583 vcipher $inout,$inout,$rndkey1 584 lvx $rndkey1,$idx,$key 585 li $idx,16 586 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 587 vcipherlast $ivec,$inout,$rndkey0 588 ${UCMP}i $len,16 589 590 vperm $tmp,$ivec,$ivec,$outperm 591 vsel $inout,$outhead,$tmp,$outmask 592 vmr $outhead,$tmp 593 stvx $inout,0,$out 594 addi $out,$out,16 595 bge Lcbc_enc 596 597 b Lcbc_done 598 599.align 4 600Lcbc_dec: 601 ${UCMP}i $len,128 602 bge _aesp8_cbc_decrypt8x 603 vmr $tmp,$inptail 604 lvx $inptail,0,$inp 605 addi $inp,$inp,16 606 mtctr $rounds 607 subi $len,$len,16 # len-=16 608 609 lvx $rndkey0,0,$key 610 vperm $tmp,$tmp,$inptail,$inpperm 611 lvx $rndkey1,$idx,$key 612 addi $idx,$idx,16 613 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 614 vxor $inout,$tmp,$rndkey0 615 lvx $rndkey0,$idx,$key 616 addi $idx,$idx,16 617 618Loop_cbc_dec: 619 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 620 vncipher $inout,$inout,$rndkey1 621 lvx $rndkey1,$idx,$key 622 addi $idx,$idx,16 623 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 624 vncipher $inout,$inout,$rndkey0 625 lvx $rndkey0,$idx,$key 626 addi $idx,$idx,16 627 bdnz Loop_cbc_dec 628 629 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 630 vncipher $inout,$inout,$rndkey1 631 lvx $rndkey1,$idx,$key 632 li $idx,16 633 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 634 vncipherlast $inout,$inout,$rndkey0 635 ${UCMP}i $len,16 636 637 vxor $inout,$inout,$ivec 638 vmr $ivec,$tmp 639 vperm $tmp,$inout,$inout,$outperm 640 vsel $inout,$outhead,$tmp,$outmask 641 vmr $outhead,$tmp 642 stvx $inout,0,$out 643 addi $out,$out,16 644 bge Lcbc_dec 645 646Lcbc_done: 647 addi $out,$out,-1 648 lvx $inout,0,$out # redundant in aligned case 649 vsel $inout,$outhead,$inout,$outmask 650 stvx $inout,0,$out 651 652 neg $enc,$ivp # write [unaligned] iv 653 li $idx,15 # 15 is not typo 654 vxor $rndkey0,$rndkey0,$rndkey0 655 vspltisb $outmask,-1 656 le?vspltisb $tmp,0x0f 657 ?lvsl $outperm,0,$enc 658 ?vperm $outmask,$rndkey0,$outmask,$outperm 659 le?vxor $outperm,$outperm,$tmp 660 lvx $outhead,0,$ivp 661 vperm $ivec,$ivec,$ivec,$outperm 662 vsel $inout,$outhead,$ivec,$outmask 663 lvx $inptail,$idx,$ivp 664 stvx $inout,0,$ivp 665 vsel $inout,$ivec,$inptail,$outmask 666 stvx $inout,$idx,$ivp 667 668 mtspr 256,$vrsave 669 blr 670 .long 0 671 .byte 0,12,0x14,0,0,0,6,0 672 .long 0 673___ 674######################################################################### 675{{ # Optimized CBC decrypt procedure # 676my $key_="r11"; 677my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 678 $x00=0 if ($flavour =~ /osx/); 679my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); 680my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); 681my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 682 # v26-v31 last 6 round keys 683my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 684 685$code.=<<___; 686.align 5 687_aesp8_cbc_decrypt8x: 688 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 689 li r10,`$FRAME+8*16+15` 690 li r11,`$FRAME+8*16+31` 691 stvx v20,r10,$sp # ABI says so 692 addi r10,r10,32 693 stvx v21,r11,$sp 694 addi r11,r11,32 695 stvx v22,r10,$sp 696 addi r10,r10,32 697 stvx v23,r11,$sp 698 addi r11,r11,32 699 stvx v24,r10,$sp 700 addi r10,r10,32 701 stvx v25,r11,$sp 702 addi r11,r11,32 703 stvx v26,r10,$sp 704 addi r10,r10,32 705 stvx v27,r11,$sp 706 addi r11,r11,32 707 stvx v28,r10,$sp 708 addi r10,r10,32 709 stvx v29,r11,$sp 710 addi r11,r11,32 711 stvx v30,r10,$sp 712 stvx v31,r11,$sp 713 li r0,-1 714 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 715 li $x10,0x10 716 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 717 li $x20,0x20 718 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 719 li $x30,0x30 720 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 721 li $x40,0x40 722 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 723 li $x50,0x50 724 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 725 li $x60,0x60 726 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 727 li $x70,0x70 728 mtspr 256,r0 729 730 subi $rounds,$rounds,3 # -4 in total 731 subi $len,$len,128 # bias 732 733 lvx $rndkey0,$x00,$key # load key schedule 734 lvx v30,$x10,$key 735 addi $key,$key,0x20 736 lvx v31,$x00,$key 737 ?vperm $rndkey0,$rndkey0,v30,$keyperm 738 addi $key_,$sp,$FRAME+15 739 mtctr $rounds 740 741Load_cbc_dec_key: 742 ?vperm v24,v30,v31,$keyperm 743 lvx v30,$x10,$key 744 addi $key,$key,0x20 745 stvx v24,$x00,$key_ # off-load round[1] 746 ?vperm v25,v31,v30,$keyperm 747 lvx v31,$x00,$key 748 stvx v25,$x10,$key_ # off-load round[2] 749 addi $key_,$key_,0x20 750 bdnz Load_cbc_dec_key 751 752 lvx v26,$x10,$key 753 ?vperm v24,v30,v31,$keyperm 754 lvx v27,$x20,$key 755 stvx v24,$x00,$key_ # off-load round[3] 756 ?vperm v25,v31,v26,$keyperm 757 lvx v28,$x30,$key 758 stvx v25,$x10,$key_ # off-load round[4] 759 addi $key_,$sp,$FRAME+15 # rewind $key_ 760 ?vperm v26,v26,v27,$keyperm 761 lvx v29,$x40,$key 762 ?vperm v27,v27,v28,$keyperm 763 lvx v30,$x50,$key 764 ?vperm v28,v28,v29,$keyperm 765 lvx v31,$x60,$key 766 ?vperm v29,v29,v30,$keyperm 767 lvx $out0,$x70,$key # borrow $out0 768 ?vperm v30,v30,v31,$keyperm 769 lvx v24,$x00,$key_ # pre-load round[1] 770 ?vperm v31,v31,$out0,$keyperm 771 lvx v25,$x10,$key_ # pre-load round[2] 772 773 #lvx $inptail,0,$inp # "caller" already did this 774 #addi $inp,$inp,15 # 15 is not typo 775 subi $inp,$inp,15 # undo "caller" 776 777 le?li $idx,8 778 lvx_u $in0,$x00,$inp # load first 8 "words" 779 le?lvsl $inpperm,0,$idx 780 le?vspltisb $tmp,0x0f 781 lvx_u $in1,$x10,$inp 782 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 783 lvx_u $in2,$x20,$inp 784 le?vperm $in0,$in0,$in0,$inpperm 785 lvx_u $in3,$x30,$inp 786 le?vperm $in1,$in1,$in1,$inpperm 787 lvx_u $in4,$x40,$inp 788 le?vperm $in2,$in2,$in2,$inpperm 789 vxor $out0,$in0,$rndkey0 790 lvx_u $in5,$x50,$inp 791 le?vperm $in3,$in3,$in3,$inpperm 792 vxor $out1,$in1,$rndkey0 793 lvx_u $in6,$x60,$inp 794 le?vperm $in4,$in4,$in4,$inpperm 795 vxor $out2,$in2,$rndkey0 796 lvx_u $in7,$x70,$inp 797 addi $inp,$inp,0x80 798 le?vperm $in5,$in5,$in5,$inpperm 799 vxor $out3,$in3,$rndkey0 800 le?vperm $in6,$in6,$in6,$inpperm 801 vxor $out4,$in4,$rndkey0 802 le?vperm $in7,$in7,$in7,$inpperm 803 vxor $out5,$in5,$rndkey0 804 vxor $out6,$in6,$rndkey0 805 vxor $out7,$in7,$rndkey0 806 807 mtctr $rounds 808 b Loop_cbc_dec8x 809.align 5 810Loop_cbc_dec8x: 811 vncipher $out0,$out0,v24 812 vncipher $out1,$out1,v24 813 vncipher $out2,$out2,v24 814 vncipher $out3,$out3,v24 815 vncipher $out4,$out4,v24 816 vncipher $out5,$out5,v24 817 vncipher $out6,$out6,v24 818 vncipher $out7,$out7,v24 819 lvx v24,$x20,$key_ # round[3] 820 addi $key_,$key_,0x20 821 822 vncipher $out0,$out0,v25 823 vncipher $out1,$out1,v25 824 vncipher $out2,$out2,v25 825 vncipher $out3,$out3,v25 826 vncipher $out4,$out4,v25 827 vncipher $out5,$out5,v25 828 vncipher $out6,$out6,v25 829 vncipher $out7,$out7,v25 830 lvx v25,$x10,$key_ # round[4] 831 bdnz Loop_cbc_dec8x 832 833 subic $len,$len,128 # $len-=128 834 vncipher $out0,$out0,v24 835 vncipher $out1,$out1,v24 836 vncipher $out2,$out2,v24 837 vncipher $out3,$out3,v24 838 vncipher $out4,$out4,v24 839 vncipher $out5,$out5,v24 840 vncipher $out6,$out6,v24 841 vncipher $out7,$out7,v24 842 843 subfe. r0,r0,r0 # borrow?-1:0 844 vncipher $out0,$out0,v25 845 vncipher $out1,$out1,v25 846 vncipher $out2,$out2,v25 847 vncipher $out3,$out3,v25 848 vncipher $out4,$out4,v25 849 vncipher $out5,$out5,v25 850 vncipher $out6,$out6,v25 851 vncipher $out7,$out7,v25 852 853 and r0,r0,$len 854 vncipher $out0,$out0,v26 855 vncipher $out1,$out1,v26 856 vncipher $out2,$out2,v26 857 vncipher $out3,$out3,v26 858 vncipher $out4,$out4,v26 859 vncipher $out5,$out5,v26 860 vncipher $out6,$out6,v26 861 vncipher $out7,$out7,v26 862 863 add $inp,$inp,r0 # $inp is adjusted in such 864 # way that at exit from the 865 # loop inX-in7 are loaded 866 # with last "words" 867 vncipher $out0,$out0,v27 868 vncipher $out1,$out1,v27 869 vncipher $out2,$out2,v27 870 vncipher $out3,$out3,v27 871 vncipher $out4,$out4,v27 872 vncipher $out5,$out5,v27 873 vncipher $out6,$out6,v27 874 vncipher $out7,$out7,v27 875 876 addi $key_,$sp,$FRAME+15 # rewind $key_ 877 vncipher $out0,$out0,v28 878 vncipher $out1,$out1,v28 879 vncipher $out2,$out2,v28 880 vncipher $out3,$out3,v28 881 vncipher $out4,$out4,v28 882 vncipher $out5,$out5,v28 883 vncipher $out6,$out6,v28 884 vncipher $out7,$out7,v28 885 lvx v24,$x00,$key_ # re-pre-load round[1] 886 887 vncipher $out0,$out0,v29 888 vncipher $out1,$out1,v29 889 vncipher $out2,$out2,v29 890 vncipher $out3,$out3,v29 891 vncipher $out4,$out4,v29 892 vncipher $out5,$out5,v29 893 vncipher $out6,$out6,v29 894 vncipher $out7,$out7,v29 895 lvx v25,$x10,$key_ # re-pre-load round[2] 896 897 vncipher $out0,$out0,v30 898 vxor $ivec,$ivec,v31 # xor with last round key 899 vncipher $out1,$out1,v30 900 vxor $in0,$in0,v31 901 vncipher $out2,$out2,v30 902 vxor $in1,$in1,v31 903 vncipher $out3,$out3,v30 904 vxor $in2,$in2,v31 905 vncipher $out4,$out4,v30 906 vxor $in3,$in3,v31 907 vncipher $out5,$out5,v30 908 vxor $in4,$in4,v31 909 vncipher $out6,$out6,v30 910 vxor $in5,$in5,v31 911 vncipher $out7,$out7,v30 912 vxor $in6,$in6,v31 913 914 vncipherlast $out0,$out0,$ivec 915 vncipherlast $out1,$out1,$in0 916 lvx_u $in0,$x00,$inp # load next input block 917 vncipherlast $out2,$out2,$in1 918 lvx_u $in1,$x10,$inp 919 vncipherlast $out3,$out3,$in2 920 le?vperm $in0,$in0,$in0,$inpperm 921 lvx_u $in2,$x20,$inp 922 vncipherlast $out4,$out4,$in3 923 le?vperm $in1,$in1,$in1,$inpperm 924 lvx_u $in3,$x30,$inp 925 vncipherlast $out5,$out5,$in4 926 le?vperm $in2,$in2,$in2,$inpperm 927 lvx_u $in4,$x40,$inp 928 vncipherlast $out6,$out6,$in5 929 le?vperm $in3,$in3,$in3,$inpperm 930 lvx_u $in5,$x50,$inp 931 vncipherlast $out7,$out7,$in6 932 le?vperm $in4,$in4,$in4,$inpperm 933 lvx_u $in6,$x60,$inp 934 vmr $ivec,$in7 935 le?vperm $in5,$in5,$in5,$inpperm 936 lvx_u $in7,$x70,$inp 937 addi $inp,$inp,0x80 938 939 le?vperm $out0,$out0,$out0,$inpperm 940 le?vperm $out1,$out1,$out1,$inpperm 941 stvx_u $out0,$x00,$out 942 le?vperm $in6,$in6,$in6,$inpperm 943 vxor $out0,$in0,$rndkey0 944 le?vperm $out2,$out2,$out2,$inpperm 945 stvx_u $out1,$x10,$out 946 le?vperm $in7,$in7,$in7,$inpperm 947 vxor $out1,$in1,$rndkey0 948 le?vperm $out3,$out3,$out3,$inpperm 949 stvx_u $out2,$x20,$out 950 vxor $out2,$in2,$rndkey0 951 le?vperm $out4,$out4,$out4,$inpperm 952 stvx_u $out3,$x30,$out 953 vxor $out3,$in3,$rndkey0 954 le?vperm $out5,$out5,$out5,$inpperm 955 stvx_u $out4,$x40,$out 956 vxor $out4,$in4,$rndkey0 957 le?vperm $out6,$out6,$out6,$inpperm 958 stvx_u $out5,$x50,$out 959 vxor $out5,$in5,$rndkey0 960 le?vperm $out7,$out7,$out7,$inpperm 961 stvx_u $out6,$x60,$out 962 vxor $out6,$in6,$rndkey0 963 stvx_u $out7,$x70,$out 964 addi $out,$out,0x80 965 vxor $out7,$in7,$rndkey0 966 967 mtctr $rounds 968 beq Loop_cbc_dec8x # did $len-=128 borrow? 969 970 addic. $len,$len,128 971 beq Lcbc_dec8x_done 972 nop 973 nop 974 975Loop_cbc_dec8x_tail: # up to 7 "words" tail... 976 vncipher $out1,$out1,v24 977 vncipher $out2,$out2,v24 978 vncipher $out3,$out3,v24 979 vncipher $out4,$out4,v24 980 vncipher $out5,$out5,v24 981 vncipher $out6,$out6,v24 982 vncipher $out7,$out7,v24 983 lvx v24,$x20,$key_ # round[3] 984 addi $key_,$key_,0x20 985 986 vncipher $out1,$out1,v25 987 vncipher $out2,$out2,v25 988 vncipher $out3,$out3,v25 989 vncipher $out4,$out4,v25 990 vncipher $out5,$out5,v25 991 vncipher $out6,$out6,v25 992 vncipher $out7,$out7,v25 993 lvx v25,$x10,$key_ # round[4] 994 bdnz Loop_cbc_dec8x_tail 995 996 vncipher $out1,$out1,v24 997 vncipher $out2,$out2,v24 998 vncipher $out3,$out3,v24 999 vncipher $out4,$out4,v24 1000 vncipher $out5,$out5,v24 1001 vncipher $out6,$out6,v24 1002 vncipher $out7,$out7,v24 1003 1004 vncipher $out1,$out1,v25 1005 vncipher $out2,$out2,v25 1006 vncipher $out3,$out3,v25 1007 vncipher $out4,$out4,v25 1008 vncipher $out5,$out5,v25 1009 vncipher $out6,$out6,v25 1010 vncipher $out7,$out7,v25 1011 1012 vncipher $out1,$out1,v26 1013 vncipher $out2,$out2,v26 1014 vncipher $out3,$out3,v26 1015 vncipher $out4,$out4,v26 1016 vncipher $out5,$out5,v26 1017 vncipher $out6,$out6,v26 1018 vncipher $out7,$out7,v26 1019 1020 vncipher $out1,$out1,v27 1021 vncipher $out2,$out2,v27 1022 vncipher $out3,$out3,v27 1023 vncipher $out4,$out4,v27 1024 vncipher $out5,$out5,v27 1025 vncipher $out6,$out6,v27 1026 vncipher $out7,$out7,v27 1027 1028 vncipher $out1,$out1,v28 1029 vncipher $out2,$out2,v28 1030 vncipher $out3,$out3,v28 1031 vncipher $out4,$out4,v28 1032 vncipher $out5,$out5,v28 1033 vncipher $out6,$out6,v28 1034 vncipher $out7,$out7,v28 1035 1036 vncipher $out1,$out1,v29 1037 vncipher $out2,$out2,v29 1038 vncipher $out3,$out3,v29 1039 vncipher $out4,$out4,v29 1040 vncipher $out5,$out5,v29 1041 vncipher $out6,$out6,v29 1042 vncipher $out7,$out7,v29 1043 1044 vncipher $out1,$out1,v30 1045 vxor $ivec,$ivec,v31 # last round key 1046 vncipher $out2,$out2,v30 1047 vxor $in1,$in1,v31 1048 vncipher $out3,$out3,v30 1049 vxor $in2,$in2,v31 1050 vncipher $out4,$out4,v30 1051 vxor $in3,$in3,v31 1052 vncipher $out5,$out5,v30 1053 vxor $in4,$in4,v31 1054 vncipher $out6,$out6,v30 1055 vxor $in5,$in5,v31 1056 vncipher $out7,$out7,v30 1057 vxor $in6,$in6,v31 1058 1059 cmplwi $len,32 # switch($len) 1060 blt Lcbc_dec8x_one 1061 nop 1062 beq Lcbc_dec8x_two 1063 cmplwi $len,64 1064 blt Lcbc_dec8x_three 1065 nop 1066 beq Lcbc_dec8x_four 1067 cmplwi $len,96 1068 blt Lcbc_dec8x_five 1069 nop 1070 beq Lcbc_dec8x_six 1071 1072Lcbc_dec8x_seven: 1073 vncipherlast $out1,$out1,$ivec 1074 vncipherlast $out2,$out2,$in1 1075 vncipherlast $out3,$out3,$in2 1076 vncipherlast $out4,$out4,$in3 1077 vncipherlast $out5,$out5,$in4 1078 vncipherlast $out6,$out6,$in5 1079 vncipherlast $out7,$out7,$in6 1080 vmr $ivec,$in7 1081 1082 le?vperm $out1,$out1,$out1,$inpperm 1083 le?vperm $out2,$out2,$out2,$inpperm 1084 stvx_u $out1,$x00,$out 1085 le?vperm $out3,$out3,$out3,$inpperm 1086 stvx_u $out2,$x10,$out 1087 le?vperm $out4,$out4,$out4,$inpperm 1088 stvx_u $out3,$x20,$out 1089 le?vperm $out5,$out5,$out5,$inpperm 1090 stvx_u $out4,$x30,$out 1091 le?vperm $out6,$out6,$out6,$inpperm 1092 stvx_u $out5,$x40,$out 1093 le?vperm $out7,$out7,$out7,$inpperm 1094 stvx_u $out6,$x50,$out 1095 stvx_u $out7,$x60,$out 1096 addi $out,$out,0x70 1097 b Lcbc_dec8x_done 1098 1099.align 5 1100Lcbc_dec8x_six: 1101 vncipherlast $out2,$out2,$ivec 1102 vncipherlast $out3,$out3,$in2 1103 vncipherlast $out4,$out4,$in3 1104 vncipherlast $out5,$out5,$in4 1105 vncipherlast $out6,$out6,$in5 1106 vncipherlast $out7,$out7,$in6 1107 vmr $ivec,$in7 1108 1109 le?vperm $out2,$out2,$out2,$inpperm 1110 le?vperm $out3,$out3,$out3,$inpperm 1111 stvx_u $out2,$x00,$out 1112 le?vperm $out4,$out4,$out4,$inpperm 1113 stvx_u $out3,$x10,$out 1114 le?vperm $out5,$out5,$out5,$inpperm 1115 stvx_u $out4,$x20,$out 1116 le?vperm $out6,$out6,$out6,$inpperm 1117 stvx_u $out5,$x30,$out 1118 le?vperm $out7,$out7,$out7,$inpperm 1119 stvx_u $out6,$x40,$out 1120 stvx_u $out7,$x50,$out 1121 addi $out,$out,0x60 1122 b Lcbc_dec8x_done 1123 1124.align 5 1125Lcbc_dec8x_five: 1126 vncipherlast $out3,$out3,$ivec 1127 vncipherlast $out4,$out4,$in3 1128 vncipherlast $out5,$out5,$in4 1129 vncipherlast $out6,$out6,$in5 1130 vncipherlast $out7,$out7,$in6 1131 vmr $ivec,$in7 1132 1133 le?vperm $out3,$out3,$out3,$inpperm 1134 le?vperm $out4,$out4,$out4,$inpperm 1135 stvx_u $out3,$x00,$out 1136 le?vperm $out5,$out5,$out5,$inpperm 1137 stvx_u $out4,$x10,$out 1138 le?vperm $out6,$out6,$out6,$inpperm 1139 stvx_u $out5,$x20,$out 1140 le?vperm $out7,$out7,$out7,$inpperm 1141 stvx_u $out6,$x30,$out 1142 stvx_u $out7,$x40,$out 1143 addi $out,$out,0x50 1144 b Lcbc_dec8x_done 1145 1146.align 5 1147Lcbc_dec8x_four: 1148 vncipherlast $out4,$out4,$ivec 1149 vncipherlast $out5,$out5,$in4 1150 vncipherlast $out6,$out6,$in5 1151 vncipherlast $out7,$out7,$in6 1152 vmr $ivec,$in7 1153 1154 le?vperm $out4,$out4,$out4,$inpperm 1155 le?vperm $out5,$out5,$out5,$inpperm 1156 stvx_u $out4,$x00,$out 1157 le?vperm $out6,$out6,$out6,$inpperm 1158 stvx_u $out5,$x10,$out 1159 le?vperm $out7,$out7,$out7,$inpperm 1160 stvx_u $out6,$x20,$out 1161 stvx_u $out7,$x30,$out 1162 addi $out,$out,0x40 1163 b Lcbc_dec8x_done 1164 1165.align 5 1166Lcbc_dec8x_three: 1167 vncipherlast $out5,$out5,$ivec 1168 vncipherlast $out6,$out6,$in5 1169 vncipherlast $out7,$out7,$in6 1170 vmr $ivec,$in7 1171 1172 le?vperm $out5,$out5,$out5,$inpperm 1173 le?vperm $out6,$out6,$out6,$inpperm 1174 stvx_u $out5,$x00,$out 1175 le?vperm $out7,$out7,$out7,$inpperm 1176 stvx_u $out6,$x10,$out 1177 stvx_u $out7,$x20,$out 1178 addi $out,$out,0x30 1179 b Lcbc_dec8x_done 1180 1181.align 5 1182Lcbc_dec8x_two: 1183 vncipherlast $out6,$out6,$ivec 1184 vncipherlast $out7,$out7,$in6 1185 vmr $ivec,$in7 1186 1187 le?vperm $out6,$out6,$out6,$inpperm 1188 le?vperm $out7,$out7,$out7,$inpperm 1189 stvx_u $out6,$x00,$out 1190 stvx_u $out7,$x10,$out 1191 addi $out,$out,0x20 1192 b Lcbc_dec8x_done 1193 1194.align 5 1195Lcbc_dec8x_one: 1196 vncipherlast $out7,$out7,$ivec 1197 vmr $ivec,$in7 1198 1199 le?vperm $out7,$out7,$out7,$inpperm 1200 stvx_u $out7,0,$out 1201 addi $out,$out,0x10 1202 1203Lcbc_dec8x_done: 1204 le?vperm $ivec,$ivec,$ivec,$inpperm 1205 stvx_u $ivec,0,$ivp # write [unaligned] iv 1206 1207 li r10,`$FRAME+15` 1208 li r11,`$FRAME+31` 1209 stvx $inpperm,r10,$sp # wipe copies of round keys 1210 addi r10,r10,32 1211 stvx $inpperm,r11,$sp 1212 addi r11,r11,32 1213 stvx $inpperm,r10,$sp 1214 addi r10,r10,32 1215 stvx $inpperm,r11,$sp 1216 addi r11,r11,32 1217 stvx $inpperm,r10,$sp 1218 addi r10,r10,32 1219 stvx $inpperm,r11,$sp 1220 addi r11,r11,32 1221 stvx $inpperm,r10,$sp 1222 addi r10,r10,32 1223 stvx $inpperm,r11,$sp 1224 addi r11,r11,32 1225 1226 mtspr 256,$vrsave 1227 lvx v20,r10,$sp # ABI says so 1228 addi r10,r10,32 1229 lvx v21,r11,$sp 1230 addi r11,r11,32 1231 lvx v22,r10,$sp 1232 addi r10,r10,32 1233 lvx v23,r11,$sp 1234 addi r11,r11,32 1235 lvx v24,r10,$sp 1236 addi r10,r10,32 1237 lvx v25,r11,$sp 1238 addi r11,r11,32 1239 lvx v26,r10,$sp 1240 addi r10,r10,32 1241 lvx v27,r11,$sp 1242 addi r11,r11,32 1243 lvx v28,r10,$sp 1244 addi r10,r10,32 1245 lvx v29,r11,$sp 1246 addi r11,r11,32 1247 lvx v30,r10,$sp 1248 lvx v31,r11,$sp 1249 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1250 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1251 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1252 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1253 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1254 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1255 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1256 blr 1257 .long 0 1258 .byte 0,12,0x04,0,0x80,6,6,0 1259 .long 0 1260.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt 1261___ 1262}} }}} 1263 1264######################################################################### 1265{{{ # CTR procedure[s] # 1266my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); 1267my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 1268my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= 1269 map("v$_",(4..11)); 1270my $dat=$tmp; 1271 1272$code.=<<___; 1273.globl .${prefix}_ctr32_encrypt_blocks 1274.align 5 1275.${prefix}_ctr32_encrypt_blocks: 1276 ${UCMP}i $len,1 1277 bltlr- 1278 1279 lis r0,0xfff0 1280 mfspr $vrsave,256 1281 mtspr 256,r0 1282 1283 li $idx,15 1284 vxor $rndkey0,$rndkey0,$rndkey0 1285 le?vspltisb $tmp,0x0f 1286 1287 lvx $ivec,0,$ivp # load [unaligned] iv 1288 lvsl $inpperm,0,$ivp 1289 lvx $inptail,$idx,$ivp 1290 vspltisb $one,1 1291 le?vxor $inpperm,$inpperm,$tmp 1292 vperm $ivec,$ivec,$inptail,$inpperm 1293 vsldoi $one,$rndkey0,$one,1 1294 1295 neg r11,$inp 1296 ?lvsl $keyperm,0,$key # prepare for unaligned key 1297 lwz $rounds,240($key) 1298 1299 lvsr $inpperm,0,r11 # prepare for unaligned load 1300 lvx $inptail,0,$inp 1301 addi $inp,$inp,15 # 15 is not typo 1302 le?vxor $inpperm,$inpperm,$tmp 1303 1304 srwi $rounds,$rounds,1 1305 li $idx,16 1306 subi $rounds,$rounds,1 1307 1308 ${UCMP}i $len,8 1309 bge _aesp8_ctr32_encrypt8x 1310 1311 ?lvsr $outperm,0,$out # prepare for unaligned store 1312 vspltisb $outmask,-1 1313 lvx $outhead,0,$out 1314 ?vperm $outmask,$rndkey0,$outmask,$outperm 1315 le?vxor $outperm,$outperm,$tmp 1316 1317 lvx $rndkey0,0,$key 1318 mtctr $rounds 1319 lvx $rndkey1,$idx,$key 1320 addi $idx,$idx,16 1321 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1322 vxor $inout,$ivec,$rndkey0 1323 lvx $rndkey0,$idx,$key 1324 addi $idx,$idx,16 1325 b Loop_ctr32_enc 1326 1327.align 5 1328Loop_ctr32_enc: 1329 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1330 vcipher $inout,$inout,$rndkey1 1331 lvx $rndkey1,$idx,$key 1332 addi $idx,$idx,16 1333 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1334 vcipher $inout,$inout,$rndkey0 1335 lvx $rndkey0,$idx,$key 1336 addi $idx,$idx,16 1337 bdnz Loop_ctr32_enc 1338 1339 vadduwm $ivec,$ivec,$one 1340 vmr $dat,$inptail 1341 lvx $inptail,0,$inp 1342 addi $inp,$inp,16 1343 subic. $len,$len,1 # blocks-- 1344 1345 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1346 vcipher $inout,$inout,$rndkey1 1347 lvx $rndkey1,$idx,$key 1348 vperm $dat,$dat,$inptail,$inpperm 1349 li $idx,16 1350 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm 1351 lvx $rndkey0,0,$key 1352 vxor $dat,$dat,$rndkey1 # last round key 1353 vcipherlast $inout,$inout,$dat 1354 1355 lvx $rndkey1,$idx,$key 1356 addi $idx,$idx,16 1357 vperm $inout,$inout,$inout,$outperm 1358 vsel $dat,$outhead,$inout,$outmask 1359 mtctr $rounds 1360 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1361 vmr $outhead,$inout 1362 vxor $inout,$ivec,$rndkey0 1363 lvx $rndkey0,$idx,$key 1364 addi $idx,$idx,16 1365 stvx $dat,0,$out 1366 addi $out,$out,16 1367 bne Loop_ctr32_enc 1368 1369 addi $out,$out,-1 1370 lvx $inout,0,$out # redundant in aligned case 1371 vsel $inout,$outhead,$inout,$outmask 1372 stvx $inout,0,$out 1373 1374 mtspr 256,$vrsave 1375 blr 1376 .long 0 1377 .byte 0,12,0x14,0,0,0,6,0 1378 .long 0 1379___ 1380######################################################################### 1381{{ # Optimized CTR procedure # 1382my $key_="r11"; 1383my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); 1384 $x00=0 if ($flavour =~ /osx/); 1385my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 1386my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 1387my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 1388 # v26-v31 last 6 round keys 1389my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 1390my ($two,$three,$four)=($outhead,$outperm,$outmask); 1391 1392$code.=<<___; 1393.align 5 1394_aesp8_ctr32_encrypt8x: 1395 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 1396 li r10,`$FRAME+8*16+15` 1397 li r11,`$FRAME+8*16+31` 1398 stvx v20,r10,$sp # ABI says so 1399 addi r10,r10,32 1400 stvx v21,r11,$sp 1401 addi r11,r11,32 1402 stvx v22,r10,$sp 1403 addi r10,r10,32 1404 stvx v23,r11,$sp 1405 addi r11,r11,32 1406 stvx v24,r10,$sp 1407 addi r10,r10,32 1408 stvx v25,r11,$sp 1409 addi r11,r11,32 1410 stvx v26,r10,$sp 1411 addi r10,r10,32 1412 stvx v27,r11,$sp 1413 addi r11,r11,32 1414 stvx v28,r10,$sp 1415 addi r10,r10,32 1416 stvx v29,r11,$sp 1417 addi r11,r11,32 1418 stvx v30,r10,$sp 1419 stvx v31,r11,$sp 1420 li r0,-1 1421 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 1422 li $x10,0x10 1423 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1424 li $x20,0x20 1425 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1426 li $x30,0x30 1427 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1428 li $x40,0x40 1429 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1430 li $x50,0x50 1431 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1432 li $x60,0x60 1433 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1434 li $x70,0x70 1435 mtspr 256,r0 1436 1437 subi $rounds,$rounds,3 # -4 in total 1438 1439 lvx $rndkey0,$x00,$key # load key schedule 1440 lvx v30,$x10,$key 1441 addi $key,$key,0x20 1442 lvx v31,$x00,$key 1443 ?vperm $rndkey0,$rndkey0,v30,$keyperm 1444 addi $key_,$sp,$FRAME+15 1445 mtctr $rounds 1446 1447Load_ctr32_enc_key: 1448 ?vperm v24,v30,v31,$keyperm 1449 lvx v30,$x10,$key 1450 addi $key,$key,0x20 1451 stvx v24,$x00,$key_ # off-load round[1] 1452 ?vperm v25,v31,v30,$keyperm 1453 lvx v31,$x00,$key 1454 stvx v25,$x10,$key_ # off-load round[2] 1455 addi $key_,$key_,0x20 1456 bdnz Load_ctr32_enc_key 1457 1458 lvx v26,$x10,$key 1459 ?vperm v24,v30,v31,$keyperm 1460 lvx v27,$x20,$key 1461 stvx v24,$x00,$key_ # off-load round[3] 1462 ?vperm v25,v31,v26,$keyperm 1463 lvx v28,$x30,$key 1464 stvx v25,$x10,$key_ # off-load round[4] 1465 addi $key_,$sp,$FRAME+15 # rewind $key_ 1466 ?vperm v26,v26,v27,$keyperm 1467 lvx v29,$x40,$key 1468 ?vperm v27,v27,v28,$keyperm 1469 lvx v30,$x50,$key 1470 ?vperm v28,v28,v29,$keyperm 1471 lvx v31,$x60,$key 1472 ?vperm v29,v29,v30,$keyperm 1473 lvx $out0,$x70,$key # borrow $out0 1474 ?vperm v30,v30,v31,$keyperm 1475 lvx v24,$x00,$key_ # pre-load round[1] 1476 ?vperm v31,v31,$out0,$keyperm 1477 lvx v25,$x10,$key_ # pre-load round[2] 1478 1479 vadduwm $two,$one,$one 1480 subi $inp,$inp,15 # undo "caller" 1481 $SHL $len,$len,4 1482 1483 vadduwm $out1,$ivec,$one # counter values ... 1484 vadduwm $out2,$ivec,$two 1485 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1486 le?li $idx,8 1487 vadduwm $out3,$out1,$two 1488 vxor $out1,$out1,$rndkey0 1489 le?lvsl $inpperm,0,$idx 1490 vadduwm $out4,$out2,$two 1491 vxor $out2,$out2,$rndkey0 1492 le?vspltisb $tmp,0x0f 1493 vadduwm $out5,$out3,$two 1494 vxor $out3,$out3,$rndkey0 1495 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 1496 vadduwm $out6,$out4,$two 1497 vxor $out4,$out4,$rndkey0 1498 vadduwm $out7,$out5,$two 1499 vxor $out5,$out5,$rndkey0 1500 vadduwm $ivec,$out6,$two # next counter value 1501 vxor $out6,$out6,$rndkey0 1502 vxor $out7,$out7,$rndkey0 1503 1504 mtctr $rounds 1505 b Loop_ctr32_enc8x 1506.align 5 1507Loop_ctr32_enc8x: 1508 vcipher $out0,$out0,v24 1509 vcipher $out1,$out1,v24 1510 vcipher $out2,$out2,v24 1511 vcipher $out3,$out3,v24 1512 vcipher $out4,$out4,v24 1513 vcipher $out5,$out5,v24 1514 vcipher $out6,$out6,v24 1515 vcipher $out7,$out7,v24 1516Loop_ctr32_enc8x_middle: 1517 lvx v24,$x20,$key_ # round[3] 1518 addi $key_,$key_,0x20 1519 1520 vcipher $out0,$out0,v25 1521 vcipher $out1,$out1,v25 1522 vcipher $out2,$out2,v25 1523 vcipher $out3,$out3,v25 1524 vcipher $out4,$out4,v25 1525 vcipher $out5,$out5,v25 1526 vcipher $out6,$out6,v25 1527 vcipher $out7,$out7,v25 1528 lvx v25,$x10,$key_ # round[4] 1529 bdnz Loop_ctr32_enc8x 1530 1531 subic r11,$len,256 # $len-256, borrow $key_ 1532 vcipher $out0,$out0,v24 1533 vcipher $out1,$out1,v24 1534 vcipher $out2,$out2,v24 1535 vcipher $out3,$out3,v24 1536 vcipher $out4,$out4,v24 1537 vcipher $out5,$out5,v24 1538 vcipher $out6,$out6,v24 1539 vcipher $out7,$out7,v24 1540 1541 subfe r0,r0,r0 # borrow?-1:0 1542 vcipher $out0,$out0,v25 1543 vcipher $out1,$out1,v25 1544 vcipher $out2,$out2,v25 1545 vcipher $out3,$out3,v25 1546 vcipher $out4,$out4,v25 1547 vcipher $out5,$out5,v25 1548 vcipher $out6,$out6,v25 1549 vcipher $out7,$out7,v25 1550 1551 and r0,r0,r11 1552 addi $key_,$sp,$FRAME+15 # rewind $key_ 1553 vcipher $out0,$out0,v26 1554 vcipher $out1,$out1,v26 1555 vcipher $out2,$out2,v26 1556 vcipher $out3,$out3,v26 1557 vcipher $out4,$out4,v26 1558 vcipher $out5,$out5,v26 1559 vcipher $out6,$out6,v26 1560 vcipher $out7,$out7,v26 1561 lvx v24,$x00,$key_ # re-pre-load round[1] 1562 1563 subic $len,$len,129 # $len-=129 1564 vcipher $out0,$out0,v27 1565 addi $len,$len,1 # $len-=128 really 1566 vcipher $out1,$out1,v27 1567 vcipher $out2,$out2,v27 1568 vcipher $out3,$out3,v27 1569 vcipher $out4,$out4,v27 1570 vcipher $out5,$out5,v27 1571 vcipher $out6,$out6,v27 1572 vcipher $out7,$out7,v27 1573 lvx v25,$x10,$key_ # re-pre-load round[2] 1574 1575 vcipher $out0,$out0,v28 1576 lvx_u $in0,$x00,$inp # load input 1577 vcipher $out1,$out1,v28 1578 lvx_u $in1,$x10,$inp 1579 vcipher $out2,$out2,v28 1580 lvx_u $in2,$x20,$inp 1581 vcipher $out3,$out3,v28 1582 lvx_u $in3,$x30,$inp 1583 vcipher $out4,$out4,v28 1584 lvx_u $in4,$x40,$inp 1585 vcipher $out5,$out5,v28 1586 lvx_u $in5,$x50,$inp 1587 vcipher $out6,$out6,v28 1588 lvx_u $in6,$x60,$inp 1589 vcipher $out7,$out7,v28 1590 lvx_u $in7,$x70,$inp 1591 addi $inp,$inp,0x80 1592 1593 vcipher $out0,$out0,v29 1594 le?vperm $in0,$in0,$in0,$inpperm 1595 vcipher $out1,$out1,v29 1596 le?vperm $in1,$in1,$in1,$inpperm 1597 vcipher $out2,$out2,v29 1598 le?vperm $in2,$in2,$in2,$inpperm 1599 vcipher $out3,$out3,v29 1600 le?vperm $in3,$in3,$in3,$inpperm 1601 vcipher $out4,$out4,v29 1602 le?vperm $in4,$in4,$in4,$inpperm 1603 vcipher $out5,$out5,v29 1604 le?vperm $in5,$in5,$in5,$inpperm 1605 vcipher $out6,$out6,v29 1606 le?vperm $in6,$in6,$in6,$inpperm 1607 vcipher $out7,$out7,v29 1608 le?vperm $in7,$in7,$in7,$inpperm 1609 1610 add $inp,$inp,r0 # $inp is adjusted in such 1611 # way that at exit from the 1612 # loop inX-in7 are loaded 1613 # with last "words" 1614 subfe. r0,r0,r0 # borrow?-1:0 1615 vcipher $out0,$out0,v30 1616 vxor $in0,$in0,v31 # xor with last round key 1617 vcipher $out1,$out1,v30 1618 vxor $in1,$in1,v31 1619 vcipher $out2,$out2,v30 1620 vxor $in2,$in2,v31 1621 vcipher $out3,$out3,v30 1622 vxor $in3,$in3,v31 1623 vcipher $out4,$out4,v30 1624 vxor $in4,$in4,v31 1625 vcipher $out5,$out5,v30 1626 vxor $in5,$in5,v31 1627 vcipher $out6,$out6,v30 1628 vxor $in6,$in6,v31 1629 vcipher $out7,$out7,v30 1630 vxor $in7,$in7,v31 1631 1632 bne Lctr32_enc8x_break # did $len-129 borrow? 1633 1634 vcipherlast $in0,$out0,$in0 1635 vcipherlast $in1,$out1,$in1 1636 vadduwm $out1,$ivec,$one # counter values ... 1637 vcipherlast $in2,$out2,$in2 1638 vadduwm $out2,$ivec,$two 1639 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] 1640 vcipherlast $in3,$out3,$in3 1641 vadduwm $out3,$out1,$two 1642 vxor $out1,$out1,$rndkey0 1643 vcipherlast $in4,$out4,$in4 1644 vadduwm $out4,$out2,$two 1645 vxor $out2,$out2,$rndkey0 1646 vcipherlast $in5,$out5,$in5 1647 vadduwm $out5,$out3,$two 1648 vxor $out3,$out3,$rndkey0 1649 vcipherlast $in6,$out6,$in6 1650 vadduwm $out6,$out4,$two 1651 vxor $out4,$out4,$rndkey0 1652 vcipherlast $in7,$out7,$in7 1653 vadduwm $out7,$out5,$two 1654 vxor $out5,$out5,$rndkey0 1655 le?vperm $in0,$in0,$in0,$inpperm 1656 vadduwm $ivec,$out6,$two # next counter value 1657 vxor $out6,$out6,$rndkey0 1658 le?vperm $in1,$in1,$in1,$inpperm 1659 vxor $out7,$out7,$rndkey0 1660 mtctr $rounds 1661 1662 vcipher $out0,$out0,v24 1663 stvx_u $in0,$x00,$out 1664 le?vperm $in2,$in2,$in2,$inpperm 1665 vcipher $out1,$out1,v24 1666 stvx_u $in1,$x10,$out 1667 le?vperm $in3,$in3,$in3,$inpperm 1668 vcipher $out2,$out2,v24 1669 stvx_u $in2,$x20,$out 1670 le?vperm $in4,$in4,$in4,$inpperm 1671 vcipher $out3,$out3,v24 1672 stvx_u $in3,$x30,$out 1673 le?vperm $in5,$in5,$in5,$inpperm 1674 vcipher $out4,$out4,v24 1675 stvx_u $in4,$x40,$out 1676 le?vperm $in6,$in6,$in6,$inpperm 1677 vcipher $out5,$out5,v24 1678 stvx_u $in5,$x50,$out 1679 le?vperm $in7,$in7,$in7,$inpperm 1680 vcipher $out6,$out6,v24 1681 stvx_u $in6,$x60,$out 1682 vcipher $out7,$out7,v24 1683 stvx_u $in7,$x70,$out 1684 addi $out,$out,0x80 1685 1686 b Loop_ctr32_enc8x_middle 1687 1688.align 5 1689Lctr32_enc8x_break: 1690 cmpwi $len,-0x60 1691 blt Lctr32_enc8x_one 1692 nop 1693 beq Lctr32_enc8x_two 1694 cmpwi $len,-0x40 1695 blt Lctr32_enc8x_three 1696 nop 1697 beq Lctr32_enc8x_four 1698 cmpwi $len,-0x20 1699 blt Lctr32_enc8x_five 1700 nop 1701 beq Lctr32_enc8x_six 1702 cmpwi $len,0x00 1703 blt Lctr32_enc8x_seven 1704 1705Lctr32_enc8x_eight: 1706 vcipherlast $out0,$out0,$in0 1707 vcipherlast $out1,$out1,$in1 1708 vcipherlast $out2,$out2,$in2 1709 vcipherlast $out3,$out3,$in3 1710 vcipherlast $out4,$out4,$in4 1711 vcipherlast $out5,$out5,$in5 1712 vcipherlast $out6,$out6,$in6 1713 vcipherlast $out7,$out7,$in7 1714 1715 le?vperm $out0,$out0,$out0,$inpperm 1716 le?vperm $out1,$out1,$out1,$inpperm 1717 stvx_u $out0,$x00,$out 1718 le?vperm $out2,$out2,$out2,$inpperm 1719 stvx_u $out1,$x10,$out 1720 le?vperm $out3,$out3,$out3,$inpperm 1721 stvx_u $out2,$x20,$out 1722 le?vperm $out4,$out4,$out4,$inpperm 1723 stvx_u $out3,$x30,$out 1724 le?vperm $out5,$out5,$out5,$inpperm 1725 stvx_u $out4,$x40,$out 1726 le?vperm $out6,$out6,$out6,$inpperm 1727 stvx_u $out5,$x50,$out 1728 le?vperm $out7,$out7,$out7,$inpperm 1729 stvx_u $out6,$x60,$out 1730 stvx_u $out7,$x70,$out 1731 addi $out,$out,0x80 1732 b Lctr32_enc8x_done 1733 1734.align 5 1735Lctr32_enc8x_seven: 1736 vcipherlast $out0,$out0,$in1 1737 vcipherlast $out1,$out1,$in2 1738 vcipherlast $out2,$out2,$in3 1739 vcipherlast $out3,$out3,$in4 1740 vcipherlast $out4,$out4,$in5 1741 vcipherlast $out5,$out5,$in6 1742 vcipherlast $out6,$out6,$in7 1743 1744 le?vperm $out0,$out0,$out0,$inpperm 1745 le?vperm $out1,$out1,$out1,$inpperm 1746 stvx_u $out0,$x00,$out 1747 le?vperm $out2,$out2,$out2,$inpperm 1748 stvx_u $out1,$x10,$out 1749 le?vperm $out3,$out3,$out3,$inpperm 1750 stvx_u $out2,$x20,$out 1751 le?vperm $out4,$out4,$out4,$inpperm 1752 stvx_u $out3,$x30,$out 1753 le?vperm $out5,$out5,$out5,$inpperm 1754 stvx_u $out4,$x40,$out 1755 le?vperm $out6,$out6,$out6,$inpperm 1756 stvx_u $out5,$x50,$out 1757 stvx_u $out6,$x60,$out 1758 addi $out,$out,0x70 1759 b Lctr32_enc8x_done 1760 1761.align 5 1762Lctr32_enc8x_six: 1763 vcipherlast $out0,$out0,$in2 1764 vcipherlast $out1,$out1,$in3 1765 vcipherlast $out2,$out2,$in4 1766 vcipherlast $out3,$out3,$in5 1767 vcipherlast $out4,$out4,$in6 1768 vcipherlast $out5,$out5,$in7 1769 1770 le?vperm $out0,$out0,$out0,$inpperm 1771 le?vperm $out1,$out1,$out1,$inpperm 1772 stvx_u $out0,$x00,$out 1773 le?vperm $out2,$out2,$out2,$inpperm 1774 stvx_u $out1,$x10,$out 1775 le?vperm $out3,$out3,$out3,$inpperm 1776 stvx_u $out2,$x20,$out 1777 le?vperm $out4,$out4,$out4,$inpperm 1778 stvx_u $out3,$x30,$out 1779 le?vperm $out5,$out5,$out5,$inpperm 1780 stvx_u $out4,$x40,$out 1781 stvx_u $out5,$x50,$out 1782 addi $out,$out,0x60 1783 b Lctr32_enc8x_done 1784 1785.align 5 1786Lctr32_enc8x_five: 1787 vcipherlast $out0,$out0,$in3 1788 vcipherlast $out1,$out1,$in4 1789 vcipherlast $out2,$out2,$in5 1790 vcipherlast $out3,$out3,$in6 1791 vcipherlast $out4,$out4,$in7 1792 1793 le?vperm $out0,$out0,$out0,$inpperm 1794 le?vperm $out1,$out1,$out1,$inpperm 1795 stvx_u $out0,$x00,$out 1796 le?vperm $out2,$out2,$out2,$inpperm 1797 stvx_u $out1,$x10,$out 1798 le?vperm $out3,$out3,$out3,$inpperm 1799 stvx_u $out2,$x20,$out 1800 le?vperm $out4,$out4,$out4,$inpperm 1801 stvx_u $out3,$x30,$out 1802 stvx_u $out4,$x40,$out 1803 addi $out,$out,0x50 1804 b Lctr32_enc8x_done 1805 1806.align 5 1807Lctr32_enc8x_four: 1808 vcipherlast $out0,$out0,$in4 1809 vcipherlast $out1,$out1,$in5 1810 vcipherlast $out2,$out2,$in6 1811 vcipherlast $out3,$out3,$in7 1812 1813 le?vperm $out0,$out0,$out0,$inpperm 1814 le?vperm $out1,$out1,$out1,$inpperm 1815 stvx_u $out0,$x00,$out 1816 le?vperm $out2,$out2,$out2,$inpperm 1817 stvx_u $out1,$x10,$out 1818 le?vperm $out3,$out3,$out3,$inpperm 1819 stvx_u $out2,$x20,$out 1820 stvx_u $out3,$x30,$out 1821 addi $out,$out,0x40 1822 b Lctr32_enc8x_done 1823 1824.align 5 1825Lctr32_enc8x_three: 1826 vcipherlast $out0,$out0,$in5 1827 vcipherlast $out1,$out1,$in6 1828 vcipherlast $out2,$out2,$in7 1829 1830 le?vperm $out0,$out0,$out0,$inpperm 1831 le?vperm $out1,$out1,$out1,$inpperm 1832 stvx_u $out0,$x00,$out 1833 le?vperm $out2,$out2,$out2,$inpperm 1834 stvx_u $out1,$x10,$out 1835 stvx_u $out2,$x20,$out 1836 addi $out,$out,0x30 1837 b Lctr32_enc8x_done 1838 1839.align 5 1840Lctr32_enc8x_two: 1841 vcipherlast $out0,$out0,$in6 1842 vcipherlast $out1,$out1,$in7 1843 1844 le?vperm $out0,$out0,$out0,$inpperm 1845 le?vperm $out1,$out1,$out1,$inpperm 1846 stvx_u $out0,$x00,$out 1847 stvx_u $out1,$x10,$out 1848 addi $out,$out,0x20 1849 b Lctr32_enc8x_done 1850 1851.align 5 1852Lctr32_enc8x_one: 1853 vcipherlast $out0,$out0,$in7 1854 1855 le?vperm $out0,$out0,$out0,$inpperm 1856 stvx_u $out0,0,$out 1857 addi $out,$out,0x10 1858 1859Lctr32_enc8x_done: 1860 li r10,`$FRAME+15` 1861 li r11,`$FRAME+31` 1862 stvx $inpperm,r10,$sp # wipe copies of round keys 1863 addi r10,r10,32 1864 stvx $inpperm,r11,$sp 1865 addi r11,r11,32 1866 stvx $inpperm,r10,$sp 1867 addi r10,r10,32 1868 stvx $inpperm,r11,$sp 1869 addi r11,r11,32 1870 stvx $inpperm,r10,$sp 1871 addi r10,r10,32 1872 stvx $inpperm,r11,$sp 1873 addi r11,r11,32 1874 stvx $inpperm,r10,$sp 1875 addi r10,r10,32 1876 stvx $inpperm,r11,$sp 1877 addi r11,r11,32 1878 1879 mtspr 256,$vrsave 1880 lvx v20,r10,$sp # ABI says so 1881 addi r10,r10,32 1882 lvx v21,r11,$sp 1883 addi r11,r11,32 1884 lvx v22,r10,$sp 1885 addi r10,r10,32 1886 lvx v23,r11,$sp 1887 addi r11,r11,32 1888 lvx v24,r10,$sp 1889 addi r10,r10,32 1890 lvx v25,r11,$sp 1891 addi r11,r11,32 1892 lvx v26,r10,$sp 1893 addi r10,r10,32 1894 lvx v27,r11,$sp 1895 addi r11,r11,32 1896 lvx v28,r10,$sp 1897 addi r10,r10,32 1898 lvx v29,r11,$sp 1899 addi r11,r11,32 1900 lvx v30,r10,$sp 1901 lvx v31,r11,$sp 1902 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 1903 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 1904 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 1905 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 1906 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 1907 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 1908 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 1909 blr 1910 .long 0 1911 .byte 0,12,0x04,0,0x80,6,6,0 1912 .long 0 1913.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 1914___ 1915}} }}} 1916 1917######################################################################### 1918{{{ # XTS procedures # 1919# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1920# const AES_KEY *key1, const AES_KEY *key2, # 1921# [const] unsigned char iv[16]); # 1922# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1923# input tweak value is assumed to be encrypted already, and last tweak # 1924# value, one suitable for consecutive call on same chunk of data, is # 1925# written back to original buffer. In addition, in "tweak chaining" # 1926# mode only complete input blocks are processed. # 1927 1928my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1929my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1930my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1931my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1932my $taillen = $key2; 1933 1934 ($inp,$idx) = ($idx,$inp); # reassign 1935 1936$code.=<<___; 1937.globl .${prefix}_xts_encrypt 1938.align 5 1939.${prefix}_xts_encrypt: 1940 mr $inp,r3 # reassign 1941 li r3,-1 1942 ${UCMP}i $len,16 1943 bltlr- 1944 1945 lis r0,0xfff0 1946 mfspr r12,256 # save vrsave 1947 li r11,0 1948 mtspr 256,r0 1949 1950 vspltisb $seven,0x07 # 0x070707..07 1951 le?lvsl $leperm,r11,r11 1952 le?vspltisb $tmp,0x0f 1953 le?vxor $leperm,$leperm,$seven 1954 1955 li $idx,15 1956 lvx $tweak,0,$ivp # load [unaligned] iv 1957 lvsl $inpperm,0,$ivp 1958 lvx $inptail,$idx,$ivp 1959 le?vxor $inpperm,$inpperm,$tmp 1960 vperm $tweak,$tweak,$inptail,$inpperm 1961 1962 neg r11,$inp 1963 lvsr $inpperm,0,r11 # prepare for unaligned load 1964 lvx $inout,0,$inp 1965 addi $inp,$inp,15 # 15 is not typo 1966 le?vxor $inpperm,$inpperm,$tmp 1967 1968 ${UCMP}i $key2,0 # key2==NULL? 1969 beq Lxts_enc_no_key2 1970 1971 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 1972 lwz $rounds,240($key2) 1973 srwi $rounds,$rounds,1 1974 subi $rounds,$rounds,1 1975 li $idx,16 1976 1977 lvx $rndkey0,0,$key2 1978 lvx $rndkey1,$idx,$key2 1979 addi $idx,$idx,16 1980 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1981 vxor $tweak,$tweak,$rndkey0 1982 lvx $rndkey0,$idx,$key2 1983 addi $idx,$idx,16 1984 mtctr $rounds 1985 1986Ltweak_xts_enc: 1987 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1988 vcipher $tweak,$tweak,$rndkey1 1989 lvx $rndkey1,$idx,$key2 1990 addi $idx,$idx,16 1991 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1992 vcipher $tweak,$tweak,$rndkey0 1993 lvx $rndkey0,$idx,$key2 1994 addi $idx,$idx,16 1995 bdnz Ltweak_xts_enc 1996 1997 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1998 vcipher $tweak,$tweak,$rndkey1 1999 lvx $rndkey1,$idx,$key2 2000 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2001 vcipherlast $tweak,$tweak,$rndkey0 2002 2003 li $ivp,0 # don't chain the tweak 2004 b Lxts_enc 2005 2006Lxts_enc_no_key2: 2007 li $idx,-16 2008 and $len,$len,$idx # in "tweak chaining" 2009 # mode only complete 2010 # blocks are processed 2011Lxts_enc: 2012 lvx $inptail,0,$inp 2013 addi $inp,$inp,16 2014 2015 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2016 lwz $rounds,240($key1) 2017 srwi $rounds,$rounds,1 2018 subi $rounds,$rounds,1 2019 li $idx,16 2020 2021 vslb $eighty7,$seven,$seven # 0x808080..80 2022 vor $eighty7,$eighty7,$seven # 0x878787..87 2023 vspltisb $tmp,1 # 0x010101..01 2024 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2025 2026 ${UCMP}i $len,96 2027 bge _aesp8_xts_encrypt6x 2028 2029 andi. $taillen,$len,15 2030 subic r0,$len,32 2031 subi $taillen,$taillen,16 2032 subfe r0,r0,r0 2033 and r0,r0,$taillen 2034 add $inp,$inp,r0 2035 2036 lvx $rndkey0,0,$key1 2037 lvx $rndkey1,$idx,$key1 2038 addi $idx,$idx,16 2039 vperm $inout,$inout,$inptail,$inpperm 2040 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2041 vxor $inout,$inout,$tweak 2042 vxor $inout,$inout,$rndkey0 2043 lvx $rndkey0,$idx,$key1 2044 addi $idx,$idx,16 2045 mtctr $rounds 2046 b Loop_xts_enc 2047 2048.align 5 2049Loop_xts_enc: 2050 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2051 vcipher $inout,$inout,$rndkey1 2052 lvx $rndkey1,$idx,$key1 2053 addi $idx,$idx,16 2054 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2055 vcipher $inout,$inout,$rndkey0 2056 lvx $rndkey0,$idx,$key1 2057 addi $idx,$idx,16 2058 bdnz Loop_xts_enc 2059 2060 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2061 vcipher $inout,$inout,$rndkey1 2062 lvx $rndkey1,$idx,$key1 2063 li $idx,16 2064 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2065 vxor $rndkey0,$rndkey0,$tweak 2066 vcipherlast $output,$inout,$rndkey0 2067 2068 le?vperm $tmp,$output,$output,$leperm 2069 be?nop 2070 le?stvx_u $tmp,0,$out 2071 be?stvx_u $output,0,$out 2072 addi $out,$out,16 2073 2074 subic. $len,$len,16 2075 beq Lxts_enc_done 2076 2077 vmr $inout,$inptail 2078 lvx $inptail,0,$inp 2079 addi $inp,$inp,16 2080 lvx $rndkey0,0,$key1 2081 lvx $rndkey1,$idx,$key1 2082 addi $idx,$idx,16 2083 2084 subic r0,$len,32 2085 subfe r0,r0,r0 2086 and r0,r0,$taillen 2087 add $inp,$inp,r0 2088 2089 vsrab $tmp,$tweak,$seven # next tweak value 2090 vaddubm $tweak,$tweak,$tweak 2091 vsldoi $tmp,$tmp,$tmp,15 2092 vand $tmp,$tmp,$eighty7 2093 vxor $tweak,$tweak,$tmp 2094 2095 vperm $inout,$inout,$inptail,$inpperm 2096 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2097 vxor $inout,$inout,$tweak 2098 vxor $output,$output,$rndkey0 # just in case $len<16 2099 vxor $inout,$inout,$rndkey0 2100 lvx $rndkey0,$idx,$key1 2101 addi $idx,$idx,16 2102 2103 mtctr $rounds 2104 ${UCMP}i $len,16 2105 bge Loop_xts_enc 2106 2107 vxor $output,$output,$tweak 2108 lvsr $inpperm,0,$len # $inpperm is no longer needed 2109 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2110 vspltisb $tmp,-1 2111 vperm $inptail,$inptail,$tmp,$inpperm 2112 vsel $inout,$inout,$output,$inptail 2113 2114 subi r11,$out,17 2115 subi $out,$out,16 2116 mtctr $len 2117 li $len,16 2118Loop_xts_enc_steal: 2119 lbzu r0,1(r11) 2120 stb r0,16(r11) 2121 bdnz Loop_xts_enc_steal 2122 2123 mtctr $rounds 2124 b Loop_xts_enc # one more time... 2125 2126Lxts_enc_done: 2127 ${UCMP}i $ivp,0 2128 beq Lxts_enc_ret 2129 2130 vsrab $tmp,$tweak,$seven # next tweak value 2131 vaddubm $tweak,$tweak,$tweak 2132 vsldoi $tmp,$tmp,$tmp,15 2133 vand $tmp,$tmp,$eighty7 2134 vxor $tweak,$tweak,$tmp 2135 2136 le?vperm $tweak,$tweak,$tweak,$leperm 2137 stvx_u $tweak,0,$ivp 2138 2139Lxts_enc_ret: 2140 mtspr 256,r12 # restore vrsave 2141 li r3,0 2142 blr 2143 .long 0 2144 .byte 0,12,0x04,0,0x80,6,6,0 2145 .long 0 2146.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2147 2148.globl .${prefix}_xts_decrypt 2149.align 5 2150.${prefix}_xts_decrypt: 2151 mr $inp,r3 # reassign 2152 li r3,-1 2153 ${UCMP}i $len,16 2154 bltlr- 2155 2156 lis r0,0xfff8 2157 mfspr r12,256 # save vrsave 2158 li r11,0 2159 mtspr 256,r0 2160 2161 andi. r0,$len,15 2162 neg r0,r0 2163 andi. r0,r0,16 2164 sub $len,$len,r0 2165 2166 vspltisb $seven,0x07 # 0x070707..07 2167 le?lvsl $leperm,r11,r11 2168 le?vspltisb $tmp,0x0f 2169 le?vxor $leperm,$leperm,$seven 2170 2171 li $idx,15 2172 lvx $tweak,0,$ivp # load [unaligned] iv 2173 lvsl $inpperm,0,$ivp 2174 lvx $inptail,$idx,$ivp 2175 le?vxor $inpperm,$inpperm,$tmp 2176 vperm $tweak,$tweak,$inptail,$inpperm 2177 2178 neg r11,$inp 2179 lvsr $inpperm,0,r11 # prepare for unaligned load 2180 lvx $inout,0,$inp 2181 addi $inp,$inp,15 # 15 is not typo 2182 le?vxor $inpperm,$inpperm,$tmp 2183 2184 ${UCMP}i $key2,0 # key2==NULL? 2185 beq Lxts_dec_no_key2 2186 2187 ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2188 lwz $rounds,240($key2) 2189 srwi $rounds,$rounds,1 2190 subi $rounds,$rounds,1 2191 li $idx,16 2192 2193 lvx $rndkey0,0,$key2 2194 lvx $rndkey1,$idx,$key2 2195 addi $idx,$idx,16 2196 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2197 vxor $tweak,$tweak,$rndkey0 2198 lvx $rndkey0,$idx,$key2 2199 addi $idx,$idx,16 2200 mtctr $rounds 2201 2202Ltweak_xts_dec: 2203 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2204 vcipher $tweak,$tweak,$rndkey1 2205 lvx $rndkey1,$idx,$key2 2206 addi $idx,$idx,16 2207 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2208 vcipher $tweak,$tweak,$rndkey0 2209 lvx $rndkey0,$idx,$key2 2210 addi $idx,$idx,16 2211 bdnz Ltweak_xts_dec 2212 2213 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2214 vcipher $tweak,$tweak,$rndkey1 2215 lvx $rndkey1,$idx,$key2 2216 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2217 vcipherlast $tweak,$tweak,$rndkey0 2218 2219 li $ivp,0 # don't chain the tweak 2220 b Lxts_dec 2221 2222Lxts_dec_no_key2: 2223 neg $idx,$len 2224 andi. $idx,$idx,15 2225 add $len,$len,$idx # in "tweak chaining" 2226 # mode only complete 2227 # blocks are processed 2228Lxts_dec: 2229 lvx $inptail,0,$inp 2230 addi $inp,$inp,16 2231 2232 ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2233 lwz $rounds,240($key1) 2234 srwi $rounds,$rounds,1 2235 subi $rounds,$rounds,1 2236 li $idx,16 2237 2238 vslb $eighty7,$seven,$seven # 0x808080..80 2239 vor $eighty7,$eighty7,$seven # 0x878787..87 2240 vspltisb $tmp,1 # 0x010101..01 2241 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2242 2243 ${UCMP}i $len,96 2244 bge _aesp8_xts_decrypt6x 2245 2246 lvx $rndkey0,0,$key1 2247 lvx $rndkey1,$idx,$key1 2248 addi $idx,$idx,16 2249 vperm $inout,$inout,$inptail,$inpperm 2250 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2251 vxor $inout,$inout,$tweak 2252 vxor $inout,$inout,$rndkey0 2253 lvx $rndkey0,$idx,$key1 2254 addi $idx,$idx,16 2255 mtctr $rounds 2256 2257 ${UCMP}i $len,16 2258 blt Ltail_xts_dec 2259 be?b Loop_xts_dec 2260 2261.align 5 2262Loop_xts_dec: 2263 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2264 vncipher $inout,$inout,$rndkey1 2265 lvx $rndkey1,$idx,$key1 2266 addi $idx,$idx,16 2267 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2268 vncipher $inout,$inout,$rndkey0 2269 lvx $rndkey0,$idx,$key1 2270 addi $idx,$idx,16 2271 bdnz Loop_xts_dec 2272 2273 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2274 vncipher $inout,$inout,$rndkey1 2275 lvx $rndkey1,$idx,$key1 2276 li $idx,16 2277 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2278 vxor $rndkey0,$rndkey0,$tweak 2279 vncipherlast $output,$inout,$rndkey0 2280 2281 le?vperm $tmp,$output,$output,$leperm 2282 be?nop 2283 le?stvx_u $tmp,0,$out 2284 be?stvx_u $output,0,$out 2285 addi $out,$out,16 2286 2287 subic. $len,$len,16 2288 beq Lxts_dec_done 2289 2290 vmr $inout,$inptail 2291 lvx $inptail,0,$inp 2292 addi $inp,$inp,16 2293 lvx $rndkey0,0,$key1 2294 lvx $rndkey1,$idx,$key1 2295 addi $idx,$idx,16 2296 2297 vsrab $tmp,$tweak,$seven # next tweak value 2298 vaddubm $tweak,$tweak,$tweak 2299 vsldoi $tmp,$tmp,$tmp,15 2300 vand $tmp,$tmp,$eighty7 2301 vxor $tweak,$tweak,$tmp 2302 2303 vperm $inout,$inout,$inptail,$inpperm 2304 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2305 vxor $inout,$inout,$tweak 2306 vxor $inout,$inout,$rndkey0 2307 lvx $rndkey0,$idx,$key1 2308 addi $idx,$idx,16 2309 2310 mtctr $rounds 2311 ${UCMP}i $len,16 2312 bge Loop_xts_dec 2313 2314Ltail_xts_dec: 2315 vsrab $tmp,$tweak,$seven # next tweak value 2316 vaddubm $tweak1,$tweak,$tweak 2317 vsldoi $tmp,$tmp,$tmp,15 2318 vand $tmp,$tmp,$eighty7 2319 vxor $tweak1,$tweak1,$tmp 2320 2321 subi $inp,$inp,16 2322 add $inp,$inp,$len 2323 2324 vxor $inout,$inout,$tweak # :-( 2325 vxor $inout,$inout,$tweak1 # :-) 2326 2327Loop_xts_dec_short: 2328 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2329 vncipher $inout,$inout,$rndkey1 2330 lvx $rndkey1,$idx,$key1 2331 addi $idx,$idx,16 2332 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2333 vncipher $inout,$inout,$rndkey0 2334 lvx $rndkey0,$idx,$key1 2335 addi $idx,$idx,16 2336 bdnz Loop_xts_dec_short 2337 2338 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2339 vncipher $inout,$inout,$rndkey1 2340 lvx $rndkey1,$idx,$key1 2341 li $idx,16 2342 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2343 vxor $rndkey0,$rndkey0,$tweak1 2344 vncipherlast $output,$inout,$rndkey0 2345 2346 le?vperm $tmp,$output,$output,$leperm 2347 be?nop 2348 le?stvx_u $tmp,0,$out 2349 be?stvx_u $output,0,$out 2350 2351 vmr $inout,$inptail 2352 lvx $inptail,0,$inp 2353 #addi $inp,$inp,16 2354 lvx $rndkey0,0,$key1 2355 lvx $rndkey1,$idx,$key1 2356 addi $idx,$idx,16 2357 vperm $inout,$inout,$inptail,$inpperm 2358 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2359 2360 lvsr $inpperm,0,$len # $inpperm is no longer needed 2361 vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2362 vspltisb $tmp,-1 2363 vperm $inptail,$inptail,$tmp,$inpperm 2364 vsel $inout,$inout,$output,$inptail 2365 2366 vxor $rndkey0,$rndkey0,$tweak 2367 vxor $inout,$inout,$rndkey0 2368 lvx $rndkey0,$idx,$key1 2369 addi $idx,$idx,16 2370 2371 subi r11,$out,1 2372 mtctr $len 2373 li $len,16 2374Loop_xts_dec_steal: 2375 lbzu r0,1(r11) 2376 stb r0,16(r11) 2377 bdnz Loop_xts_dec_steal 2378 2379 mtctr $rounds 2380 b Loop_xts_dec # one more time... 2381 2382Lxts_dec_done: 2383 ${UCMP}i $ivp,0 2384 beq Lxts_dec_ret 2385 2386 vsrab $tmp,$tweak,$seven # next tweak value 2387 vaddubm $tweak,$tweak,$tweak 2388 vsldoi $tmp,$tmp,$tmp,15 2389 vand $tmp,$tmp,$eighty7 2390 vxor $tweak,$tweak,$tmp 2391 2392 le?vperm $tweak,$tweak,$tweak,$leperm 2393 stvx_u $tweak,0,$ivp 2394 2395Lxts_dec_ret: 2396 mtspr 256,r12 # restore vrsave 2397 li r3,0 2398 blr 2399 .long 0 2400 .byte 0,12,0x04,0,0x80,6,6,0 2401 .long 0 2402.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2403___ 2404######################################################################### 2405{{ # Optimized XTS procedures # 2406my $key_=$key2; 2407my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2408 $x00=0 if ($flavour =~ /osx/); 2409my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5)); 2410my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2411my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2412my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2413 # v26-v31 last 6 round keys 2414my ($keyperm)=($out0); # aliases with "caller", redundant assignment 2415my $taillen=$x70; 2416 2417$code.=<<___; 2418.align 5 2419_aesp8_xts_encrypt6x: 2420 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2421 mflr r11 2422 li r7,`$FRAME+8*16+15` 2423 li r3,`$FRAME+8*16+31` 2424 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2425 stvx v20,r7,$sp # ABI says so 2426 addi r7,r7,32 2427 stvx v21,r3,$sp 2428 addi r3,r3,32 2429 stvx v22,r7,$sp 2430 addi r7,r7,32 2431 stvx v23,r3,$sp 2432 addi r3,r3,32 2433 stvx v24,r7,$sp 2434 addi r7,r7,32 2435 stvx v25,r3,$sp 2436 addi r3,r3,32 2437 stvx v26,r7,$sp 2438 addi r7,r7,32 2439 stvx v27,r3,$sp 2440 addi r3,r3,32 2441 stvx v28,r7,$sp 2442 addi r7,r7,32 2443 stvx v29,r3,$sp 2444 addi r3,r3,32 2445 stvx v30,r7,$sp 2446 stvx v31,r3,$sp 2447 li r0,-1 2448 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2449 li $x10,0x10 2450 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2451 li $x20,0x20 2452 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2453 li $x30,0x30 2454 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2455 li $x40,0x40 2456 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2457 li $x50,0x50 2458 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2459 li $x60,0x60 2460 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2461 li $x70,0x70 2462 mtspr 256,r0 2463 2464 # Reverse eighty7 to 0x010101..87 2465 xxlor 2, 32+$eighty7, 32+$eighty7 2466 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 2467 xxlor 1, 32+$eighty7, 32+$eighty7 2468 2469 # Load XOR contents. 0xf102132435465768798a9bacbdcedfe 2470 mr $x70, r6 2471 bl Lconsts 2472 lxvw4x 0, $x40, r6 # load XOR contents 2473 mr r6, $x70 2474 li $x70,0x70 2475 2476 subi $rounds,$rounds,3 # -4 in total 2477 2478 lvx $rndkey0,$x00,$key1 # load key schedule 2479 lvx v30,$x10,$key1 2480 addi $key1,$key1,0x20 2481 lvx v31,$x00,$key1 2482 ?vperm $rndkey0,$rndkey0,v30,$keyperm 2483 addi $key_,$sp,$FRAME+15 2484 mtctr $rounds 2485 2486Load_xts_enc_key: 2487 ?vperm v24,v30,v31,$keyperm 2488 lvx v30,$x10,$key1 2489 addi $key1,$key1,0x20 2490 stvx v24,$x00,$key_ # off-load round[1] 2491 ?vperm v25,v31,v30,$keyperm 2492 lvx v31,$x00,$key1 2493 stvx v25,$x10,$key_ # off-load round[2] 2494 addi $key_,$key_,0x20 2495 bdnz Load_xts_enc_key 2496 2497 lvx v26,$x10,$key1 2498 ?vperm v24,v30,v31,$keyperm 2499 lvx v27,$x20,$key1 2500 stvx v24,$x00,$key_ # off-load round[3] 2501 ?vperm v25,v31,v26,$keyperm 2502 lvx v28,$x30,$key1 2503 stvx v25,$x10,$key_ # off-load round[4] 2504 addi $key_,$sp,$FRAME+15 # rewind $key_ 2505 ?vperm v26,v26,v27,$keyperm 2506 lvx v29,$x40,$key1 2507 ?vperm v27,v27,v28,$keyperm 2508 lvx v30,$x50,$key1 2509 ?vperm v28,v28,v29,$keyperm 2510 lvx v31,$x60,$key1 2511 ?vperm v29,v29,v30,$keyperm 2512 lvx $twk5,$x70,$key1 # borrow $twk5 2513 ?vperm v30,v30,v31,$keyperm 2514 lvx v24,$x00,$key_ # pre-load round[1] 2515 ?vperm v31,v31,$twk5,$keyperm 2516 lvx v25,$x10,$key_ # pre-load round[2] 2517 2518 # Switch to use the following codes with 0x010101..87 to generate tweak. 2519 # eighty7 = 0x010101..87 2520 # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits 2521 # vand tmp, tmp, eighty7 # last byte with carry 2522 # vaddubm tweak, tweak, tweak # left shift 1 bit (x2) 2523 # xxlor vsx, 0, 0 2524 # vpermxor tweak, tweak, tmp, vsx 2525 2526 vperm $in0,$inout,$inptail,$inpperm 2527 subi $inp,$inp,31 # undo "caller" 2528 vxor $twk0,$tweak,$rndkey0 2529 vsrab $tmp,$tweak,$seven # next tweak value 2530 vaddubm $tweak,$tweak,$tweak 2531 vand $tmp,$tmp,$eighty7 2532 vxor $out0,$in0,$twk0 2533 xxlor 32+$in1, 0, 0 2534 vpermxor $tweak, $tweak, $tmp, $in1 2535 2536 lvx_u $in1,$x10,$inp 2537 vxor $twk1,$tweak,$rndkey0 2538 vsrab $tmp,$tweak,$seven # next tweak value 2539 vaddubm $tweak,$tweak,$tweak 2540 le?vperm $in1,$in1,$in1,$leperm 2541 vand $tmp,$tmp,$eighty7 2542 vxor $out1,$in1,$twk1 2543 xxlor 32+$in2, 0, 0 2544 vpermxor $tweak, $tweak, $tmp, $in2 2545 2546 lvx_u $in2,$x20,$inp 2547 andi. $taillen,$len,15 2548 vxor $twk2,$tweak,$rndkey0 2549 vsrab $tmp,$tweak,$seven # next tweak value 2550 vaddubm $tweak,$tweak,$tweak 2551 le?vperm $in2,$in2,$in2,$leperm 2552 vand $tmp,$tmp,$eighty7 2553 vxor $out2,$in2,$twk2 2554 xxlor 32+$in3, 0, 0 2555 vpermxor $tweak, $tweak, $tmp, $in3 2556 2557 lvx_u $in3,$x30,$inp 2558 sub $len,$len,$taillen 2559 vxor $twk3,$tweak,$rndkey0 2560 vsrab $tmp,$tweak,$seven # next tweak value 2561 vaddubm $tweak,$tweak,$tweak 2562 le?vperm $in3,$in3,$in3,$leperm 2563 vand $tmp,$tmp,$eighty7 2564 vxor $out3,$in3,$twk3 2565 xxlor 32+$in4, 0, 0 2566 vpermxor $tweak, $tweak, $tmp, $in4 2567 2568 lvx_u $in4,$x40,$inp 2569 subi $len,$len,0x60 2570 vxor $twk4,$tweak,$rndkey0 2571 vsrab $tmp,$tweak,$seven # next tweak value 2572 vaddubm $tweak,$tweak,$tweak 2573 le?vperm $in4,$in4,$in4,$leperm 2574 vand $tmp,$tmp,$eighty7 2575 vxor $out4,$in4,$twk4 2576 xxlor 32+$in5, 0, 0 2577 vpermxor $tweak, $tweak, $tmp, $in5 2578 2579 lvx_u $in5,$x50,$inp 2580 addi $inp,$inp,0x60 2581 vxor $twk5,$tweak,$rndkey0 2582 vsrab $tmp,$tweak,$seven # next tweak value 2583 vaddubm $tweak,$tweak,$tweak 2584 le?vperm $in5,$in5,$in5,$leperm 2585 vand $tmp,$tmp,$eighty7 2586 vxor $out5,$in5,$twk5 2587 xxlor 32+$in0, 0, 0 2588 vpermxor $tweak, $tweak, $tmp, $in0 2589 2590 vxor v31,v31,$rndkey0 2591 mtctr $rounds 2592 b Loop_xts_enc6x 2593 2594.align 5 2595Loop_xts_enc6x: 2596 vcipher $out0,$out0,v24 2597 vcipher $out1,$out1,v24 2598 vcipher $out2,$out2,v24 2599 vcipher $out3,$out3,v24 2600 vcipher $out4,$out4,v24 2601 vcipher $out5,$out5,v24 2602 lvx v24,$x20,$key_ # round[3] 2603 addi $key_,$key_,0x20 2604 2605 vcipher $out0,$out0,v25 2606 vcipher $out1,$out1,v25 2607 vcipher $out2,$out2,v25 2608 vcipher $out3,$out3,v25 2609 vcipher $out4,$out4,v25 2610 vcipher $out5,$out5,v25 2611 lvx v25,$x10,$key_ # round[4] 2612 bdnz Loop_xts_enc6x 2613 2614 xxlor 32+$eighty7, 1, 1 # 0x010101..87 2615 2616 subic $len,$len,96 # $len-=96 2617 vxor $in0,$twk0,v31 # xor with last round key 2618 vcipher $out0,$out0,v24 2619 vcipher $out1,$out1,v24 2620 vsrab $tmp,$tweak,$seven # next tweak value 2621 vxor $twk0,$tweak,$rndkey0 2622 vaddubm $tweak,$tweak,$tweak 2623 vcipher $out2,$out2,v24 2624 vcipher $out3,$out3,v24 2625 vcipher $out4,$out4,v24 2626 vcipher $out5,$out5,v24 2627 2628 subfe. r0,r0,r0 # borrow?-1:0 2629 vand $tmp,$tmp,$eighty7 2630 vcipher $out0,$out0,v25 2631 vcipher $out1,$out1,v25 2632 xxlor 32+$in1, 0, 0 2633 vpermxor $tweak, $tweak, $tmp, $in1 2634 vcipher $out2,$out2,v25 2635 vcipher $out3,$out3,v25 2636 vxor $in1,$twk1,v31 2637 vsrab $tmp,$tweak,$seven # next tweak value 2638 vxor $twk1,$tweak,$rndkey0 2639 vcipher $out4,$out4,v25 2640 vcipher $out5,$out5,v25 2641 2642 and r0,r0,$len 2643 vaddubm $tweak,$tweak,$tweak 2644 vcipher $out0,$out0,v26 2645 vcipher $out1,$out1,v26 2646 vand $tmp,$tmp,$eighty7 2647 vcipher $out2,$out2,v26 2648 vcipher $out3,$out3,v26 2649 xxlor 32+$in2, 0, 0 2650 vpermxor $tweak, $tweak, $tmp, $in2 2651 vcipher $out4,$out4,v26 2652 vcipher $out5,$out5,v26 2653 2654 add $inp,$inp,r0 # $inp is adjusted in such 2655 # way that at exit from the 2656 # loop inX-in5 are loaded 2657 # with last "words" 2658 vxor $in2,$twk2,v31 2659 vsrab $tmp,$tweak,$seven # next tweak value 2660 vxor $twk2,$tweak,$rndkey0 2661 vaddubm $tweak,$tweak,$tweak 2662 vcipher $out0,$out0,v27 2663 vcipher $out1,$out1,v27 2664 vcipher $out2,$out2,v27 2665 vcipher $out3,$out3,v27 2666 vand $tmp,$tmp,$eighty7 2667 vcipher $out4,$out4,v27 2668 vcipher $out5,$out5,v27 2669 2670 addi $key_,$sp,$FRAME+15 # rewind $key_ 2671 xxlor 32+$in3, 0, 0 2672 vpermxor $tweak, $tweak, $tmp, $in3 2673 vcipher $out0,$out0,v28 2674 vcipher $out1,$out1,v28 2675 vxor $in3,$twk3,v31 2676 vsrab $tmp,$tweak,$seven # next tweak value 2677 vxor $twk3,$tweak,$rndkey0 2678 vcipher $out2,$out2,v28 2679 vcipher $out3,$out3,v28 2680 vaddubm $tweak,$tweak,$tweak 2681 vcipher $out4,$out4,v28 2682 vcipher $out5,$out5,v28 2683 lvx v24,$x00,$key_ # re-pre-load round[1] 2684 vand $tmp,$tmp,$eighty7 2685 2686 vcipher $out0,$out0,v29 2687 vcipher $out1,$out1,v29 2688 xxlor 32+$in4, 0, 0 2689 vpermxor $tweak, $tweak, $tmp, $in4 2690 vcipher $out2,$out2,v29 2691 vcipher $out3,$out3,v29 2692 vxor $in4,$twk4,v31 2693 vsrab $tmp,$tweak,$seven # next tweak value 2694 vxor $twk4,$tweak,$rndkey0 2695 vcipher $out4,$out4,v29 2696 vcipher $out5,$out5,v29 2697 lvx v25,$x10,$key_ # re-pre-load round[2] 2698 vaddubm $tweak,$tweak,$tweak 2699 2700 vcipher $out0,$out0,v30 2701 vcipher $out1,$out1,v30 2702 vand $tmp,$tmp,$eighty7 2703 vcipher $out2,$out2,v30 2704 vcipher $out3,$out3,v30 2705 xxlor 32+$in5, 0, 0 2706 vpermxor $tweak, $tweak, $tmp, $in5 2707 vcipher $out4,$out4,v30 2708 vcipher $out5,$out5,v30 2709 vxor $in5,$twk5,v31 2710 vsrab $tmp,$tweak,$seven # next tweak value 2711 vxor $twk5,$tweak,$rndkey0 2712 2713 vcipherlast $out0,$out0,$in0 2714 lvx_u $in0,$x00,$inp # load next input block 2715 vaddubm $tweak,$tweak,$tweak 2716 vcipherlast $out1,$out1,$in1 2717 lvx_u $in1,$x10,$inp 2718 vcipherlast $out2,$out2,$in2 2719 le?vperm $in0,$in0,$in0,$leperm 2720 lvx_u $in2,$x20,$inp 2721 vand $tmp,$tmp,$eighty7 2722 vcipherlast $out3,$out3,$in3 2723 le?vperm $in1,$in1,$in1,$leperm 2724 lvx_u $in3,$x30,$inp 2725 vcipherlast $out4,$out4,$in4 2726 le?vperm $in2,$in2,$in2,$leperm 2727 lvx_u $in4,$x40,$inp 2728 xxlor 10, 32+$in0, 32+$in0 2729 xxlor 32+$in0, 0, 0 2730 vpermxor $tweak, $tweak, $tmp, $in0 2731 xxlor 32+$in0, 10, 10 2732 vcipherlast $tmp,$out5,$in5 # last block might be needed 2733 # in stealing mode 2734 le?vperm $in3,$in3,$in3,$leperm 2735 lvx_u $in5,$x50,$inp 2736 addi $inp,$inp,0x60 2737 le?vperm $in4,$in4,$in4,$leperm 2738 le?vperm $in5,$in5,$in5,$leperm 2739 2740 le?vperm $out0,$out0,$out0,$leperm 2741 le?vperm $out1,$out1,$out1,$leperm 2742 stvx_u $out0,$x00,$out # store output 2743 vxor $out0,$in0,$twk0 2744 le?vperm $out2,$out2,$out2,$leperm 2745 stvx_u $out1,$x10,$out 2746 vxor $out1,$in1,$twk1 2747 le?vperm $out3,$out3,$out3,$leperm 2748 stvx_u $out2,$x20,$out 2749 vxor $out2,$in2,$twk2 2750 le?vperm $out4,$out4,$out4,$leperm 2751 stvx_u $out3,$x30,$out 2752 vxor $out3,$in3,$twk3 2753 le?vperm $out5,$tmp,$tmp,$leperm 2754 stvx_u $out4,$x40,$out 2755 vxor $out4,$in4,$twk4 2756 le?stvx_u $out5,$x50,$out 2757 be?stvx_u $tmp, $x50,$out 2758 vxor $out5,$in5,$twk5 2759 addi $out,$out,0x60 2760 2761 mtctr $rounds 2762 beq Loop_xts_enc6x # did $len-=96 borrow? 2763 2764 xxlor 32+$eighty7, 2, 2 # 0x870101..01 2765 2766 addic. $len,$len,0x60 2767 beq Lxts_enc6x_zero 2768 cmpwi $len,0x20 2769 blt Lxts_enc6x_one 2770 nop 2771 beq Lxts_enc6x_two 2772 cmpwi $len,0x40 2773 blt Lxts_enc6x_three 2774 nop 2775 beq Lxts_enc6x_four 2776 2777Lxts_enc6x_five: 2778 vxor $out0,$in1,$twk0 2779 vxor $out1,$in2,$twk1 2780 vxor $out2,$in3,$twk2 2781 vxor $out3,$in4,$twk3 2782 vxor $out4,$in5,$twk4 2783 2784 bl _aesp8_xts_enc5x 2785 2786 le?vperm $out0,$out0,$out0,$leperm 2787 vmr $twk0,$twk5 # unused tweak 2788 le?vperm $out1,$out1,$out1,$leperm 2789 stvx_u $out0,$x00,$out # store output 2790 le?vperm $out2,$out2,$out2,$leperm 2791 stvx_u $out1,$x10,$out 2792 le?vperm $out3,$out3,$out3,$leperm 2793 stvx_u $out2,$x20,$out 2794 vxor $tmp,$out4,$twk5 # last block prep for stealing 2795 le?vperm $out4,$out4,$out4,$leperm 2796 stvx_u $out3,$x30,$out 2797 stvx_u $out4,$x40,$out 2798 addi $out,$out,0x50 2799 bne Lxts_enc6x_steal 2800 b Lxts_enc6x_done 2801 2802.align 4 2803Lxts_enc6x_four: 2804 vxor $out0,$in2,$twk0 2805 vxor $out1,$in3,$twk1 2806 vxor $out2,$in4,$twk2 2807 vxor $out3,$in5,$twk3 2808 vxor $out4,$out4,$out4 2809 2810 bl _aesp8_xts_enc5x 2811 2812 le?vperm $out0,$out0,$out0,$leperm 2813 vmr $twk0,$twk4 # unused tweak 2814 le?vperm $out1,$out1,$out1,$leperm 2815 stvx_u $out0,$x00,$out # store output 2816 le?vperm $out2,$out2,$out2,$leperm 2817 stvx_u $out1,$x10,$out 2818 vxor $tmp,$out3,$twk4 # last block prep for stealing 2819 le?vperm $out3,$out3,$out3,$leperm 2820 stvx_u $out2,$x20,$out 2821 stvx_u $out3,$x30,$out 2822 addi $out,$out,0x40 2823 bne Lxts_enc6x_steal 2824 b Lxts_enc6x_done 2825 2826.align 4 2827Lxts_enc6x_three: 2828 vxor $out0,$in3,$twk0 2829 vxor $out1,$in4,$twk1 2830 vxor $out2,$in5,$twk2 2831 vxor $out3,$out3,$out3 2832 vxor $out4,$out4,$out4 2833 2834 bl _aesp8_xts_enc5x 2835 2836 le?vperm $out0,$out0,$out0,$leperm 2837 vmr $twk0,$twk3 # unused tweak 2838 le?vperm $out1,$out1,$out1,$leperm 2839 stvx_u $out0,$x00,$out # store output 2840 vxor $tmp,$out2,$twk3 # last block prep for stealing 2841 le?vperm $out2,$out2,$out2,$leperm 2842 stvx_u $out1,$x10,$out 2843 stvx_u $out2,$x20,$out 2844 addi $out,$out,0x30 2845 bne Lxts_enc6x_steal 2846 b Lxts_enc6x_done 2847 2848.align 4 2849Lxts_enc6x_two: 2850 vxor $out0,$in4,$twk0 2851 vxor $out1,$in5,$twk1 2852 vxor $out2,$out2,$out2 2853 vxor $out3,$out3,$out3 2854 vxor $out4,$out4,$out4 2855 2856 bl _aesp8_xts_enc5x 2857 2858 le?vperm $out0,$out0,$out0,$leperm 2859 vmr $twk0,$twk2 # unused tweak 2860 vxor $tmp,$out1,$twk2 # last block prep for stealing 2861 le?vperm $out1,$out1,$out1,$leperm 2862 stvx_u $out0,$x00,$out # store output 2863 stvx_u $out1,$x10,$out 2864 addi $out,$out,0x20 2865 bne Lxts_enc6x_steal 2866 b Lxts_enc6x_done 2867 2868.align 4 2869Lxts_enc6x_one: 2870 vxor $out0,$in5,$twk0 2871 nop 2872Loop_xts_enc1x: 2873 vcipher $out0,$out0,v24 2874 lvx v24,$x20,$key_ # round[3] 2875 addi $key_,$key_,0x20 2876 2877 vcipher $out0,$out0,v25 2878 lvx v25,$x10,$key_ # round[4] 2879 bdnz Loop_xts_enc1x 2880 2881 add $inp,$inp,$taillen 2882 cmpwi $taillen,0 2883 vcipher $out0,$out0,v24 2884 2885 subi $inp,$inp,16 2886 vcipher $out0,$out0,v25 2887 2888 lvsr $inpperm,0,$taillen 2889 vcipher $out0,$out0,v26 2890 2891 lvx_u $in0,0,$inp 2892 vcipher $out0,$out0,v27 2893 2894 addi $key_,$sp,$FRAME+15 # rewind $key_ 2895 vcipher $out0,$out0,v28 2896 lvx v24,$x00,$key_ # re-pre-load round[1] 2897 2898 vcipher $out0,$out0,v29 2899 lvx v25,$x10,$key_ # re-pre-load round[2] 2900 vxor $twk0,$twk0,v31 2901 2902 le?vperm $in0,$in0,$in0,$leperm 2903 vcipher $out0,$out0,v30 2904 2905 vperm $in0,$in0,$in0,$inpperm 2906 vcipherlast $out0,$out0,$twk0 2907 2908 vmr $twk0,$twk1 # unused tweak 2909 vxor $tmp,$out0,$twk1 # last block prep for stealing 2910 le?vperm $out0,$out0,$out0,$leperm 2911 stvx_u $out0,$x00,$out # store output 2912 addi $out,$out,0x10 2913 bne Lxts_enc6x_steal 2914 b Lxts_enc6x_done 2915 2916.align 4 2917Lxts_enc6x_zero: 2918 cmpwi $taillen,0 2919 beq Lxts_enc6x_done 2920 2921 add $inp,$inp,$taillen 2922 subi $inp,$inp,16 2923 lvx_u $in0,0,$inp 2924 lvsr $inpperm,0,$taillen # $in5 is no more 2925 le?vperm $in0,$in0,$in0,$leperm 2926 vperm $in0,$in0,$in0,$inpperm 2927 vxor $tmp,$tmp,$twk0 2928Lxts_enc6x_steal: 2929 vxor $in0,$in0,$twk0 2930 vxor $out0,$out0,$out0 2931 vspltisb $out1,-1 2932 vperm $out0,$out0,$out1,$inpperm 2933 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 2934 2935 subi r30,$out,17 2936 subi $out,$out,16 2937 mtctr $taillen 2938Loop_xts_enc6x_steal: 2939 lbzu r0,1(r30) 2940 stb r0,16(r30) 2941 bdnz Loop_xts_enc6x_steal 2942 2943 li $taillen,0 2944 mtctr $rounds 2945 b Loop_xts_enc1x # one more time... 2946 2947.align 4 2948Lxts_enc6x_done: 2949 ${UCMP}i $ivp,0 2950 beq Lxts_enc6x_ret 2951 2952 vxor $tweak,$twk0,$rndkey0 2953 le?vperm $tweak,$tweak,$tweak,$leperm 2954 stvx_u $tweak,0,$ivp 2955 2956Lxts_enc6x_ret: 2957 mtlr r11 2958 li r10,`$FRAME+15` 2959 li r11,`$FRAME+31` 2960 stvx $seven,r10,$sp # wipe copies of round keys 2961 addi r10,r10,32 2962 stvx $seven,r11,$sp 2963 addi r11,r11,32 2964 stvx $seven,r10,$sp 2965 addi r10,r10,32 2966 stvx $seven,r11,$sp 2967 addi r11,r11,32 2968 stvx $seven,r10,$sp 2969 addi r10,r10,32 2970 stvx $seven,r11,$sp 2971 addi r11,r11,32 2972 stvx $seven,r10,$sp 2973 addi r10,r10,32 2974 stvx $seven,r11,$sp 2975 addi r11,r11,32 2976 2977 mtspr 256,$vrsave 2978 lvx v20,r10,$sp # ABI says so 2979 addi r10,r10,32 2980 lvx v21,r11,$sp 2981 addi r11,r11,32 2982 lvx v22,r10,$sp 2983 addi r10,r10,32 2984 lvx v23,r11,$sp 2985 addi r11,r11,32 2986 lvx v24,r10,$sp 2987 addi r10,r10,32 2988 lvx v25,r11,$sp 2989 addi r11,r11,32 2990 lvx v26,r10,$sp 2991 addi r10,r10,32 2992 lvx v27,r11,$sp 2993 addi r11,r11,32 2994 lvx v28,r10,$sp 2995 addi r10,r10,32 2996 lvx v29,r11,$sp 2997 addi r11,r11,32 2998 lvx v30,r10,$sp 2999 lvx v31,r11,$sp 3000 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3001 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3002 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3003 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3004 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3005 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3006 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3007 blr 3008 .long 0 3009 .byte 0,12,0x04,1,0x80,6,6,0 3010 .long 0 3011 3012.align 5 3013_aesp8_xts_enc5x: 3014 vcipher $out0,$out0,v24 3015 vcipher $out1,$out1,v24 3016 vcipher $out2,$out2,v24 3017 vcipher $out3,$out3,v24 3018 vcipher $out4,$out4,v24 3019 lvx v24,$x20,$key_ # round[3] 3020 addi $key_,$key_,0x20 3021 3022 vcipher $out0,$out0,v25 3023 vcipher $out1,$out1,v25 3024 vcipher $out2,$out2,v25 3025 vcipher $out3,$out3,v25 3026 vcipher $out4,$out4,v25 3027 lvx v25,$x10,$key_ # round[4] 3028 bdnz _aesp8_xts_enc5x 3029 3030 add $inp,$inp,$taillen 3031 cmpwi $taillen,0 3032 vcipher $out0,$out0,v24 3033 vcipher $out1,$out1,v24 3034 vcipher $out2,$out2,v24 3035 vcipher $out3,$out3,v24 3036 vcipher $out4,$out4,v24 3037 3038 subi $inp,$inp,16 3039 vcipher $out0,$out0,v25 3040 vcipher $out1,$out1,v25 3041 vcipher $out2,$out2,v25 3042 vcipher $out3,$out3,v25 3043 vcipher $out4,$out4,v25 3044 vxor $twk0,$twk0,v31 3045 3046 vcipher $out0,$out0,v26 3047 lvsr $inpperm,0,$taillen # $in5 is no more 3048 vcipher $out1,$out1,v26 3049 vcipher $out2,$out2,v26 3050 vcipher $out3,$out3,v26 3051 vcipher $out4,$out4,v26 3052 vxor $in1,$twk1,v31 3053 3054 vcipher $out0,$out0,v27 3055 lvx_u $in0,0,$inp 3056 vcipher $out1,$out1,v27 3057 vcipher $out2,$out2,v27 3058 vcipher $out3,$out3,v27 3059 vcipher $out4,$out4,v27 3060 vxor $in2,$twk2,v31 3061 3062 addi $key_,$sp,$FRAME+15 # rewind $key_ 3063 vcipher $out0,$out0,v28 3064 vcipher $out1,$out1,v28 3065 vcipher $out2,$out2,v28 3066 vcipher $out3,$out3,v28 3067 vcipher $out4,$out4,v28 3068 lvx v24,$x00,$key_ # re-pre-load round[1] 3069 vxor $in3,$twk3,v31 3070 3071 vcipher $out0,$out0,v29 3072 le?vperm $in0,$in0,$in0,$leperm 3073 vcipher $out1,$out1,v29 3074 vcipher $out2,$out2,v29 3075 vcipher $out3,$out3,v29 3076 vcipher $out4,$out4,v29 3077 lvx v25,$x10,$key_ # re-pre-load round[2] 3078 vxor $in4,$twk4,v31 3079 3080 vcipher $out0,$out0,v30 3081 vperm $in0,$in0,$in0,$inpperm 3082 vcipher $out1,$out1,v30 3083 vcipher $out2,$out2,v30 3084 vcipher $out3,$out3,v30 3085 vcipher $out4,$out4,v30 3086 3087 vcipherlast $out0,$out0,$twk0 3088 vcipherlast $out1,$out1,$in1 3089 vcipherlast $out2,$out2,$in2 3090 vcipherlast $out3,$out3,$in3 3091 vcipherlast $out4,$out4,$in4 3092 blr 3093 .long 0 3094 .byte 0,12,0x14,0,0,0,0,0 3095 3096.align 5 3097_aesp8_xts_decrypt6x: 3098 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3099 mflr r11 3100 li r7,`$FRAME+8*16+15` 3101 li r3,`$FRAME+8*16+31` 3102 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3103 stvx v20,r7,$sp # ABI says so 3104 addi r7,r7,32 3105 stvx v21,r3,$sp 3106 addi r3,r3,32 3107 stvx v22,r7,$sp 3108 addi r7,r7,32 3109 stvx v23,r3,$sp 3110 addi r3,r3,32 3111 stvx v24,r7,$sp 3112 addi r7,r7,32 3113 stvx v25,r3,$sp 3114 addi r3,r3,32 3115 stvx v26,r7,$sp 3116 addi r7,r7,32 3117 stvx v27,r3,$sp 3118 addi r3,r3,32 3119 stvx v28,r7,$sp 3120 addi r7,r7,32 3121 stvx v29,r3,$sp 3122 addi r3,r3,32 3123 stvx v30,r7,$sp 3124 stvx v31,r3,$sp 3125 li r0,-1 3126 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3127 li $x10,0x10 3128 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3129 li $x20,0x20 3130 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3131 li $x30,0x30 3132 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3133 li $x40,0x40 3134 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3135 li $x50,0x50 3136 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3137 li $x60,0x60 3138 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3139 li $x70,0x70 3140 mtspr 256,r0 3141 3142 # Reverse eighty7 to 0x010101..87 3143 xxlor 2, 32+$eighty7, 32+$eighty7 3144 vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 3145 xxlor 1, 32+$eighty7, 32+$eighty7 3146 3147 # Load XOR contents. 0xf102132435465768798a9bacbdcedfe 3148 mr $x70, r6 3149 bl Lconsts 3150 lxvw4x 0, $x40, r6 # load XOR contents 3151 mr r6, $x70 3152 li $x70,0x70 3153 3154 subi $rounds,$rounds,3 # -4 in total 3155 3156 lvx $rndkey0,$x00,$key1 # load key schedule 3157 lvx v30,$x10,$key1 3158 addi $key1,$key1,0x20 3159 lvx v31,$x00,$key1 3160 ?vperm $rndkey0,$rndkey0,v30,$keyperm 3161 addi $key_,$sp,$FRAME+15 3162 mtctr $rounds 3163 3164Load_xts_dec_key: 3165 ?vperm v24,v30,v31,$keyperm 3166 lvx v30,$x10,$key1 3167 addi $key1,$key1,0x20 3168 stvx v24,$x00,$key_ # off-load round[1] 3169 ?vperm v25,v31,v30,$keyperm 3170 lvx v31,$x00,$key1 3171 stvx v25,$x10,$key_ # off-load round[2] 3172 addi $key_,$key_,0x20 3173 bdnz Load_xts_dec_key 3174 3175 lvx v26,$x10,$key1 3176 ?vperm v24,v30,v31,$keyperm 3177 lvx v27,$x20,$key1 3178 stvx v24,$x00,$key_ # off-load round[3] 3179 ?vperm v25,v31,v26,$keyperm 3180 lvx v28,$x30,$key1 3181 stvx v25,$x10,$key_ # off-load round[4] 3182 addi $key_,$sp,$FRAME+15 # rewind $key_ 3183 ?vperm v26,v26,v27,$keyperm 3184 lvx v29,$x40,$key1 3185 ?vperm v27,v27,v28,$keyperm 3186 lvx v30,$x50,$key1 3187 ?vperm v28,v28,v29,$keyperm 3188 lvx v31,$x60,$key1 3189 ?vperm v29,v29,v30,$keyperm 3190 lvx $twk5,$x70,$key1 # borrow $twk5 3191 ?vperm v30,v30,v31,$keyperm 3192 lvx v24,$x00,$key_ # pre-load round[1] 3193 ?vperm v31,v31,$twk5,$keyperm 3194 lvx v25,$x10,$key_ # pre-load round[2] 3195 3196 vperm $in0,$inout,$inptail,$inpperm 3197 subi $inp,$inp,31 # undo "caller" 3198 vxor $twk0,$tweak,$rndkey0 3199 vsrab $tmp,$tweak,$seven # next tweak value 3200 vaddubm $tweak,$tweak,$tweak 3201 vand $tmp,$tmp,$eighty7 3202 vxor $out0,$in0,$twk0 3203 xxlor 32+$in1, 0, 0 3204 vpermxor $tweak, $tweak, $tmp, $in1 3205 3206 lvx_u $in1,$x10,$inp 3207 vxor $twk1,$tweak,$rndkey0 3208 vsrab $tmp,$tweak,$seven # next tweak value 3209 vaddubm $tweak,$tweak,$tweak 3210 le?vperm $in1,$in1,$in1,$leperm 3211 vand $tmp,$tmp,$eighty7 3212 vxor $out1,$in1,$twk1 3213 xxlor 32+$in2, 0, 0 3214 vpermxor $tweak, $tweak, $tmp, $in2 3215 3216 lvx_u $in2,$x20,$inp 3217 andi. $taillen,$len,15 3218 vxor $twk2,$tweak,$rndkey0 3219 vsrab $tmp,$tweak,$seven # next tweak value 3220 vaddubm $tweak,$tweak,$tweak 3221 le?vperm $in2,$in2,$in2,$leperm 3222 vand $tmp,$tmp,$eighty7 3223 vxor $out2,$in2,$twk2 3224 xxlor 32+$in3, 0, 0 3225 vpermxor $tweak, $tweak, $tmp, $in3 3226 3227 lvx_u $in3,$x30,$inp 3228 sub $len,$len,$taillen 3229 vxor $twk3,$tweak,$rndkey0 3230 vsrab $tmp,$tweak,$seven # next tweak value 3231 vaddubm $tweak,$tweak,$tweak 3232 le?vperm $in3,$in3,$in3,$leperm 3233 vand $tmp,$tmp,$eighty7 3234 vxor $out3,$in3,$twk3 3235 xxlor 32+$in4, 0, 0 3236 vpermxor $tweak, $tweak, $tmp, $in4 3237 3238 lvx_u $in4,$x40,$inp 3239 subi $len,$len,0x60 3240 vxor $twk4,$tweak,$rndkey0 3241 vsrab $tmp,$tweak,$seven # next tweak value 3242 vaddubm $tweak,$tweak,$tweak 3243 le?vperm $in4,$in4,$in4,$leperm 3244 vand $tmp,$tmp,$eighty7 3245 vxor $out4,$in4,$twk4 3246 xxlor 32+$in5, 0, 0 3247 vpermxor $tweak, $tweak, $tmp, $in5 3248 3249 lvx_u $in5,$x50,$inp 3250 addi $inp,$inp,0x60 3251 vxor $twk5,$tweak,$rndkey0 3252 vsrab $tmp,$tweak,$seven # next tweak value 3253 vaddubm $tweak,$tweak,$tweak 3254 le?vperm $in5,$in5,$in5,$leperm 3255 vand $tmp,$tmp,$eighty7 3256 vxor $out5,$in5,$twk5 3257 xxlor 32+$in0, 0, 0 3258 vpermxor $tweak, $tweak, $tmp, $in0 3259 3260 vxor v31,v31,$rndkey0 3261 mtctr $rounds 3262 b Loop_xts_dec6x 3263 3264.align 5 3265Loop_xts_dec6x: 3266 vncipher $out0,$out0,v24 3267 vncipher $out1,$out1,v24 3268 vncipher $out2,$out2,v24 3269 vncipher $out3,$out3,v24 3270 vncipher $out4,$out4,v24 3271 vncipher $out5,$out5,v24 3272 lvx v24,$x20,$key_ # round[3] 3273 addi $key_,$key_,0x20 3274 3275 vncipher $out0,$out0,v25 3276 vncipher $out1,$out1,v25 3277 vncipher $out2,$out2,v25 3278 vncipher $out3,$out3,v25 3279 vncipher $out4,$out4,v25 3280 vncipher $out5,$out5,v25 3281 lvx v25,$x10,$key_ # round[4] 3282 bdnz Loop_xts_dec6x 3283 3284 xxlor 32+$eighty7, 1, 1 3285 3286 subic $len,$len,96 # $len-=96 3287 vxor $in0,$twk0,v31 # xor with last round key 3288 vncipher $out0,$out0,v24 3289 vncipher $out1,$out1,v24 3290 vsrab $tmp,$tweak,$seven # next tweak value 3291 vxor $twk0,$tweak,$rndkey0 3292 vaddubm $tweak,$tweak,$tweak 3293 vncipher $out2,$out2,v24 3294 vncipher $out3,$out3,v24 3295 vncipher $out4,$out4,v24 3296 vncipher $out5,$out5,v24 3297 3298 subfe. r0,r0,r0 # borrow?-1:0 3299 vand $tmp,$tmp,$eighty7 3300 vncipher $out0,$out0,v25 3301 vncipher $out1,$out1,v25 3302 xxlor 32+$in1, 0, 0 3303 vpermxor $tweak, $tweak, $tmp, $in1 3304 vncipher $out2,$out2,v25 3305 vncipher $out3,$out3,v25 3306 vxor $in1,$twk1,v31 3307 vsrab $tmp,$tweak,$seven # next tweak value 3308 vxor $twk1,$tweak,$rndkey0 3309 vncipher $out4,$out4,v25 3310 vncipher $out5,$out5,v25 3311 3312 and r0,r0,$len 3313 vaddubm $tweak,$tweak,$tweak 3314 vncipher $out0,$out0,v26 3315 vncipher $out1,$out1,v26 3316 vand $tmp,$tmp,$eighty7 3317 vncipher $out2,$out2,v26 3318 vncipher $out3,$out3,v26 3319 xxlor 32+$in2, 0, 0 3320 vpermxor $tweak, $tweak, $tmp, $in2 3321 vncipher $out4,$out4,v26 3322 vncipher $out5,$out5,v26 3323 3324 add $inp,$inp,r0 # $inp is adjusted in such 3325 # way that at exit from the 3326 # loop inX-in5 are loaded 3327 # with last "words" 3328 vxor $in2,$twk2,v31 3329 vsrab $tmp,$tweak,$seven # next tweak value 3330 vxor $twk2,$tweak,$rndkey0 3331 vaddubm $tweak,$tweak,$tweak 3332 vncipher $out0,$out0,v27 3333 vncipher $out1,$out1,v27 3334 vncipher $out2,$out2,v27 3335 vncipher $out3,$out3,v27 3336 vand $tmp,$tmp,$eighty7 3337 vncipher $out4,$out4,v27 3338 vncipher $out5,$out5,v27 3339 3340 addi $key_,$sp,$FRAME+15 # rewind $key_ 3341 xxlor 32+$in3, 0, 0 3342 vpermxor $tweak, $tweak, $tmp, $in3 3343 vncipher $out0,$out0,v28 3344 vncipher $out1,$out1,v28 3345 vxor $in3,$twk3,v31 3346 vsrab $tmp,$tweak,$seven # next tweak value 3347 vxor $twk3,$tweak,$rndkey0 3348 vncipher $out2,$out2,v28 3349 vncipher $out3,$out3,v28 3350 vaddubm $tweak,$tweak,$tweak 3351 vncipher $out4,$out4,v28 3352 vncipher $out5,$out5,v28 3353 lvx v24,$x00,$key_ # re-pre-load round[1] 3354 vand $tmp,$tmp,$eighty7 3355 3356 vncipher $out0,$out0,v29 3357 vncipher $out1,$out1,v29 3358 xxlor 32+$in4, 0, 0 3359 vpermxor $tweak, $tweak, $tmp, $in4 3360 vncipher $out2,$out2,v29 3361 vncipher $out3,$out3,v29 3362 vxor $in4,$twk4,v31 3363 vsrab $tmp,$tweak,$seven # next tweak value 3364 vxor $twk4,$tweak,$rndkey0 3365 vncipher $out4,$out4,v29 3366 vncipher $out5,$out5,v29 3367 lvx v25,$x10,$key_ # re-pre-load round[2] 3368 vaddubm $tweak,$tweak,$tweak 3369 3370 vncipher $out0,$out0,v30 3371 vncipher $out1,$out1,v30 3372 vand $tmp,$tmp,$eighty7 3373 vncipher $out2,$out2,v30 3374 vncipher $out3,$out3,v30 3375 xxlor 32+$in5, 0, 0 3376 vpermxor $tweak, $tweak, $tmp, $in5 3377 vncipher $out4,$out4,v30 3378 vncipher $out5,$out5,v30 3379 vxor $in5,$twk5,v31 3380 vsrab $tmp,$tweak,$seven # next tweak value 3381 vxor $twk5,$tweak,$rndkey0 3382 3383 vncipherlast $out0,$out0,$in0 3384 lvx_u $in0,$x00,$inp # load next input block 3385 vaddubm $tweak,$tweak,$tweak 3386 vncipherlast $out1,$out1,$in1 3387 lvx_u $in1,$x10,$inp 3388 vncipherlast $out2,$out2,$in2 3389 le?vperm $in0,$in0,$in0,$leperm 3390 lvx_u $in2,$x20,$inp 3391 vand $tmp,$tmp,$eighty7 3392 vncipherlast $out3,$out3,$in3 3393 le?vperm $in1,$in1,$in1,$leperm 3394 lvx_u $in3,$x30,$inp 3395 vncipherlast $out4,$out4,$in4 3396 le?vperm $in2,$in2,$in2,$leperm 3397 lvx_u $in4,$x40,$inp 3398 xxlor 10, 32+$in0, 32+$in0 3399 xxlor 32+$in0, 0, 0 3400 vpermxor $tweak, $tweak, $tmp, $in0 3401 xxlor 32+$in0, 10, 10 3402 vncipherlast $out5,$out5,$in5 3403 le?vperm $in3,$in3,$in3,$leperm 3404 lvx_u $in5,$x50,$inp 3405 addi $inp,$inp,0x60 3406 le?vperm $in4,$in4,$in4,$leperm 3407 le?vperm $in5,$in5,$in5,$leperm 3408 3409 le?vperm $out0,$out0,$out0,$leperm 3410 le?vperm $out1,$out1,$out1,$leperm 3411 stvx_u $out0,$x00,$out # store output 3412 vxor $out0,$in0,$twk0 3413 le?vperm $out2,$out2,$out2,$leperm 3414 stvx_u $out1,$x10,$out 3415 vxor $out1,$in1,$twk1 3416 le?vperm $out3,$out3,$out3,$leperm 3417 stvx_u $out2,$x20,$out 3418 vxor $out2,$in2,$twk2 3419 le?vperm $out4,$out4,$out4,$leperm 3420 stvx_u $out3,$x30,$out 3421 vxor $out3,$in3,$twk3 3422 le?vperm $out5,$out5,$out5,$leperm 3423 stvx_u $out4,$x40,$out 3424 vxor $out4,$in4,$twk4 3425 stvx_u $out5,$x50,$out 3426 vxor $out5,$in5,$twk5 3427 addi $out,$out,0x60 3428 3429 mtctr $rounds 3430 beq Loop_xts_dec6x # did $len-=96 borrow? 3431 3432 xxlor 32+$eighty7, 2, 2 3433 3434 addic. $len,$len,0x60 3435 beq Lxts_dec6x_zero 3436 cmpwi $len,0x20 3437 blt Lxts_dec6x_one 3438 nop 3439 beq Lxts_dec6x_two 3440 cmpwi $len,0x40 3441 blt Lxts_dec6x_three 3442 nop 3443 beq Lxts_dec6x_four 3444 3445Lxts_dec6x_five: 3446 vxor $out0,$in1,$twk0 3447 vxor $out1,$in2,$twk1 3448 vxor $out2,$in3,$twk2 3449 vxor $out3,$in4,$twk3 3450 vxor $out4,$in5,$twk4 3451 3452 bl _aesp8_xts_dec5x 3453 3454 le?vperm $out0,$out0,$out0,$leperm 3455 vmr $twk0,$twk5 # unused tweak 3456 vxor $twk1,$tweak,$rndkey0 3457 le?vperm $out1,$out1,$out1,$leperm 3458 stvx_u $out0,$x00,$out # store output 3459 vxor $out0,$in0,$twk1 3460 le?vperm $out2,$out2,$out2,$leperm 3461 stvx_u $out1,$x10,$out 3462 le?vperm $out3,$out3,$out3,$leperm 3463 stvx_u $out2,$x20,$out 3464 le?vperm $out4,$out4,$out4,$leperm 3465 stvx_u $out3,$x30,$out 3466 stvx_u $out4,$x40,$out 3467 addi $out,$out,0x50 3468 bne Lxts_dec6x_steal 3469 b Lxts_dec6x_done 3470 3471.align 4 3472Lxts_dec6x_four: 3473 vxor $out0,$in2,$twk0 3474 vxor $out1,$in3,$twk1 3475 vxor $out2,$in4,$twk2 3476 vxor $out3,$in5,$twk3 3477 vxor $out4,$out4,$out4 3478 3479 bl _aesp8_xts_dec5x 3480 3481 le?vperm $out0,$out0,$out0,$leperm 3482 vmr $twk0,$twk4 # unused tweak 3483 vmr $twk1,$twk5 3484 le?vperm $out1,$out1,$out1,$leperm 3485 stvx_u $out0,$x00,$out # store output 3486 vxor $out0,$in0,$twk5 3487 le?vperm $out2,$out2,$out2,$leperm 3488 stvx_u $out1,$x10,$out 3489 le?vperm $out3,$out3,$out3,$leperm 3490 stvx_u $out2,$x20,$out 3491 stvx_u $out3,$x30,$out 3492 addi $out,$out,0x40 3493 bne Lxts_dec6x_steal 3494 b Lxts_dec6x_done 3495 3496.align 4 3497Lxts_dec6x_three: 3498 vxor $out0,$in3,$twk0 3499 vxor $out1,$in4,$twk1 3500 vxor $out2,$in5,$twk2 3501 vxor $out3,$out3,$out3 3502 vxor $out4,$out4,$out4 3503 3504 bl _aesp8_xts_dec5x 3505 3506 le?vperm $out0,$out0,$out0,$leperm 3507 vmr $twk0,$twk3 # unused tweak 3508 vmr $twk1,$twk4 3509 le?vperm $out1,$out1,$out1,$leperm 3510 stvx_u $out0,$x00,$out # store output 3511 vxor $out0,$in0,$twk4 3512 le?vperm $out2,$out2,$out2,$leperm 3513 stvx_u $out1,$x10,$out 3514 stvx_u $out2,$x20,$out 3515 addi $out,$out,0x30 3516 bne Lxts_dec6x_steal 3517 b Lxts_dec6x_done 3518 3519.align 4 3520Lxts_dec6x_two: 3521 vxor $out0,$in4,$twk0 3522 vxor $out1,$in5,$twk1 3523 vxor $out2,$out2,$out2 3524 vxor $out3,$out3,$out3 3525 vxor $out4,$out4,$out4 3526 3527 bl _aesp8_xts_dec5x 3528 3529 le?vperm $out0,$out0,$out0,$leperm 3530 vmr $twk0,$twk2 # unused tweak 3531 vmr $twk1,$twk3 3532 le?vperm $out1,$out1,$out1,$leperm 3533 stvx_u $out0,$x00,$out # store output 3534 vxor $out0,$in0,$twk3 3535 stvx_u $out1,$x10,$out 3536 addi $out,$out,0x20 3537 bne Lxts_dec6x_steal 3538 b Lxts_dec6x_done 3539 3540.align 4 3541Lxts_dec6x_one: 3542 vxor $out0,$in5,$twk0 3543 nop 3544Loop_xts_dec1x: 3545 vncipher $out0,$out0,v24 3546 lvx v24,$x20,$key_ # round[3] 3547 addi $key_,$key_,0x20 3548 3549 vncipher $out0,$out0,v25 3550 lvx v25,$x10,$key_ # round[4] 3551 bdnz Loop_xts_dec1x 3552 3553 subi r0,$taillen,1 3554 vncipher $out0,$out0,v24 3555 3556 andi. r0,r0,16 3557 cmpwi $taillen,0 3558 vncipher $out0,$out0,v25 3559 3560 sub $inp,$inp,r0 3561 vncipher $out0,$out0,v26 3562 3563 lvx_u $in0,0,$inp 3564 vncipher $out0,$out0,v27 3565 3566 addi $key_,$sp,$FRAME+15 # rewind $key_ 3567 vncipher $out0,$out0,v28 3568 lvx v24,$x00,$key_ # re-pre-load round[1] 3569 3570 vncipher $out0,$out0,v29 3571 lvx v25,$x10,$key_ # re-pre-load round[2] 3572 vxor $twk0,$twk0,v31 3573 3574 le?vperm $in0,$in0,$in0,$leperm 3575 vncipher $out0,$out0,v30 3576 3577 mtctr $rounds 3578 vncipherlast $out0,$out0,$twk0 3579 3580 vmr $twk0,$twk1 # unused tweak 3581 vmr $twk1,$twk2 3582 le?vperm $out0,$out0,$out0,$leperm 3583 stvx_u $out0,$x00,$out # store output 3584 addi $out,$out,0x10 3585 vxor $out0,$in0,$twk2 3586 bne Lxts_dec6x_steal 3587 b Lxts_dec6x_done 3588 3589.align 4 3590Lxts_dec6x_zero: 3591 cmpwi $taillen,0 3592 beq Lxts_dec6x_done 3593 3594 lvx_u $in0,0,$inp 3595 le?vperm $in0,$in0,$in0,$leperm 3596 vxor $out0,$in0,$twk1 3597Lxts_dec6x_steal: 3598 vncipher $out0,$out0,v24 3599 lvx v24,$x20,$key_ # round[3] 3600 addi $key_,$key_,0x20 3601 3602 vncipher $out0,$out0,v25 3603 lvx v25,$x10,$key_ # round[4] 3604 bdnz Lxts_dec6x_steal 3605 3606 add $inp,$inp,$taillen 3607 vncipher $out0,$out0,v24 3608 3609 cmpwi $taillen,0 3610 vncipher $out0,$out0,v25 3611 3612 lvx_u $in0,0,$inp 3613 vncipher $out0,$out0,v26 3614 3615 lvsr $inpperm,0,$taillen # $in5 is no more 3616 vncipher $out0,$out0,v27 3617 3618 addi $key_,$sp,$FRAME+15 # rewind $key_ 3619 vncipher $out0,$out0,v28 3620 lvx v24,$x00,$key_ # re-pre-load round[1] 3621 3622 vncipher $out0,$out0,v29 3623 lvx v25,$x10,$key_ # re-pre-load round[2] 3624 vxor $twk1,$twk1,v31 3625 3626 le?vperm $in0,$in0,$in0,$leperm 3627 vncipher $out0,$out0,v30 3628 3629 vperm $in0,$in0,$in0,$inpperm 3630 vncipherlast $tmp,$out0,$twk1 3631 3632 le?vperm $out0,$tmp,$tmp,$leperm 3633 le?stvx_u $out0,0,$out 3634 be?stvx_u $tmp,0,$out 3635 3636 vxor $out0,$out0,$out0 3637 vspltisb $out1,-1 3638 vperm $out0,$out0,$out1,$inpperm 3639 vsel $out0,$in0,$tmp,$out0 3640 vxor $out0,$out0,$twk0 3641 3642 subi r30,$out,1 3643 mtctr $taillen 3644Loop_xts_dec6x_steal: 3645 lbzu r0,1(r30) 3646 stb r0,16(r30) 3647 bdnz Loop_xts_dec6x_steal 3648 3649 li $taillen,0 3650 mtctr $rounds 3651 b Loop_xts_dec1x # one more time... 3652 3653.align 4 3654Lxts_dec6x_done: 3655 ${UCMP}i $ivp,0 3656 beq Lxts_dec6x_ret 3657 3658 vxor $tweak,$twk0,$rndkey0 3659 le?vperm $tweak,$tweak,$tweak,$leperm 3660 stvx_u $tweak,0,$ivp 3661 3662Lxts_dec6x_ret: 3663 mtlr r11 3664 li r10,`$FRAME+15` 3665 li r11,`$FRAME+31` 3666 stvx $seven,r10,$sp # wipe copies of round keys 3667 addi r10,r10,32 3668 stvx $seven,r11,$sp 3669 addi r11,r11,32 3670 stvx $seven,r10,$sp 3671 addi r10,r10,32 3672 stvx $seven,r11,$sp 3673 addi r11,r11,32 3674 stvx $seven,r10,$sp 3675 addi r10,r10,32 3676 stvx $seven,r11,$sp 3677 addi r11,r11,32 3678 stvx $seven,r10,$sp 3679 addi r10,r10,32 3680 stvx $seven,r11,$sp 3681 addi r11,r11,32 3682 3683 mtspr 256,$vrsave 3684 lvx v20,r10,$sp # ABI says so 3685 addi r10,r10,32 3686 lvx v21,r11,$sp 3687 addi r11,r11,32 3688 lvx v22,r10,$sp 3689 addi r10,r10,32 3690 lvx v23,r11,$sp 3691 addi r11,r11,32 3692 lvx v24,r10,$sp 3693 addi r10,r10,32 3694 lvx v25,r11,$sp 3695 addi r11,r11,32 3696 lvx v26,r10,$sp 3697 addi r10,r10,32 3698 lvx v27,r11,$sp 3699 addi r11,r11,32 3700 lvx v28,r10,$sp 3701 addi r10,r10,32 3702 lvx v29,r11,$sp 3703 addi r11,r11,32 3704 lvx v30,r10,$sp 3705 lvx v31,r11,$sp 3706 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3707 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3708 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3709 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3710 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3711 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3712 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3713 blr 3714 .long 0 3715 .byte 0,12,0x04,1,0x80,6,6,0 3716 .long 0 3717 3718.align 5 3719_aesp8_xts_dec5x: 3720 vncipher $out0,$out0,v24 3721 vncipher $out1,$out1,v24 3722 vncipher $out2,$out2,v24 3723 vncipher $out3,$out3,v24 3724 vncipher $out4,$out4,v24 3725 lvx v24,$x20,$key_ # round[3] 3726 addi $key_,$key_,0x20 3727 3728 vncipher $out0,$out0,v25 3729 vncipher $out1,$out1,v25 3730 vncipher $out2,$out2,v25 3731 vncipher $out3,$out3,v25 3732 vncipher $out4,$out4,v25 3733 lvx v25,$x10,$key_ # round[4] 3734 bdnz _aesp8_xts_dec5x 3735 3736 subi r0,$taillen,1 3737 vncipher $out0,$out0,v24 3738 vncipher $out1,$out1,v24 3739 vncipher $out2,$out2,v24 3740 vncipher $out3,$out3,v24 3741 vncipher $out4,$out4,v24 3742 3743 andi. r0,r0,16 3744 cmpwi $taillen,0 3745 vncipher $out0,$out0,v25 3746 vncipher $out1,$out1,v25 3747 vncipher $out2,$out2,v25 3748 vncipher $out3,$out3,v25 3749 vncipher $out4,$out4,v25 3750 vxor $twk0,$twk0,v31 3751 3752 sub $inp,$inp,r0 3753 vncipher $out0,$out0,v26 3754 vncipher $out1,$out1,v26 3755 vncipher $out2,$out2,v26 3756 vncipher $out3,$out3,v26 3757 vncipher $out4,$out4,v26 3758 vxor $in1,$twk1,v31 3759 3760 vncipher $out0,$out0,v27 3761 lvx_u $in0,0,$inp 3762 vncipher $out1,$out1,v27 3763 vncipher $out2,$out2,v27 3764 vncipher $out3,$out3,v27 3765 vncipher $out4,$out4,v27 3766 vxor $in2,$twk2,v31 3767 3768 addi $key_,$sp,$FRAME+15 # rewind $key_ 3769 vncipher $out0,$out0,v28 3770 vncipher $out1,$out1,v28 3771 vncipher $out2,$out2,v28 3772 vncipher $out3,$out3,v28 3773 vncipher $out4,$out4,v28 3774 lvx v24,$x00,$key_ # re-pre-load round[1] 3775 vxor $in3,$twk3,v31 3776 3777 vncipher $out0,$out0,v29 3778 le?vperm $in0,$in0,$in0,$leperm 3779 vncipher $out1,$out1,v29 3780 vncipher $out2,$out2,v29 3781 vncipher $out3,$out3,v29 3782 vncipher $out4,$out4,v29 3783 lvx v25,$x10,$key_ # re-pre-load round[2] 3784 vxor $in4,$twk4,v31 3785 3786 vncipher $out0,$out0,v30 3787 vncipher $out1,$out1,v30 3788 vncipher $out2,$out2,v30 3789 vncipher $out3,$out3,v30 3790 vncipher $out4,$out4,v30 3791 3792 vncipherlast $out0,$out0,$twk0 3793 vncipherlast $out1,$out1,$in1 3794 vncipherlast $out2,$out2,$in2 3795 vncipherlast $out3,$out3,$in3 3796 vncipherlast $out4,$out4,$in4 3797 mtctr $rounds 3798 blr 3799 .long 0 3800 .byte 0,12,0x14,0,0,0,0,0 3801___ 3802}} }}} 3803 3804my $consts=1; 3805foreach(split("\n",$code)) { 3806 s/\`([^\`]*)\`/eval($1)/geo; 3807 3808 # constants table endian-specific conversion 3809 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { 3810 my $conv=$3; 3811 my @bytes=(); 3812 3813 # convert to endian-agnostic format 3814 if ($1 eq "long") { 3815 foreach (split(/,\s*/,$2)) { 3816 my $l = /^0/?oct:int; 3817 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; 3818 } 3819 } else { 3820 @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); 3821 } 3822 3823 # little-endian conversion 3824 if ($flavour =~ /le$/o) { 3825 SWITCH: for($conv) { 3826 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; 3827 /\?rev/ && do { @bytes=reverse(@bytes); last; }; 3828 } 3829 } 3830 3831 #emit 3832 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; 3833 next; 3834 } 3835 $consts=0 if (m/Lconsts:/o); # end of table 3836 3837 # instructions prefixed with '?' are endian-specific and need 3838 # to be adjusted accordingly... 3839 if ($flavour =~ /le$/o) { # little-endian 3840 s/le\?//o or 3841 s/be\?/#be#/o or 3842 s/\?lvsr/lvsl/o or 3843 s/\?lvsl/lvsr/o or 3844 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or 3845 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or 3846 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; 3847 } else { # big-endian 3848 s/le\?/#le#/o or 3849 s/be\?//o or 3850 s/\?([a-z]+)/$1/o; 3851 } 3852 3853 print $_,"\n"; 3854} 3855 3856close STDOUT or die "error closing STDOUT: $!"; 3857