#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

# Usage: <this script> <flavour> [<output>]
#
# <flavour> must contain "64" or "32" (selects pointer size and the
# matching load/store/compare/shift mnemonics) and may end in "le" to
# request little-endian code.  The generated text is not written
# directly: STDOUT is piped through ppc-xlate.pl, which receives
# <flavour> and the remaining command-line argument.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Look for the translator next to this script first, then in the
# perlasm directory two levels up.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Low-precedence "or" is required here: with "||" the die would bind to
# the (always true) command string and a failed open went unnoticed.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
# Emits the rcon constants table, the Lconsts helper that returns the
# table address in $ptr, and the ${prefix}_set_encrypt_key and
# ${prefix}_set_decrypt_key entry points.  set_encrypt_key returns -1
# for a null input/output pointer, -2 for an unsupported key length and
# 0 on success; set_decrypt_key calls it and then swaps the round keys
# end-for-end in place.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir) appends the single-block routine ${prefix}_${dir}crypt
# to $code.  $dir is "en" or "de"; for "de" the vncipher/vncipherlast
# mnemonics are emitted instead of vcipher/vcipherlast.
# (Declared without a prototype: the former empty "()" prototype
# contradicted the one-argument call and had to be bypassed with "&".)
sub gen_block {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}

# Post-process the accumulated text line by line and print it (into the
# ppc-xlate.pl pipe).  $consts stays true until the "Lconsts:" label is
# seen, i.e. while the lines still belong to the constants table.
my $consts=1;
foreach(split("\n",$code)) {
	# evaluate any `...` expression embedded in the line
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		# values with a leading 0 go through oct() (handles 0x..)
		my $word = /^0/?oct:int;
		push @bytes,($word>>24)&0xff,($word>>16)&0xff,($word>>8)&0xff,$word&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    #emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

# close() on a piped handle waits for the child and reports both write
# errors and a non-zero exit of ppc-xlate.pl; ignoring it let a failed
# translation pass as success.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";