#! /usr/bin/env perl
# Copyright 2014-2024 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0
# POWER9[le]	4.02/0.86	0.84	1.05
# POWER9[be]	3.99/0.78	0.79	0.97

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Pick the word size, the link-register save offset and the store-with-
# update/load/store/compare/shift mnemonics that the templates below
# interpolate.  A flavour mentioning neither 64 nor 32 is rejected.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero ($SIZE_T) when the flavour string ends in "le", 0 otherwise.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Look for ppc-xlate.pl in this script's own directory first, then in
# ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# From here on STDOUT is a pipe into ppc-xlate.pl, which is handed the
# flavour and the output file name.
open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# stack frame size used by the routines below
$prefix="aes_p8";	# prefix of every generated global symbol

$sp="r1";
$vrsave="r12";		# GPR that holds the saved SPR 256 value

#########################################################################
{{{	# Key setup procedures						#
# General-purpose registers r3..r8 and vector registers v0..v11 used by
# the key setup templates.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x58
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	# r3 is returned as -1 when $inp or $out is zero, -2 when $bits
	# is not 128, 192 or 256, and 0 once the schedule is written.
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	# Builds the schedule via Lset_encrypt_key, then swaps 16-byte
	# round keys pairwise from both ends of the schedule.
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
3977bded2dbSJung-uk Kim lwz r6, 4($inp) 3987bded2dbSJung-uk Kim lwz r7, 8($inp) 3997bded2dbSJung-uk Kim lwz r8, 12($inp) 4007bded2dbSJung-uk Kim addi $inp,$inp,16 4017bded2dbSJung-uk Kim lwz r9, 0($out) 4027bded2dbSJung-uk Kim lwz r10,4($out) 4037bded2dbSJung-uk Kim lwz r11,8($out) 4047bded2dbSJung-uk Kim lwz r12,12($out) 4057bded2dbSJung-uk Kim stw r0, 0($out) 4067bded2dbSJung-uk Kim stw r6, 4($out) 4077bded2dbSJung-uk Kim stw r7, 8($out) 4087bded2dbSJung-uk Kim stw r8, 12($out) 4097bded2dbSJung-uk Kim subi $out,$out,16 4107bded2dbSJung-uk Kim stw r9, -16($inp) 4117bded2dbSJung-uk Kim stw r10,-12($inp) 4127bded2dbSJung-uk Kim stw r11,-8($inp) 4137bded2dbSJung-uk Kim stw r12,-4($inp) 4147bded2dbSJung-uk Kim bdnz Ldeckey 4157bded2dbSJung-uk Kim 4167bded2dbSJung-uk Kim xor r3,r3,r3 # return value 4177bded2dbSJung-uk KimLdec_key_abort: 4187bded2dbSJung-uk Kim addi $sp,$sp,$FRAME 4197bded2dbSJung-uk Kim blr 4207bded2dbSJung-uk Kim .long 0 4217bded2dbSJung-uk Kim .byte 0,12,4,1,0x80,0,3,0 4227bded2dbSJung-uk Kim .long 0 4237bded2dbSJung-uk Kim.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key 4247bded2dbSJung-uk Kim___ 4257bded2dbSJung-uk Kim}}} 4267bded2dbSJung-uk Kim######################################################################### 4277bded2dbSJung-uk Kim{{{ # Single block en- and decrypt procedures # 4287bded2dbSJung-uk Kimsub gen_block () { 4297bded2dbSJung-uk Kimmy $dir = shift; 4307bded2dbSJung-uk Kimmy $n = $dir eq "de" ? 
"n" : ""; 4317bded2dbSJung-uk Kimmy ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); 4327bded2dbSJung-uk Kim 4337bded2dbSJung-uk Kim$code.=<<___; 4347bded2dbSJung-uk Kim.globl .${prefix}_${dir}crypt 4357bded2dbSJung-uk Kim.align 5 4367bded2dbSJung-uk Kim.${prefix}_${dir}crypt: 4377bded2dbSJung-uk Kim lwz $rounds,240($key) 4387bded2dbSJung-uk Kim lis r0,0xfc00 4397bded2dbSJung-uk Kim mfspr $vrsave,256 4407bded2dbSJung-uk Kim li $idx,15 # 15 is not typo 4417bded2dbSJung-uk Kim mtspr 256,r0 4427bded2dbSJung-uk Kim 4437bded2dbSJung-uk Kim lvx v0,0,$inp 4447bded2dbSJung-uk Kim neg r11,$out 4457bded2dbSJung-uk Kim lvx v1,$idx,$inp 4467bded2dbSJung-uk Kim lvsl v2,0,$inp # inpperm 4477bded2dbSJung-uk Kim le?vspltisb v4,0x0f 4487bded2dbSJung-uk Kim ?lvsl v3,0,r11 # outperm 4497bded2dbSJung-uk Kim le?vxor v2,v2,v4 4507bded2dbSJung-uk Kim li $idx,16 4517bded2dbSJung-uk Kim vperm v0,v0,v1,v2 # align [and byte swap in LE] 4527bded2dbSJung-uk Kim lvx v1,0,$key 4537bded2dbSJung-uk Kim ?lvsl v5,0,$key # keyperm 4547bded2dbSJung-uk Kim srwi $rounds,$rounds,1 4557bded2dbSJung-uk Kim lvx v2,$idx,$key 4567bded2dbSJung-uk Kim addi $idx,$idx,16 4577bded2dbSJung-uk Kim subi $rounds,$rounds,1 4587bded2dbSJung-uk Kim ?vperm v1,v1,v2,v5 # align round key 4597bded2dbSJung-uk Kim 4607bded2dbSJung-uk Kim vxor v0,v0,v1 4617bded2dbSJung-uk Kim lvx v1,$idx,$key 4627bded2dbSJung-uk Kim addi $idx,$idx,16 4637bded2dbSJung-uk Kim mtctr $rounds 4647bded2dbSJung-uk Kim 4657bded2dbSJung-uk KimLoop_${dir}c: 4667bded2dbSJung-uk Kim ?vperm v2,v2,v1,v5 4677bded2dbSJung-uk Kim v${n}cipher v0,v0,v2 4687bded2dbSJung-uk Kim lvx v2,$idx,$key 4697bded2dbSJung-uk Kim addi $idx,$idx,16 4707bded2dbSJung-uk Kim ?vperm v1,v1,v2,v5 4717bded2dbSJung-uk Kim v${n}cipher v0,v0,v1 4727bded2dbSJung-uk Kim lvx v1,$idx,$key 4737bded2dbSJung-uk Kim addi $idx,$idx,16 4747bded2dbSJung-uk Kim bdnz Loop_${dir}c 4757bded2dbSJung-uk Kim 4767bded2dbSJung-uk Kim ?vperm v2,v2,v1,v5 4777bded2dbSJung-uk Kim v${n}cipher v0,v0,v2 
4787bded2dbSJung-uk Kim lvx v2,$idx,$key 4797bded2dbSJung-uk Kim ?vperm v1,v1,v2,v5 4807bded2dbSJung-uk Kim v${n}cipherlast v0,v0,v1 4817bded2dbSJung-uk Kim 4827bded2dbSJung-uk Kim vspltisb v2,-1 4837bded2dbSJung-uk Kim vxor v1,v1,v1 4847bded2dbSJung-uk Kim li $idx,15 # 15 is not typo 4857bded2dbSJung-uk Kim ?vperm v2,v1,v2,v3 # outmask 4867bded2dbSJung-uk Kim le?vxor v3,v3,v4 4877bded2dbSJung-uk Kim lvx v1,0,$out # outhead 4887bded2dbSJung-uk Kim vperm v0,v0,v0,v3 # rotate [and byte swap in LE] 4897bded2dbSJung-uk Kim vsel v1,v1,v0,v2 4907bded2dbSJung-uk Kim lvx v4,$idx,$out 4917bded2dbSJung-uk Kim stvx v1,0,$out 4927bded2dbSJung-uk Kim vsel v0,v0,v4,v2 4937bded2dbSJung-uk Kim stvx v0,$idx,$out 4947bded2dbSJung-uk Kim 4957bded2dbSJung-uk Kim mtspr 256,$vrsave 4967bded2dbSJung-uk Kim blr 4977bded2dbSJung-uk Kim .long 0 4987bded2dbSJung-uk Kim .byte 0,12,0x14,0,0,0,3,0 4997bded2dbSJung-uk Kim .long 0 5007bded2dbSJung-uk Kim.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt 5017bded2dbSJung-uk Kim___ 5027bded2dbSJung-uk Kim} 5037bded2dbSJung-uk Kim&gen_block("en"); 5047bded2dbSJung-uk Kim&gen_block("de"); 5057bded2dbSJung-uk Kim}}} 5067bded2dbSJung-uk Kim######################################################################### 5077bded2dbSJung-uk Kim{{{ # CBC en- and decrypt procedures # 5087bded2dbSJung-uk Kimmy ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); 5097bded2dbSJung-uk Kimmy ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); 5107bded2dbSJung-uk Kimmy ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= 5117bded2dbSJung-uk Kim map("v$_",(4..10)); 5127bded2dbSJung-uk Kim$code.=<<___; 5137bded2dbSJung-uk Kim.globl .${prefix}_cbc_encrypt 5147bded2dbSJung-uk Kim.align 5 5157bded2dbSJung-uk Kim.${prefix}_cbc_encrypt: 5167bded2dbSJung-uk Kim ${UCMP}i $len,16 5177bded2dbSJung-uk Kim bltlr- 5187bded2dbSJung-uk Kim 5197bded2dbSJung-uk Kim cmpwi $enc,0 # test direction 5207bded2dbSJung-uk Kim lis r0,0xffe0 5217bded2dbSJung-uk Kim mfspr 
$vrsave,256 5227bded2dbSJung-uk Kim mtspr 256,r0 5237bded2dbSJung-uk Kim 5247bded2dbSJung-uk Kim li $idx,15 5257bded2dbSJung-uk Kim vxor $rndkey0,$rndkey0,$rndkey0 5267bded2dbSJung-uk Kim le?vspltisb $tmp,0x0f 5277bded2dbSJung-uk Kim 5287bded2dbSJung-uk Kim lvx $ivec,0,$ivp # load [unaligned] iv 5297bded2dbSJung-uk Kim lvsl $inpperm,0,$ivp 5307bded2dbSJung-uk Kim lvx $inptail,$idx,$ivp 5317bded2dbSJung-uk Kim le?vxor $inpperm,$inpperm,$tmp 5327bded2dbSJung-uk Kim vperm $ivec,$ivec,$inptail,$inpperm 5337bded2dbSJung-uk Kim 5347bded2dbSJung-uk Kim neg r11,$inp 5357bded2dbSJung-uk Kim ?lvsl $keyperm,0,$key # prepare for unaligned key 5367bded2dbSJung-uk Kim lwz $rounds,240($key) 5377bded2dbSJung-uk Kim 5387bded2dbSJung-uk Kim lvsr $inpperm,0,r11 # prepare for unaligned load 5397bded2dbSJung-uk Kim lvx $inptail,0,$inp 5407bded2dbSJung-uk Kim addi $inp,$inp,15 # 15 is not typo 5417bded2dbSJung-uk Kim le?vxor $inpperm,$inpperm,$tmp 5427bded2dbSJung-uk Kim 5437bded2dbSJung-uk Kim ?lvsr $outperm,0,$out # prepare for unaligned store 5447bded2dbSJung-uk Kim vspltisb $outmask,-1 5457bded2dbSJung-uk Kim lvx $outhead,0,$out 5467bded2dbSJung-uk Kim ?vperm $outmask,$rndkey0,$outmask,$outperm 5477bded2dbSJung-uk Kim le?vxor $outperm,$outperm,$tmp 5487bded2dbSJung-uk Kim 5497bded2dbSJung-uk Kim srwi $rounds,$rounds,1 5507bded2dbSJung-uk Kim li $idx,16 5517bded2dbSJung-uk Kim subi $rounds,$rounds,1 5527bded2dbSJung-uk Kim beq Lcbc_dec 5537bded2dbSJung-uk Kim 5547bded2dbSJung-uk KimLcbc_enc: 5557bded2dbSJung-uk Kim vmr $inout,$inptail 5567bded2dbSJung-uk Kim lvx $inptail,0,$inp 5577bded2dbSJung-uk Kim addi $inp,$inp,16 5587bded2dbSJung-uk Kim mtctr $rounds 5597bded2dbSJung-uk Kim subi $len,$len,16 # len-=16 5607bded2dbSJung-uk Kim 5617bded2dbSJung-uk Kim lvx $rndkey0,0,$key 5627bded2dbSJung-uk Kim vperm $inout,$inout,$inptail,$inpperm 5637bded2dbSJung-uk Kim lvx $rndkey1,$idx,$key 5647bded2dbSJung-uk Kim addi $idx,$idx,16 5657bded2dbSJung-uk Kim ?vperm 
$rndkey0,$rndkey0,$rndkey1,$keyperm 5667bded2dbSJung-uk Kim vxor $inout,$inout,$rndkey0 5677bded2dbSJung-uk Kim lvx $rndkey0,$idx,$key 5687bded2dbSJung-uk Kim addi $idx,$idx,16 5697bded2dbSJung-uk Kim vxor $inout,$inout,$ivec 5707bded2dbSJung-uk Kim 5717bded2dbSJung-uk KimLoop_cbc_enc: 5727bded2dbSJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 5737bded2dbSJung-uk Kim vcipher $inout,$inout,$rndkey1 5747bded2dbSJung-uk Kim lvx $rndkey1,$idx,$key 5757bded2dbSJung-uk Kim addi $idx,$idx,16 5767bded2dbSJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 5777bded2dbSJung-uk Kim vcipher $inout,$inout,$rndkey0 5787bded2dbSJung-uk Kim lvx $rndkey0,$idx,$key 5797bded2dbSJung-uk Kim addi $idx,$idx,16 5807bded2dbSJung-uk Kim bdnz Loop_cbc_enc 5817bded2dbSJung-uk Kim 5827bded2dbSJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 5837bded2dbSJung-uk Kim vcipher $inout,$inout,$rndkey1 5847bded2dbSJung-uk Kim lvx $rndkey1,$idx,$key 5857bded2dbSJung-uk Kim li $idx,16 5867bded2dbSJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 5877bded2dbSJung-uk Kim vcipherlast $ivec,$inout,$rndkey0 5887bded2dbSJung-uk Kim ${UCMP}i $len,16 5897bded2dbSJung-uk Kim 5907bded2dbSJung-uk Kim vperm $tmp,$ivec,$ivec,$outperm 5917bded2dbSJung-uk Kim vsel $inout,$outhead,$tmp,$outmask 5927bded2dbSJung-uk Kim vmr $outhead,$tmp 5937bded2dbSJung-uk Kim stvx $inout,0,$out 5947bded2dbSJung-uk Kim addi $out,$out,16 5957bded2dbSJung-uk Kim bge Lcbc_enc 5967bded2dbSJung-uk Kim 5977bded2dbSJung-uk Kim b Lcbc_done 5987bded2dbSJung-uk Kim 5997bded2dbSJung-uk Kim.align 4 6007bded2dbSJung-uk KimLcbc_dec: 6017bded2dbSJung-uk Kim ${UCMP}i $len,128 6027bded2dbSJung-uk Kim bge _aesp8_cbc_decrypt8x 6037bded2dbSJung-uk Kim vmr $tmp,$inptail 6047bded2dbSJung-uk Kim lvx $inptail,0,$inp 6057bded2dbSJung-uk Kim addi $inp,$inp,16 6067bded2dbSJung-uk Kim mtctr $rounds 6077bded2dbSJung-uk Kim subi $len,$len,16 # len-=16 6087bded2dbSJung-uk Kim 6097bded2dbSJung-uk Kim lvx $rndkey0,0,$key 6107bded2dbSJung-uk Kim 
vperm $tmp,$tmp,$inptail,$inpperm 6117bded2dbSJung-uk Kim lvx $rndkey1,$idx,$key 6127bded2dbSJung-uk Kim addi $idx,$idx,16 6137bded2dbSJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 6147bded2dbSJung-uk Kim vxor $inout,$tmp,$rndkey0 6157bded2dbSJung-uk Kim lvx $rndkey0,$idx,$key 6167bded2dbSJung-uk Kim addi $idx,$idx,16 6177bded2dbSJung-uk Kim 6187bded2dbSJung-uk KimLoop_cbc_dec: 6197bded2dbSJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 6207bded2dbSJung-uk Kim vncipher $inout,$inout,$rndkey1 6217bded2dbSJung-uk Kim lvx $rndkey1,$idx,$key 6227bded2dbSJung-uk Kim addi $idx,$idx,16 6237bded2dbSJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 6247bded2dbSJung-uk Kim vncipher $inout,$inout,$rndkey0 6257bded2dbSJung-uk Kim lvx $rndkey0,$idx,$key 6267bded2dbSJung-uk Kim addi $idx,$idx,16 6277bded2dbSJung-uk Kim bdnz Loop_cbc_dec 6287bded2dbSJung-uk Kim 6297bded2dbSJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 6307bded2dbSJung-uk Kim vncipher $inout,$inout,$rndkey1 6317bded2dbSJung-uk Kim lvx $rndkey1,$idx,$key 6327bded2dbSJung-uk Kim li $idx,16 6337bded2dbSJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 6347bded2dbSJung-uk Kim vncipherlast $inout,$inout,$rndkey0 6357bded2dbSJung-uk Kim ${UCMP}i $len,16 6367bded2dbSJung-uk Kim 6377bded2dbSJung-uk Kim vxor $inout,$inout,$ivec 6387bded2dbSJung-uk Kim vmr $ivec,$tmp 6397bded2dbSJung-uk Kim vperm $tmp,$inout,$inout,$outperm 6407bded2dbSJung-uk Kim vsel $inout,$outhead,$tmp,$outmask 6417bded2dbSJung-uk Kim vmr $outhead,$tmp 6427bded2dbSJung-uk Kim stvx $inout,0,$out 6437bded2dbSJung-uk Kim addi $out,$out,16 6447bded2dbSJung-uk Kim bge Lcbc_dec 6457bded2dbSJung-uk Kim 6467bded2dbSJung-uk KimLcbc_done: 6477bded2dbSJung-uk Kim addi $out,$out,-1 6487bded2dbSJung-uk Kim lvx $inout,0,$out # redundant in aligned case 6497bded2dbSJung-uk Kim vsel $inout,$outhead,$inout,$outmask 6507bded2dbSJung-uk Kim stvx $inout,0,$out 6517bded2dbSJung-uk Kim 6527bded2dbSJung-uk Kim neg $enc,$ivp # write 
# [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
#
# _aesp8_cbc_decrypt8x decrypts eight 16-byte blocks per loop iteration
# and finishes with a 1..7 block tail.  It is not exported: there is no
# .globl for it and the closing .size directive is the one for
# ${prefix}_cbc_encrypt, so it is entered by a branch from that code (the
# comments below call it the "caller").  NOTE(review): the branch itself
# is outside this section - confirm the entry conditions there.
#
# Inherited from the enclosing scope (not redeclared here): $inp, $out,
# $len, $key, $ivp, $rounds, $idx, $ivec, $inpperm and $vrsave.
#
# Stack frame, $FRAME+21*16+6*$SIZE_T bytes:
#   $FRAME+15 ...		copies of the first round keys (8 slots,
#				overwritten with $inpperm before returning)
#   $FRAME+8*16+15 ...		saved v20-v31
#   $FRAME+21*16-4		saved vrsave
#   $FRAME+21*16 ...		saved r26-r31 (reused as the constant
#				offsets 0x20..0x70)
#
# Length handling: $len is biased by -128 on entry.  Inside the loop
# "subic" subtracts another 128 and "subfe." turns the borrow into a
# 0/-1 mask in r0; the masked $len is added to $inp so that the loads
# performed at the end of the last full iteration end on the final
# input block, which leaves the tail data in the upper inX registers.
#
# The "?" and "le?" instruction prefixes are handled by the perlasm
# translator (presumably ppc-xlate.pl - confirm there); they mark
# endian-sensitive and little-endian-only instructions respectively.
#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li		r10,`$FRAME+8*16+15`
	li		r11,`$FRAME+8*16+31`
	stvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	stvx		v21,r11,$sp
	addi		r11,r11,32
	stvx		v22,r10,$sp
	addi		r10,r10,32
	stvx		v23,r11,$sp
	addi		r11,r11,32
	stvx		v24,r10,$sp
	addi		r10,r10,32
	stvx		v25,r11,$sp
	addi		r11,r11,32
	stvx		v26,r10,$sp
	addi		r10,r10,32
	stvx		v27,r11,$sp
	addi		r11,r11,32
	stvx		v28,r10,$sp
	addi		r10,r10,32
	stvx		v29,r11,$sp
	addi		r11,r11,32
	stvx		v30,r10,$sp
	stvx		v31,r11,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total
	subi		$len,$len,128		# bias

	lvx		$rndkey0,$x00,$key	# load key schedule
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	lvx		v31,$x00,$key
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_cbc_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key
	addi		$key,$key,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_cbc_dec_key

	lvx		v26,$x10,$key
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key
	?vperm		v29,v29,v30,$keyperm
	lvx		$out0,$x70,$key		# borrow $out0
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$out0,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	#lvx		$inptail,0,$inp		# "caller" already did this
	#addi		$inp,$inp,15		# 15 is not typo
	subi		$inp,$inp,15		# undo "caller"

	le?li		$idx,8
	lvx_u		$in0,$x00,$inp		# load first 8 "words"
	le?lvsl		$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u		$in1,$x10,$inp
	le?vxor		$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u		$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor		$out0,$in0,$rndkey0
	lvx_u		$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor		$out1,$in1,$rndkey0
	lvx_u		$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor		$out2,$in2,$rndkey0
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor		$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out5,$in5,$rndkey0
	vxor		$out6,$in6,$rndkey0
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	b		Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x

	subic		$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and		r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in7 are loaded
						# with last "words"
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor		$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor		$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u		$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u		$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u		$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u		$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u		$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u		$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u		$in6,$x60,$inp
	vmr		$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u		$in7,$x70,$inp
	addi		$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor		$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor		$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	vxor		$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	vxor		$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	vxor		$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	vxor		$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	vxor		$out6,$in6,$rndkey0
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	vxor		$out7,$in7,$rndkey0

	mtctr		$rounds
	beq		Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.		$len,$len,128
	beq		Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor		$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor		$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor		$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor		$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor		$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor		$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor		$in6,$in6,v31

	cmplwi		$len,32			# switch($len)
	blt		Lcbc_dec8x_one
	nop
	beq		Lcbc_dec8x_two
	cmplwi		$len,64
	blt		Lcbc_dec8x_three
	nop
	beq		Lcbc_dec8x_four
	cmplwi		$len,96
	blt		Lcbc_dec8x_five
	nop
	beq		Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x50,$out
	stvx_u		$out7,$x60,$out
	addi		$out,$out,0x70
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x40,$out
	stvx_u		$out7,$x50,$out
	addi		$out,$out,0x60
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x30,$out
	stvx_u		$out7,$x40,$out
	addi		$out,$out,0x50
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x20,$out
	stvx_u		$out7,$x30,$out
	addi		$out,$out,0x40
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x10,$out
	stvx_u		$out7,$x20,$out
	addi		$out,$out,0x30
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr		$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x00,$out
	stvx_u		$out7,$x10,$out
	addi		$out,$out,0x20
	b		Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr		$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out7,0,$out
	addi		$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u		$ivec,0,$ivp		# write [unaligned] iv

	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
#
# .${prefix}_ctr32_encrypt_blocks: AES in CTR mode with a 32-bit counter
# increment ("vadduwm" adds \$one, a vector whose only non-zero byte is
# the last one, so only the last 32-bit word of the counter block
# changes and any carry out of it is dropped).
#
# Argument registers, in order r3..r7: $inp, $out, $len, $key, $ivp.
# $len is counted in blocks (see "blocks--" below); the routine returns
# immediately when it is less than 1.  The round count is read from
# offset 240 of the key structure.  r8-r10 ($x10, $rounds, $idx) and r11
# are scratch.
#
# Requests of 8 blocks or more branch to _aesp8_ctr32_encrypt8x; shorter
# ones are processed one block at a time by Loop_ctr32_enc, which reloads
# and re-aligns the round keys from $key on every pass.  Unaligned input
# and output are handled with lvsl/lvsr generated permute vectors and a
# final masked store that merges \$outhead with memory.
#
# The "?" and "le?" instruction prefixes are handled by the perlasm
# translator (presumably ppc-xlate.pl - confirm there); they mark
# endian-sensitive and little-endian-only instructions respectively.
#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm
	vsldoi		$one,$rndkey0,$one,1

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1

	${UCMP}i	$len,8
	bge		_aesp8_ctr32_encrypt8x

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	lvx		$rndkey0,0,$key
	mtctr		$rounds
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	b		Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_ctr32_enc

	vadduwm		$ivec,$ivec,$one
	vmr		$dat,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	subic.		$len,$len,1		# blocks--

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	vperm		$dat,$dat,$inptail,$inpperm
	li		$idx,16
	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx		$rndkey0,0,$key
	vxor		$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inout,$outperm
	vsel		$dat,$outhead,$inout,$outmask
	mtctr		$rounds
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr		$outhead,$inout
	vxor		$inout,$ivec,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	stvx		$dat,0,$out
	addi		$out,$out,16
	bne		Loop_ctr32_enc

	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
13847bded2dbSJung-uk Kim $x00=0 if ($flavour =~ /osx/); 13857bded2dbSJung-uk Kimmy ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); 13867bded2dbSJung-uk Kimmy ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); 13877bded2dbSJung-uk Kimmy $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 13887bded2dbSJung-uk Kim # v26-v31 last 6 round keys 13897bded2dbSJung-uk Kimmy ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment 13907bded2dbSJung-uk Kimmy ($two,$three,$four)=($outhead,$outperm,$outmask); 13917bded2dbSJung-uk Kim 13927bded2dbSJung-uk Kim$code.=<<___; 13937bded2dbSJung-uk Kim.align 5 13947bded2dbSJung-uk Kim_aesp8_ctr32_encrypt8x: 13957bded2dbSJung-uk Kim $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 13967bded2dbSJung-uk Kim li r10,`$FRAME+8*16+15` 13977bded2dbSJung-uk Kim li r11,`$FRAME+8*16+31` 13987bded2dbSJung-uk Kim stvx v20,r10,$sp # ABI says so 13997bded2dbSJung-uk Kim addi r10,r10,32 14007bded2dbSJung-uk Kim stvx v21,r11,$sp 14017bded2dbSJung-uk Kim addi r11,r11,32 14027bded2dbSJung-uk Kim stvx v22,r10,$sp 14037bded2dbSJung-uk Kim addi r10,r10,32 14047bded2dbSJung-uk Kim stvx v23,r11,$sp 14057bded2dbSJung-uk Kim addi r11,r11,32 14067bded2dbSJung-uk Kim stvx v24,r10,$sp 14077bded2dbSJung-uk Kim addi r10,r10,32 14087bded2dbSJung-uk Kim stvx v25,r11,$sp 14097bded2dbSJung-uk Kim addi r11,r11,32 14107bded2dbSJung-uk Kim stvx v26,r10,$sp 14117bded2dbSJung-uk Kim addi r10,r10,32 14127bded2dbSJung-uk Kim stvx v27,r11,$sp 14137bded2dbSJung-uk Kim addi r11,r11,32 14147bded2dbSJung-uk Kim stvx v28,r10,$sp 14157bded2dbSJung-uk Kim addi r10,r10,32 14167bded2dbSJung-uk Kim stvx v29,r11,$sp 14177bded2dbSJung-uk Kim addi r11,r11,32 14187bded2dbSJung-uk Kim stvx v30,r10,$sp 14197bded2dbSJung-uk Kim stvx v31,r11,$sp 14207bded2dbSJung-uk Kim li r0,-1 14217bded2dbSJung-uk Kim stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 14227bded2dbSJung-uk Kim li $x10,0x10 14237bded2dbSJung-uk Kim $PUSH 
r26,`$FRAME+21*16+0*$SIZE_T`($sp) 14247bded2dbSJung-uk Kim li $x20,0x20 14257bded2dbSJung-uk Kim $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 14267bded2dbSJung-uk Kim li $x30,0x30 14277bded2dbSJung-uk Kim $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 14287bded2dbSJung-uk Kim li $x40,0x40 14297bded2dbSJung-uk Kim $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 14307bded2dbSJung-uk Kim li $x50,0x50 14317bded2dbSJung-uk Kim $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 14327bded2dbSJung-uk Kim li $x60,0x60 14337bded2dbSJung-uk Kim $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 14347bded2dbSJung-uk Kim li $x70,0x70 14357bded2dbSJung-uk Kim mtspr 256,r0 14367bded2dbSJung-uk Kim 14377bded2dbSJung-uk Kim subi $rounds,$rounds,3 # -4 in total 14387bded2dbSJung-uk Kim 14397bded2dbSJung-uk Kim lvx $rndkey0,$x00,$key # load key schedule 14407bded2dbSJung-uk Kim lvx v30,$x10,$key 14417bded2dbSJung-uk Kim addi $key,$key,0x20 14427bded2dbSJung-uk Kim lvx v31,$x00,$key 14437bded2dbSJung-uk Kim ?vperm $rndkey0,$rndkey0,v30,$keyperm 14447bded2dbSJung-uk Kim addi $key_,$sp,$FRAME+15 14457bded2dbSJung-uk Kim mtctr $rounds 14467bded2dbSJung-uk Kim 14477bded2dbSJung-uk KimLoad_ctr32_enc_key: 14487bded2dbSJung-uk Kim ?vperm v24,v30,v31,$keyperm 14497bded2dbSJung-uk Kim lvx v30,$x10,$key 14507bded2dbSJung-uk Kim addi $key,$key,0x20 14517bded2dbSJung-uk Kim stvx v24,$x00,$key_ # off-load round[1] 14527bded2dbSJung-uk Kim ?vperm v25,v31,v30,$keyperm 14537bded2dbSJung-uk Kim lvx v31,$x00,$key 14547bded2dbSJung-uk Kim stvx v25,$x10,$key_ # off-load round[2] 14557bded2dbSJung-uk Kim addi $key_,$key_,0x20 14567bded2dbSJung-uk Kim bdnz Load_ctr32_enc_key 14577bded2dbSJung-uk Kim 14587bded2dbSJung-uk Kim lvx v26,$x10,$key 14597bded2dbSJung-uk Kim ?vperm v24,v30,v31,$keyperm 14607bded2dbSJung-uk Kim lvx v27,$x20,$key 14617bded2dbSJung-uk Kim stvx v24,$x00,$key_ # off-load round[3] 14627bded2dbSJung-uk Kim ?vperm v25,v31,v26,$keyperm 14637bded2dbSJung-uk Kim lvx v28,$x30,$key 14647bded2dbSJung-uk Kim stvx v25,$x10,$key_ # 
off-load round[4] 14657bded2dbSJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 14667bded2dbSJung-uk Kim ?vperm v26,v26,v27,$keyperm 14677bded2dbSJung-uk Kim lvx v29,$x40,$key 14687bded2dbSJung-uk Kim ?vperm v27,v27,v28,$keyperm 14697bded2dbSJung-uk Kim lvx v30,$x50,$key 14707bded2dbSJung-uk Kim ?vperm v28,v28,v29,$keyperm 14717bded2dbSJung-uk Kim lvx v31,$x60,$key 14727bded2dbSJung-uk Kim ?vperm v29,v29,v30,$keyperm 14737bded2dbSJung-uk Kim lvx $out0,$x70,$key # borrow $out0 14747bded2dbSJung-uk Kim ?vperm v30,v30,v31,$keyperm 14757bded2dbSJung-uk Kim lvx v24,$x00,$key_ # pre-load round[1] 14767bded2dbSJung-uk Kim ?vperm v31,v31,$out0,$keyperm 14777bded2dbSJung-uk Kim lvx v25,$x10,$key_ # pre-load round[2] 14787bded2dbSJung-uk Kim 14797bded2dbSJung-uk Kim vadduwm $two,$one,$one 14807bded2dbSJung-uk Kim subi $inp,$inp,15 # undo "caller" 14817bded2dbSJung-uk Kim $SHL $len,$len,4 14827bded2dbSJung-uk Kim 14837bded2dbSJung-uk Kim vadduwm $out1,$ivec,$one # counter values ... 14847bded2dbSJung-uk Kim vadduwm $out2,$ivec,$two 14857bded2dbSJung-uk Kim vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] 14867bded2dbSJung-uk Kim le?li $idx,8 14877bded2dbSJung-uk Kim vadduwm $out3,$out1,$two 14887bded2dbSJung-uk Kim vxor $out1,$out1,$rndkey0 14897bded2dbSJung-uk Kim le?lvsl $inpperm,0,$idx 14907bded2dbSJung-uk Kim vadduwm $out4,$out2,$two 14917bded2dbSJung-uk Kim vxor $out2,$out2,$rndkey0 14927bded2dbSJung-uk Kim le?vspltisb $tmp,0x0f 14937bded2dbSJung-uk Kim vadduwm $out5,$out3,$two 14947bded2dbSJung-uk Kim vxor $out3,$out3,$rndkey0 14957bded2dbSJung-uk Kim le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u 14967bded2dbSJung-uk Kim vadduwm $out6,$out4,$two 14977bded2dbSJung-uk Kim vxor $out4,$out4,$rndkey0 14987bded2dbSJung-uk Kim vadduwm $out7,$out5,$two 14997bded2dbSJung-uk Kim vxor $out5,$out5,$rndkey0 15007bded2dbSJung-uk Kim vadduwm $ivec,$out6,$two # next counter value 15017bded2dbSJung-uk Kim vxor $out6,$out6,$rndkey0 15027bded2dbSJung-uk Kim vxor $out7,$out7,$rndkey0 15037bded2dbSJung-uk Kim 15047bded2dbSJung-uk Kim mtctr $rounds 15057bded2dbSJung-uk Kim b Loop_ctr32_enc8x 15067bded2dbSJung-uk Kim.align 5 15077bded2dbSJung-uk KimLoop_ctr32_enc8x: 15087bded2dbSJung-uk Kim vcipher $out0,$out0,v24 15097bded2dbSJung-uk Kim vcipher $out1,$out1,v24 15107bded2dbSJung-uk Kim vcipher $out2,$out2,v24 15117bded2dbSJung-uk Kim vcipher $out3,$out3,v24 15127bded2dbSJung-uk Kim vcipher $out4,$out4,v24 15137bded2dbSJung-uk Kim vcipher $out5,$out5,v24 15147bded2dbSJung-uk Kim vcipher $out6,$out6,v24 15157bded2dbSJung-uk Kim vcipher $out7,$out7,v24 15167bded2dbSJung-uk KimLoop_ctr32_enc8x_middle: 15177bded2dbSJung-uk Kim lvx v24,$x20,$key_ # round[3] 15187bded2dbSJung-uk Kim addi $key_,$key_,0x20 15197bded2dbSJung-uk Kim 15207bded2dbSJung-uk Kim vcipher $out0,$out0,v25 15217bded2dbSJung-uk Kim vcipher $out1,$out1,v25 15227bded2dbSJung-uk Kim vcipher $out2,$out2,v25 15237bded2dbSJung-uk Kim vcipher $out3,$out3,v25 15247bded2dbSJung-uk Kim vcipher $out4,$out4,v25 15257bded2dbSJung-uk Kim vcipher $out5,$out5,v25 15267bded2dbSJung-uk Kim vcipher 
$out6,$out6,v25 15277bded2dbSJung-uk Kim vcipher $out7,$out7,v25 15287bded2dbSJung-uk Kim lvx v25,$x10,$key_ # round[4] 15297bded2dbSJung-uk Kim bdnz Loop_ctr32_enc8x 15307bded2dbSJung-uk Kim 15317bded2dbSJung-uk Kim subic r11,$len,256 # $len-256, borrow $key_ 15327bded2dbSJung-uk Kim vcipher $out0,$out0,v24 15337bded2dbSJung-uk Kim vcipher $out1,$out1,v24 15347bded2dbSJung-uk Kim vcipher $out2,$out2,v24 15357bded2dbSJung-uk Kim vcipher $out3,$out3,v24 15367bded2dbSJung-uk Kim vcipher $out4,$out4,v24 15377bded2dbSJung-uk Kim vcipher $out5,$out5,v24 15387bded2dbSJung-uk Kim vcipher $out6,$out6,v24 15397bded2dbSJung-uk Kim vcipher $out7,$out7,v24 15407bded2dbSJung-uk Kim 15417bded2dbSJung-uk Kim subfe r0,r0,r0 # borrow?-1:0 15427bded2dbSJung-uk Kim vcipher $out0,$out0,v25 15437bded2dbSJung-uk Kim vcipher $out1,$out1,v25 15447bded2dbSJung-uk Kim vcipher $out2,$out2,v25 15457bded2dbSJung-uk Kim vcipher $out3,$out3,v25 15467bded2dbSJung-uk Kim vcipher $out4,$out4,v25 15477bded2dbSJung-uk Kim vcipher $out5,$out5,v25 15487bded2dbSJung-uk Kim vcipher $out6,$out6,v25 15497bded2dbSJung-uk Kim vcipher $out7,$out7,v25 15507bded2dbSJung-uk Kim 15517bded2dbSJung-uk Kim and r0,r0,r11 15527bded2dbSJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 15537bded2dbSJung-uk Kim vcipher $out0,$out0,v26 15547bded2dbSJung-uk Kim vcipher $out1,$out1,v26 15557bded2dbSJung-uk Kim vcipher $out2,$out2,v26 15567bded2dbSJung-uk Kim vcipher $out3,$out3,v26 15577bded2dbSJung-uk Kim vcipher $out4,$out4,v26 15587bded2dbSJung-uk Kim vcipher $out5,$out5,v26 15597bded2dbSJung-uk Kim vcipher $out6,$out6,v26 15607bded2dbSJung-uk Kim vcipher $out7,$out7,v26 15617bded2dbSJung-uk Kim lvx v24,$x00,$key_ # re-pre-load round[1] 15627bded2dbSJung-uk Kim 15637bded2dbSJung-uk Kim subic $len,$len,129 # $len-=129 15647bded2dbSJung-uk Kim vcipher $out0,$out0,v27 15657bded2dbSJung-uk Kim addi $len,$len,1 # $len-=128 really 15667bded2dbSJung-uk Kim vcipher $out1,$out1,v27 15677bded2dbSJung-uk Kim vcipher 
$out2,$out2,v27 15687bded2dbSJung-uk Kim vcipher $out3,$out3,v27 15697bded2dbSJung-uk Kim vcipher $out4,$out4,v27 15707bded2dbSJung-uk Kim vcipher $out5,$out5,v27 15717bded2dbSJung-uk Kim vcipher $out6,$out6,v27 15727bded2dbSJung-uk Kim vcipher $out7,$out7,v27 15737bded2dbSJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 15747bded2dbSJung-uk Kim 15757bded2dbSJung-uk Kim vcipher $out0,$out0,v28 15767bded2dbSJung-uk Kim lvx_u $in0,$x00,$inp # load input 15777bded2dbSJung-uk Kim vcipher $out1,$out1,v28 15787bded2dbSJung-uk Kim lvx_u $in1,$x10,$inp 15797bded2dbSJung-uk Kim vcipher $out2,$out2,v28 15807bded2dbSJung-uk Kim lvx_u $in2,$x20,$inp 15817bded2dbSJung-uk Kim vcipher $out3,$out3,v28 15827bded2dbSJung-uk Kim lvx_u $in3,$x30,$inp 15837bded2dbSJung-uk Kim vcipher $out4,$out4,v28 15847bded2dbSJung-uk Kim lvx_u $in4,$x40,$inp 15857bded2dbSJung-uk Kim vcipher $out5,$out5,v28 15867bded2dbSJung-uk Kim lvx_u $in5,$x50,$inp 15877bded2dbSJung-uk Kim vcipher $out6,$out6,v28 15887bded2dbSJung-uk Kim lvx_u $in6,$x60,$inp 15897bded2dbSJung-uk Kim vcipher $out7,$out7,v28 15907bded2dbSJung-uk Kim lvx_u $in7,$x70,$inp 15917bded2dbSJung-uk Kim addi $inp,$inp,0x80 15927bded2dbSJung-uk Kim 15937bded2dbSJung-uk Kim vcipher $out0,$out0,v29 15947bded2dbSJung-uk Kim le?vperm $in0,$in0,$in0,$inpperm 15957bded2dbSJung-uk Kim vcipher $out1,$out1,v29 15967bded2dbSJung-uk Kim le?vperm $in1,$in1,$in1,$inpperm 15977bded2dbSJung-uk Kim vcipher $out2,$out2,v29 15987bded2dbSJung-uk Kim le?vperm $in2,$in2,$in2,$inpperm 15997bded2dbSJung-uk Kim vcipher $out3,$out3,v29 16007bded2dbSJung-uk Kim le?vperm $in3,$in3,$in3,$inpperm 16017bded2dbSJung-uk Kim vcipher $out4,$out4,v29 16027bded2dbSJung-uk Kim le?vperm $in4,$in4,$in4,$inpperm 16037bded2dbSJung-uk Kim vcipher $out5,$out5,v29 16047bded2dbSJung-uk Kim le?vperm $in5,$in5,$in5,$inpperm 16057bded2dbSJung-uk Kim vcipher $out6,$out6,v29 16067bded2dbSJung-uk Kim le?vperm $in6,$in6,$in6,$inpperm 16077bded2dbSJung-uk Kim vcipher $out7,$out7,v29 
16087bded2dbSJung-uk Kim le?vperm $in7,$in7,$in7,$inpperm 16097bded2dbSJung-uk Kim 16107bded2dbSJung-uk Kim add $inp,$inp,r0 # $inp is adjusted in such 16117bded2dbSJung-uk Kim # way that at exit from the 16127bded2dbSJung-uk Kim # loop inX-in7 are loaded 16137bded2dbSJung-uk Kim # with last "words" 16147bded2dbSJung-uk Kim subfe. r0,r0,r0 # borrow?-1:0 16157bded2dbSJung-uk Kim vcipher $out0,$out0,v30 16167bded2dbSJung-uk Kim vxor $in0,$in0,v31 # xor with last round key 16177bded2dbSJung-uk Kim vcipher $out1,$out1,v30 16187bded2dbSJung-uk Kim vxor $in1,$in1,v31 16197bded2dbSJung-uk Kim vcipher $out2,$out2,v30 16207bded2dbSJung-uk Kim vxor $in2,$in2,v31 16217bded2dbSJung-uk Kim vcipher $out3,$out3,v30 16227bded2dbSJung-uk Kim vxor $in3,$in3,v31 16237bded2dbSJung-uk Kim vcipher $out4,$out4,v30 16247bded2dbSJung-uk Kim vxor $in4,$in4,v31 16257bded2dbSJung-uk Kim vcipher $out5,$out5,v30 16267bded2dbSJung-uk Kim vxor $in5,$in5,v31 16277bded2dbSJung-uk Kim vcipher $out6,$out6,v30 16287bded2dbSJung-uk Kim vxor $in6,$in6,v31 16297bded2dbSJung-uk Kim vcipher $out7,$out7,v30 16307bded2dbSJung-uk Kim vxor $in7,$in7,v31 16317bded2dbSJung-uk Kim 16327bded2dbSJung-uk Kim bne Lctr32_enc8x_break # did $len-129 borrow? 16337bded2dbSJung-uk Kim 16347bded2dbSJung-uk Kim vcipherlast $in0,$out0,$in0 16357bded2dbSJung-uk Kim vcipherlast $in1,$out1,$in1 16367bded2dbSJung-uk Kim vadduwm $out1,$ivec,$one # counter values ... 16377bded2dbSJung-uk Kim vcipherlast $in2,$out2,$in2 16387bded2dbSJung-uk Kim vadduwm $out2,$ivec,$two 16397bded2dbSJung-uk Kim vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] 16407bded2dbSJung-uk Kim vcipherlast $in3,$out3,$in3 16417bded2dbSJung-uk Kim vadduwm $out3,$out1,$two 16427bded2dbSJung-uk Kim vxor $out1,$out1,$rndkey0 16437bded2dbSJung-uk Kim vcipherlast $in4,$out4,$in4 16447bded2dbSJung-uk Kim vadduwm $out4,$out2,$two 16457bded2dbSJung-uk Kim vxor $out2,$out2,$rndkey0 16467bded2dbSJung-uk Kim vcipherlast $in5,$out5,$in5 16477bded2dbSJung-uk Kim vadduwm $out5,$out3,$two 16487bded2dbSJung-uk Kim vxor $out3,$out3,$rndkey0 16497bded2dbSJung-uk Kim vcipherlast $in6,$out6,$in6 16507bded2dbSJung-uk Kim vadduwm $out6,$out4,$two 16517bded2dbSJung-uk Kim vxor $out4,$out4,$rndkey0 16527bded2dbSJung-uk Kim vcipherlast $in7,$out7,$in7 16537bded2dbSJung-uk Kim vadduwm $out7,$out5,$two 16547bded2dbSJung-uk Kim vxor $out5,$out5,$rndkey0 16557bded2dbSJung-uk Kim le?vperm $in0,$in0,$in0,$inpperm 16567bded2dbSJung-uk Kim vadduwm $ivec,$out6,$two # next counter value 16577bded2dbSJung-uk Kim vxor $out6,$out6,$rndkey0 16587bded2dbSJung-uk Kim le?vperm $in1,$in1,$in1,$inpperm 16597bded2dbSJung-uk Kim vxor $out7,$out7,$rndkey0 16607bded2dbSJung-uk Kim mtctr $rounds 16617bded2dbSJung-uk Kim 16627bded2dbSJung-uk Kim vcipher $out0,$out0,v24 16637bded2dbSJung-uk Kim stvx_u $in0,$x00,$out 16647bded2dbSJung-uk Kim le?vperm $in2,$in2,$in2,$inpperm 16657bded2dbSJung-uk Kim vcipher $out1,$out1,v24 16667bded2dbSJung-uk Kim stvx_u $in1,$x10,$out 16677bded2dbSJung-uk Kim le?vperm $in3,$in3,$in3,$inpperm 16687bded2dbSJung-uk Kim vcipher $out2,$out2,v24 16697bded2dbSJung-uk Kim stvx_u $in2,$x20,$out 16707bded2dbSJung-uk Kim le?vperm $in4,$in4,$in4,$inpperm 16717bded2dbSJung-uk Kim vcipher $out3,$out3,v24 16727bded2dbSJung-uk Kim stvx_u $in3,$x30,$out 16737bded2dbSJung-uk Kim le?vperm $in5,$in5,$in5,$inpperm 16747bded2dbSJung-uk Kim vcipher $out4,$out4,v24 16757bded2dbSJung-uk Kim stvx_u $in4,$x40,$out 16767bded2dbSJung-uk Kim le?vperm $in6,$in6,$in6,$inpperm 16777bded2dbSJung-uk Kim vcipher $out5,$out5,v24 16787bded2dbSJung-uk Kim stvx_u 
$in5,$x50,$out 16797bded2dbSJung-uk Kim le?vperm $in7,$in7,$in7,$inpperm 16807bded2dbSJung-uk Kim vcipher $out6,$out6,v24 16817bded2dbSJung-uk Kim stvx_u $in6,$x60,$out 16827bded2dbSJung-uk Kim vcipher $out7,$out7,v24 16837bded2dbSJung-uk Kim stvx_u $in7,$x70,$out 16847bded2dbSJung-uk Kim addi $out,$out,0x80 16857bded2dbSJung-uk Kim 16867bded2dbSJung-uk Kim b Loop_ctr32_enc8x_middle 16877bded2dbSJung-uk Kim 16887bded2dbSJung-uk Kim.align 5 16897bded2dbSJung-uk KimLctr32_enc8x_break: 16907bded2dbSJung-uk Kim cmpwi $len,-0x60 16917bded2dbSJung-uk Kim blt Lctr32_enc8x_one 16927bded2dbSJung-uk Kim nop 16937bded2dbSJung-uk Kim beq Lctr32_enc8x_two 16947bded2dbSJung-uk Kim cmpwi $len,-0x40 16957bded2dbSJung-uk Kim blt Lctr32_enc8x_three 16967bded2dbSJung-uk Kim nop 16977bded2dbSJung-uk Kim beq Lctr32_enc8x_four 16987bded2dbSJung-uk Kim cmpwi $len,-0x20 16997bded2dbSJung-uk Kim blt Lctr32_enc8x_five 17007bded2dbSJung-uk Kim nop 17017bded2dbSJung-uk Kim beq Lctr32_enc8x_six 17027bded2dbSJung-uk Kim cmpwi $len,0x00 17037bded2dbSJung-uk Kim blt Lctr32_enc8x_seven 17047bded2dbSJung-uk Kim 17057bded2dbSJung-uk KimLctr32_enc8x_eight: 17067bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in0 17077bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in1 17087bded2dbSJung-uk Kim vcipherlast $out2,$out2,$in2 17097bded2dbSJung-uk Kim vcipherlast $out3,$out3,$in3 17107bded2dbSJung-uk Kim vcipherlast $out4,$out4,$in4 17117bded2dbSJung-uk Kim vcipherlast $out5,$out5,$in5 17127bded2dbSJung-uk Kim vcipherlast $out6,$out6,$in6 17137bded2dbSJung-uk Kim vcipherlast $out7,$out7,$in7 17147bded2dbSJung-uk Kim 17157bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 17167bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 17177bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 17187bded2dbSJung-uk Kim le?vperm $out2,$out2,$out2,$inpperm 17197bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 17207bded2dbSJung-uk Kim le?vperm $out3,$out3,$out3,$inpperm 17217bded2dbSJung-uk Kim stvx_u $out2,$x20,$out 
17227bded2dbSJung-uk Kim le?vperm $out4,$out4,$out4,$inpperm 17237bded2dbSJung-uk Kim stvx_u $out3,$x30,$out 17247bded2dbSJung-uk Kim le?vperm $out5,$out5,$out5,$inpperm 17257bded2dbSJung-uk Kim stvx_u $out4,$x40,$out 17267bded2dbSJung-uk Kim le?vperm $out6,$out6,$out6,$inpperm 17277bded2dbSJung-uk Kim stvx_u $out5,$x50,$out 17287bded2dbSJung-uk Kim le?vperm $out7,$out7,$out7,$inpperm 17297bded2dbSJung-uk Kim stvx_u $out6,$x60,$out 17307bded2dbSJung-uk Kim stvx_u $out7,$x70,$out 17317bded2dbSJung-uk Kim addi $out,$out,0x80 17327bded2dbSJung-uk Kim b Lctr32_enc8x_done 17337bded2dbSJung-uk Kim 17347bded2dbSJung-uk Kim.align 5 17357bded2dbSJung-uk KimLctr32_enc8x_seven: 17367bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in1 17377bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in2 17387bded2dbSJung-uk Kim vcipherlast $out2,$out2,$in3 17397bded2dbSJung-uk Kim vcipherlast $out3,$out3,$in4 17407bded2dbSJung-uk Kim vcipherlast $out4,$out4,$in5 17417bded2dbSJung-uk Kim vcipherlast $out5,$out5,$in6 17427bded2dbSJung-uk Kim vcipherlast $out6,$out6,$in7 17437bded2dbSJung-uk Kim 17447bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 17457bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 17467bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 17477bded2dbSJung-uk Kim le?vperm $out2,$out2,$out2,$inpperm 17487bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 17497bded2dbSJung-uk Kim le?vperm $out3,$out3,$out3,$inpperm 17507bded2dbSJung-uk Kim stvx_u $out2,$x20,$out 17517bded2dbSJung-uk Kim le?vperm $out4,$out4,$out4,$inpperm 17527bded2dbSJung-uk Kim stvx_u $out3,$x30,$out 17537bded2dbSJung-uk Kim le?vperm $out5,$out5,$out5,$inpperm 17547bded2dbSJung-uk Kim stvx_u $out4,$x40,$out 17557bded2dbSJung-uk Kim le?vperm $out6,$out6,$out6,$inpperm 17567bded2dbSJung-uk Kim stvx_u $out5,$x50,$out 17577bded2dbSJung-uk Kim stvx_u $out6,$x60,$out 17587bded2dbSJung-uk Kim addi $out,$out,0x70 17597bded2dbSJung-uk Kim b Lctr32_enc8x_done 17607bded2dbSJung-uk Kim 17617bded2dbSJung-uk Kim.align 5 
17627bded2dbSJung-uk KimLctr32_enc8x_six: 17637bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in2 17647bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in3 17657bded2dbSJung-uk Kim vcipherlast $out2,$out2,$in4 17667bded2dbSJung-uk Kim vcipherlast $out3,$out3,$in5 17677bded2dbSJung-uk Kim vcipherlast $out4,$out4,$in6 17687bded2dbSJung-uk Kim vcipherlast $out5,$out5,$in7 17697bded2dbSJung-uk Kim 17707bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 17717bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 17727bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 17737bded2dbSJung-uk Kim le?vperm $out2,$out2,$out2,$inpperm 17747bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 17757bded2dbSJung-uk Kim le?vperm $out3,$out3,$out3,$inpperm 17767bded2dbSJung-uk Kim stvx_u $out2,$x20,$out 17777bded2dbSJung-uk Kim le?vperm $out4,$out4,$out4,$inpperm 17787bded2dbSJung-uk Kim stvx_u $out3,$x30,$out 17797bded2dbSJung-uk Kim le?vperm $out5,$out5,$out5,$inpperm 17807bded2dbSJung-uk Kim stvx_u $out4,$x40,$out 17817bded2dbSJung-uk Kim stvx_u $out5,$x50,$out 17827bded2dbSJung-uk Kim addi $out,$out,0x60 17837bded2dbSJung-uk Kim b Lctr32_enc8x_done 17847bded2dbSJung-uk Kim 17857bded2dbSJung-uk Kim.align 5 17867bded2dbSJung-uk KimLctr32_enc8x_five: 17877bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in3 17887bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in4 17897bded2dbSJung-uk Kim vcipherlast $out2,$out2,$in5 17907bded2dbSJung-uk Kim vcipherlast $out3,$out3,$in6 17917bded2dbSJung-uk Kim vcipherlast $out4,$out4,$in7 17927bded2dbSJung-uk Kim 17937bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 17947bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 17957bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 17967bded2dbSJung-uk Kim le?vperm $out2,$out2,$out2,$inpperm 17977bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 17987bded2dbSJung-uk Kim le?vperm $out3,$out3,$out3,$inpperm 17997bded2dbSJung-uk Kim stvx_u $out2,$x20,$out 18007bded2dbSJung-uk Kim le?vperm $out4,$out4,$out4,$inpperm 
18017bded2dbSJung-uk Kim stvx_u $out3,$x30,$out 18027bded2dbSJung-uk Kim stvx_u $out4,$x40,$out 18037bded2dbSJung-uk Kim addi $out,$out,0x50 18047bded2dbSJung-uk Kim b Lctr32_enc8x_done 18057bded2dbSJung-uk Kim 18067bded2dbSJung-uk Kim.align 5 18077bded2dbSJung-uk KimLctr32_enc8x_four: 18087bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in4 18097bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in5 18107bded2dbSJung-uk Kim vcipherlast $out2,$out2,$in6 18117bded2dbSJung-uk Kim vcipherlast $out3,$out3,$in7 18127bded2dbSJung-uk Kim 18137bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 18147bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 18157bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 18167bded2dbSJung-uk Kim le?vperm $out2,$out2,$out2,$inpperm 18177bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 18187bded2dbSJung-uk Kim le?vperm $out3,$out3,$out3,$inpperm 18197bded2dbSJung-uk Kim stvx_u $out2,$x20,$out 18207bded2dbSJung-uk Kim stvx_u $out3,$x30,$out 18217bded2dbSJung-uk Kim addi $out,$out,0x40 18227bded2dbSJung-uk Kim b Lctr32_enc8x_done 18237bded2dbSJung-uk Kim 18247bded2dbSJung-uk Kim.align 5 18257bded2dbSJung-uk KimLctr32_enc8x_three: 18267bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in5 18277bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in6 18287bded2dbSJung-uk Kim vcipherlast $out2,$out2,$in7 18297bded2dbSJung-uk Kim 18307bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 18317bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 18327bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 18337bded2dbSJung-uk Kim le?vperm $out2,$out2,$out2,$inpperm 18347bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 18357bded2dbSJung-uk Kim stvx_u $out2,$x20,$out 18367bded2dbSJung-uk Kim addi $out,$out,0x30 1837610a21fdSJung-uk Kim b Lctr32_enc8x_done 18387bded2dbSJung-uk Kim 18397bded2dbSJung-uk Kim.align 5 18407bded2dbSJung-uk KimLctr32_enc8x_two: 18417bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in6 18427bded2dbSJung-uk Kim vcipherlast $out1,$out1,$in7 18437bded2dbSJung-uk Kim 
18447bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 18457bded2dbSJung-uk Kim le?vperm $out1,$out1,$out1,$inpperm 18467bded2dbSJung-uk Kim stvx_u $out0,$x00,$out 18477bded2dbSJung-uk Kim stvx_u $out1,$x10,$out 18487bded2dbSJung-uk Kim addi $out,$out,0x20 1849610a21fdSJung-uk Kim b Lctr32_enc8x_done 18507bded2dbSJung-uk Kim 18517bded2dbSJung-uk Kim.align 5 18527bded2dbSJung-uk KimLctr32_enc8x_one: 18537bded2dbSJung-uk Kim vcipherlast $out0,$out0,$in7 18547bded2dbSJung-uk Kim 18557bded2dbSJung-uk Kim le?vperm $out0,$out0,$out0,$inpperm 18567bded2dbSJung-uk Kim stvx_u $out0,0,$out 18577bded2dbSJung-uk Kim addi $out,$out,0x10 18587bded2dbSJung-uk Kim 18597bded2dbSJung-uk KimLctr32_enc8x_done: 18607bded2dbSJung-uk Kim li r10,`$FRAME+15` 18617bded2dbSJung-uk Kim li r11,`$FRAME+31` 18627bded2dbSJung-uk Kim stvx $inpperm,r10,$sp # wipe copies of round keys 18637bded2dbSJung-uk Kim addi r10,r10,32 18647bded2dbSJung-uk Kim stvx $inpperm,r11,$sp 18657bded2dbSJung-uk Kim addi r11,r11,32 18667bded2dbSJung-uk Kim stvx $inpperm,r10,$sp 18677bded2dbSJung-uk Kim addi r10,r10,32 18687bded2dbSJung-uk Kim stvx $inpperm,r11,$sp 18697bded2dbSJung-uk Kim addi r11,r11,32 18707bded2dbSJung-uk Kim stvx $inpperm,r10,$sp 18717bded2dbSJung-uk Kim addi r10,r10,32 18727bded2dbSJung-uk Kim stvx $inpperm,r11,$sp 18737bded2dbSJung-uk Kim addi r11,r11,32 18747bded2dbSJung-uk Kim stvx $inpperm,r10,$sp 18757bded2dbSJung-uk Kim addi r10,r10,32 18767bded2dbSJung-uk Kim stvx $inpperm,r11,$sp 18777bded2dbSJung-uk Kim addi r11,r11,32 18787bded2dbSJung-uk Kim 18797bded2dbSJung-uk Kim mtspr 256,$vrsave 18807bded2dbSJung-uk Kim lvx v20,r10,$sp # ABI says so 18817bded2dbSJung-uk Kim addi r10,r10,32 18827bded2dbSJung-uk Kim lvx v21,r11,$sp 18837bded2dbSJung-uk Kim addi r11,r11,32 18847bded2dbSJung-uk Kim lvx v22,r10,$sp 18857bded2dbSJung-uk Kim addi r10,r10,32 18867bded2dbSJung-uk Kim lvx v23,r11,$sp 18877bded2dbSJung-uk Kim addi r11,r11,32 18887bded2dbSJung-uk Kim lvx v24,r10,$sp 18897bded2dbSJung-uk 
Kim addi r10,r10,32 18907bded2dbSJung-uk Kim lvx v25,r11,$sp 18917bded2dbSJung-uk Kim addi r11,r11,32 18927bded2dbSJung-uk Kim lvx v26,r10,$sp 18937bded2dbSJung-uk Kim addi r10,r10,32 18947bded2dbSJung-uk Kim lvx v27,r11,$sp 18957bded2dbSJung-uk Kim addi r11,r11,32 18967bded2dbSJung-uk Kim lvx v28,r10,$sp 18977bded2dbSJung-uk Kim addi r10,r10,32 18987bded2dbSJung-uk Kim lvx v29,r11,$sp 18997bded2dbSJung-uk Kim addi r11,r11,32 19007bded2dbSJung-uk Kim lvx v30,r10,$sp 19017bded2dbSJung-uk Kim lvx v31,r11,$sp 19027bded2dbSJung-uk Kim $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 19037bded2dbSJung-uk Kim $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 19047bded2dbSJung-uk Kim $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 19057bded2dbSJung-uk Kim $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 19067bded2dbSJung-uk Kim $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 19077bded2dbSJung-uk Kim $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 19087bded2dbSJung-uk Kim addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 19097bded2dbSJung-uk Kim blr 19107bded2dbSJung-uk Kim .long 0 19117bded2dbSJung-uk Kim .byte 0,12,0x04,0,0x80,6,6,0 19127bded2dbSJung-uk Kim .long 0 19137bded2dbSJung-uk Kim.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks 19147bded2dbSJung-uk Kim___ 19157bded2dbSJung-uk Kim}} }}} 19167bded2dbSJung-uk Kim 1917e71b7053SJung-uk Kim######################################################################### 1918e71b7053SJung-uk Kim{{{ # XTS procedures # 1919e71b7053SJung-uk Kim# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # 1920e71b7053SJung-uk Kim# const AES_KEY *key1, const AES_KEY *key2, # 1921e71b7053SJung-uk Kim# [const] unsigned char iv[16]); # 1922e71b7053SJung-uk Kim# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # 1923e71b7053SJung-uk Kim# input tweak value is assumed to be encrypted already, and last tweak # 1924e71b7053SJung-uk Kim# value, one suitable for consecutive call on same chunk of data, is # 1925e71b7053SJung-uk Kim# written back to original 
buffer. In addition, in "tweak chaining" # 1926e71b7053SJung-uk Kim# mode only complete input blocks are processed. # 1927e71b7053SJung-uk Kim 1928e71b7053SJung-uk Kimmy ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); 1929e71b7053SJung-uk Kimmy ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); 1930e71b7053SJung-uk Kimmy ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); 1931e71b7053SJung-uk Kimmy ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); 1932e71b7053SJung-uk Kimmy $taillen = $key2; 1933e71b7053SJung-uk Kim 1934e71b7053SJung-uk Kim ($inp,$idx) = ($idx,$inp); # reassign 1935e71b7053SJung-uk Kim 1936e71b7053SJung-uk Kim$code.=<<___; 1937e71b7053SJung-uk Kim.globl .${prefix}_xts_encrypt 1938e71b7053SJung-uk Kim.align 5 1939e71b7053SJung-uk Kim.${prefix}_xts_encrypt: 1940e71b7053SJung-uk Kim mr $inp,r3 # reassign 1941e71b7053SJung-uk Kim li r3,-1 1942e71b7053SJung-uk Kim ${UCMP}i $len,16 1943e71b7053SJung-uk Kim bltlr- 1944e71b7053SJung-uk Kim 1945e71b7053SJung-uk Kim lis r0,0xfff0 1946e71b7053SJung-uk Kim mfspr r12,256 # save vrsave 1947e71b7053SJung-uk Kim li r11,0 1948e71b7053SJung-uk Kim mtspr 256,r0 1949e71b7053SJung-uk Kim 1950e71b7053SJung-uk Kim vspltisb $seven,0x07 # 0x070707..07 1951e71b7053SJung-uk Kim le?lvsl $leperm,r11,r11 1952e71b7053SJung-uk Kim le?vspltisb $tmp,0x0f 1953e71b7053SJung-uk Kim le?vxor $leperm,$leperm,$seven 1954e71b7053SJung-uk Kim 1955e71b7053SJung-uk Kim li $idx,15 1956e71b7053SJung-uk Kim lvx $tweak,0,$ivp # load [unaligned] iv 1957e71b7053SJung-uk Kim lvsl $inpperm,0,$ivp 1958e71b7053SJung-uk Kim lvx $inptail,$idx,$ivp 1959e71b7053SJung-uk Kim le?vxor $inpperm,$inpperm,$tmp 1960e71b7053SJung-uk Kim vperm $tweak,$tweak,$inptail,$inpperm 1961e71b7053SJung-uk Kim 1962e71b7053SJung-uk Kim neg r11,$inp 1963e71b7053SJung-uk Kim lvsr $inpperm,0,r11 # prepare for unaligned load 1964e71b7053SJung-uk Kim lvx $inout,0,$inp 1965e71b7053SJung-uk Kim addi $inp,$inp,15 # 15 is not typo 
1966e71b7053SJung-uk Kim le?vxor $inpperm,$inpperm,$tmp 1967e71b7053SJung-uk Kim 1968e71b7053SJung-uk Kim ${UCMP}i $key2,0 # key2==NULL? 1969e71b7053SJung-uk Kim beq Lxts_enc_no_key2 1970e71b7053SJung-uk Kim 1971e71b7053SJung-uk Kim ?lvsl $keyperm,0,$key2 # prepare for unaligned key 1972e71b7053SJung-uk Kim lwz $rounds,240($key2) 1973e71b7053SJung-uk Kim srwi $rounds,$rounds,1 1974e71b7053SJung-uk Kim subi $rounds,$rounds,1 1975e71b7053SJung-uk Kim li $idx,16 1976e71b7053SJung-uk Kim 1977e71b7053SJung-uk Kim lvx $rndkey0,0,$key2 1978e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key2 1979e71b7053SJung-uk Kim addi $idx,$idx,16 1980e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1981e71b7053SJung-uk Kim vxor $tweak,$tweak,$rndkey0 1982e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key2 1983e71b7053SJung-uk Kim addi $idx,$idx,16 1984e71b7053SJung-uk Kim mtctr $rounds 1985e71b7053SJung-uk Kim 1986e71b7053SJung-uk KimLtweak_xts_enc: 1987e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1988e71b7053SJung-uk Kim vcipher $tweak,$tweak,$rndkey1 1989e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key2 1990e71b7053SJung-uk Kim addi $idx,$idx,16 1991e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 1992e71b7053SJung-uk Kim vcipher $tweak,$tweak,$rndkey0 1993e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key2 1994e71b7053SJung-uk Kim addi $idx,$idx,16 1995e71b7053SJung-uk Kim bdnz Ltweak_xts_enc 1996e71b7053SJung-uk Kim 1997e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 1998e71b7053SJung-uk Kim vcipher $tweak,$tweak,$rndkey1 1999e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key2 2000e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2001e71b7053SJung-uk Kim vcipherlast $tweak,$tweak,$rndkey0 2002e71b7053SJung-uk Kim 2003e71b7053SJung-uk Kim li $ivp,0 # don't chain the tweak 2004e71b7053SJung-uk Kim b Lxts_enc 2005e71b7053SJung-uk Kim 2006e71b7053SJung-uk KimLxts_enc_no_key2: 2007e71b7053SJung-uk Kim li $idx,-16 2008e71b7053SJung-uk Kim and 
$len,$len,$idx # in "tweak chaining" 2009e71b7053SJung-uk Kim # mode only complete 2010e71b7053SJung-uk Kim # blocks are processed 2011e71b7053SJung-uk KimLxts_enc: 2012e71b7053SJung-uk Kim lvx $inptail,0,$inp 2013e71b7053SJung-uk Kim addi $inp,$inp,16 2014e71b7053SJung-uk Kim 2015e71b7053SJung-uk Kim ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2016e71b7053SJung-uk Kim lwz $rounds,240($key1) 2017e71b7053SJung-uk Kim srwi $rounds,$rounds,1 2018e71b7053SJung-uk Kim subi $rounds,$rounds,1 2019e71b7053SJung-uk Kim li $idx,16 2020e71b7053SJung-uk Kim 2021e71b7053SJung-uk Kim vslb $eighty7,$seven,$seven # 0x808080..80 2022e71b7053SJung-uk Kim vor $eighty7,$eighty7,$seven # 0x878787..87 2023e71b7053SJung-uk Kim vspltisb $tmp,1 # 0x010101..01 2024e71b7053SJung-uk Kim vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2025e71b7053SJung-uk Kim 2026e71b7053SJung-uk Kim ${UCMP}i $len,96 2027e71b7053SJung-uk Kim bge _aesp8_xts_encrypt6x 2028e71b7053SJung-uk Kim 2029e71b7053SJung-uk Kim andi. $taillen,$len,15 2030e71b7053SJung-uk Kim subic r0,$len,32 2031e71b7053SJung-uk Kim subi $taillen,$taillen,16 2032e71b7053SJung-uk Kim subfe r0,r0,r0 2033e71b7053SJung-uk Kim and r0,r0,$taillen 2034e71b7053SJung-uk Kim add $inp,$inp,r0 2035e71b7053SJung-uk Kim 2036e71b7053SJung-uk Kim lvx $rndkey0,0,$key1 2037e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2038e71b7053SJung-uk Kim addi $idx,$idx,16 2039e71b7053SJung-uk Kim vperm $inout,$inout,$inptail,$inpperm 2040e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2041e71b7053SJung-uk Kim vxor $inout,$inout,$tweak 2042e71b7053SJung-uk Kim vxor $inout,$inout,$rndkey0 2043e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2044e71b7053SJung-uk Kim addi $idx,$idx,16 2045e71b7053SJung-uk Kim mtctr $rounds 2046e71b7053SJung-uk Kim b Loop_xts_enc 2047e71b7053SJung-uk Kim 2048e71b7053SJung-uk Kim.align 5 2049e71b7053SJung-uk KimLoop_xts_enc: 2050e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2051e71b7053SJung-uk Kim vcipher 
$inout,$inout,$rndkey1 2052e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2053e71b7053SJung-uk Kim addi $idx,$idx,16 2054e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2055e71b7053SJung-uk Kim vcipher $inout,$inout,$rndkey0 2056e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2057e71b7053SJung-uk Kim addi $idx,$idx,16 2058e71b7053SJung-uk Kim bdnz Loop_xts_enc 2059e71b7053SJung-uk Kim 2060e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2061e71b7053SJung-uk Kim vcipher $inout,$inout,$rndkey1 2062e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2063e71b7053SJung-uk Kim li $idx,16 2064e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2065e71b7053SJung-uk Kim vxor $rndkey0,$rndkey0,$tweak 2066e71b7053SJung-uk Kim vcipherlast $output,$inout,$rndkey0 2067e71b7053SJung-uk Kim 2068e71b7053SJung-uk Kim le?vperm $tmp,$output,$output,$leperm 2069e71b7053SJung-uk Kim be?nop 2070e71b7053SJung-uk Kim le?stvx_u $tmp,0,$out 2071e71b7053SJung-uk Kim be?stvx_u $output,0,$out 2072e71b7053SJung-uk Kim addi $out,$out,16 2073e71b7053SJung-uk Kim 2074e71b7053SJung-uk Kim subic. 
$len,$len,16 2075e71b7053SJung-uk Kim beq Lxts_enc_done 2076e71b7053SJung-uk Kim 2077e71b7053SJung-uk Kim vmr $inout,$inptail 2078e71b7053SJung-uk Kim lvx $inptail,0,$inp 2079e71b7053SJung-uk Kim addi $inp,$inp,16 2080e71b7053SJung-uk Kim lvx $rndkey0,0,$key1 2081e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2082e71b7053SJung-uk Kim addi $idx,$idx,16 2083e71b7053SJung-uk Kim 2084e71b7053SJung-uk Kim subic r0,$len,32 2085e71b7053SJung-uk Kim subfe r0,r0,r0 2086e71b7053SJung-uk Kim and r0,r0,$taillen 2087e71b7053SJung-uk Kim add $inp,$inp,r0 2088e71b7053SJung-uk Kim 2089e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2090e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2091e71b7053SJung-uk Kim vsldoi $tmp,$tmp,$tmp,15 2092e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2093e71b7053SJung-uk Kim vxor $tweak,$tweak,$tmp 2094e71b7053SJung-uk Kim 2095e71b7053SJung-uk Kim vperm $inout,$inout,$inptail,$inpperm 2096e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2097e71b7053SJung-uk Kim vxor $inout,$inout,$tweak 2098e71b7053SJung-uk Kim vxor $output,$output,$rndkey0 # just in case $len<16 2099e71b7053SJung-uk Kim vxor $inout,$inout,$rndkey0 2100e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2101e71b7053SJung-uk Kim addi $idx,$idx,16 2102e71b7053SJung-uk Kim 2103e71b7053SJung-uk Kim mtctr $rounds 2104e71b7053SJung-uk Kim ${UCMP}i $len,16 2105e71b7053SJung-uk Kim bge Loop_xts_enc 2106e71b7053SJung-uk Kim 2107e71b7053SJung-uk Kim vxor $output,$output,$tweak 2108e71b7053SJung-uk Kim lvsr $inpperm,0,$len # $inpperm is no longer needed 2109e71b7053SJung-uk Kim vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2110e71b7053SJung-uk Kim vspltisb $tmp,-1 2111e71b7053SJung-uk Kim vperm $inptail,$inptail,$tmp,$inpperm 2112e71b7053SJung-uk Kim vsel $inout,$inout,$output,$inptail 2113e71b7053SJung-uk Kim 2114e71b7053SJung-uk Kim subi r11,$out,17 2115e71b7053SJung-uk Kim subi $out,$out,16 2116e71b7053SJung-uk Kim mtctr $len 2117e71b7053SJung-uk Kim 
li $len,16 2118e71b7053SJung-uk KimLoop_xts_enc_steal: 2119e71b7053SJung-uk Kim lbzu r0,1(r11) 2120e71b7053SJung-uk Kim stb r0,16(r11) 2121e71b7053SJung-uk Kim bdnz Loop_xts_enc_steal 2122e71b7053SJung-uk Kim 2123e71b7053SJung-uk Kim mtctr $rounds 2124e71b7053SJung-uk Kim b Loop_xts_enc # one more time... 2125e71b7053SJung-uk Kim 2126e71b7053SJung-uk KimLxts_enc_done: 2127e71b7053SJung-uk Kim ${UCMP}i $ivp,0 2128e71b7053SJung-uk Kim beq Lxts_enc_ret 2129e71b7053SJung-uk Kim 2130e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2131e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2132e71b7053SJung-uk Kim vsldoi $tmp,$tmp,$tmp,15 2133e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2134e71b7053SJung-uk Kim vxor $tweak,$tweak,$tmp 2135e71b7053SJung-uk Kim 2136e71b7053SJung-uk Kim le?vperm $tweak,$tweak,$tweak,$leperm 2137e71b7053SJung-uk Kim stvx_u $tweak,0,$ivp 2138e71b7053SJung-uk Kim 2139e71b7053SJung-uk KimLxts_enc_ret: 2140e71b7053SJung-uk Kim mtspr 256,r12 # restore vrsave 2141e71b7053SJung-uk Kim li r3,0 2142e71b7053SJung-uk Kim blr 2143e71b7053SJung-uk Kim .long 0 2144e71b7053SJung-uk Kim .byte 0,12,0x04,0,0x80,6,6,0 2145e71b7053SJung-uk Kim .long 0 2146e71b7053SJung-uk Kim.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt 2147e71b7053SJung-uk Kim 2148e71b7053SJung-uk Kim.globl .${prefix}_xts_decrypt 2149e71b7053SJung-uk Kim.align 5 2150e71b7053SJung-uk Kim.${prefix}_xts_decrypt: 2151e71b7053SJung-uk Kim mr $inp,r3 # reassign 2152e71b7053SJung-uk Kim li r3,-1 2153e71b7053SJung-uk Kim ${UCMP}i $len,16 2154e71b7053SJung-uk Kim bltlr- 2155e71b7053SJung-uk Kim 2156e71b7053SJung-uk Kim lis r0,0xfff8 2157e71b7053SJung-uk Kim mfspr r12,256 # save vrsave 2158e71b7053SJung-uk Kim li r11,0 2159e71b7053SJung-uk Kim mtspr 256,r0 2160e71b7053SJung-uk Kim 2161e71b7053SJung-uk Kim andi. r0,$len,15 2162e71b7053SJung-uk Kim neg r0,r0 2163e71b7053SJung-uk Kim andi. 
r0,r0,16 2164e71b7053SJung-uk Kim sub $len,$len,r0 2165e71b7053SJung-uk Kim 2166e71b7053SJung-uk Kim vspltisb $seven,0x07 # 0x070707..07 2167e71b7053SJung-uk Kim le?lvsl $leperm,r11,r11 2168e71b7053SJung-uk Kim le?vspltisb $tmp,0x0f 2169e71b7053SJung-uk Kim le?vxor $leperm,$leperm,$seven 2170e71b7053SJung-uk Kim 2171e71b7053SJung-uk Kim li $idx,15 2172e71b7053SJung-uk Kim lvx $tweak,0,$ivp # load [unaligned] iv 2173e71b7053SJung-uk Kim lvsl $inpperm,0,$ivp 2174e71b7053SJung-uk Kim lvx $inptail,$idx,$ivp 2175e71b7053SJung-uk Kim le?vxor $inpperm,$inpperm,$tmp 2176e71b7053SJung-uk Kim vperm $tweak,$tweak,$inptail,$inpperm 2177e71b7053SJung-uk Kim 2178e71b7053SJung-uk Kim neg r11,$inp 2179e71b7053SJung-uk Kim lvsr $inpperm,0,r11 # prepare for unaligned load 2180e71b7053SJung-uk Kim lvx $inout,0,$inp 2181e71b7053SJung-uk Kim addi $inp,$inp,15 # 15 is not typo 2182e71b7053SJung-uk Kim le?vxor $inpperm,$inpperm,$tmp 2183e71b7053SJung-uk Kim 2184e71b7053SJung-uk Kim ${UCMP}i $key2,0 # key2==NULL? 
2185e71b7053SJung-uk Kim beq Lxts_dec_no_key2 2186e71b7053SJung-uk Kim 2187e71b7053SJung-uk Kim ?lvsl $keyperm,0,$key2 # prepare for unaligned key 2188e71b7053SJung-uk Kim lwz $rounds,240($key2) 2189e71b7053SJung-uk Kim srwi $rounds,$rounds,1 2190e71b7053SJung-uk Kim subi $rounds,$rounds,1 2191e71b7053SJung-uk Kim li $idx,16 2192e71b7053SJung-uk Kim 2193e71b7053SJung-uk Kim lvx $rndkey0,0,$key2 2194e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key2 2195e71b7053SJung-uk Kim addi $idx,$idx,16 2196e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2197e71b7053SJung-uk Kim vxor $tweak,$tweak,$rndkey0 2198e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key2 2199e71b7053SJung-uk Kim addi $idx,$idx,16 2200e71b7053SJung-uk Kim mtctr $rounds 2201e71b7053SJung-uk Kim 2202e71b7053SJung-uk KimLtweak_xts_dec: 2203e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2204e71b7053SJung-uk Kim vcipher $tweak,$tweak,$rndkey1 2205e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key2 2206e71b7053SJung-uk Kim addi $idx,$idx,16 2207e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2208e71b7053SJung-uk Kim vcipher $tweak,$tweak,$rndkey0 2209e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key2 2210e71b7053SJung-uk Kim addi $idx,$idx,16 2211e71b7053SJung-uk Kim bdnz Ltweak_xts_dec 2212e71b7053SJung-uk Kim 2213e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2214e71b7053SJung-uk Kim vcipher $tweak,$tweak,$rndkey1 2215e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key2 2216e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2217e71b7053SJung-uk Kim vcipherlast $tweak,$tweak,$rndkey0 2218e71b7053SJung-uk Kim 2219e71b7053SJung-uk Kim li $ivp,0 # don't chain the tweak 2220e71b7053SJung-uk Kim b Lxts_dec 2221e71b7053SJung-uk Kim 2222e71b7053SJung-uk KimLxts_dec_no_key2: 2223e71b7053SJung-uk Kim neg $idx,$len 2224e71b7053SJung-uk Kim andi. 
$idx,$idx,15 2225e71b7053SJung-uk Kim add $len,$len,$idx # in "tweak chaining" 2226e71b7053SJung-uk Kim # mode only complete 2227e71b7053SJung-uk Kim # blocks are processed 2228e71b7053SJung-uk KimLxts_dec: 2229e71b7053SJung-uk Kim lvx $inptail,0,$inp 2230e71b7053SJung-uk Kim addi $inp,$inp,16 2231e71b7053SJung-uk Kim 2232e71b7053SJung-uk Kim ?lvsl $keyperm,0,$key1 # prepare for unaligned key 2233e71b7053SJung-uk Kim lwz $rounds,240($key1) 2234e71b7053SJung-uk Kim srwi $rounds,$rounds,1 2235e71b7053SJung-uk Kim subi $rounds,$rounds,1 2236e71b7053SJung-uk Kim li $idx,16 2237e71b7053SJung-uk Kim 2238e71b7053SJung-uk Kim vslb $eighty7,$seven,$seven # 0x808080..80 2239e71b7053SJung-uk Kim vor $eighty7,$eighty7,$seven # 0x878787..87 2240e71b7053SJung-uk Kim vspltisb $tmp,1 # 0x010101..01 2241e71b7053SJung-uk Kim vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 2242e71b7053SJung-uk Kim 2243e71b7053SJung-uk Kim ${UCMP}i $len,96 2244e71b7053SJung-uk Kim bge _aesp8_xts_decrypt6x 2245e71b7053SJung-uk Kim 2246e71b7053SJung-uk Kim lvx $rndkey0,0,$key1 2247e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2248e71b7053SJung-uk Kim addi $idx,$idx,16 2249e71b7053SJung-uk Kim vperm $inout,$inout,$inptail,$inpperm 2250e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2251e71b7053SJung-uk Kim vxor $inout,$inout,$tweak 2252e71b7053SJung-uk Kim vxor $inout,$inout,$rndkey0 2253e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2254e71b7053SJung-uk Kim addi $idx,$idx,16 2255e71b7053SJung-uk Kim mtctr $rounds 2256e71b7053SJung-uk Kim 2257e71b7053SJung-uk Kim ${UCMP}i $len,16 2258e71b7053SJung-uk Kim blt Ltail_xts_dec 2259e71b7053SJung-uk Kim be?b Loop_xts_dec 2260e71b7053SJung-uk Kim 2261e71b7053SJung-uk Kim.align 5 2262e71b7053SJung-uk KimLoop_xts_dec: 2263e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2264e71b7053SJung-uk Kim vncipher $inout,$inout,$rndkey1 2265e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2266e71b7053SJung-uk Kim addi $idx,$idx,16 2267e71b7053SJung-uk 
Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2268e71b7053SJung-uk Kim vncipher $inout,$inout,$rndkey0 2269e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2270e71b7053SJung-uk Kim addi $idx,$idx,16 2271e71b7053SJung-uk Kim bdnz Loop_xts_dec 2272e71b7053SJung-uk Kim 2273e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2274e71b7053SJung-uk Kim vncipher $inout,$inout,$rndkey1 2275e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2276e71b7053SJung-uk Kim li $idx,16 2277e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2278e71b7053SJung-uk Kim vxor $rndkey0,$rndkey0,$tweak 2279e71b7053SJung-uk Kim vncipherlast $output,$inout,$rndkey0 2280e71b7053SJung-uk Kim 2281e71b7053SJung-uk Kim le?vperm $tmp,$output,$output,$leperm 2282e71b7053SJung-uk Kim be?nop 2283e71b7053SJung-uk Kim le?stvx_u $tmp,0,$out 2284e71b7053SJung-uk Kim be?stvx_u $output,0,$out 2285e71b7053SJung-uk Kim addi $out,$out,16 2286e71b7053SJung-uk Kim 2287e71b7053SJung-uk Kim subic. $len,$len,16 2288e71b7053SJung-uk Kim beq Lxts_dec_done 2289e71b7053SJung-uk Kim 2290e71b7053SJung-uk Kim vmr $inout,$inptail 2291e71b7053SJung-uk Kim lvx $inptail,0,$inp 2292e71b7053SJung-uk Kim addi $inp,$inp,16 2293e71b7053SJung-uk Kim lvx $rndkey0,0,$key1 2294e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2295e71b7053SJung-uk Kim addi $idx,$idx,16 2296e71b7053SJung-uk Kim 2297e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2298e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2299e71b7053SJung-uk Kim vsldoi $tmp,$tmp,$tmp,15 2300e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2301e71b7053SJung-uk Kim vxor $tweak,$tweak,$tmp 2302e71b7053SJung-uk Kim 2303e71b7053SJung-uk Kim vperm $inout,$inout,$inptail,$inpperm 2304e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2305e71b7053SJung-uk Kim vxor $inout,$inout,$tweak 2306e71b7053SJung-uk Kim vxor $inout,$inout,$rndkey0 2307e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2308e71b7053SJung-uk Kim addi $idx,$idx,16 2309e71b7053SJung-uk 
Kim 2310e71b7053SJung-uk Kim mtctr $rounds 2311e71b7053SJung-uk Kim ${UCMP}i $len,16 2312e71b7053SJung-uk Kim bge Loop_xts_dec 2313e71b7053SJung-uk Kim 2314e71b7053SJung-uk KimLtail_xts_dec: 2315e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2316e71b7053SJung-uk Kim vaddubm $tweak1,$tweak,$tweak 2317e71b7053SJung-uk Kim vsldoi $tmp,$tmp,$tmp,15 2318e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2319e71b7053SJung-uk Kim vxor $tweak1,$tweak1,$tmp 2320e71b7053SJung-uk Kim 2321e71b7053SJung-uk Kim subi $inp,$inp,16 2322e71b7053SJung-uk Kim add $inp,$inp,$len 2323e71b7053SJung-uk Kim 2324e71b7053SJung-uk Kim vxor $inout,$inout,$tweak # :-( 2325e71b7053SJung-uk Kim vxor $inout,$inout,$tweak1 # :-) 2326e71b7053SJung-uk Kim 2327e71b7053SJung-uk KimLoop_xts_dec_short: 2328e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2329e71b7053SJung-uk Kim vncipher $inout,$inout,$rndkey1 2330e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2331e71b7053SJung-uk Kim addi $idx,$idx,16 2332e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2333e71b7053SJung-uk Kim vncipher $inout,$inout,$rndkey0 2334e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2335e71b7053SJung-uk Kim addi $idx,$idx,16 2336e71b7053SJung-uk Kim bdnz Loop_xts_dec_short 2337e71b7053SJung-uk Kim 2338e71b7053SJung-uk Kim ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm 2339e71b7053SJung-uk Kim vncipher $inout,$inout,$rndkey1 2340e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2341e71b7053SJung-uk Kim li $idx,16 2342e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2343e71b7053SJung-uk Kim vxor $rndkey0,$rndkey0,$tweak1 2344e71b7053SJung-uk Kim vncipherlast $output,$inout,$rndkey0 2345e71b7053SJung-uk Kim 2346e71b7053SJung-uk Kim le?vperm $tmp,$output,$output,$leperm 2347e71b7053SJung-uk Kim be?nop 2348e71b7053SJung-uk Kim le?stvx_u $tmp,0,$out 2349e71b7053SJung-uk Kim be?stvx_u $output,0,$out 2350e71b7053SJung-uk Kim 2351e71b7053SJung-uk Kim vmr $inout,$inptail 2352e71b7053SJung-uk 
Kim lvx $inptail,0,$inp 2353e71b7053SJung-uk Kim #addi $inp,$inp,16 2354e71b7053SJung-uk Kim lvx $rndkey0,0,$key1 2355e71b7053SJung-uk Kim lvx $rndkey1,$idx,$key1 2356e71b7053SJung-uk Kim addi $idx,$idx,16 2357e71b7053SJung-uk Kim vperm $inout,$inout,$inptail,$inpperm 2358e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm 2359e71b7053SJung-uk Kim 2360e71b7053SJung-uk Kim lvsr $inpperm,0,$len # $inpperm is no longer needed 2361e71b7053SJung-uk Kim vxor $inptail,$inptail,$inptail # $inptail is no longer needed 2362e71b7053SJung-uk Kim vspltisb $tmp,-1 2363e71b7053SJung-uk Kim vperm $inptail,$inptail,$tmp,$inpperm 2364e71b7053SJung-uk Kim vsel $inout,$inout,$output,$inptail 2365e71b7053SJung-uk Kim 2366e71b7053SJung-uk Kim vxor $rndkey0,$rndkey0,$tweak 2367e71b7053SJung-uk Kim vxor $inout,$inout,$rndkey0 2368e71b7053SJung-uk Kim lvx $rndkey0,$idx,$key1 2369e71b7053SJung-uk Kim addi $idx,$idx,16 2370e71b7053SJung-uk Kim 2371e71b7053SJung-uk Kim subi r11,$out,1 2372e71b7053SJung-uk Kim mtctr $len 2373e71b7053SJung-uk Kim li $len,16 2374e71b7053SJung-uk KimLoop_xts_dec_steal: 2375e71b7053SJung-uk Kim lbzu r0,1(r11) 2376e71b7053SJung-uk Kim stb r0,16(r11) 2377e71b7053SJung-uk Kim bdnz Loop_xts_dec_steal 2378e71b7053SJung-uk Kim 2379e71b7053SJung-uk Kim mtctr $rounds 2380e71b7053SJung-uk Kim b Loop_xts_dec # one more time... 
2381e71b7053SJung-uk Kim 2382e71b7053SJung-uk KimLxts_dec_done: 2383e71b7053SJung-uk Kim ${UCMP}i $ivp,0 2384e71b7053SJung-uk Kim beq Lxts_dec_ret 2385e71b7053SJung-uk Kim 2386e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2387e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2388e71b7053SJung-uk Kim vsldoi $tmp,$tmp,$tmp,15 2389e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2390e71b7053SJung-uk Kim vxor $tweak,$tweak,$tmp 2391e71b7053SJung-uk Kim 2392e71b7053SJung-uk Kim le?vperm $tweak,$tweak,$tweak,$leperm 2393e71b7053SJung-uk Kim stvx_u $tweak,0,$ivp 2394e71b7053SJung-uk Kim 2395e71b7053SJung-uk KimLxts_dec_ret: 2396e71b7053SJung-uk Kim mtspr 256,r12 # restore vrsave 2397e71b7053SJung-uk Kim li r3,0 2398e71b7053SJung-uk Kim blr 2399e71b7053SJung-uk Kim .long 0 2400e71b7053SJung-uk Kim .byte 0,12,0x04,0,0x80,6,6,0 2401e71b7053SJung-uk Kim .long 0 2402e71b7053SJung-uk Kim.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt 2403e71b7053SJung-uk Kim___ 2404e71b7053SJung-uk Kim######################################################################### 2405e71b7053SJung-uk Kim{{ # Optimized XTS procedures # 2406e71b7053SJung-uk Kimmy $key_=$key2; 2407e71b7053SJung-uk Kimmy ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); 2408e71b7053SJung-uk Kim $x00=0 if ($flavour =~ /osx/); 2409e71b7053SJung-uk Kimmy ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5)); 2410e71b7053SJung-uk Kimmy ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); 2411e71b7053SJung-uk Kimmy ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); 2412e71b7053SJung-uk Kimmy $rndkey0="v23"; # v24-v25 rotating buffer for first found keys 2413e71b7053SJung-uk Kim # v26-v31 last 6 round keys 2414e71b7053SJung-uk Kimmy ($keyperm)=($out0); # aliases with "caller", redundant assignment 2415e71b7053SJung-uk Kimmy $taillen=$x70; 2416e71b7053SJung-uk Kim 2417e71b7053SJung-uk Kim$code.=<<___; 2418e71b7053SJung-uk Kim.align 5 2419e71b7053SJung-uk 
Kim_aesp8_xts_encrypt6x: 2420e71b7053SJung-uk Kim $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 2421e71b7053SJung-uk Kim mflr r11 2422e71b7053SJung-uk Kim li r7,`$FRAME+8*16+15` 2423e71b7053SJung-uk Kim li r3,`$FRAME+8*16+31` 2424e71b7053SJung-uk Kim $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 2425e71b7053SJung-uk Kim stvx v20,r7,$sp # ABI says so 2426e71b7053SJung-uk Kim addi r7,r7,32 2427e71b7053SJung-uk Kim stvx v21,r3,$sp 2428e71b7053SJung-uk Kim addi r3,r3,32 2429e71b7053SJung-uk Kim stvx v22,r7,$sp 2430e71b7053SJung-uk Kim addi r7,r7,32 2431e71b7053SJung-uk Kim stvx v23,r3,$sp 2432e71b7053SJung-uk Kim addi r3,r3,32 2433e71b7053SJung-uk Kim stvx v24,r7,$sp 2434e71b7053SJung-uk Kim addi r7,r7,32 2435e71b7053SJung-uk Kim stvx v25,r3,$sp 2436e71b7053SJung-uk Kim addi r3,r3,32 2437e71b7053SJung-uk Kim stvx v26,r7,$sp 2438e71b7053SJung-uk Kim addi r7,r7,32 2439e71b7053SJung-uk Kim stvx v27,r3,$sp 2440e71b7053SJung-uk Kim addi r3,r3,32 2441e71b7053SJung-uk Kim stvx v28,r7,$sp 2442e71b7053SJung-uk Kim addi r7,r7,32 2443e71b7053SJung-uk Kim stvx v29,r3,$sp 2444e71b7053SJung-uk Kim addi r3,r3,32 2445e71b7053SJung-uk Kim stvx v30,r7,$sp 2446e71b7053SJung-uk Kim stvx v31,r3,$sp 2447e71b7053SJung-uk Kim li r0,-1 2448e71b7053SJung-uk Kim stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 2449e71b7053SJung-uk Kim li $x10,0x10 2450e71b7053SJung-uk Kim $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 2451e71b7053SJung-uk Kim li $x20,0x20 2452e71b7053SJung-uk Kim $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 2453e71b7053SJung-uk Kim li $x30,0x30 2454e71b7053SJung-uk Kim $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 2455e71b7053SJung-uk Kim li $x40,0x40 2456e71b7053SJung-uk Kim $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 2457e71b7053SJung-uk Kim li $x50,0x50 2458e71b7053SJung-uk Kim $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 2459e71b7053SJung-uk Kim li $x60,0x60 2460e71b7053SJung-uk Kim $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 2461e71b7053SJung-uk Kim li $x70,0x70 2462e71b7053SJung-uk Kim mtspr 256,r0 
2463e71b7053SJung-uk Kim 2464*a7148ab3SEnji Cooper # Reverse eighty7 to 0x010101..87 2465*a7148ab3SEnji Cooper xxlor 2, 32+$eighty7, 32+$eighty7 2466*a7148ab3SEnji Cooper vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 2467*a7148ab3SEnji Cooper xxlor 1, 32+$eighty7, 32+$eighty7 2468*a7148ab3SEnji Cooper 2469*a7148ab3SEnji Cooper # Load XOR contents. 0xf102132435465768798a9bacbdcedfe 2470*a7148ab3SEnji Cooper mr $x70, r6 2471*a7148ab3SEnji Cooper bl Lconsts 2472*a7148ab3SEnji Cooper lxvw4x 0, $x40, r6 # load XOR contents 2473*a7148ab3SEnji Cooper mr r6, $x70 2474*a7148ab3SEnji Cooper li $x70,0x70 2475*a7148ab3SEnji Cooper 2476e71b7053SJung-uk Kim subi $rounds,$rounds,3 # -4 in total 2477e71b7053SJung-uk Kim 2478e71b7053SJung-uk Kim lvx $rndkey0,$x00,$key1 # load key schedule 2479e71b7053SJung-uk Kim lvx v30,$x10,$key1 2480e71b7053SJung-uk Kim addi $key1,$key1,0x20 2481e71b7053SJung-uk Kim lvx v31,$x00,$key1 2482e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,v30,$keyperm 2483e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 2484e71b7053SJung-uk Kim mtctr $rounds 2485e71b7053SJung-uk Kim 2486e71b7053SJung-uk KimLoad_xts_enc_key: 2487e71b7053SJung-uk Kim ?vperm v24,v30,v31,$keyperm 2488e71b7053SJung-uk Kim lvx v30,$x10,$key1 2489e71b7053SJung-uk Kim addi $key1,$key1,0x20 2490e71b7053SJung-uk Kim stvx v24,$x00,$key_ # off-load round[1] 2491e71b7053SJung-uk Kim ?vperm v25,v31,v30,$keyperm 2492e71b7053SJung-uk Kim lvx v31,$x00,$key1 2493e71b7053SJung-uk Kim stvx v25,$x10,$key_ # off-load round[2] 2494e71b7053SJung-uk Kim addi $key_,$key_,0x20 2495e71b7053SJung-uk Kim bdnz Load_xts_enc_key 2496e71b7053SJung-uk Kim 2497e71b7053SJung-uk Kim lvx v26,$x10,$key1 2498e71b7053SJung-uk Kim ?vperm v24,v30,v31,$keyperm 2499e71b7053SJung-uk Kim lvx v27,$x20,$key1 2500e71b7053SJung-uk Kim stvx v24,$x00,$key_ # off-load round[3] 2501e71b7053SJung-uk Kim ?vperm v25,v31,v26,$keyperm 2502e71b7053SJung-uk Kim lvx v28,$x30,$key1 2503e71b7053SJung-uk Kim stvx v25,$x10,$key_ # off-load round[4] 
2504e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 2505e71b7053SJung-uk Kim ?vperm v26,v26,v27,$keyperm 2506e71b7053SJung-uk Kim lvx v29,$x40,$key1 2507e71b7053SJung-uk Kim ?vperm v27,v27,v28,$keyperm 2508e71b7053SJung-uk Kim lvx v30,$x50,$key1 2509e71b7053SJung-uk Kim ?vperm v28,v28,v29,$keyperm 2510e71b7053SJung-uk Kim lvx v31,$x60,$key1 2511e71b7053SJung-uk Kim ?vperm v29,v29,v30,$keyperm 2512e71b7053SJung-uk Kim lvx $twk5,$x70,$key1 # borrow $twk5 2513e71b7053SJung-uk Kim ?vperm v30,v30,v31,$keyperm 2514e71b7053SJung-uk Kim lvx v24,$x00,$key_ # pre-load round[1] 2515e71b7053SJung-uk Kim ?vperm v31,v31,$twk5,$keyperm 2516e71b7053SJung-uk Kim lvx v25,$x10,$key_ # pre-load round[2] 2517e71b7053SJung-uk Kim 2518*a7148ab3SEnji Cooper # Switch to use the following codes with 0x010101..87 to generate tweak. 2519*a7148ab3SEnji Cooper # eighty7 = 0x010101..87 2520*a7148ab3SEnji Cooper # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits 2521*a7148ab3SEnji Cooper # vand tmp, tmp, eighty7 # last byte with carry 2522*a7148ab3SEnji Cooper # vaddubm tweak, tweak, tweak # left shift 1 bit (x2) 2523*a7148ab3SEnji Cooper # xxlor vsx, 0, 0 2524*a7148ab3SEnji Cooper # vpermxor tweak, tweak, tmp, vsx 2525*a7148ab3SEnji Cooper 2526e71b7053SJung-uk Kim vperm $in0,$inout,$inptail,$inpperm 2527e71b7053SJung-uk Kim subi $inp,$inp,31 # undo "caller" 2528e71b7053SJung-uk Kim vxor $twk0,$tweak,$rndkey0 2529e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2530e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2531e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2532e71b7053SJung-uk Kim vxor $out0,$in0,$twk0 2533*a7148ab3SEnji Cooper xxlor 32+$in1, 0, 0 2534*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in1 2535e71b7053SJung-uk Kim 2536e71b7053SJung-uk Kim lvx_u $in1,$x10,$inp 2537e71b7053SJung-uk Kim vxor $twk1,$tweak,$rndkey0 2538e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2539e71b7053SJung-uk Kim vaddubm 
$tweak,$tweak,$tweak 2540e71b7053SJung-uk Kim le?vperm $in1,$in1,$in1,$leperm 2541e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2542e71b7053SJung-uk Kim vxor $out1,$in1,$twk1 2543*a7148ab3SEnji Cooper xxlor 32+$in2, 0, 0 2544*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in2 2545e71b7053SJung-uk Kim 2546e71b7053SJung-uk Kim lvx_u $in2,$x20,$inp 2547e71b7053SJung-uk Kim andi. $taillen,$len,15 2548e71b7053SJung-uk Kim vxor $twk2,$tweak,$rndkey0 2549e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2550e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2551e71b7053SJung-uk Kim le?vperm $in2,$in2,$in2,$leperm 2552e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2553e71b7053SJung-uk Kim vxor $out2,$in2,$twk2 2554*a7148ab3SEnji Cooper xxlor 32+$in3, 0, 0 2555*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in3 2556e71b7053SJung-uk Kim 2557e71b7053SJung-uk Kim lvx_u $in3,$x30,$inp 2558e71b7053SJung-uk Kim sub $len,$len,$taillen 2559e71b7053SJung-uk Kim vxor $twk3,$tweak,$rndkey0 2560e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2561e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2562e71b7053SJung-uk Kim le?vperm $in3,$in3,$in3,$leperm 2563e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2564e71b7053SJung-uk Kim vxor $out3,$in3,$twk3 2565*a7148ab3SEnji Cooper xxlor 32+$in4, 0, 0 2566*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in4 2567e71b7053SJung-uk Kim 2568e71b7053SJung-uk Kim lvx_u $in4,$x40,$inp 2569e71b7053SJung-uk Kim subi $len,$len,0x60 2570e71b7053SJung-uk Kim vxor $twk4,$tweak,$rndkey0 2571e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2572e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2573e71b7053SJung-uk Kim le?vperm $in4,$in4,$in4,$leperm 2574e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2575e71b7053SJung-uk Kim vxor $out4,$in4,$twk4 2576*a7148ab3SEnji Cooper xxlor 32+$in5, 0, 0 2577*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in5 2578e71b7053SJung-uk Kim 2579e71b7053SJung-uk Kim lvx_u 
$in5,$x50,$inp 2580e71b7053SJung-uk Kim addi $inp,$inp,0x60 2581e71b7053SJung-uk Kim vxor $twk5,$tweak,$rndkey0 2582e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2583e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2584e71b7053SJung-uk Kim le?vperm $in5,$in5,$in5,$leperm 2585e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2586e71b7053SJung-uk Kim vxor $out5,$in5,$twk5 2587*a7148ab3SEnji Cooper xxlor 32+$in0, 0, 0 2588*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in0 2589e71b7053SJung-uk Kim 2590e71b7053SJung-uk Kim vxor v31,v31,$rndkey0 2591e71b7053SJung-uk Kim mtctr $rounds 2592e71b7053SJung-uk Kim b Loop_xts_enc6x 2593e71b7053SJung-uk Kim 2594e71b7053SJung-uk Kim.align 5 2595e71b7053SJung-uk KimLoop_xts_enc6x: 2596e71b7053SJung-uk Kim vcipher $out0,$out0,v24 2597e71b7053SJung-uk Kim vcipher $out1,$out1,v24 2598e71b7053SJung-uk Kim vcipher $out2,$out2,v24 2599e71b7053SJung-uk Kim vcipher $out3,$out3,v24 2600e71b7053SJung-uk Kim vcipher $out4,$out4,v24 2601e71b7053SJung-uk Kim vcipher $out5,$out5,v24 2602e71b7053SJung-uk Kim lvx v24,$x20,$key_ # round[3] 2603e71b7053SJung-uk Kim addi $key_,$key_,0x20 2604e71b7053SJung-uk Kim 2605e71b7053SJung-uk Kim vcipher $out0,$out0,v25 2606e71b7053SJung-uk Kim vcipher $out1,$out1,v25 2607e71b7053SJung-uk Kim vcipher $out2,$out2,v25 2608e71b7053SJung-uk Kim vcipher $out3,$out3,v25 2609e71b7053SJung-uk Kim vcipher $out4,$out4,v25 2610e71b7053SJung-uk Kim vcipher $out5,$out5,v25 2611e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 2612e71b7053SJung-uk Kim bdnz Loop_xts_enc6x 2613e71b7053SJung-uk Kim 2614*a7148ab3SEnji Cooper xxlor 32+$eighty7, 1, 1 # 0x010101..87 2615*a7148ab3SEnji Cooper 2616e71b7053SJung-uk Kim subic $len,$len,96 # $len-=96 2617e71b7053SJung-uk Kim vxor $in0,$twk0,v31 # xor with last round key 2618e71b7053SJung-uk Kim vcipher $out0,$out0,v24 2619e71b7053SJung-uk Kim vcipher $out1,$out1,v24 2620e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2621e71b7053SJung-uk Kim 
vxor $twk0,$tweak,$rndkey0 2622e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2623e71b7053SJung-uk Kim vcipher $out2,$out2,v24 2624e71b7053SJung-uk Kim vcipher $out3,$out3,v24 2625e71b7053SJung-uk Kim vcipher $out4,$out4,v24 2626e71b7053SJung-uk Kim vcipher $out5,$out5,v24 2627e71b7053SJung-uk Kim 2628e71b7053SJung-uk Kim subfe. r0,r0,r0 # borrow?-1:0 2629e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2630e71b7053SJung-uk Kim vcipher $out0,$out0,v25 2631e71b7053SJung-uk Kim vcipher $out1,$out1,v25 2632*a7148ab3SEnji Cooper xxlor 32+$in1, 0, 0 2633*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in1 2634e71b7053SJung-uk Kim vcipher $out2,$out2,v25 2635e71b7053SJung-uk Kim vcipher $out3,$out3,v25 2636e71b7053SJung-uk Kim vxor $in1,$twk1,v31 2637e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2638e71b7053SJung-uk Kim vxor $twk1,$tweak,$rndkey0 2639e71b7053SJung-uk Kim vcipher $out4,$out4,v25 2640e71b7053SJung-uk Kim vcipher $out5,$out5,v25 2641e71b7053SJung-uk Kim 2642e71b7053SJung-uk Kim and r0,r0,$len 2643e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2644e71b7053SJung-uk Kim vcipher $out0,$out0,v26 2645e71b7053SJung-uk Kim vcipher $out1,$out1,v26 2646e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2647e71b7053SJung-uk Kim vcipher $out2,$out2,v26 2648e71b7053SJung-uk Kim vcipher $out3,$out3,v26 2649*a7148ab3SEnji Cooper xxlor 32+$in2, 0, 0 2650*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in2 2651e71b7053SJung-uk Kim vcipher $out4,$out4,v26 2652e71b7053SJung-uk Kim vcipher $out5,$out5,v26 2653e71b7053SJung-uk Kim 2654e71b7053SJung-uk Kim add $inp,$inp,r0 # $inp is adjusted in such 2655e71b7053SJung-uk Kim # way that at exit from the 2656e71b7053SJung-uk Kim # loop inX-in5 are loaded 2657e71b7053SJung-uk Kim # with last "words" 2658e71b7053SJung-uk Kim vxor $in2,$twk2,v31 2659e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2660e71b7053SJung-uk Kim vxor $twk2,$tweak,$rndkey0 2661e71b7053SJung-uk Kim vaddubm 
$tweak,$tweak,$tweak 2662e71b7053SJung-uk Kim vcipher $out0,$out0,v27 2663e71b7053SJung-uk Kim vcipher $out1,$out1,v27 2664e71b7053SJung-uk Kim vcipher $out2,$out2,v27 2665e71b7053SJung-uk Kim vcipher $out3,$out3,v27 2666e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2667e71b7053SJung-uk Kim vcipher $out4,$out4,v27 2668e71b7053SJung-uk Kim vcipher $out5,$out5,v27 2669e71b7053SJung-uk Kim 2670e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 2671*a7148ab3SEnji Cooper xxlor 32+$in3, 0, 0 2672*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in3 2673e71b7053SJung-uk Kim vcipher $out0,$out0,v28 2674e71b7053SJung-uk Kim vcipher $out1,$out1,v28 2675e71b7053SJung-uk Kim vxor $in3,$twk3,v31 2676e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2677e71b7053SJung-uk Kim vxor $twk3,$tweak,$rndkey0 2678e71b7053SJung-uk Kim vcipher $out2,$out2,v28 2679e71b7053SJung-uk Kim vcipher $out3,$out3,v28 2680e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2681e71b7053SJung-uk Kim vcipher $out4,$out4,v28 2682e71b7053SJung-uk Kim vcipher $out5,$out5,v28 2683e71b7053SJung-uk Kim lvx v24,$x00,$key_ # re-pre-load round[1] 2684e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2685e71b7053SJung-uk Kim 2686e71b7053SJung-uk Kim vcipher $out0,$out0,v29 2687e71b7053SJung-uk Kim vcipher $out1,$out1,v29 2688*a7148ab3SEnji Cooper xxlor 32+$in4, 0, 0 2689*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in4 2690e71b7053SJung-uk Kim vcipher $out2,$out2,v29 2691e71b7053SJung-uk Kim vcipher $out3,$out3,v29 2692e71b7053SJung-uk Kim vxor $in4,$twk4,v31 2693e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2694e71b7053SJung-uk Kim vxor $twk4,$tweak,$rndkey0 2695e71b7053SJung-uk Kim vcipher $out4,$out4,v29 2696e71b7053SJung-uk Kim vcipher $out5,$out5,v29 2697e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 2698e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2699e71b7053SJung-uk Kim 2700e71b7053SJung-uk Kim vcipher $out0,$out0,v30 
2701e71b7053SJung-uk Kim vcipher $out1,$out1,v30 2702e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2703e71b7053SJung-uk Kim vcipher $out2,$out2,v30 2704e71b7053SJung-uk Kim vcipher $out3,$out3,v30 2705*a7148ab3SEnji Cooper xxlor 32+$in5, 0, 0 2706*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in5 2707e71b7053SJung-uk Kim vcipher $out4,$out4,v30 2708e71b7053SJung-uk Kim vcipher $out5,$out5,v30 2709e71b7053SJung-uk Kim vxor $in5,$twk5,v31 2710e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 2711e71b7053SJung-uk Kim vxor $twk5,$tweak,$rndkey0 2712e71b7053SJung-uk Kim 2713e71b7053SJung-uk Kim vcipherlast $out0,$out0,$in0 2714e71b7053SJung-uk Kim lvx_u $in0,$x00,$inp # load next input block 2715e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 2716e71b7053SJung-uk Kim vcipherlast $out1,$out1,$in1 2717e71b7053SJung-uk Kim lvx_u $in1,$x10,$inp 2718e71b7053SJung-uk Kim vcipherlast $out2,$out2,$in2 2719e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 2720e71b7053SJung-uk Kim lvx_u $in2,$x20,$inp 2721e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 2722e71b7053SJung-uk Kim vcipherlast $out3,$out3,$in3 2723e71b7053SJung-uk Kim le?vperm $in1,$in1,$in1,$leperm 2724e71b7053SJung-uk Kim lvx_u $in3,$x30,$inp 2725e71b7053SJung-uk Kim vcipherlast $out4,$out4,$in4 2726e71b7053SJung-uk Kim le?vperm $in2,$in2,$in2,$leperm 2727e71b7053SJung-uk Kim lvx_u $in4,$x40,$inp 2728*a7148ab3SEnji Cooper xxlor 10, 32+$in0, 32+$in0 2729*a7148ab3SEnji Cooper xxlor 32+$in0, 0, 0 2730*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in0 2731*a7148ab3SEnji Cooper xxlor 32+$in0, 10, 10 2732e71b7053SJung-uk Kim vcipherlast $tmp,$out5,$in5 # last block might be needed 2733e71b7053SJung-uk Kim # in stealing mode 2734e71b7053SJung-uk Kim le?vperm $in3,$in3,$in3,$leperm 2735e71b7053SJung-uk Kim lvx_u $in5,$x50,$inp 2736e71b7053SJung-uk Kim addi $inp,$inp,0x60 2737e71b7053SJung-uk Kim le?vperm $in4,$in4,$in4,$leperm 2738e71b7053SJung-uk Kim le?vperm $in5,$in5,$in5,$leperm 
2739e71b7053SJung-uk Kim 2740e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 2741e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 2742e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 2743e71b7053SJung-uk Kim vxor $out0,$in0,$twk0 2744e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 2745e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 2746e71b7053SJung-uk Kim vxor $out1,$in1,$twk1 2747e71b7053SJung-uk Kim le?vperm $out3,$out3,$out3,$leperm 2748e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 2749e71b7053SJung-uk Kim vxor $out2,$in2,$twk2 2750e71b7053SJung-uk Kim le?vperm $out4,$out4,$out4,$leperm 2751e71b7053SJung-uk Kim stvx_u $out3,$x30,$out 2752e71b7053SJung-uk Kim vxor $out3,$in3,$twk3 2753e71b7053SJung-uk Kim le?vperm $out5,$tmp,$tmp,$leperm 2754e71b7053SJung-uk Kim stvx_u $out4,$x40,$out 2755e71b7053SJung-uk Kim vxor $out4,$in4,$twk4 2756e71b7053SJung-uk Kim le?stvx_u $out5,$x50,$out 2757e71b7053SJung-uk Kim be?stvx_u $tmp, $x50,$out 2758e71b7053SJung-uk Kim vxor $out5,$in5,$twk5 2759e71b7053SJung-uk Kim addi $out,$out,0x60 2760e71b7053SJung-uk Kim 2761e71b7053SJung-uk Kim mtctr $rounds 2762e71b7053SJung-uk Kim beq Loop_xts_enc6x # did $len-=96 borrow? 2763e71b7053SJung-uk Kim 2764*a7148ab3SEnji Cooper xxlor 32+$eighty7, 2, 2 # 0x870101..01 2765*a7148ab3SEnji Cooper 2766e71b7053SJung-uk Kim addic. 
$len,$len,0x60 2767e71b7053SJung-uk Kim beq Lxts_enc6x_zero 2768e71b7053SJung-uk Kim cmpwi $len,0x20 2769e71b7053SJung-uk Kim blt Lxts_enc6x_one 2770e71b7053SJung-uk Kim nop 2771e71b7053SJung-uk Kim beq Lxts_enc6x_two 2772e71b7053SJung-uk Kim cmpwi $len,0x40 2773e71b7053SJung-uk Kim blt Lxts_enc6x_three 2774e71b7053SJung-uk Kim nop 2775e71b7053SJung-uk Kim beq Lxts_enc6x_four 2776e71b7053SJung-uk Kim 2777e71b7053SJung-uk KimLxts_enc6x_five: 2778e71b7053SJung-uk Kim vxor $out0,$in1,$twk0 2779e71b7053SJung-uk Kim vxor $out1,$in2,$twk1 2780e71b7053SJung-uk Kim vxor $out2,$in3,$twk2 2781e71b7053SJung-uk Kim vxor $out3,$in4,$twk3 2782e71b7053SJung-uk Kim vxor $out4,$in5,$twk4 2783e71b7053SJung-uk Kim 2784e71b7053SJung-uk Kim bl _aesp8_xts_enc5x 2785e71b7053SJung-uk Kim 2786e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 2787e71b7053SJung-uk Kim vmr $twk0,$twk5 # unused tweak 2788e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 2789e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 2790e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 2791e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 2792e71b7053SJung-uk Kim le?vperm $out3,$out3,$out3,$leperm 2793e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 2794e71b7053SJung-uk Kim vxor $tmp,$out4,$twk5 # last block prep for stealing 2795e71b7053SJung-uk Kim le?vperm $out4,$out4,$out4,$leperm 2796e71b7053SJung-uk Kim stvx_u $out3,$x30,$out 2797e71b7053SJung-uk Kim stvx_u $out4,$x40,$out 2798e71b7053SJung-uk Kim addi $out,$out,0x50 2799e71b7053SJung-uk Kim bne Lxts_enc6x_steal 2800e71b7053SJung-uk Kim b Lxts_enc6x_done 2801e71b7053SJung-uk Kim 2802e71b7053SJung-uk Kim.align 4 2803e71b7053SJung-uk KimLxts_enc6x_four: 2804e71b7053SJung-uk Kim vxor $out0,$in2,$twk0 2805e71b7053SJung-uk Kim vxor $out1,$in3,$twk1 2806e71b7053SJung-uk Kim vxor $out2,$in4,$twk2 2807e71b7053SJung-uk Kim vxor $out3,$in5,$twk3 2808e71b7053SJung-uk Kim vxor $out4,$out4,$out4 2809e71b7053SJung-uk Kim 2810e71b7053SJung-uk Kim bl 
_aesp8_xts_enc5x 2811e71b7053SJung-uk Kim 2812e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 2813e71b7053SJung-uk Kim vmr $twk0,$twk4 # unused tweak 2814e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 2815e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 2816e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 2817e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 2818e71b7053SJung-uk Kim vxor $tmp,$out3,$twk4 # last block prep for stealing 2819e71b7053SJung-uk Kim le?vperm $out3,$out3,$out3,$leperm 2820e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 2821e71b7053SJung-uk Kim stvx_u $out3,$x30,$out 2822e71b7053SJung-uk Kim addi $out,$out,0x40 2823e71b7053SJung-uk Kim bne Lxts_enc6x_steal 2824e71b7053SJung-uk Kim b Lxts_enc6x_done 2825e71b7053SJung-uk Kim 2826e71b7053SJung-uk Kim.align 4 2827e71b7053SJung-uk KimLxts_enc6x_three: 2828e71b7053SJung-uk Kim vxor $out0,$in3,$twk0 2829e71b7053SJung-uk Kim vxor $out1,$in4,$twk1 2830e71b7053SJung-uk Kim vxor $out2,$in5,$twk2 2831e71b7053SJung-uk Kim vxor $out3,$out3,$out3 2832e71b7053SJung-uk Kim vxor $out4,$out4,$out4 2833e71b7053SJung-uk Kim 2834e71b7053SJung-uk Kim bl _aesp8_xts_enc5x 2835e71b7053SJung-uk Kim 2836e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 2837e71b7053SJung-uk Kim vmr $twk0,$twk3 # unused tweak 2838e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 2839e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 2840e71b7053SJung-uk Kim vxor $tmp,$out2,$twk3 # last block prep for stealing 2841e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 2842e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 2843e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 2844e71b7053SJung-uk Kim addi $out,$out,0x30 2845e71b7053SJung-uk Kim bne Lxts_enc6x_steal 2846e71b7053SJung-uk Kim b Lxts_enc6x_done 2847e71b7053SJung-uk Kim 2848e71b7053SJung-uk Kim.align 4 2849e71b7053SJung-uk KimLxts_enc6x_two: 2850e71b7053SJung-uk Kim vxor $out0,$in4,$twk0 2851e71b7053SJung-uk Kim vxor $out1,$in5,$twk1 
2852e71b7053SJung-uk Kim vxor $out2,$out2,$out2 2853e71b7053SJung-uk Kim vxor $out3,$out3,$out3 2854e71b7053SJung-uk Kim vxor $out4,$out4,$out4 2855e71b7053SJung-uk Kim 2856e71b7053SJung-uk Kim bl _aesp8_xts_enc5x 2857e71b7053SJung-uk Kim 2858e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 2859e71b7053SJung-uk Kim vmr $twk0,$twk2 # unused tweak 2860e71b7053SJung-uk Kim vxor $tmp,$out1,$twk2 # last block prep for stealing 2861e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 2862e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 2863e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 2864e71b7053SJung-uk Kim addi $out,$out,0x20 2865e71b7053SJung-uk Kim bne Lxts_enc6x_steal 2866e71b7053SJung-uk Kim b Lxts_enc6x_done 2867e71b7053SJung-uk Kim 2868e71b7053SJung-uk Kim.align 4 2869e71b7053SJung-uk KimLxts_enc6x_one: 2870e71b7053SJung-uk Kim vxor $out0,$in5,$twk0 2871e71b7053SJung-uk Kim nop 2872e71b7053SJung-uk KimLoop_xts_enc1x: 2873e71b7053SJung-uk Kim vcipher $out0,$out0,v24 2874e71b7053SJung-uk Kim lvx v24,$x20,$key_ # round[3] 2875e71b7053SJung-uk Kim addi $key_,$key_,0x20 2876e71b7053SJung-uk Kim 2877e71b7053SJung-uk Kim vcipher $out0,$out0,v25 2878e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 2879e71b7053SJung-uk Kim bdnz Loop_xts_enc1x 2880e71b7053SJung-uk Kim 2881e71b7053SJung-uk Kim add $inp,$inp,$taillen 2882e71b7053SJung-uk Kim cmpwi $taillen,0 2883e71b7053SJung-uk Kim vcipher $out0,$out0,v24 2884e71b7053SJung-uk Kim 2885e71b7053SJung-uk Kim subi $inp,$inp,16 2886e71b7053SJung-uk Kim vcipher $out0,$out0,v25 2887e71b7053SJung-uk Kim 2888e71b7053SJung-uk Kim lvsr $inpperm,0,$taillen 2889e71b7053SJung-uk Kim vcipher $out0,$out0,v26 2890e71b7053SJung-uk Kim 2891e71b7053SJung-uk Kim lvx_u $in0,0,$inp 2892e71b7053SJung-uk Kim vcipher $out0,$out0,v27 2893e71b7053SJung-uk Kim 2894e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 2895e71b7053SJung-uk Kim vcipher $out0,$out0,v28 2896e71b7053SJung-uk Kim lvx v24,$x00,$key_ # re-pre-load 
round[1] 2897e71b7053SJung-uk Kim 2898e71b7053SJung-uk Kim vcipher $out0,$out0,v29 2899e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 2900e71b7053SJung-uk Kim vxor $twk0,$twk0,v31 2901e71b7053SJung-uk Kim 2902e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 2903e71b7053SJung-uk Kim vcipher $out0,$out0,v30 2904e71b7053SJung-uk Kim 2905e71b7053SJung-uk Kim vperm $in0,$in0,$in0,$inpperm 2906e71b7053SJung-uk Kim vcipherlast $out0,$out0,$twk0 2907e71b7053SJung-uk Kim 2908e71b7053SJung-uk Kim vmr $twk0,$twk1 # unused tweak 2909e71b7053SJung-uk Kim vxor $tmp,$out0,$twk1 # last block prep for stealing 2910e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 2911e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 2912e71b7053SJung-uk Kim addi $out,$out,0x10 2913e71b7053SJung-uk Kim bne Lxts_enc6x_steal 2914e71b7053SJung-uk Kim b Lxts_enc6x_done 2915e71b7053SJung-uk Kim 2916e71b7053SJung-uk Kim.align 4 2917e71b7053SJung-uk KimLxts_enc6x_zero: 2918e71b7053SJung-uk Kim cmpwi $taillen,0 2919e71b7053SJung-uk Kim beq Lxts_enc6x_done 2920e71b7053SJung-uk Kim 2921e71b7053SJung-uk Kim add $inp,$inp,$taillen 2922e71b7053SJung-uk Kim subi $inp,$inp,16 2923e71b7053SJung-uk Kim lvx_u $in0,0,$inp 2924e71b7053SJung-uk Kim lvsr $inpperm,0,$taillen # $in5 is no more 2925e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 2926e71b7053SJung-uk Kim vperm $in0,$in0,$in0,$inpperm 2927e71b7053SJung-uk Kim vxor $tmp,$tmp,$twk0 2928e71b7053SJung-uk KimLxts_enc6x_steal: 2929e71b7053SJung-uk Kim vxor $in0,$in0,$twk0 2930e71b7053SJung-uk Kim vxor $out0,$out0,$out0 2931e71b7053SJung-uk Kim vspltisb $out1,-1 2932e71b7053SJung-uk Kim vperm $out0,$out0,$out1,$inpperm 2933e71b7053SJung-uk Kim vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? 
2934e71b7053SJung-uk Kim 2935e71b7053SJung-uk Kim subi r30,$out,17 2936e71b7053SJung-uk Kim subi $out,$out,16 2937e71b7053SJung-uk Kim mtctr $taillen 2938e71b7053SJung-uk KimLoop_xts_enc6x_steal: 2939e71b7053SJung-uk Kim lbzu r0,1(r30) 2940e71b7053SJung-uk Kim stb r0,16(r30) 2941e71b7053SJung-uk Kim bdnz Loop_xts_enc6x_steal 2942e71b7053SJung-uk Kim 2943e71b7053SJung-uk Kim li $taillen,0 2944e71b7053SJung-uk Kim mtctr $rounds 2945e71b7053SJung-uk Kim b Loop_xts_enc1x # one more time... 2946e71b7053SJung-uk Kim 2947e71b7053SJung-uk Kim.align 4 2948e71b7053SJung-uk KimLxts_enc6x_done: 2949e71b7053SJung-uk Kim ${UCMP}i $ivp,0 2950e71b7053SJung-uk Kim beq Lxts_enc6x_ret 2951e71b7053SJung-uk Kim 2952e71b7053SJung-uk Kim vxor $tweak,$twk0,$rndkey0 2953e71b7053SJung-uk Kim le?vperm $tweak,$tweak,$tweak,$leperm 2954e71b7053SJung-uk Kim stvx_u $tweak,0,$ivp 2955e71b7053SJung-uk Kim 2956e71b7053SJung-uk KimLxts_enc6x_ret: 2957e71b7053SJung-uk Kim mtlr r11 2958e71b7053SJung-uk Kim li r10,`$FRAME+15` 2959e71b7053SJung-uk Kim li r11,`$FRAME+31` 2960e71b7053SJung-uk Kim stvx $seven,r10,$sp # wipe copies of round keys 2961e71b7053SJung-uk Kim addi r10,r10,32 2962e71b7053SJung-uk Kim stvx $seven,r11,$sp 2963e71b7053SJung-uk Kim addi r11,r11,32 2964e71b7053SJung-uk Kim stvx $seven,r10,$sp 2965e71b7053SJung-uk Kim addi r10,r10,32 2966e71b7053SJung-uk Kim stvx $seven,r11,$sp 2967e71b7053SJung-uk Kim addi r11,r11,32 2968e71b7053SJung-uk Kim stvx $seven,r10,$sp 2969e71b7053SJung-uk Kim addi r10,r10,32 2970e71b7053SJung-uk Kim stvx $seven,r11,$sp 2971e71b7053SJung-uk Kim addi r11,r11,32 2972e71b7053SJung-uk Kim stvx $seven,r10,$sp 2973e71b7053SJung-uk Kim addi r10,r10,32 2974e71b7053SJung-uk Kim stvx $seven,r11,$sp 2975e71b7053SJung-uk Kim addi r11,r11,32 2976e71b7053SJung-uk Kim 2977e71b7053SJung-uk Kim mtspr 256,$vrsave 2978e71b7053SJung-uk Kim lvx v20,r10,$sp # ABI says so 2979e71b7053SJung-uk Kim addi r10,r10,32 2980e71b7053SJung-uk Kim lvx v21,r11,$sp 2981e71b7053SJung-uk Kim addi 
r11,r11,32 2982e71b7053SJung-uk Kim lvx v22,r10,$sp 2983e71b7053SJung-uk Kim addi r10,r10,32 2984e71b7053SJung-uk Kim lvx v23,r11,$sp 2985e71b7053SJung-uk Kim addi r11,r11,32 2986e71b7053SJung-uk Kim lvx v24,r10,$sp 2987e71b7053SJung-uk Kim addi r10,r10,32 2988e71b7053SJung-uk Kim lvx v25,r11,$sp 2989e71b7053SJung-uk Kim addi r11,r11,32 2990e71b7053SJung-uk Kim lvx v26,r10,$sp 2991e71b7053SJung-uk Kim addi r10,r10,32 2992e71b7053SJung-uk Kim lvx v27,r11,$sp 2993e71b7053SJung-uk Kim addi r11,r11,32 2994e71b7053SJung-uk Kim lvx v28,r10,$sp 2995e71b7053SJung-uk Kim addi r10,r10,32 2996e71b7053SJung-uk Kim lvx v29,r11,$sp 2997e71b7053SJung-uk Kim addi r11,r11,32 2998e71b7053SJung-uk Kim lvx v30,r10,$sp 2999e71b7053SJung-uk Kim lvx v31,r11,$sp 3000e71b7053SJung-uk Kim $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3001e71b7053SJung-uk Kim $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3002e71b7053SJung-uk Kim $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3003e71b7053SJung-uk Kim $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3004e71b7053SJung-uk Kim $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3005e71b7053SJung-uk Kim $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3006e71b7053SJung-uk Kim addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3007e71b7053SJung-uk Kim blr 3008e71b7053SJung-uk Kim .long 0 3009e71b7053SJung-uk Kim .byte 0,12,0x04,1,0x80,6,6,0 3010e71b7053SJung-uk Kim .long 0 3011e71b7053SJung-uk Kim 3012e71b7053SJung-uk Kim.align 5 3013e71b7053SJung-uk Kim_aesp8_xts_enc5x: 3014e71b7053SJung-uk Kim vcipher $out0,$out0,v24 3015e71b7053SJung-uk Kim vcipher $out1,$out1,v24 3016e71b7053SJung-uk Kim vcipher $out2,$out2,v24 3017e71b7053SJung-uk Kim vcipher $out3,$out3,v24 3018e71b7053SJung-uk Kim vcipher $out4,$out4,v24 3019e71b7053SJung-uk Kim lvx v24,$x20,$key_ # round[3] 3020e71b7053SJung-uk Kim addi $key_,$key_,0x20 3021e71b7053SJung-uk Kim 3022e71b7053SJung-uk Kim vcipher $out0,$out0,v25 3023e71b7053SJung-uk Kim vcipher $out1,$out1,v25 3024e71b7053SJung-uk Kim vcipher $out2,$out2,v25 3025e71b7053SJung-uk Kim vcipher 
$out3,$out3,v25 3026e71b7053SJung-uk Kim vcipher $out4,$out4,v25 3027e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 3028e71b7053SJung-uk Kim bdnz _aesp8_xts_enc5x 3029e71b7053SJung-uk Kim 3030e71b7053SJung-uk Kim add $inp,$inp,$taillen 3031e71b7053SJung-uk Kim cmpwi $taillen,0 3032e71b7053SJung-uk Kim vcipher $out0,$out0,v24 3033e71b7053SJung-uk Kim vcipher $out1,$out1,v24 3034e71b7053SJung-uk Kim vcipher $out2,$out2,v24 3035e71b7053SJung-uk Kim vcipher $out3,$out3,v24 3036e71b7053SJung-uk Kim vcipher $out4,$out4,v24 3037e71b7053SJung-uk Kim 3038e71b7053SJung-uk Kim subi $inp,$inp,16 3039e71b7053SJung-uk Kim vcipher $out0,$out0,v25 3040e71b7053SJung-uk Kim vcipher $out1,$out1,v25 3041e71b7053SJung-uk Kim vcipher $out2,$out2,v25 3042e71b7053SJung-uk Kim vcipher $out3,$out3,v25 3043e71b7053SJung-uk Kim vcipher $out4,$out4,v25 3044e71b7053SJung-uk Kim vxor $twk0,$twk0,v31 3045e71b7053SJung-uk Kim 3046e71b7053SJung-uk Kim vcipher $out0,$out0,v26 3047e71b7053SJung-uk Kim lvsr $inpperm,0,$taillen # $in5 is no more 3048e71b7053SJung-uk Kim vcipher $out1,$out1,v26 3049e71b7053SJung-uk Kim vcipher $out2,$out2,v26 3050e71b7053SJung-uk Kim vcipher $out3,$out3,v26 3051e71b7053SJung-uk Kim vcipher $out4,$out4,v26 3052e71b7053SJung-uk Kim vxor $in1,$twk1,v31 3053e71b7053SJung-uk Kim 3054e71b7053SJung-uk Kim vcipher $out0,$out0,v27 3055e71b7053SJung-uk Kim lvx_u $in0,0,$inp 3056e71b7053SJung-uk Kim vcipher $out1,$out1,v27 3057e71b7053SJung-uk Kim vcipher $out2,$out2,v27 3058e71b7053SJung-uk Kim vcipher $out3,$out3,v27 3059e71b7053SJung-uk Kim vcipher $out4,$out4,v27 3060e71b7053SJung-uk Kim vxor $in2,$twk2,v31 3061e71b7053SJung-uk Kim 3062e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 3063e71b7053SJung-uk Kim vcipher $out0,$out0,v28 3064e71b7053SJung-uk Kim vcipher $out1,$out1,v28 3065e71b7053SJung-uk Kim vcipher $out2,$out2,v28 3066e71b7053SJung-uk Kim vcipher $out3,$out3,v28 3067e71b7053SJung-uk Kim vcipher $out4,$out4,v28 3068e71b7053SJung-uk Kim lvx 
v24,$x00,$key_ # re-pre-load round[1] 3069e71b7053SJung-uk Kim vxor $in3,$twk3,v31 3070e71b7053SJung-uk Kim 3071e71b7053SJung-uk Kim vcipher $out0,$out0,v29 3072e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 3073e71b7053SJung-uk Kim vcipher $out1,$out1,v29 3074e71b7053SJung-uk Kim vcipher $out2,$out2,v29 3075e71b7053SJung-uk Kim vcipher $out3,$out3,v29 3076e71b7053SJung-uk Kim vcipher $out4,$out4,v29 3077e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 3078e71b7053SJung-uk Kim vxor $in4,$twk4,v31 3079e71b7053SJung-uk Kim 3080e71b7053SJung-uk Kim vcipher $out0,$out0,v30 3081e71b7053SJung-uk Kim vperm $in0,$in0,$in0,$inpperm 3082e71b7053SJung-uk Kim vcipher $out1,$out1,v30 3083e71b7053SJung-uk Kim vcipher $out2,$out2,v30 3084e71b7053SJung-uk Kim vcipher $out3,$out3,v30 3085e71b7053SJung-uk Kim vcipher $out4,$out4,v30 3086e71b7053SJung-uk Kim 3087e71b7053SJung-uk Kim vcipherlast $out0,$out0,$twk0 3088e71b7053SJung-uk Kim vcipherlast $out1,$out1,$in1 3089e71b7053SJung-uk Kim vcipherlast $out2,$out2,$in2 3090e71b7053SJung-uk Kim vcipherlast $out3,$out3,$in3 3091e71b7053SJung-uk Kim vcipherlast $out4,$out4,$in4 3092e71b7053SJung-uk Kim blr 3093e71b7053SJung-uk Kim .long 0 3094e71b7053SJung-uk Kim .byte 0,12,0x14,0,0,0,0,0 3095e71b7053SJung-uk Kim 3096e71b7053SJung-uk Kim.align 5 3097e71b7053SJung-uk Kim_aesp8_xts_decrypt6x: 3098e71b7053SJung-uk Kim $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) 3099e71b7053SJung-uk Kim mflr r11 3100e71b7053SJung-uk Kim li r7,`$FRAME+8*16+15` 3101e71b7053SJung-uk Kim li r3,`$FRAME+8*16+31` 3102e71b7053SJung-uk Kim $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) 3103e71b7053SJung-uk Kim stvx v20,r7,$sp # ABI says so 3104e71b7053SJung-uk Kim addi r7,r7,32 3105e71b7053SJung-uk Kim stvx v21,r3,$sp 3106e71b7053SJung-uk Kim addi r3,r3,32 3107e71b7053SJung-uk Kim stvx v22,r7,$sp 3108e71b7053SJung-uk Kim addi r7,r7,32 3109e71b7053SJung-uk Kim stvx v23,r3,$sp 3110e71b7053SJung-uk Kim addi r3,r3,32 3111e71b7053SJung-uk Kim stvx 
v24,r7,$sp 3112e71b7053SJung-uk Kim addi r7,r7,32 3113e71b7053SJung-uk Kim stvx v25,r3,$sp 3114e71b7053SJung-uk Kim addi r3,r3,32 3115e71b7053SJung-uk Kim stvx v26,r7,$sp 3116e71b7053SJung-uk Kim addi r7,r7,32 3117e71b7053SJung-uk Kim stvx v27,r3,$sp 3118e71b7053SJung-uk Kim addi r3,r3,32 3119e71b7053SJung-uk Kim stvx v28,r7,$sp 3120e71b7053SJung-uk Kim addi r7,r7,32 3121e71b7053SJung-uk Kim stvx v29,r3,$sp 3122e71b7053SJung-uk Kim addi r3,r3,32 3123e71b7053SJung-uk Kim stvx v30,r7,$sp 3124e71b7053SJung-uk Kim stvx v31,r3,$sp 3125e71b7053SJung-uk Kim li r0,-1 3126e71b7053SJung-uk Kim stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave 3127e71b7053SJung-uk Kim li $x10,0x10 3128e71b7053SJung-uk Kim $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3129e71b7053SJung-uk Kim li $x20,0x20 3130e71b7053SJung-uk Kim $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3131e71b7053SJung-uk Kim li $x30,0x30 3132e71b7053SJung-uk Kim $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3133e71b7053SJung-uk Kim li $x40,0x40 3134e71b7053SJung-uk Kim $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3135e71b7053SJung-uk Kim li $x50,0x50 3136e71b7053SJung-uk Kim $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3137e71b7053SJung-uk Kim li $x60,0x60 3138e71b7053SJung-uk Kim $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3139e71b7053SJung-uk Kim li $x70,0x70 3140e71b7053SJung-uk Kim mtspr 256,r0 3141e71b7053SJung-uk Kim 3142*a7148ab3SEnji Cooper # Reverse eighty7 to 0x010101..87 3143*a7148ab3SEnji Cooper xxlor 2, 32+$eighty7, 32+$eighty7 3144*a7148ab3SEnji Cooper vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 3145*a7148ab3SEnji Cooper xxlor 1, 32+$eighty7, 32+$eighty7 3146*a7148ab3SEnji Cooper 3147*a7148ab3SEnji Cooper # Load XOR contents. 
0xf102132435465768798a9bacbdcedfe 3148*a7148ab3SEnji Cooper mr $x70, r6 3149*a7148ab3SEnji Cooper bl Lconsts 3150*a7148ab3SEnji Cooper lxvw4x 0, $x40, r6 # load XOR contents 3151*a7148ab3SEnji Cooper mr r6, $x70 3152*a7148ab3SEnji Cooper li $x70,0x70 3153*a7148ab3SEnji Cooper 3154e71b7053SJung-uk Kim subi $rounds,$rounds,3 # -4 in total 3155e71b7053SJung-uk Kim 3156e71b7053SJung-uk Kim lvx $rndkey0,$x00,$key1 # load key schedule 3157e71b7053SJung-uk Kim lvx v30,$x10,$key1 3158e71b7053SJung-uk Kim addi $key1,$key1,0x20 3159e71b7053SJung-uk Kim lvx v31,$x00,$key1 3160e71b7053SJung-uk Kim ?vperm $rndkey0,$rndkey0,v30,$keyperm 3161e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 3162e71b7053SJung-uk Kim mtctr $rounds 3163e71b7053SJung-uk Kim 3164e71b7053SJung-uk KimLoad_xts_dec_key: 3165e71b7053SJung-uk Kim ?vperm v24,v30,v31,$keyperm 3166e71b7053SJung-uk Kim lvx v30,$x10,$key1 3167e71b7053SJung-uk Kim addi $key1,$key1,0x20 3168e71b7053SJung-uk Kim stvx v24,$x00,$key_ # off-load round[1] 3169e71b7053SJung-uk Kim ?vperm v25,v31,v30,$keyperm 3170e71b7053SJung-uk Kim lvx v31,$x00,$key1 3171e71b7053SJung-uk Kim stvx v25,$x10,$key_ # off-load round[2] 3172e71b7053SJung-uk Kim addi $key_,$key_,0x20 3173e71b7053SJung-uk Kim bdnz Load_xts_dec_key 3174e71b7053SJung-uk Kim 3175e71b7053SJung-uk Kim lvx v26,$x10,$key1 3176e71b7053SJung-uk Kim ?vperm v24,v30,v31,$keyperm 3177e71b7053SJung-uk Kim lvx v27,$x20,$key1 3178e71b7053SJung-uk Kim stvx v24,$x00,$key_ # off-load round[3] 3179e71b7053SJung-uk Kim ?vperm v25,v31,v26,$keyperm 3180e71b7053SJung-uk Kim lvx v28,$x30,$key1 3181e71b7053SJung-uk Kim stvx v25,$x10,$key_ # off-load round[4] 3182e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 3183e71b7053SJung-uk Kim ?vperm v26,v26,v27,$keyperm 3184e71b7053SJung-uk Kim lvx v29,$x40,$key1 3185e71b7053SJung-uk Kim ?vperm v27,v27,v28,$keyperm 3186e71b7053SJung-uk Kim lvx v30,$x50,$key1 3187e71b7053SJung-uk Kim ?vperm v28,v28,v29,$keyperm 3188e71b7053SJung-uk Kim lvx 
v31,$x60,$key1 3189e71b7053SJung-uk Kim ?vperm v29,v29,v30,$keyperm 3190e71b7053SJung-uk Kim lvx $twk5,$x70,$key1 # borrow $twk5 3191e71b7053SJung-uk Kim ?vperm v30,v30,v31,$keyperm 3192e71b7053SJung-uk Kim lvx v24,$x00,$key_ # pre-load round[1] 3193e71b7053SJung-uk Kim ?vperm v31,v31,$twk5,$keyperm 3194e71b7053SJung-uk Kim lvx v25,$x10,$key_ # pre-load round[2] 3195e71b7053SJung-uk Kim 3196e71b7053SJung-uk Kim vperm $in0,$inout,$inptail,$inpperm 3197e71b7053SJung-uk Kim subi $inp,$inp,31 # undo "caller" 3198e71b7053SJung-uk Kim vxor $twk0,$tweak,$rndkey0 3199e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3200e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3201e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3202e71b7053SJung-uk Kim vxor $out0,$in0,$twk0 3203*a7148ab3SEnji Cooper xxlor 32+$in1, 0, 0 3204*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in1 3205e71b7053SJung-uk Kim 3206e71b7053SJung-uk Kim lvx_u $in1,$x10,$inp 3207e71b7053SJung-uk Kim vxor $twk1,$tweak,$rndkey0 3208e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3209e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3210e71b7053SJung-uk Kim le?vperm $in1,$in1,$in1,$leperm 3211e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3212e71b7053SJung-uk Kim vxor $out1,$in1,$twk1 3213*a7148ab3SEnji Cooper xxlor 32+$in2, 0, 0 3214*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in2 3215e71b7053SJung-uk Kim 3216e71b7053SJung-uk Kim lvx_u $in2,$x20,$inp 3217e71b7053SJung-uk Kim andi. 
$taillen,$len,15 3218e71b7053SJung-uk Kim vxor $twk2,$tweak,$rndkey0 3219e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3220e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3221e71b7053SJung-uk Kim le?vperm $in2,$in2,$in2,$leperm 3222e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3223e71b7053SJung-uk Kim vxor $out2,$in2,$twk2 3224*a7148ab3SEnji Cooper xxlor 32+$in3, 0, 0 3225*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in3 3226e71b7053SJung-uk Kim 3227e71b7053SJung-uk Kim lvx_u $in3,$x30,$inp 3228e71b7053SJung-uk Kim sub $len,$len,$taillen 3229e71b7053SJung-uk Kim vxor $twk3,$tweak,$rndkey0 3230e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3231e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3232e71b7053SJung-uk Kim le?vperm $in3,$in3,$in3,$leperm 3233e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3234e71b7053SJung-uk Kim vxor $out3,$in3,$twk3 3235*a7148ab3SEnji Cooper xxlor 32+$in4, 0, 0 3236*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in4 3237e71b7053SJung-uk Kim 3238e71b7053SJung-uk Kim lvx_u $in4,$x40,$inp 3239e71b7053SJung-uk Kim subi $len,$len,0x60 3240e71b7053SJung-uk Kim vxor $twk4,$tweak,$rndkey0 3241e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3242e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3243e71b7053SJung-uk Kim le?vperm $in4,$in4,$in4,$leperm 3244e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3245e71b7053SJung-uk Kim vxor $out4,$in4,$twk4 3246*a7148ab3SEnji Cooper xxlor 32+$in5, 0, 0 3247*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in5 3248e71b7053SJung-uk Kim 3249e71b7053SJung-uk Kim lvx_u $in5,$x50,$inp 3250e71b7053SJung-uk Kim addi $inp,$inp,0x60 3251e71b7053SJung-uk Kim vxor $twk5,$tweak,$rndkey0 3252e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3253e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3254e71b7053SJung-uk Kim le?vperm $in5,$in5,$in5,$leperm 3255e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3256e71b7053SJung-uk Kim vxor 
$out5,$in5,$twk5 3257*a7148ab3SEnji Cooper xxlor 32+$in0, 0, 0 3258*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in0 3259e71b7053SJung-uk Kim 3260e71b7053SJung-uk Kim vxor v31,v31,$rndkey0 3261e71b7053SJung-uk Kim mtctr $rounds 3262e71b7053SJung-uk Kim b Loop_xts_dec6x 3263e71b7053SJung-uk Kim 3264e71b7053SJung-uk Kim.align 5 3265e71b7053SJung-uk KimLoop_xts_dec6x: 3266e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3267e71b7053SJung-uk Kim vncipher $out1,$out1,v24 3268e71b7053SJung-uk Kim vncipher $out2,$out2,v24 3269e71b7053SJung-uk Kim vncipher $out3,$out3,v24 3270e71b7053SJung-uk Kim vncipher $out4,$out4,v24 3271e71b7053SJung-uk Kim vncipher $out5,$out5,v24 3272e71b7053SJung-uk Kim lvx v24,$x20,$key_ # round[3] 3273e71b7053SJung-uk Kim addi $key_,$key_,0x20 3274e71b7053SJung-uk Kim 3275e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3276e71b7053SJung-uk Kim vncipher $out1,$out1,v25 3277e71b7053SJung-uk Kim vncipher $out2,$out2,v25 3278e71b7053SJung-uk Kim vncipher $out3,$out3,v25 3279e71b7053SJung-uk Kim vncipher $out4,$out4,v25 3280e71b7053SJung-uk Kim vncipher $out5,$out5,v25 3281e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 3282e71b7053SJung-uk Kim bdnz Loop_xts_dec6x 3283e71b7053SJung-uk Kim 3284*a7148ab3SEnji Cooper xxlor 32+$eighty7, 1, 1 3285*a7148ab3SEnji Cooper 3286e71b7053SJung-uk Kim subic $len,$len,96 # $len-=96 3287e71b7053SJung-uk Kim vxor $in0,$twk0,v31 # xor with last round key 3288e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3289e71b7053SJung-uk Kim vncipher $out1,$out1,v24 3290e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3291e71b7053SJung-uk Kim vxor $twk0,$tweak,$rndkey0 3292e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3293e71b7053SJung-uk Kim vncipher $out2,$out2,v24 3294e71b7053SJung-uk Kim vncipher $out3,$out3,v24 3295e71b7053SJung-uk Kim vncipher $out4,$out4,v24 3296e71b7053SJung-uk Kim vncipher $out5,$out5,v24 3297e71b7053SJung-uk Kim 3298e71b7053SJung-uk Kim subfe. 
r0,r0,r0 # borrow?-1:0 3299e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3300e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3301e71b7053SJung-uk Kim vncipher $out1,$out1,v25 3302*a7148ab3SEnji Cooper xxlor 32+$in1, 0, 0 3303*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in1 3304e71b7053SJung-uk Kim vncipher $out2,$out2,v25 3305e71b7053SJung-uk Kim vncipher $out3,$out3,v25 3306e71b7053SJung-uk Kim vxor $in1,$twk1,v31 3307e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3308e71b7053SJung-uk Kim vxor $twk1,$tweak,$rndkey0 3309e71b7053SJung-uk Kim vncipher $out4,$out4,v25 3310e71b7053SJung-uk Kim vncipher $out5,$out5,v25 3311e71b7053SJung-uk Kim 3312e71b7053SJung-uk Kim and r0,r0,$len 3313e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3314e71b7053SJung-uk Kim vncipher $out0,$out0,v26 3315e71b7053SJung-uk Kim vncipher $out1,$out1,v26 3316e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3317e71b7053SJung-uk Kim vncipher $out2,$out2,v26 3318e71b7053SJung-uk Kim vncipher $out3,$out3,v26 3319*a7148ab3SEnji Cooper xxlor 32+$in2, 0, 0 3320*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in2 3321e71b7053SJung-uk Kim vncipher $out4,$out4,v26 3322e71b7053SJung-uk Kim vncipher $out5,$out5,v26 3323e71b7053SJung-uk Kim 3324e71b7053SJung-uk Kim add $inp,$inp,r0 # $inp is adjusted in such 3325e71b7053SJung-uk Kim # way that at exit from the 3326e71b7053SJung-uk Kim # loop inX-in5 are loaded 3327e71b7053SJung-uk Kim # with last "words" 3328e71b7053SJung-uk Kim vxor $in2,$twk2,v31 3329e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3330e71b7053SJung-uk Kim vxor $twk2,$tweak,$rndkey0 3331e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3332e71b7053SJung-uk Kim vncipher $out0,$out0,v27 3333e71b7053SJung-uk Kim vncipher $out1,$out1,v27 3334e71b7053SJung-uk Kim vncipher $out2,$out2,v27 3335e71b7053SJung-uk Kim vncipher $out3,$out3,v27 3336e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3337e71b7053SJung-uk Kim vncipher $out4,$out4,v27 
3338e71b7053SJung-uk Kim vncipher $out5,$out5,v27 3339e71b7053SJung-uk Kim 3340e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 3341*a7148ab3SEnji Cooper xxlor 32+$in3, 0, 0 3342*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in3 3343e71b7053SJung-uk Kim vncipher $out0,$out0,v28 3344e71b7053SJung-uk Kim vncipher $out1,$out1,v28 3345e71b7053SJung-uk Kim vxor $in3,$twk3,v31 3346e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3347e71b7053SJung-uk Kim vxor $twk3,$tweak,$rndkey0 3348e71b7053SJung-uk Kim vncipher $out2,$out2,v28 3349e71b7053SJung-uk Kim vncipher $out3,$out3,v28 3350e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3351e71b7053SJung-uk Kim vncipher $out4,$out4,v28 3352e71b7053SJung-uk Kim vncipher $out5,$out5,v28 3353e71b7053SJung-uk Kim lvx v24,$x00,$key_ # re-pre-load round[1] 3354e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3355e71b7053SJung-uk Kim 3356e71b7053SJung-uk Kim vncipher $out0,$out0,v29 3357e71b7053SJung-uk Kim vncipher $out1,$out1,v29 3358*a7148ab3SEnji Cooper xxlor 32+$in4, 0, 0 3359*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in4 3360e71b7053SJung-uk Kim vncipher $out2,$out2,v29 3361e71b7053SJung-uk Kim vncipher $out3,$out3,v29 3362e71b7053SJung-uk Kim vxor $in4,$twk4,v31 3363e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3364e71b7053SJung-uk Kim vxor $twk4,$tweak,$rndkey0 3365e71b7053SJung-uk Kim vncipher $out4,$out4,v29 3366e71b7053SJung-uk Kim vncipher $out5,$out5,v29 3367e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 3368e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3369e71b7053SJung-uk Kim 3370e71b7053SJung-uk Kim vncipher $out0,$out0,v30 3371e71b7053SJung-uk Kim vncipher $out1,$out1,v30 3372e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3373e71b7053SJung-uk Kim vncipher $out2,$out2,v30 3374e71b7053SJung-uk Kim vncipher $out3,$out3,v30 3375*a7148ab3SEnji Cooper xxlor 32+$in5, 0, 0 3376*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in5 
3377e71b7053SJung-uk Kim vncipher $out4,$out4,v30 3378e71b7053SJung-uk Kim vncipher $out5,$out5,v30 3379e71b7053SJung-uk Kim vxor $in5,$twk5,v31 3380e71b7053SJung-uk Kim vsrab $tmp,$tweak,$seven # next tweak value 3381e71b7053SJung-uk Kim vxor $twk5,$tweak,$rndkey0 3382e71b7053SJung-uk Kim 3383e71b7053SJung-uk Kim vncipherlast $out0,$out0,$in0 3384e71b7053SJung-uk Kim lvx_u $in0,$x00,$inp # load next input block 3385e71b7053SJung-uk Kim vaddubm $tweak,$tweak,$tweak 3386e71b7053SJung-uk Kim vncipherlast $out1,$out1,$in1 3387e71b7053SJung-uk Kim lvx_u $in1,$x10,$inp 3388e71b7053SJung-uk Kim vncipherlast $out2,$out2,$in2 3389e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 3390e71b7053SJung-uk Kim lvx_u $in2,$x20,$inp 3391e71b7053SJung-uk Kim vand $tmp,$tmp,$eighty7 3392e71b7053SJung-uk Kim vncipherlast $out3,$out3,$in3 3393e71b7053SJung-uk Kim le?vperm $in1,$in1,$in1,$leperm 3394e71b7053SJung-uk Kim lvx_u $in3,$x30,$inp 3395e71b7053SJung-uk Kim vncipherlast $out4,$out4,$in4 3396e71b7053SJung-uk Kim le?vperm $in2,$in2,$in2,$leperm 3397e71b7053SJung-uk Kim lvx_u $in4,$x40,$inp 3398*a7148ab3SEnji Cooper xxlor 10, 32+$in0, 32+$in0 3399*a7148ab3SEnji Cooper xxlor 32+$in0, 0, 0 3400*a7148ab3SEnji Cooper vpermxor $tweak, $tweak, $tmp, $in0 3401*a7148ab3SEnji Cooper xxlor 32+$in0, 10, 10 3402e71b7053SJung-uk Kim vncipherlast $out5,$out5,$in5 3403e71b7053SJung-uk Kim le?vperm $in3,$in3,$in3,$leperm 3404e71b7053SJung-uk Kim lvx_u $in5,$x50,$inp 3405e71b7053SJung-uk Kim addi $inp,$inp,0x60 3406e71b7053SJung-uk Kim le?vperm $in4,$in4,$in4,$leperm 3407e71b7053SJung-uk Kim le?vperm $in5,$in5,$in5,$leperm 3408e71b7053SJung-uk Kim 3409e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 3410e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 3411e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 3412e71b7053SJung-uk Kim vxor $out0,$in0,$twk0 3413e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 3414e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 
3415e71b7053SJung-uk Kim vxor $out1,$in1,$twk1 3416e71b7053SJung-uk Kim le?vperm $out3,$out3,$out3,$leperm 3417e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 3418e71b7053SJung-uk Kim vxor $out2,$in2,$twk2 3419e71b7053SJung-uk Kim le?vperm $out4,$out4,$out4,$leperm 3420e71b7053SJung-uk Kim stvx_u $out3,$x30,$out 3421e71b7053SJung-uk Kim vxor $out3,$in3,$twk3 3422e71b7053SJung-uk Kim le?vperm $out5,$out5,$out5,$leperm 3423e71b7053SJung-uk Kim stvx_u $out4,$x40,$out 3424e71b7053SJung-uk Kim vxor $out4,$in4,$twk4 3425e71b7053SJung-uk Kim stvx_u $out5,$x50,$out 3426e71b7053SJung-uk Kim vxor $out5,$in5,$twk5 3427e71b7053SJung-uk Kim addi $out,$out,0x60 3428e71b7053SJung-uk Kim 3429e71b7053SJung-uk Kim mtctr $rounds 3430e71b7053SJung-uk Kim beq Loop_xts_dec6x # did $len-=96 borrow? 3431e71b7053SJung-uk Kim 3432*a7148ab3SEnji Cooper xxlor 32+$eighty7, 2, 2 3433*a7148ab3SEnji Cooper 3434e71b7053SJung-uk Kim addic. $len,$len,0x60 3435e71b7053SJung-uk Kim beq Lxts_dec6x_zero 3436e71b7053SJung-uk Kim cmpwi $len,0x20 3437e71b7053SJung-uk Kim blt Lxts_dec6x_one 3438e71b7053SJung-uk Kim nop 3439e71b7053SJung-uk Kim beq Lxts_dec6x_two 3440e71b7053SJung-uk Kim cmpwi $len,0x40 3441e71b7053SJung-uk Kim blt Lxts_dec6x_three 3442e71b7053SJung-uk Kim nop 3443e71b7053SJung-uk Kim beq Lxts_dec6x_four 3444e71b7053SJung-uk Kim 3445e71b7053SJung-uk KimLxts_dec6x_five: 3446e71b7053SJung-uk Kim vxor $out0,$in1,$twk0 3447e71b7053SJung-uk Kim vxor $out1,$in2,$twk1 3448e71b7053SJung-uk Kim vxor $out2,$in3,$twk2 3449e71b7053SJung-uk Kim vxor $out3,$in4,$twk3 3450e71b7053SJung-uk Kim vxor $out4,$in5,$twk4 3451e71b7053SJung-uk Kim 3452e71b7053SJung-uk Kim bl _aesp8_xts_dec5x 3453e71b7053SJung-uk Kim 3454e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 3455e71b7053SJung-uk Kim vmr $twk0,$twk5 # unused tweak 3456e71b7053SJung-uk Kim vxor $twk1,$tweak,$rndkey0 3457e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 3458e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 
3459e71b7053SJung-uk Kim vxor $out0,$in0,$twk1 3460e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 3461e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 3462e71b7053SJung-uk Kim le?vperm $out3,$out3,$out3,$leperm 3463e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 3464e71b7053SJung-uk Kim le?vperm $out4,$out4,$out4,$leperm 3465e71b7053SJung-uk Kim stvx_u $out3,$x30,$out 3466e71b7053SJung-uk Kim stvx_u $out4,$x40,$out 3467e71b7053SJung-uk Kim addi $out,$out,0x50 3468e71b7053SJung-uk Kim bne Lxts_dec6x_steal 3469e71b7053SJung-uk Kim b Lxts_dec6x_done 3470e71b7053SJung-uk Kim 3471e71b7053SJung-uk Kim.align 4 3472e71b7053SJung-uk KimLxts_dec6x_four: 3473e71b7053SJung-uk Kim vxor $out0,$in2,$twk0 3474e71b7053SJung-uk Kim vxor $out1,$in3,$twk1 3475e71b7053SJung-uk Kim vxor $out2,$in4,$twk2 3476e71b7053SJung-uk Kim vxor $out3,$in5,$twk3 3477e71b7053SJung-uk Kim vxor $out4,$out4,$out4 3478e71b7053SJung-uk Kim 3479e71b7053SJung-uk Kim bl _aesp8_xts_dec5x 3480e71b7053SJung-uk Kim 3481e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 3482e71b7053SJung-uk Kim vmr $twk0,$twk4 # unused tweak 3483e71b7053SJung-uk Kim vmr $twk1,$twk5 3484e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 3485e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 3486e71b7053SJung-uk Kim vxor $out0,$in0,$twk5 3487e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 3488e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 3489e71b7053SJung-uk Kim le?vperm $out3,$out3,$out3,$leperm 3490e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 3491e71b7053SJung-uk Kim stvx_u $out3,$x30,$out 3492e71b7053SJung-uk Kim addi $out,$out,0x40 3493e71b7053SJung-uk Kim bne Lxts_dec6x_steal 3494e71b7053SJung-uk Kim b Lxts_dec6x_done 3495e71b7053SJung-uk Kim 3496e71b7053SJung-uk Kim.align 4 3497e71b7053SJung-uk KimLxts_dec6x_three: 3498e71b7053SJung-uk Kim vxor $out0,$in3,$twk0 3499e71b7053SJung-uk Kim vxor $out1,$in4,$twk1 3500e71b7053SJung-uk Kim vxor $out2,$in5,$twk2 3501e71b7053SJung-uk Kim vxor $out3,$out3,$out3 
3502e71b7053SJung-uk Kim vxor $out4,$out4,$out4 3503e71b7053SJung-uk Kim 3504e71b7053SJung-uk Kim bl _aesp8_xts_dec5x 3505e71b7053SJung-uk Kim 3506e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 3507e71b7053SJung-uk Kim vmr $twk0,$twk3 # unused tweak 3508e71b7053SJung-uk Kim vmr $twk1,$twk4 3509e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 3510e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 3511e71b7053SJung-uk Kim vxor $out0,$in0,$twk4 3512e71b7053SJung-uk Kim le?vperm $out2,$out2,$out2,$leperm 3513e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 3514e71b7053SJung-uk Kim stvx_u $out2,$x20,$out 3515e71b7053SJung-uk Kim addi $out,$out,0x30 3516e71b7053SJung-uk Kim bne Lxts_dec6x_steal 3517e71b7053SJung-uk Kim b Lxts_dec6x_done 3518e71b7053SJung-uk Kim 3519e71b7053SJung-uk Kim.align 4 3520e71b7053SJung-uk KimLxts_dec6x_two: 3521e71b7053SJung-uk Kim vxor $out0,$in4,$twk0 3522e71b7053SJung-uk Kim vxor $out1,$in5,$twk1 3523e71b7053SJung-uk Kim vxor $out2,$out2,$out2 3524e71b7053SJung-uk Kim vxor $out3,$out3,$out3 3525e71b7053SJung-uk Kim vxor $out4,$out4,$out4 3526e71b7053SJung-uk Kim 3527e71b7053SJung-uk Kim bl _aesp8_xts_dec5x 3528e71b7053SJung-uk Kim 3529e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 3530e71b7053SJung-uk Kim vmr $twk0,$twk2 # unused tweak 3531e71b7053SJung-uk Kim vmr $twk1,$twk3 3532e71b7053SJung-uk Kim le?vperm $out1,$out1,$out1,$leperm 3533e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 3534e71b7053SJung-uk Kim vxor $out0,$in0,$twk3 3535e71b7053SJung-uk Kim stvx_u $out1,$x10,$out 3536e71b7053SJung-uk Kim addi $out,$out,0x20 3537e71b7053SJung-uk Kim bne Lxts_dec6x_steal 3538e71b7053SJung-uk Kim b Lxts_dec6x_done 3539e71b7053SJung-uk Kim 3540e71b7053SJung-uk Kim.align 4 3541e71b7053SJung-uk KimLxts_dec6x_one: 3542e71b7053SJung-uk Kim vxor $out0,$in5,$twk0 3543e71b7053SJung-uk Kim nop 3544e71b7053SJung-uk KimLoop_xts_dec1x: 3545e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3546e71b7053SJung-uk Kim lvx 
v24,$x20,$key_ # round[3] 3547e71b7053SJung-uk Kim addi $key_,$key_,0x20 3548e71b7053SJung-uk Kim 3549e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3550e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 3551e71b7053SJung-uk Kim bdnz Loop_xts_dec1x 3552e71b7053SJung-uk Kim 3553e71b7053SJung-uk Kim subi r0,$taillen,1 3554e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3555e71b7053SJung-uk Kim 3556e71b7053SJung-uk Kim andi. r0,r0,16 3557e71b7053SJung-uk Kim cmpwi $taillen,0 3558e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3559e71b7053SJung-uk Kim 3560e71b7053SJung-uk Kim sub $inp,$inp,r0 3561e71b7053SJung-uk Kim vncipher $out0,$out0,v26 3562e71b7053SJung-uk Kim 3563e71b7053SJung-uk Kim lvx_u $in0,0,$inp 3564e71b7053SJung-uk Kim vncipher $out0,$out0,v27 3565e71b7053SJung-uk Kim 3566e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 3567e71b7053SJung-uk Kim vncipher $out0,$out0,v28 3568e71b7053SJung-uk Kim lvx v24,$x00,$key_ # re-pre-load round[1] 3569e71b7053SJung-uk Kim 3570e71b7053SJung-uk Kim vncipher $out0,$out0,v29 3571e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 3572e71b7053SJung-uk Kim vxor $twk0,$twk0,v31 3573e71b7053SJung-uk Kim 3574e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 3575e71b7053SJung-uk Kim vncipher $out0,$out0,v30 3576e71b7053SJung-uk Kim 3577e71b7053SJung-uk Kim mtctr $rounds 3578e71b7053SJung-uk Kim vncipherlast $out0,$out0,$twk0 3579e71b7053SJung-uk Kim 3580e71b7053SJung-uk Kim vmr $twk0,$twk1 # unused tweak 3581e71b7053SJung-uk Kim vmr $twk1,$twk2 3582e71b7053SJung-uk Kim le?vperm $out0,$out0,$out0,$leperm 3583e71b7053SJung-uk Kim stvx_u $out0,$x00,$out # store output 3584e71b7053SJung-uk Kim addi $out,$out,0x10 3585e71b7053SJung-uk Kim vxor $out0,$in0,$twk2 3586e71b7053SJung-uk Kim bne Lxts_dec6x_steal 3587e71b7053SJung-uk Kim b Lxts_dec6x_done 3588e71b7053SJung-uk Kim 3589e71b7053SJung-uk Kim.align 4 3590e71b7053SJung-uk KimLxts_dec6x_zero: 3591e71b7053SJung-uk Kim cmpwi $taillen,0 3592e71b7053SJung-uk Kim 
beq Lxts_dec6x_done 3593e71b7053SJung-uk Kim 3594e71b7053SJung-uk Kim lvx_u $in0,0,$inp 3595e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 3596e71b7053SJung-uk Kim vxor $out0,$in0,$twk1 3597e71b7053SJung-uk KimLxts_dec6x_steal: 3598e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3599e71b7053SJung-uk Kim lvx v24,$x20,$key_ # round[3] 3600e71b7053SJung-uk Kim addi $key_,$key_,0x20 3601e71b7053SJung-uk Kim 3602e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3603e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 3604e71b7053SJung-uk Kim bdnz Lxts_dec6x_steal 3605e71b7053SJung-uk Kim 3606e71b7053SJung-uk Kim add $inp,$inp,$taillen 3607e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3608e71b7053SJung-uk Kim 3609e71b7053SJung-uk Kim cmpwi $taillen,0 3610e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3611e71b7053SJung-uk Kim 3612e71b7053SJung-uk Kim lvx_u $in0,0,$inp 3613e71b7053SJung-uk Kim vncipher $out0,$out0,v26 3614e71b7053SJung-uk Kim 3615e71b7053SJung-uk Kim lvsr $inpperm,0,$taillen # $in5 is no more 3616e71b7053SJung-uk Kim vncipher $out0,$out0,v27 3617e71b7053SJung-uk Kim 3618e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 3619e71b7053SJung-uk Kim vncipher $out0,$out0,v28 3620e71b7053SJung-uk Kim lvx v24,$x00,$key_ # re-pre-load round[1] 3621e71b7053SJung-uk Kim 3622e71b7053SJung-uk Kim vncipher $out0,$out0,v29 3623e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 3624e71b7053SJung-uk Kim vxor $twk1,$twk1,v31 3625e71b7053SJung-uk Kim 3626e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 3627e71b7053SJung-uk Kim vncipher $out0,$out0,v30 3628e71b7053SJung-uk Kim 3629e71b7053SJung-uk Kim vperm $in0,$in0,$in0,$inpperm 3630e71b7053SJung-uk Kim vncipherlast $tmp,$out0,$twk1 3631e71b7053SJung-uk Kim 3632e71b7053SJung-uk Kim le?vperm $out0,$tmp,$tmp,$leperm 3633e71b7053SJung-uk Kim le?stvx_u $out0,0,$out 3634e71b7053SJung-uk Kim be?stvx_u $tmp,0,$out 3635e71b7053SJung-uk Kim 3636e71b7053SJung-uk Kim vxor $out0,$out0,$out0 3637e71b7053SJung-uk 
Kim vspltisb $out1,-1 3638e71b7053SJung-uk Kim vperm $out0,$out0,$out1,$inpperm 3639e71b7053SJung-uk Kim vsel $out0,$in0,$tmp,$out0 3640e71b7053SJung-uk Kim vxor $out0,$out0,$twk0 3641e71b7053SJung-uk Kim 3642e71b7053SJung-uk Kim subi r30,$out,1 3643e71b7053SJung-uk Kim mtctr $taillen 3644e71b7053SJung-uk KimLoop_xts_dec6x_steal: 3645e71b7053SJung-uk Kim lbzu r0,1(r30) 3646e71b7053SJung-uk Kim stb r0,16(r30) 3647e71b7053SJung-uk Kim bdnz Loop_xts_dec6x_steal 3648e71b7053SJung-uk Kim 3649e71b7053SJung-uk Kim li $taillen,0 3650e71b7053SJung-uk Kim mtctr $rounds 3651e71b7053SJung-uk Kim b Loop_xts_dec1x # one more time... 3652e71b7053SJung-uk Kim 3653e71b7053SJung-uk Kim.align 4 3654e71b7053SJung-uk KimLxts_dec6x_done: 3655e71b7053SJung-uk Kim ${UCMP}i $ivp,0 3656e71b7053SJung-uk Kim beq Lxts_dec6x_ret 3657e71b7053SJung-uk Kim 3658e71b7053SJung-uk Kim vxor $tweak,$twk0,$rndkey0 3659e71b7053SJung-uk Kim le?vperm $tweak,$tweak,$tweak,$leperm 3660e71b7053SJung-uk Kim stvx_u $tweak,0,$ivp 3661e71b7053SJung-uk Kim 3662e71b7053SJung-uk KimLxts_dec6x_ret: 3663e71b7053SJung-uk Kim mtlr r11 3664e71b7053SJung-uk Kim li r10,`$FRAME+15` 3665e71b7053SJung-uk Kim li r11,`$FRAME+31` 3666e71b7053SJung-uk Kim stvx $seven,r10,$sp # wipe copies of round keys 3667e71b7053SJung-uk Kim addi r10,r10,32 3668e71b7053SJung-uk Kim stvx $seven,r11,$sp 3669e71b7053SJung-uk Kim addi r11,r11,32 3670e71b7053SJung-uk Kim stvx $seven,r10,$sp 3671e71b7053SJung-uk Kim addi r10,r10,32 3672e71b7053SJung-uk Kim stvx $seven,r11,$sp 3673e71b7053SJung-uk Kim addi r11,r11,32 3674e71b7053SJung-uk Kim stvx $seven,r10,$sp 3675e71b7053SJung-uk Kim addi r10,r10,32 3676e71b7053SJung-uk Kim stvx $seven,r11,$sp 3677e71b7053SJung-uk Kim addi r11,r11,32 3678e71b7053SJung-uk Kim stvx $seven,r10,$sp 3679e71b7053SJung-uk Kim addi r10,r10,32 3680e71b7053SJung-uk Kim stvx $seven,r11,$sp 3681e71b7053SJung-uk Kim addi r11,r11,32 3682e71b7053SJung-uk Kim 3683e71b7053SJung-uk Kim mtspr 256,$vrsave 3684e71b7053SJung-uk Kim lvx 
v20,r10,$sp # ABI says so 3685e71b7053SJung-uk Kim addi r10,r10,32 3686e71b7053SJung-uk Kim lvx v21,r11,$sp 3687e71b7053SJung-uk Kim addi r11,r11,32 3688e71b7053SJung-uk Kim lvx v22,r10,$sp 3689e71b7053SJung-uk Kim addi r10,r10,32 3690e71b7053SJung-uk Kim lvx v23,r11,$sp 3691e71b7053SJung-uk Kim addi r11,r11,32 3692e71b7053SJung-uk Kim lvx v24,r10,$sp 3693e71b7053SJung-uk Kim addi r10,r10,32 3694e71b7053SJung-uk Kim lvx v25,r11,$sp 3695e71b7053SJung-uk Kim addi r11,r11,32 3696e71b7053SJung-uk Kim lvx v26,r10,$sp 3697e71b7053SJung-uk Kim addi r10,r10,32 3698e71b7053SJung-uk Kim lvx v27,r11,$sp 3699e71b7053SJung-uk Kim addi r11,r11,32 3700e71b7053SJung-uk Kim lvx v28,r10,$sp 3701e71b7053SJung-uk Kim addi r10,r10,32 3702e71b7053SJung-uk Kim lvx v29,r11,$sp 3703e71b7053SJung-uk Kim addi r11,r11,32 3704e71b7053SJung-uk Kim lvx v30,r10,$sp 3705e71b7053SJung-uk Kim lvx v31,r11,$sp 3706e71b7053SJung-uk Kim $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) 3707e71b7053SJung-uk Kim $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) 3708e71b7053SJung-uk Kim $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) 3709e71b7053SJung-uk Kim $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) 3710e71b7053SJung-uk Kim $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) 3711e71b7053SJung-uk Kim $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) 3712e71b7053SJung-uk Kim addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` 3713e71b7053SJung-uk Kim blr 3714e71b7053SJung-uk Kim .long 0 3715e71b7053SJung-uk Kim .byte 0,12,0x04,1,0x80,6,6,0 3716e71b7053SJung-uk Kim .long 0 3717e71b7053SJung-uk Kim 3718e71b7053SJung-uk Kim.align 5 3719e71b7053SJung-uk Kim_aesp8_xts_dec5x: 3720e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3721e71b7053SJung-uk Kim vncipher $out1,$out1,v24 3722e71b7053SJung-uk Kim vncipher $out2,$out2,v24 3723e71b7053SJung-uk Kim vncipher $out3,$out3,v24 3724e71b7053SJung-uk Kim vncipher $out4,$out4,v24 3725e71b7053SJung-uk Kim lvx v24,$x20,$key_ # round[3] 3726e71b7053SJung-uk Kim addi $key_,$key_,0x20 3727e71b7053SJung-uk Kim 3728e71b7053SJung-uk Kim vncipher 
$out0,$out0,v25 3729e71b7053SJung-uk Kim vncipher $out1,$out1,v25 3730e71b7053SJung-uk Kim vncipher $out2,$out2,v25 3731e71b7053SJung-uk Kim vncipher $out3,$out3,v25 3732e71b7053SJung-uk Kim vncipher $out4,$out4,v25 3733e71b7053SJung-uk Kim lvx v25,$x10,$key_ # round[4] 3734e71b7053SJung-uk Kim bdnz _aesp8_xts_dec5x 3735e71b7053SJung-uk Kim 3736e71b7053SJung-uk Kim subi r0,$taillen,1 3737e71b7053SJung-uk Kim vncipher $out0,$out0,v24 3738e71b7053SJung-uk Kim vncipher $out1,$out1,v24 3739e71b7053SJung-uk Kim vncipher $out2,$out2,v24 3740e71b7053SJung-uk Kim vncipher $out3,$out3,v24 3741e71b7053SJung-uk Kim vncipher $out4,$out4,v24 3742e71b7053SJung-uk Kim 3743e71b7053SJung-uk Kim andi. r0,r0,16 3744e71b7053SJung-uk Kim cmpwi $taillen,0 3745e71b7053SJung-uk Kim vncipher $out0,$out0,v25 3746e71b7053SJung-uk Kim vncipher $out1,$out1,v25 3747e71b7053SJung-uk Kim vncipher $out2,$out2,v25 3748e71b7053SJung-uk Kim vncipher $out3,$out3,v25 3749e71b7053SJung-uk Kim vncipher $out4,$out4,v25 3750e71b7053SJung-uk Kim vxor $twk0,$twk0,v31 3751e71b7053SJung-uk Kim 3752e71b7053SJung-uk Kim sub $inp,$inp,r0 3753e71b7053SJung-uk Kim vncipher $out0,$out0,v26 3754e71b7053SJung-uk Kim vncipher $out1,$out1,v26 3755e71b7053SJung-uk Kim vncipher $out2,$out2,v26 3756e71b7053SJung-uk Kim vncipher $out3,$out3,v26 3757e71b7053SJung-uk Kim vncipher $out4,$out4,v26 3758e71b7053SJung-uk Kim vxor $in1,$twk1,v31 3759e71b7053SJung-uk Kim 3760e71b7053SJung-uk Kim vncipher $out0,$out0,v27 3761e71b7053SJung-uk Kim lvx_u $in0,0,$inp 3762e71b7053SJung-uk Kim vncipher $out1,$out1,v27 3763e71b7053SJung-uk Kim vncipher $out2,$out2,v27 3764e71b7053SJung-uk Kim vncipher $out3,$out3,v27 3765e71b7053SJung-uk Kim vncipher $out4,$out4,v27 3766e71b7053SJung-uk Kim vxor $in2,$twk2,v31 3767e71b7053SJung-uk Kim 3768e71b7053SJung-uk Kim addi $key_,$sp,$FRAME+15 # rewind $key_ 3769e71b7053SJung-uk Kim vncipher $out0,$out0,v28 3770e71b7053SJung-uk Kim vncipher $out1,$out1,v28 3771e71b7053SJung-uk Kim vncipher 
$out2,$out2,v28 3772e71b7053SJung-uk Kim vncipher $out3,$out3,v28 3773e71b7053SJung-uk Kim vncipher $out4,$out4,v28 3774e71b7053SJung-uk Kim lvx v24,$x00,$key_ # re-pre-load round[1] 3775e71b7053SJung-uk Kim vxor $in3,$twk3,v31 3776e71b7053SJung-uk Kim 3777e71b7053SJung-uk Kim vncipher $out0,$out0,v29 3778e71b7053SJung-uk Kim le?vperm $in0,$in0,$in0,$leperm 3779e71b7053SJung-uk Kim vncipher $out1,$out1,v29 3780e71b7053SJung-uk Kim vncipher $out2,$out2,v29 3781e71b7053SJung-uk Kim vncipher $out3,$out3,v29 3782e71b7053SJung-uk Kim vncipher $out4,$out4,v29 3783e71b7053SJung-uk Kim lvx v25,$x10,$key_ # re-pre-load round[2] 3784e71b7053SJung-uk Kim vxor $in4,$twk4,v31 3785e71b7053SJung-uk Kim 3786e71b7053SJung-uk Kim vncipher $out0,$out0,v30 3787e71b7053SJung-uk Kim vncipher $out1,$out1,v30 3788e71b7053SJung-uk Kim vncipher $out2,$out2,v30 3789e71b7053SJung-uk Kim vncipher $out3,$out3,v30 3790e71b7053SJung-uk Kim vncipher $out4,$out4,v30 3791e71b7053SJung-uk Kim 3792e71b7053SJung-uk Kim vncipherlast $out0,$out0,$twk0 3793e71b7053SJung-uk Kim vncipherlast $out1,$out1,$in1 3794e71b7053SJung-uk Kim vncipherlast $out2,$out2,$in2 3795e71b7053SJung-uk Kim vncipherlast $out3,$out3,$in3 3796e71b7053SJung-uk Kim vncipherlast $out4,$out4,$in4 3797e71b7053SJung-uk Kim mtctr $rounds 3798e71b7053SJung-uk Kim blr 3799e71b7053SJung-uk Kim .long 0 3800e71b7053SJung-uk Kim .byte 0,12,0x14,0,0,0,0,0 3801e71b7053SJung-uk Kim___ 3802e71b7053SJung-uk Kim}} }}} 3803e71b7053SJung-uk Kim

# Post-process the assembly text accumulated in $code, one line at a time:
#   1. replace every `expr` with the value of evaluating expr;
#   2. while still inside the constants table, turn ".long"/".byte" rows
#      that carry a trailing "?tag" into plain ".byte" rows, applying the
#      tag's transformation when $flavour ends in "le";
#   3. resolve the "le?", "be?" and "?" instruction prefixes for the
#      endianness selected by $flavour;
#   4. print the resulting line.
my $consts=1;    # cleared once a line mentioning "Lconsts:" has been seen
for my $asm_line (split("\n",$code)) {
    $asm_line =~ s/\`([^\`]*)\`/eval($1)/geo;

    # constants table endian-specific conversion
    if ($consts && $asm_line =~ m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
        # copy the captures right away, before any later match can reset them
        my ($directive,$operands,$conv)=($1,$2,$3);
        my @octets=();

        # convert to endian-agnostic format: every operand is parsed with
        # oct() when it starts with "0" and with int() otherwise; a .long
        # operand contributes four bytes, most significant first
        for my $item (split(/,\s*/,$operands)) {
            my $value = ($item =~ /^0/) ? oct($item) : int($item);
            if ($directive eq "long") {
                push @octets,($value>>24)&0xff,($value>>16)&0xff,($value>>8)&0xff,$value&0xff;
            } else {
                push @octets,$value;
            }
        }

        # little-endian conversion: "?inv" is tested first, then "?rev";
        # at most one of them is applied
        if ($flavour =~ /le$/o) {
            if ($conv =~ /\?inv/) {
                @octets=map($_^0xf,@octets);
            } elsif ($conv =~ /\?rev/) {
                @octets=reverse(@octets);
            }
        }

        # emit the row as .byte and move on to the next input line
        print ".byte\t",join(',',map (sprintf("0x%02x",$_),@octets)),"\n";
        next;
    }
    $consts=0 if ($asm_line =~ m/Lconsts:/o);    # end of table

    # instructions prefixed with '?' are endian-specific and need
    # to be adjusted accordingly; only the first substitution that
    # succeeds is applied to a given line
    if ($flavour =~ /le$/o) {    # little-endian
        $asm_line =~ s/le\?//o or
        $asm_line =~ s/be\?/#be#/o or
        $asm_line =~ s/\?lvsr/lvsl/o or
        $asm_line =~ s/\?lvsl/lvsr/o or
        $asm_line =~ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
        $asm_line =~ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
        $asm_line =~ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
    } else {                     # big-endian
        $asm_line =~ s/le\?/#le#/o or
        $asm_line =~ s/be\?//o or
        $asm_line =~ s/\?([a-z]+)/$1/o;
    }

    print $asm_line,"\n";
}

# buffered write errors only surface at close, so check it
close STDOUT or die "error closing STDOUT: $!";