#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it's
# always virtualized setup with possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.
# Command line: <flavour> [<output>]
#   flavour - must contain "64" or "32" (anything else is fatal); a trailing
#             "le" selects the little-endian variant of the generated code.
#   output  - forwarded verbatim to ppc-xlate.pl as its second argument.
$flavour=shift;
$output =shift;

# Word-size dependent ABI parameters.  They are not referenced by the assembly
# template in this file; the branch is kept because it validates $flavour.
if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
} else { die "nonsense $flavour"; }

# Locate the ppc-xlate.pl post-processor relative to the directory this script
# was invoked from, trying each known location in turn.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
( $xlate="${dir}../../../arch/powerpc/crypto/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed below is piped through ppc-xlate.pl.
# Low-precedence "or" is required here: with "||" the alternative binds to the
# command string (which is always true), so a failed open would go unnoticed.
# The command string is intentionally left unquoted so that an omitted $output
# contributes no argument at all.
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

# Vector register allocation shared by the three routines below.
my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my $vrsave="r12";

# Assembly template.  Three entry points are emitted:
#   .gcm_init_p8  - loads H from r4, derives the "twisted" H and stores a
#                   four-entry table (0xc2 constant, H.lo, H, H.hi) at
#                   r3+0x00..0x30.
#   .gcm_gmult_p8 - loads Xi from r3 and the table from r4, performs one
#                   multiplication with two reduction phases and writes Xi
#                   back to r3.
#   .gcm_ghash_p8 - same arithmetic, looping over 16-byte blocks read from r5
#                   with the byte count in r6; Xi is read from and written
#                   back to r3.  When the length counter borrows, the input
#                   pointer is stepped back so the look-ahead load stays
#                   inside the buffer.
#                   NOTE(review): appears to assume the length is a non-zero
#                   multiple of 16 - confirm against callers.
# Each routine saves VRSAVE (SPR 256) in r12 and restores it before returning.
# Lines prefixed "le?" are only active for little-endian flavours (see the
# filter loop after the template).
$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p8
	lis		r0,0xfff0
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$H,0,r4			# load H
	le?xor		r7,r7,r7
	le?addi		r7,r7,0x8		# need a vperm start with 08
	le?lvsr		5,0,r7
	le?vspltisb	6,0x0f
	le?vxor		5,5,6			# set a b-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16		# 0xf0
	vspltisb	$t0,1			# one
	vaddubm		$xC2,$xC2,$xC2		# 0xe0
	vxor		$zero,$zero,$zero
	vor		$xC2,$xC2,$t0		# 0xe1
	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi		$t1,$zero,$t0,1		# ...1
	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor		$xC2,$xC2,$t1		# 0xc2....01
	vspltb		$t1,$H,0		# most significant byte
	vsl		$H,$H,$t0		# H<<=1
	vsrab		$t1,$t1,$t2		# broadcast carry bit
	vand		$t1,$t1,$xC2
	vxor		$H,$H,$t1		# twisted H

	vsldoi		$H,$H,$H,8		# twist even more ...
	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi		$Hl,$zero,$H,8		# ... and split
	vsldoi		$Hh,$H,$zero,8

	stvx_u		$xC2,0,r3		# save pre-computed table
	stvx_u		$Hl,r8,r3
	stvx_u		$H, r9,r3
	stvx_u		$Hh,r10,r3

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_init_p8,.-.gcm_init_p8

.globl	.gcm_gmult_p8
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$IN,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	le?lvsl		$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	le?vxor		$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	le?vperm	$IN,$IN,$IN,$lemask
	vxor		$zero,$zero,$zero

	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	vxor		$t1,$t1,$Xh
	vxor		$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,2,0
	.long		0
.size	.gcm_gmult_p8,.-.gcm_gmult_p8

.globl	.gcm_ghash_p8
	lis		r0,0xfff8
	li		r8,0x10
	mfspr		$vrsave,256
	li		r9,0x20
	mtspr		256,r0
	li		r10,0x30
	lvx_u		$Xl,0,$Xip		# load Xi

	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
	le?lvsl		$lemask,r0,r0
	lvx_u		$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u		$Hh,r10,$Htbl
	le?vxor		$lemask,$lemask,$t0
	lvx_u		$xC2,0,$Htbl
	le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor		$zero,$zero,$zero

	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16
	subi		$len,$len,16
	le?vperm	$IN,$IN,$IN,$lemask
	vxor		$IN,$IN,$Xl
	b		Loop

.align	5
Loop:
	subic		$len,$len,16
	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
	subfe.		r0,r0,r0		# borrow?-1:0
	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	and		r0,r0,$len
	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
	add		$inp,$inp,r0

	vpmsumd		$t2,$Xl,$xC2		# 1st phase

	vsldoi		$t0,$Xm,$zero,8
	vsldoi		$t1,$zero,$Xm,8
	vxor		$Xl,$Xl,$t0
	vxor		$Xh,$Xh,$t1

	vsldoi		$Xl,$Xl,$Xl,8
	vxor		$Xl,$Xl,$t2
	lvx_u		$IN,0,$inp
	addi		$inp,$inp,16

	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd		$Xl,$Xl,$xC2
	le?vperm	$IN,$IN,$IN,$lemask
	vxor		$t1,$t1,$Xh
	vxor		$IN,$IN,$t1
	vxor		$IN,$IN,$Xl
	beq		Loop			# did $len-=16 borrow?

	vxor		$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u		$Xl,0,$Xip		# write out Xi

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,4,0
	.long		0
.size	.gcm_ghash_p8,.-.gcm_ghash_p8

.asciz	"GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___

# Resolve the endian-conditional prefixes line by line: for the selected
# endianness the prefix is stripped so the instruction is emitted; for the
# other one it is rewritten to "#le#" / "#be#".  At most one substitution is
# applied per line.  The endianness test does not change between lines, so it
# is evaluated once.
my $little_endian = ($flavour =~ /le$/);

foreach (split("\n",$code)) {
	if ($little_endian) {
	    s/le\?// or
	    s/be\?/#be#/;
	} else {
	    s/le\?/#le#/ or
	    s/be\?//;
	}
	print $_,"\n";
}

# Closing the pipe flushes the output and waits for ppc-xlate.pl.  close()
# returns false if the flush failed or the child exited non-zero; in the
# latter case $! is 0 and the status is in $?, so report both.
close STDOUT or die "error closing pipe to $xlate: $! (exit status $?)";