#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# GHASH for PowerISA v2.07.
#
# July 2014
#
# Accurate performance measurements are problematic, because it is
# always a virtualized setup with a possibly throttled processor.
# Relative comparison is therefore more informative. This initial
# version is ~2.1x slower than hardware-assisted AES-128-CTR, and ~12x
# faster than "4-bit" integer-only compiler-generated 64-bit code.
# "Initial version" means that there is room for further improvement.
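
# In outline, the routines generated below implement the GHASH update
#
#	Xi = (Xi xor block) · H		in GF(2^128)
#
# over the field defined by x^128 + x^7 + x^2 + x + 1 with bit-reflected
# operands, as specified for GCM.  vpmsumd provides 64x64-bit carry-less
# multiplication, so each 128x128-bit product is assembled from three
# vpmsumd results and then folded back down to 128 bits in two reduction
# phases driven by the 0xc2...01 constant that the gcm_init_* routines
# store in the first slot of the pre-computed table.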

$flavour=shift;
$output =shift;

if ($flavour =~ /64/) {
	$SIZE_T=8;
	$LRSAVE=2*$SIZE_T;
	$STU="stdu";
	$POP="ld";
	$PUSH="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T=4;
	$LRSAVE=$SIZE_T;
	$STU="stwu";
	$POP="lwz";
	$PUSH="stw";
} else { die "nonsense $flavour"; }

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block

my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
my $vrsave="r12";
my ($t4,$t5,$t6) = ($Hl,$H,$Hh);

$code=<<___;
.machine	"any"

.text

.globl	.gcm_init_p10
	lis	r0,0xfff0
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$H,0,r4			# load H
	le?xor	r7,r7,r7
	le?addi	r7,r7,0x8		# need a vperm starting with 0x08
	le?lvsr	5,0,r7
	le?vspltisb	6,0x0f
	le?vxor	5,5,6			# set a big-endian mask
	le?vperm	$H,$H,$H,5

	vspltisb	$xC2,-16	# 0xf0
	vspltisb	$t0,1		# one
	vaddubm	$xC2,$xC2,$xC2		# 0xe0
	vxor	$zero,$zero,$zero
	vor	$xC2,$xC2,$t0		# 0xe1
	vsldoi	$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi	$t1,$zero,$t0,1		# ...1
	vaddubm	$xC2,$xC2,$xC2		# 0xc2...
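
	# At this point xC2 holds 0xc2 followed by fifteen zero bytes; the vor
	# below completes the 0xc2...01 reduction constant.  The instructions
	# that follow form the "twisted" H: H is shifted left by one bit and,
	# if the bit shifted out at the top was set, the reduction constant is
	# xor-ed back in (vspltb/vsrab broadcast that carry bit into a mask).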
	vspltisb	$t2,7
	vor	$xC2,$xC2,$t1		# 0xc2....01
	vspltb	$t1,$H,0		# most significant byte
	vsl	$H,$H,$t0		# H<<=1
	vsrab	$t1,$t1,$t2		# broadcast carry bit
	vand	$t1,$t1,$xC2
	vxor	$H,$H,$t1		# twisted H

	vsldoi	$H,$H,$H,8		# twist even more ...
	vsldoi	$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi	$Hl,$zero,$H,8		# ... and split
	vsldoi	$Hh,$H,$zero,8

	stvx_u	$xC2,0,r3		# save pre-computed table
	stvx_u	$Hl,r8,r3
	stvx_u	$H, r9,r3
	stvx_u	$Hh,r10,r3

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	.gcm_init_p10,.-.gcm_init_p10

.globl	.gcm_init_htable
	lis	r0,0xfff0
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$H,0,r4			# load H

	vspltisb	$xC2,-16	# 0xf0
	vspltisb	$t0,1		# one
	vaddubm	$xC2,$xC2,$xC2		# 0xe0
	vxor	$zero,$zero,$zero
	vor	$xC2,$xC2,$t0		# 0xe1
	vsldoi	$xC2,$xC2,$zero,15	# 0xe1...
	vsldoi	$t1,$zero,$t0,1		# ...1
	vaddubm	$xC2,$xC2,$xC2		# 0xc2...
	vspltisb	$t2,7
	vor	$xC2,$xC2,$t1		# 0xc2....01
	vspltb	$t1,$H,0		# most significant byte
	vsl	$H,$H,$t0		# H<<=1
	vsrab	$t1,$t1,$t2		# broadcast carry bit
	vand	$t1,$t1,$xC2
	vxor	$IN,$H,$t1		# twisted H

	vsldoi	$H,$IN,$IN,8		# twist even more ...
	vsldoi	$xC2,$zero,$xC2,8	# 0xc2.0
	vsldoi	$Hl,$zero,$H,8		# ... and split
	vsldoi	$Hh,$H,$zero,8

	stvx_u	$xC2,0,r3		# save pre-computed table
	stvx_u	$Hl,r8,r3
	li	r8,0x40
	stvx_u	$H, r9,r3
	li	r9,0x50
	stvx_u	$Hh,r10,r3
	li	r10,0x60

	vpmsumd	$Xl,$IN,$Hl		# H.lo·H.lo
	vpmsumd	$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
	vpmsumd	$Xh,$IN,$Hh		# H.hi·H.hi

	vpmsumd	$t2,$Xl,$xC2		# 1st reduction phase

	vsldoi	$t0,$Xm,$zero,8
	vsldoi	$t1,$zero,$Xm,8
	vxor	$Xl,$Xl,$t0
	vxor	$Xh,$Xh,$t1

	vsldoi	$Xl,$Xl,$Xl,8
	vxor	$Xl,$Xl,$t2

	vsldoi	$t1,$Xl,$Xl,8		# 2nd reduction phase
	vpmsumd	$Xl,$Xl,$xC2
	vxor	$t1,$t1,$Xh
	vxor	$IN1,$Xl,$t1

	vsldoi	$H2,$IN1,$IN1,8
	vsldoi	$H2l,$zero,$H2,8
	vsldoi	$H2h,$H2,$zero,8

	stvx_u	$H2l,r8,r3		# save H^2
	li	r8,0x70
	stvx_u	$H2,r9,r3
	li	r9,0x80
	stvx_u	$H2h,r10,r3
	li	r10,0x90

	vpmsumd	$Xl,$IN,$H2l		# H.lo·H^2.lo
	vpmsumd	$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
	vpmsumd	$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
	vpmsumd	$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
	vpmsumd	$Xh,$IN,$H2h		# H.hi·H^2.hi
	vpmsumd	$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi

	vpmsumd	$t2,$Xl,$xC2		# 1st reduction phase
	vpmsumd	$t6,$Xl1,$xC2		# 1st reduction phase

	vsldoi	$t0,$Xm,$zero,8
	vsldoi	$t1,$zero,$Xm,8
	vsldoi	$t4,$Xm1,$zero,8
	vsldoi	$t5,$zero,$Xm1,8
	vxor	$Xl,$Xl,$t0
	vxor	$Xh,$Xh,$t1
	vxor	$Xl1,$Xl1,$t4
	vxor	$Xh1,$Xh1,$t5

	vsldoi	$Xl,$Xl,$Xl,8
	vsldoi	$Xl1,$Xl1,$Xl1,8
	vxor	$Xl,$Xl,$t2
	vxor	$Xl1,$Xl1,$t6

	vsldoi	$t1,$Xl,$Xl,8		# 2nd reduction phase
	vsldoi	$t5,$Xl1,$Xl1,8		# 2nd reduction phase
	vpmsumd	$Xl,$Xl,$xC2
	vpmsumd	$Xl1,$Xl1,$xC2
	vxor	$t1,$t1,$Xh
	vxor	$t5,$t5,$Xh1
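
	# The two xors below complete the reductions: Xl becomes H^3 = H·H^2
	# and Xl1 becomes H^4 = H^2·H^2.  Their halves are then split out and
	# stored at table offsets 0x70-0x90 (H^3) and 0xa0-0xc0 (H^4).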
	vxor	$Xl,$Xl,$t1
	vxor	$Xl1,$Xl1,$t5

	vsldoi	$H,$Xl,$Xl,8
	vsldoi	$H2,$Xl1,$Xl1,8
	vsldoi	$Hl,$zero,$H,8
	vsldoi	$Hh,$H,$zero,8
	vsldoi	$H2l,$zero,$H2,8
	vsldoi	$H2h,$H2,$zero,8

	stvx_u	$Hl,r8,r3		# save H^3
	li	r8,0xa0
	stvx_u	$H,r9,r3
	li	r9,0xb0
	stvx_u	$Hh,r10,r3
	li	r10,0xc0
	stvx_u	$H2l,r8,r3		# save H^4
	stvx_u	$H2,r9,r3
	stvx_u	$H2h,r10,r3

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	.gcm_init_htable,.-.gcm_init_htable

.globl	.gcm_gmult_p10
	lis	r0,0xfff8
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$IN,0,$Xip		# load Xi

	lvx_u	$Hl,r8,$Htbl		# load pre-computed table
	le?lvsl	$lemask,r0,r0
	lvx_u	$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u	$Hh,r10,$Htbl
	le?vxor	$lemask,$lemask,$t0
	lvx_u	$xC2,0,$Htbl
	le?vperm	$IN,$IN,$IN,$lemask
	vxor	$zero,$zero,$zero

	vpmsumd	$Xl,$IN,$Hl		# H.lo·Xi.lo
	vpmsumd	$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	vpmsumd	$Xh,$IN,$Hh		# H.hi·Xi.hi

	vpmsumd	$t2,$Xl,$xC2		# 1st phase

	vsldoi	$t0,$Xm,$zero,8
	vsldoi	$t1,$zero,$Xm,8
	vxor	$Xl,$Xl,$t0
	vxor	$Xh,$Xh,$t1

	vsldoi	$Xl,$Xl,$Xl,8
	vxor	$Xl,$Xl,$t2

	vsldoi	$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd	$Xl,$Xl,$xC2
	vxor	$t1,$t1,$Xh
	vxor	$Xl,$Xl,$t1

	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u	$Xl,0,$Xip		# write out Xi

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	.gcm_gmult_p10,.-.gcm_gmult_p10

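# gcm_ghash_p10(Xi, Htbl, inp, len): hash len bytes from inp into Xi, i.e.
# Xi = (...((Xi xor inp[0])·H xor inp[1])·H ... xor inp[n-1])·H, where len is
# assumed to be a multiple of the 16-byte block size.  The loop below overlaps
# loading (and, on little-endian, byte-swapping) the next block with reducing
# the current product, and completes the 2nd reduction phase by folding it
# into the next block's input rather than into Xi itself.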
.globl	.gcm_ghash_p10
	lis	r0,0xfff8
	li	r8,0x10
	mfspr	$vrsave,256
	li	r9,0x20
	mtspr	256,r0
	li	r10,0x30
	lvx_u	$Xl,0,$Xip		# load Xi

	lvx_u	$Hl,r8,$Htbl		# load pre-computed table
	le?lvsl	$lemask,r0,r0
	lvx_u	$H, r9,$Htbl
	le?vspltisb	$t0,0x07
	lvx_u	$Hh,r10,$Htbl
	le?vxor	$lemask,$lemask,$t0
	lvx_u	$xC2,0,$Htbl
	le?vperm	$Xl,$Xl,$Xl,$lemask
	vxor	$zero,$zero,$zero

	lvx_u	$IN,0,$inp
	addi	$inp,$inp,16
	subi	$len,$len,16
	le?vperm	$IN,$IN,$IN,$lemask
	vxor	$IN,$IN,$Xl
	b	Loop

.align	5
Loop:
	subic	$len,$len,16
	vpmsumd	$Xl,$IN,$Hl		# H.lo·Xi.lo
	subfe.	r0,r0,r0		# borrow?-1:0
	vpmsumd	$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
	and	r0,r0,$len
	vpmsumd	$Xh,$IN,$Hh		# H.hi·Xi.hi
	add	$inp,$inp,r0

	vpmsumd	$t2,$Xl,$xC2		# 1st phase

	vsldoi	$t0,$Xm,$zero,8
	vsldoi	$t1,$zero,$Xm,8
	vxor	$Xl,$Xl,$t0
	vxor	$Xh,$Xh,$t1

	vsldoi	$Xl,$Xl,$Xl,8
	vxor	$Xl,$Xl,$t2
	lvx_u	$IN,0,$inp
	addi	$inp,$inp,16

	vsldoi	$t1,$Xl,$Xl,8		# 2nd phase
	vpmsumd	$Xl,$Xl,$xC2
	le?vperm	$IN,$IN,$IN,$lemask
	vxor	$t1,$t1,$Xh
	vxor	$IN,$IN,$t1
	vxor	$IN,$IN,$Xl
	beq	Loop			# did $len-=16 borrow?
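	# out of the loop: the 2nd reduction phase of the final block is still
	# pending, so fold it into Xl below before writing Xi back out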

	vxor	$Xl,$Xl,$t1
	le?vperm	$Xl,$Xl,$Xl,$lemask
	stvx_u	$Xl,0,$Xip		# write out Xi

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,4,0
	.long	0
.size	.gcm_ghash_p10,.-.gcm_ghash_p10

.asciz	"GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___

foreach (split("\n",$code)) {
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o;
	} else {
	    s/le\?/#le#/o	or
	    s/be\?//o;
	}
	print $_,"\n";
}

close STDOUT; # enforce flush