#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

use strict;
use warnings;

# Usage: <this script> <flavour> [<output>]
#
#   flavour - any string containing "64" selects the 64-bit (2.0W) build,
#             everything else (including no argument) selects 32-bit.
#   output  - file to receive the generated assembly.  When omitted the
#             code is written to the inherited STDOUT.
my $flavour = shift;
my $output  = shift;
$flavour = "" unless defined $flavour;

if (defined $output && length $output) {
    # Three-argument open: the file name is never parsed for a mode, and
    # a failure aborts the build instead of silently emitting nothing.
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI-dependent parameters: assembler level, pointer size, stack frame
# layout and the store/load mnemonics used by the prologue/epilogue.
my ($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
    $PUSH, $PUSHMA, $POP, $POPMB);

if ($flavour =~ /64/) {
    $LEVEL        = "2.0W";
    $SIZE_T       = 8;
    $FRAME_MARKER = 80;
    $SAVED_RP     = 16;
    $PUSH         = "std";
    $PUSHMA       = "std,ma";
    $POP          = "ldd";
    $POPMB        = "ldd,mb";
} else {
    $LEVEL        = "1.0";
    $SIZE_T       = 4;
    $FRAME_MARKER = 48;
    $SAVED_RP     = 20;
    $PUSH         = "stw";
    $PUSHMA       = "stwm";
    $POP          = "ldw";
    $POPMB        = "ldwm";
}

my $FRAME = 14*$SIZE_T + $FRAME_MARKER;	# 14 saved regs + frame marker
					#                 [+ argument transfer]
my $ctx = "%r26";	# arg0
my $inp = "%r25";	# arg1
my $num = "%r24";	# arg2

my $t0 = "%r28";
my $t1 = "%r29";
my $K  = "%r31";

# Sixteen message-schedule registers plus a 17th slot (aliasing $t0) that
# is only used while realigning unaligned input.
my @X = ("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
	 "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; @V is rotated after every round so that each round
# body always sees them in (a,b,c,d,e) order.
my ($A, $B, $C, $D, $E) = ("%r19","%r20","%r21","%r22","%r23");
my @V = ($A, $B, $C, $D, $E);

# Accumulates the generated assembly; the round subs below append to it.
my $code;

# Emit one round of rounds 0..19.
#   $i            - round number
#   $a,$b,$c,$d,$e - register names holding the working variables
# The round function is (b AND c) OR (d AND NOT b).  Rounds 15..19 also
# interleave the schedule update for the word consumed by the next round.
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;
    $code.=<<___ if ($i<15);
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	$X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
    $code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]
	addl	$X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]
	add	$t0,$e,$e
	shd	$X[$j%16],$X[$j%16],31,$X[$j%16]
___
    return;
}

# Emit one round of rounds 20..39 and 60..79 (round function b XOR c XOR d).
# Same arguments as BODY_00_19.  Round 79 needs no further schedule update,
# so its free slots are used to reload the five context words into
# X[0..4] for the final accumulation.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;
    $code.=<<___ if ($i<79);
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]
	addl	$X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	$X[$j%16],$X[$j%16],31,$X[$j%16]
	addl	$t0,$e,$e
___
    $code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),$X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),$X[1]
	addl	$X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),$X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),$X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),$X[4]
___
    return;
}

# Emit one round of rounds 40..59.  Same arguments as BODY_00_19.
# The majority function is accumulated as two addends,
# (b AND (c XOR d)) and (c AND d).
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;
    $code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	$X[($j+2)%16],$X[$j%16],$X[$j%16]
	xor	$d,$c,$t0
	addl	$X[$i%16],$e,$e
	xor	$X[($j+8)%16],$X[$j%16],$X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$X[($j+13)%16],$X[$j%16],$X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	$X[$j%16],$X[$j%16],31,$X[$j%16]
	addl	$t1,$e,$e
___
    return;
}

# Function prologue: save %r2..%r16, load the five context words and
# program the shift-amount register from the low two bits of the input
# pointer so that unaligned input can be realigned with vshd.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___

# Load the first 15 words of the input block from the word-aligned address.
for my $w (0 .. 14) {
    $code .= "\tldw	`4*$w`($t0),$X[$w]\n";
}

# The 16th word is loaded in the branch delay slot.  Only the unaligned
# path falls through to fetch a 17th word and funnel-shift adjacent pairs.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),$X[15]
	ldw	64($t0),$X[16]
___

for my $w (0 .. 15) {	# align input
    $code .= "\tvshd	$X[$w],$X[$w+1],$X[$w]\n";
}

$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___

# Each round is emitted with the current register assignment, after which
# @V is rotated right by one so that e becomes the next round's a.
for my $round (0 .. 19) {
    BODY_00_19($round, @V);
    unshift(@V, pop(@V));
}

$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for my $round (20 .. 39) {
    BODY_20_39($round, @V);
    unshift(@V, pop(@V));
}

$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for my $round (40 .. 59) {
    BODY_40_59($round, @V);
    unshift(@V, pop(@V));
}

$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___

# Rounds 60..79 share the round body of rounds 20..39.
for my $round (60 .. 79) {
    BODY_20_39($round, @V);
    unshift(@V, pop(@V));
}

# Add the saved context (reloaded into X[0..4] during round 79) to the
# working variables, store the result, advance to the next 64-byte block
# and finally restore the callee-saved registers.
$code.=<<___;
	addl	$X[0],$A,$A
	addl	$X[1],$B,$B
	addl	$X[2],$C,$C
	addl	$X[3],$D,$D
	addl	$X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Fold the back-quoted arithmetic (frame offsets, load displacements) into
# literal numbers.  Only text generated above is ever evaluated here.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# ",*" marks a 64-bit completer; drop the "*" for the 32-bit build.
$code =~ s/,\*/,/gm if ($SIZE_T==4);

# Buffered write errors (e.g. a full disk) only surface on print/close, so
# both are checked to avoid leaving a truncated assembly file behind.
print $code or die "error writing output: $!";
close STDOUT or die "error closing output: $!";