#! /usr/bin/env perl
# Copyright 2009-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> [<output>]
#   flavour - anything matching /64/ selects the 64-bit (PA-RISC 2.0W)
#             settings, everything else the 32-bit ones;
#   output  - file that receives the generated assembly.
$flavour = shift;
$output  = shift;

# Redirect STDOUT only when an output file was actually named, so that
# invoking the script without one keeps writing to the inherited stdout.
# A named file that cannot be opened is a hard error: silently carrying
# on would send the assembly to the wrong place and leave the intended
# target missing or stale.  The two-argument form of open is retained
# on purpose so that file names are interpreted exactly as before.
if (defined($output) && length($output)) {
	open STDOUT,">$output" or die "can't open $output: $!";
}

# ABI dependent parameters: load/store mnemonics for saving registers,
# word size and stack frame geometry.
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

$t0="%r28";		# scratch
$t1="%r29";		# scratch
$K="%r31";		# current round constant

# Sixteen message schedule registers; the seventeenth entry ($t0) only
# receives the extra input word needed when realigning unaligned input.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; the list is rotated by one after every round.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");

# Append round $i of rounds 0..19 to $code.
#   e += K + rol(a,5) + X[i] + ((b & c) | (~b & d));  b = rol(b,30)
# Rounds 15 and up additionally compute the schedule word for round
# $i+1 in place ("forward Xupdate").
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<15);
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
}

# Append round $i of rounds 20..39 or 60..79 to $code.
#   e += K + rol(a,5) + X[i] + (b ^ c ^ d);  b = rol(b,30)
# Every round but the last also updates the schedule word for round
# $i+1; round 79 instead reloads the five context words into
# @X[0..4] so they can be added to the working variables afterwards.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
$code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
}

# Append round $i of rounds 40..59 to $code.
#   e += K + rol(a,5) + X[i] + (((c ^ d) & b) + (c & d));  b = rol(b,30)
# The schedule word for round $i+1 is updated along the way.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}

# Function header, register save area, initial context load and the
# shift amount used to realign unaligned input.  Expressions between
# back-ticks are evaluated by the post-processing loop at the bottom.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
	# NB: $i is deliberately a global; the round loops further down
	# continue counting from wherever the previous loop left it.
	for ($i=0;$i<15;$i++) {		# load input block
	$code.="\tldw\t`4*$i`($t0),@X[$i]\n";		}
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___
	for ($i=0;$i<16;$i++) {		# align input
	$code.="\tvshd\t@X[$i],@X[$i+1],@X[$i]\n";	}
$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# After each round the working variable list is rotated right by one,
# so that the register which was "e" becomes "a" for the next round.
for ($i=0;$i<20;$i++)   { BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)       { BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)       { BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for (;$i<80;$i++)       { BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Fold the working variables back into the context (whose old values
# round 79 left in @X[0..4]), loop over the remaining blocks, then
# restore the saved registers and return.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Ask the compiler driver named by \$CC which assembler it runs; the
# directive rewrites below are applied only when it reports GNU as.
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
	=~ /GNU assembler/) {
    $gnuas = 1;
}

# Post-process the generated text one line at a time.
foreach (split("\n",$code)) {
	# evaluate the arithmetic embedded between back-ticks
	s/\`([^\`]*)\`/eval $1/ge;

	# 64-bit builds with GNU as: adjust the section/level directives
	s/(\.LEVEL\s+2\.0)W/$1w/	if ($gnuas && $SIZE_T==8);
	s/\.SPACE\s+\$TEXT\$/.text/	if ($gnuas && $SIZE_T==8);
	s/\.SUBSPA.*//			if ($gnuas && $SIZE_T==8);
	# 32-bit builds: strip the "*" from ",*" instruction completers
	s/,\*/,/			if ($SIZE_T==4);
	# 64-bit builds: return with bve instead of bv
	s/\bbv\b/bve/			if ($SIZE_T==8);

	print $_,"\n";
}
# Buffered write errors (full disk, I/O error) only surface at close
# time; fail loudly rather than leave a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";