#!/usr/bin/env perl
# Source: /freebsd/crypto/openssl/crypto/sha/asm/sha1-parisc.pl (revision 1f13597d10e771d5546d31839150812bde8e4a56)

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this for aligned input, data for
# unaligned input is not available).
#
# Special thanks to polarhome.com for providing an HP-UX account.

# Command line: <flavour> <output-file>
$flavour = shift;	# a flavour containing "64" selects the PA-RISC 2.0W settings below
$output = shift;	# file that receives the generated assembly

# Redirect STDOUT to the output file and fail loudly if that is not
# possible, instead of silently generating nothing. When no output file
# is named the assembly goes to the inherited STDOUT.
if (defined($output) && $output ne "") {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}

# ABI-dependent parameters: word size, stack frame marker size, offset of
# the return-pointer save slot, and the store/load mnemonics used by the
# prologue and epilogue.
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

# Scratch registers and the register holding the current round constant.
$t0="%r28";
$t1="%r29";
$K="%r31";

# Message schedule window X[0..15]. The 17th entry aliases $t0 and is only
# used as the extra word needed when realigning unaligned input.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# Working variables; @V is rotated after every round by the round loops.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");

# BODY_00_19($i,$a,$b,$c,$d,$e)
#
# Appends the instruction sequence for round $i of the first SHA-1 stage
# to the global $code. $a..$e are the register names currently playing the
# roles of the five working variables.
#
# The emitted code accumulates into $e:
#	$K + X[$i] + ($a rotated right by 27) + (($b & $c) | ($d & ~$b))
# and then rotates $b right by 2 in place. "shd r,r,n,t" with both source
# operands equal is used as a 32-bit rotate right by n.
#
# For $i >= 15 the sequence is interleaved with the schedule update for
# the next round ($j = $i+1): X[$j%16] is XORed with X[($j+2)%16],
# X[($j+8)%16] and X[($j+13)%16] and then rotated right by 31.
#
# The templates are emitted verbatim; instruction order is part of the
# hand scheduling and must not be changed.
# NOTE(review): the last accumulate in the $i>=15 variant uses "add" where
# every other one uses "addl"; kept exactly as found.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<15);
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e	; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends the instruction sequence for a round that uses the three-way XOR
# of $b, $c and $d to the global $code. The driver calls it for rounds
# 20..39 and again for rounds 60..79 (with a different constant in $K).
#
# The emitted code accumulates into $e:
#	$K + X[$i%16] + ($a rotated right by 27) + ($b ^ $c ^ $d)
# and rotates $b right by 2 in place.
#
# For $i < 79 the sequence is interleaved with the schedule update for
# round $j = $i+1 (XOR of three earlier window entries into X[$j%16],
# followed by a rotate right by 31).
#
# For $i == 79 no further schedule word is needed, so the freed slots are
# used to load the five 32-bit context words at offsets 0..16 of $ctx into
# X[0]..X[4]; the code emitted after the last round adds them to the
# working variables.
#
# The templates are emitted verbatim; instruction order is part of the
# hand scheduling and must not be changed.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
$code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends the instruction sequence for round $i of the third SHA-1 stage
# to the global $code.
#
# The emitted code accumulates into $e:
#	$K + X[$i%16] + ($a rotated right by 27)
#	   + ($b & ($c ^ $d)) + ($c & $d)
# and rotates $b right by 2 in place. The two boolean terms can never have
# the same bit set (one requires $c and $d to differ, the other requires
# both to be set), so adding them separately gives the same value as
# OR-ing them first.
#
# Every round is interleaved with the schedule update for round
# $j = $i+1 (XOR of three earlier window entries into X[$j%16], followed
# by a rotate right by 31).
#
# The template is emitted verbatim; instruction order is part of the hand
# scheduling and must not be changed.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}

# Start of the generated file: assembler directives, the exported entry
# point and the function prologue.
#
# The prologue stores %r2 at -$SAVED_RP(%sp), stores %r3 while advancing
# %sp by $FRAME, and then stores %r4..%r16 at consecutive $SIZE_T-sized
# slots addressed relative to the new %sp. After that it
#   - loads the five 32-bit context words from $ctx into $A..$E,
#   - computes 32 - 8*($inp & 3) and moves it to %cr11 (the shift amount
#     register used by the "vshd" realignment below),
#   - at L$oop, clears the two low bits of $inp into $t0 to get the
#     word-aligned address the block is loaded from.
#
# Expressions in backticks are evaluated, and the ",*" completers are
# removed for 32-bit builds, by the post-processing at the end of the
# script.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
188*1f13597dSJung-uk Kim	for ($i=0;$i<15;$i++) {		# load input block
189*1f13597dSJung-uk Kim	$code.="\tldw	`4*$i`($t0),@X[$i]\n";		}
190*1f13597dSJung-uk Kim$code.=<<___;
191*1f13597dSJung-uk Kim	cmpb,*=	$inp,$t0,L\$aligned
192*1f13597dSJung-uk Kim	ldw	60($t0),@X[15]
193*1f13597dSJung-uk Kim	ldw	64($t0),@X[16]
194*1f13597dSJung-uk Kim___
195*1f13597dSJung-uk Kim	for ($i=0;$i<16;$i++) {		# align input
196*1f13597dSJung-uk Kim	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";	}
197*1f13597dSJung-uk Kim$code.=<<___;
198*1f13597dSJung-uk KimL\$aligned
199*1f13597dSJung-uk Kim	ldil	L'0x5a827000,$K		; K_00_19
200*1f13597dSJung-uk Kim	ldo	0x999($K),$K
201*1f13597dSJung-uk Kim___
# Emit the 80 rounds, fully unrolled. After every round @V is rotated by
# one position (the last element moves to the front), so the register that
# just received the round result becomes the "a" argument of the next
# round. Because 80 is a multiple of 5, @V is back in its initial order
# when the last loop finishes, which lets the code emitted afterwards
# refer to $A..$E directly.
#
# $i is deliberately shared between the four loops: each loop continues
# where the previous one stopped. Before each group of 20 rounds the
# corresponding round constant is loaded into $K.
for ($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
# Rounds 60..79 use the same boolean function as rounds 20..39.
for (;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# End of the per-block loop and function epilogue.
#
# @X[0..4] hold the context words that were loaded during round 79; they
# are added to the working variables and the sums are stored back to $ctx.
# The block counter $num is then decremented and the loop repeated while
# it is non-zero; the instruction placed after the branch advances $inp by
# 64 bytes (branch delay slot).
#
# The epilogue reloads %r2 and %r4..%r16 from the slots written by the
# prologue and returns through %r2; the final pre-decrementing load after
# the return branch restores %r3 and releases the frame.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<> -1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Post-process the accumulated template and write it out.

# Evaluate every `...` expression (frame offsets, load displacements).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# The ",*" prefix on a branch condition selects the 64-bit form; it does
# not exist in 32-bit builds, so strip the "*" there.
$code =~ s/,\*/,/gm		if ($SIZE_T==4);
# 64-bit builds return with "bve" rather than "bv", so that returning to a
# caller in a different space works.
# NOTE(review): this matches the change made later in the upstream
# sources for all PA-RISC modules; confirm against the 2.0W toolchain
# in use.
$code =~ s/\bbv\b/bve/gm	if ($SIZE_T==8);
print $code;
# A failed close means the assembly file is truncated or was never
# flushed; abort so the build does not continue with a bad file.
close STDOUT or die "error closing STDOUT: $!";