xref: /freebsd/sys/crypto/openssl/powerpc64le/ppc-mont.S (revision 71625ec9ad2a9bc8c09784fbd23b759830e0ee5f)
1*3a608692SPiotr Kubaj/* Do not modify. This file is auto-generated from ppc-mont.pl. */
2*3a608692SPiotr Kubaj.machine	"any"
3*3a608692SPiotr Kubaj.abiversion	2
4*3a608692SPiotr Kubaj.text
5*3a608692SPiotr Kubaj
/*
 * bn_mul_mont_int
 *
 * Word-by-word multiply/accumulate/reduce over 64-bit words, followed by a
 * subtract and a masked select into the output buffer.  The symbol name and
 * the generator (ppc-mont.pl) suggest this is the Montgomery multiplication
 * primitive; the argument roles below are inferred from how each register is
 * used in this block -- TODO confirm against the C prototype.
 *
 *   r3  in : output pointer (copied to r9 at entry)
 *       out: always 1
 *   r4  first operand  ("a" below), indexed by the inner loops
 *   r5  second operand ("b" below), one word consumed per outer pass
 *   r6  third operand  ("n" below), multiplied by the per-pass factor r24
 *   r7  pointer to one word; that word is loaded once and used as the
 *       multiplier that produces r24 (presumably the Montgomery n0 constant)
 *   r8  word count; handled with 32-bit shifts (slwi/srwi)
 *
 * r20-r31 are saved below the caller's r1 and restored before return.
 * r0 and r9-r12 are used as scratch.
 * NOTE(review): CTR is loaded with (r8 - 2), so the loops appear to assume
 * at least three words -- confirm that callers enforce this.
 */
6*3a608692SPiotr Kubaj.globl	bn_mul_mont_int
7*3a608692SPiotr Kubaj.type	bn_mul_mont_int,@function
8*3a608692SPiotr Kubaj.align	5
9*3a608692SPiotr Kubajbn_mul_mont_int:
10*3a608692SPiotr Kubaj.localentry	bn_mul_mont_int,0
11*3a608692SPiotr Kubaj
/*
 * Frame set-up.  r9 keeps the incoming r3.  r3 = r8*8 + 352 is subtracted
 * from r1, the result is rounded down to a 4096-byte boundary (mask -4096
 * in r12) and converted back into a displacement (new - old) for stdux,
 * which stores the old r1 at the new address and updates r1.  r12 then
 * holds the old r1, and r8 is shifted back from bytes to words.
 */
12*3a608692SPiotr Kubaj	mr	9,3
13*3a608692SPiotr Kubaj	li	3,0
14*3a608692SPiotr Kubaj	slwi	8,8,3
15*3a608692SPiotr Kubaj	li	12,-4096
16*3a608692SPiotr Kubaj	addi	3,8,352
17*3a608692SPiotr Kubaj	subf	3,3,1
18*3a608692SPiotr Kubaj	and	3,3,12
19*3a608692SPiotr Kubaj	subf	3,1,3
20*3a608692SPiotr Kubaj	mr	12,1
21*3a608692SPiotr Kubaj	srwi	8,8,3
22*3a608692SPiotr Kubaj	stdux	1,1,3
23*3a608692SPiotr Kubaj
/* Save r20-r31 in the 96 bytes just below the old stack pointer (r12). */
24*3a608692SPiotr Kubaj	std	20,-96(12)
25*3a608692SPiotr Kubaj	std	21,-88(12)
26*3a608692SPiotr Kubaj	std	22,-80(12)
27*3a608692SPiotr Kubaj	std	23,-72(12)
28*3a608692SPiotr Kubaj	std	24,-64(12)
29*3a608692SPiotr Kubaj	std	25,-56(12)
30*3a608692SPiotr Kubaj	std	26,-48(12)
31*3a608692SPiotr Kubaj	std	27,-40(12)
32*3a608692SPiotr Kubaj	std	28,-32(12)
33*3a608692SPiotr Kubaj	std	29,-24(12)
34*3a608692SPiotr Kubaj	std	30,-16(12)
35*3a608692SPiotr Kubaj	std	31,-8(12)
36*3a608692SPiotr Kubaj
/* r7 = the word r7 pointed at; r8 = count - 2 (loop trip count for CTR). */
37*3a608692SPiotr Kubaj	ld	7,0(7)
38*3a608692SPiotr Kubaj	addi	8,8,-2
39*3a608692SPiotr Kubaj
/*
 * First pass, using b[0].
 *   r23 = b[0], r22 = r1 + 64 (start of the scratch vector "t")
 *   r26:r25 = a[0] * b[0]
 *   r24     = low 64 bits of r25 * r7
 *   r30:r29 = a[1] * b[0]
 *   r28:r27 = n[0] * r24, then r25 is added in with carry into r28
 *   r0:r31  = n[1] * r24
 */
40*3a608692SPiotr Kubaj	ld	23,0(5)
41*3a608692SPiotr Kubaj	ld	10,0(4)
42*3a608692SPiotr Kubaj	addi	22,1,64
43*3a608692SPiotr Kubaj	mulld	25,10,23
44*3a608692SPiotr Kubaj	mulhdu	26,10,23
45*3a608692SPiotr Kubaj
46*3a608692SPiotr Kubaj	ld	10,8(4)
47*3a608692SPiotr Kubaj	ld	11,0(6)
48*3a608692SPiotr Kubaj
49*3a608692SPiotr Kubaj	mulld	24,25,7
50*3a608692SPiotr Kubaj
51*3a608692SPiotr Kubaj	mulld	29,10,23
52*3a608692SPiotr Kubaj	mulhdu	30,10,23
53*3a608692SPiotr Kubaj
54*3a608692SPiotr Kubaj	mulld	27,11,24
55*3a608692SPiotr Kubaj	mulhdu	28,11,24
56*3a608692SPiotr Kubaj	ld	11,8(6)
57*3a608692SPiotr Kubaj	addc	27,27,25
58*3a608692SPiotr Kubaj	addze	28,28
59*3a608692SPiotr Kubaj
60*3a608692SPiotr Kubaj	mulld	31,11,24
61*3a608692SPiotr Kubaj	mulhdu	0,11,24
62*3a608692SPiotr Kubaj
/* CTR = count - 2; r21 = 16 is the byte offset of word index 2. */
63*3a608692SPiotr Kubaj	mtctr	8
64*3a608692SPiotr Kubaj	li	21,16
65*3a608692SPiotr Kubaj.align	4
/*
 * .L1st: each iteration loads a[j] and n[j] at byte offset r21 and starts
 * their products for the next iteration, while folding the products that
 * were started one iteration earlier (r30:r29 and r0:r31) into the running
 * carries r26 and r28.  The finished low word r27 is stored at 0(r22) and
 * both r21 and r22 advance by one word.
 */
66*3a608692SPiotr Kubaj.L1st:
67*3a608692SPiotr Kubaj	ldx	10,4,21
68*3a608692SPiotr Kubaj	addc	25,29,26
69*3a608692SPiotr Kubaj	ldx	11,6,21
70*3a608692SPiotr Kubaj	addze	26,30
71*3a608692SPiotr Kubaj	mulld	29,10,23
72*3a608692SPiotr Kubaj	addc	27,31,28
73*3a608692SPiotr Kubaj	mulhdu	30,10,23
74*3a608692SPiotr Kubaj	addze	28,0
75*3a608692SPiotr Kubaj	mulld	31,11,24
76*3a608692SPiotr Kubaj	addc	27,27,25
77*3a608692SPiotr Kubaj	mulhdu	0,11,24
78*3a608692SPiotr Kubaj	addze	28,28
79*3a608692SPiotr Kubaj	std	27,0(22)
80*3a608692SPiotr Kubaj
81*3a608692SPiotr Kubaj	addi	21,21,8
82*3a608692SPiotr Kubaj	addi	22,22,8
83*3a608692SPiotr Kubaj	bdnz	.L1st
84*3a608692SPiotr Kubaj
/* Drain the pipeline: fold in the last pair of products and store it. */
85*3a608692SPiotr Kubaj	addc	25,29,26
86*3a608692SPiotr Kubaj	addze	26,30
87*3a608692SPiotr Kubaj
88*3a608692SPiotr Kubaj	addc	27,31,28
89*3a608692SPiotr Kubaj	addze	28,0
90*3a608692SPiotr Kubaj	addc	27,27,25
91*3a608692SPiotr Kubaj	addze	28,28
92*3a608692SPiotr Kubaj	std	27,0(22)
93*3a608692SPiotr Kubaj
/* Top word = r28 + r26, stored at 8(r22); r3 receives the carry out. */
94*3a608692SPiotr Kubaj	li	3,0
95*3a608692SPiotr Kubaj	addc	28,28,26
96*3a608692SPiotr Kubaj	addze	3,3
97*3a608692SPiotr Kubaj	std	28,8(22)
98*3a608692SPiotr Kubaj
/* r20 = byte offset into b for the outer loop, starting at word 1. */
99*3a608692SPiotr Kubaj	li	20,8
100*3a608692SPiotr Kubaj.align	4
/*
 * .Louter: same shape as the first pass, but with b[r20/8] in r23 and with
 * the scratch word t[0] (64(r1)) added to a[0]*b before r24 is derived
 * from it.  r22 is reset to the start of the scratch vector.
 */
101*3a608692SPiotr Kubaj.Louter:
102*3a608692SPiotr Kubaj	ldx	23,5,20
103*3a608692SPiotr Kubaj	ld	10,0(4)
104*3a608692SPiotr Kubaj	addi	22,1,64
105*3a608692SPiotr Kubaj	ld	12,64(1)
106*3a608692SPiotr Kubaj	mulld	25,10,23
107*3a608692SPiotr Kubaj	mulhdu	26,10,23
108*3a608692SPiotr Kubaj	ld	10,8(4)
109*3a608692SPiotr Kubaj	ld	11,0(6)
110*3a608692SPiotr Kubaj	addc	25,25,12
111*3a608692SPiotr Kubaj	mulld	29,10,23
112*3a608692SPiotr Kubaj	addze	26,26
113*3a608692SPiotr Kubaj	mulld	24,25,7
114*3a608692SPiotr Kubaj	mulhdu	30,10,23
115*3a608692SPiotr Kubaj	mulld	27,11,24
116*3a608692SPiotr Kubaj	mulhdu	28,11,24
117*3a608692SPiotr Kubaj	ld	11,8(6)
118*3a608692SPiotr Kubaj	addc	27,27,25
119*3a608692SPiotr Kubaj	mulld	31,11,24
120*3a608692SPiotr Kubaj	addze	28,28
121*3a608692SPiotr Kubaj	mulhdu	0,11,24
122*3a608692SPiotr Kubaj
123*3a608692SPiotr Kubaj	mtctr	8
124*3a608692SPiotr Kubaj	li	21,16
125*3a608692SPiotr Kubaj.align	4
/*
 * .Linner: as .L1st, plus the previous pass's scratch word at 8(r22) is
 * added into the "a*b" column (r25, carry into r26).  The result is written
 * to 0(r22), i.e. one word lower than the word that was read.
 */
126*3a608692SPiotr Kubaj.Linner:
127*3a608692SPiotr Kubaj	ldx	10,4,21
128*3a608692SPiotr Kubaj	addc	25,29,26
129*3a608692SPiotr Kubaj	ld	12,8(22)
130*3a608692SPiotr Kubaj	addze	26,30
131*3a608692SPiotr Kubaj	ldx	11,6,21
132*3a608692SPiotr Kubaj	addc	27,31,28
133*3a608692SPiotr Kubaj	mulld	29,10,23
134*3a608692SPiotr Kubaj	addze	28,0
135*3a608692SPiotr Kubaj	mulhdu	30,10,23
136*3a608692SPiotr Kubaj	addc	25,25,12
137*3a608692SPiotr Kubaj	mulld	31,11,24
138*3a608692SPiotr Kubaj	addze	26,26
139*3a608692SPiotr Kubaj	mulhdu	0,11,24
140*3a608692SPiotr Kubaj	addc	27,27,25
141*3a608692SPiotr Kubaj	addi	21,21,8
142*3a608692SPiotr Kubaj	addze	28,28
143*3a608692SPiotr Kubaj	std	27,0(22)
144*3a608692SPiotr Kubaj	addi	22,22,8
145*3a608692SPiotr Kubaj	bdnz	.Linner
146*3a608692SPiotr Kubaj
/* Drain the pipeline for the last column of this pass. */
147*3a608692SPiotr Kubaj	ld	12,8(22)
148*3a608692SPiotr Kubaj	addc	25,29,26
149*3a608692SPiotr Kubaj	addze	26,30
150*3a608692SPiotr Kubaj	addc	25,25,12
151*3a608692SPiotr Kubaj	addze	26,26
152*3a608692SPiotr Kubaj
153*3a608692SPiotr Kubaj	addc	27,31,28
154*3a608692SPiotr Kubaj	addze	28,0
155*3a608692SPiotr Kubaj	addc	27,27,25
156*3a608692SPiotr Kubaj	addze	28,28
157*3a608692SPiotr Kubaj	std	27,0(22)
158*3a608692SPiotr Kubaj
/*
 * addic r3,r3,-1 is used only for its carry side effect: it sets CA when r3
 * is non-zero, which moves the previous pass's overflow word into CA.  r3
 * is then zeroed, the top word is formed with adde (which consumes that CA)
 * and the new carry out is captured back into r3.
 */
159*3a608692SPiotr Kubaj	addic	3,3,-1
160*3a608692SPiotr Kubaj	li	3,0
161*3a608692SPiotr Kubaj	adde	28,28,26
162*3a608692SPiotr Kubaj	addze	3,3
163*3a608692SPiotr Kubaj	std	28,8(22)
164*3a608692SPiotr Kubaj
/*
 * Outer-loop test: r12 = r8*8 = (count - 2)*8.  The compare uses r20 before
 * it is incremented (addi does not touch CR0), so the last pass runs with
 * r20 = (count - 1)*8.
 */
165*3a608692SPiotr Kubaj	slwi	12,8,3
166*3a608692SPiotr Kubaj	cmpld	20,12
167*3a608692SPiotr Kubaj	addi	20,20,8
168*3a608692SPiotr Kubaj	ble	.Louter
169*3a608692SPiotr Kubaj
/*
 * Final subtraction.  r8 is restored to the full word count and loaded into
 * CTR.  subfc r21,r21,r21 yields r21 = 0 and sets CA (no borrow pending).
 */
170*3a608692SPiotr Kubaj	addi	8,8,2
171*3a608692SPiotr Kubaj	subfc	21,21,21
172*3a608692SPiotr Kubaj	addi	22,1,64
173*3a608692SPiotr Kubaj	mtctr	8
174*3a608692SPiotr Kubaj
175*3a608692SPiotr Kubaj.align	4
/* .Lsub: out[j] = t[j] - n[j] with borrow propagated through CA. */
176*3a608692SPiotr Kubaj.Lsub:	ldx	12,22,21
177*3a608692SPiotr Kubaj	ldx	11,6,21
178*3a608692SPiotr Kubaj	subfe	10,11,12
179*3a608692SPiotr Kubaj	stdx	10,9,21
180*3a608692SPiotr Kubaj	addi	21,21,8
181*3a608692SPiotr Kubaj	bdnz	.Lsub
182*3a608692SPiotr Kubaj
/*
 * With r21 = 0, subfe gives r3 = r3 - 1 + CA, combining the overflow word
 * with the final borrow.  r3 is used below as a bitwise select mask.
 */
183*3a608692SPiotr Kubaj	li	21,0
184*3a608692SPiotr Kubaj	mtctr	8
185*3a608692SPiotr Kubaj	subfe	3,21,3
186*3a608692SPiotr Kubaj
187*3a608692SPiotr Kubaj.align	4
/*
 * .Lcopy: branch-free select, out[j] = (t[j] & r3) | (out[j] & ~r3).
 * The scratch slot is overwritten in the same pass; note the value stored
 * there is r21 (the current byte offset), not a constant zero.
 */
188*3a608692SPiotr Kubaj.Lcopy:
189*3a608692SPiotr Kubaj	ldx	12,22,21
190*3a608692SPiotr Kubaj	ldx	10,9,21
191*3a608692SPiotr Kubaj	and	12,12,3
192*3a608692SPiotr Kubaj	andc	10,10,3
193*3a608692SPiotr Kubaj	stdx	21,22,21
194*3a608692SPiotr Kubaj	or	10,10,12
195*3a608692SPiotr Kubaj	stdx	10,9,21
196*3a608692SPiotr Kubaj	addi	21,21,8
197*3a608692SPiotr Kubaj	bdnz	.Lcopy
198*3a608692SPiotr Kubaj
/*
 * Epilogue: r12 = old stack pointer (the word stdux stored at 0(r1)),
 * return value 1, restore r20-r31, and pop the frame.
 */
199*3a608692SPiotr Kubaj	ld	12,0(1)
200*3a608692SPiotr Kubaj	li	3,1
201*3a608692SPiotr Kubaj	ld	20,-96(12)
202*3a608692SPiotr Kubaj	ld	21,-88(12)
203*3a608692SPiotr Kubaj	ld	22,-80(12)
204*3a608692SPiotr Kubaj	ld	23,-72(12)
205*3a608692SPiotr Kubaj	ld	24,-64(12)
206*3a608692SPiotr Kubaj	ld	25,-56(12)
207*3a608692SPiotr Kubaj	ld	26,-48(12)
208*3a608692SPiotr Kubaj	ld	27,-40(12)
209*3a608692SPiotr Kubaj	ld	28,-32(12)
210*3a608692SPiotr Kubaj	ld	29,-24(12)
211*3a608692SPiotr Kubaj	ld	30,-16(12)
212*3a608692SPiotr Kubaj	ld	31,-8(12)
213*3a608692SPiotr Kubaj	mr	1,12
214*3a608692SPiotr Kubaj	blr
/* Data words emitted after the return; presumably a traceback table produced by the generator -- TODO confirm. */
215*3a608692SPiotr Kubaj.long	0
216*3a608692SPiotr Kubaj.byte	0,12,4,0,0x80,12,6,0
217*3a608692SPiotr Kubaj.long	0
218*3a608692SPiotr Kubaj.size	bn_mul_mont_int,.-bn_mul_mont_int
/*
 * bn_mul4x_mont_int
 *
 * Four-words-at-a-time variant of the multiply/accumulate/reduce routine.
 * The symbol name and the generator (ppc-mont.pl) suggest Montgomery
 * multiplication; argument roles are inferred from register use in this
 * block -- TODO confirm against the C prototype.
 *
 *   r3  in : output pointer (biased by -8 and parked at 8*6(r1));
 *            afterwards r3 is the running top-carry word
 *       out: always 1
 *   r4  first operand  ("a"), read four words at a time into r9-r12
 *   r5  second operand ("b"), current word kept in r27
 *   r6  third operand  ("n"), read four words at a time into r18-r21
 *   r7  pointer to one word; that word is the multiplier producing r28
 *       (presumably the Montgomery n0 constant)
 *   r8  word count, converted to a byte count on entry
 *
 * Register roles inside the loops:
 *   r22-r26  accumulator window (r26 is the transient fifth word)
 *   r14-r17  product temporaries
 *   r29      write cursor into the stack scratch area
 *   r30      end-of-"a" sentinel, r31 = byte counter cycling 8,16,24,0
 *   r0       constant zero
 *
 * When the count is a multiple of 8 and r4 == r5 the routine branches to
 * .Lsqr8x_do, which is defined further down in this file.
 * NOTE(review): the 4-word stepping and the r8>>5 trip count suggest the
 * count must be a multiple of 4 -- confirm that callers enforce this.
 */
219*3a608692SPiotr Kubaj.globl	bn_mul4x_mont_int
220*3a608692SPiotr Kubaj.type	bn_mul4x_mont_int,@function
221*3a608692SPiotr Kubaj.align	5
222*3a608692SPiotr Kubajbn_mul4x_mont_int:
223*3a608692SPiotr Kubaj.localentry	bn_mul4x_mont_int,0
224*3a608692SPiotr Kubaj
/* Dispatch: take the multiply path unless (r8 & 7) == 0 and r4 == r5. */
225*3a608692SPiotr Kubaj	andi.	0,8,7
226*3a608692SPiotr Kubaj	bne	.Lmul4x_do
227*3a608692SPiotr Kubaj	cmpld	4,5
228*3a608692SPiotr Kubaj	bne	.Lmul4x_do
229*3a608692SPiotr Kubaj	b	.Lsqr8x_do
/*
 * Frame set-up: r8 becomes a byte count (count*8), r9 keeps the old r1, and
 * stdux allocates 32*8 + r8 bytes while storing the old r1 at the new 0(r1).
 */
230*3a608692SPiotr Kubaj.Lmul4x_do:
231*3a608692SPiotr Kubaj	slwi	8,8,3
232*3a608692SPiotr Kubaj	mr	9,1
233*3a608692SPiotr Kubaj	li	10,-32*8
234*3a608692SPiotr Kubaj	sub	10,10,8
235*3a608692SPiotr Kubaj	stdux	1,1,10
236*3a608692SPiotr Kubaj
/* Save r14-r31 in the 144 bytes just below the old stack pointer (r9). */
237*3a608692SPiotr Kubaj	std	14,-8*18(9)
238*3a608692SPiotr Kubaj	std	15,-8*17(9)
239*3a608692SPiotr Kubaj	std	16,-8*16(9)
240*3a608692SPiotr Kubaj	std	17,-8*15(9)
241*3a608692SPiotr Kubaj	std	18,-8*14(9)
242*3a608692SPiotr Kubaj	std	19,-8*13(9)
243*3a608692SPiotr Kubaj	std	20,-8*12(9)
244*3a608692SPiotr Kubaj	std	21,-8*11(9)
245*3a608692SPiotr Kubaj	std	22,-8*10(9)
246*3a608692SPiotr Kubaj	std	23,-8*9(9)
247*3a608692SPiotr Kubaj	std	24,-8*8(9)
248*3a608692SPiotr Kubaj	std	25,-8*7(9)
249*3a608692SPiotr Kubaj	std	26,-8*6(9)
250*3a608692SPiotr Kubaj	std	27,-8*5(9)
251*3a608692SPiotr Kubaj	std	28,-8*4(9)
252*3a608692SPiotr Kubaj	std	29,-8*3(9)
253*3a608692SPiotr Kubaj	std	30,-8*2(9)
254*3a608692SPiotr Kubaj	std	31,-8*1(9)
255*3a608692SPiotr Kubaj
/*
 * Bias r4, r6 and r3 by one word so that the update-form loads/stores
 * (ldu/stdu with displacements 8*1..8*4) address words 0..3 and leave the
 * pointer on the last word consumed.  r7 is replaced by the word it
 * pointed at.
 */
256*3a608692SPiotr Kubaj	subi	4,4,8
257*3a608692SPiotr Kubaj	subi	6,6,8
258*3a608692SPiotr Kubaj	subi	3,3,8
259*3a608692SPiotr Kubaj	ld	7,0(7)
260*3a608692SPiotr Kubaj
/* r14 = r5 + bytes - 32 (address of the last 4-word group of b); r30 = biased end of a. */
261*3a608692SPiotr Kubaj	add	14,5,8
262*3a608692SPiotr Kubaj	add	30,4,8
263*3a608692SPiotr Kubaj	subi	14,14,8*4
264*3a608692SPiotr Kubaj
/* Load b[0], a[0..3] and n[0..3]; clear the accumulators r22-r25. */
265*3a608692SPiotr Kubaj	ld	27,8*0(5)
266*3a608692SPiotr Kubaj	li	22,0
267*3a608692SPiotr Kubaj	ld	9,8*1(4)
268*3a608692SPiotr Kubaj	li	23,0
269*3a608692SPiotr Kubaj	ld	10,8*2(4)
270*3a608692SPiotr Kubaj	li	24,0
271*3a608692SPiotr Kubaj	ld	11,8*3(4)
272*3a608692SPiotr Kubaj	li	25,0
273*3a608692SPiotr Kubaj	ldu	12,8*4(4)
274*3a608692SPiotr Kubaj	ld	18,8*1(6)
275*3a608692SPiotr Kubaj	ld	19,8*2(6)
276*3a608692SPiotr Kubaj	ld	20,8*3(6)
277*3a608692SPiotr Kubaj	ldu	21,8*4(6)
278*3a608692SPiotr Kubaj
/*
 * Park the biased output pointer and the b end marker at 8*6(r1)/8*7(r1).
 * r3 becomes the carry word (0), r29 = r1 + 8*7 is the scratch write
 * cursor, r31 the cycling byte counter, r0 a constant zero.  addic is used
 * rather than addi so that CA is also left clear (r1 + 56 does not wrap).
 */
279*3a608692SPiotr Kubaj	std	3,8*6(1)
280*3a608692SPiotr Kubaj	std	14,8*7(1)
281*3a608692SPiotr Kubaj	li	3,0
282*3a608692SPiotr Kubaj	addic	29,1,8*7
283*3a608692SPiotr Kubaj	li	31,0
284*3a608692SPiotr Kubaj	li	0,0
285*3a608692SPiotr Kubaj	b	.Loop_mul4x_1st_reduction
286*3a608692SPiotr Kubaj
287*3a608692SPiotr Kubaj.align	5
/*
 * .Loop_mul4x_1st_reduction: runs four times (until r31 = (r31 + 8) & 31
 * wraps to zero; the andi. sets the CR0 that the bne at the bottom tests).
 * Each iteration:
 *   - adds a[0..3] * r27 into r22..r26,
 *   - derives r28 = low 64 bits of r22 * r7 and pushes it to the scratch
 *     area with stdu (slots 8*8(r1)..8*11(r1) over the four iterations),
 *   - adds n[0..3] * r28 and shifts the accumulator window down one word,
 *   - loads the next b word at r5 + r31.
 * The leading addze folds the carry left pending by the previous iteration
 * into r3.
 */
288*3a608692SPiotr Kubaj.Loop_mul4x_1st_reduction:
289*3a608692SPiotr Kubaj	mulld	14,9,27
290*3a608692SPiotr Kubaj	addze	3,3
291*3a608692SPiotr Kubaj	mulld	15,10,27
292*3a608692SPiotr Kubaj	addi	31,31,8
293*3a608692SPiotr Kubaj	mulld	16,11,27
294*3a608692SPiotr Kubaj	andi.	31,31,8*4-1
295*3a608692SPiotr Kubaj	mulld	17,12,27
296*3a608692SPiotr Kubaj	addc	22,22,14
297*3a608692SPiotr Kubaj	mulhdu	14,9,27
298*3a608692SPiotr Kubaj	adde	23,23,15
299*3a608692SPiotr Kubaj	mulhdu	15,10,27
300*3a608692SPiotr Kubaj	adde	24,24,16
301*3a608692SPiotr Kubaj	mulld	28,22,7
302*3a608692SPiotr Kubaj	adde	25,25,17
303*3a608692SPiotr Kubaj	mulhdu	16,11,27
304*3a608692SPiotr Kubaj	addze	26,0
305*3a608692SPiotr Kubaj	mulhdu	17,12,27
306*3a608692SPiotr Kubaj	ldx	27,5,31
307*3a608692SPiotr Kubaj	addc	23,23,14
308*3a608692SPiotr Kubaj
309*3a608692SPiotr Kubaj	stdu	28,8(29)
310*3a608692SPiotr Kubaj	adde	24,24,15
311*3a608692SPiotr Kubaj	mulld	15,19,28
312*3a608692SPiotr Kubaj	adde	25,25,16
313*3a608692SPiotr Kubaj	mulld	16,20,28
314*3a608692SPiotr Kubaj	adde	26,26,17
315*3a608692SPiotr Kubaj	mulld	17,21,28
316*3a608692SPiotr Kubaj
317*3a608692SPiotr Kubaj
318*3a608692SPiotr Kubaj
319*3a608692SPiotr Kubaj
320*3a608692SPiotr Kubaj
321*3a608692SPiotr Kubaj
322*3a608692SPiotr Kubaj
323*3a608692SPiotr Kubaj
324*3a608692SPiotr Kubaj
325*3a608692SPiotr Kubaj
/*
 * The low half of r18 * r28 is never computed in this loop.  Instead,
 * addic r22,r22,-1 sets CA exactly when r22 is non-zero; the value it
 * writes to r22 is overwritten by the adde that follows.  Presumably this
 * stands in for the carry out of adding that low half to r22, whose sum is
 * zero by construction of r28 -- TODO confirm.
 */
326*3a608692SPiotr Kubaj	addic	22,22,-1
327*3a608692SPiotr Kubaj	mulhdu	14,18,28
328*3a608692SPiotr Kubaj	adde	22,23,15
329*3a608692SPiotr Kubaj	mulhdu	15,19,28
330*3a608692SPiotr Kubaj	adde	23,24,16
331*3a608692SPiotr Kubaj	mulhdu	16,20,28
332*3a608692SPiotr Kubaj	adde	24,25,17
333*3a608692SPiotr Kubaj	mulhdu	17,21,28
334*3a608692SPiotr Kubaj	adde	25,26,3
335*3a608692SPiotr Kubaj	addze	3,0
336*3a608692SPiotr Kubaj	addc	22,22,14
337*3a608692SPiotr Kubaj	adde	23,23,15
338*3a608692SPiotr Kubaj	adde	24,24,16
339*3a608692SPiotr Kubaj	adde	25,25,17
340*3a608692SPiotr Kubaj
341*3a608692SPiotr Kubaj	bne	.Loop_mul4x_1st_reduction
342*3a608692SPiotr Kubaj
/* If r4 already reached the end of a (only four words), finish on the short path. */
343*3a608692SPiotr Kubaj	cmpld	30,4
344*3a608692SPiotr Kubaj	beq	.Lmul4x4_post_condition
345*3a608692SPiotr Kubaj
/* Otherwise load the next four a and n words and the first saved r28 factor from 8*8(r1). */
346*3a608692SPiotr Kubaj	ld	9,8*1(4)
347*3a608692SPiotr Kubaj	ld	10,8*2(4)
348*3a608692SPiotr Kubaj	ld	11,8*3(4)
349*3a608692SPiotr Kubaj	ldu	12,8*4(4)
350*3a608692SPiotr Kubaj	ld	28,8*8(1)
351*3a608692SPiotr Kubaj	ld	18,8*1(6)
352*3a608692SPiotr Kubaj	ld	19,8*2(6)
353*3a608692SPiotr Kubaj	ld	20,8*3(6)
354*3a608692SPiotr Kubaj	ldu	21,8*4(6)
355*3a608692SPiotr Kubaj	b	.Loop_mul4x_1st_tail
356*3a608692SPiotr Kubaj
357*3a608692SPiotr Kubaj.align	5
/*
 * .Loop_mul4x_1st_tail: four iterations per 4-word group of a/n.  Each adds
 * a[k..k+3] * r27 and n[k..k+3] * r28 into the window, reusing the b words
 * (reloaded at r5 + r31) and the r28 factors saved by the reduction loop
 * (reloaded from r1 + 8*8 + r31).  The finished low word r22 is pushed to
 * the scratch area with stdu and the window shifts down one word.
 */
358*3a608692SPiotr Kubaj.Loop_mul4x_1st_tail:
359*3a608692SPiotr Kubaj	mulld	14,9,27
360*3a608692SPiotr Kubaj	addze	3,3
361*3a608692SPiotr Kubaj	mulld	15,10,27
362*3a608692SPiotr Kubaj	addi	31,31,8
363*3a608692SPiotr Kubaj	mulld	16,11,27
364*3a608692SPiotr Kubaj	andi.	31,31,8*4-1
365*3a608692SPiotr Kubaj	mulld	17,12,27
366*3a608692SPiotr Kubaj	addc	22,22,14
367*3a608692SPiotr Kubaj	mulhdu	14,9,27
368*3a608692SPiotr Kubaj	adde	23,23,15
369*3a608692SPiotr Kubaj	mulhdu	15,10,27
370*3a608692SPiotr Kubaj	adde	24,24,16
371*3a608692SPiotr Kubaj	mulhdu	16,11,27
372*3a608692SPiotr Kubaj	adde	25,25,17
373*3a608692SPiotr Kubaj	mulhdu	17,12,27
374*3a608692SPiotr Kubaj	addze	26,0
375*3a608692SPiotr Kubaj	ldx	27,5,31
376*3a608692SPiotr Kubaj	addc	23,23,14
377*3a608692SPiotr Kubaj	mulld	14,18,28
378*3a608692SPiotr Kubaj	adde	24,24,15
379*3a608692SPiotr Kubaj	mulld	15,19,28
380*3a608692SPiotr Kubaj	adde	25,25,16
381*3a608692SPiotr Kubaj	mulld	16,20,28
382*3a608692SPiotr Kubaj	adde	26,26,17
383*3a608692SPiotr Kubaj	mulld	17,21,28
384*3a608692SPiotr Kubaj	addc	22,22,14
385*3a608692SPiotr Kubaj	mulhdu	14,18,28
386*3a608692SPiotr Kubaj	adde	23,23,15
387*3a608692SPiotr Kubaj	mulhdu	15,19,28
388*3a608692SPiotr Kubaj	adde	24,24,16
389*3a608692SPiotr Kubaj	mulhdu	16,20,28
390*3a608692SPiotr Kubaj	adde	25,25,17
391*3a608692SPiotr Kubaj	adde	26,26,3
392*3a608692SPiotr Kubaj	mulhdu	17,21,28
393*3a608692SPiotr Kubaj	addze	3,0
394*3a608692SPiotr Kubaj	addi	28,1,8*8
395*3a608692SPiotr Kubaj	ldx	28,28,31
396*3a608692SPiotr Kubaj	stdu	22,8(29)
397*3a608692SPiotr Kubaj	addc	22,23,14
398*3a608692SPiotr Kubaj	adde	23,24,15
399*3a608692SPiotr Kubaj	adde	24,25,16
400*3a608692SPiotr Kubaj	adde	25,26,17
401*3a608692SPiotr Kubaj
402*3a608692SPiotr Kubaj	bne	.Loop_mul4x_1st_tail
403*3a608692SPiotr Kubaj
/* r15 = r30 - bytes (a rewound to its biased start); leave when r4 has reached the end of a. */
404*3a608692SPiotr Kubaj	sub	15,30,8
405*3a608692SPiotr Kubaj	cmpld	30,4
406*3a608692SPiotr Kubaj	beq	.Lmul4x_proceed
407*3a608692SPiotr Kubaj
408*3a608692SPiotr Kubaj	ld	9,8*1(4)
409*3a608692SPiotr Kubaj	ld	10,8*2(4)
410*3a608692SPiotr Kubaj	ld	11,8*3(4)
411*3a608692SPiotr Kubaj	ldu	12,8*4(4)
412*3a608692SPiotr Kubaj	ld	18,8*1(6)
413*3a608692SPiotr Kubaj	ld	19,8*2(6)
414*3a608692SPiotr Kubaj	ld	20,8*3(6)
415*3a608692SPiotr Kubaj	ldu	21,8*4(6)
416*3a608692SPiotr Kubaj	b	.Loop_mul4x_1st_tail
417*3a608692SPiotr Kubaj
418*3a608692SPiotr Kubaj.align	5
/*
 * .Lmul4x_proceed: first 4-word group of b is done.  Advance r5 by four
 * words (ldu), fold the pending carry into r3, reload a[0..3] from the
 * rewound pointer r15, rewind r6 by the full byte count, store the four
 * window words plus the top carry after the write cursor, and reload the
 * window from the lowest scratch words at 8*12(r1)..8*15(r1).
 */
419*3a608692SPiotr Kubaj.Lmul4x_proceed:
420*3a608692SPiotr Kubaj	ldu	27,8*4(5)
421*3a608692SPiotr Kubaj	addze	3,3
422*3a608692SPiotr Kubaj	ld	9,8*1(15)
423*3a608692SPiotr Kubaj	ld	10,8*2(15)
424*3a608692SPiotr Kubaj	ld	11,8*3(15)
425*3a608692SPiotr Kubaj	ld	12,8*4(15)
426*3a608692SPiotr Kubaj	addi	4,15,8*4
427*3a608692SPiotr Kubaj	sub	6,6,8
428*3a608692SPiotr Kubaj
429*3a608692SPiotr Kubaj	std	22,8*1(29)
430*3a608692SPiotr Kubaj	std	23,8*2(29)
431*3a608692SPiotr Kubaj	std	24,8*3(29)
432*3a608692SPiotr Kubaj	std	25,8*4(29)
433*3a608692SPiotr Kubaj	std	3,8*5(29)
434*3a608692SPiotr Kubaj	ld	22,8*12(1)
435*3a608692SPiotr Kubaj	ld	23,8*13(1)
436*3a608692SPiotr Kubaj	ld	24,8*14(1)
437*3a608692SPiotr Kubaj	ld	25,8*15(1)
438*3a608692SPiotr Kubaj
439*3a608692SPiotr Kubaj	ld	18,8*1(6)
440*3a608692SPiotr Kubaj	ld	19,8*2(6)
441*3a608692SPiotr Kubaj	ld	20,8*3(6)
442*3a608692SPiotr Kubaj	ldu	21,8*4(6)
443*3a608692SPiotr Kubaj	addic	29,1,8*7
444*3a608692SPiotr Kubaj	li	3,0
445*3a608692SPiotr Kubaj	b	.Loop_mul4x_reduction
446*3a608692SPiotr Kubaj
447*3a608692SPiotr Kubaj.align	5
/*
 * .Loop_mul4x_reduction: same instruction sequence as the first reduction
 * loop, but the window r22-r25 starts from the scratch words loaded before
 * entry rather than from zero.
 */
448*3a608692SPiotr Kubaj.Loop_mul4x_reduction:
449*3a608692SPiotr Kubaj	mulld	14,9,27
450*3a608692SPiotr Kubaj	addze	3,3
451*3a608692SPiotr Kubaj	mulld	15,10,27
452*3a608692SPiotr Kubaj	addi	31,31,8
453*3a608692SPiotr Kubaj	mulld	16,11,27
454*3a608692SPiotr Kubaj	andi.	31,31,8*4-1
455*3a608692SPiotr Kubaj	mulld	17,12,27
456*3a608692SPiotr Kubaj	addc	22,22,14
457*3a608692SPiotr Kubaj	mulhdu	14,9,27
458*3a608692SPiotr Kubaj	adde	23,23,15
459*3a608692SPiotr Kubaj	mulhdu	15,10,27
460*3a608692SPiotr Kubaj	adde	24,24,16
461*3a608692SPiotr Kubaj	mulld	28,22,7
462*3a608692SPiotr Kubaj	adde	25,25,17
463*3a608692SPiotr Kubaj	mulhdu	16,11,27
464*3a608692SPiotr Kubaj	addze	26,0
465*3a608692SPiotr Kubaj	mulhdu	17,12,27
466*3a608692SPiotr Kubaj	ldx	27,5,31
467*3a608692SPiotr Kubaj	addc	23,23,14
468*3a608692SPiotr Kubaj
469*3a608692SPiotr Kubaj	stdu	28,8(29)
470*3a608692SPiotr Kubaj	adde	24,24,15
471*3a608692SPiotr Kubaj	mulld	15,19,28
472*3a608692SPiotr Kubaj	adde	25,25,16
473*3a608692SPiotr Kubaj	mulld	16,20,28
474*3a608692SPiotr Kubaj	adde	26,26,17
475*3a608692SPiotr Kubaj	mulld	17,21,28
476*3a608692SPiotr Kubaj
/* As in the first reduction loop: addic only sets CA (r22 != 0); its r22 result is overwritten by the next adde. */
477*3a608692SPiotr Kubaj	addic	22,22,-1
478*3a608692SPiotr Kubaj	mulhdu	14,18,28
479*3a608692SPiotr Kubaj	adde	22,23,15
480*3a608692SPiotr Kubaj	mulhdu	15,19,28
481*3a608692SPiotr Kubaj	adde	23,24,16
482*3a608692SPiotr Kubaj	mulhdu	16,20,28
483*3a608692SPiotr Kubaj	adde	24,25,17
484*3a608692SPiotr Kubaj	mulhdu	17,21,28
485*3a608692SPiotr Kubaj	adde	25,26,3
486*3a608692SPiotr Kubaj	addze	3,0
487*3a608692SPiotr Kubaj	addc	22,22,14
488*3a608692SPiotr Kubaj	adde	23,23,15
489*3a608692SPiotr Kubaj	adde	24,24,16
490*3a608692SPiotr Kubaj	adde	25,25,17
491*3a608692SPiotr Kubaj
492*3a608692SPiotr Kubaj	bne	.Loop_mul4x_reduction
493*3a608692SPiotr Kubaj
/*
 * Add the next four scratch words (8*5..8*8 past the cursor r29) into the
 * window, and load the next a/n groups and the first saved r28 factor.
 */
494*3a608692SPiotr Kubaj	ld	14,8*5(29)
495*3a608692SPiotr Kubaj	addze	3,3
496*3a608692SPiotr Kubaj	ld	15,8*6(29)
497*3a608692SPiotr Kubaj	ld	16,8*7(29)
498*3a608692SPiotr Kubaj	ld	17,8*8(29)
499*3a608692SPiotr Kubaj	ld	9,8*1(4)
500*3a608692SPiotr Kubaj	ld	10,8*2(4)
501*3a608692SPiotr Kubaj	ld	11,8*3(4)
502*3a608692SPiotr Kubaj	ldu	12,8*4(4)
503*3a608692SPiotr Kubaj	addc	22,22,14
504*3a608692SPiotr Kubaj	adde	23,23,15
505*3a608692SPiotr Kubaj	adde	24,24,16
506*3a608692SPiotr Kubaj	adde	25,25,17
507*3a608692SPiotr Kubaj
508*3a608692SPiotr Kubaj
509*3a608692SPiotr Kubaj	ld	28,8*8(1)
510*3a608692SPiotr Kubaj	ld	18,8*1(6)
511*3a608692SPiotr Kubaj	ld	19,8*2(6)
512*3a608692SPiotr Kubaj	ld	20,8*3(6)
513*3a608692SPiotr Kubaj	ldu	21,8*4(6)
514*3a608692SPiotr Kubaj	b	.Loop_mul4x_tail
515*3a608692SPiotr Kubaj
516*3a608692SPiotr Kubaj.align	5
/*
 * .Loop_mul4x_tail: same arithmetic as .Loop_mul4x_1st_tail; the only
 * difference in the body is the position of the addze that resets r3.
 */
517*3a608692SPiotr Kubaj.Loop_mul4x_tail:
518*3a608692SPiotr Kubaj	mulld	14,9,27
519*3a608692SPiotr Kubaj	addze	3,3
520*3a608692SPiotr Kubaj	mulld	15,10,27
521*3a608692SPiotr Kubaj	addi	31,31,8
522*3a608692SPiotr Kubaj	mulld	16,11,27
523*3a608692SPiotr Kubaj	andi.	31,31,8*4-1
524*3a608692SPiotr Kubaj	mulld	17,12,27
525*3a608692SPiotr Kubaj	addc	22,22,14
526*3a608692SPiotr Kubaj	mulhdu	14,9,27
527*3a608692SPiotr Kubaj	adde	23,23,15
528*3a608692SPiotr Kubaj	mulhdu	15,10,27
529*3a608692SPiotr Kubaj	adde	24,24,16
530*3a608692SPiotr Kubaj	mulhdu	16,11,27
531*3a608692SPiotr Kubaj	adde	25,25,17
532*3a608692SPiotr Kubaj	mulhdu	17,12,27
533*3a608692SPiotr Kubaj	addze	26,0
534*3a608692SPiotr Kubaj	ldx	27,5,31
535*3a608692SPiotr Kubaj	addc	23,23,14
536*3a608692SPiotr Kubaj	mulld	14,18,28
537*3a608692SPiotr Kubaj	adde	24,24,15
538*3a608692SPiotr Kubaj	mulld	15,19,28
539*3a608692SPiotr Kubaj	adde	25,25,16
540*3a608692SPiotr Kubaj	mulld	16,20,28
541*3a608692SPiotr Kubaj	adde	26,26,17
542*3a608692SPiotr Kubaj	mulld	17,21,28
543*3a608692SPiotr Kubaj	addc	22,22,14
544*3a608692SPiotr Kubaj	mulhdu	14,18,28
545*3a608692SPiotr Kubaj	adde	23,23,15
546*3a608692SPiotr Kubaj	mulhdu	15,19,28
547*3a608692SPiotr Kubaj	adde	24,24,16
548*3a608692SPiotr Kubaj	mulhdu	16,20,28
549*3a608692SPiotr Kubaj	adde	25,25,17
550*3a608692SPiotr Kubaj	mulhdu	17,21,28
551*3a608692SPiotr Kubaj	adde	26,26,3
552*3a608692SPiotr Kubaj	addi	28,1,8*8
553*3a608692SPiotr Kubaj	ldx	28,28,31
554*3a608692SPiotr Kubaj	addze	3,0
555*3a608692SPiotr Kubaj	stdu	22,8(29)
556*3a608692SPiotr Kubaj	addc	22,23,14
557*3a608692SPiotr Kubaj	adde	23,24,15
558*3a608692SPiotr Kubaj	adde	24,25,16
559*3a608692SPiotr Kubaj	adde	25,26,17
560*3a608692SPiotr Kubaj
561*3a608692SPiotr Kubaj	bne	.Loop_mul4x_tail
562*3a608692SPiotr Kubaj
/*
 * r14 = next scratch word (or the stored top carry on the last group),
 * r15 = r6 - bytes (n rewound to its biased start).  Leave the tail when
 * r4 has reached the end of a.
 */
563*3a608692SPiotr Kubaj	ld	14,8*5(29)
564*3a608692SPiotr Kubaj	sub	15,6,8
565*3a608692SPiotr Kubaj	addze	3,3
566*3a608692SPiotr Kubaj	cmpld	30,4
567*3a608692SPiotr Kubaj	beq	.Loop_mul4x_break
568*3a608692SPiotr Kubaj
569*3a608692SPiotr Kubaj	ld	15,8*6(29)
570*3a608692SPiotr Kubaj	ld	16,8*7(29)
571*3a608692SPiotr Kubaj	ld	17,8*8(29)
572*3a608692SPiotr Kubaj	ld	9,8*1(4)
573*3a608692SPiotr Kubaj	ld	10,8*2(4)
574*3a608692SPiotr Kubaj	ld	11,8*3(4)
575*3a608692SPiotr Kubaj	ldu	12,8*4(4)
576*3a608692SPiotr Kubaj	addc	22,22,14
577*3a608692SPiotr Kubaj	adde	23,23,15
578*3a608692SPiotr Kubaj	adde	24,24,16
579*3a608692SPiotr Kubaj	adde	25,25,17
580*3a608692SPiotr Kubaj
581*3a608692SPiotr Kubaj
582*3a608692SPiotr Kubaj	ld	18,8*1(6)
583*3a608692SPiotr Kubaj	ld	19,8*2(6)
584*3a608692SPiotr Kubaj	ld	20,8*3(6)
585*3a608692SPiotr Kubaj	ldu	21,8*4(6)
586*3a608692SPiotr Kubaj	b	.Loop_mul4x_tail
587*3a608692SPiotr Kubaj
588*3a608692SPiotr Kubaj.align	5
/*
 * .Loop_mul4x_break: end of one 4-word group of b.  r16/r17 = the parked
 * output pointer and b end marker.  The word in r14 is added into the
 * window (result in r9-r12, carry into r3) and stored together with r3
 * after the write cursor, while r22-r25 are reloaded from the lowest
 * scratch words.  r4 and r6 are rewound and n[0..3] reloaded.  If r5 has
 * reached the end marker the multiplication is complete; otherwise step to
 * the next four b words and re-enter the reduction loop.
 */
589*3a608692SPiotr Kubaj.Loop_mul4x_break:
590*3a608692SPiotr Kubaj	ld	16,8*6(1)
591*3a608692SPiotr Kubaj	ld	17,8*7(1)
592*3a608692SPiotr Kubaj	addc	9,22,14
593*3a608692SPiotr Kubaj	ld	22,8*12(1)
594*3a608692SPiotr Kubaj	addze	10,23
595*3a608692SPiotr Kubaj	ld	23,8*13(1)
596*3a608692SPiotr Kubaj	addze	11,24
597*3a608692SPiotr Kubaj	ld	24,8*14(1)
598*3a608692SPiotr Kubaj	addze	12,25
599*3a608692SPiotr Kubaj	ld	25,8*15(1)
600*3a608692SPiotr Kubaj	addze	3,3
601*3a608692SPiotr Kubaj	std	9,8*1(29)
602*3a608692SPiotr Kubaj	sub	4,30,8
603*3a608692SPiotr Kubaj	std	10,8*2(29)
604*3a608692SPiotr Kubaj	std	11,8*3(29)
605*3a608692SPiotr Kubaj	std	12,8*4(29)
606*3a608692SPiotr Kubaj	std	3,8*5(29)
607*3a608692SPiotr Kubaj
608*3a608692SPiotr Kubaj	ld	18,8*1(15)
609*3a608692SPiotr Kubaj	ld	19,8*2(15)
610*3a608692SPiotr Kubaj	ld	20,8*3(15)
611*3a608692SPiotr Kubaj	ld	21,8*4(15)
612*3a608692SPiotr Kubaj	addi	6,15,8*4
613*3a608692SPiotr Kubaj	cmpld	5,17
614*3a608692SPiotr Kubaj	beq	.Lmul4x_post
615*3a608692SPiotr Kubaj
616*3a608692SPiotr Kubaj	ldu	27,8*4(5)
617*3a608692SPiotr Kubaj	ld	9,8*1(4)
618*3a608692SPiotr Kubaj	ld	10,8*2(4)
619*3a608692SPiotr Kubaj	ld	11,8*3(4)
620*3a608692SPiotr Kubaj	ldu	12,8*4(4)
621*3a608692SPiotr Kubaj	li	3,0
622*3a608692SPiotr Kubaj	addic	29,1,8*7
623*3a608692SPiotr Kubaj	b	.Loop_mul4x_reduction
624*3a608692SPiotr Kubaj
625*3a608692SPiotr Kubaj.align	5
/*
 * .Lmul4x_post: final subtraction and masked select.
 * r31 = (bytes >> 5) - 1 is the trip count for both loops below.
 * r5 and r30 both start at the biased output pointer (r16); r29 walks the
 * scratch vector.  The subtraction scratch - n is software-pipelined four
 * words at a time: two subfe are issued ahead of the loop and the borrow
 * chain in CA runs unbroken through the loop (loads, stores and bdnz do
 * not modify CA).
 */
626*3a608692SPiotr Kubaj.Lmul4x_post:
627*3a608692SPiotr Kubaj
628*3a608692SPiotr Kubaj
629*3a608692SPiotr Kubaj
630*3a608692SPiotr Kubaj
631*3a608692SPiotr Kubaj	srwi	31,8,5
632*3a608692SPiotr Kubaj	mr	5,16
633*3a608692SPiotr Kubaj	subi	31,31,1
634*3a608692SPiotr Kubaj	mr	30,16
635*3a608692SPiotr Kubaj	subfc	14,18,22
636*3a608692SPiotr Kubaj	addi	29,1,8*15
637*3a608692SPiotr Kubaj	subfe	15,19,23
638*3a608692SPiotr Kubaj
639*3a608692SPiotr Kubaj	mtctr	31
640*3a608692SPiotr Kubaj.Lmul4x_sub:
641*3a608692SPiotr Kubaj	ld	18,8*1(6)
642*3a608692SPiotr Kubaj	ld	22,8*1(29)
643*3a608692SPiotr Kubaj	subfe	16,20,24
644*3a608692SPiotr Kubaj	ld	19,8*2(6)
645*3a608692SPiotr Kubaj	ld	23,8*2(29)
646*3a608692SPiotr Kubaj	subfe	17,21,25
647*3a608692SPiotr Kubaj	ld	20,8*3(6)
648*3a608692SPiotr Kubaj	ld	24,8*3(29)
649*3a608692SPiotr Kubaj	ldu	21,8*4(6)
650*3a608692SPiotr Kubaj	ldu	25,8*4(29)
651*3a608692SPiotr Kubaj	std	14,8*1(5)
652*3a608692SPiotr Kubaj	std	15,8*2(5)
653*3a608692SPiotr Kubaj	subfe	14,18,22
654*3a608692SPiotr Kubaj	std	16,8*3(5)
655*3a608692SPiotr Kubaj	stdu	17,8*4(5)
656*3a608692SPiotr Kubaj	subfe	15,19,23
657*3a608692SPiotr Kubaj	bdnz	.Lmul4x_sub
658*3a608692SPiotr Kubaj
/*
 * Finish the last four differences while preloading the first four output
 * words (r9-r12, via r30) and scratch words (r14-r17) for the select loop.
 * subfe r3,r0,r3 with r0 = 0 gives r3 = r3 - 1 + CA, i.e. the top carry
 * combined with the final borrow; r3 is the select mask below.
 */
659*3a608692SPiotr Kubaj	ld	9,8*1(30)
660*3a608692SPiotr Kubaj	std	14,8*1(5)
661*3a608692SPiotr Kubaj	ld	14,8*12(1)
662*3a608692SPiotr Kubaj	subfe	16,20,24
663*3a608692SPiotr Kubaj	ld	10,8*2(30)
664*3a608692SPiotr Kubaj	std	15,8*2(5)
665*3a608692SPiotr Kubaj	ld	15,8*13(1)
666*3a608692SPiotr Kubaj	subfe	17,21,25
667*3a608692SPiotr Kubaj	subfe	3,0,3
668*3a608692SPiotr Kubaj	addi	29,1,8*12
669*3a608692SPiotr Kubaj	ld	11,8*3(30)
670*3a608692SPiotr Kubaj	std	16,8*3(5)
671*3a608692SPiotr Kubaj	ld	16,8*14(1)
672*3a608692SPiotr Kubaj	ld	12,8*4(30)
673*3a608692SPiotr Kubaj	std	17,8*4(5)
674*3a608692SPiotr Kubaj	ld	17,8*15(1)
675*3a608692SPiotr Kubaj
676*3a608692SPiotr Kubaj	mtctr	31
/*
 * .Lmul4x_cond_copy: branch-free select, four words per iteration:
 *   out[k] = (scratch[k] & r3) | (out[k] & ~r3)
 * Each scratch word is overwritten with zero (r0) after it has been read.
 * The next four output/scratch words are loaded before the current results
 * are stored.
 */
677*3a608692SPiotr Kubaj.Lmul4x_cond_copy:
678*3a608692SPiotr Kubaj	and	14,14,3
679*3a608692SPiotr Kubaj	andc	9,9,3
680*3a608692SPiotr Kubaj	std	0,8*0(29)
681*3a608692SPiotr Kubaj	and	15,15,3
682*3a608692SPiotr Kubaj	andc	10,10,3
683*3a608692SPiotr Kubaj	std	0,8*1(29)
684*3a608692SPiotr Kubaj	and	16,16,3
685*3a608692SPiotr Kubaj	andc	11,11,3
686*3a608692SPiotr Kubaj	std	0,8*2(29)
687*3a608692SPiotr Kubaj	and	17,17,3
688*3a608692SPiotr Kubaj	andc	12,12,3
689*3a608692SPiotr Kubaj	std	0,8*3(29)
690*3a608692SPiotr Kubaj	or	22,14,9
691*3a608692SPiotr Kubaj	ld	9,8*5(30)
692*3a608692SPiotr Kubaj	ld	14,8*4(29)
693*3a608692SPiotr Kubaj	or	23,15,10
694*3a608692SPiotr Kubaj	ld	10,8*6(30)
695*3a608692SPiotr Kubaj	ld	15,8*5(29)
696*3a608692SPiotr Kubaj	or	24,16,11
697*3a608692SPiotr Kubaj	ld	11,8*7(30)
698*3a608692SPiotr Kubaj	ld	16,8*6(29)
699*3a608692SPiotr Kubaj	or	25,17,12
700*3a608692SPiotr Kubaj	ld	12,8*8(30)
701*3a608692SPiotr Kubaj	ld	17,8*7(29)
702*3a608692SPiotr Kubaj	addi	29,29,8*4
703*3a608692SPiotr Kubaj	std	22,8*1(30)
704*3a608692SPiotr Kubaj	std	23,8*2(30)
705*3a608692SPiotr Kubaj	std	24,8*3(30)
706*3a608692SPiotr Kubaj	stdu	25,8*4(30)
707*3a608692SPiotr Kubaj	bdnz	.Lmul4x_cond_copy
708*3a608692SPiotr Kubaj
/*
 * Last four words of the select, with one extra scratch word zeroed at
 * 8*4(r29).  r5 = old stack pointer (the word stdux stored at 0(r1)), as
 * .Lmul4x_done expects.
 */
709*3a608692SPiotr Kubaj	ld	5,0(1)
710*3a608692SPiotr Kubaj	and	14,14,3
711*3a608692SPiotr Kubaj	andc	9,9,3
712*3a608692SPiotr Kubaj	std	0,8*0(29)
713*3a608692SPiotr Kubaj	and	15,15,3
714*3a608692SPiotr Kubaj	andc	10,10,3
715*3a608692SPiotr Kubaj	std	0,8*1(29)
716*3a608692SPiotr Kubaj	and	16,16,3
717*3a608692SPiotr Kubaj	andc	11,11,3
718*3a608692SPiotr Kubaj	std	0,8*2(29)
719*3a608692SPiotr Kubaj	and	17,17,3
720*3a608692SPiotr Kubaj	andc	12,12,3
721*3a608692SPiotr Kubaj	std	0,8*3(29)
722*3a608692SPiotr Kubaj	or	22,14,9
723*3a608692SPiotr Kubaj	or	23,15,10
724*3a608692SPiotr Kubaj	std	0,8*4(29)
725*3a608692SPiotr Kubaj	or	24,16,11
726*3a608692SPiotr Kubaj	or	25,17,12
727*3a608692SPiotr Kubaj	std	22,8*1(30)
728*3a608692SPiotr Kubaj	std	23,8*2(30)
729*3a608692SPiotr Kubaj	std	24,8*3(30)
730*3a608692SPiotr Kubaj	std	25,8*4(30)
731*3a608692SPiotr Kubaj
732*3a608692SPiotr Kubaj	b	.Lmul4x_done
733*3a608692SPiotr Kubaj
734*3a608692SPiotr Kubaj.align	4
/*
 * .Lmul4x4_post_condition: short finish for exactly four words, where the
 * whole result is still in r22-r25 with the top carry in r3 and n[0..3] in
 * r18-r21.  r4 = parked biased output pointer, r5 = old stack pointer.
 * Subtract n, turn carry/borrow into the mask r3 (r3 - 1 + CA), then add
 * back (n & r3) so the subtraction is undone when the mask is all-ones.
 */
735*3a608692SPiotr Kubaj.Lmul4x4_post_condition:
736*3a608692SPiotr Kubaj	ld	4,8*6(1)
737*3a608692SPiotr Kubaj	ld	5,0(1)
738*3a608692SPiotr Kubaj	addze	3,3
739*3a608692SPiotr Kubaj
740*3a608692SPiotr Kubaj	subfc	9,18,22
741*3a608692SPiotr Kubaj	subfe	10,19,23
742*3a608692SPiotr Kubaj	subfe	11,20,24
743*3a608692SPiotr Kubaj	subfe	12,21,25
744*3a608692SPiotr Kubaj	subfe	3,0,3
745*3a608692SPiotr Kubaj
746*3a608692SPiotr Kubaj	and	18,18,3
747*3a608692SPiotr Kubaj	and	19,19,3
748*3a608692SPiotr Kubaj	addc	9,9,18
749*3a608692SPiotr Kubaj	and	20,20,3
750*3a608692SPiotr Kubaj	adde	10,10,19
751*3a608692SPiotr Kubaj	and	21,21,3
752*3a608692SPiotr Kubaj	adde	11,11,20
753*3a608692SPiotr Kubaj	adde	12,12,21
754*3a608692SPiotr Kubaj
755*3a608692SPiotr Kubaj	std	9,8*1(4)
756*3a608692SPiotr Kubaj	std	10,8*2(4)
757*3a608692SPiotr Kubaj	std	11,8*3(4)
758*3a608692SPiotr Kubaj	std	12,8*4(4)
759*3a608692SPiotr Kubaj
/*
 * .Lmul4x_done: zero the four slots that held the saved r28 factors
 * (8*8(r1)..8*11(r1)), set the return value to 1, restore r14-r31 from
 * below the old stack pointer in r5, and pop the frame.
 */
760*3a608692SPiotr Kubaj.Lmul4x_done:
761*3a608692SPiotr Kubaj	std	0,8*8(1)
762*3a608692SPiotr Kubaj	std	0,8*9(1)
763*3a608692SPiotr Kubaj	std	0,8*10(1)
764*3a608692SPiotr Kubaj	std	0,8*11(1)
765*3a608692SPiotr Kubaj	li	3,1
766*3a608692SPiotr Kubaj	ld	14,-8*18(5)
767*3a608692SPiotr Kubaj	ld	15,-8*17(5)
768*3a608692SPiotr Kubaj	ld	16,-8*16(5)
769*3a608692SPiotr Kubaj	ld	17,-8*15(5)
770*3a608692SPiotr Kubaj	ld	18,-8*14(5)
771*3a608692SPiotr Kubaj	ld	19,-8*13(5)
772*3a608692SPiotr Kubaj	ld	20,-8*12(5)
773*3a608692SPiotr Kubaj	ld	21,-8*11(5)
774*3a608692SPiotr Kubaj	ld	22,-8*10(5)
775*3a608692SPiotr Kubaj	ld	23,-8*9(5)
776*3a608692SPiotr Kubaj	ld	24,-8*8(5)
777*3a608692SPiotr Kubaj	ld	25,-8*7(5)
778*3a608692SPiotr Kubaj	ld	26,-8*6(5)
779*3a608692SPiotr Kubaj	ld	27,-8*5(5)
780*3a608692SPiotr Kubaj	ld	28,-8*4(5)
781*3a608692SPiotr Kubaj	ld	29,-8*3(5)
782*3a608692SPiotr Kubaj	ld	30,-8*2(5)
783*3a608692SPiotr Kubaj	ld	31,-8*1(5)
784*3a608692SPiotr Kubaj	mr	1,5
785*3a608692SPiotr Kubaj	blr
/* Data words emitted after the return; presumably a traceback table produced by the generator -- TODO confirm. */
786*3a608692SPiotr Kubaj.long	0
787*3a608692SPiotr Kubaj.byte	0,12,4,0x20,0x80,18,6,0
788*3a608692SPiotr Kubaj.long	0
789*3a608692SPiotr Kubaj.size	bn_mul4x_mont_int,.-bn_mul4x_mont_int
790*3a608692SPiotr Kubaj.align	5
791*3a608692SPiotr Kubaj__bn_sqr8x_mont:
792*3a608692SPiotr Kubaj.Lsqr8x_do:
793*3a608692SPiotr Kubaj	mr	9,1
794*3a608692SPiotr Kubaj	slwi	10,8,4
795*3a608692SPiotr Kubaj	li	11,-32*8
796*3a608692SPiotr Kubaj	sub	10,11,10
797*3a608692SPiotr Kubaj	slwi	8,8,3
798*3a608692SPiotr Kubaj	stdux	1,1,10
799*3a608692SPiotr Kubaj
800*3a608692SPiotr Kubaj	std	14,-8*18(9)
801*3a608692SPiotr Kubaj	std	15,-8*17(9)
802*3a608692SPiotr Kubaj	std	16,-8*16(9)
803*3a608692SPiotr Kubaj	std	17,-8*15(9)
804*3a608692SPiotr Kubaj	std	18,-8*14(9)
805*3a608692SPiotr Kubaj	std	19,-8*13(9)
806*3a608692SPiotr Kubaj	std	20,-8*12(9)
807*3a608692SPiotr Kubaj	std	21,-8*11(9)
808*3a608692SPiotr Kubaj	std	22,-8*10(9)
809*3a608692SPiotr Kubaj	std	23,-8*9(9)
810*3a608692SPiotr Kubaj	std	24,-8*8(9)
811*3a608692SPiotr Kubaj	std	25,-8*7(9)
812*3a608692SPiotr Kubaj	std	26,-8*6(9)
813*3a608692SPiotr Kubaj	std	27,-8*5(9)
814*3a608692SPiotr Kubaj	std	28,-8*4(9)
815*3a608692SPiotr Kubaj	std	29,-8*3(9)
816*3a608692SPiotr Kubaj	std	30,-8*2(9)
817*3a608692SPiotr Kubaj	std	31,-8*1(9)
818*3a608692SPiotr Kubaj
819*3a608692SPiotr Kubaj	subi	4,4,8
820*3a608692SPiotr Kubaj	subi	18,6,8
821*3a608692SPiotr Kubaj	subi	3,3,8
822*3a608692SPiotr Kubaj	ld	7,0(7)
823*3a608692SPiotr Kubaj	li	0,0
824*3a608692SPiotr Kubaj
825*3a608692SPiotr Kubaj	add	6,4,8
826*3a608692SPiotr Kubaj	ld	9,8*1(4)
827*3a608692SPiotr Kubaj
828*3a608692SPiotr Kubaj	ld	10,8*2(4)
829*3a608692SPiotr Kubaj	li	23,0
830*3a608692SPiotr Kubaj	ld	11,8*3(4)
831*3a608692SPiotr Kubaj	li	24,0
832*3a608692SPiotr Kubaj	ld	12,8*4(4)
833*3a608692SPiotr Kubaj	li	25,0
834*3a608692SPiotr Kubaj	ld	14,8*5(4)
835*3a608692SPiotr Kubaj	li	26,0
836*3a608692SPiotr Kubaj	ld	15,8*6(4)
837*3a608692SPiotr Kubaj	li	27,0
838*3a608692SPiotr Kubaj	ld	16,8*7(4)
839*3a608692SPiotr Kubaj	li	28,0
840*3a608692SPiotr Kubaj	ldu	17,8*8(4)
841*3a608692SPiotr Kubaj	li	29,0
842*3a608692SPiotr Kubaj
843*3a608692SPiotr Kubaj	addi	5,1,8*11
844*3a608692SPiotr Kubaj	subic.	30,8,8*8
845*3a608692SPiotr Kubaj	b	.Lsqr8x_zero_start
846*3a608692SPiotr Kubaj
847*3a608692SPiotr Kubaj.align	5
848*3a608692SPiotr Kubaj.Lsqr8x_zero:
849*3a608692SPiotr Kubaj	subic.	30,30,8*8
850*3a608692SPiotr Kubaj	std	0,8*1(5)
851*3a608692SPiotr Kubaj	std	0,8*2(5)
852*3a608692SPiotr Kubaj	std	0,8*3(5)
853*3a608692SPiotr Kubaj	std	0,8*4(5)
854*3a608692SPiotr Kubaj	std	0,8*5(5)
855*3a608692SPiotr Kubaj	std	0,8*6(5)
856*3a608692SPiotr Kubaj	std	0,8*7(5)
857*3a608692SPiotr Kubaj	std	0,8*8(5)
858*3a608692SPiotr Kubaj.Lsqr8x_zero_start:
859*3a608692SPiotr Kubaj	std	0,8*9(5)
860*3a608692SPiotr Kubaj	std	0,8*10(5)
861*3a608692SPiotr Kubaj	std	0,8*11(5)
862*3a608692SPiotr Kubaj	std	0,8*12(5)
863*3a608692SPiotr Kubaj	std	0,8*13(5)
864*3a608692SPiotr Kubaj	std	0,8*14(5)
865*3a608692SPiotr Kubaj	std	0,8*15(5)
866*3a608692SPiotr Kubaj	stdu	0,8*16(5)
867*3a608692SPiotr Kubaj	bne	.Lsqr8x_zero
868*3a608692SPiotr Kubaj
869*3a608692SPiotr Kubaj	std	3,8*6(1)
870*3a608692SPiotr Kubaj	std	18,8*7(1)
871*3a608692SPiotr Kubaj	std	7,8*8(1)
872*3a608692SPiotr Kubaj	std	5,8*9(1)
873*3a608692SPiotr Kubaj	std	0,8*10(1)
874*3a608692SPiotr Kubaj	addi	5,1,8*11
875*3a608692SPiotr Kubaj
876*3a608692SPiotr Kubaj
877*3a608692SPiotr Kubaj.align	5
878*3a608692SPiotr Kubaj.Lsqr8x_outer_loop:
879*3a608692SPiotr Kubaj
880*3a608692SPiotr Kubaj
881*3a608692SPiotr Kubaj
882*3a608692SPiotr Kubaj
883*3a608692SPiotr Kubaj
884*3a608692SPiotr Kubaj
885*3a608692SPiotr Kubaj
886*3a608692SPiotr Kubaj
887*3a608692SPiotr Kubaj
888*3a608692SPiotr Kubaj
889*3a608692SPiotr Kubaj
890*3a608692SPiotr Kubaj
891*3a608692SPiotr Kubaj
892*3a608692SPiotr Kubaj
893*3a608692SPiotr Kubaj
894*3a608692SPiotr Kubaj
895*3a608692SPiotr Kubaj
896*3a608692SPiotr Kubaj
897*3a608692SPiotr Kubaj
898*3a608692SPiotr Kubaj
899*3a608692SPiotr Kubaj
900*3a608692SPiotr Kubaj
901*3a608692SPiotr Kubaj
902*3a608692SPiotr Kubaj
903*3a608692SPiotr Kubaj
904*3a608692SPiotr Kubaj
905*3a608692SPiotr Kubaj
906*3a608692SPiotr Kubaj
907*3a608692SPiotr Kubaj
908*3a608692SPiotr Kubaj	mulld	18,10,9
909*3a608692SPiotr Kubaj	mulld	19,11,9
910*3a608692SPiotr Kubaj	mulld	20,12,9
911*3a608692SPiotr Kubaj	mulld	21,14,9
912*3a608692SPiotr Kubaj	addc	23,23,18
913*3a608692SPiotr Kubaj	mulld	18,15,9
914*3a608692SPiotr Kubaj	adde	24,24,19
915*3a608692SPiotr Kubaj	mulld	19,16,9
916*3a608692SPiotr Kubaj	adde	25,25,20
917*3a608692SPiotr Kubaj	mulld	20,17,9
918*3a608692SPiotr Kubaj	adde	26,26,21
919*3a608692SPiotr Kubaj	mulhdu	21,10,9
920*3a608692SPiotr Kubaj	adde	27,27,18
921*3a608692SPiotr Kubaj	mulhdu	18,11,9
922*3a608692SPiotr Kubaj	adde	28,28,19
923*3a608692SPiotr Kubaj	mulhdu	19,12,9
924*3a608692SPiotr Kubaj	adde	29,29,20
925*3a608692SPiotr Kubaj	mulhdu	20,14,9
926*3a608692SPiotr Kubaj	std	22,8*1(5)
927*3a608692SPiotr Kubaj	addze	22,0
928*3a608692SPiotr Kubaj	std	23,8*2(5)
929*3a608692SPiotr Kubaj	addc	24,24,21
930*3a608692SPiotr Kubaj	mulhdu	21,15,9
931*3a608692SPiotr Kubaj	adde	25,25,18
932*3a608692SPiotr Kubaj	mulhdu	18,16,9
933*3a608692SPiotr Kubaj	adde	26,26,19
934*3a608692SPiotr Kubaj	mulhdu	19,17,9
935*3a608692SPiotr Kubaj	adde	27,27,20
936*3a608692SPiotr Kubaj	mulld	20,11,10
937*3a608692SPiotr Kubaj	adde	28,28,21
938*3a608692SPiotr Kubaj	mulld	21,12,10
939*3a608692SPiotr Kubaj	adde	29,29,18
940*3a608692SPiotr Kubaj	mulld	18,14,10
941*3a608692SPiotr Kubaj	adde	22,22,19
942*3a608692SPiotr Kubaj
943*3a608692SPiotr Kubaj	mulld	19,15,10
944*3a608692SPiotr Kubaj	addc	25,25,20
945*3a608692SPiotr Kubaj	mulld	20,16,10
946*3a608692SPiotr Kubaj	adde	26,26,21
947*3a608692SPiotr Kubaj	mulld	21,17,10
948*3a608692SPiotr Kubaj	adde	27,27,18
949*3a608692SPiotr Kubaj	mulhdu	18,11,10
950*3a608692SPiotr Kubaj	adde	28,28,19
951*3a608692SPiotr Kubaj	mulhdu	19,12,10
952*3a608692SPiotr Kubaj	adde	29,29,20
953*3a608692SPiotr Kubaj	mulhdu	20,14,10
954*3a608692SPiotr Kubaj	adde	22,22,21
955*3a608692SPiotr Kubaj	mulhdu	21,15,10
956*3a608692SPiotr Kubaj	std	24,8*3(5)
957*3a608692SPiotr Kubaj	addze	23,0
958*3a608692SPiotr Kubaj	std	25,8*4(5)
959*3a608692SPiotr Kubaj	addc	26,26,18
960*3a608692SPiotr Kubaj	mulhdu	18,16,10
961*3a608692SPiotr Kubaj	adde	27,27,19
962*3a608692SPiotr Kubaj	mulhdu	19,17,10
963*3a608692SPiotr Kubaj	adde	28,28,20
964*3a608692SPiotr Kubaj	mulld	20,12,11
965*3a608692SPiotr Kubaj	adde	29,29,21
966*3a608692SPiotr Kubaj	mulld	21,14,11
967*3a608692SPiotr Kubaj	adde	22,22,18
968*3a608692SPiotr Kubaj	mulld	18,15,11
969*3a608692SPiotr Kubaj	adde	23,23,19
970*3a608692SPiotr Kubaj
971*3a608692SPiotr Kubaj	mulld	19,16,11
972*3a608692SPiotr Kubaj	addc	27,27,20
973*3a608692SPiotr Kubaj	mulld	20,17,11
974*3a608692SPiotr Kubaj	adde	28,28,21
975*3a608692SPiotr Kubaj	mulhdu	21,12,11
976*3a608692SPiotr Kubaj	adde	29,29,18
977*3a608692SPiotr Kubaj	mulhdu	18,14,11
978*3a608692SPiotr Kubaj	adde	22,22,19
979*3a608692SPiotr Kubaj	mulhdu	19,15,11
980*3a608692SPiotr Kubaj	adde	23,23,20
981*3a608692SPiotr Kubaj	mulhdu	20,16,11
982*3a608692SPiotr Kubaj	std	26,8*5(5)
983*3a608692SPiotr Kubaj	addze	24,0
984*3a608692SPiotr Kubaj	std	27,8*6(5)
985*3a608692SPiotr Kubaj	addc	28,28,21
986*3a608692SPiotr Kubaj	mulhdu	21,17,11
987*3a608692SPiotr Kubaj	adde	29,29,18
988*3a608692SPiotr Kubaj	mulld	18,14,12
989*3a608692SPiotr Kubaj	adde	22,22,19
990*3a608692SPiotr Kubaj	mulld	19,15,12
991*3a608692SPiotr Kubaj	adde	23,23,20
992*3a608692SPiotr Kubaj	mulld	20,16,12
993*3a608692SPiotr Kubaj	adde	24,24,21
994*3a608692SPiotr Kubaj
995*3a608692SPiotr Kubaj	mulld	21,17,12
996*3a608692SPiotr Kubaj	addc	29,29,18
997*3a608692SPiotr Kubaj	mulhdu	18,14,12
998*3a608692SPiotr Kubaj	adde	22,22,19
999*3a608692SPiotr Kubaj	mulhdu	19,15,12
1000*3a608692SPiotr Kubaj	adde	23,23,20
1001*3a608692SPiotr Kubaj	mulhdu	20,16,12
1002*3a608692SPiotr Kubaj	adde	24,24,21
1003*3a608692SPiotr Kubaj	mulhdu	21,17,12
1004*3a608692SPiotr Kubaj	std	28,8*7(5)
1005*3a608692SPiotr Kubaj	addze	25,0
1006*3a608692SPiotr Kubaj	stdu	29,8*8(5)
1007*3a608692SPiotr Kubaj	addc	22,22,18
1008*3a608692SPiotr Kubaj	mulld	18,15,14
1009*3a608692SPiotr Kubaj	adde	23,23,19
1010*3a608692SPiotr Kubaj	mulld	19,16,14
1011*3a608692SPiotr Kubaj	adde	24,24,20
1012*3a608692SPiotr Kubaj	mulld	20,17,14
1013*3a608692SPiotr Kubaj	adde	25,25,21
1014*3a608692SPiotr Kubaj
1015*3a608692SPiotr Kubaj	mulhdu	21,15,14
1016*3a608692SPiotr Kubaj	addc	23,23,18
1017*3a608692SPiotr Kubaj	mulhdu	18,16,14
1018*3a608692SPiotr Kubaj	adde	24,24,19
1019*3a608692SPiotr Kubaj	mulhdu	19,17,14
1020*3a608692SPiotr Kubaj	adde	25,25,20
1021*3a608692SPiotr Kubaj	mulld	20,16,15
1022*3a608692SPiotr Kubaj	addze	26,0
1023*3a608692SPiotr Kubaj	addc	24,24,21
1024*3a608692SPiotr Kubaj	mulld	21,17,15
1025*3a608692SPiotr Kubaj	adde	25,25,18
1026*3a608692SPiotr Kubaj	mulhdu	18,16,15
1027*3a608692SPiotr Kubaj	adde	26,26,19
1028*3a608692SPiotr Kubaj
1029*3a608692SPiotr Kubaj	mulhdu	19,17,15
1030*3a608692SPiotr Kubaj	addc	25,25,20
1031*3a608692SPiotr Kubaj	mulld	20,17,16
1032*3a608692SPiotr Kubaj	adde	26,26,21
1033*3a608692SPiotr Kubaj	mulhdu	21,17,16
1034*3a608692SPiotr Kubaj	addze	27,0
1035*3a608692SPiotr Kubaj	addc	26,26,18
1036*3a608692SPiotr Kubaj	cmpld	6,4
1037*3a608692SPiotr Kubaj	adde	27,27,19
1038*3a608692SPiotr Kubaj
1039*3a608692SPiotr Kubaj	addc	27,27,20
1040*3a608692SPiotr Kubaj	sub	18,6,8
1041*3a608692SPiotr Kubaj	addze	28,0
1042*3a608692SPiotr Kubaj	add	28,28,21
1043*3a608692SPiotr Kubaj
1044*3a608692SPiotr Kubaj	beq	.Lsqr8x_outer_break
1045*3a608692SPiotr Kubaj
1046*3a608692SPiotr Kubaj	mr	7,9
1047*3a608692SPiotr Kubaj	ld	9,8*1(5)
1048*3a608692SPiotr Kubaj	ld	10,8*2(5)
1049*3a608692SPiotr Kubaj	ld	11,8*3(5)
1050*3a608692SPiotr Kubaj	ld	12,8*4(5)
1051*3a608692SPiotr Kubaj	ld	14,8*5(5)
1052*3a608692SPiotr Kubaj	ld	15,8*6(5)
1053*3a608692SPiotr Kubaj	ld	16,8*7(5)
1054*3a608692SPiotr Kubaj	ld	17,8*8(5)
1055*3a608692SPiotr Kubaj	addc	22,22,9
1056*3a608692SPiotr Kubaj	ld	9,8*1(4)
1057*3a608692SPiotr Kubaj	adde	23,23,10
1058*3a608692SPiotr Kubaj	ld	10,8*2(4)
1059*3a608692SPiotr Kubaj	adde	24,24,11
1060*3a608692SPiotr Kubaj	ld	11,8*3(4)
1061*3a608692SPiotr Kubaj	adde	25,25,12
1062*3a608692SPiotr Kubaj	ld	12,8*4(4)
1063*3a608692SPiotr Kubaj	adde	26,26,14
1064*3a608692SPiotr Kubaj	ld	14,8*5(4)
1065*3a608692SPiotr Kubaj	adde	27,27,15
1066*3a608692SPiotr Kubaj	ld	15,8*6(4)
1067*3a608692SPiotr Kubaj	adde	28,28,16
1068*3a608692SPiotr Kubaj	ld	16,8*7(4)
1069*3a608692SPiotr Kubaj	subi	3,4,8*7
1070*3a608692SPiotr Kubaj	addze	29,17
1071*3a608692SPiotr Kubaj	ldu	17,8*8(4)
1072*3a608692SPiotr Kubaj
1073*3a608692SPiotr Kubaj	li	30,0
1074*3a608692SPiotr Kubaj	b	.Lsqr8x_mul
1075*3a608692SPiotr Kubaj
1076*3a608692SPiotr Kubaj
1077*3a608692SPiotr Kubaj
1078*3a608692SPiotr Kubaj
1079*3a608692SPiotr Kubaj
1080*3a608692SPiotr Kubaj
1081*3a608692SPiotr Kubaj
1082*3a608692SPiotr Kubaj
1083*3a608692SPiotr Kubaj
1084*3a608692SPiotr Kubaj
1085*3a608692SPiotr Kubaj
1086*3a608692SPiotr Kubaj
1087*3a608692SPiotr Kubaj
1088*3a608692SPiotr Kubaj
1089*3a608692SPiotr Kubaj
1090*3a608692SPiotr Kubaj
1091*3a608692SPiotr Kubaj
1092*3a608692SPiotr Kubaj
1093*3a608692SPiotr Kubaj
1094*3a608692SPiotr Kubaj
1095*3a608692SPiotr Kubaj
1096*3a608692SPiotr Kubaj
1097*3a608692SPiotr Kubaj
1098*3a608692SPiotr Kubaj.align	5
1099*3a608692SPiotr Kubaj.Lsqr8x_mul:
1100*3a608692SPiotr Kubaj	mulld	18,9,7
1101*3a608692SPiotr Kubaj	addze	31,0
1102*3a608692SPiotr Kubaj	mulld	19,10,7
1103*3a608692SPiotr Kubaj	addi	30,30,8
1104*3a608692SPiotr Kubaj	mulld	20,11,7
1105*3a608692SPiotr Kubaj	andi.	30,30,8*8-1
1106*3a608692SPiotr Kubaj	mulld	21,12,7
1107*3a608692SPiotr Kubaj	addc	22,22,18
1108*3a608692SPiotr Kubaj	mulld	18,14,7
1109*3a608692SPiotr Kubaj	adde	23,23,19
1110*3a608692SPiotr Kubaj	mulld	19,15,7
1111*3a608692SPiotr Kubaj	adde	24,24,20
1112*3a608692SPiotr Kubaj	mulld	20,16,7
1113*3a608692SPiotr Kubaj	adde	25,25,21
1114*3a608692SPiotr Kubaj	mulld	21,17,7
1115*3a608692SPiotr Kubaj	adde	26,26,18
1116*3a608692SPiotr Kubaj	mulhdu	18,9,7
1117*3a608692SPiotr Kubaj	adde	27,27,19
1118*3a608692SPiotr Kubaj	mulhdu	19,10,7
1119*3a608692SPiotr Kubaj	adde	28,28,20
1120*3a608692SPiotr Kubaj	mulhdu	20,11,7
1121*3a608692SPiotr Kubaj	adde	29,29,21
1122*3a608692SPiotr Kubaj	mulhdu	21,12,7
1123*3a608692SPiotr Kubaj	addze	31,31
1124*3a608692SPiotr Kubaj	stdu	22,8(5)
1125*3a608692SPiotr Kubaj	addc	22,23,18
1126*3a608692SPiotr Kubaj	mulhdu	18,14,7
1127*3a608692SPiotr Kubaj	adde	23,24,19
1128*3a608692SPiotr Kubaj	mulhdu	19,15,7
1129*3a608692SPiotr Kubaj	adde	24,25,20
1130*3a608692SPiotr Kubaj	mulhdu	20,16,7
1131*3a608692SPiotr Kubaj	adde	25,26,21
1132*3a608692SPiotr Kubaj	mulhdu	21,17,7
1133*3a608692SPiotr Kubaj	ldx	7,3,30
1134*3a608692SPiotr Kubaj	adde	26,27,18
1135*3a608692SPiotr Kubaj	adde	27,28,19
1136*3a608692SPiotr Kubaj	adde	28,29,20
1137*3a608692SPiotr Kubaj	adde	29,31,21
1138*3a608692SPiotr Kubaj
1139*3a608692SPiotr Kubaj	bne	.Lsqr8x_mul
1140*3a608692SPiotr Kubaj
1141*3a608692SPiotr Kubaj
1142*3a608692SPiotr Kubaj	cmpld	4,6
1143*3a608692SPiotr Kubaj	beq	.Lsqr8x_break
1144*3a608692SPiotr Kubaj
1145*3a608692SPiotr Kubaj	ld	9,8*1(5)
1146*3a608692SPiotr Kubaj	ld	10,8*2(5)
1147*3a608692SPiotr Kubaj	ld	11,8*3(5)
1148*3a608692SPiotr Kubaj	ld	12,8*4(5)
1149*3a608692SPiotr Kubaj	ld	14,8*5(5)
1150*3a608692SPiotr Kubaj	ld	15,8*6(5)
1151*3a608692SPiotr Kubaj	ld	16,8*7(5)
1152*3a608692SPiotr Kubaj	ld	17,8*8(5)
1153*3a608692SPiotr Kubaj	addc	22,22,9
1154*3a608692SPiotr Kubaj	ld	9,8*1(4)
1155*3a608692SPiotr Kubaj	adde	23,23,10
1156*3a608692SPiotr Kubaj	ld	10,8*2(4)
1157*3a608692SPiotr Kubaj	adde	24,24,11
1158*3a608692SPiotr Kubaj	ld	11,8*3(4)
1159*3a608692SPiotr Kubaj	adde	25,25,12
1160*3a608692SPiotr Kubaj	ld	12,8*4(4)
1161*3a608692SPiotr Kubaj	adde	26,26,14
1162*3a608692SPiotr Kubaj	ld	14,8*5(4)
1163*3a608692SPiotr Kubaj	adde	27,27,15
1164*3a608692SPiotr Kubaj	ld	15,8*6(4)
1165*3a608692SPiotr Kubaj	adde	28,28,16
1166*3a608692SPiotr Kubaj	ld	16,8*7(4)
1167*3a608692SPiotr Kubaj	adde	29,29,17
1168*3a608692SPiotr Kubaj	ldu	17,8*8(4)
1169*3a608692SPiotr Kubaj
1170*3a608692SPiotr Kubaj	b	.Lsqr8x_mul
1171*3a608692SPiotr Kubaj
1172*3a608692SPiotr Kubaj.align	5
1173*3a608692SPiotr Kubaj.Lsqr8x_break:
1174*3a608692SPiotr Kubaj	ld	9,8*8(3)
1175*3a608692SPiotr Kubaj	addi	4,3,8*15
1176*3a608692SPiotr Kubaj	ld	10,8*9(3)
1177*3a608692SPiotr Kubaj	sub.	18,6,4
1178*3a608692SPiotr Kubaj	ld	11,8*10(3)
1179*3a608692SPiotr Kubaj	sub	19,5,18
1180*3a608692SPiotr Kubaj	ld	12,8*11(3)
1181*3a608692SPiotr Kubaj	ld	14,8*12(3)
1182*3a608692SPiotr Kubaj	ld	15,8*13(3)
1183*3a608692SPiotr Kubaj	ld	16,8*14(3)
1184*3a608692SPiotr Kubaj	ld	17,8*15(3)
1185*3a608692SPiotr Kubaj	beq	.Lsqr8x_outer_loop
1186*3a608692SPiotr Kubaj
1187*3a608692SPiotr Kubaj	std	22,8*1(5)
1188*3a608692SPiotr Kubaj	ld	22,8*1(19)
1189*3a608692SPiotr Kubaj	std	23,8*2(5)
1190*3a608692SPiotr Kubaj	ld	23,8*2(19)
1191*3a608692SPiotr Kubaj	std	24,8*3(5)
1192*3a608692SPiotr Kubaj	ld	24,8*3(19)
1193*3a608692SPiotr Kubaj	std	25,8*4(5)
1194*3a608692SPiotr Kubaj	ld	25,8*4(19)
1195*3a608692SPiotr Kubaj	std	26,8*5(5)
1196*3a608692SPiotr Kubaj	ld	26,8*5(19)
1197*3a608692SPiotr Kubaj	std	27,8*6(5)
1198*3a608692SPiotr Kubaj	ld	27,8*6(19)
1199*3a608692SPiotr Kubaj	std	28,8*7(5)
1200*3a608692SPiotr Kubaj	ld	28,8*7(19)
1201*3a608692SPiotr Kubaj	std	29,8*8(5)
1202*3a608692SPiotr Kubaj	ld	29,8*8(19)
1203*3a608692SPiotr Kubaj	mr	5,19
1204*3a608692SPiotr Kubaj	b	.Lsqr8x_outer_loop
1205*3a608692SPiotr Kubaj
1206*3a608692SPiotr Kubaj.align	5
1207*3a608692SPiotr Kubaj.Lsqr8x_outer_break:
1208*3a608692SPiotr Kubaj
1209*3a608692SPiotr Kubaj
1210*3a608692SPiotr Kubaj	ld	10,8*1(18)
1211*3a608692SPiotr Kubaj	ld	12,8*2(18)
1212*3a608692SPiotr Kubaj	ld	15,8*3(18)
1213*3a608692SPiotr Kubaj	ld	17,8*4(18)
1214*3a608692SPiotr Kubaj	addi	4,18,8*4
1215*3a608692SPiotr Kubaj
1216*3a608692SPiotr Kubaj	ld	19,8*13(1)
1217*3a608692SPiotr Kubaj	ld	20,8*14(1)
1218*3a608692SPiotr Kubaj	ld	21,8*15(1)
1219*3a608692SPiotr Kubaj	ld	18,8*16(1)
1220*3a608692SPiotr Kubaj
1221*3a608692SPiotr Kubaj	std	22,8*1(5)
1222*3a608692SPiotr Kubaj	srwi	30,8,5
1223*3a608692SPiotr Kubaj	std	23,8*2(5)
1224*3a608692SPiotr Kubaj	subi	30,30,1
1225*3a608692SPiotr Kubaj	std	24,8*3(5)
1226*3a608692SPiotr Kubaj	std	25,8*4(5)
1227*3a608692SPiotr Kubaj	std	26,8*5(5)
1228*3a608692SPiotr Kubaj	std	27,8*6(5)
1229*3a608692SPiotr Kubaj	std	28,8*7(5)
1230*3a608692SPiotr Kubaj
1231*3a608692SPiotr Kubaj	addi	5,1,8*11
1232*3a608692SPiotr Kubaj	mulld	22,10,10
1233*3a608692SPiotr Kubaj	mulhdu	10,10,10
1234*3a608692SPiotr Kubaj	add	23,19,19
1235*3a608692SPiotr Kubaj	srdi	19,19,64-1
1236*3a608692SPiotr Kubaj	mulld	11,12,12
1237*3a608692SPiotr Kubaj	mulhdu	12,12,12
1238*3a608692SPiotr Kubaj	addc	23,23,10
1239*3a608692SPiotr Kubaj	add	24,20,20
1240*3a608692SPiotr Kubaj	srdi	20,20,64-1
1241*3a608692SPiotr Kubaj	add	25,21,21
1242*3a608692SPiotr Kubaj	srdi	21,21,64-1
1243*3a608692SPiotr Kubaj	or	24,24,19
1244*3a608692SPiotr Kubaj
1245*3a608692SPiotr Kubaj	mtctr	30
1246*3a608692SPiotr Kubaj.Lsqr4x_shift_n_add:
1247*3a608692SPiotr Kubaj	mulld	14,15,15
1248*3a608692SPiotr Kubaj	mulhdu	15,15,15
1249*3a608692SPiotr Kubaj	ld	19,8*6(5)
1250*3a608692SPiotr Kubaj	ld	10,8*1(4)
1251*3a608692SPiotr Kubaj	adde	24,24,11
1252*3a608692SPiotr Kubaj	add	26,18,18
1253*3a608692SPiotr Kubaj	srdi	18,18,64-1
1254*3a608692SPiotr Kubaj	or	25,25,20
1255*3a608692SPiotr Kubaj	ld	20,8*7(5)
1256*3a608692SPiotr Kubaj	adde	25,25,12
1257*3a608692SPiotr Kubaj	ld	12,8*2(4)
1258*3a608692SPiotr Kubaj	add	27,19,19
1259*3a608692SPiotr Kubaj	srdi	19,19,64-1
1260*3a608692SPiotr Kubaj	or	26,26,21
1261*3a608692SPiotr Kubaj	ld	21,8*8(5)
1262*3a608692SPiotr Kubaj	mulld	16,17,17
1263*3a608692SPiotr Kubaj	mulhdu	17,17,17
1264*3a608692SPiotr Kubaj	adde	26,26,14
1265*3a608692SPiotr Kubaj	add	28,20,20
1266*3a608692SPiotr Kubaj	srdi	20,20,64-1
1267*3a608692SPiotr Kubaj	or	27,27,18
1268*3a608692SPiotr Kubaj	ld	18,8*9(5)
1269*3a608692SPiotr Kubaj	adde	27,27,15
1270*3a608692SPiotr Kubaj	ld	15,8*3(4)
1271*3a608692SPiotr Kubaj	add	29,21,21
1272*3a608692SPiotr Kubaj	srdi	21,21,64-1
1273*3a608692SPiotr Kubaj	or	28,28,19
1274*3a608692SPiotr Kubaj	ld	19,8*10(5)
1275*3a608692SPiotr Kubaj	mulld	9,10,10
1276*3a608692SPiotr Kubaj	mulhdu	10,10,10
1277*3a608692SPiotr Kubaj	adde	28,28,16
1278*3a608692SPiotr Kubaj	std	22,8*1(5)
1279*3a608692SPiotr Kubaj	add	22,18,18
1280*3a608692SPiotr Kubaj	srdi	18,18,64-1
1281*3a608692SPiotr Kubaj	or	29,29,20
1282*3a608692SPiotr Kubaj	ld	20,8*11(5)
1283*3a608692SPiotr Kubaj	adde	29,29,17
1284*3a608692SPiotr Kubaj	ldu	17,8*4(4)
1285*3a608692SPiotr Kubaj	std	23,8*2(5)
1286*3a608692SPiotr Kubaj	add	23,19,19
1287*3a608692SPiotr Kubaj	srdi	19,19,64-1
1288*3a608692SPiotr Kubaj	or	22,22,21
1289*3a608692SPiotr Kubaj	ld	21,8*12(5)
1290*3a608692SPiotr Kubaj	mulld	11,12,12
1291*3a608692SPiotr Kubaj	mulhdu	12,12,12
1292*3a608692SPiotr Kubaj	adde	22,22,9
1293*3a608692SPiotr Kubaj	std	24,8*3(5)
1294*3a608692SPiotr Kubaj	add	24,20,20
1295*3a608692SPiotr Kubaj	srdi	20,20,64-1
1296*3a608692SPiotr Kubaj	or	23,23,18
1297*3a608692SPiotr Kubaj	ld	18,8*13(5)
1298*3a608692SPiotr Kubaj	adde	23,23,10
1299*3a608692SPiotr Kubaj	std	25,8*4(5)
1300*3a608692SPiotr Kubaj	std	26,8*5(5)
1301*3a608692SPiotr Kubaj	std	27,8*6(5)
1302*3a608692SPiotr Kubaj	std	28,8*7(5)
1303*3a608692SPiotr Kubaj	stdu	29,8*8(5)
1304*3a608692SPiotr Kubaj	add	25,21,21
1305*3a608692SPiotr Kubaj	srdi	21,21,64-1
1306*3a608692SPiotr Kubaj	or	24,24,19
1307*3a608692SPiotr Kubaj	bdnz	.Lsqr4x_shift_n_add
1308*3a608692SPiotr Kubaj	ld	4,8*7(1)
1309*3a608692SPiotr Kubaj	ld	7,8*8(1)
1310*3a608692SPiotr Kubaj
1311*3a608692SPiotr Kubaj	mulld	14,15,15
1312*3a608692SPiotr Kubaj	mulhdu	15,15,15
1313*3a608692SPiotr Kubaj	std	22,8*1(5)
1314*3a608692SPiotr Kubaj	ld	22,8*12(1)
1315*3a608692SPiotr Kubaj	ld	19,8*6(5)
1316*3a608692SPiotr Kubaj	adde	24,24,11
1317*3a608692SPiotr Kubaj	add	26,18,18
1318*3a608692SPiotr Kubaj	srdi	18,18,64-1
1319*3a608692SPiotr Kubaj	or	25,25,20
1320*3a608692SPiotr Kubaj	ld	20,8*7(5)
1321*3a608692SPiotr Kubaj	adde	25,25,12
1322*3a608692SPiotr Kubaj	add	27,19,19
1323*3a608692SPiotr Kubaj	srdi	19,19,64-1
1324*3a608692SPiotr Kubaj	or	26,26,21
1325*3a608692SPiotr Kubaj	mulld	16,17,17
1326*3a608692SPiotr Kubaj	mulhdu	17,17,17
1327*3a608692SPiotr Kubaj	adde	26,26,14
1328*3a608692SPiotr Kubaj	add	28,20,20
1329*3a608692SPiotr Kubaj	srdi	20,20,64-1
1330*3a608692SPiotr Kubaj	or	27,27,18
1331*3a608692SPiotr Kubaj	std	23,8*2(5)
1332*3a608692SPiotr Kubaj	ld	23,8*13(1)
1333*3a608692SPiotr Kubaj	adde	27,27,15
1334*3a608692SPiotr Kubaj	or	28,28,19
1335*3a608692SPiotr Kubaj	ld	9,8*1(4)
1336*3a608692SPiotr Kubaj	ld	10,8*2(4)
1337*3a608692SPiotr Kubaj	adde	28,28,16
1338*3a608692SPiotr Kubaj	ld	11,8*3(4)
1339*3a608692SPiotr Kubaj	ld	12,8*4(4)
1340*3a608692SPiotr Kubaj	adde	29,17,20
1341*3a608692SPiotr Kubaj	ld	14,8*5(4)
1342*3a608692SPiotr Kubaj	ld	15,8*6(4)
1343*3a608692SPiotr Kubaj
1344*3a608692SPiotr Kubaj
1345*3a608692SPiotr Kubaj
1346*3a608692SPiotr Kubaj	mulld	31,7,22
1347*3a608692SPiotr Kubaj	li	30,8
1348*3a608692SPiotr Kubaj	ld	16,8*7(4)
1349*3a608692SPiotr Kubaj	add	6,4,8
1350*3a608692SPiotr Kubaj	ldu	17,8*8(4)
1351*3a608692SPiotr Kubaj	std	24,8*3(5)
1352*3a608692SPiotr Kubaj	ld	24,8*14(1)
1353*3a608692SPiotr Kubaj	std	25,8*4(5)
1354*3a608692SPiotr Kubaj	ld	25,8*15(1)
1355*3a608692SPiotr Kubaj	std	26,8*5(5)
1356*3a608692SPiotr Kubaj	ld	26,8*16(1)
1357*3a608692SPiotr Kubaj	std	27,8*6(5)
1358*3a608692SPiotr Kubaj	ld	27,8*17(1)
1359*3a608692SPiotr Kubaj	std	28,8*7(5)
1360*3a608692SPiotr Kubaj	ld	28,8*18(1)
1361*3a608692SPiotr Kubaj	std	29,8*8(5)
1362*3a608692SPiotr Kubaj	ld	29,8*19(1)
1363*3a608692SPiotr Kubaj	addi	5,1,8*11
1364*3a608692SPiotr Kubaj	mtctr	30
1365*3a608692SPiotr Kubaj	b	.Lsqr8x_reduction
1366*3a608692SPiotr Kubaj
1367*3a608692SPiotr Kubaj.align	5
1368*3a608692SPiotr Kubaj.Lsqr8x_reduction:
1369*3a608692SPiotr Kubaj
1370*3a608692SPiotr Kubaj	mulld	19,10,31
1371*3a608692SPiotr Kubaj	mulld	20,11,31
1372*3a608692SPiotr Kubaj	stdu	31,8(5)
1373*3a608692SPiotr Kubaj	mulld	21,12,31
1374*3a608692SPiotr Kubaj
1375*3a608692SPiotr Kubaj	addic	22,22,-1
1376*3a608692SPiotr Kubaj	mulld	18,14,31
1377*3a608692SPiotr Kubaj	adde	22,23,19
1378*3a608692SPiotr Kubaj	mulld	19,15,31
1379*3a608692SPiotr Kubaj	adde	23,24,20
1380*3a608692SPiotr Kubaj	mulld	20,16,31
1381*3a608692SPiotr Kubaj	adde	24,25,21
1382*3a608692SPiotr Kubaj	mulld	21,17,31
1383*3a608692SPiotr Kubaj	adde	25,26,18
1384*3a608692SPiotr Kubaj	mulhdu	18,9,31
1385*3a608692SPiotr Kubaj	adde	26,27,19
1386*3a608692SPiotr Kubaj	mulhdu	19,10,31
1387*3a608692SPiotr Kubaj	adde	27,28,20
1388*3a608692SPiotr Kubaj	mulhdu	20,11,31
1389*3a608692SPiotr Kubaj	adde	28,29,21
1390*3a608692SPiotr Kubaj	mulhdu	21,12,31
1391*3a608692SPiotr Kubaj	addze	29,0
1392*3a608692SPiotr Kubaj	addc	22,22,18
1393*3a608692SPiotr Kubaj	mulhdu	18,14,31
1394*3a608692SPiotr Kubaj	adde	23,23,19
1395*3a608692SPiotr Kubaj	mulhdu	19,15,31
1396*3a608692SPiotr Kubaj	adde	24,24,20
1397*3a608692SPiotr Kubaj	mulhdu	20,16,31
1398*3a608692SPiotr Kubaj	adde	25,25,21
1399*3a608692SPiotr Kubaj	mulhdu	21,17,31
1400*3a608692SPiotr Kubaj	mulld	31,7,22
1401*3a608692SPiotr Kubaj	adde	26,26,18
1402*3a608692SPiotr Kubaj	adde	27,27,19
1403*3a608692SPiotr Kubaj	adde	28,28,20
1404*3a608692SPiotr Kubaj	adde	29,29,21
1405*3a608692SPiotr Kubaj	bdnz	.Lsqr8x_reduction
1406*3a608692SPiotr Kubaj
1407*3a608692SPiotr Kubaj	ld	18,8*1(5)
1408*3a608692SPiotr Kubaj	ld	19,8*2(5)
1409*3a608692SPiotr Kubaj	ld	20,8*3(5)
1410*3a608692SPiotr Kubaj	ld	21,8*4(5)
1411*3a608692SPiotr Kubaj	subi	3,5,8*7
1412*3a608692SPiotr Kubaj	cmpld	6,4
1413*3a608692SPiotr Kubaj	addc	22,22,18
1414*3a608692SPiotr Kubaj	ld	18,8*5(5)
1415*3a608692SPiotr Kubaj	adde	23,23,19
1416*3a608692SPiotr Kubaj	ld	19,8*6(5)
1417*3a608692SPiotr Kubaj	adde	24,24,20
1418*3a608692SPiotr Kubaj	ld	20,8*7(5)
1419*3a608692SPiotr Kubaj	adde	25,25,21
1420*3a608692SPiotr Kubaj	ld	21,8*8(5)
1421*3a608692SPiotr Kubaj	adde	26,26,18
1422*3a608692SPiotr Kubaj	adde	27,27,19
1423*3a608692SPiotr Kubaj	adde	28,28,20
1424*3a608692SPiotr Kubaj	adde	29,29,21
1425*3a608692SPiotr Kubaj
1426*3a608692SPiotr Kubaj	beq	.Lsqr8x8_post_condition
1427*3a608692SPiotr Kubaj
1428*3a608692SPiotr Kubaj	ld	7,8*0(3)
1429*3a608692SPiotr Kubaj	ld	9,8*1(4)
1430*3a608692SPiotr Kubaj	ld	10,8*2(4)
1431*3a608692SPiotr Kubaj	ld	11,8*3(4)
1432*3a608692SPiotr Kubaj	ld	12,8*4(4)
1433*3a608692SPiotr Kubaj	ld	14,8*5(4)
1434*3a608692SPiotr Kubaj	ld	15,8*6(4)
1435*3a608692SPiotr Kubaj	ld	16,8*7(4)
1436*3a608692SPiotr Kubaj	ldu	17,8*8(4)
1437*3a608692SPiotr Kubaj	li	30,0
1438*3a608692SPiotr Kubaj
1439*3a608692SPiotr Kubaj.align	5
1440*3a608692SPiotr Kubaj.Lsqr8x_tail:
1441*3a608692SPiotr Kubaj	mulld	18,9,7
1442*3a608692SPiotr Kubaj	addze	31,0
1443*3a608692SPiotr Kubaj	mulld	19,10,7
1444*3a608692SPiotr Kubaj	addi	30,30,8
1445*3a608692SPiotr Kubaj	mulld	20,11,7
1446*3a608692SPiotr Kubaj	andi.	30,30,8*8-1
1447*3a608692SPiotr Kubaj	mulld	21,12,7
1448*3a608692SPiotr Kubaj	addc	22,22,18
1449*3a608692SPiotr Kubaj	mulld	18,14,7
1450*3a608692SPiotr Kubaj	adde	23,23,19
1451*3a608692SPiotr Kubaj	mulld	19,15,7
1452*3a608692SPiotr Kubaj	adde	24,24,20
1453*3a608692SPiotr Kubaj	mulld	20,16,7
1454*3a608692SPiotr Kubaj	adde	25,25,21
1455*3a608692SPiotr Kubaj	mulld	21,17,7
1456*3a608692SPiotr Kubaj	adde	26,26,18
1457*3a608692SPiotr Kubaj	mulhdu	18,9,7
1458*3a608692SPiotr Kubaj	adde	27,27,19
1459*3a608692SPiotr Kubaj	mulhdu	19,10,7
1460*3a608692SPiotr Kubaj	adde	28,28,20
1461*3a608692SPiotr Kubaj	mulhdu	20,11,7
1462*3a608692SPiotr Kubaj	adde	29,29,21
1463*3a608692SPiotr Kubaj	mulhdu	21,12,7
1464*3a608692SPiotr Kubaj	addze	31,31
1465*3a608692SPiotr Kubaj	stdu	22,8(5)
1466*3a608692SPiotr Kubaj	addc	22,23,18
1467*3a608692SPiotr Kubaj	mulhdu	18,14,7
1468*3a608692SPiotr Kubaj	adde	23,24,19
1469*3a608692SPiotr Kubaj	mulhdu	19,15,7
1470*3a608692SPiotr Kubaj	adde	24,25,20
1471*3a608692SPiotr Kubaj	mulhdu	20,16,7
1472*3a608692SPiotr Kubaj	adde	25,26,21
1473*3a608692SPiotr Kubaj	mulhdu	21,17,7
1474*3a608692SPiotr Kubaj	ldx	7,3,30
1475*3a608692SPiotr Kubaj	adde	26,27,18
1476*3a608692SPiotr Kubaj	adde	27,28,19
1477*3a608692SPiotr Kubaj	adde	28,29,20
1478*3a608692SPiotr Kubaj	adde	29,31,21
1479*3a608692SPiotr Kubaj
1480*3a608692SPiotr Kubaj	bne	.Lsqr8x_tail
1481*3a608692SPiotr Kubaj
1482*3a608692SPiotr Kubaj
1483*3a608692SPiotr Kubaj	ld	9,8*1(5)
1484*3a608692SPiotr Kubaj	ld	31,8*10(1)
1485*3a608692SPiotr Kubaj	cmpld	6,4
1486*3a608692SPiotr Kubaj	ld	10,8*2(5)
1487*3a608692SPiotr Kubaj	sub	20,6,8
1488*3a608692SPiotr Kubaj	ld	11,8*3(5)
1489*3a608692SPiotr Kubaj	ld	12,8*4(5)
1490*3a608692SPiotr Kubaj	ld	14,8*5(5)
1491*3a608692SPiotr Kubaj	ld	15,8*6(5)
1492*3a608692SPiotr Kubaj	ld	16,8*7(5)
1493*3a608692SPiotr Kubaj	ld	17,8*8(5)
1494*3a608692SPiotr Kubaj	beq	.Lsqr8x_tail_break
1495*3a608692SPiotr Kubaj
1496*3a608692SPiotr Kubaj	addc	22,22,9
1497*3a608692SPiotr Kubaj	ld	9,8*1(4)
1498*3a608692SPiotr Kubaj	adde	23,23,10
1499*3a608692SPiotr Kubaj	ld	10,8*2(4)
1500*3a608692SPiotr Kubaj	adde	24,24,11
1501*3a608692SPiotr Kubaj	ld	11,8*3(4)
1502*3a608692SPiotr Kubaj	adde	25,25,12
1503*3a608692SPiotr Kubaj	ld	12,8*4(4)
1504*3a608692SPiotr Kubaj	adde	26,26,14
1505*3a608692SPiotr Kubaj	ld	14,8*5(4)
1506*3a608692SPiotr Kubaj	adde	27,27,15
1507*3a608692SPiotr Kubaj	ld	15,8*6(4)
1508*3a608692SPiotr Kubaj	adde	28,28,16
1509*3a608692SPiotr Kubaj	ld	16,8*7(4)
1510*3a608692SPiotr Kubaj	adde	29,29,17
1511*3a608692SPiotr Kubaj	ldu	17,8*8(4)
1512*3a608692SPiotr Kubaj
1513*3a608692SPiotr Kubaj	b	.Lsqr8x_tail
1514*3a608692SPiotr Kubaj
1515*3a608692SPiotr Kubaj.align	5
1516*3a608692SPiotr Kubaj.Lsqr8x_tail_break:
1517*3a608692SPiotr Kubaj	ld	7,8*8(1)
1518*3a608692SPiotr Kubaj	ld	21,8*9(1)
1519*3a608692SPiotr Kubaj	addi	30,5,8*8
1520*3a608692SPiotr Kubaj
1521*3a608692SPiotr Kubaj	addic	31,31,-1
1522*3a608692SPiotr Kubaj	adde	18,22,9
1523*3a608692SPiotr Kubaj	ld	22,8*8(3)
1524*3a608692SPiotr Kubaj	ld	9,8*1(20)
1525*3a608692SPiotr Kubaj	adde	19,23,10
1526*3a608692SPiotr Kubaj	ld	23,8*9(3)
1527*3a608692SPiotr Kubaj	ld	10,8*2(20)
1528*3a608692SPiotr Kubaj	adde	24,24,11
1529*3a608692SPiotr Kubaj	ld	11,8*3(20)
1530*3a608692SPiotr Kubaj	adde	25,25,12
1531*3a608692SPiotr Kubaj	ld	12,8*4(20)
1532*3a608692SPiotr Kubaj	adde	26,26,14
1533*3a608692SPiotr Kubaj	ld	14,8*5(20)
1534*3a608692SPiotr Kubaj	adde	27,27,15
1535*3a608692SPiotr Kubaj	ld	15,8*6(20)
1536*3a608692SPiotr Kubaj	adde	28,28,16
1537*3a608692SPiotr Kubaj	ld	16,8*7(20)
1538*3a608692SPiotr Kubaj	adde	29,29,17
1539*3a608692SPiotr Kubaj	ld	17,8*8(20)
1540*3a608692SPiotr Kubaj	addi	4,20,8*8
1541*3a608692SPiotr Kubaj	addze	20,0
1542*3a608692SPiotr Kubaj	mulld	31,7,22
1543*3a608692SPiotr Kubaj	std	18,8*1(5)
1544*3a608692SPiotr Kubaj	cmpld	30,21
1545*3a608692SPiotr Kubaj	std	19,8*2(5)
1546*3a608692SPiotr Kubaj	li	30,8
1547*3a608692SPiotr Kubaj	std	24,8*3(5)
1548*3a608692SPiotr Kubaj	ld	24,8*10(3)
1549*3a608692SPiotr Kubaj	std	25,8*4(5)
1550*3a608692SPiotr Kubaj	ld	25,8*11(3)
1551*3a608692SPiotr Kubaj	std	26,8*5(5)
1552*3a608692SPiotr Kubaj	ld	26,8*12(3)
1553*3a608692SPiotr Kubaj	std	27,8*6(5)
1554*3a608692SPiotr Kubaj	ld	27,8*13(3)
1555*3a608692SPiotr Kubaj	std	28,8*7(5)
1556*3a608692SPiotr Kubaj	ld	28,8*14(3)
1557*3a608692SPiotr Kubaj	std	29,8*8(5)
1558*3a608692SPiotr Kubaj	ld	29,8*15(3)
1559*3a608692SPiotr Kubaj	std	20,8*10(1)
1560*3a608692SPiotr Kubaj	addi	5,3,8*7
1561*3a608692SPiotr Kubaj	mtctr	30
1562*3a608692SPiotr Kubaj	bne	.Lsqr8x_reduction
1563*3a608692SPiotr Kubaj
1564*3a608692SPiotr Kubaj
1565*3a608692SPiotr Kubaj
1566*3a608692SPiotr Kubaj
1567*3a608692SPiotr Kubaj
1568*3a608692SPiotr Kubaj
1569*3a608692SPiotr Kubaj	ld	3,8*6(1)
1570*3a608692SPiotr Kubaj	srwi	30,8,6
1571*3a608692SPiotr Kubaj	mr	7,5
1572*3a608692SPiotr Kubaj	addi	5,5,8*8
1573*3a608692SPiotr Kubaj	subi	30,30,1
1574*3a608692SPiotr Kubaj	subfc	18,9,22
1575*3a608692SPiotr Kubaj	subfe	19,10,23
1576*3a608692SPiotr Kubaj	mr	31,20
1577*3a608692SPiotr Kubaj	mr	6,3
1578*3a608692SPiotr Kubaj
1579*3a608692SPiotr Kubaj	mtctr	30
1580*3a608692SPiotr Kubaj	b	.Lsqr8x_sub
1581*3a608692SPiotr Kubaj
1582*3a608692SPiotr Kubaj.align	5
1583*3a608692SPiotr Kubaj.Lsqr8x_sub:
1584*3a608692SPiotr Kubaj	ld	9,8*1(4)
1585*3a608692SPiotr Kubaj	ld	22,8*1(5)
1586*3a608692SPiotr Kubaj	ld	10,8*2(4)
1587*3a608692SPiotr Kubaj	ld	23,8*2(5)
1588*3a608692SPiotr Kubaj	subfe	20,11,24
1589*3a608692SPiotr Kubaj	ld	11,8*3(4)
1590*3a608692SPiotr Kubaj	ld	24,8*3(5)
1591*3a608692SPiotr Kubaj	subfe	21,12,25
1592*3a608692SPiotr Kubaj	ld	12,8*4(4)
1593*3a608692SPiotr Kubaj	ld	25,8*4(5)
1594*3a608692SPiotr Kubaj	std	18,8*1(3)
1595*3a608692SPiotr Kubaj	subfe	18,14,26
1596*3a608692SPiotr Kubaj	ld	14,8*5(4)
1597*3a608692SPiotr Kubaj	ld	26,8*5(5)
1598*3a608692SPiotr Kubaj	std	19,8*2(3)
1599*3a608692SPiotr Kubaj	subfe	19,15,27
1600*3a608692SPiotr Kubaj	ld	15,8*6(4)
1601*3a608692SPiotr Kubaj	ld	27,8*6(5)
1602*3a608692SPiotr Kubaj	std	20,8*3(3)
1603*3a608692SPiotr Kubaj	subfe	20,16,28
1604*3a608692SPiotr Kubaj	ld	16,8*7(4)
1605*3a608692SPiotr Kubaj	ld	28,8*7(5)
1606*3a608692SPiotr Kubaj	std	21,8*4(3)
1607*3a608692SPiotr Kubaj	subfe	21,17,29
1608*3a608692SPiotr Kubaj	ldu	17,8*8(4)
1609*3a608692SPiotr Kubaj	ldu	29,8*8(5)
1610*3a608692SPiotr Kubaj	std	18,8*5(3)
1611*3a608692SPiotr Kubaj	subfe	18,9,22
1612*3a608692SPiotr Kubaj	std	19,8*6(3)
1613*3a608692SPiotr Kubaj	subfe	19,10,23
1614*3a608692SPiotr Kubaj	std	20,8*7(3)
1615*3a608692SPiotr Kubaj	stdu	21,8*8(3)
1616*3a608692SPiotr Kubaj	bdnz	.Lsqr8x_sub
1617*3a608692SPiotr Kubaj
1618*3a608692SPiotr Kubaj	srwi	30,8,5
1619*3a608692SPiotr Kubaj	ld	9,8*1(6)
1620*3a608692SPiotr Kubaj	ld	22,8*1(7)
1621*3a608692SPiotr Kubaj	subi	30,30,1
1622*3a608692SPiotr Kubaj	ld	10,8*2(6)
1623*3a608692SPiotr Kubaj	ld	23,8*2(7)
1624*3a608692SPiotr Kubaj	subfe	20,11,24
1625*3a608692SPiotr Kubaj	ld	11,8*3(6)
1626*3a608692SPiotr Kubaj	ld	24,8*3(7)
1627*3a608692SPiotr Kubaj	subfe	21,12,25
1628*3a608692SPiotr Kubaj	ld	12,8*4(6)
1629*3a608692SPiotr Kubaj	ldu	25,8*4(7)
1630*3a608692SPiotr Kubaj	std	18,8*1(3)
1631*3a608692SPiotr Kubaj	subfe	18,14,26
1632*3a608692SPiotr Kubaj	std	19,8*2(3)
1633*3a608692SPiotr Kubaj	subfe	19,15,27
1634*3a608692SPiotr Kubaj	std	20,8*3(3)
1635*3a608692SPiotr Kubaj	subfe	20,16,28
1636*3a608692SPiotr Kubaj	std	21,8*4(3)
1637*3a608692SPiotr Kubaj	subfe	21,17,29
1638*3a608692SPiotr Kubaj	std	18,8*5(3)
1639*3a608692SPiotr Kubaj	subfe	31,0,31
1640*3a608692SPiotr Kubaj	std	19,8*6(3)
1641*3a608692SPiotr Kubaj	std	20,8*7(3)
1642*3a608692SPiotr Kubaj	std	21,8*8(3)
1643*3a608692SPiotr Kubaj
1644*3a608692SPiotr Kubaj	addi	5,1,8*11
1645*3a608692SPiotr Kubaj	mtctr	30
1646*3a608692SPiotr Kubaj
1647*3a608692SPiotr Kubaj.Lsqr4x_cond_copy:
1648*3a608692SPiotr Kubaj	andc	9,9,31
1649*3a608692SPiotr Kubaj	std	0,-8*3(7)
1650*3a608692SPiotr Kubaj	and	22,22,31
1651*3a608692SPiotr Kubaj	std	0,-8*2(7)
1652*3a608692SPiotr Kubaj	andc	10,10,31
1653*3a608692SPiotr Kubaj	std	0,-8*1(7)
1654*3a608692SPiotr Kubaj	and	23,23,31
1655*3a608692SPiotr Kubaj	std	0,-8*0(7)
1656*3a608692SPiotr Kubaj	andc	11,11,31
1657*3a608692SPiotr Kubaj	std	0,8*1(5)
1658*3a608692SPiotr Kubaj	and	24,24,31
1659*3a608692SPiotr Kubaj	std	0,8*2(5)
1660*3a608692SPiotr Kubaj	andc	12,12,31
1661*3a608692SPiotr Kubaj	std	0,8*3(5)
1662*3a608692SPiotr Kubaj	and	25,25,31
1663*3a608692SPiotr Kubaj	stdu	0,8*4(5)
1664*3a608692SPiotr Kubaj	or	18,9,22
1665*3a608692SPiotr Kubaj	ld	9,8*5(6)
1666*3a608692SPiotr Kubaj	ld	22,8*1(7)
1667*3a608692SPiotr Kubaj	or	19,10,23
1668*3a608692SPiotr Kubaj	ld	10,8*6(6)
1669*3a608692SPiotr Kubaj	ld	23,8*2(7)
1670*3a608692SPiotr Kubaj	or	20,11,24
1671*3a608692SPiotr Kubaj	ld	11,8*7(6)
1672*3a608692SPiotr Kubaj	ld	24,8*3(7)
1673*3a608692SPiotr Kubaj	or	21,12,25
1674*3a608692SPiotr Kubaj	ld	12,8*8(6)
1675*3a608692SPiotr Kubaj	ldu	25,8*4(7)
1676*3a608692SPiotr Kubaj	std	18,8*1(6)
1677*3a608692SPiotr Kubaj	std	19,8*2(6)
1678*3a608692SPiotr Kubaj	std	20,8*3(6)
1679*3a608692SPiotr Kubaj	stdu	21,8*4(6)
1680*3a608692SPiotr Kubaj	bdnz	.Lsqr4x_cond_copy
1681*3a608692SPiotr Kubaj
1682*3a608692SPiotr Kubaj	ld	4,0(1)
1683*3a608692SPiotr Kubaj	andc	9,9,31
1684*3a608692SPiotr Kubaj	and	22,22,31
1685*3a608692SPiotr Kubaj	andc	10,10,31
1686*3a608692SPiotr Kubaj	and	23,23,31
1687*3a608692SPiotr Kubaj	andc	11,11,31
1688*3a608692SPiotr Kubaj	and	24,24,31
1689*3a608692SPiotr Kubaj	andc	12,12,31
1690*3a608692SPiotr Kubaj	and	25,25,31
1691*3a608692SPiotr Kubaj	or	18,9,22
1692*3a608692SPiotr Kubaj	or	19,10,23
1693*3a608692SPiotr Kubaj	or	20,11,24
1694*3a608692SPiotr Kubaj	or	21,12,25
1695*3a608692SPiotr Kubaj	std	18,8*1(6)
1696*3a608692SPiotr Kubaj	std	19,8*2(6)
1697*3a608692SPiotr Kubaj	std	20,8*3(6)
1698*3a608692SPiotr Kubaj	std	21,8*4(6)
1699*3a608692SPiotr Kubaj
1700*3a608692SPiotr Kubaj	b	.Lsqr8x_done
1701*3a608692SPiotr Kubaj
1702*3a608692SPiotr Kubaj.align	5
/*
 * .Lsqr8x8_post_condition
 *
 * Eight-doubleword conditional subtraction followed by a store of the result.
 *   1. r22..r29 -= r9..r12,r14..r17 as one borrow-propagating chain.
 *   2. r31 = (r0 + incoming CA) - r0 - borrow, which is then used as an AND mask.
 *   3. The subtracted values are masked with r31 and added back with carry,
 *      so the subtraction is undone when r31 is all-ones and kept when r31 is 0.
 *   4. r22..r29 are stored at 8..64(r3).
 * Interleaved with the arithmetic, r0 is stored to frame doublewords 12..27.
 *
 * NOTE(review): r0 is presumably zero here (set before this view), which would
 * make the interleaved stores a wipe of the stack scratch area; confirm.
 * NOTE(review): r9..r17 presumably hold the modulus and r22..r29 the
 * unreduced result; confirm against the code that branches here.
 */
1703*3a608692SPiotr Kubaj.Lsqr8x8_post_condition:
1704*3a608692SPiotr Kubaj	ld	3,8*6(1)	/* r3 = doubleword saved at 48(r1); store base for the result below */
1705*3a608692SPiotr Kubaj	ld	4,0(1)	/* r4 = doubleword at 0(r1); .Lsqr8x_done restores registers and r1 from it */
1706*3a608692SPiotr Kubaj	addze	31,0	/* r31 = r0 + CA, CA as left by the code that branched here */
1707*3a608692SPiotr Kubaj
1708*3a608692SPiotr Kubaj
1709*3a608692SPiotr Kubaj	subfc	22,9,22	/* r22 = r22 - r9, starts the borrow chain */
1710*3a608692SPiotr Kubaj	subfe	23,10,23
1711*3a608692SPiotr Kubaj	std	0,8*12(1)	/* stores of r0 to the frame are interleaved with the chain; std does not alter CA */
1712*3a608692SPiotr Kubaj	std	0,8*13(1)
1713*3a608692SPiotr Kubaj	subfe	24,11,24
1714*3a608692SPiotr Kubaj	std	0,8*14(1)
1715*3a608692SPiotr Kubaj	std	0,8*15(1)
1716*3a608692SPiotr Kubaj	subfe	25,12,25
1717*3a608692SPiotr Kubaj	std	0,8*16(1)
1718*3a608692SPiotr Kubaj	std	0,8*17(1)
1719*3a608692SPiotr Kubaj	subfe	26,14,26
1720*3a608692SPiotr Kubaj	std	0,8*18(1)
1721*3a608692SPiotr Kubaj	std	0,8*19(1)
1722*3a608692SPiotr Kubaj	subfe	27,15,27
1723*3a608692SPiotr Kubaj	std	0,8*20(1)
1724*3a608692SPiotr Kubaj	std	0,8*21(1)
1725*3a608692SPiotr Kubaj	subfe	28,16,28
1726*3a608692SPiotr Kubaj	std	0,8*22(1)
1727*3a608692SPiotr Kubaj	std	0,8*23(1)
1728*3a608692SPiotr Kubaj	subfe	29,17,29
1729*3a608692SPiotr Kubaj	std	0,8*24(1)
1730*3a608692SPiotr Kubaj	std	0,8*25(1)
1731*3a608692SPiotr Kubaj	subfe	31,0,31	/* r31 = r31 - r0 - borrow; result is the add-back mask */
1732*3a608692SPiotr Kubaj	std	0,8*26(1)
1733*3a608692SPiotr Kubaj	std	0,8*27(1)
1734*3a608692SPiotr Kubaj
1735*3a608692SPiotr Kubaj	and	9,9,31	/* mask each subtracted doubleword with r31 ... */
1736*3a608692SPiotr Kubaj	and	10,10,31
1737*3a608692SPiotr Kubaj	addc	22,22,9	/* ... and add it back, carry propagating through r22..r29 */
1738*3a608692SPiotr Kubaj	and	11,11,31
1739*3a608692SPiotr Kubaj	adde	23,23,10
1740*3a608692SPiotr Kubaj	and	12,12,31
1741*3a608692SPiotr Kubaj	adde	24,24,11
1742*3a608692SPiotr Kubaj	and	14,14,31
1743*3a608692SPiotr Kubaj	adde	25,25,12
1744*3a608692SPiotr Kubaj	and	15,15,31
1745*3a608692SPiotr Kubaj	adde	26,26,14
1746*3a608692SPiotr Kubaj	and	16,16,31
1747*3a608692SPiotr Kubaj	adde	27,27,15
1748*3a608692SPiotr Kubaj	and	17,17,31
1749*3a608692SPiotr Kubaj	adde	28,28,16
1750*3a608692SPiotr Kubaj	adde	29,29,17
1751*3a608692SPiotr Kubaj	std	22,8*1(3)	/* write the eight result doublewords to 8(r3)..64(r3) */
1752*3a608692SPiotr Kubaj	std	23,8*2(3)
1753*3a608692SPiotr Kubaj	std	24,8*3(3)
1754*3a608692SPiotr Kubaj	std	25,8*4(3)
1755*3a608692SPiotr Kubaj	std	26,8*5(3)
1756*3a608692SPiotr Kubaj	std	27,8*6(3)
1757*3a608692SPiotr Kubaj	std	28,8*7(3)
1758*3a608692SPiotr Kubaj	std	29,8*8(3)	/* execution then falls through into .Lsqr8x_done */
1759*3a608692SPiotr Kubaj
/*
 * .Lsqr8x_done
 *
 * Common exit: stores r0 to two frame slots, reloads r14..r31 from the
 * 18 doublewords immediately below the address in r4, sets r3 = 1,
 * moves r4 into r1 and returns.
 *
 * r4 is loaded from 0(r1) by both paths visible in this part of the file
 * before they arrive here. Under the PowerPC ELF ABI that slot is the
 * back chain, i.e. the caller's stack pointer.
 * NOTE(review): confirm any other branch to this label also sets r4.
 * NOTE(review): r0 is presumably zero here; confirm where it is set.
 */
1760*3a608692SPiotr Kubaj.Lsqr8x_done:
1761*3a608692SPiotr Kubaj	std	0,8*8(1)	/* store r0 at 64(r1) */
1762*3a608692SPiotr Kubaj	std	0,8*10(1)	/* store r0 at 80(r1) */
1763*3a608692SPiotr Kubaj
1764*3a608692SPiotr Kubaj	ld	14,-8*18(4)	/* reload r14..r31 from -144(r4) .. -8(r4) */
1765*3a608692SPiotr Kubaj	li	3,1	/* r3 = 1; r3 carries the integer return value in the ELFv2 ABI */
1766*3a608692SPiotr Kubaj	ld	15,-8*17(4)
1767*3a608692SPiotr Kubaj	ld	16,-8*16(4)
1768*3a608692SPiotr Kubaj	ld	17,-8*15(4)
1769*3a608692SPiotr Kubaj	ld	18,-8*14(4)
1770*3a608692SPiotr Kubaj	ld	19,-8*13(4)
1771*3a608692SPiotr Kubaj	ld	20,-8*12(4)
1772*3a608692SPiotr Kubaj	ld	21,-8*11(4)
1773*3a608692SPiotr Kubaj	ld	22,-8*10(4)
1774*3a608692SPiotr Kubaj	ld	23,-8*9(4)
1775*3a608692SPiotr Kubaj	ld	24,-8*8(4)
1776*3a608692SPiotr Kubaj	ld	25,-8*7(4)
1777*3a608692SPiotr Kubaj	ld	26,-8*6(4)
1778*3a608692SPiotr Kubaj	ld	27,-8*5(4)
1779*3a608692SPiotr Kubaj	ld	28,-8*4(4)
1780*3a608692SPiotr Kubaj	ld	29,-8*3(4)
1781*3a608692SPiotr Kubaj	ld	30,-8*2(4)
1782*3a608692SPiotr Kubaj	ld	31,-8*1(4)
1783*3a608692SPiotr Kubaj	mr	1,4	/* r1 = r4: release the frame only after all reloads are done */
1784*3a608692SPiotr Kubaj	blr
1785*3a608692SPiotr Kubaj.long	0
1786*3a608692SPiotr Kubaj.byte	0,12,4,0x20,0x80,18,6,0
1787*3a608692SPiotr Kubaj.long	0
1788*3a608692SPiotr Kubaj.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
/*
 * NUL-terminated ASCII identification string. The bytes decode to:
 *   "Montgomery Multiplication for PPC,CRYPTOGAMS by <appro@openssl.org>"
 */
1789*3a608692SPiotr Kubaj.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1790*3a608692SPiotr Kubaj.align	2
1791