xref: /freebsd/sys/crypto/openssl/powerpc64le/ppc-mont.S (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1/* Do not modify. This file is auto-generated from ppc-mont.pl. */
2.machine	"any"
3.abiversion	2
4.text
5
/*
 * bn_mul_mont_int -- Montgomery multiplication, one multiplier word per
 * outer pass.
 *
 * Registers as the code below reads them on entry:
 *   r3  pointer the final result is stored through (copied to r9)
 *   r4  first operand, read as 64-bit words      (a[] in the comments)
 *   r5  second operand, one word per outer pass  (b[] in the comments)
 *   r6  modulus words                            (n[] in the comments)
 *   r7  pointer to one 64-bit constant, dereferenced once
 *   r8  number of 64-bit words (shifted with 32-bit slwi/srwi)
 * NOTE(review): presumably these are rp/ap/bp/np/&n0/num of OpenSSL's
 * bn_mul_mont() and the constant is -1/n[0] mod 2^64 -- confirm against
 * the C caller.
 *
 * t[] below is the temporary vector that starts at sp+64 in the new frame.
 * Returns 1 in r3.  r20-r31 are saved on entry and restored on exit.
 * NOTE(review): no lower bound on r8 is checked here; CTR is loaded with
 * r8-2, so the loops assume r8 >= 3 -- confirm the caller enforces this.
 */
6.globl	bn_mul_mont_int
7.type	bn_mul_mont_int,@function
8.align	5
9bn_mul_mont_int:
10.localentry	bn_mul_mont_int,0
11
/* Keep the destination pointer in r9; r3 is reused as scratch/overflow. */
12	mr	9,3
13	li	3,0
/*
 * Build the frame: word count -> bytes, reserve bytes+352 below r1, round
 * the new stack pointer down to a 4096-byte boundary, then stdux stores the
 * old r1 at the new r1 (back chain) and updates r1.  r12 keeps the old r1.
 */
14	slwi	8,8,3
15	li	12,-4096
16	addi	3,8,352
17	subf	3,3,1
18	and	3,3,12
19	subf	3,1,3
20	mr	12,1
21	srwi	8,8,3
22	stdux	1,1,3
23
/* Save r20-r31 immediately below the caller's stack pointer. */
24	std	20,-96(12)
25	std	21,-88(12)
26	std	22,-80(12)
27	std	23,-72(12)
28	std	24,-64(12)
29	std	25,-56(12)
30	std	26,-48(12)
31	std	27,-40(12)
32	std	28,-32(12)
33	std	29,-24(12)
34	std	30,-16(12)
35	std	31,-8(12)
36
/*
 * r7 = the 64-bit constant itself.  r8 = count-2 is the CTR value for the
 * inner loops; words 0 and 1 are started outside them.
 */
37	ld	7,0(7)
38	addi	8,8,-2
39
/* First pass: r23 = b[0], r22 -> t[], r26:r25 = a[0]*b[0]. */
40	ld	23,0(5)
41	ld	10,0(4)
42	addi	22,1,64
43	mulld	25,10,23
44	mulhdu	26,10,23
45
46	ld	10,8(4)
47	ld	11,0(6)
48
/* r24 = low 64 bits of lo(a[0]*b[0]) * r7: the multiplier applied to n[]. */
49	mulld	24,25,7
50
/* r30:r29 = a[1]*b[0]. */
51	mulld	29,10,23
52	mulhdu	30,10,23
53
/*
 * r28:r27 = n[0]*r24.  The low sum r27 is never stored; the add is done
 * only so its carry reaches r28.
 */
54	mulld	27,11,24
55	mulhdu	28,11,24
56	ld	11,8(6)
57	addc	27,27,25
58	addze	28,28
59
/* r0:r31 = n[1]*r24. */
60	mulld	31,11,24
61	mulhdu	0,11,24
62
/* CTR = count-2; r21 = byte offset of word 2. */
63	mtctr	8
64	li	21,16
65.align	4
/*
 * Each iteration loads a[j]/n[j] at offset r21, folds the products started
 * in the previous iteration into r25/r26 (a*b side) and r27/r28 (n*m side),
 * stores one word of t[] and starts the next pair of products.
 */
66.L1st:
67	ldx	10,4,21
68	addc	25,29,26
69	ldx	11,6,21
70	addze	26,30
71	mulld	29,10,23
72	addc	27,31,28
73	mulhdu	30,10,23
74	addze	28,0
75	mulld	31,11,24
76	addc	27,27,25
77	mulhdu	0,11,24
78	addze	28,28
79	std	27,0(22)
80
81	addi	21,21,8
82	addi	22,22,8
83	bdnz	.L1st
84
/* Drain the pipeline: fold the last pair of products and store the word. */
85	addc	25,29,26
86	addze	26,30
87
88	addc	27,31,28
89	addze	28,0
90	addc	27,27,25
91	addze	28,28
92	std	27,0(22)
93
/* Top word = r28 + r26; r3 receives the carry out (0 or 1). */
94	li	3,0
95	addc	28,28,26
96	addze	3,3
97	std	28,8(22)
98
/* r20 = byte offset of the next b[] word; the first outer pass uses b[1]. */
99	li	20,8
100.align	4
/*
 * Outer pass: same structure as the first pass, but the existing t[] words
 * (loaded into r12) are added to the a[]*b[i] products.
 */
101.Louter:
102	ldx	23,5,20
103	ld	10,0(4)
104	addi	22,1,64
105	ld	12,64(1)
106	mulld	25,10,23
107	mulhdu	26,10,23
108	ld	10,8(4)
109	ld	11,0(6)
110	addc	25,25,12
111	mulld	29,10,23
112	addze	26,26
/* r24 = low 64 bits of (lo(a[0]*b[i]) + t[0]) * r7. */
113	mulld	24,25,7
114	mulhdu	30,10,23
115	mulld	27,11,24
116	mulhdu	28,11,24
117	ld	11,8(6)
118	addc	27,27,25
119	mulld	31,11,24
120	addze	28,28
121	mulhdu	0,11,24
122
123	mtctr	8
124	li	21,16
125.align	4
/* Inner loop: as .L1st, plus the t[] word read from 8(r22) into r12. */
126.Linner:
127	ldx	10,4,21
128	addc	25,29,26
129	ld	12,8(22)
130	addze	26,30
131	ldx	11,6,21
132	addc	27,31,28
133	mulld	29,10,23
134	addze	28,0
135	mulhdu	30,10,23
136	addc	25,25,12
137	mulld	31,11,24
138	addze	26,26
139	mulhdu	0,11,24
140	addc	27,27,25
141	addi	21,21,8
142	addze	28,28
143	std	27,0(22)
144	addi	22,22,8
145	bdnz	.Linner
146
/* Drain the pipeline for this pass. */
147	ld	12,8(22)
148	addc	25,29,26
149	addze	26,30
150	addc	25,25,12
151	addze	26,26
152
153	addc	27,31,28
154	addze	28,0
155	addc	27,27,25
156	addze	28,28
157	std	27,0(22)
158
/*
 * addic r3,r3,-1 sets CA exactly when r3 was non-zero, i.e. it moves the
 * previous overflow bit into CA; r3 is then cleared and collects the new
 * carry out of the top word.
 */
159	addic	3,3,-1
160	li	3,0
161	adde	28,28,26
162	addze	3,3
163	std	28,8(22)
164
/*
 * Loop control: r20 is compared with (count-2)*8 before being advanced,
 * so the last pass runs with r20 = (count-1)*8.
 */
165	slwi	12,8,3
166	cmpld	20,12
167	addi	20,20,8
168	ble	.Louter
169
/*
 * Final subtraction.  r8 is restored to the full word count; subfc
 * r21,r21,r21 zeroes r21 and sets CA (no borrow pending).
 */
170	addi	8,8,2
171	subfc	21,21,21
172	addi	22,1,64
173	mtctr	8
174
175.align	4
/* Store t[j] - n[j] - borrow through the destination pointer r9. */
176.Lsub:	ldx	12,22,21
177	ldx	11,6,21
178	subfe	10,11,12
179	stdx	10,9,21
180	addi	21,21,8
181	bdnz	.Lsub
182
/*
 * r3 = r3 - 1 + CA (r21 is 0).  Used as a select mask below: with all
 * bits set the t[] word is kept, with zero the subtracted word is kept.
 */
183	li	21,0
184	mtctr	8
185	subfe	3,21,3
186
187.align	4
/*
 * Branch-free select between t[j] and the word already stored through r9.
 * stdx r21,r22,r21 overwrites each t[] word with the current byte offset
 * r21 (not with zero).
 */
188.Lcopy:
189	ldx	12,22,21
190	ldx	10,9,21
191	and	12,12,3
192	andc	10,10,3
193	stdx	21,22,21
194	or	10,10,12
195	stdx	10,9,21
196	addi	21,21,8
197	bdnz	.Lcopy
198
/* Epilogue: r12 = back chain, return value 1, restore r20-r31 and r1. */
199	ld	12,0(1)
200	li	3,1
201	ld	20,-96(12)
202	ld	21,-88(12)
203	ld	22,-80(12)
204	ld	23,-72(12)
205	ld	24,-64(12)
206	ld	25,-56(12)
207	ld	26,-48(12)
208	ld	27,-40(12)
209	ld	28,-32(12)
210	ld	29,-24(12)
211	ld	30,-16(12)
212	ld	31,-8(12)
213	mr	1,12
214	blr
/* NOTE(review): the words below look like a traceback table -- confirm. */
215.long	0
216.byte	0,12,4,0,0x80,12,6,0
217.long	0
218.size	bn_mul_mont_int,.-bn_mul_mont_int
/*
 * bn_mul4x_mont_int -- Montgomery multiplication handling four 64-bit
 * words per step.
 *
 * Registers as the code below reads them on entry:
 *   r3  pointer the final result is stored through
 *   r4  first operand words   (a[] in the comments)
 *   r5  second operand words  (b[] in the comments)
 *   r6  modulus words         (n[] in the comments)
 *   r7  pointer to one 64-bit constant, dereferenced once
 *   r8  number of 64-bit words
 * NOTE(review): presumably rp/ap/bp/np/&n0/num of OpenSSL's bn_mul_mont()
 * -- confirm against the C caller.
 *
 * When r8 is a multiple of 8 and r4 == r5, control is transferred to
 * .Lsqr8x_do, which builds its own frame and returns to our caller.
 * All pointers advance 32 bytes at a time and loops end on pointer
 * equality, so this assumes r8 is a non-zero multiple of 4 -- TODO confirm
 * the caller enforces this.
 *
 * Frame slots used (offsets from the new r1):
 *   8*6        r3-8 (biased destination pointer)
 *   8*7        r5 + r8*8 - 32 (address of the last group of four b[] words)
 *   8*8..8*11  the four reduction multipliers of the current b[] group
 *   8*12...    temporary vector t[]
 * Returns 1 in r3.  r14-r31 are saved on entry and restored on exit.
 */
219.globl	bn_mul4x_mont_int
220.type	bn_mul4x_mont_int,@function
221.align	5
222bn_mul4x_mont_int:
223.localentry	bn_mul4x_mont_int,0
224
/* Dispatch: squaring code only if r8 % 8 == 0 and both operands alias. */
225	andi.	0,8,7
226	bne	.Lmul4x_do
227	cmpld	4,5
228	bne	.Lmul4x_do
229	b	.Lsqr8x_do
230.Lmul4x_do:
/* r8 = size in bytes; allocate 256 + r8 bytes and keep the old r1 in r9. */
231	slwi	8,8,3
232	mr	9,1
233	li	10,-32*8
234	sub	10,10,8
235	stdux	1,1,10
236
/* Save r14-r31 immediately below the caller's stack pointer. */
237	std	14,-8*18(9)
238	std	15,-8*17(9)
239	std	16,-8*16(9)
240	std	17,-8*15(9)
241	std	18,-8*14(9)
242	std	19,-8*13(9)
243	std	20,-8*12(9)
244	std	21,-8*11(9)
245	std	22,-8*10(9)
246	std	23,-8*9(9)
247	std	24,-8*8(9)
248	std	25,-8*7(9)
249	std	26,-8*6(9)
250	std	27,-8*5(9)
251	std	28,-8*4(9)
252	std	29,-8*3(9)
253	std	30,-8*2(9)
254	std	31,-8*1(9)
255
/*
 * Bias the a[], n[] and destination pointers by -8 so that displacement
 * 8*1 addresses word 0 and ldu/stdu with 8*4 steps to the next group.
 */
256	subi	4,4,8
257	subi	6,6,8
258	subi	3,3,8
259	ld	7,0(7)
260
/* r14 = address of the last four b[] words; r30 = biased end of a[]. */
261	add	14,5,8
262	add	30,4,8
263	subi	14,14,8*4
264
/*
 * r27 = b[0]; r9-r12 = a[0..3]; r18-r21 = n[0..3]; r22-r25 = accumulators,
 * cleared.
 */
265	ld	27,8*0(5)
266	li	22,0
267	ld	9,8*1(4)
268	li	23,0
269	ld	10,8*2(4)
270	li	24,0
271	ld	11,8*3(4)
272	li	25,0
273	ldu	12,8*4(4)
274	ld	18,8*1(6)
275	ld	19,8*2(6)
276	ld	20,8*3(6)
277	ldu	21,8*4(6)
278
/*
 * Spill the biased destination and the b[] end marker.  r3 becomes the
 * running carry, r29 the store pointer (sp+56), r31 the byte counter that
 * wraps modulo 32, r0 a constant zero.  addic also rewrites CA from
 * sp+56, which does not wrap, so CA is 0 when the loop starts.
 */
279	std	3,8*6(1)
280	std	14,8*7(1)
281	li	3,0
282	addic	29,1,8*7
283	li	31,0
284	li	0,0
285	b	.Loop_mul4x_1st_reduction
286
287.align	5
/*
 * First reduction: four iterations (r31 wraps to 0), one b[] word each.
 * Adds a[0..3]*b[i] to the accumulators, derives r28 = acc0 * r7, stores
 * r28 for the tail loops, then adds n[0..3]*r28 and shifts the accumulators
 * down one word.  The leading addze folds in the carry left by the
 * previous iteration.
 */
288.Loop_mul4x_1st_reduction:
289	mulld	14,9,27
290	addze	3,3
291	mulld	15,10,27
292	addi	31,31,8
293	mulld	16,11,27
294	andi.	31,31,8*4-1
295	mulld	17,12,27
296	addc	22,22,14
297	mulhdu	14,9,27
298	adde	23,23,15
299	mulhdu	15,10,27
300	adde	24,24,16
301	mulld	28,22,7
302	adde	25,25,17
303	mulhdu	16,11,27
304	addze	26,0
305	mulhdu	17,12,27
306	ldx	27,5,31
307	addc	23,23,14
308
309	stdu	28,8(29)
310	adde	24,24,15
311	mulld	15,19,28
312	adde	25,25,16
313	mulld	16,20,28
314	adde	26,26,17
315	mulld	17,21,28
316
317
318
319
320
321
322
323
324
325
/*
 * No lo(n[0]*r28) product is computed.  addic r22,r22,-1 sets CA exactly
 * when r22 is non-zero.  NOTE(review): presumably this stands in for the
 * carry of r22 + lo(n[0]*r28), whose low word is expected to be zero --
 * confirm.
 */
326	addic	22,22,-1
327	mulhdu	14,18,28
328	adde	22,23,15
329	mulhdu	15,19,28
330	adde	23,24,16
331	mulhdu	16,20,28
332	adde	24,25,17
333	mulhdu	17,21,28
334	adde	25,26,3
335	addze	3,0
336	addc	22,22,14
337	adde	23,23,15
338	adde	24,24,16
339	adde	25,25,17
340
341	bne	.Loop_mul4x_1st_reduction
342
/* a[] exhausted after one group means the operands are four words long. */
343	cmpld	30,4
344	beq	.Lmul4x4_post_condition
345
/* Load the next four a[]/n[] words and the first saved multiplier. */
346	ld	9,8*1(4)
347	ld	10,8*2(4)
348	ld	11,8*3(4)
349	ldu	12,8*4(4)
350	ld	28,8*8(1)
351	ld	18,8*1(6)
352	ld	19,8*2(6)
353	ld	20,8*3(6)
354	ldu	21,8*4(6)
355	b	.Loop_mul4x_1st_tail
356
357.align	5
/*
 * First tail: for the current four a[]/n[] words, replay the same four
 * b[] words and the four saved multipliers (reloaded from sp+64+r31),
 * emitting one t[] word per iteration through stdu.
 */
358.Loop_mul4x_1st_tail:
359	mulld	14,9,27
360	addze	3,3
361	mulld	15,10,27
362	addi	31,31,8
363	mulld	16,11,27
364	andi.	31,31,8*4-1
365	mulld	17,12,27
366	addc	22,22,14
367	mulhdu	14,9,27
368	adde	23,23,15
369	mulhdu	15,10,27
370	adde	24,24,16
371	mulhdu	16,11,27
372	adde	25,25,17
373	mulhdu	17,12,27
374	addze	26,0
375	ldx	27,5,31
376	addc	23,23,14
377	mulld	14,18,28
378	adde	24,24,15
379	mulld	15,19,28
380	adde	25,25,16
381	mulld	16,20,28
382	adde	26,26,17
383	mulld	17,21,28
384	addc	22,22,14
385	mulhdu	14,18,28
386	adde	23,23,15
387	mulhdu	15,19,28
388	adde	24,24,16
389	mulhdu	16,20,28
390	adde	25,25,17
391	adde	26,26,3
392	mulhdu	17,21,28
393	addze	3,0
394	addi	28,1,8*8
395	ldx	28,28,31
396	stdu	22,8(29)
397	addc	22,23,14
398	adde	23,24,15
399	adde	24,25,16
400	adde	25,26,17
401
402	bne	.Loop_mul4x_1st_tail
403
/* r15 = a[] pointer rewound to its biased start; stop at the end of a[]. */
404	sub	15,30,8
405	cmpld	30,4
406	beq	.Lmul4x_proceed
407
408	ld	9,8*1(4)
409	ld	10,8*2(4)
410	ld	11,8*3(4)
411	ldu	12,8*4(4)
412	ld	18,8*1(6)
413	ld	19,8*2(6)
414	ld	20,8*3(6)
415	ldu	21,8*4(6)
416	b	.Loop_mul4x_1st_tail
417
418.align	5
/*
 * Move on to the next group of four b[] words: advance r5, reload
 * a[0..3] from the rewound pointer, rewind n[], store the top four
 * accumulators plus carry above the emitted t[] words, and reload
 * t[0..3] as the new accumulators.
 */
419.Lmul4x_proceed:
420	ldu	27,8*4(5)
421	addze	3,3
422	ld	9,8*1(15)
423	ld	10,8*2(15)
424	ld	11,8*3(15)
425	ld	12,8*4(15)
426	addi	4,15,8*4
427	sub	6,6,8
428
429	std	22,8*1(29)
430	std	23,8*2(29)
431	std	24,8*3(29)
432	std	25,8*4(29)
433	std	3,8*5(29)
434	ld	22,8*12(1)
435	ld	23,8*13(1)
436	ld	24,8*14(1)
437	ld	25,8*15(1)
438
439	ld	18,8*1(6)
440	ld	19,8*2(6)
441	ld	20,8*3(6)
442	ldu	21,8*4(6)
443	addic	29,1,8*7
444	li	3,0
445	b	.Loop_mul4x_reduction
446
447.align	5
/*
 * Same shape as .Loop_mul4x_1st_reduction, but the accumulators start
 * from the t[] words left by the previous group.
 */
448.Loop_mul4x_reduction:
449	mulld	14,9,27
450	addze	3,3
451	mulld	15,10,27
452	addi	31,31,8
453	mulld	16,11,27
454	andi.	31,31,8*4-1
455	mulld	17,12,27
456	addc	22,22,14
457	mulhdu	14,9,27
458	adde	23,23,15
459	mulhdu	15,10,27
460	adde	24,24,16
461	mulld	28,22,7
462	adde	25,25,17
463	mulhdu	16,11,27
464	addze	26,0
465	mulhdu	17,12,27
466	ldx	27,5,31
467	addc	23,23,14
468
469	stdu	28,8(29)
470	adde	24,24,15
471	mulld	15,19,28
472	adde	25,25,16
473	mulld	16,20,28
474	adde	26,26,17
475	mulld	17,21,28
476
/* Same CA-from-non-zero trick as in the first reduction loop. */
477	addic	22,22,-1
478	mulhdu	14,18,28
479	adde	22,23,15
480	mulhdu	15,19,28
481	adde	23,24,16
482	mulhdu	16,20,28
483	adde	24,25,17
484	mulhdu	17,21,28
485	adde	25,26,3
486	addze	3,0
487	addc	22,22,14
488	adde	23,23,15
489	adde	24,24,16
490	adde	25,25,17
491
492	bne	.Loop_mul4x_reduction
493
/*
 * Add t[4..7] (r29 is sp+88 here, so 8*5(r29) is 8*16(r1)) and load the
 * next four a[]/n[] words and the first saved multiplier.
 */
494	ld	14,8*5(29)
495	addze	3,3
496	ld	15,8*6(29)
497	ld	16,8*7(29)
498	ld	17,8*8(29)
499	ld	9,8*1(4)
500	ld	10,8*2(4)
501	ld	11,8*3(4)
502	ldu	12,8*4(4)
503	addc	22,22,14
504	adde	23,23,15
505	adde	24,24,16
506	adde	25,25,17
507
508
509	ld	28,8*8(1)
510	ld	18,8*1(6)
511	ld	19,8*2(6)
512	ld	20,8*3(6)
513	ldu	21,8*4(6)
514	b	.Loop_mul4x_tail
515
516.align	5
/* Same shape as .Loop_mul4x_1st_tail. */
517.Loop_mul4x_tail:
518	mulld	14,9,27
519	addze	3,3
520	mulld	15,10,27
521	addi	31,31,8
522	mulld	16,11,27
523	andi.	31,31,8*4-1
524	mulld	17,12,27
525	addc	22,22,14
526	mulhdu	14,9,27
527	adde	23,23,15
528	mulhdu	15,10,27
529	adde	24,24,16
530	mulhdu	16,11,27
531	adde	25,25,17
532	mulhdu	17,12,27
533	addze	26,0
534	ldx	27,5,31
535	addc	23,23,14
536	mulld	14,18,28
537	adde	24,24,15
538	mulld	15,19,28
539	adde	25,25,16
540	mulld	16,20,28
541	adde	26,26,17
542	mulld	17,21,28
543	addc	22,22,14
544	mulhdu	14,18,28
545	adde	23,23,15
546	mulhdu	15,19,28
547	adde	24,24,16
548	mulhdu	16,20,28
549	adde	25,25,17
550	mulhdu	17,21,28
551	adde	26,26,3
552	addi	28,1,8*8
553	ldx	28,28,31
554	addze	3,0
555	stdu	22,8(29)
556	addc	22,23,14
557	adde	23,24,15
558	adde	24,25,16
559	adde	25,26,17
560
561	bne	.Loop_mul4x_tail
562
/* r14 = next t[] word, r15 = rewound n[] pointer; stop at the end of a[]. */
563	ld	14,8*5(29)
564	sub	15,6,8
565	addze	3,3
566	cmpld	30,4
567	beq	.Loop_mul4x_break
568
569	ld	15,8*6(29)
570	ld	16,8*7(29)
571	ld	17,8*8(29)
572	ld	9,8*1(4)
573	ld	10,8*2(4)
574	ld	11,8*3(4)
575	ldu	12,8*4(4)
576	addc	22,22,14
577	adde	23,23,15
578	adde	24,24,16
579	adde	25,25,17
580
581
582	ld	18,8*1(6)
583	ld	19,8*2(6)
584	ld	20,8*3(6)
585	ldu	21,8*4(6)
586	b	.Loop_mul4x_tail
587
588.align	5
/*
 * End of one b[] group: r16/r17 = saved destination / b[] end marker,
 * add the previous top word (r14) into the accumulators, store the new top
 * words and carry, reload t[0..3], rewind a[] and n[].  Finished when r5
 * has reached the saved end marker.
 */
589.Loop_mul4x_break:
590	ld	16,8*6(1)
591	ld	17,8*7(1)
592	addc	9,22,14
593	ld	22,8*12(1)
594	addze	10,23
595	ld	23,8*13(1)
596	addze	11,24
597	ld	24,8*14(1)
598	addze	12,25
599	ld	25,8*15(1)
600	addze	3,3
601	std	9,8*1(29)
602	sub	4,30,8
603	std	10,8*2(29)
604	std	11,8*3(29)
605	std	12,8*4(29)
606	std	3,8*5(29)
607
608	ld	18,8*1(15)
609	ld	19,8*2(15)
610	ld	20,8*3(15)
611	ld	21,8*4(15)
612	addi	6,15,8*4
613	cmpld	5,17
614	beq	.Lmul4x_post
615
616	ldu	27,8*4(5)
617	ld	9,8*1(4)
618	ld	10,8*2(4)
619	ld	11,8*3(4)
620	ldu	12,8*4(4)
621	li	3,0
622	addic	29,1,8*7
623	b	.Loop_mul4x_reduction
624
625.align	5
/*
 * Final subtraction t[] - n[] written through the destination (r5, with
 * a second copy of the pointer in r30).  CTR = r8/32 - 1, i.e. the number
 * of four-word groups minus one; the last group is handled after the loop.
 */
626.Lmul4x_post:
627
628
629
630
631	srwi	31,8,5
632	mr	5,16
633	subi	31,31,1
634	mr	30,16
635	subfc	14,18,22
636	addi	29,1,8*15
637	subfe	15,19,23
638
639	mtctr	31
640.Lmul4x_sub:
641	ld	18,8*1(6)
642	ld	22,8*1(29)
643	subfe	16,20,24
644	ld	19,8*2(6)
645	ld	23,8*2(29)
646	subfe	17,21,25
647	ld	20,8*3(6)
648	ld	24,8*3(29)
649	ldu	21,8*4(6)
650	ldu	25,8*4(29)
651	std	14,8*1(5)
652	std	15,8*2(5)
653	subfe	14,18,22
654	std	16,8*3(5)
655	stdu	17,8*4(5)
656	subfe	15,19,23
657	bdnz	.Lmul4x_sub
658
/*
 * Last group of the subtraction, interleaved with loading the first
 * group for the select loop: r9-r12 = stored differences, r14-r17 =
 * t[0..3].  subfe r3,r0,r3 gives r3 = r3 - 1 + CA, the select mask.
 */
659	ld	9,8*1(30)
660	std	14,8*1(5)
661	ld	14,8*12(1)
662	subfe	16,20,24
663	ld	10,8*2(30)
664	std	15,8*2(5)
665	ld	15,8*13(1)
666	subfe	17,21,25
667	subfe	3,0,3
668	addi	29,1,8*12
669	ld	11,8*3(30)
670	std	16,8*3(5)
671	ld	16,8*14(1)
672	ld	12,8*4(30)
673	std	17,8*4(5)
674	ld	17,8*15(1)
675
676	mtctr	31
/*
 * Branch-free select: result = (t & mask) | (difference & ~mask), four
 * words per iteration; each t[] word is overwritten with zero (r0).
 */
677.Lmul4x_cond_copy:
678	and	14,14,3
679	andc	9,9,3
680	std	0,8*0(29)
681	and	15,15,3
682	andc	10,10,3
683	std	0,8*1(29)
684	and	16,16,3
685	andc	11,11,3
686	std	0,8*2(29)
687	and	17,17,3
688	andc	12,12,3
689	std	0,8*3(29)
690	or	22,14,9
691	ld	9,8*5(30)
692	ld	14,8*4(29)
693	or	23,15,10
694	ld	10,8*6(30)
695	ld	15,8*5(29)
696	or	24,16,11
697	ld	11,8*7(30)
698	ld	16,8*6(29)
699	or	25,17,12
700	ld	12,8*8(30)
701	ld	17,8*7(29)
702	addi	29,29,8*4
703	std	22,8*1(30)
704	std	23,8*2(30)
705	std	24,8*3(30)
706	stdu	25,8*4(30)
707	bdnz	.Lmul4x_cond_copy
708
/* Last group of the select; r5 = back chain for the epilogue. */
709	ld	5,0(1)
710	and	14,14,3
711	andc	9,9,3
712	std	0,8*0(29)
713	and	15,15,3
714	andc	10,10,3
715	std	0,8*1(29)
716	and	16,16,3
717	andc	11,11,3
718	std	0,8*2(29)
719	and	17,17,3
720	andc	12,12,3
721	std	0,8*3(29)
722	or	22,14,9
723	or	23,15,10
724	std	0,8*4(29)
725	or	24,16,11
726	or	25,17,12
727	std	22,8*1(30)
728	std	23,8*2(30)
729	std	24,8*3(30)
730	std	25,8*4(30)
731
732	b	.Lmul4x_done
733
734.align	4
/*
 * Four-word operands: the result is still in r22-r25.  Subtract n[0..3],
 * turn the borrow/carry into a mask (r3 = r3 - 1 + CA), add back
 * n & mask, and store through the saved destination pointer.
 */
735.Lmul4x4_post_condition:
736	ld	4,8*6(1)
737	ld	5,0(1)
738	addze	3,3
739
740	subfc	9,18,22
741	subfe	10,19,23
742	subfe	11,20,24
743	subfe	12,21,25
744	subfe	3,0,3
745
746	and	18,18,3
747	and	19,19,3
748	addc	9,9,18
749	and	20,20,3
750	adde	10,10,19
751	and	21,21,3
752	adde	11,11,20
753	adde	12,12,21
754
755	std	9,8*1(4)
756	std	10,8*2(4)
757	std	11,8*3(4)
758	std	12,8*4(4)
759
/*
 * Common exit: clear the four saved multipliers, return 1, restore
 * r14-r31 relative to the back chain in r5 and pop the frame.
 */
760.Lmul4x_done:
761	std	0,8*8(1)
762	std	0,8*9(1)
763	std	0,8*10(1)
764	std	0,8*11(1)
765	li	3,1
766	ld	14,-8*18(5)
767	ld	15,-8*17(5)
768	ld	16,-8*16(5)
769	ld	17,-8*15(5)
770	ld	18,-8*14(5)
771	ld	19,-8*13(5)
772	ld	20,-8*12(5)
773	ld	21,-8*11(5)
774	ld	22,-8*10(5)
775	ld	23,-8*9(5)
776	ld	24,-8*8(5)
777	ld	25,-8*7(5)
778	ld	26,-8*6(5)
779	ld	27,-8*5(5)
780	ld	28,-8*4(5)
781	ld	29,-8*3(5)
782	ld	30,-8*2(5)
783	ld	31,-8*1(5)
784	mr	1,5
785	blr
/* NOTE(review): the words below look like a traceback table -- confirm. */
786.long	0
787.byte	0,12,4,0x20,0x80,18,6,0
788.long	0
789.size	bn_mul4x_mont_int,.-bn_mul4x_mont_int
/*
 * __bn_sqr8x_mont -- Montgomery squaring, eight 64-bit words per step.
 *
 * No .globl is given for this symbol in the visible source; the only
 * visible entry is the branch to .Lsqr8x_do from bn_mul4x_mont_int, taken
 * when r8 is a multiple of 8 and r4 == r5.  The argument registers are
 * therefore those of bn_mul4x_mont_int: r3 destination, r4 operand
 * (a[] in the comments; r5 is ignored and reused), r6 modulus (n[]),
 * r7 pointer to a 64-bit constant, r8 word count.
 *
 * Phases, in order:
 *   1. .Lsqr8x_outer_loop / .Lsqr8x_mul: sum of the products a[i]*a[j], i<j
 *   2. .Lsqr4x_shift_n_add: double that sum and add the squares a[i]*a[i]
 *   3. .Lsqr8x_reduction / .Lsqr8x_tail: Montgomery reduction, 8 words a step
 *   4. .Lsqr8x_sub / .Lsqr4x_cond_copy: final subtraction and masked select
 *
 * Frame slots used (offsets from the new r1):
 *   8*6   r3-8 (biased destination pointer)
 *   8*7   r6-8 (biased modulus pointer)
 *   8*8   the 64-bit constant loaded through r7
 *   8*9   value of r5 after the zeroing loop (end marker for the reduction)
 *   8*10  top-most carry of the reduction
 *   8*12... temporary vector t[], addressed as 8*1(r5) with r5 = r1+88
 * Returns 1 in r3.  r14-r31 are saved on entry and restored on exit.
 */
790.align	5
791__bn_sqr8x_mont:
792.Lsqr8x_do:
/* Allocate 256 + 16*r8 bytes (r8 still in words here), then r8 -> bytes. */
793	mr	9,1
794	slwi	10,8,4
795	li	11,-32*8
796	sub	10,11,10
797	slwi	8,8,3
798	stdux	1,1,10
799
/* Save r14-r31 immediately below the caller's stack pointer (r9). */
800	std	14,-8*18(9)
801	std	15,-8*17(9)
802	std	16,-8*16(9)
803	std	17,-8*15(9)
804	std	18,-8*14(9)
805	std	19,-8*13(9)
806	std	20,-8*12(9)
807	std	21,-8*11(9)
808	std	22,-8*10(9)
809	std	23,-8*9(9)
810	std	24,-8*8(9)
811	std	25,-8*7(9)
812	std	26,-8*6(9)
813	std	27,-8*5(9)
814	std	28,-8*4(9)
815	std	29,-8*3(9)
816	std	30,-8*2(9)
817	std	31,-8*1(9)
818
/*
 * Bias the pointers by -8 (the modulus pointer goes to r18), load the
 * constant, and keep r0 = 0 for the rest of the function.
 */
819	subi	4,4,8
820	subi	18,6,8
821	subi	3,3,8
822	ld	7,0(7)
823	li	0,0
824
/*
 * r6 = biased end of a[]; r9-r12, r14-r17 = a[0..7]; r23-r29 cleared.
 * NOTE(review): r22 is not initialised here although it is stored to
 * 8*1(r5) in the first outer pass; that slot (8*12(r1)) is written again
 * in .Lsqr4x_shift_n_add before it is read -- confirm this is intended.
 */
825	add	6,4,8
826	ld	9,8*1(4)
827
828	ld	10,8*2(4)
829	li	23,0
830	ld	11,8*3(4)
831	li	24,0
832	ld	12,8*4(4)
833	li	25,0
834	ld	14,8*5(4)
835	li	26,0
836	ld	15,8*6(4)
837	li	27,0
838	ld	16,8*7(4)
839	li	28,0
840	ldu	17,8*8(4)
841	li	29,0
842
/*
 * Zero the temporary area.  The loop is entered in the middle, so the
 * first pass clears only slots 9..16 relative to r5; every later pass
 * clears sixteen slots.  r30 counts down from r8-64 in steps of 64.
 */
843	addi	5,1,8*11
844	subic.	30,8,8*8
845	b	.Lsqr8x_zero_start
846
847.align	5
848.Lsqr8x_zero:
849	subic.	30,30,8*8
850	std	0,8*1(5)
851	std	0,8*2(5)
852	std	0,8*3(5)
853	std	0,8*4(5)
854	std	0,8*5(5)
855	std	0,8*6(5)
856	std	0,8*7(5)
857	std	0,8*8(5)
858.Lsqr8x_zero_start:
859	std	0,8*9(5)
860	std	0,8*10(5)
861	std	0,8*11(5)
862	std	0,8*12(5)
863	std	0,8*13(5)
864	std	0,8*14(5)
865	std	0,8*15(5)
866	stdu	0,8*16(5)
867	bne	.Lsqr8x_zero
868
/* Spill the values needed later (see the frame slots in the header). */
869	std	3,8*6(1)
870	std	18,8*7(1)
871	std	7,8*8(1)
872	std	5,8*9(1)
873	std	0,8*10(1)
874	addi	5,1,8*11
875
876
877.align	5
/*
 * Products inside the current eight-word window: a[1..7]*a[0], then
 * a[2..7]*a[1], and so on down to a[7]*a[6].  Low halves are added first,
 * then high halves one word further up; finished low words are stored to
 * t[] as the accumulator window slides.
 */
878.Lsqr8x_outer_loop:
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908	mulld	18,10,9
909	mulld	19,11,9
910	mulld	20,12,9
911	mulld	21,14,9
912	addc	23,23,18
913	mulld	18,15,9
914	adde	24,24,19
915	mulld	19,16,9
916	adde	25,25,20
917	mulld	20,17,9
918	adde	26,26,21
919	mulhdu	21,10,9
920	adde	27,27,18
921	mulhdu	18,11,9
922	adde	28,28,19
923	mulhdu	19,12,9
924	adde	29,29,20
925	mulhdu	20,14,9
926	std	22,8*1(5)
927	addze	22,0
928	std	23,8*2(5)
929	addc	24,24,21
930	mulhdu	21,15,9
931	adde	25,25,18
932	mulhdu	18,16,9
933	adde	26,26,19
934	mulhdu	19,17,9
935	adde	27,27,20
936	mulld	20,11,10
937	adde	28,28,21
938	mulld	21,12,10
939	adde	29,29,18
940	mulld	18,14,10
941	adde	22,22,19
942
943	mulld	19,15,10
944	addc	25,25,20
945	mulld	20,16,10
946	adde	26,26,21
947	mulld	21,17,10
948	adde	27,27,18
949	mulhdu	18,11,10
950	adde	28,28,19
951	mulhdu	19,12,10
952	adde	29,29,20
953	mulhdu	20,14,10
954	adde	22,22,21
955	mulhdu	21,15,10
956	std	24,8*3(5)
957	addze	23,0
958	std	25,8*4(5)
959	addc	26,26,18
960	mulhdu	18,16,10
961	adde	27,27,19
962	mulhdu	19,17,10
963	adde	28,28,20
964	mulld	20,12,11
965	adde	29,29,21
966	mulld	21,14,11
967	adde	22,22,18
968	mulld	18,15,11
969	adde	23,23,19
970
971	mulld	19,16,11
972	addc	27,27,20
973	mulld	20,17,11
974	adde	28,28,21
975	mulhdu	21,12,11
976	adde	29,29,18
977	mulhdu	18,14,11
978	adde	22,22,19
979	mulhdu	19,15,11
980	adde	23,23,20
981	mulhdu	20,16,11
982	std	26,8*5(5)
983	addze	24,0
984	std	27,8*6(5)
985	addc	28,28,21
986	mulhdu	21,17,11
987	adde	29,29,18
988	mulld	18,14,12
989	adde	22,22,19
990	mulld	19,15,12
991	adde	23,23,20
992	mulld	20,16,12
993	adde	24,24,21
994
995	mulld	21,17,12
996	addc	29,29,18
997	mulhdu	18,14,12
998	adde	22,22,19
999	mulhdu	19,15,12
1000	adde	23,23,20
1001	mulhdu	20,16,12
1002	adde	24,24,21
1003	mulhdu	21,17,12
1004	std	28,8*7(5)
1005	addze	25,0
1006	stdu	29,8*8(5)
1007	addc	22,22,18
1008	mulld	18,15,14
1009	adde	23,23,19
1010	mulld	19,16,14
1011	adde	24,24,20
1012	mulld	20,17,14
1013	adde	25,25,21
1014
1015	mulhdu	21,15,14
1016	addc	23,23,18
1017	mulhdu	18,16,14
1018	adde	24,24,19
1019	mulhdu	19,17,14
1020	adde	25,25,20
1021	mulld	20,16,15
1022	addze	26,0
1023	addc	24,24,21
1024	mulld	21,17,15
1025	adde	25,25,18
1026	mulhdu	18,16,15
1027	adde	26,26,19
1028
1029	mulhdu	19,17,15
1030	addc	25,25,20
1031	mulld	20,17,16
1032	adde	26,26,21
1033	mulhdu	21,17,16
1034	addze	27,0
1035	addc	26,26,18
/* CR0 = (end of a[] reached?); r18 = a[] pointer rewound to its start. */
1036	cmpld	6,4
1037	adde	27,27,19
1038
1039	addc	27,27,20
1040	sub	18,6,8
1041	addze	28,0
1042	add	28,28,21
1043
1044	beq	.Lsqr8x_outer_break
1045
/*
 * More words follow: r7 becomes the first multiplier (word 0 of the
 * window just processed), the next eight t[] words are added to the
 * accumulators, the next eight a[] words are loaded, and r3 is set to the
 * start of the window just processed so ldx r7,r3,r30 can fetch the
 * following multipliers.
 */
1046	mr	7,9
1047	ld	9,8*1(5)
1048	ld	10,8*2(5)
1049	ld	11,8*3(5)
1050	ld	12,8*4(5)
1051	ld	14,8*5(5)
1052	ld	15,8*6(5)
1053	ld	16,8*7(5)
1054	ld	17,8*8(5)
1055	addc	22,22,9
1056	ld	9,8*1(4)
1057	adde	23,23,10
1058	ld	10,8*2(4)
1059	adde	24,24,11
1060	ld	11,8*3(4)
1061	adde	25,25,12
1062	ld	12,8*4(4)
1063	adde	26,26,14
1064	ld	14,8*5(4)
1065	adde	27,27,15
1066	ld	15,8*6(4)
1067	adde	28,28,16
1068	ld	16,8*7(4)
1069	subi	3,4,8*7
1070	addze	29,17
1071	ldu	17,8*8(4)
1072
1073	li	30,0
1074	b	.Lsqr8x_mul
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098.align	5
/*
 * Eight iterations (r30 wraps modulo 64): multiply the eight loaded
 * a[] words by one multiplier r7 per iteration, emit one t[] word through
 * stdu and shift the accumulators down; r31 collects the carry.
 */
1099.Lsqr8x_mul:
1100	mulld	18,9,7
1101	addze	31,0
1102	mulld	19,10,7
1103	addi	30,30,8
1104	mulld	20,11,7
1105	andi.	30,30,8*8-1
1106	mulld	21,12,7
1107	addc	22,22,18
1108	mulld	18,14,7
1109	adde	23,23,19
1110	mulld	19,15,7
1111	adde	24,24,20
1112	mulld	20,16,7
1113	adde	25,25,21
1114	mulld	21,17,7
1115	adde	26,26,18
1116	mulhdu	18,9,7
1117	adde	27,27,19
1118	mulhdu	19,10,7
1119	adde	28,28,20
1120	mulhdu	20,11,7
1121	adde	29,29,21
1122	mulhdu	21,12,7
1123	addze	31,31
1124	stdu	22,8(5)
1125	addc	22,23,18
1126	mulhdu	18,14,7
1127	adde	23,24,19
1128	mulhdu	19,15,7
1129	adde	24,25,20
1130	mulhdu	20,16,7
1131	adde	25,26,21
1132	mulhdu	21,17,7
1133	ldx	7,3,30
1134	adde	26,27,18
1135	adde	27,28,19
1136	adde	28,29,20
1137	adde	29,31,21
1138
1139	bne	.Lsqr8x_mul
1140
1141
/* End of a[]?  If not, add the next t[] words, load more a[] and repeat. */
1142	cmpld	4,6
1143	beq	.Lsqr8x_break
1144
1145	ld	9,8*1(5)
1146	ld	10,8*2(5)
1147	ld	11,8*3(5)
1148	ld	12,8*4(5)
1149	ld	14,8*5(5)
1150	ld	15,8*6(5)
1151	ld	16,8*7(5)
1152	ld	17,8*8(5)
1153	addc	22,22,9
1154	ld	9,8*1(4)
1155	adde	23,23,10
1156	ld	10,8*2(4)
1157	adde	24,24,11
1158	ld	11,8*3(4)
1159	adde	25,25,12
1160	ld	12,8*4(4)
1161	adde	26,26,14
1162	ld	14,8*5(4)
1163	adde	27,27,15
1164	ld	15,8*6(4)
1165	adde	28,28,16
1166	ld	16,8*7(4)
1167	adde	29,29,17
1168	ldu	17,8*8(4)
1169
1170	b	.Lsqr8x_mul
1171
1172.align	5
/*
 * One multiplier window is done.  Load the eight a[] words that follow it
 * (8*8(r3)..8*15(r3)) as the next window and set r4 past them.  r18 is
 * the distance from there to the end of a[]; if it is zero the
 * accumulators are kept as they are, otherwise they are stored and
 * reloaded from t[] rewound by that distance (r19).
 */
1173.Lsqr8x_break:
1174	ld	9,8*8(3)
1175	addi	4,3,8*15
1176	ld	10,8*9(3)
1177	sub.	18,6,4
1178	ld	11,8*10(3)
1179	sub	19,5,18
1180	ld	12,8*11(3)
1181	ld	14,8*12(3)
1182	ld	15,8*13(3)
1183	ld	16,8*14(3)
1184	ld	17,8*15(3)
1185	beq	.Lsqr8x_outer_loop
1186
1187	std	22,8*1(5)
1188	ld	22,8*1(19)
1189	std	23,8*2(5)
1190	ld	23,8*2(19)
1191	std	24,8*3(5)
1192	ld	24,8*3(19)
1193	std	25,8*4(5)
1194	ld	25,8*4(19)
1195	std	26,8*5(5)
1196	ld	26,8*5(19)
1197	std	27,8*6(5)
1198	ld	27,8*6(19)
1199	std	28,8*7(5)
1200	ld	28,8*7(19)
1201	std	29,8*8(5)
1202	ld	29,8*8(19)
1203	mr	5,19
1204	b	.Lsqr8x_outer_loop
1205
1206.align	5
/*
 * All cross products are summed.  From here t[] is doubled (add x,x with
 * the shifted-out top bit OR-ed into the next word) and the squares
 * a[i]*a[i] are added in.  r18 is the rewound a[] pointer; r19-r21 and r18
 * are then loaded with the t[] words at 8*13..8*16(r1); CTR = r8/32 - 1.
 */
1207.Lsqr8x_outer_break:
1208
1209
1210	ld	10,8*1(18)
1211	ld	12,8*2(18)
1212	ld	15,8*3(18)
1213	ld	17,8*4(18)
1214	addi	4,18,8*4
1215
1216	ld	19,8*13(1)
1217	ld	20,8*14(1)
1218	ld	21,8*15(1)
1219	ld	18,8*16(1)
1220
1221	std	22,8*1(5)
1222	srwi	30,8,5
1223	std	23,8*2(5)
1224	subi	30,30,1
1225	std	24,8*3(5)
1226	std	25,8*4(5)
1227	std	26,8*5(5)
1228	std	27,8*6(5)
1229	std	28,8*7(5)
1230
1231	addi	5,1,8*11
1232	mulld	22,10,10
1233	mulhdu	10,10,10
1234	add	23,19,19
1235	srdi	19,19,64-1
1236	mulld	11,12,12
1237	mulhdu	12,12,12
1238	addc	23,23,10
1239	add	24,20,20
1240	srdi	20,20,64-1
1241	add	25,21,21
1242	srdi	21,21,64-1
1243	or	24,24,19
1244
1245	mtctr	30
/* Each iteration consumes four a[] words and produces eight t[] words. */
1246.Lsqr4x_shift_n_add:
1247	mulld	14,15,15
1248	mulhdu	15,15,15
1249	ld	19,8*6(5)
1250	ld	10,8*1(4)
1251	adde	24,24,11
1252	add	26,18,18
1253	srdi	18,18,64-1
1254	or	25,25,20
1255	ld	20,8*7(5)
1256	adde	25,25,12
1257	ld	12,8*2(4)
1258	add	27,19,19
1259	srdi	19,19,64-1
1260	or	26,26,21
1261	ld	21,8*8(5)
1262	mulld	16,17,17
1263	mulhdu	17,17,17
1264	adde	26,26,14
1265	add	28,20,20
1266	srdi	20,20,64-1
1267	or	27,27,18
1268	ld	18,8*9(5)
1269	adde	27,27,15
1270	ld	15,8*3(4)
1271	add	29,21,21
1272	srdi	21,21,64-1
1273	or	28,28,19
1274	ld	19,8*10(5)
1275	mulld	9,10,10
1276	mulhdu	10,10,10
1277	adde	28,28,16
1278	std	22,8*1(5)
1279	add	22,18,18
1280	srdi	18,18,64-1
1281	or	29,29,20
1282	ld	20,8*11(5)
1283	adde	29,29,17
1284	ldu	17,8*4(4)
1285	std	23,8*2(5)
1286	add	23,19,19
1287	srdi	19,19,64-1
1288	or	22,22,21
1289	ld	21,8*12(5)
1290	mulld	11,12,12
1291	mulhdu	12,12,12
1292	adde	22,22,9
1293	std	24,8*3(5)
1294	add	24,20,20
1295	srdi	20,20,64-1
1296	or	23,23,18
1297	ld	18,8*13(5)
1298	adde	23,23,10
1299	std	25,8*4(5)
1300	std	26,8*5(5)
1301	std	27,8*6(5)
1302	std	28,8*7(5)
1303	stdu	29,8*8(5)
1304	add	25,21,21
1305	srdi	21,21,64-1
1306	or	24,24,19
1307	bdnz	.Lsqr4x_shift_n_add
/*
 * Last step of the doubling, interleaved with the set-up of the
 * reduction: r4 = biased modulus pointer, r7 = the saved constant,
 * r22-r29 = t[0..7], r9-r12 and r14-r17 = n[0..7], r6 = biased end of n[].
 */
1308	ld	4,8*7(1)
1309	ld	7,8*8(1)
1310
1311	mulld	14,15,15
1312	mulhdu	15,15,15
1313	std	22,8*1(5)
1314	ld	22,8*12(1)
1315	ld	19,8*6(5)
1316	adde	24,24,11
1317	add	26,18,18
1318	srdi	18,18,64-1
1319	or	25,25,20
1320	ld	20,8*7(5)
1321	adde	25,25,12
1322	add	27,19,19
1323	srdi	19,19,64-1
1324	or	26,26,21
1325	mulld	16,17,17
1326	mulhdu	17,17,17
1327	adde	26,26,14
1328	add	28,20,20
1329	srdi	20,20,64-1
1330	or	27,27,18
1331	std	23,8*2(5)
1332	ld	23,8*13(1)
1333	adde	27,27,15
1334	or	28,28,19
1335	ld	9,8*1(4)
1336	ld	10,8*2(4)
1337	adde	28,28,16
1338	ld	11,8*3(4)
1339	ld	12,8*4(4)
1340	adde	29,17,20
1341	ld	14,8*5(4)
1342	ld	15,8*6(4)
1343
1344
1345
/* r31 = t[0] * constant: the first reduction multiplier.  CTR = 8. */
1346	mulld	31,7,22
1347	li	30,8
1348	ld	16,8*7(4)
1349	add	6,4,8
1350	ldu	17,8*8(4)
1351	std	24,8*3(5)
1352	ld	24,8*14(1)
1353	std	25,8*4(5)
1354	ld	25,8*15(1)
1355	std	26,8*5(5)
1356	ld	26,8*16(1)
1357	std	27,8*6(5)
1358	ld	27,8*17(1)
1359	std	28,8*7(5)
1360	ld	28,8*18(1)
1361	std	29,8*8(5)
1362	ld	29,8*19(1)
1363	addi	5,1,8*11
1364	mtctr	30
1365	b	.Lsqr8x_reduction
1366
1367.align	5
/*
 * Eight iterations: add n[0..7]*r31 to the accumulators and shift them
 * down one word.  Each multiplier r31 is stored over the t[] word it
 * eliminates (stdu) so .Lsqr8x_tail can reuse it, and the next multiplier
 * is computed from the new r22.
 */
1368.Lsqr8x_reduction:
1369
1370	mulld	19,10,31
1371	mulld	20,11,31
1372	stdu	31,8(5)
1373	mulld	21,12,31
1374
/*
 * No lo(n[0]*r31) product is computed.  addic r22,r22,-1 sets CA exactly
 * when r22 is non-zero.  NOTE(review): presumably this stands in for the
 * carry of r22 + lo(n[0]*r31), whose low word is expected to be zero --
 * confirm.
 */
1375	addic	22,22,-1
1376	mulld	18,14,31
1377	adde	22,23,19
1378	mulld	19,15,31
1379	adde	23,24,20
1380	mulld	20,16,31
1381	adde	24,25,21
1382	mulld	21,17,31
1383	adde	25,26,18
1384	mulhdu	18,9,31
1385	adde	26,27,19
1386	mulhdu	19,10,31
1387	adde	27,28,20
1388	mulhdu	20,11,31
1389	adde	28,29,21
1390	mulhdu	21,12,31
1391	addze	29,0
1392	addc	22,22,18
1393	mulhdu	18,14,31
1394	adde	23,23,19
1395	mulhdu	19,15,31
1396	adde	24,24,20
1397	mulhdu	20,16,31
1398	adde	25,25,21
1399	mulhdu	21,17,31
1400	mulld	31,7,22
1401	adde	26,26,18
1402	adde	27,27,19
1403	adde	28,28,20
1404	adde	29,29,21
1405	bdnz	.Lsqr8x_reduction
1406
/*
 * Add the next eight t[] words.  r3 points at the eight multipliers just
 * stored.  If n[] is already exhausted the operands are eight words long.
 */
1407	ld	18,8*1(5)
1408	ld	19,8*2(5)
1409	ld	20,8*3(5)
1410	ld	21,8*4(5)
1411	subi	3,5,8*7
1412	cmpld	6,4
1413	addc	22,22,18
1414	ld	18,8*5(5)
1415	adde	23,23,19
1416	ld	19,8*6(5)
1417	adde	24,24,20
1418	ld	20,8*7(5)
1419	adde	25,25,21
1420	ld	21,8*8(5)
1421	adde	26,26,18
1422	adde	27,27,19
1423	adde	28,28,20
1424	adde	29,29,21
1425
1426	beq	.Lsqr8x8_post_condition
1427
/* r7 = first stored multiplier; load the next eight n[] words. */
1428	ld	7,8*0(3)
1429	ld	9,8*1(4)
1430	ld	10,8*2(4)
1431	ld	11,8*3(4)
1432	ld	12,8*4(4)
1433	ld	14,8*5(4)
1434	ld	15,8*6(4)
1435	ld	16,8*7(4)
1436	ldu	17,8*8(4)
1437	li	30,0
1438
1439.align	5
/*
 * Same shape as .Lsqr8x_mul: multiply the loaded n[] words by each of
 * the eight stored multipliers in turn, emitting one t[] word per
 * iteration.
 */
1440.Lsqr8x_tail:
1441	mulld	18,9,7
1442	addze	31,0
1443	mulld	19,10,7
1444	addi	30,30,8
1445	mulld	20,11,7
1446	andi.	30,30,8*8-1
1447	mulld	21,12,7
1448	addc	22,22,18
1449	mulld	18,14,7
1450	adde	23,23,19
1451	mulld	19,15,7
1452	adde	24,24,20
1453	mulld	20,16,7
1454	adde	25,25,21
1455	mulld	21,17,7
1456	adde	26,26,18
1457	mulhdu	18,9,7
1458	adde	27,27,19
1459	mulhdu	19,10,7
1460	adde	28,28,20
1461	mulhdu	20,11,7
1462	adde	29,29,21
1463	mulhdu	21,12,7
1464	addze	31,31
1465	stdu	22,8(5)
1466	addc	22,23,18
1467	mulhdu	18,14,7
1468	adde	23,24,19
1469	mulhdu	19,15,7
1470	adde	24,25,20
1471	mulhdu	20,16,7
1472	adde	25,26,21
1473	mulhdu	21,17,7
1474	ldx	7,3,30
1475	adde	26,27,18
1476	adde	27,28,19
1477	adde	28,29,20
1478	adde	29,31,21
1479
1480	bne	.Lsqr8x_tail
1481
1482
/*
 * Load the next eight t[] words and the saved top-most carry (r31);
 * r20 = n[] pointer rewound to its biased start.
 */
1483	ld	9,8*1(5)
1484	ld	31,8*10(1)
1485	cmpld	6,4
1486	ld	10,8*2(5)
1487	sub	20,6,8
1488	ld	11,8*3(5)
1489	ld	12,8*4(5)
1490	ld	14,8*5(5)
1491	ld	15,8*6(5)
1492	ld	16,8*7(5)
1493	ld	17,8*8(5)
1494	beq	.Lsqr8x_tail_break
1495
1496	addc	22,22,9
1497	ld	9,8*1(4)
1498	adde	23,23,10
1499	ld	10,8*2(4)
1500	adde	24,24,11
1501	ld	11,8*3(4)
1502	adde	25,25,12
1503	ld	12,8*4(4)
1504	adde	26,26,14
1505	ld	14,8*5(4)
1506	adde	27,27,15
1507	ld	15,8*6(4)
1508	adde	28,28,16
1509	ld	16,8*7(4)
1510	adde	29,29,17
1511	ldu	17,8*8(4)
1512
1513	b	.Lsqr8x_tail
1514
1515.align	5
/*
 * End of one reduction round.  addic r31,r31,-1 moves the saved top-most
 * carry into CA; the top eight words are added and stored, the new carry
 * is saved back to 8*10(r1), n[0..7] and the next eight t[] words are
 * reloaded, and the next multiplier is computed.  The round repeats until
 * r5+64 equals the end marker saved at 8*9(r1).
 */
1516.Lsqr8x_tail_break:
1517	ld	7,8*8(1)
1518	ld	21,8*9(1)
1519	addi	30,5,8*8
1520
1521	addic	31,31,-1
1522	adde	18,22,9
1523	ld	22,8*8(3)
1524	ld	9,8*1(20)
1525	adde	19,23,10
1526	ld	23,8*9(3)
1527	ld	10,8*2(20)
1528	adde	24,24,11
1529	ld	11,8*3(20)
1530	adde	25,25,12
1531	ld	12,8*4(20)
1532	adde	26,26,14
1533	ld	14,8*5(20)
1534	adde	27,27,15
1535	ld	15,8*6(20)
1536	adde	28,28,16
1537	ld	16,8*7(20)
1538	adde	29,29,17
1539	ld	17,8*8(20)
1540	addi	4,20,8*8
1541	addze	20,0
1542	mulld	31,7,22
1543	std	18,8*1(5)
1544	cmpld	30,21
1545	std	19,8*2(5)
1546	li	30,8
1547	std	24,8*3(5)
1548	ld	24,8*10(3)
1549	std	25,8*4(5)
1550	ld	25,8*11(3)
1551	std	26,8*5(5)
1552	ld	26,8*12(3)
1553	std	27,8*6(5)
1554	ld	27,8*13(3)
1555	std	28,8*7(5)
1556	ld	28,8*14(3)
1557	std	29,8*8(5)
1558	ld	29,8*15(3)
1559	std	20,8*10(1)
1560	addi	5,3,8*7
1561	mtctr	30
1562	bne	.Lsqr8x_reduction
1563
1564
1565
1566
1567
1568
/*
 * Reduction finished.  Subtract n[] from the upper half of t[] (r5
 * after the +64 adjustment) and store the difference through the
 * destination (r3, with a second copy of the pointer in r6).  r7 keeps
 * the old r5 for the select loop; r31 = top-most carry;
 * CTR = r8/64 - 1.
 */
1569	ld	3,8*6(1)
1570	srwi	30,8,6
1571	mr	7,5
1572	addi	5,5,8*8
1573	subi	30,30,1
1574	subfc	18,9,22
1575	subfe	19,10,23
1576	mr	31,20
1577	mr	6,3
1578
1579	mtctr	30
1580	b	.Lsqr8x_sub
1581
1582.align	5
1583.Lsqr8x_sub:
1584	ld	9,8*1(4)
1585	ld	22,8*1(5)
1586	ld	10,8*2(4)
1587	ld	23,8*2(5)
1588	subfe	20,11,24
1589	ld	11,8*3(4)
1590	ld	24,8*3(5)
1591	subfe	21,12,25
1592	ld	12,8*4(4)
1593	ld	25,8*4(5)
1594	std	18,8*1(3)
1595	subfe	18,14,26
1596	ld	14,8*5(4)
1597	ld	26,8*5(5)
1598	std	19,8*2(3)
1599	subfe	19,15,27
1600	ld	15,8*6(4)
1601	ld	27,8*6(5)
1602	std	20,8*3(3)
1603	subfe	20,16,28
1604	ld	16,8*7(4)
1605	ld	28,8*7(5)
1606	std	21,8*4(3)
1607	subfe	21,17,29
1608	ldu	17,8*8(4)
1609	ldu	29,8*8(5)
1610	std	18,8*5(3)
1611	subfe	18,9,22
1612	std	19,8*6(3)
1613	subfe	19,10,23
1614	std	20,8*7(3)
1615	stdu	21,8*8(3)
1616	bdnz	.Lsqr8x_sub
1617
/*
 * Last eight words of the subtraction, interleaved with loading the
 * first group for the select loop: r9-r12 = stored differences (via r6),
 * r22-r25 = unreduced words (via r7).  subfe r31,r0,r31 gives
 * r31 = r31 - 1 + CA, the select mask.  CTR = r8/32 - 1.
 */
1618	srwi	30,8,5
1619	ld	9,8*1(6)
1620	ld	22,8*1(7)
1621	subi	30,30,1
1622	ld	10,8*2(6)
1623	ld	23,8*2(7)
1624	subfe	20,11,24
1625	ld	11,8*3(6)
1626	ld	24,8*3(7)
1627	subfe	21,12,25
1628	ld	12,8*4(6)
1629	ldu	25,8*4(7)
1630	std	18,8*1(3)
1631	subfe	18,14,26
1632	std	19,8*2(3)
1633	subfe	19,15,27
1634	std	20,8*3(3)
1635	subfe	20,16,28
1636	std	21,8*4(3)
1637	subfe	21,17,29
1638	std	18,8*5(3)
1639	subfe	31,0,31
1640	std	19,8*6(3)
1641	std	20,8*7(3)
1642	std	21,8*8(3)
1643
1644	addi	5,1,8*11
1645	mtctr	30
1646
/*
 * Branch-free select: result = (unreduced & mask) | (difference & ~mask),
 * four words per iteration.  Zeros are written over the words just read
 * through r7 and over four more t[] words through r5.
 */
1647.Lsqr4x_cond_copy:
1648	andc	9,9,31
1649	std	0,-8*3(7)
1650	and	22,22,31
1651	std	0,-8*2(7)
1652	andc	10,10,31
1653	std	0,-8*1(7)
1654	and	23,23,31
1655	std	0,-8*0(7)
1656	andc	11,11,31
1657	std	0,8*1(5)
1658	and	24,24,31
1659	std	0,8*2(5)
1660	andc	12,12,31
1661	std	0,8*3(5)
1662	and	25,25,31
1663	stdu	0,8*4(5)
1664	or	18,9,22
1665	ld	9,8*5(6)
1666	ld	22,8*1(7)
1667	or	19,10,23
1668	ld	10,8*6(6)
1669	ld	23,8*2(7)
1670	or	20,11,24
1671	ld	11,8*7(6)
1672	ld	24,8*3(7)
1673	or	21,12,25
1674	ld	12,8*8(6)
1675	ldu	25,8*4(7)
1676	std	18,8*1(6)
1677	std	19,8*2(6)
1678	std	20,8*3(6)
1679	stdu	21,8*4(6)
1680	bdnz	.Lsqr4x_cond_copy
1681
/* Last group of the select; r4 = back chain for the epilogue. */
1682	ld	4,0(1)
1683	andc	9,9,31
1684	and	22,22,31
1685	andc	10,10,31
1686	and	23,23,31
1687	andc	11,11,31
1688	and	24,24,31
1689	andc	12,12,31
1690	and	25,25,31
1691	or	18,9,22
1692	or	19,10,23
1693	or	20,11,24
1694	or	21,12,25
1695	std	18,8*1(6)
1696	std	19,8*2(6)
1697	std	20,8*3(6)
1698	std	21,8*4(6)
1699
1700	b	.Lsqr8x_done
1701
1702.align	5
/*
 * Eight-word operands: the result is still in r22-r29.  Subtract
 * n[0..7], clear t[] (8*12..8*27(r1)) along the way, turn borrow/carry
 * into a mask (r31 = r31 - 1 + CA), add back n & mask and store through
 * the saved destination pointer.
 */
1703.Lsqr8x8_post_condition:
1704	ld	3,8*6(1)
1705	ld	4,0(1)
1706	addze	31,0
1707
1708
1709	subfc	22,9,22
1710	subfe	23,10,23
1711	std	0,8*12(1)
1712	std	0,8*13(1)
1713	subfe	24,11,24
1714	std	0,8*14(1)
1715	std	0,8*15(1)
1716	subfe	25,12,25
1717	std	0,8*16(1)
1718	std	0,8*17(1)
1719	subfe	26,14,26
1720	std	0,8*18(1)
1721	std	0,8*19(1)
1722	subfe	27,15,27
1723	std	0,8*20(1)
1724	std	0,8*21(1)
1725	subfe	28,16,28
1726	std	0,8*22(1)
1727	std	0,8*23(1)
1728	subfe	29,17,29
1729	std	0,8*24(1)
1730	std	0,8*25(1)
1731	subfe	31,0,31
1732	std	0,8*26(1)
1733	std	0,8*27(1)
1734
1735	and	9,9,31
1736	and	10,10,31
1737	addc	22,22,9
1738	and	11,11,31
1739	adde	23,23,10
1740	and	12,12,31
1741	adde	24,24,11
1742	and	14,14,31
1743	adde	25,25,12
1744	and	15,15,31
1745	adde	26,26,14
1746	and	16,16,31
1747	adde	27,27,15
1748	and	17,17,31
1749	adde	28,28,16
1750	adde	29,29,17
1751	std	22,8*1(3)
1752	std	23,8*2(3)
1753	std	24,8*3(3)
1754	std	25,8*4(3)
1755	std	26,8*5(3)
1756	std	27,8*6(3)
1757	std	28,8*7(3)
1758	std	29,8*8(3)
1759
/*
 * Common exit: clear the saved constant and the carry slot, return 1,
 * restore r14-r31 relative to the back chain in r4 and pop the frame.
 */
1760.Lsqr8x_done:
1761	std	0,8*8(1)
1762	std	0,8*10(1)
1763
1764	ld	14,-8*18(4)
1765	li	3,1
1766	ld	15,-8*17(4)
1767	ld	16,-8*16(4)
1768	ld	17,-8*15(4)
1769	ld	18,-8*14(4)
1770	ld	19,-8*13(4)
1771	ld	20,-8*12(4)
1772	ld	21,-8*11(4)
1773	ld	22,-8*10(4)
1774	ld	23,-8*9(4)
1775	ld	24,-8*8(4)
1776	ld	25,-8*7(4)
1777	ld	26,-8*6(4)
1778	ld	27,-8*5(4)
1779	ld	28,-8*4(4)
1780	ld	29,-8*3(4)
1781	ld	30,-8*2(4)
1782	ld	31,-8*1(4)
1783	mr	1,4
1784	blr
/* NOTE(review): the words below look like a traceback table -- confirm. */
1785.long	0
1786.byte	0,12,4,0x20,0x80,18,6,0
1787.long	0
1788.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
/*
 * NUL-terminated ASCII identification string embedded in .text:
 * "Montgomery Multiplication for PPC,CRYPTOGAMS by <appro@openssl.org>"
 * followed by realignment to a 4-byte boundary.
 */
1789.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1790.align	2
1791