xref: /freebsd/sys/crypto/openssl/powerpc64le/ppc-mont.S (revision e32fecd0c2c3ee37c47ee100f169e7eb0282a873)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from ppc-mont.pl. */
3.machine	"any"
4.abiversion	2
5.text
6
/*
 * bn_mul_mont_int -- word-serial multiply-and-reduce on vectors of 64-bit
 * words (the identification string at the end of this file calls the
 * algorithm Montgomery multiplication).
 *
 * Register use as visible in the code (ELFv2 passes arguments in r3..r8):
 *   r3  destination vector; copied to r9 on entry, r3 is then reused as
 *       the top carry word and finally as the return value
 *   r4  vector indexed by r21 and multiplied by the current r5 word
 *   r5  vector consumed one word per outer pass (into r23)
 *   r6  vector indexed by r21, multiplied by r24 and subtracted at the end
 *   r7  pointer to one 64-bit constant, dereferenced once into r7
 *   r8  word count
 * NOTE(review): presumably rp, ap, bp, np, &n0 and num of OpenSSL's
 * bn_mul_mont() -- confirm against the generator script.
 *
 * Returns 1 in r3.
 */
7.globl	bn_mul_mont_int
8.type	bn_mul_mont_int,@function
9.align	5
10bn_mul_mont_int:
11.localentry	bn_mul_mont_int,0
12
13	mr	9,3
14	li	3,0
/*
 * Allocate the frame: new r1 = (r1 - (8*r8 + 352)) rounded down to a
 * 4096-byte boundary.  stdux stores the old r1 as back chain and updates
 * r1 in one step; r12 keeps the old r1 for the register saves below.
 * slwi/srwi are 32-bit shifts, so r8 is left with its upper 32 bits clear.
 */
15	slwi	8,8,3
16	li	12,-4096
17	addi	3,8,352
18	subf	3,3,1
19	and	3,3,12
20	subf	3,1,3
21	mr	12,1
22	srwi	8,8,3
23	stdux	1,1,3
24
/* Save r20-r31 just below the caller's stack pointer. */
25	std	20,-96(12)
26	std	21,-88(12)
27	std	22,-80(12)
28	std	23,-72(12)
29	std	24,-64(12)
30	std	25,-56(12)
31	std	26,-48(12)
32	std	27,-40(12)
33	std	28,-32(12)
34	std	29,-24(12)
35	std	30,-16(12)
36	std	31,-8(12)
37
/* r7 = *r7; r8 = count - 2 becomes the trip count loaded into CTR. */
38	ld	7,0(7)
39	addi	8,8,-2
40
/*
 * First pass, words 0 and 1 (software-pipelined ahead of .L1st):
 *   r23 = r5[0], r25:r26 = lo:hi(r4[0]*r23), r24 = lo(r25*r7),
 *   r27:r28 = lo:hi(r6[0]*r24) + r25, r22 = r1+64 (temporary vector).
 */
41	ld	23,0(5)
42	ld	10,0(4)
43	addi	22,1,64
44	mulld	25,10,23
45	mulhdu	26,10,23
46
47	ld	10,8(4)
48	ld	11,0(6)
49
50	mulld	24,25,7
51
52	mulld	29,10,23
53	mulhdu	30,10,23
54
55	mulld	27,11,24
56	mulhdu	28,11,24
57	ld	11,8(6)
58	addc	27,27,25
59	addze	28,28
60
61	mulld	31,11,24
62	mulhdu	0,11,24
63
64	mtctr	8
65	li	21,16
66.align	4
/*
 * .L1st: for each further word (byte index r21) fold the previously
 * computed products into the two carry chains (r25/r26 and r27/r28),
 * start the next pair of multiplications, and store one result word to
 * the temporary vector at r22.
 */
67.L1st:
68	ldx	10,4,21
69	addc	25,29,26
70	ldx	11,6,21
71	addze	26,30
72	mulld	29,10,23
73	addc	27,31,28
74	mulhdu	30,10,23
75	addze	28,0
76	mulld	31,11,24
77	addc	27,27,25
78	mulhdu	0,11,24
79	addze	28,28
80	std	27,0(22)
81
82	addi	21,21,8
83	addi	22,22,8
84	bdnz	.L1st
85
/* Drain the pipeline: last product pair, then the two top words. */
86	addc	25,29,26
87	addze	26,30
88
89	addc	27,31,28
90	addze	28,0
91	addc	27,27,25
92	addze	28,28
93	std	27,0(22)
94
/* r3 collects the carry out of the top word (0 or 1). */
95	li	3,0
96	addc	28,28,26
97	addze	3,3
98	std	28,8(22)
99
100	li	20,8
101.align	4
/*
 * .Louter: one pass per remaining word of the r5 vector (byte index r20).
 * Same structure as the first pass, but the existing temporary words
 * (loaded into r12) are added in as well.
 */
102.Louter:
103	ldx	23,5,20
104	ld	10,0(4)
105	addi	22,1,64
106	ld	12,64(1)
107	mulld	25,10,23
108	mulhdu	26,10,23
109	ld	10,8(4)
110	ld	11,0(6)
111	addc	25,25,12
112	mulld	29,10,23
113	addze	26,26
114	mulld	24,25,7
115	mulhdu	30,10,23
116	mulld	27,11,24
117	mulhdu	28,11,24
118	ld	11,8(6)
119	addc	27,27,25
120	mulld	31,11,24
121	addze	28,28
122	mulhdu	0,11,24
123
124	mtctr	8
125	li	21,16
126.align	4
127.Linner:
128	ldx	10,4,21
129	addc	25,29,26
130	ld	12,8(22)
131	addze	26,30
132	ldx	11,6,21
133	addc	27,31,28
134	mulld	29,10,23
135	addze	28,0
136	mulhdu	30,10,23
137	addc	25,25,12
138	mulld	31,11,24
139	addze	26,26
140	mulhdu	0,11,24
141	addc	27,27,25
142	addi	21,21,8
143	addze	28,28
144	std	27,0(22)
145	addi	22,22,8
146	bdnz	.Linner
147
148	ld	12,8(22)
149	addc	25,29,26
150	addze	26,30
151	addc	25,25,12
152	addze	26,26
153
154	addc	27,31,28
155	addze	28,0
156	addc	27,27,25
157	addze	28,28
158	std	27,0(22)
159
/*
 * addic r3,r3,-1 sets XER[CA] exactly when r3 was non-zero, i.e. it moves
 * the previous pass's top carry into CA so that the adde below consumes
 * it; r3 is then rebuilt from the new carry out.
 */
160	addic	3,3,-1
161	li	3,0
162	adde	28,28,26
163	addze	3,3
164	std	28,8(22)
165
/* Continue while the byte index just used is <= 8*r8 (r8 = count - 2). */
166	slwi	12,8,3
167	cmpld	20,12
168	addi	20,20,8
169	ble	.Louter
170
/*
 * Final subtraction.  r8 is restored to the full word count; subfc of a
 * register with itself yields r21 = 0 and sets CA (no borrow pending).
 * .Lsub writes temp[j] - r6[j] to the destination (r9) with a borrow chain.
 */
171	addi	8,8,2
172	subfc	21,21,21
173	addi	22,1,64
174	mtctr	8
175
176.align	4
177.Lsub:	ldx	12,22,21
178	ldx	11,6,21
179	subfe	10,11,12
180	stdx	10,9,21
181	addi	21,21,8
182	bdnz	.Lsub
183
/*
 * r3 = r3 - 1 + CA (r21 is 0 here).  With r3 and CA each 0 or 1 this is
 * all-ones when the subtraction borrowed and there was no top carry, and
 * 0 when exactly one of them is set.
 */
184	li	21,0
185	mtctr	8
186	subfe	3,21,3
187
188.align	4
/*
 * .Lcopy: branch-free select, destination[j] = (temp[j] & r3) |
 * (destination[j] & ~r3).  Each temporary word is overwritten with the
 * loop index r21 in the same pass, which discards the intermediate value.
 */
189.Lcopy:
190	ldx	12,22,21
191	ldx	10,9,21
192	and	12,12,3
193	andc	10,10,3
194	stdx	21,22,21
195	or	10,10,12
196	stdx	10,9,21
197	addi	21,21,8
198	bdnz	.Lcopy
199
/* Epilogue: fetch the back chain, return 1, restore r20-r31 and r1. */
200	ld	12,0(1)
201	li	3,1
202	ld	20,-96(12)
203	ld	21,-88(12)
204	ld	22,-80(12)
205	ld	23,-72(12)
206	ld	24,-64(12)
207	ld	25,-56(12)
208	ld	26,-48(12)
209	ld	27,-40(12)
210	ld	28,-32(12)
211	ld	29,-24(12)
212	ld	30,-16(12)
213	ld	31,-8(12)
214	mr	1,12
215	blr
/* NOTE(review): the three data words below look like a traceback tag -- confirm. */
216.long	0
217.byte	0,12,4,0,0x80,12,6,0
218.long	0
219.size	bn_mul_mont_int,.-bn_mul_mont_int
/*
 * bn_mul4x_mont_int -- four-words-per-step variant of the multiply-and-
 * reduce routine.  Arguments arrive in r3..r8 as for bn_mul_mont_int
 * (r3 destination, r4/r5 multiplicand vectors, r6 vector used for the
 * reduction and final subtraction, r7 pointer to a 64-bit constant,
 * r8 word count).
 * NOTE(review): presumably requires r8 to be a multiple of 4 -- the loops
 * advance four words at a time; confirm against the caller.
 *
 * When r8 is a multiple of 8 and r4 == r5, control is transferred to
 * .Lsqr8x_do instead.  Returns 1 in r3.
 */
220.globl	bn_mul4x_mont_int
221.type	bn_mul4x_mont_int,@function
222.align	5
223bn_mul4x_mont_int:
224.localentry	bn_mul4x_mont_int,0
225
/* Dispatch: (r8 & 7) == 0 and r4 == r5 -> squaring path. */
226	andi.	0,8,7
227	bne	.Lmul4x_do
228	cmpld	4,5
229	bne	.Lmul4x_do
230	b	.Lsqr8x_do
231.Lmul4x_do:
/* Frame: r1 - (32*8 + 8*r8); r9 keeps the old r1 for the saves; r8 is now in bytes. */
232	slwi	8,8,3
233	mr	9,1
234	li	10,-32*8
235	sub	10,10,8
236	stdux	1,1,10
237
238	std	14,-8*18(9)
239	std	15,-8*17(9)
240	std	16,-8*16(9)
241	std	17,-8*15(9)
242	std	18,-8*14(9)
243	std	19,-8*13(9)
244	std	20,-8*12(9)
245	std	21,-8*11(9)
246	std	22,-8*10(9)
247	std	23,-8*9(9)
248	std	24,-8*8(9)
249	std	25,-8*7(9)
250	std	26,-8*6(9)
251	std	27,-8*5(9)
252	std	28,-8*4(9)
253	std	29,-8*3(9)
254	std	30,-8*2(9)
255	std	31,-8*1(9)
256
/*
 * Bias r4, r6 and r3 by one word so that offsets 8*1..8*4 address the
 * next four words and ldu/stdu with 8*4 advances to the next group.
 */
257	subi	4,4,8
258	subi	6,6,8
259	subi	3,3,8
260	ld	7,0(7)
261
/* r14 = r5 + 8*count - 32 (last group of the r5 vector), r30 = end of the biased r4 vector. */
262	add	14,5,8
263	add	30,4,8
264	subi	14,14,8*4
265
/* r27 = first r5 word; r22-r25 = accumulators; r9-r12 / r18-r21 = four words of r4 / r6. */
266	ld	27,8*0(5)
267	li	22,0
268	ld	9,8*1(4)
269	li	23,0
270	ld	10,8*2(4)
271	li	24,0
272	ld	11,8*3(4)
273	li	25,0
274	ldu	12,8*4(4)
275	ld	18,8*1(6)
276	ld	19,8*2(6)
277	ld	20,8*3(6)
278	ldu	21,8*4(6)
279
/*
 * Frame slots: 8*6(r1) = biased destination, 8*7(r1) = r14 (end marker).
 * r3 becomes the carry word, r29 the temporary-vector pointer, r31 a byte
 * counter, r0 a constant zero.  addic also writes XER[CA].
 */
280	std	3,8*6(1)
281	std	14,8*7(1)
282	li	3,0
283	addic	29,1,8*7
284	li	31,0
285	li	0,0
286	b	.Loop_mul4x_1st_reduction
287
288.align	5
/*
 * Four passes (r31 steps 8,16,24,0 under the andi. mask): each pass adds
 * r4[0..3]*r27 to the accumulators, computes r28 = lo(r22*r7), stores r28
 * via r29 for the tail loop, and adds r6[0..3]*r28 while shifting the
 * accumulators down one word.  The low product r18*r28 is not computed;
 * addic r22,r22,-1 produces the carry it would have generated.
 */
289.Loop_mul4x_1st_reduction:
290	mulld	14,9,27
291	addze	3,3
292	mulld	15,10,27
293	addi	31,31,8
294	mulld	16,11,27
295	andi.	31,31,8*4-1
296	mulld	17,12,27
297	addc	22,22,14
298	mulhdu	14,9,27
299	adde	23,23,15
300	mulhdu	15,10,27
301	adde	24,24,16
302	mulld	28,22,7
303	adde	25,25,17
304	mulhdu	16,11,27
305	addze	26,0
306	mulhdu	17,12,27
307	ldx	27,5,31
308	addc	23,23,14
309
310	stdu	28,8(29)
311	adde	24,24,15
312	mulld	15,19,28
313	adde	25,25,16
314	mulld	16,20,28
315	adde	26,26,17
316	mulld	17,21,28
317
318
319
320
321
322
323
324
325
326
327	addic	22,22,-1
328	mulhdu	14,18,28
329	adde	22,23,15
330	mulhdu	15,19,28
331	adde	23,24,16
332	mulhdu	16,20,28
333	adde	24,25,17
334	mulhdu	17,21,28
335	adde	25,26,3
336	addze	3,0
337	addc	22,22,14
338	adde	23,23,15
339	adde	24,24,16
340	adde	25,25,17
341
342	bne	.Loop_mul4x_1st_reduction
343
/* r4 already at the end of its vector: the whole operand is four words. */
344	cmpld	30,4
345	beq	.Lmul4x4_post_condition
346
347	ld	9,8*1(4)
348	ld	10,8*2(4)
349	ld	11,8*3(4)
350	ldu	12,8*4(4)
351	ld	28,8*8(1)
352	ld	18,8*1(6)
353	ld	19,8*2(6)
354	ld	20,8*3(6)
355	ldu	21,8*4(6)
356	b	.Loop_mul4x_1st_tail
357
358.align	5
/*
 * Tail of the first pass: remaining groups of four words of r4/r6 are
 * multiplied by the same four r5 words (r27) and by the four saved r28
 * values (reloaded from 8*8(r1)+r31); one word per pass goes out via r29.
 */
359.Loop_mul4x_1st_tail:
360	mulld	14,9,27
361	addze	3,3
362	mulld	15,10,27
363	addi	31,31,8
364	mulld	16,11,27
365	andi.	31,31,8*4-1
366	mulld	17,12,27
367	addc	22,22,14
368	mulhdu	14,9,27
369	adde	23,23,15
370	mulhdu	15,10,27
371	adde	24,24,16
372	mulhdu	16,11,27
373	adde	25,25,17
374	mulhdu	17,12,27
375	addze	26,0
376	ldx	27,5,31
377	addc	23,23,14
378	mulld	14,18,28
379	adde	24,24,15
380	mulld	15,19,28
381	adde	25,25,16
382	mulld	16,20,28
383	adde	26,26,17
384	mulld	17,21,28
385	addc	22,22,14
386	mulhdu	14,18,28
387	adde	23,23,15
388	mulhdu	15,19,28
389	adde	24,24,16
390	mulhdu	16,20,28
391	adde	25,25,17
392	adde	26,26,3
393	mulhdu	17,21,28
394	addze	3,0
395	addi	28,1,8*8
396	ldx	28,28,31
397	stdu	22,8(29)
398	addc	22,23,14
399	adde	23,24,15
400	adde	24,25,16
401	adde	25,26,17
402
403	bne	.Loop_mul4x_1st_tail
404
/* r15 = start of the (biased) r4 vector, used to rewind in .Lmul4x_proceed. */
405	sub	15,30,8
406	cmpld	30,4
407	beq	.Lmul4x_proceed
408
409	ld	9,8*1(4)
410	ld	10,8*2(4)
411	ld	11,8*3(4)
412	ldu	12,8*4(4)
413	ld	18,8*1(6)
414	ld	19,8*2(6)
415	ld	20,8*3(6)
416	ldu	21,8*4(6)
417	b	.Loop_mul4x_1st_tail
418
419.align	5
/*
 * First pass finished: advance r5 by four words, rewind r4 and r6 to
 * their first group, flush the top accumulators plus carry to the
 * temporary vector and reload its lowest four words as accumulators.
 */
420.Lmul4x_proceed:
421	ldu	27,8*4(5)
422	addze	3,3
423	ld	9,8*1(15)
424	ld	10,8*2(15)
425	ld	11,8*3(15)
426	ld	12,8*4(15)
427	addi	4,15,8*4
428	sub	6,6,8
429
430	std	22,8*1(29)
431	std	23,8*2(29)
432	std	24,8*3(29)
433	std	25,8*4(29)
434	std	3,8*5(29)
435	ld	22,8*12(1)
436	ld	23,8*13(1)
437	ld	24,8*14(1)
438	ld	25,8*15(1)
439
440	ld	18,8*1(6)
441	ld	19,8*2(6)
442	ld	20,8*3(6)
443	ldu	21,8*4(6)
444	addic	29,1,8*7
445	li	3,0
446	b	.Loop_mul4x_reduction
447
448.align	5
/* Same instruction sequence as .Loop_mul4x_1st_reduction, for later groups of r5. */
449.Loop_mul4x_reduction:
450	mulld	14,9,27
451	addze	3,3
452	mulld	15,10,27
453	addi	31,31,8
454	mulld	16,11,27
455	andi.	31,31,8*4-1
456	mulld	17,12,27
457	addc	22,22,14
458	mulhdu	14,9,27
459	adde	23,23,15
460	mulhdu	15,10,27
461	adde	24,24,16
462	mulld	28,22,7
463	adde	25,25,17
464	mulhdu	16,11,27
465	addze	26,0
466	mulhdu	17,12,27
467	ldx	27,5,31
468	addc	23,23,14
469
470	stdu	28,8(29)
471	adde	24,24,15
472	mulld	15,19,28
473	adde	25,25,16
474	mulld	16,20,28
475	adde	26,26,17
476	mulld	17,21,28
477
478	addic	22,22,-1
479	mulhdu	14,18,28
480	adde	22,23,15
481	mulhdu	15,19,28
482	adde	23,24,16
483	mulhdu	16,20,28
484	adde	24,25,17
485	mulhdu	17,21,28
486	adde	25,26,3
487	addze	3,0
488	addc	22,22,14
489	adde	23,23,15
490	adde	24,24,16
491	adde	25,25,17
492
493	bne	.Loop_mul4x_reduction
494
/* Add the next four temporary words into the accumulators and load the next group. */
495	ld	14,8*5(29)
496	addze	3,3
497	ld	15,8*6(29)
498	ld	16,8*7(29)
499	ld	17,8*8(29)
500	ld	9,8*1(4)
501	ld	10,8*2(4)
502	ld	11,8*3(4)
503	ldu	12,8*4(4)
504	addc	22,22,14
505	adde	23,23,15
506	adde	24,24,16
507	adde	25,25,17
508
509
510	ld	28,8*8(1)
511	ld	18,8*1(6)
512	ld	19,8*2(6)
513	ld	20,8*3(6)
514	ldu	21,8*4(6)
515	b	.Loop_mul4x_tail
516
517.align	5
/* Same structure as .Loop_mul4x_1st_tail; the temporary words are added between groups below. */
518.Loop_mul4x_tail:
519	mulld	14,9,27
520	addze	3,3
521	mulld	15,10,27
522	addi	31,31,8
523	mulld	16,11,27
524	andi.	31,31,8*4-1
525	mulld	17,12,27
526	addc	22,22,14
527	mulhdu	14,9,27
528	adde	23,23,15
529	mulhdu	15,10,27
530	adde	24,24,16
531	mulhdu	16,11,27
532	adde	25,25,17
533	mulhdu	17,12,27
534	addze	26,0
535	ldx	27,5,31
536	addc	23,23,14
537	mulld	14,18,28
538	adde	24,24,15
539	mulld	15,19,28
540	adde	25,25,16
541	mulld	16,20,28
542	adde	26,26,17
543	mulld	17,21,28
544	addc	22,22,14
545	mulhdu	14,18,28
546	adde	23,23,15
547	mulhdu	15,19,28
548	adde	24,24,16
549	mulhdu	16,20,28
550	adde	25,25,17
551	mulhdu	17,21,28
552	adde	26,26,3
553	addi	28,1,8*8
554	ldx	28,28,31
555	addze	3,0
556	stdu	22,8(29)
557	addc	22,23,14
558	adde	23,24,15
559	adde	24,25,16
560	adde	25,26,17
561
562	bne	.Loop_mul4x_tail
563
/* r15 = start of the (biased) r6 vector; leave the tail once r4 reaches its end (r30). */
564	ld	14,8*5(29)
565	sub	15,6,8
566	addze	3,3
567	cmpld	30,4
568	beq	.Loop_mul4x_break
569
570	ld	15,8*6(29)
571	ld	16,8*7(29)
572	ld	17,8*8(29)
573	ld	9,8*1(4)
574	ld	10,8*2(4)
575	ld	11,8*3(4)
576	ldu	12,8*4(4)
577	addc	22,22,14
578	adde	23,23,15
579	adde	24,24,16
580	adde	25,25,17
581
582
583	ld	18,8*1(6)
584	ld	19,8*2(6)
585	ld	20,8*3(6)
586	ldu	21,8*4(6)
587	b	.Loop_mul4x_tail
588
589.align	5
/*
 * End of one outer pass: r16 = saved destination, r17 = saved end marker
 * for r5.  Flush the top words and carry, reload the low four temporary
 * words, rewind r4 and r6, and either finish (r5 == r17) or start the
 * next group of four r5 words.
 */
590.Loop_mul4x_break:
591	ld	16,8*6(1)
592	ld	17,8*7(1)
593	addc	9,22,14
594	ld	22,8*12(1)
595	addze	10,23
596	ld	23,8*13(1)
597	addze	11,24
598	ld	24,8*14(1)
599	addze	12,25
600	ld	25,8*15(1)
601	addze	3,3
602	std	9,8*1(29)
603	sub	4,30,8
604	std	10,8*2(29)
605	std	11,8*3(29)
606	std	12,8*4(29)
607	std	3,8*5(29)
608
609	ld	18,8*1(15)
610	ld	19,8*2(15)
611	ld	20,8*3(15)
612	ld	21,8*4(15)
613	addi	6,15,8*4
614	cmpld	5,17
615	beq	.Lmul4x_post
616
617	ldu	27,8*4(5)
618	ld	9,8*1(4)
619	ld	10,8*2(4)
620	ld	11,8*3(4)
621	ldu	12,8*4(4)
622	li	3,0
623	addic	29,1,8*7
624	b	.Loop_mul4x_reduction
625
626.align	5
/*
 * Final subtraction: destination (r5 = r16) = temporary vector - r6
 * vector, four words per pass with one borrow chain.  r31 = (bytes/32)-1
 * is the trip count; r30 keeps the destination start for the copy below.
 */
627.Lmul4x_post:
628
629
630
631
632	srwi	31,8,5
633	mr	5,16
634	subi	31,31,1
635	mr	30,16
636	subfc	14,18,22
637	addi	29,1,8*15
638	subfe	15,19,23
639
640	mtctr	31
641.Lmul4x_sub:
642	ld	18,8*1(6)
643	ld	22,8*1(29)
644	subfe	16,20,24
645	ld	19,8*2(6)
646	ld	23,8*2(29)
647	subfe	17,21,25
648	ld	20,8*3(6)
649	ld	24,8*3(29)
650	ldu	21,8*4(6)
651	ldu	25,8*4(29)
652	std	14,8*1(5)
653	std	15,8*2(5)
654	subfe	14,18,22
655	std	16,8*3(5)
656	stdu	17,8*4(5)
657	subfe	15,19,23
658	bdnz	.Lmul4x_sub
659
/* Last group, then r3 = r3 - 1 + CA turns the top carry and final borrow into a select mask. */
660	ld	9,8*1(30)
661	std	14,8*1(5)
662	ld	14,8*12(1)
663	subfe	16,20,24
664	ld	10,8*2(30)
665	std	15,8*2(5)
666	ld	15,8*13(1)
667	subfe	17,21,25
668	subfe	3,0,3
669	addi	29,1,8*12
670	ld	11,8*3(30)
671	std	16,8*3(5)
672	ld	16,8*14(1)
673	ld	12,8*4(30)
674	std	17,8*4(5)
675	ld	17,8*15(1)
676
677	mtctr	31
/*
 * Branch-free select per word: destination = (temporary & r3) |
 * (destination & ~r3).  The temporary words are overwritten with zero
 * (r0) as they are consumed.
 */
678.Lmul4x_cond_copy:
679	and	14,14,3
680	andc	9,9,3
681	std	0,8*0(29)
682	and	15,15,3
683	andc	10,10,3
684	std	0,8*1(29)
685	and	16,16,3
686	andc	11,11,3
687	std	0,8*2(29)
688	and	17,17,3
689	andc	12,12,3
690	std	0,8*3(29)
691	or	22,14,9
692	ld	9,8*5(30)
693	ld	14,8*4(29)
694	or	23,15,10
695	ld	10,8*6(30)
696	ld	15,8*5(29)
697	or	24,16,11
698	ld	11,8*7(30)
699	ld	16,8*6(29)
700	or	25,17,12
701	ld	12,8*8(30)
702	ld	17,8*7(29)
703	addi	29,29,8*4
704	std	22,8*1(30)
705	std	23,8*2(30)
706	std	24,8*3(30)
707	stdu	25,8*4(30)
708	bdnz	.Lmul4x_cond_copy
709
/* r5 = back chain (caller's r1) for the epilogue; finish the last group. */
710	ld	5,0(1)
711	and	14,14,3
712	andc	9,9,3
713	std	0,8*0(29)
714	and	15,15,3
715	andc	10,10,3
716	std	0,8*1(29)
717	and	16,16,3
718	andc	11,11,3
719	std	0,8*2(29)
720	and	17,17,3
721	andc	12,12,3
722	std	0,8*3(29)
723	or	22,14,9
724	or	23,15,10
725	std	0,8*4(29)
726	or	24,16,11
727	or	25,17,12
728	std	22,8*1(30)
729	std	23,8*2(30)
730	std	24,8*3(30)
731	std	25,8*4(30)
732
733	b	.Lmul4x_done
734
735.align	4
/*
 * Four-word operands: the result is still in r22-r25.  Subtract the
 * r6 words (r18-r21), derive the mask r3 = r3 - 1 + CA, and add the
 * masked r6 words back, so the subtraction is undone when it borrowed.
 * r4 = saved destination, r5 = back chain.
 */
736.Lmul4x4_post_condition:
737	ld	4,8*6(1)
738	ld	5,0(1)
739	addze	3,3
740
741	subfc	9,18,22
742	subfe	10,19,23
743	subfe	11,20,24
744	subfe	12,21,25
745	subfe	3,0,3
746
747	and	18,18,3
748	and	19,19,3
749	addc	9,9,18
750	and	20,20,3
751	adde	10,10,19
752	and	21,21,3
753	adde	11,11,20
754	adde	12,12,21
755
756	std	9,8*1(4)
757	std	10,8*2(4)
758	std	11,8*3(4)
759	std	12,8*4(4)
760
/* Clear frame slots 8*8..8*11 (the stored r28 values), return 1, restore r14-r31 and r1. */
761.Lmul4x_done:
762	std	0,8*8(1)
763	std	0,8*9(1)
764	std	0,8*10(1)
765	std	0,8*11(1)
766	li	3,1
767	ld	14,-8*18(5)
768	ld	15,-8*17(5)
769	ld	16,-8*16(5)
770	ld	17,-8*15(5)
771	ld	18,-8*14(5)
772	ld	19,-8*13(5)
773	ld	20,-8*12(5)
774	ld	21,-8*11(5)
775	ld	22,-8*10(5)
776	ld	23,-8*9(5)
777	ld	24,-8*8(5)
778	ld	25,-8*7(5)
779	ld	26,-8*6(5)
780	ld	27,-8*5(5)
781	ld	28,-8*4(5)
782	ld	29,-8*3(5)
783	ld	30,-8*2(5)
784	ld	31,-8*1(5)
785	mr	1,5
786	blr
787.long	0
788.byte	0,12,4,0x20,0x80,18,6,0
789.long	0
790.size	bn_mul4x_mont_int,.-bn_mul4x_mont_int
/*
 * __bn_sqr8x_mont -- squaring path, eight words per step.  Not exported;
 * entered through .Lsqr8x_do from bn_mul4x_mont_int when r8 is a multiple
 * of 8 and r4 == r5, with the argument registers still as passed to that
 * routine (r3 destination, r4 input vector, r6 vector used for the
 * reduction and final subtraction, r7 pointer to a 64-bit constant,
 * r8 word count).  Returns 1 in r3.
 *
 * Phases, in order: zero the temporary vector, accumulate the cross
 * products a[i]*a[j] (i < j), double them and add the squares a[i]*a[i],
 * reduce eight words at a time, then subtract the r6 vector and select
 * the result without branching.
 */
791.align	5
792__bn_sqr8x_mont:
793.Lsqr8x_do:
/* Frame: r1 - (32*8 + 16*r8); r9 keeps the old r1; r8 is converted to bytes. */
794	mr	9,1
795	slwi	10,8,4
796	li	11,-32*8
797	sub	10,11,10
798	slwi	8,8,3
799	stdux	1,1,10
800
801	std	14,-8*18(9)
802	std	15,-8*17(9)
803	std	16,-8*16(9)
804	std	17,-8*15(9)
805	std	18,-8*14(9)
806	std	19,-8*13(9)
807	std	20,-8*12(9)
808	std	21,-8*11(9)
809	std	22,-8*10(9)
810	std	23,-8*9(9)
811	std	24,-8*8(9)
812	std	25,-8*7(9)
813	std	26,-8*6(9)
814	std	27,-8*5(9)
815	std	28,-8*4(9)
816	std	29,-8*3(9)
817	std	30,-8*2(9)
818	std	31,-8*1(9)
819
/* Bias the pointers by one word (r18 = r6 - 8); r7 = *r7; r0 = constant zero. */
820	subi	4,4,8
821	subi	18,6,8
822	subi	3,3,8
823	ld	7,0(7)
824	li	0,0
825
/* r6 = end of the biased input vector; r9-r12, r14-r17 = first eight input words. */
826	add	6,4,8
827	ld	9,8*1(4)
828
829	ld	10,8*2(4)
830	li	23,0
831	ld	11,8*3(4)
832	li	24,0
833	ld	12,8*4(4)
834	li	25,0
835	ld	14,8*5(4)
836	li	26,0
837	ld	15,8*6(4)
838	li	27,0
839	ld	16,8*7(4)
840	li	28,0
841	ldu	17,8*8(4)
842	li	29,0
843
/* r5 = temporary vector at r1 + 8*11; r30 counts down the bytes left to clear. */
844	addi	5,1,8*11
845	subic.	30,8,8*8
846	b	.Lsqr8x_zero_start
847
848.align	5
/* Zero the temporary vector sixteen words per pass; the first pass enters at the second half. */
849.Lsqr8x_zero:
850	subic.	30,30,8*8
851	std	0,8*1(5)
852	std	0,8*2(5)
853	std	0,8*3(5)
854	std	0,8*4(5)
855	std	0,8*5(5)
856	std	0,8*6(5)
857	std	0,8*7(5)
858	std	0,8*8(5)
859.Lsqr8x_zero_start:
860	std	0,8*9(5)
861	std	0,8*10(5)
862	std	0,8*11(5)
863	std	0,8*12(5)
864	std	0,8*13(5)
865	std	0,8*14(5)
866	std	0,8*15(5)
867	stdu	0,8*16(5)
868	bne	.Lsqr8x_zero
869
/*
 * Frame slots: 8*6 = biased destination, 8*7 = biased r6 vector,
 * 8*8 = the 64-bit constant, 8*9 = end of the temporary vector,
 * 8*10 = carry word (zero for now).  r5 is rewound to the start.
 */
870	std	3,8*6(1)
871	std	18,8*7(1)
872	std	7,8*8(1)
873	std	5,8*9(1)
874	std	0,8*10(1)
875	addi	5,1,8*11
876
877
878.align	5
/*
 * Cross products within the current eight words: every a[i]*a[j] with
 * i < j (r9*r10 ... r16*r17) is accumulated into r22-r29, and the
 * finished low words are stored to the temporary vector as they retire.
 */
879.Lsqr8x_outer_loop:
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909	mulld	18,10,9
910	mulld	19,11,9
911	mulld	20,12,9
912	mulld	21,14,9
913	addc	23,23,18
914	mulld	18,15,9
915	adde	24,24,19
916	mulld	19,16,9
917	adde	25,25,20
918	mulld	20,17,9
919	adde	26,26,21
920	mulhdu	21,10,9
921	adde	27,27,18
922	mulhdu	18,11,9
923	adde	28,28,19
924	mulhdu	19,12,9
925	adde	29,29,20
926	mulhdu	20,14,9
927	std	22,8*1(5)
928	addze	22,0
929	std	23,8*2(5)
930	addc	24,24,21
931	mulhdu	21,15,9
932	adde	25,25,18
933	mulhdu	18,16,9
934	adde	26,26,19
935	mulhdu	19,17,9
936	adde	27,27,20
937	mulld	20,11,10
938	adde	28,28,21
939	mulld	21,12,10
940	adde	29,29,18
941	mulld	18,14,10
942	adde	22,22,19
943
944	mulld	19,15,10
945	addc	25,25,20
946	mulld	20,16,10
947	adde	26,26,21
948	mulld	21,17,10
949	adde	27,27,18
950	mulhdu	18,11,10
951	adde	28,28,19
952	mulhdu	19,12,10
953	adde	29,29,20
954	mulhdu	20,14,10
955	adde	22,22,21
956	mulhdu	21,15,10
957	std	24,8*3(5)
958	addze	23,0
959	std	25,8*4(5)
960	addc	26,26,18
961	mulhdu	18,16,10
962	adde	27,27,19
963	mulhdu	19,17,10
964	adde	28,28,20
965	mulld	20,12,11
966	adde	29,29,21
967	mulld	21,14,11
968	adde	22,22,18
969	mulld	18,15,11
970	adde	23,23,19
971
972	mulld	19,16,11
973	addc	27,27,20
974	mulld	20,17,11
975	adde	28,28,21
976	mulhdu	21,12,11
977	adde	29,29,18
978	mulhdu	18,14,11
979	adde	22,22,19
980	mulhdu	19,15,11
981	adde	23,23,20
982	mulhdu	20,16,11
983	std	26,8*5(5)
984	addze	24,0
985	std	27,8*6(5)
986	addc	28,28,21
987	mulhdu	21,17,11
988	adde	29,29,18
989	mulld	18,14,12
990	adde	22,22,19
991	mulld	19,15,12
992	adde	23,23,20
993	mulld	20,16,12
994	adde	24,24,21
995
996	mulld	21,17,12
997	addc	29,29,18
998	mulhdu	18,14,12
999	adde	22,22,19
1000	mulhdu	19,15,12
1001	adde	23,23,20
1002	mulhdu	20,16,12
1003	adde	24,24,21
1004	mulhdu	21,17,12
1005	std	28,8*7(5)
1006	addze	25,0
1007	stdu	29,8*8(5)
1008	addc	22,22,18
1009	mulld	18,15,14
1010	adde	23,23,19
1011	mulld	19,16,14
1012	adde	24,24,20
1013	mulld	20,17,14
1014	adde	25,25,21
1015
1016	mulhdu	21,15,14
1017	addc	23,23,18
1018	mulhdu	18,16,14
1019	adde	24,24,19
1020	mulhdu	19,17,14
1021	adde	25,25,20
1022	mulld	20,16,15
1023	addze	26,0
1024	addc	24,24,21
1025	mulld	21,17,15
1026	adde	25,25,18
1027	mulhdu	18,16,15
1028	adde	26,26,19
1029
1030	mulhdu	19,17,15
1031	addc	25,25,20
1032	mulld	20,17,16
1033	adde	26,26,21
1034	mulhdu	21,17,16
1035	addze	27,0
1036	addc	26,26,18
1037	cmpld	6,4
1038	adde	27,27,19
1039
1040	addc	27,27,20
1041	sub	18,6,8
1042	addze	28,0
1043	add	28,28,21
1044
/* CR0 still holds the cmpld r6,r4 result: input exhausted -> doubling phase. */
1045	beq	.Lsqr8x_outer_break
1046
/*
 * Otherwise multiply the following input words by the eight words just
 * processed: r7 takes the first multiplier (the program constant stays
 * in frame slot 8*8), r3 addresses the rest, the temporary words are
 * added to the accumulators and the next eight inputs are loaded.
 */
1047	mr	7,9
1048	ld	9,8*1(5)
1049	ld	10,8*2(5)
1050	ld	11,8*3(5)
1051	ld	12,8*4(5)
1052	ld	14,8*5(5)
1053	ld	15,8*6(5)
1054	ld	16,8*7(5)
1055	ld	17,8*8(5)
1056	addc	22,22,9
1057	ld	9,8*1(4)
1058	adde	23,23,10
1059	ld	10,8*2(4)
1060	adde	24,24,11
1061	ld	11,8*3(4)
1062	adde	25,25,12
1063	ld	12,8*4(4)
1064	adde	26,26,14
1065	ld	14,8*5(4)
1066	adde	27,27,15
1067	ld	15,8*6(4)
1068	adde	28,28,16
1069	ld	16,8*7(4)
1070	subi	3,4,8*7
1071	addze	29,17
1072	ldu	17,8*8(4)
1073
1074	li	30,0
1075	b	.Lsqr8x_mul
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099.align	5
/*
 * Eight passes (r30 steps 8..56,0 under the andi. mask): accumulators +=
 * (r9..r17) * r7, one word retires to the temporary vector per pass and
 * the next multiplier is fetched from r3 + r30.
 */
1100.Lsqr8x_mul:
1101	mulld	18,9,7
1102	addze	31,0
1103	mulld	19,10,7
1104	addi	30,30,8
1105	mulld	20,11,7
1106	andi.	30,30,8*8-1
1107	mulld	21,12,7
1108	addc	22,22,18
1109	mulld	18,14,7
1110	adde	23,23,19
1111	mulld	19,15,7
1112	adde	24,24,20
1113	mulld	20,16,7
1114	adde	25,25,21
1115	mulld	21,17,7
1116	adde	26,26,18
1117	mulhdu	18,9,7
1118	adde	27,27,19
1119	mulhdu	19,10,7
1120	adde	28,28,20
1121	mulhdu	20,11,7
1122	adde	29,29,21
1123	mulhdu	21,12,7
1124	addze	31,31
1125	stdu	22,8(5)
1126	addc	22,23,18
1127	mulhdu	18,14,7
1128	adde	23,24,19
1129	mulhdu	19,15,7
1130	adde	24,25,20
1131	mulhdu	20,16,7
1132	adde	25,26,21
1133	mulhdu	21,17,7
1134	ldx	7,3,30
1135	adde	26,27,18
1136	adde	27,28,19
1137	adde	28,29,20
1138	adde	29,31,21
1139
1140	bne	.Lsqr8x_mul
1141
1142
/* More input left: add the next temporary words and load the next eight inputs. */
1143	cmpld	4,6
1144	beq	.Lsqr8x_break
1145
1146	ld	9,8*1(5)
1147	ld	10,8*2(5)
1148	ld	11,8*3(5)
1149	ld	12,8*4(5)
1150	ld	14,8*5(5)
1151	ld	15,8*6(5)
1152	ld	16,8*7(5)
1153	ld	17,8*8(5)
1154	addc	22,22,9
1155	ld	9,8*1(4)
1156	adde	23,23,10
1157	ld	10,8*2(4)
1158	adde	24,24,11
1159	ld	11,8*3(4)
1160	adde	25,25,12
1161	ld	12,8*4(4)
1162	adde	26,26,14
1163	ld	14,8*5(4)
1164	adde	27,27,15
1165	ld	15,8*6(4)
1166	adde	28,28,16
1167	ld	16,8*7(4)
1168	adde	29,29,17
1169	ldu	17,8*8(4)
1170
1171	b	.Lsqr8x_mul
1172
1173.align	5
/*
 * Row finished: the next eight words after the previous multipliers
 * become the new base words (r9..r17) and r4 is repositioned after them.
 * If that is the end of the input (r18 == 0) the accumulators carry
 * straight into the outer loop; otherwise they are stored and the
 * temporary-vector pointer is rewound by r18 bytes first.
 */
1174.Lsqr8x_break:
1175	ld	9,8*8(3)
1176	addi	4,3,8*15
1177	ld	10,8*9(3)
1178	sub.	18,6,4
1179	ld	11,8*10(3)
1180	sub	19,5,18
1181	ld	12,8*11(3)
1182	ld	14,8*12(3)
1183	ld	15,8*13(3)
1184	ld	16,8*14(3)
1185	ld	17,8*15(3)
1186	beq	.Lsqr8x_outer_loop
1187
1188	std	22,8*1(5)
1189	ld	22,8*1(19)
1190	std	23,8*2(5)
1191	ld	23,8*2(19)
1192	std	24,8*3(5)
1193	ld	24,8*3(19)
1194	std	25,8*4(5)
1195	ld	25,8*4(19)
1196	std	26,8*5(5)
1197	ld	26,8*5(19)
1198	std	27,8*6(5)
1199	ld	27,8*6(19)
1200	std	28,8*7(5)
1201	ld	28,8*7(19)
1202	std	29,8*8(5)
1203	ld	29,8*8(19)
1204	mr	5,19
1205	b	.Lsqr8x_outer_loop
1206
1207.align	5
/*
 * Doubling phase.  r18 (start of the biased input) supplies the words to
 * be squared; the temporary vector is re-read from its start and each
 * word is doubled across word boundaries (add x,x / srdi 63 / or) while
 * lo/hi of a[i]*a[i] are added in.  r30 = (bytes/32) - 1 passes.
 */
1208.Lsqr8x_outer_break:
1209
1210
1211	ld	10,8*1(18)
1212	ld	12,8*2(18)
1213	ld	15,8*3(18)
1214	ld	17,8*4(18)
1215	addi	4,18,8*4
1216
1217	ld	19,8*13(1)
1218	ld	20,8*14(1)
1219	ld	21,8*15(1)
1220	ld	18,8*16(1)
1221
1222	std	22,8*1(5)
1223	srwi	30,8,5
1224	std	23,8*2(5)
1225	subi	30,30,1
1226	std	24,8*3(5)
1227	std	25,8*4(5)
1228	std	26,8*5(5)
1229	std	27,8*6(5)
1230	std	28,8*7(5)
1231
1232	addi	5,1,8*11
1233	mulld	22,10,10
1234	mulhdu	10,10,10
1235	add	23,19,19
1236	srdi	19,19,64-1
1237	mulld	11,12,12
1238	mulhdu	12,12,12
1239	addc	23,23,10
1240	add	24,20,20
1241	srdi	20,20,64-1
1242	add	25,21,21
1243	srdi	21,21,64-1
1244	or	24,24,19
1245
1246	mtctr	30
1247.Lsqr4x_shift_n_add:
1248	mulld	14,15,15
1249	mulhdu	15,15,15
1250	ld	19,8*6(5)
1251	ld	10,8*1(4)
1252	adde	24,24,11
1253	add	26,18,18
1254	srdi	18,18,64-1
1255	or	25,25,20
1256	ld	20,8*7(5)
1257	adde	25,25,12
1258	ld	12,8*2(4)
1259	add	27,19,19
1260	srdi	19,19,64-1
1261	or	26,26,21
1262	ld	21,8*8(5)
1263	mulld	16,17,17
1264	mulhdu	17,17,17
1265	adde	26,26,14
1266	add	28,20,20
1267	srdi	20,20,64-1
1268	or	27,27,18
1269	ld	18,8*9(5)
1270	adde	27,27,15
1271	ld	15,8*3(4)
1272	add	29,21,21
1273	srdi	21,21,64-1
1274	or	28,28,19
1275	ld	19,8*10(5)
1276	mulld	9,10,10
1277	mulhdu	10,10,10
1278	adde	28,28,16
1279	std	22,8*1(5)
1280	add	22,18,18
1281	srdi	18,18,64-1
1282	or	29,29,20
1283	ld	20,8*11(5)
1284	adde	29,29,17
1285	ldu	17,8*4(4)
1286	std	23,8*2(5)
1287	add	23,19,19
1288	srdi	19,19,64-1
1289	or	22,22,21
1290	ld	21,8*12(5)
1291	mulld	11,12,12
1292	mulhdu	12,12,12
1293	adde	22,22,9
1294	std	24,8*3(5)
1295	add	24,20,20
1296	srdi	20,20,64-1
1297	or	23,23,18
1298	ld	18,8*13(5)
1299	adde	23,23,10
1300	std	25,8*4(5)
1301	std	26,8*5(5)
1302	std	27,8*6(5)
1303	std	28,8*7(5)
1304	stdu	29,8*8(5)
1305	add	25,21,21
1306	srdi	21,21,64-1
1307	or	24,24,19
1308	bdnz	.Lsqr4x_shift_n_add
/*
 * Last doubling group, interleaved with the set-up for the reduction:
 * r4 = saved r6 vector, r7 = saved constant, r9..r17 = its first eight
 * words, r22-r29 = low eight temporary words, r31 = lo(r7 * r22),
 * r6 = end of the r6 vector, CTR = 8.
 */
1309	ld	4,8*7(1)
1310	ld	7,8*8(1)
1311
1312	mulld	14,15,15
1313	mulhdu	15,15,15
1314	std	22,8*1(5)
1315	ld	22,8*12(1)
1316	ld	19,8*6(5)
1317	adde	24,24,11
1318	add	26,18,18
1319	srdi	18,18,64-1
1320	or	25,25,20
1321	ld	20,8*7(5)
1322	adde	25,25,12
1323	add	27,19,19
1324	srdi	19,19,64-1
1325	or	26,26,21
1326	mulld	16,17,17
1327	mulhdu	17,17,17
1328	adde	26,26,14
1329	add	28,20,20
1330	srdi	20,20,64-1
1331	or	27,27,18
1332	std	23,8*2(5)
1333	ld	23,8*13(1)
1334	adde	27,27,15
1335	or	28,28,19
1336	ld	9,8*1(4)
1337	ld	10,8*2(4)
1338	adde	28,28,16
1339	ld	11,8*3(4)
1340	ld	12,8*4(4)
1341	adde	29,17,20
1342	ld	14,8*5(4)
1343	ld	15,8*6(4)
1344
1345
1346
1347	mulld	31,7,22
1348	li	30,8
1349	ld	16,8*7(4)
1350	add	6,4,8
1351	ldu	17,8*8(4)
1352	std	24,8*3(5)
1353	ld	24,8*14(1)
1354	std	25,8*4(5)
1355	ld	25,8*15(1)
1356	std	26,8*5(5)
1357	ld	26,8*16(1)
1358	std	27,8*6(5)
1359	ld	27,8*17(1)
1360	std	28,8*7(5)
1361	ld	28,8*18(1)
1362	std	29,8*8(5)
1363	ld	29,8*19(1)
1364	addi	5,1,8*11
1365	mtctr	30
1366	b	.Lsqr8x_reduction
1367
1368.align	5
/*
 * Eight passes (CTR): add (r9..r17) * r31 to the accumulators while
 * shifting them down one word; r31 is stored for the tail loop and then
 * recomputed as lo(r7 * r22).  As in the 4x code, the low product r9*r31
 * is not computed; addic r22,r22,-1 produces the carry it would give.
 */
1369.Lsqr8x_reduction:
1370
1371	mulld	19,10,31
1372	mulld	20,11,31
1373	stdu	31,8(5)
1374	mulld	21,12,31
1375
1376	addic	22,22,-1
1377	mulld	18,14,31
1378	adde	22,23,19
1379	mulld	19,15,31
1380	adde	23,24,20
1381	mulld	20,16,31
1382	adde	24,25,21
1383	mulld	21,17,31
1384	adde	25,26,18
1385	mulhdu	18,9,31
1386	adde	26,27,19
1387	mulhdu	19,10,31
1388	adde	27,28,20
1389	mulhdu	20,11,31
1390	adde	28,29,21
1391	mulhdu	21,12,31
1392	addze	29,0
1393	addc	22,22,18
1394	mulhdu	18,14,31
1395	adde	23,23,19
1396	mulhdu	19,15,31
1397	adde	24,24,20
1398	mulhdu	20,16,31
1399	adde	25,25,21
1400	mulhdu	21,17,31
1401	mulld	31,7,22
1402	adde	26,26,18
1403	adde	27,27,19
1404	adde	28,28,20
1405	adde	29,29,21
1406	bdnz	.Lsqr8x_reduction
1407
/* Add the next eight temporary words; r3 addresses the eight stored r31 multipliers. */
1408	ld	18,8*1(5)
1409	ld	19,8*2(5)
1410	ld	20,8*3(5)
1411	ld	21,8*4(5)
1412	subi	3,5,8*7
1413	cmpld	6,4
1414	addc	22,22,18
1415	ld	18,8*5(5)
1416	adde	23,23,19
1417	ld	19,8*6(5)
1418	adde	24,24,20
1419	ld	20,8*7(5)
1420	adde	25,25,21
1421	ld	21,8*8(5)
1422	adde	26,26,18
1423	adde	27,27,19
1424	adde	28,28,20
1425	adde	29,29,21
1426
/* r6 vector exhausted after a single group: the operand is eight words long. */
1427	beq	.Lsqr8x8_post_condition
1428
1429	ld	7,8*0(3)
1430	ld	9,8*1(4)
1431	ld	10,8*2(4)
1432	ld	11,8*3(4)
1433	ld	12,8*4(4)
1434	ld	14,8*5(4)
1435	ld	15,8*6(4)
1436	ld	16,8*7(4)
1437	ldu	17,8*8(4)
1438	li	30,0
1439
1440.align	5
/* Same instruction sequence as .Lsqr8x_mul, applied to the r6 vector and the stored multipliers. */
1441.Lsqr8x_tail:
1442	mulld	18,9,7
1443	addze	31,0
1444	mulld	19,10,7
1445	addi	30,30,8
1446	mulld	20,11,7
1447	andi.	30,30,8*8-1
1448	mulld	21,12,7
1449	addc	22,22,18
1450	mulld	18,14,7
1451	adde	23,23,19
1452	mulld	19,15,7
1453	adde	24,24,20
1454	mulld	20,16,7
1455	adde	25,25,21
1456	mulld	21,17,7
1457	adde	26,26,18
1458	mulhdu	18,9,7
1459	adde	27,27,19
1460	mulhdu	19,10,7
1461	adde	28,28,20
1462	mulhdu	20,11,7
1463	adde	29,29,21
1464	mulhdu	21,12,7
1465	addze	31,31
1466	stdu	22,8(5)
1467	addc	22,23,18
1468	mulhdu	18,14,7
1469	adde	23,24,19
1470	mulhdu	19,15,7
1471	adde	24,25,20
1472	mulhdu	20,16,7
1473	adde	25,26,21
1474	mulhdu	21,17,7
1475	ldx	7,3,30
1476	adde	26,27,18
1477	adde	27,28,19
1478	adde	28,29,20
1479	adde	29,31,21
1480
1481	bne	.Lsqr8x_tail
1482
1483
/* r31 = saved carry word (frame slot 8*10); r20 = start of the biased r6 vector. */
1484	ld	9,8*1(5)
1485	ld	31,8*10(1)
1486	cmpld	6,4
1487	ld	10,8*2(5)
1488	sub	20,6,8
1489	ld	11,8*3(5)
1490	ld	12,8*4(5)
1491	ld	14,8*5(5)
1492	ld	15,8*6(5)
1493	ld	16,8*7(5)
1494	ld	17,8*8(5)
1495	beq	.Lsqr8x_tail_break
1496
1497	addc	22,22,9
1498	ld	9,8*1(4)
1499	adde	23,23,10
1500	ld	10,8*2(4)
1501	adde	24,24,11
1502	ld	11,8*3(4)
1503	adde	25,25,12
1504	ld	12,8*4(4)
1505	adde	26,26,14
1506	ld	14,8*5(4)
1507	adde	27,27,15
1508	ld	15,8*6(4)
1509	adde	28,28,16
1510	ld	16,8*7(4)
1511	adde	29,29,17
1512	ldu	17,8*8(4)
1513
1514	b	.Lsqr8x_tail
1515
1516.align	5
/*
 * One reduction row done.  addic r31,r31,-1 moves the saved carry into
 * XER[CA]; the top eight words are summed and stored, the new carry is
 * saved back to frame slot 8*10, the r6 vector is rewound and the next
 * eight temporary words become the accumulators.  Loops to the reduction
 * until r5 + 8*8 reaches the saved end of the temporary vector (r21).
 */
1517.Lsqr8x_tail_break:
1518	ld	7,8*8(1)
1519	ld	21,8*9(1)
1520	addi	30,5,8*8
1521
1522	addic	31,31,-1
1523	adde	18,22,9
1524	ld	22,8*8(3)
1525	ld	9,8*1(20)
1526	adde	19,23,10
1527	ld	23,8*9(3)
1528	ld	10,8*2(20)
1529	adde	24,24,11
1530	ld	11,8*3(20)
1531	adde	25,25,12
1532	ld	12,8*4(20)
1533	adde	26,26,14
1534	ld	14,8*5(20)
1535	adde	27,27,15
1536	ld	15,8*6(20)
1537	adde	28,28,16
1538	ld	16,8*7(20)
1539	adde	29,29,17
1540	ld	17,8*8(20)
1541	addi	4,20,8*8
1542	addze	20,0
1543	mulld	31,7,22
1544	std	18,8*1(5)
1545	cmpld	30,21
1546	std	19,8*2(5)
1547	li	30,8
1548	std	24,8*3(5)
1549	ld	24,8*10(3)
1550	std	25,8*4(5)
1551	ld	25,8*11(3)
1552	std	26,8*5(5)
1553	ld	26,8*12(3)
1554	std	27,8*6(5)
1555	ld	27,8*13(3)
1556	std	28,8*7(5)
1557	ld	28,8*14(3)
1558	std	29,8*8(5)
1559	ld	29,8*15(3)
1560	std	20,8*10(1)
1561	addi	5,3,8*7
1562	mtctr	30
1563	bne	.Lsqr8x_reduction
1564
1565
1566
1567
1568
1569
/*
 * Final subtraction: destination (r3, restored from frame slot 8*6) =
 * upper half of the temporary vector - r6 vector, eight words per pass
 * with a single borrow chain.  r31 takes the top carry (r20), r6 keeps
 * the destination start, r7 the start of the upper half.
 */
1570	ld	3,8*6(1)
1571	srwi	30,8,6
1572	mr	7,5
1573	addi	5,5,8*8
1574	subi	30,30,1
1575	subfc	18,9,22
1576	subfe	19,10,23
1577	mr	31,20
1578	mr	6,3
1579
1580	mtctr	30
1581	b	.Lsqr8x_sub
1582
1583.align	5
1584.Lsqr8x_sub:
1585	ld	9,8*1(4)
1586	ld	22,8*1(5)
1587	ld	10,8*2(4)
1588	ld	23,8*2(5)
1589	subfe	20,11,24
1590	ld	11,8*3(4)
1591	ld	24,8*3(5)
1592	subfe	21,12,25
1593	ld	12,8*4(4)
1594	ld	25,8*4(5)
1595	std	18,8*1(3)
1596	subfe	18,14,26
1597	ld	14,8*5(4)
1598	ld	26,8*5(5)
1599	std	19,8*2(3)
1600	subfe	19,15,27
1601	ld	15,8*6(4)
1602	ld	27,8*6(5)
1603	std	20,8*3(3)
1604	subfe	20,16,28
1605	ld	16,8*7(4)
1606	ld	28,8*7(5)
1607	std	21,8*4(3)
1608	subfe	21,17,29
1609	ldu	17,8*8(4)
1610	ldu	29,8*8(5)
1611	std	18,8*5(3)
1612	subfe	18,9,22
1613	std	19,8*6(3)
1614	subfe	19,10,23
1615	std	20,8*7(3)
1616	stdu	21,8*8(3)
1617	bdnz	.Lsqr8x_sub
1618
/*
 * Last group of the subtraction, then r31 = r31 - 1 + CA becomes the
 * select mask.  The copy loop below works four words per pass, hence
 * r30 = (bytes/32) - 1.
 */
1619	srwi	30,8,5
1620	ld	9,8*1(6)
1621	ld	22,8*1(7)
1622	subi	30,30,1
1623	ld	10,8*2(6)
1624	ld	23,8*2(7)
1625	subfe	20,11,24
1626	ld	11,8*3(6)
1627	ld	24,8*3(7)
1628	subfe	21,12,25
1629	ld	12,8*4(6)
1630	ldu	25,8*4(7)
1631	std	18,8*1(3)
1632	subfe	18,14,26
1633	std	19,8*2(3)
1634	subfe	19,15,27
1635	std	20,8*3(3)
1636	subfe	20,16,28
1637	std	21,8*4(3)
1638	subfe	21,17,29
1639	std	18,8*5(3)
1640	subfe	31,0,31
1641	std	19,8*6(3)
1642	std	20,8*7(3)
1643	std	21,8*8(3)
1644
1645	addi	5,1,8*11
1646	mtctr	30
1647
/*
 * Branch-free select per word: destination = (destination & ~r31) |
 * (temporary & r31).  Both halves of the temporary vector are zeroed
 * (via r7 and r5) as the loop advances.
 */
1648.Lsqr4x_cond_copy:
1649	andc	9,9,31
1650	std	0,-8*3(7)
1651	and	22,22,31
1652	std	0,-8*2(7)
1653	andc	10,10,31
1654	std	0,-8*1(7)
1655	and	23,23,31
1656	std	0,-8*0(7)
1657	andc	11,11,31
1658	std	0,8*1(5)
1659	and	24,24,31
1660	std	0,8*2(5)
1661	andc	12,12,31
1662	std	0,8*3(5)
1663	and	25,25,31
1664	stdu	0,8*4(5)
1665	or	18,9,22
1666	ld	9,8*5(6)
1667	ld	22,8*1(7)
1668	or	19,10,23
1669	ld	10,8*6(6)
1670	ld	23,8*2(7)
1671	or	20,11,24
1672	ld	11,8*7(6)
1673	ld	24,8*3(7)
1674	or	21,12,25
1675	ld	12,8*8(6)
1676	ldu	25,8*4(7)
1677	std	18,8*1(6)
1678	std	19,8*2(6)
1679	std	20,8*3(6)
1680	stdu	21,8*4(6)
1681	bdnz	.Lsqr4x_cond_copy
1682
/* r4 = back chain (caller's r1) for the epilogue; finish the last four words. */
1683	ld	4,0(1)
1684	andc	9,9,31
1685	and	22,22,31
1686	andc	10,10,31
1687	and	23,23,31
1688	andc	11,11,31
1689	and	24,24,31
1690	andc	12,12,31
1691	and	25,25,31
1692	or	18,9,22
1693	or	19,10,23
1694	or	20,11,24
1695	or	21,12,25
1696	std	18,8*1(6)
1697	std	19,8*2(6)
1698	std	20,8*3(6)
1699	std	21,8*4(6)
1700
1701	b	.Lsqr8x_done
1702
1703.align	5
/*
 * Eight-word operands: the result is still in r22-r29.  Subtract the
 * r6 words (r9..r17), derive the mask r31 = r31 - 1 + CA, and add the
 * masked r6 words back, so the subtraction is undone when it borrowed.
 * Frame slots 8*12..8*27 (the temporary vector) are zeroed along the way.
 */
1704.Lsqr8x8_post_condition:
1705	ld	3,8*6(1)
1706	ld	4,0(1)
1707	addze	31,0
1708
1709
1710	subfc	22,9,22
1711	subfe	23,10,23
1712	std	0,8*12(1)
1713	std	0,8*13(1)
1714	subfe	24,11,24
1715	std	0,8*14(1)
1716	std	0,8*15(1)
1717	subfe	25,12,25
1718	std	0,8*16(1)
1719	std	0,8*17(1)
1720	subfe	26,14,26
1721	std	0,8*18(1)
1722	std	0,8*19(1)
1723	subfe	27,15,27
1724	std	0,8*20(1)
1725	std	0,8*21(1)
1726	subfe	28,16,28
1727	std	0,8*22(1)
1728	std	0,8*23(1)
1729	subfe	29,17,29
1730	std	0,8*24(1)
1731	std	0,8*25(1)
1732	subfe	31,0,31
1733	std	0,8*26(1)
1734	std	0,8*27(1)
1735
1736	and	9,9,31
1737	and	10,10,31
1738	addc	22,22,9
1739	and	11,11,31
1740	adde	23,23,10
1741	and	12,12,31
1742	adde	24,24,11
1743	and	14,14,31
1744	adde	25,25,12
1745	and	15,15,31
1746	adde	26,26,14
1747	and	16,16,31
1748	adde	27,27,15
1749	and	17,17,31
1750	adde	28,28,16
1751	adde	29,29,17
1752	std	22,8*1(3)
1753	std	23,8*2(3)
1754	std	24,8*3(3)
1755	std	25,8*4(3)
1756	std	26,8*5(3)
1757	std	27,8*6(3)
1758	std	28,8*7(3)
1759	std	29,8*8(3)
1760
/* Clear the saved constant and carry slots, return 1, restore r14-r31 and r1 (r4 = old r1). */
1761.Lsqr8x_done:
1762	std	0,8*8(1)
1763	std	0,8*10(1)
1764
1765	ld	14,-8*18(4)
1766	li	3,1
1767	ld	15,-8*17(4)
1768	ld	16,-8*16(4)
1769	ld	17,-8*15(4)
1770	ld	18,-8*14(4)
1771	ld	19,-8*13(4)
1772	ld	20,-8*12(4)
1773	ld	21,-8*11(4)
1774	ld	22,-8*10(4)
1775	ld	23,-8*9(4)
1776	ld	24,-8*8(4)
1777	ld	25,-8*7(4)
1778	ld	26,-8*6(4)
1779	ld	27,-8*5(4)
1780	ld	28,-8*4(4)
1781	ld	29,-8*3(4)
1782	ld	30,-8*2(4)
1783	ld	31,-8*1(4)
1784	mr	1,4
1785	blr
1786.long	0
1787.byte	0,12,4,0x20,0x80,18,6,0
1788.long	0
1789.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for PPC,CRYPTOGAMS by <appro@openssl.org>"
 */
1790.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1791.align	2
1792