xref: /freebsd/sys/crypto/openssl/powerpc/ppc-mont.S (revision 2b8331622f0b212cf3bb4fc4914a501e5321d506)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from ppc-mont.pl. */
3.machine	"any"
4.text
5
/*
 * bn_mul_mont_int: 32-bit word-at-a-time multiply/reduce routine built on
 * mullw/mulhwu with the carry bit (CA) chained through addc/adde/addze.
 * Inputs as used by the code: r3 (copied to r9, written through later),
 * r4/r5/r6 (word arrays read with lwz/lwzx), r7 (pointer, dereferenced
 * once), r8 (word count).
 * NOTE(review): presumably (rp, ap, bp, np, n0, num) of OpenSSL's
 * bn_mul_mont -- confirm against ppc-mont.pl.
 * Returns 0 in r3 immediately when r8 >= 32 (signed compare).
 */
6.globl	bn_mul_mont_int
7.type	bn_mul_mont_int,@function
8.align	5
9bn_mul_mont_int:
10	mr	9,3
11	li	3,0
12	cmpwi	8,32
13	bgelr
/* New r1 = (r1 - (4*r8 + 256)) rounded down to a 4096-byte boundary. */
14	slwi	8,8,2
15	li	12,-4096
16	addi	3,8,256
17	subf	3,3,1
18	and	3,3,12
19	subf	3,1,3
/* r12 keeps the caller's r1; stwux stores it at the new r1 and updates r1. */
20	mr	12,1
21	srwi	8,8,2
22	stwux	1,1,3
23
/* Save r20-r31 immediately below the caller's stack pointer. */
24	stw	20,-48(12)
25	stw	21,-44(12)
26	stw	22,-40(12)
27	stw	23,-36(12)
28	stw	24,-32(12)
29	stw	25,-28(12)
30	stw	26,-24(12)
31	stw	27,-20(12)
32	stw	28,-16(12)
33	stw	29,-12(12)
34	stw	30,-8(12)
35	stw	31,-4(12)
36
/* r7 = word at (r7); r8 = count - 2 is the CTR value used by the loops. */
37	lwz	7,0(7)
38	addi	8,8,-2
39
/* r23 = r5[0]; r22 = r1+32 is the cursor into the stack scratch area. */
40	lwz	23,0(5)
41	lwz	10,0(4)
42	addi	22,1,32
/* r26:r25 = r4[0] * r23 */
43	mullw	25,10,23
44	mulhwu	26,10,23
45
46	lwz	10,4(4)
47	lwz	11,0(6)
48
/* r24 = low word of r25 * r7: the multiplier applied to the r6 array. */
49	mullw	24,25,7
50
/* r30:r29 = r4[1] * r23 */
51	mullw	29,10,23
52	mulhwu	30,10,23
53
/* r28:r27 = r6[0] * r24, then r25 is added in with carry into r28. */
54	mullw	27,11,24
55	mulhwu	28,11,24
56	lwz	11,4(6)
57	addc	27,27,25
58	addze	28,28
59
/* r0:r31 = r6[1] * r24 */
60	mullw	31,11,24
61	mulhwu	0,11,24
62
/* CTR = r8 passes; r21 = 8 is the byte offset of the next array element. */
63	mtctr	8
64	li	21,8
/*
 * .L1st: first-row loop, executed CTR (= r8) times.
 * Each pass loads r4[r21/4] and r6[r21/4] for the next pair of products,
 * folds the previous products (r30:r29 with r26, r0:r31 with r28) through
 * the carry chain, stores one result word at 0(r22), and advances both
 * r21 and r22 by 4.
 */
65.align	4
66.L1st:
67	lwzx	10,4,21
68	addc	25,29,26
69	lwzx	11,6,21
70	addze	26,30
71	mullw	29,10,23
72	addc	27,31,28
73	mulhwu	30,10,23
74	addze	28,0
75	mullw	31,11,24
76	addc	27,27,25
77	mulhwu	0,11,24
78	addze	28,28
79	stw	27,0(22)
80
81	addi	21,21,4
82	addi	22,22,4
83	bdnz	.L1st
84
/* Drain the products still pending after the last pass. */
85	addc	25,29,26
86	addze	26,30
87
88	addc	27,31,28
89	addze	28,0
90	addc	27,27,25
91	addze	28,28
92	stw	27,0(22)
93
/* r3 = carry out of the top word (0 or 1); the top word goes to 4(r22). */
94	li	3,0
95	addc	28,28,26
96	addze	3,3
97	stw	28,4(22)
98
/*
 * .Louter: one pass per remaining word of the r5 array; r20 is the byte
 * offset of that word and starts at 4.
 * Same product/carry pattern as the first row, except that the word
 * already held in the stack scratch area (r12) is added into each column.
 */
99	li	20,4
100.align	4
101.Louter:
/* r23 = r5[r20/4]; r22 = r1+32 rewinds the scratch cursor; r12 = scratch[0]. */
102	lwzx	23,5,20
103	lwz	10,0(4)
104	addi	22,1,32
105	lwz	12,32(1)
106	mullw	25,10,23
107	mulhwu	26,10,23
108	lwz	10,4(4)
109	lwz	11,0(6)
110	addc	25,25,12
111	mullw	29,10,23
112	addze	26,26
/* r24 = low word of r25 * r7: the multiplier applied to the r6 array. */
113	mullw	24,25,7
114	mulhwu	30,10,23
115	mullw	27,11,24
116	mulhwu	28,11,24
117	lwz	11,4(6)
118	addc	27,27,25
119	mullw	31,11,24
120	addze	28,28
121	mulhwu	0,11,24
122
123	mtctr	8
124	li	21,8
125.align	4
/* .Linner: CTR (= r8) passes; r12 = 4(r22) is the scratch word added in. */
126.Linner:
127	lwzx	10,4,21
128	addc	25,29,26
129	lwz	12,4(22)
130	addze	26,30
131	lwzx	11,6,21
132	addc	27,31,28
133	mullw	29,10,23
134	addze	28,0
135	mulhwu	30,10,23
136	addc	25,25,12
137	mullw	31,11,24
138	addze	26,26
139	mulhwu	0,11,24
140	addc	27,27,25
141	addi	21,21,4
142	addze	28,28
143	stw	27,0(22)
144	addi	22,22,4
145	bdnz	.Linner
146
/* Drain the products still pending after the last inner pass. */
147	lwz	12,4(22)
148	addc	25,29,26
149	addze	26,30
150	addc	25,25,12
151	addze	26,26
152
153	addc	27,31,28
154	addze	28,0
155	addc	27,27,25
156	addze	28,28
157	stw	27,0(22)
158
/*
 * addic with -1 sets CA exactly when r3 is non-zero, i.e. it moves the
 * previous row's top carry into CA; r3 then collects the new top carry.
 */
159	addic	3,3,-1
160	li	3,0
161	adde	28,28,26
162	addze	3,3
163	stw	28,4(22)
164
/*
 * 0x7c146040 is the encoding of "cmplw 20,12" (r12 = 4*r8). The compare
 * uses r20 before it is incremented, so the loop repeats while that
 * offset is <= 4*r8 (unsigned).
 */
165	slwi	12,8,2
166	.long	0x7c146040
167	addi	20,20,4
168	ble	.Louter
169
/*
 * Final subtraction and masked select.
 * r8 += 2 undoes the earlier "count - 2"; subfc 21,21,21 gives r21 = 0
 * with CA set (no borrow); r22 = r1+32 is the start of the stack scratch.
 */
170	addi	8,8,2
171	subfc	21,21,21
172	addi	22,1,32
173	mtctr	8
174
175.align	4
/* .Lsub: (r9)[j] = scratch[j] - (r6)[j] - borrow, for r8 words. */
176.Lsub:	lwzx	12,22,21
177	lwzx	11,6,21
178	subfe	10,11,12
179	stwx	10,9,21
180	addi	21,21,4
181	bdnz	.Lsub
182
/*
 * subfe 3,21,3 with r21 = 0 yields r3 = r3 - 1 + CA, which is then used
 * as an AND mask. NOTE(review): expected to be either 0 or all-ones.
 */
183	li	21,0
184	mtctr	8
185	subfe	3,21,3
186
187.align	4
/*
 * .Lcopy: (r9)[j] = (scratch[j] & r3) | ((r9)[j] & ~r3).
 * Each scratch word is overwritten with r21, the current byte offset.
 */
188.Lcopy:
189	lwzx	12,22,21
190	lwzx	10,9,21
191	and	12,12,3
192	andc	10,10,3
193	stwx	21,22,21
194	or	10,10,12
195	stwx	10,9,21
196	addi	21,21,4
197	bdnz	.Lcopy
198
/*
 * Epilogue: r12 = word at 0(r1), the caller's r1 stored there by stwux.
 * Return value r3 = 1; r20-r31 are reloaded from below the caller's r1.
 */
199	lwz	12,0(1)
200	li	3,1
201	lwz	20,-48(12)
202	lwz	21,-44(12)
203	lwz	22,-40(12)
204	lwz	23,-36(12)
205	lwz	24,-32(12)
206	lwz	25,-28(12)
207	lwz	26,-24(12)
208	lwz	27,-20(12)
209	lwz	28,-16(12)
210	lwz	29,-12(12)
211	lwz	30,-8(12)
212	lwz	31,-4(12)
213	mr	1,12
214	blr
/* NOTE(review): the data after blr looks like a traceback table -- confirm. */
215.long	0
216.byte	0,12,4,0,0x80,12,6,0
217.long	0
218.size	bn_mul_mont_int,.-bn_mul_mont_int
/*
 * bn_mul4x_mont_int: entry dispatch.
 * Branches to .Lmul4x_do when (r8 & 7) != 0 or when r4 != r5; otherwise
 * (r8 a multiple of 8 and r4 == r5) branches to .Lsqr8x_do.
 * 0x7c042840 is the encoding of "cmplw 4,5".
 * NOTE(review): arguments presumably (rp, ap, bp, np, n0, num) in r3-r8,
 * as for OpenSSL's bn_mul_mont -- confirm against ppc-mont.pl.
 */
219.globl	bn_mul4x_mont_int
220.type	bn_mul4x_mont_int,@function
221.align	5
222bn_mul4x_mont_int:
223	andi.	0,8,7
224	bne	.Lmul4x_do
225	.long	0x7c042840
226	bne	.Lmul4x_do
227	b	.Lsqr8x_do
/*
 * .Lmul4x_do: frame setup for the 4-words-per-pass multiply path.
 * r8 becomes the array length in bytes (4 * count); the frame is
 * 128 + r8 bytes and stwux stores the caller's r1 at the new r1.
 */
228.Lmul4x_do:
229	slwi	8,8,2
230	mr	9,1
231	li	10,-32*4
232	sub	10,10,8
233	stwux	1,1,10
234
/* Save r14-r31 immediately below the caller's stack pointer (r9). */
235	stw	14,-4*18(9)
236	stw	15,-4*17(9)
237	stw	16,-4*16(9)
238	stw	17,-4*15(9)
239	stw	18,-4*14(9)
240	stw	19,-4*13(9)
241	stw	20,-4*12(9)
242	stw	21,-4*11(9)
243	stw	22,-4*10(9)
244	stw	23,-4*9(9)
245	stw	24,-4*8(9)
246	stw	25,-4*7(9)
247	stw	26,-4*6(9)
248	stw	27,-4*5(9)
249	stw	28,-4*4(9)
250	stw	29,-4*3(9)
251	stw	30,-4*2(9)
252	stw	31,-4*1(9)
253
/* Bias r4, r6 and r3 by one word so that offset 4*1 addresses element 0. */
254	subi	4,4,4
255	subi	6,6,4
256	subi	3,3,4
257	lwz	7,0(7)
258
/* r14 = r5 + r8 - 16; r30 = biased r4 + r8 (end marker for the r4 array). */
259	add	14,5,8
260	add	30,4,8
261	subi	14,14,4*4
262
/*
 * r27 = r5[0]; r22-r25 = 0 (accumulator); r9-r12 = first four r4 words;
 * r18-r21 = first four r6 words. The lwzu forms advance r4/r6 by 16.
 */
263	lwz	27,4*0(5)
264	li	22,0
265	lwz	9,4*1(4)
266	li	23,0
267	lwz	10,4*2(4)
268	li	24,0
269	lwz	11,4*3(4)
270	li	25,0
271	lwzu	12,4*4(4)
272	lwz	18,4*1(6)
273	lwz	19,4*2(6)
274	lwz	20,4*3(6)
275	lwzu	21,4*4(6)
276
/* Stash biased r3 at 4*6(r1) and r14 at 4*7(r1) for later reload. */
277	stw	3,4*6(1)
278	stw	14,4*7(1)
279	li	3,0
/* r29 = r1 + 28; addic also writes CA. NOTE(review): presumably used to clear CA. */
280	addic	29,1,4*7
281	li	31,0
282	li	0,0
283	b	.Loop_mul4x_1st_reduction
284
/*
 * .Loop_mul4x_1st_reduction: four passes (r31 steps 4, 8, 12, 0; the CR0
 * result of the andi. drives the bne at the bottom of the loop).
 * Each pass:
 *   - adds r27 * (r9..r12) into the accumulator r22..r26,
 *   - computes r28 = low word of r22 * r7 and pushes it with stwu 4(r29),
 *   - adds r28 * (r18..r21) and shifts the accumulator down one word,
 *   - loads the next r27 from r5 + r31.
 * The low product r18*r28 is not computed; "addic 22,22,-1" sets CA
 * exactly when r22 is non-zero and that CA feeds the adde chain instead.
 * NOTE(review): relies on that low product cancelling r22 -- confirm.
 */
285.align	5
286.Loop_mul4x_1st_reduction:
287	mullw	14,9,27
288	addze	3,3
289	mullw	15,10,27
290	addi	31,31,4
291	mullw	16,11,27
292	andi.	31,31,4*4-1
293	mullw	17,12,27
294	addc	22,22,14
295	mulhwu	14,9,27
296	adde	23,23,15
297	mulhwu	15,10,27
298	adde	24,24,16
299	mullw	28,22,7
300	adde	25,25,17
301	mulhwu	16,11,27
302	addze	26,0
303	mulhwu	17,12,27
304	lwzx	27,5,31
305	addc	23,23,14
306
307	stwu	28,4(29)
308	adde	24,24,15
309	mullw	15,19,28
310	adde	25,25,16
311	mullw	16,20,28
312	adde	26,26,17
313	mullw	17,21,28
314
315
316
317
318
319
320
321
322
323
324	addic	22,22,-1
325	mulhwu	14,18,28
326	adde	22,23,15
327	mulhwu	15,19,28
328	adde	23,24,16
329	mulhwu	16,20,28
330	adde	24,25,17
331	mulhwu	17,21,28
332	adde	25,26,3
333	addze	3,0
334	addc	22,22,14
335	adde	23,23,15
336	adde	24,24,16
337	adde	25,25,17
338
339	bne	.Loop_mul4x_1st_reduction
340
/*
 * 0x7c1e2040 is the encoding of "cmplw 30,4": when r4 has reached the end
 * marker r30 the arrays are exactly four words long.
 */
341	.long	0x7c1e2040
342	beq	.Lmul4x4_post_condition
343
/*
 * Otherwise load the next four words of the r4 and r6 arrays and reload
 * r28 from 4*8(r1), the first value pushed by the stwu above.
 */
344	lwz	9,4*1(4)
345	lwz	10,4*2(4)
346	lwz	11,4*3(4)
347	lwzu	12,4*4(4)
348	lwz	28,4*8(1)
349	lwz	18,4*1(6)
350	lwz	19,4*2(6)
351	lwz	20,4*3(6)
352	lwzu	21,4*4(6)
353	b	.Loop_mul4x_1st_tail
354
/*
 * .Loop_mul4x_1st_tail: four passes per group of four r4/r6 words
 * (r31 steps 4, 8, 12, 0 as in the reduction loop).
 * Each pass adds r27 * (r9..r12) and r28 * (r18..r21) into r22..r26,
 * stores the lowest accumulator word with stwu 4(r29), reloads r27 from
 * r5 + r31 and r28 from the stash at r1 + 32 + r31.
 */
355.align	5
356.Loop_mul4x_1st_tail:
357	mullw	14,9,27
358	addze	3,3
359	mullw	15,10,27
360	addi	31,31,4
361	mullw	16,11,27
362	andi.	31,31,4*4-1
363	mullw	17,12,27
364	addc	22,22,14
365	mulhwu	14,9,27
366	adde	23,23,15
367	mulhwu	15,10,27
368	adde	24,24,16
369	mulhwu	16,11,27
370	adde	25,25,17
371	mulhwu	17,12,27
372	addze	26,0
373	lwzx	27,5,31
374	addc	23,23,14
375	mullw	14,18,28
376	adde	24,24,15
377	mullw	15,19,28
378	adde	25,25,16
379	mullw	16,20,28
380	adde	26,26,17
381	mullw	17,21,28
382	addc	22,22,14
383	mulhwu	14,18,28
384	adde	23,23,15
385	mulhwu	15,19,28
386	adde	24,24,16
387	mulhwu	16,20,28
388	adde	25,25,17
389	adde	26,26,3
390	mulhwu	17,21,28
391	addze	3,0
392	addi	28,1,4*8
393	lwzx	28,28,31
394	stwu	22,4(29)
395	addc	22,23,14
396	adde	23,24,15
397	adde	24,25,16
398	adde	25,26,17
399
400	bne	.Loop_mul4x_1st_tail
401
/*
 * r15 = r30 - r8 (the biased start of the r4 array).
 * 0x7c1e2040 is the encoding of "cmplw 30,4": leave once r4 reaches r30.
 */
402	sub	15,30,8
403	.long	0x7c1e2040
404	beq	.Lmul4x_proceed
405
/* More words remain: load the next four from r4 and r6 and repeat. */
406	lwz	9,4*1(4)
407	lwz	10,4*2(4)
408	lwz	11,4*3(4)
409	lwzu	12,4*4(4)
410	lwz	18,4*1(6)
411	lwz	19,4*2(6)
412	lwz	20,4*3(6)
413	lwzu	21,4*4(6)
414	b	.Loop_mul4x_1st_tail
415
/*
 * .Lmul4x_proceed: set up the next group of four r5 words.
 * r5 advances by 16 (lwzu) and r27 takes the word at the new r5;
 * r9-r12 are reloaded from r15 (start of the r4 array), r4 = r15 + 16,
 * and r6 is rewound by r8 bytes.
 */
416.align	5
417.Lmul4x_proceed:
418	lwzu	27,4*4(5)
419	addze	3,3
420	lwz	9,4*1(15)
421	lwz	10,4*2(15)
422	lwz	11,4*3(15)
423	lwz	12,4*4(15)
424	addi	4,15,4*4
425	sub	6,6,8
426
/*
 * Flush the accumulator r22-r25 and the carry word r3 to 4*1..4*5(r29),
 * then reload r22-r25 from 4*12..4*15(r1).
 */
427	stw	22,4*1(29)
428	stw	23,4*2(29)
429	stw	24,4*3(29)
430	stw	25,4*4(29)
431	stw	3,4*5(29)
432	lwz	22,4*12(1)
433	lwz	23,4*13(1)
434	lwz	24,4*14(1)
435	lwz	25,4*15(1)
436
437	lwz	18,4*1(6)
438	lwz	19,4*2(6)
439	lwz	20,4*3(6)
440	lwzu	21,4*4(6)
/* r29 = r1 + 28 again; addic also writes CA. */
441	addic	29,1,4*7
442	li	3,0
443	b	.Loop_mul4x_reduction
444
/*
 * .Loop_mul4x_reduction: four passes (r31 steps 4, 8, 12, 0).
 * Each pass adds r27 * (r9..r12) into r22..r26, computes r28 = low word
 * of r22 * r7 and pushes it with stwu 4(r29), then adds r28 * (r18..r21)
 * while shifting the accumulator down one word. The low product r18*r28
 * is not computed; "addic 22,22,-1" sets CA exactly when r22 is non-zero.
 * NOTE(review): relies on that low product cancelling r22 -- confirm.
 */
445.align	5
446.Loop_mul4x_reduction:
447	mullw	14,9,27
448	addze	3,3
449	mullw	15,10,27
450	addi	31,31,4
451	mullw	16,11,27
452	andi.	31,31,4*4-1
453	mullw	17,12,27
454	addc	22,22,14
455	mulhwu	14,9,27
456	adde	23,23,15
457	mulhwu	15,10,27
458	adde	24,24,16
459	mullw	28,22,7
460	adde	25,25,17
461	mulhwu	16,11,27
462	addze	26,0
463	mulhwu	17,12,27
464	lwzx	27,5,31
465	addc	23,23,14
466
467	stwu	28,4(29)
468	adde	24,24,15
469	mullw	15,19,28
470	adde	25,25,16
471	mullw	16,20,28
472	adde	26,26,17
473	mullw	17,21,28
474
475	addic	22,22,-1
476	mulhwu	14,18,28
477	adde	22,23,15
478	mulhwu	15,19,28
479	adde	23,24,16
480	mulhwu	16,20,28
481	adde	24,25,17
482	mulhwu	17,21,28
483	adde	25,26,3
484	addze	3,0
485	addc	22,22,14
486	adde	23,23,15
487	adde	24,24,16
488	adde	25,25,17
489
490	bne	.Loop_mul4x_reduction
491
/*
 * Add the four stack words at 4*5..4*8(r29) into r22-r25 and load the
 * next four words of the r4 array (lwzu advances r4 by 16).
 */
492	lwz	14,4*5(29)
493	addze	3,3
494	lwz	15,4*6(29)
495	lwz	16,4*7(29)
496	lwz	17,4*8(29)
497	lwz	9,4*1(4)
498	lwz	10,4*2(4)
499	lwz	11,4*3(4)
500	lwzu	12,4*4(4)
501	addc	22,22,14
502	adde	23,23,15
503	adde	24,24,16
504	adde	25,25,17
505
506
/* r28 = first pushed multiplier (4*8(r1)); next four words of the r6 array. */
507	lwz	28,4*8(1)
508	lwz	18,4*1(6)
509	lwz	19,4*2(6)
510	lwz	20,4*3(6)
511	lwzu	21,4*4(6)
512	b	.Loop_mul4x_tail
513
/*
 * .Loop_mul4x_tail: four passes per group of four r4/r6 words
 * (r31 steps 4, 8, 12, 0).
 * Each pass adds r27 * (r9..r12) and r28 * (r18..r21) into r22..r26,
 * stores the lowest accumulator word with stwu 4(r29), reloads r27 from
 * r5 + r31 and r28 from the stash at r1 + 32 + r31.
 */
514.align	5
515.Loop_mul4x_tail:
516	mullw	14,9,27
517	addze	3,3
518	mullw	15,10,27
519	addi	31,31,4
520	mullw	16,11,27
521	andi.	31,31,4*4-1
522	mullw	17,12,27
523	addc	22,22,14
524	mulhwu	14,9,27
525	adde	23,23,15
526	mulhwu	15,10,27
527	adde	24,24,16
528	mulhwu	16,11,27
529	adde	25,25,17
530	mulhwu	17,12,27
531	addze	26,0
532	lwzx	27,5,31
533	addc	23,23,14
534	mullw	14,18,28
535	adde	24,24,15
536	mullw	15,19,28
537	adde	25,25,16
538	mullw	16,20,28
539	adde	26,26,17
540	mullw	17,21,28
541	addc	22,22,14
542	mulhwu	14,18,28
543	adde	23,23,15
544	mulhwu	15,19,28
545	adde	24,24,16
546	mulhwu	16,20,28
547	adde	25,25,17
548	mulhwu	17,21,28
549	adde	26,26,3
550	addi	28,1,4*8
551	lwzx	28,28,31
552	addze	3,0
553	stwu	22,4(29)
554	addc	22,23,14
555	adde	23,24,15
556	adde	24,25,16
557	adde	25,26,17
558
559	bne	.Loop_mul4x_tail
560
/*
 * r14 = stack word at 4*5(r29); r15 = r6 - r8 (rewound r6).
 * 0x7c1e2040 is the encoding of "cmplw 30,4": leave once r4 reaches r30.
 */
561	lwz	14,4*5(29)
562	sub	15,6,8
563	addze	3,3
564	.long	0x7c1e2040
565	beq	.Loop_mul4x_break
566
/* More words remain: add in the next stack words, load the next four. */
567	lwz	15,4*6(29)
568	lwz	16,4*7(29)
569	lwz	17,4*8(29)
570	lwz	9,4*1(4)
571	lwz	10,4*2(4)
572	lwz	11,4*3(4)
573	lwzu	12,4*4(4)
574	addc	22,22,14
575	adde	23,23,15
576	adde	24,24,16
577	adde	25,25,17
578
579
580	lwz	18,4*1(6)
581	lwz	19,4*2(6)
582	lwz	20,4*3(6)
583	lwzu	21,4*4(6)
584	b	.Loop_mul4x_tail
585
/*
 * .Loop_mul4x_break: end of one group of four r5 words.
 * r16/r17 are reloaded from the stash slots 4*6(r1) and 4*7(r1).
 * The top of the accumulator (r22-r25 plus r14 and the carry word r3) is
 * written to 4*1..4*5(r29) while r22-r25 are reloaded from
 * 4*12..4*15(r1). r4 is rewound to r30 - r8; r18-r21 are reloaded from
 * r15 and r6 = r15 + 16.
 */
586.align	5
587.Loop_mul4x_break:
588	lwz	16,4*6(1)
589	lwz	17,4*7(1)
590	addc	9,22,14
591	lwz	22,4*12(1)
592	addze	10,23
593	lwz	23,4*13(1)
594	addze	11,24
595	lwz	24,4*14(1)
596	addze	12,25
597	lwz	25,4*15(1)
598	addze	3,3
599	stw	9,4*1(29)
600	sub	4,30,8
601	stw	10,4*2(29)
602	stw	11,4*3(29)
603	stw	12,4*4(29)
604	stw	3,4*5(29)
605
606	lwz	18,4*1(15)
607	lwz	19,4*2(15)
608	lwz	20,4*3(15)
609	lwz	21,4*4(15)
610	addi	6,15,4*4
/* 0x7c058840 is the encoding of "cmplw 5,17": done when r5 equals r17. */
611	.long	0x7c058840
612	beq	.Lmul4x_post
613
/* Otherwise advance r5 by 16, load the next r4 words and loop again. */
614	lwzu	27,4*4(5)
615	lwz	9,4*1(4)
616	lwz	10,4*2(4)
617	lwz	11,4*3(4)
618	lwzu	12,4*4(4)
619	li	3,0
620	addic	29,1,4*7
621	b	.Loop_mul4x_reduction
622
/*
 * .Lmul4x_post: final subtraction and masked select for the 4x path.
 * r31 = (r8 >> 4) - 1 is the CTR value (r8 is the byte length, so r8 >> 4
 * is the number of four-word groups). r5 and r30 both start at r16.
 * r16 was reloaded from the 4*6(r1) stash slot just before this label.
 */
623.align	5
624.Lmul4x_post:
625
626
627
628
629	srwi	31,8,4
630	mr	5,16
631	subi	31,31,1
632	mr	30,16
633	subfc	14,18,22
634	addi	29,1,4*15
635	subfe	15,19,23
636
637	mtctr	31
/*
 * .Lmul4x_sub: borrow-chained subtraction of the r6 words (r18-r21) from
 * the stack words (r22-r25), four words per pass, stored through r5.
 */
638.Lmul4x_sub:
639	lwz	18,4*1(6)
640	lwz	22,4*1(29)
641	subfe	16,20,24
642	lwz	19,4*2(6)
643	lwz	23,4*2(29)
644	subfe	17,21,25
645	lwz	20,4*3(6)
646	lwz	24,4*3(29)
647	lwzu	21,4*4(6)
648	lwzu	25,4*4(29)
649	stw	14,4*1(5)
650	stw	15,4*2(5)
651	subfe	14,18,22
652	stw	16,4*3(5)
653	stwu	17,4*4(5)
654	subfe	15,19,23
655	bdnz	.Lmul4x_sub
656
/*
 * Finish the last group. "subfe 3,0,3" (r0 = 0) gives r3 = r3 - 1 + CA,
 * which is used as an AND mask below.
 * NOTE(review): expected to be either 0 or all-ones.
 * r9-r12 are reloaded from the output (r30), r14-r17 from the stack.
 */
657	lwz	9,4*1(30)
658	stw	14,4*1(5)
659	lwz	14,4*12(1)
660	subfe	16,20,24
661	lwz	10,4*2(30)
662	stw	15,4*2(5)
663	lwz	15,4*13(1)
664	subfe	17,21,25
665	subfe	3,0,3
666	addi	29,1,4*12
667	lwz	11,4*3(30)
668	stw	16,4*3(5)
669	lwz	16,4*14(1)
670	lwz	12,4*4(30)
671	stw	17,4*4(5)
672	lwz	17,4*15(1)
673
674	mtctr	31
/*
 * .Lmul4x_cond_copy: out = (stack & r3) | (out & ~r3), four words per
 * pass. The stack words just consumed are overwritten with r0 (zero).
 */
675.Lmul4x_cond_copy:
676	and	14,14,3
677	andc	9,9,3
678	stw	0,4*0(29)
679	and	15,15,3
680	andc	10,10,3
681	stw	0,4*1(29)
682	and	16,16,3
683	andc	11,11,3
684	stw	0,4*2(29)
685	and	17,17,3
686	andc	12,12,3
687	stw	0,4*3(29)
688	or	22,14,9
689	lwz	9,4*5(30)
690	lwz	14,4*4(29)
691	or	23,15,10
692	lwz	10,4*6(30)
693	lwz	15,4*5(29)
694	or	24,16,11
695	lwz	11,4*7(30)
696	lwz	16,4*6(29)
697	or	25,17,12
698	lwz	12,4*8(30)
699	lwz	17,4*7(29)
700	addi	29,29,4*4
701	stw	22,4*1(30)
702	stw	23,4*2(30)
703	stw	24,4*3(30)
704	stwu	25,4*4(30)
705	bdnz	.Lmul4x_cond_copy
706
/* Last group; r5 = word at 0(r1), the caller's r1 needed by .Lmul4x_done. */
707	lwz	5,0(1)
708	and	14,14,3
709	andc	9,9,3
710	stw	0,4*0(29)
711	and	15,15,3
712	andc	10,10,3
713	stw	0,4*1(29)
714	and	16,16,3
715	andc	11,11,3
716	stw	0,4*2(29)
717	and	17,17,3
718	andc	12,12,3
719	stw	0,4*3(29)
720	or	22,14,9
721	or	23,15,10
722	stw	0,4*4(29)
723	or	24,16,11
724	or	25,17,12
725	stw	22,4*1(30)
726	stw	23,4*2(30)
727	stw	24,4*3(30)
728	stw	25,4*4(30)
729
730	b	.Lmul4x_done
731
/*
 * .Lmul4x4_post_condition: final step when the whole result is held in
 * r22-r25 (reached when the arrays are exactly four words long).
 * r4 = output pointer stashed at 4*6(r1) (biased by one word);
 * r5 = word at 0(r1), the caller's r1 needed by the epilogue.
 */
732.align	4
733.Lmul4x4_post_condition:
734	lwz	4,4*6(1)
735	lwz	5,0(1)
736	addze	3,3
737
/* r9-r12 = (r22-r25) - (r18-r21) with borrow; r3 = r3 - 1 + CA (r0 = 0). */
738	subfc	9,18,22
739	subfe	10,19,23
740	subfe	11,20,24
741	subfe	12,21,25
742	subfe	3,0,3
743
/* Add r18-r21 back under the mask r3, then store the four result words. */
744	and	18,18,3
745	and	19,19,3
746	addc	9,9,18
747	and	20,20,3
748	adde	10,10,19
749	and	21,21,3
750	adde	11,11,20
751	adde	12,12,21
752
753	stw	9,4*1(4)
754	stw	10,4*2(4)
755	stw	11,4*3(4)
756	stw	12,4*4(4)
757
/*
 * .Lmul4x_done: common epilogue of the 4x path.
 * Clears the four words at 4*8..4*11(r1) (the slots filled by
 * "stwu 28,4(29)" in the reduction loops), sets the return value r3 = 1,
 * restores r14-r31 relative to r5 (the caller's r1) and restores r1.
 */
758.Lmul4x_done:
759	stw	0,4*8(1)
760	stw	0,4*9(1)
761	stw	0,4*10(1)
762	stw	0,4*11(1)
763	li	3,1
764	lwz	14,-4*18(5)
765	lwz	15,-4*17(5)
766	lwz	16,-4*16(5)
767	lwz	17,-4*15(5)
768	lwz	18,-4*14(5)
769	lwz	19,-4*13(5)
770	lwz	20,-4*12(5)
771	lwz	21,-4*11(5)
772	lwz	22,-4*10(5)
773	lwz	23,-4*9(5)
774	lwz	24,-4*8(5)
775	lwz	25,-4*7(5)
776	lwz	26,-4*6(5)
777	lwz	27,-4*5(5)
778	lwz	28,-4*4(5)
779	lwz	29,-4*3(5)
780	lwz	30,-4*2(5)
781	lwz	31,-4*1(5)
782	mr	1,5
783	blr
/* NOTE(review): the data after blr looks like a traceback table -- confirm. */
784.long	0
785.byte	0,12,4,0x20,0x80,18,6,0
786.long	0
787.size	bn_mul4x_mont_int,.-bn_mul4x_mont_int
/*
 * __bn_sqr8x_mont / .Lsqr8x_do: squaring path, entered from
 * bn_mul4x_mont_int with the register arguments untouched.
 * The frame is 128 + 8*count bytes (r10 = -(128 + (r8 << 3))); r8 then
 * becomes the array length in bytes (4 * count).
 */
788.align	5
789__bn_sqr8x_mont:
790.Lsqr8x_do:
791	mr	9,1
792	slwi	10,8,3
793	li	11,-32*4
794	sub	10,11,10
795	slwi	8,8,2
796	stwux	1,1,10
797
/* Save r14-r31 immediately below the caller's stack pointer (r9). */
798	stw	14,-4*18(9)
799	stw	15,-4*17(9)
800	stw	16,-4*16(9)
801	stw	17,-4*15(9)
802	stw	18,-4*14(9)
803	stw	19,-4*13(9)
804	stw	20,-4*12(9)
805	stw	21,-4*11(9)
806	stw	22,-4*10(9)
807	stw	23,-4*9(9)
808	stw	24,-4*8(9)
809	stw	25,-4*7(9)
810	stw	26,-4*6(9)
811	stw	27,-4*5(9)
812	stw	28,-4*4(9)
813	stw	29,-4*3(9)
814	stw	30,-4*2(9)
815	stw	31,-4*1(9)
816
/*
 * Bias pointers by one word (r4, r3 in place; r18 = r6 - 4) so offset 4*1
 * addresses element 0. r7 = word at (r7). r0 = 0 is the zero register
 * for the rest of the path.
 */
817	subi	4,4,4
818	subi	18,6,4
819	subi	3,3,4
820	lwz	7,0(7)
821	li	0,0
822
/*
 * r6 = r4 + r8 (end marker). Load the first eight input words into
 * r9,r10,r11,r12,r14,r15,r16,r17 (lwzu advances r4 by 32) and zero
 * r23-r29. r22 is not initialised here.
 */
823	add	6,4,8
824	lwz	9,4*1(4)
825
826	lwz	10,4*2(4)
827	li	23,0
828	lwz	11,4*3(4)
829	li	24,0
830	lwz	12,4*4(4)
831	li	25,0
832	lwz	14,4*5(4)
833	li	26,0
834	lwz	15,4*6(4)
835	li	27,0
836	lwz	16,4*7(4)
837	li	28,0
838	lwzu	17,4*8(4)
839	li	29,0
840
/* r5 = r1 + 44 (stack cursor); r30 = r8 - 32 with CR0 set for the zero loop. */
841	addi	5,1,4*11
842	subic.	30,8,4*8
843	b	.Lsqr8x_zero_start
844
/*
 * .Lsqr8x_zero: zero-fill of the stack work area through r5.
 * Entry is at .Lsqr8x_zero_start, so the first pass writes only offsets
 * 4*9..4*16 before stwu advances r5 by 64. Each later pass writes all
 * sixteen words and subtracts 32 from r30. The loop ends when the
 * subic. result is zero.
 */
845.align	5
846.Lsqr8x_zero:
847	subic.	30,30,4*8
848	stw	0,4*1(5)
849	stw	0,4*2(5)
850	stw	0,4*3(5)
851	stw	0,4*4(5)
852	stw	0,4*5(5)
853	stw	0,4*6(5)
854	stw	0,4*7(5)
855	stw	0,4*8(5)
856.Lsqr8x_zero_start:
857	stw	0,4*9(5)
858	stw	0,4*10(5)
859	stw	0,4*11(5)
860	stw	0,4*12(5)
861	stw	0,4*13(5)
862	stw	0,4*14(5)
863	stw	0,4*15(5)
864	stwu	0,4*16(5)
865	bne	.Lsqr8x_zero
866
/*
 * Stash values needed later: biased r3 at 4*6(r1), r18 at 4*7(r1), r7 at
 * 4*8(r1), the end of the zeroed area (r5) at 4*9(r1), and zero at
 * 4*10(r1). Then rewind r5 to r1 + 44.
 */
867	stw	3,4*6(1)
868	stw	18,4*7(1)
869	stw	7,4*8(1)
870	stw	5,4*9(1)
871	stw	0,4*10(1)
872	addi	5,1,4*11
873
874
/*
 * .Lsqr8x_outer_loop: cross products inside one eight-word window.
 * With the window held in r9,r10,r11,r12,r14,r15,r16,r17, this computes
 * every product of two different window words (28 mullw/mulhwu pairs, no
 * squares) and accumulates them into r22-r29 through the carry chain.
 * Finished low words are stored at 4*1..4*8(r5); the final stwu advances
 * r5 by 32, after which r22-r28 hold the upper partial sums.
 */
875.align	5
876.Lsqr8x_outer_loop:
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
/* Products with r9. */
906	mullw	18,10,9
907	mullw	19,11,9
908	mullw	20,12,9
909	mullw	21,14,9
910	addc	23,23,18
911	mullw	18,15,9
912	adde	24,24,19
913	mullw	19,16,9
914	adde	25,25,20
915	mullw	20,17,9
916	adde	26,26,21
917	mulhwu	21,10,9
918	adde	27,27,18
919	mulhwu	18,11,9
920	adde	28,28,19
921	mulhwu	19,12,9
922	adde	29,29,20
923	mulhwu	20,14,9
924	stw	22,4*1(5)
925	addze	22,0
926	stw	23,4*2(5)
927	addc	24,24,21
928	mulhwu	21,15,9
929	adde	25,25,18
930	mulhwu	18,16,9
931	adde	26,26,19
932	mulhwu	19,17,9
933	adde	27,27,20
/* Products with r10. */
934	mullw	20,11,10
935	adde	28,28,21
936	mullw	21,12,10
937	adde	29,29,18
938	mullw	18,14,10
939	adde	22,22,19
940
941	mullw	19,15,10
942	addc	25,25,20
943	mullw	20,16,10
944	adde	26,26,21
945	mullw	21,17,10
946	adde	27,27,18
947	mulhwu	18,11,10
948	adde	28,28,19
949	mulhwu	19,12,10
950	adde	29,29,20
951	mulhwu	20,14,10
952	adde	22,22,21
953	mulhwu	21,15,10
954	stw	24,4*3(5)
955	addze	23,0
956	stw	25,4*4(5)
957	addc	26,26,18
958	mulhwu	18,16,10
959	adde	27,27,19
960	mulhwu	19,17,10
961	adde	28,28,20
/* Products with r11. */
962	mullw	20,12,11
963	adde	29,29,21
964	mullw	21,14,11
965	adde	22,22,18
966	mullw	18,15,11
967	adde	23,23,19
968
969	mullw	19,16,11
970	addc	27,27,20
971	mullw	20,17,11
972	adde	28,28,21
973	mulhwu	21,12,11
974	adde	29,29,18
975	mulhwu	18,14,11
976	adde	22,22,19
977	mulhwu	19,15,11
978	adde	23,23,20
979	mulhwu	20,16,11
980	stw	26,4*5(5)
981	addze	24,0
982	stw	27,4*6(5)
983	addc	28,28,21
984	mulhwu	21,17,11
985	adde	29,29,18
/* Products with r12. */
986	mullw	18,14,12
987	adde	22,22,19
988	mullw	19,15,12
989	adde	23,23,20
990	mullw	20,16,12
991	adde	24,24,21
992
993	mullw	21,17,12
994	addc	29,29,18
995	mulhwu	18,14,12
996	adde	22,22,19
997	mulhwu	19,15,12
998	adde	23,23,20
999	mulhwu	20,16,12
1000	adde	24,24,21
1001	mulhwu	21,17,12
1002	stw	28,4*7(5)
1003	addze	25,0
1004	stwu	29,4*8(5)
1005	addc	22,22,18
/* Products with r14. */
1006	mullw	18,15,14
1007	adde	23,23,19
1008	mullw	19,16,14
1009	adde	24,24,20
1010	mullw	20,17,14
1011	adde	25,25,21
1012
1013	mulhwu	21,15,14
1014	addc	23,23,18
1015	mulhwu	18,16,14
1016	adde	24,24,19
1017	mulhwu	19,17,14
1018	adde	25,25,20
/* Products with r15, then r16*r17. */
1019	mullw	20,16,15
1020	addze	26,0
1021	addc	24,24,21
1022	mullw	21,17,15
1023	adde	25,25,18
1024	mulhwu	18,16,15
1025	adde	26,26,19
1026
1027	mulhwu	19,17,15
1028	addc	25,25,20
1029	mullw	20,17,16
1030	adde	26,26,21
1031	mulhwu	21,17,16
1032	addze	27,0
1033	addc	26,26,18
/*
 * 0x7c062040 is the encoding of "cmplw 6,4"; its CR0 result is consumed
 * by the beq below (nothing in between writes CR0).
 */
1034	.long	0x7c062040
1035	adde	27,27,19
1036
1037	addc	27,27,20
/* r18 = r6 - r8: the biased start of the input array. */
1038	sub	18,6,8
1039	addze	28,0
1040	add	28,28,21
1041
/* All input consumed (r4 == r6): go to the shift-and-add phase. */
1042	beq	.Lsqr8x_outer_break
1043
/*
 * Otherwise multiply the following input words by this window:
 * r7 = first word of the current window; the next eight stack words at
 * 4*1..4*8(r5) are added into r22-r29; r9-r17 are reloaded with the next
 * eight input words (lwzu advances r4 by 32); r3 = r4 - 28 (taken before
 * the lwzu) is the base for the later "lwzx 7,3,30" multiplier loads.
 */
1044	mr	7,9
1045	lwz	9,4*1(5)
1046	lwz	10,4*2(5)
1047	lwz	11,4*3(5)
1048	lwz	12,4*4(5)
1049	lwz	14,4*5(5)
1050	lwz	15,4*6(5)
1051	lwz	16,4*7(5)
1052	lwz	17,4*8(5)
1053	addc	22,22,9
1054	lwz	9,4*1(4)
1055	adde	23,23,10
1056	lwz	10,4*2(4)
1057	adde	24,24,11
1058	lwz	11,4*3(4)
1059	adde	25,25,12
1060	lwz	12,4*4(4)
1061	adde	26,26,14
1062	lwz	14,4*5(4)
1063	adde	27,27,15
1064	lwz	15,4*6(4)
1065	adde	28,28,16
1066	lwz	16,4*7(4)
1067	subi	3,4,4*7
1068	addze	29,17
1069	lwzu	17,4*8(4)
1070
1071	li	30,0
1072	b	.Lsqr8x_mul
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
/*
 * .Lsqr8x_mul: eight passes (r30 steps 4, 8, ..., 28, 0; the CR0 result
 * of the andi. drives the bne at the bottom).
 * Each pass adds r7 * (r9,r10,r11,r12,r14,r15,r16,r17) into r22-r29 with
 * r31 collecting the top carries, stores the lowest accumulator word with
 * stwu 4(r5), shifts the accumulator down one word and loads the next
 * multiplier r7 from r3 + r30.
 */
1096.align	5
1097.Lsqr8x_mul:
1098	mullw	18,9,7
1099	addze	31,0
1100	mullw	19,10,7
1101	addi	30,30,4
1102	mullw	20,11,7
1103	andi.	30,30,4*8-1
1104	mullw	21,12,7
1105	addc	22,22,18
1106	mullw	18,14,7
1107	adde	23,23,19
1108	mullw	19,15,7
1109	adde	24,24,20
1110	mullw	20,16,7
1111	adde	25,25,21
1112	mullw	21,17,7
1113	adde	26,26,18
1114	mulhwu	18,9,7
1115	adde	27,27,19
1116	mulhwu	19,10,7
1117	adde	28,28,20
1118	mulhwu	20,11,7
1119	adde	29,29,21
1120	mulhwu	21,12,7
1121	addze	31,31
1122	stwu	22,4(5)
1123	addc	22,23,18
1124	mulhwu	18,14,7
1125	adde	23,24,19
1126	mulhwu	19,15,7
1127	adde	24,25,20
1128	mulhwu	20,16,7
1129	adde	25,26,21
1130	mulhwu	21,17,7
1131	lwzx	7,3,30
1132	adde	26,27,18
1133	adde	27,28,19
1134	adde	28,29,20
1135	adde	29,31,21
1136
1137	bne	.Lsqr8x_mul
1138
1139
/* 0x7c043040 is the encoding of "cmplw 4,6": stop once r4 reaches r6. */
1140	.long	0x7c043040
1141	beq	.Lsqr8x_break
1142
/*
 * More input remains: add the next eight stack words (4*1..4*8(r5)) into
 * r22-r29 and load the next eight input words (lwzu advances r4 by 32).
 */
1143	lwz	9,4*1(5)
1144	lwz	10,4*2(5)
1145	lwz	11,4*3(5)
1146	lwz	12,4*4(5)
1147	lwz	14,4*5(5)
1148	lwz	15,4*6(5)
1149	lwz	16,4*7(5)
1150	lwz	17,4*8(5)
1151	addc	22,22,9
1152	lwz	9,4*1(4)
1153	adde	23,23,10
1154	lwz	10,4*2(4)
1155	adde	24,24,11
1156	lwz	11,4*3(4)
1157	adde	25,25,12
1158	lwz	12,4*4(4)
1159	adde	26,26,14
1160	lwz	14,4*5(4)
1161	adde	27,27,15
1162	lwz	15,4*6(4)
1163	adde	28,28,16
1164	lwz	16,4*7(4)
1165	adde	29,29,17
1166	lwzu	17,4*8(4)
1167
1168	b	.Lsqr8x_mul
1169
/*
 * .Lsqr8x_break: move on to the next eight-word window.
 * r9-r17 are reloaded from 4*8..4*15(r3) and r4 = r3 + 60.
 * r18 = r6 - r4 (record form, sets CR0) and r19 = r5 - r18.
 * When r18 == 0 the outer loop continues directly with the current
 * accumulator and r5.
 */
1170.align	5
1171.Lsqr8x_break:
1172	lwz	9,4*8(3)
1173	addi	4,3,4*15
1174	lwz	10,4*9(3)
1175	sub.	18,6,4
1176	lwz	11,4*10(3)
1177	sub	19,5,18
1178	lwz	12,4*11(3)
1179	lwz	14,4*12(3)
1180	lwz	15,4*13(3)
1181	lwz	16,4*14(3)
1182	lwz	17,4*15(3)
1183	beq	.Lsqr8x_outer_loop
1184
/*
 * Otherwise flush r22-r29 to 4*1..4*8(r5), reload them from
 * 4*1..4*8(r19) and continue the outer loop with r5 = r19.
 */
1185	stw	22,4*1(5)
1186	lwz	22,4*1(19)
1187	stw	23,4*2(5)
1188	lwz	23,4*2(19)
1189	stw	24,4*3(5)
1190	lwz	24,4*3(19)
1191	stw	25,4*4(5)
1192	lwz	25,4*4(19)
1193	stw	26,4*5(5)
1194	lwz	26,4*5(19)
1195	stw	27,4*6(5)
1196	lwz	27,4*6(19)
1197	stw	28,4*7(5)
1198	lwz	28,4*7(19)
1199	stw	29,4*8(5)
1200	lwz	29,4*8(19)
1201	mr	5,19
1202	b	.Lsqr8x_outer_loop
1203
/*
 * .Lsqr8x_outer_break: start of the "double and add squares" phase.
 * r10,r12,r15,r17 = first four input words (r18 is the biased start of
 * the input array, computed before the branch here); r4 = r18 + 16.
 * r19,r20,r21,r18 = stack words at 4*13..4*16(r1).
 * The pending accumulator words r22-r28 are flushed to 4*1..4*7(r5).
 * r30 = (r8 >> 4) - 1 is the CTR value for the loop that follows.
 */
1204.align	5
1205.Lsqr8x_outer_break:
1206
1207
1208	lwz	10,4*1(18)
1209	lwz	12,4*2(18)
1210	lwz	15,4*3(18)
1211	lwz	17,4*4(18)
1212	addi	4,18,4*4
1213
1214	lwz	19,4*13(1)
1215	lwz	20,4*14(1)
1216	lwz	21,4*15(1)
1217	lwz	18,4*16(1)
1218
1219	stw	22,4*1(5)
1220	srwi	30,8,4
1221	stw	23,4*2(5)
1222	subi	30,30,1
1223	stw	24,4*3(5)
1224	stw	25,4*4(5)
1225	stw	26,4*5(5)
1226	stw	27,4*6(5)
1227	stw	28,4*7(5)
1228
/*
 * r5 = r1 + 44. Each stack word is doubled with "add x,w,w" while
 * "srwi w,w,31" keeps its top bit to be OR-ed into the next doubled word.
 * The mullw/mulhwu pairs square single input words (r10*r10, r12*r12).
 */
1229	addi	5,1,4*11
1230	mullw	22,10,10
1231	mulhwu	10,10,10
1232	add	23,19,19
1233	srwi	19,19,32-1
1234	mullw	11,12,12
1235	mulhwu	12,12,12
1236	addc	23,23,10
1237	add	24,20,20
1238	srwi	20,20,32-1
1239	add	25,21,21
1240	srwi	21,21,32-1
1241	or	24,24,19
1242
1243	mtctr	30
/*
 * .Lsqr4x_shift_n_add: CTR-driven loop of the "double and add squares"
 * phase. Per pass it reads four further input words through r4 (lwzu
 * advances r4 by 16), squares them with mullw/mulhwu, doubles the stack
 * words read through r5 (add x,w,w / srwi w,w,31 / or), adds the squares
 * through the adde chain and stores eight result words at
 * 4*1..4*8(r5) (the stwu advances r5 by 32).
 */
1244.Lsqr4x_shift_n_add:
1245	mullw	14,15,15
1246	mulhwu	15,15,15
1247	lwz	19,4*6(5)
1248	lwz	10,4*1(4)
1249	adde	24,24,11
1250	add	26,18,18
1251	srwi	18,18,32-1
1252	or	25,25,20
1253	lwz	20,4*7(5)
1254	adde	25,25,12
1255	lwz	12,4*2(4)
1256	add	27,19,19
1257	srwi	19,19,32-1
1258	or	26,26,21
1259	lwz	21,4*8(5)
1260	mullw	16,17,17
1261	mulhwu	17,17,17
1262	adde	26,26,14
1263	add	28,20,20
1264	srwi	20,20,32-1
1265	or	27,27,18
1266	lwz	18,4*9(5)
1267	adde	27,27,15
1268	lwz	15,4*3(4)
1269	add	29,21,21
1270	srwi	21,21,32-1
1271	or	28,28,19
1272	lwz	19,4*10(5)
1273	mullw	9,10,10
1274	mulhwu	10,10,10
1275	adde	28,28,16
1276	stw	22,4*1(5)
1277	add	22,18,18
1278	srwi	18,18,32-1
1279	or	29,29,20
1280	lwz	20,4*11(5)
1281	adde	29,29,17
1282	lwzu	17,4*4(4)
1283	stw	23,4*2(5)
1284	add	23,19,19
1285	srwi	19,19,32-1
1286	or	22,22,21
1287	lwz	21,4*12(5)
1288	mullw	11,12,12
1289	mulhwu	12,12,12
1290	adde	22,22,9
1291	stw	24,4*3(5)
1292	add	24,20,20
1293	srwi	20,20,32-1
1294	or	23,23,18
1295	lwz	18,4*13(5)
1296	adde	23,23,10
1297	stw	25,4*4(5)
1298	stw	26,4*5(5)
1299	stw	27,4*6(5)
1300	stw	28,4*7(5)
1301	stwu	29,4*8(5)
1302	add	25,21,21
1303	srwi	21,21,32-1
1304	or	24,24,19
1305	bdnz	.Lsqr4x_shift_n_add
/*
 * After the loop: r4 and r7 are reloaded from the stash slots 4*7(r1) and
 * 4*8(r1), and the last eight words of the doubled sum are completed.
 */
1306	lwz	4,4*7(1)
1307	lwz	7,4*8(1)
1308
1309	mullw	14,15,15
1310	mulhwu	15,15,15
1311	stw	22,4*1(5)
1312	lwz	22,4*12(1)
1313	lwz	19,4*6(5)
1314	adde	24,24,11
1315	add	26,18,18
1316	srwi	18,18,32-1
1317	or	25,25,20
1318	lwz	20,4*7(5)
1319	adde	25,25,12
1320	add	27,19,19
1321	srwi	19,19,32-1
1322	or	26,26,21
1323	mullw	16,17,17
1324	mulhwu	17,17,17
1325	adde	26,26,14
1326	add	28,20,20
1327	srwi	20,20,32-1
1328	or	27,27,18
1329	stw	23,4*2(5)
1330	lwz	23,4*13(1)
1331	adde	27,27,15
1332	or	28,28,19
/* From here r9-r17 are loaded with eight words of the array at r4. */
1333	lwz	9,4*1(4)
1334	lwz	10,4*2(4)
1335	adde	28,28,16
1336	lwz	11,4*3(4)
1337	lwz	12,4*4(4)
1338	adde	29,17,20
1339	lwz	14,4*5(4)
1340	lwz	15,4*6(4)
1341
1342
1343
/*
 * Reduction setup: r31 = low word of r7 * r22 (r22 = stack word at
 * 4*12(r1)); r30 = 8 is the CTR value; r6 = r4 + r8 is taken before the
 * lwzu advances r4 by 32; r22-r29 are reloaded from 4*12..4*19(r1);
 * r5 = r1 + 44.
 */
1344	mullw	31,7,22
1345	li	30,8
1346	lwz	16,4*7(4)
1347	add	6,4,8
1348	lwzu	17,4*8(4)
1349	stw	24,4*3(5)
1350	lwz	24,4*14(1)
1351	stw	25,4*4(5)
1352	lwz	25,4*15(1)
1353	stw	26,4*5(5)
1354	lwz	26,4*16(1)
1355	stw	27,4*6(5)
1356	lwz	27,4*17(1)
1357	stw	28,4*7(5)
1358	lwz	28,4*18(1)
1359	stw	29,4*8(5)
1360	lwz	29,4*19(1)
1361	addi	5,1,4*11
1362	mtctr	30
1363	b	.Lsqr8x_reduction
1364
/*
 * .Lsqr8x_reduction: eight passes (CTR = 8 on entry).
 * Each pass adds r31 * (r9,r10,r11,r12,r14,r15,r16,r17) into the
 * accumulator r22-r29 while shifting it down one word, pushes the
 * multiplier r31 with stwu 4(r5) and computes the next r31 = low word of
 * r7 * r22. The low product r9*r31 is not computed;
 * "addic 22,22,-1" sets CA exactly when r22 is non-zero.
 * NOTE(review): relies on that low product cancelling r22 -- confirm.
 */
1365.align	5
1366.Lsqr8x_reduction:
1367
1368	mullw	19,10,31
1369	mullw	20,11,31
1370	stwu	31,4(5)
1371	mullw	21,12,31
1372
1373	addic	22,22,-1
1374	mullw	18,14,31
1375	adde	22,23,19
1376	mullw	19,15,31
1377	adde	23,24,20
1378	mullw	20,16,31
1379	adde	24,25,21
1380	mullw	21,17,31
1381	adde	25,26,18
1382	mulhwu	18,9,31
1383	adde	26,27,19
1384	mulhwu	19,10,31
1385	adde	27,28,20
1386	mulhwu	20,11,31
1387	adde	28,29,21
1388	mulhwu	21,12,31
1389	addze	29,0
1390	addc	22,22,18
1391	mulhwu	18,14,31
1392	adde	23,23,19
1393	mulhwu	19,15,31
1394	adde	24,24,20
1395	mulhwu	20,16,31
1396	adde	25,25,21
1397	mulhwu	21,17,31
1398	mullw	31,7,22
1399	adde	26,26,18
1400	adde	27,27,19
1401	adde	28,28,20
1402	adde	29,29,21
1403	bdnz	.Lsqr8x_reduction
1404
/*
 * Add the next eight stack words (4*1..4*8(r5)) into r22-r29.
 * r3 = r5 - 28 is the base used to read back the eight pushed
 * multipliers. 0x7c062040 is the encoding of "cmplw 6,4"; its CR0 result
 * is used by the beq below.
 */
1405	lwz	18,4*1(5)
1406	lwz	19,4*2(5)
1407	lwz	20,4*3(5)
1408	lwz	21,4*4(5)
1409	subi	3,5,4*7
1410	.long	0x7c062040
1411	addc	22,22,18
1412	lwz	18,4*5(5)
1413	adde	23,23,19
1414	lwz	19,4*6(5)
1415	adde	24,24,20
1416	lwz	20,4*7(5)
1417	adde	25,25,21
1418	lwz	21,4*8(5)
1419	adde	26,26,18
1420	adde	27,27,19
1421	adde	28,28,20
1422	adde	29,29,21
1423
/* r4 == r6: the arrays are exactly eight words long. */
1424	beq	.Lsqr8x8_post_condition
1425
/*
 * Otherwise r7 = first pushed multiplier (0(r3)); load the next eight
 * words through r4 (lwzu advances r4 by 32); r30 = 0; fall through into
 * .Lsqr8x_tail.
 */
1426	lwz	7,4*0(3)
1427	lwz	9,4*1(4)
1428	lwz	10,4*2(4)
1429	lwz	11,4*3(4)
1430	lwz	12,4*4(4)
1431	lwz	14,4*5(4)
1432	lwz	15,4*6(4)
1433	lwz	16,4*7(4)
1434	lwzu	17,4*8(4)
1435	li	30,0
1436
/*
 * .Lsqr8x_tail: eight passes (r30 steps 4, 8, ..., 28, 0; the CR0 result
 * of the andi. drives the bne at the bottom).
 * Each pass adds r7 * (r9,r10,r11,r12,r14,r15,r16,r17) into r22-r29 with
 * r31 collecting the top carries, stores the lowest accumulator word with
 * stwu 4(r5), shifts the accumulator down one word and loads the next
 * multiplier r7 from r3 + r30.
 */
1437.align	5
1438.Lsqr8x_tail:
1439	mullw	18,9,7
1440	addze	31,0
1441	mullw	19,10,7
1442	addi	30,30,4
1443	mullw	20,11,7
1444	andi.	30,30,4*8-1
1445	mullw	21,12,7
1446	addc	22,22,18
1447	mullw	18,14,7
1448	adde	23,23,19
1449	mullw	19,15,7
1450	adde	24,24,20
1451	mullw	20,16,7
1452	adde	25,25,21
1453	mullw	21,17,7
1454	adde	26,26,18
1455	mulhwu	18,9,7
1456	adde	27,27,19
1457	mulhwu	19,10,7
1458	adde	28,28,20
1459	mulhwu	20,11,7
1460	adde	29,29,21
1461	mulhwu	21,12,7
1462	addze	31,31
1463	stwu	22,4(5)
1464	addc	22,23,18
1465	mulhwu	18,14,7
1466	adde	23,24,19
1467	mulhwu	19,15,7
1468	adde	24,25,20
1469	mulhwu	20,16,7
1470	adde	25,26,21
1471	mulhwu	21,17,7
1472	lwzx	7,3,30
1473	adde	26,27,18
1474	adde	27,28,19
1475	adde	28,29,20
1476	adde	29,31,21
1477
1478	bne	.Lsqr8x_tail
1479
1480
/*
 * r9-r17 = next eight stack words (4*1..4*8(r5)); r31 = word stashed at
 * 4*10(r1); r20 = r6 - r8. 0x7c062040 is the encoding of "cmplw 6,4";
 * its CR0 result is used by the beq below.
 */
1481	lwz	9,4*1(5)
1482	lwz	31,4*10(1)
1483	.long	0x7c062040
1484	lwz	10,4*2(5)
1485	sub	20,6,8
1486	lwz	11,4*3(5)
1487	lwz	12,4*4(5)
1488	lwz	14,4*5(5)
1489	lwz	15,4*6(5)
1490	lwz	16,4*7(5)
1491	lwz	17,4*8(5)
1492	beq	.Lsqr8x_tail_break
1493
/* More words remain: add the stack words, load the next eight via r4. */
1494	addc	22,22,9
1495	lwz	9,4*1(4)
1496	adde	23,23,10
1497	lwz	10,4*2(4)
1498	adde	24,24,11
1499	lwz	11,4*3(4)
1500	adde	25,25,12
1501	lwz	12,4*4(4)
1502	adde	26,26,14
1503	lwz	14,4*5(4)
1504	adde	27,27,15
1505	lwz	15,4*6(4)
1506	adde	28,28,16
1507	lwz	16,4*7(4)
1508	adde	29,29,17
1509	lwzu	17,4*8(4)
1510
1511	b	.Lsqr8x_tail
1512
/*
 * .Lsqr8x_tail_break: end of one eight-word reduction round.
 * r7 and r21 are reloaded from the stash slots 4*8(r1) and 4*9(r1);
 * r30 = r5 + 32. "addic 31,31,-1" sets CA exactly when r31 (the carry
 * word loaded from 4*10(r1) before entry) is non-zero, so the adde chain
 * below adds that saved carry together with the stack words in r9-r17.
 */
1513.align	5
1514.Lsqr8x_tail_break:
1515	lwz	7,4*8(1)
1516	lwz	21,4*9(1)
1517	addi	30,5,4*8
1518
1519	addic	31,31,-1
1520	adde	18,22,9
1521	lwz	22,4*8(3)
1522	lwz	9,4*1(20)
1523	adde	19,23,10
1524	lwz	23,4*9(3)
1525	lwz	10,4*2(20)
1526	adde	24,24,11
1527	lwz	11,4*3(20)
1528	adde	25,25,12
1529	lwz	12,4*4(20)
1530	adde	26,26,14
1531	lwz	14,4*5(20)
1532	adde	27,27,15
1533	lwz	15,4*6(20)
1534	adde	28,28,16
1535	lwz	16,4*7(20)
1536	adde	29,29,17
1537	lwz	17,4*8(20)
/*
 * r9-r17 now hold the first eight words at r20 (r6 - r8); r4 = r20 + 32.
 * r20 then becomes the carry out of the chain; it is saved at 4*10(r1)
 * below. r31 = low word of r7 * r22 is the next multiplier.
 * 0x7c1ea840 is the encoding of "cmplw 30,21"; its CR0 result is used by
 * the bne at the end of this group.
 */
1538	addi	4,20,4*8
1539	addze	20,0
1540	mullw	31,7,22
1541	stw	18,4*1(5)
1542	.long	0x7c1ea840
1543	stw	19,4*2(5)
1544	li	30,8
1545	stw	24,4*3(5)
1546	lwz	24,4*10(3)
1547	stw	25,4*4(5)
1548	lwz	25,4*11(3)
1549	stw	26,4*5(5)
1550	lwz	26,4*12(3)
1551	stw	27,4*6(5)
1552	lwz	27,4*13(3)
1553	stw	28,4*7(5)
1554	lwz	28,4*14(3)
1555	stw	29,4*8(5)
1556	lwz	29,4*15(3)
1557	stw	20,4*10(1)
1558	addi	5,3,4*7
1559	mtctr	30
1560	bne	.Lsqr8x_reduction
1561
1562
1563
1564
1565
1566
/*
 * All rounds done: set up the final subtraction.
 * r3 = output pointer stashed at 4*6(r1) (biased by one word), copied to
 * r6; r30 = (r8 >> 5) - 1 is the CTR value; r7 keeps the old r5 and r5
 * advances by 32; r31 = top carry (r20). subfc/subfe start the borrow
 * chain on the first two words.
 */
1567	lwz	3,4*6(1)
1568	srwi	30,8,5
1569	mr	7,5
1570	addi	5,5,4*8
1571	subi	30,30,1
1572	subfc	18,9,22
1573	subfe	19,10,23
1574	mr	31,20
1575	mr	6,3
1576
1577	mtctr	30
1578	b	.Lsqr8x_sub
1579
/*
 * .Lsqr8x_sub: borrow-chained subtraction, eight words per pass.
 * The words read through r4 (r9-r17) are subtracted from the words read
 * through r5 (r22-r29) and the differences are stored through r3.
 * The lwzu/stwu forms advance r4, r5 and r3 by 32.
 */
1580.align	5
1581.Lsqr8x_sub:
1582	lwz	9,4*1(4)
1583	lwz	22,4*1(5)
1584	lwz	10,4*2(4)
1585	lwz	23,4*2(5)
1586	subfe	20,11,24
1587	lwz	11,4*3(4)
1588	lwz	24,4*3(5)
1589	subfe	21,12,25
1590	lwz	12,4*4(4)
1591	lwz	25,4*4(5)
1592	stw	18,4*1(3)
1593	subfe	18,14,26
1594	lwz	14,4*5(4)
1595	lwz	26,4*5(5)
1596	stw	19,4*2(3)
1597	subfe	19,15,27
1598	lwz	15,4*6(4)
1599	lwz	27,4*6(5)
1600	stw	20,4*3(3)
1601	subfe	20,16,28
1602	lwz	16,4*7(4)
1603	lwz	28,4*7(5)
1604	stw	21,4*4(3)
1605	subfe	21,17,29
1606	lwzu	17,4*8(4)
1607	lwzu	29,4*8(5)
1608	stw	18,4*5(3)
1609	subfe	18,9,22
1610	stw	19,4*6(3)
1611	subfe	19,10,23
1612	stw	20,4*7(3)
1613	stwu	21,4*8(3)
1614	bdnz	.Lsqr8x_sub
1615
/*
 * Finish the last eight words. r30 = (r8 >> 4) - 1 is the CTR value for
 * the four-word copy loop. r9-r12 are reloaded from the output (r6) and
 * r22-r25 from the stack (r7, the lwzu advances it by 16).
 * "subfe 31,0,31" (r0 = 0) gives r31 = r31 - 1 + CA, used as the mask.
 * NOTE(review): expected to be either 0 or all-ones.
 */
1616	srwi	30,8,4
1617	lwz	9,4*1(6)
1618	lwz	22,4*1(7)
1619	subi	30,30,1
1620	lwz	10,4*2(6)
1621	lwz	23,4*2(7)
1622	subfe	20,11,24
1623	lwz	11,4*3(6)
1624	lwz	24,4*3(7)
1625	subfe	21,12,25
1626	lwz	12,4*4(6)
1627	lwzu	25,4*4(7)
1628	stw	18,4*1(3)
1629	subfe	18,14,26
1630	stw	19,4*2(3)
1631	subfe	19,15,27
1632	stw	20,4*3(3)
1633	subfe	20,16,28
1634	stw	21,4*4(3)
1635	subfe	21,17,29
1636	stw	18,4*5(3)
1637	subfe	31,0,31
1638	stw	19,4*6(3)
1639	stw	20,4*7(3)
1640	stw	21,4*8(3)
1641
1642	addi	5,1,4*11
1643	mtctr	30
1644
/*
 * .Lsqr4x_cond_copy: masked select, four words per pass.
 * out = (stack & r31) | (out & ~r31), where "out" is read and written
 * through r6 and "stack" is read through r7. During each pass the four
 * stack words just read (negative offsets from r7) and four words
 * through r5 (stwu advances r5 by 16) are overwritten with r0 (zero).
 */
1645.Lsqr4x_cond_copy:
1646	andc	9,9,31
1647	stw	0,-4*3(7)
1648	and	22,22,31
1649	stw	0,-4*2(7)
1650	andc	10,10,31
1651	stw	0,-4*1(7)
1652	and	23,23,31
1653	stw	0,-4*0(7)
1654	andc	11,11,31
1655	stw	0,4*1(5)
1656	and	24,24,31
1657	stw	0,4*2(5)
1658	andc	12,12,31
1659	stw	0,4*3(5)
1660	and	25,25,31
1661	stwu	0,4*4(5)
1662	or	18,9,22
1663	lwz	9,4*5(6)
1664	lwz	22,4*1(7)
1665	or	19,10,23
1666	lwz	10,4*6(6)
1667	lwz	23,4*2(7)
1668	or	20,11,24
1669	lwz	11,4*7(6)
1670	lwz	24,4*3(7)
1671	or	21,12,25
1672	lwz	12,4*8(6)
1673	lwzu	25,4*4(7)
1674	stw	18,4*1(6)
1675	stw	19,4*2(6)
1676	stw	20,4*3(6)
1677	stwu	21,4*4(6)
1678	bdnz	.Lsqr4x_cond_copy
1679
/* Last four words; r4 = word at 0(r1), the caller's r1 for the epilogue. */
1680	lwz	4,0(1)
1681	andc	9,9,31
1682	and	22,22,31
1683	andc	10,10,31
1684	and	23,23,31
1685	andc	11,11,31
1686	and	24,24,31
1687	andc	12,12,31
1688	and	25,25,31
1689	or	18,9,22
1690	or	19,10,23
1691	or	20,11,24
1692	or	21,12,25
1693	stw	18,4*1(6)
1694	stw	19,4*2(6)
1695	stw	20,4*3(6)
1696	stw	21,4*4(6)
1697
1698	b	.Lsqr8x_done
1699
/*
 * .Lsqr8x8_post_condition: final step when the whole result is held in
 * r22-r29 (reached when the arrays are exactly eight words long).
 * r3 = output pointer stashed at 4*6(r1) (biased by one word);
 * r4 = word at 0(r1), the caller's r1 for the epilogue; r31 = carry.
 */
1700.align	5
1701.Lsqr8x8_post_condition:
1702	lwz	3,4*6(1)
1703	lwz	4,0(1)
1704	addze	31,0
1705
1706
/*
 * r22-r29 -= r9,r10,r11,r12,r14,r15,r16,r17 with borrow, interleaved with
 * zeroing of the stack words 4*12..4*27(r1).
 * "subfe 31,0,31" (r0 = 0) gives r31 = r31 - 1 + CA, used as the mask.
 */
1707	subfc	22,9,22
1708	subfe	23,10,23
1709	stw	0,4*12(1)
1710	stw	0,4*13(1)
1711	subfe	24,11,24
1712	stw	0,4*14(1)
1713	stw	0,4*15(1)
1714	subfe	25,12,25
1715	stw	0,4*16(1)
1716	stw	0,4*17(1)
1717	subfe	26,14,26
1718	stw	0,4*18(1)
1719	stw	0,4*19(1)
1720	subfe	27,15,27
1721	stw	0,4*20(1)
1722	stw	0,4*21(1)
1723	subfe	28,16,28
1724	stw	0,4*22(1)
1725	stw	0,4*23(1)
1726	subfe	29,17,29
1727	stw	0,4*24(1)
1728	stw	0,4*25(1)
1729	subfe	31,0,31
1730	stw	0,4*26(1)
1731	stw	0,4*27(1)
1732
/* Add the subtrahend back under the mask r31, then store eight words. */
1733	and	9,9,31
1734	and	10,10,31
1735	addc	22,22,9
1736	and	11,11,31
1737	adde	23,23,10
1738	and	12,12,31
1739	adde	24,24,11
1740	and	14,14,31
1741	adde	25,25,12
1742	and	15,15,31
1743	adde	26,26,14
1744	and	16,16,31
1745	adde	27,27,15
1746	and	17,17,31
1747	adde	28,28,16
1748	adde	29,29,17
1749	stw	22,4*1(3)
1750	stw	23,4*2(3)
1751	stw	24,4*3(3)
1752	stw	25,4*4(3)
1753	stw	26,4*5(3)
1754	stw	27,4*6(3)
1755	stw	28,4*7(3)
1756	stw	29,4*8(3)
1757
/*
 * .Lsqr8x_done: common epilogue of the squaring path.
 * Clears the stash slots 4*8(r1) and 4*10(r1), sets the return value
 * r3 = 1, restores r14-r31 relative to r4 (the caller's r1) and
 * restores r1.
 */
1758.Lsqr8x_done:
1759	stw	0,4*8(1)
1760	stw	0,4*10(1)
1761
1762	lwz	14,-4*18(4)
1763	li	3,1
1764	lwz	15,-4*17(4)
1765	lwz	16,-4*16(4)
1766	lwz	17,-4*15(4)
1767	lwz	18,-4*14(4)
1768	lwz	19,-4*13(4)
1769	lwz	20,-4*12(4)
1770	lwz	21,-4*11(4)
1771	lwz	22,-4*10(4)
1772	lwz	23,-4*9(4)
1773	lwz	24,-4*8(4)
1774	lwz	25,-4*7(4)
1775	lwz	26,-4*6(4)
1776	lwz	27,-4*5(4)
1777	lwz	28,-4*4(4)
1778	lwz	29,-4*3(4)
1779	lwz	30,-4*2(4)
1780	lwz	31,-4*1(4)
1781	mr	1,4
1782	blr
/* NOTE(review): the data after blr looks like a traceback table -- confirm. */
1783.long	0
1784.byte	0,12,4,0x20,0x80,18,6,0
1785.long	0
1786.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
/*
 * NUL-terminated ASCII identification string:
 * "Montgomery Multiplication for PPC,CRYPTOGAMS by <appro@openssl.org>"
 */
1787.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1788.align	2
1789