xref: /titanic_51/usr/src/common/bignum/sun4u/mont_mulf_kernel_v9.s (revision 8de5c4f463386063e184a851437d58080c6c626c)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * This file is mostly a result of compiling the mont_mulf.c file to generate an
28 * assembly output and then hand-editing that output to replace the
29 * compiler-generated loop for the 512-bit case (nlen == 16) in the
30 * mont_mulf_noconv routine with a hand-crafted version. This file also
31 * has big_savefp() and big_restorefp() routines added by hand.
32 */
33
34#include <sys/asm_linkage.h>
35#include <sys/trap.h>
36#include <sys/stack.h>
37#include <sys/privregs.h>
38#include <sys/regset.h>
39#include <sys/vis.h>
40#include <sys/machthread.h>
41#include <sys/machtrap.h>
42#include <sys/machsig.h>
43
44#if defined(lint) || defined(__lint)
45#include <sys/types.h>
46
/*
 * Lint-only stub (compiled only when lint or __lint is defined).  It always
 * returns 0; the real routine is the assembly in the #else branch of this
 * file.
 */
47/* ARGSUSED */
48uint64_t
49double2uint64_t(double* d)
50{
51	return (0ULL);
52}
53
/*
 * Lint-only stub (compiled only when lint or __lint is defined).  The body
 * is empty; the real routine is the assembly in the #else branch of this
 * file.
 */
54/* ARGSUSED */
55void
56conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
57{
58}
59
/*
 * Lint-only stub (compiled only when lint or __lint is defined).  The body
 * is empty; the real routine is the assembly in the #else branch of this
 * file.
 */
60/* ARGSUSED */
61void
62conv_i32_to_d32(double *d32, uint32_t *i32, int len)
63{
64}
65
/*
 * Lint-only stub (compiled only when lint or __lint is defined).  The body
 * is empty; the real routine is the assembly in the #else branch of this
 * file.
 */
66/* ARGSUSED */
67void
68conv_i32_to_d16(double *d16, uint32_t *i32, int len)
69{
70}
71
/*
 * Lint-only stub (compiled only when lint or __lint is defined).  The body
 * is empty.  According to the comment at the top of this file, the real
 * routine is compiler output whose loop for nlen == 16 was replaced by a
 * hand-written version.
 */
72/* ARGSUSED */
73void
74mont_mulf_noconv(uint32_t *result, double *dm1, double *dm2, double *dt,
75    double *dn, uint32_t *nint, int nlen, double dn0)
76{
77}
78
79#else	/* lint || __lint */
80
81	.section	".text",#alloc,#execinstr
82	.file	"mont_mulf.c"
83
84	.section	".bss",#alloc,#write
85Bbss.bss:
86
87	.section	".data",#alloc,#write
88Ddata.data:
89
90	.section	".rodata",#alloc
91!
92! CONSTANT POOL
93!
/*
 * Read-only double-precision constants, each stored as two 32-bit words
 * (upper word first).  They correspond to the "static const double"
 * definitions quoted from mont_mulf.c further down in this file.  Each
 * symbol is declared .global twice; the repetition is redundant.
 */
94Drodata.rodata:
95	.global	TwoTo16
96	.align	8
97!
98! CONSTANT POOL
99!
100	.global TwoTo16
101TwoTo16:
/* 1089470464 = 0x40F00000: biased exponent 0x40F, i.e. 2^16 = 65536.0 */
102	.word	1089470464
103	.word	0
104	.type	TwoTo16,#object
105	.size	TwoTo16,8
106	.global	TwoToMinus16
107!
108! CONSTANT POOL
109!
110	.global TwoToMinus16
111TwoToMinus16:
/* 1055916032 = 0x3EF00000: biased exponent 0x3EF, i.e. 2^-16 */
112	.word	1055916032
113	.word	0
114	.type	TwoToMinus16,#object
115	.size	TwoToMinus16,8
116	.global	Zero
117!
118! CONSTANT POOL
119!
120	.global Zero
121Zero:
/* All bits clear: +0.0 */
122	.word	0
123	.word	0
124	.type	Zero,#object
125	.size	Zero,8
126	.global	TwoTo32
127!
128! CONSTANT POOL
129!
130	.global TwoTo32
131TwoTo32:
/* 1106247680 = 0x41F00000: biased exponent 0x41F, i.e. 2^32 */
132	.word	1106247680
133	.word	0
134	.type	TwoTo32,#object
135	.size	TwoTo32,8
136	.global	TwoToMinus32
137!
138! CONSTANT POOL
139!
140	.global TwoToMinus32
141TwoToMinus32:
/* 1039138816 = 0x3DF00000: biased exponent 0x3DF, i.e. 2^-32 */
142	.word	1039138816
143	.word	0
144	.type	TwoToMinus32,#object
145	.size	TwoToMinus32,8
146
147	.section	".text",#alloc,#execinstr
148/* 000000	   0 */		.register	%g3,#scratch
149/* 000000	     */		.register	%g2,#scratch
150/* 000000	   0 */		.align	32
151! FILE mont_mulf.c
152
153!    1		      !/*
154!    2		      ! * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
155!    3		      ! * Use is subject to license terms.
156!    4		      ! */
157!    6		      !#pragma ident	"@(#)mont_mulf.c	1.2	01/09/24 SMI"
158!    9		      !/*
159!   10		      ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
160!   11		      ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
161!   12		      ! * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
162!   13		      ! */
163!   15		      !#include <sys/types.h>
164!   16		      !#include <math.h>
165!   18		      !static const double TwoTo16 = 65536.0;
166!   19		      !static const double TwoToMinus16 = 1.0/65536.0;
167!   20		      !static const double Zero = 0.0;
168!   21		      !static const double TwoTo32 = 65536.0 * 65536.0;
169!   22		      !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
170!   24		      !#ifdef RF_INLINE_MACROS
171!   26		      !double upper32(double);
172!   27		      !double lower32(double, double);
173!   28		      !double mod(double, double, double);
174!   30		      !#else
175!   32		      !static double
176!   33		      !upper32(double x)
177!   34		      !{
178!   35		      !	return (floor(x * TwoToMinus32));
179!   36		      !}
180!   39		      !/* ARGSUSED */
181!   40		      !static double
182!   41		      !lower32(double x, double y)
183!   42		      !{
184!   43		      !	return (x - TwoTo32 * floor(x * TwoToMinus32));
185!   44		      !}
186!   46		      !static double
187!   47		      !mod(double x, double oneoverm, double m)
188!   48		      !{
189!   49		      !	return (x - m * floor(x * oneoverm));
190!   50		      !}
191!   52		      !#endif
192!   55		      !static void
193!   56		      !cleanup(double *dt, int from, int tlen)
194!   57		      !{
195
196!
197! SUBROUTINE cleanup
198!
199! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
200
/*
 * static void cleanup(double *dt, int from, int tlen)
 *
 * Compiler output for the C routine quoted in the "!" comments.  For
 * i = 2*from, 2*from+2, ... while i < 2*tlen, the pair dt[i], dt[i+1] is
 * replaced by its low 32 bits plus the upper part carried over from the
 * previous pair (both carries start at Zero).
 *
 * In:      %o0 = dt, %o1 = from, %o2 = tlen
 * Scratch: %o1, %o3-%o5, %g1-%g5, even-numbered %f0-%f18
 * Leaf routine: no register window is allocated; it returns with retl.
 *
 * lower32() and upper32() appear in inlined form.  The value is turned
 * into a 64-bit integer with fdtox.  lower32 overwrites the upper word of
 * that integer with a zero word (fmovs from %f8, loaded from Zero) and
 * converts back with fxtod; upper32 converts the upper word alone with
 * fitod.
 */
201                       cleanup:
202/* 000000	  57 */		sra	%o1,0,%o4
203/* 0x0004	     */		sra	%o2,0,%o5
204
205!   58		      !	int i;
206!   59		      !	double tmp, tmp1, x, x1;
207!   61		      !	tmp = tmp1 = Zero;
208
209/* 0x0008	  61 */		sll	%o5,1,%g5
210
211!   63		      !	for (i = 2 * from; i < 2 * tlen; i += 2) {
212
213/* 0x000c	  63 */		sll	%o4,1,%g3
214/* 0x0010	     */		cmp	%g3,%g5
/* Nothing to do when 2*from >= 2*tlen; the sethi below is the delay slot. */
215/* 0x0014	     */		bge,pn	%icc,.L77000188
216/* 0x0018	   0 */		sethi	%hi(Zero),%o3
/*
 * Loop set-up: %f8 = Zero, %g2 = 2*tlen - 1 (loop bound), %g1 = &dt[i],
 * %f10 = dt[i], %f18 = tmp = Zero, %f16 = tmp1 = Zero.
 */
217                       .L77000197:
218/* 0x001c	  63 */		ldd	[%o3+%lo(Zero)],%f8
219/* 0x0020	     */		sra	%g3,0,%o1
220/* 0x0024	     */		sub	%g5,1,%g2
221/* 0x0028	     */		sllx	%o1,3,%g4
222
223!   64		      !		x = dt[i];
224
225/* 0x002c	  64 */		ldd	[%g4+%o0],%f10
226/* 0x0030	  63 */		add	%g4,%o0,%g1
227/* 0x0034	     */		fmovd	%f8,%f18
228/* 0x0038	     */		fmovd	%f8,%f16
229
230!   65		      !		x1 = dt[i + 1];
231!   66		      !		dt[i] = lower32(x, Zero) + tmp;
232
/*
 * Loop body.  %g3 = i, %f10 = x, %f12 = x1.  %f0 and %f2 become the
 * lower32 values; %f4 and %f6 keep the full 64-bit integers so that fitod
 * can pick up their upper words as the next tmp (%f18) and tmp1 (%f16).
 */
233                       .L900000110:
234/* 0x003c	  66 */		fdtox	%f10,%f0
235/* 0x0040	  65 */		ldd	[%g1+8],%f12
236
237!   67		      !		dt[i + 1] = lower32(x1, Zero) + tmp1;
238!   68		      !		tmp = upper32(x);
239!   69		      !		tmp1 = upper32(x1);
240
241/* 0x0044	  69 */		add	%g3,2,%g3
242/* 0x0048	     */		cmp	%g3,%g2
243/* 0x004c	  67 */		fdtox	%f12,%f2
244/* 0x0050	  68 */		fmovd	%f0,%f4
245/* 0x0054	  66 */		fmovs	%f8,%f0
246/* 0x0058	  67 */		fmovs	%f8,%f2
247/* 0x005c	  66 */		fxtod	%f0,%f0
248/* 0x0060	  67 */		fxtod	%f2,%f2
249/* 0x0064	  69 */		fdtox	%f12,%f6
250/* 0x0068	  66 */		faddd	%f0,%f18,%f10
251/* 0x006c	     */		std	%f10,[%g1]
252/* 0x0070	  67 */		faddd	%f2,%f16,%f14
253/* 0x0074	     */		std	%f14,[%g1+8]
254/* 0x0078	  68 */		fitod	%f4,%f18
255/* 0x007c	  69 */		add	%g1,16,%g1
256/* 0x0080	     */		fitod	%f6,%f16
/* Annulled delay slot: the next dt[i] is loaded only if the loop repeats. */
257/* 0x0084	     */		ble,a,pt	%icc,.L900000110
258/* 0x0088	  64 */		ldd	[%g1],%f10
259                       .L77000188:
260/* 0x008c	  69 */		retl	! Result =
261/* 0x0090	     */		nop
262/* 0x0094	   0 */		.type	cleanup,2
263/* 0x0094	   0 */		.size	cleanup,(.-cleanup)
264
265	.section	".text",#alloc,#execinstr
266/* 000000	   0 */		.align	8
267/* 000000	     */		.skip	24
268/* 0x0018	     */		.align	32
269
270!   70		      !	}
271!   71		      !}
272!   75		      !#ifdef _KERNEL
273!   76		      !/*
274!   77		      ! * This only works if  0 <= d < 2^53
275!   78		      ! */
276!   79		      !uint64_t
277!   80		      !double2uint64_t(double* d)
278!   81		      !{
279!   82		      !	uint64_t x;
280!   83		      !	uint64_t exp;
281!   84		      !	uint64_t man;
282!   86		      !	x = *((uint64_t *)d);
283
284!
285! SUBROUTINE double2uint64_t
286!
287! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
288
/*
 * uint64_t double2uint64_t(double *d)
 *
 * Compiler output for the _KERNEL variant of the C routine quoted in the
 * "!" comments: it extracts the integer value of *d using integer
 * instructions only.
 *
 * In:      %o0 = d
 * Out:     %o0 = 0 when the raw 64-bit pattern of *d is 0, otherwise
 *                (mantissa | 1 << 52) >> (52 - (exponent - 1023))
 * Scratch: %o1-%o5, %g1-%g5
 * Leaf routine: no register window is allocated; it returns with retl.
 *
 * The sign bit is not masked off before the exponent is computed
 * (x >> 52 keeps all twelve top bits), which matches the quoted C comment
 * restricting the input to 0 <= d < 2^53.
 */
289                       	.global double2uint64_t
290                       double2uint64_t:
291/* 000000	  86 */		ldx	[%o0],%o2
292
293!   87		      !	if (x == 0) {
294
295/* 0x0004	  87 */		cmp	%o2,0
/* The sethi is in the delay slot, so %o5 is set up on both paths. */
296/* 0x0008	     */		bne,pn	%xcc,.L900000206
297/* 0x000c	  94 */		sethi	%hi(0xfff00000),%o5
298                       .L77000202:
299/* 0x0010	  94 */		retl	! Result =  %o0
300
301!   88		      !		return (0ULL);
302
303/* 0x0014	  88 */		or	%g0,0,%o0
304
305!   89		      !	}
306!   90		      !	exp = (x >> 52) - 1023;
307!   91		      !	man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
308!   92		      !	x = man >> (52 - exp);
309!   94		      !	return (x);
310
/*
 * %o4 = 0xfff0000000000000, %o3 = ~%o4 = mantissa mask,
 * %g4 = 0x40000000 << 22 = 1 << 52 (the implicit leading one),
 * %g3 = 1023 - (x >> 52), %g1 = %g3 + 52 = 52 - exp (the shift count).
 * The final srlx runs in the delay slot of retl.
 */
311                       .L900000206:
312/* 0x0018	  94 */		sllx	%o5,32,%o4
313/* 0x001c	     */		srlx	%o2,52,%o0
314/* 0x0020	     */		sethi	%hi(0x40000000),%o1
315/* 0x0024	     */		or	%g0,1023,%g5
316/* 0x0028	     */		sllx	%o1,22,%g4
317/* 0x002c	     */		xor	%o4,-1,%o3
318/* 0x0030	     */		sub	%g5,%o0,%g3
319/* 0x0034	     */		and	%o2,%o3,%g2
320/* 0x0038	     */		or	%g2,%g4,%o5
321/* 0x003c	     */		add	%g3,52,%g1
322/* 0x0040	     */		retl	! Result =  %o0
323/* 0x0044	     */		srlx	%o5,%g1,%o0
324/* 0x0048	   0 */		.type	double2uint64_t,2
325/* 0x0048	   0 */		.size	double2uint64_t,(.-double2uint64_t)
326
327	.section	".text",#alloc,#execinstr
328/* 000000	   0 */		.align	8
329/* 000000	     */		.skip	24
330/* 0x0018	     */		.align	32
331
332!   95		      !}
333!   96		      !#else
334!   97		      !/*
335!   98		      ! * This only works if  0 <= d < 2^63
336!   99		      ! */
337!  100		      !uint64_t
338!  101		      !double2uint64_t(double* d)
339!  102		      !{
340!  103		      !	return ((int64_t)(*d));
341!  104		      !}
342!  105		      !#endif
343!  107		      !/* ARGSUSED */
344!  108		      !void
345!  109		      !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
346!  110		      !{
347
348!
349! SUBROUTINE conv_d16_to_i32
350!
351! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
352
/*
 * void conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
 *
 * Compiler output for the C routine quoted in the "!" comments.  Every
 * double2uint64_t() call is expanded inline: a test for an all-zero bit
 * pattern, then mantissa extraction and a right shift by 52 - exponent.
 *
 * In (after the save): %i0 = i32, %i1 = d16, %i2 = tmp, %i3 = ilen
 *     tmp is never read; %i2 is overwritten with the raw bits of d16[1]
 *     by the second instruction after the save.
 *
 * Register roles in the main loop (.L77000208):
 *     %i4 = a, %i2 = b, %l1 = c, %o2 = d, %o3 = t1
 *     %l4 = i, %l5 = ilen - 2 (the loop repeats while i <= %l5)
 *     %g1 = &i32[i], %g2 = 2 * i + 2 (index into d16)
 *     %l6 = 0xffffffff, %l2 = 0xffff
 *     %l7 = 0x000fffffffffffff (mantissa mask), %l3 = 1 << 52
 */
353                       	.global conv_d16_to_i32
354                       conv_d16_to_i32:
355/* 000000	 110 */		save	%sp,-176,%sp
356
357!  111		      !	int i;
358!  112		      !	int64_t t, t1,		/* using int64_t and not uint64_t */
359!  113		      !		a, b, c, d;	/* because more efficient code is */
360!  114		      !				/* generated this way, and there  */
361!  115		      !				/* is no overflow  */
362!  116		      !	t1 = 0;
363!  117		      !	a = double2uint64_t(&(d16[0]));
364
/* %o0 = raw bits of d16[0], %i2 = raw bits of d16[1]; %i4 (a) defaults to 0. */
365/* 0x0004	 117 */		ldx	[%i1],%o0
366/* 0x0008	 118 */		ldx	[%i1+8],%i2
367/* 0x000c	 117 */		cmp	%o0,0
368/* 0x0010	     */		bne,pn	%xcc,.L77000216
369/* 0x0014	     */		or	%g0,0,%i4
370                       .L77000215:
371/* 0x0018	 117 */		ba	.L900000316
372/* 0x001c	 118 */		cmp	%i2,0
/* Inlined double2uint64_t for a (result in %i4). */
373                       .L77000216:
374/* 0x0020	 117 */		srlx	%o0,52,%o5
375/* 0x0024	     */		sethi	%hi(0xfff00000),%i4
376/* 0x0028	     */		sllx	%i4,32,%o2
377/* 0x002c	     */		sethi	%hi(0x40000000),%o7
378/* 0x0030	     */		sllx	%o7,22,%o3
379/* 0x0034	     */		or	%g0,1023,%o4
380/* 0x0038	     */		xor	%o2,-1,%g5
381/* 0x003c	     */		sub	%o4,%o5,%l0
382/* 0x0040	     */		and	%o0,%g5,%o1
383/* 0x0044	     */		add	%l0,52,%l1
384/* 0x0048	     */		or	%o1,%o3,%g4
385
386!  118		      !	b = double2uint64_t(&(d16[1]));
387
388/* 0x004c	 118 */		cmp	%i2,0
389/* 0x0050	 117 */		srlx	%g4,%l1,%i4
/* %l3 = ilen - 1 is computed in the delay slot on both paths. */
390                       .L900000316:
391/* 0x0054	 118 */		bne,pn	%xcc,.L77000222
392/* 0x0058	 134 */		sub	%i3,1,%l3
393                       .L77000221:
394/* 0x005c	 118 */		or	%g0,0,%i2
395/* 0x0060	     */		ba	.L900000315
396/* 0x0064	 116 */		or	%g0,0,%o3
/* Inlined double2uint64_t for b (result in %i2); t1 (%o3) is cleared. */
397                       .L77000222:
398/* 0x0068	 118 */		srlx	%i2,52,%l6
399/* 0x006c	     */		sethi	%hi(0xfff00000),%g4
400/* 0x0070	     */		sllx	%g4,32,%i5
401/* 0x0074	     */		sethi	%hi(0x40000000),%l5
402/* 0x0078	     */		xor	%i5,-1,%l4
403/* 0x007c	     */		or	%g0,1023,%l2
404/* 0x0080	     */		and	%i2,%l4,%l7
405/* 0x0084	     */		sllx	%l5,22,%i2
406/* 0x0088	     */		sub	%l2,%l6,%g1
407/* 0x008c	     */		or	%l7,%i2,%g3
408/* 0x0090	     */		add	%g1,52,%g2
409/* 0x0094	 116 */		or	%g0,0,%o3
410/* 0x0098	 118 */		srlx	%g3,%g2,%i2
411
412!  119		      !	for (i = 0; i < ilen - 1; i++) {
413
/* Skip the loop when ilen - 1 <= 0; i (%l4) is cleared in the delay slot. */
414                       .L900000315:
415/* 0x009c	 119 */		cmp	%l3,0
416/* 0x00a0	     */		ble,pn	%icc,.L77000210
417/* 0x00a4	     */		or	%g0,0,%l4
/* Loop-invariant masks and bounds; see the register roles listed above. */
418                       .L77000245:
419/* 0x00a8	 118 */		sethi	%hi(0xfff00000),%l7
420/* 0x00ac	     */		or	%g0,-1,%l6
421/* 0x00b0	     */		sllx	%l7,32,%l3
422/* 0x00b4	     */		srl	%l6,0,%l6
423/* 0x00b8	     */		sethi	%hi(0x40000000),%l1
424/* 0x00bc	     */		sethi	%hi(0xfc00),%l2
425/* 0x00c0	     */		xor	%l3,-1,%l7
426/* 0x00c4	     */		sllx	%l1,22,%l3
427/* 0x00c8	     */		sub	%i3,2,%l5
428/* 0x00cc	     */		add	%l2,1023,%l2
429/* 0x00d0	     */		or	%g0,2,%g2
430/* 0x00d4	     */		or	%g0,%i0,%g1
431
432!  120		      !		c = double2uint64_t(&(d16[2 * i + 2]));
433
/*
 * Loop body.  %o5 and %i3 receive the raw bits of d16[2*i+2] and
 * d16[2*i+3]; %g4 = a & 0xffffffff and %i5 = b & 0xffff are formed while
 * the loads are outstanding.
 */
434                       .L77000208:
435/* 0x00d8	 120 */		sra	%g2,0,%g3
436/* 0x00dc	 123 */		add	%g2,1,%o2
437/* 0x00e0	 120 */		sllx	%g3,3,%i3
438
439!  121		      !		t1 += a & 0xffffffff;
440!  122		      !		t = (a >> 32);
441!  123		      !		d = double2uint64_t(&(d16[2 * i + 3]));
442
443/* 0x00e4	 123 */		sra	%o2,0,%g5
444/* 0x00e8	 120 */		ldx	[%i1+%i3],%o5
445/* 0x00ec	 123 */		sllx	%g5,3,%o0
446/* 0x00f0	 121 */		and	%i4,%l6,%g4
447/* 0x00f4	 123 */		ldx	[%i1+%o0],%i3
448/* 0x00f8	 120 */		cmp	%o5,0
449/* 0x00fc	     */		bne,pn	%xcc,.L77000228
450/* 0x0100	 124 */		and	%i2,%l2,%i5
451                       .L77000227:
452/* 0x0104	 120 */		or	%g0,0,%l1
453/* 0x0108	     */		ba	.L900000314
454/* 0x010c	 121 */		add	%o3,%g4,%o0
/* Inlined double2uint64_t for c (result in %l1). */
455                       .L77000228:
456/* 0x0110	 120 */		srlx	%o5,52,%o7
457/* 0x0114	     */		and	%o5,%l7,%o5
458/* 0x0118	     */		or	%g0,52,%l0
459/* 0x011c	     */		sub	%o7,1023,%o4
460/* 0x0120	     */		or	%o5,%l3,%l1
461/* 0x0124	     */		sub	%l0,%o4,%o1
462/* 0x0128	     */		srlx	%l1,%o1,%l1
463/* 0x012c	 121 */		add	%o3,%g4,%o0
/* %g3 = t = a >> 32; %g5 = (b & 0xffff) << 16. */
464                       .L900000314:
465/* 0x0130	 122 */		srax	%i4,32,%g3
466/* 0x0134	 123 */		cmp	%i3,0
467/* 0x0138	     */		bne,pn	%xcc,.L77000234
468/* 0x013c	 124 */		sllx	%i5,16,%g5
469                       .L77000233:
470/* 0x0140	 123 */		or	%g0,0,%o2
471/* 0x0144	     */		ba	.L900000313
472/* 0x0148	 124 */		add	%o0,%g5,%o7
/* Inlined double2uint64_t for d (result in %o2). */
473                       .L77000234:
474/* 0x014c	 123 */		srlx	%i3,52,%o2
475/* 0x0150	     */		and	%i3,%l7,%i4
476/* 0x0154	     */		sub	%o2,1023,%o1
477/* 0x0158	     */		or	%g0,52,%g4
478/* 0x015c	     */		sub	%g4,%o1,%i5
479/* 0x0160	     */		or	%i4,%l3,%i3
480/* 0x0164	     */		srlx	%i3,%i5,%o2
481
482!  124		      !		t1 += (b & 0xffff) << 16;
483
484/* 0x0168	 124 */		add	%o0,%g5,%o7
485
486!  125		      !		t += (b >> 16) + (t1 >> 32);
487
/*
 * %o7 holds the updated t1.  Its low 32 bits are stored to i32[i]; the new
 * t1 (%o3) becomes t + (b >> 16) + (t1 >> 32).  Then a = c, b = d, and the
 * index and pointer advance.  The pointer bump is in the branch delay slot.
 */
488                       .L900000313:
489/* 0x016c	 125 */		srax	%i2,16,%l0
490/* 0x0170	     */		srax	%o7,32,%o4
491/* 0x0174	     */		add	%l0,%o4,%o3
492
493!  126		      !		i32[i] = t1 & 0xffffffff;
494!  127		      !		t1 = t;
495!  128		      !		a = c;
496!  129		      !		b = d;
497
498/* 0x0178	 129 */		add	%l4,1,%l4
499/* 0x017c	 126 */		and	%o7,%l6,%o5
500/* 0x0180	 125 */		add	%g3,%o3,%o3
501/* 0x0184	 126 */		st	%o5,[%g1]
502/* 0x0188	 128 */		or	%g0,%l1,%i4
503/* 0x018c	 129 */		or	%g0,%o2,%i2
504/* 0x0190	     */		add	%g2,2,%g2
505/* 0x0194	     */		cmp	%l4,%l5
506/* 0x0198	     */		ble,pt	%icc,.L77000208
507/* 0x019c	     */		add	%g1,4,%g1
508
509!  130		      !	}
510!  131		      !	t1 += a & 0xffffffff;
511!  132		      !	t = (a >> 32);
512!  133		      !	t1 += (b & 0xffff) << 16;
513!  134		      !	i32[i] = t1 & 0xffffffff;
514
/*
 * Final element: i32[i] = t1 + a + ((b & 0xffff) << 16).  No explicit
 * 32-bit masks are applied here; st writes a single 32-bit word.
 */
515                       .L77000210:
516/* 0x01a0	 134 */		sra	%l4,0,%l4
517/* 0x01a4	     */		sethi	%hi(0xfc00),%i1
518/* 0x01a8	     */		add	%o3,%i4,%l2
519/* 0x01ac	     */		add	%i1,1023,%i5
520/* 0x01b0	     */		and	%i2,%i5,%l5
521/* 0x01b4	     */		sllx	%l4,2,%i2
522/* 0x01b8	     */		sllx	%l5,16,%l6
523/* 0x01bc	     */		add	%l2,%l6,%l7
524/* 0x01c0	     */		st	%l7,[%i0+%i2]
525/* 0x01c4	 129 */		ret	! Result =
526/* 0x01c8	     */		restore	%g0,%g0,%g0
527/* 0x01cc	   0 */		.type	conv_d16_to_i32,2
528/* 0x01cc	   0 */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
529
530	.section	".text",#alloc,#execinstr
531/* 000000	   0 */		.align	8
532!
533! CONSTANT POOL
534!
/*
 * Constants used by conv_i32_to_d32.  1127219200 = 0x43300000 is the upper
 * word of the double 2^52.
 *   +0: the complete double 2^52 (0x43300000, 0x00000000)
 *   +8: the upper word alone, loaded into the upper half of an FP register
 *       pair whose lower half receives a 32-bit integer
 */
535                       ___const_seg_900000401:
536/* 000000	   0 */		.word	1127219200,0
537/* 0x0008	     */		.word	1127219200
538/* 0x000c	   0 */		.type	___const_seg_900000401,1
539/* 0x000c	   0 */		.size	___const_seg_900000401,(.-___const_seg_900000401)
540/* 0x000c	   0 */		.align	8
541/* 0x0010	     */		.skip	24
542/* 0x0028	     */		.align	32
543
544!  135		      !}
545!  138		      !void
546!  139		      !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
547!  140		      !{
548
549!
550! SUBROUTINE conv_i32_to_d32
551!
552! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
553
/*
 * void conv_i32_to_d32(double *d32, uint32_t *i32, int len)
 *
 * Compiler output for the C routine quoted in the "!" comments:
 * d32[i] = (double)i32[i] for 0 <= i < len.
 *
 * In:  %o0 = d32, %o1 = i32, %o2 = len
 * Leaf routine: no register window is allocated; it returns at once when
 * len <= 0.
 *
 * Conversion method: the word 0x43300000 (upper word of 2^52) is loaded
 * into the even half of an FP register pair and the 32-bit input word into
 * the odd half, so the pair reads as the double 2^52 + value.  Subtracting
 * 2^52 (kept in %f16) with fsubd leaves the value as a double.
 *
 * Structure:
 *   len < 10   -> one element per iteration, loop at .L900000409
 *   len >= 10  -> software-pipelined loop at .L900000405 that stores eight
 *                 results per iteration with two more elements already
 *                 loaded, a drain at .L900000408 for those two, then the
 *                 one-element loop for whatever remains
 *
 * In the pipelined path %o2 is the destination pointer and %o0 is reused
 * as the source pointer; %g5 counts elements loaded so far and %o3 is
 * len - 1.
 *
 * NOTE(review): prefetch function code 0 is issued on the source stream and
 * 22 on the destination stream; 22 is an implementation-dependent code, so
 * confirm its meaning for the target CPU.
 */
554                       	.global conv_i32_to_d32
555                       conv_i32_to_d32:
556/* 000000	 140 */		orcc	%g0,%o2,%o2
557
558!  141		      !	int i;
559!  143		      !#pragma pipeloop(0)
560!  144		      !	for (i = 0; i < len; i++)
561
562/* 0x0004	 144 */		ble,pn	%icc,.L77000254
563/* 0x0008	     */		sub	%o2,1,%o3
564                       .L77000263:
565/* 0x000c	 140 */		or	%g0,%o0,%o2
566
567!  145		      !		d32[i] = (double)(i32[i]);
568
/* %o5 = len; fewer than 10 elements go straight to the simple loop. */
569/* 0x0010	 145 */		add	%o3,1,%o5
570/* 0x0014	 144 */		or	%g0,0,%g5
571/* 0x0018	 145 */		cmp	%o5,10
572/* 0x001c	     */		bl,pn	%icc,.L77000261
573/* 0x0020	     */		sethi	%hi(___const_seg_900000401),%g4
/*
 * Pipelined path prologue: issue prefetches, load 2^52 into %f16, preload
 * i32[0] into %f2/%f3 and i32[1] into %f0/%f1, and set %o5 = len - 8 as
 * the bound for the unrolled loop.
 */
574                       .L900000407:
575/* 0x0024	 145 */		prefetch	[%o1],0
576/* 0x0028	     */		prefetch	[%o0],22
577/* 0x002c	     */		sethi	%hi(___const_seg_900000401+8),%o4
578/* 0x0030	     */		or	%g0,%o0,%o2
579/* 0x0034	     */		prefetch	[%o1+64],0
580/* 0x0038	     */		add	%o1,8,%o0
581/* 0x003c	     */		sub	%o3,7,%o5
582/* 0x0040	     */		prefetch	[%o2+64],22
583/* 0x0044	     */		or	%g0,2,%g5
584/* 0x0048	     */		prefetch	[%o2+128],22
585/* 0x004c	     */		prefetch	[%o2+192],22
586/* 0x0050	     */		prefetch	[%o1+128],0
587/* 0x0054	     */		ld	[%o4+%lo(___const_seg_900000401+8)],%f2
588/* 0x0058	     */		ldd	[%g4+%lo(___const_seg_900000401)],%f16
589/* 0x005c	     */		fmovs	%f2,%f0
590/* 0x0060	     */		prefetch	[%o2+256],22
591/* 0x0064	     */		prefetch	[%o2+320],22
592/* 0x0068	     */		ld	[%o1],%f3
593/* 0x006c	     */		prefetch	[%o1+192],0
594/* 0x0070	     */		ld	[%o1+4],%f1
/*
 * Unrolled loop: each iteration advances the source by 32 bytes and the
 * destination by 64 bytes, storing eight doubles.  The upper-word constant
 * is propagated from pair to pair with fmovs.  The loop repeats while
 * %g5 <= len - 8.
 */
595                       .L900000405:
596/* 0x0074	 145 */		prefetch	[%o0+188],0
597/* 0x0078	     */		fsubd	%f2,%f16,%f22
598/* 0x007c	     */		add	%g5,8,%g5
599/* 0x0080	     */		add	%o0,32,%o0
600/* 0x0084	     */		ld	[%o4+%lo(___const_seg_900000401+8)],%f4
601/* 0x0088	     */		std	%f22,[%o2]
602/* 0x008c	     */		cmp	%g5,%o5
603/* 0x0090	     */		ld	[%o0-32],%f5
604/* 0x0094	     */		fsubd	%f0,%f16,%f24
605/* 0x0098	     */		add	%o2,64,%o2
606/* 0x009c	     */		fmovs	%f4,%f0
607/* 0x00a0	     */		std	%f24,[%o2-56]
608/* 0x00a4	     */		ld	[%o0-28],%f1
609/* 0x00a8	     */		fsubd	%f4,%f16,%f26
610/* 0x00ac	     */		fmovs	%f0,%f6
611/* 0x00b0	     */		prefetch	[%o2+312],22
612/* 0x00b4	     */		std	%f26,[%o2-48]
613/* 0x00b8	     */		ld	[%o0-24],%f7
614/* 0x00bc	     */		fsubd	%f0,%f16,%f28
615/* 0x00c0	     */		fmovs	%f6,%f8
616/* 0x00c4	     */		std	%f28,[%o2-40]
617/* 0x00c8	     */		ld	[%o0-20],%f9
618/* 0x00cc	     */		fsubd	%f6,%f16,%f30
619/* 0x00d0	     */		fmovs	%f8,%f10
620/* 0x00d4	     */		std	%f30,[%o2-32]
621/* 0x00d8	     */		ld	[%o0-16],%f11
622/* 0x00dc	     */		prefetch	[%o2+344],22
623/* 0x00e0	     */		fsubd	%f8,%f16,%f48
624/* 0x00e4	     */		fmovs	%f10,%f12
625/* 0x00e8	     */		std	%f48,[%o2-24]
626/* 0x00ec	     */		ld	[%o0-12],%f13
627/* 0x00f0	     */		fsubd	%f10,%f16,%f50
628/* 0x00f4	     */		fmovs	%f12,%f2
629/* 0x00f8	     */		std	%f50,[%o2-16]
630/* 0x00fc	     */		ld	[%o0-8],%f3
631/* 0x0100	     */		fsubd	%f12,%f16,%f52
632/* 0x0104	     */		fmovs	%f2,%f0
633/* 0x0108	     */		std	%f52,[%o2-8]
634/* 0x010c	     */		ble,pt	%icc,.L900000405
635/* 0x0110	     */		ld	[%o0-4],%f1
/*
 * Drain: convert and store the two elements still held in %f2/%f3 and
 * %f0/%f1, hand the source pointer back to %o1, and finish when
 * %g5 > len - 1.
 */
636                       .L900000408:
637/* 0x0114	 145 */		fsubd	%f2,%f16,%f18
638/* 0x0118	     */		add	%o2,16,%o2
639/* 0x011c	     */		cmp	%g5,%o3
640/* 0x0120	     */		std	%f18,[%o2-16]
641/* 0x0124	     */		fsubd	%f0,%f16,%f20
642/* 0x0128	     */		or	%g0,%o0,%o1
643/* 0x012c	     */		bg,pn	%icc,.L77000254
644/* 0x0130	     */		std	%f20,[%o2-8]
/*
 * Simple loop: one element per iteration through %f14/%f15.  The annulled
 * delay slot loads the next input word only when the loop repeats.
 */
645                       .L77000261:
646/* 0x0134	 145 */		ld	[%o1],%f15
647                       .L900000409:
648/* 0x0138	 145 */		sethi	%hi(___const_seg_900000401+8),%o4
649/* 0x013c	     */		ldd	[%g4+%lo(___const_seg_900000401)],%f16
650/* 0x0140	     */		add	%g5,1,%g5
651/* 0x0144	     */		ld	[%o4+%lo(___const_seg_900000401+8)],%f14
652/* 0x0148	     */		add	%o1,4,%o1
653/* 0x014c	     */		cmp	%g5,%o3
654/* 0x0150	     */		fsubd	%f14,%f16,%f54
655/* 0x0154	     */		std	%f54,[%o2]
656/* 0x0158	     */		add	%o2,8,%o2
657/* 0x015c	     */		ble,a,pt	%icc,.L900000409
658/* 0x0160	     */		ld	[%o1],%f15
659                       .L77000254:
660/* 0x0164	 145 */		retl	! Result =
661/* 0x0168	     */		nop
662/* 0x016c	   0 */		.type	conv_i32_to_d32,2
663/* 0x016c	   0 */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
664
665	.section	".text",#alloc,#execinstr
666/* 000000	   0 */		.align	8
667!
668! CONSTANT POOL
669!
/*
 * Constants used by conv_i32_to_d16.  1127219200 = 0x43300000 is the upper
 * word of the double 2^52.
 *   +0: the complete double 2^52 (0x43300000, 0x00000000)
 *   +8: the upper word alone, loaded into the upper half of an FP register
 *       pair whose lower half receives a 16-bit quantity
 */
670                       ___const_seg_900000501:
671/* 000000	   0 */		.word	1127219200,0
672/* 0x0008	     */		.word	1127219200
673/* 0x000c	   0 */		.type	___const_seg_900000501,1
674/* 0x000c	   0 */		.size	___const_seg_900000501,(.-___const_seg_900000501)
675/* 0x000c	   0 */		.align	8
676/* 0x0010	     */		.skip	24
677/* 0x0028	     */		.align	32
678
679!  146		      !}
680!  149		      !void
681!  150		      !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
682!  151		      !{
683
684!
685! SUBROUTINE conv_i32_to_d16
686!
687! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
688
/*
 * void conv_i32_to_d16(double *d16, uint32_t *i32, int len)
 *
 * Compiler output for the C routine quoted in the "!" comments.  For each
 * input word a = i32[i]:
 *     d16[2*i]     = (double)(a & 0xffff)
 *     d16[2*i + 1] = (double)(a >> 16)
 *
 * In (after the save): %i0 = d16, %i1 = i32, %i2 = len
 * Returns at once when len <= 0.
 *
 * Conversion method: each 16-bit half is written to a stack slot and
 * reloaded into the odd half of an FP register pair whose even half holds
 * 0x43300000, so the pair reads as 2^52 + value; fsubd with 2^52 (%f20)
 * leaves the value as a double.
 *
 * Stack slots: [%sp+2335]/[%sp+2271] hold the low/high half of the first
 * word of a pair, [%sp+2303]/[%sp+2239] those of the second word, and
 * [%sp+2399]/[%sp+2367] are used by the one-word loop.
 * NOTE(review): these offsets presumably include the 2047-byte stack bias
 * of the 64-bit SPARC ABI -- confirm.
 *
 * Structure:
 *   len < 4   -> one word per iteration, loop at .L900000510
 *   len >= 4  -> software-pipelined loop at .L900000506 handling two words
 *                (four doubles) per iteration, a drain at .L900000509 for
 *                the two words still in flight, then the one-word loop for
 *                whatever remains
 *
 * Register roles: %i4 = 0xffff, %l6 = len - 1, %l7 = source pointer,
 * %i5 = words consumed, %i3 = index into d16, %i2 = d16 base (one-word
 * loop), %l2 = destination pointer (pipelined loop).
 */
689                       	.global conv_i32_to_d16
690                       conv_i32_to_d16:
691/* 000000	 151 */		save	%sp,-368,%sp
692/* 0x0004	     */		orcc	%g0,%i2,%i2
693
694!  152		      !	int i;
695!  153		      !	uint32_t a;
696!  155		      !#pragma pipeloop(0)
697!  156		      !	for (i = 0; i < len; i++) {
698
699/* 0x0008	 156 */		ble,pn	%icc,.L77000272
700/* 0x000c	     */		sub	%i2,1,%l6
701                       .L77000281:
702/* 0x0010	 156 */		sethi	%hi(0xfc00),%i3
703
704!  157		      !		a = i32[i];
705
706/* 0x0014	 157 */		or	%g0,%i2,%l1
707/* 0x0018	 156 */		add	%i3,1023,%i4
708/* 0x001c	 157 */		cmp	%i2,4
709/* 0x0020	 151 */		or	%g0,%i1,%l7
710/* 0x0024	     */		or	%g0,%i0,%i2
711/* 0x0028	 156 */		or	%g0,0,%i5
712/* 0x002c	     */		or	%g0,0,%i3
713/* 0x0030	 157 */		bl,pn	%icc,.L77000279
714/* 0x0034	   0 */		sethi	%hi(___const_seg_900000501),%i1
/*
 * Pipelined path prologue: prefetch the destination, load the first two
 * input words, split each into halves and park them in the stack slots.
 * %i0 is reused as the loop bound len - 2.
 */
715                       .L900000508:
716/* 0x0038	 157 */		prefetch	[%i0+8],22
717/* 0x003c	     */		prefetch	[%i0+72],22
718/* 0x0040	     */		or	%g0,%i0,%l2
719
720!  158		      !		d16[2 * i] = (double)(a & 0xffff);
721
722/* 0x0044	 158 */		sethi	%hi(___const_seg_900000501+8),%l1
723/* 0x0048	 157 */		prefetch	[%i0+136],22
724/* 0x004c	     */		sub	%l6,1,%i0
725/* 0x0050	     */		or	%g0,0,%i3
726/* 0x0054	     */		prefetch	[%i2+200],22
727/* 0x0058	     */		or	%g0,2,%i5
728/* 0x005c	     */		prefetch	[%i2+264],22
729/* 0x0060	     */		prefetch	[%i2+328],22
730/* 0x0064	     */		prefetch	[%i2+392],22
731/* 0x0068	     */		ld	[%l7],%l3
732/* 0x006c	     */		ld	[%l7+4],%l4
733/* 0x0070	 158 */		ldd	[%i1+%lo(___const_seg_900000501)],%f20
734
735!  159		      !		d16[2 * i + 1] = (double)(a >> 16);
736
737/* 0x0074	 159 */		srl	%l3,16,%o1
738/* 0x0078	 158 */		and	%l3,%i4,%o3
739/* 0x007c	     */		st	%o3,[%sp+2335]
740/* 0x0080	 159 */		srl	%l4,16,%g4
741/* 0x0084	 158 */		and	%l4,%i4,%o0
742/* 0x0088	     */		st	%o0,[%sp+2303]
743/* 0x008c	 159 */		add	%l7,8,%l7
744/* 0x0090	     */		st	%o1,[%sp+2271]
745/* 0x0094	     */		st	%g4,[%sp+2239]
746/* 0x0098	 157 */		prefetch	[%i2+456],22
747/* 0x009c	     */		prefetch	[%i2+520],22
/*
 * Pipelined loop: reload the halves parked by the previous iteration into
 * FP registers, convert and store four doubles, and at the same time split
 * and park the next two input words.  The loop repeats while
 * %i5 <= len - 2; the last std is in the branch delay slot.
 */
748                       .L900000506:
749/* 0x00a0	 157 */		prefetch	[%l2+536],22
750/* 0x00a4	 159 */		add	%i5,2,%i5
751/* 0x00a8	 157 */		add	%l2,32,%l2
752/* 0x00ac	     */		ld	[%l7],%g2
753/* 0x00b0	 159 */		cmp	%i5,%i0
754/* 0x00b4	     */		add	%l7,8,%l7
755/* 0x00b8	 158 */		ld	[%sp+2335],%f9
756/* 0x00bc	 159 */		add	%i3,4,%i3
757/* 0x00c0	 158 */		ld	[%l1+%lo(___const_seg_900000501+8)],%f8
758/* 0x00c4	 159 */		ld	[%sp+2271],%f11
759/* 0x00c8	 158 */		and	%g2,%i4,%g3
760/* 0x00cc	 159 */		fmovs	%f8,%f10
761/* 0x00d0	 158 */		st	%g3,[%sp+2335]
762/* 0x00d4	     */		fsubd	%f8,%f20,%f28
763/* 0x00d8	     */		std	%f28,[%l2-32]
764/* 0x00dc	 159 */		srl	%g2,16,%g1
765/* 0x00e0	     */		st	%g1,[%sp+2271]
766/* 0x00e4	     */		fsubd	%f10,%f20,%f30
767/* 0x00e8	     */		std	%f30,[%l2-24]
768/* 0x00ec	 157 */		ld	[%l7-4],%l0
769/* 0x00f0	 158 */		ld	[%sp+2303],%f13
770/* 0x00f4	     */		ld	[%l1+%lo(___const_seg_900000501+8)],%f12
771/* 0x00f8	 159 */		ld	[%sp+2239],%f15
772/* 0x00fc	 158 */		and	%l0,%i4,%l5
773/* 0x0100	 159 */		fmovs	%f12,%f14
774/* 0x0104	 158 */		st	%l5,[%sp+2303]
775/* 0x0108	     */		fsubd	%f12,%f20,%f44
776/* 0x010c	     */		std	%f44,[%l2-16]
777/* 0x0110	 159 */		srl	%l0,16,%o5
778/* 0x0114	     */		st	%o5,[%sp+2239]
779/* 0x0118	     */		fsubd	%f14,%f20,%f46
780/* 0x011c	     */		ble,pt	%icc,.L900000506
781/* 0x0120	     */		std	%f46,[%l2-8]
/*
 * Drain: convert and store the four halves still parked in the stack
 * slots, then finish when %i5 > len - 1.
 */
782                       .L900000509:
783/* 0x0124	 158 */		ld	[%l1+%lo(___const_seg_900000501+8)],%f0
784/* 0x0128	 159 */		cmp	%i5,%l6
785/* 0x012c	     */		add	%i3,4,%i3
786/* 0x0130	 158 */		ld	[%sp+2335],%f1
787/* 0x0134	     */		ld	[%sp+2303],%f5
788/* 0x0138	 159 */		fmovs	%f0,%f2
789/* 0x013c	     */		ld	[%sp+2271],%f3
790/* 0x0140	 158 */		fmovs	%f0,%f4
791/* 0x0144	 159 */		ld	[%sp+2239],%f7
792/* 0x0148	     */		fmovs	%f0,%f6
793/* 0x014c	 158 */		fsubd	%f0,%f20,%f22
794/* 0x0150	     */		std	%f22,[%l2]
795/* 0x0154	 159 */		fsubd	%f2,%f20,%f24
796/* 0x0158	     */		std	%f24,[%l2+8]
797/* 0x015c	 158 */		fsubd	%f4,%f20,%f26
798/* 0x0160	     */		std	%f26,[%l2+16]
799/* 0x0164	 159 */		fsubd	%f6,%f20,%f20
800/* 0x0168	     */		bg,pn	%icc,.L77000272
801/* 0x016c	     */		std	%f20,[%l2+24]
/*
 * One-word loop: %l2 = a.  %i3 indexes d16 and advances by two per word.
 * The annulled delay slot loads the next word only when the loop repeats.
 */
802                       .L77000279:
803/* 0x0170	 157 */		ld	[%l7],%l2
804                       .L900000510:
805/* 0x0174	 158 */		and	%l2,%i4,%o4
806/* 0x0178	     */		st	%o4,[%sp+2399]
807/* 0x017c	 159 */		srl	%l2,16,%o2
808/* 0x0180	     */		st	%o2,[%sp+2367]
809/* 0x0184	 158 */		sethi	%hi(___const_seg_900000501+8),%l1
810/* 0x0188	     */		sra	%i3,0,%i0
811/* 0x018c	     */		ld	[%l1+%lo(___const_seg_900000501+8)],%f16
812/* 0x0190	     */		sllx	%i0,3,%o1
813/* 0x0194	 159 */		add	%i3,1,%o3
814/* 0x0198	 158 */		ldd	[%i1+%lo(___const_seg_900000501)],%f20
815/* 0x019c	 159 */		sra	%o3,0,%l3
816/* 0x01a0	     */		add	%i5,1,%i5
817/* 0x01a4	 158 */		ld	[%sp+2399],%f17
818/* 0x01a8	 159 */		sllx	%l3,3,%o0
819/* 0x01ac	     */		add	%l7,4,%l7
820/* 0x01b0	     */		fmovs	%f16,%f18
821/* 0x01b4	     */		cmp	%i5,%l6
822/* 0x01b8	     */		add	%i3,2,%i3
823/* 0x01bc	 158 */		fsubd	%f16,%f20,%f48
824/* 0x01c0	     */		std	%f48,[%i2+%o1]
825/* 0x01c4	 159 */		ld	[%sp+2367],%f19
826/* 0x01c8	     */		fsubd	%f18,%f20,%f50
827/* 0x01cc	     */		std	%f50,[%i2+%o0]
828/* 0x01d0	     */		ble,a,pt	%icc,.L900000510
829/* 0x01d4	 157 */		ld	[%l7],%l2
830                       .L77000272:
831/* 0x01d8	 159 */		ret	! Result =
832/* 0x01dc	     */		restore	%g0,%g0,%g0
833/* 0x01e0	   0 */		.type	conv_i32_to_d16,2
834/* 0x01e0	   0 */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
835
836	.section	".text",#alloc,#execinstr
837/* 000000	   0 */		.align	8
838!
839! CONSTANT POOL
840!
/*
 * Same layout as the two earlier ___const_seg pools: the double 2^52
 * (upper word 1127219200 = 0x43300000) followed by that upper word alone.
 * NOTE(review): presumably consumed by conv_i32_to_d32_and_d16 below --
 * confirm against the part of that routine that references it.
 */
841                       ___const_seg_900000601:
842/* 000000	   0 */		.word	1127219200,0
843/* 0x0008	     */		.word	1127219200
844/* 0x000c	   0 */		.type	___const_seg_900000601,1
845/* 0x000c	   0 */		.size	___const_seg_900000601,(.-___const_seg_900000601)
846/* 0x000c	   0 */		.align	8
847/* 0x0010	     */		.skip	24
848/* 0x0028	     */		.align	32
849
850!  160		      !	}
851!  161		      !}
852!  163		      !#ifdef RF_INLINE_MACROS
853!  165		      !void
854!  166		      !i16_to_d16_and_d32x4(const double *,	/* 1/(2^16) */
855!  167		      !			const double *,	/* 2^16 */
856!  168		      !			const double *,	/* 0 */
857!  169		      !			double *,	/* result16 */
858!  170		      !			double *,	/* result32 */
859!  171		      !			float *);	/* source - should be unsigned int* */
860!  172		      !					/* converted to float* */
861!  174		      !#else
862!  177		      !/* ARGSUSED */
863!  178		      !static void
864!  179		      !i16_to_d16_and_d32x4(const double *dummy1,	/* 1/(2^16) */
865!  180		      !			const double *dummy2,	/* 2^16 */
866!  181		      !			const double *dummy3,	/* 0 */
867!  182		      !			double *result16,
868!  183		      !			double *result32,
869!  184		      !			float *src)	/* source - should be unsigned int* */
870!  185		      !					/* converted to float* */
871!  186		      !{
872!  187		      !	uint32_t *i32;
873!  188		      !	uint32_t a, b, c, d;
874!  190		      !	i32 = (uint32_t *)src;
875!  191		      !	a = i32[0];
876!  192		      !	b = i32[1];
877!  193		      !	c = i32[2];
878!  194		      !	d = i32[3];
879!  195		      !	result16[0] = (double)(a & 0xffff);
880!  196		      !	result16[1] = (double)(a >> 16);
881!  197		      !	result32[0] = (double)a;
882!  198		      !	result16[2] = (double)(b & 0xffff);
883!  199		      !	result16[3] = (double)(b >> 16);
884!  200		      !	result32[1] = (double)b;
885!  201		      !	result16[4] = (double)(c & 0xffff);
886!  202		      !	result16[5] = (double)(c >> 16);
887!  203		      !	result32[2] = (double)c;
888!  204		      !	result16[6] = (double)(d & 0xffff);
889!  205		      !	result16[7] = (double)(d >> 16);
890!  206		      !	result32[3] = (double)d;
891!  207		      !}
892!  209		      !#endif
893!  212		      !void
894!  213		      !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
895!  214		      !{
896
897!
898! SUBROUTINE conv_i32_to_d32_and_d16
899!
900! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
901
/*
 * conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
 *
 * For every 32-bit word i32[i] this routine stores the word's value as a
 * double in d32[i], and the word's low and high 16-bit halves as doubles
 * in d16[2*i] and d16[2*i+1] (see the interlisted C source below).
 *
 * Register use after the save:
 *	%i0 = d32, %i1 = d16, %i2 = i32, %i3 = len, %i5 = i (word index)
 *
 * Code layout:
 *	.L900000615	main loop, four words per iteration (i < len - 3)
 *	.L77000289	entry to the remainder handling (i < len)
 *	.L900000612 / .L900000610 / .L900000613
 *			prologue / body / epilogue of an unrolled remainder
 *			loop, entered only when len - i >= 5
 *	.L900000614	remainder loop, one word per iteration
 *
 * NOTE(review): the main loop only exits with len - i <= 3, and it is
 * skipped only when len <= 3, so the unrolled remainder loop appears to be
 * unreachable in practice -- confirm before relying on it.
 */
902                       	.global conv_i32_to_d32_and_d16
903                       conv_i32_to_d32_and_d16:
904/* 000000	 214 */		save	%sp,-368,%sp
905
906!  215		      !	int i;
907!  216		      !	uint32_t a;
908!  218		      !#pragma pipeloop(0)
909!  219		      !	for (i = 0; i < len - 3; i += 4) {
910!  220		      !		i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
911!  221		      !					&(d16[2*i]), &(d32[i]),
912!  222		      !					(float *)(&(i32[i])));
913!  223		      !	}
914!  224		      !	for (; i < len; i++) {
915!  225		      !		a = i32[i];
916!  226		      !		d32[i] = (double)(i32[i]);
917!  227		      !		d16[2 * i] = (double)(a & 0xffff);
918!  228		      !		d16[2 * i + 1] = (double)(a >> 16);
919
/*
 * %i4 = len - 3.  When it is <= 0 the four-at-a-time loop is skipped.
 * The delay slot sets i (%i5) to 0 on both paths.
 */
920/* 0x0004	 228 */		sub	%i3,3,%i4
921/* 0x0008	 219 */		cmp	%i4,0
922/* 0x000c	     */		ble,pn	%icc,.L77000289
923/* 0x0010	     */		or	%g0,0,%i5
/*
 * Main-loop setup:
 *	%g3, %g2, %o5 = %hi() of the addresses of Zero, TwoToMinus16, TwoTo16
 *	%f2  = Zero
 *	%o4  = len - 4 (loop bound, compared against the incremented i)
 *	%o3  = 2 * i   (element index into d16)
 *	%l6  = d32 cursor, %l5 = i32 cursor
 */
924                       .L77000306:
925/* 0x0014	 222 */		sethi	%hi(Zero),%g3
926/* 0x0018	     */		sethi	%hi(TwoToMinus16),%g2
927/* 0x001c	     */		sethi	%hi(TwoTo16),%o5
928/* 0x0020	     */		ldd	[%g3+%lo(Zero)],%f2
929/* 0x0024	 219 */		sub	%i3,4,%o4
930/* 0x0028	     */		or	%g0,0,%o3
931/* 0x002c	     */		or	%g0,%i0,%l6
932/* 0x0030	     */		or	%g0,%i2,%l5
/*
 * Main loop body (inlined i16_to_d16_and_d32x4), four words per pass.
 *
 * Each word is loaded into the odd (lower) half of a double register
 * whose even half was copied from Zero (the fourth word goes straight
 * into %f3, the lower half of %f2).  Assuming Zero is 0.0, i.e. all-zero
 * bits (defined outside this view -- TODO confirm), the register pair
 * then holds the zero-extended word as a 64-bit integer and fxtod turns
 * it into the d32 value x.
 *
 * With %f32 = TwoToMinus16 and %f34 = TwoTo16:
 *	hi = fxtod(fdtox(x * TwoToMinus16))	-> odd d16 slots
 *	lo = x - hi * TwoTo16			-> even d16 slots
 *
 * %g5 is the byte offset of d16[2*i] and %g4 = d16 + %g5.  The annulled
 * delay slot of the closing branch reloads Zero into %f2 for the next
 * pass, because %f2/%f3 were overwritten by the fourth word.
 */
933                       .L900000615:
934/* 0x0034	 222 */		fmovd	%f2,%f26
935/* 0x0038	     */		ld	[%l5],%f27
936/* 0x003c	     */		sra	%o3,0,%o0
937/* 0x0040	     */		add	%i5,4,%i5
938/* 0x0044	     */		fmovd	%f2,%f28
939/* 0x0048	     */		ld	[%l5+4],%f29
940/* 0x004c	     */		sllx	%o0,3,%g5
941/* 0x0050	     */		cmp	%i5,%o4
942/* 0x0054	     */		fmovd	%f2,%f30
943/* 0x0058	     */		ld	[%l5+8],%f31
944/* 0x005c	     */		add	%i1,%g5,%g4
945/* 0x0060	     */		add	%o3,8,%o3
946/* 0x0064	     */		ld	[%l5+12],%f3
947/* 0x0068	     */		fxtod	%f26,%f26
948/* 0x006c	     */		ldd	[%g2+%lo(TwoToMinus16)],%f32
949/* 0x0070	     */		fxtod	%f28,%f28
950/* 0x0074	     */		add	%l5,16,%l5
951/* 0x0078	     */		fxtod	%f30,%f30
952/* 0x007c	     */		ldd	[%o5+%lo(TwoTo16)],%f34
953/* 0x0080	     */		fxtod	%f2,%f2
954/* 0x0084	     */		std	%f2,[%l6+24]
955/* 0x0088	     */		fmuld	%f32,%f26,%f36
956/* 0x008c	     */		std	%f26,[%l6]
957/* 0x0090	     */		fmuld	%f32,%f28,%f38
958/* 0x0094	     */		std	%f28,[%l6+8]
959/* 0x0098	     */		fmuld	%f32,%f30,%f40
960/* 0x009c	     */		std	%f30,[%l6+16]
961/* 0x00a0	     */		fmuld	%f32,%f2,%f42
962/* 0x00a4	     */		add	%l6,32,%l6
963/* 0x00a8	     */		fdtox	%f36,%f36
964/* 0x00ac	     */		fdtox	%f38,%f38
965/* 0x00b0	     */		fdtox	%f40,%f40
966/* 0x00b4	     */		fdtox	%f42,%f42
967/* 0x00b8	     */		fxtod	%f36,%f36
968/* 0x00bc	     */		std	%f36,[%g4+8]
969/* 0x00c0	     */		fxtod	%f38,%f38
970/* 0x00c4	     */		std	%f38,[%g4+24]
971/* 0x00c8	     */		fxtod	%f40,%f40
972/* 0x00cc	     */		std	%f40,[%g4+40]
973/* 0x00d0	     */		fxtod	%f42,%f42
974/* 0x00d4	     */		std	%f42,[%g4+56]
975/* 0x00d8	     */		fmuld	%f36,%f34,%f36
976/* 0x00dc	     */		fmuld	%f38,%f34,%f38
977/* 0x00e0	     */		fmuld	%f40,%f34,%f40
978/* 0x00e4	     */		fmuld	%f42,%f34,%f42
979/* 0x00e8	     */		fsubd	%f26,%f36,%f36
980/* 0x00ec	     */		std	%f36,[%i1+%g5]
981/* 0x00f0	     */		fsubd	%f28,%f38,%f38
982/* 0x00f4	     */		std	%f38,[%g4+16]
983/* 0x00f8	     */		fsubd	%f30,%f40,%f40
984/* 0x00fc	     */		std	%f40,[%g4+32]
985/* 0x0100	     */		fsubd	%f2,%f42,%f42
986/* 0x0104	     */		std	%f42,[%g4+48]
987/* 0x0108	     */		ble,a,pt	%icc,.L900000615
988/* 0x010c	     */		ldd	[%g3+%lo(Zero)],%f2
/*
 * Remainder handling: return at once when i >= len.  The delay slot
 * starts building the 0xffff mask in %l0 (0xfc00 here, + 1023 below).
 */
989                       .L77000289:
990/* 0x0110	 224 */		cmp	%i5,%i3
991/* 0x0114	     */		bge,pn	%icc,.L77000294
992/* 0x0118	     */		sethi	%hi(0xfc00),%l0
/*
 * Remainder setup:
 *	%i4 = 2 * i (element index into d16)
 *	%l1 = &d32[i], %i2 = &i32[i], %l0 = 0xffff
 *	%l3 = len - i; fewer than five words left -> simple loop
 *	%l7 = %hi() of ___const_seg_900000601 (set in the delay slot)
 */
993                       .L77000307:
994/* 0x011c	 224 */		sra	%i5,0,%l2
995/* 0x0120	     */		sll	%i5,1,%i4
996/* 0x0124	     */		sllx	%l2,3,%l1
997/* 0x0128	     */		sllx	%l2,2,%o1
998/* 0x012c	 225 */		sub	%i3,%i5,%l3
999/* 0x0130	 224 */		add	%l0,1023,%l0
1000/* 0x0134	     */		add	%l1,%i0,%l1
1001/* 0x0138	     */		add	%o1,%i2,%i2
1002/* 0x013c	 225 */		cmp	%l3,5
1003/* 0x0140	     */		bl,pn	%icc,.L77000291
1004/* 0x0144	   0 */		sethi	%hi(___const_seg_900000601),%l7
/*
 * Prologue of the unrolled remainder loop.
 *
 * The first word is loaded and split into (a >> 16) and (a & 0xffff);
 * the two halves are written to the stack scratch slots [%sp+2271] and
 * [%sp+2239] so they can be reloaded into FP registers later (integer
 * register -> memory -> FP register).
 *
 *	%l3 = &d16[2*i], %i0 = len - 3 (loop bound; d32 is no longer
 *	needed in %i0 because %l1 already points at d32[i]),
 *	%g1 = saved len (%i3 is reused as a temporary inside the loop)
 *	%f32 = double at ___const_seg_900000601
 *	%f0  = word at ___const_seg_900000601+8
 *
 * Each int -> double conversion below pairs that word (upper half) with
 * the integer (lower half) and subtracts %f32.  Presumably this is the
 * usual 2^52 bias trick; the constant segment is defined outside this
 * view -- TODO confirm.
 */
1005                       .L900000612:
1006/* 0x0148	 225 */		prefetch	[%l1],22
1007/* 0x014c	     */		prefetch	[%l1+64],22
1008/* 0x0150	     */		sra	%i4,0,%l6
1009/* 0x0154	 226 */		sethi	%hi(___const_seg_900000601+8),%l2
1010/* 0x0158	 225 */		prefetch	[%l1+128],22
1011/* 0x015c	     */		add	%l6,-2,%l5
1012/* 0x0160	     */		sub	%i3,3,%i0
1013/* 0x0164	     */		prefetch	[%l1+192],22
1014/* 0x0168	     */		sllx	%l5,3,%o4
1015/* 0x016c	 228 */		add	%i5,1,%i5
1016/* 0x0170	 225 */		add	%i1,%o4,%o3
1017/* 0x0174	     */		or	%g0,%i3,%g1
1018/* 0x0178	     */		ld	[%i2],%l4
1019/* 0x017c	     */		prefetch	[%o3+16],22
1020/* 0x0180	     */		add	%o3,16,%l3
1021/* 0x0184	 228 */		add	%i2,4,%i2
1022/* 0x0188	 225 */		prefetch	[%o3+80],22
1023/* 0x018c	 228 */		srl	%l4,16,%o1
1024/* 0x0190	 227 */		and	%l4,%l0,%o0
1025/* 0x0194	 225 */		prefetch	[%o3+144],22
1026/* 0x0198	 228 */		st	%o1,[%sp+2271]
1027/* 0x019c	 227 */		st	%o0,[%sp+2239]
1028/* 0x01a0	 226 */		ldd	[%l7+%lo(___const_seg_900000601)],%f32
1029/* 0x01a4	 228 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f0
1030/* 0x01a8	 225 */		prefetch	[%o3+208],22
1031/* 0x01ac	     */		prefetch	[%o3+272],22
1032/* 0x01b0	     */		prefetch	[%o3+336],22
/*
 * Unrolled remainder loop body: four words per pass, software pipelined.
 * While the halves of the next word are split and stored to one pair of
 * scratch slots ([%sp+2303]/[%sp+2335] or [%sp+2239]/[%sp+2271]), the
 * halves of the previous word are reloaded from the other pair and
 * converted.  Results go to d32 through %l1 and to d16 through %l3.
 * The loop repeats while i (%i5) < len - 3 (%i0).
 */
1033                       .L900000610:
1034/* 0x01b4	 225 */		prefetch	[%l1+192],22
1035/* 0x01b8	 228 */		add	%i5,4,%i5
1036/* 0x01bc	 225 */		add	%l3,64,%l3
1037/* 0x01c0	 227 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f8
1038/* 0x01c4	 228 */		cmp	%i5,%i0
1039/* 0x01c8	 225 */		ld	[%i2],%g5
1040/* 0x01cc	 228 */		add	%i2,16,%i2
1041/* 0x01d0	     */		add	%l1,32,%l1
1042/* 0x01d4	     */		add	%i4,8,%i4
1043/* 0x01d8	 226 */		ld	[%i2-20],%f7
1044/* 0x01dc	 228 */		srl	%g5,16,%i3
1045/* 0x01e0	 226 */		fmovs	%f8,%f6
1046/* 0x01e4	 228 */		st	%i3,[%sp+2335]
1047/* 0x01e8	 227 */		and	%g5,%l0,%g4
1048/* 0x01ec	     */		st	%g4,[%sp+2303]
1049/* 0x01f0	 226 */		fsubd	%f6,%f32,%f40
1050/* 0x01f4	 227 */		ld	[%sp+2239],%f9
1051/* 0x01f8	 228 */		ld	[%sp+2271],%f1
1052/* 0x01fc	     */		fmovs	%f8,%f12
1053/* 0x0200	 226 */		std	%f40,[%l1-32]
1054/* 0x0204	 227 */		fsubd	%f8,%f32,%f42
1055/* 0x0208	     */		std	%f42,[%l3-64]
1056/* 0x020c	 228 */		fsubd	%f0,%f32,%f44
1057/* 0x0210	     */		std	%f44,[%l3-56]
1058/* 0x0214	 227 */		fmovs	%f12,%f10
1059/* 0x0218	 225 */		ld	[%i2-12],%g2
1060/* 0x021c	 226 */		ld	[%i2-16],%f1
1061/* 0x0220	 228 */		srl	%g2,16,%g3
1062/* 0x0224	 226 */		fmovs	%f12,%f0
1063/* 0x0228	 225 */		prefetch	[%l3+320],22
1064/* 0x022c	 228 */		st	%g3,[%sp+2271]
1065/* 0x0230	 227 */		and	%g2,%l0,%l6
1066/* 0x0234	     */		st	%l6,[%sp+2239]
1067/* 0x0238	 226 */		fsubd	%f0,%f32,%f46
1068/* 0x023c	 227 */		ld	[%sp+2303],%f11
1069/* 0x0240	 228 */		ld	[%sp+2335],%f13
1070/* 0x0244	     */		fmovs	%f12,%f18
1071/* 0x0248	 226 */		std	%f46,[%l1-24]
1072/* 0x024c	 227 */		fsubd	%f10,%f32,%f48
1073/* 0x0250	     */		std	%f48,[%l3-48]
1074/* 0x0254	 228 */		fsubd	%f12,%f32,%f50
1075/* 0x0258	     */		std	%f50,[%l3-40]
1076/* 0x025c	 227 */		fmovs	%f18,%f16
1077/* 0x0260	 225 */		ld	[%i2-8],%o5
1078/* 0x0264	 226 */		ld	[%i2-12],%f15
1079/* 0x0268	 228 */		srl	%o5,16,%l5
1080/* 0x026c	 226 */		fmovs	%f18,%f14
1081/* 0x0270	 228 */		st	%l5,[%sp+2335]
1082/* 0x0274	 227 */		and	%o5,%l0,%o4
1083/* 0x0278	     */		st	%o4,[%sp+2303]
1084/* 0x027c	 226 */		fsubd	%f14,%f32,%f52
1085/* 0x0280	 227 */		ld	[%sp+2239],%f17
1086/* 0x0284	 228 */		ld	[%sp+2271],%f19
1087/* 0x0288	 225 */		prefetch	[%l3+352],22
1088/* 0x028c	 228 */		fmovs	%f18,%f24
1089/* 0x0290	 226 */		std	%f52,[%l1-16]
1090/* 0x0294	 227 */		fsubd	%f16,%f32,%f54
1091/* 0x0298	     */		std	%f54,[%l3-32]
1092/* 0x029c	 228 */		fsubd	%f18,%f32,%f56
1093/* 0x02a0	     */		std	%f56,[%l3-24]
1094/* 0x02a4	 227 */		fmovs	%f24,%f22
1095/* 0x02a8	 225 */		ld	[%i2-4],%l4
1096/* 0x02ac	 226 */		ld	[%i2-8],%f21
1097/* 0x02b0	 228 */		srl	%l4,16,%o3
1098/* 0x02b4	 226 */		fmovs	%f24,%f20
1099/* 0x02b8	 228 */		st	%o3,[%sp+2271]
1100/* 0x02bc	 227 */		and	%l4,%l0,%o2
1101/* 0x02c0	     */		st	%o2,[%sp+2239]
1102/* 0x02c4	 226 */		fsubd	%f20,%f32,%f58
1103/* 0x02c8	 227 */		ld	[%sp+2303],%f23
1104/* 0x02cc	 228 */		ld	[%sp+2335],%f25
1105/* 0x02d0	     */		fmovs	%f24,%f0
1106/* 0x02d4	 226 */		std	%f58,[%l1-8]
1107/* 0x02d8	 227 */		fsubd	%f22,%f32,%f60
1108/* 0x02dc	     */		std	%f60,[%l3-16]
1109/* 0x02e0	 228 */		fsubd	%f24,%f32,%f62
1110/* 0x02e4	     */		bl,pt	%icc,.L900000610
1111/* 0x02e8	     */		std	%f62,[%l3-8]
/*
 * Epilogue of the unrolled loop: converts and stores the one word whose
 * halves are still sitting in [%sp+2239]/[%sp+2271], restores len into
 * %i3 from %g1, and returns if i >= len; otherwise the remaining words
 * are handled by the simple loop below.
 */
1112                       .L900000613:
1113/* 0x02ec	 227 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f4
1114/* 0x02f0	 228 */		add	%l1,8,%l1
1115/* 0x02f4	     */		cmp	%i5,%g1
1116/* 0x02f8	 226 */		ld	[%i2-4],%f3
1117/* 0x02fc	 225 */		or	%g0,%g1,%i3
1118/* 0x0300	 228 */		add	%i4,2,%i4
1119/* 0x0304	 227 */		ld	[%sp+2239],%f5
1120/* 0x0308	 226 */		fmovs	%f4,%f2
1121/* 0x030c	 228 */		ld	[%sp+2271],%f1
1122/* 0x0310	 226 */		fsubd	%f2,%f32,%f34
1123/* 0x0314	     */		std	%f34,[%l1-8]
1124/* 0x0318	 227 */		fsubd	%f4,%f32,%f36
1125/* 0x031c	     */		std	%f36,[%l3]
1126/* 0x0320	 228 */		fsubd	%f0,%f32,%f38
1127/* 0x0324	     */		bge,pn	%icc,.L77000294
1128/* 0x0328	     */		std	%f38,[%l3+8]
/*
 * Simple remainder loop, one word per pass:
 *	%o2 = a = i32[i]
 *	[%sp+2367] = a >> 16, [%sp+2399] = a & 0xffff (FP transfer slots)
 *	%o0 / %g5 = byte offsets of d16[2*i] / d16[2*i+1]
 *	[%l1] = d32[i]
 * The annulled delay slot of the closing branch preloads the next word.
 */
1129                       .L77000291:
1130/* 0x032c	 225 */		ld	[%i2],%o2
1131                       .L900000614:
1132/* 0x0330	 226 */		ldd	[%l7+%lo(___const_seg_900000601)],%f32
1133/* 0x0334	 228 */		srl	%o2,16,%l3
1134/* 0x0338	 227 */		sra	%i4,0,%i0
1135/* 0x033c	 228 */		st	%l3,[%sp+2367]
1136/* 0x0340	 227 */		and	%o2,%l0,%g1
1137/* 0x0344	 226 */		sethi	%hi(___const_seg_900000601+8),%l2
1138/* 0x0348	 227 */		st	%g1,[%sp+2399]
1139/* 0x034c	     */		sllx	%i0,3,%o0
1140/* 0x0350	 228 */		add	%i4,1,%l4
1141/* 0x0354	 226 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f4
1142/* 0x0358	 228 */		sra	%l4,0,%o1
1143/* 0x035c	     */		add	%i5,1,%i5
1144/* 0x0360	 226 */		ld	[%i2],%f5
1145/* 0x0364	 228 */		sllx	%o1,3,%g5
1146/* 0x0368	     */		cmp	%i5,%i3
1147/* 0x036c	     */		ld	[%sp+2367],%f9
1148/* 0x0370	     */		add	%i2,4,%i2
1149/* 0x0374	     */		add	%i4,2,%i4
1150/* 0x0378	 227 */		fmovs	%f4,%f6
1151/* 0x037c	 226 */		fsubd	%f4,%f32,%f44
1152/* 0x0380	     */		std	%f44,[%l1]
1153/* 0x0384	 227 */		ld	[%sp+2399],%f7
1154/* 0x0388	 228 */		fmovs	%f6,%f8
1155/* 0x038c	     */		add	%l1,8,%l1
1156/* 0x0390	     */		fsubd	%f8,%f32,%f48
1157/* 0x0394	 227 */		fsubd	%f6,%f32,%f46
1158/* 0x0398	     */		std	%f46,[%i1+%o0]
1159/* 0x039c	 228 */		std	%f48,[%i1+%g5]
1160/* 0x03a0	     */		bl,a,pt	%icc,.L900000614
1161/* 0x03a4	 225 */		ld	[%i2],%o2
/* Common exit: pop the register window; no value is returned. */
1162                       .L77000294:
1163/* 0x03a8	 222 */		ret	! Result =
1164/* 0x03ac	     */		restore	%g0,%g0,%g0
1165/* 0x03b0	   0 */		.type	conv_i32_to_d32_and_d16,2
1166/* 0x03b0	   0 */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
1167
1168	.section	".text",#alloc,#execinstr
1169/* 000000	   0 */		.align	32
1170
1171!  229		      !	}
1172!  230		      !}
1173!  232		      !extern long long c1, c2, c3, c4;
1174!  234		      !static void
1175!  235		      !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
1176!  236		      !{
1177
1178!
1179! SUBROUTINE adjust_montf_result
1180!
1181! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
1182
/*
 * adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
 *
 * File-local leaf routine (no .global, no save/restore; returns with
 * retl).  If the len+1 word number in i32 is greater than or equal to
 * the len word number in nint, nint is subtracted from i32 in place;
 * otherwise i32 is left untouched (see the interlisted C source).
 *
 * Register use:
 *	in:	%o0 = i32, %o1 = nint, %o2 = len
 *	%g2 = len (sign-extended), later reused as the running acc
 *	%g3 = len - 1
 *	%o4 = i32 cursor, %o5 = nint cursor
 *	%o3 = i during the compare scan, len at the start of the subtract
 *	%o0 = i during the subtract loops
 * Only %o and %g registers are written.
 */
1183                       adjust_montf_result:
1184/* 000000	 236 */		sra	%o2,0,%g2
1185/* 0x0004	     */		or	%g0,%o0,%o4
1186
1187!  237		      !	int64_t acc;
1188!  238		      !	int i;
1189!  240		      !	if (i32[len] > 0) {
1190
/*
 * Load i32[len].  The unsigned "<= 0" test can only be true for 0, in
 * which case the word-by-word comparison at .L77000316 decides.  The
 * delay slot copies nint into %o5 on both paths.
 */
1191/* 0x0008	 240 */		sllx	%g2,2,%g3
1192/* 0x000c	     */		ld	[%o0+%g3],%o0
1193/* 0x0010	     */		cmp	%o0,0
1194/* 0x0014	     */		bleu,pn	%icc,.L77000316
1195/* 0x0018	 236 */		or	%g0,%o1,%o5
1196
1197!  241		      !		i = -1;
1198
/*
 * i32[len] != 0: go straight to the subtraction.  %g3 = len - 1 is the
 * bound used there; the delay slot performs the "len <= 0" compare that
 * .L900000712 branches on.
 */
1199                       .L77000315:
1200/* 0x001c	 241 */		sub	%g2,1,%g3
1201/* 0x0020	     */		ba	.L900000712
1202/* 0x0024	 249 */		cmp	%g2,0
1203
1204!  242		      !	} else {
1205!  243		      !		for (i = len - 1; i >= 0; i--) {
1206
/*
 * i = len - 1.  A negative start value means there is nothing to compare
 * and the code goes to the subtraction entry (.L77000340).
 */
1207                       .L77000316:
1208/* 0x0028	 243 */		subcc	%g2,1,%g3
1209/* 0x002c	     */		bneg,pn	%icc,.L77000340
1210/* 0x0030	     */		or	%g0,%g3,%o3
/*
 * Compare scan from the most significant word downwards:
 *	%o2 = &i32[i], %o1 = &nint[i], %g4 = nint[i]
 * The first mismatch leaves the loop through .L77000324.
 */
1211                       .L77000348:
1212/* 0x0034	 243 */		sra	%g3,0,%o1
1213/* 0x0038	     */		sllx	%o1,2,%g1
1214
1215!  244		      !			if (i32[i] != nint[i]) break;
1216
1217/* 0x003c	 244 */		ld	[%g1+%o5],%g4
1218/* 0x0040	 243 */		add	%g1,%o4,%o2
1219/* 0x0044	     */		add	%g1,%o5,%o1
1220                       .L900000713:
1221/* 0x0048	 244 */		ld	[%o2],%o0
1222/* 0x004c	     */		cmp	%o0,%g4
1223/* 0x0050	     */		bne,pn	%icc,.L77000324
1224/* 0x0054	     */		sub	%o2,4,%o2
1225                       .L77000320:
1226/* 0x0058	 244 */		sub	%o1,4,%o1
1227/* 0x005c	     */		subcc	%o3,1,%o3
1228/* 0x0060	     */		bpos,a,pt	%icc,.L900000713
1229/* 0x0064	     */		ld	[%o1],%g4
/* All words were equal (i ran below 0): subtract. */
1230                       .L900000706:
1231/* 0x0068	 244 */		ba	.L900000712
1232/* 0x006c	 249 */		cmp	%g2,0
/*
 * Mismatch at index i (%o3): reload both words and compare unsigned.
 * i32[i] <= nint[i] means i32 < nint, so return without subtracting;
 * otherwise fall through into the subtraction.
 */
1233                       .L77000324:
1234/* 0x0070	 244 */		sra	%o3,0,%o0
1235/* 0x0074	     */		sllx	%o0,2,%g1
1236/* 0x0078	     */		ld	[%o5+%g1],%o3
1237/* 0x007c	     */		ld	[%o4+%g1],%g5
1238/* 0x0080	     */		cmp	%g5,%o3
1239/* 0x0084	     */		bleu,pt	%icc,.L77000332
1240/* 0x0088	     */		nop
1241
1242!  245		      !		}
1243!  246		      !	}
1244!  247		      !	if ((i < 0) || (i32[i] > nint[i])) {
1245!  248		      !		acc = 0;
1246!  249		      !		for (i = 0; i < len; i++) {
1247
/*
 * Subtraction entry.  Nothing to do when len <= 0.  The delay slot
 * copies len into %o3.
 */
1248                       .L77000340:
1249/* 0x008c	 249 */		cmp	%g2,0
1250                       .L900000712:
1251/* 0x0090	 249 */		ble,pn	%icc,.L77000332
1252/* 0x0094	 250 */		or	%g0,%g2,%o3
/*
 * i = 0 (%o0).  Fewer than 10 words use the simple loop at .L77000341;
 * the delay slot clears acc (%g2) for that path.
 */
1253                       .L77000347:
1254/* 0x0098	 249 */		or	%g0,0,%o0
1255
1256!  250		      !			acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1257
1258/* 0x009c	 250 */		cmp	%o3,10
1259/* 0x00a0	     */		bl,pn	%icc,.L77000341
1260/* 0x00a4	 249 */		or	%g0,0,%g2
/*
 * Prologue of the unrolled subtract loop:
 *	%o1 = nint + 4, %o2 = i32 + 8 (cursors run ahead of the index)
 *	%o5 = len - 8 (loop bound; the nint pointer now lives in %o1)
 *	%o0 = 2 (index after the prologue)
 * Word 0 is subtracted and stored here, i32[1] is preloaded into %g5,
 * and the carry/borrow out of word 0 is kept in %g4.
 */
1261                       .L900000709:
1262/* 0x00a8	 250 */		prefetch	[%o4],22
1263/* 0x00ac	     */		prefetch	[%o4+64],22
1264
1265!  251		      !			i32[i] = acc & 0xffffffff;
1266!  252		      !			acc = acc >> 32;
1267
1268/* 0x00b0	 252 */		add	%o5,4,%o1
1269/* 0x00b4	     */		add	%o4,8,%o2
1270/* 0x00b8	 250 */		prefetch	[%o4+128],22
1271/* 0x00bc	     */		sub	%o3,8,%o5
1272/* 0x00c0	     */		or	%g0,2,%o0
1273/* 0x00c4	     */		prefetch	[%o4+192],22
1274/* 0x00c8	     */		prefetch	[%o4+256],22
1275/* 0x00cc	     */		prefetch	[%o4+320],22
1276/* 0x00d0	     */		prefetch	[%o4+384],22
1277/* 0x00d4	     */		ld	[%o2-4],%g5
1278/* 0x00d8	     */		prefetch	[%o2+440],22
1279/* 0x00dc	     */		prefetch	[%o2+504],22
1280/* 0x00e0	     */		ld	[%o4],%g4
1281/* 0x00e4	     */		ld	[%o1-4],%o4
1282/* 0x00e8	     */		sub	%g4,%o4,%o3
1283/* 0x00ec	 251 */		st	%o3,[%o2-8]
1284/* 0x00f0	 252 */		srax	%o3,32,%g4
/*
 * Unrolled subtract loop, eight words per pass.  Each step computes
 *	t = i32[k] - nint[k] + carry
 * in 64 bits, stores the low 32 bits of t back to i32[k] (st writes a
 * word) and takes carry = t >> 32 with an arithmetic shift (srax), so a
 * borrow is carried as -1.  The last i32 word loaded in a pass is kept
 * in %g5 for the next pass.  The loop repeats while %o0 <= len - 8.
 */
1285                       .L900000707:
1286/* 0x00f4	 252 */		add	%o0,8,%o0
1287/* 0x00f8	     */		add	%o2,32,%o2
1288/* 0x00fc	 250 */		ld	[%o1],%g1
1289/* 0x0100	     */		prefetch	[%o2+496],22
1290/* 0x0104	 252 */		cmp	%o0,%o5
1291/* 0x0108	     */		add	%o1,32,%o1
1292/* 0x010c	 250 */		sub	%g5,%g1,%g5
1293/* 0x0110	     */		add	%g5,%g4,%o4
1294/* 0x0114	     */		ld	[%o2-32],%g4
1295/* 0x0118	 251 */		st	%o4,[%o2-36]
1296/* 0x011c	 252 */		srax	%o4,32,%g1
1297/* 0x0120	 250 */		ld	[%o1-28],%o3
1298/* 0x0124	     */		sub	%g4,%o3,%g2
1299/* 0x0128	     */		add	%g2,%g1,%g5
1300/* 0x012c	     */		ld	[%o2-28],%o3
1301/* 0x0130	 251 */		st	%g5,[%o2-32]
1302/* 0x0134	 252 */		srax	%g5,32,%g4
1303/* 0x0138	 250 */		ld	[%o1-24],%o4
1304/* 0x013c	     */		sub	%o3,%o4,%g1
1305/* 0x0140	     */		add	%g1,%g4,%g2
1306/* 0x0144	     */		ld	[%o2-24],%o3
1307/* 0x0148	 251 */		st	%g2,[%o2-28]
1308/* 0x014c	 252 */		srax	%g2,32,%g5
1309/* 0x0150	 250 */		ld	[%o1-20],%o4
1310/* 0x0154	     */		sub	%o3,%o4,%g4
1311/* 0x0158	     */		add	%g4,%g5,%g1
1312/* 0x015c	     */		ld	[%o2-20],%o4
1313/* 0x0160	 251 */		st	%g1,[%o2-24]
1314/* 0x0164	 252 */		srax	%g1,32,%o3
1315/* 0x0168	 250 */		ld	[%o1-16],%g2
1316/* 0x016c	     */		sub	%o4,%g2,%g5
1317/* 0x0170	     */		add	%g5,%o3,%g1
1318/* 0x0174	     */		ld	[%o2-16],%g4
1319/* 0x0178	 251 */		st	%g1,[%o2-20]
1320/* 0x017c	 252 */		srax	%g1,32,%o4
1321/* 0x0180	 250 */		ld	[%o1-12],%g2
1322/* 0x0184	     */		sub	%g4,%g2,%o3
1323/* 0x0188	     */		add	%o3,%o4,%g5
1324/* 0x018c	     */		ld	[%o2-12],%g2
1325/* 0x0190	 251 */		st	%g5,[%o2-16]
1326/* 0x0194	 252 */		srax	%g5,32,%g4
1327/* 0x0198	 250 */		ld	[%o1-8],%g1
1328/* 0x019c	     */		sub	%g2,%g1,%o4
1329/* 0x01a0	     */		add	%o4,%g4,%o3
1330/* 0x01a4	     */		ld	[%o2-8],%g2
1331/* 0x01a8	 251 */		st	%o3,[%o2-12]
1332/* 0x01ac	 252 */		srax	%o3,32,%g5
1333/* 0x01b0	 250 */		ld	[%o1-4],%g1
1334/* 0x01b4	     */		sub	%g2,%g1,%g4
1335/* 0x01b8	     */		add	%g4,%g5,%o4
1336/* 0x01bc	     */		ld	[%o2-4],%g5
1337/* 0x01c0	 251 */		st	%o4,[%o2-8]
1338/* 0x01c4	 252 */		ble,pt	%icc,.L900000707
1339/* 0x01c8	     */		srax	%o4,32,%g4
/*
 * Epilogue of the unrolled loop: finishes the word already held in %g5,
 * re-establishes the cursors expected by the simple loop (%o5 = next
 * nint word, %o4 = next i32 word) and moves the carry into %g2.
 * Returns if i (%o0) > len - 1 (%g3).
 */
1340                       .L900000710:
1341/* 0x01cc	 250 */		ld	[%o1],%o3
1342/* 0x01d0	 252 */		add	%o1,4,%o5
1343/* 0x01d4	 250 */		or	%g0,%o2,%o4
1344/* 0x01d8	 252 */		cmp	%o0,%g3
1345/* 0x01dc	 250 */		sub	%g5,%o3,%g2
1346/* 0x01e0	     */		add	%g2,%g4,%g1
1347/* 0x01e4	 251 */		st	%g1,[%o2-4]
1348/* 0x01e8	 252 */		bg,pn	%icc,.L77000332
1349/* 0x01ec	     */		srax	%g1,32,%g2
/*
 * Simple subtract loop, one word per pass:
 *	acc = acc + i32[i] - nint[i]; i32[i] = low word of acc; acc >>= 32
 * It repeats while the incremented i <= len - 1; the annulled delay slot
 * preloads the next i32 word.
 */
1350                       .L77000341:
1351/* 0x01f0	 250 */		ld	[%o4],%g5
1352                       .L900000711:
1353/* 0x01f4	 250 */		ld	[%o5],%o2
1354/* 0x01f8	     */		add	%g2,%g5,%g4
1355/* 0x01fc	 252 */		add	%o0,1,%o0
1356/* 0x0200	     */		cmp	%o0,%g3
1357/* 0x0204	     */		add	%o5,4,%o5
1358/* 0x0208	 250 */		sub	%g4,%o2,%o1
1359/* 0x020c	 251 */		st	%o1,[%o4]
1360/* 0x0210	 252 */		srax	%o1,32,%g2
1361/* 0x0214	     */		add	%o4,4,%o4
1362/* 0x0218	     */		ble,a,pt	%icc,.L900000711
1363/* 0x021c	 250 */		ld	[%o4],%g5
/* Common exit of the leaf routine; no value is returned. */
1364                       .L77000332:
1365/* 0x0220	 252 */		retl	! Result =
1366/* 0x0224	     */		nop
1367/* 0x0228	   0 */		.type	adjust_montf_result,2
1368/* 0x0228	   0 */		.size	adjust_montf_result,(.-adjust_montf_result)
1369
1370	.section	".text",#alloc,#execinstr
1371/* 000000	   0 */		.align	32
1372
1373!  253		      !		}
1374!  254		      !	}
1375!  255		      !}
1376!  257		      !/*************
1377!  258		      !static void
1378!  259		      !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
1379!  260		      !{
1380!  261		      !	int64_t acc;
1381!  262		      !	int i;
1382!  264		      !	c4++;
1383!  265		      !
1384!  266		      !	if (i32[len] > 0) {
1385!  267		      !		i = -1;
1386!  268		      !		c1++;
1387!  269		      !	} else {
1388!  270		      !		for (i = len - 1; i >= 0; i++) {
1389!  271		      !			if (i32[i] != nint[i]) break;
1390!  272		      !			c2++;
1391!  273		      !		}
1392!  274		      !	}
1393!  275		      !	if ((i < 0) || (i32[i] > nint[i])) {
1394!  276		      !		c3++;
1395!  277		      !		acc = 0;
1396!  278		      !		for (i = 0; i < len; i++) {
1397!  279		      !			acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1398!  280		      !			i32[i] = acc & 0xffffffff;
1399!  281		      !			acc = acc >> 32;
1400!  282		      !		}
1401!  283		      !	}
1402!  284		      !}
1403!  285		      !uint32_t saveresult[1000];
1404!  286		      !void printarray(char *name, uint32_t *arr, int len)
1405!  287		      !{
1406!  288		      !	int i, j;
1407!  289		      !	uint64_t tmp;
1408!  291		      !	printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2);
1409!  292		      !	for(i=j=0; i<len; i+=2,j+=2){
1410!  293		      !		if(j == 6){
1411!  294		      !			printf("\n");
1412!  295		      !			j=0;
1413!  296		      !		}
1414!  297		      !		tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]);
1415!  298		      !		printf("0x%016llx",tmp);
1416!  299		      !		if((i/2)!=(((len+1)/2)-1))printf(",");
1417!  300		      !		if(j!=4)printf(" ");
1418!  301		      !	}
1419!  302		      !	if(j!=0) printf("\n");
1420!  303		      !	printf("};\n");
1421!  304		      !}
1422!  305		      !**************/
1423!  308		      !/*
1424!  309		      ! * the lengths of the input arrays should be at least the following:
1425!  310		      ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
1426!  311		      ! * all of them should be different from one another
1427!  312		      ! */
1428!  313		      !void mont_mulf_noconv(uint32_t *result,
1429!  314		      !			double *dm1, double *dm2, double *dt,
1430!  315		      !			double *dn, uint32_t *nint,
1431!  316		      !			int nlen, double dn0)
1432!  317		      !{
1433
1434!
1435! SUBROUTINE mont_mulf_noconv
1436!
1437! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
1438
1439                       	.global mont_mulf_noconv
1440                       mont_mulf_noconv:
1441/* 000000	 317 */		save	%sp,-176,%sp
1442/* 0x0004	     */		ldx	[%fp+2223],%g1
1443/* 0x0008	   0 */		sethi	%hi(Zero),%l5
1444/* 0x000c	 317 */		or	%g0,%i2,%l0
1445
1446!  318		      !	int i, j, jj;
1447!  319		      !	double digit, m2j, a, b;
1448!  320		      !	double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
1449!  322		      !	pdm1 = &(dm1[0]);
1450!  323		      !	pdm2 = &(dm2[0]);
1451!  324		      !	pdn = &(dn[0]);
1452!  325		      !	pdm2[2 * nlen] = Zero;
1453
1454/* 0x0010	 325 */		ldd	[%l5+%lo(Zero)],%f0
1455/* 0x0014	 317 */		or	%g0,%i0,%i2
1456/* 0x0018	 325 */		sll	%g1,1,%o3
1457
1458!  327		      !	if (nlen != 16) {
1459
1460/* 0x001c	 327 */		cmp	%g1,16
1461/* 0x0020	 325 */		sra	%o3,0,%i0
1462/* 0x0024	     */		sllx	%i0,3,%o0
1463/* 0x0028	 317 */		or	%g0,%i5,%i0
1464/* 0x002c	 327 */		bne,pn	%icc,.L77000476
1465/* 0x0030	 325 */		std	%f0,[%l0+%o0]
1466                       .L77000488:
1467/* 0x0034	   0 */		sethi	%hi(TwoToMinus16),%o2
1468/* 0x0038	   0 */		sethi	%hi(TwoTo16),%l3
1469
1470!  328		      !		for (i = 0; i < 4 * nlen + 2; i++)
1471!  329		      !			dt[i] = Zero;
1472!  330		      !		a = dt[0] = pdm1[0] * pdm2[0];
1473!  331		      !		digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1474!  333		      !		pdtj = &(dt[0]);
1475!  334		      !		for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
1476!  335		      !			m2j = pdm2[j];
1477!  336		      !			a = pdtj[0] + pdn[0] * digit;
1478!  337		      !			b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
1479!  338		      !			pdtj[1] = b;
1480!  340		      !#pragma pipeloop(0)
1481!  341		      !			for (i = 1; i < nlen; i++) {
1482!  342		      !				pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
1483!  343		      !			}
1484!  344		      !			if (jj == 15) {
1485!  345		      !				cleanup(dt, j / 2 + 1, 2 * nlen + 1);
1486!  346		      !				jj = 0;
1487!  347		      !			}
1488!  349		      !			digit = mod(lower32(b, Zero) * dn0,
1489!  350		      !				    TwoToMinus16, TwoTo16);
1490!  351		      !		}
1491!  352		      !	} else {
1492!  353		      !		a = dt[0] = pdm1[0] * pdm2[0];
1493
1494/* 0x003c	 353 */		ldd	[%i1],%f40
1495
1496!  355		      !		dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
1497!  356		      !			dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
1498!  357		      !			dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
1499!  358		      !			dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
1500!  359		      !			dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
1501!  360		      !			dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
1502!  361		      !			dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
1503!  362		      !			dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
1504!  363		      !			dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
1505!  364		      !			dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
1506!  365		      !			dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
1507!  366		      !			dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
1508!  367		      !			dt[3] = dt[2] = dt[1] = Zero;
1509!  369		      !		pdn_0 = pdn[0];
1510!  370		      !		pdm1_0 = pdm1[0];
1511!  372		      !		digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1512!  373		      !		pdtj = &(dt[0]);
1513
1514/* 0x0040	 373 */		or	%g0,%i3,%o3
1515
1516!  375		      !		for (j = 0; j < 32; j++, pdtj++) {
1517
1518/* 0x0044	 375 */		or	%g0,0,%l1
1519/* 0x0048	 353 */		ldd	[%l0],%f42
1520/* 0x004c	 372 */		ldd	[%o2+%lo(TwoToMinus16)],%f44
1521/* 0x0050	     */		ldd	[%l3+%lo(TwoTo16)],%f46
1522/* 0x0054	 367 */		std	%f0,[%i3+8]
1523/* 0x0058	 353 */		fmuld	%f40,%f42,%f38
1524/* 0x005c	     */		std	%f38,[%i3]
1525/* 0x0060	 367 */		std	%f0,[%i3+16]
1526/* 0x0064	     */		std	%f0,[%i3+24]
1527/* 0x0068	     */		std	%f0,[%i3+32]
1528/* 0x006c	 372 */		fdtox	%f38,%f4
1529/* 0x0070	 367 */		std	%f0,[%i3+40]
1530/* 0x0074	     */		std	%f0,[%i3+48]
1531/* 0x0078	     */		std	%f0,[%i3+56]
1532/* 0x007c	 372 */		fmovs	%f0,%f4
1533/* 0x0080	 367 */		std	%f0,[%i3+64]
1534/* 0x0084	     */		std	%f0,[%i3+72]
1535/* 0x0088	 372 */		fxtod	%f4,%f52
1536/* 0x008c	 367 */		std	%f0,[%i3+80]
1537/* 0x0090	     */		std	%f0,[%i3+88]
1538/* 0x0094	     */		std	%f0,[%i3+96]
1539/* 0x0098	     */		std	%f0,[%i3+104]
1540/* 0x009c	 372 */		fmuld	%f52,%f14,%f60
1541/* 0x00a0	 367 */		std	%f0,[%i3+112]
1542/* 0x00a4	     */		std	%f0,[%i3+120]
1543/* 0x00a8	     */		std	%f0,[%i3+128]
1544/* 0x00ac	     */		std	%f0,[%i3+136]
1545/* 0x00b0	 372 */		fmuld	%f60,%f44,%f62
1546/* 0x00b4	 367 */		std	%f0,[%i3+144]
1547/* 0x00b8	     */		std	%f0,[%i3+152]
1548/* 0x00bc	     */		std	%f0,[%i3+160]
1549/* 0x00c0	     */		std	%f0,[%i3+168]
1550/* 0x00c4	 372 */		fdtox	%f62,%f32
1551/* 0x00c8	 367 */		std	%f0,[%i3+176]
1552/* 0x00cc	     */		std	%f0,[%i3+184]
1553/* 0x00d0	     */		std	%f0,[%i3+192]
1554/* 0x00d4	     */		std	%f0,[%i3+200]
1555/* 0x00d8	 372 */		fxtod	%f32,%f50
1556/* 0x00dc	 367 */		std	%f0,[%i3+208]
1557/* 0x00e0	     */		std	%f0,[%i3+216]
1558/* 0x00e4	     */		std	%f0,[%i3+224]
1559/* 0x00e8	     */		std	%f0,[%i3+232]
1560/* 0x00ec	 372 */		fmuld	%f50,%f46,%f34
1561/* 0x00f0	 367 */		std	%f0,[%i3+240]
1562/* 0x00f4	     */		std	%f0,[%i3+248]
1563/* 0x00f8	     */		std	%f0,[%i3+256]
1564/* 0x00fc	     */		std	%f0,[%i3+264]
1565/* 0x0100	 372 */		fsubd	%f60,%f34,%f40
1566/* 0x0104	 367 */		std	%f0,[%i3+272]
1567/* 0x0108	     */		std	%f0,[%i3+280]
1568/* 0x010c	     */		std	%f0,[%i3+288]
1569/* 0x0110	     */		std	%f0,[%i3+296]
1570/* 0x0114	     */		std	%f0,[%i3+304]
1571/* 0x0118	     */		std	%f0,[%i3+312]
1572/* 0x011c	     */		std	%f0,[%i3+320]
1573/* 0x0120	     */		std	%f0,[%i3+328]
1574/* 0x0124	     */		std	%f0,[%i3+336]
1575/* 0x0128	     */		std	%f0,[%i3+344]
1576/* 0x012c	     */		std	%f0,[%i3+352]
1577/* 0x0130	     */		std	%f0,[%i3+360]
1578/* 0x0134	     */		std	%f0,[%i3+368]
1579/* 0x0138	 375 */		sub	%g1,1,%l3
1580/* 0x013c	     */		add	%i3,8,%o7
1581/* 0x0140	 367 */		std	%f0,[%i3+376]
1582/* 0x0144	     */		std	%f0,[%i3+384]
1583/* 0x0148	     */		std	%f0,[%i3+392]
1584/* 0x014c	     */		std	%f0,[%i3+400]
1585/* 0x0150	     */		std	%f0,[%i3+408]
1586/* 0x0154	     */		std	%f0,[%i3+416]
1587/* 0x0158	     */		std	%f0,[%i3+424]
1588/* 0x015c	     */		std	%f0,[%i3+432]
1589/* 0x0160	     */		std	%f0,[%i3+440]
1590/* 0x0164	     */		std	%f0,[%i3+448]
1591/* 0x0168	     */		std	%f0,[%i3+456]
1592/* 0x016c	     */		std	%f0,[%i3+464]
1593/* 0x0170	     */		std	%f0,[%i3+472]
1594/* 0x0174	     */		std	%f0,[%i3+480]
1595/* 0x0178	     */		std	%f0,[%i3+488]
1596/* 0x017c	     */		std	%f0,[%i3+496]
1597/* 0x0180	     */		std	%f0,[%i3+504]
1598/* 0x0184	     */		std	%f0,[%i3+512]
1599/* 0x0188	     */		std	%f0,[%i3+520]
1600
1601!BEGIN HAND CODED PART
1602
1603! cheetah schedule, no even-odd trick
1604
1605
1606	add	%i3,%g0,%o5
1607
1608	fmovd	%f40,%f0
1609	fmovd	%f14,%f2
1610	fmovd	%f44,%f8
1611	sethi	%hi(TwoTo32),%l5
1612	fmovd	%f46,%f10
1613	sethi	%hi(TwoToMinus32),%g5
1614	ldd	[%i3],%f6
1615	ldd	[%l0],%f4
1616
1617	ldd	[%i1],%f40
1618	ldd	[%i1+8],%f42
1619	ldd	[%i1+16],%f52
1620	ldd	[%i1+48],%f54
1621	ldd	[%i1+56],%f36
1622	ldd	[%i1+64],%f56
1623	ldd	[%i1+104],%f48
1624	ldd	[%i1+112],%f58
1625
1626	ldd	[%i4],%f44
1627	ldd	[%i4+8],%f46
1628	ldd	[%i4+104],%f50
1629	ldd	[%i4+112],%f60
1630
1631
1632	.L99999999:
1633!1
1634	ldd	[%i1+24],%f20
1635	fmuld	%f0,%f44,%f12
1636!2
1637	ldd	[%i4+24],%f22
1638	fmuld	%f42,%f4,%f16
1639!3
1640	ldd	[%i1+40],%f24
1641	fmuld	%f46,%f0,%f18
1642!4
1643	ldd	[%i4+40],%f26
1644	fmuld	%f20,%f4,%f20
1645!5
1646	ldd	[%l0+8],%f38
1647	faddd	%f12,%f6,%f12
1648	fmuld	%f22,%f0,%f22
1649!6
1650	add	%l0,8,%l0
1651	ldd	[%i4+56],%f30
1652	fmuld	%f24,%f4,%f24
1653!7
1654	ldd	[%i1+72],%f32
1655	faddd	%f16,%f18,%f16
1656	fmuld	%f26,%f0,%f26
1657!8
1658	ldd	[%i3+16],%f18
1659	fmuld	%f40,%f38,%f14
1660!9
1661	ldd	[%i4+72],%f34
1662	faddd	%f20,%f22,%f20
1663	fmuld	%f8,%f12,%f12
1664!10
1665	ldd	[%i3+48],%f22
1666	fmuld	%f36,%f4,%f28
1667!11
1668	ldd	[%i3+8],%f6
1669	faddd	%f16,%f18,%f16
1670	fmuld	%f30,%f0,%f30
1671!12
1672	std	%f16,[%i3+16]
1673	faddd	%f24,%f26,%f24
1674	fmuld	%f32,%f4,%f32
1675!13
1676	ldd	[%i3+80],%f26
1677	faddd	%f12,%f14,%f12
1678	fmuld	%f34,%f0,%f34
1679!14
1680	ldd	[%i1+88],%f16
1681	faddd	%f20,%f22,%f20
1682!15
1683	ldd	[%i4+88],%f18
1684	faddd	%f28,%f30,%f28
1685!16
1686	ldd	[%i3+112],%f30
1687	faddd	%f32,%f34,%f32
1688!17
1689	ldd	[%i3+144],%f34
1690	faddd	%f12,%f6,%f6
1691	fmuld	%f16,%f4,%f16
1692!18
1693	std	%f20,[%i3+48]
1694	faddd	%f24,%f26,%f24
1695	fmuld	%f18,%f0,%f18
1696!19
1697	std	%f24,[%i3+80]
1698	faddd	%f28,%f30,%f28
1699	fmuld	%f48,%f4,%f20
1700!20
1701	std	%f28,[%i3+112]
1702	faddd	%f32,%f34,%f32
1703	fmuld	%f50,%f0,%f22
1704!21
1705	ldd	[%i1+120],%f24
1706	fdtox	%f6,%f12
1707!22
1708	std	%f32,[%i3+144]
1709	faddd	%f16,%f18,%f16
1710!23
1711	ldd	[%i4+120],%f26
1712!24
1713	ldd	[%i3+176],%f18
1714	faddd	%f20,%f22,%f20
1715	fmuld	%f24,%f4,%f24
1716!25
1717	ldd	[%i4+16],%f30
1718	fmovs	%f11,%f12
1719!26
1720	ldd	[%i1+32],%f32
1721	fmuld	%f26,%f0,%f26
1722!27
1723	ldd	[%i4+32],%f34
1724	fmuld	%f52,%f4,%f28
1725!28
1726	ldd	[%i3+208],%f22
1727	faddd	%f16,%f18,%f16
1728	fmuld	%f30,%f0,%f30
1729!29
1730	std	%f16,[%i3+176]
1731	fxtod	%f12,%f12
1732	fmuld	%f32,%f4,%f32
1733!30
1734	ldd	[%i4+48],%f18
1735	faddd	%f24,%f26,%f24
1736	fmuld	%f34,%f0,%f34
1737!31
1738	ldd	[%i3+240],%f26
1739	faddd	%f20,%f22,%f20
1740!32
1741	std	%f20,[%i3+208]
1742	faddd	%f28,%f30,%f28
1743	fmuld	%f54,%f4,%f16
1744!33
1745	ldd	[%i3+32],%f30
1746	fmuld	%f12,%f2,%f14
1747!34
1748	ldd	[%i4+64],%f22
1749	faddd	%f32,%f34,%f32
1750	fmuld	%f18,%f0,%f18
1751!35
1752	ldd	[%i3+64],%f34
1753	faddd	%f24,%f26,%f24
1754!36
1755	std	%f24,[%i3+240]
1756	faddd	%f28,%f30,%f28
1757	fmuld	%f56,%f4,%f20
1758!37
1759	std	%f28,[%i3+32]
1760	fmuld	%f14,%f8,%f12
1761!38
1762	ldd	[%i1+80],%f24
1763	faddd	%f32,%f34,%f34	! yes, tmp52!
1764	fmuld	%f22,%f0,%f22
1765!39
1766	ldd	[%i4+80],%f26
1767	faddd	%f16,%f18,%f16
1768!40
1769	ldd	[%i1+96],%f28
1770	fmuld	%f58,%f4,%f32
1771!41
1772	ldd	[%i4+96],%f30
1773	fdtox	%f12,%f12
1774	fmuld	%f24,%f4,%f24
1775!42
1776	std	%f34,[%i3+64]	! yes, tmp52!
1777	faddd	%f20,%f22,%f20
1778	fmuld	%f26,%f0,%f26
1779!43
1780	ldd	[%i3+96],%f18
1781	fmuld	%f28,%f4,%f28
1782!44
1783	ldd	[%i3+128],%f22
1784	fmovd	%f38,%f4
1785	fmuld	%f30,%f0,%f30
1786!45
1787	fxtod	%f12,%f12
1788	fmuld	%f60,%f0,%f34
1789!46
1790	add	%i3,8,%i3
1791	faddd	%f24,%f26,%f24
1792!47
1793	ldd	[%i3+160-8],%f26
1794	faddd	%f16,%f18,%f16
1795!48
1796	std	%f16,[%i3+96-8]
1797	faddd	%f28,%f30,%f28
1798!49
1799	ldd	[%i3+192-8],%f30
1800	faddd	%f32,%f34,%f32
1801	fmuld	%f12,%f10,%f12
1802!50
1803	ldd	[%i3+224-8],%f34
1804	faddd	%f20,%f22,%f20
1805!51
1806	std	%f20,[%i3+128-8]
1807	faddd	%f24,%f26,%f24
1808!52
1809	add	%l1,1,%l1
1810	std	%f24,[%i3+160-8]
1811	faddd	%f28,%f30,%f28
1812!53
1813	cmp	%l1,15
1814	std	%f28,[%i3+192-8]
1815	fsubd	%f14,%f12,%f0
1816!54
1817	faddd	%f32,%f34,%f32
1818	ble,pt	%icc,.L99999999
1819	std	%f32,[%i3+224-8]
1820
1821
1822!
1823	ldd	[%g5+%lo(TwoToMinus32)],%f8
1824!
1825	ldd	[%i3+8],%f16
1826!
1827	ldd	[%i3+16],%f20
1828!
1829	fmuld	%f8,%f16,%f18
1830	ldd	[%i3+24],%f24
1831!
1832	fmuld	%f8,%f20,%f22
1833	ldd	[%i3+32],%f28
1834!
1835	fmuld	%f8,%f24,%f26
1836	ldd	[%l5+%lo(TwoTo32)],%f10
1837!
1838	fmuld	%f8,%f28,%f30
1839!
1840	fdtox	%f18,%f18
1841!
1842	fdtox	%f22,%f22
1843!
1844	fdtox	%f26,%f26
1845	ldd	[%i3+40],%f32
1846!
1847	fdtox	%f30,%f30
1848	ldd	[%i3+48],%f56
1849!
1850	fxtod	%f18,%f18
1851	fmuld	%f8,%f32,%f34
1852	ldd	[%i3+56],%f36
1853!
1854	fxtod	%f22,%f22
1855	fmuld	%f8,%f56,%f58
1856	ldd	[%i3+64],%f38
1857!
1858	fxtod	%f26,%f26
1859	fmuld	%f8,%f36,%f60
1860!
1861	fxtod	%f30,%f30
1862	fmuld	%f8,%f38,%f62
1863!
1864	fdtox	%f34,%f34
1865	fmuld	%f10,%f18,%f40
1866!
1867	fdtox	%f58,%f58
1868	fmuld	%f10,%f22,%f42
1869!
1870	fdtox	%f60,%f60
1871	fmuld	%f10,%f26,%f44
1872!
1873	fdtox	%f62,%f62
1874	fmuld	%f10,%f30,%f46
1875!
1876	fxtod	%f34,%f34
1877!
1878	fxtod	%f58,%f58
1879!
1880	fxtod	%f60,%f60
1881!
1882	fxtod	%f62,%f62
1883!
1884	fsubd	%f16,%f40,%f40
1885	fmuld	%f10,%f34,%f48
1886!
1887	fsubd	%f20,%f42,%f42
1888	fmuld	%f10,%f58,%f50
1889!
1890	fsubd	%f24,%f44,%f44
1891	fmuld	%f10,%f60,%f52
1892!
1893	fsubd	%f28,%f46,%f46
1894	fmuld	%f10,%f62,%f54
1895!
1896	std	%f40,[%i3+8]
1897!
1898	std	%f42,[%i3+16]
1899!
1900	faddd	%f18,%f44,%f44
1901	std	%f44,[%i3+24]
1902!
1903	faddd	%f22,%f46,%f46
1904	std	%f46,[%i3+32]
1905!
1906
1907
1908
1909	fsubd	%f32,%f48,%f48
1910	ldd	[%i3+64+8],%f16
1911!
1912	fsubd	%f56,%f50,%f50
1913	ldd	[%i3+64+16],%f20
1914!
1915	fsubd	%f36,%f52,%f52
1916	ldd	[%i3+64+24],%f24
1917!
1918	fsubd	%f38,%f54,%f54
1919	ldd	[%i3+64+32],%f28
1920!
1921	faddd	%f26,%f48,%f48
1922	fmuld	%f8,%f16,%f18
1923	std	%f48,[%i3+40]
1924!
1925	faddd	%f30,%f50,%f50
1926	fmuld	%f8,%f20,%f22
1927	std	%f50,[%i3+48]
1928!
1929	faddd	%f34,%f52,%f52
1930	fmuld	%f8,%f24,%f26
1931	std	%f52,[%i3+56]
1932!
1933	faddd	%f58,%f54,%f54
1934	fmuld	%f8,%f28,%f30
1935	std	%f54,[%i3+64]
1936!
1937
1938
1939	fdtox	%f18,%f18
1940!
1941	fdtox	%f22,%f22
1942!
1943	fdtox	%f26,%f26
1944	ldd	[%i3+64+40],%f32
1945!
1946	fdtox	%f30,%f30
1947	ldd	[%i3+64+48],%f56
1948!
1949	fxtod	%f18,%f18
1950	fmuld	%f8,%f32,%f34
1951	ldd	[%i3+64+56],%f36
1952!
1953	fxtod	%f22,%f22
1954	fmuld	%f8,%f56,%f58
1955	ldd	[%i3+64+64],%f38
1956!
1957	fxtod	%f26,%f26
1958	fmuld	%f8,%f36,%f12
1959!
1960	fxtod	%f30,%f30
1961	fmuld	%f8,%f38,%f14
1962!
1963	fdtox	%f34,%f34
1964	fmuld	%f10,%f18,%f40
1965!
1966	fdtox	%f58,%f58
1967	fmuld	%f10,%f22,%f42
1968!
1969	fdtox	%f12,%f12
1970	fmuld	%f10,%f26,%f44
1971!
1972	fdtox	%f14,%f14
1973	fmuld	%f10,%f30,%f46
1974!
1975	fxtod	%f34,%f34
1976!
1977	fxtod	%f58,%f58
1978!
1979	fxtod	%f12,%f12
1980!
1981	fxtod	%f14,%f14
1982!
1983	fsubd	%f16,%f40,%f40
1984	fmuld	%f10,%f34,%f48
1985!
1986	fsubd	%f20,%f42,%f42
1987	fmuld	%f10,%f58,%f50
1988!
1989	fsubd	%f24,%f44,%f44
1990	fmuld	%f10,%f12,%f52
1991!
1992	fsubd	%f28,%f46,%f46
1993	fmuld	%f10,%f14,%f54
1994!
1995	faddd	%f60,%f40,%f40
1996	std	%f40,[%i3+64+8]
1997!
1998	faddd	%f62,%f42,%f42
1999	std	%f42,[%i3+64+16]
2000!
2001	faddd	%f18,%f44,%f44
2002	std	%f44,[%i3+64+24]
2003!
2004	faddd	%f22,%f46,%f46
2005	std	%f46,[%i3+64+32]
2006!
2007
2008
2009
2010	fsubd	%f32,%f48,%f48
2011	ldd	[%i3+64+64+8],%f16
2012!
2013	fsubd	%f56,%f50,%f50
2014	ldd	[%i3+64+64+16],%f20
2015!
2016	fsubd	%f36,%f52,%f52
2017	ldd	[%i3+64+64+24],%f24
2018!
2019	fsubd	%f38,%f54,%f54
2020	ldd	[%i3+64+64+32],%f28
2021!
2022	faddd	%f26,%f48,%f48
2023	fmuld	%f8,%f16,%f18
2024	std	%f48,[%i3+64+40]
2025!
2026	faddd	%f30,%f50,%f50
2027	fmuld	%f8,%f20,%f22
2028	std	%f50,[%i3+64+48]
2029!
2030	faddd	%f34,%f52,%f52
2031	fmuld	%f8,%f24,%f26
2032	std	%f52,[%i3+64+56]
2033!
2034	faddd	%f58,%f54,%f54
2035	fmuld	%f8,%f28,%f30
2036	std	%f54,[%i3+64+64]
2037!
2038
2039
2040
2041	fdtox	%f18,%f18
2042!
2043	fdtox	%f22,%f22
2044!
2045	fdtox	%f26,%f26
2046	ldd	[%i3+64+64+40],%f32
2047!
2048	fdtox	%f30,%f30
2049	ldd	[%i3+64+64+48],%f56
2050!
2051	fxtod	%f18,%f18
2052	fmuld	%f8,%f32,%f34
2053	ldd	[%i3+64+64+56],%f36
2054!
2055	fxtod	%f22,%f22
2056	fmuld	%f8,%f56,%f58
2057	ldd	[%i3+64+64+64],%f38
2058!
2059	fxtod	%f26,%f26
2060	fmuld	%f8,%f36,%f60
2061!
2062	fxtod	%f30,%f30
2063	fmuld	%f8,%f38,%f62
2064!
2065	fdtox	%f34,%f34
2066	fmuld	%f10,%f18,%f40
2067!
2068	fdtox	%f58,%f58
2069	fmuld	%f10,%f22,%f42
2070!
2071	fdtox	%f60,%f60
2072	fmuld	%f10,%f26,%f44
2073!
2074	fdtox	%f62,%f62
2075	fmuld	%f10,%f30,%f46
2076!
2077	fxtod	%f34,%f34
2078!
2079	fxtod	%f58,%f58
2080!
2081	fxtod	%f60,%f60
2082!
2083	fxtod	%f62,%f62
2084!
2085	fsubd	%f16,%f40,%f40
2086	fmuld	%f10,%f34,%f48
2087!
2088	fsubd	%f20,%f42,%f42
2089	fmuld	%f10,%f58,%f50
2090!
2091	fsubd	%f24,%f44,%f44
2092	fmuld	%f10,%f60,%f52
2093!
2094	fsubd	%f28,%f46,%f46
2095	fmuld	%f10,%f62,%f54
2096!
2097	faddd	%f12,%f40,%f40
2098	std	%f40,[%i3+64+64+8]
2099!
2100	faddd	%f14,%f42,%f42
2101	std	%f42,[%i3+64+64+16]
2102!
2103	faddd	%f18,%f44,%f44
2104	std	%f44,[%i3+64+64+24]
2105!
2106	faddd	%f22,%f46,%f46
2107	std	%f46,[%i3+64+64+32]
2108!
2109
2110
2111	fsubd	%f32,%f48,%f48
2112	ldd	[%i3+64+64+64+8],%f16
2113!
2114	fsubd	%f56,%f50,%f50
2115	ldd	[%i3+64+64+64+16],%f20
2116!
2117	fsubd	%f36,%f52,%f52
2118	ldd	[%i3+64+64+64+24],%f24
2119!
2120	fsubd	%f38,%f54,%f54
2121	ldd	[%i3+64+64+64+32],%f28
2122!
2123	faddd	%f26,%f48,%f48
2124	fmuld	%f8,%f16,%f18
2125	std	%f48,[%i3+64+64+40]
2126!
2127	faddd	%f30,%f50,%f50
2128	fmuld	%f8,%f20,%f22
2129	std	%f50,[%i3+64+64+48]
2130!
2131	faddd	%f34,%f52,%f52
2132	fmuld	%f8,%f24,%f26
2133	std	%f52,[%i3+64+64+56]
2134!
2135	faddd	%f58,%f54,%f54
2136	fmuld	%f8,%f28,%f30
2137	std	%f54,[%i3+64+64+64]
2138!
2139
2140
2141	fdtox	%f18,%f18
2142!
2143	fdtox	%f22,%f22
2144!
2145	fdtox	%f26,%f26
2146	ldd	[%i3+64+64+64+40],%f32
2147!
2148	fdtox	%f30,%f30
2149	ldd	[%i3+64+64+64+48],%f56
2150!
2151	fxtod	%f18,%f18
2152	fmuld	%f8,%f32,%f34
2153	ldd	[%i3+64+64+64+56],%f36
2154!
2155	fxtod	%f22,%f22
2156	fmuld	%f8,%f56,%f58
2157	ldd	[%i3+64+64+64+64],%f38
2158!
2159	fxtod	%f26,%f26
2160	fmuld	%f8,%f36,%f12
2161!
2162	fxtod	%f30,%f30
2163	fmuld	%f8,%f38,%f14
2164!
2165	fdtox	%f34,%f34
2166	fmuld	%f10,%f18,%f40
2167!
2168	fdtox	%f58,%f58
2169	fmuld	%f10,%f22,%f42
2170!
2171	fdtox	%f12,%f12
2172	fmuld	%f10,%f26,%f44
2173!
2174	fdtox	%f14,%f14
2175	fmuld	%f10,%f30,%f46
2176!
2177	sethi	%hi(TwoToMinus16),%g5
2178	fxtod	%f34,%f34
2179!
2180	sethi	%hi(TwoTo16),%l5
2181	fxtod	%f58,%f58
2182!
2183	fxtod	%f12,%f12
2184!
2185	fxtod	%f14,%f14
2186!
2187	fsubd	%f16,%f40,%f16
2188	fmuld	%f10,%f34,%f48
2189	ldd	[%g5+%lo(TwoToMinus16)],%f8
2190!
2191	fsubd	%f20,%f42,%f20
2192	fmuld	%f10,%f58,%f50
2193	ldd	[%i1],%f40	! should be %f40
2194!
2195	fsubd	%f24,%f44,%f24
2196	fmuld	%f10,%f12,%f52
2197	ldd	[%i1+8],%f42	! should be %f42
2198!
2199	fsubd	%f28,%f46,%f28
2200	fmuld	%f10,%f14,%f54
2201	ldd	[%i4],%f44	! should be %f44
2202!
2203	faddd	%f60,%f16,%f16
2204	std	%f16,[%i3+64+64+64+8]
2205!
2206	faddd	%f62,%f20,%f20
2207	std	%f20,[%i3+64+64+64+16]
2208!
2209	faddd	%f18,%f24,%f24
2210	std	%f24,[%i3+64+64+64+24]
2211!
2212	faddd	%f22,%f28,%f28
2213	std	%f28,[%i3+64+64+64+32]
2214!
2215	fsubd	%f32,%f48,%f32
2216	ldd	[%i4+8],%f46	 ! should be %f46
2217!
2218	fsubd	%f56,%f50,%f56
2219	ldd	[%i1+104],%f48	! should be %f48
2220!
2221	fsubd	%f36,%f52,%f36
2222	ldd	[%i4+104],%f50	! should be %f50
2223!
2224	fsubd	%f38,%f54,%f38
2225	ldd	[%i1+16],%f52	! should be %f52
2226!
2227	faddd	%f26,%f32,%f32
2228	std	%f32,[%i3+64+64+64+40]
2229!
2230	faddd	%f30,%f56,%f56
2231	std	%f56,[%i3+64+64+64+48]
2232!
2233	faddd	%f34,%f36,%f36
2234	std	%f36,[%i3+64+64+64+56]
2235!
2236	faddd	%f58,%f38,%f38
2237	std	%f38,[%i3+64+64+64+64]
2238!
2239	std	%f12,[%i3+64+64+64+64+8]
2240!
2241	std	%f14,[%i3+64+64+64+64+16]
2242!
2243
2244	ldd	[%l5+%lo(TwoTo16)],%f10
2245	ldd	[%i1+48],%f54
2246	ldd	[%i1+56],%f36
2247	ldd	[%i1+64],%f56
2248	ldd	[%i1+112],%f58
2249
2250	ldd	[%i4+104],%f50
2251	ldd	[%i4+112],%f60
2252
2253
2254	.L99999998:
2255!1
2256	ldd	[%i1+24],%f20
2257	fmuld	%f0,%f44,%f12
2258!2
2259	ldd	[%i4+24],%f22
2260	fmuld	%f42,%f4,%f16
2261!3
2262	ldd	[%i1+40],%f24
2263	fmuld	%f46,%f0,%f18
2264!4
2265	ldd	[%i4+40],%f26
2266	fmuld	%f20,%f4,%f20
2267!5
2268	ldd	[%l0+8],%f38
2269	faddd	%f12,%f6,%f12
2270	fmuld	%f22,%f0,%f22
2271!6
2272	add	%l0,8,%l0
2273	ldd	[%i4+56],%f30
2274	fmuld	%f24,%f4,%f24
2275!7
2276	ldd	[%i1+72],%f32
2277	faddd	%f16,%f18,%f16
2278	fmuld	%f26,%f0,%f26
2279!8
2280	ldd	[%i3+16],%f18
2281	fmuld	%f40,%f38,%f14
2282!9
2283	ldd	[%i4+72],%f34
2284	faddd	%f20,%f22,%f20
2285	fmuld	%f8,%f12,%f12
2286!10
2287	ldd	[%i3+48],%f22
2288	fmuld	%f36,%f4,%f28
2289!11
2290	ldd	[%i3+8],%f6
2291	faddd	%f16,%f18,%f16
2292	fmuld	%f30,%f0,%f30
2293!12
2294	std	%f16,[%i3+16]
2295	faddd	%f24,%f26,%f24
2296	fmuld	%f32,%f4,%f32
2297!13
2298	ldd	[%i3+80],%f26
2299	faddd	%f12,%f14,%f12
2300	fmuld	%f34,%f0,%f34
2301!14
2302	ldd	[%i1+88],%f16
2303	faddd	%f20,%f22,%f20
2304!15
2305	ldd	[%i4+88],%f18
2306	faddd	%f28,%f30,%f28
2307!16
2308	ldd	[%i3+112],%f30
2309	faddd	%f32,%f34,%f32
2310!17
2311	ldd	[%i3+144],%f34
2312	faddd	%f12,%f6,%f6
2313	fmuld	%f16,%f4,%f16
2314!18
2315	std	%f20,[%i3+48]
2316	faddd	%f24,%f26,%f24
2317	fmuld	%f18,%f0,%f18
2318!19
2319	std	%f24,[%i3+80]
2320	faddd	%f28,%f30,%f28
2321	fmuld	%f48,%f4,%f20
2322!20
2323	std	%f28,[%i3+112]
2324	faddd	%f32,%f34,%f32
2325	fmuld	%f50,%f0,%f22
2326!21
2327	ldd	[%i1+120],%f24
2328	fdtox	%f6,%f12
2329!22
2330	std	%f32,[%i3+144]
2331	faddd	%f16,%f18,%f16
2332!23
2333	ldd	[%i4+120],%f26
2334!24
2335	ldd	[%i3+176],%f18
2336	faddd	%f20,%f22,%f20
2337	fmuld	%f24,%f4,%f24
2338!25
2339	ldd	[%i4+16],%f30
2340	fmovs	%f11,%f12
2341!26
2342	ldd	[%i1+32],%f32
2343	fmuld	%f26,%f0,%f26
2344!27
2345	ldd	[%i4+32],%f34
2346	fmuld	%f52,%f4,%f28
2347!28
2348	ldd	[%i3+208],%f22
2349	faddd	%f16,%f18,%f16
2350	fmuld	%f30,%f0,%f30
2351!29
2352	std	%f16,[%i3+176]
2353	fxtod	%f12,%f12
2354	fmuld	%f32,%f4,%f32
2355!30
2356	ldd	[%i4+48],%f18
2357	faddd	%f24,%f26,%f24
2358	fmuld	%f34,%f0,%f34
2359!31
2360	ldd	[%i3+240],%f26
2361	faddd	%f20,%f22,%f20
2362!32
2363	std	%f20,[%i3+208]
2364	faddd	%f28,%f30,%f28
2365	fmuld	%f54,%f4,%f16
2366!33
2367	ldd	[%i3+32],%f30
2368	fmuld	%f12,%f2,%f14
2369!34
2370	ldd	[%i4+64],%f22
2371	faddd	%f32,%f34,%f32
2372	fmuld	%f18,%f0,%f18
2373!35
2374	ldd	[%i3+64],%f34
2375	faddd	%f24,%f26,%f24
2376!36
2377	std	%f24,[%i3+240]
2378	faddd	%f28,%f30,%f28
2379	fmuld	%f56,%f4,%f20
2380!37
2381	std	%f28,[%i3+32]
2382	fmuld	%f14,%f8,%f12
2383!38
2384	ldd	[%i1+80],%f24
2385	faddd	%f32,%f34,%f34	!	yes, tmp52!
2386	fmuld	%f22,%f0,%f22
2387!39
2388	ldd	[%i4+80],%f26
2389	faddd	%f16,%f18,%f16
2390!40
2391	ldd	[%i1+96],%f28
2392	fmuld	%f58,%f4,%f32
2393!41
2394	ldd	[%i4+96],%f30
2395	fdtox	%f12,%f12
2396	fmuld	%f24,%f4,%f24
2397!42
2398	std	%f34,[%i3+64]	! yes, tmp52!
2399	faddd	%f20,%f22,%f20
2400	fmuld	%f26,%f0,%f26
2401!43
2402	ldd	[%i3+96],%f18
2403	fmuld	%f28,%f4,%f28
2404!44
2405	ldd	[%i3+128],%f22
2406	fmovd	%f38,%f4
2407	fmuld	%f30,%f0,%f30
2408!45
2409	fxtod	%f12,%f12
2410	fmuld	%f60,%f0,%f34
2411!46
2412	add	%i3,8,%i3
2413	faddd	%f24,%f26,%f24
2414!47
2415	ldd	[%i3+160-8],%f26
2416	faddd	%f16,%f18,%f16
2417!48
2418	std	%f16,[%i3+96-8]
2419	faddd	%f28,%f30,%f28
2420!49
2421	ldd	[%i3+192-8],%f30
2422	faddd	%f32,%f34,%f32
2423	fmuld	%f12,%f10,%f12
2424!50
2425	ldd	[%i3+224-8],%f34
2426	faddd	%f20,%f22,%f20
2427!51
2428	std	%f20,[%i3+128-8]
2429	faddd	%f24,%f26,%f24
2430!52
2431	add	%l1,1,%l1
2432	std	%f24,[%i3+160-8]
2433	faddd	%f28,%f30,%f28
2434!53
2435	cmp	%l1,31
2436	std	%f28,[%i3+192-8]
2437	fsubd	%f14,%f12,%f0
2438!54
2439	faddd	%f32,%f34,%f32
2440	ble,pt	%icc,.L99999998
2441	std	%f32,[%i3+224-8]
2442!55
2443	std	%f6,[%i3]
2444
2445	add	%o5,%g0,%i3
2446
2447
2448!END HAND CODED PART
2449                       .L900000828:
2450/* 0x03e4	 405 */		ba	.L900000852
2451/* 0x03e8	 409 */		ldx	[%i3+%o0],%l1
2452
2453!  406		      !		}
2454!  407		      !	}
2455!  409		      !	conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
2456!  411		      !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/
2457!  413		      !	adjust_montf_result(result, nint, nlen);
2458
2459                       .L77000476:
2460/* 0x03ec	 413 */		sll	%g1,2,%l3
2461/* 0x03f0	   0 */		sethi	%hi(TwoTo16),%g5
2462/* 0x03f4	 413 */		add	%l3,2,%l2
2463/* 0x03f8	 328 */		cmp	%l2,0
2464/* 0x03fc	     */		ble,pn	%icc,.L77000482
2465/* 0x0400	   0 */		sethi	%hi(TwoToMinus16),%o2
2466                       .L77000514:
2467/* 0x0404	 329 */		add	%l3,2,%l2
2468/* 0x0408	 328 */		add	%l3,1,%o4
2469/* 0x040c	     */		or	%g0,0,%l3
2470/* 0x0410	 329 */		cmp	%l2,8
2471/* 0x0414	     */		bl,pn	%icc,.L77000477
2472/* 0x0418	 328 */		or	%g0,%i3,%l1
2473                       .L900000831:
2474/* 0x041c	 329 */		prefetch	[%i3],22
2475/* 0x0420	     */		sub	%o4,7,%l4
2476/* 0x0424	     */		or	%g0,0,%l3
2477/* 0x0428	     */		or	%g0,%i3,%l1
2478                       .L900000829:
2479/* 0x042c	 329 */		prefetch	[%l1+528],22
2480/* 0x0430	     */		std	%f0,[%l1]
2481/* 0x0434	     */		add	%l3,8,%l3
2482/* 0x0438	     */		add	%l1,64,%l1
2483/* 0x043c	     */		std	%f0,[%l1-56]
2484/* 0x0440	     */		cmp	%l3,%l4
2485/* 0x0444	     */		std	%f0,[%l1-48]
2486/* 0x0448	     */		std	%f0,[%l1-40]
2487/* 0x044c	     */		prefetch	[%l1+496],22
2488/* 0x0450	     */		std	%f0,[%l1-32]
2489/* 0x0454	     */		std	%f0,[%l1-24]
2490/* 0x0458	     */		std	%f0,[%l1-16]
2491/* 0x045c	     */		ble,pt	%icc,.L900000829
2492/* 0x0460	     */		std	%f0,[%l1-8]
2493                       .L900000832:
2494/* 0x0464	 329 */		cmp	%l3,%o4
2495/* 0x0468	     */		bg,pn	%icc,.L77000482
2496/* 0x046c	     */		nop
2497                       .L77000477:
2498/* 0x0470	 329 */		add	%l3,1,%l3
2499                       .L900000851:
2500/* 0x0474	 329 */		std	%f0,[%l1]
2501/* 0x0478	     */		cmp	%l3,%o4
2502/* 0x047c	     */		add	%l1,8,%l1
2503/* 0x0480	     */		ble,pt	%icc,.L900000851
2504/* 0x0484	     */		add	%l3,1,%l3
2505                       .L77000482:
2506/* 0x0488	 330 */		ldd	[%i1],%f40
2507/* 0x048c	 334 */		cmp	%o3,0
2508/* 0x0490	     */		sub	%g1,1,%l3
2509/* 0x0494	 330 */		ldd	[%l0],%f42
2510/* 0x0498	 331 */		ldd	[%o2+%lo(TwoToMinus16)],%f36
2511/* 0x049c	     */		ldd	[%g5+%lo(TwoTo16)],%f38
2512/* 0x04a0	 330 */		fmuld	%f40,%f42,%f52
2513/* 0x04a4	 331 */		fdtox	%f52,%f8
2514/* 0x04a8	     */		fmovs	%f0,%f8
2515/* 0x04ac	     */		fxtod	%f8,%f62
2516/* 0x04b0	     */		fmuld	%f62,%f14,%f60
2517/* 0x04b4	     */		fmuld	%f60,%f36,%f32
2518/* 0x04b8	     */		fdtox	%f32,%f50
2519/* 0x04bc	     */		fxtod	%f50,%f34
2520/* 0x04c0	     */		fmuld	%f34,%f38,%f46
2521/* 0x04c4	     */		fsubd	%f60,%f46,%f40
2522/* 0x04c8	 334 */		ble,pn	%icc,.L77000378
2523/* 0x04cc	 330 */		std	%f52,[%i3]
2524                       .L77000509:
2525/* 0x04d0	 345 */		add	%o3,1,%g5
2526/* 0x04d4	     */		sll	%g5,1,%o2
2527/* 0x04d8	     */		or	%g0,0,%l1
2528/* 0x04dc	 337 */		ldd	[%i4],%f42
2529/* 0x04e0	 345 */		sub	%o3,1,%o3
2530/* 0x04e4	     */		or	%g0,0,%o5
2531/* 0x04e8	     */		or	%g0,%i3,%l2
2532/* 0x04ec	     */		add	%i4,8,%o1
2533/* 0x04f0	     */		add	%i1,8,%g5
2534                       .L900000848:
2535/* 0x04f4	 337 */		fmuld	%f40,%f42,%f34
2536/* 0x04f8	     */		ldd	[%l0+8],%f32
2537/* 0x04fc	 341 */		cmp	%g1,1
2538/* 0x0500	 337 */		ldd	[%i1],%f50
2539/* 0x0504	     */		ldd	[%l2],%f46
2540/* 0x0508	     */		ldd	[%l2+8],%f44
2541/* 0x050c	     */		fmuld	%f50,%f32,%f60
2542/* 0x0510	 335 */		ldd	[%l0],%f42
2543/* 0x0514	 337 */		faddd	%f46,%f34,%f48
2544/* 0x0518	     */		faddd	%f44,%f60,%f58
2545/* 0x051c	     */		fmuld	%f36,%f48,%f54
2546/* 0x0520	     */		faddd	%f58,%f54,%f34
2547/* 0x0524	 341 */		ble,pn	%icc,.L77000368
2548/* 0x0528	 338 */		std	%f34,[%l2+8]
2549                       .L77000507:
2550/* 0x052c	 341 */		or	%g0,1,%l5
2551/* 0x0530	     */		or	%g0,2,%l4
2552/* 0x0534	     */		or	%g0,%g5,%g4
2553/* 0x0538	 342 */		cmp	%l3,12
2554/* 0x053c	     */		bl,pn	%icc,.L77000481
2555/* 0x0540	 341 */		or	%g0,%o1,%g3
2556                       .L900000839:
2557/* 0x0544	 342 */		prefetch	[%i1+8],0
2558/* 0x0548	     */		prefetch	[%i1+72],0
2559/* 0x054c	     */		add	%i4,40,%l6
2560/* 0x0550	     */		add	%i1,40,%l7
2561/* 0x0554	     */		prefetch	[%l2+16],0
2562/* 0x0558	     */		or	%g0,%l2,%o7
2563/* 0x055c	     */		sub	%l3,7,%i5
2564/* 0x0560	     */		prefetch	[%l2+80],0
2565/* 0x0564	     */		add	%l2,80,%g2
2566/* 0x0568	     */		or	%g0,2,%l4
2567/* 0x056c	     */		prefetch	[%i1+136],0
2568/* 0x0570	     */		or	%g0,5,%l5
2569/* 0x0574	     */		prefetch	[%i1+200],0
2570/* 0x0578	     */		prefetch	[%l2+144],0
2571/* 0x057c	     */		ldd	[%i4+8],%f52
2572/* 0x0580	     */		ldd	[%i4+16],%f44
2573/* 0x0584	     */		ldd	[%i4+24],%f56
2574/* 0x0588	     */		fmuld	%f40,%f52,%f48
2575/* 0x058c	     */		fmuld	%f40,%f44,%f46
2576/* 0x0590	     */		fmuld	%f40,%f56,%f44
2577/* 0x0594	     */		ldd	[%l2+48],%f56
2578/* 0x0598	     */		prefetch	[%l2+208],0
2579/* 0x059c	     */		prefetch	[%l2+272],0
2580/* 0x05a0	     */		prefetch	[%l2+336],0
2581/* 0x05a4	     */		prefetch	[%l2+400],0
2582/* 0x05a8	     */		ldd	[%i1+8],%f32
2583/* 0x05ac	     */		ldd	[%i1+16],%f60
2584/* 0x05b0	     */		ldd	[%i1+24],%f50
2585/* 0x05b4	     */		fmuld	%f42,%f32,%f62
2586/* 0x05b8	     */		ldd	[%i1+32],%f32
2587/* 0x05bc	     */		fmuld	%f42,%f60,%f58
2588/* 0x05c0	     */		ldd	[%l2+16],%f52
2589/* 0x05c4	     */		ldd	[%l2+32],%f54
2590/* 0x05c8	     */		faddd	%f62,%f48,%f60
2591/* 0x05cc	     */		fmuld	%f42,%f50,%f48
2592/* 0x05d0	     */		faddd	%f58,%f46,%f62
2593/* 0x05d4	     */		ldd	[%i4+32],%f46
2594/* 0x05d8	     */		ldd	[%l2+64],%f58
2595                       .L900000837:
2596/* 0x05dc	 342 */		prefetch	[%l7+192],0
2597/* 0x05e0	     */		fmuld	%f40,%f46,%f46
2598/* 0x05e4	     */		faddd	%f60,%f52,%f60
2599/* 0x05e8	     */		ldd	[%l6],%f52
2600/* 0x05ec	     */		std	%f60,[%g2-64]
2601/* 0x05f0	     */		fmuld	%f42,%f32,%f50
2602/* 0x05f4	     */		add	%l5,8,%l5
2603/* 0x05f8	     */		ldd	[%l7],%f60
2604/* 0x05fc	     */		faddd	%f48,%f44,%f48
2605/* 0x0600	     */		cmp	%l5,%i5
2606/* 0x0604	     */		ldd	[%g2],%f32
2607/* 0x0608	     */		add	%g2,128,%g2
2608/* 0x060c	     */		prefetch	[%g2+256],0
2609/* 0x0610	     */		fmuld	%f40,%f52,%f52
2610/* 0x0614	     */		faddd	%f62,%f54,%f44
2611/* 0x0618	     */		ldd	[%l6+8],%f54
2612/* 0x061c	     */		std	%f44,[%g2-176]
2613/* 0x0620	     */		fmuld	%f42,%f60,%f44
2614/* 0x0624	     */		add	%l6,64,%l6
2615/* 0x0628	     */		ldd	[%l7+8],%f60
2616/* 0x062c	     */		faddd	%f50,%f46,%f50
2617/* 0x0630	     */		add	%l7,64,%l7
2618/* 0x0634	     */		add	%l4,16,%l4
2619/* 0x0638	     */		ldd	[%g2-112],%f46
2620/* 0x063c	     */		fmuld	%f40,%f54,%f54
2621/* 0x0640	     */		faddd	%f48,%f56,%f62
2622/* 0x0644	     */		ldd	[%l6-48],%f56
2623/* 0x0648	     */		std	%f62,[%g2-160]
2624/* 0x064c	     */		fmuld	%f42,%f60,%f48
2625/* 0x0650	     */		ldd	[%l7-48],%f60
2626/* 0x0654	     */		faddd	%f44,%f52,%f52
2627/* 0x0658	     */		ldd	[%g2-96],%f30
2628/* 0x065c	     */		prefetch	[%g2+288],0
2629/* 0x0660	     */		fmuld	%f40,%f56,%f56
2630/* 0x0664	     */		faddd	%f50,%f58,%f62
2631/* 0x0668	     */		ldd	[%l6-40],%f58
2632/* 0x066c	     */		std	%f62,[%g2-144]
2633/* 0x0670	     */		fmuld	%f42,%f60,%f50
2634/* 0x0674	     */		ldd	[%l7-40],%f62
2635/* 0x0678	     */		faddd	%f48,%f54,%f54
2636/* 0x067c	     */		ldd	[%g2-80],%f28
2637/* 0x0680	     */		prefetch	[%l7+160],0
2638/* 0x0684	     */		fmuld	%f40,%f58,%f48
2639/* 0x0688	     */		faddd	%f52,%f32,%f44
2640/* 0x068c	     */		ldd	[%l6-32],%f58
2641/* 0x0690	     */		std	%f44,[%g2-128]
2642/* 0x0694	     */		fmuld	%f42,%f62,%f44
2643/* 0x0698	     */		ldd	[%l7-32],%f60
2644/* 0x069c	     */		faddd	%f50,%f56,%f56
2645/* 0x06a0	     */		ldd	[%g2-64],%f52
2646/* 0x06a4	     */		prefetch	[%g2+320],0
2647/* 0x06a8	     */		fmuld	%f40,%f58,%f50
2648/* 0x06ac	     */		faddd	%f54,%f46,%f32
2649/* 0x06b0	     */		ldd	[%l6-24],%f62
2650/* 0x06b4	     */		std	%f32,[%g2-112]
2651/* 0x06b8	     */		fmuld	%f42,%f60,%f46
2652/* 0x06bc	     */		ldd	[%l7-24],%f60
2653/* 0x06c0	     */		faddd	%f44,%f48,%f48
2654/* 0x06c4	     */		ldd	[%g2-48],%f54
2655/* 0x06c8	     */		fmuld	%f40,%f62,%f26
2656/* 0x06cc	     */		faddd	%f56,%f30,%f32
2657/* 0x06d0	     */		ldd	[%l6-16],%f58
2658/* 0x06d4	     */		std	%f32,[%g2-96]
2659/* 0x06d8	     */		fmuld	%f42,%f60,%f30
2660/* 0x06dc	     */		ldd	[%l7-16],%f32
2661/* 0x06e0	     */		faddd	%f46,%f50,%f60
2662/* 0x06e4	     */		ldd	[%g2-32],%f56
2663/* 0x06e8	     */		prefetch	[%g2+352],0
2664/* 0x06ec	     */		fmuld	%f40,%f58,%f44
2665/* 0x06f0	     */		faddd	%f48,%f28,%f62
2666/* 0x06f4	     */		ldd	[%l6-8],%f46
2667/* 0x06f8	     */		std	%f62,[%g2-80]
2668/* 0x06fc	     */		fmuld	%f42,%f32,%f48
2669/* 0x0700	     */		ldd	[%l7-8],%f32
2670/* 0x0704	     */		faddd	%f30,%f26,%f62
2671/* 0x0708	     */		ble,pt	%icc,.L900000837
2672/* 0x070c	     */		ldd	[%g2-16],%f58
2673                       .L900000840:
2674/* 0x0710	 342 */		fmuld	%f40,%f46,%f46
2675/* 0x0714	     */		faddd	%f62,%f54,%f62
2676/* 0x0718	     */		std	%f62,[%g2-48]
2677/* 0x071c	     */		cmp	%l5,%l3
2678/* 0x0720	     */		fmuld	%f42,%f32,%f50
2679/* 0x0724	     */		faddd	%f48,%f44,%f48
2680/* 0x0728	     */		or	%g0,%l7,%g4
2681/* 0x072c	     */		or	%g0,%l6,%g3
2682/* 0x0730	     */		faddd	%f60,%f52,%f60
2683/* 0x0734	     */		std	%f60,[%g2-64]
2684/* 0x0738	     */		or	%g0,%o7,%l2
2685/* 0x073c	     */		add	%l4,8,%l4
2686/* 0x0740	     */		faddd	%f50,%f46,%f54
2687/* 0x0744	     */		faddd	%f48,%f56,%f56
2688/* 0x0748	     */		std	%f56,[%g2-32]
2689/* 0x074c	     */		faddd	%f54,%f58,%f58
2690/* 0x0750	     */		bg,pn	%icc,.L77000368
2691/* 0x0754	     */		std	%f58,[%g2-16]
2692                       .L77000481:
2693/* 0x0758	 342 */		ldd	[%g4],%f44
2694                       .L900000850:
2695/* 0x075c	 342 */		ldd	[%g3],%f48
2696/* 0x0760	     */		fmuld	%f42,%f44,%f58
2697/* 0x0764	     */		sra	%l4,0,%l7
2698/* 0x0768	     */		add	%l5,1,%l5
2699/* 0x076c	     */		sllx	%l7,3,%g2
2700/* 0x0770	     */		add	%g4,8,%g4
2701/* 0x0774	     */		ldd	[%l2+%g2],%f56
2702/* 0x0778	     */		cmp	%l5,%l3
2703/* 0x077c	     */		add	%l4,2,%l4
2704/* 0x0780	     */		fmuld	%f40,%f48,%f54
2705/* 0x0784	     */		add	%g3,8,%g3
2706/* 0x0788	     */		faddd	%f58,%f54,%f52
2707/* 0x078c	     */		faddd	%f52,%f56,%f62
2708/* 0x0790	     */		std	%f62,[%l2+%g2]
2709/* 0x0794	     */		ble,a,pt	%icc,.L900000850
2710/* 0x0798	     */		ldd	[%g4],%f44
2711                       .L77000368:
2712/* 0x079c	 344 */		cmp	%o5,15
2713/* 0x07a0	     */		bne,pn	%icc,.L77000483
2714/* 0x07a4	 345 */		srl	%l1,31,%g4
2715                       .L77000478:
2716/* 0x07a8	 345 */		add	%l1,%g4,%l4
2717/* 0x07ac	     */		sra	%l4,1,%o7
2718/* 0x07b0	     */		add	%o7,1,%o4
2719/* 0x07b4	     */		sll	%o4,1,%l6
2720/* 0x07b8	     */		cmp	%l6,%o2
2721/* 0x07bc	     */		bge,pn	%icc,.L77000392
2722/* 0x07c0	     */		fmovd	%f0,%f42
2723                       .L77000508:
2724/* 0x07c4	 345 */		sra	%l6,0,%l4
2725/* 0x07c8	     */		sllx	%l4,3,%g2
2726/* 0x07cc	     */		fmovd	%f0,%f32
2727/* 0x07d0	     */		sub	%o2,1,%l5
2728/* 0x07d4	     */		ldd	[%g2+%i3],%f40
2729/* 0x07d8	     */		add	%g2,%i3,%g3
2730                       .L900000849:
2731/* 0x07dc	 345 */		fdtox	%f40,%f10
2732/* 0x07e0	     */		ldd	[%g3+8],%f52
2733/* 0x07e4	     */		add	%l6,2,%l6
2734/* 0x07e8	     */		cmp	%l6,%l5
2735/* 0x07ec	     */		fdtox	%f52,%f2
2736/* 0x07f0	     */		fmovd	%f10,%f30
2737/* 0x07f4	     */		fmovs	%f0,%f10
2738/* 0x07f8	     */		fmovs	%f0,%f2
2739/* 0x07fc	     */		fxtod	%f10,%f10
2740/* 0x0800	     */		fxtod	%f2,%f2
2741/* 0x0804	     */		fdtox	%f52,%f28
2742/* 0x0808	     */		faddd	%f10,%f32,%f56
2743/* 0x080c	     */		std	%f56,[%g3]
2744/* 0x0810	     */		faddd	%f2,%f42,%f62
2745/* 0x0814	     */		std	%f62,[%g3+8]
2746/* 0x0818	     */		fitod	%f30,%f32
2747/* 0x081c	     */		add	%g3,16,%g3
2748/* 0x0820	     */		fitod	%f28,%f42
2749/* 0x0824	     */		ble,a,pt	%icc,.L900000849
2750/* 0x0828	     */		ldd	[%g3],%f40
2751                       .L77000392:
2752/* 0x082c	 346 */		or	%g0,0,%o5
2753                       .L77000483:
2754/* 0x0830	 350 */		fdtox	%f34,%f6
2755/* 0x0834	     */		add	%l1,1,%l1
2756/* 0x0838	     */		cmp	%l1,%o3
2757/* 0x083c	     */		add	%o5,1,%o5
2758/* 0x0840	     */		add	%l2,8,%l2
2759/* 0x0844	     */		add	%l0,8,%l0
2760/* 0x0848	     */		fmovs	%f0,%f6
2761/* 0x084c	     */		fxtod	%f6,%f46
2762/* 0x0850	     */		fmuld	%f46,%f14,%f56
2763/* 0x0854	     */		fmuld	%f56,%f36,%f44
2764/* 0x0858	     */		fdtox	%f44,%f48
2765/* 0x085c	     */		fxtod	%f48,%f58
2766/* 0x0860	     */		fmuld	%f58,%f38,%f54
2767/* 0x0864	     */		fsubd	%f56,%f54,%f40
2768/* 0x0868	     */		ble,a,pt	%icc,.L900000848
2769/* 0x086c	 337 */		ldd	[%i4],%f42
2770                       .L77000378:
2771/* 0x0870	 409 */		ldx	[%i3+%o0],%l1
2772                       .L900000852:
2773/* 0x0874	 409 */		add	%i3,%o0,%l4
2774/* 0x0878	     */		ldx	[%l4+8],%i1
2775/* 0x087c	     */		cmp	%l1,0
2776/* 0x0880	     */		bne,pn	%xcc,.L77000403
2777/* 0x0884	     */		or	%g0,0,%g5
2778                       .L77000402:
2779/* 0x0888	 409 */		or	%g0,0,%i3
2780/* 0x088c	     */		ba	.L900000847
2781/* 0x0890	     */		cmp	%i1,0
2782                       .L77000403:
2783/* 0x0894	 409 */		srlx	%l1,52,%o5
2784/* 0x0898	     */		sethi	%hi(0xfff00000),%i3
2785/* 0x089c	     */		sllx	%i3,32,%o2
2786/* 0x08a0	     */		sethi	%hi(0x40000000),%o0
2787/* 0x08a4	     */		sllx	%o0,22,%o4
2788/* 0x08a8	     */		or	%g0,1023,%l0
2789/* 0x08ac	     */		xor	%o2,-1,%o3
2790/* 0x08b0	     */		sub	%l0,%o5,%o7
2791/* 0x08b4	     */		and	%l1,%o3,%l1
2792/* 0x08b8	     */		add	%o7,52,%i4
2793/* 0x08bc	     */		or	%l1,%o4,%o1
2794/* 0x08c0	     */		cmp	%i1,0
2795/* 0x08c4	     */		srlx	%o1,%i4,%i3
2796                       .L900000847:
2797/* 0x08c8	 409 */		bne,pn	%xcc,.L77000409
2798/* 0x08cc	     */		or	%g0,0,%o7
2799                       .L77000408:
2800/* 0x08d0	 409 */		ba	.L900000846
2801/* 0x08d4	 350 */		cmp	%g1,0
2802                       .L77000409:
2803/* 0x08d8	 409 */		srlx	%i1,52,%l2
2804/* 0x08dc	     */		sethi	%hi(0xfff00000),%o7
2805/* 0x08e0	     */		sllx	%o7,32,%i4
2806/* 0x08e4	     */		sethi	%hi(0x40000000),%i5
2807/* 0x08e8	     */		sllx	%i5,22,%l6
2808/* 0x08ec	     */		or	%g0,1023,%l5
2809/* 0x08f0	     */		xor	%i4,-1,%o1
2810/* 0x08f4	     */		sub	%l5,%l2,%g2
2811/* 0x08f8	     */		and	%i1,%o1,%l7
2812/* 0x08fc	     */		add	%g2,52,%g3
2813/* 0x0900	     */		or	%l7,%l6,%g4
2814/* 0x0904	 350 */		cmp	%g1,0
2815/* 0x0908	 409 */		srlx	%g4,%g3,%o7
2816                       .L900000846:
2817/* 0x090c	 350 */		ble,pn	%icc,.L77000397
2818/* 0x0910	     */		or	%g0,0,%l5
2819                       .L77000510:
2820/* 0x0914	 409 */		sethi	%hi(0xfff00000),%g4
2821/* 0x0918	     */		sllx	%g4,32,%o0
2822/* 0x091c	   0 */		or	%g0,-1,%i5
2823/* 0x0920	 409 */		srl	%i5,0,%l7
2824/* 0x0924	     */		sethi	%hi(0x40000000),%i1
2825/* 0x0928	     */		sllx	%i1,22,%l6
2826/* 0x092c	     */		sethi	%hi(0xfc00),%i4
2827/* 0x0930	     */		xor	%o0,-1,%g2
2828/* 0x0934	     */		add	%i4,1023,%l2
2829/* 0x0938	     */		or	%g0,2,%g4
2830/* 0x093c	     */		or	%g0,%i2,%g3
2831                       .L77000395:
2832/* 0x0940	 409 */		sra	%g4,0,%o2
2833/* 0x0944	     */		add	%g4,1,%o3
2834/* 0x0948	     */		sllx	%o2,3,%o0
2835/* 0x094c	     */		sra	%o3,0,%o5
2836/* 0x0950	     */		ldx	[%l4+%o0],%o4
2837/* 0x0954	     */		sllx	%o5,3,%l0
2838/* 0x0958	     */		and	%i3,%l7,%o1
2839/* 0x095c	     */		ldx	[%l4+%l0],%i4
2840/* 0x0960	     */		cmp	%o4,0
2841/* 0x0964	     */		bne,pn	%xcc,.L77000415
2842/* 0x0968	 350 */		and	%o7,%l2,%i5
2843                       .L77000414:
2844/* 0x096c	 409 */		or	%g0,0,%l1
2845/* 0x0970	     */		ba	.L900000845
2846/* 0x0974	     */		add	%g5,%o1,%i1
2847                       .L77000415:
2848/* 0x0978	 409 */		srlx	%o4,52,%o3
2849/* 0x097c	     */		and	%o4,%g2,%l1
2850/* 0x0980	     */		or	%g0,52,%o0
2851/* 0x0984	     */		sub	%o3,1023,%l0
2852/* 0x0988	     */		or	%l1,%l6,%o4
2853/* 0x098c	     */		sub	%o0,%l0,%o5
2854/* 0x0990	     */		srlx	%o4,%o5,%l1
2855/* 0x0994	     */		add	%g5,%o1,%i1
2856                       .L900000845:
2857/* 0x0998	 409 */		srax	%i3,32,%g5
2858/* 0x099c	     */		cmp	%i4,0
2859/* 0x09a0	     */		bne,pn	%xcc,.L77000421
2860/* 0x09a4	 350 */		sllx	%i5,16,%o2
2861                       .L77000420:
2862/* 0x09a8	 409 */		or	%g0,0,%o4
2863/* 0x09ac	     */		ba	.L900000844
2864/* 0x09b0	 350 */		add	%i1,%o2,%o5
2865                       .L77000421:
2866/* 0x09b4	 409 */		srlx	%i4,52,%o4
2867/* 0x09b8	     */		or	%g0,52,%o0
2868/* 0x09bc	     */		sub	%o4,1023,%o3
2869/* 0x09c0	     */		and	%i4,%g2,%i3
2870/* 0x09c4	     */		or	%i3,%l6,%o5
2871/* 0x09c8	     */		sub	%o0,%o3,%l0
2872/* 0x09cc	     */		srlx	%o5,%l0,%o4
2873/* 0x09d0	 350 */		add	%i1,%o2,%o5
2874                       .L900000844:
2875/* 0x09d4	 350 */		srax	%o7,16,%i4
2876/* 0x09d8	     */		srax	%o5,32,%i5
2877/* 0x09dc	     */		add	%i4,%i5,%o1
2878/* 0x09e0	     */		add	%l5,1,%l5
2879/* 0x09e4	     */		and	%o5,%l7,%i1
2880/* 0x09e8	     */		add	%g5,%o1,%g5
2881/* 0x09ec	     */		st	%i1,[%g3]
2882/* 0x09f0	     */		or	%g0,%l1,%i3
2883/* 0x09f4	     */		or	%g0,%o4,%o7
2884/* 0x09f8	     */		add	%g4,2,%g4
2885/* 0x09fc	     */		cmp	%l5,%l3
2886/* 0x0a00	     */		ble,pt	%icc,.L77000395
2887/* 0x0a04	     */		add	%g3,4,%g3
2888                       .L77000397:
2889/* 0x0a08	 409 */		sethi	%hi(0xfc00),%l4
2890/* 0x0a0c	     */		sra	%l5,0,%i5
2891/* 0x0a10	     */		add	%l4,1023,%i1
2892/* 0x0a14	     */		add	%g5,%i3,%l5
2893/* 0x0a18	     */		and	%o7,%i1,%g5
2894/* 0x0a1c	     */		sllx	%g5,16,%l2
2895/* 0x0a20	     */		sllx	%i5,2,%l7
2896/* 0x0a24	 413 */		sra	%g1,0,%g2
2897/* 0x0a28	 409 */		add	%l5,%l2,%l6
2898/* 0x0a2c	     */		st	%l6,[%i2+%l7]
2899/* 0x0a30	 413 */		sllx	%g2,2,%g3
2900/* 0x0a34	     */		ld	[%i2+%g3],%g4
2901/* 0x0a38	     */		cmp	%g4,0
2902/* 0x0a3c	     */		bgu,pn	%icc,.L77000486
2903/* 0x0a40	     */		cmp	%l3,0
2904                       .L77000427:
2905/* 0x0a44	 413 */		bl,pn	%icc,.L77000486
2906/* 0x0a48	     */		or	%g0,%l3,%i5
2907                       .L77000512:
2908/* 0x0a4c	 413 */		sra	%l3,0,%o5
2909/* 0x0a50	     */		sllx	%o5,2,%l7
2910/* 0x0a54	     */		ld	[%l7+%i0],%o5
2911/* 0x0a58	     */		add	%l7,%i2,%o1
2912/* 0x0a5c	     */		add	%l7,%i0,%i4
2913                       .L900000843:
2914/* 0x0a60	 413 */		ld	[%o1],%i1
2915/* 0x0a64	     */		cmp	%i1,%o5
2916/* 0x0a68	     */		bne,pn	%icc,.L77000435
2917/* 0x0a6c	     */		sub	%o1,4,%o1
2918                       .L77000431:
2919/* 0x0a70	 413 */		sub	%i4,4,%i4
2920/* 0x0a74	     */		subcc	%i5,1,%i5
2921/* 0x0a78	     */		bpos,a,pt	%icc,.L900000843
2922/* 0x0a7c	     */		ld	[%i4],%o5
2923                       .L900000827:
2924/* 0x0a80	 413 */		ba	.L900000842
2925/* 0x0a84	 350 */		cmp	%g1,0
2926                       .L77000435:
2927/* 0x0a88	 413 */		sra	%i5,0,%o0
2928/* 0x0a8c	     */		sllx	%o0,2,%l1
2929/* 0x0a90	     */		ld	[%i0+%l1],%i3
2930/* 0x0a94	     */		ld	[%i2+%l1],%l0
2931/* 0x0a98	     */		cmp	%l0,%i3
2932/* 0x0a9c	     */		bleu,pt	%icc,.L77000379
2933/* 0x0aa0	     */		nop
2934                       .L77000486:
2935/* 0x0aa4	 350 */		cmp	%g1,0
2936                       .L900000842:
2937/* 0x0aa8	 350 */		ble,pn	%icc,.L77000379
2938/* 0x0aac	     */		add	%l3,1,%g3
2939                       .L77000511:
2940/* 0x0ab0	 350 */		or	%g0,0,%l5
2941/* 0x0ab4	     */		cmp	%g3,10
2942/* 0x0ab8	     */		bl,pn	%icc,.L77000487
2943/* 0x0abc	     */		or	%g0,0,%g1
2944                       .L900000835:
2945/* 0x0ac0	 350 */		prefetch	[%i2],22
2946/* 0x0ac4	     */		add	%i0,4,%l2
2947/* 0x0ac8	     */		prefetch	[%i2+64],22
2948/* 0x0acc	     */		add	%i2,8,%o5
2949/* 0x0ad0	     */		sub	%l3,7,%i0
2950/* 0x0ad4	     */		prefetch	[%i2+128],22
2951/* 0x0ad8	     */		or	%g0,2,%l5
2952/* 0x0adc	     */		prefetch	[%i2+192],22
2953/* 0x0ae0	     */		prefetch	[%i2+256],22
2954/* 0x0ae4	     */		prefetch	[%i2+320],22
2955/* 0x0ae8	     */		prefetch	[%i2+384],22
2956/* 0x0aec	     */		ld	[%l2-4],%l7
2957/* 0x0af0	     */		ld	[%o5-4],%l6
2958/* 0x0af4	     */		prefetch	[%o5+440],22
2959/* 0x0af8	     */		prefetch	[%o5+504],22
2960/* 0x0afc	     */		ld	[%i2],%i2
2961/* 0x0b00	     */		sub	%i2,%l7,%g3
2962/* 0x0b04	     */		st	%g3,[%o5-8]
2963/* 0x0b08	     */		srax	%g3,32,%l7
2964                       .L900000833:
2965/* 0x0b0c	 350 */		add	%l5,8,%l5
2966/* 0x0b10	     */		add	%o5,32,%o5
2967/* 0x0b14	     */		ld	[%l2],%i5
2968/* 0x0b18	     */		prefetch	[%o5+496],22
2969/* 0x0b1c	     */		cmp	%l5,%i0
2970/* 0x0b20	     */		add	%l2,32,%l2
2971/* 0x0b24	     */		sub	%l6,%i5,%g5
2972/* 0x0b28	     */		add	%g5,%l7,%o0
2973/* 0x0b2c	     */		ld	[%o5-32],%l4
2974/* 0x0b30	     */		st	%o0,[%o5-36]
2975/* 0x0b34	     */		srax	%o0,32,%i3
2976/* 0x0b38	     */		ld	[%l2-28],%i1
2977/* 0x0b3c	     */		sub	%l4,%i1,%i4
2978/* 0x0b40	     */		add	%i4,%i3,%o1
2979/* 0x0b44	     */		ld	[%o5-28],%o3
2980/* 0x0b48	     */		st	%o1,[%o5-32]
2981/* 0x0b4c	     */		srax	%o1,32,%l1
2982/* 0x0b50	     */		ld	[%l2-24],%o2
2983/* 0x0b54	     */		sub	%o3,%o2,%g2
2984/* 0x0b58	     */		add	%g2,%l1,%o7
2985/* 0x0b5c	     */		ld	[%o5-24],%l0
2986/* 0x0b60	     */		st	%o7,[%o5-28]
2987/* 0x0b64	     */		srax	%o7,32,%l6
2988/* 0x0b68	     */		ld	[%l2-20],%o4
2989/* 0x0b6c	     */		sub	%l0,%o4,%g1
2990/* 0x0b70	     */		add	%g1,%l6,%l7
2991/* 0x0b74	     */		ld	[%o5-20],%i2
2992/* 0x0b78	     */		st	%l7,[%o5-24]
2993/* 0x0b7c	     */		srax	%l7,32,%g4
2994/* 0x0b80	     */		ld	[%l2-16],%g3
2995/* 0x0b84	     */		sub	%i2,%g3,%i5
2996/* 0x0b88	     */		add	%i5,%g4,%g5
2997/* 0x0b8c	     */		ld	[%o5-16],%i1
2998/* 0x0b90	     */		st	%g5,[%o5-20]
2999/* 0x0b94	     */		srax	%g5,32,%l4
3000/* 0x0b98	     */		ld	[%l2-12],%o0
3001/* 0x0b9c	     */		sub	%i1,%o0,%i3
3002/* 0x0ba0	     */		add	%i3,%l4,%i4
3003/* 0x0ba4	     */		ld	[%o5-12],%o2
3004/* 0x0ba8	     */		st	%i4,[%o5-16]
3005/* 0x0bac	     */		srax	%i4,32,%o3
3006/* 0x0bb0	     */		ld	[%l2-8],%o1
3007/* 0x0bb4	     */		sub	%o2,%o1,%l1
3008/* 0x0bb8	     */		add	%l1,%o3,%g2
3009/* 0x0bbc	     */		ld	[%o5-8],%o4
3010/* 0x0bc0	     */		st	%g2,[%o5-12]
3011/* 0x0bc4	     */		srax	%g2,32,%l0
3012/* 0x0bc8	     */		ld	[%l2-4],%o7
3013/* 0x0bcc	     */		sub	%o4,%o7,%l6
3014/* 0x0bd0	     */		add	%l6,%l0,%g1
3015/* 0x0bd4	     */		ld	[%o5-4],%l6
3016/* 0x0bd8	     */		st	%g1,[%o5-8]
3017/* 0x0bdc	     */		ble,pt	%icc,.L900000833
3018/* 0x0be0	     */		srax	%g1,32,%l7
3019                       .L900000836:
3020/* 0x0be4	 350 */		ld	[%l2],%l0
3021/* 0x0be8	     */		add	%l2,4,%i0
3022/* 0x0bec	     */		or	%g0,%o5,%i2
3023/* 0x0bf0	     */		cmp	%l5,%l3
3024/* 0x0bf4	     */		sub	%l6,%l0,%l6
3025/* 0x0bf8	     */		add	%l6,%l7,%g1
3026/* 0x0bfc	     */		st	%g1,[%o5-4]
3027/* 0x0c00	     */		bg,pn	%icc,.L77000379
3028/* 0x0c04	     */		srax	%g1,32,%g1
3029                       .L77000487:
3030/* 0x0c08	 350 */		ld	[%i2],%o4
3031                       .L900000841:
3032/* 0x0c0c	 350 */		ld	[%i0],%i3
3033/* 0x0c10	     */		add	%g1,%o4,%l0
3034/* 0x0c14	     */		add	%l5,1,%l5
3035/* 0x0c18	     */		cmp	%l5,%l3
3036/* 0x0c1c	     */		add	%i0,4,%i0
3037/* 0x0c20	     */		sub	%l0,%i3,%l6
3038/* 0x0c24	     */		st	%l6,[%i2]
3039/* 0x0c28	     */		srax	%l6,32,%g1
3040/* 0x0c2c	     */		add	%i2,4,%i2
3041/* 0x0c30	     */		ble,a,pt	%icc,.L900000841
3042/* 0x0c34	     */		ld	[%i2],%o4
3043                       .L77000379:
3044/* 0x0c38	 405 */		ret	! Result =
3045/* 0x0c3c	     */		restore	%g0,%g0,%g0
3046/* 0x0c40	   0 */		.type	mont_mulf_noconv,2
3047/* 0x0c40	   0 */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)
3048
3049! Begin Disassembling Debug Info
3050	.xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
3051	.xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o  mont_mulf.c",52,0,0,0
3052
3053! End Disassembling Debug Info
3054
3055! Begin Disassembling Ident
3056	.ident	"cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (NO SOURCE LINE)
3057	.ident	"@(#)mont_mulf.c\t1.2\t01/09/24 SMI"	! (/tmp/acompAAApja4Fx:8)
3058	.ident	"@(#)types.h\t1.74\t03/08/07 SMI"	! (/tmp/acompAAApja4Fx:9)
3059	.ident	"@(#)isa_defs.h\t1.20\t99/05/04 SMI"	! (/tmp/acompAAApja4Fx:10)
3060	.ident	"@(#)feature_tests.h\t1.18\t99/07/26 SMI"	! (/tmp/acompAAApja4Fx:11)
3061	.ident	"@(#)machtypes.h\t1.13\t99/05/04 SMI"	! (/tmp/acompAAApja4Fx:12)
3062	.ident	"@(#)inttypes.h\t1.2\t98/01/16 SMI"	! (/tmp/acompAAApja4Fx:13)
3063	.ident	"@(#)int_types.h\t1.6\t97/08/20 SMI"	! (/tmp/acompAAApja4Fx:14)
3064	.ident	"@(#)int_limits.h\t1.6\t99/08/06 SMI"	! (/tmp/acompAAApja4Fx:15)
3065	.ident	"@(#)int_const.h\t1.2\t96/07/08 SMI"	! (/tmp/acompAAApja4Fx:16)
3066	.ident	"@(#)int_fmtio.h\t1.2\t96/07/08 SMI"	! (/tmp/acompAAApja4Fx:17)
3067	.ident	"@(#)types32.h\t1.4\t98/02/13 SMI"	! (/tmp/acompAAApja4Fx:18)
3068	.ident	"@(#)select.h\t1.17\t01/08/15 SMI"	! (/tmp/acompAAApja4Fx:19)
3069	.ident	"@(#)math.h\t2.11\t00/09/07 SMI"	! (/tmp/acompAAApja4Fx:20)
3070	.ident	"@(#)math_iso.h\t1.2\t00/09/07 SMI"	! (/tmp/acompAAApja4Fx:21)
3071	.ident	"@(#)floatingpoint.h\t2.5\t99/06/22 SMI"	! (/tmp/acompAAApja4Fx:22)
3072	.ident	"@(#)stdio_tag.h\t1.3\t98/04/20 SMI"	! (/tmp/acompAAApja4Fx:23)
3073	.ident	"@(#)ieeefp.h\t2.8 99/10/29"	! (/tmp/acompAAApja4Fx:24)
3074	.ident	"acomp: Sun C 5.5 Patch 112760-07 2004/02/03"	! (/tmp/acompAAApja4Fx:57)
3075	.ident	"iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (/tmp/acompAAApja4Fx:58)
3076	.ident	"cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (NO SOURCE LINE)
3077! End Disassembling Ident
3078
/*
 * FZERO: leave zero in every even-numbered FP register %f0 .. %f62.
 *
 * %f0 and %f2 are cleared directly with fzero; each remaining register
 * %f4 .. %f62 is then written with either %f0 + %f2 (faddd) or
 * %f0 * %f2 (fmuld), both of which yield zero because both sources are
 * zero.  The add/multiply alternation is presumably there so that the
 * work is spread over separate FP add and multiply units -- TODO confirm.
 *
 * The macro takes no arguments, touches only FP registers, and expands to
 * ';'-separated instructions.  It is used by big_restorefp when the saved
 * %fprs did not have FPRS_FEF set.
 */
3079#define	FZERO				\
3080	fzero	%f0			;\
3081	fzero	%f2			;\
3082	faddd	%f0, %f2, %f4		;\
3083	fmuld	%f0, %f2, %f6		;\
3084	faddd	%f0, %f2, %f8		;\
3085	fmuld	%f0, %f2, %f10		;\
3086	faddd	%f0, %f2, %f12		;\
3087	fmuld	%f0, %f2, %f14		;\
3088	faddd	%f0, %f2, %f16		;\
3089	fmuld	%f0, %f2, %f18		;\
3090	faddd	%f0, %f2, %f20		;\
3091	fmuld	%f0, %f2, %f22		;\
3092	faddd	%f0, %f2, %f24		;\
3093	fmuld	%f0, %f2, %f26		;\
3094	faddd	%f0, %f2, %f28		;\
3095	fmuld	%f0, %f2, %f30		;\
3096	faddd	%f0, %f2, %f32		;\
3097	fmuld	%f0, %f2, %f34		;\
3098	faddd	%f0, %f2, %f36		;\
3099	fmuld	%f0, %f2, %f38		;\
3100	faddd	%f0, %f2, %f40		;\
3101	fmuld	%f0, %f2, %f42		;\
3102	faddd	%f0, %f2, %f44		;\
3103	fmuld	%f0, %f2, %f46		;\
3104	faddd	%f0, %f2, %f48		;\
3105	fmuld	%f0, %f2, %f50		;\
3106	faddd	%f0, %f2, %f52		;\
3107	fmuld	%f0, %f2, %f54		;\
3108	faddd	%f0, %f2, %f56		;\
3109	fmuld	%f0, %f2, %f58		;\
3110	faddd	%f0, %f2, %f60		;\
3111	fmuld	%f0, %f2, %f62
3112
3113#include "assym.h"
3114
3115/*
3116 * In the routine below, we check/set FPRS_FEF bit since
3117 * we don't want to take a fp_disabled trap. We need not
3118 * check/set PSTATE_PEF bit as it is done early during boot.
3119 */
/*
 * big_savefp: record the current FP state in the save area at %o0.
 *
 * In:   %o0 = base of the save area.  It is addressed through the
 *             FPU_FPRS and FPU_FSR offsets and handed to BSTORE_FPREGS
 *             (presumably a kfpu_t -- confirm against the callers).
 * Uses: %o2 (copy of %fprs), %o4 (second argument of BSTORE_FPREGS,
 *       presumably scratch -- see the macro definition), integer
 *       condition codes.
 *
 * The current %fprs value is always stored.  The FP registers themselves
 * are saved only when FPRS_FEF was already set on entry; otherwise the
 * routine just writes FPRS_FEF to %fprs (replacing the whole register
 * value) so that later FP instructions do not trap.  %fsr is stored on
 * both paths.  Leaf routine: it returns with retl and does not open a
 * register window.
 */
3120	ENTRY(big_savefp)
3121	rd	%fprs, %o2			/* %o2 = %fprs on entry */
3122	st	%o2, [%o0 + FPU_FPRS]		/* remember it for big_restorefp */
3123	andcc	%o2, FPRS_FEF, %g0		! is FPRS_FEF set?
3124	bnz,a,pt	%icc, .fregs_save	! yes, go to save
3125	nop					/* branch delay slot */
3126	wr	%g0, FPRS_FEF, %fprs		! else, set the bit
3127        stx     %fsr, [%o0 + FPU_FSR]	! store %fsr
3128	retl
3129	nop					/* retl delay slot */
3130.fregs_save:
3131	BSTORE_FPREGS(%o0, %o4)			/* FP was in use: save the FP registers */
3132        stx     %fsr, [%o0 + FPU_FSR]	! store %fsr
3133	retl
3134	nop					/* retl delay slot */
3135	SET_SIZE(big_savefp)
3136
3137
/*
 * big_restorefp: put back the FP state recorded in the save area at %o0.
 *
 * In:   %o0 = base of the save area; read through the FPU_FSR and
 *             FPU_FPRS offsets and handed to BLOAD_FPREGS (presumably the
 *             area filled in by big_savefp -- confirm against callers).
 * Uses: %o1 (saved %fprs value), %o2 (second argument of BLOAD_FPREGS,
 *       presumably scratch -- see the macro definition), integer
 *       condition codes.
 *
 * %fsr is reloaded first.  If the saved %fprs had FPRS_FEF set, the FP
 * registers are reloaded with BLOAD_FPREGS and %fprs is set to the saved
 * value.  Otherwise nothing was saved to reload, so the registers are
 * zeroed with FZERO and %fprs is written with 0.  Leaf routine: it
 * returns with retl and does not open a register window.
 */
3138	ENTRY(big_restorefp)
3139	ldx     [%o0 + FPU_FSR], %fsr	! restore %fsr
3140	ld	[%o0 + FPU_FPRS], %o1	/* %o1 = %fprs value stored in the save area */
3141	andcc   %o1, FPRS_FEF, %g0	! is FPRS_FEF set in saved %fprs?
3142	bnz,pt	%icc, .fregs_restore	! yes, go to restore
3143	nop				/* branch delay slot */
3144	FZERO				! zero out to avoid leaks
3145	wr	%g0, 0, %fprs		/* %fprs = 0: FPRS_FEF is left clear */
3146	retl
3147	nop				/* retl delay slot */
3148.fregs_restore:
3149	BLOAD_FPREGS(%o0, %o2)		/* reload the FP registers from the save area */
3150	wr      %o1, 0, %fprs		/* %fprs = saved value (%o1 xor 0) */
3151	retl
3152	nop				/* retl delay slot */
3153	SET_SIZE(big_restorefp)
3154
3155#endif	/* lint || __lint */
3156