xref: /illumos-gate/usr/src/common/bignum/sun4u/mont_mulf_kernel_v9.S (revision 5d9d9091f564c198a760790b0bfa72c44e17912b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * This file is mostly a result of compiling the mont_mulf.c file to generate an
28 * assembly output and then hand-editing that output to replace the
29 * compiler-generated loop for the 512-bit case (nlen == 16) in the
30 * mont_mulf_noconv routine with a hand-crafted version. This file also
31 * has big_savefp() and big_restorefp() routines added by hand.
32 */
33
34#include <sys/asm_linkage.h>
35#include <sys/trap.h>
36#include <sys/stack.h>
37#include <sys/privregs.h>
38#include <sys/regset.h>
39#include <sys/vis.h>
40#include <sys/machthread.h>
41#include <sys/machtrap.h>
42#include <sys/machsig.h>
43
/*
 * Section bookkeeping emitted by the compiler, followed by the
 * read-only pool of double-precision constants used by the routines in
 * this file.  Each constant is written as two 32-bit words, high word
 * first, and matches the like-named static const double shown in the
 * mont_mulf.c listing embedded later in this file.
 */
44	.section	".text",#alloc,#execinstr
45	.file	"mont_mulf.s"
46
47	.section	".bss",#alloc,#write
48Bbss.bss:
49
50	.section	".data",#alloc,#write
51Ddata.data:
52
53	.section	".rodata",#alloc
54!
55! CONSTANT POOL
56!
57Drodata.rodata:
58	.global	TwoTo16
59	.align	8
60!
61! CONSTANT POOL
62!
63	.global TwoTo16
/* TwoTo16 = 65536.0, high word 1089470464 = 0x40F00000 */
64TwoTo16:
65	.word	1089470464
66	.word	0
67	.type	TwoTo16,#object
68	.size	TwoTo16,8
69	.global	TwoToMinus16
70!
71! CONSTANT POOL
72!
73	.global TwoToMinus16
/* TwoToMinus16 = 1.0 / 65536.0, high word 1055916032 = 0x3EF00000 */
74TwoToMinus16:
75	.word	1055916032
76	.word	0
77	.type	TwoToMinus16,#object
78	.size	TwoToMinus16,8
79	.global	Zero
80!
81! CONSTANT POOL
82!
83	.global Zero
/* Zero = 0.0, both words are zero */
84Zero:
85	.word	0
86	.word	0
87	.type	Zero,#object
88	.size	Zero,8
89	.global	TwoTo32
90!
91! CONSTANT POOL
92!
93	.global TwoTo32
/* TwoTo32 = 65536.0 * 65536.0, high word 1106247680 = 0x41F00000 */
94TwoTo32:
95	.word	1106247680
96	.word	0
97	.type	TwoTo32,#object
98	.size	TwoTo32,8
99	.global	TwoToMinus32
100!
101! CONSTANT POOL
102!
103	.global TwoToMinus32
/* TwoToMinus32 = 1.0 / (65536.0 * 65536.0), high word 1039138816 = 0x3DF00000 */
104TwoToMinus32:
105	.word	1039138816
106	.word	0
107	.type	TwoToMinus32,#object
108	.size	TwoToMinus32,8
109
/*
 * Switch to the text section for the routines that follow.  The two
 * .register directives declare %g3 and %g2 as scratch registers, and the
 * first routine is placed on a 32-byte boundary.
 */
110	.section	".text",#alloc,#execinstr
111/* 000000	   0 */		.register	%g3,#scratch
112/* 000000	     */		.register	%g2,#scratch
113/* 000000	   0 */		.align	32
114! FILE mont_mulf.c
115
116!    1		      !/*
117!    2		      ! * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
118!    3		      ! * Use is subject to license terms.
119!    4		      ! */
120!    6		      !#pragma ident	"@(#)mont_mulf.c	1.2	01/09/24 SMI"
121!    9		      !/*
122!   10		      ! * If compiled without -DRF_INLINE_MACROS then needs -lm at link time
123!   11		      ! * If compiled with -DRF_INLINE_MACROS then needs conv.il at compile time
124!   12		      ! * (i.e. cc <compiler_flags> -DRF_INLINE_MACROS conv.il mont_mulf.c )
125!   13		      ! */
126!   15		      !#include <sys/types.h>
127!   16		      !#include <math.h>
128!   18		      !static const double TwoTo16 = 65536.0;
129!   19		      !static const double TwoToMinus16 = 1.0/65536.0;
130!   20		      !static const double Zero = 0.0;
131!   21		      !static const double TwoTo32 = 65536.0 * 65536.0;
132!   22		      !static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
133!   24		      !#ifdef RF_INLINE_MACROS
134!   26		      !double upper32(double);
135!   27		      !double lower32(double, double);
136!   28		      !double mod(double, double, double);
137!   30		      !#else
138!   32		      !static double
139!   33		      !upper32(double x)
140!   34		      !{
141!   35		      !	return (floor(x * TwoToMinus32));
142!   36		      !}
143!   39		      !/* ARGSUSED */
144!   40		      !static double
145!   41		      !lower32(double x, double y)
146!   42		      !{
147!   43		      !	return (x - TwoTo32 * floor(x * TwoToMinus32));
148!   44		      !}
149!   46		      !static double
150!   47		      !mod(double x, double oneoverm, double m)
151!   48		      !{
152!   49		      !	return (x - m * floor(x * oneoverm));
153!   50		      !}
154!   52		      !#endif
155!   55		      !static void
156!   56		      !cleanup(double *dt, int from, int tlen)
157!   57		      !{
158
159!
160! SUBROUTINE cleanup
161!
162! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
163
/*
 * cleanup(double *dt, int from, int tlen)
 *
 * Leaf routine, no register window is allocated.  On entry %o0 = dt,
 * %o1 = from and %o2 = tlen.  For i = 2*from, 2*from+2, ... while
 * i < 2*tlen it replaces dt[i] and dt[i+1] by their low 32 bits plus
 * the carry from the previous pair, and keeps the upper parts as the
 * carries for the next pair (tmp in %f18, tmp1 in %f16).
 *
 * The interleaved lines beginning with a line number are the original
 * C statements that the following instructions implement.
 */
164                       cleanup:
/* sign-extend the 32-bit int arguments from and tlen */
165/* 000000	  57 */		sra	%o1,0,%o4
166/* 0x0004	     */		sra	%o2,0,%o5
167
168!   58		      !	int i;
169!   59		      !	double tmp, tmp1, x, x1;
170!   61		      !	tmp = tmp1 = Zero;
171
/* %g5 = 2 * tlen */
172/* 0x0008	  61 */		sll	%o5,1,%g5
173
174!   63		      !	for (i = 2 * from; i < 2 * tlen; i += 2) {
175
/* %g3 = i = 2 * from, return at once when i >= 2 * tlen */
176/* 0x000c	  63 */		sll	%o4,1,%g3
177/* 0x0010	     */		cmp	%g3,%g5
178/* 0x0014	     */		bge,pn	%icc,.L77000188
/* delay slot, executed on both paths: high part of the address of Zero */
179/* 0x0018	   0 */		sethi	%hi(Zero),%o3
180                       .L77000197:
/*
 * Loop set-up: %f8 = 0.0, %g2 = 2*tlen - 1 (loop limit),
 * %g4 = byte offset of dt[i], %f10 = x = dt[i], %g1 = address of dt[i],
 * %f18 = tmp = 0.0, %f16 = tmp1 = 0.0.
 */
181/* 0x001c	  63 */		ldd	[%o3+%lo(Zero)],%f8
182/* 0x0020	     */		sra	%g3,0,%o1
183/* 0x0024	     */		sub	%g5,1,%g2
184/* 0x0028	     */		sllx	%o1,3,%g4
185
186!   64		      !		x = dt[i];
187
188/* 0x002c	  64 */		ldd	[%g4+%o0],%f10
189/* 0x0030	  63 */		add	%g4,%o0,%g1
190/* 0x0034	     */		fmovd	%f8,%f18
191/* 0x0038	     */		fmovd	%f8,%f16
192
193!   65		      !		x1 = dt[i + 1];
194!   66		      !		dt[i] = lower32(x, Zero) + tmp;
195
196                       .L900000110:
/* %f0 = x converted to a 64-bit integer, %f12 = x1 = dt[i+1] */
197/* 0x003c	  66 */		fdtox	%f10,%f0
198/* 0x0040	  65 */		ldd	[%g1+8],%f12
199
200!   67		      !		dt[i + 1] = lower32(x1, Zero) + tmp1;
201!   68		      !		tmp = upper32(x);
202!   69		      !		tmp1 = upper32(x1);
203
/* i += 2 and compare against the limit early, the branch is at the bottom */
204/* 0x0044	  69 */		add	%g3,2,%g3
205/* 0x0048	     */		cmp	%g3,%g2
206/* 0x004c	  67 */		fdtox	%f12,%f2
/* keep a full copy of the integer image of x in %f4 for upper32 */
207/* 0x0050	  68 */		fmovd	%f0,%f4
/*
 * Clear the upper 32-bit word of each integer image by copying the
 * zero word %f8 over it, then convert back: %f0 = lower32(x) and
 * %f2 = lower32(x1) as doubles.
 */
208/* 0x0054	  66 */		fmovs	%f8,%f0
209/* 0x0058	  67 */		fmovs	%f8,%f2
210/* 0x005c	  66 */		fxtod	%f0,%f0
211/* 0x0060	  67 */		fxtod	%f2,%f2
/* second integer image of x1, kept in %f6 for upper32 */
212/* 0x0064	  69 */		fdtox	%f12,%f6
/* dt[i] = lower32(x) + tmp, dt[i+1] = lower32(x1) + tmp1 */
213/* 0x0068	  66 */		faddd	%f0,%f18,%f10
214/* 0x006c	     */		std	%f10,[%g1]
215/* 0x0070	  67 */		faddd	%f2,%f16,%f14
216/* 0x0074	     */		std	%f14,[%g1+8]
/*
 * fitod reads only the single register named, which is the upper
 * 32-bit word of the 64-bit integer image, so this yields
 * tmp = upper32(x) and tmp1 = upper32(x1).
 */
217/* 0x0078	  68 */		fitod	%f4,%f18
/* advance the element pointer by two doubles */
218/* 0x007c	  69 */		add	%g1,16,%g1
219/* 0x0080	     */		fitod	%f6,%f16
/* loop while i <= 2*tlen - 1, the annulled delay slot loads the next x */
220/* 0x0084	     */		ble,a,pt	%icc,.L900000110
221/* 0x0088	  64 */		ldd	[%g1],%f10
222                       .L77000188:
223/* 0x008c	  69 */		retl	! Result =
224/* 0x0090	     */		nop
225/* 0x0094	   0 */		.type	cleanup,2
226/* 0x0094	   0 */		.size	cleanup,(.-cleanup)
227
/* padding so that the next routine starts on a 32-byte boundary */
228	.section	".text",#alloc,#execinstr
229/* 000000	   0 */		.align	8
230/* 000000	     */		.skip	24
231/* 0x0018	     */		.align	32
232
233!   70		      !	}
234!   71		      !}
235!   75		      !#ifdef _KERNEL
236!   76		      !/*
237!   77		      ! * This only works if  0 <= d < 2^53
238!   78		      ! */
239!   79		      !uint64_t
240!   80		      !double2uint64_t(double* d)
241!   81		      !{
242!   82		      !	uint64_t x;
243!   83		      !	uint64_t exp;
244!   84		      !	uint64_t man;
245!   86		      !	x = *((uint64_t *)d);
246
247!
248! SUBROUTINE double2uint64_t
249!
250! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
251
/*
 * uint64_t double2uint64_t(double *d)
 *
 * Kernel variant from the C listing: converts the double at *d to an
 * integer using integer instructions only.  Leaf routine, %o0 = d on
 * entry and the result is returned in %o0.  Per the C listing comment
 * it is only valid for 0 <= d < 2^53.  The sign bit is never examined.
 */
252			.global double2uint64_t
253                       double2uint64_t:
/* %o2 = x, the raw 64-bit image of the double */
254/* 000000	  86 */		ldx	[%o0],%o2
255
256!   87		      !	if (x == 0) {
257
258/* 0x0004	  87 */		cmp	%o2,0
259/* 0x0008	     */		bne,pn	%xcc,.L900000206
/* delay slot, executed on both paths: start building the exponent mask */
260/* 0x000c	  94 */		sethi	%hi(0xfff00000),%o5
261                       .L77000202:
/* x == 0: return 0, the result is set in the delay slot of retl */
262/* 0x0010	  94 */		retl	! Result =  %o0
263
264!   88		      !		return (0ULL);
265
266/* 0x0014	  88 */		or	%g0,0,%o0
267
268!   89		      !	}
269!   90		      !	exp = (x >> 52) - 1023;
270!   91		      !	man = (x & 0xfffffffffffffULL) | 0x10000000000000ULL;
271!   92		      !	x = man >> (52 - exp);
272!   94		      !	return (x);
273
274                       .L900000206:
/* %o4 = 0xfff0000000000000, the sign and exponent bits */
275/* 0x0018	  94 */		sllx	%o5,32,%o4
/* %o0 = x >> 52, the biased exponent field */
276/* 0x001c	     */		srlx	%o2,52,%o0
/* %g4 = 0x40000000 << 22 = 2^52, the implicit leading mantissa bit */
277/* 0x0020	     */		sethi	%hi(0x40000000),%o1
278/* 0x0024	     */		or	%g0,1023,%g5
279/* 0x0028	     */		sllx	%o1,22,%g4
/* %o3 = complement of %o4 = 0x000fffffffffffff, the mantissa mask */
280/* 0x002c	     */		xor	%o4,-1,%o3
/* %g3 = 1023 - (x >> 52), that is -exp */
281/* 0x0030	     */		sub	%g5,%o0,%g3
/* %o5 = man = (x & mantissa mask) | 2^52 */
282/* 0x0034	     */		and	%o2,%o3,%g2
283/* 0x0038	     */		or	%g2,%g4,%o5
/* %g1 = 52 - exp, the right-shift count */
284/* 0x003c	     */		add	%g3,52,%g1
/* return man >> (52 - exp), the shift sits in the delay slot of retl */
285/* 0x0040	     */		retl	! Result =  %o0
286/* 0x0044	     */		srlx	%o5,%g1,%o0
287/* 0x0048	   0 */		.type	double2uint64_t,2
288/* 0x0048	   0 */		.size	double2uint64_t,(.-double2uint64_t)
289
/* padding so that the next routine starts on a 32-byte boundary */
290	.section	".text",#alloc,#execinstr
291/* 000000	   0 */		.align	8
292/* 000000	     */		.skip	24
293/* 0x0018	     */		.align	32
294
295!   95		      !}
296!   96		      !#else
297!   97		      !/*
298!   98		      ! * This only works if  0 <= d < 2^63
299!   99		      ! */
300!  100		      !uint64_t
301!  101		      !double2uint64_t(double* d)
302!  102		      !{
303!  103		      !	return ((int64_t)(*d));
304!  104		      !}
305!  105		      !#endif
306!  107		      !/* ARGSUSED */
307!  108		      !void
308!  109		      !conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
309!  110		      !{
310
311!
312! SUBROUTINE conv_d16_to_i32
313!
314! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
315
/*
 * conv_d16_to_i32(uint32_t *i32, double *d16, int64_t *tmp, int ilen)
 *
 * Packs doubles holding 16-bit-radix digits into 32-bit words while
 * propagating carries, following the interleaved C listing.  After the
 * save the arguments are %i0 = i32, %i1 = d16 and %i3 = ilen.  The tmp
 * argument arriving in %i2 is never read, and %i2 is reused for b.
 * Every double2uint64_t call of the C source is expanded inline and
 * works on the raw 64-bit image of the double, with a zero image
 * short-circuited to the value 0.
 *
 * Register use in the main loop:
 *   %i4 = a, %i2 = b, %l1 = c, %o2 = d, %o3 = t1 carried between rounds,
 *   %l4 = i, %g2 = 2*i + 2, %g1 = address of i32[i],
 *   %l7 = mantissa mask, %l3 = 2^52, %l6 = 0xffffffff, %l2 = 0xffff,
 *   %l5 = ilen - 2 (loop limit).
 */
316			.global conv_d16_to_i32
317                       conv_d16_to_i32:
318/* 000000	 110 */		save	%sp,-176,%sp
319
320!  111		      !	int i;
321!  112		      !	int64_t t, t1,		/* using int64_t and not uint64_t */
322!  113		      !		a, b, c, d;	/* because more efficient code is */
323!  114		      !				/* generated this way, and there  */
324!  115		      !				/* is no overflow  */
325!  116		      !	t1 = 0;
326!  117		      !	a = double2uint64_t(&(d16[0]));
327
/* %o0 = raw image of d16[0], %i2 = raw image of d16[1] */
328/* 0x0004	 117 */		ldx	[%i1],%o0
329/* 0x0008	 118 */		ldx	[%i1+8],%i2
330/* 0x000c	 117 */		cmp	%o0,0
331/* 0x0010	     */		bne,pn	%xcc,.L77000216
/* delay slot: a = 0, overwritten below when d16[0] is nonzero */
332/* 0x0014	     */		or	%g0,0,%i4
333                       .L77000215:
/* d16[0] is zero: keep a = 0 and go test d16[1], compare in the delay slot */
334/* 0x0018	 117 */		ba	.L900000316
335/* 0x001c	 118 */		cmp	%i2,0
336                       .L77000216:
/* inline double2uint64_t for a: %i4 = mantissa with 2^52 set >> (52 - exp) */
337/* 0x0020	 117 */		srlx	%o0,52,%o5
338/* 0x0024	     */		sethi	%hi(0xfff00000),%i4
339/* 0x0028	     */		sllx	%i4,32,%o2
340/* 0x002c	     */		sethi	%hi(0x40000000),%o7
341/* 0x0030	     */		sllx	%o7,22,%o3
342/* 0x0034	     */		or	%g0,1023,%o4
343/* 0x0038	     */		xor	%o2,-1,%g5
344/* 0x003c	     */		sub	%o4,%o5,%l0
345/* 0x0040	     */		and	%o0,%g5,%o1
346/* 0x0044	     */		add	%l0,52,%l1
347/* 0x0048	     */		or	%o1,%o3,%g4
348
349!  118		      !	b = double2uint64_t(&(d16[1]));
350
351/* 0x004c	 118 */		cmp	%i2,0
352/* 0x0050	 117 */		srlx	%g4,%l1,%i4
353                       .L900000316:
354/* 0x0054	 118 */		bne,pn	%xcc,.L77000222
/* delay slot, executed on both paths: %l3 = ilen - 1 */
355/* 0x0058	 134 */		sub	%i3,1,%l3
356                       .L77000221:
/* d16[1] is zero: b = 0, and t1 = 0 is set in the delay slot */
357/* 0x005c	 118 */		or	%g0,0,%i2
358/* 0x0060	     */		ba	.L900000315
359/* 0x0064	 116 */		or	%g0,0,%o3
360                       .L77000222:
/* inline double2uint64_t for b, result in %i2, and t1 = 0 in %o3 */
361/* 0x0068	 118 */		srlx	%i2,52,%l6
362/* 0x006c	     */		sethi	%hi(0xfff00000),%g4
363/* 0x0070	     */		sllx	%g4,32,%i5
364/* 0x0074	     */		sethi	%hi(0x40000000),%l5
365/* 0x0078	     */		xor	%i5,-1,%l4
366/* 0x007c	     */		or	%g0,1023,%l2
367/* 0x0080	     */		and	%i2,%l4,%l7
368/* 0x0084	     */		sllx	%l5,22,%i2
369/* 0x0088	     */		sub	%l2,%l6,%g1
370/* 0x008c	     */		or	%l7,%i2,%g3
371/* 0x0090	     */		add	%g1,52,%g2
372/* 0x0094	 116 */		or	%g0,0,%o3
373/* 0x0098	 118 */		srlx	%g3,%g2,%i2
374
375!  119		      !	for (i = 0; i < ilen - 1; i++) {
376
377                       .L900000315:
/* skip the loop when ilen - 1 <= 0, i = 0 is set in the delay slot */
378/* 0x009c	 119 */		cmp	%l3,0
379/* 0x00a0	     */		ble,pn	%icc,.L77000210
380/* 0x00a4	     */		or	%g0,0,%l4
381                       .L77000245:
/*
 * Loop-invariant constants:
 *   %l6 = 0xffffffff (all ones, then zero-extended from 32 bits)
 *   %l7 = 0x000fffffffffffff mantissa mask
 *   %l3 = 2^52 implicit mantissa bit
 *   %l2 = 0xfc00 + 1023 = 0xffff
 *   %l5 = ilen - 2, %g2 = 2, %g1 = i32
 */
382/* 0x00a8	 118 */		sethi	%hi(0xfff00000),%l7
383/* 0x00ac	     */		or	%g0,-1,%l6
384/* 0x00b0	     */		sllx	%l7,32,%l3
385/* 0x00b4	     */		srl	%l6,0,%l6
386/* 0x00b8	     */		sethi	%hi(0x40000000),%l1
387/* 0x00bc	     */		sethi	%hi(0xfc00),%l2
388/* 0x00c0	     */		xor	%l3,-1,%l7
389/* 0x00c4	     */		sllx	%l1,22,%l3
390/* 0x00c8	     */		sub	%i3,2,%l5
391/* 0x00cc	     */		add	%l2,1023,%l2
392/* 0x00d0	     */		or	%g0,2,%g2
393/* 0x00d4	     */		or	%g0,%i0,%g1
394
395!  120		      !		c = double2uint64_t(&(d16[2 * i + 2]));
396
397                       .L77000208:
/* byte offsets of d16[2*i + 2] and d16[2*i + 3] */
398/* 0x00d8	 120 */		sra	%g2,0,%g3
399/* 0x00dc	 123 */		add	%g2,1,%o2
400/* 0x00e0	 120 */		sllx	%g3,3,%i3
401
402!  121		      !		t1 += a & 0xffffffff;
403!  122		      !		t = (a >> 32);
404!  123		      !		d = double2uint64_t(&(d16[2 * i + 3]));
405
406/* 0x00e4	 123 */		sra	%o2,0,%g5
/* %o5 = raw image of d16[2*i + 2] */
407/* 0x00e8	 120 */		ldx	[%i1+%i3],%o5
408/* 0x00ec	 123 */		sllx	%g5,3,%o0
/* %g4 = a & 0xffffffff */
409/* 0x00f0	 121 */		and	%i4,%l6,%g4
/* %i3 = raw image of d16[2*i + 3], ilen itself is no longer needed */
410/* 0x00f4	 123 */		ldx	[%i1+%o0],%i3
411/* 0x00f8	 120 */		cmp	%o5,0
412/* 0x00fc	     */		bne,pn	%xcc,.L77000228
/* delay slot: %i5 = b & 0xffff */
413/* 0x0100	 124 */		and	%i2,%l2,%i5
414                       .L77000227:
/* c = 0, and %o0 = t1 + (a & 0xffffffff) in the delay slot */
415/* 0x0104	 120 */		or	%g0,0,%l1
416/* 0x0108	     */		ba	.L900000314
417/* 0x010c	 121 */		add	%o3,%g4,%o0
418                       .L77000228:
/* inline double2uint64_t for c, result in %l1 */
419/* 0x0110	 120 */		srlx	%o5,52,%o7
420/* 0x0114	     */		and	%o5,%l7,%o5
421/* 0x0118	     */		or	%g0,52,%l0
422/* 0x011c	     */		sub	%o7,1023,%o4
423/* 0x0120	     */		or	%o5,%l3,%l1
424/* 0x0124	     */		sub	%l0,%o4,%o1
425/* 0x0128	     */		srlx	%l1,%o1,%l1
/* %o0 = t1 + (a & 0xffffffff) */
426/* 0x012c	 121 */		add	%o3,%g4,%o0
427                       .L900000314:
/* %g3 = t = a >> 32 */
428/* 0x0130	 122 */		srax	%i4,32,%g3
429/* 0x0134	 123 */		cmp	%i3,0
430/* 0x0138	     */		bne,pn	%xcc,.L77000234
/* delay slot: %g5 = (b & 0xffff) << 16 */
431/* 0x013c	 124 */		sllx	%i5,16,%g5
432                       .L77000233:
/* d = 0, and %o7 = t1 + ((b & 0xffff) << 16) in the delay slot */
433/* 0x0140	 123 */		or	%g0,0,%o2
434/* 0x0144	     */		ba	.L900000313
435/* 0x0148	 124 */		add	%o0,%g5,%o7
436                       .L77000234:
/* inline double2uint64_t for d, result in %o2 */
437/* 0x014c	 123 */		srlx	%i3,52,%o2
438/* 0x0150	     */		and	%i3,%l7,%i4
439/* 0x0154	     */		sub	%o2,1023,%o1
440/* 0x0158	     */		or	%g0,52,%g4
441/* 0x015c	     */		sub	%g4,%o1,%i5
442/* 0x0160	     */		or	%i4,%l3,%i3
443/* 0x0164	     */		srlx	%i3,%i5,%o2
444
445!  124		      !		t1 += (b & 0xffff) << 16;
446
447/* 0x0168	 124 */		add	%o0,%g5,%o7
448
449!  125		      !		t += (b >> 16) + (t1 >> 32);
450
451                       .L900000313:
/* %o3 = (b >> 16) + (t1 >> 32), t is added a few lines below */
452/* 0x016c	 125 */		srax	%i2,16,%l0
453/* 0x0170	     */		srax	%o7,32,%o4
454/* 0x0174	     */		add	%l0,%o4,%o3
455
456!  126		      !		i32[i] = t1 & 0xffffffff;
457!  127		      !		t1 = t;
458!  128		      !		a = c;
459!  129		      !		b = d;
460
/* i++ */
461/* 0x0178	 129 */		add	%l4,1,%l4
/* i32[i] = t1 & 0xffffffff, stored through the running pointer %g1 */
462/* 0x017c	 126 */		and	%o7,%l6,%o5
/* %o3 = t, which becomes t1 for the next round */
463/* 0x0180	 125 */		add	%g3,%o3,%o3
464/* 0x0184	 126 */		st	%o5,[%g1]
/* a = c, b = d, and advance the d16 index by two */
465/* 0x0188	 128 */		or	%g0,%l1,%i4
466/* 0x018c	 129 */		or	%g0,%o2,%i2
467/* 0x0190	     */		add	%g2,2,%g2
/* loop while i <= ilen - 2, the output pointer advances in the delay slot */
468/* 0x0194	     */		cmp	%l4,%l5
469/* 0x0198	     */		ble,pt	%icc,.L77000208
470/* 0x019c	     */		add	%g1,4,%g1
471
472!  130		      !	}
473!  131		      !	t1 += a & 0xffffffff;
474!  132		      !	t = (a >> 32);
475!  133		      !	t1 += (b & 0xffff) << 16;
476!  134		      !	i32[i] = t1 & 0xffffffff;
477
478                       .L77000210:
/*
 * Final word: %l7 = t1 + a + ((b & 0xffff) << 16).  No explicit
 * 32-bit masks are applied here, the 32-bit st instruction writes only
 * the low word of %l7 to i32[i].
 */
479/* 0x01a0	 134 */		sra	%l4,0,%l4
480/* 0x01a4	     */		sethi	%hi(0xfc00),%i1
481/* 0x01a8	     */		add	%o3,%i4,%l2
482/* 0x01ac	     */		add	%i1,1023,%i5
483/* 0x01b0	     */		and	%i2,%i5,%l5
/* %i2 = i * 4, the byte offset of i32[i] */
484/* 0x01b4	     */		sllx	%l4,2,%i2
485/* 0x01b8	     */		sllx	%l5,16,%l6
486/* 0x01bc	     */		add	%l2,%l6,%l7
487/* 0x01c0	     */		st	%l7,[%i0+%i2]
/* return and release the register window */
488/* 0x01c4	 129 */		ret	! Result =
489/* 0x01c8	     */		restore	%g0,%g0,%g0
490/* 0x01cc	   0 */		.type	conv_d16_to_i32,2
491/* 0x01cc	   0 */		.size	conv_d16_to_i32,(.-conv_d16_to_i32)
492
/*
 * Constants read by conv_i32_to_d32.  1127219200 is 0x43300000, the high
 * word of the double 2^52.  Offset 0 holds the complete double 2^52 and
 * offset 8 holds the high word alone, which the routine loads into the
 * upper half of a double register.  The trailing directives pad so that
 * the routine starts on a 32-byte boundary.
 */
493	.section	".text",#alloc,#execinstr
494/* 000000	   0 */		.align	8
495!
496! CONSTANT POOL
497!
498                       ___const_seg_900000401:
499/* 000000	   0 */		.word	1127219200,0
500/* 0x0008	     */		.word	1127219200
501/* 0x000c	   0 */		.type	___const_seg_900000401,1
502/* 0x000c	   0 */		.size	___const_seg_900000401,(.-___const_seg_900000401)
503/* 0x000c	   0 */		.align	8
504/* 0x0010	     */		.skip	24
505/* 0x0028	     */		.align	32
506
507!  135		      !}
508!  138		      !void
509!  139		      !conv_i32_to_d32(double *d32, uint32_t *i32, int len)
510!  140		      !{
511
512!
513! SUBROUTINE conv_i32_to_d32
514!
515! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
516
/*
 * conv_i32_to_d32(double *d32, uint32_t *i32, int len)
 *
 * Leaf routine: %o0 = d32, %o1 = i32, %o2 = len.  Each unsigned 32-bit
 * word is converted to a double without an integer-to-FP convert
 * instruction: the word is loaded into the low half of a double
 * register whose high half holds 0x43300000, which gives the value
 * 2^52 + word, and 2^52 (held in %f16) is then subtracted.
 *
 * When len >= 10 a software-pipelined loop handles eight words per
 * iteration with two further words always preloaded.  Whatever remains,
 * or the whole array when len < 10, goes through the one-word loop at
 * .L900000409.  %g5 counts the words consumed so far and %o3 = len - 1.
 *
 * NOTE(review): prefetch function code 22 lies in the range that
 * SPARC V9 leaves implementation-dependent.  It is used here on the
 * destination array, confirm its exact meaning for the target CPU.
 */
517			.global conv_i32_to_d32
518                       conv_i32_to_d32:
/* test len, result written back to %o2 unchanged */
519/* 000000	 140 */		orcc	%g0,%o2,%o2
520
521!  141		      !	int i;
522!  143		      !#pragma pipeloop(0)
523!  144		      !	for (i = 0; i < len; i++)
524
/* nothing to do when len <= 0, %o3 = len - 1 is set in the delay slot */
525/* 0x0004	 144 */		ble,pn	%icc,.L77000254
526/* 0x0008	     */		sub	%o2,1,%o3
527                       .L77000263:
/* %o2 = destination pointer, starts at d32 */
528/* 0x000c	 140 */		or	%g0,%o0,%o2
529
530!  145		      !		d32[i] = (double)(i32[i]);
531
/* %o5 = len, %g5 = i = 0, take the simple loop when len < 10 */
532/* 0x0010	 145 */		add	%o3,1,%o5
533/* 0x0014	 144 */		or	%g0,0,%g5
534/* 0x0018	 145 */		cmp	%o5,10
535/* 0x001c	     */		bl,pn	%icc,.L77000261
/* delay slot, executed on both paths: high part of the constant address */
536/* 0x0020	     */		sethi	%hi(___const_seg_900000401),%g4
537                       .L900000407:
/*
 * Unrolled path set-up: issue the initial prefetches, set %o0 to the
 * address of i32[2], %o5 = len - 8 (loop limit) and %g5 = 2, load
 * 2^52 into %f16, and preload words 0 and 1 into the pairs %f2:%f3 and
 * %f0:%f1 with the 0x43300000 high word.
 */
538/* 0x0024	 145 */		prefetch	[%o1],0
539/* 0x0028	     */		prefetch	[%o0],22
540/* 0x002c	     */		sethi	%hi(___const_seg_900000401+8),%o4
541/* 0x0030	     */		or	%g0,%o0,%o2
542/* 0x0034	     */		prefetch	[%o1+64],0
543/* 0x0038	     */		add	%o1,8,%o0
544/* 0x003c	     */		sub	%o3,7,%o5
545/* 0x0040	     */		prefetch	[%o2+64],22
546/* 0x0044	     */		or	%g0,2,%g5
547/* 0x0048	     */		prefetch	[%o2+128],22
548/* 0x004c	     */		prefetch	[%o2+192],22
549/* 0x0050	     */		prefetch	[%o1+128],0
550/* 0x0054	     */		ld	[%o4+%lo(___const_seg_900000401+8)],%f2
551/* 0x0058	     */		ldd	[%g4+%lo(___const_seg_900000401)],%f16
552/* 0x005c	     */		fmovs	%f2,%f0
553/* 0x0060	     */		prefetch	[%o2+256],22
554/* 0x0064	     */		prefetch	[%o2+320],22
555/* 0x0068	     */		ld	[%o1],%f3
556/* 0x006c	     */		prefetch	[%o1+192],0
557/* 0x0070	     */		ld	[%o1+4],%f1
558                       .L900000405:
/*
 * Main loop body: eight results are stored per pass.  The source
 * pointer %o0 advances by 32 bytes and the destination pointer %o2 by
 * 64 bytes near the top, so the later accesses use negative offsets.
 * Each fmovs copies the 0x43300000 high word into the next register
 * pair before its low word is loaded.
 */
559/* 0x0074	 145 */		prefetch	[%o0+188],0
560/* 0x0078	     */		fsubd	%f2,%f16,%f22
561/* 0x007c	     */		add	%g5,8,%g5
562/* 0x0080	     */		add	%o0,32,%o0
563/* 0x0084	     */		ld	[%o4+%lo(___const_seg_900000401+8)],%f4
564/* 0x0088	     */		std	%f22,[%o2]
/* compare the updated count with len - 8, the branch is at the bottom */
565/* 0x008c	     */		cmp	%g5,%o5
566/* 0x0090	     */		ld	[%o0-32],%f5
567/* 0x0094	     */		fsubd	%f0,%f16,%f24
568/* 0x0098	     */		add	%o2,64,%o2
569/* 0x009c	     */		fmovs	%f4,%f0
570/* 0x00a0	     */		std	%f24,[%o2-56]
571/* 0x00a4	     */		ld	[%o0-28],%f1
572/* 0x00a8	     */		fsubd	%f4,%f16,%f26
573/* 0x00ac	     */		fmovs	%f0,%f6
574/* 0x00b0	     */		prefetch	[%o2+312],22
575/* 0x00b4	     */		std	%f26,[%o2-48]
576/* 0x00b8	     */		ld	[%o0-24],%f7
577/* 0x00bc	     */		fsubd	%f0,%f16,%f28
578/* 0x00c0	     */		fmovs	%f6,%f8
579/* 0x00c4	     */		std	%f28,[%o2-40]
580/* 0x00c8	     */		ld	[%o0-20],%f9
581/* 0x00cc	     */		fsubd	%f6,%f16,%f30
582/* 0x00d0	     */		fmovs	%f8,%f10
583/* 0x00d4	     */		std	%f30,[%o2-32]
584/* 0x00d8	     */		ld	[%o0-16],%f11
585/* 0x00dc	     */		prefetch	[%o2+344],22
586/* 0x00e0	     */		fsubd	%f8,%f16,%f48
587/* 0x00e4	     */		fmovs	%f10,%f12
588/* 0x00e8	     */		std	%f48,[%o2-24]
589/* 0x00ec	     */		ld	[%o0-12],%f13
590/* 0x00f0	     */		fsubd	%f10,%f16,%f50
/* refill %f2:%f3 and %f0:%f1 with the two words for the next pass */
591/* 0x00f4	     */		fmovs	%f12,%f2
592/* 0x00f8	     */		std	%f50,[%o2-16]
593/* 0x00fc	     */		ld	[%o0-8],%f3
594/* 0x0100	     */		fsubd	%f12,%f16,%f52
595/* 0x0104	     */		fmovs	%f2,%f0
596/* 0x0108	     */		std	%f52,[%o2-8]
/* loop while the count is <= len - 8, the last preload is in the delay slot */
597/* 0x010c	     */		ble,pt	%icc,.L900000405
598/* 0x0110	     */		ld	[%o0-4],%f1
599                       .L900000408:
/*
 * Drain: convert and store the two preloaded words, hand the source
 * pointer back in %o1, and finish when the count exceeds len - 1.
 */
600/* 0x0114	 145 */		fsubd	%f2,%f16,%f18
601/* 0x0118	     */		add	%o2,16,%o2
602/* 0x011c	     */		cmp	%g5,%o3
603/* 0x0120	     */		std	%f18,[%o2-16]
604/* 0x0124	     */		fsubd	%f0,%f16,%f20
605/* 0x0128	     */		or	%g0,%o0,%o1
606/* 0x012c	     */		bg,pn	%icc,.L77000254
607/* 0x0130	     */		std	%f20,[%o2-8]
608                       .L77000261:
/* one-word loop: the low half %f15 receives the next source word */
609/* 0x0134	 145 */		ld	[%o1],%f15
610                       .L900000409:
/* reload 2^52 into %f16 and the 0x43300000 high word into %f14 */
611/* 0x0138	 145 */		sethi	%hi(___const_seg_900000401+8),%o4
612/* 0x013c	     */		ldd	[%g4+%lo(___const_seg_900000401)],%f16
613/* 0x0140	     */		add	%g5,1,%g5
614/* 0x0144	     */		ld	[%o4+%lo(___const_seg_900000401+8)],%f14
615/* 0x0148	     */		add	%o1,4,%o1
616/* 0x014c	     */		cmp	%g5,%o3
/* (2^52 + word) - 2^52, stored to the current destination slot */
617/* 0x0150	     */		fsubd	%f14,%f16,%f54
618/* 0x0154	     */		std	%f54,[%o2]
619/* 0x0158	     */		add	%o2,8,%o2
/* loop while the count is <= len - 1, annulled delay slot loads the next word */
620/* 0x015c	     */		ble,a,pt	%icc,.L900000409
621/* 0x0160	     */		ld	[%o1],%f15
622                       .L77000254:
623/* 0x0164	 145 */		retl	! Result =
624/* 0x0168	     */		nop
625/* 0x016c	   0 */		.type	conv_i32_to_d32,2
626/* 0x016c	   0 */		.size	conv_i32_to_d32,(.-conv_i32_to_d32)
627
/*
 * Constants read by conv_i32_to_d16.  1127219200 is 0x43300000, the high
 * word of the double 2^52.  Offset 0 holds the complete double 2^52 and
 * offset 8 holds the high word alone.  The trailing directives pad so
 * that the routine starts on a 32-byte boundary.
 */
628	.section	".text",#alloc,#execinstr
629/* 000000	   0 */		.align	8
630!
631! CONSTANT POOL
632!
633                       ___const_seg_900000501:
634/* 000000	   0 */		.word	1127219200,0
635/* 0x0008	     */		.word	1127219200
636/* 0x000c	   0 */		.type	___const_seg_900000501,1
637/* 0x000c	   0 */		.size	___const_seg_900000501,(.-___const_seg_900000501)
638/* 0x000c	   0 */		.align	8
639/* 0x0010	     */		.skip	24
640/* 0x0028	     */		.align	32
641
642!  146		      !}
643!  149		      !void
644!  150		      !conv_i32_to_d16(double *d16, uint32_t *i32, int len)
645!  151		      !{
646
647!
648! SUBROUTINE conv_i32_to_d16
649!
650! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
651
/*
 * conv_i32_to_d16(double *d16, uint32_t *i32, int len)
 *
 * After the save: %i0 = d16, %i1 = i32, %i2 = len.  Every source word a
 * produces two doubles, d16[2*i] = a & 0xffff and d16[2*i+1] = a >> 16.
 * The 16-bit halves are computed in integer registers, written to
 * stack slots, and reloaded into the low half of a double register
 * whose high half holds 0x43300000, which gives 2^52 + value.
 * Subtracting 2^52 (held in %f20) leaves the value as a double.
 *
 * When len >= 4 a software-pipelined loop handles two source words per
 * pass with two further words always pending in the stack slots.  The
 * remainder, or everything when len < 4, goes through the one-word
 * loop at .L900000510.
 *
 * Register use: %i4 = 0xffff, %l7 = source pointer, %i2 = d16 base,
 * %l2 = d16 write pointer (unrolled path), %i3 = index into d16,
 * %i5 = count of source words consumed, %l6 = len - 1.
 *
 * Stack slots used by the unrolled path:
 *   %sp+2335 low half of the even word, %sp+2271 its high half,
 *   %sp+2303 low half of the odd word,  %sp+2239 its high half.
 * The one-word loop uses %sp+2399 (low half) and %sp+2367 (high half).
 * NOTE(review): the offsets presumably include the 2047-byte SPARC V9
 * stack bias, which would place them inside the 368-byte frame. Confirm.
 */
652			.global conv_i32_to_d16
653                       conv_i32_to_d16:
654/* 000000	 151 */		save	%sp,-368,%sp
/* test len */
655/* 0x0004	     */		orcc	%g0,%i2,%i2
656
657!  152		      !	int i;
658!  153		      !	uint32_t a;
659!  155		      !#pragma pipeloop(0)
660!  156		      !	for (i = 0; i < len; i++) {
661
/* nothing to do when len <= 0, %l6 = len - 1 is set in the delay slot */
662/* 0x0008	 156 */		ble,pn	%icc,.L77000272
663/* 0x000c	     */		sub	%i2,1,%l6
664                       .L77000281:
665/* 0x0010	 156 */		sethi	%hi(0xfc00),%i3
666
667!  157		      !		a = i32[i];
668
/* copy of len in %l1, %l1 is reloaded with an address before any read */
669/* 0x0014	 157 */		or	%g0,%i2,%l1
/* %i4 = 0xfc00 + 1023 = 0xffff */
670/* 0x0018	 156 */		add	%i3,1023,%i4
671/* 0x001c	 157 */		cmp	%i2,4
/* %l7 = source pointer, %i2 = d16 base, %i5 = 0, %i3 = 0 */
672/* 0x0020	 151 */		or	%g0,%i1,%l7
673/* 0x0024	     */		or	%g0,%i0,%i2
674/* 0x0028	 156 */		or	%g0,0,%i5
675/* 0x002c	     */		or	%g0,0,%i3
/* take the one-word loop when len < 4 */
676/* 0x0030	 157 */		bl,pn	%icc,.L77000279
/* delay slot, executed on both paths: high part of the constant address */
677/* 0x0034	   0 */		sethi	%hi(___const_seg_900000501),%i1
678                       .L900000508:
/*
 * Unrolled path set-up: prefetch the destination, set %l2 = d16,
 * %i0 = len - 2 (loop limit), %i5 = 2, load words 0 and 1, load 2^52
 * into %f20, and park the four 16-bit halves in their stack slots.
 */
679/* 0x0038	 157 */		prefetch	[%i0+8],22
680/* 0x003c	     */		prefetch	[%i0+72],22
681/* 0x0040	     */		or	%g0,%i0,%l2
682
683!  158		      !		d16[2 * i] = (double)(a & 0xffff);
684
685/* 0x0044	 158 */		sethi	%hi(___const_seg_900000501+8),%l1
686/* 0x0048	 157 */		prefetch	[%i0+136],22
687/* 0x004c	     */		sub	%l6,1,%i0
688/* 0x0050	     */		or	%g0,0,%i3
689/* 0x0054	     */		prefetch	[%i2+200],22
690/* 0x0058	     */		or	%g0,2,%i5
691/* 0x005c	     */		prefetch	[%i2+264],22
692/* 0x0060	     */		prefetch	[%i2+328],22
693/* 0x0064	     */		prefetch	[%i2+392],22
694/* 0x0068	     */		ld	[%l7],%l3
695/* 0x006c	     */		ld	[%l7+4],%l4
696/* 0x0070	 158 */		ldd	[%i1+%lo(___const_seg_900000501)],%f20
697
698!  159		      !		d16[2 * i + 1] = (double)(a >> 16);
699
700/* 0x0074	 159 */		srl	%l3,16,%o1
701/* 0x0078	 158 */		and	%l3,%i4,%o3
702/* 0x007c	     */		st	%o3,[%sp+2335]
703/* 0x0080	 159 */		srl	%l4,16,%g4
704/* 0x0084	 158 */		and	%l4,%i4,%o0
705/* 0x0088	     */		st	%o0,[%sp+2303]
706/* 0x008c	 159 */		add	%l7,8,%l7
707/* 0x0090	     */		st	%o1,[%sp+2271]
708/* 0x0094	     */		st	%g4,[%sp+2239]
709/* 0x0098	 157 */		prefetch	[%i2+456],22
710/* 0x009c	     */		prefetch	[%i2+520],22
711                       .L900000506:
/*
 * Main loop body: convert and store the two pending words (four
 * doubles, %l2 advances by 32 bytes) while the halves of the next two
 * source words are written into the same stack slots.
 */
712/* 0x00a0	 157 */		prefetch	[%l2+536],22
713/* 0x00a4	 159 */		add	%i5,2,%i5
714/* 0x00a8	 157 */		add	%l2,32,%l2
/* next even source word */
715/* 0x00ac	     */		ld	[%l7],%g2
/* compare the updated count with len - 2, the branch is at the bottom */
716/* 0x00b0	 159 */		cmp	%i5,%i0
717/* 0x00b4	     */		add	%l7,8,%l7
/* pending even word: low half into %f9, high half into %f11 */
718/* 0x00b8	 158 */		ld	[%sp+2335],%f9
719/* 0x00bc	 159 */		add	%i3,4,%i3
720/* 0x00c0	 158 */		ld	[%l1+%lo(___const_seg_900000501+8)],%f8
721/* 0x00c4	 159 */		ld	[%sp+2271],%f11
722/* 0x00c8	 158 */		and	%g2,%i4,%g3
723/* 0x00cc	 159 */		fmovs	%f8,%f10
724/* 0x00d0	 158 */		st	%g3,[%sp+2335]
725/* 0x00d4	     */		fsubd	%f8,%f20,%f28
726/* 0x00d8	     */		std	%f28,[%l2-32]
727/* 0x00dc	 159 */		srl	%g2,16,%g1
728/* 0x00e0	     */		st	%g1,[%sp+2271]
729/* 0x00e4	     */		fsubd	%f10,%f20,%f30
730/* 0x00e8	     */		std	%f30,[%l2-24]
/* next odd source word */
731/* 0x00ec	 157 */		ld	[%l7-4],%l0
/* pending odd word: low half into %f13, high half into %f15 */
732/* 0x00f0	 158 */		ld	[%sp+2303],%f13
733/* 0x00f4	     */		ld	[%l1+%lo(___const_seg_900000501+8)],%f12
734/* 0x00f8	 159 */		ld	[%sp+2239],%f15
735/* 0x00fc	 158 */		and	%l0,%i4,%l5
736/* 0x0100	 159 */		fmovs	%f12,%f14
737/* 0x0104	 158 */		st	%l5,[%sp+2303]
738/* 0x0108	     */		fsubd	%f12,%f20,%f44
739/* 0x010c	     */		std	%f44,[%l2-16]
740/* 0x0110	 159 */		srl	%l0,16,%o5
741/* 0x0114	     */		st	%o5,[%sp+2239]
742/* 0x0118	     */		fsubd	%f14,%f20,%f46
/* loop while the count is <= len - 2, the last store is in the delay slot */
743/* 0x011c	     */		ble,pt	%icc,.L900000506
744/* 0x0120	     */		std	%f46,[%l2-8]
745                       .L900000509:
/*
 * Drain: convert and store the two words still pending in the stack
 * slots.  The last fsubd overwrites %f20, the one-word loop below
 * reloads it on every pass.  Finish when the count exceeds len - 1.
 */
746/* 0x0124	 158 */		ld	[%l1+%lo(___const_seg_900000501+8)],%f0
747/* 0x0128	 159 */		cmp	%i5,%l6
748/* 0x012c	     */		add	%i3,4,%i3
749/* 0x0130	 158 */		ld	[%sp+2335],%f1
750/* 0x0134	     */		ld	[%sp+2303],%f5
751/* 0x0138	 159 */		fmovs	%f0,%f2
752/* 0x013c	     */		ld	[%sp+2271],%f3
753/* 0x0140	 158 */		fmovs	%f0,%f4
754/* 0x0144	 159 */		ld	[%sp+2239],%f7
755/* 0x0148	     */		fmovs	%f0,%f6
756/* 0x014c	 158 */		fsubd	%f0,%f20,%f22
757/* 0x0150	     */		std	%f22,[%l2]
758/* 0x0154	 159 */		fsubd	%f2,%f20,%f24
759/* 0x0158	     */		std	%f24,[%l2+8]
760/* 0x015c	 158 */		fsubd	%f4,%f20,%f26
761/* 0x0160	     */		std	%f26,[%l2+16]
762/* 0x0164	 159 */		fsubd	%f6,%f20,%f20
763/* 0x0168	     */		bg,pn	%icc,.L77000272
764/* 0x016c	     */		std	%f20,[%l2+24]
765                       .L77000279:
/* one-word loop: %l2 = a, the next source word */
766/* 0x0170	 157 */		ld	[%l7],%l2
767                       .L900000510:
/* spill a & 0xffff and a >> 16 to their stack slots */
768/* 0x0174	 158 */		and	%l2,%i4,%o4
769/* 0x0178	     */		st	%o4,[%sp+2399]
770/* 0x017c	 159 */		srl	%l2,16,%o2
771/* 0x0180	     */		st	%o2,[%sp+2367]
772/* 0x0184	 158 */		sethi	%hi(___const_seg_900000501+8),%l1
/* %o1 = byte offset of d16[%i3], %o0 = byte offset of d16[%i3 + 1] */
773/* 0x0188	     */		sra	%i3,0,%i0
774/* 0x018c	     */		ld	[%l1+%lo(___const_seg_900000501+8)],%f16
775/* 0x0190	     */		sllx	%i0,3,%o1
776/* 0x0194	 159 */		add	%i3,1,%o3
777/* 0x0198	 158 */		ldd	[%i1+%lo(___const_seg_900000501)],%f20
778/* 0x019c	 159 */		sra	%o3,0,%l3
779/* 0x01a0	     */		add	%i5,1,%i5
780/* 0x01a4	 158 */		ld	[%sp+2399],%f17
781/* 0x01a8	 159 */		sllx	%l3,3,%o0
782/* 0x01ac	     */		add	%l7,4,%l7
783/* 0x01b0	     */		fmovs	%f16,%f18
784/* 0x01b4	     */		cmp	%i5,%l6
/* the d16 index advances by two per source word */
785/* 0x01b8	     */		add	%i3,2,%i3
786/* 0x01bc	 158 */		fsubd	%f16,%f20,%f48
787/* 0x01c0	     */		std	%f48,[%i2+%o1]
788/* 0x01c4	 159 */		ld	[%sp+2367],%f19
789/* 0x01c8	     */		fsubd	%f18,%f20,%f50
790/* 0x01cc	     */		std	%f50,[%i2+%o0]
/* loop while the count is <= len - 1, annulled delay slot loads the next word */
791/* 0x01d0	     */		ble,a,pt	%icc,.L900000510
792/* 0x01d4	 157 */		ld	[%l7],%l2
793                       .L77000272:
/* return and release the register window */
794/* 0x01d8	 159 */		ret	! Result =
795/* 0x01dc	     */		restore	%g0,%g0,%g0
796/* 0x01e0	   0 */		.type	conv_i32_to_d16,2
797/* 0x01e0	   0 */		.size	conv_i32_to_d16,(.-conv_i32_to_d16)
798
/*
 * Constants referenced by conv_i32_to_d32_and_d16.  1127219200 is
 * 0x43300000, the high word of the double 2^52.  Offset 0 holds the
 * complete double 2^52 and offset 8 holds the high word alone.  The
 * trailing directives pad so that the routine starts on a 32-byte
 * boundary.
 */
799	.section	".text",#alloc,#execinstr
800/* 000000	   0 */		.align	8
801!
802! CONSTANT POOL
803!
804                       ___const_seg_900000601:
805/* 000000	   0 */		.word	1127219200,0
806/* 0x0008	     */		.word	1127219200
807/* 0x000c	   0 */		.type	___const_seg_900000601,1
808/* 0x000c	   0 */		.size	___const_seg_900000601,(.-___const_seg_900000601)
809/* 0x000c	   0 */		.align	8
810/* 0x0010	     */		.skip	24
811/* 0x0028	     */		.align	32
812
813!  160		      !	}
814!  161		      !}
815!  163		      !#ifdef RF_INLINE_MACROS
816!  165		      !void
817!  166		      !i16_to_d16_and_d32x4(const double *,	/* 1/(2^16) */
818!  167		      !			const double *,	/* 2^16 */
819!  168		      !			const double *,	/* 0 */
820!  169		      !			double *,	/* result16 */
821!  170		      !			double *,	/* result32 */
822!  171		      !			float *);	/* source - should be unsigned int* */
823!  172		      !					/* converted to float* */
824!  174		      !#else
825!  177		      !/* ARGSUSED */
826!  178		      !static void
827!  179		      !i16_to_d16_and_d32x4(const double *dummy1,	/* 1/(2^16) */
828!  180		      !			const double *dummy2,	/* 2^16 */
829!  181		      !			const double *dummy3,	/* 0 */
830!  182		      !			double *result16,
831!  183		      !			double *result32,
832!  184		      !			float *src)	/* source - should be unsigned int* */
833!  185		      !					/* converted to float* */
834!  186		      !{
835!  187		      !	uint32_t *i32;
836!  188		      !	uint32_t a, b, c, d;
837!  190		      !	i32 = (uint32_t *)src;
838!  191		      !	a = i32[0];
839!  192		      !	b = i32[1];
840!  193		      !	c = i32[2];
841!  194		      !	d = i32[3];
842!  195		      !	result16[0] = (double)(a & 0xffff);
843!  196		      !	result16[1] = (double)(a >> 16);
844!  197		      !	result32[0] = (double)a;
845!  198		      !	result16[2] = (double)(b & 0xffff);
846!  199		      !	result16[3] = (double)(b >> 16);
847!  200		      !	result32[1] = (double)b;
848!  201		      !	result16[4] = (double)(c & 0xffff);
849!  202		      !	result16[5] = (double)(c >> 16);
850!  203		      !	result32[2] = (double)c;
851!  204		      !	result16[6] = (double)(d & 0xffff);
852!  205		      !	result16[7] = (double)(d >> 16);
853!  206		      !	result32[3] = (double)d;
854!  207		      !}
855!  209		      !#endif
856!  212		      !void
857!  213		      !conv_i32_to_d32_and_d16(double *d32, double *d16, uint32_t *i32, int len)
858!  214		      !{
859
860!
861! SUBROUTINE conv_i32_to_d32_and_d16
862!
863! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
864
865			.global conv_i32_to_d32_and_d16
866                       conv_i32_to_d32_and_d16:
867/* 000000	 214 */		save	%sp,-368,%sp
868
869!  215		      !	int i;
870!  216		      !	uint32_t a;
871!  218		      !#pragma pipeloop(0)
872!  219		      !	for (i = 0; i < len - 3; i += 4) {
873!  220		      !		i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
874!  221		      !					&(d16[2*i]), &(d32[i]),
875!  222		      !					(float *)(&(i32[i])));
876!  223		      !	}
877!  224		      !	for (; i < len; i++) {
878!  225		      !		a = i32[i];
879!  226		      !		d32[i] = (double)(i32[i]);
880!  227		      !		d16[2 * i] = (double)(a & 0xffff);
881!  228		      !		d16[2 * i + 1] = (double)(a >> 16);
882
883/* 0x0004	 228 */		sub	%i3,3,%i4
884/* 0x0008	 219 */		cmp	%i4,0
885/* 0x000c	     */		ble,pn	%icc,.L77000289
886/* 0x0010	     */		or	%g0,0,%i5
887                       .L77000306:
888/* 0x0014	 222 */		sethi	%hi(Zero),%g3
889/* 0x0018	     */		sethi	%hi(TwoToMinus16),%g2
890/* 0x001c	     */		sethi	%hi(TwoTo16),%o5
891/* 0x0020	     */		ldd	[%g3+%lo(Zero)],%f2
892/* 0x0024	 219 */		sub	%i3,4,%o4
893/* 0x0028	     */		or	%g0,0,%o3
894/* 0x002c	     */		or	%g0,%i0,%l6
895/* 0x0030	     */		or	%g0,%i2,%l5
896                       .L900000615:
897/* 0x0034	 222 */		fmovd	%f2,%f26
898/* 0x0038	     */		ld	[%l5],%f27
899/* 0x003c	     */		sra	%o3,0,%o0
900/* 0x0040	     */		add	%i5,4,%i5
901/* 0x0044	     */		fmovd	%f2,%f28
902/* 0x0048	     */		ld	[%l5+4],%f29
903/* 0x004c	     */		sllx	%o0,3,%g5
904/* 0x0050	     */		cmp	%i5,%o4
905/* 0x0054	     */		fmovd	%f2,%f30
906/* 0x0058	     */		ld	[%l5+8],%f31
907/* 0x005c	     */		add	%i1,%g5,%g4
908/* 0x0060	     */		add	%o3,8,%o3
909/* 0x0064	     */		ld	[%l5+12],%f3
910/* 0x0068	     */		fxtod	%f26,%f26
911/* 0x006c	     */		ldd	[%g2+%lo(TwoToMinus16)],%f32
912/* 0x0070	     */		fxtod	%f28,%f28
913/* 0x0074	     */		add	%l5,16,%l5
914/* 0x0078	     */		fxtod	%f30,%f30
915/* 0x007c	     */		ldd	[%o5+%lo(TwoTo16)],%f34
916/* 0x0080	     */		fxtod	%f2,%f2
917/* 0x0084	     */		std	%f2,[%l6+24]
918/* 0x0088	     */		fmuld	%f32,%f26,%f36
919/* 0x008c	     */		std	%f26,[%l6]
920/* 0x0090	     */		fmuld	%f32,%f28,%f38
921/* 0x0094	     */		std	%f28,[%l6+8]
922/* 0x0098	     */		fmuld	%f32,%f30,%f40
923/* 0x009c	     */		std	%f30,[%l6+16]
924/* 0x00a0	     */		fmuld	%f32,%f2,%f42
925/* 0x00a4	     */		add	%l6,32,%l6
926/* 0x00a8	     */		fdtox	%f36,%f36
927/* 0x00ac	     */		fdtox	%f38,%f38
928/* 0x00b0	     */		fdtox	%f40,%f40
929/* 0x00b4	     */		fdtox	%f42,%f42
930/* 0x00b8	     */		fxtod	%f36,%f36
931/* 0x00bc	     */		std	%f36,[%g4+8]
932/* 0x00c0	     */		fxtod	%f38,%f38
933/* 0x00c4	     */		std	%f38,[%g4+24]
934/* 0x00c8	     */		fxtod	%f40,%f40
935/* 0x00cc	     */		std	%f40,[%g4+40]
936/* 0x00d0	     */		fxtod	%f42,%f42
937/* 0x00d4	     */		std	%f42,[%g4+56]
938/* 0x00d8	     */		fmuld	%f36,%f34,%f36
939/* 0x00dc	     */		fmuld	%f38,%f34,%f38
940/* 0x00e0	     */		fmuld	%f40,%f34,%f40
941/* 0x00e4	     */		fmuld	%f42,%f34,%f42
942/* 0x00e8	     */		fsubd	%f26,%f36,%f36
943/* 0x00ec	     */		std	%f36,[%i1+%g5]
944/* 0x00f0	     */		fsubd	%f28,%f38,%f38
945/* 0x00f4	     */		std	%f38,[%g4+16]
946/* 0x00f8	     */		fsubd	%f30,%f40,%f40
947/* 0x00fc	     */		std	%f40,[%g4+32]
948/* 0x0100	     */		fsubd	%f2,%f42,%f42
949/* 0x0104	     */		std	%f42,[%g4+48]
950/* 0x0108	     */		ble,a,pt	%icc,.L900000615
951/* 0x010c	     */		ldd	[%g3+%lo(Zero)],%f2
952                       .L77000289:
953/* 0x0110	 224 */		cmp	%i5,%i3
954/* 0x0114	     */		bge,pn	%icc,.L77000294
955/* 0x0118	     */		sethi	%hi(0xfc00),%l0
956                       .L77000307:
957/* 0x011c	 224 */		sra	%i5,0,%l2
958/* 0x0120	     */		sll	%i5,1,%i4
959/* 0x0124	     */		sllx	%l2,3,%l1
960/* 0x0128	     */		sllx	%l2,2,%o1
961/* 0x012c	 225 */		sub	%i3,%i5,%l3
962/* 0x0130	 224 */		add	%l0,1023,%l0
963/* 0x0134	     */		add	%l1,%i0,%l1
964/* 0x0138	     */		add	%o1,%i2,%i2
965/* 0x013c	 225 */		cmp	%l3,5
966/* 0x0140	     */		bl,pn	%icc,.L77000291
967/* 0x0144	   0 */		sethi	%hi(___const_seg_900000601),%l7
968                       .L900000612:
969/* 0x0148	 225 */		prefetch	[%l1],22
970/* 0x014c	     */		prefetch	[%l1+64],22
971/* 0x0150	     */		sra	%i4,0,%l6
972/* 0x0154	 226 */		sethi	%hi(___const_seg_900000601+8),%l2
973/* 0x0158	 225 */		prefetch	[%l1+128],22
974/* 0x015c	     */		add	%l6,-2,%l5
975/* 0x0160	     */		sub	%i3,3,%i0
976/* 0x0164	     */		prefetch	[%l1+192],22
977/* 0x0168	     */		sllx	%l5,3,%o4
978/* 0x016c	 228 */		add	%i5,1,%i5
979/* 0x0170	 225 */		add	%i1,%o4,%o3
980/* 0x0174	     */		or	%g0,%i3,%g1
981/* 0x0178	     */		ld	[%i2],%l4
982/* 0x017c	     */		prefetch	[%o3+16],22
983/* 0x0180	     */		add	%o3,16,%l3
984/* 0x0184	 228 */		add	%i2,4,%i2
985/* 0x0188	 225 */		prefetch	[%o3+80],22
986/* 0x018c	 228 */		srl	%l4,16,%o1
987/* 0x0190	 227 */		and	%l4,%l0,%o0
988/* 0x0194	 225 */		prefetch	[%o3+144],22
989/* 0x0198	 228 */		st	%o1,[%sp+2271]
990/* 0x019c	 227 */		st	%o0,[%sp+2239]
991/* 0x01a0	 226 */		ldd	[%l7+%lo(___const_seg_900000601)],%f32
992/* 0x01a4	 228 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f0
993/* 0x01a8	 225 */		prefetch	[%o3+208],22
994/* 0x01ac	     */		prefetch	[%o3+272],22
995/* 0x01b0	     */		prefetch	[%o3+336],22
996                       .L900000610:
997/* 0x01b4	 225 */		prefetch	[%l1+192],22
998/* 0x01b8	 228 */		add	%i5,4,%i5
999/* 0x01bc	 225 */		add	%l3,64,%l3
1000/* 0x01c0	 227 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f8
1001/* 0x01c4	 228 */		cmp	%i5,%i0
1002/* 0x01c8	 225 */		ld	[%i2],%g5
1003/* 0x01cc	 228 */		add	%i2,16,%i2
1004/* 0x01d0	     */		add	%l1,32,%l1
1005/* 0x01d4	     */		add	%i4,8,%i4
1006/* 0x01d8	 226 */		ld	[%i2-20],%f7
1007/* 0x01dc	 228 */		srl	%g5,16,%i3
1008/* 0x01e0	 226 */		fmovs	%f8,%f6
1009/* 0x01e4	 228 */		st	%i3,[%sp+2335]
1010/* 0x01e8	 227 */		and	%g5,%l0,%g4
1011/* 0x01ec	     */		st	%g4,[%sp+2303]
1012/* 0x01f0	 226 */		fsubd	%f6,%f32,%f40
1013/* 0x01f4	 227 */		ld	[%sp+2239],%f9
1014/* 0x01f8	 228 */		ld	[%sp+2271],%f1
1015/* 0x01fc	     */		fmovs	%f8,%f12
1016/* 0x0200	 226 */		std	%f40,[%l1-32]
1017/* 0x0204	 227 */		fsubd	%f8,%f32,%f42
1018/* 0x0208	     */		std	%f42,[%l3-64]
1019/* 0x020c	 228 */		fsubd	%f0,%f32,%f44
1020/* 0x0210	     */		std	%f44,[%l3-56]
1021/* 0x0214	 227 */		fmovs	%f12,%f10
1022/* 0x0218	 225 */		ld	[%i2-12],%g2
1023/* 0x021c	 226 */		ld	[%i2-16],%f1
1024/* 0x0220	 228 */		srl	%g2,16,%g3
1025/* 0x0224	 226 */		fmovs	%f12,%f0
1026/* 0x0228	 225 */		prefetch	[%l3+320],22
1027/* 0x022c	 228 */		st	%g3,[%sp+2271]
1028/* 0x0230	 227 */		and	%g2,%l0,%l6
1029/* 0x0234	     */		st	%l6,[%sp+2239]
1030/* 0x0238	 226 */		fsubd	%f0,%f32,%f46
1031/* 0x023c	 227 */		ld	[%sp+2303],%f11
1032/* 0x0240	 228 */		ld	[%sp+2335],%f13
1033/* 0x0244	     */		fmovs	%f12,%f18
1034/* 0x0248	 226 */		std	%f46,[%l1-24]
1035/* 0x024c	 227 */		fsubd	%f10,%f32,%f48
1036/* 0x0250	     */		std	%f48,[%l3-48]
1037/* 0x0254	 228 */		fsubd	%f12,%f32,%f50
1038/* 0x0258	     */		std	%f50,[%l3-40]
1039/* 0x025c	 227 */		fmovs	%f18,%f16
1040/* 0x0260	 225 */		ld	[%i2-8],%o5
1041/* 0x0264	 226 */		ld	[%i2-12],%f15
1042/* 0x0268	 228 */		srl	%o5,16,%l5
1043/* 0x026c	 226 */		fmovs	%f18,%f14
1044/* 0x0270	 228 */		st	%l5,[%sp+2335]
1045/* 0x0274	 227 */		and	%o5,%l0,%o4
1046/* 0x0278	     */		st	%o4,[%sp+2303]
1047/* 0x027c	 226 */		fsubd	%f14,%f32,%f52
1048/* 0x0280	 227 */		ld	[%sp+2239],%f17
1049/* 0x0284	 228 */		ld	[%sp+2271],%f19
1050/* 0x0288	 225 */		prefetch	[%l3+352],22
1051/* 0x028c	 228 */		fmovs	%f18,%f24
1052/* 0x0290	 226 */		std	%f52,[%l1-16]
1053/* 0x0294	 227 */		fsubd	%f16,%f32,%f54
1054/* 0x0298	     */		std	%f54,[%l3-32]
1055/* 0x029c	 228 */		fsubd	%f18,%f32,%f56
1056/* 0x02a0	     */		std	%f56,[%l3-24]
1057/* 0x02a4	 227 */		fmovs	%f24,%f22
1058/* 0x02a8	 225 */		ld	[%i2-4],%l4
1059/* 0x02ac	 226 */		ld	[%i2-8],%f21
1060/* 0x02b0	 228 */		srl	%l4,16,%o3
1061/* 0x02b4	 226 */		fmovs	%f24,%f20
1062/* 0x02b8	 228 */		st	%o3,[%sp+2271]
1063/* 0x02bc	 227 */		and	%l4,%l0,%o2
1064/* 0x02c0	     */		st	%o2,[%sp+2239]
1065/* 0x02c4	 226 */		fsubd	%f20,%f32,%f58
1066/* 0x02c8	 227 */		ld	[%sp+2303],%f23
1067/* 0x02cc	 228 */		ld	[%sp+2335],%f25
1068/* 0x02d0	     */		fmovs	%f24,%f0
1069/* 0x02d4	 226 */		std	%f58,[%l1-8]
1070/* 0x02d8	 227 */		fsubd	%f22,%f32,%f60
1071/* 0x02dc	     */		std	%f60,[%l3-16]
1072/* 0x02e0	 228 */		fsubd	%f24,%f32,%f62
1073/* 0x02e4	     */		bl,pt	%icc,.L900000610
1074/* 0x02e8	     */		std	%f62,[%l3-8]
1075                       .L900000613:
1076/* 0x02ec	 227 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f4
1077/* 0x02f0	 228 */		add	%l1,8,%l1
1078/* 0x02f4	     */		cmp	%i5,%g1
1079/* 0x02f8	 226 */		ld	[%i2-4],%f3
1080/* 0x02fc	 225 */		or	%g0,%g1,%i3
1081/* 0x0300	 228 */		add	%i4,2,%i4
1082/* 0x0304	 227 */		ld	[%sp+2239],%f5
1083/* 0x0308	 226 */		fmovs	%f4,%f2
1084/* 0x030c	 228 */		ld	[%sp+2271],%f1
1085/* 0x0310	 226 */		fsubd	%f2,%f32,%f34
1086/* 0x0314	     */		std	%f34,[%l1-8]
1087/* 0x0318	 227 */		fsubd	%f4,%f32,%f36
1088/* 0x031c	     */		std	%f36,[%l3]
1089/* 0x0320	 228 */		fsubd	%f0,%f32,%f38
1090/* 0x0324	     */		bge,pn	%icc,.L77000294
1091/* 0x0328	     */		std	%f38,[%l3+8]
1092                       .L77000291:
1093/* 0x032c	 225 */		ld	[%i2],%o2
1094                       .L900000614:
1095/* 0x0330	 226 */		ldd	[%l7+%lo(___const_seg_900000601)],%f32
1096/* 0x0334	 228 */		srl	%o2,16,%l3
1097/* 0x0338	 227 */		sra	%i4,0,%i0
1098/* 0x033c	 228 */		st	%l3,[%sp+2367]
1099/* 0x0340	 227 */		and	%o2,%l0,%g1
1100/* 0x0344	 226 */		sethi	%hi(___const_seg_900000601+8),%l2
1101/* 0x0348	 227 */		st	%g1,[%sp+2399]
1102/* 0x034c	     */		sllx	%i0,3,%o0
1103/* 0x0350	 228 */		add	%i4,1,%l4
1104/* 0x0354	 226 */		ld	[%l2+%lo(___const_seg_900000601+8)],%f4
1105/* 0x0358	 228 */		sra	%l4,0,%o1
1106/* 0x035c	     */		add	%i5,1,%i5
1107/* 0x0360	 226 */		ld	[%i2],%f5
1108/* 0x0364	 228 */		sllx	%o1,3,%g5
1109/* 0x0368	     */		cmp	%i5,%i3
1110/* 0x036c	     */		ld	[%sp+2367],%f9
1111/* 0x0370	     */		add	%i2,4,%i2
1112/* 0x0374	     */		add	%i4,2,%i4
1113/* 0x0378	 227 */		fmovs	%f4,%f6
1114/* 0x037c	 226 */		fsubd	%f4,%f32,%f44
1115/* 0x0380	     */		std	%f44,[%l1]
1116/* 0x0384	 227 */		ld	[%sp+2399],%f7
1117/* 0x0388	 228 */		fmovs	%f6,%f8
1118/* 0x038c	     */		add	%l1,8,%l1
1119/* 0x0390	     */		fsubd	%f8,%f32,%f48
1120/* 0x0394	 227 */		fsubd	%f6,%f32,%f46
1121/* 0x0398	     */		std	%f46,[%i1+%o0]
1122/* 0x039c	 228 */		std	%f48,[%i1+%g5]
1123/* 0x03a0	     */		bl,a,pt	%icc,.L900000614
1124/* 0x03a4	 225 */		ld	[%i2],%o2
1125                       .L77000294:
1126/* 0x03a8	 222 */		ret	! Result =
1127/* 0x03ac	     */		restore	%g0,%g0,%g0
1128/* 0x03b0	   0 */		.type	conv_i32_to_d32_and_d16,2
1129/* 0x03b0	   0 */		.size	conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
1130
1131	.section	".text",#alloc,#execinstr
1132/* 000000	   0 */		.align	32
1133
1134!  229		      !	}
1135!  230		      !}
1136!  232		      !extern long long c1, c2, c3, c4;
1137!  234		      !static void
1138!  235		      !adjust_montf_result(uint32_t *i32, uint32_t *nint, int len)
1139!  236		      !{
1140
1141!
1142! SUBROUTINE adjust_montf_result
1143!
1144! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
1145
!
! adjust_montf_result(i32, nint, len) -- leaf routine (no save/restore;
! returns via retl).  Per the interleaved C source, on entry
! %o0 = i32, %o1 = nint, %o2 = len.
!
! If i32[len] != 0, or if scanning words from index len-1 downward shows
! i32[0..len-1] >= nint[0..len-1], then nint is subtracted from i32 in
! place, one 32-bit word at a time, with the borrow carried in a 64-bit
! register.  Otherwise (or when len <= 0) i32 is left untouched.
! Note that i32[len] is read unconditionally.
!
! Register roles once the first few instructions have run:
!   %g2  len (sign-extended); reused for the running borrow later on
!   %g3  len - 1, the last word index and the subtraction loop bound
!   %o4  i32 pointer, %o5  nint pointer
!   %o3  descending index i during the compare scan
!   %o0  ascending index i during the subtraction
!
! Delay slots: the instruction following each branch executes before the
! branch target is reached; on branches carrying ",a" it is annulled
! (skipped) when the conditional branch is not taken.
!
1146                       adjust_montf_result:
1147/* 000000	 236 */		sra	%o2,0,%g2	! %g2 = len, sign-extended to 64 bits
1148/* 0x0004	     */		or	%g0,%o0,%o4	! %o4 = i32 (%o0 is reused as a scratch below)
1149
1150!  237		      !	int64_t acc;
1151!  238		      !	int i;
1152!  240		      !	if (i32[len] > 0) {
1153
1154/* 0x0008	 240 */		sllx	%g2,2,%g3	! %g3 = len * 4, byte offset of i32[len]
1155/* 0x000c	     */		ld	[%o0+%g3],%o0	! %o0 = i32[len]
1156/* 0x0010	     */		cmp	%o0,0
1157/* 0x0014	     */		bleu,pn	%icc,.L77000316	! unsigned <= 0, i.e. i32[len] == 0: do the word compare
1158/* 0x0018	 236 */		or	%g0,%o1,%o5	! (delay slot) %o5 = nint
1159
1160!  241		      !		i = -1;
1161
1162                       .L77000315:
1163/* 0x001c	 241 */		sub	%g2,1,%g3	! %g3 = len - 1 (loop bound for the subtraction)
1164/* 0x0020	     */		ba	.L900000712	! i32[len] != 0: skip the compare, go subtract
1165/* 0x0024	 249 */		cmp	%g2,0	! (delay slot) flags for the len <= 0 test at the target
1166
1167!  242		      !	} else {
1168!  243		      !		for (i = len - 1; i >= 0; i--) {
1169
1170                       .L77000316:
1171/* 0x0028	 243 */		subcc	%g2,1,%g3	! %g3 = len - 1, setting condition codes
1172/* 0x002c	     */		bneg,pn	%icc,.L77000340	! len - 1 < 0: nothing to compare, treat as i < 0
1173/* 0x0030	     */		or	%g0,%g3,%o3	! (delay slot) %o3 = i = len - 1
1174                       .L77000348:
1175/* 0x0034	 243 */		sra	%g3,0,%o1
1176/* 0x0038	     */		sllx	%o1,2,%g1	! %g1 = (len - 1) * 4, byte offset of the top word
1177
1178!  244		      !			if (i32[i] != nint[i]) break;
1179
1180/* 0x003c	 244 */		ld	[%g1+%o5],%g4	! %g4 = nint[len-1], preloaded for the first compare
1181/* 0x0040	 243 */		add	%g1,%o4,%o2	! %o2 = &i32[len-1]
1182/* 0x0044	     */		add	%g1,%o5,%o1	! %o1 = &nint[len-1]
1183                       .L900000713:
1184/* 0x0048	 244 */		ld	[%o2],%o0	! %o0 = i32[i]
1185/* 0x004c	     */		cmp	%o0,%g4	! i32[i] vs nint[i]
1186/* 0x0050	     */		bne,pn	%icc,.L77000324	! words differ: leave the scan
1187/* 0x0054	     */		sub	%o2,4,%o2	! (delay slot, always executed) step i32 pointer down
1188                       .L77000320:
1189/* 0x0058	 244 */		sub	%o1,4,%o1	! step nint pointer down
1190/* 0x005c	     */		subcc	%o3,1,%o3	! i--
1191/* 0x0060	     */		bpos,a,pt	%icc,.L900000713	! keep scanning while i >= 0
1192/* 0x0064	     */		ld	[%o1],%g4	! (annulled delay slot) %g4 = next nint[i], only when looping
1193                       .L900000706:
1194/* 0x0068	 244 */		ba	.L900000712	! every word was equal (i < 0): go subtract
1195/* 0x006c	 249 */		cmp	%g2,0	! (delay slot) flags for the len <= 0 test at the target
1196                       .L77000324:
1197/* 0x0070	 244 */		sra	%o3,0,%o0
1198/* 0x0074	     */		sllx	%o0,2,%g1	! %g1 = i * 4 for the first differing index i
1199/* 0x0078	     */		ld	[%o5+%g1],%o3	! %o3 = nint[i]
1200/* 0x007c	     */		ld	[%o4+%g1],%g5	! %g5 = i32[i]
1201/* 0x0080	     */		cmp	%g5,%o3
1202/* 0x0084	     */		bleu,pt	%icc,.L77000332	! unsigned i32[i] <= nint[i]: no adjustment, return
1203/* 0x0088	     */		nop
1204
1205!  245		      !		}
1206!  246		      !	}
1207!  247		      !	if ((i < 0) || (i32[i] > nint[i])) {
1208!  248		      !		acc = 0;
1209!  249		      !		for (i = 0; i < len; i++) {
1210
1211                       .L77000340:
1212/* 0x008c	 249 */		cmp	%g2,0
1213                       .L900000712:
1214/* 0x0090	 249 */		ble,pn	%icc,.L77000332	! len <= 0: nothing to subtract, return
1215/* 0x0094	 250 */		or	%g0,%g2,%o3	! (delay slot) %o3 = len
1216                       .L77000347:
1217/* 0x0098	 249 */		or	%g0,0,%o0	! %o0 = i = 0
1218
1219!  250		      !			acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1220
1221/* 0x009c	 250 */		cmp	%o3,10
1222/* 0x00a0	     */		bl,pn	%icc,.L77000341	! len < 10: use the one-word-per-iteration loop only
1223/* 0x00a4	 249 */		or	%g0,0,%g2	! (delay slot) %g2 = borrow = 0 (len itself is no longer needed)
1224                       .L900000709:
! Unrolled path (len >= 10).  Word 0 is handled here; the loop below then
! handles 8 words per iteration while keeping the next i32 word preloaded
! in %g5 and the borrow (acc >> 32) in %g4.
1225/* 0x00a8	 250 */		prefetch	[%o4],22	! prefetches (function code 22) run ahead over i32
1226/* 0x00ac	     */		prefetch	[%o4+64],22
1227
1228!  251		      !			i32[i] = acc & 0xffffffff;
1229!  252		      !			acc = acc >> 32;
1230
1231/* 0x00b0	 252 */		add	%o5,4,%o1	! %o1 = &nint[1]
1232/* 0x00b4	     */		add	%o4,8,%o2	! %o2 = &i32[2]
1233/* 0x00b8	 250 */		prefetch	[%o4+128],22
1234/* 0x00bc	     */		sub	%o3,8,%o5	! %o5 = len - 8, bound for the unrolled loop (nint pointer now lives in %o1)
1235/* 0x00c0	     */		or	%g0,2,%o0	! %o0 = 2; compared against the bounds after each +8 step
1236/* 0x00c4	     */		prefetch	[%o4+192],22
1237/* 0x00c8	     */		prefetch	[%o4+256],22
1238/* 0x00cc	     */		prefetch	[%o4+320],22
1239/* 0x00d0	     */		prefetch	[%o4+384],22
1240/* 0x00d4	     */		ld	[%o2-4],%g5	! %g5 = i32[1], preloaded for the first loop pass
1241/* 0x00d8	     */		prefetch	[%o2+440],22
1242/* 0x00dc	     */		prefetch	[%o2+504],22
1243/* 0x00e0	     */		ld	[%o4],%g4	! %g4 = i32[0]
1244/* 0x00e4	     */		ld	[%o1-4],%o4	! %o4 = nint[0] (i32 base is now tracked through %o2)
1245/* 0x00e8	     */		sub	%g4,%o4,%o3	! 64-bit difference i32[0] - nint[0]
1246/* 0x00ec	 251 */		st	%o3,[%o2-8]	! i32[0] = low 32 bits
1247/* 0x00f0	 252 */		srax	%o3,32,%g4	! %g4 = borrow out of word 0 (arithmetic shift keeps the sign)
1248                       .L900000707:
! Loop invariant on entry, for the current word index k:
!   %o1 = &nint[k], %o2 = &i32[k+1], %g5 = i32[k], %g4 = borrow into word k.
! Both pointers are advanced early, so the loads and stores below use
! negative offsets.
1249/* 0x00f4	 252 */		add	%o0,8,%o0	! index += 8
1250/* 0x00f8	     */		add	%o2,32,%o2	! i32 pointer += 8 words
1251/* 0x00fc	 250 */		ld	[%o1],%g1	! nint[k]
1252/* 0x0100	     */		prefetch	[%o2+496],22
1253/* 0x0104	 252 */		cmp	%o0,%o5	! loop test; nothing below alters the condition codes before the ble
1254/* 0x0108	     */		add	%o1,32,%o1	! nint pointer += 8 words
1255/* 0x010c	 250 */		sub	%g5,%g1,%g5
1256/* 0x0110	     */		add	%g5,%g4,%o4	! word k: i32[k] - nint[k] + borrow
1257/* 0x0114	     */		ld	[%o2-32],%g4	! i32[k+1]
1258/* 0x0118	 251 */		st	%o4,[%o2-36]	! store word k
1259/* 0x011c	 252 */		srax	%o4,32,%g1	! borrow out of word k
1260/* 0x0120	 250 */		ld	[%o1-28],%o3	! nint[k+1]
1261/* 0x0124	     */		sub	%g4,%o3,%g2
1262/* 0x0128	     */		add	%g2,%g1,%g5	! word k+1
1263/* 0x012c	     */		ld	[%o2-28],%o3	! i32[k+2]
1264/* 0x0130	 251 */		st	%g5,[%o2-32]	! store word k+1
1265/* 0x0134	 252 */		srax	%g5,32,%g4	! borrow out of word k+1
1266/* 0x0138	 250 */		ld	[%o1-24],%o4	! nint[k+2]
1267/* 0x013c	     */		sub	%o3,%o4,%g1
1268/* 0x0140	     */		add	%g1,%g4,%g2	! word k+2
1269/* 0x0144	     */		ld	[%o2-24],%o3	! i32[k+3]
1270/* 0x0148	 251 */		st	%g2,[%o2-28]	! store word k+2
1271/* 0x014c	 252 */		srax	%g2,32,%g5	! borrow out of word k+2
1272/* 0x0150	 250 */		ld	[%o1-20],%o4	! nint[k+3]
1273/* 0x0154	     */		sub	%o3,%o4,%g4
1274/* 0x0158	     */		add	%g4,%g5,%g1	! word k+3
1275/* 0x015c	     */		ld	[%o2-20],%o4	! i32[k+4]
1276/* 0x0160	 251 */		st	%g1,[%o2-24]	! store word k+3
1277/* 0x0164	 252 */		srax	%g1,32,%o3	! borrow out of word k+3
1278/* 0x0168	 250 */		ld	[%o1-16],%g2	! nint[k+4]
1279/* 0x016c	     */		sub	%o4,%g2,%g5
1280/* 0x0170	     */		add	%g5,%o3,%g1	! word k+4
1281/* 0x0174	     */		ld	[%o2-16],%g4	! i32[k+5]
1282/* 0x0178	 251 */		st	%g1,[%o2-20]	! store word k+4
1283/* 0x017c	 252 */		srax	%g1,32,%o4	! borrow out of word k+4
1284/* 0x0180	 250 */		ld	[%o1-12],%g2	! nint[k+5]
1285/* 0x0184	     */		sub	%g4,%g2,%o3
1286/* 0x0188	     */		add	%o3,%o4,%g5	! word k+5
1287/* 0x018c	     */		ld	[%o2-12],%g2	! i32[k+6]
1288/* 0x0190	 251 */		st	%g5,[%o2-16]	! store word k+5
1289/* 0x0194	 252 */		srax	%g5,32,%g4	! borrow out of word k+5
1290/* 0x0198	 250 */		ld	[%o1-8],%g1	! nint[k+6]
1291/* 0x019c	     */		sub	%g2,%g1,%o4
1292/* 0x01a0	     */		add	%o4,%g4,%o3	! word k+6
1293/* 0x01a4	     */		ld	[%o2-8],%g2	! i32[k+7]
1294/* 0x01a8	 251 */		st	%o3,[%o2-12]	! store word k+6
1295/* 0x01ac	 252 */		srax	%o3,32,%g5	! borrow out of word k+6
1296/* 0x01b0	 250 */		ld	[%o1-4],%g1	! nint[k+7]
1297/* 0x01b4	     */		sub	%g2,%g1,%g4
1298/* 0x01b8	     */		add	%g4,%g5,%o4	! word k+7
1299/* 0x01bc	     */		ld	[%o2-4],%g5	! preload i32[k+8] for the next pass / the tail below
1300/* 0x01c0	 251 */		st	%o4,[%o2-8]	! store word k+7
1301/* 0x01c4	 252 */		ble,pt	%icc,.L900000707	! repeat while index <= len - 8
1302/* 0x01c8	     */		srax	%o4,32,%g4	! (delay slot) borrow out of word k+7
1303                       .L900000710:
! Tail of the unrolled path: finish the word already preloaded in %g5, then
! hand pointers and borrow over to the simple loop's registers.
1304/* 0x01cc	 250 */		ld	[%o1],%o3	! nint word matching the preloaded %g5
1305/* 0x01d0	 252 */		add	%o1,4,%o5	! %o5 = nint pointer for the remainder loop
1306/* 0x01d4	 250 */		or	%g0,%o2,%o4	! %o4 = i32 pointer for the remainder loop
1307/* 0x01d8	 252 */		cmp	%o0,%g3	! next index vs len - 1
1308/* 0x01dc	 250 */		sub	%g5,%o3,%g2
1309/* 0x01e0	     */		add	%g2,%g4,%g1	! difference plus incoming borrow
1310/* 0x01e4	 251 */		st	%g1,[%o2-4]	! store the finished word
1311/* 0x01e8	 252 */		bg,pn	%icc,.L77000332	! index > len - 1: all words done, return
1312/* 0x01ec	     */		srax	%g1,32,%g2	! (delay slot) %g2 = borrow for the remainder loop
1313                       .L77000341:
! Simple loop: one word per iteration.  %o4 -> i32[i], %o5 -> nint[i],
! %o0 = i, %g2 = borrow, %g3 = len - 1.
1314/* 0x01f0	 250 */		ld	[%o4],%g5	! %g5 = i32[i]
1315                       .L900000711:
1316/* 0x01f4	 250 */		ld	[%o5],%o2	! %o2 = nint[i]
1317/* 0x01f8	     */		add	%g2,%g5,%g4	! borrow + i32[i]
1318/* 0x01fc	 252 */		add	%o0,1,%o0	! i++
1319/* 0x0200	     */		cmp	%o0,%g3	! loop test: i <= len - 1
1320/* 0x0204	     */		add	%o5,4,%o5	! advance nint pointer
1321/* 0x0208	 250 */		sub	%g4,%o2,%o1	! ... - nint[i]
1322/* 0x020c	 251 */		st	%o1,[%o4]	! i32[i] = low 32 bits
1323/* 0x0210	 252 */		srax	%o1,32,%g2	! borrow for the next word
1324/* 0x0214	     */		add	%o4,4,%o4	! advance i32 pointer
1325/* 0x0218	     */		ble,a,pt	%icc,.L900000711	! more words remain
1326/* 0x021c	 250 */		ld	[%o4],%g5	! (annulled delay slot) next i32[i], only when looping
1327                       .L77000332:
1328/* 0x0220	 252 */		retl	! Result =
1329/* 0x0224	     */		nop
1330/* 0x0228	   0 */		.type	adjust_montf_result,2
1331/* 0x0228	   0 */		.size	adjust_montf_result,(.-adjust_montf_result)
1332
1333	.section	".text",#alloc,#execinstr
1334/* 000000	   0 */		.align	32
1335
1336!  253		      !		}
1337!  254		      !	}
1338!  255		      !}
1339!  257		      !/*************
1340!  258		      !static void
1341!  259		      !adjust_montf_result_bad(uint32_t *i32, uint32_t *nint, int len)
1342!  260		      !{
1343!  261		      !	int64_t acc;
1344!  262		      !	int i;
1345!  264		      !	c4++;
1346!  265		      !
1347!  266		      !	if (i32[len] > 0) {
1348!  267		      !		i = -1;
1349!  268		      !		c1++;
1350!  269		      !	} else {
1351!  270		      !		for (i = len - 1; i >= 0; i++) {
1352!  271		      !			if (i32[i] != nint[i]) break;
1353!  272		      !			c2++;
1354!  273		      !		}
1355!  274		      !	}
1356!  275		      !	if ((i < 0) || (i32[i] > nint[i])) {
1357!  276		      !		c3++;
1358!  277		      !		acc = 0;
1359!  278		      !		for (i = 0; i < len; i++) {
1360!  279		      !			acc = acc + (uint64_t)(i32[i]) - (uint64_t)(nint[i]);
1361!  280		      !			i32[i] = acc & 0xffffffff;
1362!  281		      !			acc = acc >> 32;
1363!  282		      !		}
1364!  283		      !	}
1365!  284		      !}
1366!  285		      !uint32_t saveresult[1000];
1367!  286		      !void printarray(char *name, uint32_t *arr, int len)
1368!  287		      !{
1369!  288		      !	int i, j;
1370!  289		      !	uint64_t tmp;
1371!  291		      !	printf("uint64_t %s[%d] =\n{\n",name,(len+1)/2);
1372!  292		      !	for(i=j=0; i<len; i+=2,j+=2){
1373!  293		      !		if(j == 6){
1374!  294		      !			printf("\n");
1375!  295		      !			j=0;
1376!  296		      !		}
1377!  297		      !		tmp = (((uint64_t)arr[i])<<32) | ((uint64_t)arr[i+1]);
1378!  298		      !		printf("0x%016llx",tmp);
1379!  299		      !		if((i/2)!=(((len+1)/2)-1))printf(",");
1380!  300		      !		if(j!=4)printf(" ");
1381!  301		      !	}
1382!  302		      !	if(j!=0) printf("\n");
1383!  303		      !	printf("};\n");
1384!  304		      !}
1385!  305		      !**************/
1386!  308		      !/*
1387!  309		      ! * the lengths of the input arrays should be at least the following:
1388!  310		      ! * result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
1389!  311		      ! * all of them should be different from one another
1390!  312		      ! */
1391!  313		      !void mont_mulf_noconv(uint32_t *result,
1392!  314		      !			double *dm1, double *dm2, double *dt,
1393!  315		      !			double *dn, uint32_t *nint,
1394!  316		      !			int nlen, double dn0)
1395!  317		      !{
1396
1397!
1398! SUBROUTINE mont_mulf_noconv
1399!
1400! OFFSET    SOURCE LINE	LABEL	INSTRUCTION
1401
1402			.global mont_mulf_noconv
1403                       mont_mulf_noconv:
1404/* 000000	 317 */		save	%sp,-176,%sp
1405/* 0x0004	     */		ldx	[%fp+2223],%g1
1406/* 0x0008	   0 */		sethi	%hi(Zero),%l5
1407/* 0x000c	 317 */		or	%g0,%i2,%l0
1408
1409!  318		      !	int i, j, jj;
1410!  319		      !	double digit, m2j, a, b;
1411!  320		      !	double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
1412!  322		      !	pdm1 = &(dm1[0]);
1413!  323		      !	pdm2 = &(dm2[0]);
1414!  324		      !	pdn = &(dn[0]);
1415!  325		      !	pdm2[2 * nlen] = Zero;
1416
1417/* 0x0010	 325 */		ldd	[%l5+%lo(Zero)],%f0
1418/* 0x0014	 317 */		or	%g0,%i0,%i2
1419/* 0x0018	 325 */		sll	%g1,1,%o3
1420
1421!  327		      !	if (nlen != 16) {
1422
1423/* 0x001c	 327 */		cmp	%g1,16
1424/* 0x0020	 325 */		sra	%o3,0,%i0
1425/* 0x0024	     */		sllx	%i0,3,%o0
1426/* 0x0028	 317 */		or	%g0,%i5,%i0
1427/* 0x002c	 327 */		bne,pn	%icc,.L77000476
1428/* 0x0030	 325 */		std	%f0,[%l0+%o0]
1429                       .L77000488:
1430/* 0x0034	   0 */		sethi	%hi(TwoToMinus16),%o2
1431/* 0x0038	   0 */		sethi	%hi(TwoTo16),%l3
1432
1433!  328		      !		for (i = 0; i < 4 * nlen + 2; i++)
1434!  329		      !			dt[i] = Zero;
1435!  330		      !		a = dt[0] = pdm1[0] * pdm2[0];
1436!  331		      !		digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1437!  333		      !		pdtj = &(dt[0]);
1438!  334		      !		for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
1439!  335		      !			m2j = pdm2[j];
1440!  336		      !			a = pdtj[0] + pdn[0] * digit;
1441!  337		      !			b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
1442!  338		      !			pdtj[1] = b;
1443!  340		      !#pragma pipeloop(0)
1444!  341		      !			for (i = 1; i < nlen; i++) {
1445!  342		      !				pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
1446!  343		      !			}
1447!  344		      !			if (jj == 15) {
1448!  345		      !				cleanup(dt, j / 2 + 1, 2 * nlen + 1);
1449!  346		      !				jj = 0;
1450!  347		      !			}
1451!  349		      !			digit = mod(lower32(b, Zero) * dn0,
1452!  350		      !				    TwoToMinus16, TwoTo16);
1453!  351		      !		}
1454!  352		      !	} else {
1455!  353		      !		a = dt[0] = pdm1[0] * pdm2[0];
1456
1457/* 0x003c	 353 */		ldd	[%i1],%f40
1458
1459!  355		      !		dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
1460!  356		      !			dt[59] = dt[58] = dt[57] = dt[56] = dt[55] =
1461!  357		      !			dt[54] = dt[53] = dt[52] = dt[51] = dt[50] =
1462!  358		      !			dt[49] = dt[48] = dt[47] = dt[46] = dt[45] =
1463!  359		      !			dt[44] = dt[43] = dt[42] = dt[41] = dt[40] =
1464!  360		      !			dt[39] = dt[38] = dt[37] = dt[36] = dt[35] =
1465!  361		      !			dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
1466!  362		      !			dt[29] = dt[28] = dt[27] = dt[26] = dt[25] =
1467!  363		      !			dt[24] = dt[23] = dt[22] = dt[21] = dt[20] =
1468!  364		      !			dt[19] = dt[18] = dt[17] = dt[16] = dt[15] =
1469!  365		      !			dt[14] = dt[13] = dt[12] = dt[11] = dt[10] =
1470!  366		      !			dt[9] = dt[8] = dt[7] = dt[6] = dt[5] = dt[4] =
1471!  367		      !			dt[3] = dt[2] = dt[1] = Zero;
1472!  369		      !		pdn_0 = pdn[0];
1473!  370		      !		pdm1_0 = pdm1[0];
1474!  372		      !		digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
1475!  373		      !		pdtj = &(dt[0]);
1476
1477/* 0x0040	 373 */		or	%g0,%i3,%o3
1478
1479!  375		      !		for (j = 0; j < 32; j++, pdtj++) {
1480
1481/* 0x0044	 375 */		or	%g0,0,%l1
1482/* 0x0048	 353 */		ldd	[%l0],%f42
1483/* 0x004c	 372 */		ldd	[%o2+%lo(TwoToMinus16)],%f44
1484/* 0x0050	     */		ldd	[%l3+%lo(TwoTo16)],%f46
1485/* 0x0054	 367 */		std	%f0,[%i3+8]
1486/* 0x0058	 353 */		fmuld	%f40,%f42,%f38
1487/* 0x005c	     */		std	%f38,[%i3]
1488/* 0x0060	 367 */		std	%f0,[%i3+16]
1489/* 0x0064	     */		std	%f0,[%i3+24]
1490/* 0x0068	     */		std	%f0,[%i3+32]
1491/* 0x006c	 372 */		fdtox	%f38,%f4
1492/* 0x0070	 367 */		std	%f0,[%i3+40]
1493/* 0x0074	     */		std	%f0,[%i3+48]
1494/* 0x0078	     */		std	%f0,[%i3+56]
1495/* 0x007c	 372 */		fmovs	%f0,%f4
1496/* 0x0080	 367 */		std	%f0,[%i3+64]
1497/* 0x0084	     */		std	%f0,[%i3+72]
1498/* 0x0088	 372 */		fxtod	%f4,%f52
1499/* 0x008c	 367 */		std	%f0,[%i3+80]
1500/* 0x0090	     */		std	%f0,[%i3+88]
1501/* 0x0094	     */		std	%f0,[%i3+96]
1502/* 0x0098	     */		std	%f0,[%i3+104]
1503/* 0x009c	 372 */		fmuld	%f52,%f14,%f60
1504/* 0x00a0	 367 */		std	%f0,[%i3+112]
1505/* 0x00a4	     */		std	%f0,[%i3+120]
1506/* 0x00a8	     */		std	%f0,[%i3+128]
1507/* 0x00ac	     */		std	%f0,[%i3+136]
1508/* 0x00b0	 372 */		fmuld	%f60,%f44,%f62
1509/* 0x00b4	 367 */		std	%f0,[%i3+144]
1510/* 0x00b8	     */		std	%f0,[%i3+152]
1511/* 0x00bc	     */		std	%f0,[%i3+160]
1512/* 0x00c0	     */		std	%f0,[%i3+168]
1513/* 0x00c4	 372 */		fdtox	%f62,%f32
1514/* 0x00c8	 367 */		std	%f0,[%i3+176]
1515/* 0x00cc	     */		std	%f0,[%i3+184]
1516/* 0x00d0	     */		std	%f0,[%i3+192]
1517/* 0x00d4	     */		std	%f0,[%i3+200]
1518/* 0x00d8	 372 */		fxtod	%f32,%f50
1519/* 0x00dc	 367 */		std	%f0,[%i3+208]
1520/* 0x00e0	     */		std	%f0,[%i3+216]
1521/* 0x00e4	     */		std	%f0,[%i3+224]
1522/* 0x00e8	     */		std	%f0,[%i3+232]
1523/* 0x00ec	 372 */		fmuld	%f50,%f46,%f34
1524/* 0x00f0	 367 */		std	%f0,[%i3+240]
1525/* 0x00f4	     */		std	%f0,[%i3+248]
1526/* 0x00f8	     */		std	%f0,[%i3+256]
1527/* 0x00fc	     */		std	%f0,[%i3+264]
1528/* 0x0100	 372 */		fsubd	%f60,%f34,%f40
1529/* 0x0104	 367 */		std	%f0,[%i3+272]
1530/* 0x0108	     */		std	%f0,[%i3+280]
1531/* 0x010c	     */		std	%f0,[%i3+288]
1532/* 0x0110	     */		std	%f0,[%i3+296]
1533/* 0x0114	     */		std	%f0,[%i3+304]
1534/* 0x0118	     */		std	%f0,[%i3+312]
1535/* 0x011c	     */		std	%f0,[%i3+320]
1536/* 0x0120	     */		std	%f0,[%i3+328]
1537/* 0x0124	     */		std	%f0,[%i3+336]
1538/* 0x0128	     */		std	%f0,[%i3+344]
1539/* 0x012c	     */		std	%f0,[%i3+352]
1540/* 0x0130	     */		std	%f0,[%i3+360]
1541/* 0x0134	     */		std	%f0,[%i3+368]
1542/* 0x0138	 375 */		sub	%g1,1,%l3
1543/* 0x013c	     */		add	%i3,8,%o7
1544/* 0x0140	 367 */		std	%f0,[%i3+376]
1545/* 0x0144	     */		std	%f0,[%i3+384]
1546/* 0x0148	     */		std	%f0,[%i3+392]
1547/* 0x014c	     */		std	%f0,[%i3+400]
1548/* 0x0150	     */		std	%f0,[%i3+408]
1549/* 0x0154	     */		std	%f0,[%i3+416]
1550/* 0x0158	     */		std	%f0,[%i3+424]
1551/* 0x015c	     */		std	%f0,[%i3+432]
1552/* 0x0160	     */		std	%f0,[%i3+440]
1553/* 0x0164	     */		std	%f0,[%i3+448]
1554/* 0x0168	     */		std	%f0,[%i3+456]
1555/* 0x016c	     */		std	%f0,[%i3+464]
1556/* 0x0170	     */		std	%f0,[%i3+472]
1557/* 0x0174	     */		std	%f0,[%i3+480]
1558/* 0x0178	     */		std	%f0,[%i3+488]
1559/* 0x017c	     */		std	%f0,[%i3+496]
1560/* 0x0180	     */		std	%f0,[%i3+504]
1561/* 0x0184	     */		std	%f0,[%i3+512]
1562/* 0x0188	     */		std	%f0,[%i3+520]
1563
1564!BEGIN HAND CODED PART
1565
1566! cheetah schedule, no even-odd trick
1567
1568
1569	add	%i3,%g0,%o5
1570
1571	fmovd	%f40,%f0
1572	fmovd	%f14,%f2
1573	fmovd	%f44,%f8
1574	sethi	%hi(TwoTo32),%l5
1575	fmovd	%f46,%f10
1576	sethi	%hi(TwoToMinus32),%g5
1577	ldd	[%i3],%f6
1578	ldd	[%l0],%f4
1579
1580	ldd	[%i1],%f40
1581	ldd	[%i1+8],%f42
1582	ldd	[%i1+16],%f52
1583	ldd	[%i1+48],%f54
1584	ldd	[%i1+56],%f36
1585	ldd	[%i1+64],%f56
1586	ldd	[%i1+104],%f48
1587	ldd	[%i1+112],%f58
1588
1589	ldd	[%i4],%f44
1590	ldd	[%i4+8],%f46
1591	ldd	[%i4+104],%f50
1592	ldd	[%i4+112],%f60
1593
1594
1595	.L99999999:
1596!1
1597	ldd	[%i1+24],%f20
1598	fmuld	%f0,%f44,%f12
1599!2
1600	ldd	[%i4+24],%f22
1601	fmuld	%f42,%f4,%f16
1602!3
1603	ldd	[%i1+40],%f24
1604	fmuld	%f46,%f0,%f18
1605!4
1606	ldd	[%i4+40],%f26
1607	fmuld	%f20,%f4,%f20
1608!5
1609	ldd	[%l0+8],%f38
1610	faddd	%f12,%f6,%f12
1611	fmuld	%f22,%f0,%f22
1612!6
1613	add	%l0,8,%l0
1614	ldd	[%i4+56],%f30
1615	fmuld	%f24,%f4,%f24
1616!7
1617	ldd	[%i1+72],%f32
1618	faddd	%f16,%f18,%f16
1619	fmuld	%f26,%f0,%f26
1620!8
1621	ldd	[%i3+16],%f18
1622	fmuld	%f40,%f38,%f14
1623!9
1624	ldd	[%i4+72],%f34
1625	faddd	%f20,%f22,%f20
1626	fmuld	%f8,%f12,%f12
1627!10
1628	ldd	[%i3+48],%f22
1629	fmuld	%f36,%f4,%f28
1630!11
1631	ldd	[%i3+8],%f6
1632	faddd	%f16,%f18,%f16
1633	fmuld	%f30,%f0,%f30
1634!12
1635	std	%f16,[%i3+16]
1636	faddd	%f24,%f26,%f24
1637	fmuld	%f32,%f4,%f32
1638!13
1639	ldd	[%i3+80],%f26
1640	faddd	%f12,%f14,%f12
1641	fmuld	%f34,%f0,%f34
1642!14
1643	ldd	[%i1+88],%f16
1644	faddd	%f20,%f22,%f20
1645!15
1646	ldd	[%i4+88],%f18
1647	faddd	%f28,%f30,%f28
1648!16
1649	ldd	[%i3+112],%f30
1650	faddd	%f32,%f34,%f32
1651!17
1652	ldd	[%i3+144],%f34
1653	faddd	%f12,%f6,%f6
1654	fmuld	%f16,%f4,%f16
1655!18
1656	std	%f20,[%i3+48]
1657	faddd	%f24,%f26,%f24
1658	fmuld	%f18,%f0,%f18
1659!19
1660	std	%f24,[%i3+80]
1661	faddd	%f28,%f30,%f28
1662	fmuld	%f48,%f4,%f20
1663!20
1664	std	%f28,[%i3+112]
1665	faddd	%f32,%f34,%f32
1666	fmuld	%f50,%f0,%f22
1667!21
1668	ldd	[%i1+120],%f24
1669	fdtox	%f6,%f12
1670!22
1671	std	%f32,[%i3+144]
1672	faddd	%f16,%f18,%f16
1673!23
1674	ldd	[%i4+120],%f26
1675!24
1676	ldd	[%i3+176],%f18
1677	faddd	%f20,%f22,%f20
1678	fmuld	%f24,%f4,%f24
1679!25
1680	ldd	[%i4+16],%f30
1681	fmovs	%f11,%f12
1682!26
1683	ldd	[%i1+32],%f32
1684	fmuld	%f26,%f0,%f26
1685!27
1686	ldd	[%i4+32],%f34
1687	fmuld	%f52,%f4,%f28
1688!28
1689	ldd	[%i3+208],%f22
1690	faddd	%f16,%f18,%f16
1691	fmuld	%f30,%f0,%f30
1692!29
1693	std	%f16,[%i3+176]
1694	fxtod	%f12,%f12
1695	fmuld	%f32,%f4,%f32
1696!30
1697	ldd	[%i4+48],%f18
1698	faddd	%f24,%f26,%f24
1699	fmuld	%f34,%f0,%f34
1700!31
1701	ldd	[%i3+240],%f26
1702	faddd	%f20,%f22,%f20
1703!32
1704	std	%f20,[%i3+208]
1705	faddd	%f28,%f30,%f28
1706	fmuld	%f54,%f4,%f16
1707!33
1708	ldd	[%i3+32],%f30
1709	fmuld	%f12,%f2,%f14
1710!34
1711	ldd	[%i4+64],%f22
1712	faddd	%f32,%f34,%f32
1713	fmuld	%f18,%f0,%f18
1714!35
1715	ldd	[%i3+64],%f34
1716	faddd	%f24,%f26,%f24
1717!36
1718	std	%f24,[%i3+240]
1719	faddd	%f28,%f30,%f28
1720	fmuld	%f56,%f4,%f20
1721!37
1722	std	%f28,[%i3+32]
1723	fmuld	%f14,%f8,%f12
1724!38
1725	ldd	[%i1+80],%f24
1726	faddd	%f32,%f34,%f34	! yes, tmp52!
1727	fmuld	%f22,%f0,%f22
1728!39
1729	ldd	[%i4+80],%f26
1730	faddd	%f16,%f18,%f16
1731!40
1732	ldd	[%i1+96],%f28
1733	fmuld	%f58,%f4,%f32
1734!41
1735	ldd	[%i4+96],%f30
1736	fdtox	%f12,%f12
1737	fmuld	%f24,%f4,%f24
1738!42
1739	std	%f34,[%i3+64]	! yes, tmp52!
1740	faddd	%f20,%f22,%f20
1741	fmuld	%f26,%f0,%f26
1742!43
1743	ldd	[%i3+96],%f18
1744	fmuld	%f28,%f4,%f28
1745!44
1746	ldd	[%i3+128],%f22
1747	fmovd	%f38,%f4
1748	fmuld	%f30,%f0,%f30
1749!45
1750	fxtod	%f12,%f12
1751	fmuld	%f60,%f0,%f34
1752!46
1753	add	%i3,8,%i3
1754	faddd	%f24,%f26,%f24
1755!47
1756	ldd	[%i3+160-8],%f26
1757	faddd	%f16,%f18,%f16
1758!48
1759	std	%f16,[%i3+96-8]
1760	faddd	%f28,%f30,%f28
1761!49
1762	ldd	[%i3+192-8],%f30
1763	faddd	%f32,%f34,%f32
1764	fmuld	%f12,%f10,%f12
1765!50
1766	ldd	[%i3+224-8],%f34
1767	faddd	%f20,%f22,%f20
1768!51
1769	std	%f20,[%i3+128-8]
1770	faddd	%f24,%f26,%f24
1771!52
1772	add	%l1,1,%l1
1773	std	%f24,[%i3+160-8]
1774	faddd	%f28,%f30,%f28
1775!53
1776	cmp	%l1,15
1777	std	%f28,[%i3+192-8]
1778	fsubd	%f14,%f12,%f0
1779!54
1780	faddd	%f32,%f34,%f32
1781	ble,pt	%icc,.L99999999
1782	std	%f32,[%i3+224-8]
1783
1784
1785!
1786	ldd	[%g5+%lo(TwoToMinus32)],%f8
1787!
1788	ldd	[%i3+8],%f16
1789!
1790	ldd	[%i3+16],%f20
1791!
1792	fmuld	%f8,%f16,%f18
1793	ldd	[%i3+24],%f24
1794!
1795	fmuld	%f8,%f20,%f22
1796	ldd	[%i3+32],%f28
1797!
1798	fmuld	%f8,%f24,%f26
1799	ldd	[%l5+%lo(TwoTo32)],%f10
1800!
1801	fmuld	%f8,%f28,%f30
1802!
1803	fdtox	%f18,%f18
1804!
1805	fdtox	%f22,%f22
1806!
1807	fdtox	%f26,%f26
1808	ldd	[%i3+40],%f32
1809!
1810	fdtox	%f30,%f30
1811	ldd	[%i3+48],%f56
1812!
1813	fxtod	%f18,%f18
1814	fmuld	%f8,%f32,%f34
1815	ldd	[%i3+56],%f36
1816!
1817	fxtod	%f22,%f22
1818	fmuld	%f8,%f56,%f58
1819	ldd	[%i3+64],%f38
1820!
1821	fxtod	%f26,%f26
1822	fmuld	%f8,%f36,%f60
1823!
1824	fxtod	%f30,%f30
1825	fmuld	%f8,%f38,%f62
1826!
1827	fdtox	%f34,%f34
1828	fmuld	%f10,%f18,%f40
1829!
1830	fdtox	%f58,%f58
1831	fmuld	%f10,%f22,%f42
1832!
1833	fdtox	%f60,%f60
1834	fmuld	%f10,%f26,%f44
1835!
1836	fdtox	%f62,%f62
1837	fmuld	%f10,%f30,%f46
1838!
1839	fxtod	%f34,%f34
1840!
1841	fxtod	%f58,%f58
1842!
1843	fxtod	%f60,%f60
1844!
1845	fxtod	%f62,%f62
1846!
1847	fsubd	%f16,%f40,%f40
1848	fmuld	%f10,%f34,%f48
1849!
1850	fsubd	%f20,%f42,%f42
1851	fmuld	%f10,%f58,%f50
1852!
1853	fsubd	%f24,%f44,%f44
1854	fmuld	%f10,%f60,%f52
1855!
1856	fsubd	%f28,%f46,%f46
1857	fmuld	%f10,%f62,%f54
1858!
1859	std	%f40,[%i3+8]
1860!
1861	std	%f42,[%i3+16]
1862!
1863	faddd	%f18,%f44,%f44
1864	std	%f44,[%i3+24]
1865!
1866	faddd	%f22,%f46,%f46
1867	std	%f46,[%i3+32]
1868!
1869
1870
1871
1872	fsubd	%f32,%f48,%f48
1873	ldd	[%i3+64+8],%f16
1874!
1875	fsubd	%f56,%f50,%f50
1876	ldd	[%i3+64+16],%f20
1877!
1878	fsubd	%f36,%f52,%f52
1879	ldd	[%i3+64+24],%f24
1880!
1881	fsubd	%f38,%f54,%f54
1882	ldd	[%i3+64+32],%f28
1883!
1884	faddd	%f26,%f48,%f48
1885	fmuld	%f8,%f16,%f18
1886	std	%f48,[%i3+40]
1887!
1888	faddd	%f30,%f50,%f50
1889	fmuld	%f8,%f20,%f22
1890	std	%f50,[%i3+48]
1891!
1892	faddd	%f34,%f52,%f52
1893	fmuld	%f8,%f24,%f26
1894	std	%f52,[%i3+56]
1895!
1896	faddd	%f58,%f54,%f54
1897	fmuld	%f8,%f28,%f30
1898	std	%f54,[%i3+64]
1899!
1900
1901
1902	fdtox	%f18,%f18
1903!
1904	fdtox	%f22,%f22
1905!
1906	fdtox	%f26,%f26
1907	ldd	[%i3+64+40],%f32
1908!
1909	fdtox	%f30,%f30
1910	ldd	[%i3+64+48],%f56
1911!
1912	fxtod	%f18,%f18
1913	fmuld	%f8,%f32,%f34
1914	ldd	[%i3+64+56],%f36
1915!
1916	fxtod	%f22,%f22
1917	fmuld	%f8,%f56,%f58
1918	ldd	[%i3+64+64],%f38
1919!
1920	fxtod	%f26,%f26
1921	fmuld	%f8,%f36,%f12
1922!
1923	fxtod	%f30,%f30
1924	fmuld	%f8,%f38,%f14
1925!
1926	fdtox	%f34,%f34
1927	fmuld	%f10,%f18,%f40
1928!
1929	fdtox	%f58,%f58
1930	fmuld	%f10,%f22,%f42
1931!
1932	fdtox	%f12,%f12
1933	fmuld	%f10,%f26,%f44
1934!
1935	fdtox	%f14,%f14
1936	fmuld	%f10,%f30,%f46
1937!
1938	fxtod	%f34,%f34
1939!
1940	fxtod	%f58,%f58
1941!
1942	fxtod	%f12,%f12
1943!
1944	fxtod	%f14,%f14
1945!
1946	fsubd	%f16,%f40,%f40
1947	fmuld	%f10,%f34,%f48
1948!
1949	fsubd	%f20,%f42,%f42
1950	fmuld	%f10,%f58,%f50
1951!
1952	fsubd	%f24,%f44,%f44
1953	fmuld	%f10,%f12,%f52
1954!
1955	fsubd	%f28,%f46,%f46
1956	fmuld	%f10,%f14,%f54
1957!
1958	faddd	%f60,%f40,%f40
1959	std	%f40,[%i3+64+8]
1960!
1961	faddd	%f62,%f42,%f42
1962	std	%f42,[%i3+64+16]
1963!
1964	faddd	%f18,%f44,%f44
1965	std	%f44,[%i3+64+24]
1966!
1967	faddd	%f22,%f46,%f46
1968	std	%f46,[%i3+64+32]
1969!
1970
1971
1972
1973	fsubd	%f32,%f48,%f48
1974	ldd	[%i3+64+64+8],%f16
1975!
1976	fsubd	%f56,%f50,%f50
1977	ldd	[%i3+64+64+16],%f20
1978!
1979	fsubd	%f36,%f52,%f52
1980	ldd	[%i3+64+64+24],%f24
1981!
1982	fsubd	%f38,%f54,%f54
1983	ldd	[%i3+64+64+32],%f28
1984!
1985	faddd	%f26,%f48,%f48
1986	fmuld	%f8,%f16,%f18
1987	std	%f48,[%i3+64+40]
1988!
1989	faddd	%f30,%f50,%f50
1990	fmuld	%f8,%f20,%f22
1991	std	%f50,[%i3+64+48]
1992!
1993	faddd	%f34,%f52,%f52
1994	fmuld	%f8,%f24,%f26
1995	std	%f52,[%i3+64+56]
1996!
1997	faddd	%f58,%f54,%f54
1998	fmuld	%f8,%f28,%f30
1999	std	%f54,[%i3+64+64]
2000!
2001
2002
2003
2004	fdtox	%f18,%f18
2005!
2006	fdtox	%f22,%f22
2007!
2008	fdtox	%f26,%f26
2009	ldd	[%i3+64+64+40],%f32
2010!
2011	fdtox	%f30,%f30
2012	ldd	[%i3+64+64+48],%f56
2013!
2014	fxtod	%f18,%f18
2015	fmuld	%f8,%f32,%f34
2016	ldd	[%i3+64+64+56],%f36
2017!
2018	fxtod	%f22,%f22
2019	fmuld	%f8,%f56,%f58
2020	ldd	[%i3+64+64+64],%f38
2021!
2022	fxtod	%f26,%f26
2023	fmuld	%f8,%f36,%f60
2024!
2025	fxtod	%f30,%f30
2026	fmuld	%f8,%f38,%f62
2027!
2028	fdtox	%f34,%f34
2029	fmuld	%f10,%f18,%f40
2030!
2031	fdtox	%f58,%f58
2032	fmuld	%f10,%f22,%f42
2033!
2034	fdtox	%f60,%f60
2035	fmuld	%f10,%f26,%f44
2036!
2037	fdtox	%f62,%f62
2038	fmuld	%f10,%f30,%f46
2039!
2040	fxtod	%f34,%f34
2041!
2042	fxtod	%f58,%f58
2043!
2044	fxtod	%f60,%f60
2045!
2046	fxtod	%f62,%f62
2047!
2048	fsubd	%f16,%f40,%f40
2049	fmuld	%f10,%f34,%f48
2050!
2051	fsubd	%f20,%f42,%f42
2052	fmuld	%f10,%f58,%f50
2053!
2054	fsubd	%f24,%f44,%f44
2055	fmuld	%f10,%f60,%f52
2056!
2057	fsubd	%f28,%f46,%f46
2058	fmuld	%f10,%f62,%f54
2059!
2060	faddd	%f12,%f40,%f40
2061	std	%f40,[%i3+64+64+8]
2062!
2063	faddd	%f14,%f42,%f42
2064	std	%f42,[%i3+64+64+16]
2065!
2066	faddd	%f18,%f44,%f44
2067	std	%f44,[%i3+64+64+24]
2068!
2069	faddd	%f22,%f46,%f46
2070	std	%f46,[%i3+64+64+32]
2071!
2072
2073
2074	fsubd	%f32,%f48,%f48
2075	ldd	[%i3+64+64+64+8],%f16
2076!
2077	fsubd	%f56,%f50,%f50
2078	ldd	[%i3+64+64+64+16],%f20
2079!
2080	fsubd	%f36,%f52,%f52
2081	ldd	[%i3+64+64+64+24],%f24
2082!
2083	fsubd	%f38,%f54,%f54
2084	ldd	[%i3+64+64+64+32],%f28
2085!
2086	faddd	%f26,%f48,%f48
2087	fmuld	%f8,%f16,%f18
2088	std	%f48,[%i3+64+64+40]
2089!
2090	faddd	%f30,%f50,%f50
2091	fmuld	%f8,%f20,%f22
2092	std	%f50,[%i3+64+64+48]
2093!
2094	faddd	%f34,%f52,%f52
2095	fmuld	%f8,%f24,%f26
2096	std	%f52,[%i3+64+64+56]
2097!
2098	faddd	%f58,%f54,%f54
2099	fmuld	%f8,%f28,%f30
2100	std	%f54,[%i3+64+64+64]
2101!
2102
2103
2104	fdtox	%f18,%f18
2105!
2106	fdtox	%f22,%f22
2107!
2108	fdtox	%f26,%f26
2109	ldd	[%i3+64+64+64+40],%f32
2110!
2111	fdtox	%f30,%f30
2112	ldd	[%i3+64+64+64+48],%f56
2113!
2114	fxtod	%f18,%f18
2115	fmuld	%f8,%f32,%f34
2116	ldd	[%i3+64+64+64+56],%f36
2117!
2118	fxtod	%f22,%f22
2119	fmuld	%f8,%f56,%f58
2120	ldd	[%i3+64+64+64+64],%f38
2121!
2122	fxtod	%f26,%f26
2123	fmuld	%f8,%f36,%f12
2124!
2125	fxtod	%f30,%f30
2126	fmuld	%f8,%f38,%f14
2127!
2128	fdtox	%f34,%f34
2129	fmuld	%f10,%f18,%f40
2130!
2131	fdtox	%f58,%f58
2132	fmuld	%f10,%f22,%f42
2133!
2134	fdtox	%f12,%f12
2135	fmuld	%f10,%f26,%f44
2136!
2137	fdtox	%f14,%f14
2138	fmuld	%f10,%f30,%f46
2139!
2140	sethi	%hi(TwoToMinus16),%g5
2141	fxtod	%f34,%f34
2142!
2143	sethi	%hi(TwoTo16),%l5
2144	fxtod	%f58,%f58
2145!
2146	fxtod	%f12,%f12
2147!
2148	fxtod	%f14,%f14
2149!
2150	fsubd	%f16,%f40,%f16
2151	fmuld	%f10,%f34,%f48
2152	ldd	[%g5+%lo(TwoToMinus16)],%f8
2153!
2154	fsubd	%f20,%f42,%f20
2155	fmuld	%f10,%f58,%f50
2156	ldd	[%i1],%f40	! should be %f40
2157!
2158	fsubd	%f24,%f44,%f24
2159	fmuld	%f10,%f12,%f52
2160	ldd	[%i1+8],%f42	! should be %f42
2161!
2162	fsubd	%f28,%f46,%f28
2163	fmuld	%f10,%f14,%f54
2164	ldd	[%i4],%f44	! should be %f44
2165!
2166	faddd	%f60,%f16,%f16
2167	std	%f16,[%i3+64+64+64+8]
2168!
2169	faddd	%f62,%f20,%f20
2170	std	%f20,[%i3+64+64+64+16]
2171!
2172	faddd	%f18,%f24,%f24
2173	std	%f24,[%i3+64+64+64+24]
2174!
2175	faddd	%f22,%f28,%f28
2176	std	%f28,[%i3+64+64+64+32]
2177!
2178	fsubd	%f32,%f48,%f32
2179	ldd	[%i4+8],%f46	 ! should be %f46
2180!
2181	fsubd	%f56,%f50,%f56
2182	ldd	[%i1+104],%f48	! should be %f48
2183!
2184	fsubd	%f36,%f52,%f36
2185	ldd	[%i4+104],%f50	! should be %f50
2186!
2187	fsubd	%f38,%f54,%f38
2188	ldd	[%i1+16],%f52	! should be %f52
2189!
2190	faddd	%f26,%f32,%f32
2191	std	%f32,[%i3+64+64+64+40]
2192!
2193	faddd	%f30,%f56,%f56
2194	std	%f56,[%i3+64+64+64+48]
2195!
2196	faddd	%f34,%f36,%f36
2197	std	%f36,[%i3+64+64+64+56]
2198!
2199	faddd	%f58,%f38,%f38
2200	std	%f38,[%i3+64+64+64+64]
2201!
2202	std	%f12,[%i3+64+64+64+64+8]
2203!
2204	std	%f14,[%i3+64+64+64+64+16]
2205!
2206
2207	ldd	[%l5+%lo(TwoTo16)],%f10
2208	ldd	[%i1+48],%f54
2209	ldd	[%i1+56],%f36
2210	ldd	[%i1+64],%f56
2211	ldd	[%i1+112],%f58
2212
2213	ldd	[%i4+104],%f50
2214	ldd	[%i4+112],%f60
2215
2216
2217	.L99999998:
2218!1
2219	ldd	[%i1+24],%f20
2220	fmuld	%f0,%f44,%f12
2221!2
2222	ldd	[%i4+24],%f22
2223	fmuld	%f42,%f4,%f16
2224!3
2225	ldd	[%i1+40],%f24
2226	fmuld	%f46,%f0,%f18
2227!4
2228	ldd	[%i4+40],%f26
2229	fmuld	%f20,%f4,%f20
2230!5
2231	ldd	[%l0+8],%f38
2232	faddd	%f12,%f6,%f12
2233	fmuld	%f22,%f0,%f22
2234!6
2235	add	%l0,8,%l0
2236	ldd	[%i4+56],%f30
2237	fmuld	%f24,%f4,%f24
2238!7
2239	ldd	[%i1+72],%f32
2240	faddd	%f16,%f18,%f16
2241	fmuld	%f26,%f0,%f26
2242!8
2243	ldd	[%i3+16],%f18
2244	fmuld	%f40,%f38,%f14
2245!9
2246	ldd	[%i4+72],%f34
2247	faddd	%f20,%f22,%f20
2248	fmuld	%f8,%f12,%f12
2249!10
2250	ldd	[%i3+48],%f22
2251	fmuld	%f36,%f4,%f28
2252!11
2253	ldd	[%i3+8],%f6
2254	faddd	%f16,%f18,%f16
2255	fmuld	%f30,%f0,%f30
2256!12
2257	std	%f16,[%i3+16]
2258	faddd	%f24,%f26,%f24
2259	fmuld	%f32,%f4,%f32
2260!13
2261	ldd	[%i3+80],%f26
2262	faddd	%f12,%f14,%f12
2263	fmuld	%f34,%f0,%f34
2264!14
2265	ldd	[%i1+88],%f16
2266	faddd	%f20,%f22,%f20
2267!15
2268	ldd	[%i4+88],%f18
2269	faddd	%f28,%f30,%f28
2270!16
2271	ldd	[%i3+112],%f30
2272	faddd	%f32,%f34,%f32
2273!17
2274	ldd	[%i3+144],%f34
2275	faddd	%f12,%f6,%f6
2276	fmuld	%f16,%f4,%f16
2277!18
2278	std	%f20,[%i3+48]
2279	faddd	%f24,%f26,%f24
2280	fmuld	%f18,%f0,%f18
2281!19
2282	std	%f24,[%i3+80]
2283	faddd	%f28,%f30,%f28
2284	fmuld	%f48,%f4,%f20
2285!20
2286	std	%f28,[%i3+112]
2287	faddd	%f32,%f34,%f32
2288	fmuld	%f50,%f0,%f22
2289!21
2290	ldd	[%i1+120],%f24
2291	fdtox	%f6,%f12
2292!22
2293	std	%f32,[%i3+144]
2294	faddd	%f16,%f18,%f16
2295!23
2296	ldd	[%i4+120],%f26
2297!24
2298	ldd	[%i3+176],%f18
2299	faddd	%f20,%f22,%f20
2300	fmuld	%f24,%f4,%f24
2301!25
2302	ldd	[%i4+16],%f30
2303	fmovs	%f11,%f12
2304!26
2305	ldd	[%i1+32],%f32
2306	fmuld	%f26,%f0,%f26
2307!27
2308	ldd	[%i4+32],%f34
2309	fmuld	%f52,%f4,%f28
2310!28
2311	ldd	[%i3+208],%f22
2312	faddd	%f16,%f18,%f16
2313	fmuld	%f30,%f0,%f30
2314!29
2315	std	%f16,[%i3+176]
2316	fxtod	%f12,%f12
2317	fmuld	%f32,%f4,%f32
2318!30
2319	ldd	[%i4+48],%f18
2320	faddd	%f24,%f26,%f24
2321	fmuld	%f34,%f0,%f34
2322!31
2323	ldd	[%i3+240],%f26
2324	faddd	%f20,%f22,%f20
2325!32
2326	std	%f20,[%i3+208]
2327	faddd	%f28,%f30,%f28
2328	fmuld	%f54,%f4,%f16
2329!33
2330	ldd	[%i3+32],%f30
2331	fmuld	%f12,%f2,%f14
2332!34
2333	ldd	[%i4+64],%f22
2334	faddd	%f32,%f34,%f32
2335	fmuld	%f18,%f0,%f18
2336!35
2337	ldd	[%i3+64],%f34
2338	faddd	%f24,%f26,%f24
2339!36
2340	std	%f24,[%i3+240]
2341	faddd	%f28,%f30,%f28
2342	fmuld	%f56,%f4,%f20
2343!37
2344	std	%f28,[%i3+32]
2345	fmuld	%f14,%f8,%f12
2346!38
2347	ldd	[%i1+80],%f24
2348	faddd	%f32,%f34,%f34	!	yes, tmp52!
2349	fmuld	%f22,%f0,%f22
2350!39
2351	ldd	[%i4+80],%f26
2352	faddd	%f16,%f18,%f16
2353!40
2354	ldd	[%i1+96],%f28
2355	fmuld	%f58,%f4,%f32
2356!41
2357	ldd	[%i4+96],%f30
2358	fdtox	%f12,%f12
2359	fmuld	%f24,%f4,%f24
2360!42
2361	std	%f34,[%i3+64]	! yes, tmp52!
2362	faddd	%f20,%f22,%f20
2363	fmuld	%f26,%f0,%f26
2364!43
2365	ldd	[%i3+96],%f18
2366	fmuld	%f28,%f4,%f28
2367!44
2368	ldd	[%i3+128],%f22
2369	fmovd	%f38,%f4
2370	fmuld	%f30,%f0,%f30
2371!45
2372	fxtod	%f12,%f12
2373	fmuld	%f60,%f0,%f34
2374!46
2375	add	%i3,8,%i3
2376	faddd	%f24,%f26,%f24
2377!47
2378	ldd	[%i3+160-8],%f26
2379	faddd	%f16,%f18,%f16
2380!48
2381	std	%f16,[%i3+96-8]
2382	faddd	%f28,%f30,%f28
2383!49
2384	ldd	[%i3+192-8],%f30
2385	faddd	%f32,%f34,%f32
2386	fmuld	%f12,%f10,%f12
2387!50
2388	ldd	[%i3+224-8],%f34
2389	faddd	%f20,%f22,%f20
2390!51
2391	std	%f20,[%i3+128-8]
2392	faddd	%f24,%f26,%f24
2393!52
2394	add	%l1,1,%l1
2395	std	%f24,[%i3+160-8]
2396	faddd	%f28,%f30,%f28
2397!53
2398	cmp	%l1,31
2399	std	%f28,[%i3+192-8]
2400	fsubd	%f14,%f12,%f0
2401!54
2402	faddd	%f32,%f34,%f32
2403	ble,pt	%icc,.L99999998
2404	std	%f32,[%i3+224-8]
2405!55
2406	std	%f6,[%i3]
2407
2408	add	%o5,%g0,%i3
2409
2410
2411!END HAND CODED PART
2412                       .L900000828:
2413/* 0x03e4	 405 */		ba	.L900000852
2414/* 0x03e8	 409 */		ldx	[%i3+%o0],%l1
2415
2416!  406		      !		}
2417!  407		      !	}
2418!  409		      !	conv_d16_to_i32(result, dt + 2 * nlen, (int64_t *)dt, nlen + 1);
2419!  411		      !/*for(i=0;i<nlen+1;i++) saveresult[i]=result[i];*/
2420!  413		      !	adjust_montf_result(result, nint, nlen);
2421
2422                       .L77000476:
2423/* 0x03ec	 413 */		sll	%g1,2,%l3
2424/* 0x03f0	   0 */		sethi	%hi(TwoTo16),%g5
2425/* 0x03f4	 413 */		add	%l3,2,%l2
2426/* 0x03f8	 328 */		cmp	%l2,0
2427/* 0x03fc	     */		ble,pn	%icc,.L77000482
2428/* 0x0400	   0 */		sethi	%hi(TwoToMinus16),%o2
2429                       .L77000514:
2430/* 0x0404	 329 */		add	%l3,2,%l2
2431/* 0x0408	 328 */		add	%l3,1,%o4
2432/* 0x040c	     */		or	%g0,0,%l3
2433/* 0x0410	 329 */		cmp	%l2,8
2434/* 0x0414	     */		bl,pn	%icc,.L77000477
2435/* 0x0418	 328 */		or	%g0,%i3,%l1
2436                       .L900000831:
2437/* 0x041c	 329 */		prefetch	[%i3],22
2438/* 0x0420	     */		sub	%o4,7,%l4
2439/* 0x0424	     */		or	%g0,0,%l3
2440/* 0x0428	     */		or	%g0,%i3,%l1
2441                       .L900000829:
2442/* 0x042c	 329 */		prefetch	[%l1+528],22
2443/* 0x0430	     */		std	%f0,[%l1]
2444/* 0x0434	     */		add	%l3,8,%l3
2445/* 0x0438	     */		add	%l1,64,%l1
2446/* 0x043c	     */		std	%f0,[%l1-56]
2447/* 0x0440	     */		cmp	%l3,%l4
2448/* 0x0444	     */		std	%f0,[%l1-48]
2449/* 0x0448	     */		std	%f0,[%l1-40]
2450/* 0x044c	     */		prefetch	[%l1+496],22
2451/* 0x0450	     */		std	%f0,[%l1-32]
2452/* 0x0454	     */		std	%f0,[%l1-24]
2453/* 0x0458	     */		std	%f0,[%l1-16]
2454/* 0x045c	     */		ble,pt	%icc,.L900000829
2455/* 0x0460	     */		std	%f0,[%l1-8]
2456                       .L900000832:
2457/* 0x0464	 329 */		cmp	%l3,%o4
2458/* 0x0468	     */		bg,pn	%icc,.L77000482
2459/* 0x046c	     */		nop
2460                       .L77000477:
2461/* 0x0470	 329 */		add	%l3,1,%l3
2462                       .L900000851:
2463/* 0x0474	 329 */		std	%f0,[%l1]
2464/* 0x0478	     */		cmp	%l3,%o4
2465/* 0x047c	     */		add	%l1,8,%l1
2466/* 0x0480	     */		ble,pt	%icc,.L900000851
2467/* 0x0484	     */		add	%l3,1,%l3
2468                       .L77000482:
2469/* 0x0488	 330 */		ldd	[%i1],%f40
2470/* 0x048c	 334 */		cmp	%o3,0
2471/* 0x0490	     */		sub	%g1,1,%l3
2472/* 0x0494	 330 */		ldd	[%l0],%f42
2473/* 0x0498	 331 */		ldd	[%o2+%lo(TwoToMinus16)],%f36
2474/* 0x049c	     */		ldd	[%g5+%lo(TwoTo16)],%f38
2475/* 0x04a0	 330 */		fmuld	%f40,%f42,%f52
2476/* 0x04a4	 331 */		fdtox	%f52,%f8
2477/* 0x04a8	     */		fmovs	%f0,%f8
2478/* 0x04ac	     */		fxtod	%f8,%f62
2479/* 0x04b0	     */		fmuld	%f62,%f14,%f60
2480/* 0x04b4	     */		fmuld	%f60,%f36,%f32
2481/* 0x04b8	     */		fdtox	%f32,%f50
2482/* 0x04bc	     */		fxtod	%f50,%f34
2483/* 0x04c0	     */		fmuld	%f34,%f38,%f46
2484/* 0x04c4	     */		fsubd	%f60,%f46,%f40
2485/* 0x04c8	 334 */		ble,pn	%icc,.L77000378
2486/* 0x04cc	 330 */		std	%f52,[%i3]
2487                       .L77000509:
2488/* 0x04d0	 345 */		add	%o3,1,%g5
2489/* 0x04d4	     */		sll	%g5,1,%o2
2490/* 0x04d8	     */		or	%g0,0,%l1
2491/* 0x04dc	 337 */		ldd	[%i4],%f42
2492/* 0x04e0	 345 */		sub	%o3,1,%o3
2493/* 0x04e4	     */		or	%g0,0,%o5
2494/* 0x04e8	     */		or	%g0,%i3,%l2
2495/* 0x04ec	     */		add	%i4,8,%o1
2496/* 0x04f0	     */		add	%i1,8,%g5
2497                       .L900000848:
2498/* 0x04f4	 337 */		fmuld	%f40,%f42,%f34
2499/* 0x04f8	     */		ldd	[%l0+8],%f32
2500/* 0x04fc	 341 */		cmp	%g1,1
2501/* 0x0500	 337 */		ldd	[%i1],%f50
2502/* 0x0504	     */		ldd	[%l2],%f46
2503/* 0x0508	     */		ldd	[%l2+8],%f44
2504/* 0x050c	     */		fmuld	%f50,%f32,%f60
2505/* 0x0510	 335 */		ldd	[%l0],%f42
2506/* 0x0514	 337 */		faddd	%f46,%f34,%f48
2507/* 0x0518	     */		faddd	%f44,%f60,%f58
2508/* 0x051c	     */		fmuld	%f36,%f48,%f54
2509/* 0x0520	     */		faddd	%f58,%f54,%f34
2510/* 0x0524	 341 */		ble,pn	%icc,.L77000368
2511/* 0x0528	 338 */		std	%f34,[%l2+8]
2512                       .L77000507:
2513/* 0x052c	 341 */		or	%g0,1,%l5
2514/* 0x0530	     */		or	%g0,2,%l4
2515/* 0x0534	     */		or	%g0,%g5,%g4
2516/* 0x0538	 342 */		cmp	%l3,12
2517/* 0x053c	     */		bl,pn	%icc,.L77000481
2518/* 0x0540	 341 */		or	%g0,%o1,%g3
2519                       .L900000839:
2520/* 0x0544	 342 */		prefetch	[%i1+8],0
2521/* 0x0548	     */		prefetch	[%i1+72],0
2522/* 0x054c	     */		add	%i4,40,%l6
2523/* 0x0550	     */		add	%i1,40,%l7
2524/* 0x0554	     */		prefetch	[%l2+16],0
2525/* 0x0558	     */		or	%g0,%l2,%o7
2526/* 0x055c	     */		sub	%l3,7,%i5
2527/* 0x0560	     */		prefetch	[%l2+80],0
2528/* 0x0564	     */		add	%l2,80,%g2
2529/* 0x0568	     */		or	%g0,2,%l4
2530/* 0x056c	     */		prefetch	[%i1+136],0
2531/* 0x0570	     */		or	%g0,5,%l5
2532/* 0x0574	     */		prefetch	[%i1+200],0
2533/* 0x0578	     */		prefetch	[%l2+144],0
2534/* 0x057c	     */		ldd	[%i4+8],%f52
2535/* 0x0580	     */		ldd	[%i4+16],%f44
2536/* 0x0584	     */		ldd	[%i4+24],%f56
2537/* 0x0588	     */		fmuld	%f40,%f52,%f48
2538/* 0x058c	     */		fmuld	%f40,%f44,%f46
2539/* 0x0590	     */		fmuld	%f40,%f56,%f44
2540/* 0x0594	     */		ldd	[%l2+48],%f56
2541/* 0x0598	     */		prefetch	[%l2+208],0
2542/* 0x059c	     */		prefetch	[%l2+272],0
2543/* 0x05a0	     */		prefetch	[%l2+336],0
2544/* 0x05a4	     */		prefetch	[%l2+400],0
2545/* 0x05a8	     */		ldd	[%i1+8],%f32
2546/* 0x05ac	     */		ldd	[%i1+16],%f60
2547/* 0x05b0	     */		ldd	[%i1+24],%f50
2548/* 0x05b4	     */		fmuld	%f42,%f32,%f62
2549/* 0x05b8	     */		ldd	[%i1+32],%f32
2550/* 0x05bc	     */		fmuld	%f42,%f60,%f58
2551/* 0x05c0	     */		ldd	[%l2+16],%f52
2552/* 0x05c4	     */		ldd	[%l2+32],%f54
2553/* 0x05c8	     */		faddd	%f62,%f48,%f60
2554/* 0x05cc	     */		fmuld	%f42,%f50,%f48
2555/* 0x05d0	     */		faddd	%f58,%f46,%f62
2556/* 0x05d4	     */		ldd	[%i4+32],%f46
2557/* 0x05d8	     */		ldd	[%l2+64],%f58
2558                       .L900000837:
2559/* 0x05dc	 342 */		prefetch	[%l7+192],0
2560/* 0x05e0	     */		fmuld	%f40,%f46,%f46
2561/* 0x05e4	     */		faddd	%f60,%f52,%f60
2562/* 0x05e8	     */		ldd	[%l6],%f52
2563/* 0x05ec	     */		std	%f60,[%g2-64]
2564/* 0x05f0	     */		fmuld	%f42,%f32,%f50
2565/* 0x05f4	     */		add	%l5,8,%l5
2566/* 0x05f8	     */		ldd	[%l7],%f60
2567/* 0x05fc	     */		faddd	%f48,%f44,%f48
2568/* 0x0600	     */		cmp	%l5,%i5
2569/* 0x0604	     */		ldd	[%g2],%f32
2570/* 0x0608	     */		add	%g2,128,%g2
2571/* 0x060c	     */		prefetch	[%g2+256],0
2572/* 0x0610	     */		fmuld	%f40,%f52,%f52
2573/* 0x0614	     */		faddd	%f62,%f54,%f44
2574/* 0x0618	     */		ldd	[%l6+8],%f54
2575/* 0x061c	     */		std	%f44,[%g2-176]
2576/* 0x0620	     */		fmuld	%f42,%f60,%f44
2577/* 0x0624	     */		add	%l6,64,%l6
2578/* 0x0628	     */		ldd	[%l7+8],%f60
2579/* 0x062c	     */		faddd	%f50,%f46,%f50
2580/* 0x0630	     */		add	%l7,64,%l7
2581/* 0x0634	     */		add	%l4,16,%l4
2582/* 0x0638	     */		ldd	[%g2-112],%f46
2583/* 0x063c	     */		fmuld	%f40,%f54,%f54
2584/* 0x0640	     */		faddd	%f48,%f56,%f62
2585/* 0x0644	     */		ldd	[%l6-48],%f56
2586/* 0x0648	     */		std	%f62,[%g2-160]
2587/* 0x064c	     */		fmuld	%f42,%f60,%f48
2588/* 0x0650	     */		ldd	[%l7-48],%f60
2589/* 0x0654	     */		faddd	%f44,%f52,%f52
2590/* 0x0658	     */		ldd	[%g2-96],%f30
2591/* 0x065c	     */		prefetch	[%g2+288],0
2592/* 0x0660	     */		fmuld	%f40,%f56,%f56
2593/* 0x0664	     */		faddd	%f50,%f58,%f62
2594/* 0x0668	     */		ldd	[%l6-40],%f58
2595/* 0x066c	     */		std	%f62,[%g2-144]
2596/* 0x0670	     */		fmuld	%f42,%f60,%f50
2597/* 0x0674	     */		ldd	[%l7-40],%f62
2598/* 0x0678	     */		faddd	%f48,%f54,%f54
2599/* 0x067c	     */		ldd	[%g2-80],%f28
2600/* 0x0680	     */		prefetch	[%l7+160],0
2601/* 0x0684	     */		fmuld	%f40,%f58,%f48
2602/* 0x0688	     */		faddd	%f52,%f32,%f44
2603/* 0x068c	     */		ldd	[%l6-32],%f58
2604/* 0x0690	     */		std	%f44,[%g2-128]
2605/* 0x0694	     */		fmuld	%f42,%f62,%f44
2606/* 0x0698	     */		ldd	[%l7-32],%f60
2607/* 0x069c	     */		faddd	%f50,%f56,%f56
2608/* 0x06a0	     */		ldd	[%g2-64],%f52
2609/* 0x06a4	     */		prefetch	[%g2+320],0
2610/* 0x06a8	     */		fmuld	%f40,%f58,%f50
2611/* 0x06ac	     */		faddd	%f54,%f46,%f32
2612/* 0x06b0	     */		ldd	[%l6-24],%f62
2613/* 0x06b4	     */		std	%f32,[%g2-112]
2614/* 0x06b8	     */		fmuld	%f42,%f60,%f46
2615/* 0x06bc	     */		ldd	[%l7-24],%f60
2616/* 0x06c0	     */		faddd	%f44,%f48,%f48
2617/* 0x06c4	     */		ldd	[%g2-48],%f54
2618/* 0x06c8	     */		fmuld	%f40,%f62,%f26
2619/* 0x06cc	     */		faddd	%f56,%f30,%f32
2620/* 0x06d0	     */		ldd	[%l6-16],%f58
2621/* 0x06d4	     */		std	%f32,[%g2-96]
2622/* 0x06d8	     */		fmuld	%f42,%f60,%f30
2623/* 0x06dc	     */		ldd	[%l7-16],%f32
2624/* 0x06e0	     */		faddd	%f46,%f50,%f60
2625/* 0x06e4	     */		ldd	[%g2-32],%f56
2626/* 0x06e8	     */		prefetch	[%g2+352],0
2627/* 0x06ec	     */		fmuld	%f40,%f58,%f44
2628/* 0x06f0	     */		faddd	%f48,%f28,%f62
2629/* 0x06f4	     */		ldd	[%l6-8],%f46
2630/* 0x06f8	     */		std	%f62,[%g2-80]
2631/* 0x06fc	     */		fmuld	%f42,%f32,%f48
2632/* 0x0700	     */		ldd	[%l7-8],%f32
2633/* 0x0704	     */		faddd	%f30,%f26,%f62
2634/* 0x0708	     */		ble,pt	%icc,.L900000837
2635/* 0x070c	     */		ldd	[%g2-16],%f58
2636                       .L900000840:
2637/* 0x0710	 342 */		fmuld	%f40,%f46,%f46
2638/* 0x0714	     */		faddd	%f62,%f54,%f62
2639/* 0x0718	     */		std	%f62,[%g2-48]
2640/* 0x071c	     */		cmp	%l5,%l3
2641/* 0x0720	     */		fmuld	%f42,%f32,%f50
2642/* 0x0724	     */		faddd	%f48,%f44,%f48
2643/* 0x0728	     */		or	%g0,%l7,%g4
2644/* 0x072c	     */		or	%g0,%l6,%g3
2645/* 0x0730	     */		faddd	%f60,%f52,%f60
2646/* 0x0734	     */		std	%f60,[%g2-64]
2647/* 0x0738	     */		or	%g0,%o7,%l2
2648/* 0x073c	     */		add	%l4,8,%l4
2649/* 0x0740	     */		faddd	%f50,%f46,%f54
2650/* 0x0744	     */		faddd	%f48,%f56,%f56
2651/* 0x0748	     */		std	%f56,[%g2-32]
2652/* 0x074c	     */		faddd	%f54,%f58,%f58
2653/* 0x0750	     */		bg,pn	%icc,.L77000368
2654/* 0x0754	     */		std	%f58,[%g2-16]
2655                       .L77000481:
2656/* 0x0758	 342 */		ldd	[%g4],%f44
2657                       .L900000850:
2658/* 0x075c	 342 */		ldd	[%g3],%f48
2659/* 0x0760	     */		fmuld	%f42,%f44,%f58
2660/* 0x0764	     */		sra	%l4,0,%l7
2661/* 0x0768	     */		add	%l5,1,%l5
2662/* 0x076c	     */		sllx	%l7,3,%g2
2663/* 0x0770	     */		add	%g4,8,%g4
2664/* 0x0774	     */		ldd	[%l2+%g2],%f56
2665/* 0x0778	     */		cmp	%l5,%l3
2666/* 0x077c	     */		add	%l4,2,%l4
2667/* 0x0780	     */		fmuld	%f40,%f48,%f54
2668/* 0x0784	     */		add	%g3,8,%g3
2669/* 0x0788	     */		faddd	%f58,%f54,%f52
2670/* 0x078c	     */		faddd	%f52,%f56,%f62
2671/* 0x0790	     */		std	%f62,[%l2+%g2]
2672/* 0x0794	     */		ble,a,pt	%icc,.L900000850
2673/* 0x0798	     */		ldd	[%g4],%f44
2674                       .L77000368:
2675/* 0x079c	 344 */		cmp	%o5,15
2676/* 0x07a0	     */		bne,pn	%icc,.L77000483
2677/* 0x07a4	 345 */		srl	%l1,31,%g4
2678                       .L77000478:
2679/* 0x07a8	 345 */		add	%l1,%g4,%l4
2680/* 0x07ac	     */		sra	%l4,1,%o7
2681/* 0x07b0	     */		add	%o7,1,%o4
2682/* 0x07b4	     */		sll	%o4,1,%l6
2683/* 0x07b8	     */		cmp	%l6,%o2
2684/* 0x07bc	     */		bge,pn	%icc,.L77000392
2685/* 0x07c0	     */		fmovd	%f0,%f42
2686                       .L77000508:
2687/* 0x07c4	 345 */		sra	%l6,0,%l4
2688/* 0x07c8	     */		sllx	%l4,3,%g2
2689/* 0x07cc	     */		fmovd	%f0,%f32
2690/* 0x07d0	     */		sub	%o2,1,%l5
2691/* 0x07d4	     */		ldd	[%g2+%i3],%f40
2692/* 0x07d8	     */		add	%g2,%i3,%g3
2693                       .L900000849:
2694/* 0x07dc	 345 */		fdtox	%f40,%f10
2695/* 0x07e0	     */		ldd	[%g3+8],%f52
2696/* 0x07e4	     */		add	%l6,2,%l6
2697/* 0x07e8	     */		cmp	%l6,%l5
2698/* 0x07ec	     */		fdtox	%f52,%f2
2699/* 0x07f0	     */		fmovd	%f10,%f30
2700/* 0x07f4	     */		fmovs	%f0,%f10
2701/* 0x07f8	     */		fmovs	%f0,%f2
2702/* 0x07fc	     */		fxtod	%f10,%f10
2703/* 0x0800	     */		fxtod	%f2,%f2
2704/* 0x0804	     */		fdtox	%f52,%f28
2705/* 0x0808	     */		faddd	%f10,%f32,%f56
2706/* 0x080c	     */		std	%f56,[%g3]
2707/* 0x0810	     */		faddd	%f2,%f42,%f62
2708/* 0x0814	     */		std	%f62,[%g3+8]
2709/* 0x0818	     */		fitod	%f30,%f32
2710/* 0x081c	     */		add	%g3,16,%g3
2711/* 0x0820	     */		fitod	%f28,%f42
2712/* 0x0824	     */		ble,a,pt	%icc,.L900000849
2713/* 0x0828	     */		ldd	[%g3],%f40
2714                       .L77000392:
2715/* 0x082c	 346 */		or	%g0,0,%o5
2716                       .L77000483:
2717/* 0x0830	 350 */		fdtox	%f34,%f6
2718/* 0x0834	     */		add	%l1,1,%l1
2719/* 0x0838	     */		cmp	%l1,%o3
2720/* 0x083c	     */		add	%o5,1,%o5
2721/* 0x0840	     */		add	%l2,8,%l2
2722/* 0x0844	     */		add	%l0,8,%l0
2723/* 0x0848	     */		fmovs	%f0,%f6
2724/* 0x084c	     */		fxtod	%f6,%f46
2725/* 0x0850	     */		fmuld	%f46,%f14,%f56
2726/* 0x0854	     */		fmuld	%f56,%f36,%f44
2727/* 0x0858	     */		fdtox	%f44,%f48
2728/* 0x085c	     */		fxtod	%f48,%f58
2729/* 0x0860	     */		fmuld	%f58,%f38,%f54
2730/* 0x0864	     */		fsubd	%f56,%f54,%f40
2731/* 0x0868	     */		ble,a,pt	%icc,.L900000848
2732/* 0x086c	 337 */		ldd	[%i4],%f42
2733                       .L77000378:
2734/* 0x0870	 409 */		ldx	[%i3+%o0],%l1
2735                       .L900000852:
2736/* 0x0874	 409 */		add	%i3,%o0,%l4
2737/* 0x0878	     */		ldx	[%l4+8],%i1
2738/* 0x087c	     */		cmp	%l1,0
2739/* 0x0880	     */		bne,pn	%xcc,.L77000403
2740/* 0x0884	     */		or	%g0,0,%g5
2741                       .L77000402:
2742/* 0x0888	 409 */		or	%g0,0,%i3
2743/* 0x088c	     */		ba	.L900000847
2744/* 0x0890	     */		cmp	%i1,0
2745                       .L77000403:
2746/* 0x0894	 409 */		srlx	%l1,52,%o5
2747/* 0x0898	     */		sethi	%hi(0xfff00000),%i3
2748/* 0x089c	     */		sllx	%i3,32,%o2
2749/* 0x08a0	     */		sethi	%hi(0x40000000),%o0
2750/* 0x08a4	     */		sllx	%o0,22,%o4
2751/* 0x08a8	     */		or	%g0,1023,%l0
2752/* 0x08ac	     */		xor	%o2,-1,%o3
2753/* 0x08b0	     */		sub	%l0,%o5,%o7
2754/* 0x08b4	     */		and	%l1,%o3,%l1
2755/* 0x08b8	     */		add	%o7,52,%i4
2756/* 0x08bc	     */		or	%l1,%o4,%o1
2757/* 0x08c0	     */		cmp	%i1,0
2758/* 0x08c4	     */		srlx	%o1,%i4,%i3
2759                       .L900000847:
2760/* 0x08c8	 409 */		bne,pn	%xcc,.L77000409
2761/* 0x08cc	     */		or	%g0,0,%o7
2762                       .L77000408:
2763/* 0x08d0	 409 */		ba	.L900000846
2764/* 0x08d4	 350 */		cmp	%g1,0
2765                       .L77000409:
2766/* 0x08d8	 409 */		srlx	%i1,52,%l2
2767/* 0x08dc	     */		sethi	%hi(0xfff00000),%o7
2768/* 0x08e0	     */		sllx	%o7,32,%i4
2769/* 0x08e4	     */		sethi	%hi(0x40000000),%i5
2770/* 0x08e8	     */		sllx	%i5,22,%l6
2771/* 0x08ec	     */		or	%g0,1023,%l5
2772/* 0x08f0	     */		xor	%i4,-1,%o1
2773/* 0x08f4	     */		sub	%l5,%l2,%g2
2774/* 0x08f8	     */		and	%i1,%o1,%l7
2775/* 0x08fc	     */		add	%g2,52,%g3
2776/* 0x0900	     */		or	%l7,%l6,%g4
2777/* 0x0904	 350 */		cmp	%g1,0
2778/* 0x0908	 409 */		srlx	%g4,%g3,%o7
2779                       .L900000846:
2780/* 0x090c	 350 */		ble,pn	%icc,.L77000397
2781/* 0x0910	     */		or	%g0,0,%l5
2782                       .L77000510:
2783/* 0x0914	 409 */		sethi	%hi(0xfff00000),%g4
2784/* 0x0918	     */		sllx	%g4,32,%o0
2785/* 0x091c	   0 */		or	%g0,-1,%i5
2786/* 0x0920	 409 */		srl	%i5,0,%l7
2787/* 0x0924	     */		sethi	%hi(0x40000000),%i1
2788/* 0x0928	     */		sllx	%i1,22,%l6
2789/* 0x092c	     */		sethi	%hi(0xfc00),%i4
2790/* 0x0930	     */		xor	%o0,-1,%g2
2791/* 0x0934	     */		add	%i4,1023,%l2
2792/* 0x0938	     */		or	%g0,2,%g4
2793/* 0x093c	     */		or	%g0,%i2,%g3
2794                       .L77000395:
2795/* 0x0940	 409 */		sra	%g4,0,%o2
2796/* 0x0944	     */		add	%g4,1,%o3
2797/* 0x0948	     */		sllx	%o2,3,%o0
2798/* 0x094c	     */		sra	%o3,0,%o5
2799/* 0x0950	     */		ldx	[%l4+%o0],%o4
2800/* 0x0954	     */		sllx	%o5,3,%l0
2801/* 0x0958	     */		and	%i3,%l7,%o1
2802/* 0x095c	     */		ldx	[%l4+%l0],%i4
2803/* 0x0960	     */		cmp	%o4,0
2804/* 0x0964	     */		bne,pn	%xcc,.L77000415
2805/* 0x0968	 350 */		and	%o7,%l2,%i5
2806                       .L77000414:
2807/* 0x096c	 409 */		or	%g0,0,%l1
2808/* 0x0970	     */		ba	.L900000845
2809/* 0x0974	     */		add	%g5,%o1,%i1
2810                       .L77000415:
2811/* 0x0978	 409 */		srlx	%o4,52,%o3
2812/* 0x097c	     */		and	%o4,%g2,%l1
2813/* 0x0980	     */		or	%g0,52,%o0
2814/* 0x0984	     */		sub	%o3,1023,%l0
2815/* 0x0988	     */		or	%l1,%l6,%o4
2816/* 0x098c	     */		sub	%o0,%l0,%o5
2817/* 0x0990	     */		srlx	%o4,%o5,%l1
2818/* 0x0994	     */		add	%g5,%o1,%i1
2819                       .L900000845:
2820/* 0x0998	 409 */		srax	%i3,32,%g5
2821/* 0x099c	     */		cmp	%i4,0
2822/* 0x09a0	     */		bne,pn	%xcc,.L77000421
2823/* 0x09a4	 350 */		sllx	%i5,16,%o2
2824                       .L77000420:
2825/* 0x09a8	 409 */		or	%g0,0,%o4
2826/* 0x09ac	     */		ba	.L900000844
2827/* 0x09b0	 350 */		add	%i1,%o2,%o5
2828                       .L77000421:
2829/* 0x09b4	 409 */		srlx	%i4,52,%o4
2830/* 0x09b8	     */		or	%g0,52,%o0
2831/* 0x09bc	     */		sub	%o4,1023,%o3
2832/* 0x09c0	     */		and	%i4,%g2,%i3
2833/* 0x09c4	     */		or	%i3,%l6,%o5
2834/* 0x09c8	     */		sub	%o0,%o3,%l0
2835/* 0x09cc	     */		srlx	%o5,%l0,%o4
2836/* 0x09d0	 350 */		add	%i1,%o2,%o5
2837                       .L900000844:
2838/* 0x09d4	 350 */		srax	%o7,16,%i4
2839/* 0x09d8	     */		srax	%o5,32,%i5
2840/* 0x09dc	     */		add	%i4,%i5,%o1
2841/* 0x09e0	     */		add	%l5,1,%l5
2842/* 0x09e4	     */		and	%o5,%l7,%i1
2843/* 0x09e8	     */		add	%g5,%o1,%g5
2844/* 0x09ec	     */		st	%i1,[%g3]
2845/* 0x09f0	     */		or	%g0,%l1,%i3
2846/* 0x09f4	     */		or	%g0,%o4,%o7
2847/* 0x09f8	     */		add	%g4,2,%g4
2848/* 0x09fc	     */		cmp	%l5,%l3
2849/* 0x0a00	     */		ble,pt	%icc,.L77000395
2850/* 0x0a04	     */		add	%g3,4,%g3
2851                       .L77000397:
2852/* 0x0a08	 409 */		sethi	%hi(0xfc00),%l4
2853/* 0x0a0c	     */		sra	%l5,0,%i5
2854/* 0x0a10	     */		add	%l4,1023,%i1
2855/* 0x0a14	     */		add	%g5,%i3,%l5
2856/* 0x0a18	     */		and	%o7,%i1,%g5
2857/* 0x0a1c	     */		sllx	%g5,16,%l2
2858/* 0x0a20	     */		sllx	%i5,2,%l7
2859/* 0x0a24	 413 */		sra	%g1,0,%g2
2860/* 0x0a28	 409 */		add	%l5,%l2,%l6
2861/* 0x0a2c	     */		st	%l6,[%i2+%l7]
2862/* 0x0a30	 413 */		sllx	%g2,2,%g3
2863/* 0x0a34	     */		ld	[%i2+%g3],%g4
2864/* 0x0a38	     */		cmp	%g4,0
2865/* 0x0a3c	     */		bgu,pn	%icc,.L77000486
2866/* 0x0a40	     */		cmp	%l3,0
2867                       .L77000427:
2868/* 0x0a44	 413 */		bl,pn	%icc,.L77000486
2869/* 0x0a48	     */		or	%g0,%l3,%i5
2870                       .L77000512:
2871/* 0x0a4c	 413 */		sra	%l3,0,%o5
2872/* 0x0a50	     */		sllx	%o5,2,%l7
2873/* 0x0a54	     */		ld	[%l7+%i0],%o5
2874/* 0x0a58	     */		add	%l7,%i2,%o1
2875/* 0x0a5c	     */		add	%l7,%i0,%i4
2876                       .L900000843:
2877/* 0x0a60	 413 */		ld	[%o1],%i1
2878/* 0x0a64	     */		cmp	%i1,%o5
2879/* 0x0a68	     */		bne,pn	%icc,.L77000435
2880/* 0x0a6c	     */		sub	%o1,4,%o1
2881                       .L77000431:
2882/* 0x0a70	 413 */		sub	%i4,4,%i4
2883/* 0x0a74	     */		subcc	%i5,1,%i5
2884/* 0x0a78	     */		bpos,a,pt	%icc,.L900000843
2885/* 0x0a7c	     */		ld	[%i4],%o5
2886                       .L900000827:
2887/* 0x0a80	 413 */		ba	.L900000842
2888/* 0x0a84	 350 */		cmp	%g1,0
2889                       .L77000435:
2890/* 0x0a88	 413 */		sra	%i5,0,%o0
2891/* 0x0a8c	     */		sllx	%o0,2,%l1
2892/* 0x0a90	     */		ld	[%i0+%l1],%i3
2893/* 0x0a94	     */		ld	[%i2+%l1],%l0
2894/* 0x0a98	     */		cmp	%l0,%i3
2895/* 0x0a9c	     */		bleu,pt	%icc,.L77000379
2896/* 0x0aa0	     */		nop
2897                       .L77000486:
2898/* 0x0aa4	 350 */		cmp	%g1,0
2899                       .L900000842:
2900/* 0x0aa8	 350 */		ble,pn	%icc,.L77000379
2901/* 0x0aac	     */		add	%l3,1,%g3
2902                       .L77000511:
2903/* 0x0ab0	 350 */		or	%g0,0,%l5
2904/* 0x0ab4	     */		cmp	%g3,10
2905/* 0x0ab8	     */		bl,pn	%icc,.L77000487
2906/* 0x0abc	     */		or	%g0,0,%g1
2907                       .L900000835:
2908/* 0x0ac0	 350 */		prefetch	[%i2],22
2909/* 0x0ac4	     */		add	%i0,4,%l2
2910/* 0x0ac8	     */		prefetch	[%i2+64],22
2911/* 0x0acc	     */		add	%i2,8,%o5
2912/* 0x0ad0	     */		sub	%l3,7,%i0
2913/* 0x0ad4	     */		prefetch	[%i2+128],22
2914/* 0x0ad8	     */		or	%g0,2,%l5
2915/* 0x0adc	     */		prefetch	[%i2+192],22
2916/* 0x0ae0	     */		prefetch	[%i2+256],22
2917/* 0x0ae4	     */		prefetch	[%i2+320],22
2918/* 0x0ae8	     */		prefetch	[%i2+384],22
2919/* 0x0aec	     */		ld	[%l2-4],%l7
2920/* 0x0af0	     */		ld	[%o5-4],%l6
2921/* 0x0af4	     */		prefetch	[%o5+440],22
2922/* 0x0af8	     */		prefetch	[%o5+504],22
2923/* 0x0afc	     */		ld	[%i2],%i2
2924/* 0x0b00	     */		sub	%i2,%l7,%g3
2925/* 0x0b04	     */		st	%g3,[%o5-8]
2926/* 0x0b08	     */		srax	%g3,32,%l7
2927                       .L900000833:
2928/* 0x0b0c	 350 */		add	%l5,8,%l5
2929/* 0x0b10	     */		add	%o5,32,%o5
2930/* 0x0b14	     */		ld	[%l2],%i5
2931/* 0x0b18	     */		prefetch	[%o5+496],22
2932/* 0x0b1c	     */		cmp	%l5,%i0
2933/* 0x0b20	     */		add	%l2,32,%l2
2934/* 0x0b24	     */		sub	%l6,%i5,%g5
2935/* 0x0b28	     */		add	%g5,%l7,%o0
2936/* 0x0b2c	     */		ld	[%o5-32],%l4
2937/* 0x0b30	     */		st	%o0,[%o5-36]
2938/* 0x0b34	     */		srax	%o0,32,%i3
2939/* 0x0b38	     */		ld	[%l2-28],%i1
2940/* 0x0b3c	     */		sub	%l4,%i1,%i4
2941/* 0x0b40	     */		add	%i4,%i3,%o1
2942/* 0x0b44	     */		ld	[%o5-28],%o3
2943/* 0x0b48	     */		st	%o1,[%o5-32]
2944/* 0x0b4c	     */		srax	%o1,32,%l1
2945/* 0x0b50	     */		ld	[%l2-24],%o2
2946/* 0x0b54	     */		sub	%o3,%o2,%g2
2947/* 0x0b58	     */		add	%g2,%l1,%o7
2948/* 0x0b5c	     */		ld	[%o5-24],%l0
2949/* 0x0b60	     */		st	%o7,[%o5-28]
2950/* 0x0b64	     */		srax	%o7,32,%l6
2951/* 0x0b68	     */		ld	[%l2-20],%o4
2952/* 0x0b6c	     */		sub	%l0,%o4,%g1
2953/* 0x0b70	     */		add	%g1,%l6,%l7
2954/* 0x0b74	     */		ld	[%o5-20],%i2
2955/* 0x0b78	     */		st	%l7,[%o5-24]
2956/* 0x0b7c	     */		srax	%l7,32,%g4
2957/* 0x0b80	     */		ld	[%l2-16],%g3
2958/* 0x0b84	     */		sub	%i2,%g3,%i5
2959/* 0x0b88	     */		add	%i5,%g4,%g5
2960/* 0x0b8c	     */		ld	[%o5-16],%i1
2961/* 0x0b90	     */		st	%g5,[%o5-20]
2962/* 0x0b94	     */		srax	%g5,32,%l4
2963/* 0x0b98	     */		ld	[%l2-12],%o0
2964/* 0x0b9c	     */		sub	%i1,%o0,%i3
2965/* 0x0ba0	     */		add	%i3,%l4,%i4
2966/* 0x0ba4	     */		ld	[%o5-12],%o2
2967/* 0x0ba8	     */		st	%i4,[%o5-16]
2968/* 0x0bac	     */		srax	%i4,32,%o3
2969/* 0x0bb0	     */		ld	[%l2-8],%o1
2970/* 0x0bb4	     */		sub	%o2,%o1,%l1
2971/* 0x0bb8	     */		add	%l1,%o3,%g2
2972/* 0x0bbc	     */		ld	[%o5-8],%o4
2973/* 0x0bc0	     */		st	%g2,[%o5-12]
2974/* 0x0bc4	     */		srax	%g2,32,%l0
2975/* 0x0bc8	     */		ld	[%l2-4],%o7
2976/* 0x0bcc	     */		sub	%o4,%o7,%l6
2977/* 0x0bd0	     */		add	%l6,%l0,%g1
2978/* 0x0bd4	     */		ld	[%o5-4],%l6
2979/* 0x0bd8	     */		st	%g1,[%o5-8]
2980/* 0x0bdc	     */		ble,pt	%icc,.L900000833
2981/* 0x0be0	     */		srax	%g1,32,%l7
2982                       .L900000836:
2983/* 0x0be4	 350 */		ld	[%l2],%l0
2984/* 0x0be8	     */		add	%l2,4,%i0
2985/* 0x0bec	     */		or	%g0,%o5,%i2
2986/* 0x0bf0	     */		cmp	%l5,%l3
2987/* 0x0bf4	     */		sub	%l6,%l0,%l6
2988/* 0x0bf8	     */		add	%l6,%l7,%g1
2989/* 0x0bfc	     */		st	%g1,[%o5-4]
2990/* 0x0c00	     */		bg,pn	%icc,.L77000379
2991/* 0x0c04	     */		srax	%g1,32,%g1
2992                       .L77000487:
2993/* 0x0c08	 350 */		ld	[%i2],%o4
2994                       .L900000841:
2995/* 0x0c0c	 350 */		ld	[%i0],%i3
2996/* 0x0c10	     */		add	%g1,%o4,%l0
2997/* 0x0c14	     */		add	%l5,1,%l5
2998/* 0x0c18	     */		cmp	%l5,%l3
2999/* 0x0c1c	     */		add	%i0,4,%i0
3000/* 0x0c20	     */		sub	%l0,%i3,%l6
3001/* 0x0c24	     */		st	%l6,[%i2]
3002/* 0x0c28	     */		srax	%l6,32,%g1
3003/* 0x0c2c	     */		add	%i2,4,%i2
3004/* 0x0c30	     */		ble,a,pt	%icc,.L900000841
3005/* 0x0c34	     */		ld	[%i2],%o4
3006                       .L77000379:
3007/* 0x0c38	 405 */		ret	! Result =
3008/* 0x0c3c	     */		restore	%g0,%g0,%g0
3009/* 0x0c40	   0 */		.type	mont_mulf_noconv,2
3010/* 0x0c40	   0 */		.size	mont_mulf_noconv,(.-mont_mulf_noconv)
3011
3012! Begin Disassembling Debug Info
3013	.xstabs ".stab.index","V=10.0;DBG_GEN=4.14.14;cd;backend;Xa;O;R=Sun C 5.5 Patch 112760-07 2004/02/03",60,0,0,0
3014	.xstabs ".stab.index","/workspace/ferenc/algorithms/bignum/unified/mont_mulf; /ws/onnv-tools/SUNWspro/SOS8/prod/bin/cc -D_KERNEL -DRF_INLINE_MACROS -fast -xarch=v9 -xO5 -xstrconst -xdepend -Xa -xchip=ultra3 -xcode=abs32 -Wc,-Qrm-Qd -Wc,-Qrm-Qf -Wc,-assembly -V -W0,-xp -c conv_v9.il -o mont_mulf.o  mont_mulf.c",52,0,0,0
3015
3016! End Disassembling Debug Info
3017
3018! Begin Disassembling Ident
3019	.ident	"cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (NO SOURCE LINE)
3020	.ident	"@(#)mont_mulf.c\t1.2\t01/09/24 SMI"	! (/tmp/acompAAApja4Fx:8)
3021	.ident	"@(#)types.h\t1.74\t03/08/07 SMI"	! (/tmp/acompAAApja4Fx:9)
3022	.ident	"@(#)isa_defs.h\t1.20\t99/05/04 SMI"	! (/tmp/acompAAApja4Fx:10)
3023	.ident	"@(#)feature_tests.h\t1.18\t99/07/26 SMI"	! (/tmp/acompAAApja4Fx:11)
3024	.ident	"@(#)machtypes.h\t1.13\t99/05/04 SMI"	! (/tmp/acompAAApja4Fx:12)
3025	.ident	"@(#)inttypes.h\t1.2\t98/01/16 SMI"	! (/tmp/acompAAApja4Fx:13)
3026	.ident	"@(#)int_types.h\t1.6\t97/08/20 SMI"	! (/tmp/acompAAApja4Fx:14)
3027	.ident	"@(#)int_limits.h\t1.6\t99/08/06 SMI"	! (/tmp/acompAAApja4Fx:15)
3028	.ident	"@(#)int_const.h\t1.2\t96/07/08 SMI"	! (/tmp/acompAAApja4Fx:16)
3029	.ident	"@(#)int_fmtio.h\t1.2\t96/07/08 SMI"	! (/tmp/acompAAApja4Fx:17)
3030	.ident	"@(#)types32.h\t1.4\t98/02/13 SMI"	! (/tmp/acompAAApja4Fx:18)
3031	.ident	"@(#)select.h\t1.17\t01/08/15 SMI"	! (/tmp/acompAAApja4Fx:19)
3032	.ident	"@(#)math.h\t2.11\t00/09/07 SMI"	! (/tmp/acompAAApja4Fx:20)
3033	.ident	"@(#)math_iso.h\t1.2\t00/09/07 SMI"	! (/tmp/acompAAApja4Fx:21)
3034	.ident	"@(#)floatingpoint.h\t2.5\t99/06/22 SMI"	! (/tmp/acompAAApja4Fx:22)
3035	.ident	"@(#)stdio_tag.h\t1.3\t98/04/20 SMI"	! (/tmp/acompAAApja4Fx:23)
3036	.ident	"@(#)ieeefp.h\t2.8 99/10/29"	! (/tmp/acompAAApja4Fx:24)
3037	.ident	"acomp: Sun C 5.5 Patch 112760-07 2004/02/03"	! (/tmp/acompAAApja4Fx:57)
3038	.ident	"iropt: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (/tmp/acompAAApja4Fx:58)
3039	.ident	"cg: Sun Compiler Common 7.1 Patch 112763-10 2004/01/27"	! (NO SOURCE LINE)
3040! End Disassembling Ident
3041
/*
 * FZERO - write zero into every even-numbered floating-point register
 * from %f0 through %f62.
 *
 * %f0 and %f2 are cleared with fzero. Each remaining register is then
 * written with either the sum (faddd) or the product (fmuld) of %f0 and
 * %f2; both operands are zero, so every result is zero as well.
 *
 * The backslash continuations and ';' separators make the whole macro
 * expand to one logical line of instructions.
 *
 * NOTE(review): the faddd/fmuld alternation presumably lets the adds and
 * multiplies overlap; that intent is not stated in this file - confirm.
 */
3042#define	FZERO				\
3043	fzero	%f0			;\
3044	fzero	%f2			;\
3045	faddd	%f0, %f2, %f4		;\
3046	fmuld	%f0, %f2, %f6		;\
3047	faddd	%f0, %f2, %f8		;\
3048	fmuld	%f0, %f2, %f10		;\
3049	faddd	%f0, %f2, %f12		;\
3050	fmuld	%f0, %f2, %f14		;\
3051	faddd	%f0, %f2, %f16		;\
3052	fmuld	%f0, %f2, %f18		;\
3053	faddd	%f0, %f2, %f20		;\
3054	fmuld	%f0, %f2, %f22		;\
3055	faddd	%f0, %f2, %f24		;\
3056	fmuld	%f0, %f2, %f26		;\
3057	faddd	%f0, %f2, %f28		;\
3058	fmuld	%f0, %f2, %f30		;\
3059	faddd	%f0, %f2, %f32		;\
3060	fmuld	%f0, %f2, %f34		;\
3061	faddd	%f0, %f2, %f36		;\
3062	fmuld	%f0, %f2, %f38		;\
3063	faddd	%f0, %f2, %f40		;\
3064	fmuld	%f0, %f2, %f42		;\
3065	faddd	%f0, %f2, %f44		;\
3066	fmuld	%f0, %f2, %f46		;\
3067	faddd	%f0, %f2, %f48		;\
3068	fmuld	%f0, %f2, %f50		;\
3069	faddd	%f0, %f2, %f52		;\
3070	fmuld	%f0, %f2, %f54		;\
3071	faddd	%f0, %f2, %f56		;\
3072	fmuld	%f0, %f2, %f58		;\
3073	faddd	%f0, %f2, %f60		;\
3074	fmuld	%f0, %f2, %f62
3075
3076#include "assym.h"
3077
3078/*
3079 * In the routine below, we check/set FPRS_FEF bit since
3080 * we don't want to take a fp_disabled trap. We need not
3081 * check/set PSTATE_PEF bit as it is done early during boot.
3082 */
/*
 * big_savefp: %o0 = address of the save area.
 *
 * Records floating-point state in the area addressed by %o0:
 *   - the current %fprs is stored as a 32-bit word at offset FPU_FPRS;
 *   - if FPRS_FEF was set on entry, BSTORE_FPREGS(%o0, %o4) is invoked
 *     (the macro is not defined in the visible part of this file;
 *     presumably it stores the FP registers into the save area with
 *     %o4 as a scratch register - TODO confirm);
 *   - if FPRS_FEF was clear on entry, no FP registers are stored and
 *     %fprs is overwritten with FPRS_FEF alone;
 *   - on both paths %fsr is stored as a 64-bit value at offset FPU_FSR.
 *
 * Leaf routine (returns with retl). Writes %o2 and hands %o4 to
 * BSTORE_FPREGS.
 */
3083	ENTRY(big_savefp)
3084	rd	%fprs, %o2			! %o2 = current %fprs
3085	st	%o2, [%o0 + FPU_FPRS]		! remember it for big_restorefp
3086	andcc	%o2, FPRS_FEF, %g0		! is FPRS_FEF set?
3087	bnz,a,pt	%icc, .fregs_save	! yes, go to save
3088	nop					! delay slot, annulled if branch not taken
3089	wr	%g0, FPRS_FEF, %fprs		! else, set the bit
3090        stx     %fsr, [%o0 + FPU_FSR]	! store %fsr
3091	retl
3092	nop					! delay slot
3093.fregs_save:
3094	BSTORE_FPREGS(%o0, %o4)
3095        stx     %fsr, [%o0 + FPU_FSR]	! store %fsr
3096	retl
3097	nop					! delay slot
3098	SET_SIZE(big_savefp)
3099
3100
/*
 * big_restorefp: %o0 = address of the save area.
 *
 * Reloads floating-point state from the area addressed by %o0, reading
 * the same FPU_FSR and FPU_FPRS offsets that big_savefp writes:
 *   - %fsr is loaded from the 64-bit value at offset FPU_FSR;
 *   - if the saved %fprs word has FPRS_FEF set, BLOAD_FPREGS(%o0, %o2)
 *     is invoked (the macro is not defined in the visible part of this
 *     file; presumably it reloads the FP registers from the save area
 *     with %o2 as a scratch register - TODO confirm) and %fprs is then
 *     written with the saved value;
 *   - otherwise FZERO clears %f0 through %f62 and %fprs is written
 *     with 0.
 *
 * NOTE(review): on the FZERO path the faddd/fmuld instructions execute
 * after %fsr has been reloaded; confirm that any update they make to the
 * %fsr status fields is acceptable.
 *
 * Leaf routine (returns with retl). Writes %o1 and hands %o2 to
 * BLOAD_FPREGS.
 */
3101	ENTRY(big_restorefp)
3102	ldx     [%o0 + FPU_FSR], %fsr	! restore %fsr
3103	ld	[%o0 + FPU_FPRS], %o1	! %o1 = saved %fprs word
3104	andcc   %o1, FPRS_FEF, %g0	! is FPRS_FEF set in saved %fprs?
3105	bnz,pt	%icc, .fregs_restore	! yes, go to restore
3106	nop				! delay slot
3107	FZERO				! zero out to avoid leaks
3108	wr	%g0, 0, %fprs		! %fprs = 0
3109	retl
3110	nop				! delay slot
3111.fregs_restore:
3112	BLOAD_FPREGS(%o0, %o2)
3113	wr      %o1, 0, %fprs		! put back the saved %fprs
3114	retl
3115	nop				! delay slot
3116	SET_SIZE(big_restorefp)
3117