xref: /titanic_50/usr/src/lib/libmvec/common/vis/__vatanf.S (revision 4eab410fb63816fe2c0ad0fd18b4c948613f6616)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vatanf.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35
36.CONST_TBL:
37	.word	0x3fefffff, 0xfffccbbc	! K0 =  9.99999999976686608841e-01
38	.word	0xbfd55554, 0x51c6b90f	! K1 = -3.33333091601972730504e-01
39	.word	0x3fc98d6d, 0x926596cc	! K2 =  1.99628540499523379702e-01
40	.word	0x00020000, 0x00000000	! DC1
41	.word	0xfffc0000, 0x00000000	! DC2
42	.word	0x7ff00000, 0x00000000	! DC3
43	.word	0x3ff00000, 0x00000000	! DONE = 1.0
44	.word	0x40000000, 0x00000000	! DTWO = 2.0
45
46! parr0[i] = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
47
48	.word	0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
49	.word	0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
50	.word	0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
51	.word	0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
52	.word	0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
53	.word	0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
54	.word	0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
55	.word	0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
56	.word	0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
57	.word	0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
58	.word	0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
59	.word	0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
60	.word	0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
61	.word	0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
62	.word	0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
63	.word	0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
64	.word	0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
65	.word	0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
66	.word	0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
67	.word	0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
68	.word	0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
69	.word	0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
70	.word	0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
71	.word	0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
72	.word	0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
73	.word	0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
74	.word	0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
75	.word	0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
76	.word	0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
77	.word	0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
78	.word	0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
79	.word	0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804
80
81	.word	0x3ff00000, 0x00000000	! sign_arr[0] =  1.0
82	.word	0xbff00000, 0x00000000	! sign_arr[1] = -1.0
83
84! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
85
86	.word	0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
87	.word	0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
88	.word	0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
89	.word	0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
90	.word	0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
91	.word	0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
92	.word	0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
93	.word	0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
94	.word	0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
95	.word	0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
96	.word	0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
97	.word	0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
98	.word	0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
99	.word	0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
100	.word	0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
101	.word	0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
102	.word	0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
103	.word	0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
104	.word	0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
105	.word	0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
106	.word	0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
107	.word	0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
108	.word	0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
109	.word	0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
110	.word	0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
111	.word	0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
112	.word	0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
113	.word	0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
114	.word	0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
115	.word	0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
116	.word	0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
117	.word	0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
118	.word	0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
119	.word	0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
120	.word	0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
121	.word	0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
122	.word	0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
123	.word	0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
124	.word	0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
125	.word	0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
126	.word	0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
127	.word	0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
128	.word	0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
129	.word	0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
130	.word	0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
131	.word	0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
132	.word	0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
133	.word	0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
134	.word	0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
135	.word	0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
136	.word	0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
137	.word	0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
138	.word	0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
139	.word	0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
140	.word	0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
141	.word	0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
142	.word	0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
143	.word	0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
144	.word	0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
145	.word	0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
146	.word	0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
147	.word	0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
148	.word	0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
149	.word	0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
150	.word	0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
151	.word	0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
152	.word	0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
153	.word	0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
154	.word	0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
155	.word	0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
156	.word	0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
157	.word	0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
158	.word	0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
159	.word	0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
160	.word	0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
161	.word	0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
162	.word	0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
163	.word	0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886
164
/*
 * Floating-point registers holding constants that __vatanf loads once from
 * .CONST_TBL at entry.  K0, K1 and K2 are the polynomial coefficients,
 * DONE is 1.0 and DTWO is 2.0.  DC1 (0x00020000_00000000) is the value added
 * with fpadd32 and DC2 (0xfffc0000_00000000) is the fand mask applied right
 * after it; DC3 (0x7ff00000_00000000) is the fand mask applied to div.
 */
165#define DC2		%f2
166#define DTWO		%f6
167#define DONE		%f52
168#define K0		%f54
169#define K1		%f56
170#define K2		%f58
171#define DC1		%f60
172#define DC3		%f62
173
/*
 * stridex and stridey hold the incoming strides shifted left by 2 (i.e.
 * multiplied by 4) so that they can be added directly to the px and py
 * pointers.  The two MASK_ registers are initialised at entry to the
 * constants spelled out in their names.  MASK_0x7fffffff reuses %i1, which
 * is why the incoming %i1 (px) is stored to tmp_px before the mask is built.
 */
174#define stridex		%o2
175#define stridey		%o3
176#define MASK_0x7fffffff	%i1
177#define MASK_0x100000	%i5
178
/*
 * Scratch slots addressed relative to %fp.  tmp_px and tmp_counter keep the
 * source pointer and the element count, both of which are reloaded at
 * .begin.  tmp0 and tmp1 are used to move a double (div) from an FP register
 * to an integer register: std to the slot, then ldx from it.
 */
179#define tmp_px		STACK_BIAS-32
180#define tmp_counter	STACK_BIAS-24
181#define tmp0		STACK_BIAS-16
182#define tmp1		STACK_BIAS-8
183
/* Working element count, loaded from tmp_counter at .begin. */
184#define counter		%l1
185
186! sizeof temp storage - must be a multiple of 16 for V9
/* tmps is subtracted, together with SA(MINFRAME), from %sp by the save. */
187#define tmps		0x20
188
189!--------------------------------------------------------------------
190!		!!!!!	vatanf algorithm	!!!!!
191!  ux = ((int*)px)[0];
192!  ax = ux & 0x7fffffff;
193!
194!  if ( ax < 0x39b89c55 )
195!  {
196!    *(int*)py = ux;
197!    goto next;
198!  }
199!
200!  if ( ax > 0x4c700518 )
201!  {
202!    if ( ax > 0x7f800000 )
203!    {
204!      float fpx = fabsf(*px);
205!      fpx *= fpx;
206!      *py = fpx;
207!      goto next;
208!    }
209!
210!    sign = ux & 0x80000000;
211!    sign |= pi_2;
212!    *(int*)py = sign;
213!    goto next;
214!  }
215!
216!  ftmp0 = *px;
217!  x = (double)ftmp0;
218!  px += stridex;
219!  y = vis_fpadd32(x,DC1);
220!  y = vis_fand(y,DC2);
221!  div = x * y;
222!  xx = x - y;
223!  div += DONE;
224!  i = ((unsigned long long*)&div)[0];
225!  y0 = vis_fand(div,DC3);
226!  i >>= 43;
227!  i &= 508;
228!  *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
229!  y0 = vis_fpsub32(dtmp0, y0);
230!  dtmp0 = div * y0;
231!  dtmp0 = DTWO - dtmp0;
232!  y0 *= dtmp0;
233!  dtmp1 = div * y0;
234!  dtmp1 = DTWO - dtmp1;
235!  y0 *= dtmp1;
236!  ax = ux & 0x7fffffff;
237!  ax += 0x00100000;
238!  ax >>= 18;
239!  ax &= -8;
240!  res = *(double*)((char*)parr1 + ax);
241!  ux >>= 28;
242!  ux &= -8;
243!  dtmp0 = *(double*)((char*)sign_arr + ux);
244!  res *= dtmp0;
245!  xx *= y0;
246!  x2 = xx * xx;
247!  dtmp0 = K2 * x2;
248!  dtmp0 += K1;
249!  dtmp0 *= x2;
250!  dtmp0 += K0;
251!  dtmp0 *= xx;
252!  res += dtmp0;
253!  ftmp0 = (float)res;
254!  py[0] = ftmp0;
255!  py += stridey;
256!--------------------------------------------------------------------
257
258	ENTRY(__vatanf)
259	save	%sp,-SA(MINFRAME)-tmps,%sp
260	PIC_SETUP(l7)
261	PIC_SET(l7,.CONST_TBL,l2)
262
263	st	%i0,[%fp+tmp_counter]
264
265	sllx	%i2,2,stridex
266	sllx	%i4,2,stridey
267
268	or	%g0,%i3,%o1
269	stx	%i1,[%fp+tmp_px]
270
271	ldd	[%l2],K0
272	ldd	[%l2+8],K1
273	ldd	[%l2+16],K2
274	ldd	[%l2+24],DC1
275	ldd	[%l2+32],DC2
276	ldd	[%l2+40],DC3
277	ldd	[%l2+48],DONE
278	ldd	[%l2+56],DTWO
279
280	add	%l2,64,%i4
281	add	%l2,64+512,%l0
282	add	%l2,64+512+16-0x1cc*8,%l7
283
284	sethi	%hi(0x100000),MASK_0x100000
285	sethi	%hi(0x7ffffc00),MASK_0x7fffffff
286	add	MASK_0x7fffffff,1023,MASK_0x7fffffff
287
288	sethi	%hi(0x39b89c00),%o4
289	add	%o4,0x55,%o4
290	sethi	%hi(0x4c700400),%o5
291	add	%o5,0x118,%o5
292
293.begin:
294	ld	[%fp+tmp_counter],counter
295	ldx	[%fp+tmp_px],%i3
296	st	%g0,[%fp+tmp_counter]
297.begin1:
298	cmp	counter,0
299	ble,pn	%icc,.exit
300	nop
301
302	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];
303
304	and	%l6,MASK_0x7fffffff,%l5	! (0_0) ax = ux & 0x7fffffff;
305	lda	[%i3]0x82,%f0		! (0_0) ftmp0 = *px;
306
307	cmp	%l5,%o4			! (0_0) ax ? 0x39b89c55
308	bl,pn	%icc,.spec0		! (0_0) if ( ax < 0x39b89c55 )
309	nop
310
311	cmp	%l5,%o5			! (0_0) ax ? 0x4c700518
312	bg,pn	%icc,.spec1		! (0_0) if ( ax > 0x4c700518 )
313	nop
314
315	add	%i3,stridex,%l5		! px += stridex;
316	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;
317	mov	%l6,%i3
318
319	lda	[%l5]0x82,%l6		! (1_0) ux = ((int*)px)[0];
320
321	and	%l6,MASK_0x7fffffff,%o7	! (1_0) ax = ux & 0x7fffffff;
322	lda	[%l5]0x82,%f0		! (1_0) ftmp0 = *px;
323	add	%l5,stridex,%l4		! px += stridex;
324	fpadd32	%f22,DC1,%f24		! (0_0) y = vis_fpadd32(x,dconst1);
325
326	cmp	%o7,%o4			! (1_0) ax ? 0x39b89c55
327	bl,pn	%icc,.update0		! (1_0) if ( ax < 0x39b89c55 )
328	nop
329.cont0:
330	cmp	%o7,%o5			! (1_0) ax ? 0x4c700518
331	bg,pn	%icc,.update1		! (1_0) if ( ax > 0x4c700518 )
332	nop
333.cont1:
334	fstod	%f0,%f20		! (1_0) x = (double)ftmp0;
335	mov	%l6,%l5
336
337	fand	%f24,DC2,%f26		! (0_0) y = vis_fand(y,dconst2);
338
339	fmuld	%f22,%f26,%f32		! (0_0) div = x * y;
340
341	lda	[%l4]0x82,%l6		! (2_0) ux = ((int*)px)[0];
342	fsubd	%f22,%f26,%f22		! (0_0) xx = x - y;
343
344	and	%l6,MASK_0x7fffffff,%o7	! (2_0) ax = ux & 0x7fffffff;
345	lda	[%l4]0x82,%f0		! (2_0) ftmp0 = *px;
346	add	%l4,stridex,%l3		! px += stridex;
347	fpadd32	%f20,DC1,%f24		! (1_0) y = vis_fpadd32(x,dconst1);
348
349	cmp	%o7,%o4			! (2_0) ax ? 0x39b89c55
350	bl,pn	%icc,.update2		! (2_0) if ( ax < 0x39b89c55 )
351	faddd	DONE,%f32,%f32		! (0_0) div += done;
352.cont2:
353	cmp	%o7,%o5			! (2_0) ax ? 0x4c700518
354	bg,pn	%icc,.update3		! (2_0) if ( ax > 0x4c700518 )
355	nop
356.cont3:
357	std	%f32,[%fp+tmp0]		! (0_0) i = ((unsigned long long*)&div)[0];
358	mov	%l6,%l4
359	fstod	%f0,%f18		! (2_0) x = (double)ftmp0;
360
361	fand	%f24,DC2,%f26		! (1_0) y = vis_fand(y,dconst2);
362
363	fmuld	%f20,%f26,%f30		! (1_0) div = x * y;
364
365	lda	[%l3]0x82,%l6		! (3_0) ux = ((int*)px)[0];
366	fsubd	%f20,%f26,%f20		! (1_0) xx = x - y;
367
368	and	%l6,MASK_0x7fffffff,%o7	! (3_0) ax = ux & 0x7fffffff;
369	lda	[%l3]0x82,%f0		! (3_0) ftmp0 = *px;
370	add	%l3,stridex,%i0		! px += stridex;
371	fpadd32	%f18,DC1,%f24		! (2_0) y = vis_fpadd32(x,dconst1);
372
373	cmp	%o7,%o4			! (3_0) ax ? 0x39b89c55
374	bl,pn	%icc,.update4		! (3_0) if ( ax < 0x39b89c55 )
375	faddd	DONE,%f30,%f30		! (1_0) div += done;
376.cont4:
377	cmp	%o7,%o5			! (3_0) ax ? 0x4c700518
378	bg,pn	%icc,.update5		! (3_0) if ( ax > 0x4c700518 )
379	nop
380.cont5:
381	std	%f30,[%fp+tmp1]		! (1_0) i = ((unsigned long long*)&div)[0];
382	mov	%l6,%l3
383	fstod	%f0,%f16		! (3_0) x = (double)ftmp0;
384
385	ldx	[%fp+tmp0],%o0		! (0_0) i = ((unsigned long long*)&div)[0];
386	fand	%f24,DC2,%f26		! (2_0) y = vis_fand(y,dconst2);
387
388	fand	%f32,DC3,%f24		! (0_0) y0 = vis_fand(div,dconst3);
389
390	srlx	%o0,43,%o0		! (0_0) i >>= 43;
391
392	and	%o0,508,%l6		! (0_0) i &= 508;
393
394	ld	[%i4+%l6],%f0		! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
395
396	fmuld	%f18,%f26,%f28		! (2_0) div = x * y;
397
398	lda	[%i0]0x82,%l6		! (4_0) ux = ((int*)px)[0];
399	fsubd	%f18,%f26,%f18		! (2_0) xx = x - y;
400
401	fpsub32	%f0,%f24,%f40		! (0_0) y0 = vis_fpsub32(dtmp0, y0);
402
403	and	%l6,MASK_0x7fffffff,%o7	! (4_0) ax = ux & 0x7fffffff;
404	lda	[%i0]0x82,%f0		! (4_0) ftmp0 = *px;
405	add	%i0,stridex,%i2		! px += stridex;
406	fpadd32	%f16,DC1,%f24		! (3_0) y = vis_fpadd32(x,dconst1);
407
408	cmp	%o7,%o4			! (4_0) ax ? 0x39b89c55
409	bl,pn	%icc,.update6		! (4_0) if ( ax < 0x39b89c55 )
410	faddd	DONE,%f28,%f28		! (2_0) div += done;
411.cont6:
412	fmuld	%f32,%f40,%f42		! (0_0) dtmp0 = div0 * y0;
413	cmp	%o7,%o5			! (4_0) ax ? 0x4c700518
414	bg,pn	%icc,.update7		! (4_0) if ( ax > 0x4c700518 )
415	nop
416.cont7:
417	std	%f28,[%fp+tmp0]		! (2_0) i = ((unsigned long long*)&div)[0];
418	mov	%l6,%i0
419	fstod	%f0,%f14		! (4_0) x = (double)ftmp0;
420
421	ldx	[%fp+tmp1],%g1		! (1_0) i = ((unsigned long long*)&div)[0];
422	fand	%f24,DC2,%f26		! (3_0) y = vis_fand(y,dconst2);
423
424	fand	%f30,DC3,%f24		! (1_0) y0 = vis_fand(div,dconst3);
425
426	fsubd	DTWO,%f42,%f44		! (0_0) dtmp0 = dtwo - dtmp0;
427	srlx	%g1,43,%g1		! (1_0) i >>= 43;
428
429	and	%g1,508,%l6		! (1_0) i &= 508;
430
431	ld	[%i4+%l6],%f0		! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
432
433	fmuld	%f16,%f26,%f34		! (3_0) div = x * y;
434
435	lda	[%i2]0x82,%l6		! (5_0) ux = ((int*)px)[0];
436	fsubd	%f16,%f26,%f16		! (3_0) xx = x - y;
437
438	fpsub32	%f0,%f24,%f38		! (1_0) y0 = vis_fpsub32(dtmp0, y0);
439	add	%i2,stridex,%l2		! px += stridex;
440
441	fmuld	%f40,%f44,%f40		! (0_0) y0 *= dtmp0;
442	and	%l6,MASK_0x7fffffff,%o7	! (5_0) ax = ux & 0x7fffffff;
443	lda	[%i2]0x82,%f0		! (5_0) ftmp0 = *px;
444	fpadd32	%f14,DC1,%f24		! (4_0) y = vis_fpadd32(x,dconst1);
445
446	cmp	%o7,%o4			! (5_0) ax ? 0x39b89c55
447	bl,pn	%icc,.update8		! (5_0) if ( ax < 0x39b89c55 )
448	faddd	DONE,%f34,%f34		! (3_0) div += done;
449.cont8:
450	fmuld	%f30,%f38,%f42		! (1_0) dtmp0 = div0 * y0;
451	cmp	%o7,%o5			! (5_0) ax ? 0x4c700518
452	bg,pn	%icc,.update9		! (5_0) if ( ax > 0x4c700518 )
453	nop
454.cont9:
455	std	%f34,[%fp+tmp1]		! (3_0) i = ((unsigned long long*)&div)[0];
456	mov	%l6,%i2
457	fstod	%f0,%f36		! (5_0) x = (double)ftmp0;
458
459	fmuld	%f32,%f40,%f32		! (0_0) dtmp1 = div0 * y0;
460	ldx	[%fp+tmp0],%o0		! (2_0) i = ((unsigned long long*)&div)[0];
461	fand	%f24,DC2,%f26		! (4_0) y = vis_fand(y,dconst2);
462
463	fand	%f28,DC3,%f24		! (2_0) y0 = vis_fand(div,dconst3);
464
465	fsubd	DTWO,%f42,%f44		! (1_0) dtmp0 = dtwo - dtmp0;
466	srlx	%o0,43,%o0		! (2_0) i >>= 43;
467
468	and	%o0,508,%l6		! (2_0) i &= 508;
469	fsubd	DTWO,%f32,%f46		! (0_0) dtmp1 = dtwo - dtmp1;
470
471	ld	[%i4+%l6],%f0		! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
472
473	fmuld	%f14,%f26,%f32		! (4_0) div = x * y;
474
475	lda	[%l2]0x82,%l6		! (6_0) ux = ((int*)px)[0];
476	fsubd	%f14,%f26,%f14		! (4_0) xx = x - y;
477
478	fmuld	%f40,%f46,%f26		! (0_0) y0 *= dtmp1;
479	add	%l2,stridex,%g5		! px += stridex;
480	fpsub32	%f0,%f24,%f40		! (2_0) y0 = vis_fpsub32(dtmp0, y0);
481
482	fmuld	%f38,%f44,%f38		! (1_0) y0 *= dtmp0;
483	and	%l6,MASK_0x7fffffff,%o7	! (6_0) ax = ux & 0x7fffffff;
484	lda	[%l2]0x82,%f0		! (6_0) ftmp0 = *px;
485	fpadd32	%f36,DC1,%f24		! (5_0) y = vis_fpadd32(x,dconst1);
486
487	cmp	%o7,%o4			! (6_0) ax ? 0x39b89c55
488	bl,pn	%icc,.update10		! (6_0) if ( ax < 0x39b89c55 )
489	faddd	DONE,%f32,%f32		! (4_0) div += done;
490.cont10:
491	fmuld	%f28,%f40,%f42		! (2_0) dtmp0 = div0 * y0;
492	cmp	%o7,%o5			! (6_0) ax ? 0x4c700518
493	bg,pn	%icc,.update11		! (6_0) if ( ax > 0x4c700518 )
494	nop
495.cont11:
496	fmuld	%f22,%f26,%f22		! (0_0) xx *= y0;
497	mov	%l6,%l2
498	std	%f32,[%fp+tmp0]		! (4_0) i = ((unsigned long long*)&div)[0];
499	fstod	%f0,%f10		! (6_0) x = (double)ftmp0;
500
501	fmuld	%f30,%f38,%f30		! (1_0) dtmp1 = div0 * y0;
502	ldx	[%fp+tmp1],%g1		! (3_0) i = ((unsigned long long*)&div)[0];
503	fand	%f24,DC2,%f26		! (5_0) y = vis_fand(y,dconst2);
504
505	fand	%f34,DC3,%f24		! (3_0) y0 = vis_fand(div,dconst3);
506
507	fmuld	%f22,%f22,%f50		! (0_0) x2 = xx * xx;
508	srlx	%g1,43,%g1		! (3_0) i >>= 43;
509	fsubd	DTWO,%f42,%f44		! (2_0) dtmp0 = dtwo - dtmp0;
510
511	and	%g1,508,%l6		! (3_0) i &= 508;
512	mov	%i3,%o7
513	fsubd	DTWO,%f30,%f46		! (1_0) dtmp1 = dtwo - dtmp1;
514
515	ld	[%i4+%l6],%f0		! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
516
517	fmuld	%f36,%f26,%f30		! (5_0) div = x * y;
518	srl	%o7,28,%g1		! (0_0) ux >>= 28;
519	add	%g5,stridex,%i3		! px += stridex;
520
521	fmuld	K2,%f50,%f4		! (0_0) dtmp0 = K2 * x2;
522	and	%o7,MASK_0x7fffffff,%o0	! (0_0) ax = ux & 0x7fffffff;
523	lda	[%g5]0x82,%l6		! (7_0) ux = ((int*)px)[0];
524	fsubd	%f36,%f26,%f36		! (5_0) xx = x - y;
525
526	fmuld	%f38,%f46,%f26		! (1_0) y0 *= dtmp1;
527	add	%o0,MASK_0x100000,%o0	! (0_0) ax += 0x00100000;
528	and	%g1,-8,%g1		! (0_0) ux &= -8;
529	fpsub32	%f0,%f24,%f38		! (3_0) y0 = vis_fpsub32(dtmp0, y0);
530
531	fmuld	%f40,%f44,%f40		! (2_0) y0 *= dtmp0;
532	and	%l6,MASK_0x7fffffff,%o7	! (7_0) ax = ux & 0x7fffffff;
533	lda	[%g5]0x82,%f0		! (7_0) ftmp0 = *px;
534	fpadd32	%f10,DC1,%f24		! (6_0) y = vis_fpadd32(x,dconst1);
535
536	cmp	%o7,%o4			! (7_0) ax ? 0x39b89c55
537	bl,pn	%icc,.update12		! (7_0) if ( ax < 0x39b89c55 )
538	faddd	DONE,%f30,%f30		! (5_0) div += done;
539.cont12:
540	fmuld	%f34,%f38,%f42		! (3_0) dtmp0 = div0 * y0;
541	cmp	%o7,%o5			! (7_0) ax ? 0x4c700518
542	bg,pn	%icc,.update13		! (7_0) if ( ax > 0x4c700518 )
543	faddd	%f4,K1,%f4		! (0_0) dtmp0 += K1;
544.cont13:
545	fmuld	%f20,%f26,%f20		! (1_0) xx *= y0;
546	srl	%o0,18,%o7		! (0_0) ax >>= 18;
547	std	%f30,[%fp+tmp1]		! (5_0) i = ((unsigned long long*)&div)[0];
548	fstod	%f0,%f8			! (7_0) x = (double)ftmp0;
549
550	fmuld	%f28,%f40,%f28		! (2_0) dtmp1 = div0 * y0;
551	and	%o7,-8,%o7		! (0_0) ax &= -8;
552	ldx	[%fp+tmp0],%o0		! (4_0) i = ((unsigned long long*)&div)[0];
553	fand	%f24,DC2,%f26		! (6_0) y = vis_fand(y,dconst2);
554
555	add	%o7,%l7,%o7		! (0_0) (char*)parr1 + ax;
556	mov	%l6,%g5
557	ldd	[%l0+%g1],%f48		! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
558
559	fmuld	%f4,%f50,%f4		! (0_0) dtmp0 *= x2;
560	srlx	%o0,43,%o0		! (4_0) i >>= 43;
561	ldd	[%o7],%f0		! (0_0) res = *(double*)((char*)parr1 + ax);
562	fand	%f32,DC3,%f24		! (4_0) y0 = vis_fand(div,dconst3);
563
564	fmuld	%f20,%f20,%f50		! (1_0) x2 = xx * xx;
565	and	%o0,508,%l6		! (4_0) i &= 508;
566	mov	%l5,%o7
567	fsubd	DTWO,%f42,%f44		! (3_0) dtmp0 = dtwo - dtmp0;
568
569	fsubd	DTWO,%f28,%f46		! (2_0) dtmp1 = dtwo - dtmp1;
570
571	fmuld	%f0,%f48,%f48		! (0_0) res *= dtmp0;
572	srl	%o7,28,%l5		! (1_0) ux >>= 28;
573	ld	[%i4+%l6],%f0		! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
574
575	fmuld	%f10,%f26,%f28		! (6_0) div = x * y;
576	faddd	%f4,K0,%f42		! (0_0) dtmp0 += K0;
577
578	subcc	counter,8,counter
579	bneg,pn	%icc,.tail
580	or	%g0,%o1,%o0
581
582	add	%fp,tmp0,%g1
583	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];
584
585	ba	.main_loop
586	add	%i3,stridex,%l5		! px += stridex;
587
588	.align	16
589.main_loop:
590	fsubd	%f10,%f26,%f10		! (6_1) xx = x - y;
591	and	%o7,MASK_0x7fffffff,%o1	! (1_1) ax = ux & 0x7fffffff;
592	st	%f12,[%g1]		! (7_1) py[0] = ftmp0;
593	fmuld	K2,%f50,%f4		! (1_1) dtmp0 = K2 * x2;
594
595	fmuld	%f40,%f46,%f26		! (2_1) y0 *= dtmp1;
596	srl	%o7,28,%o7		! (1_0) ux >>= 28;
597	add	%o1,MASK_0x100000,%g1	! (1_1) ax += 0x00100000;
598	fpsub32	%f0,%f24,%f40		! (4_1) y0 = vis_fpsub32(dtmp0, y0);
599
600	fmuld	%f38,%f44,%f38		! (3_1) y0 *= dtmp0;
601	and	%l6,MASK_0x7fffffff,%o1	! (0_0) ax = ux & 0x7fffffff;
602	lda	[%i3]0x82,%f0		! (0_0) ftmp0 = *px;
603	fpadd32	%f8,DC1,%f24		! (7_1) y = vis_fpadd32(x,dconst1);
604
605	fmuld	%f42,%f22,%f44		! (0_1) dtmp0 *= xx;
606	cmp	%o1,%o4			! (0_0) ax ? 0x39b89c55
607	bl,pn	%icc,.update14		! (0_0) if ( ax < 0x39b89c55 )
608	faddd	DONE,%f28,%f28		! (6_1) div += done;
609.cont14:
610	fmuld	%f32,%f40,%f42		! (4_1) dtmp0 = div0 * y0;
611	cmp	%o1,%o5			! (0_0) ax ? 0x4c700518
612	bg,pn	%icc,.update15		! (0_0) if ( ax > 0x4c700518 )
613	faddd	%f4,K1,%f4		! (1_1) dtmp0 += K1;
614.cont15:
615	fmuld	%f18,%f26,%f18		! (2_1) xx *= y0;
616	srl	%g1,18,%o1		! (1_1) ax >>= 18;
617	std	%f28,[%fp+tmp0]		! (6_1) i = ((unsigned long long*)&div)[0];
618	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;
619
620	fmuld	%f34,%f38,%f34		! (3_1) dtmp1 = div0 * y0;
621	and	%o1,-8,%o1		! (1_1) ax &= -8;
622	ldx	[%fp+tmp1],%g1		! (5_1) i = ((unsigned long long*)&div)[0];
623	fand	%f24,DC2,%f26		! (7_1) y = vis_fand(y,dconst2);
624
625	ldd	[%o1+%l7],%f0		! (1_1) res = *(double*)((char*)parr1 + ax);
626	and	%o7,-8,%o7		! (1_1) ux &= -8;
627	mov	%l6,%i3
628	faddd	%f48,%f44,%f12		! (0_1) res += dtmp0;
629
630	fmuld	%f4,%f50,%f4		! (1_1) dtmp0 *= x2;
631	nop
632	ldd	[%l0+%o7],%f48		! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
633	fand	%f30,DC3,%f24		! (5_1) y0 = vis_fand(div,dconst3);
634
635	fmuld	%f18,%f18,%f50		! (2_1) x2 = xx * xx;
636	srlx	%g1,43,%g1		! (5_1) i >>= 43;
637	mov	%l4,%o7
638	fsubd	DTWO,%f42,%f44		! (4_1) dtmp0 = dtwo - dtmp0;
639
640	and	%g1,508,%l6		! (5_1) i &= 508;
641	nop
642	bn,pn	%icc,.exit
643	fsubd	DTWO,%f34,%f46		! (3_1) dtmp1 = dtwo - dtmp1;
644
645	fmuld	%f0,%f48,%f48		! (1_1) res *= dtmp0;
646	add	%o0,stridey,%g1		! py += stridey;
647	ld	[%i4+%l6],%f0		! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
648	fdtos	%f12,%f12		! (0_1) ftmp0 = (float)res;
649
650	fmuld	%f8,%f26,%f34		! (7_1) div = x * y;
651	srl	%o7,28,%o1		! (2_1) ux >>= 28;
652	lda	[%l5]0x82,%l6		! (1_0) ux = ((int*)px)[0];
653	faddd	%f4,K0,%f42		! (1_1) dtmp0 += K0;
654
655	fmuld	K2,%f50,%f4		! (2_1) dtmp0 = K2 * x2;
656	and	%o7,MASK_0x7fffffff,%o7	! (2_1) ax = ux & 0x7fffffff;
657	st	%f12,[%o0]		! (0_1) py[0] = ftmp0;
658	fsubd	%f8,%f26,%f8		! (7_1) xx = x - y;
659
660	fmuld	%f38,%f46,%f26		! (3_1) y0 *= dtmp1;
661	add	%l5,stridex,%l4		! px += stridex;
662	add	%o7,MASK_0x100000,%o0	! (2_1) ax += 0x00100000;
663	fpsub32	%f0,%f24,%f38		! (5_1) y0 = vis_fpsub32(dtmp0, y0);
664
665	fmuld	%f40,%f44,%f40		! (4_1) y0 *= dtmp0;
666	and	%l6,MASK_0x7fffffff,%o7	! (1_0) ax = ux & 0x7fffffff;
667	lda	[%l5]0x82,%f0		! (1_0) ftmp0 = *px;
668	fpadd32	%f22,DC1,%f24		! (0_0) y = vis_fpadd32(x,dconst1);
669
670	fmuld	%f42,%f20,%f44		! (1_1) dtmp0 *= xx;
671	cmp	%o7,%o4			! (1_0) ax ? 0x39b89c55
672	bl,pn	%icc,.update16		! (1_0) if ( ax < 0x39b89c55 )
673	faddd	DONE,%f34,%f34		! (7_1) div += done;
674.cont16:
675	fmuld	%f30,%f38,%f42		! (5_1) dtmp0 = div0 * y0;
676	cmp	%o7,%o5			! (1_0) ax ? 0x4c700518
677	bg,pn	%icc,.update17		! (1_0) if ( ax > 0x4c700518 )
678	faddd	%f4,K1,%f4		! (2_1) dtmp0 += K1;
679.cont17:
680	fmuld	%f16,%f26,%f16		! (3_1) xx *= y0;
681	srl	%o0,18,%o7		! (2_1) ax >>= 18;
682	std	%f34,[%fp+tmp1]		! (7_1) i = ((unsigned long long*)&div)[0];
683	fstod	%f0,%f20		! (1_0) x = (double)ftmp0;
684
685	fmuld	%f32,%f40,%f32		! (4_1) dtmp1 = div0 * y0;
686	ldx	[%fp+tmp0],%o0		! (6_1) i = ((unsigned long long*)&div)[0];
687	and	%o1,-8,%o1		! (2_1) ux &= -8;
688	fand	%f24,DC2,%f26		! (0_0) y = vis_fand(y,dconst2);
689
690	faddd	%f48,%f44,%f12		! (1_1) res += dtmp0;
691	and	%o7,-8,%o7		! (2_1) ax &= -8;
692	ldd	[%l0+%o1],%f48		! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
693	bn,pn	%icc,.exit
694
695	ldd	[%o7+%l7],%f0		! (2_1) res = *(double*)((char*)parr1 + ax);
696	mov	%l6,%l5
697	fmuld	%f4,%f50,%f4		! (2_1) dtmp0 *= x2;
698	fand	%f28,DC3,%f24		! (6_1) y0 = vis_fand(div,dconst3);
699
700	fmuld	%f16,%f16,%f50		! (3_1) x2 = xx * xx;
701	srlx	%o0,43,%o0		! (6_1) i >>= 43;
702	mov	%l3,%o7
703	fsubd	DTWO,%f42,%f44		! (5_1) dtmp0 = dtwo - dtmp0;
704
705	and	%o0,508,%l6		! (6_1) i &= 508;
706	add	%l4,stridex,%l3		! px += stridex;
707	bn,pn	%icc,.exit
708	fsubd	DTWO,%f32,%f46		! (4_1) dtmp1 = dtwo - dtmp1;
709
710	fmuld	%f0,%f48,%f48		! (2_1) res *= dtmp0;
711	add	%g1,stridey,%o0		! py += stridey;
712	ld	[%i4+%l6],%f0		! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
713	fdtos	%f12,%f12		! (1_1) ftmp0 = (float)res;
714
715	fmuld	%f22,%f26,%f32		! (0_0) div = x * y;
716	srl	%o7,28,%o1		! (3_1) ux >>= 28;
717	lda	[%l4]0x82,%l6		! (2_0) ux = ((int*)px)[0];
718	faddd	%f4,K0,%f42		! (2_1) dtmp0 += K0;
719
720	fmuld	K2,%f50,%f4		! (3_1) dtmp0 = K2 * x2;
721	and	%o7,MASK_0x7fffffff,%o7	! (3_1) ax = ux & 0x7fffffff;
722	st	%f12,[%g1]		! (1_1) py[0] = ftmp0;
723	fsubd	%f22,%f26,%f22		! (0_0) xx = x - y;
724
725	fmuld	%f40,%f46,%f26		! (4_1) y0 *= dtmp1;
726	add	%o7,MASK_0x100000,%g1	! (3_1) ax += 0x00100000;
727	and	%o1,-8,%o1		! (3_1) ux &= -8;
728	fpsub32	%f0,%f24,%f40		! (6_1) y0 = vis_fpsub32(dtmp0, y0);
729
730	fmuld	%f38,%f44,%f38		! (5_1) y0 *= dtmp0;
731	and	%l6,MASK_0x7fffffff,%o7	! (2_0) ax = ux & 0x7fffffff;
732	lda	[%l4]0x82,%f0		! (2_0) ftmp0 = *px;
733	fpadd32	%f20,DC1,%f24		! (1_0) y = vis_fpadd32(x,dconst1);
734
735	fmuld	%f42,%f18,%f44		! (2_1) dtmp0 *= xx;
736	cmp	%o7,%o4			! (2_0) ax ? 0x39b89c55
737	bl,pn	%icc,.update18		! (2_0) if ( ax < 0x39b89c55 )
738	faddd	DONE,%f32,%f32		! (0_0) div += done;
739.cont18:
740	fmuld	%f28,%f40,%f42		! (6_1) dtmp0 = div0 * y0;
741	cmp	%o7,%o5			! (2_0) ax ? 0x4c700518
742	bg,pn	%icc,.update19		! (2_0) if ( ax > 0x4c700518 )
743	faddd	%f4,K1,%f4		! (3_1) dtmp0 += K1;
744.cont19:
745	fmuld	%f14,%f26,%f14		! (4_1) xx *= y0;
746	srl	%g1,18,%o7		! (3_1) ax >>= 18;
747	std	%f32,[%fp+tmp0]		! (0_0) i = ((unsigned long long*)&div)[0];
748	fstod	%f0,%f18		! (2_0) x = (double)ftmp0;
749
750	fmuld	%f30,%f38,%f30		! (5_1) dtmp1 = div0 * y0;
751	and	%o7,-8,%o7		! (3_1) ax &= -8;
752	ldx	[%fp+tmp1],%g1		! (7_1) i = ((unsigned long long*)&div)[0];
753	fand	%f24,DC2,%f26		! (1_0) y = vis_fand(y,dconst2);
754
755	faddd	%f48,%f44,%f12		! (2_1) res += dtmp0;
756	mov	%l6,%l4
757	ldd	[%l0+%o1],%f48		! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
758	bn,pn	%icc,.exit
759
760	fmuld	%f4,%f50,%f4		! (3_1) dtmp0 *= x2;
761	ldd	[%o7+%l7],%f0		! (3_1) res = *(double*)((char*)parr1 + ax)
762	nop
763	fand	%f34,DC3,%f24		! (7_1) y0 = vis_fand(div,dconst3);
764
765	fmuld	%f14,%f14,%f50		! (4_1) x2 = xx * xx;
766	srlx	%g1,43,%g1		! (7_1) i >>= 43;
767	mov	%i0,%o7
768	fsubd	DTWO,%f42,%f44		! (6_1) dtmp0 = dtwo - dtmp0;
769
770	and	%g1,508,%l6		! (7_1) i &= 508;
771	add	%l3,stridex,%i0		! px += stridex;
772	bn,pn	%icc,.exit
773	fsubd	DTWO,%f30,%f46		! (5_1) dtmp1 = dtwo - dtmp1;
774
775	fmuld	%f0,%f48,%f48		! (3_1) res *= dtmp0;
776	add	%o0,stridey,%g1		! py += stridey;
777	ld	[%i4+%l6],%f0		! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
778	fdtos	%f12,%f12		! (2_1) ftmp0 = (float)res;
779
780	fmuld	%f20,%f26,%f30		! (1_0) div = x * y;
781	srl	%o7,28,%o1		! (4_1) ux >>= 28;
782	lda	[%l3]0x82,%l6		! (3_0) ux = ((int*)px)[0];
783	faddd	%f4,K0,%f42		! (3_1) dtmp0 += K0;
784
785	fmuld	K2,%f50,%f4		! (4_1) dtmp0 = K2 * x2;
786	and	%o7,MASK_0x7fffffff,%o7	! (4_1) ax = ux & 0x7fffffff;
787	st	%f12,[%o0]		! (2_1) py[0] = ftmp0;
788	fsubd	%f20,%f26,%f20		! (1_0) xx = x - y;
789
790	fmuld	%f38,%f46,%f26		! (5_1) y0 *= dtmp1;
791	add	%o7,MASK_0x100000,%o0	! (4_1) ax += 0x00100000;
792	and	%o1,-8,%o1		! (4_1) ux &= -8;
793	fpsub32	%f0,%f24,%f38		! (7_1) y0 = vis_fpsub32(dtmp0, y0);
794
795	fmuld	%f40,%f44,%f40		! (6_1) y0 *= dtmp0;
796	and	%l6,MASK_0x7fffffff,%o7	! (3_0) ax = ux & 0x7fffffff;
797	lda	[%l3]0x82,%f0		! (3_0) ftmp0 = *px;
798	fpadd32	%f18,DC1,%f24		! (2_0) y = vis_fpadd32(x,dconst1);
799
800	fmuld	%f42,%f16,%f44		! (3_1) dtmp0 *= xx;
801	cmp	%o7,%o4			! (3_0) ax ? 0x39b89c55
802	bl,pn	%icc,.update20		! (3_0) if ( ax < 0x39b89c55 )
803	faddd	DONE,%f30,%f30		! (1_0) div += done;
804.cont20:
805	fmuld	%f34,%f38,%f42		! (7_1) dtmp0 = div0 * y0;
806	cmp	%o7,%o5			! (3_0) ax ? 0x4c700518
807	bg,pn	%icc,.update21		! (3_0) if ( ax > 0x4c700518 )
808	faddd	%f4,K1,%f4		! (4_1) dtmp0 += K1;
809.cont21:
810	fmuld	%f36,%f26,%f36		! (5_1) xx *= y0;
811	srl	%o0,18,%o7		! (4_1) ax >>= 18;
812	std	%f30,[%fp+tmp1]		! (1_0) i = ((unsigned long long*)&div)[0];
813	fstod	%f0,%f16		! (3_0) x = (double)ftmp0;
814
815	fmuld	%f28,%f40,%f28		! (6_1) dtmp1 = div0 * y0;
816	and	%o7,-8,%o7		! (4_1) ax &= -8;
817	ldx	[%fp+tmp0],%o0		! (0_0) i = ((unsigned long long*)&div)[0];
818	fand	%f24,DC2,%f26		! (2_0) y = vis_fand(y,dconst2);
819
820	faddd	%f48,%f44,%f12		! (3_1) res += dtmp0;
821	nop
822	ldd	[%l0+%o1],%f48		! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
823	bn,pn	%icc,.exit
824
825	ldd	[%o7+%l7],%f0		! (4_1) res = *(double*)((char*)parr1 + ax);
826	mov	%l6,%l3
827	fmuld	%f4,%f50,%f4		! (4_1) dtmp0 *= x2;
828	fand	%f32,DC3,%f24		! (0_0) y0 = vis_fand(div,dconst3);
829
830	fmuld	%f36,%f36,%f50		! (5_1) x2 = xx * xx;
831	srlx	%o0,43,%o0		! (0_0) i >>= 43;
832	mov	%i2,%o7
833	fsubd	DTWO,%f42,%f44		! (7_1) dtmp0 = dtwo - dtmp0;
834
835	and	%o0,508,%l6		! (0_0) i &= 508;
836	add	%i0,stridex,%i2		! px += stridex;
837	bn,pn	%icc,.exit
838	fsubd	DTWO,%f28,%f46		! (6_1) dtmp1 = dtwo - dtmp1;
839
840	fmuld	%f0,%f48,%f48		! (4_1) res *= dtmp0;
841	add	%g1,stridey,%o0		! py += stridey;
842	ld	[%i4+%l6],%f0		! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
843	fdtos	%f12,%f12		! (3_1) ftmp0 = (float)res;
844
845	fmuld	%f18,%f26,%f28		! (2_0) div = x * y;
846	srl	%o7,28,%o1		! (5_1) ux >>= 28;
847	lda	[%i0]0x82,%l6		! (4_0) ux = ((int*)px)[0];
848	faddd	%f4,K0,%f42		! (4_1) dtmp0 += K0;
849
850	fmuld	K2,%f50,%f4		! (5_1) dtmp0 = K2 * x2;
851	and	%o7,MASK_0x7fffffff,%o7	! (5_1) ax = ux & 0x7fffffff;
852	st	%f12,[%g1]		! (3_1) py[0] = ftmp0;
853	fsubd	%f18,%f26,%f18		! (2_0) xx = x - y;
854
855	fmuld	%f40,%f46,%f26		! (6_1) y0 *= dtmp1;
856	add	%o7,MASK_0x100000,%g1	! (5_1) ax += 0x00100000;
857	and	%o1,-8,%o1		! (5_1) ux &= -8;
858	fpsub32	%f0,%f24,%f40		! (0_0) y0 = vis_fpsub32(dtmp0, y0);
859
860	fmuld	%f38,%f44,%f38		! (7_1) y0 *= dtmp0;
861	and	%l6,MASK_0x7fffffff,%o7	! (4_0) ax = ux & 0x7fffffff;
862	lda	[%i0]0x82,%f0		! (4_0) ftmp0 = *px;
863	fpadd32	%f16,DC1,%f24		! (3_0) y = vis_fpadd32(x,dconst1);
864
865	fmuld	%f42,%f14,%f44		! (4_1) dtmp0 *= xx;
866	cmp	%o7,%o4			! (4_0) ax ? 0x39b89c55
867	bl,pn	%icc,.update22		! (4_0) if ( ax < 0x39b89c55 )
868	faddd	DONE,%f28,%f28		! (2_0) div += done;
869.cont22:
870	fmuld	%f32,%f40,%f42		! (0_0) dtmp0 = div0 * y0;
871	cmp	%o7,%o5			! (4_0) ax ? 0x4c700518
872	bg,pn	%icc,.update23		! (4_0) if ( ax > 0x4c700518 )
873	faddd	%f4,K1,%f4		! (5_1) dtmp0 += K1;
874.cont23:
875	fmuld	%f10,%f26,%f10		! (6_1) xx *= y0;
876	srl	%g1,18,%o7		! (5_1) ax >>= 18;
877	std	%f28,[%fp+tmp0]		! (2_0) i = ((unsigned long long*)&div)[0];
878	fstod	%f0,%f14		! (4_0) x = (double)ftmp0;
879
880	fmuld	%f34,%f38,%f34		! (7_1) dtmp1 = div0 * y0;
881	and	%o7,-8,%o7		! (5_1) ax &= -8;
882	ldx	[%fp+tmp1],%g1		! (1_0) i = ((unsigned long long*)&div)[0];
883	fand	%f24,DC2,%f26		! (3_0) y = vis_fand(y,dconst2);
884
885	faddd	%f48,%f44,%f12		! (4_1) res += dtmp0;
886	mov	%l6,%i0
887	ldd	[%l0+%o1],%f48		! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
888	bn,pn	%icc,.exit
889
890	ldd	[%o7+%l7],%f0		! (5_1) res = *(double*)((char*)parr1 + ax);
891	nop
892	fmuld	%f4,%f50,%f4		! (5_1) dtmp0 *= x2;
893	fand	%f30,DC3,%f24		! (1_0) y0 = vis_fand(div,dconst3);
894
895	fmuld	%f10,%f10,%f50		! (6_1) x2 = xx * xx;
896	srlx	%g1,43,%g1		! (1_0) i >>= 43;
897	mov	%l2,%o7
898	fsubd	DTWO,%f42,%f44		! (0_0) dtmp0 = dtwo - dtmp0;
899
900	and	%g1,508,%l6		! (1_0) i &= 508;
901	add	%i2,stridex,%l2		! px += stridex;
902	bn,pn	%icc,.exit
903	fsubd	DTWO,%f34,%f46		! (7_1) dtmp1 = dtwo - dtmp1;
904
905	fmuld	%f0,%f48,%f48		! (5_1) res *= dtmp0;
906	add	%o0,stridey,%g1		! py += stridey;
907	ld	[%i4+%l6],%f0		! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
908	fdtos	%f12,%f12		! (4_1) ftmp0 = (float)res;
909
910	fmuld	%f16,%f26,%f34		! (3_0) div = x * y;
911	srl	%o7,28,%o1		! (6_1) ux >>= 28;
912	lda	[%i2]0x82,%l6		! (5_0) ux = ((int*)px)[0];
913	faddd	%f4,K0,%f42		! (5_1) dtmp0 += K0;
914
915	fmuld	K2,%f50,%f4		! (6_1) dtmp0 = K2 * x2;
916	and	%o7,MASK_0x7fffffff,%o7	! (6_1) ax = ux & 0x7fffffff;
917	st	%f12,[%o0]		! (4_1) py[0] = ftmp0;
918	fsubd	%f16,%f26,%f16		! (3_0) xx = x - y;
919
920	fmuld	%f38,%f46,%f26		! (7_1) y0 *= dtmp1;
921	add	%o7,MASK_0x100000,%o0	! (6_1) ax += 0x00100000;
922	and	%o1,-8,%o1		! (6_1) ux &= -8;
923	fpsub32	%f0,%f24,%f38		! (1_0) y0 = vis_fpsub32(dtmp0, y0);
924
925	fmuld	%f40,%f44,%f40		! (0_0) y0 *= dtmp0;
926	and	%l6,MASK_0x7fffffff,%o7	! (5_0) ax = ux & 0x7fffffff;
927	lda	[%i2]0x82,%f0		! (5_0) ftmp0 = *px;
928	fpadd32	%f14,DC1,%f24		! (4_0) y = vis_fpadd32(x,dconst1);
929
930	fmuld	%f42,%f36,%f44		! (5_1) dtmp0 *= xx;
931	cmp	%o7,%o4			! (5_0) ax ? 0x39b89c55
932	bl,pn	%icc,.update24		! (5_0) if ( ax < 0x39b89c55 )
933	faddd	DONE,%f34,%f34		! (3_0) div += done;
934.cont24:
935	fmuld	%f30,%f38,%f42		! (1_0) dtmp0 = div0 * y0;
936	cmp	%o7,%o5			! (5_0) ax ? 0x4c700518
937	bg,pn	%icc,.update25		! (5_0) if ( ax > 0x4c700518 )
938	faddd	%f4,K1,%f4		! (6_1) dtmp0 += K1;
939.cont25:
940	fmuld	%f8,%f26,%f8		! (7_1) xx *= y0;
941	srl	%o0,18,%o7		! (6_1) ax >>= 18;
942	std	%f34,[%fp+tmp1]		! (3_0) i = ((unsigned long long*)&div)[0];
943	fstod	%f0,%f36		! (5_0) x = (double)ftmp0;
944
945	fmuld	%f32,%f40,%f32		! (0_0) dtmp1 = div0 * y0;
946	and	%o7,-8,%o7		! (6_1) ax &= -8;
947	ldx	[%fp+tmp0],%o0		! (2_0) i = ((unsigned long long*)&div)[0];
948	fand	%f24,DC2,%f26		! (4_0) y = vis_fand(y,dconst2);
949
950	faddd	%f48,%f44,%f12		! (5_1) res += dtmp0;
951	mov	%l6,%i2
952	ldd	[%l0+%o1],%f48		! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
953	bn,pn	%icc,.exit
954
955	ldd	[%o7+%l7],%f0		! (6_1) res = *(double*)((char*)parr1 + ax);
956	nop
957	fmuld	%f4,%f50,%f4		! (6_1) dtmp0 *= x2;
958	fand	%f28,DC3,%f24		! (2_0) y0 = vis_fand(div,dconst3);
959
960	fmuld	%f8,%f8,%f50		! (7_1) x2 = xx * xx;
961	srlx	%o0,43,%o0		! (2_0) i >>= 43;
962	mov	%g5,%o7
963	fsubd	DTWO,%f42,%f44		! (1_0) dtmp0 = dtwo - dtmp0;
964
965	and	%o0,508,%l6		! (2_0) i &= 508;
966	add	%l2,stridex,%g5		! px += stridex;
967	bn,pn	%icc,.exit
968	fsubd	DTWO,%f32,%f46		! (0_0) dtmp1 = dtwo - dtmp1;
969
970	fmuld	%f0,%f48,%f48		! (6_1) res *= dtmp0;
971	add	%g1,stridey,%o0		! py += stridey;
972	ld	[%i4+%l6],%f0		! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
973	fdtos	%f12,%f12		! (5_1) ftmp0 = (float)res;
974
975	fmuld	%f14,%f26,%f32		! (4_0) div = x * y;
976	srl	%o7,28,%o1		! (7_1) ux >>= 28;
977	lda	[%l2]0x82,%l6		! (6_0) ux = ((int*)px)[0];
978	faddd	%f4,K0,%f42		! (6_1) dtmp0 += K0;
979
980	fmuld	K2,%f50,%f4		! (7_1) dtmp0 = K2 * x2;
981	and	%o7,MASK_0x7fffffff,%o7	! (7_1) ax = ux & 0x7fffffff;
982	st	%f12,[%g1]		! (5_1) py[0] = ftmp0;
983	fsubd	%f14,%f26,%f14		! (4_0) xx = x - y;
984
985	fmuld	%f40,%f46,%f26		! (0_0) y0 *= dtmp1;
986	add	%o7,MASK_0x100000,%g1	! (7_1) ax += 0x00100000;
987	and	%o1,-8,%o1		! (7_1) ux &= -8;
988	fpsub32	%f0,%f24,%f40		! (2_0) y0 = vis_fpsub32(dtmp0, y0);
989
990	fmuld	%f38,%f44,%f38		! (1_0) y0 *= dtmp0;
991	and	%l6,MASK_0x7fffffff,%o7	! (6_0) ax = ux & 0x7fffffff;
992	lda	[%l2]0x82,%f0		! (6_0) ftmp0 = *px;
993	fpadd32	%f36,DC1,%f24		! (5_0) y = vis_fpadd32(x,dconst1);
994
995	fmuld	%f42,%f10,%f44		! (6_1) dtmp0 *= xx;
996	cmp	%o7,%o4			! (6_0) ax ? 0x39b89c55
997	bl,pn	%icc,.update26		! (6_0) if ( ax < 0x39b89c55 )
998	faddd	DONE,%f32,%f32		! (4_0) div += done;
999.cont26:
1000	fmuld	%f28,%f40,%f42		! (2_0) dtmp0 = div0 * y0;
1001	cmp	%o7,%o5			! (6_0) ax ? 0x4c700518
1002	bg,pn	%icc,.update27		! (6_0) if ( ax > 0x4c700518 )
1003	faddd	%f4,K1,%f4		! (7_1) dtmp0 += K1;
1004.cont27:
1005	fmuld	%f22,%f26,%f22		! (0_0) xx *= y0;
1006	srl	%g1,18,%o7		! (7_1) ax >>= 18;
1007	std	%f32,[%fp+tmp0]		! (4_0) i = ((unsigned long long*)&div)[0];
1008	fstod	%f0,%f10		! (6_0) x = (double)ftmp0;
1009
1010	fmuld	%f30,%f38,%f30		! (1_0) dtmp1 = div0 * y0;
1011	and	%o7,-8,%o7		! (7_1) ax &= -8;
1012	ldx	[%fp+tmp1],%g1		! (3_0) i = ((unsigned long long*)&div)[0];
1013	fand	%f24,DC2,%f26		! (5_0) y = vis_fand(y,dconst2);
1014
1015	faddd	%f48,%f44,%f12		! (6_1) res += dtmp0;
1016	mov	%l6,%l2
1017	ldd	[%l0+%o1],%f48		! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1018	bn,pn	%icc,.exit
1019
1020	ldd	[%o7+%l7],%f0		! (7_1) res = *(double*)((char*)parr1 + ax);
1021	nop
1022	fmuld	%f4,%f50,%f4		! (7_1) dtmp0 *= x2;
1023	fand	%f34,DC3,%f24		! (3_0) y0 = vis_fand(div,dconst3);
1024
1025	fmuld	%f22,%f22,%f50		! (0_0) x2 = xx * xx;
1026	srlx	%g1,43,%g1		! (3_0) i >>= 43;
1027	mov	%i3,%o7
1028	fsubd	DTWO,%f42,%f44		! (2_0) dtmp0 = dtwo - dtmp0;
1029
1030	and	%g1,508,%l6		! (3_0) i &= 508;
1031	add	%g5,stridex,%i3		! px += stridex;
1032	bn,pn	%icc,.exit
1033	fsubd	DTWO,%f30,%f46		! (1_0) dtmp1 = dtwo - dtmp1;
1034
1035	fmuld	%f0,%f48,%f48		! (7_1) res *= dtmp0;
1036	add	%o0,stridey,%g1		! py += stridey;
1037	ld	[%i4+%l6],%f0		! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
1038	fdtos	%f12,%f12		! (6_1) ftmp0 = (float)res;
1039
1040	fmuld	%f36,%f26,%f30		! (5_0) div = x * y;
1041	srl	%o7,28,%o1		! (0_0) ux >>= 28;
1042	lda	[%g5]0x82,%l6		! (7_0) ux = ((int*)px)[0];
1043	faddd	%f4,K0,%f42		! (7_1) dtmp0 += K0;
1044
1045	fmuld	K2,%f50,%f4		! (0_0) dtmp0 = K2 * x2;
1046	and	%o7,MASK_0x7fffffff,%o7	! (0_0) ax = ux & 0x7fffffff;
1047	st	%f12,[%o0]		! (6_1) py[0] = ftmp0;
1048	fsubd	%f36,%f26,%f36		! (5_0) xx = x - y;
1049
1050	fmuld	%f38,%f46,%f26		! (1_0) y0 *= dtmp1;
1051	add	%o7,MASK_0x100000,%o0	! (0_0) ax += 0x00100000;
1052	and	%o1,-8,%o1		! (0_0) ux &= -8;
1053	fpsub32	%f0,%f24,%f38		! (3_0) y0 = vis_fpsub32(dtmp0, y0);
1054
1055	fmuld	%f40,%f44,%f40		! (2_0) y0 *= dtmp0;
1056	and	%l6,MASK_0x7fffffff,%o7	! (7_0) ax = ux & 0x7fffffff;
1057	lda	[%g5]0x82,%f0		! (7_0) ftmp0 = *px;
1058	fpadd32	%f10,DC1,%f24		! (6_0) y = vis_fpadd32(x,dconst1);
1059
1060	fmuld	%f42,%f8,%f44		! (7_1) dtmp0 *= xx;
1061	cmp	%o7,%o4			! (7_0) ax ? 0x39b89c55
1062	bl,pn	%icc,.update28		! (7_0) if ( ax < 0x39b89c55 )
1063	faddd	DONE,%f30,%f30		! (5_0) div += done;
1064.cont28:
1065	fmuld	%f34,%f38,%f42		! (3_0) dtmp0 = div0 * y0;
1066	cmp	%o7,%o5			! (7_0) ax ? 0x4c700518
1067	bg,pn	%icc,.update29		! (7_0) if ( ax > 0x4c700518 )
1068	faddd	%f4,K1,%f4		! (0_0) dtmp0 += K1;
1069.cont29:
1070	fmuld	%f20,%f26,%f20		! (1_0) xx *= y0;
1071	srl	%o0,18,%o7		! (0_0) ax >>= 18;
1072	std	%f30,[%fp+tmp1]		! (5_0) i = ((unsigned long long*)&div)[0];
1073	fstod	%f0,%f8			! (7_0) x = (double)ftmp0;
1074
1075	fmuld	%f28,%f40,%f28		! (2_0) dtmp1 = div0 * y0;
1076	and	%o7,-8,%o7		! (0_0) ux &= -8;
1077	ldx	[%fp+tmp0],%o0		! (4_0) i = ((unsigned long long*)&div)[0];
1078	fand	%f24,DC2,%f26		! (6_0) y = vis_fand(y,dconst2);
1079
1080	faddd	%f48,%f44,%f12		! (7_1) res += dtmp0;
1081	subcc	counter,8,counter
1082	ldd	[%l0+%o1],%f48		! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
1083	bn,pn	%icc,.exit
1084
1085	fmuld	%f4,%f50,%f4		! (0_0) dtmp0 *= x2;
1086	mov	%l6,%g5
1087	ldd	[%o7+%l7],%f0		! (0_0) res = *(double*)((char*)parr1 + ax);
1088	fand	%f32,DC3,%f24		! (4_0) y0 = vis_fand(div,dconst3);
1089
1090	fmuld	%f20,%f20,%f50		! (1_0) x2 = xx * xx;
1091	srlx	%o0,43,%l6		! (4_0) i >>= 43;
1092	mov	%l5,%o7
1093	fsubd	DTWO,%f42,%f44		! (3_0) dtmp0 = dtwo - dtmp0;
1094
1095	add	%g1,stridey,%o0		! py += stridey;
1096	and	%l6,508,%l6		! (4_0) i &= 508;
1097	bn,pn	%icc,.exit
1098	fsubd	DTWO,%f28,%f46		! (2_0) dtmp1 = dtwo - dtmp1;
1099
1100	fmuld	%f0,%f48,%f48		! (0_0) res *= dtmp0;
1101	ld	[%i4+%l6],%f0		! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
1102	add	%i3,stridex,%l5		! px += stridex;
1103	fdtos	%f12,%f12		! (7_1) ftmp0 = (float)res;
1104
1105	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];
1106	fmuld	%f10,%f26,%f28		! (6_0) div = x * y;
1107	bpos,pt	%icc,.main_loop
1108	faddd	%f4,K0,%f42		! (0_0) dtmp0 += K0;
1109
1110	srl	%o7,28,%l5		! (1_0) ux >>= 28;
1111	st	%f12,[%g1]		! (7_1) py[0] = ftmp0;
1112
/*
 * .tail: drain the software pipeline once the unrolled main loop falls
 * through.  counter is first re-biased by +7; if the result is negative
 * nothing is left in flight and control returns to .begin with %o1 = %o0
 * (the current py).  Otherwise the in-flight elements, tagged (0_1)..(6_1)
 * in the per-line comments, are completed one at a time.  Each stage
 * stores one float result, steps py by stridey (py alternates between
 * %o0 and %g1), decrements counter, and leaves for .begin as soon as
 * counter goes negative, copying the next py into %o1 in the branch delay
 * slot.  At most seven results are stored here.
 * NOTE(review): .begin is outside this chunk; presumably it picks up any
 * state saved in tmp_counter/tmp_px by the .updateN paths -- confirm.
 */
1113.tail:
1114	addcc	counter,7,counter
1115	bneg,pn	%icc,.begin
1116	or	%g0,%o0,%o1
1117
1118	fsubd	%f10,%f26,%f10		! (6_1) xx = x - y;
1119	and	%o7,MASK_0x7fffffff,%g1	! (1_1) ax = ux & 0x7fffffff;
1120	fmuld	K2,%f50,%f4		! (1_1) dtmp0 = K2 * x2;
1121
1122	fmuld	%f40,%f46,%f26		! (2_1) y0 *= dtmp1;
1123	add	%g1,MASK_0x100000,%g1	! (1_1) ax += 0x00100000;
1124	and	%l5,-8,%l5		! (1_1) ux &= -8;
1125	fpsub32	%f0,%f24,%f40		! (4_1) y0 = vis_fpsub32(dtmp0, y0);
1126
1127	fmuld	%f38,%f44,%f38		! (3_1) y0 *= dtmp0;
1128
1129	fmuld	%f42,%f22,%f44		! (0_1) dtmp0 *= xx;
1130	faddd	DONE,%f28,%f28		! (6_1) div += done;
1131
1132	fmuld	%f32,%f40,%f42		! (4_1) dtmp0 = div0 * y0;
1133	faddd	%f4,K1,%f4		! (1_1) dtmp0 += K1;
1134
1135	fmuld	%f18,%f26,%f18		! (2_1) xx *= y0;
1136	srl	%g1,18,%o7		! (1_1) ax >>= 18;
1137	std	%f28,[%fp+tmp0]		! (6_1) i = ((unsigned long long*)&div)[0];
1138
1139	fmuld	%f34,%f38,%f34		! (3_1) dtmp1 = div0 * y0;
1140	and	%o7,-8,%o7		! (1_1) ax &= -8;
1141	ldx	[%fp+tmp1],%g1		! (5_1) i = ((unsigned long long*)&div)[0];
1142
1143	faddd	%f48,%f44,%f12		! (0_1) res += dtmp0;
1144	add	%o7,%l7,%o7		! (1_1) (char*)parr1 + ax;
1145	ldd	[%l0+%l5],%f48		! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1146
1147	fmuld	%f4,%f50,%f4		! (1_1) dtmp0 *= x2;
1148	fand	%f30,DC3,%f24		! (5_1) y0 = vis_fand(div,dconst3);
1149	ldd	[%o7],%f0		! (1_1) res = *(double*)((char*)parr1 + ax);
1150
1151	fmuld	%f18,%f18,%f50		! (2_1) x2 = xx * xx;
1152	fsubd	DTWO,%f42,%f44		! (4_1) dtmp0 = dtwo - dtmp0;
1153	srlx	%g1,43,%g1		! (5_1) i >>= 43;
1154
1155	and	%g1,508,%l6		! (5_1) i &= 508;
1156	mov	%l4,%o7
1157	fsubd	DTWO,%f34,%f46		! (3_1) dtmp1 = dtwo - dtmp1;
1158
1159	fmuld	%f0,%f48,%f48		! (1_1) res *= dtmp0;
1160	add	%o0,stridey,%g1		! py += stridey;
1161	ld	[%i4+%l6],%f0		! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
1162	fdtos	%f12,%f12		! (0_1) ftmp0 = (float)res;
1163
1164	srl	%o7,28,%l4		! (2_1) ux >>= 28;
1165	st	%f12,[%o0]		! (0_1) py[0] = ftmp0;
1166	faddd	%f4,K0,%f42		! (1_1) dtmp0 += K0;
1167
/* (0_1) stored at [%o0]; leave with %o1 = %g1 if no element remains. */
1168	subcc	counter,1,counter
1169	bneg,pn	%icc,.begin
1170	or	%g0,%g1,%o1
1171
1172	fmuld	K2,%f50,%f4		! (2_1) dtmp0 = K2 * x2;
1173	and	%o7,MASK_0x7fffffff,%o0	! (2_1) ax = ux & 0x7fffffff;
1174
1175	fmuld	%f38,%f46,%f26		! (3_1) y0 *= dtmp1;
1176	add	%o0,MASK_0x100000,%o0	! (2_1) ax += 0x00100000;
1177	and	%l4,-8,%l4		! (2_1) ux &= -8;
1178	fpsub32	%f0,%f24,%f38		! (5_1) y0 = vis_fpsub32(dtmp0, y0);
1179
1180	fmuld	%f40,%f44,%f40		! (4_1) y0 *= dtmp0;
1181
1182	fmuld	%f42,%f20,%f44		! (1_1) dtmp0 *= xx;
1183
1184	fmuld	%f30,%f38,%f42		! (5_1) dtmp0 = div0 * y0;
1185	faddd	%f4,K1,%f4		! (2_1) dtmp0 += K1;
1186
1187	fmuld	%f16,%f26,%f16		! (3_1) xx *= y0;
1188	srl	%o0,18,%o7		! (2_1) ax >>= 18;
1189
1190	fmuld	%f32,%f40,%f32		! (4_1) dtmp1 = div0 * y0;
1191	and	%o7,-8,%o7		! (2_1) ax &= -8;
1192	ldx	[%fp+tmp0],%o0		! (6_1) i = ((unsigned long long*)&div)[0];
1193
1194	faddd	%f48,%f44,%f12		! (1_1) res += dtmp0;
1195	add	%o7,%l7,%o7		! (2_1) (char*)parr1 + ax;
1196	ldd	[%l0+%l4],%f48		! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1197
1198	fmuld	%f4,%f50,%f4		! (2_1) dtmp0 *= x2;
1199	fand	%f28,DC3,%f24		! (6_1) y0 = vis_fand(div,dconst3);
1200	ldd	[%o7],%f0		! (2_1) res = *(double*)((char*)parr1 + ax);
1201
1202	fmuld	%f16,%f16,%f50		! (3_1) x2 = xx * xx;
1203	fsubd	DTWO,%f42,%f44		! (5_1) dtmp0 = dtwo - dtmp0;
1204	srlx	%o0,43,%o0		! (6_1) i >>= 43;
1205
1206	and	%o0,508,%l6		! (6_1) i &= 508;
1207	mov	%l3,%o7
1208	fsubd	DTWO,%f32,%f46		! (4_1) dtmp1 = dtwo - dtmp1;
1209
1210	fmuld	%f0,%f48,%f48		! (2_1) res *= dtmp0;
1211	add	%g1,stridey,%o0		! py += stridey;
1212	ld	[%i4+%l6],%f0		! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
1213	fdtos	%f12,%f12		! (1_1) ftmp0 = (float)res;
1214
1215	srl	%o7,28,%l3		! (3_1) ux >>= 28;
1216	st	%f12,[%g1]		! (1_1) py[0] = ftmp0;
1217	faddd	%f4,K0,%f42		! (2_1) dtmp0 += K0;
1218
/* (1_1) stored at [%g1]; leave with %o1 = %o0 if no element remains. */
1219	subcc	counter,1,counter
1220	bneg,pn	%icc,.begin
1221	or	%g0,%o0,%o1
1222
1223	fmuld	K2,%f50,%f4		! (3_1) dtmp0 = K2 * x2;
1224	and	%o7,MASK_0x7fffffff,%g1	! (3_1) ax = ux & 0x7fffffff;
1225
1226	fmuld	%f40,%f46,%f26		! (4_1) y0 *= dtmp1;
1227	add	%g1,MASK_0x100000,%g1	! (3_1) ax += 0x00100000;
1228	and	%l3,-8,%l3		! (3_1) ux &= -8;
1229	fpsub32	%f0,%f24,%f40		! (6_1) y0 = vis_fpsub32(dtmp0, y0);
1230
1231	fmuld	%f38,%f44,%f38		! (5_1) y0 *= dtmp0;
1232
1233	fmuld	%f42,%f18,%f44		! (2_1) dtmp0 *= xx;
1234
1235	fmuld	%f28,%f40,%f42		! (6_1) dtmp0 = div0 * y0;
1236	faddd	%f4,K1,%f4		! (3_1) dtmp0 += K1;
1237
1238	fmuld	%f14,%f26,%f14		! (4_1) xx *= y0;
1239	srl	%g1,18,%o7		! (3_1) ax >>= 18;
1240
1241	fmuld	%f30,%f38,%f30		! (5_1) dtmp1 = div0 * y0;
1242	and	%o7,-8,%o7		! (3_1) ax &= -8;
1243
1244	faddd	%f48,%f44,%f12		! (2_1) res += dtmp0;
1245	add	%o7,%l7,%o7		! (3_1) (char*)parr1 + ax;
1246	ldd	[%l0+%l3],%f48		! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1247
1248	fmuld	%f4,%f50,%f4		! (3_1) dtmp0 *= x2;
1249	ldd	[%o7],%f0		! (3_1) res = *(double*)((char*)parr1 + ax)
1250
1251	fmuld	%f14,%f14,%f50		! (4_1) x2 = xx * xx;
1252	fsubd	DTWO,%f42,%f44		! (6_1) dtmp0 = dtwo - dtmp0;
1253
1254	mov	%i0,%o7
1255	fsubd	DTWO,%f30,%f46		! (5_1) dtmp1 = dtwo - dtmp1;
1256
1257	fmuld	%f0,%f48,%f48		! (3_1) res *= dtmp0;
1258	add	%o0,stridey,%g1		! py += stridey;
1259	fdtos	%f12,%f12		! (2_1) ftmp0 = (float)res;
1260
1261	srl	%o7,28,%i0		! (4_1) ux >>= 28;
1262	st	%f12,[%o0]		! (2_1) py[0] = ftmp0;
1263	faddd	%f4,K0,%f42		! (3_1) dtmp0 += K0;
1264
/* (2_1) stored at [%o0]; leave with %o1 = %g1 if no element remains. */
1265	subcc	counter,1,counter
1266	bneg,pn	%icc,.begin
1267	or	%g0,%g1,%o1
1268
1269	fmuld	K2,%f50,%f4		! (4_1) dtmp0 = K2 * x2;
1270	and	%o7,MASK_0x7fffffff,%o0	! (4_1) ax = ux & 0x7fffffff;
1271
1272	fmuld	%f38,%f46,%f26		! (5_1) y0 *= dtmp1;
1273	add	%o0,MASK_0x100000,%o0	! (4_1) ax += 0x00100000;
1274	and	%i0,-8,%i0		! (4_1) ux &= -8;
1275
1276	fmuld	%f40,%f44,%f40		! (6_1) y0 *= dtmp0;
1277
1278	fmuld	%f42,%f16,%f44		! (3_1) dtmp0 *= xx;
1279
1280	faddd	%f4,K1,%f4		! (4_1) dtmp0 += K1;
1281
1282	fmuld	%f36,%f26,%f36		! (5_1) xx *= y0;
1283	srl	%o0,18,%o7		! (4_1) ax >>= 18;
1284
1285	fmuld	%f28,%f40,%f28		! (6_1) dtmp1 = div0 * y0;
1286	and	%o7,-8,%o7		! (4_1) ax &= -8;
1287
1288	faddd	%f48,%f44,%f12		! (3_1) res += dtmp0;
1289	add	%o7,%l7,%o7		! (4_1) (char*)parr1 + ax;
1290	ldd	[%l0+%i0],%f48		! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1291
1292	fmuld	%f4,%f50,%f4		! (4_1) dtmp0 *= x2;
1293	ldd	[%o7],%f0		! (4_1) res = *(double*)((char*)parr1 + ax);
1294
1295	fmuld	%f36,%f36,%f50		! (5_1) x2 = xx * xx;
1296
1297	mov	%i2,%o7
1298	fsubd	DTWO,%f28,%f46		! (6_1) dtmp1 = dtwo - dtmp1;
1299
1300	fmuld	%f0,%f48,%f48		! (4_1) res *= dtmp0;
1301	add	%g1,stridey,%o0		! py += stridey;
1302	fdtos	%f12,%f12		! (3_1) ftmp0 = (float)res;
1303
1304	srl	%o7,28,%i2		! (5_1) ux >>= 28;
1305	st	%f12,[%g1]		! (3_1) py[0] = ftmp0;
1306	faddd	%f4,K0,%f42		! (4_1) dtmp0 += K0;
1307
/* (3_1) stored at [%g1]; leave with %o1 = %o0 if no element remains. */
1308	subcc	counter,1,counter
1309	bneg,pn	%icc,.begin
1310	or	%g0,%o0,%o1
1311
1312	fmuld	K2,%f50,%f4		! (5_1) dtmp0 = K2 * x2;
1313	and	%o7,MASK_0x7fffffff,%g1	! (5_1) ax = ux & 0x7fffffff;
1314
1315	fmuld	%f40,%f46,%f26		! (6_1) y0 *= dtmp1;
1316	add	%g1,MASK_0x100000,%g1	! (5_1) ax += 0x00100000;
1317	and	%i2,-8,%i2		! (5_1) ux &= -8;
1318
1319	fmuld	%f42,%f14,%f44		! (4_1) dtmp0 *= xx;
1320
1321	faddd	%f4,K1,%f4		! (5_1) dtmp0 += K1;
1322
1323	fmuld	%f10,%f26,%f10		! (6_1) xx *= y0;
1324	srl	%g1,18,%o7		! (5_1) ax >>= 18;
1325
1326	and	%o7,-8,%o7		! (5_1) ax &= -8;
1327
1328	faddd	%f48,%f44,%f12		! (4_1) res += dtmp0;
1329	add	%o7,%l7,%o7		! (5_1) (char*)parr1 + ax;
1330	ldd	[%l0+%i2],%f48		! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1331
1332	fmuld	%f4,%f50,%f4		! (5_1) dtmp0 *= x2;
1333	ldd	[%o7],%f0		! (5_1) res = *(double*)((char*)parr1 + ax);
1334
1335	fmuld	%f10,%f10,%f50		! (6_1) x2 = xx * xx;
1336
1337	mov	%l2,%o7
1338
1339	fmuld	%f0,%f48,%f48		! (5_1) res *= dtmp0;
1340	add	%o0,stridey,%g1		! py += stridey;
1341	fdtos	%f12,%f12		! (4_1) ftmp0 = (float)res;
1342
1343	srl	%o7,28,%l2		! (6_1) ux >>= 28;
1344	st	%f12,[%o0]		! (4_1) py[0] = ftmp0;
1345	faddd	%f4,K0,%f42		! (5_1) dtmp0 += K0;
1346
/* (4_1) stored at [%o0]; leave with %o1 = %g1 if no element remains. */
1347	subcc	counter,1,counter
1348	bneg,pn	%icc,.begin
1349	or	%g0,%g1,%o1
1350
1351	fmuld	K2,%f50,%f4		! (6_1) dtmp0 = K2 * x2;
1352	and	%o7,MASK_0x7fffffff,%o0	! (6_1) ax = ux & 0x7fffffff;
1353
1354	add	%o0,MASK_0x100000,%o0	! (6_1) ax += 0x00100000;
1355	and	%l2,-8,%l2		! (6_1) ux &= -8;
1356
1357	fmuld	%f42,%f36,%f44		! (5_1) dtmp0 *= xx;
1358
1359	faddd	%f4,K1,%f4		! (6_1) dtmp0 += K1;
1360
1361	srl	%o0,18,%o7		! (6_1) ax >>= 18;
1362
1363	and	%o7,-8,%o7		! (6_1) ax &= -8;
1364
1365	faddd	%f48,%f44,%f12		! (5_1) res += dtmp0;
1366	add	%o7,%l7,%o7		! (6_1) (char*)parr1 + ax;
1367	ldd	[%l0+%l2],%f48		! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1368
1369	fmuld	%f4,%f50,%f4		! (6_1) dtmp0 *= x2;
1370	ldd	[%o7],%f0		! (6_1) res = *(double*)((char*)parr1 + ax);
1371
1372	fmuld	%f0,%f48,%f48		! (6_1) res *= dtmp0;
1373	add	%g1,stridey,%o0		! py += stridey;
1374	fdtos	%f12,%f12		! (5_1) ftmp0 = (float)res;
1375
1376	st	%f12,[%g1]		! (5_1) py[0] = ftmp0;
1377	faddd	%f4,K0,%f42		! (6_1) dtmp0 += K0;
1378
/* (5_1) stored at [%g1]; leave with %o1 = %o0 if no element remains. */
1379	subcc	counter,1,counter
1380	bneg,pn	%icc,.begin
1381	or	%g0,%o0,%o1
1382
1383	fmuld	%f42,%f10,%f44		! (6_1) dtmp0 *= xx;
1384
1385	faddd	%f48,%f44,%f12		! (6_1) res += dtmp0;
1386
1387	add	%o0,stridey,%g1		! py += stridey;
1388	fdtos	%f12,%f12		! (6_1) ftmp0 = (float)res;
1389
1390	st	%f12,[%o0]		! (6_1) py[0] = ftmp0;
1391
/* (6_1) was the last in-flight element; always return to .begin. */
1392	ba	.begin
1393	or	%g0,%g1,%o1		! py += stridey;
1394
/*
 * .exit: common return point.  `ret` returns to the caller and the
 * `restore` in its delay slot pops the register window; the restore's
 * add result is discarded because all three operands are %g0.
 */
1395.exit:
1396	ret
1397	restore	%g0,%g0,%g0
1398
1399	.align	16
/*
 * .spec0: special-case handler for a single element.  Stores %l6 (ux,
 * the raw input bits per the existing comment) to [%o1] unchanged, so the
 * result equals the input.  Steps px (%i3) by stridex, decrements
 * counter, and returns to .begin1 with py (%o1) advanced by stridey in
 * the branch delay slot.
 * NOTE(review): the branch into this label is outside this chunk;
 * presumably it is the small-|x| case (ax < 0x39b89c55) -- confirm.
 */
1400.spec0:
1401	add	%i3,stridex,%i3		! px += stridex;
1402	sub	counter,1,counter
1403	st	%l6,[%o1]		! *(int*)py = ux;
1404
1405	ba	.begin1
1406	add	%o1,stridey,%o1		! py += stridey;
1407
1408	.align	16
/*
 * .spec1: special-case handler for a single element.  Writes one result
 * to [%o1] and returns to .begin1 with px (%i3), py (%o1) and counter
 * each stepped by one element.
 *   Constants built here: %l3 = 0x7f800000, %o0 = 0x80000000 (sign-bit
 *   mask), %l4 = 0x3fc90c00 + 0x3db = 0x3fc90fdb (float bits of pi/2).
 *   %l5 <= 0x7f800000: result bits = (%l6 & 0x80000000) | 0x3fc90fdb,
 *                      i.e. pi/2 carrying the sign bit of %l6.
 *   %l5 >  0x7f800000: branch to 1: with fabss executed in the annulled
 *                      delay slot (it runs only when the branch is
 *                      taken); the result stored is %f0 * %f0 after the
 *                      fabss.
 * NOTE(review): the code that loads %l5/%l6/%f0 and branches here is
 * outside this chunk.  The existing comments treat %l5 as ax, %l6 as ux
 * and %f0 as *px, which would make the second case the NaN path and the
 * first the large-|x|/infinity path -- confirm.
 */
1409.spec1:
1410	sethi	%hi(0x7f800000),%l3
1411	sethi	%hi(0x3fc90c00),%l4	! pi_2
1412
1413	sethi	%hi(0x80000000),%o0
1414	add	%l4,0x3db,%l4		! pi_2
1415
1416	cmp	%l5,%l3			! if ( ax > 0x7f800000 )
1417	bg,a,pn	%icc,1f
1418	fabss	%f0,%f0			! fpx = fabsf(*px);
1419
1420	and	%l6,%o0,%l6		! sign = ux & 0x80000000;
1421
1422	or	%l6,%l4,%l6		! sign |= pi_2;
1423
1424	add	%i3,stridex,%i3		! px += stridex;
1425	sub	counter,1,counter
1426	st	%l6,[%o1]		! *(int*)py = sign;
1427
1428	ba	.begin1
1429	add	%o1,stridey,%o1		! py += stridey;
1430
14311:
1432	fmuls	%f0,%f0,%f0		! fpx *= fpx;
1433
1434	add	%i3,stridex,%i3		! px += stridex
1435	sub	counter,1,counter
1436	st	%f0,[%o1]		! *py = fpx;
1437
1438	ba	.begin1
1439	add	%o1,stridey,%o1		! py += stridey;
1440
1441	.align	16
/*
 * .update0: fix-up path that rejoins the pipelined code at .cont0.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 1: resume at .cont0 with nothing else changed.
 *   counter >  1: tmp_counter = counter - 1, tmp_px = %l5, counter = 1,
 *                 then resume at .cont0.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l5 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1442.update0:
1443	cmp	counter,1
1444	fzeros	%f0
1445	ble,a	.cont0
1446	sethi	%hi(0x3fffffff),%l6
1447
1448	sub	counter,1,counter
1449	st	counter,[%fp+tmp_counter]
1450
1451	stx	%l5,[%fp+tmp_px]
1452	sethi	%hi(0x3fffffff),%l6
1453	ba	.cont0
1454	or	%g0,1,counter
1455
1456	.align	16
/*
 * .update1: fix-up path that rejoins the pipelined code at .cont1.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 1: resume at .cont1 with nothing else changed.
 *   counter >  1: tmp_counter = counter - 1, tmp_px = %l5, counter = 1,
 *                 then resume at .cont1.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l5 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1457.update1:
1458	cmp	counter,1
1459	fzeros	%f0
1460	ble,a	.cont1
1461	sethi	%hi(0x3fffffff),%l6
1462
1463	sub	counter,1,counter
1464	st	counter,[%fp+tmp_counter]
1465
1466	stx	%l5,[%fp+tmp_px]
1467	sethi	%hi(0x3fffffff),%l6
1468	ba	.cont1
1469	or	%g0,1,counter
1470
1471	.align	16
/*
 * .update2: fix-up path that rejoins the pipelined code at .cont2.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 2: resume at .cont2 with nothing else changed.
 *   counter >  2: tmp_counter = counter - 2, tmp_px = %l4, counter = 2,
 *                 then resume at .cont2.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l4 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1472.update2:
1473	cmp	counter,2
1474	fzeros	%f0
1475	ble,a	.cont2
1476	sethi	%hi(0x3fffffff),%l6
1477
1478	sub	counter,2,counter
1479	st	counter,[%fp+tmp_counter]
1480
1481	stx	%l4,[%fp+tmp_px]
1482	sethi	%hi(0x3fffffff),%l6
1483	ba	.cont2
1484	or	%g0,2,counter
1485
1486	.align	16
/*
 * .update3: fix-up path that rejoins the pipelined code at .cont3.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 2: resume at .cont3 with nothing else changed.
 *   counter >  2: tmp_counter = counter - 2, tmp_px = %l4, counter = 2,
 *                 then resume at .cont3.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l4 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1487.update3:
1488	cmp	counter,2
1489	fzeros	%f0
1490	ble,a	.cont3
1491	sethi	%hi(0x3fffffff),%l6
1492
1493	sub	counter,2,counter
1494	st	counter,[%fp+tmp_counter]
1495
1496	stx	%l4,[%fp+tmp_px]
1497	sethi	%hi(0x3fffffff),%l6
1498	ba	.cont3
1499	or	%g0,2,counter
1500
1501	.align	16
/*
 * .update4: fix-up path that rejoins the pipelined code at .cont4.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 3: resume at .cont4 with nothing else changed.
 *   counter >  3: tmp_counter = counter - 3, tmp_px = %l3, counter = 3,
 *                 then resume at .cont4.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l3 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1502.update4:
1503	cmp	counter,3
1504	fzeros	%f0
1505	ble,a	.cont4
1506	sethi	%hi(0x3fffffff),%l6
1507
1508	sub	counter,3,counter
1509	st	counter,[%fp+tmp_counter]
1510
1511	stx	%l3,[%fp+tmp_px]
1512	sethi	%hi(0x3fffffff),%l6
1513	ba	.cont4
1514	or	%g0,3,counter
1515
1516	.align	16
/*
 * .update5: fix-up path that rejoins the pipelined code at .cont5.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 3: resume at .cont5 with nothing else changed.
 *   counter >  3: tmp_counter = counter - 3, tmp_px = %l3, counter = 3,
 *                 then resume at .cont5.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l3 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1517.update5:
1518	cmp	counter,3
1519	fzeros	%f0
1520	ble,a	.cont5
1521	sethi	%hi(0x3fffffff),%l6
1522
1523	sub	counter,3,counter
1524	st	counter,[%fp+tmp_counter]
1525
1526	stx	%l3,[%fp+tmp_px]
1527	sethi	%hi(0x3fffffff),%l6
1528	ba	.cont5
1529	or	%g0,3,counter
1530
1531	.align	16
/*
 * .update6: fix-up path that rejoins the pipelined code at .cont6.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 4: resume at .cont6 with nothing else changed.
 *   counter >  4: tmp_counter = counter - 4, tmp_px = %i0, counter = 4,
 *                 then resume at .cont6.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %i0 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1532.update6:
1533	cmp	counter,4
1534	fzeros	%f0
1535	ble,a	.cont6
1536	sethi	%hi(0x3fffffff),%l6
1537
1538	sub	counter,4,counter
1539	st	counter,[%fp+tmp_counter]
1540
1541	stx	%i0,[%fp+tmp_px]
1542	sethi	%hi(0x3fffffff),%l6
1543	ba	.cont6
1544	or	%g0,4,counter
1545
1546	.align	16
/*
 * .update7: fix-up path that rejoins the pipelined code at .cont7.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 4: resume at .cont7 with nothing else changed.
 *   counter >  4: tmp_counter = counter - 4, tmp_px = %i0, counter = 4,
 *                 then resume at .cont7.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %i0 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1547.update7:
1548	cmp	counter,4
1549	fzeros	%f0
1550	ble,a	.cont7
1551	sethi	%hi(0x3fffffff),%l6
1552
1553	sub	counter,4,counter
1554	st	counter,[%fp+tmp_counter]
1555
1556	stx	%i0,[%fp+tmp_px]
1557	sethi	%hi(0x3fffffff),%l6
1558	ba	.cont7
1559	or	%g0,4,counter
1560
1561	.align	16
/*
 * .update8: fix-up path that rejoins the pipelined code at .cont8.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 5: resume at .cont8 with nothing else changed.
 *   counter >  5: tmp_counter = counter - 5, tmp_px = %i2, counter = 5,
 *                 then resume at .cont8.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %i2 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1562.update8:
1563	cmp	counter,5
1564	fzeros	%f0
1565	ble,a	.cont8
1566	sethi	%hi(0x3fffffff),%l6
1567
1568	sub	counter,5,counter
1569	st	counter,[%fp+tmp_counter]
1570
1571	stx	%i2,[%fp+tmp_px]
1572	sethi	%hi(0x3fffffff),%l6
1573	ba	.cont8
1574	or	%g0,5,counter
1575
1576	.align	16
/*
 * .update9: fix-up path that rejoins the pipelined code at .cont9.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 5: resume at .cont9 with nothing else changed.
 *   counter >  5: tmp_counter = counter - 5, tmp_px = %i2, counter = 5,
 *                 then resume at .cont9.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %i2 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1577.update9:
1578	cmp	counter,5
1579	fzeros	%f0
1580	ble,a	.cont9
1581	sethi	%hi(0x3fffffff),%l6
1582
1583	sub	counter,5,counter
1584	st	counter,[%fp+tmp_counter]
1585
1586	stx	%i2,[%fp+tmp_px]
1587	sethi	%hi(0x3fffffff),%l6
1588	ba	.cont9
1589	or	%g0,5,counter
1590
1591	.align	16
/*
 * .update10: fix-up path that rejoins the pipelined code at .cont10.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 6: resume at .cont10 with nothing else changed.
 *   counter >  6: tmp_counter = counter - 6, tmp_px = %l2, counter = 6,
 *                 then resume at .cont10.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l2 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1592.update10:
1593	cmp	counter,6
1594	fzeros	%f0
1595	ble,a	.cont10
1596	sethi	%hi(0x3fffffff),%l6
1597
1598	sub	counter,6,counter
1599	st	counter,[%fp+tmp_counter]
1600
1601	stx	%l2,[%fp+tmp_px]
1602	sethi	%hi(0x3fffffff),%l6
1603	ba	.cont10
1604	or	%g0,6,counter
1605
1606	.align	16
/*
 * .update11: fix-up path that rejoins the pipelined code at .cont11.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 6: resume at .cont11 with nothing else changed.
 *   counter >  6: tmp_counter = counter - 6, tmp_px = %l2, counter = 6,
 *                 then resume at .cont11.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l2 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1607.update11:
1608	cmp	counter,6
1609	fzeros	%f0
1610	ble,a	.cont11
1611	sethi	%hi(0x3fffffff),%l6
1612
1613	sub	counter,6,counter
1614	st	counter,[%fp+tmp_counter]
1615
1616	stx	%l2,[%fp+tmp_px]
1617	sethi	%hi(0x3fffffff),%l6
1618	ba	.cont11
1619	or	%g0,6,counter
1620
1621	.align	16
/*
 * .update12: fix-up path that rejoins the pipelined code at .cont12.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 7: resume at .cont12 with nothing else changed.
 *   counter >  7: tmp_counter = counter - 7, tmp_px = %g5, counter = 7,
 *                 then resume at .cont12.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %g5 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1622.update12:
1623	cmp	counter,7
1624	fzeros	%f0
1625	ble,a	.cont12
1626	sethi	%hi(0x3fffffff),%l6
1627
1628	sub	counter,7,counter
1629	st	counter,[%fp+tmp_counter]
1630
1631	stx	%g5,[%fp+tmp_px]
1632	sethi	%hi(0x3fffffff),%l6
1633	ba	.cont12
1634	or	%g0,7,counter
1635
1636	.align	16
/*
 * .update13: fix-up path that rejoins the pipelined code at .cont13.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 7: resume at .cont13 with nothing else changed.
 *   counter >  7: tmp_counter = counter - 7, tmp_px = %g5, counter = 7,
 *                 then resume at .cont13.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %g5 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1637.update13:
1638	cmp	counter,7
1639	fzeros	%f0
1640	ble,a	.cont13
1641	sethi	%hi(0x3fffffff),%l6
1642
1643	sub	counter,7,counter
1644	st	counter,[%fp+tmp_counter]
1645
1646	stx	%g5,[%fp+tmp_px]
1647	sethi	%hi(0x3fffffff),%l6
1648	ba	.cont13
1649	or	%g0,7,counter
1650
1651	.align	16
/*
 * .update14: fix-up path that rejoins the pipelined code at .cont14.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 0: resume at .cont14 with nothing else changed.
 *   counter >  0: tmp_counter = counter (the subtract of 0 leaves it
 *                 unchanged), tmp_px = %i3, counter = 0, then resume at
 *                 .cont14.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %i3 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1652.update14:
1653	cmp	counter,0
1654	fzeros	%f0
1655	ble,a	.cont14
1656	sethi	%hi(0x3fffffff),%l6
1657
1658	sub	counter,0,counter
1659	st	counter,[%fp+tmp_counter]
1660
1661	stx	%i3,[%fp+tmp_px]
1662	sethi	%hi(0x3fffffff),%l6
1663	ba	.cont14
1664	or	%g0,0,counter
1665
1666	.align	16
/*
 * .update15: fix-up path that rejoins the pipelined code at .cont15.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 0: resume at .cont15 with nothing else changed.
 *   counter >  0: tmp_counter = counter (the subtract of 0 leaves it
 *                 unchanged), tmp_px = %i3, counter = 0, then resume at
 *                 .cont15.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %i3 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1667.update15:
1668	cmp	counter,0
1669	fzeros	%f0
1670	ble,a	.cont15
1671	sethi	%hi(0x3fffffff),%l6
1672
1673	sub	counter,0,counter
1674	st	counter,[%fp+tmp_counter]
1675
1676	stx	%i3,[%fp+tmp_px]
1677	sethi	%hi(0x3fffffff),%l6
1678	ba	.cont15
1679	or	%g0,0,counter
1680
1681	.align	16
/*
 * .update16: fix-up path that rejoins the pipelined code at .cont16.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 1: resume at .cont16 with nothing else changed.
 *   counter >  1: tmp_counter = counter - 1, tmp_px = %l5, counter = 1,
 *                 then resume at .cont16.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l5 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1682.update16:
1683	cmp	counter,1
1684	fzeros	%f0
1685	ble,a	.cont16
1686	sethi	%hi(0x3fffffff),%l6
1687
1688	sub	counter,1,counter
1689	st	counter,[%fp+tmp_counter]
1690
1691	stx	%l5,[%fp+tmp_px]
1692	sethi	%hi(0x3fffffff),%l6
1693	ba	.cont16
1694	or	%g0,1,counter
1695
1696	.align	16
/*
 * .update17: fix-up path that rejoins the pipelined code at .cont17.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 1: resume at .cont17 with nothing else changed.
 *   counter >  1: tmp_counter = counter - 1, tmp_px = %l5, counter = 1,
 *                 then resume at .cont17.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l5 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1697.update17:
1698	cmp	counter,1
1699	fzeros	%f0
1700	ble,a	.cont17
1701	sethi	%hi(0x3fffffff),%l6
1702
1703	sub	counter,1,counter
1704	st	counter,[%fp+tmp_counter]
1705
1706	stx	%l5,[%fp+tmp_px]
1707	sethi	%hi(0x3fffffff),%l6
1708	ba	.cont17
1709	or	%g0,1,counter
1710
1711	.align	16
/*
 * .update18: fix-up path that rejoins the pipelined code at .cont18.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 2: resume at .cont18 with nothing else changed.
 *   counter >  2: tmp_counter = counter - 2, tmp_px = %l4, counter = 2,
 *                 then resume at .cont18.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l4 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1712.update18:
1713	cmp	counter,2
1714	fzeros	%f0
1715	ble,a	.cont18
1716	sethi	%hi(0x3fffffff),%l6
1717
1718	sub	counter,2,counter
1719	st	counter,[%fp+tmp_counter]
1720
1721	stx	%l4,[%fp+tmp_px]
1722	sethi	%hi(0x3fffffff),%l6
1723	ba	.cont18
1724	or	%g0,2,counter
1725
1726	.align	16
/*
 * .update19: fix-up path that rejoins the pipelined code at .cont19.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 2: resume at .cont19 with nothing else changed.
 *   counter >  2: tmp_counter = counter - 2, tmp_px = %l4, counter = 2,
 *                 then resume at .cont19.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l4 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1727.update19:
1728	cmp	counter,2
1729	fzeros	%f0
1730	ble,a	.cont19
1731	sethi	%hi(0x3fffffff),%l6
1732
1733	sub	counter,2,counter
1734	st	counter,[%fp+tmp_counter]
1735
1736	stx	%l4,[%fp+tmp_px]
1737	sethi	%hi(0x3fffffff),%l6
1738	ba	.cont19
1739	or	%g0,2,counter
1740
1741	.align	16
/*
 * .update20: fix-up path that rejoins the pipelined code at .cont20.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 3: resume at .cont20 with nothing else changed.
 *   counter >  3: tmp_counter = counter - 3, tmp_px = %l3, counter = 3,
 *                 then resume at .cont20.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l3 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1742.update20:
1743	cmp	counter,3
1744	fzeros	%f0
1745	ble,a	.cont20
1746	sethi	%hi(0x3fffffff),%l6
1747
1748	sub	counter,3,counter
1749	st	counter,[%fp+tmp_counter]
1750
1751	stx	%l3,[%fp+tmp_px]
1752	sethi	%hi(0x3fffffff),%l6
1753	ba	.cont20
1754	or	%g0,3,counter
1755
1756	.align	16
/*
 * .update21: fix-up path that rejoins the pipelined code at .cont21.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path).
 *   counter <= 3: resume at .cont21 with nothing else changed.
 *   counter >  3: tmp_counter = counter - 3, tmp_px = %l3, counter = 3,
 *                 then resume at .cont21.
 * NOTE(review): the branch into this label and the reader of
 * tmp_counter/tmp_px are outside this chunk; presumably %l3 is the
 * address of an out-of-range input deferred to a later pass -- confirm.
 */
1757.update21:
1758	cmp	counter,3
1759	fzeros	%f0
1760	ble,a	.cont21
1761	sethi	%hi(0x3fffffff),%l6
1762
1763	sub	counter,3,counter
1764	st	counter,[%fp+tmp_counter]
1765
1766	stx	%l3,[%fp+tmp_px]
1767	sethi	%hi(0x3fffffff),%l6
1768	ba	.cont21
1769	or	%g0,3,counter
1770
1771	.align	16
/*
 * .update22: entered from the main loop when the (4_0) range check finds
 * ax (%o7) below the %o4 bound (0x39b89c55 per the loop comments) for the
 * element just loaded through %i0; rejoins the loop at .cont22.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 4: resume at .cont22 with nothing else changed.
 *   counter >  4: tmp_counter = counter - 4, tmp_px = %i0, counter = 4,
 *                 then resume at .cont22.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1772.update22:
1773	cmp	counter,4
1774	fzeros	%f0
1775	ble,a	.cont22
1776	sethi	%hi(0x3fffffff),%l6
1777
1778	sub	counter,4,counter
1779	st	counter,[%fp+tmp_counter]
1780
1781	stx	%i0,[%fp+tmp_px]
1782	sethi	%hi(0x3fffffff),%l6
1783	ba	.cont22
1784	or	%g0,4,counter
1785
1786	.align	16
/*
 * .update23: entered from the main loop when the (4_0) range check finds
 * ax (%o7) above the %o5 bound (0x4c700518 per the loop comments) for the
 * element just loaded through %i0; rejoins the loop at .cont23.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 4: resume at .cont23 with nothing else changed.
 *   counter >  4: tmp_counter = counter - 4, tmp_px = %i0, counter = 4,
 *                 then resume at .cont23.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1787.update23:
1788	cmp	counter,4
1789	fzeros	%f0
1790	ble,a	.cont23
1791	sethi	%hi(0x3fffffff),%l6
1792
1793	sub	counter,4,counter
1794	st	counter,[%fp+tmp_counter]
1795
1796	stx	%i0,[%fp+tmp_px]
1797	sethi	%hi(0x3fffffff),%l6
1798	ba	.cont23
1799	or	%g0,4,counter
1800
1801	.align	16
/*
 * .update24: entered from the main loop when the (5_0) range check finds
 * ax (%o7) below the %o4 bound (0x39b89c55 per the loop comments) for the
 * element just loaded through %i2; rejoins the loop at .cont24.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 5: resume at .cont24 with nothing else changed.
 *   counter >  5: tmp_counter = counter - 5, tmp_px = %i2, counter = 5,
 *                 then resume at .cont24.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1802.update24:
1803	cmp	counter,5
1804	fzeros	%f0
1805	ble,a	.cont24
1806	sethi	%hi(0x3fffffff),%l6
1807
1808	sub	counter,5,counter
1809	st	counter,[%fp+tmp_counter]
1810
1811	stx	%i2,[%fp+tmp_px]
1812	sethi	%hi(0x3fffffff),%l6
1813	ba	.cont24
1814	or	%g0,5,counter
1815
1816	.align	16
/*
 * .update25: entered from the main loop when the (5_0) range check finds
 * ax (%o7) above the %o5 bound (0x4c700518 per the loop comments) for the
 * element just loaded through %i2; rejoins the loop at .cont25.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 5: resume at .cont25 with nothing else changed.
 *   counter >  5: tmp_counter = counter - 5, tmp_px = %i2, counter = 5,
 *                 then resume at .cont25.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1817.update25:
1818	cmp	counter,5
1819	fzeros	%f0
1820	ble,a	.cont25
1821	sethi	%hi(0x3fffffff),%l6
1822
1823	sub	counter,5,counter
1824	st	counter,[%fp+tmp_counter]
1825
1826	stx	%i2,[%fp+tmp_px]
1827	sethi	%hi(0x3fffffff),%l6
1828	ba	.cont25
1829	or	%g0,5,counter
1830
1831	.align	16
/*
 * .update26: entered from the main loop when the (6_0) range check finds
 * ax (%o7) below the %o4 bound (0x39b89c55 per the loop comments) for the
 * element just loaded through %l2; rejoins the loop at .cont26.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 6: resume at .cont26 with nothing else changed.
 *   counter >  6: tmp_counter = counter - 6, tmp_px = %l2, counter = 6,
 *                 then resume at .cont26.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1832.update26:
1833	cmp	counter,6
1834	fzeros	%f0
1835	ble,a	.cont26
1836	sethi	%hi(0x3fffffff),%l6
1837
1838	sub	counter,6,counter
1839	st	counter,[%fp+tmp_counter]
1840
1841	stx	%l2,[%fp+tmp_px]
1842	sethi	%hi(0x3fffffff),%l6
1843	ba	.cont26
1844	or	%g0,6,counter
1845
1846	.align	16
/*
 * .update27: entered from the main loop when the (6_0) range check finds
 * ax (%o7) above the %o5 bound (0x4c700518 per the loop comments) for the
 * element just loaded through %l2; rejoins the loop at .cont27.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 6: resume at .cont27 with nothing else changed.
 *   counter >  6: tmp_counter = counter - 6, tmp_px = %l2, counter = 6,
 *                 then resume at .cont27.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1847.update27:
1848	cmp	counter,6
1849	fzeros	%f0
1850	ble,a	.cont27
1851	sethi	%hi(0x3fffffff),%l6
1852
1853	sub	counter,6,counter
1854	st	counter,[%fp+tmp_counter]
1855
1856	stx	%l2,[%fp+tmp_px]
1857	sethi	%hi(0x3fffffff),%l6
1858	ba	.cont27
1859	or	%g0,6,counter
1860
1861	.align	16
/*
 * .update28: entered from the main loop when the (7_0) range check finds
 * ax (%o7) below the %o4 bound (0x39b89c55 per the loop comments) for the
 * element just loaded through %g5; rejoins the loop at .cont28.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 7: resume at .cont28 with nothing else changed.
 *   counter >  7: tmp_counter = counter - 7, tmp_px = %g5, counter = 7,
 *                 then resume at .cont28.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1862.update28:
1863	cmp	counter,7
1864	fzeros	%f0
1865	ble,a	.cont28
1866	sethi	%hi(0x3fffffff),%l6
1867
1868	sub	counter,7,counter
1869	st	counter,[%fp+tmp_counter]
1870
1871	stx	%g5,[%fp+tmp_px]
1872	sethi	%hi(0x3fffffff),%l6
1873	ba	.cont28
1874	or	%g0,7,counter
1875
1876	.align	16
/*
 * .update29: entered from the main loop when the (7_0) range check finds
 * ax (%o7) above the %o5 bound (0x4c700518 per the loop comments) for the
 * element just loaded through %g5; rejoins the loop at .cont29.
 * Unconditionally sets %f0 = 0.0f (fzeros) and %l6 = 0x3ffffc00 (the
 * sethi runs in the annulled delay slot when ble,a is taken, and
 * explicitly on the fall-through path), replacing the loaded ftmp0/ux.
 *   counter <= 7: resume at .cont29 with nothing else changed.
 *   counter >  7: tmp_counter = counter - 7, tmp_px = %g5, counter = 7,
 *                 then resume at .cont29.
 * NOTE(review): presumably the saved state lets a later pass restart at
 * the offending element; the code that reads tmp_counter/tmp_px is
 * outside this chunk -- confirm.
 */
1877.update29:
1878	cmp	counter,7
1879	fzeros	%f0
1880	ble,a	.cont29
1881	sethi	%hi(0x3fffffff),%l6
1882
1883	sub	counter,7,counter
1884	st	counter,[%fp+tmp_counter]
1885
1886	stx	%g5,[%fp+tmp_px]
1887	sethi	%hi(0x3fffffff),%l6
1888	ba	.cont29
1889	or	%g0,7,counter
1890
1891	SET_SIZE(__vatanf)
1892
1893