xref: /titanic_50/usr/src/lib/libmvec/common/vis/__vrhypotf.S (revision 5c5f137104b2d56181283389fa902220f2023809)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vrhypotf.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35.CONST_TBL:
/*
 * Layout, as byte offsets from .CONST_TBL:
 *   [0, 1024)     64 entries, TBL[2*i+0] and TBL[2*i+1]
 *   [1024, 2048)  64 entries, TBL[128+2*i+0] and TBL[128+2*i+1]
 *   [2048, 2128)  scalar constants DC0 .. KA3 (see TBL_SHIFT)
 * Every entry is a pair of doubles (16 bytes), each double written as two
 * 32-bit words with the high word first.
 *
 * The code forms the entry offset as si0 = (ibase0 >> 10) & 0x7f0, where
 * ibase0 is the high word of hyp0.  Bit 10 of si0 is therefore the lowest
 * exponent bit of hyp0 and selects the half; bits 4-9 are the top six
 * mantissa bits and select i.
 */
36! i = [0,63]
37! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
38! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
39! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
40! TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
41
/* First half, byte offsets 0 .. 1023: TBL[2*i+0], TBL[2*i+1] for i = 0 .. 63. */
42	.word	0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd,
43	.word	0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03,
44	.word	0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2,
45	.word	0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671,
46	.word	0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911,
47	.word	0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342,
48	.word	0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a,
49	.word	0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9,
50	.word	0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555,
51	.word	0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54,
52	.word	0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70,
53	.word	0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032,
54	.word	0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74,
55	.word	0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92,
56	.word	0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f,
57	.word	0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3,
58	.word	0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f,
59	.word	0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199,
60	.word	0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577,
61	.word	0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58,
62	.word	0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03,
63	.word	0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37,
64	.word	0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e,
65	.word	0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92,
66	.word	0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826,
67	.word	0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0,
68	.word	0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91,
69	.word	0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50,
70	.word	0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e,
71	.word	0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428,
72	.word	0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4,
73	.word	0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5,
74	.word	0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c,
75	.word	0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55,
76	.word	0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492,
77	.word	0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a,
78	.word	0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a,
79	.word	0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d,
80	.word	0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9,
81	.word	0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3,
82	.word	0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896,
83	.word	0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f,
84	.word	0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9,
85	.word	0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee,
86	.word	0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4,
87	.word	0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62,
88	.word	0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db,
89	.word	0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253,
90	.word	0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a,
91	.word	0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26,
92	.word	0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad,
93	.word	0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c,
94	.word	0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc,
95	.word	0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412,
96	.word	0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488,
97	.word	0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499,
98	.word	0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db,
99	.word	0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438,
100	.word	0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a,
101	.word	0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa,
102	.word	0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d,
103	.word	0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72,
104	.word	0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a,
105	.word	0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9,
/* Second half, byte offsets 1024 .. 2047: TBL[128+2*i+0], TBL[128+2*i+1]. */
106	.word	0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000,
107	.word	0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9,
108	.word	0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b,
109	.word	0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc,
110	.word	0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c,
111	.word	0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957,
112	.word	0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2,
113	.word	0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc,
114	.word	0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66,
115	.word	0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350,
116	.word	0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549,
117	.word	0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d,
118	.word	0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937,
119	.word	0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86,
120	.word	0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213,
121	.word	0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358,
122	.word	0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9,
123	.word	0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c,
124	.word	0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2,
125	.word	0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b,
126	.word	0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39,
127	.word	0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118,
128	.word	0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347,
129	.word	0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11,
130	.word	0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550,
131	.word	0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e,
132	.word	0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169,
133	.word	0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394,
134	.word	0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a,
135	.word	0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c,
136	.word	0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7,
137	.word	0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899,
138	.word	0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e,
139	.word	0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee,
140	.word	0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458,
141	.word	0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588,
142	.word	0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a,
143	.word	0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54,
144	.word	0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44,
145	.word	0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31,
146	.word	0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c,
147	.word	0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96,
148	.word	0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009,
149	.word	0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3,
150	.word	0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426,
151	.word	0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6,
152	.word	0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d,
153	.word	0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2,
154	.word	0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7,
155	.word	0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d,
156	.word	0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1,
157	.word	0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5,
158	.word	0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88,
159	.word	0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72,
160	.word	0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729,
161	.word	0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea,
162	.word	0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098,
163	.word	0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746,
164	.word	0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5,
165	.word	0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f,
166	.word	0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467,
167	.word	0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1,
168	.word	0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d,
169	.word	0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6,
170
/*
 * Scalar constants, starting at byte offset 2048 (TBL_SHIFT).  The function
 * prologue loads each one with ldd [TBL+TBL_SHIFT+n]:
 *   n =  0 DC0    low 52 bits set (mantissa field of a double)
 *   n =  8 DC1    1.0 as a double; its low word is 0
 *   n = 16 DC2    exponent field plus the top six mantissa bits
 *   n = 24 DA0    exponent field without its lowest bit
 *   n = 32 DA1    minuend of the fpsub32 that produces dbase0
 *   n = 40 SCALE  high word 0x80808080; low word 0x3f800000 (FONE)
 *   n = 48 KA0, 56 KA1, 64 KA2, 72 KA3   polynomial coefficients
 */
171	.word	0x000fffff, 0xffffffff	! DC0
172	.word	0x3ff00000, 0		! DC1
173	.word	0x7fffc000, 0		! DC2
174	.word	0x7fe00000, 0		! DA0
175	.word	0x60000000, 0		! DA1
176	.word	0x80808080, 0x3f800000	! SCALE , FONE = 1.0f
177	.word	0x3fefffff, 0xfee7f18f	! KA0 =  9.99999997962321453275e-01
178	.word	0xbfdfffff, 0xfe07e52f	! KA1 = -4.99999998166077580600e-01
179	.word	0x3fd80118, 0x0ca296d9	! KA2 = 3.75066768969515586277e-01
180	.word	0xbfd400fc, 0x0bbb8e78	! KA3 = -3.12560092408808548438e-01
181
/*
 * Integer-register constants and the table base.  The function prologue
 * builds 0x7f800000 in %o0 with sethi, builds 0x7fffffff in %o7 with
 * sethi followed by add 1023, and points %l2 at .CONST_TBL.
 */
182#define _0x7f800000	%o0
183#define _0x7fffffff	%o7
184#define TBL		%l2
185
/*
 * Byte offset from TBL to the scalar constants (DC0 .. KA3) that follow
 * the 128 sixteen-byte lookup entries (128 * 16 = 2048).
 */
186#define TBL_SHIFT	2048
187
/*
 * Strides are kept in bytes: the prologue shifts each incoming stride left
 * by 2 before use.  counter lives in the first incoming argument register;
 * the code compares it against 0 and 5 and subtracts from it as elements
 * are consumed.
 */
188#define stridex		%l3
189#define stridey		%l4
190#define stridez		%l5
191#define counter		%i0
192
/*
 * Floating-point registers for the constants loaded from TBL+TBL_SHIFT in
 * the prologue.  Each is filled by a 64-bit ldd, so each name denotes the
 * even register of an even/odd pair:
 *   SCALE (%f6) receives the word 0x80808080; the odd half of that pair
 *   (%f7) receives 0x3f800000, which the table comment labels FONE = 1.0f.
 *   DC1 (%f8) receives the high word of 1.0; FZERO (%f9) is the odd half
 *   of the same pair, which the table stores as 0.
 */
193#define DA0		%f52
194#define DA1		%f44
195#define SCALE		%f6
196
197#define DC0		%f46
198#define DC1		%f8
199#define FZERO		%f9
200#define DC2		%f50
201
/* Polynomial coefficients, loaded from TBL+TBL_SHIFT+48 .. +72. */
202#define KA3		%f56
203#define KA2		%f58
204#define KA1		%f60
205#define KA0		%f54
206
/*
 * Scratch slots, addressed relative to %fp.  tmp_px and tmp_py are written
 * with 8-byte stx; tmp_counter and the ftmp slots are accessed as 4-byte
 * words.
 */
207#define tmp_counter	STACK_BIAS-0x04
208#define tmp_px		STACK_BIAS-0x20
209#define tmp_py		STACK_BIAS-0x18
210
/*
 * ftmp0/ftmp2 name the same slot, and so do ftmp1/ftmp3.  A value stored
 * through one name must be reloaded before the aliasing name is stored.
 */
211#define ftmp0		STACK_BIAS-0x10
212#define ftmp1		STACK_BIAS-0x0c
213#define ftmp2		STACK_BIAS-0x10
214#define ftmp3		STACK_BIAS-0x0c
215#define ftmp4		STACK_BIAS-0x08
216
/* 0x20 bytes spans every slot above: the lowest offset used is -0x20. */
217! sizeof temp storage - must be a multiple of 16 for V9
218#define tmps		0x20
219
220!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
221!      !!!!!   algorithm   !!!!!
222!  x0 = *px;
223!  ax = *(int*)px;
224!
225!  y0 = *py;
226!  ay = *(int*)py;
227!
228!  ax &= 0x7fffffff;
229!  ay &= 0x7fffffff;
230!
231!  px += stridex;
232!  py += stridey;
233!
234!  if ( ax >= 0x7f800000 || ay >= 0x7f800000 )
235!  {
236!    *pz = fabsf(x0) * fabsf(y0);
237!    if( ax == 0x7f800000 ) *pz = 0.0f;
238!    else if( ay == 0x7f800000 ) *pz = 0.0f;
239!    pz += stridez;
240!    continue;
241!  }
242!
243!  if ( ay == 0 )
244!  {
245!    if ( ax == 0 )
246!    {
247!      *pz = 1.0f / 0.0f;
248!      pz += stridez;
249!      continue;
250!    }
251!  }
252!
253!  hyp0 = x0 * (double)x0;
254!  dtmp0 = y0 * (double)y0;
255!  hyp0 += dtmp0;
256!
257!  ibase0 = ((int*)&hyp0)[0];
258!
259!  dbase0 = vis_fand(hyp0,DA0);
260!  dbase0 = vis_fmul8x16(SCALE, dbase0);
261!  dbase0 = vis_fpsub32(DA1,dbase0);
262!
263!  hyp0 = vis_fand(hyp0,DC0);
264!  hyp0 = vis_for(hyp0,DC1);
265!  h_hi0 = vis_fand(hyp0,DC2);
266!
267!  ibase0 >>= 10;
268!  si0 = ibase0 & 0x7f0;
269!  xx0 = ((double*)((char*)TBL + si0))[0];
270!
271!  dtmp1 = hyp0 - h_hi0;
272!  xx0 = dtmp1 * xx0;
273!  res0 = ((double*)((char*)TBL + si0))[1];
274!  dtmp2 = KA3 * xx0;
275!  dtmp2 += KA2;
276!  dtmp2 *= xx0;
277!  dtmp2 += KA1;
278!  dtmp2 *= xx0;
279!  dtmp2 += KA0;
280!  res0 *= dtmp2;
281!  res0 *= dbase0;
282!  ftmp0 = (float)res0;
283!  *pz = ftmp0;
284!  pz += stridez;
285!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
286
287	ENTRY(__vrhypotf)
288	save	%sp,-SA(MINFRAME)-tmps,%sp
289	PIC_SETUP(l7)
290	PIC_SET(l7,.CONST_TBL,l2)
291	wr	%g0,0x82,%asi
292
293#ifdef __sparcv9
294	ldx	[%fp+STACK_BIAS+176],stridez
295#else
296	ld	[%fp+STACK_BIAS+92],stridez
297#endif
298
299	stx	%i1,[%fp+tmp_px]
300	sll	%i2,2,stridex
301
302	stx	%i3,[%fp+tmp_py]
303	sll	%i4,2,stridey
304
305	st	%i0,[%fp+tmp_counter]
306	sll	stridez,2,stridez
307	mov	%i5,%o1
308
309	ldd	[TBL+TBL_SHIFT],DC0
310	ldd	[TBL+TBL_SHIFT+8],DC1
311	ldd	[TBL+TBL_SHIFT+16],DC2
312	ldd	[TBL+TBL_SHIFT+24],DA0
313	ldd	[TBL+TBL_SHIFT+32],DA1
314	ldd	[TBL+TBL_SHIFT+40],SCALE
315	ldd	[TBL+TBL_SHIFT+48],KA0
316
317	ldd	[TBL+TBL_SHIFT+56],KA1
318	sethi	%hi(0x7f800000),%o0
319
320	ldd	[TBL+TBL_SHIFT+64],KA2
321	sethi	%hi(0x7ffffc00),%o7
322
323	ldd	[TBL+TBL_SHIFT+72],KA3
324	add	%o7,1023,%o7
325
326.begin:
327	ld	[%fp+tmp_counter],counter
328	ldx	[%fp+tmp_px],%o4
329	ldx	[%fp+tmp_py],%i2
330	st	%g0,[%fp+tmp_counter]
331.begin1:
332	cmp	counter,0
333	ble,pn	%icc,.exit
334	nop
335
336	lda	[%i2]0x82,%l6		! (3_0) ay = *(int*)py;
337
338	lda	[%o4]0x82,%i5		! (3_0) ax = *(int*)px;
339
340	lda	[%i2]0x82,%f2		! (3_0) y0 = *py;
341	and	%l6,_0x7fffffff,%l6	! (3_0) ay &= 0x7fffffff;
342
343	and	%i5,_0x7fffffff,%i5	! (3_0) ax &= 0x7fffffff;
344	cmp	%l6,_0x7f800000		! (3_0) ay ? 0x7f800000
345	bge,pn	%icc,.spec0		! (3_0) if ( ay >= 0x7f800000 )
346	lda	[%o4]0x82,%f4		! (3_0) x0 = *px;
347
348	cmp	%i5,_0x7f800000		! (3_0) ax ? 0x7f800000
349	bge,pn	%icc,.spec0		! (3_0) if ( ax >= 0x7f800000 )
350	nop
351
352	cmp	%l6,0			! (3_0)
353	be,pn	%icc,.spec1		! (3_0) if ( ay == 0 )
354	fsmuld	%f4,%f4,%f36		! (3_0) hyp0 = x0 * (double)x0;
355.cont_spec1:
356	lda	[%i2+stridey]0x82,%l6	! (4_0) ay = *(int*)py;
357
358	fsmuld	%f2,%f2,%f62		! (3_0) dtmp0 = y0 * (double)y0;
359	lda	[stridex+%o4]0x82,%i5	! (4_0) ax = *(int*)px;
360
361	add	%o4,stridex,%l0		! px += stridex
362
363	add	%i2,stridey,%i2		! py += stridey
364	and	%l6,_0x7fffffff,%l6	! (4_0) ay &= 0x7fffffff;
365
366	and	%i5,_0x7fffffff,%i5	! (4_0) ax &= 0x7fffffff;
367	lda	[%i2]0x82,%f2		! (4_0) y0 = *py;
368
369	faddd	%f36,%f62,%f20		! (3_0) hyp0 += dtmp0;
370	cmp	%l6,_0x7f800000		! (4_0) ay ? 0x7f800000
371
372	bge,pn	%icc,.update0		! (4_0) if ( ay >= 0x7f800000 )
373	lda	[stridex+%o4]0x82,%f4	! (4_0) x0 = *px;
374.cont0:
375	cmp	%i5,_0x7f800000		! (4_0) ax ? 0x7f800000
376	bge,pn	%icc,.update1		! (4_0) if ( ax >= 0x7f800000 )
377	st	%f20,[%fp+ftmp4]	! (3_0) ibase0 = ((int*)&hyp0)[0];
378.cont1:
379	cmp	%l6,0			! (4_1) ay ? 0
380	be,pn	%icc,.update2		! (4_1) if ( ay == 0 )
381	fsmuld	%f4,%f4,%f38		! (4_1) hyp0 = x0 * (double)x0;
382.cont2:
383	lda	[%i2+stridey]0x82,%l6	! (0_0) ay = *(int*)py;
384
385	fsmuld	%f2,%f2,%f62		! (4_1) dtmp0 = y0 * (double)y0;
386	lda	[%l0+stridex]0x82,%i5	! (0_0) ax = *(int*)px;
387
388	add	%l0,stridex,%i1		! px += stridex
389
390	add	%i2,stridey,%i2		! py += stridey
391	and	%l6,_0x7fffffff,%l6	! (0_0) ay &= 0x7fffffff;
392
393	and	%i5,_0x7fffffff,%i5	! (0_0) ax &= 0x7fffffff;
394	lda	[%i2]0x82,%f2		! (0_0) y0 = *py;
395
396	cmp	%l6,_0x7f800000		! (0_0) ay ? 0x7f800000
397	bge,pn	%icc,.update3		! (0_0) if ( ay >= 0x7f800000 )
398	faddd	%f38,%f62,%f12		! (4_1) hyp0 += dtmp0;
399.cont3:
400	lda	[%i1]0x82,%f4		! (0_0) x0 = *px;
401
402	cmp	%i5,_0x7f800000		! (0_0) ax ? 0x7f800000
403	bge,pn	%icc,.update4		! (0_0) if ( ax >= 0x7f800000 )
404	st	%f12,[%fp+ftmp0]	! (4_1) ibase0 = ((int*)&hyp0)[0];
405.cont4:
406	cmp	%l6,0			! (0_0) ay ? 0
407	be,pn	%icc,.update5		! (0_0) if ( ay == 0 )
408	fsmuld	%f4,%f4,%f38		! (0_0) hyp0 = x0 * (double)x0;
409.cont5:
410	lda	[%i2+stridey]0x82,%l6	! (1_0) ay = *(int*)py;
411
412	fsmuld	%f2,%f2,%f62		! (0_0) dtmp0 = y0 * (double)y0;
413	lda	[%i1+stridex]0x82,%i5	! (1_0) ax = *(int*)px;
414
415	add	%i1,stridex,%g5		! px += stridex
416
417	add	%i2,stridey,%o3		! py += stridey
418	and	%l6,_0x7fffffff,%l6	! (1_0) ay &= 0x7fffffff;
419	fand	%f20,DC0,%f30		! (3_1) hyp0 = vis_fand(hyp0,DC0);
420
421	and	%i5,_0x7fffffff,%i5	! (1_0) ax &= 0x7fffffff;
422	lda	[%o3]0x82,%f2		! (1_0) y0 = *py;
423
424	faddd	%f38,%f62,%f14		! (0_0) hyp0 += dtmp0;
425	cmp	%l6,_0x7f800000		! (1_0) ay ? 0x7f800000
426
427	lda	[%g5]0x82,%f4		! (1_0) x0 = *px;
428	bge,pn	%icc,.update6		! (1_0) if ( ay >= 0x7f800000 )
429	for	%f30,DC1,%f28		! (3_1) hyp0 = vis_for(hyp0,DC1);
430.cont6:
431	cmp	%i5,_0x7f800000		! (1_0) ax ? 0x7f800000
432	bge,pn	%icc,.update7		! (1_0) if ( ax >= 0x7f800000 )
433	ld	[%fp+ftmp4],%l1		! (3_1) ibase0 = ((int*)&hyp0)[0];
434.cont7:
435	st	%f14,[%fp+ftmp1]	! (0_0) ibase0 = ((int*)&hyp0)[0];
436
437	cmp	%l6,0			! (1_0) ay ? 0
438	be,pn	%icc,.update8		! (1_0) if ( ay == 0 )
439	fand	%f28,DC2,%f30		! (3_1) h_hi0 = vis_fand(hyp0,DC2);
440.cont8:
441	fsmuld	%f4,%f4,%f38		! (1_0) hyp0 = x0 * (double)x0;
442	sra	%l1,10,%o5		! (3_1) ibase0 >>= 10;
443
444	and	%o5,2032,%o4		! (3_1) si0 = ibase0 & 0x7f0;
445	lda	[%o3+stridey]0x82,%l6	! (2_0) ay = *(int*)py;
446
447	fsmuld	%f2,%f2,%f62		! (1_0) dtmp0 = y0 * (double)y0;
448	add	%o4,TBL,%l7		! (3_1) (char*)TBL + si0
449	lda	[stridex+%g5]0x82,%i5	! (2_0) ax = *(int*)px;
450	fsubd	%f28,%f30,%f28		! (3_1) dtmp1 = hyp0 - h_hi0;
451
452	add	%g5,stridex,%i4		! px += stridex
453	ldd	[TBL+%o4],%f42		! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
454
455	and	%l6,_0x7fffffff,%l6	! (2_0) ay &= 0x7fffffff;
456	add	%o3,stridey,%i2		! py += stridey
457	fand	%f12,DC0,%f30		! (4_1) hyp0 = vis_fand(hyp0,DC0);
458
459	and	%i5,_0x7fffffff,%i5	! (2_0) ax &= 0x7fffffff;
460	lda	[%i2]0x82,%f2		! (2_0) y0 = *py;
461
462	faddd	%f38,%f62,%f16		! (1_0) hyp0 += dtmp0;
463	cmp	%l6,_0x7f800000		! (2_0) ay ? 0x7f800000
464	fmuld	%f28,%f42,%f26		! (3_1) xx0 = dtmp1 * xx0;
465
466	lda	[stridex+%g5]0x82,%f4	! (2_0) x0 = *px;
467	bge,pn	%icc,.update9		! (2_0) if ( ay >= 0x7f800000
468	for	%f30,DC1,%f28		! (4_1) hyp0 = vis_for(hyp0,DC1);
469.cont9:
470	cmp	%i5,_0x7f800000		! (2_0) ax ? 0x7f800000
471	bge,pn	%icc,.update10		! (2_0) if ( ax >= 0x7f800000 )
472	ld	[%fp+ftmp0],%i3		! (4_1) ibase0 = ((int*)&hyp0)[0];
473.cont10:
474	st	%f16,[%fp+ftmp2]	! (1_0) ibase0 = ((int*)&hyp0)[0];
475
476	fmuld	KA3,%f26,%f34		! (3_1) dtmp2 = KA3 * xx0;
477	cmp	%l6,0			! (2_0) ay ? 0
478	be,pn	%icc,.update11		! (2_0) if ( ay == 0 )
479	fand	%f28,DC2,%f30		! (4_1) h_hi0 = vis_fand(hyp0,DC2);
480.cont11:
481	fsmuld	%f4,%f4,%f36		! (2_0) hyp0 = x0 * (double)x0;
482	sra	%i3,10,%i3		! (4_1) ibase0 >>= 10;
483
484	and	%i3,2032,%i3		! (4_1) si0 = ibase0 & 0x7f0;
485	lda	[%i2+stridey]0x82,%l6	! (3_0) ay = *(int*)py;
486
487	fsmuld	%f2,%f2,%f62		! (2_0) dtmp0 = y0 * (double)y0;
488	add	%i3,TBL,%i3		! (4_1) (char*)TBL + si0
489	lda	[%i4+stridex]0x82,%i5	! (3_0) ax = *(int*)px;
490	fsubd	%f28,%f30,%f28		! (4_1) dtmp1 = hyp0 - h_hi0;
491
492	add	%i4,stridex,%o4		! px += stridex
493	ldd	[%i3],%f42		! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
494	faddd	%f34,KA2,%f10		! (3_1) dtmp2 += KA2;
495
496	add	%i2,stridey,%i2		! py += stridey
497	and	%l6,_0x7fffffff,%l6	! (3_0) ay &= 0x7fffffff;
498	fand	%f14,DC0,%f30		! (0_0) hyp0 = vis_fand(hyp0,DC0);
499
500	and	%i5,_0x7fffffff,%i5	! (3_0) ax &= 0x7fffffff;
501	lda	[%i2]0x82,%f2		! (3_0) y0 = *py;
502
503	faddd	%f36,%f62,%f18		! (2_0) hyp0 += dtmp0;
504	cmp	%l6,_0x7f800000		! (3_0) ay ? 0x7f800000
505	fmuld	%f28,%f42,%f32		! (4_1) xx0 = dtmp1 * xx0;
506
507	fmuld	%f10,%f26,%f10		! (3_1) dtmp2 *= xx0;
508	lda	[%o4]0x82,%f4		! (3_0) x0 = *px;
509	bge,pn	%icc,.update12		! (3_0) if ( ay >= 0x7f800000 )
510	for	%f30,DC1,%f28		! (0_0) hyp0 = vis_for(hyp0,DC1);
511.cont12:
512	cmp	%i5,_0x7f800000		! (3_0) ax ? 0x7f800000
513	bge,pn	%icc,.update13		! (3_0) if ( ax >= 0x7f800000 )
514	ld	[%fp+ftmp1],%i1		! (0_0) ibase0 = ((int*)&hyp0)[0];
515.cont13:
516	st	%f18,[%fp+ftmp3]	! (2_0) ibase0 = ((int*)&hyp0)[0];
517
518	fmuld	KA3,%f32,%f34		! (4_1) dtmp2 = KA3 * xx0;
519	cmp	%l6,0			! (3_0)
520	be,pn	%icc,.update14		! (3_0) if ( ay == 0 )
521	fand	%f28,DC2,%f30		! (0_0) h_hi0 = vis_fand(hyp0,DC2);
522.cont14:
523	fsmuld	%f4,%f4,%f36		! (3_0) hyp0 = x0 * (double)x0;
524	sra	%i1,10,%l1		! (0_0) ibase0 >>= 10;
525	faddd	%f10,KA1,%f40		! (3_1) dtmp2 += KA1;
526
527	and	%l1,2032,%o5		! (0_0) si0 = ibase0 & 0x7f0;
528	lda	[%i2+stridey]0x82,%l6	! (4_0) ay = *(int*)py;
529
530	fsmuld	%f2,%f2,%f62		! (3_0) dtmp0 = y0 * (double)y0;
531	add	%o5,TBL,%l1		! (0_0) (char*)TBL + si0
532	lda	[stridex+%o4]0x82,%i5	! (4_0) ax = *(int*)px;
533	fsubd	%f28,%f30,%f28		! (0_0) dtmp1 = hyp0 - h_hi0;
534
535	add	%o4,stridex,%l0		! px += stridex
536	ldd	[TBL+%o5],%f42		! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
537	faddd	%f34,KA2,%f10		! (4_1) dtmp2 += KA2;
538
539	fmuld	%f40,%f26,%f40		! (3_1) dtmp2 *= xx0;
540	add	%i2,stridey,%i2		! py += stridey
541	and	%l6,_0x7fffffff,%l6	! (4_0) ay &= 0x7fffffff;
542	fand	%f16,DC0,%f30		! (1_0) hyp0 = vis_fand(hyp0,DC0);
543
544	and	%i5,_0x7fffffff,%i5	! (4_0) ax &= 0x7fffffff;
545	lda	[%i2]0x82,%f2		! (4_0) y0 = *py;
546	fand	%f20,DA0,%f24		! (3_1) dbase0 = vis_fand(hyp0,DA0);
547
548	faddd	%f36,%f62,%f20		! (3_0) hyp0 += dtmp0;
549	cmp	%l6,_0x7f800000		! (4_0) ay ? 0x7f800000
550	ldd	[%l7+8],%f36		! (3_1) res0 = ((double*)((char*)arr + si0))[1];
551	fmuld	%f28,%f42,%f26		! (0_0) xx0 = dtmp1 * xx0;
552
553	fmuld	%f10,%f32,%f10		! (4_1) dtmp2 *= xx0;
554	lda	[stridex+%o4]0x82,%f4	! (4_0) x0 = *px;
555	bge,pn	%icc,.update15		! (4_0) if ( ay >= 0x7f800000 )
556	for	%f30,DC1,%f28		! (1_0) hyp0 = vis_for(hyp0,DC1);
557.cont15:
558	fmul8x16	SCALE,%f24,%f24	! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
559	cmp	%i5,_0x7f800000		! (4_0) ax ? 0x7f800000
560	ld	[%fp+ftmp2],%i1		! (1_0) ibase0 = ((int*)&hyp0)[0];
561	faddd	%f40,KA0,%f62		! (3_1) dtmp2 += KA0;
562
563	bge,pn	%icc,.update16		! (4_0) if ( ax >= 0x7f800000 )
564	st	%f20,[%fp+ftmp4]	! (3_0) ibase0 = ((int*)&hyp0)[0];
565.cont16:
566	fmuld	KA3,%f26,%f34		! (0_0) dtmp2 = KA3 * xx0;
567	fand	%f28,DC2,%f30		! (1_0) h_hi0 = vis_fand(hyp0,DC2);
568
569	mov	%o1,%i4
570	cmp	counter,5
571	bl,pn	%icc,.tail
572	nop
573
574	ba	.main_loop
575	sub	counter,5,counter
576
577	.align	16
578.main_loop:
579	fsmuld	%f4,%f4,%f38		! (4_1) hyp0 = x0 * (double)x0;
580	sra	%i1,10,%o2		! (1_1) ibase0 >>= 10;
581	cmp	%l6,0			! (4_1) ay ? 0
582	faddd	%f10,KA1,%f40		! (4_2) dtmp2 += KA1;
583
584	fmuld	%f36,%f62,%f36		! (3_2) res0 *= dtmp2;
585	and	%o2,2032,%o2		! (1_1) si0 = ibase0 & 0x7f0;
586	lda	[%i2+stridey]0x82,%l6	! (0_0) ay = *(int*)py;
587	fpsub32	DA1,%f24,%f24		! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);
588
589	fsmuld	%f2,%f2,%f62		! (4_1) dtmp0 = y0 * (double)y0;
590	add	%o2,TBL,%o2		! (1_1) (char*)TBL + si0
591	lda	[%l0+stridex]0x82,%o1	! (0_0) ax = *(int*)px;
592	fsubd	%f28,%f30,%f28		! (1_1) dtmp1 = hyp0 - h_hi0;
593
594	add	%l0,stridex,%i1		! px += stridex
595	ldd	[%o2],%f42		! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
596	be,pn	%icc,.update17		! (4_1) if ( ay == 0 )
597	faddd	%f34,KA2,%f10		! (0_1) dtmp2 += KA2;
598.cont17:
599	fmuld	%f40,%f32,%f40		! (4_2) dtmp2 *= xx0;
600	add	%i2,stridey,%i2		! py += stridey
601	and	%l6,_0x7fffffff,%l6	! (0_0) ay &= 0x7fffffff;
602	fand	%f18,DC0,%f30		! (2_1) hyp0 = vis_fand(hyp0,DC0);
603
604	fmuld	%f36,%f24,%f32		! (3_2) res0 *= dbase0;
605	and	%o1,_0x7fffffff,%o1	! (0_0) ax &= 0x7fffffff;
606	lda	[%i2]0x82,%f2		! (0_0) y0 = *py;
607	fand	%f12,DA0,%f24		! (4_2) dbase0 = vis_fand(hyp0,DA0);
608
609	faddd	%f38,%f62,%f12		! (4_1) hyp0 += dtmp0;
610	cmp	%l6,_0x7f800000		! (0_0) ay ? 0x7f800000
611	ldd	[%i3+8],%f62		! (4_2) res0 = ((double*)((char*)arr + si0))[1];
612	fmuld	%f28,%f42,%f36		! (1_1) xx0 = dtmp1 * xx0;
613
614	fmuld	%f10,%f26,%f10		! (0_1) dtmp2 *= xx0;
615	lda	[%i1]0x82,%f4		! (0_0) x0 = *px;
616	bge,pn	%icc,.update18		! (0_0) if ( ay >= 0x7f800000 )
617	for	%f30,DC1,%f28		! (2_1) hyp0 = vis_for(hyp0,DC1);
618.cont18:
619	fmul8x16	SCALE,%f24,%f24	! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
620	cmp	%o1,_0x7f800000		! (0_0) ax ? 0x7f800000
621	ld	[%fp+ftmp3],%l0		! (2_1) ibase0 = ((int*)&hyp0)[0];
622	faddd	%f40,KA0,%f42		! (4_2) dtmp2 += KA0;
623
624	add	%i4,stridez,%i3		! pz += stridez
625	st	%f12,[%fp+ftmp0]	! (4_1) ibase0 = ((int*)&hyp0)[0];
626	bge,pn	%icc,.update19		! (0_0) if ( ax >= 0x7f800000 )
627	fdtos	%f32,%f1		! (3_2) ftmp0 = (float)res0;
628.cont19:
629	fmuld	KA3,%f36,%f34		! (1_1) dtmp2 = KA3 * xx0;
630	cmp	%l6,0			! (0_0) ay ? 0
631	st	%f1,[%i4]		! (3_2) *pz = ftmp0;
632	fand	%f28,DC2,%f30		! (2_1) h_hi0 = vis_fand(hyp0,DC2);
633
634	fsmuld	%f4,%f4,%f38		! (0_0) hyp0 = x0 * (double)x0;
635	sra	%l0,10,%i4		! (2_1) ibase0 >>= 10;
636	be,pn	%icc,.update20		! (0_0) if ( ay == 0 )
637	faddd	%f10,KA1,%f40		! (0_1) dtmp2 += KA1;
638.cont20:
639	fmuld	%f62,%f42,%f32		! (4_2) res0 *= dtmp2;
640	and	%i4,2032,%g1		! (2_1) si0 = ibase0 & 0x7f0;
641	lda	[%i2+stridey]0x82,%l6	! (1_0) ay = *(int*)py;
642	fpsub32	DA1,%f24,%f24		! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);
643
644	fsmuld	%f2,%f2,%f62		! (0_0) dtmp0 = y0 * (double)y0;
645	add	%g1,TBL,%l0		! (2_1) (char*)TBL + si0
646	lda	[%i1+stridex]0x82,%i5	! (1_0) ax = *(int*)px;
647	fsubd	%f28,%f30,%f28		! (2_1) dtmp1 = hyp0 - h_hi0;
648
649	nop
650	add	%i1,stridex,%g5		! px += stridex
651	ldd	[TBL+%g1],%f42		! (2_1) xx0 = ((double*)((char*)TBL + si0))[0];
652	faddd	%f34,KA2,%f10		! (1_1) dtmp2 += KA2;
653
654	fmuld	%f40,%f26,%f40		! (0_1) dtmp2 *= xx0;
655	add	%i2,stridey,%o3		! py += stridey
656	and	%l6,_0x7fffffff,%l6	! (1_0) ay &= 0x7fffffff;
657	fand	%f20,DC0,%f30		! (3_1) hyp0 = vis_fand(hyp0,DC0);
658
659	fmuld	%f32,%f24,%f26		! (4_2) res0 *= dbase0;
660	and	%i5,_0x7fffffff,%i5	! (1_0) ax &= 0x7fffffff;
661	lda	[%o3]0x82,%f2		! (1_0) y0 = *py;
662	fand	%f14,DA0,%f24		! (0_1) dbase0 = vis_fand(hyp0,DA0);
663
664	faddd	%f38,%f62,%f14		! (0_0) hyp0 += dtmp0;
665	cmp	%l6,_0x7f800000		! (1_0) ay ? 0x7f800000
666	ldd	[%l1+8],%f62		! (0_1) res0 = ((double*)((char*)arr + si0))[1];
667	fmuld	%f28,%f42,%f32		! (2_1) xx0 = dtmp1 * xx0;
668
669	fmuld	%f10,%f36,%f10		! (1_1) dtmp2 *= xx0;
670	lda	[%g5]0x82,%f4		! (1_0) x0 = *px;
671	bge,pn	%icc,.update21		! (1_0) if ( ay >= 0x7f800000 )
672	for	%f30,DC1,%f28		! (3_1) hyp0 = vis_for(hyp0,DC1);
673.cont21:
674	fmul8x16	SCALE,%f24,%f24	! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
675	cmp	%i5,_0x7f800000		! (1_0) ax ? 0x7f800000
676	ld	[%fp+ftmp4],%l1		! (3_1) ibase0 = ((int*)&hyp0)[0];
677	faddd	%f40,KA0,%f42		! (0_1) dtmp2 += KA0
678
679	add	%i3,stridez,%o1		! pz += stridez
680	st	%f14,[%fp+ftmp1]	! (0_0) ibase0 = ((int*)&hyp0)[0];
681	bge,pn	%icc,.update22		! (1_0) if ( ax >= 0x7f800000 )
682	fdtos	%f26,%f1		! (4_2) ftmp0 = (float)res0;
683.cont22:
684	fmuld	KA3,%f32,%f34		! (2_1) dtmp2 = KA3 * xx0;
685	cmp	%l6,0			! (1_0) ay ? 0
686	st	%f1,[%i3]		! (4_2) *pz = ftmp0;
687	fand	%f28,DC2,%f30		! (3_1) h_hi0 = vis_fand(hyp0,DC2);
688
689	fsmuld	%f4,%f4,%f38		! (1_0) hyp0 = x0 * (double)x0;
690	sra	%l1,10,%o5		! (3_1) ibase0 >>= 10;
691	be,pn	%icc,.update23		! (1_0) if ( ay == 0 )
692	faddd	%f10,KA1,%f40		! (1_1) dtmp2 += KA1;
693.cont23:
694	fmuld	%f62,%f42,%f26		! (0_1) res0 *= dtmp2;
695	and	%o5,2032,%o4		! (3_1) si0 = ibase0 & 0x7f0;
696	lda	[%o3+stridey]0x82,%l6	! (2_0) ay = *(int*)py;
697	fpsub32	DA1,%f24,%f24		! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);
698
699	fsmuld	%f2,%f2,%f62		! (1_0) dtmp0 = y0 * (double)y0;
700	add	%o4,TBL,%l7		! (3_1) (char*)TBL + si0
701	lda	[stridex+%g5]0x82,%i5	! (2_0) ax = *(int*)px;
702	fsubd	%f28,%f30,%f28		! (3_1) dtmp1 = hyp0 - h_hi0;
703
704	nop
705	add	%g5,stridex,%i4		! px += stridex
706	ldd	[TBL+%o4],%f42		! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
707	faddd	%f34,KA2,%f10		! (2_1) dtmp2 += KA2;
708
709	fmuld	%f40,%f36,%f40		! (1_1) dtmp2 *= xx0;
710	and	%l6,_0x7fffffff,%l6	! (2_0) ay &= 0x7fffffff;
711	add	%o3,stridey,%i2		! py += stridey
712	fand	%f12,DC0,%f30		! (4_1) hyp0 = vis_fand(hyp0,DC0);
713
714	fmuld	%f26,%f24,%f36		! (0_1) res0 *= dbase0;
715	and	%i5,_0x7fffffff,%i5	! (2_0) ax &= 0x7fffffff;
716	lda	[%i2]0x82,%f2		! (2_0) y0 = *py;
717	fand	%f16,DA0,%f24		! (1_1) dbase0 = vis_fand(hyp0,DA0);
718
719	faddd	%f38,%f62,%f16		! (1_0) hyp0 += dtmp0;
720	cmp	%l6,_0x7f800000		! (2_0) ay ? 0x7f800000
721	ldd	[%o2+8],%f38		! (1_1) res0 = ((double*)((char*)arr + si0))[1];
722	fmuld	%f28,%f42,%f26		! (3_1) xx0 = dtmp1 * xx0;
723
724	fmuld	%f10,%f32,%f10		! (2_1) dtmp2 *= xx0;
725	lda	[stridex+%g5]0x82,%f4	! (2_0) x0 = *px;
726	bge,pn	%icc,.update24		! (2_0) if ( ay >= 0x7f800000
727	for	%f30,DC1,%f28		! (4_1) hyp0 = vis_for(hyp0,DC1);
728.cont24:
729	fmul8x16	SCALE,%f24,%f24	! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
730	cmp	%i5,_0x7f800000		! (2_0) ax ? 0x7f800000
731	ld	[%fp+ftmp0],%i3		! (4_1) ibase0 = ((int*)&hyp0)[0];
732	faddd	%f40,KA0,%f62		! (1_1) dtmp2 += KA0;
733
734	add	%o1,stridez,%g1		! pz += stridez
735	st	%f16,[%fp+ftmp2]	! (1_0) ibase0 = ((int*)&hyp0)[0];
736	bge,pn	%icc,.update25		! (2_0) if ( ax >= 0x7f800000 )
737	fdtos	%f36,%f1		! (0_1) ftmp0 = (float)res0;
738.cont25:
739	fmuld	KA3,%f26,%f34		! (3_1) dtmp2 = KA3 * xx0;
740	cmp	%l6,0			! (2_0) ay ? 0
741	st	%f1,[%o1]		! (0_1) *pz = ftmp0;
742	fand	%f28,DC2,%f30		! (4_1) h_hi0 = vis_fand(hyp0,DC2);
743
744	fsmuld	%f4,%f4,%f36		! (2_0) hyp0 = x0 * (double)x0;
745	sra	%i3,10,%i3		! (4_1) ibase0 >>= 10;
746	be,pn	%icc,.update26		! (2_0) if ( ay == 0 )
747	faddd	%f10,KA1,%f40		! (2_1) dtmp2 += KA1;
748.cont26:
749	fmuld	%f38,%f62,%f38		! (1_1) res0 *= dtmp2;
750	and	%i3,2032,%i3		! (4_1) si0 = ibase0 & 0x7f0;
751	lda	[%i2+stridey]0x82,%l6	! (3_0) ay = *(int*)py;
752	fpsub32	DA1,%f24,%f24		! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);
753
754	fsmuld	%f2,%f2,%f62		! (2_0) dtmp0 = y0 * (double)y0;
755	add	%i3,TBL,%i3		! (4_1) (char*)TBL + si0
756	lda	[%i4+stridex]0x82,%i5	! (3_0) ax = *(int*)px;
757	fsubd	%f28,%f30,%f28		! (4_1) dtmp1 = hyp0 - h_hi0;
758
759	nop
760	add	%i4,stridex,%o4		! px += stridex
761	ldd	[%i3],%f42		! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
762	faddd	%f34,KA2,%f10		! (3_1) dtmp2 += KA2;
763
764	fmuld	%f40,%f32,%f40		! (2_1) dtmp2 *= xx0;
765	add	%i2,stridey,%i2		! py += stridey
766	and	%l6,_0x7fffffff,%l6	! (3_0) ay &= 0x7fffffff;
767	fand	%f14,DC0,%f30		! (0_0) hyp0 = vis_fand(hyp0,DC0);
768
769	fmuld	%f38,%f24,%f38		! (1_1) res0 *= dbase0;
770	and	%i5,_0x7fffffff,%i5	! (3_0) ax &= 0x7fffffff;
771	lda	[%i2]0x82,%f2		! (3_0) y0 = *py;
772	fand	%f18,DA0,%f24		! (2_1) dbase0 = vis_fand(hyp0,DA0);
773
774	faddd	%f36,%f62,%f18		! (2_0) hyp0 += dtmp0;
775	cmp	%l6,_0x7f800000		! (3_0) ay ? 0x7f800000
776	ldd	[%l0+8],%f62		! (2_1) res0 = ((double*)((char*)arr + si0))[1];
777	fmuld	%f28,%f42,%f32		! (4_1) xx0 = dtmp1 * xx0;
778
779	fmuld	%f10,%f26,%f10		! (3_1) dtmp2 *= xx0;
780	lda	[%o4]0x82,%f4		! (3_0) x0 = *px;
781	bge,pn	%icc,.update27		! (3_0) if ( ay >= 0x7f800000 )
782	for	%f30,DC1,%f28		! (0_0) hyp0 = vis_for(hyp0,DC1);
783.cont27:
784	fmul8x16	SCALE,%f24,%f24	! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
785	cmp	%i5,_0x7f800000		! (3_0) ax ? 0x7f800000
786	ld	[%fp+ftmp1],%i1		! (0_0) ibase0 = ((int*)&hyp0)[0];
787	faddd	%f40,KA0,%f42		! (2_1) dtmp2 += KA0;
788
789	add	%g1,stridez,%o3		! pz += stridez
790	st	%f18,[%fp+ftmp3]	! (2_0) ibase0 = ((int*)&hyp0)[0];
791	bge,pn	%icc,.update28		! (3_0) if ( ax >= 0x7f800000 )
792	fdtos	%f38,%f1		! (1_1) ftmp0 = (float)res0;
793.cont28:
794	fmuld	KA3,%f32,%f34		! (4_1) dtmp2 = KA3 * xx0;
795	cmp	%l6,0			! (3_0)
796	st	%f1,[%g1]		! (1_1) *pz = ftmp0;
797	fand	%f28,DC2,%f30		! (0_0) h_hi0 = vis_fand(hyp0,DC2);
798
799	fsmuld	%f4,%f4,%f36		! (3_0) hyp0 = x0 * (double)x0;
800	sra	%i1,10,%l1		! (0_0) ibase0 >>= 10;
801	be,pn	%icc,.update29		! (3_0) if ( ay == 0 )
802	faddd	%f10,KA1,%f40		! (3_1) dtmp2 += KA1;
803.cont29:
804	fmuld	%f62,%f42,%f38		! (2_1) res0 *= dtmp2;
805	and	%l1,2032,%o5		! (0_0) si0 = ibase0 & 0x7f0;
806	lda	[%i2+stridey]0x82,%l6	! (4_0) ay = *(int*)py;
807	fpsub32	DA1,%f24,%f24		! (2_1) dbase0 = vis_fpsub32(DA1,dbase0);
808
809	fsmuld	%f2,%f2,%f62		! (3_0) dtmp0 = y0 * (double)y0;
810	add	%o5,TBL,%l1		! (0_0) (char*)TBL + si0
811	lda	[stridex+%o4]0x82,%i5	! (4_0) ax = *(int*)px;
812	fsubd	%f28,%f30,%f28		! (0_0) dtmp1 = hyp0 - h_hi0;
813
814	add	%o3,stridez,%i4		! pz += stridez
815	add	%o4,stridex,%l0		! px += stridex
816	ldd	[TBL+%o5],%f42		! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
817	faddd	%f34,KA2,%f10		! (4_1) dtmp2 += KA2;
818
819	fmuld	%f40,%f26,%f40		! (3_1) dtmp2 *= xx0;
820	add	%i2,stridey,%i2		! py += stridey
821	and	%l6,_0x7fffffff,%l6	! (4_0) ay &= 0x7fffffff;
822	fand	%f16,DC0,%f30		! (1_0) hyp0 = vis_fand(hyp0,DC0);
823
824	fmuld	%f38,%f24,%f38		! (2_1) res0 *= dbase0;
825	and	%i5,_0x7fffffff,%i5	! (4_0) ax &= 0x7fffffff;
826	lda	[%i2]0x82,%f2		! (4_0) y0 = *py;
827	fand	%f20,DA0,%f24		! (3_1) dbase0 = vis_fand(hyp0,DA0);
828
829	faddd	%f36,%f62,%f20		! (3_0) hyp0 += dtmp0;
830	cmp	%l6,_0x7f800000		! (4_0) ay ? 0x7f800000
831	ldd	[%l7+8],%f36		! (3_1) res0 = ((double*)((char*)arr + si0))[1];
832	fmuld	%f28,%f42,%f26		! (0_0) xx0 = dtmp1 * xx0;
833
834	fmuld	%f10,%f32,%f10		! (4_1) dtmp2 *= xx0;
835	lda	[stridex+%o4]0x82,%f4	! (4_0) x0 = *px;
836	bge,pn	%icc,.update30		! (4_0) if ( ay >= 0x7f800000 )
837	for	%f30,DC1,%f28		! (1_0) hyp0 = vis_for(hyp0,DC1);
838.cont30:
839	fmul8x16	SCALE,%f24,%f24	! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
840	cmp	%i5,_0x7f800000		! (4_0) ax ? 0x7f800000
841	ld	[%fp+ftmp2],%i1		! (1_0) ibase0 = ((int*)&hyp0)[0];
842	faddd	%f40,KA0,%f62		! (3_1) dtmp2 += KA0;
843
844	bge,pn	%icc,.update31		! (4_0) if ( ax >= 0x7f800000 )
845	st	%f20,[%fp+ftmp4]	! (3_0) ibase0 = ((int*)&hyp0)[0];
846.cont31:
847	subcc	counter,5,counter	! counter -= 5;
848	fdtos	%f38,%f1		! (2_1) ftmp0 = (float)res0;
849
850	fmuld	KA3,%f26,%f34		! (0_0) dtmp2 = KA3 * xx0;
851	st	%f1,[%o3]		! (2_1) *pz = ftmp0;
852	bpos,pt	%icc,.main_loop
853	fand	%f28,DC2,%f30		! (1_0) h_hi0 = vis_fand(hyp0,DC2);
854
855	add	counter,5,counter
856
! .tail: drain the software pipeline.  Reached by fall-through when the
! main loop's "bpos,pt .main_loop" is not taken (any other entry points are
! outside this excerpt).
! Up to four results are still in flight here: (3_2), (4_2), (0_1), (1_1).
! Before each of them counter is decremented; if it goes negative control
! leaves for .begin with %o1 holding the pz address that was not written.
857.tail:
858	subcc	counter,1,counter	! is a (3_2) result still owed?
859	bneg	.begin			! no: leave the drain sequence
860	mov	%i4,%o1			! delay slot: %o1 = pz of the next store
861
862	sra	%i1,10,%o2		! (1_1) ibase0 >>= 10;
863	faddd	%f10,KA1,%f40		! (4_2) dtmp2 += KA1;
864
865	fmuld	%f36,%f62,%f36		! (3_2) res0 *= dtmp2;
866	and	%o2,2032,%o2		! (1_1) si0 = ibase0 & 0x7f0;
867	fpsub32	DA1,%f24,%f24		! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);
868
869	add	%o2,TBL,%o2		! (1_1) (char*)TBL + si0
870	fsubd	%f28,%f30,%f28		! (1_1) dtmp1 = hyp0 - h_hi0;
871
872	ldd	[%o2],%f42		! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
873	faddd	%f34,KA2,%f10		! (0_1) dtmp2 += KA2;
874
875	fmuld	%f40,%f32,%f40		! (4_2) dtmp2 *= xx0;
876
877	fmuld	%f36,%f24,%f32		! (3_2) res0 *= dbase0;
878	fand	%f12,DA0,%f24		! (4_2) dbase0 = vis_fand(hyp0,DA0);
879
880	ldd	[%i3+8],%f62		! (4_2) res0 = ((double*)((char*)arr + si0))[1];
881	fmuld	%f28,%f42,%f36		! (1_1) xx0 = dtmp1 * xx0;
882
883	fmuld	%f10,%f26,%f10		! (0_1) dtmp2 *= xx0;
884
885	fmul8x16	SCALE,%f24,%f24	! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
886	faddd	%f40,KA0,%f42		! (4_2) dtmp2 += KA0;
887
888	add	%i4,stridez,%i3		! pz += stridez (%i3 is reused as pz from here on)
889	fdtos	%f32,%f1		! (3_2) ftmp0 = (float)res0;
890
891	fmuld	KA3,%f36,%f34		! (1_1) dtmp2 = KA3 * xx0;
892	st	%f1,[%i4]		! (3_2) *pz = ftmp0;
893
894	subcc	counter,1,counter	! is a (4_2) result still owed?
895	bneg	.begin			! no: leave the drain sequence
896	mov	%i3,%o1			! delay slot: %o1 = pz of the next store
897
898	faddd	%f10,KA1,%f40		! (0_1) dtmp2 += KA1;
899
900	fmuld	%f62,%f42,%f32		! (4_2) res0 *= dtmp2;
901	fpsub32	DA1,%f24,%f24		! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);
902
903
904	faddd	%f34,KA2,%f10		! (1_1) dtmp2 += KA2;
905
906	fmuld	%f40,%f26,%f40		! (0_1) dtmp2 *= xx0;
907
908	fmuld	%f32,%f24,%f26		! (4_2) res0 *= dbase0;
909	fand	%f14,DA0,%f24		! (0_1) dbase0 = vis_fand(hyp0,DA0);
910
911	ldd	[%l1+8],%f62		! (0_1) res0 = ((double*)((char*)arr + si0))[1];
912
913	fmuld	%f10,%f36,%f10		! (1_1) dtmp2 *= xx0;
914
915	fmul8x16	SCALE,%f24,%f24	! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
916	faddd	%f40,KA0,%f42		! (0_1) dtmp2 += KA0;
917
918	add	%i3,stridez,%o1		! pz += stridez
919	fdtos	%f26,%f1		! (4_2) ftmp0 = (float)res0;
920
921	st	%f1,[%i3]		! (4_2) *pz = ftmp0;
922
923	subcc	counter,1,counter	! is a (0_1) result still owed?
924	bneg	.begin			! no: leave; %o1 already holds the next pz
925	nop				! delay slot left empty
926
927	faddd	%f10,KA1,%f40		! (1_1) dtmp2 += KA1;
928
929	fmuld	%f62,%f42,%f26		! (0_1) res0 *= dtmp2;
930	fpsub32	DA1,%f24,%f24		! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);
931
932	fmuld	%f40,%f36,%f40		! (1_1) dtmp2 *= xx0;
933
934	fmuld	%f26,%f24,%f36		! (0_1) res0 *= dbase0;
935	fand	%f16,DA0,%f24		! (1_1) dbase0 = vis_fand(hyp0,DA0);
936
937	ldd	[%o2+8],%f38		! (1_1) res0 = ((double*)((char*)arr + si0))[1];
938
939	fmul8x16	SCALE,%f24,%f24	! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
940	faddd	%f40,KA0,%f62		! (1_1) dtmp2 += KA0;
941
942	add	%o1,stridez,%g1		! pz += stridez
943	fdtos	%f36,%f1		! (0_1) ftmp0 = (float)res0;
944
945	st	%f1,[%o1]		! (0_1) *pz = ftmp0;
946
947	subcc	counter,1,counter	! is a (1_1) result still owed?
948	bneg	.begin			! no: leave the drain sequence
949	mov	%g1,%o1			! delay slot: %o1 = pz of the next store
950
951	fmuld	%f38,%f62,%f38		! (1_1) res0 *= dtmp2;
952	fpsub32	DA1,%f24,%f24		! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);
953
954	fmuld	%f38,%f24,%f38		! (1_1) res0 *= dbase0;
955
956	fdtos	%f38,%f1		! (1_1) ftmp0 = (float)res0;
957	st	%f1,[%g1]		! (1_1) *pz = ftmp0;
958
959	ba	.begin			! pipeline is empty: back to .begin
960	add	%g1,stridez,%o1		! pz += stridez
961
! .spec0: scalar special-case path for a single element.
! Stores 0.0f to *pz when ay or ax equals 0x7f800000; otherwise stores the
! product fabsf(x0) * fabsf(y0).  Then advances px, py and pz by their
! strides, decrements counter and continues at .begin1.
! NOTE(review): the branch into .spec0 is outside this excerpt; presumably
! it is taken only when ax or ay >= 0x7f800000, so the product path would
! see only NaN operands - confirm at the call site.
962	.align	16
963.spec0:
964	fabss	%f2,%f2			! fabsf(y0);
965
966	fabss	%f4,%f4			! fabsf(x0);
967
968	fcmps	%f2,%f4			! result is not read in this block; NOTE(review): possibly kept for its FP-exception side effect - confirm
969
970	cmp	%l6,_0x7f800000		! ay ? 0x7f800000
971	be,a	1f			! if( ay == 0x7f800000 )
972	st	%g0,[%o1]		! *pz = 0.0f; (annulled delay slot: runs only when the branch is taken)
973
974	cmp	%i5,_0x7f800000		! ax ? 0x7f800000
975	be,a	1f			! if( ax == 0x7f800000 )
976	st	%g0,[%o1]		! *pz = 0.0f; (annulled delay slot: runs only when the branch is taken)
977
978	fmuls	%f2,%f4,%f2		! fabsf(x0) * fabsf(y0);
979	st	%f2,[%o1]		! *pz = fabsf(x0) * fabsf(y0);
9801:
981	add	%o4,stridex,%o4		! px += stridex;
982	add	%i2,stridey,%i2		! py += stridey;
983
984	add	%o1,stridez,%o1		! pz += stridez;
985	ba	.begin1			! continue with the next element
986	sub	counter,1,counter	! counter--; (delay slot)
987
! .spec1: scalar special-case path for a single element with ax == 0.
! If ax != 0 control returns to .cont_spec1.  Otherwise the quotient
! %f7 / %f9 is stored to *pz, px/py/pz are advanced by their strides,
! counter is decremented and execution continues at .begin1.
! NOTE(review): the branch into .spec1 and the setup of %f7/%f9 are
! outside this excerpt; the existing comments describe them as 1.0f and
! 0.0f, and the entry is presumably taken when ay == 0 - confirm there.
988	.align	16
989.spec1:
990	cmp	%i5,0			! ax ? 0
991	bne,pt	%icc,.cont_spec1	! if ( ax != 0 )
992	nop				! delay slot left empty
993
994	add	%o4,stridex,%o4		! px += stridex;
995	add	%i2,stridey,%i2		! py += stridey;
996
997	fdivs	%f7,%f9,%f2		! 1.0f / 0.0f
998	st	%f2,[%o1]		! *pz = 1.0f / 0.0f;
999
1000	add	%o1,stridez,%o1		! pz += stridez;
1001	ba	.begin1			! continue with the next element
1002	sub	counter,1,counter	! counter--; (delay slot)
1003
! .update0: out-of-line fixup that returns to .cont0 on every path.
! With more than 1 element left it stores counter-1 and the pointers held
! in %l0/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 1.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1004	.align	16
1005.update0:
1006	cmp	counter,1		! is counter <= 1?
1007	ble	.cont0			! yes: resume without saving any state
1008	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1009
1010	sub	counter,1,counter	! count left once 1 element is set aside
1011	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1012
1013	stx	%l0,[%fp+tmp_px]	! store %l0 in the tmp_px slot
1014
1015	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1016	ba	.cont0			! resume the pipeline
1017	mov	1,counter		! delay slot: counter = 1
1018
! .update1: out-of-line fixup that returns to .cont1 on every path.
! With more than 1 element left it stores counter-1 and the pointers held
! in %l0/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 1.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1019	.align	16
1020.update1:
1021	cmp	counter,1		! is counter <= 1?
1022	ble	.cont1			! yes: resume without saving any state
1023	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1024
1025	sub	counter,1,counter	! count left once 1 element is set aside
1026	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1027
1028	stx	%l0,[%fp+tmp_px]	! store %l0 in the tmp_px slot
1029
1030	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1031	ba	.cont1			! resume the pipeline
1032	mov	1,counter		! delay slot: counter = 1
1033
! .update2: out-of-line fixup that returns to .cont2 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 1 element is left, counter-1 and
! the pointers in %l0/%i2 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 1.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1034	.align	16
1035.update2:
1036	cmp	%i5,0			! is %i5 zero?
1037	bne	.cont2			! no: resume (the cmp below still runs as the delay slot)
1038
1039	cmp	counter,1		! delay slot of bne; also sets flags for the ble below
1040	ble	.cont2			! counter <= 1: resume without saving any state
1041	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1042
1043	sub	counter,1,counter	! count left once 1 element is set aside
1044	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1045
1046	stx	%l0,[%fp+tmp_px]	! store %l0 in the tmp_px slot
1047
1048	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1049	ba	.cont2			! resume the pipeline
1050	mov	1,counter		! delay slot: counter = 1
1051
! .update3: out-of-line fixup that returns to .cont3 on every path.
! With more than 2 elements left it stores counter-2 and the pointers held
! in %i1/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 2.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1052	.align	16
1053.update3:
1054	cmp	counter,2		! is counter <= 2?
1055	ble	.cont3			! yes: resume without saving any state
1056	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1057
1058	sub	counter,2,counter	! count left once 2 elements are set aside
1059	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1060
1061	stx	%i1,[%fp+tmp_px]	! store %i1 in the tmp_px slot
1062
1063	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1064	ba	.cont3			! resume the pipeline
1065	mov	2,counter		! delay slot: counter = 2
1066
! .update4: out-of-line fixup that returns to .cont4 on every path.
! With more than 2 elements left it stores counter-2 and the pointers held
! in %i1/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 2.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1067	.align	16
1068.update4:
1069	cmp	counter,2		! is counter <= 2?
1070	ble	.cont4			! yes: resume without saving any state
1071	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1072
1073	sub	counter,2,counter	! count left once 2 elements are set aside
1074	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1075
1076	stx	%i1,[%fp+tmp_px]	! store %i1 in the tmp_px slot
1077
1078	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1079	ba	.cont4			! resume the pipeline
1080	mov	2,counter		! delay slot: counter = 2
1081
! .update5: out-of-line fixup that returns to .cont5 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 2 elements are left, counter-2 and
! the pointers in %i1/%i2 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 2.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1082	.align	16
1083.update5:
1084	cmp	%i5,0			! is %i5 zero?
1085	bne	.cont5			! no: resume (the cmp below still runs as the delay slot)
1086
1087	cmp	counter,2		! delay slot of bne; also sets flags for the ble below
1088	ble	.cont5			! counter <= 2: resume without saving any state
1089	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1090
1091	sub	counter,2,counter	! count left once 2 elements are set aside
1092	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1093
1094	stx	%i1,[%fp+tmp_px]	! store %i1 in the tmp_px slot
1095
1096	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1097	ba	.cont5			! resume the pipeline
1098	mov	2,counter		! delay slot: counter = 2
1099
! .update6: out-of-line fixup that returns to .cont6 on every path.
! With more than 3 elements left it stores counter-3 and the pointers held
! in %g5/%o3 to tmp_counter/tmp_px/tmp_py, then limits counter to 3.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1100	.align	16
1101.update6:
1102	cmp	counter,3		! is counter <= 3?
1103	ble	.cont6			! yes: resume without saving any state
1104	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1105
1106	sub	counter,3,counter	! count left once 3 elements are set aside
1107	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1108
1109	stx	%g5,[%fp+tmp_px]	! store %g5 in the tmp_px slot
1110
1111	stx	%o3,[%fp+tmp_py]	! store %o3 in the tmp_py slot
1112	ba	.cont6			! resume the pipeline
1113	mov	3,counter		! delay slot: counter = 3
1114
! .update7: out-of-line fixup that returns to .cont7 on every path.
! With more than 3 elements left it stores counter-3 and the pointers held
! in %g5/%o3 to tmp_counter/tmp_px/tmp_py, then limits counter to 3.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1115	.align	16
1116.update7:
1117	cmp	counter,3		! is counter <= 3?
1118	ble	.cont7			! yes: resume without saving any state
1119	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1120
1121	sub	counter,3,counter	! count left once 3 elements are set aside
1122	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1123
1124	stx	%g5,[%fp+tmp_px]	! store %g5 in the tmp_px slot
1125
1126	stx	%o3,[%fp+tmp_py]	! store %o3 in the tmp_py slot
1127	ba	.cont7			! resume the pipeline
1128	mov	3,counter		! delay slot: counter = 3
1129
! .update8: out-of-line fixup that returns to .cont8 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 3 elements are left, counter-3 and
! the pointers in %g5/%o3 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 3.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1130	.align	16
1131.update8:
1132	cmp	%i5,0			! is %i5 zero?
1133	bne	.cont8			! no: resume (the cmp below still runs as the delay slot)
1134
1135	cmp	counter,3		! delay slot of bne; also sets flags for the ble below
1136	ble	.cont8			! counter <= 3: resume without saving any state
1137	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1138
1139	sub	counter,3,counter	! count left once 3 elements are set aside
1140	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1141
1142	stx	%g5,[%fp+tmp_px]	! store %g5 in the tmp_px slot
1143
1144	stx	%o3,[%fp+tmp_py]	! store %o3 in the tmp_py slot
1145	ba	.cont8			! resume the pipeline
1146	mov	3,counter		! delay slot: counter = 3
1147
! .update9: out-of-line fixup that returns to .cont9 on every path.
! With more than 4 elements left it stores counter-4 and the pointers held
! in %i4/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 4.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1148	.align	16
1149.update9:
1150	cmp	counter,4		! is counter <= 4?
1151	ble	.cont9			! yes: resume without saving any state
1152	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1153
1154	sub	counter,4,counter	! count left once 4 elements are set aside
1155	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1156
1157	stx	%i4,[%fp+tmp_px]	! store %i4 in the tmp_px slot
1158
1159	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1160	ba	.cont9			! resume the pipeline
1161	mov	4,counter		! delay slot: counter = 4
1162
! .update10: out-of-line fixup that returns to .cont10 on every path.
! With more than 4 elements left it stores counter-4 and the pointers held
! in %i4/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 4.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1163	.align	16
1164.update10:
1165	cmp	counter,4		! is counter <= 4?
1166	ble	.cont10			! yes: resume without saving any state
1167	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1168
1169	sub	counter,4,counter	! count left once 4 elements are set aside
1170	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1171
1172	stx	%i4,[%fp+tmp_px]	! store %i4 in the tmp_px slot
1173
1174	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1175	ba	.cont10			! resume the pipeline
1176	mov	4,counter		! delay slot: counter = 4
1177
! .update11: out-of-line fixup that returns to .cont11 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 4 elements are left, counter-4 and
! the pointers in %i4/%i2 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 4.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1178	.align	16
1179.update11:
1180	cmp	%i5,0			! is %i5 zero?
1181	bne	.cont11			! no: resume (the cmp below still runs as the delay slot)
1182
1183	cmp	counter,4		! delay slot of bne; also sets flags for the ble below
1184	ble	.cont11			! counter <= 4: resume without saving any state
1185	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1186
1187	sub	counter,4,counter	! count left once 4 elements are set aside
1188	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1189
1190	stx	%i4,[%fp+tmp_px]	! store %i4 in the tmp_px slot
1191
1192	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1193	ba	.cont11			! resume the pipeline
1194	mov	4,counter		! delay slot: counter = 4
1195
! .update12: out-of-line fixup that returns to .cont12 on every path.
! With more than 5 elements left it stores counter-5 and the pointers held
! in %o4/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 5.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1196	.align	16
1197.update12:
1198	cmp	counter,5		! is counter <= 5?
1199	ble	.cont12			! yes: resume without saving any state
1200	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1201
1202	sub	counter,5,counter	! count left once 5 elements are set aside
1203	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1204
1205	stx	%o4,[%fp+tmp_px]	! store %o4 in the tmp_px slot
1206
1207	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1208	ba	.cont12			! resume the pipeline
1209	mov	5,counter		! delay slot: counter = 5
1210
! .update13: out-of-line fixup that returns to .cont13 on every path.
! With more than 5 elements left it stores counter-5 and the pointers held
! in %o4/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 5.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1211	.align	16
1212.update13:
1213	cmp	counter,5		! is counter <= 5?
1214	ble	.cont13			! yes: resume without saving any state
1215	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1216
1217	sub	counter,5,counter	! count left once 5 elements are set aside
1218	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1219
1220	stx	%o4,[%fp+tmp_px]	! store %o4 in the tmp_px slot
1221
1222	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1223	ba	.cont13			! resume the pipeline
1224	mov	5,counter		! delay slot: counter = 5
1225
! .update14: out-of-line fixup that returns to .cont14 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 5 elements are left, counter-5 and
! the pointers in %o4/%i2 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 5.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1226	.align	16
1227.update14:
1228	cmp	%i5,0			! is %i5 zero?
1229	bne	.cont14			! no: resume (the cmp below still runs as the delay slot)
1230
1231	cmp	counter,5		! delay slot of bne; also sets flags for the ble below
1232	ble	.cont14			! counter <= 5: resume without saving any state
1233	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1234
1235	sub	counter,5,counter	! count left once 5 elements are set aside
1236	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1237
1238	stx	%o4,[%fp+tmp_px]	! store %o4 in the tmp_px slot
1239
1240	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1241	ba	.cont14			! resume the pipeline
1242	mov	5,counter		! delay slot: counter = 5
1243
! .update15: out-of-line fixup that returns to .cont15 on every path.
! With more than 6 elements left it stores counter-6 and the pointers held
! in %l0/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 6.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1244	.align	16
1245.update15:
1246	cmp	counter,6		! is counter <= 6?
1247	ble	.cont15			! yes: resume without saving any state
1248	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1249
1250	sub	counter,6,counter	! count left once 6 elements are set aside
1251	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1252
1253	stx	%l0,[%fp+tmp_px]	! store %l0 in the tmp_px slot
1254
1255	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1256	ba	.cont15			! resume the pipeline
1257	mov	6,counter		! delay slot: counter = 6
1258
! .update16: out-of-line fixup that returns to .cont16 on every path.
! With more than 6 elements left it stores counter-6 and the pointers held
! in %l0/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 6.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1259	.align	16
1260.update16:
1261	cmp	counter,6		! is counter <= 6?
1262	ble	.cont16			! yes: resume without saving any state
1263	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1264
1265	sub	counter,6,counter	! count left once 6 elements are set aside
1266	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1267
1268	stx	%l0,[%fp+tmp_px]	! store %l0 in the tmp_px slot
1269
1270	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1271	ba	.cont16			! resume the pipeline
1272	mov	6,counter		! delay slot: counter = 6
1273
! .update17: out-of-line fixup that returns to .cont17 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, DC1 is copied into
! %f62 and, if more than 1 element is left, counter-1 and the pointers in
! %l0/%i2 are stored to tmp_counter/tmp_px/tmp_py before counter is
! limited to 1.
! NOTE(review): the other .updateN fixups of this shape reload %f2 from
! TBL+TBL_SHIFT+44 here, while this one writes DC1 to %f62; the branch site
! is outside this excerpt, so confirm that %f62 is the right target there.
1274	.align	16
1275.update17:
1276	cmp	%i5,0			! is %i5 zero?
1277	bne	.cont17			! no: resume (the cmp below still runs as the delay slot)
1278
1279	cmp	counter,1		! delay slot of bne; also sets flags for the ble below
1280	ble	.cont17			! counter <= 1: resume without saving any state
1281	fmovd	DC1,%f62		! delay slot of ble: %f62 = DC1
1282
1283	sub	counter,1,counter	! count left once 1 element is set aside
1284	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1285
1286	stx	%l0,[%fp+tmp_px]	! store %l0 in the tmp_px slot
1287
1288	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1289	ba	.cont17			! resume the pipeline
1290	mov	1,counter		! delay slot: counter = 1
1291
! .update18: out-of-line fixup that returns to .cont18 on every path.
! With more than 2 elements left it stores counter-2 and the pointers held
! in %i1/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 2.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1292	.align	16
1293.update18:
1294	cmp	counter,2		! is counter <= 2?
1295	ble	.cont18			! yes: resume without saving any state
1296	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1297
1298	sub	counter,2,counter	! count left once 2 elements are set aside
1299	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1300
1301	stx	%i1,[%fp+tmp_px]	! store %i1 in the tmp_px slot
1302
1303	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1304	ba	.cont18			! resume the pipeline
1305	mov	2,counter		! delay slot: counter = 2
1306
! .update19: out-of-line fixup that returns to .cont19 on every path.
! With more than 2 elements left it stores counter-2 and the pointers held
! in %i1/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 2.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1307	.align	16
1308.update19:
1309	cmp	counter,2		! is counter <= 2?
1310	ble	.cont19			! yes: resume without saving any state
1311	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1312
1313	sub	counter,2,counter	! count left once 2 elements are set aside
1314	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1315
1316	stx	%i1,[%fp+tmp_px]	! store %i1 in the tmp_px slot
1317
1318	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1319	ba	.cont19			! resume the pipeline
1320	mov	2,counter		! delay slot: counter = 2
1321
! .update20: out-of-line fixup that returns to .cont20 on every path.
! Nothing is changed unless %o1 == 0.  When %o1 == 0, %f2 is reloaded from
! the constant table and, if more than 2 elements are left, counter-2 and
! the pointers in %i1/%i2 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 2.
! NOTE(review): every other .updateN fixup of this shape tests %i5, and %o1
! is used as the pz pointer in .tail/.spec0/.spec1.  The branch site is
! outside this excerpt, so confirm that %o1 really holds the value to be
! tested (presumably ax) when .update20 is entered.
1322	.align	16
1323.update20:
1324	cmp	%o1,0			! is %o1 zero?
1325	bne	.cont20			! no: resume (the cmp below still runs as the delay slot)
1326
1327	cmp	counter,2		! delay slot of bne; also sets flags for the ble below
1328	ble	.cont20			! counter <= 2: resume without saving any state
1329	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1330
1331	sub	counter,2,counter	! count left once 2 elements are set aside
1332	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1333
1334	stx	%i1,[%fp+tmp_px]	! store %i1 in the tmp_px slot
1335
1336	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1337	ba	.cont20			! resume the pipeline
1338	mov	2,counter		! delay slot: counter = 2
1339
! .update21: out-of-line fixup that returns to .cont21 on every path.
! With more than 3 elements left it stores counter-3 and the pointers held
! in %g5/%o3 to tmp_counter/tmp_px/tmp_py, then limits counter to 3.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1340	.align	16
1341.update21:
1342	cmp	counter,3		! is counter <= 3?
1343	ble	.cont21			! yes: resume without saving any state
1344	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1345
1346	sub	counter,3,counter	! count left once 3 elements are set aside
1347	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1348
1349	stx	%g5,[%fp+tmp_px]	! store %g5 in the tmp_px slot
1350
1351	stx	%o3,[%fp+tmp_py]	! store %o3 in the tmp_py slot
1352	ba	.cont21			! resume the pipeline
1353	mov	3,counter		! delay slot: counter = 3
1354
! .update22: out-of-line fixup that returns to .cont22 on every path.
! With more than 3 elements left it stores counter-3 and the pointers held
! in %g5/%o3 to tmp_counter/tmp_px/tmp_py, then limits counter to 3.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1355	.align	16
1356.update22:
1357	cmp	counter,3		! is counter <= 3?
1358	ble	.cont22			! yes: resume without saving any state
1359	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1360
1361	sub	counter,3,counter	! count left once 3 elements are set aside
1362	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1363
1364	stx	%g5,[%fp+tmp_px]	! store %g5 in the tmp_px slot
1365
1366	stx	%o3,[%fp+tmp_py]	! store %o3 in the tmp_py slot
1367	ba	.cont22			! resume the pipeline
1368	mov	3,counter		! delay slot: counter = 3
1369
! .update23: out-of-line fixup that returns to .cont23 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 3 elements are left, counter-3 and
! the pointers in %g5/%o3 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 3.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1370	.align	16
1371.update23:
1372	cmp	%i5,0			! is %i5 zero?
1373	bne	.cont23			! no: resume (the cmp below still runs as the delay slot)
1374
1375	cmp	counter,3		! delay slot of bne; also sets flags for the ble below
1376	ble	.cont23			! counter <= 3: resume without saving any state
1377	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1378
1379	sub	counter,3,counter	! count left once 3 elements are set aside
1380	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1381
1382	stx	%g5,[%fp+tmp_px]	! store %g5 in the tmp_px slot
1383
1384	stx	%o3,[%fp+tmp_py]	! store %o3 in the tmp_py slot
1385	ba	.cont23			! resume the pipeline
1386	mov	3,counter		! delay slot: counter = 3
1387
! .update24: out-of-line fixup that returns to .cont24 on every path.
! With more than 4 elements left it stores counter-4 and the pointers held
! in %i4/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 4.
! %f2 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1388	.align	16
1389.update24:
1390	cmp	counter,4		! is counter <= 4?
1391	ble	.cont24			! yes: resume without saving any state
1392	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): reload %f2 from the table
1393
1394	sub	counter,4,counter	! count left once 4 elements are set aside
1395	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1396
1397	stx	%i4,[%fp+tmp_px]	! store %i4 in the tmp_px slot
1398
1399	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1400	ba	.cont24			! resume the pipeline
1401	mov	4,counter		! delay slot: counter = 4
1402
! .update25: out-of-line fixup that returns to .cont25 on every path.
! With more than 4 elements left it stores counter-4 and the pointers held
! in %i4/%i2 to tmp_counter/tmp_px/tmp_py, then limits counter to 4.
! %f4 is reloaded from the constant table on both paths.
! NOTE(review): the branch into this label and the code that reads the
! tmp_* slots back are outside this excerpt - confirm there.
1403	.align	16
1404.update25:
1405	cmp	counter,4		! is counter <= 4?
1406	ble	.cont25			! yes: resume without saving any state
1407	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): reload %f4 from the table
1408
1409	sub	counter,4,counter	! count left once 4 elements are set aside
1410	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1411
1412	stx	%i4,[%fp+tmp_px]	! store %i4 in the tmp_px slot
1413
1414	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1415	ba	.cont25			! resume the pipeline
1416	mov	4,counter		! delay slot: counter = 4
1417
! .update26: out-of-line fixup that returns to .cont26 on every path.
! Nothing is changed unless %i5 == 0.  When %i5 == 0, %f2 is reloaded from
! the constant table and, if more than 4 elements are left, counter-4 and
! the pointers in %i4/%i2 are stored to tmp_counter/tmp_px/tmp_py before
! counter is limited to 4.
! NOTE(review): the branch into this label is outside this excerpt; %i5 is
! presumably ax, as in the commented main-loop code - confirm.
1418	.align	16
1419.update26:
1420	cmp	%i5,0			! is %i5 zero?
1421	bne	.cont26			! no: resume (the cmp below still runs as the delay slot)
1422
1423	cmp	counter,4		! delay slot of bne; also sets flags for the ble below
1424	ble	.cont26			! counter <= 4: resume without saving any state
1425	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: reload %f2 from the table
1426
1427	sub	counter,4,counter	! count left once 4 elements are set aside
1428	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1429
1430	stx	%i4,[%fp+tmp_px]	! store %i4 in the tmp_px slot
1431
1432	stx	%i2,[%fp+tmp_py]	! store %i2 in the tmp_py slot
1433	ba	.cont26			! resume the pipeline
1434	mov	4,counter		! delay slot: counter = 4
1435
! .update27: out-of-line fixup that returns to .cont27 on every path.
! Entered from .main_loop when the (3_0) test finds ay >= 0x7f800000.
! %f2 (y0 at that point) is reloaded from the constant table on both paths.
! With more than 5 elements left it also stores counter-5 and the px/py
! pointers of that element (%o4/%i2) to tmp_counter/tmp_px/tmp_py, then
! limits counter to 5.
! NOTE(review): the code that reads the tmp_* slots back is outside this
! excerpt - confirm how the set-aside elements are resumed.
1436	.align	16
1437.update27:
1438	cmp	counter,5		! is counter <= 5?
1439	ble	.cont27			! yes: resume without saving any state
1440	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): replace y0 with the table value
1441
1442	sub	counter,5,counter	! count left once 5 elements are set aside
1443	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1444
1445	stx	%o4,[%fp+tmp_px]	! store px (%o4) in the tmp_px slot
1446
1447	stx	%i2,[%fp+tmp_py]	! store py (%i2) in the tmp_py slot
1448	ba	.cont27			! resume the pipeline
1449	mov	5,counter		! delay slot: counter = 5
1450
! .update28: out-of-line fixup that returns to .cont28 on every path.
! Entered from .main_loop when the (3_0) test finds ax >= 0x7f800000.
! %f4 (x0 at that point) is reloaded from the constant table on both paths.
! With more than 5 elements left it also stores counter-5 and the px/py
! pointers of that element (%o4/%i2) to tmp_counter/tmp_px/tmp_py, then
! limits counter to 5.
! NOTE(review): the code that reads the tmp_* slots back is outside this
! excerpt - confirm how the set-aside elements are resumed.
1451	.align	16
1452.update28:
1453	cmp	counter,5		! is counter <= 5?
1454	ble	.cont28			! yes: resume without saving any state
1455	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): replace x0 with the table value
1456
1457	sub	counter,5,counter	! count left once 5 elements are set aside
1458	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1459
1460	stx	%o4,[%fp+tmp_px]	! store px (%o4) in the tmp_px slot
1461
1462	stx	%i2,[%fp+tmp_py]	! store py (%i2) in the tmp_py slot
1463	ba	.cont28			! resume the pipeline
1464	mov	5,counter		! delay slot: counter = 5
1465
! .update29: out-of-line fixup that returns to .cont29 on every path.
! Entered from .main_loop when the (3_0) test finds ay == 0.
! Nothing is changed unless ax (%i5) is zero as well.  In that case %f2
! (y0 at that point) is reloaded from the constant table and, if more than
! 5 elements are left, counter-5 and the px/py pointers of that element
! (%o4/%i2) are stored to tmp_counter/tmp_px/tmp_py before counter is
! limited to 5.
! NOTE(review): the code that reads the tmp_* slots back is outside this
! excerpt - confirm how the set-aside elements are resumed.
1466	.align	16
1467.update29:
1468	cmp	%i5,0			! is ax zero too?
1469	bne	.cont29			! no: resume (the cmp below still runs as the delay slot)
1470
1471	cmp	counter,5		! delay slot of bne; also sets flags for the ble below
1472	ble	.cont29			! counter <= 5: resume without saving any state
1473	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot of ble: replace y0 with the table value
1474
1475	sub	counter,5,counter	! count left once 5 elements are set aside
1476	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1477
1478	stx	%o4,[%fp+tmp_px]	! store px (%o4) in the tmp_px slot
1479
1480	stx	%i2,[%fp+tmp_py]	! store py (%i2) in the tmp_py slot
1481	ba	.cont29			! resume the pipeline
1482	mov	5,counter		! delay slot: counter = 5
1483
! .update30: out-of-line fixup that returns to .cont30 on every path.
! Entered from .main_loop when the (4_0) test finds ay >= 0x7f800000.
! %f2 (y0 at that point) is reloaded from the constant table on both paths.
! With more than 6 elements left it also stores counter-6 and the px/py
! pointers of that element (%l0/%i2) to tmp_counter/tmp_px/tmp_py, then
! limits counter to 6.
! NOTE(review): the code that reads the tmp_* slots back is outside this
! excerpt - confirm how the set-aside elements are resumed.
1484	.align	16
1485.update30:
1486	cmp	counter,6		! is counter <= 6?
1487	ble	.cont30			! yes: resume without saving any state
1488	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot (runs on both paths): replace y0 with the table value
1489
1490	sub	counter,6,counter	! count left once 6 elements are set aside
1491	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1492
1493	stx	%l0,[%fp+tmp_px]	! store px (%l0) in the tmp_px slot
1494
1495	stx	%i2,[%fp+tmp_py]	! store py (%i2) in the tmp_py slot
1496	ba	.cont30			! resume the pipeline
1497	mov	6,counter		! delay slot: counter = 6
1498
! .update31: out-of-line fixup that returns to .cont31 on every path.
! Entered from .main_loop when the (4_0) test finds ax >= 0x7f800000.
! %f4 (x0 at that point) is reloaded from the constant table on both paths.
! With more than 6 elements left it also stores counter-6 and the px/py
! pointers of that element (%l0/%i2) to tmp_counter/tmp_px/tmp_py, then
! limits counter to 6.
! NOTE(review): the code that reads the tmp_* slots back is outside this
! excerpt - confirm how the set-aside elements are resumed.
1499	.align	16
1500.update31:
1501	cmp	counter,6		! is counter <= 6?
1502	ble	.cont31			! yes: resume without saving any state
1503	ld	[TBL+TBL_SHIFT+44],%f4	! delay slot (runs on both paths): replace x0 with the table value
1504
1505	sub	counter,6,counter	! count left once 6 elements are set aside
1506	st	counter,[%fp+tmp_counter]	! store it in the tmp_counter slot
1507
1508	stx	%l0,[%fp+tmp_px]	! store px (%l0) in the tmp_px slot
1509
1510	stx	%i2,[%fp+tmp_py]	! store py (%i2) in the tmp_py slot
1511	ba	.cont31			! resume the pipeline
1512	mov	6,counter		! delay slot: counter = 6
1513
! .exit: common return point of __vrhypotf.
! NOTE(review): the branches that reach .exit are outside this excerpt.
1514	.align	16
1515.exit:
1516	ret				! return to the caller
1517	restore				! delay slot: restore the caller's register window
1518	SET_SIZE(__vrhypotf)		! end-of-function marker macro for __vrhypotf (defined outside this file excerpt)
1519
1520