xref: /titanic_51/usr/src/lib/libmvec/common/vis/__vhypotf.S (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vhypotf.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtis
/*
 * Read-only constant pool for __vhypotf.
 *
 * The function prologue loads every entry by a fixed byte offset from the
 * table base: K1 +0, K2 +8, DC0 +16, DC1 +24, DC2 +32, DA0 +40, DFMAX +48,
 * FMAX +56, SCALE +60, DA1 +64.  FMAX and SCALE are single 32-bit words that
 * share one 8-byte row.  Do not reorder, insert or resize entries without
 * updating those loads.
 *
 *   K1, K2       coefficients of res0 = (K2 * xx0 + K1) * xx0 + DC1
 *   DC0          mask keeping the low 52 bits of a double
 *   DC1          bit pattern of 1.0; OR-ed into the DC0-masked value and
 *                also used as the constant term of the polynomial
 *   DC2          mask applied to h0 to obtain h_hi0
 *   DA0, SCALE,  operands of db0 = fpadd32(fmul8x16(SCALE, fand(db0, DA0)), DA1);
 *   DA1          NOTE(review): this looks like halving and re-biasing the
 *                exponent field for the square root - confirm
 *   DFMAX, FMAX  3.402823e+38 stored as a double and as a single
 */
36*25c28e83SPiotr Jasiukajtis.CONST_TBL:
37*25c28e83SPiotr Jasiukajtis	.word	0x3fe00001, 0x80007e00	! K1  =  5.00000715259318464227e-01
38*25c28e83SPiotr Jasiukajtis	.word	0xbfc00003, 0xc0017a01	! K2  = -1.25000447037521686593e-01
39*25c28e83SPiotr Jasiukajtis	.word	0x000fffff, 0xffffffff	! DC0 = 0x000fffffffffffff
40*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000, 0x00000000	! DC1 = 0x3ff0000000000000
41*25c28e83SPiotr Jasiukajtis	.word	0x7ffff000, 0x00000000	! DC2 = 0x7ffff00000000000
42*25c28e83SPiotr Jasiukajtis	.word	0x7fe00000, 0x00000000	! DA0 = 0x7fe0000000000000
43*25c28e83SPiotr Jasiukajtis	.word	0x47efffff, 0xe0000000	! DFMAX = 3.402823e+38
44*25c28e83SPiotr Jasiukajtis	.word	0x7f7fffff, 0x80808080	! FMAX = 3.402823e+38 , SCALE = 0x80808080
45*25c28e83SPiotr Jasiukajtis	.word	0x20000000, 0x00000000	! DA1 = 0x2000000000000000
46*25c28e83SPiotr Jasiukajtis
/*
 * Register aliases used throughout __vhypotf.
 *
 * Floating-point constants, loaded once from .CONST_TBL in the prologue.
 * SCALE and FMAX are single-precision registers filled with "ld"; the
 * others are double-precision registers filled with "ldd".
 */
47*25c28e83SPiotr Jasiukajtis#define DC0		%f12
48*25c28e83SPiotr Jasiukajtis#define DC1		%f10
49*25c28e83SPiotr Jasiukajtis#define DC2		%f42
50*25c28e83SPiotr Jasiukajtis#define DA0		%f6
51*25c28e83SPiotr Jasiukajtis#define DA1		%f4
52*25c28e83SPiotr Jasiukajtis#define K2		%f26
53*25c28e83SPiotr Jasiukajtis#define K1		%f28
54*25c28e83SPiotr Jasiukajtis#define SCALE		%f3
55*25c28e83SPiotr Jasiukajtis#define FMAX		%f2
56*25c28e83SPiotr Jasiukajtis#define DFMAX		%f50
57*25c28e83SPiotr Jasiukajtis
/*
 * Integer registers.
 *   stridex/stridey/stridez  strides after the prologue shifts them left
 *                            by 2 (stridey is shifted in place in %i4;
 *                            stridez is first loaded from the caller's
 *                            stack frame)
 *   _0x7fffffff              mask that clears the sign bit of hx0/hy0
 *   _0x7f3504f3              threshold compared against hx0/hy0 to divert
 *                            to the special-case paths
 *   _0x1ff0                  table-index mask, built as 0xff8 << 1
 *   TBL                      address of __vlibm_TBL_sqrtf
 */
58*25c28e83SPiotr Jasiukajtis#define stridex		%l6
59*25c28e83SPiotr Jasiukajtis#define stridey		%i4
60*25c28e83SPiotr Jasiukajtis#define stridez		%l5
61*25c28e83SPiotr Jasiukajtis#define _0x7fffffff	%o1
62*25c28e83SPiotr Jasiukajtis#define _0x7f3504f3	%o2
63*25c28e83SPiotr Jasiukajtis#define _0x1ff0		%l2
64*25c28e83SPiotr Jasiukajtis#define TBL		%l1
65*25c28e83SPiotr Jasiukajtis
/* Elements still to process; reloaded from tmp_counter at .begin. */
66*25c28e83SPiotr Jasiukajtis#define counter		%l0
67*25c28e83SPiotr Jasiukajtis
/*
 * Temporary slots in the local frame, addressed as [%fp + name].
 *   tmp_px, tmp_py  8-byte slots (written with stx) holding the x and y
 *                   pointers that .begin reloads
 *   tmp_counter     4-byte slot holding the element count that .begin
 *                   reloads; .begin zeroes it after reading
 *   tmp0 .. tmp4    4-byte slots that receive the high word of db0 via
 *                   "st %fN" and are read back with "ld" into an integer
 *                   register (iexp0)
 */
68*25c28e83SPiotr Jasiukajtis#define tmp_px		STACK_BIAS-0x30
69*25c28e83SPiotr Jasiukajtis#define tmp_py		STACK_BIAS-0x28
70*25c28e83SPiotr Jasiukajtis#define tmp_counter	STACK_BIAS-0x20
71*25c28e83SPiotr Jasiukajtis#define tmp0		STACK_BIAS-0x18
72*25c28e83SPiotr Jasiukajtis#define tmp1		STACK_BIAS-0x10
73*25c28e83SPiotr Jasiukajtis#define tmp2		STACK_BIAS-0x0c
74*25c28e83SPiotr Jasiukajtis#define tmp3		STACK_BIAS-0x08
75*25c28e83SPiotr Jasiukajtis#define tmp4		STACK_BIAS-0x04
76*25c28e83SPiotr Jasiukajtis
/*
 * tmps is subtracted from %sp by the "save" at function entry and equals
 * the deepest offset used above (0x30).
 */
77*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
78*25c28e83SPiotr Jasiukajtis#define tmps		0x30
79*25c28e83SPiotr Jasiukajtis
80*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
81*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
82*25c28e83SPiotr Jasiukajtis!  hx0 = *(int*)px;
83*25c28e83SPiotr Jasiukajtis!  x0 = *px;
84*25c28e83SPiotr Jasiukajtis!  px += stridex;
85*25c28e83SPiotr Jasiukajtis!
86*25c28e83SPiotr Jasiukajtis!  hy0 = *(int*)py;
87*25c28e83SPiotr Jasiukajtis!  y0 = *py;
88*25c28e83SPiotr Jasiukajtis!  py += stridey;
89*25c28e83SPiotr Jasiukajtis!
90*25c28e83SPiotr Jasiukajtis!  hx0 &= 0x7fffffff;
91*25c28e83SPiotr Jasiukajtis!  hy0 &= 0x7fffffff;
92*25c28e83SPiotr Jasiukajtis!
93*25c28e83SPiotr Jasiukajtis!  if ( hx0 >= 0x7f3504f3 || hy0 >= 0x7f3504f3 )
94*25c28e83SPiotr Jasiukajtis!  {
95*25c28e83SPiotr Jasiukajtis!    if ( hx0 >= 0x7f800000 || hy0 >= 0x7f800000 )
96*25c28e83SPiotr Jasiukajtis!    {
97*25c28e83SPiotr Jasiukajtis!      if ( hx0 == 0x7f800000 || hy0 == 0x7f800000 )
98*25c28e83SPiotr Jasiukajtis!        *(int*)pz = 0x7f800000;
99*25c28e83SPiotr Jasiukajtis!      else *pz = x0 * y0;
100*25c28e83SPiotr Jasiukajtis!    }
101*25c28e83SPiotr Jasiukajtis!    else
102*25c28e83SPiotr Jasiukajtis!    {
103*25c28e83SPiotr Jasiukajtis!      hyp = sqrt(x0 * (double)x0 + y0 * (double)y0);
104*25c28e83SPiotr Jasiukajtis!      if ( hyp <= DFMAX ) ftmp0 = (float)hyp;
105*25c28e83SPiotr Jasiukajtis!      else ftmp0 = FMAX * FMAX;
106*25c28e83SPiotr Jasiukajtis!      *pz = ftmp0;
107*25c28e83SPiotr Jasiukajtis!    }
108*25c28e83SPiotr Jasiukajtis!    pz += stridez;
109*25c28e83SPiotr Jasiukajtis!    continue;
110*25c28e83SPiotr Jasiukajtis!  }
111*25c28e83SPiotr Jasiukajtis!  if ( (hx0 | hy0) == 0 )
112*25c28e83SPiotr Jasiukajtis!  {
113*25c28e83SPiotr Jasiukajtis!    *pz = 0;
114*25c28e83SPiotr Jasiukajtis!    pz += stridez;
115*25c28e83SPiotr Jasiukajtis!    continue;
116*25c28e83SPiotr Jasiukajtis!  }
117*25c28e83SPiotr Jasiukajtis!  dx0 = x0 * (double)x0;
118*25c28e83SPiotr Jasiukajtis!  dy0 = y0 * (double)y0;
119*25c28e83SPiotr Jasiukajtis!  db0 = dx0 + dy0;
120*25c28e83SPiotr Jasiukajtis!
121*25c28e83SPiotr Jasiukajtis!  iexp0 = ((int*)&db0)[0];
122*25c28e83SPiotr Jasiukajtis!
123*25c28e83SPiotr Jasiukajtis!  h0 = vis_fand(db0,DC0);
124*25c28e83SPiotr Jasiukajtis!  h0 = vis_for(h0,DC1);
125*25c28e83SPiotr Jasiukajtis!  h_hi0 = vis_fand(h0,DC2);
126*25c28e83SPiotr Jasiukajtis!
127*25c28e83SPiotr Jasiukajtis!  db0 = vis_fand(db0,DA0);
128*25c28e83SPiotr Jasiukajtis!  db0 = vis_fmul8x16(SCALE, db0);
129*25c28e83SPiotr Jasiukajtis!  db0 = vis_fpadd32(db0,DA1);
130*25c28e83SPiotr Jasiukajtis!
131*25c28e83SPiotr Jasiukajtis!  iexp0 >>= 8;
132*25c28e83SPiotr Jasiukajtis!  di0 = iexp0 & 0x1ff0;
133*25c28e83SPiotr Jasiukajtis!  si0 = (char*)sqrt_arr + di0;
134*25c28e83SPiotr Jasiukajtis!
135*25c28e83SPiotr Jasiukajtis!  dtmp0 = ((double*)((char*)div_arr + di0))[0];
136*25c28e83SPiotr Jasiukajtis!  xx0 = h0 - h_hi0;
137*25c28e83SPiotr Jasiukajtis!  xx0 *= dmp0;
137*25c28e83SPiotr Jasiukajtis!  xx0 *= dtmp0;
139*25c28e83SPiotr Jasiukajtis!  dtmp0 = ((double*)si0)[1];
140*25c28e83SPiotr Jasiukajtis!  res0 = K2 * xx0;
141*25c28e83SPiotr Jasiukajtis!  res0 += K1;
142*25c28e83SPiotr Jasiukajtis!  res0 *= xx0;
143*25c28e83SPiotr Jasiukajtis!  res0 += DC1;
144*25c28e83SPiotr Jasiukajtis!  res0 = dtmp0 * res0;
145*25c28e83SPiotr Jasiukajtis!  res0 *= db0;
146*25c28e83SPiotr Jasiukajtis!  ftmp0 = (float)res0;
147*25c28e83SPiotr Jasiukajtis!  *pz = ftmp0;
148*25c28e83SPiotr Jasiukajtis!  pz += stridez;
149*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
150*25c28e83SPiotr Jasiukajtis
151*25c28e83SPiotr Jasiukajtis	ENTRY(__vhypotf)
152*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
153*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
154*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,o3)
155*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,__vlibm_TBL_sqrtf,l1)
156*25c28e83SPiotr Jasiukajtis
157*25c28e83SPiotr Jasiukajtis#ifdef __sparcv9
158*25c28e83SPiotr Jasiukajtis	ldx	[%fp+STACK_BIAS+176],stridez
159*25c28e83SPiotr Jasiukajtis#else
160*25c28e83SPiotr Jasiukajtis	ld	[%fp+STACK_BIAS+92],stridez
161*25c28e83SPiotr Jasiukajtis#endif
162*25c28e83SPiotr Jasiukajtis	st	%i0,[%fp+tmp_counter]
163*25c28e83SPiotr Jasiukajtis
164*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
165*25c28e83SPiotr Jasiukajtis
166*25c28e83SPiotr Jasiukajtis	stx	%i3,[%fp+tmp_py]
167*25c28e83SPiotr Jasiukajtis
168*25c28e83SPiotr Jasiukajtis	ldd	[%o3],K1
169*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%o1
170*25c28e83SPiotr Jasiukajtis
171*25c28e83SPiotr Jasiukajtis	ldd	[%o3+8],K2
172*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f350400),%o2
173*25c28e83SPiotr Jasiukajtis
174*25c28e83SPiotr Jasiukajtis	ldd	[%o3+16],DC0
175*25c28e83SPiotr Jasiukajtis	add	%o1,1023,_0x7fffffff
176*25c28e83SPiotr Jasiukajtis	add	%o2,0xf3,_0x7f3504f3
177*25c28e83SPiotr Jasiukajtis
178*25c28e83SPiotr Jasiukajtis	ldd	[%o3+24],DC1
179*25c28e83SPiotr Jasiukajtis	sll	%i2,2,stridex
180*25c28e83SPiotr Jasiukajtis
181*25c28e83SPiotr Jasiukajtis	ld	[%o3+56],FMAX
182*25c28e83SPiotr Jasiukajtis
183*25c28e83SPiotr Jasiukajtis	ldd	[%o3+32],DC2
184*25c28e83SPiotr Jasiukajtis	sll	%i4,2,stridey
185*25c28e83SPiotr Jasiukajtis
186*25c28e83SPiotr Jasiukajtis	ldd	[%o3+40],DA0
187*25c28e83SPiotr Jasiukajtis	sll	stridez,2,stridez
188*25c28e83SPiotr Jasiukajtis
189*25c28e83SPiotr Jasiukajtis	ldd	[%o3+48],DFMAX
190*25c28e83SPiotr Jasiukajtis
191*25c28e83SPiotr Jasiukajtis	ld	[%o3+60],SCALE
192*25c28e83SPiotr Jasiukajtis	or	%g0,0xff8,%l2
193*25c28e83SPiotr Jasiukajtis
194*25c28e83SPiotr Jasiukajtis	ldd	[%o3+64],DA1
195*25c28e83SPiotr Jasiukajtis	sll	%l2,1,_0x1ff0
196*25c28e83SPiotr Jasiukajtis	or	%g0,%i5,%l7
197*25c28e83SPiotr Jasiukajtis
198*25c28e83SPiotr Jasiukajtis.begin:
199*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp_counter],counter
200*25c28e83SPiotr Jasiukajtis	ldx	[%fp+tmp_px],%i1
201*25c28e83SPiotr Jasiukajtis	ldx	[%fp+tmp_py],%i2
202*25c28e83SPiotr Jasiukajtis	st	%g0,[%fp+tmp_counter]
203*25c28e83SPiotr Jasiukajtis.begin1:
204*25c28e83SPiotr Jasiukajtis	cmp	counter,0
205*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
206*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (3_0) hx0 = *(int*)px;
207*25c28e83SPiotr Jasiukajtis
208*25c28e83SPiotr Jasiukajtis	lda	[%i2]0x82,%l4		! (3_0) hy0 = *(int*)py;
209*25c28e83SPiotr Jasiukajtis
210*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (3_0) x0 = *px;
211*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (3_0) hx0 &= 0x7fffffff;
212*25c28e83SPiotr Jasiukajtis
213*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (3_0) hx ? 0x7f3504f3
214*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec		! (3_0) if ( hx >= 0x7f3504f3 )
215*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (3_0) hy0 &= 0x7fffffff;
216*25c28e83SPiotr Jasiukajtis
217*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (3_0) hy ? 0x7f3504f3
218*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec		! (3_0) if ( hy >= 0x7f3504f3 )
219*25c28e83SPiotr Jasiukajtis	or	%g0,%i2,%o7
220*25c28e83SPiotr Jasiukajtis
221*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
222*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.spec1
223*25c28e83SPiotr Jasiukajtis
224*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
225*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f44		! (3_0) dx0 = x0 * (double)x0;
226*25c28e83SPiotr Jasiukajtis	lda	[%i2]0x82,%f17		! (3_0) y0 = *py;
227*25c28e83SPiotr Jasiukajtis
228*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (4_0) hx0 = *(int*)px;
229*25c28e83SPiotr Jasiukajtis
230*25c28e83SPiotr Jasiukajtis	lda	[stridey+%o7]0x82,%l4	! (4_0) hy0 = *(int*)py;
231*25c28e83SPiotr Jasiukajtis
232*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (4_0) hx0 &= 0x7fffffff;
233*25c28e83SPiotr Jasiukajtis
234*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f24		! (3_0) dy0 = y0 * (double)y0;
235*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (4_0) hx ? 0x7f3504f3
236*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (4_0) if ( hx >= 0x7f3504f3 )
237*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (4_0) hy0 &= 0x7fffffff;
238*25c28e83SPiotr Jasiukajtis
239*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
240*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update0
241*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (4_0) x0 = *px;
242*25c28e83SPiotr Jasiukajtis.cont0:
243*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f24,%f24		! (3_0) db0 = dx0 + dy0;
244*25c28e83SPiotr Jasiukajtis
245*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f40		! (4_1) dy0 = x0 * (double)x0;
246*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (4_1) hy ? 0x7f3504f3
247*25c28e83SPiotr Jasiukajtis	lda	[stridey+%o7]0x82,%f17	! (4_1) hy0 = *py;
248*25c28e83SPiotr Jasiukajtis
249*25c28e83SPiotr Jasiukajtis	add	%o7,stridey,%i5		! py += stridey
250*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%l3	! (0_0) hx0 = *(int*)px;
251*25c28e83SPiotr Jasiukajtis
252*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update1		! (4_1) if ( hy >= 0x7f3504f3 )
253*25c28e83SPiotr Jasiukajtis	st	%f24,[%fp+tmp0]		! (3_1) iexp0 = ((int*)&db0)[0];
254*25c28e83SPiotr Jasiukajtis.cont1:
255*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (0_0) hx0 &= 0x7fffffff;
256*25c28e83SPiotr Jasiukajtis
257*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f48		! (4_1) dy0 = y0 * (double)y0;
258*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%f8	! (0_0) x0 = *px;
259*25c28e83SPiotr Jasiukajtis
260*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
261*25c28e83SPiotr Jasiukajtis
262*25c28e83SPiotr Jasiukajtis	lda	[%i5+stridey]0x82,%l4	! (0_0) hy0 = *(int*)py;
263*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (0_0) hx ? 0x7f3504f3
264*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update2		! (0_0) if ( hx >= 0x7f3504f3 )
265*25c28e83SPiotr Jasiukajtis	add	%i5,stridey,%o4		! py += stridey
266*25c28e83SPiotr Jasiukajtis.cont2:
267*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f48,%f20		! (4_1) db0 = dx0 + dy0;
268*25c28e83SPiotr Jasiukajtis
269*25c28e83SPiotr Jasiukajtis	fsmuld	%f8,%f8,%f40		! (0_0) dx0 = x0 * (double)x0;
270*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (0_0) hy0 &= 0x7fffffff;
271*25c28e83SPiotr Jasiukajtis	lda	[%i5+stridey]0x82,%f17	! (0_0) hy0 = *py;
272*25c28e83SPiotr Jasiukajtis
273*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (0_0) hy ? 0x7f3504f3
274*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update3		! (0_0) if ( hy >= 0x7f3504f3 )
275*25c28e83SPiotr Jasiukajtis	st	%f20,[%fp+tmp1]		! (4_1) iexp0 = ((int*)&db0)[0];
276*25c28e83SPiotr Jasiukajtis
277*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
278*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update3
279*25c28e83SPiotr Jasiukajtis.cont3:
280*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%l3	! (1_0) hx0 = *(int*)px;
281*25c28e83SPiotr Jasiukajtis
282*25c28e83SPiotr Jasiukajtis	fand	%f24,DC0,%f60		! (3_1) h0 = vis_fand(db0,DC0);
283*25c28e83SPiotr Jasiukajtis
284*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (1_0) hx0 &= 0x7fffffff;
285*25c28e83SPiotr Jasiukajtis
286*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f34		! (0_0) dy0 = y0 * (double)y0;
287*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (1_0) hx ? 0x7f3504f3
288*25c28e83SPiotr Jasiukajtis	lda	[%o4+stridey]0x82,%l4	! (1_0) hy0 = *(int*)py;
289*25c28e83SPiotr Jasiukajtis
290*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
291*25c28e83SPiotr Jasiukajtis
292*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (1_0) x0 = *px;
293*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update4		! (1_0) if ( hx >= 0x7f3504f3 )
294*25c28e83SPiotr Jasiukajtis	add	%o4,stridey,%i5		! py += stridey
295*25c28e83SPiotr Jasiukajtis.cont4:
296*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (1_0) hy0 &= 0x7fffffff;
297*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (3_1) h0 = vis_for(h0,DC1);
298*25c28e83SPiotr Jasiukajtis
299*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (1_0) hy ? 0x7f3504f3
300*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp0],%o0		! (3_1) iexp0 = ((int*)&db0)[0];
301*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f34,%f0		! (0_0) db0 = dx0 + dy0;
302*25c28e83SPiotr Jasiukajtis
303*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f40		! (1_0) dx0 = x0 * (double)x0;
304*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
305*25c28e83SPiotr Jasiukajtis	lda	[%o4+stridey]0x82,%f17	! (1_0) y0 = *py;
306*25c28e83SPiotr Jasiukajtis
307*25c28e83SPiotr Jasiukajtis	srax	%o0,8,%o0		! (3_1) iexp0 >>= 8;
308*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update5		! (1_0) if ( hy >= 0x7f3504f3 )
309*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f38		! (3_1) h_hi0 = vis_fand(h0,DC2);
310*25c28e83SPiotr Jasiukajtis
311*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
312*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update5
313*25c28e83SPiotr Jasiukajtis.cont5:
314*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (2_0) hx0 = *(int*)px;
315*25c28e83SPiotr Jasiukajtis
316*25c28e83SPiotr Jasiukajtis	and	%o0,_0x1ff0,%o0		! (3_1) di0 = iexp0 & 0x1ff0;
317*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp2]		! (0_0) iexp0 = ((int*)&db0)[0];
318*25c28e83SPiotr Jasiukajtis	fand	%f20,DC0,%f60		! (4_1) h0 = vis_fand(db0,DC0);
319*25c28e83SPiotr Jasiukajtis
320*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o0],%f22		! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
321*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f38,%f38		! (3_1) xx0 = h0 - h_hi0;
322*25c28e83SPiotr Jasiukajtis
323*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f32		! (1_0) dy0 = y0 * (double)y0;
324*25c28e83SPiotr Jasiukajtis	add	%i5,stridey,%i2		! py += stridey
325*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i5]0x82,%l4	! (2_0) hy0 = *(int*)py;
326*25c28e83SPiotr Jasiukajtis
327*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (2_0) hx0 &= 0x7fffffff;
328*25c28e83SPiotr Jasiukajtis
329*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (2_0) x0 = *px;
330*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (2_0) hx ? 0x7f3504f3
331*25c28e83SPiotr Jasiukajtis
332*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f22,%f38		! (3_1) xx0 *= dmp0;
333*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (2_0) hy0 &= 0x7fffffff;
334*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (4_1) h0 = vis_for(h0,DC1);
335*25c28e83SPiotr Jasiukajtis
336*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update6		! (2_0) if ( hx >= 0x7f3504f3 )
337*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp1],%o3		! (4_1) iexp0 = ((int*)&db0)[0];
338*25c28e83SPiotr Jasiukajtis.cont6:
339*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f32,%f18		! (1_0) db0 = dx0 + dy0;
340*25c28e83SPiotr Jasiukajtis
341*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f44		! (2_0) dx0 = x0 * (double)x0;
342*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (2_0) hy ? 0x7f3504f3
343*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i5]0x82,%f17	! (2_0) y0 = *py;
344*25c28e83SPiotr Jasiukajtis
345*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
346*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update7		! (2_0) if ( hy >= 0x7f3504f3 )
347*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f58		! (4_1) h_hi0 = vis_fand(h0,DC2);
348*25c28e83SPiotr Jasiukajtis
349*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
350*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update7
351*25c28e83SPiotr Jasiukajtis	nop
352*25c28e83SPiotr Jasiukajtis.cont7:
353*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f38,%f56		! (3_1) res0 = K2 * xx0;
354*25c28e83SPiotr Jasiukajtis	srax	%o3,8,%o3		! (4_1) iexp0 >>= 8;
355*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (3_0) hx0 = *(int*)px;
356*25c28e83SPiotr Jasiukajtis
357*25c28e83SPiotr Jasiukajtis	and	%o3,_0x1ff0,%o3		! (4_1) di0 = iexp0 & 0x1ff0;
358*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+tmp3]		! (1_0) iexp0 = ((int*)&db0)[0];
359*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f60		! (0_0) h0 = vis_fand(db0,DC0);
360*25c28e83SPiotr Jasiukajtis
361*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o3],%f22		! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
362*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%o7		! py += stridey
363*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f58,%f58		! (4_1) xx0 = h0 - h_hi0;
364*25c28e83SPiotr Jasiukajtis
365*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f30		! (2_0) dy0 = y0 * (double)y0;
366*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i2]0x82,%l4	! (3_0) hy0 = *(int*)py;
367*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (3_0) hx0 &= 0x7fffffff;
368*25c28e83SPiotr Jasiukajtis
369*25c28e83SPiotr Jasiukajtis	faddd	%f56,K1,%f54		! (3_1) res0 += K1;
370*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (3_0) hx ? 0x7f3504f3
371*25c28e83SPiotr Jasiukajtis
372*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (3_0) x0 = *px;
373*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
374*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update8		! (3_0) if ( hx >= 0x7f3504f3 )
375*25c28e83SPiotr Jasiukajtis
376*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f22,%f58		! (4_1) xx0 *= dmp0;
377*25c28e83SPiotr Jasiukajtis.cont8:
378*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (3_0) hy0 &= 0x7fffffff;
379*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (0_0) h0 = vis_for(h0,DC1);
380*25c28e83SPiotr Jasiukajtis
381*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (3_0) hy ? 0x7f3504f3
382*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp2],%g1		! (0_0) iexp0 = ((int*)&db0)[0];
383*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f30,%f30		! (2_0) db0 = dx0 + dy0;
384*25c28e83SPiotr Jasiukajtis
385*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f44		! (3_0) dx0 = x0 * (double)x0;
386*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update9		! (3_0) if ( hy >= 0x7f3504f3 )
387*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i2]0x82,%f17	! (3_0) y0 = *py;
388*25c28e83SPiotr Jasiukajtis
389*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
390*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update9
391*25c28e83SPiotr Jasiukajtis	nop
392*25c28e83SPiotr Jasiukajtis.cont9:
393*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f38,%f40		! (3_1) res0 *= xx0;
394*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (4_0) hx0 = *(int*)px;
395*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f38		! (0_0) h_hi0 = vis_fand(h0,DC2);
396*25c28e83SPiotr Jasiukajtis
397*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f58,%f54		! (4_1) res0 = K2 * xx0;
398*25c28e83SPiotr Jasiukajtis	srax	%g1,8,%o5		! (0_0) iexp0 >>= 8;
399*25c28e83SPiotr Jasiukajtis	lda	[stridey+%o7]0x82,%l4	! (4_0) hy0 = *(int*)py;
400*25c28e83SPiotr Jasiukajtis	fand	%f24,DA0,%f56		! (3_1) db0 = vis_fand(db0,DA0);
401*25c28e83SPiotr Jasiukajtis
402*25c28e83SPiotr Jasiukajtis	and	%o5,_0x1ff0,%o5		! (0_0) di0 = iexp0 & 0x1ff0;
403*25c28e83SPiotr Jasiukajtis	st	%f30,[%fp+tmp4]		! (2_0) iexp0 = ((int*)&db0)[0];
404*25c28e83SPiotr Jasiukajtis	fand	%f18,DC0,%f60		! (1_0) h0 = vis_fand(db0,DC0);
405*25c28e83SPiotr Jasiukajtis
406*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o5],%f22		! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
407*25c28e83SPiotr Jasiukajtis	add	%o0,TBL,%g1		! (3_1) si0 = (char*)sqrt_arr + di0;
408*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (4_0) hx0 &= 0x7fffffff;
409*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f38,%f38		! (0_0) xx0 = h0 - h_hi0;
410*25c28e83SPiotr Jasiukajtis
411*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f24		! (3_0) dy0 = y0 * (double)y0;
412*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (4_0) hx ? 0x7f3504f3
413*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update10		! (4_0) if ( hx >= 0x7f3504f3 )
414*25c28e83SPiotr Jasiukajtis	faddd	%f40,DC1,%f40		! (3_1) res0 += DC1;
415*25c28e83SPiotr Jasiukajtis
416*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f56,%f36	! (3_1) db0 = vis_fmul8x16(SCALE, db0);
417*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (4_0) hy0 &= 0x7fffffff;
418*25c28e83SPiotr Jasiukajtis	ldd	[%g1+8],%f56		! (3_1) dtmp0 = ((double*)si0)[1];
419*25c28e83SPiotr Jasiukajtis	faddd	%f54,K1,%f54		! (4_1) res0 += K1;
420*25c28e83SPiotr Jasiukajtis
421*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (4_0) x0 = *px;
422*25c28e83SPiotr Jasiukajtis.cont10:
423*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f22,%f38		! (0_0) xx0 *= dmp0;
424*25c28e83SPiotr Jasiukajtis	cmp	counter,5
425*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (1_0) h0 = vis_for(h0,DC1);
426*25c28e83SPiotr Jasiukajtis
427*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1		! (1_0) iexp0 = ((int*)&db0)[0];
428*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f40,%f62		! (3_1) res0 = dtmp0 * res0;
429*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f24,%f24		! (3_0) db0 = dx0 + dy0;
430*25c28e83SPiotr Jasiukajtis
431*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail
432*25c28e83SPiotr Jasiukajtis	nop
433*25c28e83SPiotr Jasiukajtis
434*25c28e83SPiotr Jasiukajtis	ba	.main_loop
435*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
436*25c28e83SPiotr Jasiukajtis
437*25c28e83SPiotr Jasiukajtis	.align	16
438*25c28e83SPiotr Jasiukajtis.main_loop:
439*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f40		! (4_1) dy0 = x0 * (double)x0;
440*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (4_1) hy ? 0x7f3504f3
441*25c28e83SPiotr Jasiukajtis	lda	[stridey+%o7]0x82,%f17	! (4_1) hy0 = *py;
442*25c28e83SPiotr Jasiukajtis	fpadd32	%f36,DA1,%f36		! (3_2) db0 = vis_fpadd32(db0,DA1);
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f58,%f58		! (4_2) res0 *= xx0;
445*25c28e83SPiotr Jasiukajtis	add	%o7,stridey,%i5		! py += stridey
446*25c28e83SPiotr Jasiukajtis	st	%f24,[%fp+tmp0]		! (3_1) iexp0 = ((int*)&db0)[0];
447*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f44		! (1_1) h_hi0 = vis_fand(h0,DC2);
448*25c28e83SPiotr Jasiukajtis
449*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f38,%f56		! (0_1) res0 = K2 * xx0;
450*25c28e83SPiotr Jasiukajtis	srax	%g1,8,%g5		! (1_1) iexp0 >>= 8;
451*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update11		! (4_1) if ( hy >= 0x7f3504f3 )
452*25c28e83SPiotr Jasiukajtis	fand	%f20,DA0,%f54		! (4_2) db0 = vis_fand(db0,DA0);
453*25c28e83SPiotr Jasiukajtis
454*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
455*25c28e83SPiotr Jasiukajtis	nop
456*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update11
457*25c28e83SPiotr Jasiukajtis	fzero	%f52
458*25c28e83SPiotr Jasiukajtis.cont11:
459*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (3_2) res0 *= db0;
460*25c28e83SPiotr Jasiukajtis	and	%g5,_0x1ff0,%g5		! (1_1) di0 = iexp0 & 0x1ff0;
461*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%l3	! (0_0) hx0 = *(int*)px;
462*25c28e83SPiotr Jasiukajtis	fand	%f30,DC0,%f60		! (2_1) h0 = vis_fand(db0,DC0);
463*25c28e83SPiotr Jasiukajtis
464*25c28e83SPiotr Jasiukajtis	ldd	[%g5+TBL],%f22		! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
465*25c28e83SPiotr Jasiukajtis	add	%o3,TBL,%g1		! (4_2) si0 = (char*)sqrt_arr + di0;
466*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i0		! px += stridex
467*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f44,%f44		! (1_1) xx0 = h0 - h_hi0;
468*25c28e83SPiotr Jasiukajtis
469*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f48		! (4_1) dy0 = y0 * (double)y0;
470*25c28e83SPiotr Jasiukajtis	nop
471*25c28e83SPiotr Jasiukajtis	lda	[%i1+stridex]0x82,%f8	! (0_0) x0 = *px;
472*25c28e83SPiotr Jasiukajtis	faddd	%f58,DC1,%f36		! (4_2) res0 += DC1;
473*25c28e83SPiotr Jasiukajtis
474*25c28e83SPiotr Jasiukajtis	faddd	%f56,K1,%f58		! (0_1) res0 += K1;
475*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (0_0) hx0 &= 0x7fffffff;
476*25c28e83SPiotr Jasiukajtis	ldd	[%g1+8],%f56		! (4_2) dtmp0 = ((double*)si0)[1];
477*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f54,%f54	! (4_2) db0 = vis_fmul8x16(SCALE, db0);
478*25c28e83SPiotr Jasiukajtis
479*25c28e83SPiotr Jasiukajtis	lda	[%i5+stridey]0x82,%l4	! (0_0) hy0 = *(int*)py;
480*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (0_0) hx ? 0x7f3504f3
481*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update12		! (0_0) if ( hx >= 0x7f3504f3 )
482*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (3_2) ftmp0 = (float)res0;
483*25c28e83SPiotr Jasiukajtis.cont12:
484*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f22,%f44		! (1_1) xx0 *= dmp0;
485*25c28e83SPiotr Jasiukajtis	add	%l7,stridez,%o7		! pz += stridez
486*25c28e83SPiotr Jasiukajtis	st	%f14,[%l7]		! (3_2) *pz = ftmp0;
487*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (2_1) h0 = vis_for(h0,DC1);
488*25c28e83SPiotr Jasiukajtis
489*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f36,%f36		! (4_2) res0 = dtmp0 * res0;
490*25c28e83SPiotr Jasiukajtis	add	%i5,stridey,%o4		! py += stridey
491*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp4],%g1		! (2_1) iexp0 = ((int*)&db0)[0];
492*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f48,%f20		! (4_1) db0 = dx0 + dy0;
493*25c28e83SPiotr Jasiukajtis
494*25c28e83SPiotr Jasiukajtis	fsmuld	%f8,%f8,%f40		! (0_0) dx0 = x0 * (double)x0;
495*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (0_0) hy0 &= 0x7fffffff;
496*25c28e83SPiotr Jasiukajtis	lda	[%i5+stridey]0x82,%f17	! (0_0) hy0 = *py;
497*25c28e83SPiotr Jasiukajtis	fpadd32	%f54,DA1,%f62		! (4_2) db0 = vis_fpadd32(db0,DA1);
498*25c28e83SPiotr Jasiukajtis
499*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f38,%f38		! (0_1) res0 *= xx0;
500*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (0_0) hy ? 0x7f3504f3
501*25c28e83SPiotr Jasiukajtis	st	%f20,[%fp+tmp1]		! (4_1) iexp0 = ((int*)&db0)[0];
502*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f58		! (2_1) h_hi0 = vis_fand(h0,DC2);
503*25c28e83SPiotr Jasiukajtis
504*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f44,%f56		! (1_1) res0 = K2 * xx0;
505*25c28e83SPiotr Jasiukajtis	srax	%g1,8,%g1		! (2_1) iexp0 >>= 8;
506*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update13		! (0_0) if ( hy >= 0x7f3504f3 )
507*25c28e83SPiotr Jasiukajtis	fand	%f0,DA0,%f54		! (0_1) db0 = vis_fand(db0,DA0);
508*25c28e83SPiotr Jasiukajtis
509*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
510*25c28e83SPiotr Jasiukajtis	nop
511*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update13
512*25c28e83SPiotr Jasiukajtis	fzero	%f52
513*25c28e83SPiotr Jasiukajtis.cont13:
514*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f62,%f62		! (4_2) res0 *= db0;
515*25c28e83SPiotr Jasiukajtis	and	%g1,_0x1ff0,%g1		! (2_1) di0 = iexp0 & 0x1ff0;
516*25c28e83SPiotr Jasiukajtis	lda	[%i0+stridex]0x82,%l3	! (1_0) hx0 = *(int*)px;
517*25c28e83SPiotr Jasiukajtis	fand	%f24,DC0,%f60		! (3_1) h0 = vis_fand(db0,DC0);
518*25c28e83SPiotr Jasiukajtis
519*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%g1],%f22		! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
520*25c28e83SPiotr Jasiukajtis	add	%o5,TBL,%o0		! (0_1) si0 = (char*)sqrt_arr + di0;
521*25c28e83SPiotr Jasiukajtis	add	%i0,stridex,%i1		! px += stridex
522*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f58,%f58		! (2_1) xx0 = h0 - h_hi0;
523*25c28e83SPiotr Jasiukajtis
524*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f34		! (0_0) dy0 = y0 * (double)y0;
525*25c28e83SPiotr Jasiukajtis	add	%o7,stridez,%i0		! pz += stridez
526*25c28e83SPiotr Jasiukajtis	lda	[%o4+stridey]0x82,%l4	! (1_0) hy0 = *(int*)py;
527*25c28e83SPiotr Jasiukajtis	faddd	%f38,DC1,%f36		! (0_1) res0 += DC1;
528*25c28e83SPiotr Jasiukajtis
529*25c28e83SPiotr Jasiukajtis	faddd	%f56,K1,%f38		! (1_1) res0 += K1;
530*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (1_0) hx0 &= 0x7fffffff;
531*25c28e83SPiotr Jasiukajtis	ldd	[%o0+8],%f56		! (0_1) dtmp0 = ((double*)si0)[1];
532*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f54,%f54	! (0_1) db0 = vis_fmul8x16(SCALE, db0);
533*25c28e83SPiotr Jasiukajtis
534*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (1_0) x0 = *px;
535*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (1_0) hx ? 0x7f3504f3
536*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update14		! (1_0) if ( hx >= 0x7f3504f3 )
537*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (4_2) ftmp0 = (float)res0;
538*25c28e83SPiotr Jasiukajtis.cont14:
539*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f22,%f58		! (2_1) xx0 *= dmp0;
540*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (1_0) hy0 &= 0x7fffffff;
541*25c28e83SPiotr Jasiukajtis	add	%o4,stridey,%i5		! py += stridey
542*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (3_1) h0 = vis_for(h0,DC1);
543*25c28e83SPiotr Jasiukajtis
544*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f36,%f36		! (0_1) res0 = dtmp0 * res0;
545*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (1_0) hy ? 0x7f3504f3
546*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp0],%o0		! (3_1) iexp0 = ((int*)&db0)[0];
547*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f34,%f0		! (0_0) db0 = dx0 + dy0;
548*25c28e83SPiotr Jasiukajtis
549*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f40		! (1_0) dx0 = x0 * (double)x0;
550*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
551*25c28e83SPiotr Jasiukajtis	lda	[%o4+stridey]0x82,%f17	! (1_0) y0 = *py;
552*25c28e83SPiotr Jasiukajtis	fpadd32	%f54,DA1,%f62		! (0_1) db0 = vis_fpadd32(db0,DA1);
553*25c28e83SPiotr Jasiukajtis
554*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f44,%f44		! (1_1) res0 *= xx0;
555*25c28e83SPiotr Jasiukajtis	st	%f14,[%o7]		! (4_2) *pz = ftmp0;
556*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update15		! (1_0) if ( hy >= 0x7f3504f3 )
557*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f38		! (3_1) h_hi0 = vis_fand(h0,DC2);
558*25c28e83SPiotr Jasiukajtis
559*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
560*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update15
561*25c28e83SPiotr Jasiukajtis	nop
562*25c28e83SPiotr Jasiukajtis.cont15:
563*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f58,%f54		! (2_1) res0 = K2 * xx0;
564*25c28e83SPiotr Jasiukajtis	srax	%o0,8,%o0		! (3_1) iexp0 >>= 8;
565*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp2]		! (0_0) iexp0 = ((int*)&db0)[0];
566*25c28e83SPiotr Jasiukajtis	fand	%f18,DA0,%f56		! (1_1) db0 = vis_fand(db0,DA0);
567*25c28e83SPiotr Jasiukajtis
568*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f62,%f62		! (0_1) res0 *= db0;
569*25c28e83SPiotr Jasiukajtis	and	%o0,_0x1ff0,%o0		! (3_1) di0 = iexp0 & 0x1ff0;
570*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (2_0) hx0 = *(int*)px;
571*25c28e83SPiotr Jasiukajtis	fand	%f20,DC0,%f60		! (4_1) h0 = vis_fand(db0,DC0);
572*25c28e83SPiotr Jasiukajtis
573*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o0],%f22		! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
574*25c28e83SPiotr Jasiukajtis	add	%g5,TBL,%o3		! (1_1) si0 = (char*)sqrt_arr + di0;
575*25c28e83SPiotr Jasiukajtis	add	%i0,stridez,%i3		! pz += stridez
576*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f38,%f38		! (3_1) xx0 = h0 - h_hi0;
577*25c28e83SPiotr Jasiukajtis
578*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f32		! (1_0) dy0 = y0 * (double)y0;
579*25c28e83SPiotr Jasiukajtis	add	%i5,stridey,%i2		! py += stridey
580*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i5]0x82,%l4	! (2_0) hy0 = *(int*)py;
581*25c28e83SPiotr Jasiukajtis	faddd	%f44,DC1,%f44		! (1_1) res0 += DC1;
582*25c28e83SPiotr Jasiukajtis
583*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f56,%f36	! (1_1) db0 = vis_fmul8x16(SCALE, db0);
584*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (2_0) hx0 &= 0x7fffffff;
585*25c28e83SPiotr Jasiukajtis	ldd	[%o3+8],%f56		! (1_1) dtmp0 = ((double*)si0)[1];
586*25c28e83SPiotr Jasiukajtis	faddd	%f54,K1,%f54		! (2_1) res0 += K1;
587*25c28e83SPiotr Jasiukajtis
588*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (2_0) x0 = *px;
589*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (2_0) hx ? 0x7f3504f3
590*25c28e83SPiotr Jasiukajtis	add	%i3,stridez,%o4		! pz += stridez
591*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (0_1) ftmp0 = (float)res0;
592*25c28e83SPiotr Jasiukajtis
593*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f22,%f38		! (3_1) xx0 *= dmp0;
594*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (2_0) hy0 &= 0x7fffffff;
595*25c28e83SPiotr Jasiukajtis	st	%f14,[%i0]		! (0_1) *pz = ftmp0;
596*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (4_1) h0 = vis_for(h0,DC1);
597*25c28e83SPiotr Jasiukajtis
598*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f44,%f62		! (1_1) res0 = dtmp0 * res0;
599*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update16		! (2_0) if ( hx >= 0x7f3504f3 )
600*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp1],%o3		! (4_1) iexp0 = ((int*)&db0)[0];
601*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f32,%f18		! (1_0) db0 = dx0 + dy0;
602*25c28e83SPiotr Jasiukajtis.cont16:
603*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f44		! (2_0) dx0 = x0 * (double)x0;
604*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (2_0) hy ? 0x7f3504f3
605*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i5]0x82,%f17	! (2_0) y0 = *py;
606*25c28e83SPiotr Jasiukajtis	fpadd32	%f36,DA1,%f36		! (1_1) db0 = vis_fpadd32(db0,DA1);
607*25c28e83SPiotr Jasiukajtis
608*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f58,%f54		! (2_1) res0 *= xx0;
609*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%l7		! px += stridex
610*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update17		! (2_0) if ( hy >= 0x7f3504f3 )
611*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f58		! (4_1) h_hi0 = vis_fand(h0,DC2);
612*25c28e83SPiotr Jasiukajtis
613*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
614*25c28e83SPiotr Jasiukajtis	nop
615*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update17
616*25c28e83SPiotr Jasiukajtis	fzero	%f52
617*25c28e83SPiotr Jasiukajtis.cont17:
618*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f38,%f56		! (3_1) res0 = K2 * xx0;
619*25c28e83SPiotr Jasiukajtis	srax	%o3,8,%o3		! (4_1) iexp0 >>= 8;
620*25c28e83SPiotr Jasiukajtis	st	%f18,[%fp+tmp3]		! (1_0) iexp0 = ((int*)&db0)[0];
621*25c28e83SPiotr Jasiukajtis	fand	%f30,DA0,%f40		! (2_1) db0 = vis_fand(db0,DA0);
622*25c28e83SPiotr Jasiukajtis
623*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (1_1) res0 *= db0;
624*25c28e83SPiotr Jasiukajtis	and	%o3,_0x1ff0,%o3		! (4_1) di0 = iexp0 & 0x1ff0;
625*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%l3		! (3_0) hx0 = *(int*)px;
626*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f60		! (0_0) h0 = vis_fand(db0,DC0);
627*25c28e83SPiotr Jasiukajtis
628*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o3],%f22		! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
629*25c28e83SPiotr Jasiukajtis	add	%g1,TBL,%g1		! (2_1) si0 = (char*)sqrt_arr + di0;
630*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%o7		! py += stridey
631*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f58,%f58		! (4_1) xx0 = h0 - h_hi0;
632*25c28e83SPiotr Jasiukajtis
633*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f30		! (2_0) dy0 = y0 * (double)y0;
634*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i2]0x82,%l4	! (3_0) hy0 = *(int*)py;
635*25c28e83SPiotr Jasiukajtis	add	%l7,stridex,%i1		! px += stridex
636*25c28e83SPiotr Jasiukajtis	faddd	%f54,DC1,%f36		! (2_1) res0 += DC1;
637*25c28e83SPiotr Jasiukajtis
638*25c28e83SPiotr Jasiukajtis	faddd	%f56,K1,%f54		! (3_1) res0 += K1;
639*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (3_0) hx0 &= 0x7fffffff;
640*25c28e83SPiotr Jasiukajtis	ldd	[%g1+8],%f56		! (2_1) dtmp0 = ((double*)si0)[1];
641*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f40,%f40	! (2_1) db0 = vis_fmul8x16(SCALE, db0);
642*25c28e83SPiotr Jasiukajtis
643*25c28e83SPiotr Jasiukajtis	lda	[%l7]0x82,%f17		! (3_0) x0 = *px;
644*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (3_0) hx ? 0x7f3504f3
645*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update18		! (3_0) if ( hx >= 0x7f3504f3 )
646*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (1_1) ftmp0 = (float)res0;
647*25c28e83SPiotr Jasiukajtis.cont18:
648*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f22,%f58		! (4_1) xx0 *= dmp0;
649*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (3_0) hy0 &= 0x7fffffff;
650*25c28e83SPiotr Jasiukajtis	st	%f14,[%i3]		! (1_1) *pz = ftmp0;
651*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (0_0) h0 = vis_for(h0,DC1);
652*25c28e83SPiotr Jasiukajtis
653*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f36,%f36		! (2_1) res0 = dtmp0 * res0;
654*25c28e83SPiotr Jasiukajtis	cmp	%l4,_0x7f3504f3		! (3_0) hy ? 0x7f3504f3
655*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp2],%g1		! (0_0) iexp0 = ((int*)&db0)[0];
656*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f30,%f30		! (2_0) db0 = dx0 + dy0;
657*25c28e83SPiotr Jasiukajtis
658*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f44		! (3_0) dx0 = x0 * (double)x0;
659*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update19		! (3_0) if ( hy >= 0x7f3504f3 )
660*25c28e83SPiotr Jasiukajtis	lda	[stridey+%i2]0x82,%f17	! (3_0) y0 = *py;
661*25c28e83SPiotr Jasiukajtis	fpadd32	%f40,DA1,%f62		! (2_1) db0 = vis_fpadd32(db0,DA1);
662*25c28e83SPiotr Jasiukajtis
663*25c28e83SPiotr Jasiukajtis.cont19:
664*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f38,%f40		! (3_1) res0 *= xx0;
665*25c28e83SPiotr Jasiukajtis	orcc	%l3,%l4,%g0
666*25c28e83SPiotr Jasiukajtis	st	%f30,[%fp+tmp4]		! (2_0) iexp0 = ((int*)&db0)[0];
667*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f38		! (0_0) h_hi0 = vis_fand(h0,DC2);
668*25c28e83SPiotr Jasiukajtis
669*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f58,%f54		! (4_1) res0 = K2 * xx0;
670*25c28e83SPiotr Jasiukajtis	srax	%g1,8,%o5		! (0_0) iexp0 >>= 8;
671*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%l3		! (4_0) hx0 = *(int*)px;
672*25c28e83SPiotr Jasiukajtis	fand	%f24,DA0,%f56		! (3_1) db0 = vis_fand(db0,DA0);
673*25c28e83SPiotr Jasiukajtis
674*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f62,%f62		! (2_1) res0 *= db0;
675*25c28e83SPiotr Jasiukajtis	and	%o5,_0x1ff0,%o5		! (0_0) di0 = iexp0 & 0x1ff0;
676*25c28e83SPiotr Jasiukajtis	bz,pn	%icc,.update19a
677*25c28e83SPiotr Jasiukajtis	fand	%f18,DC0,%f60		! (1_0) h0 = vis_fand(db0,DC0);
678*25c28e83SPiotr Jasiukajtis.cont19a:
679*25c28e83SPiotr Jasiukajtis	ldd	[TBL+%o5],%f22		! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
680*25c28e83SPiotr Jasiukajtis	add	%o0,TBL,%g1		! (3_1) si0 = (char*)sqrt_arr + di0;
681*25c28e83SPiotr Jasiukajtis	and	%l3,_0x7fffffff,%l3	! (4_0) hx0 &= 0x7fffffff;
682*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f38,%f38		! (0_0) xx0 = h0 - h_hi0;
683*25c28e83SPiotr Jasiukajtis
684*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f24		! (3_0) dy0 = y0 * (double)y0;
685*25c28e83SPiotr Jasiukajtis	cmp	%l3,_0x7f3504f3		! (4_0) hx ? 0x7f3504f3
686*25c28e83SPiotr Jasiukajtis	lda	[stridey+%o7]0x82,%l4	! (4_0) hy0 = *(int*)py;
687*25c28e83SPiotr Jasiukajtis	faddd	%f40,DC1,%f40		! (3_1) res0 += DC1;
688*25c28e83SPiotr Jasiukajtis
689*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f56,%f36	! (3_1) db0 = vis_fmul8x16(SCALE, db0);
690*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update20		! (4_0) if ( hx >= 0x7f3504f3 )
691*25c28e83SPiotr Jasiukajtis	ldd	[%g1+8],%f56		! (3_1) dtmp0 = ((double*)si0)[1];
692*25c28e83SPiotr Jasiukajtis	faddd	%f54,K1,%f54		! (4_1) res0 += K1;
693*25c28e83SPiotr Jasiukajtis
694*25c28e83SPiotr Jasiukajtis	lda	[%i1]0x82,%f17		! (4_0) x0 = *px;
695*25c28e83SPiotr Jasiukajtis.cont20:
696*25c28e83SPiotr Jasiukajtis	subcc	counter,5,counter	! counter -= 5
697*25c28e83SPiotr Jasiukajtis	add	%o4,stridez,%l7		! pz += stridez
698*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (2_1) ftmp0 = (float)res0;
699*25c28e83SPiotr Jasiukajtis
700*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f22,%f38		! (0_0) xx0 *= dmp0;
701*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (4_0) hy0 &= 0x7fffffff;
702*25c28e83SPiotr Jasiukajtis	st	%f14,[%o4]		! (2_1) *pz = ftmp0;
703*25c28e83SPiotr Jasiukajtis	for	%f60,DC1,%f46		! (1_0) h0 = vis_for(h0,DC1);
704*25c28e83SPiotr Jasiukajtis
705*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp3],%g1		! (1_0) iexp0 = ((int*)&db0)[0];
706*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f40,%f62		! (3_1) res0 = dtmp0 * res0;
707*25c28e83SPiotr Jasiukajtis	bpos,pt	%icc,.main_loop
708*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f24,%f24		! (3_0) db0 = dx0 + dy0;
709*25c28e83SPiotr Jasiukajtis
710*25c28e83SPiotr Jasiukajtis	add	counter,5,counter
711*25c28e83SPiotr Jasiukajtis
/*
 * .tail: drain the software pipeline after the main loop.
 *
 * Each stage below first does "subcc counter,1,counter; bneg .begin", so the
 * stage only runs while counter was still > 0.  Up to four in-flight results
 * are completed and stored, in the order tagged by the inline comments:
 * (3_2), (4_2), (0_1), (1_1).
 *
 * The branch delay slots are not annulled, so they always execute.  Every
 * delay slot after the first copies the address following the last stored
 * result into %l7; %l7 is the register the .spec/.spec1 paths use as pz.
 */
712*25c28e83SPiotr Jasiukajtis.tail:
713*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
714*25c28e83SPiotr Jasiukajtis	bneg	.begin
715*25c28e83SPiotr Jasiukajtis	nop
716*25c28e83SPiotr Jasiukajtis
/* Stage (3_2): finish and store at [%l7]; later stages are advanced alongside. */
717*25c28e83SPiotr Jasiukajtis	fpadd32	%f36,DA1,%f36		! (3_2) db0 = vis_fpadd32(db0,DA1);
718*25c28e83SPiotr Jasiukajtis
719*25c28e83SPiotr Jasiukajtis	fmuld	%f54,%f58,%f58		! (4_2) res0 *= xx0;
720*25c28e83SPiotr Jasiukajtis	fand	%f46,DC2,%f44		! (1_1) h_hi0 = vis_fand(h0,DC2);
721*25c28e83SPiotr Jasiukajtis
722*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f38,%f56		! (0_1) res0 = K2 * xx0;
723*25c28e83SPiotr Jasiukajtis	srax	%g1,8,%g5		! (1_1) iexp0 >>= 8;
724*25c28e83SPiotr Jasiukajtis	fand	%f20,DA0,%f54		! (4_2) db0 = vis_fand(db0,DA0);
725*25c28e83SPiotr Jasiukajtis
726*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (3_2) res0 *= db0;
727*25c28e83SPiotr Jasiukajtis	and	%g5,_0x1ff0,%g5		! (1_1) di0 = iexp0 & 0x1ff0;
728*25c28e83SPiotr Jasiukajtis
729*25c28e83SPiotr Jasiukajtis	ldd	[%g5+TBL],%f22		! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
730*25c28e83SPiotr Jasiukajtis	add	%o3,TBL,%g1		! (4_2) si0 = (char*)sqrt_arr + di0;
731*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f44,%f44		! (1_1) xx0 = h0 - h_hi0;
732*25c28e83SPiotr Jasiukajtis
733*25c28e83SPiotr Jasiukajtis	faddd	%f58,DC1,%f36		! (4_2) res0 += DC1;
734*25c28e83SPiotr Jasiukajtis
735*25c28e83SPiotr Jasiukajtis	faddd	%f56,K1,%f58		! (0_1) res0 += K1;
736*25c28e83SPiotr Jasiukajtis	ldd	[%g1+8],%f56		! (4_2) dtmp0 = ((double*)si0)[1];
737*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f54,%f54	! (4_2) db0 = vis_fmul8x16(SCALE, db0);
738*25c28e83SPiotr Jasiukajtis
739*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (3_2) ftmp0 = (float)res0;
740*25c28e83SPiotr Jasiukajtis
741*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f22,%f44		! (1_1) xx0 *= dmp0;
742*25c28e83SPiotr Jasiukajtis	add	%l7,stridez,%o7		! pz += stridez
743*25c28e83SPiotr Jasiukajtis	st	%f14,[%l7]		! (3_2) *pz = ftmp0;
744*25c28e83SPiotr Jasiukajtis
/* Exit check; the delay slot sets %l7 = %o7 (address after the (3_2) store). */
745*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
746*25c28e83SPiotr Jasiukajtis	bneg	.begin
747*25c28e83SPiotr Jasiukajtis	or	%g0,%o7,%l7
748*25c28e83SPiotr Jasiukajtis
/* Stage (4_2): finish and store at [%o7]. */
749*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f36,%f36		! (4_2) res0 = dtmp0 * res0;
750*25c28e83SPiotr Jasiukajtis
751*25c28e83SPiotr Jasiukajtis	fpadd32	%f54,DA1,%f62		! (4_2) db0 = vis_fpadd32(db0,DA1);
752*25c28e83SPiotr Jasiukajtis
753*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f38,%f38		! (0_1) res0 *= xx0;
754*25c28e83SPiotr Jasiukajtis
755*25c28e83SPiotr Jasiukajtis	fmuld	K2,%f44,%f56		! (1_1) res0 = K2 * xx0;
756*25c28e83SPiotr Jasiukajtis	fand	%f0,DA0,%f54		! (0_1) db0 = vis_fand(db0,DA0);
757*25c28e83SPiotr Jasiukajtis
758*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f62,%f62		! (4_2) res0 *= db0;
759*25c28e83SPiotr Jasiukajtis
760*25c28e83SPiotr Jasiukajtis	add	%o5,TBL,%o0		! (0_1) si0 = (char*)sqrt_arr + di0;
761*25c28e83SPiotr Jasiukajtis
762*25c28e83SPiotr Jasiukajtis	faddd	%f38,DC1,%f36		! (0_1) res0 += DC1;
763*25c28e83SPiotr Jasiukajtis
764*25c28e83SPiotr Jasiukajtis	faddd	%f56,K1,%f38		! (1_1) res0 += K1;
765*25c28e83SPiotr Jasiukajtis	ldd	[%o0+8],%f56		! (0_1) dtmp0 = ((double*)si0)[1];
766*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f54,%f54	! (0_1) db0 = vis_fmul8x16(SCALE, db0);
767*25c28e83SPiotr Jasiukajtis
768*25c28e83SPiotr Jasiukajtis	add	%o7,stridez,%i0		! pz += stridez
769*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (4_2) ftmp0 = (float)res0;
770*25c28e83SPiotr Jasiukajtis
771*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f36,%f36		! (0_1) res0 = dtmp0 * res0;
772*25c28e83SPiotr Jasiukajtis
773*25c28e83SPiotr Jasiukajtis	fpadd32	%f54,DA1,%f62		! (0_1) db0 = vis_fpadd32(db0,DA1);
774*25c28e83SPiotr Jasiukajtis
775*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f44,%f44		! (1_1) res0 *= xx0;
776*25c28e83SPiotr Jasiukajtis	add	%i0,stridez,%i3		! pz += stridez
777*25c28e83SPiotr Jasiukajtis	st	%f14,[%o7]		! (4_2) *pz = ftmp0;
778*25c28e83SPiotr Jasiukajtis
/* Exit check; the delay slot sets %l7 = %i0 (address after the (4_2) store). */
779*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
780*25c28e83SPiotr Jasiukajtis	bneg	.begin
781*25c28e83SPiotr Jasiukajtis	or	%g0,%i0,%l7
782*25c28e83SPiotr Jasiukajtis
/* Stage (0_1): finish and store at [%i0]. */
783*25c28e83SPiotr Jasiukajtis	fand	%f18,DA0,%f56		! (1_1) db0 = vis_fand(db0,DA0);
784*25c28e83SPiotr Jasiukajtis
785*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f62,%f62		! (0_1) res0 *= db0;
786*25c28e83SPiotr Jasiukajtis
787*25c28e83SPiotr Jasiukajtis	add	%g5,TBL,%o3		! (1_1) si0 = (char*)sqrt_arr + di0;
788*25c28e83SPiotr Jasiukajtis
789*25c28e83SPiotr Jasiukajtis	faddd	%f44,DC1,%f44		! (1_1) res0 += DC1;
790*25c28e83SPiotr Jasiukajtis
791*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f56,%f36	! (1_1) db0 = vis_fmul8x16(SCALE, db0);
792*25c28e83SPiotr Jasiukajtis	ldd	[%o3+8],%f56		! (1_1) dtmp0 = ((double*)si0)[1];
793*25c28e83SPiotr Jasiukajtis
794*25c28e83SPiotr Jasiukajtis	add	%i3,stridez,%o4		! pz += stridez
795*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (0_1) ftmp0 = (float)res0;
796*25c28e83SPiotr Jasiukajtis
797*25c28e83SPiotr Jasiukajtis	st	%f14,[%i0]		! (0_1) *pz = ftmp0;
798*25c28e83SPiotr Jasiukajtis
/* Exit check; the delay slot sets %l7 = %i3 (address after the (0_1) store). */
799*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
800*25c28e83SPiotr Jasiukajtis	bneg	.begin
801*25c28e83SPiotr Jasiukajtis	or	%g0,%i3,%l7
802*25c28e83SPiotr Jasiukajtis
/* Stage (1_1): last in-flight result, stored at [%i3]. */
803*25c28e83SPiotr Jasiukajtis	fmuld	%f56,%f44,%f62		! (1_1) res0 = dtmp0 * res0;
804*25c28e83SPiotr Jasiukajtis
805*25c28e83SPiotr Jasiukajtis	fpadd32	%f36,DA1,%f36		! (1_1) db0 = vis_fpadd32(db0,DA1);
806*25c28e83SPiotr Jasiukajtis
807*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (1_1) res0 *= db0;
808*25c28e83SPiotr Jasiukajtis
809*25c28e83SPiotr Jasiukajtis	fdtos	%f62,%f14		! (1_1) ftmp0 = (float)res0;
810*25c28e83SPiotr Jasiukajtis
811*25c28e83SPiotr Jasiukajtis	st	%f14,[%i3]		! (1_1) *pz = ftmp0;
812*25c28e83SPiotr Jasiukajtis
/* Unconditional return to .begin; the delay slot sets %l7 = %o4 (= %i3 + stridez). */
813*25c28e83SPiotr Jasiukajtis	ba	.begin
814*25c28e83SPiotr Jasiukajtis	or	%g0,%o4,%l7
815*25c28e83SPiotr Jasiukajtis
816*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .spec1: scalar path that stores an all-zero word as the result.
 *
 * Writes %g0 (integer 0) to [%l7], advances pz (%l7) and py (%i2) by their
 * strides, and branches to .begin1; counter is decremented in the
 * always-executed delay slot.
 * NOTE(review): px is not advanced here -- presumably the branch site has
 * already stepped it; that code is outside this chunk, please confirm.
 */
817*25c28e83SPiotr Jasiukajtis.spec1:
818*25c28e83SPiotr Jasiukajtis	st	%g0,[%l7]		! *pz = 0;
819*25c28e83SPiotr Jasiukajtis	add	%l7,stridez,%l7		! pz += stridez
820*25c28e83SPiotr Jasiukajtis
821*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%i2		! py += stridey
822*25c28e83SPiotr Jasiukajtis	ba	.begin1
823*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter--
824*25c28e83SPiotr Jasiukajtis
825*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .spec: scalar path for one element with a large, infinite or NaN argument.
 *
 * Register use visible here: %l3 = hx, %l4 = hy (per the inline comments),
 * %i1 = px, %i2 = py, %l7 = pz.  y is loaded from [%i2] into %f8 in the first
 * delay slot (not annulled, so it is loaded on both paths).
 * NOTE(review): x is taken from %f17 and hx/hy are assumed to have their sign
 * bit already cleared; both are set up outside this chunk -- confirm.
 *
 * Finite path (hx and hy both < 0x7f800000):
 *     hyp = sqrt(x*(double)x + y*(double)y) using fsqrtd;
 *     if hyp compares greater than (or unordered with) DFMAX the annulled
 *     delay slot computes FMAX*FMAX as the result, otherwise hyp is narrowed
 *     with fdtos.  (The inline comments say DMAX; the operand is DFMAX.)
 *
 * Inf/NaN path (local label 2):
 *     fcmps x,y is issued first (inline comment: "exceptions"); then
 *     hx == 0x7f800000 or hy == 0x7f800000 stores the bit pattern 0x7f800000,
 *     anything else stores x * y.  Both stores sit in annulled delay slots and
 *     therefore run only when their branch is taken.
 *
 * Every path advances pz, px and py by their strides, decrements counter in
 * the final delay slot and continues at .begin1.  The label "1" is defined
 * twice; each "1f" binds to the next definition following it.
 */
826*25c28e83SPiotr Jasiukajtis.spec:
827*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7f800000),%i0
828*25c28e83SPiotr Jasiukajtis	cmp	%l3,%i0			! hx ? 0x7f800000
829*25c28e83SPiotr Jasiukajtis	bge,pt	%icc,2f			! if ( hx >= 0x7f800000 )
830*25c28e83SPiotr Jasiukajtis	ld	[%i2],%f8
831*25c28e83SPiotr Jasiukajtis
832*25c28e83SPiotr Jasiukajtis	cmp	%l4,%i0			! hy ? 0x7f800000
833*25c28e83SPiotr Jasiukajtis	bge,pt	%icc,2f			! if ( hy >= 0x7f800000 )
834*25c28e83SPiotr Jasiukajtis	nop
835*25c28e83SPiotr Jasiukajtis
836*25c28e83SPiotr Jasiukajtis	fsmuld	%f17,%f17,%f44		! x * (double)x
837*25c28e83SPiotr Jasiukajtis	fsmuld	%f8,%f8,%f24		! y * (double)y
838*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f24,%f24		! x * (double)x + y * (double)y
839*25c28e83SPiotr Jasiukajtis	fsqrtd	%f24,%f24		! hyp = sqrt(x * (double)x + y * (double)y);
840*25c28e83SPiotr Jasiukajtis	fcmped	%f24,DFMAX		! hyp ? DMAX
841*25c28e83SPiotr Jasiukajtis	fbug,a	1f			! if ( hyp > DMAX )
842*25c28e83SPiotr Jasiukajtis	fmuls	FMAX,FMAX,%f20		! ftmp0 = FMAX * FMAX;
843*25c28e83SPiotr Jasiukajtis
844*25c28e83SPiotr Jasiukajtis	fdtos	%f24,%f20		! ftmp0 = (float)hyp;
845*25c28e83SPiotr Jasiukajtis1:
846*25c28e83SPiotr Jasiukajtis	st	%f20,[%l7]		! *pz = ftmp0;
847*25c28e83SPiotr Jasiukajtis	add	%l7,stridez,%l7		! pz += stridez
848*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
849*25c28e83SPiotr Jasiukajtis
850*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%i2		! py += stridey
851*25c28e83SPiotr Jasiukajtis	ba	.begin1
852*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter--
853*25c28e83SPiotr Jasiukajtis2:
854*25c28e83SPiotr Jasiukajtis	fcmps	%f17,%f8		! exceptions
855*25c28e83SPiotr Jasiukajtis	cmp	%l3,%i0			! hx ? 0x7f800000
856*25c28e83SPiotr Jasiukajtis	be,a	%icc,1f			! if ( hx == 0x7f800000 )
857*25c28e83SPiotr Jasiukajtis	st	%i0,[%l7]		! *(int*)pz = 0x7f800000;
858*25c28e83SPiotr Jasiukajtis
859*25c28e83SPiotr Jasiukajtis	cmp	%l4,%i0			! hy ? 0x7f800000
860*25c28e83SPiotr Jasiukajtis	be,a	%icc,1f			! if ( hy == 0x7f800000
861*25c28e83SPiotr Jasiukajtis	st	%i0,[%l7]		! *(int*)pz = 0x7f800000;
862*25c28e83SPiotr Jasiukajtis
863*25c28e83SPiotr Jasiukajtis	fmuls	%f17,%f8,%f8		! x * y
864*25c28e83SPiotr Jasiukajtis	st	%f8,[%l7]		! *pz = x * y;
865*25c28e83SPiotr Jasiukajtis
866*25c28e83SPiotr Jasiukajtis1:
867*25c28e83SPiotr Jasiukajtis	add	%l7,stridez,%l7		! pz += stridez
868*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
869*25c28e83SPiotr Jasiukajtis
870*25c28e83SPiotr Jasiukajtis	add	%i2,stridey,%i2		! py += stridey
871*25c28e83SPiotr Jasiukajtis	ba	.begin1
872*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter	! counter--
873*25c28e83SPiotr Jasiukajtis
874*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update0: out-of-line handler that returns to .cont0.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 1 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %o7 + stridey (left in %i5) to tmp_py, counter - 1 to tmp_counter, and
 * counter is set to 1 in the ba delay slot.
 * NOTE(review): presumably this truncates the current pass and records where
 * to resume; the branch site and the code that reloads tmp_* are outside this
 * chunk.
 */
875*25c28e83SPiotr Jasiukajtis.update0:
876*25c28e83SPiotr Jasiukajtis	cmp	counter,1
877*25c28e83SPiotr Jasiukajtis	ble	.cont0
878*25c28e83SPiotr Jasiukajtis	fzeros	%f17
879*25c28e83SPiotr Jasiukajtis
880*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
881*25c28e83SPiotr Jasiukajtis
882*25c28e83SPiotr Jasiukajtis	add	%o7,stridey,%i5
883*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
884*25c28e83SPiotr Jasiukajtis
885*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
886*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
887*25c28e83SPiotr Jasiukajtis
888*25c28e83SPiotr Jasiukajtis	ba	.cont0
889*25c28e83SPiotr Jasiukajtis	or	%g0,1,counter
890*25c28e83SPiotr Jasiukajtis
891*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update1: out-of-line handler that returns to .cont1.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 1 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %i5 to tmp_py, counter - 1 to tmp_counter, and counter is set to 1 in the
 * ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * branch site and the consumer of tmp_* are outside this chunk.
 */
892*25c28e83SPiotr Jasiukajtis.update1:
893*25c28e83SPiotr Jasiukajtis	cmp	counter,1
894*25c28e83SPiotr Jasiukajtis	ble	.cont1
895*25c28e83SPiotr Jasiukajtis	fzeros	%f17
896*25c28e83SPiotr Jasiukajtis
897*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
898*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
899*25c28e83SPiotr Jasiukajtis
900*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
901*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
902*25c28e83SPiotr Jasiukajtis
903*25c28e83SPiotr Jasiukajtis	ba	.cont1
904*25c28e83SPiotr Jasiukajtis	or	%g0,1,counter
905*25c28e83SPiotr Jasiukajtis
906*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update2: out-of-line handler that returns to .cont2.
 *
 * %f8 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 2 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %o4 to tmp_py, counter - 2 to tmp_counter, and counter is set to 2 in the
 * ba delay slot.
 * NOTE(review): most sibling handlers zero %f17 rather than %f8; the branch
 * site is outside this chunk, so confirm which register .cont2 reads.
 */
907*25c28e83SPiotr Jasiukajtis.update2:
908*25c28e83SPiotr Jasiukajtis	cmp	counter,2
909*25c28e83SPiotr Jasiukajtis	ble	.cont2
910*25c28e83SPiotr Jasiukajtis	fzeros	%f8
911*25c28e83SPiotr Jasiukajtis
912*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
913*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_py]
914*25c28e83SPiotr Jasiukajtis
915*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
916*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
917*25c28e83SPiotr Jasiukajtis
918*25c28e83SPiotr Jasiukajtis	ba	.cont2
919*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
920*25c28e83SPiotr Jasiukajtis
921*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update3: out-of-line handler that returns to .cont3.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 2 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %o4 to tmp_py, counter - 2 to tmp_counter, and counter is set to 2 in the
 * ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * branch site and the consumer of tmp_* are outside this chunk.
 */
922*25c28e83SPiotr Jasiukajtis.update3:
923*25c28e83SPiotr Jasiukajtis	cmp	counter,2
924*25c28e83SPiotr Jasiukajtis	ble	.cont3
925*25c28e83SPiotr Jasiukajtis	fzeros	%f17
926*25c28e83SPiotr Jasiukajtis
927*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
928*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_py]
929*25c28e83SPiotr Jasiukajtis
930*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
931*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
932*25c28e83SPiotr Jasiukajtis
933*25c28e83SPiotr Jasiukajtis	ba	.cont3
934*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
935*25c28e83SPiotr Jasiukajtis
936*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update4: out-of-line handler that returns to .cont4.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 3 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %i5 to tmp_py, counter - 3 to tmp_counter, and counter is set to 3 in the
 * ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * branch site and the consumer of tmp_* are outside this chunk.
 */
937*25c28e83SPiotr Jasiukajtis.update4:
938*25c28e83SPiotr Jasiukajtis	cmp	counter,3
939*25c28e83SPiotr Jasiukajtis	ble	.cont4
940*25c28e83SPiotr Jasiukajtis	fzeros	%f17
941*25c28e83SPiotr Jasiukajtis
942*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
943*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
944*25c28e83SPiotr Jasiukajtis
945*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
946*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
947*25c28e83SPiotr Jasiukajtis
948*25c28e83SPiotr Jasiukajtis	ba	.cont4
949*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
950*25c28e83SPiotr Jasiukajtis
951*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update5: out-of-line handler that returns to .cont5.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 3 nothing else happens.  Otherwise %i1 - stridex (computed
 * into %i2, which is overwritten) is saved to tmp_px, %i5 to tmp_py,
 * counter - 3 to tmp_counter, and counter is set to 3 in the ba delay slot.
 * NOTE(review): the subtraction suggests px was already advanced at the
 * branch site; that code is outside this chunk -- confirm.
 */
952*25c28e83SPiotr Jasiukajtis.update5:
953*25c28e83SPiotr Jasiukajtis	cmp	counter,3
954*25c28e83SPiotr Jasiukajtis	ble	.cont5
955*25c28e83SPiotr Jasiukajtis	fzeros	%f17
956*25c28e83SPiotr Jasiukajtis
957*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%i2
958*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]
959*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
960*25c28e83SPiotr Jasiukajtis
961*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
962*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
963*25c28e83SPiotr Jasiukajtis
964*25c28e83SPiotr Jasiukajtis	ba	.cont5
965*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
966*25c28e83SPiotr Jasiukajtis
967*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update6: out-of-line handler that returns to .cont6.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 4 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %i2 to tmp_py, counter - 4 to tmp_counter, and counter is set to 4 in the
 * ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * branch site and the consumer of tmp_* are outside this chunk.
 */
968*25c28e83SPiotr Jasiukajtis.update6:
969*25c28e83SPiotr Jasiukajtis	cmp	counter,4
970*25c28e83SPiotr Jasiukajtis	ble	.cont6
971*25c28e83SPiotr Jasiukajtis	fzeros	%f17
972*25c28e83SPiotr Jasiukajtis
973*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
974*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_py]
975*25c28e83SPiotr Jasiukajtis
976*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
977*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
978*25c28e83SPiotr Jasiukajtis
979*25c28e83SPiotr Jasiukajtis	ba	.cont6
980*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
981*25c28e83SPiotr Jasiukajtis
982*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update7: out-of-line handler that returns to .cont7.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 4 nothing else happens.  Otherwise %i1 - stridex (computed
 * into %o7, which is overwritten) is saved to tmp_px, %i2 to tmp_py,
 * counter - 4 to tmp_counter, and counter is set to 4 in the ba delay slot.
 * NOTE(review): the subtraction suggests px was already advanced at the
 * branch site; that code is outside this chunk -- confirm.
 */
983*25c28e83SPiotr Jasiukajtis.update7:
984*25c28e83SPiotr Jasiukajtis	cmp	counter,4
985*25c28e83SPiotr Jasiukajtis	ble	.cont7
986*25c28e83SPiotr Jasiukajtis	fzeros	%f17
987*25c28e83SPiotr Jasiukajtis
988*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o7
989*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_px]
990*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_py]
991*25c28e83SPiotr Jasiukajtis
992*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
993*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
994*25c28e83SPiotr Jasiukajtis
995*25c28e83SPiotr Jasiukajtis	ba	.cont7
996*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
997*25c28e83SPiotr Jasiukajtis
998*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update8: out-of-line handler that returns to .cont8.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 5 nothing else happens.  Otherwise %i1 - stridex (computed
 * into %o5, which is overwritten) is saved to tmp_px, %o7 to tmp_py,
 * counter - 5 to tmp_counter, and counter is set to 5 in the ba delay slot.
 * NOTE(review): the subtraction suggests px was already advanced at the
 * branch site; that code is outside this chunk -- confirm.
 */
999*25c28e83SPiotr Jasiukajtis.update8:
1000*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1001*25c28e83SPiotr Jasiukajtis	ble	.cont8
1002*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1003*25c28e83SPiotr Jasiukajtis
1004*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o5
1005*25c28e83SPiotr Jasiukajtis	stx	%o5,[%fp+tmp_px]
1006*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_py]
1007*25c28e83SPiotr Jasiukajtis
1008*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1009*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1010*25c28e83SPiotr Jasiukajtis
1011*25c28e83SPiotr Jasiukajtis	ba	.cont8
1012*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter
1013*25c28e83SPiotr Jasiukajtis
1014*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update9: out-of-line handler that returns to .cont9.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 5 nothing else happens.  Otherwise %i1 - stridex (computed
 * into %o5, which is overwritten) is saved to tmp_px, %o7 to tmp_py,
 * counter - 5 to tmp_counter, and counter is set to 5 in the ba delay slot.
 * The body is the same as .update8 apart from the .cont9 targets.
 * NOTE(review): the subtraction suggests px was already advanced at the
 * branch site; that code is outside this chunk -- confirm.
 */
1015*25c28e83SPiotr Jasiukajtis.update9:
1016*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1017*25c28e83SPiotr Jasiukajtis	ble	.cont9
1018*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1019*25c28e83SPiotr Jasiukajtis
1020*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%o5
1021*25c28e83SPiotr Jasiukajtis	stx	%o5,[%fp+tmp_px]
1022*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_py]
1023*25c28e83SPiotr Jasiukajtis
1024*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1025*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1026*25c28e83SPiotr Jasiukajtis
1027*25c28e83SPiotr Jasiukajtis	ba	.cont9
1028*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter
1029*25c28e83SPiotr Jasiukajtis
1030*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update10: out-of-line handler that returns to .cont10.
 *
 * Unlike its siblings it starts with four pipeline instructions (stages
 * (3_1), (4_0) and (4_1) per the inline comments) before the counter test.
 * NOTE(review): presumably these replay work that the branch site skips; the
 * branch site is outside this chunk -- confirm.
 *
 * After that: %f17 is zeroed in the ble delay slot (not annulled, so on both
 * paths).  If counter <= 6 nothing else happens.  Otherwise %i1 is saved to
 * tmp_px, %o7 + stridey (left in %i5) to tmp_py, counter - 6 to tmp_counter,
 * and counter is set to 6 in the ba delay slot.
 */
1031*25c28e83SPiotr Jasiukajtis.update10:
1032*25c28e83SPiotr Jasiukajtis	fmul8x16	SCALE,%f56,%f36	! (3_1) db0 = vis_fmul8x16(SCALE, db0);
1033*25c28e83SPiotr Jasiukajtis	and	%l4,_0x7fffffff,%l4	! (4_0) hy0 &= 0x7fffffff;
1034*25c28e83SPiotr Jasiukajtis	ldd	[%g1+8],%f56		! (3_1) dtmp0 = ((double*)si0)[1];
1035*25c28e83SPiotr Jasiukajtis	faddd	%f54,K1,%f54		! (4_1) res0 += K1;
1036*25c28e83SPiotr Jasiukajtis
1037*25c28e83SPiotr Jasiukajtis	cmp	counter,6
1038*25c28e83SPiotr Jasiukajtis	ble	.cont10
1039*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1040*25c28e83SPiotr Jasiukajtis
1041*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1042*25c28e83SPiotr Jasiukajtis	add	%o7,stridey,%i5
1043*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
1044*25c28e83SPiotr Jasiukajtis
1045*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter
1046*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1047*25c28e83SPiotr Jasiukajtis
1048*25c28e83SPiotr Jasiukajtis	ba	.cont10
1049*25c28e83SPiotr Jasiukajtis	or	%g0,6,counter
1050*25c28e83SPiotr Jasiukajtis
1051*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update11: out-of-line handler that returns to .cont11.
 *
 * %f17 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 1 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %i5 to tmp_py, counter - 1 to tmp_counter, and counter is set to 1 in the
 * ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * branch site and the consumer of tmp_* are outside this chunk.
 */
1052*25c28e83SPiotr Jasiukajtis.update11:
1053*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1054*25c28e83SPiotr Jasiukajtis	ble	.cont11
1055*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1056*25c28e83SPiotr Jasiukajtis
1057*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1058*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
1059*25c28e83SPiotr Jasiukajtis
1060*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
1061*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1062*25c28e83SPiotr Jasiukajtis
1063*25c28e83SPiotr Jasiukajtis	ba	.cont11
1064*25c28e83SPiotr Jasiukajtis	or	%g0,1,counter
1065*25c28e83SPiotr Jasiukajtis
1066*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update12: out-of-line handler that returns to .cont12.
 *
 * %f8 is zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 2 nothing else happens.  Otherwise %i0 is saved to tmp_px,
 * %i5 + stridey (left in %o4) to tmp_py, counter - 2 to tmp_counter, and
 * counter is set to 2 in the ba delay slot.
 * NOTE(review): most sibling handlers zero %f17 rather than %f8; the branch
 * site is outside this chunk, so confirm which register .cont12 reads.
 */
1067*25c28e83SPiotr Jasiukajtis.update12:
1068*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1069*25c28e83SPiotr Jasiukajtis	ble	.cont12
1070*25c28e83SPiotr Jasiukajtis	fzeros	%f8
1071*25c28e83SPiotr Jasiukajtis
1072*25c28e83SPiotr Jasiukajtis	stx	%i0,[%fp+tmp_px]
1073*25c28e83SPiotr Jasiukajtis	add	%i5,stridey,%o4
1074*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_py]
1075*25c28e83SPiotr Jasiukajtis
1076*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
1077*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1078*25c28e83SPiotr Jasiukajtis
1079*25c28e83SPiotr Jasiukajtis	ba	.cont12
1080*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
1081*25c28e83SPiotr Jasiukajtis
1082*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update13: out-of-line handler that returns to .cont13.
 *
 * The main loop branches here (bz after "orcc %l3,%l4,%g0") when hx | hy is
 * zero.  %f17 is zeroed in the ble delay slot (not annulled, so on both
 * paths).  If counter <= 2 nothing else happens.  Otherwise %i0 is saved to
 * tmp_px, %o4 to tmp_py, counter - 2 to tmp_counter, and counter is set to 2
 * in the ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * consumer of tmp_* is outside this chunk.
 */
1083*25c28e83SPiotr Jasiukajtis.update13:
1084*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1085*25c28e83SPiotr Jasiukajtis	ble	.cont13
1086*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1087*25c28e83SPiotr Jasiukajtis
1088*25c28e83SPiotr Jasiukajtis	stx	%i0,[%fp+tmp_px]
1089*25c28e83SPiotr Jasiukajtis	stx	%o4,[%fp+tmp_py]
1090*25c28e83SPiotr Jasiukajtis
1091*25c28e83SPiotr Jasiukajtis	sub	counter,2,counter
1092*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1093*25c28e83SPiotr Jasiukajtis
1094*25c28e83SPiotr Jasiukajtis	ba	.cont13
1095*25c28e83SPiotr Jasiukajtis	or	%g0,2,counter
1096*25c28e83SPiotr Jasiukajtis
1097*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update14: out-of-line handler that returns to .cont14.
 *
 * The main loop branches here when hx >= 0x7f3504f3 for the (1_0) element.
 * %f17 (just loaded with x0 from [%i1]) is zeroed in the ble delay slot,
 * which is not annulled and so runs on both paths.  If counter <= 3 nothing
 * else happens.  Otherwise %i1 is saved to tmp_px, %o4 + stridey (left in
 * %i5; the main loop computes the same value right after .cont14) to tmp_py,
 * counter - 3 to tmp_counter, and counter is set to 3 in the ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * consumer of tmp_* is outside this chunk.
 */
1098*25c28e83SPiotr Jasiukajtis.update14:
1099*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1100*25c28e83SPiotr Jasiukajtis	ble	.cont14
1101*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1102*25c28e83SPiotr Jasiukajtis
1103*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1104*25c28e83SPiotr Jasiukajtis	add	%o4,stridey,%i5
1105*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
1106*25c28e83SPiotr Jasiukajtis
1107*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
1108*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1109*25c28e83SPiotr Jasiukajtis
1110*25c28e83SPiotr Jasiukajtis	ba	.cont14
1111*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
1112*25c28e83SPiotr Jasiukajtis
1113*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update15: out-of-line handler that returns to .cont15.
 *
 * The main loop branches here for the (1_0) element when hy >= 0x7f3504f3,
 * or when hx | hy is zero.  %f17 is zeroed in the ble delay slot (not
 * annulled, so on both paths).  If counter <= 3 nothing else happens.
 * Otherwise %i1 - stridex is saved to tmp_px (the main loop has already done
 * "px += stridex" on %i1 before branching; %i2 is used as scratch), %i5 to
 * tmp_py, counter - 3 to tmp_counter, and counter is set to 3 in the ba
 * delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * consumer of tmp_* is outside this chunk.
 */
1114*25c28e83SPiotr Jasiukajtis.update15:
1115*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1116*25c28e83SPiotr Jasiukajtis	ble	.cont15
1117*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1118*25c28e83SPiotr Jasiukajtis
1119*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%i2
1120*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_px]
1121*25c28e83SPiotr Jasiukajtis	stx	%i5,[%fp+tmp_py]
1122*25c28e83SPiotr Jasiukajtis
1123*25c28e83SPiotr Jasiukajtis	sub	counter,3,counter
1124*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1125*25c28e83SPiotr Jasiukajtis
1126*25c28e83SPiotr Jasiukajtis	ba	.cont15
1127*25c28e83SPiotr Jasiukajtis	or	%g0,3,counter
1128*25c28e83SPiotr Jasiukajtis
1129*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update16: out-of-line handler that returns to .cont16.
 *
 * The main loop branches here when hx >= 0x7f3504f3 for the (2_0) element.
 * At the branch site the (1_0) faddd sits after the delay slot and before
 * .cont16, so a taken branch skips it; it is therefore replayed first here.
 * %f17 is then zeroed in the ble delay slot (not annulled, so on both paths).
 * If counter <= 4 nothing else happens.  Otherwise %i1 is saved to tmp_px,
 * %i2 to tmp_py, counter - 4 to tmp_counter, and counter is set to 4 in the
 * ba delay slot.
 * NOTE(review): presumably records a resume point for a later pass; the
 * consumer of tmp_* is outside this chunk.
 */
1130*25c28e83SPiotr Jasiukajtis.update16:
1131*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f32,%f18		! (1_0) db0 = dx0 + dy0;
1132*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1133*25c28e83SPiotr Jasiukajtis	ble	.cont16
1134*25c28e83SPiotr Jasiukajtis	fzeros	%f17
1135*25c28e83SPiotr Jasiukajtis
1136*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1137*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_py]
1138*25c28e83SPiotr Jasiukajtis
1139*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
1140*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1141*25c28e83SPiotr Jasiukajtis
1142*25c28e83SPiotr Jasiukajtis	ba	.cont16
1143*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter
1144*25c28e83SPiotr Jasiukajtis
/*
 * .update17: out-of-line fix-up; the branch that reaches it is outside
 * this part of the file.
 *
 * counter <= 4 (signed): return to .cont17 with counter unchanged.
 * counter >  4: store %i1 to tmp_px, %i2 to tmp_py and counter - 4 to
 * tmp_counter, then return to .cont17 with counter = 4.
 * %f17 is zeroed on both paths (it sits in the ble delay slot).
 *
 * NOTE(review): presumably the saved pointers/count let a later pass
 * resume with the remaining elements -- confirm against the main loop.
 */
1145*25c28e83SPiotr Jasiukajtis	.align	16
1146*25c28e83SPiotr Jasiukajtis.update17:
1147*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1148*25c28e83SPiotr Jasiukajtis	ble	.cont17
1149*25c28e83SPiotr Jasiukajtis	fzeros	%f17	/* delay slot: executes whether or not ble is taken */
1150*25c28e83SPiotr Jasiukajtis
/* counter > 4: record the two saved addresses (64-bit stores) */
1151*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1152*25c28e83SPiotr Jasiukajtis	stx	%i2,[%fp+tmp_py]
1153*25c28e83SPiotr Jasiukajtis
/* save counter - 4 as a 32-bit word */
1154*25c28e83SPiotr Jasiukajtis	sub	counter,4,counter
1155*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1156*25c28e83SPiotr Jasiukajtis
1157*25c28e83SPiotr Jasiukajtis	ba	.cont17
1158*25c28e83SPiotr Jasiukajtis	or	%g0,4,counter	/* delay slot: counter = 4 */
1159*25c28e83SPiotr Jasiukajtis
/*
 * .update18: out-of-line fix-up; the branch that reaches it is outside
 * this part of the file.
 *
 * counter <= 5 (signed): return to .cont18 with counter unchanged.
 * counter >  5: store %l7 to tmp_px, %o7 to tmp_py and counter - 5 to
 * tmp_counter, then return to .cont18 with counter = 5.
 * %f17 is zeroed on both paths (it sits in the ble delay slot).
 *
 * NOTE(review): presumably the saved pointers/count let a later pass
 * resume with the remaining elements -- confirm against the main loop.
 */
1160*25c28e83SPiotr Jasiukajtis	.align	16
1161*25c28e83SPiotr Jasiukajtis.update18:
1162*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1163*25c28e83SPiotr Jasiukajtis	ble	.cont18
1164*25c28e83SPiotr Jasiukajtis	fzeros	%f17	/* delay slot: executes whether or not ble is taken */
1165*25c28e83SPiotr Jasiukajtis
/* counter > 5: record the two saved addresses (64-bit stores) */
1166*25c28e83SPiotr Jasiukajtis	stx	%l7,[%fp+tmp_px]
1167*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_py]
1168*25c28e83SPiotr Jasiukajtis
/* save counter - 5 as a 32-bit word */
1169*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1170*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1171*25c28e83SPiotr Jasiukajtis
1172*25c28e83SPiotr Jasiukajtis	ba	.cont18
1173*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter	/* delay slot: counter = 5 */
1174*25c28e83SPiotr Jasiukajtis
1175*25c28e83SPiotr Jasiukajtis	.align	16
1175*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update19: out-of-line fix-up; the branch that reaches it is outside
 * this part of the file.
 *
 * Always executes fpadd32 %f40,DA1 -> %f62 first, then:
 * counter <= 5 (signed): return to .cont19 with counter unchanged.
 * counter >  5: store %l7 to tmp_px, %o7 to tmp_py and counter - 5 to
 * tmp_counter, then return to .cont19 with counter = 5.
 * %f17 is zeroed on both paths (it sits in the ble delay slot).
 *
 * NOTE(review): the leading fpadd32 carries a pipeline-stage tag; presumably
 * it stands in for an instruction skipped at the branch site -- confirm.
 */
1176*25c28e83SPiotr Jasiukajtis.update19:
1177*25c28e83SPiotr Jasiukajtis	fpadd32	%f40,DA1,%f62		! (2_1) db0 = vis_fpadd32(db0,DA1);
1178*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1179*25c28e83SPiotr Jasiukajtis	ble	.cont19
1180*25c28e83SPiotr Jasiukajtis	fzeros	%f17	/* delay slot: executes whether or not ble is taken */
1181*25c28e83SPiotr Jasiukajtis
/* counter > 5: record the two saved addresses (64-bit stores) */
1182*25c28e83SPiotr Jasiukajtis	stx	%l7,[%fp+tmp_px]
1183*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_py]
1184*25c28e83SPiotr Jasiukajtis
/* save counter - 5 as a 32-bit word */
1185*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1186*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1187*25c28e83SPiotr Jasiukajtis
1188*25c28e83SPiotr Jasiukajtis	ba	.cont19
1189*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter	/* delay slot: counter = 5 */
1190*25c28e83SPiotr Jasiukajtis
/*
 * .update19a: out-of-line fix-up; the branch that reaches it is outside
 * this part of the file.
 *
 * counter <= 5 (signed): return to .cont19a with counter unchanged.
 * counter >  5: store %l7 to tmp_px, %o7 to tmp_py and counter - 5 to
 * tmp_counter, then return to .cont19a with counter = 5.
 * %f17 is zeroed on both paths (it sits in the ble delay slot).
 *
 * NOTE(review): presumably the saved pointers/count let a later pass
 * resume with the remaining elements -- confirm against the main loop.
 */
1191*25c28e83SPiotr Jasiukajtis	.align	16
1192*25c28e83SPiotr Jasiukajtis.update19a:
1193*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1194*25c28e83SPiotr Jasiukajtis	ble	.cont19a
1195*25c28e83SPiotr Jasiukajtis	fzeros	%f17	/* delay slot: executes whether or not ble is taken */
1196*25c28e83SPiotr Jasiukajtis
/* counter > 5: record the two saved addresses (64-bit stores) */
1197*25c28e83SPiotr Jasiukajtis	stx	%l7,[%fp+tmp_px]
1198*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp_py]
1199*25c28e83SPiotr Jasiukajtis
/* save counter - 5 as a 32-bit word */
1200*25c28e83SPiotr Jasiukajtis	sub	counter,5,counter
1201*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1202*25c28e83SPiotr Jasiukajtis
1203*25c28e83SPiotr Jasiukajtis	ba	.cont19a
1204*25c28e83SPiotr Jasiukajtis	or	%g0,5,counter	/* delay slot: counter = 5 */
1205*25c28e83SPiotr Jasiukajtis
/*
 * .update20: out-of-line fix-up; the branch that reaches it is outside
 * this part of the file.
 *
 * Always computes %f54 = %f54 + K1 first, then:
 * counter <= 6 (signed): return to .cont20 with counter unchanged.
 * counter >  6: store %i1 to tmp_px, %o7 + stridey to tmp_py and
 * counter - 6 to tmp_counter, then return to .cont20 with counter = 6.
 * %f17 is zeroed on both paths (it sits in the ble delay slot).
 *
 * NOTE(review): the leading faddd carries a pipeline-stage tag; presumably
 * it stands in for an instruction skipped at the branch site -- confirm.
 */
1206*25c28e83SPiotr Jasiukajtis	.align	16
1207*25c28e83SPiotr Jasiukajtis.update20:
1208*25c28e83SPiotr Jasiukajtis	faddd	%f54,K1,%f54		! (4_1) res0 += K1;
1209*25c28e83SPiotr Jasiukajtis	cmp	counter,6
1210*25c28e83SPiotr Jasiukajtis	ble	.cont20
1211*25c28e83SPiotr Jasiukajtis	fzeros	%f17	/* delay slot: executes whether or not ble is taken */
1212*25c28e83SPiotr Jasiukajtis
/* counter > 6: %g1 = %o7 + stridey is the address saved in tmp_py */
1213*25c28e83SPiotr Jasiukajtis	stx	%i1,[%fp+tmp_px]
1214*25c28e83SPiotr Jasiukajtis	add	%o7,stridey,%g1
1215*25c28e83SPiotr Jasiukajtis	stx	%g1,[%fp+tmp_py]
1216*25c28e83SPiotr Jasiukajtis
/* save counter - 6 as a 32-bit word */
1217*25c28e83SPiotr Jasiukajtis	sub	counter,6,counter
1218*25c28e83SPiotr Jasiukajtis	st	counter,[%fp+tmp_counter]
1219*25c28e83SPiotr Jasiukajtis
1220*25c28e83SPiotr Jasiukajtis	ba	.cont20
1221*25c28e83SPiotr Jasiukajtis	or	%g0,6,counter	/* delay slot: counter = 6 */
1222*25c28e83SPiotr Jasiukajtis
/*
 * .exit: return to the caller.  The restore in the delay slot of ret
 * switches back to the caller's register window as control returns.
 * SET_SIZE is a macro defined outside this file section; presumably it
 * emits the symbol-size directive for __vhypotf -- confirm in the header.
 */
1223*25c28e83SPiotr Jasiukajtis.exit:
1224*25c28e83SPiotr Jasiukajtis	ret
1225*25c28e83SPiotr Jasiukajtis	restore
1226*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vhypotf)
1227*25c28e83SPiotr Jasiukajtis
1228