xref: /titanic_44/usr/src/lib/libmvec/common/vis/__vrsqrt.S (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vrsqrt.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	64
35*25c28e83SPiotr Jasiukajtis
! Constant pool for __vrsqrt.  Each entry is one 8-byte double-word, and the
! code loads entries by fixed byte offset from .CONST_TBL (K1 at 0x00, K2 at
! 0x08, ... DC0 at 0x30, ... DONE at 0x60), so the order and count of the
! entries below must not change.
!
! K1..K6 are the polynomial coefficients, applied in Horner form as
!   (((((K6*xx + K5)*xx + K4)*xx + K3)*xx + K2)*xx + K1)*xx
36*25c28e83SPiotr Jasiukajtis.CONST_TBL:
37*25c28e83SPiotr Jasiukajtis	.word	0xbfe00000, 0x0000002f	! K1 =-5.00000000000005209867e-01;
38*25c28e83SPiotr Jasiukajtis	.word	0x3fd80000, 0x00000058	! K2 = 3.75000000000004884257e-01;
39*25c28e83SPiotr Jasiukajtis	.word	0xbfd3ffff, 0xff444bc8	! K3 =-3.12499999317136886551e-01;
40*25c28e83SPiotr Jasiukajtis	.word	0x3fd17fff, 0xff5006fe	! K4 = 2.73437499359815081532e-01;
41*25c28e83SPiotr Jasiukajtis	.word	0xbfcf80bb, 0xb33ef574	! K5 =-2.46116125605037803130e-01;
42*25c28e83SPiotr Jasiukajtis	.word	0x3fcce0af, 0xf8156949	! K6 = 2.25606914648617522896e-01;
43*25c28e83SPiotr Jasiukajtis
44*25c28e83SPiotr Jasiukajtis	.word	0x001fffff, 0xffffffff	! DC0: fand mask keeping the low 53 bits of x
45*25c28e83SPiotr Jasiukajtis	.word	0x3fe00000, 0x00000000	! DC1: bit pattern of 0.5, or-ed in after the DC0 mask
46*25c28e83SPiotr Jasiukajtis	.word	0x00002000, 0x00000000	! DC2: fpadd32 increment (half of the lowest bit kept by DC3)
47*25c28e83SPiotr Jasiukajtis	.word	0x7fffc000, 0x00000000	! DC3: fand mask applied to res_c after the DC2 add
48*25c28e83SPiotr Jasiukajtis	.word	0x0007ffff, 0xffffffff	! DC4: low-51-bit mask, vis_fand(res,DC4) in the algorithm note
49*25c28e83SPiotr Jasiukajtis
50*25c28e83SPiotr Jasiukajtis	.word	0x43200000, 0x00000000	! D2ON51  = pow(2,51)
51*25c28e83SPiotr Jasiukajtis	.word	0x3ff00000, 0x00000000	! DONE   = 1.0
52*25c28e83SPiotr Jasiukajtis
! Integer register aliases.
!   stridex, stridey - element strides scaled to bytes (the entry code shifts
!                      the incoming %i2 and %i4 left by 3).  stridey shares
!                      %l7 with the PIC_SETUP/PIC_SET scratch register, so it
!                      is only written after the PIC_SET lines have run.
!   counter          - count tested against 0 at .begin1 and against 7
!                      before entering .main_loop.
!   TBL              - address of __vlibm_TBL_rsqrt (set via PIC_SET).
!   _0x7ff00000, _0x00100000 - the two hx comparison thresholds, built once
!                      with sethi in the entry code.
53*25c28e83SPiotr Jasiukajtis#define stridex		%l5
54*25c28e83SPiotr Jasiukajtis#define stridey		%l7
55*25c28e83SPiotr Jasiukajtis#define counter		%l0
56*25c28e83SPiotr Jasiukajtis#define TBL		%l3
57*25c28e83SPiotr Jasiukajtis#define _0x7ff00000	%o0
58*25c28e83SPiotr Jasiukajtis#define _0x00100000	%o1
59*25c28e83SPiotr Jasiukajtis
! Double-precision register aliases for the constants loaded from .CONST_TBL.
! K1..K6 and DC0..DC3 are loaded once in the entry code; DONE is loaded in the
! delay slot of the branch at .begin1.
60*25c28e83SPiotr Jasiukajtis#define DC0		%f56
61*25c28e83SPiotr Jasiukajtis#define DC1		%f54
62*25c28e83SPiotr Jasiukajtis#define DC2		%f48
63*25c28e83SPiotr Jasiukajtis#define DC3		%f46
64*25c28e83SPiotr Jasiukajtis#define K6		%f42
65*25c28e83SPiotr Jasiukajtis#define K5		%f20
66*25c28e83SPiotr Jasiukajtis#define K4		%f52
67*25c28e83SPiotr Jasiukajtis#define K3		%f50
68*25c28e83SPiotr Jasiukajtis#define K2		%f14
69*25c28e83SPiotr Jasiukajtis#define K1		%f12
70*25c28e83SPiotr Jasiukajtis#define DONE		%f4
71*25c28e83SPiotr Jasiukajtis
! Values that .begin copies into counter and into the x pointer (%i1) at the
! start of a pass; tmp_counter is cleared immediately after being copied.
72*25c28e83SPiotr Jasiukajtis#define tmp_counter	%g5
73*25c28e83SPiotr Jasiukajtis#define tmp_px		%o5
74*25c28e83SPiotr Jasiukajtis
! Eight 8-byte scratch slots addressed relative to %fp.  The pipeline stores
! the 64-bit dlexp bit pattern of an element with stx and later reloads it
! into a floating-point register with ldd.
75*25c28e83SPiotr Jasiukajtis#define tmp0		STACK_BIAS-0x40
76*25c28e83SPiotr Jasiukajtis#define tmp1		STACK_BIAS-0x38
77*25c28e83SPiotr Jasiukajtis#define tmp2		STACK_BIAS-0x30
78*25c28e83SPiotr Jasiukajtis#define tmp3		STACK_BIAS-0x28
79*25c28e83SPiotr Jasiukajtis#define tmp4		STACK_BIAS-0x20
80*25c28e83SPiotr Jasiukajtis#define tmp5		STACK_BIAS-0x18
81*25c28e83SPiotr Jasiukajtis#define tmp6		STACK_BIAS-0x10
82*25c28e83SPiotr Jasiukajtis#define tmp7		STACK_BIAS-0x08
83*25c28e83SPiotr Jasiukajtis
84*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
! 0x40 = 8 slots * 8 bytes; the save instruction in the entry code adds this
! to the frame size.
85*25c28e83SPiotr Jasiukajtis#define tmps		0x40
86*25c28e83SPiotr Jasiukajtis
87*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
88*25c28e83SPiotr Jasiukajtis!      !!!!!   algorithm   !!!!!
89*25c28e83SPiotr Jasiukajtis!  ((float*)&res)[0] = ((float*)px)[0];
90*25c28e83SPiotr Jasiukajtis!  ((float*)&res)[1] = ((float*)px)[1];
91*25c28e83SPiotr Jasiukajtis!  hx = *(int*)px;
92*25c28e83SPiotr Jasiukajtis!  if ( hx >= 0x7ff00000 )
93*25c28e83SPiotr Jasiukajtis!  {
94*25c28e83SPiotr Jasiukajtis!    res = DONE / res;
95*25c28e83SPiotr Jasiukajtis!    ((float*)py)[0] = ((float*)&res)[0];
96*25c28e83SPiotr Jasiukajtis!    ((float*)py)[1] = ((float*)&res)[1];
97*25c28e83SPiotr Jasiukajtis!    px += stridex;
98*25c28e83SPiotr Jasiukajtis!    py += stridey;
99*25c28e83SPiotr Jasiukajtis!    continue;
100*25c28e83SPiotr Jasiukajtis!  }
101*25c28e83SPiotr Jasiukajtis!  if ( hx < 0x00100000 )
102*25c28e83SPiotr Jasiukajtis!  {
103*25c28e83SPiotr Jasiukajtis!    ax = hx & 0x7fffffff;
104*25c28e83SPiotr Jasiukajtis!    lx = ((int*)px)[1];
105*25c28e83SPiotr Jasiukajtis!
106*25c28e83SPiotr Jasiukajtis!    if ( (ax | lx) == 0 )
107*25c28e83SPiotr Jasiukajtis!    {
108*25c28e83SPiotr Jasiukajtis!      res = DONE / res;
109*25c28e83SPiotr Jasiukajtis!      ((float*)py)[0] = ((float*)&res)[0];
110*25c28e83SPiotr Jasiukajtis!      ((float*)py)[1] = ((float*)&res)[1];
111*25c28e83SPiotr Jasiukajtis!      px += stridex;
112*25c28e83SPiotr Jasiukajtis!      py += stridey;
113*25c28e83SPiotr Jasiukajtis!      continue;
114*25c28e83SPiotr Jasiukajtis!    }
115*25c28e83SPiotr Jasiukajtis!    else if ( hx >= 0 )
116*25c28e83SPiotr Jasiukajtis!    {
117*25c28e83SPiotr Jasiukajtis!      if ( hx < 0x00080000 )
118*25c28e83SPiotr Jasiukajtis!      {
119*25c28e83SPiotr Jasiukajtis!        res = *(long long*)&res;
120*25c28e83SPiotr Jasiukajtis!        hx = *(int*)&res - (537 << 21);
121*25c28e83SPiotr Jasiukajtis!      }
122*25c28e83SPiotr Jasiukajtis!      else
123*25c28e83SPiotr Jasiukajtis!      {
124*25c28e83SPiotr Jasiukajtis!        res = vis_fand(res,DC4);
125*25c28e83SPiotr Jasiukajtis!        res = *(long long*)&res;
126*25c28e83SPiotr Jasiukajtis!        res += D2ON51;
127*25c28e83SPiotr Jasiukajtis!        hx = *(int*)&res - (537 << 21);
128*25c28e83SPiotr Jasiukajtis!      }
129*25c28e83SPiotr Jasiukajtis!    }
130*25c28e83SPiotr Jasiukajtis!    else
131*25c28e83SPiotr Jasiukajtis!    {
132*25c28e83SPiotr Jasiukajtis!      res = sqrt(res);
133*25c28e83SPiotr Jasiukajtis!      ((float*)py)[0] = ((float*)&res)[0];
134*25c28e83SPiotr Jasiukajtis!      ((float*)py)[1] = ((float*)&res)[1];
135*25c28e83SPiotr Jasiukajtis!      px += stridex;
136*25c28e83SPiotr Jasiukajtis!      py += stridey;
137*25c28e83SPiotr Jasiukajtis!      continue;
138*25c28e83SPiotr Jasiukajtis!    }
139*25c28e83SPiotr Jasiukajtis!  }
140*25c28e83SPiotr Jasiukajtis!
141*25c28e83SPiotr Jasiukajtis!  iexp = hx >> 21;
142*25c28e83SPiotr Jasiukajtis!  iexp = -iexp;
143*25c28e83SPiotr Jasiukajtis!  iexp += 0x5fe;
144*25c28e83SPiotr Jasiukajtis!  lexp = iexp << 52;
145*25c28e83SPiotr Jasiukajtis!  dlexp = *(double*)&lexp;
146*25c28e83SPiotr Jasiukajtis!  hx >>= 10;
147*25c28e83SPiotr Jasiukajtis!  hx &= 0x7f8;
148*25c28e83SPiotr Jasiukajtis!  hx += 8;
149*25c28e83SPiotr Jasiukajtis!  hx &= -16;
150*25c28e83SPiotr Jasiukajtis!
151*25c28e83SPiotr Jasiukajtis!  res = vis_fand(res,DC0);
152*25c28e83SPiotr Jasiukajtis!  res = vis_for(res,DC1);
153*25c28e83SPiotr Jasiukajtis!  res_c = vis_fpadd32(res,DC2);
154*25c28e83SPiotr Jasiukajtis!  res_c = vis_fand(res_c,DC3);
155*25c28e83SPiotr Jasiukajtis!
156*25c28e83SPiotr Jasiukajtis!  addr = (char*)arr + hx;
157*25c28e83SPiotr Jasiukajtis!  dexp_hi = ((double*)addr)[0];
158*25c28e83SPiotr Jasiukajtis!  dexp_lo = ((double*)addr)[1];
159*25c28e83SPiotr Jasiukajtis!  dtmp0 = dexp_hi * dexp_hi;
160*25c28e83SPiotr Jasiukajtis!  xx = res - res_c;
161*25c28e83SPiotr Jasiukajtis!  xx *= dtmp0;
162*25c28e83SPiotr Jasiukajtis!  res = K6 * xx;
163*25c28e83SPiotr Jasiukajtis!  res += K5;
164*25c28e83SPiotr Jasiukajtis!  res *= xx;
165*25c28e83SPiotr Jasiukajtis!  res += K4;
166*25c28e83SPiotr Jasiukajtis!  res *= xx;
167*25c28e83SPiotr Jasiukajtis!  res += K3;
168*25c28e83SPiotr Jasiukajtis!  res *= xx;
169*25c28e83SPiotr Jasiukajtis!  res += K2;
170*25c28e83SPiotr Jasiukajtis!  res *= xx;
171*25c28e83SPiotr Jasiukajtis!  res += K1;
172*25c28e83SPiotr Jasiukajtis!  res *= xx;
173*25c28e83SPiotr Jasiukajtis!  res = dexp_hi * res;
174*25c28e83SPiotr Jasiukajtis!  res += dexp_lo;
175*25c28e83SPiotr Jasiukajtis!  res += dexp_hi;
176*25c28e83SPiotr Jasiukajtis!
177*25c28e83SPiotr Jasiukajtis!  res *= dlexp;
178*25c28e83SPiotr Jasiukajtis!
179*25c28e83SPiotr Jasiukajtis!  ((float*)py)[0] = ((float*)&res)[0];
180*25c28e83SPiotr Jasiukajtis!  ((float*)py)[1] = ((float*)&res)[1];
181*25c28e83SPiotr Jasiukajtis!
182*25c28e83SPiotr Jasiukajtis!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
183*25c28e83SPiotr Jasiukajtis
184*25c28e83SPiotr Jasiukajtis	ENTRY(__vrsqrt)
185*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
186*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
187*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,.CONST_TBL,o3)
188*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
189*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi
190*25c28e83SPiotr Jasiukajtis
191*25c28e83SPiotr Jasiukajtis	ldd	[%o3],K1
192*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%o0
193*25c28e83SPiotr Jasiukajtis	mov	%i3,%o4
194*25c28e83SPiotr Jasiukajtis
195*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x08],K2
196*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00100000),%o1
197*25c28e83SPiotr Jasiukajtis	mov	%i1,tmp_px
198*25c28e83SPiotr Jasiukajtis
199*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x10],K3
200*25c28e83SPiotr Jasiukajtis	sll	%i2,3,stridex
201*25c28e83SPiotr Jasiukajtis	mov	%i0,tmp_counter
202*25c28e83SPiotr Jasiukajtis
203*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x18],K4
204*25c28e83SPiotr Jasiukajtis	sll	%i4,3,stridey
205*25c28e83SPiotr Jasiukajtis
206*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x20],K5
207*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x28],K6
208*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x30],DC0
209*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x38],DC1
210*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x40],DC2
211*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x48],DC3
212*25c28e83SPiotr Jasiukajtis
213*25c28e83SPiotr Jasiukajtis.begin:
214*25c28e83SPiotr Jasiukajtis	mov	tmp_counter,counter
215*25c28e83SPiotr Jasiukajtis	mov	tmp_px,%i1
216*25c28e83SPiotr Jasiukajtis	clr	tmp_counter
217*25c28e83SPiotr Jasiukajtis.begin1:
218*25c28e83SPiotr Jasiukajtis	cmp	counter,0
219*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.exit
220*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x60],DONE
221*25c28e83SPiotr Jasiukajtis
222*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f0		! (6_0) ((float*)res)[0] = ((float*)px)[0];
223*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ffffc00),%i0
224*25c28e83SPiotr Jasiukajtis
225*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f1		! (6_0) ((float*)res)[1] = ((float*)px)[1];
226*25c28e83SPiotr Jasiukajtis	add	%i0,1023,%i0
227*25c28e83SPiotr Jasiukajtis
228*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
229*25c28e83SPiotr Jasiukajtis
230*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%g1		! (6_1) hx = *(int*)px;
231*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i4
232*25c28e83SPiotr Jasiukajtis
233*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%l4
234*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%l6		! px += stridex
235*25c28e83SPiotr Jasiukajtis
236*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
237*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f8		! (0_0) ((float*)res)[0] = ((float*)px)[0];
238*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
239*25c28e83SPiotr Jasiukajtis
240*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f9		! (0_0) ((float*)res)[1] = ((float*)px)[1];
241*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
242*25c28e83SPiotr Jasiukajtis	and	%g1,%i0,%i2
243*25c28e83SPiotr Jasiukajtis
244*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (6_1) hx ? 0x7ff00000
245*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.spec0		! (6_1) if ( hx >= 0x7ff00000 )
246*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
247*25c28e83SPiotr Jasiukajtis
248*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (6_1) hx ? 0x00100000
249*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.spec1		! (6_1) if ( hx < 0x00100000 )
250*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
251*25c28e83SPiotr Jasiukajtis.cont_spec:
252*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
253*25c28e83SPiotr Jasiukajtis
254*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
255*25c28e83SPiotr Jasiukajtis
256*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l4		! (6_1) hx += 8;
257*25c28e83SPiotr Jasiukajtis
258*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (6_1) iexp += 0x5fe;
259*25c28e83SPiotr Jasiukajtis
260*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (0_0) hx = *(int*)px;
261*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (6_1) iexp << 52;
262*25c28e83SPiotr Jasiukajtis	and	%l4,-16,%l4		! (6_1) hx &= -16;
263*25c28e83SPiotr Jasiukajtis
264*25c28e83SPiotr Jasiukajtis	add	%l4,TBL,%l4		! (6_1) addr = (char*)arr + hx;
265*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp1]		! (6_1) dlexp = *(double*)lexp;
266*25c28e83SPiotr Jasiukajtis
267*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
268*25c28e83SPiotr Jasiukajtis	ldd	[%l4],%f30		! (6_1) dtmp0 = ((double*)addr)[0];
269*25c28e83SPiotr Jasiukajtis
270*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
271*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (1_0) ((float*)res)[0] = ((float*)px)[0];
272*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
273*25c28e83SPiotr Jasiukajtis
274*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
275*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
276*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (1_0) ((float*)res)[1] = ((float*)px)[1];
277*25c28e83SPiotr Jasiukajtis
278*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (0_0) hx ? 0x7ff00000
279*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update0		! (0_0) if ( hx >= 0x7ff00000 )
280*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f6		! (6_1) res_c = vis_fand(res_c,DC3);
281*25c28e83SPiotr Jasiukajtis.cont0:
282*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
283*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f30,%f10		! (6_1) dtmp0 = dexp_hi * dexp_hi;
284*25c28e83SPiotr Jasiukajtis
285*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (0_0) hx ? 0x00100000
286*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update1		! (0_0) if ( hx < 0x00100000 )
287*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
288*25c28e83SPiotr Jasiukajtis.cont1:
289*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
290*25c28e83SPiotr Jasiukajtis
291*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
292*25c28e83SPiotr Jasiukajtis
293*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l2		! (0_0) hx += 8;
294*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f6,%f6		! (6_1) xx = res - res_c;
295*25c28e83SPiotr Jasiukajtis
296*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (1_0) hx = *(int*)px;
297*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (0_0) iexp << 52;
298*25c28e83SPiotr Jasiukajtis	and	%l2,-16,%l2		! (0_0) hx &= -16;
299*25c28e83SPiotr Jasiukajtis
300*25c28e83SPiotr Jasiukajtis	add	%l2,TBL,%l2		! (0_0) addr = (char*)arr + hx;
301*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
302*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp2]		! (0_0) dlexp = *(double*)lexp;
303*25c28e83SPiotr Jasiukajtis
304*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f10,%f26		! (6_1) xx *= dtmp0;
305*25c28e83SPiotr Jasiukajtis	ldd	[%l2],%f10		! (0_0) dtmp0 = ((double*)addr)[0];
306*25c28e83SPiotr Jasiukajtis
307*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
308*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f6		! (2_0) ((float*)res)[0] = ((float*)px)[0];
309*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
310*25c28e83SPiotr Jasiukajtis
311*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
312*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (1_0) hx ? 0x7ff00000
313*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update2		! (1_0) if ( hx >= 0x7ff00000 )
314*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f7		! (2_0) ((float*)res)[1] = ((float*)px)[1];
315*25c28e83SPiotr Jasiukajtis.cont2:
316*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
317*25c28e83SPiotr Jasiukajtis
318*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! (0_0) dtmp0 = dexp_hi * dexp_hi;
319*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (1_0) hx ? 0x00100000
320*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update3		! (1_0) if ( hx < 0x00100000 )
321*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
322*25c28e83SPiotr Jasiukajtis.cont3:
323*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
324*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
325*25c28e83SPiotr Jasiukajtis
326*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (1_0) iexp += 0x5fe;
327*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (1_0) res_c = vis_fpadd32(res,DC2);
328*25c28e83SPiotr Jasiukajtis
329*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f26,%f62		! (6_1) res = K6 * xx;
330*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i2		! (1_0) hx += 8;
331*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f8,%f32		! (0_0) xx = res - res_c;
332*25c28e83SPiotr Jasiukajtis
333*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (2_0) hx = *(int*)px;
334*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (1_0) iexp << 52;
335*25c28e83SPiotr Jasiukajtis	and	%i2,-16,%i2		! (1_0) hx &= -16;
336*25c28e83SPiotr Jasiukajtis
337*25c28e83SPiotr Jasiukajtis	add	%i2,TBL,%i2		! (1_0) addr = (char*)arr + hx;
338*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp3]		! (1_0) dlexp = *(double*)lexp;
339*25c28e83SPiotr Jasiukajtis
340*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f10,%f32		! (0_0) xx *= dtmp0;
341*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
342*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f10		! (1_0) dtmp0 = ((double*)addr)[0];
343*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (6_1) res += K5;
344*25c28e83SPiotr Jasiukajtis
345*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
346*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (3_0) ((float*)res)[0] = ((float*)px)[0];
347*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
348*25c28e83SPiotr Jasiukajtis
349*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
350*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (2_0) hx ? 0x7ff00000
351*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update4		! (2_0) if ( hx >= 0x7ff00000 )
352*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (3_0) ((float*)res)[1] = ((float*)px)[1];
353*25c28e83SPiotr Jasiukajtis.cont4:
354*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f26,%f40		! (6_1) res *= xx;
355*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
356*25c28e83SPiotr Jasiukajtis
357*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f10,%f10		! (1_0) dtmp0 = dexp_hi * dexp_hi;
358*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (2_0) hx ? 0x00100000
359*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update5		! (2_0) if ( hx < 0x00100000 )
360*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
361*25c28e83SPiotr Jasiukajtis.cont5:
362*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
363*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
364*25c28e83SPiotr Jasiukajtis
365*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (2_0) iexp += 0x5fe;
366*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (2_0) res_c = vis_fpadd32(res,DC2);
367*25c28e83SPiotr Jasiukajtis
368*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f32,%f62		! (0_0) res = K6 * xx;
369*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i4		! (2_0) hx += 8;
370*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f8,%f6		! (1_0) xx = res - res_c;
371*25c28e83SPiotr Jasiukajtis
372*25c28e83SPiotr Jasiukajtis	faddd	%f40,K4,%f40		! (6_1) res += K4;
373*25c28e83SPiotr Jasiukajtis
374*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (3_0) hx = *(int*)px;
375*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (2_0) iexp << 52;
376*25c28e83SPiotr Jasiukajtis	and	%i4,-16,%i4		! (2_0) hx &= -16;
377*25c28e83SPiotr Jasiukajtis
378*25c28e83SPiotr Jasiukajtis	add	%i4,TBL,%i4		! (2_0) addr = (char*)arr + hx;
379*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp4]		! (2_0) dlexp = *(double*)lexp;
380*25c28e83SPiotr Jasiukajtis
381*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f10,%f38		! (1_0) xx *= dtmp0;
382*25c28e83SPiotr Jasiukajtis	ldd	[%i4],%f24		! (2_0) dtmp0 = ((double*)addr)[0];
383*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (0_0) res += K5;
384*25c28e83SPiotr Jasiukajtis
385*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f26,%f34		! (6_1) res *= xx;
386*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
387*25c28e83SPiotr Jasiukajtis
388*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
389*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f8		! (4_0) ((float*)res)[0] = ((float*)px)[0];
390*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
391*25c28e83SPiotr Jasiukajtis
392*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
393*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (3_0) hx ? 0x7ff00000
394*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update6		! (3_0) if ( hx >= 0x7ff00000 )
395*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f9		! (4_0) ((float*)res)[1] = ((float*)px)[1];
396*25c28e83SPiotr Jasiukajtis.cont6:
397*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f32,%f60		! (0_0) res *= xx;
398*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (3_0) hx ? 0x00100000
399*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f22		! (2_0) res_c = vis_fand(res_c,DC3);
400*25c28e83SPiotr Jasiukajtis
401*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f24,%f24		! (2_0) dtmp0 = dexp_hi * dexp_hi;
402*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update7		! (3_0) if ( hx < 0x00100000 )
403*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
404*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f6		! (6_1) res += K3;
405*25c28e83SPiotr Jasiukajtis.cont7:
406*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
407*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f16		! (4_0) res = vis_fand(res,DC0);
408*25c28e83SPiotr Jasiukajtis
409*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (3_0) iexp += 0x5fe;
410*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (3_0) res_c = vis_fpadd32(res,DC2);
411*25c28e83SPiotr Jasiukajtis
412*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f38,%f62		! (1_0) res = K6 * xx;
413*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i5		! (3_0) hx += 8;
414*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f22,%f28		! (2_0) xx = res - res_c;
415*25c28e83SPiotr Jasiukajtis
416*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f26,%f22		! (6_1) res *= xx;
417*25c28e83SPiotr Jasiukajtis	faddd	%f60,K4,%f60		! (0_0) res += K4;
418*25c28e83SPiotr Jasiukajtis
419*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (4_0) hx = *(int*)px;
420*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (3_0) iexp << 52;
421*25c28e83SPiotr Jasiukajtis	and	%i5,-16,%i5		! (3_0) hx &= -16;
422*25c28e83SPiotr Jasiukajtis
423*25c28e83SPiotr Jasiukajtis	add	%i5,TBL,%i5		! (3_0) addr = (char*)arr + hx;
424*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp5]		! (3_0) dlexp = *(double*)lexp;
425*25c28e83SPiotr Jasiukajtis
426*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f24,%f36		! (2_0) xx *= dtmp0;
427*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%i0		! px += stridex
428*25c28e83SPiotr Jasiukajtis	ldd	[%i5],%f28		! (3_0) dtmp0 = ((double*)addr)[0];
429*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (1_0) res += K5;
430*25c28e83SPiotr Jasiukajtis
431*25c28e83SPiotr Jasiukajtis	faddd	%f22,K2,%f10		! (6_1) res += K2;
432*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f34		! (0_0) res *= xx;
433*25c28e83SPiotr Jasiukajtis
434*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
435*25c28e83SPiotr Jasiukajtis	lda	[%i0]%asi,%f0		! (5_0) ((float*)res)[0] = ((float*)px)[0];
436*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f24		! (4_0) res = vis_for(res,DC1);
437*25c28e83SPiotr Jasiukajtis
438*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
439*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (4_0) hx ? 0x7ff00000
440*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update8		! (4_0) if ( hx >= 0x7ff00000 )
441*25c28e83SPiotr Jasiukajtis	lda	[%i0+4]%asi,%f1		! (5_0) ((float*)res)[1] = ((float*)px)[1];
442*25c28e83SPiotr Jasiukajtis.cont8:
443*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f40		! (3_0) res_c = vis_fand(res_c,DC3);
444*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;
445*25c28e83SPiotr Jasiukajtis
446*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f26,%f58		! (6_1) res *= xx;
447*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (4_0) hx ? 0x00100000
448*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
449*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f60		! (0_0) res += K3;
450*25c28e83SPiotr Jasiukajtis
451*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f28,%f28		! (3_0) dtmp0 = dexp_hi * dexp_hi;
452*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update9		! (4_0) if ( hx < 0x00100000 )
453*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
454*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
455*25c28e83SPiotr Jasiukajtis.cont9:
456*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (4_0) iexp += 0x5fe;
457*25c28e83SPiotr Jasiukajtis	fpadd32	%f24,DC2,%f18		! (4_0) res_c = vis_fpadd32(res,DC2);
458*25c28e83SPiotr Jasiukajtis
459*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f36,%f10		! (2_0) res = K6 * xx;
460*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l1		! (4_0) hx += 8;
461*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f40,%f44		! (3_0) xx = res - res_c;
462*25c28e83SPiotr Jasiukajtis
463*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f60		! (0_0) res *= xx;
464*25c28e83SPiotr Jasiukajtis	faddd	%f62,K4,%f6		! (1_0) res += K4;
465*25c28e83SPiotr Jasiukajtis
466*25c28e83SPiotr Jasiukajtis	lda	[%i0]%asi,%g1		! (5_0) hx = *(int*)px;
467*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (4_0) iexp << 52;
468*25c28e83SPiotr Jasiukajtis	and	%l1,-16,%l1		! (4_0) hx &= -16;
469*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f58		! (6_1) res += K1;
470*25c28e83SPiotr Jasiukajtis
471*25c28e83SPiotr Jasiukajtis	add	%i0,stridex,%i1		! px += stridex
472*25c28e83SPiotr Jasiukajtis	add	%l1,TBL,%l1		! (4_0) addr = (char*)arr + hx;
473*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp6]		! (4_0) dlexp = *(double*)lexp;
474*25c28e83SPiotr Jasiukajtis
475*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f28,%f40		! (3_0) xx *= dtmp0;
476*25c28e83SPiotr Jasiukajtis	ldd	[%l1],%f44		! (4_0) dtmp0 = ((double*)addr)[0];
477*25c28e83SPiotr Jasiukajtis	faddd	%f10,K5,%f62		! (2_0) res += K5;
478*25c28e83SPiotr Jasiukajtis
479*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f38,%f34		! (1_0) res *= xx;
480*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
481*25c28e83SPiotr Jasiukajtis	nop
482*25c28e83SPiotr Jasiukajtis	faddd	%f60,K2,%f60		! (0_0) res += K2;
483*25c28e83SPiotr Jasiukajtis
484*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (5_0) res = vis_for(res,DC1);
485*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
486*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f6		! (6_0) ((float*)res)[0] = ((float*)px)[0];
487*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f26,%f26		! (6_1) res *= xx;
488*25c28e83SPiotr Jasiukajtis
489*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
490*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (5_0) hx ? 0x7ff00000
491*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update10		! (5_0) if ( hx >= 0x7ff00000 )
492*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f7		! (6_0) ((float*)res)[1] = ((float*)px)[1];
493*25c28e83SPiotr Jasiukajtis.cont10:
494*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (4_0) res_c = vis_fand(res_c,DC3);
495*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;
496*25c28e83SPiotr Jasiukajtis
497*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f58		! (0_0) res *= xx;
498*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (5_0) hx ? 0x00100000
499*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
500*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (1_0) res += K3;
501*25c28e83SPiotr Jasiukajtis
502*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f26,%f26		! (6_1) res = dexp_hi * res;
503*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update11		! (5_0) if ( hx < 0x00100000 )
504*25c28e83SPiotr Jasiukajtis	nop
505*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
506*25c28e83SPiotr Jasiukajtis.cont11:
507*25c28e83SPiotr Jasiukajtis	ldd	[%l4+8],%f60		! (6_1) dexp_lo = ((double*)addr)[1];
508*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f44,%f44		! (4_0) dtmp0 = dexp_hi * dexp_hi;
509*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (5_0) res_c = vis_fpadd32(res,DC2);
510*25c28e83SPiotr Jasiukajtis
511*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f40,%f22		! (3_0) res = K6 * xx;
512*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i3		! (5_0) hx += 8;
513*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f8,%f10		! (4_0) xx = res - res_c;
514*25c28e83SPiotr Jasiukajtis
515*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f38,%f24		! (1_0) res *= xx;
516*25c28e83SPiotr Jasiukajtis	or	%g0,%o4,%i0
517*25c28e83SPiotr Jasiukajtis
518*25c28e83SPiotr Jasiukajtis	cmp	counter,7
519*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.tail
520*25c28e83SPiotr Jasiukajtis	faddd	%f62,K4,%f34		! (2_0) res += K4;
521*25c28e83SPiotr Jasiukajtis
522*25c28e83SPiotr Jasiukajtis	ba	.main_loop
523*25c28e83SPiotr Jasiukajtis	sub	counter,7,counter	! counter
524*25c28e83SPiotr Jasiukajtis
525*25c28e83SPiotr Jasiukajtis	.align	16
526*25c28e83SPiotr Jasiukajtis.main_loop:
527*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (5_0) iexp += 0x5fe;
528*25c28e83SPiotr Jasiukajtis	and	%i3,-16,%i3		! (5_1) hx &= -16;
529*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%g1		! (6_1) hx = *(int*)px;
530*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f58		! (0_1) res += K1;
531*25c28e83SPiotr Jasiukajtis
532*25c28e83SPiotr Jasiukajtis	add	%i3,TBL,%i3		! (5_1) addr = (char*)arr + hx;
533*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (5_1) iexp << 52;
534*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp0]		! (5_1) dlexp = *(double*)lexp;
535*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f60,%f8		! (6_2) res += dexp_lo;
536*25c28e83SPiotr Jasiukajtis
537*25c28e83SPiotr Jasiukajtis	faddd	%f22,K5,%f62		! (3_1) res += K5;
538*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%l6		! px += stridex
539*25c28e83SPiotr Jasiukajtis	ldd	[%i3],%f22		! (5_1) dtmp0 = ((double*)addr)[0];
540*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f44,%f60		! (4_1) xx *= dtmp0;
541*25c28e83SPiotr Jasiukajtis
542*25c28e83SPiotr Jasiukajtis	faddd	%f24,K2,%f26		! (1_1) res += K2;
543*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! px += stridey
544*25c28e83SPiotr Jasiukajtis	ldd	[%l2],%f24		! (0_1) dexp_hi = ((double*)addr)[0];
545*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f36,%f34		! (2_1) res *= xx;
546*25c28e83SPiotr Jasiukajtis
547*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f32,%f58		! (0_1) res *= xx;
548*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
549*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (0_0) ((float*)res)[0] = ((float*)px)[0];
550*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
551*25c28e83SPiotr Jasiukajtis
552*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (0_0) ((float*)res)[1] = ((float*)px)[1];
553*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
554*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f22,%f10		! (5_1) dtmp0 = dexp_hi * dexp_hi;
555*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f30,%f30		! (6_2) res += dexp_hi;
556*25c28e83SPiotr Jasiukajtis
557*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f32		! (3_1) res *= xx;
558*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (6_1) hx ? 0x7ff00000
559*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f62		! (6_2) dlexp = *(double*)lexp;
560*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (5_1) res_c = vis_fand(res_c,DC3);
561*25c28e83SPiotr Jasiukajtis
562*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
563*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update12		! (6_1) if ( hx >= 0x7ff00000 )
564*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
565*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (2_1) res += K3;
566*25c28e83SPiotr Jasiukajtis.cont12:
567*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f58,%f58		! (0_1) res = dexp_hi * res;
568*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (6_1) hx ? 0x00100000
569*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
570*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
571*25c28e83SPiotr Jasiukajtis
572*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f62,%f2		! (6_2) res *= dlexp;
573*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update13		! (6_1) if ( hx < 0x00100000 )
574*25c28e83SPiotr Jasiukajtis	ldd	[%l2+8],%f30		! (0_1) dexp_lo = ((double*)addr)[1];
575*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
576*25c28e83SPiotr Jasiukajtis.cont13:
577*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f60,%f62		! (4_1) res = K6 * xx;
578*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l4		! (6_1) hx += 8;
579*25c28e83SPiotr Jasiukajtis	st	%f2,[%i0]		! (6_2) ((float*)py)[0] = ((float*)res)[0];
580*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f8,%f6		! (5_1) xx = res - res_c;
581*25c28e83SPiotr Jasiukajtis
582*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f36,%f28		! (2_1) res *= xx;
583*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (6_1) iexp += 0x5fe;
584*25c28e83SPiotr Jasiukajtis	st	%f3,[%i0+4]		! (6_2) ((float*)py)[1] = ((float*)res)[1];
585*25c28e83SPiotr Jasiukajtis	faddd	%f32,K4,%f32		! (3_1) res += K4;
586*25c28e83SPiotr Jasiukajtis
587*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (0_0) hx = *(int*)px;
588*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (6_1) iexp << 52;
589*25c28e83SPiotr Jasiukajtis	and	%l4,-16,%l4		! (6_1) hx = -16;
590*25c28e83SPiotr Jasiukajtis	faddd	%f26,K1,%f26		! (1_1) res += K1;
591*25c28e83SPiotr Jasiukajtis
592*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%i0		! px += stridey
593*25c28e83SPiotr Jasiukajtis	add	%l4,TBL,%l4		! (6_1) addr = (char*)arr + hx;
594*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp1]		! (6_1) dlexp = *(double*)lexp;
595*25c28e83SPiotr Jasiukajtis	faddd	%f58,%f30,%f8		! (0_1) res += dexp_lo;
596*25c28e83SPiotr Jasiukajtis
597*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f10,%f58		! (5_1) xx *= dtmp0;
598*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
599*25c28e83SPiotr Jasiukajtis	ldd	[%l4],%f30		! (6_1) dtmp0 = ((double*)addr)[0];
600*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (4_1) res += K5;
601*25c28e83SPiotr Jasiukajtis
602*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f40,%f34		! (3_1) res *= xx;
603*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
604*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f4		! (1_1) dexp_hi = ((double*)addr)[0];
605*25c28e83SPiotr Jasiukajtis	faddd	%f28,K2,%f32		! (2_1) res += K2;
606*25c28e83SPiotr Jasiukajtis
607*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
608*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
609*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f6		! (1_0) ((float*)res)[0] = ((float*)px)[0];
610*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
611*25c28e83SPiotr Jasiukajtis
612*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f30,%f30		! (6_1) dtmp0 = dexp_hi * dexp_hi;
613*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
614*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f7		! (1_0) ((float*)res)[1] = ((float*)px)[1];
615*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f24,%f24		! (0_1) res += dexp_hi;
616*25c28e83SPiotr Jasiukajtis
617*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f60,%f38		! (4_1) res *= xx;
618*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (0_0) hx ? 0x7ff00000
619*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f62		! (0_1) dlexp = *(double*)lexp;
620*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (6_1) res_c = vis_fand(res_c,DC3);
621*25c28e83SPiotr Jasiukajtis
622*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
623*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update14		! (0_0) if ( hx >= 0x7ff00000 )
624*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
625*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (3_1) res += K3;
626*25c28e83SPiotr Jasiukajtis.cont14:
627*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f26,%f26		! (1_1) res = dexp_hi * res;
628*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (0_0) hx ? 0x00100000
629*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
630*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
631*25c28e83SPiotr Jasiukajtis
632*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f62,%f2		! (0_1) res *= dlexp;
633*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update15		! (0_0) if ( hx < 0x00100000 )
634*25c28e83SPiotr Jasiukajtis	ldd	[%i2+8],%f24		! (1_1) dexp_lo = ((double*)addr)[1];
635*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
636*25c28e83SPiotr Jasiukajtis.cont15:
637*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f58,%f62		! (5_1) res = K6 * xx;
638*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l2		! (0_0) hx += 8;
639*25c28e83SPiotr Jasiukajtis	st	%f2,[%i1]		! (0_1) ((float*)py)[0] = ((float*)res)[0];
640*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f8,%f10		! (6_1) xx = res - res_c;
641*25c28e83SPiotr Jasiukajtis
642*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f40,%f44		! (3_1) res *= xx;
643*25c28e83SPiotr Jasiukajtis	nop
644*25c28e83SPiotr Jasiukajtis	st	%f3,[%i1+4]		! (0_1) ((float*)py)[1] = ((float*)res)[1];
645*25c28e83SPiotr Jasiukajtis	faddd	%f38,K4,%f38		! (4_1) res += K4;
646*25c28e83SPiotr Jasiukajtis
647*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (1_0) hx = *(int*)px;
648*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (0_0) iexp << 52;
649*25c28e83SPiotr Jasiukajtis	and	%l2,-16,%l2		! (0_0) hx = -16;
650*25c28e83SPiotr Jasiukajtis	faddd	%f32,K1,%f32		! (2_1) res += K1;
651*25c28e83SPiotr Jasiukajtis
652*25c28e83SPiotr Jasiukajtis	add	%l2,TBL,%l2		! (0_0) addr = (char*)arr + hx;
653*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
654*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp2]		! (0_0) dlexp = *(double*)lexp;
655*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f8		! (1_1) res += dexp_lo;
656*25c28e83SPiotr Jasiukajtis
657*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f30,%f26		! (6_1) xx *= dtmp0;
658*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! px += stridey
659*25c28e83SPiotr Jasiukajtis	ldd	[%l2],%f30		! (0_0) dtmp0 = ((double*)addr)[0];
660*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (5_1) res += K5;
661*25c28e83SPiotr Jasiukajtis
662*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f60,%f34		! (4_1) res *= xx;
663*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
664*25c28e83SPiotr Jasiukajtis	ldd	[%i4],%f24		! (2_1) dexp_hi = ((double*)addr)[0];
665*25c28e83SPiotr Jasiukajtis	faddd	%f44,K2,%f38		! (3_1) res += K2;
666*25c28e83SPiotr Jasiukajtis
667*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
668*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
669*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (2_0) ((float*)res)[0] = ((float*)px)[0];
670*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
671*25c28e83SPiotr Jasiukajtis
672*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f30,%f30		! (0_0) dtmp0 = dexp_hi * dexp_hi;
673*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (1_0) hx ? 0x7ff00000
674*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (2_0) ((float*)res)[1] = ((float*)px)[1];
675*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f4,%f4		! (1_1) res += dexp_hi;
676*25c28e83SPiotr Jasiukajtis
677*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f58,%f36		! (5_1) res *= xx;
678*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update16		! (1_0) if ( hx >= 0x7ff00000 )
679*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp3],%f62		! (1_1) dlexp = *(double*)lexp;
680*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
681*25c28e83SPiotr Jasiukajtis.cont16:
682*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
683*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (1_0) hx ? 0x00100000
684*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
685*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (4_1) res += K3;
686*25c28e83SPiotr Jasiukajtis
687*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f32,%f32		! (2_1) res = dexp_hi * res;
688*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update17		! (1_0) if ( hx < 0x00100000 )
689*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
690*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
691*25c28e83SPiotr Jasiukajtis.cont17:
692*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f62,%f2		! (1_1) res *= dlexp;
693*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (1_0) iexp += 0x5fe;
694*25c28e83SPiotr Jasiukajtis	ldd	[%i4+8],%f4		! (2_1) dexp_lo = ((double*)addr)[1];
695*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (1_0) res_c = vis_fpadd32(res,DC2);
696*25c28e83SPiotr Jasiukajtis
697*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f26,%f62		! (6_1) res = K6 * xx;
698*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i2		! (1_0) hx += 8;
699*25c28e83SPiotr Jasiukajtis	st	%f2,[%i0]		! (1_1) ((float*)py)[0] = ((float*)res)[0];
700*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f8,%f6		! (0_0) xx = res - res_c;
701*25c28e83SPiotr Jasiukajtis
702*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f60,%f28		! (4_1) res *= xx;
703*25c28e83SPiotr Jasiukajtis	nop
704*25c28e83SPiotr Jasiukajtis	st	%f3,[%i0+4]		! (1_1) ((float*)py)[1] = ((float*)res)[1];
705*25c28e83SPiotr Jasiukajtis	faddd	%f36,K4,%f36		! (5_1) res += K4;
706*25c28e83SPiotr Jasiukajtis
707*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (2_0) hx = *(int*)px;
708*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (1_0) iexp << 52;
709*25c28e83SPiotr Jasiukajtis	and	%i2,-16,%i2		! (1_0) hx = -16;
710*25c28e83SPiotr Jasiukajtis	faddd	%f38,K1,%f38		! (3_1) res += K1;
711*25c28e83SPiotr Jasiukajtis
712*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%i0		! px += stridey
713*25c28e83SPiotr Jasiukajtis	add	%i2,TBL,%i2		! (1_0) addr = (char*)arr + hx;
714*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp3]		! (1_0) dlexp = *(double*)lexp;
715*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f4,%f8		! (2_1) res += dexp_lo;
716*25c28e83SPiotr Jasiukajtis
717*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f30,%f32		! (0_0) xx *= dtmp0;
718*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
719*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f30		! (1_0) dtmp0 = ((double*)addr)[0];
720*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (6_1) res += K5;
721*25c28e83SPiotr Jasiukajtis
722*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f58,%f34		! (5_1) res *= xx;
723*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
724*25c28e83SPiotr Jasiukajtis	ldd	[%i5],%f4		! (3_1) dexp_hi = ((double*)addr)[0];
725*25c28e83SPiotr Jasiukajtis	faddd	%f28,K2,%f36		! (4_1) res += K2;
726*25c28e83SPiotr Jasiukajtis
727*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
728*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
729*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f6		! (3_0) ((float*)res)[0] = ((float*)px)[0];
730*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
731*25c28e83SPiotr Jasiukajtis
732*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f30,%f30		! (1_0) dtmp0 = dexp_hi * dexp_hi;
733*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (2_0) hx ? 0x7ff00000
734*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f7		! (3_0) ((float*)res)[1] = ((float*)px)[1];
735*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f24,%f24		! (2_1) res += dexp_hi;
736*25c28e83SPiotr Jasiukajtis
737*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f26,%f40		! (6_1) res *= xx;
738*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update18		! (2_0) if ( hx >= 0x7ff00000 )
739*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp4],%f62		! (2_1) dlexp = *(double*)lexp;
740*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
741*25c28e83SPiotr Jasiukajtis.cont18:
742*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
743*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (2_0) hx ? 0x00100000
744*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
745*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (5_1) res += K3;
746*25c28e83SPiotr Jasiukajtis
747*25c28e83SPiotr Jasiukajtis	fmuld	%f4,%f38,%f38		! (3_1) res = dexp_hi * res;
748*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update19		! (2_0) if ( hx < 0x00100000 )
749*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
750*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
751*25c28e83SPiotr Jasiukajtis.cont19:
752*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f62,%f2		! (2_1) res *= dlexp;
753*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (2_0) iexp += 0x5fe;
754*25c28e83SPiotr Jasiukajtis	ldd	[%i5+8],%f24		! (3_1) dexp_lo = ((double*)addr)[1];
755*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (2_0) res_c = vis_fpadd32(res,DC2);
756*25c28e83SPiotr Jasiukajtis
757*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f32,%f62		! (0_0) res = K6 * xx;
758*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i4		! (2_0) hx += 8;
759*25c28e83SPiotr Jasiukajtis	st	%f2,[%i1]		! (2_1) ((float*)py)[0] = ((float*)res)[0];
760*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f8,%f10		! (1_0) xx = res - res_c;
761*25c28e83SPiotr Jasiukajtis
762*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f58,%f44		! (5_1) res *= xx;
763*25c28e83SPiotr Jasiukajtis	nop
764*25c28e83SPiotr Jasiukajtis	st	%f3,[%i1+4]		! (2_1) ((float*)py)[1] = ((float*)res)[1];
765*25c28e83SPiotr Jasiukajtis	faddd	%f40,K4,%f40		! (6_1) res += K4;
766*25c28e83SPiotr Jasiukajtis
767*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (3_0) hx = *(int*)px;
768*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (2_0) iexp << 52;
769*25c28e83SPiotr Jasiukajtis	and	%i4,-16,%i4		! (2_0) hx = -16;
770*25c28e83SPiotr Jasiukajtis	faddd	%f36,K1,%f36		! (4_1) res += K1;
771*25c28e83SPiotr Jasiukajtis
772*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
773*25c28e83SPiotr Jasiukajtis	add	%i4,TBL,%i4		! (2_0) addr = (char*)arr + hx;
774*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp4]		! (2_0) dlexp = *(double*)lexp;
775*25c28e83SPiotr Jasiukajtis	faddd	%f38,%f24,%f8		! (3_1) res += dexp_lo;
776*25c28e83SPiotr Jasiukajtis
777*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f30,%f38		! (1_0) xx *= dtmp0;
778*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! px += stridey
779*25c28e83SPiotr Jasiukajtis	ldd	[%i4],%f24		! (2_0) dtmp0 = ((double*)addr)[0];
780*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (0_0) res += K5;
781*25c28e83SPiotr Jasiukajtis
782*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f26,%f34		! (6_1) res *= xx;
783*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
784*25c28e83SPiotr Jasiukajtis	ldd	[%l1],%f30		! (4_1) dexp_hi = ((double*)addr)[0];
785*25c28e83SPiotr Jasiukajtis	faddd	%f44,K2,%f40		! (5_1) res += K2;
786*25c28e83SPiotr Jasiukajtis
787*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
788*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
789*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%f0		! (4_0) ((float*)res)[0] = ((float*)px)[0];
790*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
791*25c28e83SPiotr Jasiukajtis
792*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f24,%f24		! (2_0) dtmp0 = dexp_hi * dexp_hi;
793*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (3_0) hx ? 0x7ff00000
794*25c28e83SPiotr Jasiukajtis	lda	[%l6+4]%asi,%f1		! (4_0) ((float*)res)[1] = ((float*)px)[1];
795*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f4,%f8		! (3_1) res += dexp_hi;
796*25c28e83SPiotr Jasiukajtis
797*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f32,%f60		! (0_0) res *= xx;
798*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update20		! (3_0) if ( hx >= 0x7ff00000 )
799*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp5],%f62		! (3_1) dlexp = *(double*)lexp;
800*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f4		! (2_0) res_c = vis_fand(res_c,DC3);
801*25c28e83SPiotr Jasiukajtis.cont20:
802*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f58,%f40		! (5_1) res *= xx;
803*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (3_0) hx ? 0x00100000
804*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
805*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f10		! (6_1) res += K3;
806*25c28e83SPiotr Jasiukajtis
807*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f36,%f36		! (4_1) res = dexp_hi * res;
808*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update21		! (3_0) if ( hx < 0x00100000 )
809*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
810*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (4_0) res = vis_fand(res,DC0);
811*25c28e83SPiotr Jasiukajtis.cont21:
812*25c28e83SPiotr Jasiukajtis	fmuld	%f8,%f62,%f8		! (3_1) res *= dlexp;
813*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (3_0) iexp += 0x5fe;
814*25c28e83SPiotr Jasiukajtis	ldd	[%l1+8],%f34		! (4_1) dexp_lo = ((double*)addr)[1];
815*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (3_0) res_c = vis_fpadd32(res,DC2);
816*25c28e83SPiotr Jasiukajtis
817*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f38,%f62		! (1_0) res = K6 * xx;
818*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i5		! (3_0) hx += 8;
819*25c28e83SPiotr Jasiukajtis	st	%f8,[%i0]		! (3_1) ((float*)py)[0] = ((float*)res)[0];
820*25c28e83SPiotr Jasiukajtis	fsubd	%f28,%f4,%f28		! (2_0) xx = res - res_c;
821*25c28e83SPiotr Jasiukajtis
822*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f26,%f4		! (6_1) res *= xx;
823*25c28e83SPiotr Jasiukajtis	nop
824*25c28e83SPiotr Jasiukajtis	st	%f9,[%i0+4]		! (3_1) ((float*)py)[1] = ((float*)res)[1];
825*25c28e83SPiotr Jasiukajtis	faddd	%f60,K4,%f60		! (0_0) res += K4;
826*25c28e83SPiotr Jasiukajtis
827*25c28e83SPiotr Jasiukajtis	lda	[%l6]%asi,%g1		! (4_0) hx = *(int*)px;
828*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (3_0) iexp << 52;
829*25c28e83SPiotr Jasiukajtis	and	%i5,-16,%i5		! (3_0) hx = -16;
830*25c28e83SPiotr Jasiukajtis	faddd	%f40,K1,%f40		! (5_1) res += K1;
831*25c28e83SPiotr Jasiukajtis
832*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%i0		! px += stridex
833*25c28e83SPiotr Jasiukajtis	add	%i5,TBL,%i5		! (3_0) addr = (char*)arr + hx;
834*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp5]		! (3_0) dlexp = *(double*)lexp;
835*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f34,%f8		! (4_1) res += dexp_lo;
836*25c28e83SPiotr Jasiukajtis
837*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f24,%f36		! (2_0) xx *= dtmp0;
838*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%l6		! px += stridey
839*25c28e83SPiotr Jasiukajtis	ldd	[%i5],%f28		! (3_0) dtmp0 = ((double*)addr)[0];
840*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (1_0) res += K5;
841*25c28e83SPiotr Jasiukajtis
842*25c28e83SPiotr Jasiukajtis	faddd	%f4,K2,%f10		! (6_1) res += K2;
843*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
844*25c28e83SPiotr Jasiukajtis	nop
845*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f34		! (0_0) res *= xx;
846*25c28e83SPiotr Jasiukajtis
847*25c28e83SPiotr Jasiukajtis	fmuld	%f40,%f58,%f40		! (5_1) res *= xx;
848*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
849*25c28e83SPiotr Jasiukajtis	lda	[%i0]%asi,%f6		! (5_0) ((float*)res)[0] = ((float*)px)[0];
850*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f24		! (4_0) res = vis_for(res,DC1);
851*25c28e83SPiotr Jasiukajtis
852*25c28e83SPiotr Jasiukajtis	fmuld	%f28,%f28,%f28		! (3_0) dtmp0 = dexp_hi * dexp_hi;
853*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (4_0) hx ? 0x7ff00000
854*25c28e83SPiotr Jasiukajtis	lda	[%i0+4]%asi,%f7		! (5_0) ((float*)res)[1] = ((float*)px)[1];
855*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f30,%f30		! (4_1) res += dexp_hi;
856*25c28e83SPiotr Jasiukajtis
857*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (3_0) res_c = vis_fand(res_c,DC3);
858*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update22		! (4_0) if ( hx >= 0x7ff00000 )
859*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp6],%f18		! (4_1) dlexp = *(double*)lexp;
860*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;
861*25c28e83SPiotr Jasiukajtis.cont22:
862*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f26,%f58		! (6_1) res *= xx;
863*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (4_0) hx ? 0x00100000
864*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
865*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f60		! (0_0) res += K3;
866*25c28e83SPiotr Jasiukajtis
867*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f40,%f40		! (5_1) res = dexp_hi * res;
868*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update23		! (4_0) if ( hx < 0x00100000 )
869*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
870*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
871*25c28e83SPiotr Jasiukajtis.cont23:
872*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f18,%f6		! (4_1) res *= dlexp;
873*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (4_0) iexp += 0x5fe;
874*25c28e83SPiotr Jasiukajtis	ldd	[%i3+8],%f34		! (5_1) dexp_lo = ((double*)addr)[1];
875*25c28e83SPiotr Jasiukajtis	fpadd32	%f24,DC2,%f18		! (4_0) res_c = vis_fpadd32(res,DC2);
876*25c28e83SPiotr Jasiukajtis
877*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f36,%f30		! (2_0) res = K6 * xx;
878*25c28e83SPiotr Jasiukajtis	add	%o2,8,%l1		! (4_0) hx += 8;
879*25c28e83SPiotr Jasiukajtis	st	%f6,[%i1]		! (4_1) ((float*)py)[0] = ((float*)res)[0];
880*25c28e83SPiotr Jasiukajtis	fsubd	%f44,%f8,%f44		! (3_0) xx = res - res_c;
881*25c28e83SPiotr Jasiukajtis
882*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f60		! (0_0) res *= xx;
883*25c28e83SPiotr Jasiukajtis	sllx	%o7,52,%o7		! (4_0) iexp << 52;
884*25c28e83SPiotr Jasiukajtis	st	%f7,[%i1+4]		! (4_1) ((float*)py)[1] = ((float*)res)[1];
885*25c28e83SPiotr Jasiukajtis	faddd	%f62,K4,%f6		! (1_0) res += K4;
886*25c28e83SPiotr Jasiukajtis
887*25c28e83SPiotr Jasiukajtis	lda	[%i0]%asi,%g1		! (5_0) hx = *(int*)px;
888*25c28e83SPiotr Jasiukajtis	add	%i0,stridex,%i1		! px += stridex
889*25c28e83SPiotr Jasiukajtis	and	%l1,-16,%l1		! (4_0) hx = -16;
890*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f58		! (6_1) res += K1;
891*25c28e83SPiotr Jasiukajtis
892*25c28e83SPiotr Jasiukajtis	add	%l1,TBL,%l1		! (4_0) addr = (char*)arr + hx;
893*25c28e83SPiotr Jasiukajtis	add	%l6,stridey,%i0		! px += stridey
894*25c28e83SPiotr Jasiukajtis	stx	%o7,[%fp+tmp6]		! (4_0) dlexp = *(double*)lexp;
895*25c28e83SPiotr Jasiukajtis	faddd	%f40,%f34,%f8		! (5_1) res += dexp_lo;
896*25c28e83SPiotr Jasiukajtis
897*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f28,%f40		! (3_0) xx *= dtmp0;
898*25c28e83SPiotr Jasiukajtis	nop
899*25c28e83SPiotr Jasiukajtis	ldd	[%l1],%f44		! (4_0) dtmp0 = ((double*)addr)[0];
900*25c28e83SPiotr Jasiukajtis	faddd	%f30,K5,%f62		! (2_0) res += K5;
901*25c28e83SPiotr Jasiukajtis
902*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f38,%f34		! (1_0) res *= xx;
903*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
904*25c28e83SPiotr Jasiukajtis	ldd	[%l4],%f30		! (6_1) dexp_hi = ((double*)addr)[0];
905*25c28e83SPiotr Jasiukajtis	faddd	%f60,K2,%f60		! (0_0) res += K2;
906*25c28e83SPiotr Jasiukajtis
907*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (5_0) res = vis_for(res,DC1);
908*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
909*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f6		! (6_0) ((float*)res)[0] = ((float*)px)[0];
910*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f26,%f26		! (6_1) res *= xx;
911*25c28e83SPiotr Jasiukajtis
912*25c28e83SPiotr Jasiukajtis	fmuld	%f44,%f44,%f44		! (4_0) dtmp0 = dexp_hi * dexp_hi;
913*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x7ff00000		! (5_0) hx ? 0x7ff00000
914*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f7		! (6_0) ((float*)res)[1] = ((float*)px)[1];
915*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f22,%f22		! (5_1) res += dexp_hi;
916*25c28e83SPiotr Jasiukajtis
917*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (4_0) res_c = vis_fand(res_c,DC3);
918*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.update24		! (5_0) if ( hx >= 0x7ff00000 )
919*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp0],%f18		! (5_1) dlexp = *(double*)lexp;
920*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;
921*25c28e83SPiotr Jasiukajtis.cont24:
922*25c28e83SPiotr Jasiukajtis	fmuld	%f60,%f32,%f58		! (0_0) res *= xx;
923*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
924*25c28e83SPiotr Jasiukajtis	cmp	%g1,_0x00100000		! (5_0) hx ? 0x00100000
925*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (1_0) res += K3;
926*25c28e83SPiotr Jasiukajtis
927*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f26,%f26		! (6_1) res = dexp_hi * res;
928*25c28e83SPiotr Jasiukajtis	bl,pn	%icc,.update25		! (5_0) if ( hx < 0x00100000 )
929*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
930*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
931*25c28e83SPiotr Jasiukajtis.cont25:
932*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f18,%f2		! (5_1) res *= dlexp;
933*25c28e83SPiotr Jasiukajtis	subcc	counter,7,counter	! counter -= 7;
934*25c28e83SPiotr Jasiukajtis	ldd	[%l4+8],%f60		! (6_1) dexp_lo = ((double*)addr)[1];
935*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (5_0) res_c = vis_fpadd32(res,DC2);
936*25c28e83SPiotr Jasiukajtis
937*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f40,%f22		! (3_0) res = K6 * xx;
938*25c28e83SPiotr Jasiukajtis	add	%o2,8,%i3		! (5_0) hx += 8;
939*25c28e83SPiotr Jasiukajtis	st	%f2,[%l6]		! (5_1) ((float*)py)[0] = ((float*)res)[0];
940*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f8,%f10		! (4_0) xx = res - res_c;
941*25c28e83SPiotr Jasiukajtis
942*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f38,%f24		! (1_0) res *= xx;
943*25c28e83SPiotr Jasiukajtis	st	%f3,[%l6+4]		! (5_1) ((float*)py)[1] = ((float*)res)[1];
944*25c28e83SPiotr Jasiukajtis	bpos,pt	%icc,.main_loop
945*25c28e83SPiotr Jasiukajtis	faddd	%f62,K4,%f34		! (2_0) res += K4;
946*25c28e83SPiotr Jasiukajtis
947*25c28e83SPiotr Jasiukajtis	add	counter,7,counter
948*25c28e83SPiotr Jasiukajtis.tail:
949*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (5_0) iexp += 0x5fe;
950*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
951*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
952*25c28e83SPiotr Jasiukajtis	mov	%i0,%o4
953*25c28e83SPiotr Jasiukajtis
954*25c28e83SPiotr Jasiukajtis	faddd	%f58,K1,%f58		! (0_1) res += K1;
955*25c28e83SPiotr Jasiukajtis
956*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f60,%f8		! (6_2) res += dexp_lo;
957*25c28e83SPiotr Jasiukajtis
958*25c28e83SPiotr Jasiukajtis	faddd	%f22,K5,%f62		! (3_1) res += K5;
959*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f44,%f60		! (4_1) xx *= dtmp0;
960*25c28e83SPiotr Jasiukajtis
961*25c28e83SPiotr Jasiukajtis	faddd	%f24,K2,%f26		! (1_1) res += K2;
962*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%l6		! px += stridex
963*25c28e83SPiotr Jasiukajtis	ldd	[%l2],%f24		! (0_1) dexp_hi = ((double*)addr)[0];
964*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f36,%f34		! (2_1) res *= xx;
965*25c28e83SPiotr Jasiukajtis
966*25c28e83SPiotr Jasiukajtis	fmuld	%f58,%f32,%f58		! (0_1) res *= xx;
967*25c28e83SPiotr Jasiukajtis
968*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! px += stridey
969*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f30,%f30		! (6_2) res += dexp_hi;
970*25c28e83SPiotr Jasiukajtis
971*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f40,%f32		! (3_1) res *= xx;
972*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f62		! (6_2) dlexp = *(double*)lexp;
973*25c28e83SPiotr Jasiukajtis
974*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
975*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (2_1) res += K3;
976*25c28e83SPiotr Jasiukajtis
977*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f58,%f58		! (0_1) res = dexp_hi * res;
978*25c28e83SPiotr Jasiukajtis
979*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f62,%f2		! (6_2) res *= dlexp;
980*25c28e83SPiotr Jasiukajtis	ldd	[%l2+8],%f30		! (0_1) dexp_lo = ((double*)addr)[1];
981*25c28e83SPiotr Jasiukajtis
982*25c28e83SPiotr Jasiukajtis	fmuld	K6,%f60,%f62		! (4_1) res = K6 * xx;
983*25c28e83SPiotr Jasiukajtis	st	%f2,[%i0]		! (6_2) ((float*)py)[0] = ((float*)res)[0];
984*25c28e83SPiotr Jasiukajtis
985*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f36,%f28		! (2_1) res *= xx;
986*25c28e83SPiotr Jasiukajtis	st	%f3,[%i0+4]		! (6_2) ((float*)py)[1] = ((float*)res)[1];
987*25c28e83SPiotr Jasiukajtis	faddd	%f32,K4,%f32		! (3_1) res += K4;
988*25c28e83SPiotr Jasiukajtis
989*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
990*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
991*25c28e83SPiotr Jasiukajtis	mov	%i1,%o4
992*25c28e83SPiotr Jasiukajtis
993*25c28e83SPiotr Jasiukajtis	faddd	%f26,K1,%f26		! (1_1) res += K1;
994*25c28e83SPiotr Jasiukajtis
995*25c28e83SPiotr Jasiukajtis	faddd	%f58,%f30,%f8		! (0_1) res += dexp_lo;
996*25c28e83SPiotr Jasiukajtis
997*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
998*25c28e83SPiotr Jasiukajtis	faddd	%f62,K5,%f62		! (4_1) res += K5;
999*25c28e83SPiotr Jasiukajtis
1000*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f40,%f34		! (3_1) res *= xx;
1001*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%i0		! px += stridey
1002*25c28e83SPiotr Jasiukajtis	ldd	[%i2],%f22		! (1_1) dexp_hi = ((double*)addr)[0];
1003*25c28e83SPiotr Jasiukajtis	faddd	%f28,K2,%f32		! (2_1) res += K2;
1004*25c28e83SPiotr Jasiukajtis
1005*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
1006*25c28e83SPiotr Jasiukajtis
1007*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f24,%f24		! (0_1) res += dexp_hi;
1008*25c28e83SPiotr Jasiukajtis
1009*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f60,%f38		! (4_1) res *= xx;
1010*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f62		! (0_1) dlexp = *(double*)lexp;
1011*25c28e83SPiotr Jasiukajtis
1012*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
1013*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (3_1) res += K3;
1014*25c28e83SPiotr Jasiukajtis
1015*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f26,%f26		! (1_1) res = dexp_hi * res;
1016*25c28e83SPiotr Jasiukajtis
1017*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f62,%f2		! (0_1) res *= dlexp;
1018*25c28e83SPiotr Jasiukajtis	ldd	[%i2+8],%f24		! (1_1) dexp_lo = ((double*)addr)[1];
1019*25c28e83SPiotr Jasiukajtis
1020*25c28e83SPiotr Jasiukajtis	st	%f2,[%i1]		! (0_1) ((float*)py)[0] = ((float*)res)[0];
1021*25c28e83SPiotr Jasiukajtis
1022*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f40,%f44		! (3_1) res *= xx;
1023*25c28e83SPiotr Jasiukajtis	st	%f3,[%i1+4]		! (0_1) ((float*)py)[1] = ((float*)res)[1];
1024*25c28e83SPiotr Jasiukajtis	faddd	%f38,K4,%f38		! (4_1) res += K4;
1025*25c28e83SPiotr Jasiukajtis
1026*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1027*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
1028*25c28e83SPiotr Jasiukajtis	mov	%i0,%o4
1029*25c28e83SPiotr Jasiukajtis
1030*25c28e83SPiotr Jasiukajtis	faddd	%f32,K1,%f32		! (2_1) res += K1;
1031*25c28e83SPiotr Jasiukajtis
1032*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
1033*25c28e83SPiotr Jasiukajtis	faddd	%f26,%f24,%f8		! (1_1) res += dexp_lo;
1034*25c28e83SPiotr Jasiukajtis
1035*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! py += stridey
1036*25c28e83SPiotr Jasiukajtis
1037*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f60,%f34		! (4_1) res *= xx;
1038*25c28e83SPiotr Jasiukajtis	ldd	[%i4],%f24		! (2_1) dexp_hi = ((double*)addr)[0];
1039*25c28e83SPiotr Jasiukajtis	faddd	%f44,K2,%f38		! (3_1) res += K2;
1040*25c28e83SPiotr Jasiukajtis
1041*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
1042*25c28e83SPiotr Jasiukajtis
1043*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f22,%f22		! (1_1) res += dexp_hi;
1044*25c28e83SPiotr Jasiukajtis
1045*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp3],%f62		! (1_1) dlexp = *(double*)lexp;
1046*25c28e83SPiotr Jasiukajtis
1047*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
1048*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (4_1) res += K3;
1049*25c28e83SPiotr Jasiukajtis
1050*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f32,%f32		! (2_1) res = dexp_hi * res;
1051*25c28e83SPiotr Jasiukajtis
1052*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f62,%f2		! (1_1) res *= dlexp;
1053*25c28e83SPiotr Jasiukajtis	ldd	[%i4+8],%f22		! (2_1) dexp_lo = ((double*)addr)[1];
1054*25c28e83SPiotr Jasiukajtis
1055*25c28e83SPiotr Jasiukajtis	st	%f2,[%i0]		! (1_1) ((float*)py)[0] = ((float*)res)[0];
1056*25c28e83SPiotr Jasiukajtis
1057*25c28e83SPiotr Jasiukajtis	fmuld	%f34,%f60,%f28		! (4_1) res *= xx;
1058*25c28e83SPiotr Jasiukajtis	st	%f3,[%i0+4]		! (1_1) ((float*)py)[1] = ((float*)res)[1];
1059*25c28e83SPiotr Jasiukajtis
1060*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1061*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
1062*25c28e83SPiotr Jasiukajtis	mov	%i1,%o4
1063*25c28e83SPiotr Jasiukajtis
1064*25c28e83SPiotr Jasiukajtis	faddd	%f38,K1,%f38		! (3_1) res += K1;
1065*25c28e83SPiotr Jasiukajtis
1066*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f22,%f8		! (2_1) res += dexp_lo;
1067*25c28e83SPiotr Jasiukajtis
1068*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
1069*25c28e83SPiotr Jasiukajtis
1070*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%i0		! py += stridey
1071*25c28e83SPiotr Jasiukajtis	ldd	[%i5],%f22		! (3_1) dexp_hi = ((double*)addr)[0];
1072*25c28e83SPiotr Jasiukajtis	faddd	%f28,K2,%f36		! (4_1) res += K2;
1073*25c28e83SPiotr Jasiukajtis
1074*25c28e83SPiotr Jasiukajtis	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
1075*25c28e83SPiotr Jasiukajtis
1076*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f24,%f24		! (2_1) res += dexp_hi;
1077*25c28e83SPiotr Jasiukajtis
1078*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp4],%f62		! (2_1) dlexp = *(double*)lexp;
1079*25c28e83SPiotr Jasiukajtis
1080*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
1081*25c28e83SPiotr Jasiukajtis
1082*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f38,%f38		! (3_1) res = dexp_hi * res;
1083*25c28e83SPiotr Jasiukajtis
1084*25c28e83SPiotr Jasiukajtis	fmuld	%f24,%f62,%f2		! (2_1) res *= dlexp;
1085*25c28e83SPiotr Jasiukajtis	ldd	[%i5+8],%f24		! (3_1) dexp_lo = ((double*)addr)[1];
1086*25c28e83SPiotr Jasiukajtis
1087*25c28e83SPiotr Jasiukajtis	st	%f2,[%i1]		! (2_1) ((float*)py)[0] = ((float*)res)[0];
1088*25c28e83SPiotr Jasiukajtis
1089*25c28e83SPiotr Jasiukajtis	st	%f3,[%i1+4]		! (2_1) ((float*)py)[1] = ((float*)res)[1];
1090*25c28e83SPiotr Jasiukajtis
1091*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1092*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
1093*25c28e83SPiotr Jasiukajtis	mov	%i0,%o4
1094*25c28e83SPiotr Jasiukajtis
1095*25c28e83SPiotr Jasiukajtis	faddd	%f36,K1,%f36		! (4_1) res += K1;
1096*25c28e83SPiotr Jasiukajtis
1097*25c28e83SPiotr Jasiukajtis	faddd	%f38,%f24,%f8		! (3_1) res += dexp_lo;
1098*25c28e83SPiotr Jasiukajtis
1099*25c28e83SPiotr Jasiukajtis	add	%i0,stridey,%i1		! py += stridey
1100*25c28e83SPiotr Jasiukajtis
1101*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%l6		! px += stridex
1102*25c28e83SPiotr Jasiukajtis	ldd	[%l1],%f30		! (4_1) dexp_hi = ((double*)addr)[0];
1103*25c28e83SPiotr Jasiukajtis
1104*25c28e83SPiotr Jasiukajtis	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;
1105*25c28e83SPiotr Jasiukajtis
1106*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f22,%f8		! (3_1) res += dexp_hi;
1107*25c28e83SPiotr Jasiukajtis
1108*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp5],%f62		! (3_1) dlexp = *(double*)lexp;
1109*25c28e83SPiotr Jasiukajtis
1110*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f36,%f36		! (4_1) res = dexp_hi * res;
1111*25c28e83SPiotr Jasiukajtis
1112*25c28e83SPiotr Jasiukajtis	fmuld	%f8,%f62,%f8		! (3_1) res *= dlexp;
1113*25c28e83SPiotr Jasiukajtis	ldd	[%l1+8],%f34		! (4_1) dexp_lo = ((double*)addr)[1];
1114*25c28e83SPiotr Jasiukajtis
1115*25c28e83SPiotr Jasiukajtis	st	%f8,[%i0]		! (3_1) ((float*)py)[0] = ((float*)res)[0];
1116*25c28e83SPiotr Jasiukajtis
1117*25c28e83SPiotr Jasiukajtis	st	%f9,[%i0+4]		! (3_1) ((float*)py)[1] = ((float*)res)[1];
1118*25c28e83SPiotr Jasiukajtis
1119*25c28e83SPiotr Jasiukajtis	subcc	counter,1,counter
1120*25c28e83SPiotr Jasiukajtis	bneg,a	.begin
1121*25c28e83SPiotr Jasiukajtis	mov	%i1,%o4
1122*25c28e83SPiotr Jasiukajtis
1123*25c28e83SPiotr Jasiukajtis	faddd	%f36,%f34,%f8		! (4_1) res += dexp_lo;
1124*25c28e83SPiotr Jasiukajtis
1125*25c28e83SPiotr Jasiukajtis	add	%l6,stridex,%i0		! px += stridex
1126*25c28e83SPiotr Jasiukajtis
1127*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%l6		! py += stridey
1128*25c28e83SPiotr Jasiukajtis
1129*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f30,%f30		! (4_1) res += dexp_hi;
1130*25c28e83SPiotr Jasiukajtis
1131*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp6],%f18		! (4_1) dlexp = *(double*)lexp;
1132*25c28e83SPiotr Jasiukajtis
1133*25c28e83SPiotr Jasiukajtis	fmuld	%f30,%f18,%f6		! (4_1) res *= dlexp;
1134*25c28e83SPiotr Jasiukajtis
1135*25c28e83SPiotr Jasiukajtis	st	%f6,[%i1]		! (4_1) ((float*)py)[0] = ((float*)res)[0];
1136*25c28e83SPiotr Jasiukajtis
1137*25c28e83SPiotr Jasiukajtis	st	%f7,[%i1+4]		! (4_1) ((float*)py)[1] = ((float*)res)[1];
1138*25c28e83SPiotr Jasiukajtis
1139*25c28e83SPiotr Jasiukajtis	ba	.begin
1140*25c28e83SPiotr Jasiukajtis	add	%i1,stridey,%o4
1141*25c28e83SPiotr Jasiukajtis
1142*25c28e83SPiotr Jasiukajtis	.align	16
! .spec0 -- store res = DONE / res for the current element, then branch
! to .begin1.  %i1 (px) is advanced by stridex and %o4 (py) by stridey;
! counter is decremented in the delay slot of the final branch.
! NOTE(review): the test that routes an element here is outside this
! chunk -- confirm which inputs it covers.
1143*25c28e83SPiotr Jasiukajtis.spec0:
1144*25c28e83SPiotr Jasiukajtis	fdivd	DONE,%f0,%f0		! res = DONE / res;
1145*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
1146*25c28e83SPiotr Jasiukajtis	st	%f0,[%o4]		! ((float*)py)[0] = ((float*)&res)[0];
1147*25c28e83SPiotr Jasiukajtis	st	%f1,[%o4+4]		! ((float*)py)[1] = ((float*)&res)[1];
1148*25c28e83SPiotr Jasiukajtis	add	%o4,stridey,%o4		! py += stridey
1149*25c28e83SPiotr Jasiukajtis	ba	.begin1
	! Delay slot: executed before control reaches .begin1.
1150*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
1151*25c28e83SPiotr Jasiukajtis
1152*25c28e83SPiotr Jasiukajtis	.align	16
! .spec1 -- special-case handling for the element held in %f0.
!   (%i2 | %l4) == 0  : res = DONE / res, stored at label 2.
!   %g1 < 0 (signed)  : res = sqrt(res), stored at label 2.
!   %g1 >= %i4        : label 1 -- mask res with the constant loaded from
!                       [%o3+0x50], convert with fxtod, add the constant
!                       loaded from [%o3+0x58], then go to .cont_spec.
!   otherwise         : convert res with fxtod and go to .cont_spec.
! Both .cont_spec paths spill the upper word of the converted value to
! tmp0, reload it into %g1 and rebuild %o7, %o2, %f16 and %f44 from it.
! The three conditional branches are annulled (",a"), so each delay-slot
! instruction runs only when its branch is taken.
! NOTE(review): %g1 presumably holds hx (upper word of x) and %i2/%l4 the
! words tested for zero; %i4 is the threshold.  All are set up outside
! this chunk -- confirm.
1153*25c28e83SPiotr Jasiukajtis.spec1:
1154*25c28e83SPiotr Jasiukajtis	orcc	%i2,%l4,%g0
1155*25c28e83SPiotr Jasiukajtis	bz,a	2f
1156*25c28e83SPiotr Jasiukajtis	fdivd	DONE,%f0,%f0		! res = DONE / res;
1157*25c28e83SPiotr Jasiukajtis
1158*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1159*25c28e83SPiotr Jasiukajtis	bl,a	2f
1160*25c28e83SPiotr Jasiukajtis	fsqrtd	%f0,%f0			! res = sqrt(res);
1161*25c28e83SPiotr Jasiukajtis
1162*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i4
1163*25c28e83SPiotr Jasiukajtis	bge,a	1f
1164*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1165*25c28e83SPiotr Jasiukajtis
	! Fall-through path (%g1 < %i4): treat the bit pattern of res as a
	! 64-bit integer and convert it to double.
1166*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
	! Move the upper word of the new res into %g1 through the stack.
1167*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp0]
1168*25c28e83SPiotr Jasiukajtis
1169*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
1170*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp0],%g1
1171*25c28e83SPiotr Jasiukajtis
1172*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1173*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1174*25c28e83SPiotr Jasiukajtis
1175*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
	! iexp -= 537 before the negation in the delay slot below.
1176*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1177*25c28e83SPiotr Jasiukajtis
1178*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1179*25c28e83SPiotr Jasiukajtis	ba	.cont_spec
1180*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1181*25c28e83SPiotr Jasiukajtis
	! Path for %g1 >= %i4; %f18 was loaded in the annulled delay slot.
1182*25c28e83SPiotr Jasiukajtis1:
1183*25c28e83SPiotr Jasiukajtis	fand	%f0,%f18,%f0		! res = vis_fand(res,DC4);
1184*25c28e83SPiotr Jasiukajtis
1185*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f28
1186*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1187*25c28e83SPiotr Jasiukajtis
1188*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f28,%f0		! res += D2ON51;
1189*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp0]
1190*25c28e83SPiotr Jasiukajtis
1191*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
1192*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp0],%g1
1193*25c28e83SPiotr Jasiukajtis
1194*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1195*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1196*25c28e83SPiotr Jasiukajtis
1197*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1198*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1199*25c28e83SPiotr Jasiukajtis
1200*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1201*25c28e83SPiotr Jasiukajtis	ba	.cont_spec
1202*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1203*25c28e83SPiotr Jasiukajtis
	! Store path for the fdivd / fsqrtd results, then branch to .begin1
	! with counter decremented in the delay slot.
1204*25c28e83SPiotr Jasiukajtis2:
1205*25c28e83SPiotr Jasiukajtis	add	%i1,stridex,%i1		! px += stridex
1206*25c28e83SPiotr Jasiukajtis	st	%f0,[%o4]		! ((float*)py)[0] = ((float*)&res)[0];
1207*25c28e83SPiotr Jasiukajtis	st	%f1,[%o4+4]		! ((float*)py)[1] = ((float*)&res)[1];
1208*25c28e83SPiotr Jasiukajtis	add	%o4,stridey,%o4		! py += stridey
1209*25c28e83SPiotr Jasiukajtis	ba	.begin1
1210*25c28e83SPiotr Jasiukajtis	sub	counter,1,counter
1211*25c28e83SPiotr Jasiukajtis
1212*25c28e83SPiotr Jasiukajtis	.align	16
! .update0 -- limit the current batch to 1 element.
! If counter <= 1 control returns to .cont0 with nothing changed.
! Otherwise tmp_px is set to %l6 - stridex, tmp_counter to counter - 1,
! and counter is forced to 1 in the delay slot of the branch to .cont0.
! NOTE(review): presumably the saved tmp_px/tmp_counter make a later pass
! resume at the element that triggered this path -- confirm at .begin.
1213*25c28e83SPiotr Jasiukajtis.update0:
1214*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1215*25c28e83SPiotr Jasiukajtis	ble	.cont0
1216*25c28e83SPiotr Jasiukajtis	nop
1217*25c28e83SPiotr Jasiukajtis
1218*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1219*25c28e83SPiotr Jasiukajtis	sub	counter,1,tmp_counter
1220*25c28e83SPiotr Jasiukajtis
1221*25c28e83SPiotr Jasiukajtis	ba	.cont0
1222*25c28e83SPiotr Jasiukajtis	mov	1,counter
1223*25c28e83SPiotr Jasiukajtis
1224*25c28e83SPiotr Jasiukajtis	.align	16
! .update1 -- out-of-line handling for the element whose value is in %f8.
!   counter <= 1                  : back to .cont1; only %i1 = %l6 - stridex
!                                   is written (non-annulled delay slot).
!   %g1 < 0, or
!   (%g1 | word at [%i1+4]) == 0  : label 1 -- tmp_px = %l6 - stridex,
!                                   tmp_counter = counter - 1, counter = 1.
!   %g1 >= 0x00080000             : label 2 -- fand with the constant at
!                                   [%o3+0x50], fxtod, add the constant at
!                                   [%o3+0x58].
!   otherwise                     : fxtod only.
! Both conversion paths spill the upper word of %f8 to tmp7, reload it
! into %g1, and recompute %o7 = 1534 - ((%g1 >> 21) - 537),
! %o2 = (%g1 >> 10) & 0x7f8, %f16 and %f28 before returning to .cont1.
! NOTE(review): %g1 presumably holds hx of the element on entry -- confirm
! at the branch site, which is outside this chunk.
1225*25c28e83SPiotr Jasiukajtis.update1:
1226*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1227*25c28e83SPiotr Jasiukajtis	ble	.cont1
1228*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i1
1229*25c28e83SPiotr Jasiukajtis
1230*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%i2
1231*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1232*25c28e83SPiotr Jasiukajtis	bl	1f
1233*25c28e83SPiotr Jasiukajtis
	! The orcc sits in the delay slot of the bl above; it sets the
	! condition codes that the following bz tests.
1234*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i2,%g0
1235*25c28e83SPiotr Jasiukajtis	bz	1f
1236*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i3
1237*25c28e83SPiotr Jasiukajtis
1238*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i3
1239*25c28e83SPiotr Jasiukajtis	bge,a	2f
1240*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1241*25c28e83SPiotr Jasiukajtis
1242*25c28e83SPiotr Jasiukajtis	fxtod	%f8,%f8			! res = *(long long*)&res;
1243*25c28e83SPiotr Jasiukajtis	st	%f8,[%fp+tmp7]
1244*25c28e83SPiotr Jasiukajtis
1245*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
1246*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1247*25c28e83SPiotr Jasiukajtis
1248*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1249*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1250*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1251*25c28e83SPiotr Jasiukajtis
1252*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1253*25c28e83SPiotr Jasiukajtis
1254*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1255*25c28e83SPiotr Jasiukajtis
1256*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1257*25c28e83SPiotr Jasiukajtis	ba	.cont1
1258*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
	! Path for %g1 >= 0x00080000; %f18 was loaded in the annulled
	! delay slot of the bge,a above.
1259*25c28e83SPiotr Jasiukajtis2:
1260*25c28e83SPiotr Jasiukajtis	fand	%f8,%f18,%f8
1261*25c28e83SPiotr Jasiukajtis	fxtod	%f8,%f8			! res = *(long long*)&res;
1262*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f18
1263*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f18,%f8
1264*25c28e83SPiotr Jasiukajtis	st	%f8,[%fp+tmp7]
1265*25c28e83SPiotr Jasiukajtis
1266*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
1267*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1268*25c28e83SPiotr Jasiukajtis
1269*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1270*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1271*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1272*25c28e83SPiotr Jasiukajtis
1273*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1274*25c28e83SPiotr Jasiukajtis
1275*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1276*25c28e83SPiotr Jasiukajtis
1277*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1278*25c28e83SPiotr Jasiukajtis	ba	.cont1
1279*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
	! Fallback: record the resume position and cut the batch to 1.
1280*25c28e83SPiotr Jasiukajtis1:
1281*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1282*25c28e83SPiotr Jasiukajtis	sub	counter,1,tmp_counter
1283*25c28e83SPiotr Jasiukajtis
1284*25c28e83SPiotr Jasiukajtis	ba	.cont1
1285*25c28e83SPiotr Jasiukajtis	mov	1,counter
1286*25c28e83SPiotr Jasiukajtis
1287*25c28e83SPiotr Jasiukajtis	.align	16
! .update2 -- limit the current batch to 2 elements.
! If counter <= 2 control returns to .cont2 with nothing changed.
! Otherwise tmp_px is set to %l6 - stridex, tmp_counter to counter - 2,
! and counter is forced to 2 in the delay slot of the branch to .cont2.
! NOTE(review): presumably the saved tmp_px/tmp_counter make a later pass
! resume at the element that triggered this path -- confirm at .begin.
1288*25c28e83SPiotr Jasiukajtis.update2:
1289*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1290*25c28e83SPiotr Jasiukajtis	ble	.cont2
1291*25c28e83SPiotr Jasiukajtis	nop
1292*25c28e83SPiotr Jasiukajtis
1293*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1294*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1295*25c28e83SPiotr Jasiukajtis
1296*25c28e83SPiotr Jasiukajtis	ba	.cont2
1297*25c28e83SPiotr Jasiukajtis	mov	2,counter
1298*25c28e83SPiotr Jasiukajtis
1299*25c28e83SPiotr Jasiukajtis	.align	16
! .update3 -- out-of-line handling for the element whose value is in %f0.
!   counter <= 2                  : back to .cont3; only %i1 = %l6 - stridex
!                                   is written (non-annulled delay slot).
!   %g1 < 0, or
!   (%g1 | word at [%i1+4]) == 0  : label 1 -- tmp_px = %l6 - stridex,
!                                   tmp_counter = counter - 2, counter = 2.
!   %g1 >= 0x00080000             : label 2 -- fand with the constant at
!                                   [%o3+0x50], fxtod, add the constant at
!                                   [%o3+0x58].
!   otherwise                     : fxtod only.
! Both conversion paths spill the upper word of %f0 to tmp7, reload it
! into %g1, and recompute %o7 = (%g1 >> 21) - 537,
! %o2 = (%g1 >> 10) & 0x7f8, %f16 and %f44 before returning to .cont3.
! NOTE(review): %g1 presumably holds hx of the element on entry -- confirm
! at the branch site, which is outside this chunk.
1300*25c28e83SPiotr Jasiukajtis.update3:
1301*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1302*25c28e83SPiotr Jasiukajtis	ble	.cont3
1303*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i1
1304*25c28e83SPiotr Jasiukajtis
1305*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%i2
1306*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1307*25c28e83SPiotr Jasiukajtis	bl	1f
1308*25c28e83SPiotr Jasiukajtis
	! The orcc sits in the delay slot of the bl above; it sets the
	! condition codes that the following bz tests.
1309*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i2,%g0
1310*25c28e83SPiotr Jasiukajtis	bz	1f
1311*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i3
1312*25c28e83SPiotr Jasiukajtis
1313*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i3
1314*25c28e83SPiotr Jasiukajtis	bge,a	2f
1315*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1316*25c28e83SPiotr Jasiukajtis
1317*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1318*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1319*25c28e83SPiotr Jasiukajtis
1320*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
1321*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1322*25c28e83SPiotr Jasiukajtis
1323*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1324*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1325*25c28e83SPiotr Jasiukajtis
1326*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1327*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1328*25c28e83SPiotr Jasiukajtis	ba	.cont3
1329*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
	! Path for %g1 >= 0x00080000; %f18 was loaded in the annulled
	! delay slot of the bge,a above.
1330*25c28e83SPiotr Jasiukajtis2:
1331*25c28e83SPiotr Jasiukajtis	fand	%f0,%f18,%f0
1332*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1333*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f18
1334*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f18,%f0
1335*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1336*25c28e83SPiotr Jasiukajtis
1337*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
1338*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1339*25c28e83SPiotr Jasiukajtis
1340*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1341*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1342*25c28e83SPiotr Jasiukajtis
1343*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1344*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1345*25c28e83SPiotr Jasiukajtis	ba	.cont3
1346*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
	! Fallback: record the resume position and cut the batch to 2.
1347*25c28e83SPiotr Jasiukajtis1:
1348*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1349*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1350*25c28e83SPiotr Jasiukajtis
1351*25c28e83SPiotr Jasiukajtis	ba	.cont3
1352*25c28e83SPiotr Jasiukajtis	mov	2,counter
1353*25c28e83SPiotr Jasiukajtis
1354*25c28e83SPiotr Jasiukajtis	.align	16
! .update4 -- limit the current batch to 3 elements.
! If counter <= 3 control returns to .cont4 with nothing changed.
! Otherwise tmp_px is set to %l6 - stridex, tmp_counter to counter - 3,
! and counter is forced to 3 in the delay slot of the branch to .cont4.
! NOTE(review): presumably the saved tmp_px/tmp_counter make a later pass
! resume at the element that triggered this path -- confirm at .begin.
1355*25c28e83SPiotr Jasiukajtis.update4:
1356*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1357*25c28e83SPiotr Jasiukajtis	ble	.cont4
1358*25c28e83SPiotr Jasiukajtis	nop
1359*25c28e83SPiotr Jasiukajtis
1360*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1361*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter
1362*25c28e83SPiotr Jasiukajtis
1363*25c28e83SPiotr Jasiukajtis	ba	.cont4
1364*25c28e83SPiotr Jasiukajtis	mov	3,counter
1365*25c28e83SPiotr Jasiukajtis
1366*25c28e83SPiotr Jasiukajtis	.align	16
! .update5 -- out-of-line handling for the element whose value is in %f6.
!   counter <= 3                  : back to .cont5; only %i1 = %l6 - stridex
!                                   is written (non-annulled delay slot).
!   %g1 < 0, or
!   (%g1 | word at [%i1+4]) == 0  : label 1 -- tmp_px = %l6 - stridex,
!                                   tmp_counter = counter - 3, counter = 3.
!   %g1 >= 0x00080000             : label 2 -- fand with the constant at
!                                   [%o3+0x50], fxtod, add the constant at
!                                   [%o3+0x58].
!   otherwise                     : fxtod only.
! Both conversion paths spill the upper word of %f6 to tmp7, reload it
! into %g1, and recompute %o7 = (%g1 >> 21) - 537,
! %o2 = (%g1 >> 10) & 0x7f8, %f16 and %f28 before returning to .cont5.
! NOTE(review): %g1 presumably holds hx of the element on entry -- confirm
! at the branch site, which is outside this chunk.
1367*25c28e83SPiotr Jasiukajtis.update5:
1368*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1369*25c28e83SPiotr Jasiukajtis	ble	.cont5
1370*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i1
1371*25c28e83SPiotr Jasiukajtis
1372*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%i3
1373*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1374*25c28e83SPiotr Jasiukajtis	bl	1f
1375*25c28e83SPiotr Jasiukajtis
	! The orcc sits in the delay slot of the bl above; it sets the
	! condition codes that the following bz tests.
1376*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i3,%g0
1377*25c28e83SPiotr Jasiukajtis	bz	1f
1378*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i4
1379*25c28e83SPiotr Jasiukajtis
1380*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i4
1381*25c28e83SPiotr Jasiukajtis	bge,a	2f
1382*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1383*25c28e83SPiotr Jasiukajtis
1384*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1385*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1386*25c28e83SPiotr Jasiukajtis
1387*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
1388*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1389*25c28e83SPiotr Jasiukajtis
1390*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1391*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1392*25c28e83SPiotr Jasiukajtis
1393*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1394*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1395*25c28e83SPiotr Jasiukajtis	ba	.cont5
1396*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
	! Path for %g1 >= 0x00080000; %f18 was loaded in the annulled
	! delay slot of the bge,a above.
1397*25c28e83SPiotr Jasiukajtis2:
1398*25c28e83SPiotr Jasiukajtis	fand	%f6,%f18,%f6
1399*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1400*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f18
1401*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f18,%f6
1402*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1403*25c28e83SPiotr Jasiukajtis
1404*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
1405*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1406*25c28e83SPiotr Jasiukajtis
1407*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1408*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1409*25c28e83SPiotr Jasiukajtis
1410*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1411*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1412*25c28e83SPiotr Jasiukajtis	ba	.cont5
1413*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
	! Fallback: record the resume position and cut the batch to 3.
1414*25c28e83SPiotr Jasiukajtis1:
1415*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1416*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter
1417*25c28e83SPiotr Jasiukajtis
1418*25c28e83SPiotr Jasiukajtis	ba	.cont5
1419*25c28e83SPiotr Jasiukajtis	mov	3,counter
1420*25c28e83SPiotr Jasiukajtis
1421*25c28e83SPiotr Jasiukajtis	.align	16
! .update6 -- limit the current batch to 4 elements.
! If counter <= 4 control returns to .cont6 with nothing changed.
! Otherwise tmp_px is set to %l6 - stridex, tmp_counter to counter - 4,
! and counter is forced to 4 in the delay slot of the branch to .cont6.
! NOTE(review): presumably the saved tmp_px/tmp_counter make a later pass
! resume at the element that triggered this path -- confirm at .begin.
1422*25c28e83SPiotr Jasiukajtis.update6:
1423*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1424*25c28e83SPiotr Jasiukajtis	ble	.cont6
1425*25c28e83SPiotr Jasiukajtis	nop
1426*25c28e83SPiotr Jasiukajtis
1427*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1428*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter
1429*25c28e83SPiotr Jasiukajtis
1430*25c28e83SPiotr Jasiukajtis	ba	.cont6
1431*25c28e83SPiotr Jasiukajtis	mov	4,counter
1432*25c28e83SPiotr Jasiukajtis
1433*25c28e83SPiotr Jasiukajtis	.align	16
! .update7 -- out-of-line handling for the element whose value is in %f0.
! %i1 = %l6 - stridex is computed first, and the faddd in the delay slot
! of the ble runs on every path through this block.
!   counter <= 4                  : back to .cont7.
!   %g1 < 0, or
!   (%g1 | word at [%i1+4]) == 0  : label 1 -- tmp_px = %l6 - stridex,
!                                   tmp_counter = counter - 4, counter = 4.
!   %g1 >= 0x00080000             : label 2 -- fand with the constant at
!                                   [%o3+0x50], fxtod, add the constant at
!                                   [%o3+0x58].
!   otherwise                     : fxtod only.
! Both conversion paths spill the upper word of %f0 to tmp7, reload it
! into %g1, and recompute %o7 = (%g1 >> 21) - 537,
! %o2 = (%g1 >> 10) & 0x7f8, %f16 and %f44 before returning to .cont7.
! NOTE(review): %g1 presumably holds hx of the element on entry -- confirm
! at the branch site, which is outside this chunk.
1434*25c28e83SPiotr Jasiukajtis.update7:
1435*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i1
1436*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1437*25c28e83SPiotr Jasiukajtis	ble	.cont7
1438*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f6		! (6_1) res += K3;
1439*25c28e83SPiotr Jasiukajtis
1440*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%i3
1441*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1442*25c28e83SPiotr Jasiukajtis	bl	1f
1443*25c28e83SPiotr Jasiukajtis
	! The orcc sits in the delay slot of the bl above; it sets the
	! condition codes that the following bz tests.
1444*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i3,%g0
1445*25c28e83SPiotr Jasiukajtis	bz	1f
1446*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i5
1447*25c28e83SPiotr Jasiukajtis
1448*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i5
1449*25c28e83SPiotr Jasiukajtis	bge,a	2f
1450*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1451*25c28e83SPiotr Jasiukajtis
1452*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1453*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1454*25c28e83SPiotr Jasiukajtis
1455*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
1456*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1457*25c28e83SPiotr Jasiukajtis
1458*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
1459*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
1460*25c28e83SPiotr Jasiukajtis
1461*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1462*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
1463*25c28e83SPiotr Jasiukajtis	ba	.cont7
1464*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
	! Path for %g1 >= 0x00080000; %f18 was loaded in the annulled
	! delay slot of the bge,a above.
1465*25c28e83SPiotr Jasiukajtis2:
1466*25c28e83SPiotr Jasiukajtis	fand	%f0,%f18,%f0
1467*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1468*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f18
1469*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f18,%f0
1470*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1471*25c28e83SPiotr Jasiukajtis
1472*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
1473*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1474*25c28e83SPiotr Jasiukajtis
1475*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
1476*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
1477*25c28e83SPiotr Jasiukajtis
1478*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1479*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
1480*25c28e83SPiotr Jasiukajtis	ba	.cont7
1481*25c28e83SPiotr Jasiukajtis	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
	! Fallback: record the resume position and cut the batch to 4.
1482*25c28e83SPiotr Jasiukajtis1:
1483*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1484*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter
1485*25c28e83SPiotr Jasiukajtis
1486*25c28e83SPiotr Jasiukajtis	ba	.cont7
1487*25c28e83SPiotr Jasiukajtis	mov	4,counter
1488*25c28e83SPiotr Jasiukajtis
1489*25c28e83SPiotr Jasiukajtis	.align	16
! .update8 -- limit the current batch to 5 elements.
! If counter <= 5 control returns to .cont8 with nothing changed.
! Otherwise tmp_px is set to %l6 (no stride adjustment on this path),
! tmp_counter to counter - 5, and counter is forced to 5 in the delay
! slot of the branch to .cont8.
! NOTE(review): presumably the saved tmp_px/tmp_counter make a later pass
! resume at the element that triggered this path -- confirm at .begin.
1490*25c28e83SPiotr Jasiukajtis.update8:
1491*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1492*25c28e83SPiotr Jasiukajtis	ble	.cont8
1493*25c28e83SPiotr Jasiukajtis	nop
1494*25c28e83SPiotr Jasiukajtis
1495*25c28e83SPiotr Jasiukajtis	mov	%l6,tmp_px
1496*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter
1497*25c28e83SPiotr Jasiukajtis
1498*25c28e83SPiotr Jasiukajtis	ba	.cont8
1499*25c28e83SPiotr Jasiukajtis	mov	5,counter
1500*25c28e83SPiotr Jasiukajtis
1501*25c28e83SPiotr Jasiukajtis	.align	16
! .update9 -- out-of-line handling for the element whose value is in %f8.
! The word at [%l6+4] is loaded into %i3 first, and the fand in the delay
! slot of the ble runs on every path through this block.
!   counter <= 5            : back to .cont9.
!   %g1 < 0, or
!   (%g1 | %i3) == 0        : label 1 -- tmp_px = %l6,
!                             tmp_counter = counter - 5, counter = 5.
!   %g1 >= 0x00080000       : label 2 -- fand with the constant at
!                             [%o3+0x50], fxtod, add the constant at
!                             [%o3+0x58].
!   otherwise               : fxtod only.
! Both conversion paths spill the upper word of %f8 to tmp7, reload it
! into %g1, and recompute %o7 = -((%g1 >> 21) - 537),
! %o2 = (%g1 >> 10) & 0x7f8 and %f24 before returning to .cont9.
! %i1 is overwritten with the 0x00080000 threshold.
! NOTE(review): %g1 presumably holds hx of the element on entry -- confirm
! at the branch site, which is outside this chunk.
1502*25c28e83SPiotr Jasiukajtis.update9:
1503*25c28e83SPiotr Jasiukajtis	ld	[%l6+4],%i3
1504*25c28e83SPiotr Jasiukajtis	cmp	counter,5
1505*25c28e83SPiotr Jasiukajtis	ble	.cont9
1506*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
1507*25c28e83SPiotr Jasiukajtis
1508*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1509*25c28e83SPiotr Jasiukajtis	bl	1f
1510*25c28e83SPiotr Jasiukajtis
	! The orcc sits in the delay slot of the bl above; it sets the
	! condition codes that the following bz tests.
1511*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i3,%g0
1512*25c28e83SPiotr Jasiukajtis	bz	1f
1513*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i1
1514*25c28e83SPiotr Jasiukajtis
1515*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i1
1516*25c28e83SPiotr Jasiukajtis	bge,a	2f
1517*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1518*25c28e83SPiotr Jasiukajtis
1519*25c28e83SPiotr Jasiukajtis	fxtod	%f8,%f8			! res = *(long long*)&res;
1520*25c28e83SPiotr Jasiukajtis	st	%f8,[%fp+tmp7]
1521*25c28e83SPiotr Jasiukajtis
1522*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
1523*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1524*25c28e83SPiotr Jasiukajtis
1525*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
1526*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
1527*25c28e83SPiotr Jasiukajtis
1528*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1529*25c28e83SPiotr Jasiukajtis
1530*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
1531*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
1532*25c28e83SPiotr Jasiukajtis	ba	.cont9
1533*25c28e83SPiotr Jasiukajtis	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
	! Path for %g1 >= 0x00080000; %f18 was loaded in the annulled
	! delay slot of the bge,a above.
1534*25c28e83SPiotr Jasiukajtis2:
1535*25c28e83SPiotr Jasiukajtis	fand	%f8,%f18,%f8
1536*25c28e83SPiotr Jasiukajtis	fxtod	%f8,%f8			! res = *(long long*)&res;
1537*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f18
1538*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f18,%f8
1539*25c28e83SPiotr Jasiukajtis	st	%f8,[%fp+tmp7]
1540*25c28e83SPiotr Jasiukajtis
1541*25c28e83SPiotr Jasiukajtis	fand	%f8,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
1542*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1543*25c28e83SPiotr Jasiukajtis
1544*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
1545*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
1546*25c28e83SPiotr Jasiukajtis
1547*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1548*25c28e83SPiotr Jasiukajtis
1549*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
1550*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
1551*25c28e83SPiotr Jasiukajtis	ba	.cont9
1552*25c28e83SPiotr Jasiukajtis	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
	! Fallback: record the resume position and cut the batch to 5.
1553*25c28e83SPiotr Jasiukajtis1:
1554*25c28e83SPiotr Jasiukajtis	mov	%l6,tmp_px
1555*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter
1556*25c28e83SPiotr Jasiukajtis
1557*25c28e83SPiotr Jasiukajtis	ba	.cont9
1558*25c28e83SPiotr Jasiukajtis	mov	5,counter
1559*25c28e83SPiotr Jasiukajtis
1560*25c28e83SPiotr Jasiukajtis	.align	16
! .update10 -- limit the current batch to 6 elements.
! If counter <= 6 control returns to .cont10 with nothing changed.
! Otherwise tmp_px is set to %i0, tmp_counter to counter - 6, and counter
! is forced to 6 in the delay slot of the branch to .cont10.
! NOTE(review): %i0 presumably holds the address of the triggering
! element at this point in the pipeline -- confirm at the branch site.
1561*25c28e83SPiotr Jasiukajtis.update10:
1562*25c28e83SPiotr Jasiukajtis	cmp	counter,6
1563*25c28e83SPiotr Jasiukajtis	ble	.cont10
1564*25c28e83SPiotr Jasiukajtis	nop
1565*25c28e83SPiotr Jasiukajtis
1566*25c28e83SPiotr Jasiukajtis	mov	%i0,tmp_px
1567*25c28e83SPiotr Jasiukajtis	sub	counter,6,tmp_counter
1568*25c28e83SPiotr Jasiukajtis
1569*25c28e83SPiotr Jasiukajtis	ba	.cont10
1570*25c28e83SPiotr Jasiukajtis	mov	6,counter
1571*25c28e83SPiotr Jasiukajtis
1572*25c28e83SPiotr Jasiukajtis	.align	16
! .update11 -- out-of-line handling for the element whose value is in %f0.
! The word at [%i0+4] is loaded into %i3 first, and the fand in the delay
! slot of the ble runs on every path through this block.
!   counter <= 6            : back to .cont11.
!   %g1 < 0, or
!   (%g1 | %i3) == 0        : label 1 -- tmp_px = %i0,
!                             tmp_counter = counter - 6, counter = 6.
!   %g1 >= 0x00080000       : label 2 -- fand with the constant at
!                             [%o3+0x50], fxtod, add the constant at
!                             [%o3+0x58].
!   otherwise               : fxtod only.
! Both conversion paths spill the upper word of %f0 to tmp7, reload it
! into %g1, and recompute %o7 = -((%g1 >> 21) - 537),
! %o2 = (%g1 >> 10) & 0x7f8 and %f28 before returning to .cont11.
! %i3 is reused for the 0x00080000 threshold after the zero test.
! NOTE(review): %g1 presumably holds hx of the element on entry -- confirm
! at the branch site, which is outside this chunk.
1573*25c28e83SPiotr Jasiukajtis.update11:
1574*25c28e83SPiotr Jasiukajtis	ld	[%i0+4],%i3
1575*25c28e83SPiotr Jasiukajtis	cmp	counter,6
1576*25c28e83SPiotr Jasiukajtis	ble	.cont11
1577*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
1578*25c28e83SPiotr Jasiukajtis
1579*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1580*25c28e83SPiotr Jasiukajtis	bl	1f
1581*25c28e83SPiotr Jasiukajtis
	! The orcc sits in the delay slot of the bl above; it sets the
	! condition codes that the following bz tests.
1582*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i3,%g0
1583*25c28e83SPiotr Jasiukajtis	bz	1f
1584*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i3
1585*25c28e83SPiotr Jasiukajtis
1586*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i3
1587*25c28e83SPiotr Jasiukajtis	bge,a	2f
1588*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f18
1589*25c28e83SPiotr Jasiukajtis
1590*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1591*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1592*25c28e83SPiotr Jasiukajtis
1593*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
1594*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1595*25c28e83SPiotr Jasiukajtis
1596*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
1597*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
1598*25c28e83SPiotr Jasiukajtis
1599*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1600*25c28e83SPiotr Jasiukajtis
1601*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
1602*25c28e83SPiotr Jasiukajtis
1603*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
1604*25c28e83SPiotr Jasiukajtis	ba	.cont11
1605*25c28e83SPiotr Jasiukajtis	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
	! Path for %g1 >= 0x00080000; %f18 was loaded in the annulled
	! delay slot of the bge,a above.
1606*25c28e83SPiotr Jasiukajtis2:
1607*25c28e83SPiotr Jasiukajtis	fand	%f0,%f18,%f0
1608*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1609*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f18
1610*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f18,%f0
1611*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1612*25c28e83SPiotr Jasiukajtis
1613*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
1614*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1615*25c28e83SPiotr Jasiukajtis
1616*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
1617*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
1618*25c28e83SPiotr Jasiukajtis
1619*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1620*25c28e83SPiotr Jasiukajtis
1621*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
1622*25c28e83SPiotr Jasiukajtis
1623*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
1624*25c28e83SPiotr Jasiukajtis	ba	.cont11
1625*25c28e83SPiotr Jasiukajtis	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
	! Fallback: record the resume position and cut the batch to 6.
1626*25c28e83SPiotr Jasiukajtis1:
1627*25c28e83SPiotr Jasiukajtis	mov	%i0,tmp_px
1628*25c28e83SPiotr Jasiukajtis	sub	counter,6,tmp_counter
1629*25c28e83SPiotr Jasiukajtis
1630*25c28e83SPiotr Jasiukajtis	ba	.cont11
1631*25c28e83SPiotr Jasiukajtis	mov	6,counter
1632*25c28e83SPiotr Jasiukajtis
1633*25c28e83SPiotr Jasiukajtis	.align	16
! .update12 -- limit the current batch to 0 further elements.
! The faddd sits in the non-annulled delay slot of the ble, so it runs
! whether or not the branch is taken.
! If counter <= 0 control returns to .cont12.  Otherwise tmp_px is set to
! %l6 - stridex, tmp_counter to counter (counter - 0), and counter is
! forced to 0 in the delay slot of the branch to .cont12.
! NOTE(review): presumably the saved tmp_px/tmp_counter make a later pass
! resume at the element that triggered this path -- confirm at .begin.
1634*25c28e83SPiotr Jasiukajtis.update12:
1635*25c28e83SPiotr Jasiukajtis	cmp	counter,0
1636*25c28e83SPiotr Jasiukajtis	ble	.cont12
1637*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (2_1) res += K3;
1638*25c28e83SPiotr Jasiukajtis
1639*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1640*25c28e83SPiotr Jasiukajtis	sub	counter,0,tmp_counter
1641*25c28e83SPiotr Jasiukajtis
1642*25c28e83SPiotr Jasiukajtis	ba	.cont12
1643*25c28e83SPiotr Jasiukajtis	mov	0,counter
1644*25c28e83SPiotr Jasiukajtis
1645*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update13: side path that always rejoins the main flow at .cont13.
 *
 * %l4 = %l6 - stridex is formed first.  If counter <= 0 control returns at
 * once; the fpadd32 after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%l4+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %l6 - stridex,
 *          tmp_counter = counter - 0, counter = 0.
 *   %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f6 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added.
 *   otherwise
 *       -> fall through: %f6 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f6 to tmp7, reload it
 * into %g1 and recompute %o7 = -((%g1 >> 21) - 537),
 * %o2 = (%g1 >> 10) & 0x7f8, %f44 = (%f6 & DC0) | DC1 and
 * %f18 = fpadd32(%f44, DC2).
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
1646*25c28e83SPiotr Jasiukajtis.update13:
1647*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%l4
1648*25c28e83SPiotr Jasiukajtis	cmp	counter,0
1649*25c28e83SPiotr Jasiukajtis	ble	.cont13
1650*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
1651*25c28e83SPiotr Jasiukajtis
1652*25c28e83SPiotr Jasiukajtis	ld	[%l4+4],%l4
1653*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1654*25c28e83SPiotr Jasiukajtis	bl	1f
1655*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
1656*25c28e83SPiotr Jasiukajtis	orcc	%g1,%l4,%g0
1657*25c28e83SPiotr Jasiukajtis	bz	1f
1658*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%l4
1659*25c28e83SPiotr Jasiukajtis
1660*25c28e83SPiotr Jasiukajtis	cmp	%g1,%l4
1661*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
1662*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f62
1663*25c28e83SPiotr Jasiukajtis
1664*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1665*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1666*25c28e83SPiotr Jasiukajtis
1667*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f44		! (6_0) res = vis_fand(res,DC0);
1668*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1669*25c28e83SPiotr Jasiukajtis
1670*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1671*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1672*25c28e83SPiotr Jasiukajtis
1673*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1674*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1675*25c28e83SPiotr Jasiukajtis	for	%f44,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1676*25c28e83SPiotr Jasiukajtis
1677*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1678*25c28e83SPiotr Jasiukajtis	ba	.cont13
1679*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
1680*25c28e83SPiotr Jasiukajtis2:
1681*25c28e83SPiotr Jasiukajtis	fand	%f6,%f62,%f6
1682*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1683*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f62
1684*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f62,%f6
1685*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1686*25c28e83SPiotr Jasiukajtis
1687*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f44		! (6_0) res = vis_fand(res,DC0);
1688*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1689*25c28e83SPiotr Jasiukajtis
1690*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
1691*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (6_1) hx >>= 10;
1692*25c28e83SPiotr Jasiukajtis	for	%f44,DC1,%f44		! (6_1) res = vis_for(res,DC1);
1693*25c28e83SPiotr Jasiukajtis
1694*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1695*25c28e83SPiotr Jasiukajtis
1696*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
1697*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
1698*25c28e83SPiotr Jasiukajtis	ba	.cont13
1699*25c28e83SPiotr Jasiukajtis	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
1700*25c28e83SPiotr Jasiukajtis1:
1701*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1702*25c28e83SPiotr Jasiukajtis	sub	counter,0,tmp_counter
1703*25c28e83SPiotr Jasiukajtis
1704*25c28e83SPiotr Jasiukajtis	ba	.cont13
1705*25c28e83SPiotr Jasiukajtis	mov	0,counter
1706*25c28e83SPiotr Jasiukajtis
1707*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update14: side path that always rejoins the main flow at .cont14.
 *
 *   counter <= 1: return immediately.
 *   counter >  1: set tmp_px = %l6 - stridex and tmp_counter = counter - 1,
 *                 then return with counter = 1 (the mov sits in the delay
 *                 slot of the final ba).
 *
 * The faddd after "ble" is in a non-annulled delay slot, so it executes on
 * both outcomes of the test.
 *
 * NOTE(review): presumably this cuts the current pass short at an
 * exceptional argument and records where to resume -- confirm against the
 * code that branches here.
 */
1708*25c28e83SPiotr Jasiukajtis.update14:
1709*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1710*25c28e83SPiotr Jasiukajtis	ble	.cont14
1711*25c28e83SPiotr Jasiukajtis	faddd	%f34,K3,%f34		! (3_1) res += K3;
1712*25c28e83SPiotr Jasiukajtis
1713*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1714*25c28e83SPiotr Jasiukajtis	sub	counter,1,tmp_counter
1715*25c28e83SPiotr Jasiukajtis
1716*25c28e83SPiotr Jasiukajtis	ba	.cont14
1717*25c28e83SPiotr Jasiukajtis	mov	1,counter
1718*25c28e83SPiotr Jasiukajtis
1719*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update15: side path that always rejoins the main flow at .cont15.
 *
 * %l2 = %l6 - stridex is formed first.  If counter <= 1 control returns at
 * once; the fpadd32 after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%l2+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %l6 - stridex,
 *          tmp_counter = counter - 1, counter = 1.
 *   %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f0 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added.
 *   otherwise
 *       -> fall through: %f0 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f0 to tmp7, reload it
 * into %g1 and recompute %o7 = -((%g1 >> 21) - 537) + 0x5fe,
 * %o2 = (%g1 >> 10) & 0x7f8, %f28 = (%f0 & DC0) | DC1 and
 * %f18 = fpadd32(%f28, DC2).  Unlike the sibling handlers, the 0x5fe bias
 * is added here rather than after rejoining.
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
1720*25c28e83SPiotr Jasiukajtis.update15:
1721*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%l2
1722*25c28e83SPiotr Jasiukajtis	cmp	counter,1
1723*25c28e83SPiotr Jasiukajtis	ble	.cont15
1724*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
1725*25c28e83SPiotr Jasiukajtis
1726*25c28e83SPiotr Jasiukajtis	ld	[%l2+4],%l2
1727*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1728*25c28e83SPiotr Jasiukajtis	bl	1f
1729*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
1730*25c28e83SPiotr Jasiukajtis	orcc	%g1,%l2,%g0
1731*25c28e83SPiotr Jasiukajtis	bz	1f
1732*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%l2
1733*25c28e83SPiotr Jasiukajtis
1734*25c28e83SPiotr Jasiukajtis	cmp	%g1,%l2
1735*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
1736*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f62
1737*25c28e83SPiotr Jasiukajtis
1738*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1739*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1740*25c28e83SPiotr Jasiukajtis
1741*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f18		! (0_0) res = vis_fand(res,DC0);
1742*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1743*25c28e83SPiotr Jasiukajtis
1744*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1745*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1746*25c28e83SPiotr Jasiukajtis
1747*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1748*25c28e83SPiotr Jasiukajtis	for	%f18,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1749*25c28e83SPiotr Jasiukajtis
1750*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1751*25c28e83SPiotr Jasiukajtis
1752*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1753*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
1754*25c28e83SPiotr Jasiukajtis	ba	.cont15
1755*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
1756*25c28e83SPiotr Jasiukajtis2:
1757*25c28e83SPiotr Jasiukajtis	fand	%f0,%f62,%f0
1758*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1759*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f62
1760*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f62,%f0
1761*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1762*25c28e83SPiotr Jasiukajtis
1763*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f18		! (0_0) res = vis_fand(res,DC0);
1764*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1765*25c28e83SPiotr Jasiukajtis
1766*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
1767*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (0_0) hx >>= 10;
1768*25c28e83SPiotr Jasiukajtis	for	%f18,DC1,%f28		! (0_0) res = vis_for(res,DC1);
1769*25c28e83SPiotr Jasiukajtis
1770*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1771*25c28e83SPiotr Jasiukajtis
1772*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;
1773*25c28e83SPiotr Jasiukajtis
1774*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
1775*25c28e83SPiotr Jasiukajtis	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
1776*25c28e83SPiotr Jasiukajtis	ba	.cont15
1777*25c28e83SPiotr Jasiukajtis	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
1778*25c28e83SPiotr Jasiukajtis1:
1779*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1780*25c28e83SPiotr Jasiukajtis	sub	counter,1,tmp_counter
1781*25c28e83SPiotr Jasiukajtis
1782*25c28e83SPiotr Jasiukajtis	ba	.cont15
1783*25c28e83SPiotr Jasiukajtis	mov	1,counter
1784*25c28e83SPiotr Jasiukajtis
1785*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update16: side path that always rejoins the main flow at .cont16.
 *
 *   counter <= 2: return immediately.
 *   counter >  2: set tmp_px = %l6 - stridex and tmp_counter = counter - 2,
 *                 then return with counter = 2 (the mov sits in the delay
 *                 slot of the final ba).
 *
 * The fand after "ble" is in a non-annulled delay slot, so it executes on
 * both outcomes of the test.
 *
 * NOTE(review): presumably this cuts the current pass short at an
 * exceptional argument and records where to resume -- confirm against the
 * code that branches here.
 */
1786*25c28e83SPiotr Jasiukajtis.update16:
1787*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1788*25c28e83SPiotr Jasiukajtis	ble	.cont16
1789*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);
1790*25c28e83SPiotr Jasiukajtis
1791*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1792*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1793*25c28e83SPiotr Jasiukajtis
1794*25c28e83SPiotr Jasiukajtis	ba	.cont16
1795*25c28e83SPiotr Jasiukajtis	mov	2,counter
1796*25c28e83SPiotr Jasiukajtis
1797*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update17: side path that always rejoins the main flow at .cont17.
 *
 * %i2 = %l6 - stridex is formed first.  If counter <= 2 control returns at
 * once; the fand after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%i2+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %l6 - stridex,
 *          tmp_counter = counter - 2, counter = 2.
 *   %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f6 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added
 *          (%f2 is used as the scratch register for both constants).
 *   otherwise
 *       -> fall through: %f6 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f6 to tmp7, reload it
 * into %g1 and recompute %o7 = -((%g1 >> 21) - 537),
 * %o2 = (%g1 >> 10) & 0x7f8 and %f44 = (%f6 & DC0) | DC1.
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
1798*25c28e83SPiotr Jasiukajtis.update17:
1799*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i2
1800*25c28e83SPiotr Jasiukajtis	cmp	counter,2
1801*25c28e83SPiotr Jasiukajtis	ble	.cont17
1802*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
1803*25c28e83SPiotr Jasiukajtis
1804*25c28e83SPiotr Jasiukajtis	ld	[%i2+4],%i2
1805*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1806*25c28e83SPiotr Jasiukajtis	bl	1f
1807*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
1808*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i2,%g0
1809*25c28e83SPiotr Jasiukajtis	bz	1f
1810*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i2
1811*25c28e83SPiotr Jasiukajtis
1812*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i2
1813*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
1814*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f2
1815*25c28e83SPiotr Jasiukajtis
1816*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1817*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1818*25c28e83SPiotr Jasiukajtis
1819*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f44		! (1_0) res = vis_fand(res,DC0);
1820*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1821*25c28e83SPiotr Jasiukajtis
1822*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1823*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1824*25c28e83SPiotr Jasiukajtis
1825*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1826*25c28e83SPiotr Jasiukajtis
1827*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
1828*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
1829*25c28e83SPiotr Jasiukajtis	ba	.cont17
1830*25c28e83SPiotr Jasiukajtis	for	%f44,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1831*25c28e83SPiotr Jasiukajtis2:
1832*25c28e83SPiotr Jasiukajtis	fand	%f6,%f2,%f6
1833*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1834*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f2
1835*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f2,%f6
1836*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1837*25c28e83SPiotr Jasiukajtis
1838*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f44		! (1_0) res = vis_fand(res,DC0);
1839*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1840*25c28e83SPiotr Jasiukajtis
1841*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
1842*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (1_0) hx >>= 10;
1843*25c28e83SPiotr Jasiukajtis
1844*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1845*25c28e83SPiotr Jasiukajtis
1846*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
1847*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
1848*25c28e83SPiotr Jasiukajtis	ba	.cont17
1849*25c28e83SPiotr Jasiukajtis	for	%f44,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1850*25c28e83SPiotr Jasiukajtis1:
1851*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1852*25c28e83SPiotr Jasiukajtis	sub	counter,2,tmp_counter
1853*25c28e83SPiotr Jasiukajtis
1854*25c28e83SPiotr Jasiukajtis	ba	.cont17
1855*25c28e83SPiotr Jasiukajtis	mov	2,counter
1856*25c28e83SPiotr Jasiukajtis
1857*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update18: side path that always rejoins the main flow at .cont18.
 *
 *   counter <= 3: return immediately.
 *   counter >  3: set tmp_px = %l6 - stridex and tmp_counter = counter - 3,
 *                 then return with counter = 3 (the mov sits in the delay
 *                 slot of the final ba).
 *
 * The fand after "ble" is in a non-annulled delay slot, so it executes on
 * both outcomes of the test.
 *
 * NOTE(review): presumably this cuts the current pass short at an
 * exceptional argument and records where to resume -- confirm against the
 * code that branches here.
 */
1858*25c28e83SPiotr Jasiukajtis.update18:
1859*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1860*25c28e83SPiotr Jasiukajtis	ble	.cont18
1861*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);
1862*25c28e83SPiotr Jasiukajtis
1863*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1864*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter
1865*25c28e83SPiotr Jasiukajtis
1866*25c28e83SPiotr Jasiukajtis	ba	.cont18
1867*25c28e83SPiotr Jasiukajtis	mov	3,counter
1868*25c28e83SPiotr Jasiukajtis
1869*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update19: side path that always rejoins the main flow at .cont19.
 *
 * %i4 = %l6 - stridex is formed first.  If counter <= 3 control returns at
 * once; the fand after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%i4+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %l6 - stridex,
 *          tmp_counter = counter - 3, counter = 3.
 *   %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f0 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added
 *          (%f2 is used as the scratch register for both constants).
 *   otherwise
 *       -> fall through: %f0 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f0 to tmp7, reload it
 * into %g1 and recompute %o7 = -((%g1 >> 21) - 537),
 * %o2 = (%g1 >> 10) & 0x7f8 and %f28 = (%f0 & DC0) | DC1.
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
1870*25c28e83SPiotr Jasiukajtis.update19:
1871*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i4
1872*25c28e83SPiotr Jasiukajtis	cmp	counter,3
1873*25c28e83SPiotr Jasiukajtis	ble	.cont19
1874*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
1875*25c28e83SPiotr Jasiukajtis
1876*25c28e83SPiotr Jasiukajtis	ld	[%i4+4],%i4
1877*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1878*25c28e83SPiotr Jasiukajtis	bl	1f
1879*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
1880*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i4,%g0
1881*25c28e83SPiotr Jasiukajtis	bz	1f
1882*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i4
1883*25c28e83SPiotr Jasiukajtis
1884*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i4
1885*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
1886*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f2
1887*25c28e83SPiotr Jasiukajtis
1888*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1889*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1890*25c28e83SPiotr Jasiukajtis
1891*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f28		! (2_0) res = vis_fand(res,DC0);
1892*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1893*25c28e83SPiotr Jasiukajtis
1894*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1895*25c28e83SPiotr Jasiukajtis
1896*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1897*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1898*25c28e83SPiotr Jasiukajtis
1899*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1900*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
1901*25c28e83SPiotr Jasiukajtis	ba	.cont19
1902*25c28e83SPiotr Jasiukajtis	for	%f28,DC1,%f28		! (2_0) res = vis_for(res,DC1);
1903*25c28e83SPiotr Jasiukajtis2:
1904*25c28e83SPiotr Jasiukajtis	fand	%f0,%f2,%f0
1905*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
1906*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f2
1907*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f0
1908*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
1909*25c28e83SPiotr Jasiukajtis
1910*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f28		! (2_0) res = vis_fand(res,DC0);
1911*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1912*25c28e83SPiotr Jasiukajtis
1913*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
1914*25c28e83SPiotr Jasiukajtis
1915*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (2_0) hx >>= 10;
1916*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1917*25c28e83SPiotr Jasiukajtis
1918*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
1919*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
1920*25c28e83SPiotr Jasiukajtis	ba	.cont19
1921*25c28e83SPiotr Jasiukajtis	for	%f28,DC1,%f28		! (2_0) res = vis_for(res,DC1);
1922*25c28e83SPiotr Jasiukajtis1:
1923*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1924*25c28e83SPiotr Jasiukajtis	sub	counter,3,tmp_counter
1925*25c28e83SPiotr Jasiukajtis
1926*25c28e83SPiotr Jasiukajtis	ba	.cont19
1927*25c28e83SPiotr Jasiukajtis	mov	3,counter
1928*25c28e83SPiotr Jasiukajtis
1929*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update20: side path that always rejoins the main flow at .cont20.
 *
 *   counter <= 4: return immediately.
 *   counter >  4: set tmp_px = %l6 - stridex and tmp_counter = counter - 4,
 *                 then return with counter = 4 (the mov sits in the delay
 *                 slot of the final ba).
 *
 * The fand after "ble" is in a non-annulled delay slot, so it executes on
 * both outcomes of the test.
 *
 * NOTE(review): presumably this cuts the current pass short at an
 * exceptional argument and records where to resume -- confirm against the
 * code that branches here.
 */
1930*25c28e83SPiotr Jasiukajtis.update20:
1931*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1932*25c28e83SPiotr Jasiukajtis	ble	.cont20
1933*25c28e83SPiotr Jasiukajtis	fand	%f18,DC3,%f4		! (2_0) res_c = vis_fand(res_c,DC3);
1934*25c28e83SPiotr Jasiukajtis
1935*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1936*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter
1937*25c28e83SPiotr Jasiukajtis
1938*25c28e83SPiotr Jasiukajtis	ba	.cont20
1939*25c28e83SPiotr Jasiukajtis	mov	4,counter
1940*25c28e83SPiotr Jasiukajtis
1941*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update21: side path that always rejoins the main flow at .cont21.
 *
 * %i5 = %l6 - stridex is formed first.  If counter <= 4 control returns at
 * once; the fand after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%i5+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %l6 - stridex,
 *          tmp_counter = counter - 4, counter = 4.
 *   %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f6 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added
 *          (%f34 is used as the scratch register for both constants).
 *   otherwise
 *       -> fall through: %f6 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f6 to tmp7, reload it
 * into %g1 and recompute %o7 = -((%g1 >> 21) - 537),
 * %o2 = (%g1 >> 10) & 0x7f8 and %f44 = (%f6 & DC0) | DC1.
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
1942*25c28e83SPiotr Jasiukajtis.update21:
1943*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,%i5
1944*25c28e83SPiotr Jasiukajtis	cmp	counter,4
1945*25c28e83SPiotr Jasiukajtis	ble	.cont21
1946*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f16		! (4_0) res = vis_fand(res,DC0);
1947*25c28e83SPiotr Jasiukajtis
1948*25c28e83SPiotr Jasiukajtis	ld	[%i5+4],%i5
1949*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
1950*25c28e83SPiotr Jasiukajtis	bl	1f
1951*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
1952*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i5,%g0
1953*25c28e83SPiotr Jasiukajtis	bz	1f
1954*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i5
1955*25c28e83SPiotr Jasiukajtis
1956*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i5
1957*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
1958*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f34
1959*25c28e83SPiotr Jasiukajtis
1960*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1961*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1962*25c28e83SPiotr Jasiukajtis
1963*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f44		! (3_0) res = vis_fand(res,DC0);
1964*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1965*25c28e83SPiotr Jasiukajtis
1966*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
1967*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
1968*25c28e83SPiotr Jasiukajtis
1969*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1970*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
1971*25c28e83SPiotr Jasiukajtis
1972*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
1973*25c28e83SPiotr Jasiukajtis	ba	.cont21
1974*25c28e83SPiotr Jasiukajtis	for	%f44,DC1,%f44		! (3_0) res = vis_for(res,DC1);
1975*25c28e83SPiotr Jasiukajtis2:
1976*25c28e83SPiotr Jasiukajtis	fand	%f6,%f34,%f6
1977*25c28e83SPiotr Jasiukajtis	fxtod	%f6,%f6			! res = *(long long*)&res;
1978*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f34
1979*25c28e83SPiotr Jasiukajtis	faddd	%f6,%f34,%f6
1980*25c28e83SPiotr Jasiukajtis	st	%f6,[%fp+tmp7]
1981*25c28e83SPiotr Jasiukajtis
1982*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f44		! (3_0) res = vis_fand(res,DC0);
1983*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
1984*25c28e83SPiotr Jasiukajtis
1985*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
1986*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (3_0) hx >>= 10;
1987*25c28e83SPiotr Jasiukajtis
1988*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
1989*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
1990*25c28e83SPiotr Jasiukajtis
1991*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
1992*25c28e83SPiotr Jasiukajtis	ba	.cont21
1993*25c28e83SPiotr Jasiukajtis	for	%f44,DC1,%f44		! (3_0) res = vis_for(res,DC1);
1994*25c28e83SPiotr Jasiukajtis1:
1995*25c28e83SPiotr Jasiukajtis	sub	%l6,stridex,tmp_px
1996*25c28e83SPiotr Jasiukajtis	sub	counter,4,tmp_counter
1997*25c28e83SPiotr Jasiukajtis
1998*25c28e83SPiotr Jasiukajtis	ba	.cont21
1999*25c28e83SPiotr Jasiukajtis	mov	4,counter
2000*25c28e83SPiotr Jasiukajtis
2001*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update22: side path that always rejoins the main flow at .cont22.
 *
 *   counter <= 5: return immediately.
 *   counter >  5: set tmp_px = %i0 - stridex and tmp_counter = counter - 5,
 *                 then return with counter = 5 (the mov sits in the delay
 *                 slot of the final ba).
 *
 * The fmuld after "ble" is in a non-annulled delay slot, so it executes on
 * both outcomes of the test.
 *
 * NOTE(review): presumably this cuts the current pass short at an
 * exceptional argument and records where to resume -- confirm against the
 * code that branches here.
 */
2002*25c28e83SPiotr Jasiukajtis.update22:
2003*25c28e83SPiotr Jasiukajtis	cmp	counter,5
2004*25c28e83SPiotr Jasiukajtis	ble	.cont22
2005*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;
2006*25c28e83SPiotr Jasiukajtis
2007*25c28e83SPiotr Jasiukajtis	sub	%i0,stridex,tmp_px
2008*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter
2009*25c28e83SPiotr Jasiukajtis
2010*25c28e83SPiotr Jasiukajtis	ba	.cont22
2011*25c28e83SPiotr Jasiukajtis	mov	5,counter
2012*25c28e83SPiotr Jasiukajtis
2013*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update23: side path that always rejoins the main flow at .cont23.
 *
 * %l1 = %i0 - stridex is formed first.  If counter <= 5 control returns at
 * once; the fand after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%l1+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %i0 - stridex,
 *          tmp_counter = counter - 5, counter = 5.
 *   %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f0 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added
 *          (%f34 is used as the scratch register for both constants).
 *   otherwise
 *       -> fall through: %f0 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f0 to tmp7, reload it
 * into %g1 and recompute %o7 = -((%g1 >> 21) - 537),
 * %o2 = (%g1 >> 10) & 0x7f8 and %f24 = (%f0 & DC0) | DC1.
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
2014*25c28e83SPiotr Jasiukajtis.update23:
2015*25c28e83SPiotr Jasiukajtis	sub	%i0,stridex,%l1
2016*25c28e83SPiotr Jasiukajtis	cmp	counter,5
2017*25c28e83SPiotr Jasiukajtis	ble	.cont23
2018*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (5_0) res = vis_fand(res,DC0);
2019*25c28e83SPiotr Jasiukajtis
2020*25c28e83SPiotr Jasiukajtis	ld	[%l1+4],%l1
2021*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
2022*25c28e83SPiotr Jasiukajtis	bl	1f
2023*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
2024*25c28e83SPiotr Jasiukajtis	orcc	%g1,%l1,%g0
2025*25c28e83SPiotr Jasiukajtis	bz	1f
2026*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%l1
2027*25c28e83SPiotr Jasiukajtis
2028*25c28e83SPiotr Jasiukajtis	cmp	%g1,%l1
2029*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
2030*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f34
2031*25c28e83SPiotr Jasiukajtis
2032*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
2033*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
2034*25c28e83SPiotr Jasiukajtis
2035*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
2036*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
2037*25c28e83SPiotr Jasiukajtis
2038*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
2039*25c28e83SPiotr Jasiukajtis
2040*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
2041*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
2042*25c28e83SPiotr Jasiukajtis
2043*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
2044*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
2045*25c28e83SPiotr Jasiukajtis	ba	.cont23
2046*25c28e83SPiotr Jasiukajtis	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
2047*25c28e83SPiotr Jasiukajtis2:
2048*25c28e83SPiotr Jasiukajtis	fand	%f0,%f34,%f0
2049*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! res = *(long long*)&res;
2050*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f34
2051*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f34,%f0
2052*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp7]
2053*25c28e83SPiotr Jasiukajtis
2054*25c28e83SPiotr Jasiukajtis	fand	%f0,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
2055*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
2056*25c28e83SPiotr Jasiukajtis
2057*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
2058*25c28e83SPiotr Jasiukajtis
2059*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (4_0) hx >>= 10;
2060*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
2061*25c28e83SPiotr Jasiukajtis
2062*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
2063*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
2064*25c28e83SPiotr Jasiukajtis	ba	.cont23
2065*25c28e83SPiotr Jasiukajtis	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
2066*25c28e83SPiotr Jasiukajtis1:
2067*25c28e83SPiotr Jasiukajtis	sub	%i0,stridex,tmp_px
2068*25c28e83SPiotr Jasiukajtis	sub	counter,5,tmp_counter
2069*25c28e83SPiotr Jasiukajtis
2070*25c28e83SPiotr Jasiukajtis	ba	.cont23
2071*25c28e83SPiotr Jasiukajtis	mov	5,counter
2072*25c28e83SPiotr Jasiukajtis
2073*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update24: side path that always rejoins the main flow at .cont24.
 *
 *   counter <= 6: return immediately.
 *   counter >  6: set tmp_px = %i1 - stridex and tmp_counter = counter - 6,
 *                 then return with counter = 6 (the mov sits in the delay
 *                 slot of the final ba).
 *
 * The fmuld after "ble" is in a non-annulled delay slot, so it executes on
 * both outcomes of the test.
 *
 * NOTE(review): presumably this cuts the current pass short at an
 * exceptional argument and records where to resume -- confirm against the
 * code that branches here.
 */
2074*25c28e83SPiotr Jasiukajtis.update24:
2075*25c28e83SPiotr Jasiukajtis	cmp	counter,6
2076*25c28e83SPiotr Jasiukajtis	ble	.cont24
2077*25c28e83SPiotr Jasiukajtis	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;
2078*25c28e83SPiotr Jasiukajtis
2079*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,tmp_px
2080*25c28e83SPiotr Jasiukajtis	sub	counter,6,tmp_counter
2081*25c28e83SPiotr Jasiukajtis
2082*25c28e83SPiotr Jasiukajtis	ba	.cont24
2083*25c28e83SPiotr Jasiukajtis	mov	6,counter
2084*25c28e83SPiotr Jasiukajtis
2085*25c28e83SPiotr Jasiukajtis	.align	16
/*
 * .update25: side path that always rejoins the main flow at .cont25.
 *
 * %i3 = %i1 - stridex is formed first.  If counter <= 6 control returns at
 * once; the fand after "ble" is in a non-annulled delay slot and runs on
 * both outcomes.  Otherwise the word at [%i3+4] is loaded and tested
 * together with %g1:
 *
 *   %g1 < 0, or (%g1 | word) == 0
 *       -> local label 1: tmp_px = %i1 - stridex,
 *          tmp_counter = counter - 6, counter = 6.
 *   otherwise the two words at %i1 - stridex are reloaded into %f10/%f11
 *   (this handler does not use a value already held in a register, unlike
 *   its siblings), and then:
 *     %g1 >= 0x00080000 (signed compare)
 *       -> local label 2: %f10 is ANDed with the constant at [%o3+0x50],
 *          converted with fxtod, and the constant at [%o3+0x58] is added
 *          (%f60 is used as the scratch register for both constants).
 *     otherwise
 *       -> fall through: %f10 is converted with fxtod directly.
 *
 * Both conversion paths then store the upper word of %f10 to tmp7, reload
 * it into %g1 and recompute %o7 = -((%g1 >> 21) - 537),
 * %o2 = (%g1 >> 10) & 0x7f8 and %f28 = (%f10 & DC0) | DC1.
 *
 * NOTE(review): %g1 presumably holds hx and the loaded word lx, making the
 * first case "x negative or zero" and the other two "x subnormal" --
 * confirm against the code that branches here.
 * NOTE(review): 537 is 1074/2; presumably it offsets the halved exponent
 * for the 2^1074 scale introduced by converting the raw bit pattern with
 * fxtod -- confirm.
 */
2086*25c28e83SPiotr Jasiukajtis.update25:
2087*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%i3
2088*25c28e83SPiotr Jasiukajtis	cmp	counter,6
2089*25c28e83SPiotr Jasiukajtis	ble	.cont25
2090*25c28e83SPiotr Jasiukajtis	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
2091*25c28e83SPiotr Jasiukajtis
2092*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%i3
2093*25c28e83SPiotr Jasiukajtis	cmp	%g1,0
2094*25c28e83SPiotr Jasiukajtis	bl	1f
2095*25c28e83SPiotr Jasiukajtis
/* Delay slot of "bl": also sets the flags tested by "bz" below. */
2096*25c28e83SPiotr Jasiukajtis	orcc	%g1,%i3,%g0
2097*25c28e83SPiotr Jasiukajtis	bz	1f
2098*25c28e83SPiotr Jasiukajtis	nop
2099*25c28e83SPiotr Jasiukajtis
/* %i3 was overwritten by the load above, so the address is rebuilt. */
2100*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,%i3
2101*25c28e83SPiotr Jasiukajtis	ld	[%i3],%f10
2102*25c28e83SPiotr Jasiukajtis	ld	[%i3+4],%f11
2103*25c28e83SPiotr Jasiukajtis
2104*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x00080000),%i3
2105*25c28e83SPiotr Jasiukajtis
2106*25c28e83SPiotr Jasiukajtis	cmp	%g1,%i3
2107*25c28e83SPiotr Jasiukajtis	bge,a	2f
/* Annulled delay slot: the load happens only when "bge,a" is taken. */
2108*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x50],%f60
2109*25c28e83SPiotr Jasiukajtis
2110*25c28e83SPiotr Jasiukajtis	fxtod	%f10,%f10		! res = *(long long*)&res;
2111*25c28e83SPiotr Jasiukajtis	st	%f10,[%fp+tmp7]
2112*25c28e83SPiotr Jasiukajtis
2113*25c28e83SPiotr Jasiukajtis	fand	%f10,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
2114*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
2115*25c28e83SPiotr Jasiukajtis
2116*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
2117*25c28e83SPiotr Jasiukajtis
2118*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
2119*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
2120*25c28e83SPiotr Jasiukajtis
2121*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
2122*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
2123*25c28e83SPiotr Jasiukajtis
2124*25c28e83SPiotr Jasiukajtis	ba	.cont25
2125*25c28e83SPiotr Jasiukajtis	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
2126*25c28e83SPiotr Jasiukajtis2:
2127*25c28e83SPiotr Jasiukajtis	fand	%f10,%f60,%f10
2128*25c28e83SPiotr Jasiukajtis	fxtod	%f10,%f10		! res = *(long long*)&res;
2129*25c28e83SPiotr Jasiukajtis	ldd	[%o3+0x58],%f60
2130*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f60,%f10
2131*25c28e83SPiotr Jasiukajtis	st	%f10,[%fp+tmp7]
2132*25c28e83SPiotr Jasiukajtis
2133*25c28e83SPiotr Jasiukajtis	fand	%f10,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
2134*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp7],%g1
2135*25c28e83SPiotr Jasiukajtis
2136*25c28e83SPiotr Jasiukajtis	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
2137*25c28e83SPiotr Jasiukajtis
2138*25c28e83SPiotr Jasiukajtis	sra	%g1,10,%o2		! (5_0) hx >>= 10;
2139*25c28e83SPiotr Jasiukajtis	sub	%o7,537,%o7
2140*25c28e83SPiotr Jasiukajtis
2141*25c28e83SPiotr Jasiukajtis	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
2142*25c28e83SPiotr Jasiukajtis	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;
2143*25c28e83SPiotr Jasiukajtis
2144*25c28e83SPiotr Jasiukajtis	ba	.cont25
2145*25c28e83SPiotr Jasiukajtis	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
2146*25c28e83SPiotr Jasiukajtis1:
2147*25c28e83SPiotr Jasiukajtis	sub	%i1,stridex,tmp_px
2148*25c28e83SPiotr Jasiukajtis	sub	counter,6,tmp_counter
2149*25c28e83SPiotr Jasiukajtis
2150*25c28e83SPiotr Jasiukajtis	ba	.cont25
2151*25c28e83SPiotr Jasiukajtis	mov	6,counter
2152*25c28e83SPiotr Jasiukajtis
/*
 * .exit: return from __vrsqrt.  "ret" transfers control back to the caller
 * and the "restore" in its delay slot pops this routine's register window.
 * SET_SIZE(__vrsqrt) is a macro defined outside this file section;
 * presumably it records the symbol size for the linker -- confirm in the
 * header that defines it.
 */
2153*25c28e83SPiotr Jasiukajtis.exit:
2154*25c28e83SPiotr Jasiukajtis	ret
2155*25c28e83SPiotr Jasiukajtis	restore
2156*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vrsqrt)
2157*25c28e83SPiotr Jasiukajtis
2158