/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatanf.S"

#include "libm.h"

	RO_DATA
	.align	64

.CONST_TBL:
	.word	0x3fefffff, 0xfffccbbc	! K0 =  9.99999999976686608841e-01
	.word	0xbfd55554, 0x51c6b90f	! K1 = -3.33333091601972730504e-01
	.word	0x3fc98d6d, 0x926596cc	! K2 =  1.99628540499523379702e-01
	.word	0x00020000, 0x00000000	! DC1
	.word	0xfffc0000, 0x00000000	! DC2
	.word	0x7ff00000, 0x00000000	! DC3
	.word	0x3ff00000, 0x00000000	! DONE = 1.0
	.word	0x40000000, 0x00000000	! DTWO = 2.0

! parr0[i] = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
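!
! Each word is the high word of 1/m, m = 1 + (2i + 1)/256 (the midpoint of
! the i-th 1/128-wide subinterval of [1,2)), biased by 0x3ff00000 so that a
! later fpsub32 against vis_fand(div,DC3) both strips the bias and rescales
! the estimate by the dividend's exponent.  A host-side generator could look
! like this (an illustrative sketch, not part of the build; assumes IEEE
! doubles):
!
!	#include <stdio.h>
!	#include <string.h>
!
!	int main(void) {
!		for (int i = 0; i < 128; i++) {
!			unsigned long long db, rb;
!			double d, r;
!			db = ((unsigned long long)i << 45) | 0x3ff0100000000000ULL;
!			memcpy(&d, &db, 8);	/* d = 1 + (2i + 1)/256 */
!			r = 1.0 / d;
!			memcpy(&rb, &r, 8);
!			printf("0x%08x\n", (unsigned)(rb >> 32) + 0x3ff00000u);
!		}
!		return 0;
!	}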

	.word	0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
	.word	0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
	.word	0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
	.word	0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
	.word	0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
	.word	0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
	.word	0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
	.word	0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
	.word	0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
	.word	0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
	.word	0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
	.word	0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
	.word	0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
	.word	0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
	.word	0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
	.word	0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
	.word	0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
	.word	0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
	.word	0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
	.word	0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
	.word	0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
	.word	0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
	.word	0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
	.word	0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
	.word	0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
	.word	0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
	.word	0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
	.word	0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
	.word	0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
	.word	0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
	.word	0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
	.word	0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804

	.word	0x3ff00000, 0x00000000	!  1.0
	.word	0xbff00000, 0x00000000	! -1.0

! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
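!
! These are atan() values on the float grid (i + 460) << 21, i.e. four grid
! points per binade starting at 2^-12; the main path rounds |x| to this grid
! and looks up atan of the grid point here.  A host-side generator could
! look like this (an illustrative sketch, not part of the build):
!
!	#include <math.h>
!	#include <stdio.h>
!	#include <string.h>
!
!	int main(void) {
!		for (int i = 0; i < 156; i++) {
!			unsigned fb = (unsigned)(i + 460) << 21;
!			unsigned long long ab;
!			float f;
!			double a;
!			memcpy(&f, &fb, 4);
!			a = atan((double)f);
!			memcpy(&ab, &a, 8);
!			printf("0x%08x, 0x%08x\n",
!			    (unsigned)(ab >> 32), (unsigned)ab);
!		}
!		return 0;
!	}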

	.word	0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
	.word	0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
	.word	0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
	.word	0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
	.word	0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
	.word	0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
	.word	0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
	.word	0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
	.word	0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
	.word	0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
	.word	0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
	.word	0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
	.word	0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
	.word	0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
	.word	0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
	.word	0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
	.word	0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
	.word	0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
	.word	0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
	.word	0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
	.word	0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
	.word	0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
	.word	0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
	.word	0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
	.word	0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
	.word	0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
	.word	0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
	.word	0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
	.word	0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
	.word	0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
	.word	0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
	.word	0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
	.word	0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
	.word	0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
	.word	0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
	.word	0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
	.word	0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
	.word	0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
	.word	0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
	.word	0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
	.word	0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
	.word	0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
	.word	0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
	.word	0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
	.word	0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
	.word	0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
	.word	0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
	.word	0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
	.word	0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
	.word	0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
	.word	0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
	.word	0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
	.word	0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
	.word	0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
	.word	0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
	.word	0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
	.word	0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
	.word	0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
	.word	0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
	.word	0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
	.word	0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
	.word	0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
	.word	0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
	.word	0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
	.word	0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
	.word	0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
	.word	0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
	.word	0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
	.word	0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
	.word	0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
	.word	0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
	.word	0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
	.word	0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
	.word	0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
	.word	0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
	.word	0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
	.word	0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
	.word	0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886

#define DC2		%f2
#define DTWO		%f6
#define DONE		%f52
#define K0		%f54
#define K1		%f56
#define K2		%f58
#define DC1		%f60
#define DC3		%f62

#define stridex		%o2
#define stridey		%o3
#define MASK_0x7fffffff	%i1
#define MASK_0x100000	%i5

#define tmp_px		STACK_BIAS-32
#define tmp_counter	STACK_BIAS-24
#define tmp0		STACK_BIAS-16
#define tmp1		STACK_BIAS-8

#define counter		%l1

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

!--------------------------------------------------------------------
!		!!!!!	vatanf algorithm	!!!!!
!  ux = ((int*)px)[0];
!  ax = ux & 0x7fffffff;
!
!  if ( ax < 0x39b89c55 )
!  {
!    *(int*)py = ux;
!    goto next;
!  }
!
!  if ( ax > 0x4c700518 )
!  {
!    if ( ax > 0x7f800000 )
!    {
!      float fpx = fabsf(*px);
!      fpx *= fpx;
!      *py = fpx;
!      goto next;
!    }
!
!    sign = ux & 0x80000000;
!    sign |= pi_2;
!    *(int*)py = sign;
!    goto next;
!  }
!
!  ftmp0 = *px;
!  x = (double)ftmp0;
!  px += stridex;
!  y = vis_fpadd32(x,DC1);
!  y = vis_fand(y,DC2);
!  div = x * y;
!  xx = x - y;
!  div += DONE;
!  i = ((unsigned long long*)&div)[0];
!  y0 = vis_fand(div,DC3);
!  i >>= 43;
!  i &= 508;
!  *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
!  y0 = vis_fpsub32(dtmp0, y0);
!  dtmp0 = div * y0;
!  dtmp0 = DTWO - dtmp0;
!  y0 *= dtmp0;
!  dtmp1 = div * y0;
!  dtmp1 = DTWO - dtmp1;
!  y0 *= dtmp1;
!  ax = ux & 0x7fffffff;
!  ax += 0x00100000;
!  ax >>= 18;
!  ax &= -8;
!  res = *(double*)((char*)parr1 + ax);
!  ux >>= 28;
!  ux &= -8;
!  dtmp0 = *(double*)((char*)sign_arr + ux);
!  res *= dtmp0;
!  xx *= y0;
!  x2 = xx * xx;
!  dtmp0 = K2 * x2;
!  dtmp0 += K1;
!  dtmp0 *= x2;
!  dtmp0 += K0;
!  dtmp0 *= xx;
!  res += dtmp0;
!  ftmp0 = (float)res;
!  py[0] = ftmp0;
!  py += stridey;
!--------------------------------------------------------------------
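!--------------------------------------------------------------------
! Notes.  The reduction above rests on the identity
!	atan(x) = atan(y) + atan((x - y) / (1 + x*y)),
! with y = x rounded to the parr1 grid, so atan(y) is a table lookup and
! the second atan's argument is small enough for the cubic K0..K2
! polynomial.  parr0 seeds 1/(1 + x*y), which two Newton steps
! (y0 *= 2 - div*y0) refine to working precision.  The (i_j) tags in the
! instruction comments below appear to denote software-pipeline slots:
! element i of the 8-way unrolled loop, from the current (j = 0) or the
! previous (j = 1) iteration.
!
! A scalar C sketch of the per-element flow (illustrative only, not part
! of the build: vatanf_one, bits()/dbits() and the extern names standing
! for the tables above are hypothetical; big-endian word order as on
! SPARC is assumed):
!
!	#include <math.h>
!	#include <string.h>
!
!	extern const unsigned parr0[128];	/* reciprocal seed words */
!	extern const double parr1[156];		/* atan of grid points   */
!	#define K0	 9.99999999976686608841e-01
!	#define K1	-3.33333091601972730504e-01
!	#define K2	 1.99628540499523379702e-01
!
!	static unsigned long long bits(double d)
!	{ unsigned long long b; memcpy(&b, &d, 8); return b; }
!	static double dbits(unsigned long long b)
!	{ double d; memcpy(&d, &b, 8); return d; }
!
!	float vatanf_one(float fx)
!	{
!		unsigned ux, ax, r;
!		memcpy(&ux, &fx, 4);
!		ax = ux & 0x7fffffff;
!
!		if (ax < 0x39b89c55)		/* atan(x) rounds to x */
!			return fx;
!		if (ax > 0x4c700518) {		/* huge, inf, or NaN   */
!			if (ax > 0x7f800000) {	/* NaN: quiet via x*x  */
!				fx = fabsf(fx);
!				return fx * fx;
!			}
!			r = (ux & 0x80000000) | 0x3fc90fdb;	/* +-pi/2 */
!			memcpy(&fx, &r, 4);
!			return fx;
!		}
!
!		double x = fx;
!		/* y: x with its high word rounded to the grid (DC1/DC2) */
!		unsigned hi = (unsigned)(bits(x) >> 32);
!		double y = dbits((unsigned long long)
!		    ((hi + 0x00020000) & 0xfffc0000) << 32);
!
!		double div = 1.0 + x * y;
!		/* seed 1/div from parr0, then two Newton refinements */
!		unsigned long long ib = bits(div);
!		unsigned w = parr0[(unsigned)(ib >> 45) & 127] -
!		    (unsigned)((ib >> 32) & 0x7ff00000);
!		double y0 = dbits((unsigned long long)w << 32);
!		y0 *= 2.0 - div * y0;
!		y0 *= 2.0 - div * y0;
!
!		double xx = (x - y) * y0;	/* tan(atan x - atan y) */
!		double x2 = xx * xx;
!		double sign = (ux >> 31) ? -1.0 : 1.0;	/* sign_arr */
!		double res = sign * parr1[((ax + 0x00100000) >> 21) - 460];
!		return (float)(res + ((K2 * x2 + K1) * x2 + K0) * xx);
!	}
!--------------------------------------------------------------------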

	ENTRY(__vatanf)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,.CONST_TBL,l2)

	st	%i0,[%fp+tmp_counter]

	sllx	%i2,2,stridex
	sllx	%i4,2,stridey

	or	%g0,%i3,%o1
	stx	%i1,[%fp+tmp_px]

	ldd	[%l2],K0
	ldd	[%l2+8],K1
	ldd	[%l2+16],K2
	ldd	[%l2+24],DC1
	ldd	[%l2+32],DC2
	ldd	[%l2+40],DC3
	ldd	[%l2+48],DONE
	ldd	[%l2+56],DTWO

	add	%l2,64,%i4
	add	%l2,64+512,%l0
	add	%l2,64+512+16-0x1cc*8,%l7

	sethi	%hi(0x100000),MASK_0x100000
	sethi	%hi(0x7ffffc00),MASK_0x7fffffff
	add	MASK_0x7fffffff,1023,MASK_0x7fffffff

	sethi	%hi(0x39b89c00),%o4
	add	%o4,0x55,%o4
	sethi	%hi(0x4c700400),%o5
	add	%o5,0x118,%o5

.begin:
	ld	[%fp+tmp_counter],counter
	ldx	[%fp+tmp_px],%i3
	st	%g0,[%fp+tmp_counter]
.begin1:
	cmp	counter,0
	ble,pn	%icc,.exit
	nop

	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];

	and	%l6,MASK_0x7fffffff,%l5	! (0_0) ax = ux & 0x7fffffff;
	lda	[%i3]0x82,%f0		! (0_0) ftmp0 = *px;

	cmp	%l5,%o4			! (0_0) ax ? 0x39b89c55
	bl,pn	%icc,.spec0		! (0_0) if ( ax < 0x39b89c55 )
	nop

	cmp	%l5,%o5			! (0_0) ax ? 0x4c700518
	bg,pn	%icc,.spec1		! (0_0) if ( ax > 0x4c700518 )
	nop

	add	%i3,stridex,%l5		! px += stridex;
	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;
	mov	%l6,%i3

	lda	[%l5]0x82,%l6		! (1_0) ux = ((int*)px)[0];

	and	%l6,MASK_0x7fffffff,%o7	! (1_0) ax = ux & 0x7fffffff;
	lda	[%l5]0x82,%f0		! (1_0) ftmp0 = *px;
	add	%l5,stridex,%l4		! px += stridex;
	fpadd32	%f22,DC1,%f24		! (0_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (1_0) ax ? 0x39b89c55
	bl,pn	%icc,.update0		! (1_0) if ( ax < 0x39b89c55 )
	nop
.cont0:
	cmp	%o7,%o5			! (1_0) ax ? 0x4c700518
	bg,pn	%icc,.update1		! (1_0) if ( ax > 0x4c700518 )
	nop
.cont1:
	fstod	%f0,%f20		! (1_0) x = (double)ftmp0;
	mov	%l6,%l5

	fand	%f24,DC2,%f26		! (0_0) y = vis_fand(y,dconst2);

	fmuld	%f22,%f26,%f32		! (0_0) div = x * y;

	lda	[%l4]0x82,%l6		! (2_0) ux = ((int*)px)[0];
	fsubd	%f22,%f26,%f22		! (0_0) xx = x - y;

	and	%l6,MASK_0x7fffffff,%o7	! (2_0) ax = ux & 0x7fffffff;
	lda	[%l4]0x82,%f0		! (2_0) ftmp0 = *px;
	add	%l4,stridex,%l3		! px += stridex;
	fpadd32	%f20,DC1,%f24		! (1_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (2_0) ax ? 0x39b89c55
	bl,pn	%icc,.update2		! (2_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f32,%f32		! (0_0) div += done;
.cont2:
	cmp	%o7,%o5			! (2_0) ax ? 0x4c700518
	bg,pn	%icc,.update3		! (2_0) if ( ax > 0x4c700518 )
	nop
.cont3:
	std	%f32,[%fp+tmp0]		! (0_0) i = ((unsigned long long*)&div)[0];
	mov	%l6,%l4
	fstod	%f0,%f18		! (2_0) x = (double)ftmp0;

	fand	%f24,DC2,%f26		! (1_0) y = vis_fand(y,dconst2);

	fmuld	%f20,%f26,%f30		! (1_0) div = x * y;

	lda	[%l3]0x82,%l6		! (3_0) ux = ((int*)px)[0];
	fsubd	%f20,%f26,%f20		! (1_0) xx = x - y;

	and	%l6,MASK_0x7fffffff,%o7	! (3_0) ax = ux & 0x7fffffff;
	lda	[%l3]0x82,%f0		! (3_0) ftmp0 = *px;
	add	%l3,stridex,%i0		! px += stridex;
	fpadd32	%f18,DC1,%f24		! (2_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (3_0) ax ? 0x39b89c55
	bl,pn	%icc,.update4		! (3_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f30,%f30		! (1_0) div += done;
.cont4:
	cmp	%o7,%o5			! (3_0) ax ? 0x4c700518
	bg,pn	%icc,.update5		! (3_0) if ( ax > 0x4c700518 )
	nop
.cont5:
	std	%f30,[%fp+tmp1]		! (1_0) i = ((unsigned long long*)&div)[0];
	mov	%l6,%l3
	fstod	%f0,%f16		! (3_0) x = (double)ftmp0;

	ldx	[%fp+tmp0],%o0		! (0_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (2_0) y = vis_fand(y,dconst2);

	fand	%f32,DC3,%f24		! (0_0) y0 = vis_fand(div,dconst3);

	srlx	%o0,43,%o0		! (0_0) i >>= 43;

	and	%o0,508,%l6		! (0_0) i &= 508;

	ld	[%i4+%l6],%f0		! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);

	fmuld	%f18,%f26,%f28		! (2_0) div = x * y;

	lda	[%i0]0x82,%l6		! (4_0) ux = ((int*)px)[0];
	fsubd	%f18,%f26,%f18		! (2_0) xx = x - y;

	fpsub32	%f0,%f24,%f40		! (0_0) y0 = vis_fpsub32(dtmp0, y0);

	and	%l6,MASK_0x7fffffff,%o7	! (4_0) ax = ux & 0x7fffffff;
	lda	[%i0]0x82,%f0		! (4_0) ftmp0 = *px;
	add	%i0,stridex,%i2		! px += stridex;
	fpadd32	%f16,DC1,%f24		! (3_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (4_0) ax ? 0x39b89c55
	bl,pn	%icc,.update6		! (4_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f28,%f28		! (2_0) div += done;
.cont6:
	fmuld	%f32,%f40,%f42		! (0_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (4_0) ax ? 0x4c700518
	bg,pn	%icc,.update7		! (4_0) if ( ax > 0x4c700518 )
	nop
.cont7:
	std	%f28,[%fp+tmp0]		! (2_0) i = ((unsigned long long*)&div)[0];
	mov	%l6,%i0
	fstod	%f0,%f14		! (4_0) x = (double)ftmp0;

	ldx	[%fp+tmp1],%g1		! (1_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (3_0) y = vis_fand(y,dconst2);

	fand	%f30,DC3,%f24		! (1_0) y0 = vis_fand(div,dconst3);

	fsubd	DTWO,%f42,%f44		! (0_0) dtmp0 = dtwo - dtmp0;
	srlx	%g1,43,%g1		! (1_0) i >>= 43;

	and	%g1,508,%l6		! (1_0) i &= 508;

	ld	[%i4+%l6],%f0		! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);

	fmuld	%f16,%f26,%f34		! (3_0) div = x * y;

	lda	[%i2]0x82,%l6		! (5_0) ux = ((int*)px)[0];
	fsubd	%f16,%f26,%f16		! (3_0) xx = x - y;

	fpsub32	%f0,%f24,%f38		! (1_0) y0 = vis_fpsub32(dtmp0, y0);
	add	%i2,stridex,%l2		! px += stridex;

	fmuld	%f40,%f44,%f40		! (0_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (5_0) ax = ux & 0x7fffffff;
	lda	[%i2]0x82,%f0		! (5_0) ftmp0 = *px;
	fpadd32	%f14,DC1,%f24		! (4_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (5_0) ax ? 0x39b89c55
	bl,pn	%icc,.update8		! (5_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f34,%f34		! (3_0) div += done;
.cont8:
	fmuld	%f30,%f38,%f42		! (1_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (5_0) ax ? 0x4c700518
	bg,pn	%icc,.update9		! (5_0) if ( ax > 0x4c700518 )
	nop
.cont9:
	std	%f34,[%fp+tmp1]		! (3_0) i = ((unsigned long long*)&div)[0];
	mov	%l6,%i2
	fstod	%f0,%f36		! (5_0) x = (double)ftmp0;

	fmuld	%f32,%f40,%f32		! (0_0) dtmp1 = div0 * y0;
	ldx	[%fp+tmp0],%o0		! (2_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (4_0) y = vis_fand(y,dconst2);

	fand	%f28,DC3,%f24		! (2_0) y0 = vis_fand(div,dconst3);

	fsubd	DTWO,%f42,%f44		! (1_0) dtmp0 = dtwo - dtmp0;
	srlx	%o0,43,%o0		! (2_0) i >>= 43;

	and	%o0,508,%l6		! (2_0) i &= 508;
	fsubd	DTWO,%f32,%f46		! (0_0) dtmp1 = dtwo - dtmp1;

	ld	[%i4+%l6],%f0		! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);

	fmuld	%f14,%f26,%f32		! (4_0) div = x * y;

	lda	[%l2]0x82,%l6		! (6_0) ux = ((int*)px)[0];
	fsubd	%f14,%f26,%f14		! (4_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (0_0) y0 *= dtmp1;
	add	%l2,stridex,%g5		! px += stridex;
	fpsub32	%f0,%f24,%f40		! (2_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (1_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (6_0) ax = ux & 0x7fffffff;
	lda	[%l2]0x82,%f0		! (6_0) ftmp0 = *px;
	fpadd32	%f36,DC1,%f24		! (5_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (6_0) ax ? 0x39b89c55
	bl,pn	%icc,.update10		! (6_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f32,%f32		! (4_0) div += done;
.cont10:
	fmuld	%f28,%f40,%f42		! (2_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (6_0) ax ? 0x4c700518
	bg,pn	%icc,.update11		! (6_0) if ( ax > 0x4c700518 )
	nop
.cont11:
	fmuld	%f22,%f26,%f22		! (0_0) xx *= y0;
	mov	%l6,%l2
	std	%f32,[%fp+tmp0]		! (4_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f10		! (6_0) x = (double)ftmp0;

	fmuld	%f30,%f38,%f30		! (1_0) dtmp1 = div0 * y0;
	ldx	[%fp+tmp1],%g1		! (3_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (5_0) y = vis_fand(y,dconst2);

	fand	%f34,DC3,%f24		! (3_0) y0 = vis_fand(div,dconst3);

	fmuld	%f22,%f22,%f50		! (0_0) x2 = xx * xx;
	srlx	%g1,43,%g1		! (3_0) i >>= 43;
	fsubd	DTWO,%f42,%f44		! (2_0) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (3_0) i &= 508;
	mov	%i3,%o7
	fsubd	DTWO,%f30,%f46		! (1_0) dtmp1 = dtwo - dtmp1;

	ld	[%i4+%l6],%f0		! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);

	fmuld	%f36,%f26,%f30		! (5_0) div = x * y;
	srl	%o7,28,%g1		! (0_0) ux >>= 28;
	add	%g5,stridex,%i3		! px += stridex;

	fmuld	K2,%f50,%f4		! (0_0) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o0	! (0_0) ax = ux & 0x7fffffff;
	lda	[%g5]0x82,%l6		! (7_0) ux = ((int*)px)[0];
	fsubd	%f36,%f26,%f36		! (5_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (1_0) y0 *= dtmp1;
	add	%o0,MASK_0x100000,%o0	! (0_0) ax += 0x00100000;
	and	%g1,-8,%g1		! (0_0) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (3_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (2_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (7_0) ax = ux & 0x7fffffff;
	lda	[%g5]0x82,%f0		! (7_0) ftmp0 = *px;
	fpadd32	%f10,DC1,%f24		! (6_0) y = vis_fpadd32(x,dconst1);

	cmp	%o7,%o4			! (7_0) ax ? 0x39b89c55
	bl,pn	%icc,.update12		! (7_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f30,%f30		! (5_0) div += done;
.cont12:
	fmuld	%f34,%f38,%f42		! (3_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (7_0) ax ? 0x4c700518
	bg,pn	%icc,.update13		! (7_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (0_0) dtmp0 += K1;
.cont13:
	fmuld	%f20,%f26,%f20		! (1_0) xx *= y0;
	srl	%o0,18,%o7		! (0_0) ax >>= 18;
	std	%f30,[%fp+tmp1]		! (5_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f8			! (7_0) x = (double)ftmp0;

	fmuld	%f28,%f40,%f28		! (2_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (0_0) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (4_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (6_0) y = vis_fand(y,dconst2);

	add	%o7,%l7,%o7		! (0_0) (char*)parr1 + ax;
	mov	%l6,%g5
	ldd	[%l0+%g1],%f48		! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (0_0) dtmp0 *= x2;
	srlx	%o0,43,%o0		! (4_0) i >>= 43;
	ldd	[%o7],%f0		! (0_0) res = *(double*)((char*)parr1 + ax);
	fand	%f32,DC3,%f24		! (4_0) y0 = vis_fand(div,dconst3);

	fmuld	%f20,%f20,%f50		! (1_0) x2 = xx * xx;
	and	%o0,508,%l6		! (4_0) i &= 508;
	mov	%l5,%o7
	fsubd	DTWO,%f42,%f44		! (3_0) dtmp0 = dtwo - dtmp0;

	fsubd	DTWO,%f28,%f46		! (2_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (0_0) res *= dtmp0;
	srl	%o7,28,%l5		! (1_0) ux >>= 28;
	ld	[%i4+%l6],%f0		! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);

	fmuld	%f10,%f26,%f28		! (6_0) div = x * y;
	faddd	%f4,K0,%f42		! (0_0) dtmp0 += K0;

	subcc	counter,8,counter
	bneg,pn	%icc,.tail
	or	%g0,%o1,%o0

	add	%fp,tmp0,%g1
	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];

	ba	.main_loop
	add	%i3,stridex,%l5		! px += stridex;

	.align	16
.main_loop:
	fsubd	%f10,%f26,%f10		! (6_1) xx = x - y;
	and	%o7,MASK_0x7fffffff,%o1	! (1_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (7_1) py[0] = ftmp0;
	fmuld	K2,%f50,%f4		! (1_1) dtmp0 = K2 * x2;

	fmuld	%f40,%f46,%f26		! (2_1) y0 *= dtmp1;
	srl	%o7,28,%o7		! (1_1) ux >>= 28;
	add	%o1,MASK_0x100000,%g1	! (1_1) ax += 0x00100000;
	fpsub32	%f0,%f24,%f40		! (4_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (3_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o1	! (0_0) ax = ux & 0x7fffffff;
	lda	[%i3]0x82,%f0		! (0_0) ftmp0 = *px;
	fpadd32	%f8,DC1,%f24		! (7_1) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f22,%f44		! (0_1) dtmp0 *= xx;
	cmp	%o1,%o4			! (0_0) ax ? 0x39b89c55
	bl,pn	%icc,.update14		! (0_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f28,%f28		! (6_1) div += done;
.cont14:
	fmuld	%f32,%f40,%f42		! (4_1) dtmp0 = div0 * y0;
	cmp	%o1,%o5			! (0_0) ax ? 0x4c700518
	bg,pn	%icc,.update15		! (0_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (1_1) dtmp0 += K1;
.cont15:
	fmuld	%f18,%f26,%f18		! (2_1) xx *= y0;
	srl	%g1,18,%o1		! (1_1) ax >>= 18;
	std	%f28,[%fp+tmp0]		! (6_1) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;

	fmuld	%f34,%f38,%f34		! (3_1) dtmp1 = div0 * y0;
	and	%o1,-8,%o1		! (1_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (5_1) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (7_1) y = vis_fand(y,dconst2);

	ldd	[%o1+%l7],%f0		! (1_1) res = *(double*)((char*)parr1 + ax);
	and	%o7,-8,%o7		! (1_1) ux &= -8;
	mov	%l6,%i3
	faddd	%f48,%f44,%f12		! (0_1) res += dtmp0;

	fmuld	%f4,%f50,%f4		! (1_1) dtmp0 *= x2;
	nop
	ldd	[%l0+%o7],%f48		! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	fand	%f30,DC3,%f24		! (5_1) y0 = vis_fand(div,dconst3);

	fmuld	%f18,%f18,%f50		! (2_1) x2 = xx * xx;
	srlx	%g1,43,%g1		! (5_1) i >>= 43;
	mov	%l4,%o7
	fsubd	DTWO,%f42,%f44		! (4_1) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (5_1) i &= 508;
	nop
	bn,pn	%icc,.exit
	fsubd	DTWO,%f34,%f46		! (3_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (1_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (0_1) ftmp0 = (float)res;

	fmuld	%f8,%f26,%f34		! (7_1) div = x * y;
	srl	%o7,28,%o1		! (2_1) ux >>= 28;
	lda	[%l5]0x82,%l6		! (1_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (1_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (2_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (2_1) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (0_1) py[0] = ftmp0;
	fsubd	%f8,%f26,%f8		! (7_1) xx = x - y;

	fmuld	%f38,%f46,%f26		! (3_1) y0 *= dtmp1;
	add	%l5,stridex,%l4		! px += stridex;
	add	%o7,MASK_0x100000,%o0	! (2_1) ax += 0x00100000;
	fpsub32	%f0,%f24,%f38		! (5_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (4_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (1_0) ax = ux & 0x7fffffff;
	lda	[%l5]0x82,%f0		! (1_0) ftmp0 = *px;
	fpadd32	%f22,DC1,%f24		! (0_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f20,%f44		! (1_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (1_0) ax ? 0x39b89c55
	bl,pn	%icc,.update16		! (1_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f34,%f34		! (7_1) div += done;
.cont16:
	fmuld	%f30,%f38,%f42		! (5_1) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (1_0) ax ? 0x4c700518
	bg,pn	%icc,.update17		! (1_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (2_1) dtmp0 += K1;
.cont17:
	fmuld	%f16,%f26,%f16		! (3_1) xx *= y0;
	srl	%o0,18,%o7		! (2_1) ax >>= 18;
	std	%f34,[%fp+tmp1]		! (7_1) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f20		! (1_0) x = (double)ftmp0;

	fmuld	%f32,%f40,%f32		! (4_1) dtmp1 = div0 * y0;
	ldx	[%fp+tmp0],%o0		! (6_1) i = ((unsigned long long*)&div)[0];
	and	%o1,-8,%o1		! (2_1) ux &= -8;
	fand	%f24,DC2,%f26		! (0_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (1_1) res += dtmp0;
	and	%o7,-8,%o7		! (2_1) ax &= -8;
	ldd	[%l0+%o1],%f48		! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	ldd	[%o7+%l7],%f0		! (2_1) res = *(double*)((char*)parr1 + ax);
	mov	%l6,%l5
	fmuld	%f4,%f50,%f4		! (2_1) dtmp0 *= x2;
	fand	%f28,DC3,%f24		! (6_1) y0 = vis_fand(div,dconst3);

	fmuld	%f16,%f16,%f50		! (3_1) x2 = xx * xx;
	srlx	%o0,43,%o0		! (6_1) i >>= 43;
	mov	%l3,%o7
	fsubd	DTWO,%f42,%f44		! (5_1) dtmp0 = dtwo - dtmp0;

	and	%o0,508,%l6		! (6_1) i &= 508;
	add	%l4,stridex,%l3		! px += stridex;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f32,%f46		! (4_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (2_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (1_1) ftmp0 = (float)res;

	fmuld	%f22,%f26,%f32		! (0_0) div = x * y;
	srl	%o7,28,%o1		! (3_1) ux >>= 28;
	lda	[%l4]0x82,%l6		! (2_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (2_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (3_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (3_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (1_1) py[0] = ftmp0;
	fsubd	%f22,%f26,%f22		! (0_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (4_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%g1	! (3_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (3_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (6_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (5_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (2_0) ax = ux & 0x7fffffff;
	lda	[%l4]0x82,%f0		! (2_0) ftmp0 = *px;
	fpadd32	%f20,DC1,%f24		! (1_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f18,%f44		! (2_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (2_0) ax ? 0x39b89c55
	bl,pn	%icc,.update18		! (2_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f32,%f32		! (0_0) div += done;
.cont18:
	fmuld	%f28,%f40,%f42		! (6_1) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (2_0) ax ? 0x4c700518
	bg,pn	%icc,.update19		! (2_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (3_1) dtmp0 += K1;
.cont19:
	fmuld	%f14,%f26,%f14		! (4_1) xx *= y0;
	srl	%g1,18,%o7		! (3_1) ax >>= 18;
	std	%f32,[%fp+tmp0]		! (0_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f18		! (2_0) x = (double)ftmp0;

	fmuld	%f30,%f38,%f30		! (5_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (3_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (7_1) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (1_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (2_1) res += dtmp0;
	mov	%l6,%l4
	ldd	[%l0+%o1],%f48		! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	fmuld	%f4,%f50,%f4		! (3_1) dtmp0 *= x2;
	ldd	[%o7+%l7],%f0		! (3_1) res = *(double*)((char*)parr1 + ax);
	nop
	fand	%f34,DC3,%f24		! (7_1) y0 = vis_fand(div,dconst3);

	fmuld	%f14,%f14,%f50		! (4_1) x2 = xx * xx;
	srlx	%g1,43,%g1		! (7_1) i >>= 43;
	mov	%i0,%o7
	fsubd	DTWO,%f42,%f44		! (6_1) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (7_1) i &= 508;
	add	%l3,stridex,%i0		! px += stridex;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f30,%f46		! (5_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (3_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (2_1) ftmp0 = (float)res;

	fmuld	%f20,%f26,%f30		! (1_0) div = x * y;
	srl	%o7,28,%o1		! (4_1) ux >>= 28;
	lda	[%l3]0x82,%l6		! (3_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (3_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (4_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (4_1) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (2_1) py[0] = ftmp0;
	fsubd	%f20,%f26,%f20		! (1_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (5_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%o0	! (4_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (4_1) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (7_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (6_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (3_0) ax = ux & 0x7fffffff;
	lda	[%l3]0x82,%f0		! (3_0) ftmp0 = *px;
	fpadd32	%f18,DC1,%f24		! (2_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f16,%f44		! (3_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (3_0) ax ? 0x39b89c55
	bl,pn	%icc,.update20		! (3_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f30,%f30		! (1_0) div += done;
.cont20:
	fmuld	%f34,%f38,%f42		! (7_1) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (3_0) ax ? 0x4c700518
	bg,pn	%icc,.update21		! (3_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (4_1) dtmp0 += K1;
.cont21:
	fmuld	%f36,%f26,%f36		! (5_1) xx *= y0;
	srl	%o0,18,%o7		! (4_1) ax >>= 18;
	std	%f30,[%fp+tmp1]		! (1_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f16		! (3_0) x = (double)ftmp0;

	fmuld	%f28,%f40,%f28		! (6_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (4_1) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (0_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (2_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (3_1) res += dtmp0;
	nop
	ldd	[%l0+%o1],%f48		! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	ldd	[%o7+%l7],%f0		! (4_1) res = *(double*)((char*)parr1 + ax);
	mov	%l6,%l3
	fmuld	%f4,%f50,%f4		! (4_1) dtmp0 *= x2;
	fand	%f32,DC3,%f24		! (0_0) y0 = vis_fand(div,dconst3);

	fmuld	%f36,%f36,%f50		! (5_1) x2 = xx * xx;
	srlx	%o0,43,%o0		! (0_0) i >>= 43;
	mov	%i2,%o7
	fsubd	DTWO,%f42,%f44		! (7_1) dtmp0 = dtwo - dtmp0;

	and	%o0,508,%l6		! (0_0) i &= 508;
	add	%i0,stridex,%i2		! px += stridex;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f28,%f46		! (6_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (4_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (3_1) ftmp0 = (float)res;

	fmuld	%f18,%f26,%f28		! (2_0) div = x * y;
	srl	%o7,28,%o1		! (5_1) ux >>= 28;
	lda	[%i0]0x82,%l6		! (4_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (4_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (5_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (5_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (3_1) py[0] = ftmp0;
	fsubd	%f18,%f26,%f18		! (2_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (6_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%g1	! (5_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (5_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (0_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (7_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (4_0) ax = ux & 0x7fffffff;
	lda	[%i0]0x82,%f0		! (4_0) ftmp0 = *px;
	fpadd32	%f16,DC1,%f24		! (3_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f14,%f44		! (4_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (4_0) ax ? 0x39b89c55
	bl,pn	%icc,.update22		! (4_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f28,%f28		! (2_0) div += done;
.cont22:
	fmuld	%f32,%f40,%f42		! (0_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (4_0) ax ? 0x4c700518
	bg,pn	%icc,.update23		! (4_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (5_1) dtmp0 += K1;
.cont23:
	fmuld	%f10,%f26,%f10		! (6_1) xx *= y0;
	srl	%g1,18,%o7		! (5_1) ax >>= 18;
	std	%f28,[%fp+tmp0]		! (2_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f14		! (4_0) x = (double)ftmp0;

	fmuld	%f34,%f38,%f34		! (7_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (5_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (1_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (3_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (4_1) res += dtmp0;
	mov	%l6,%i0
	ldd	[%l0+%o1],%f48		! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	ldd	[%o7+%l7],%f0		! (5_1) res = *(double*)((char*)parr1 + ax);
	nop
	fmuld	%f4,%f50,%f4		! (5_1) dtmp0 *= x2;
	fand	%f30,DC3,%f24		! (1_0) y0 = vis_fand(div,dconst3);

	fmuld	%f10,%f10,%f50		! (6_1) x2 = xx * xx;
	srlx	%g1,43,%g1		! (1_0) i >>= 43;
	mov	%l2,%o7
	fsubd	DTWO,%f42,%f44		! (0_0) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (1_0) i &= 508;
	add	%i2,stridex,%l2		! px += stridex;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f34,%f46		! (7_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (5_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (4_1) ftmp0 = (float)res;

	fmuld	%f16,%f26,%f34		! (3_0) div = x * y;
	srl	%o7,28,%o1		! (6_1) ux >>= 28;
	lda	[%i2]0x82,%l6		! (5_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (5_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (6_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (6_1) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (4_1) py[0] = ftmp0;
	fsubd	%f16,%f26,%f16		! (3_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (7_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%o0	! (6_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (6_1) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (1_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (0_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (5_0) ax = ux & 0x7fffffff;
	lda	[%i2]0x82,%f0		! (5_0) ftmp0 = *px;
	fpadd32	%f14,DC1,%f24		! (4_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f36,%f44		! (5_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (5_0) ax ? 0x39b89c55
	bl,pn	%icc,.update24		! (5_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f34,%f34		! (3_0) div += done;
.cont24:
	fmuld	%f30,%f38,%f42		! (1_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (5_0) ax ? 0x4c700518
	bg,pn	%icc,.update25		! (5_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (6_1) dtmp0 += K1;
.cont25:
	fmuld	%f8,%f26,%f8		! (7_1) xx *= y0;
	srl	%o0,18,%o7		! (6_1) ax >>= 18;
	std	%f34,[%fp+tmp1]		! (3_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f36		! (5_0) x = (double)ftmp0;

	fmuld	%f32,%f40,%f32		! (0_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (6_1) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (2_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (4_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (5_1) res += dtmp0;
	mov	%l6,%i2
	ldd	[%l0+%o1],%f48		! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	ldd	[%o7+%l7],%f0		! (6_1) res = *(double*)((char*)parr1 + ax);
	nop
	fmuld	%f4,%f50,%f4		! (6_1) dtmp0 *= x2;
	fand	%f28,DC3,%f24		! (2_0) y0 = vis_fand(div,dconst3);

	fmuld	%f8,%f8,%f50		! (7_1) x2 = xx * xx;
	srlx	%o0,43,%o0		! (2_0) i >>= 43;
	mov	%g5,%o7
	fsubd	DTWO,%f42,%f44		! (1_0) dtmp0 = dtwo - dtmp0;

	and	%o0,508,%l6		! (2_0) i &= 508;
	add	%l2,stridex,%g5		! px += stridex;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f32,%f46		! (0_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (6_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (5_1) ftmp0 = (float)res;

	fmuld	%f14,%f26,%f32		! (4_0) div = x * y;
	srl	%o7,28,%o1		! (7_1) ux >>= 28;
	lda	[%l2]0x82,%l6		! (6_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (6_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (7_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (7_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (5_1) py[0] = ftmp0;
	fsubd	%f14,%f26,%f14		! (4_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (0_0) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%g1	! (7_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (7_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (2_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (1_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (6_0) ax = ux & 0x7fffffff;
	lda	[%l2]0x82,%f0		! (6_0) ftmp0 = *px;
	fpadd32	%f36,DC1,%f24		! (5_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f10,%f44		! (6_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (6_0) ax ? 0x39b89c55
	bl,pn	%icc,.update26		! (6_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f32,%f32		! (4_0) div += done;
.cont26:
	fmuld	%f28,%f40,%f42		! (2_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (6_0) ax ? 0x4c700518
	bg,pn	%icc,.update27		! (6_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (7_1) dtmp0 += K1;
.cont27:
	fmuld	%f22,%f26,%f22		! (0_0) xx *= y0;
	srl	%g1,18,%o7		! (7_1) ax >>= 18;
	std	%f32,[%fp+tmp0]		! (4_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f10		! (6_0) x = (double)ftmp0;

	fmuld	%f30,%f38,%f30		! (1_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (7_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (3_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (5_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (6_1) res += dtmp0;
	mov	%l6,%l2
	ldd	[%l0+%o1],%f48		! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	ldd	[%o7+%l7],%f0		! (7_1) res = *(double*)((char*)parr1 + ax);
	nop
	fmuld	%f4,%f50,%f4		! (7_1) dtmp0 *= x2;
	fand	%f34,DC3,%f24		! (3_0) y0 = vis_fand(div,dconst3);

	fmuld	%f22,%f22,%f50		! (0_0) x2 = xx * xx;
	srlx	%g1,43,%g1		! (3_0) i >>= 43;
	mov	%i3,%o7
	fsubd	DTWO,%f42,%f44		! (2_0) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (3_0) i &= 508;
	add	%g5,stridex,%i3		! px += stridex;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f30,%f46		! (1_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (7_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (6_1) ftmp0 = (float)res;

	fmuld	%f36,%f26,%f30		! (5_0) div = x * y;
	srl	%o7,28,%o1		! (0_0) ux >>= 28;
	lda	[%g5]0x82,%l6		! (7_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (7_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (0_0) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (0_0) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (6_1) py[0] = ftmp0;
	fsubd	%f36,%f26,%f36		! (5_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (1_0) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%o0	! (0_0) ax += 0x00100000;
	and	%o1,-8,%o1		! (0_0) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (3_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (2_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (7_0) ax = ux & 0x7fffffff;
	lda	[%g5]0x82,%f0		! (7_0) ftmp0 = *px;
	fpadd32	%f10,DC1,%f24		! (6_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f8,%f44		! (7_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (7_0) ax ? 0x39b89c55
	bl,pn	%icc,.update28		! (7_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f30,%f30		! (5_0) div += done;
.cont28:
	fmuld	%f34,%f38,%f42		! (3_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (7_0) ax ? 0x4c700518
	bg,pn	%icc,.update29		! (7_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (0_0) dtmp0 += K1;
.cont29:
	fmuld	%f20,%f26,%f20		! (1_0) xx *= y0;
	srl	%o0,18,%o7		! (0_0) ax >>= 18;
	std	%f30,[%fp+tmp1]		! (5_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f8			! (7_0) x = (double)ftmp0;

	fmuld	%f28,%f40,%f28		! (2_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (0_0) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (4_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (6_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (7_1) res += dtmp0;
	subcc	counter,8,counter
	ldd	[%l0+%o1],%f48		! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit

	fmuld	%f4,%f50,%f4		! (0_0) dtmp0 *= x2;
	mov	%l6,%g5
	ldd	[%o7+%l7],%f0		! (0_0) res = *(double*)((char*)parr1 + ax);
	fand	%f32,DC3,%f24		! (4_0) y0 = vis_fand(div,dconst3);

	fmuld	%f20,%f20,%f50		! (1_0) x2 = xx * xx;
	srlx	%o0,43,%l6		! (4_0) i >>= 43;
	mov	%l5,%o7
	fsubd	DTWO,%f42,%f44		! (3_0) dtmp0 = dtwo - dtmp0;

	add	%g1,stridey,%o0		! py += stridey;
	and	%l6,508,%l6		! (4_0) i &= 508;
	bn,pn	%icc,.exit
	fsubd	DTWO,%f28,%f46		! (2_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (0_0) res *= dtmp0;
	ld	[%i4+%l6],%f0		! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	add	%i3,stridex,%l5		! px += stridex;
	fdtos	%f12,%f12		! (7_1) ftmp0 = (float)res;

	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];
	fmuld	%f10,%f26,%f28		! (6_0) div = x * y;
	bpos,pt	%icc,.main_loop
	faddd	%f4,K0,%f42		! (0_0) dtmp0 += K0;

	srl	%o7,28,%l5		! (1_0) ux >>= 28;
	st	%f12,[%g1]		! (7_1) py[0] = ftmp0;

.tail:
	addcc	counter,7,counter
	bneg,pn	%icc,.begin
	or	%g0,%o0,%o1

	fsubd	%f10,%f26,%f10		! (6_1) xx = x - y;
	and	%o7,MASK_0x7fffffff,%g1	! (1_1) ax = ux & 0x7fffffff;
	fmuld	K2,%f50,%f4		! (1_1) dtmp0 = K2 * x2;

	fmuld	%f40,%f46,%f26		! (2_1) y0 *= dtmp1;
	add	%g1,MASK_0x100000,%g1	! (1_1) ax += 0x00100000;
	and	%l5,-8,%l5		! (1_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (4_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (3_1) y0 *= dtmp0;

	fmuld	%f42,%f22,%f44		! (0_1) dtmp0 *= xx;
	faddd	DONE,%f28,%f28		! (6_1) div += done;

	fmuld	%f32,%f40,%f42		! (4_1) dtmp0 = div0 * y0;
	faddd	%f4,K1,%f4		! (1_1) dtmp0 += K1;

	fmuld	%f18,%f26,%f18		! (2_1) xx *= y0;
	srl	%g1,18,%o7		! (1_1) ax >>= 18;
	std	%f28,[%fp+tmp0]		! (6_1) i = ((unsigned long long*)&div)[0];

	fmuld	%f34,%f38,%f34		! (3_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (1_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (5_1) i = ((unsigned long long*)&div)[0];

	faddd	%f48,%f44,%f12		! (0_1) res += dtmp0;
	add	%o7,%l7,%o7		! (1_1) (char*)parr1 + ax;
	ldd	[%l0+%l5],%f48		! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (1_1) dtmp0 *= x2;
	fand	%f30,DC3,%f24		! (5_1) y0 = vis_fand(div,dconst3);
	ldd	[%o7],%f0		! (1_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f18,%f18,%f50		! (2_1) x2 = xx * xx;
	fsubd	DTWO,%f42,%f44		! (4_1) dtmp0 = dtwo - dtmp0;
	srlx	%g1,43,%g1		! (5_1) i >>= 43;

	and	%g1,508,%l6		! (5_1) i &= 508;
	mov	%l4,%o7
	fsubd	DTWO,%f34,%f46		! (3_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (1_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (0_1) ftmp0 = (float)res;

	srl	%o7,28,%l4		! (2_1) ux >>= 28;
	st	%f12,[%o0]		! (0_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (1_1) dtmp0 += K0;

	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%g1,%o1

	fmuld	K2,%f50,%f4		! (2_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o0	! (2_1) ax = ux & 0x7fffffff;

	fmuld	%f38,%f46,%f26		! (3_1) y0 *= dtmp1;
	add	%o0,MASK_0x100000,%o0	! (2_1) ax += 0x00100000;
	and	%l4,-8,%l4		! (2_1) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (5_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (4_1) y0 *= dtmp0;

	fmuld	%f42,%f20,%f44		! (1_1) dtmp0 *= xx;

	fmuld	%f30,%f38,%f42		! (5_1) dtmp0 = div0 * y0;
	faddd	%f4,K1,%f4		! (2_1) dtmp0 += K1;

	fmuld	%f16,%f26,%f16		! (3_1) xx *= y0;
	srl	%o0,18,%o7		! (2_1) ax >>= 18;

	fmuld	%f32,%f40,%f32		! (4_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (2_1) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (6_1) i = ((unsigned long long*)&div)[0];

	faddd	%f48,%f44,%f12		! (1_1) res += dtmp0;
	add	%o7,%l7,%o7		! (2_1) (char*)parr1 + ax;
	ldd	[%l0+%l4],%f48		! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (2_1) dtmp0 *= x2;
	fand	%f28,DC3,%f24		! (6_1) y0 = vis_fand(div,dconst3);
	ldd	[%o7],%f0		! (2_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f16,%f16,%f50		! (3_1) x2 = xx * xx;
	fsubd	DTWO,%f42,%f44		! (5_1) dtmp0 = dtwo - dtmp0;
	srlx	%o0,43,%o0		! (6_1) i >>= 43;

	and	%o0,508,%l6		! (6_1) i &= 508;
	mov	%l3,%o7
	fsubd	DTWO,%f32,%f46		! (4_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (2_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (1_1) ftmp0 = (float)res;

	srl	%o7,28,%l3		! (3_1) ux >>= 28;
	st	%f12,[%g1]		! (1_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (2_1) dtmp0 += K0;

	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%o0,%o1

	fmuld	K2,%f50,%f4		! (3_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%g1	! (3_1) ax = ux & 0x7fffffff;

	fmuld	%f40,%f46,%f26		! (4_1) y0 *= dtmp1;
	add	%g1,MASK_0x100000,%g1	! (3_1) ax += 0x00100000;
	and	%l3,-8,%l3		! (3_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (6_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (5_1) y0 *= dtmp0;

	fmuld	%f42,%f18,%f44		! (2_1) dtmp0 *= xx;

	fmuld	%f28,%f40,%f42		! (6_1) dtmp0 = div0 * y0;
	faddd	%f4,K1,%f4		! (3_1) dtmp0 += K1;

	fmuld	%f14,%f26,%f14		! (4_1) xx *= y0;
	srl	%g1,18,%o7		! (3_1) ax >>= 18;

	fmuld	%f30,%f38,%f30		! (5_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (3_1) ax &= -8;

	faddd	%f48,%f44,%f12		! (2_1) res += dtmp0;
	add	%o7,%l7,%o7		! (3_1) (char*)parr1 + ax;
	ldd	[%l0+%l3],%f48		! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (3_1) dtmp0 *= x2;
	ldd	[%o7],%f0		! (3_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f14,%f14,%f50		! (4_1) x2 = xx * xx;
	fsubd	DTWO,%f42,%f44		! (6_1) dtmp0 = dtwo - dtmp0;

	mov	%i0,%o7
	fsubd	DTWO,%f30,%f46		! (5_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (3_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	fdtos	%f12,%f12		! (2_1) ftmp0 = (float)res;

	srl	%o7,28,%i0		! (4_1) ux >>= 28;
	st	%f12,[%o0]		! (2_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (3_1) dtmp0 += K0;

	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%g1,%o1

	fmuld	K2,%f50,%f4		! (4_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o0	! (4_1) ax = ux & 0x7fffffff;

	fmuld	%f38,%f46,%f26		! (5_1) y0 *= dtmp1;
	add	%o0,MASK_0x100000,%o0	! (4_1) ax += 0x00100000;
	and	%i0,-8,%i0		! (4_1) ux &= -8;

	fmuld	%f40,%f44,%f40		! (6_1) y0 *= dtmp0;

	fmuld	%f42,%f16,%f44		! (3_1) dtmp0 *= xx;

	faddd	%f4,K1,%f4		! (4_1) dtmp0 += K1;

	fmuld	%f36,%f26,%f36		! (5_1) xx *= y0;
	srl	%o0,18,%o7		! (4_1) ax >>= 18;

	fmuld	%f28,%f40,%f28		! (6_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (4_1) ax &= -8;

	faddd	%f48,%f44,%f12		! (3_1) res += dtmp0;
	add	%o7,%l7,%o7		! (4_1) (char*)parr1 + ax;
	ldd	[%l0+%i0],%f48		! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (4_1) dtmp0 *= x2;
	ldd	[%o7],%f0		! (4_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f36,%f36,%f50		! (5_1) x2 = xx * xx;

	mov	%i2,%o7
	fsubd	DTWO,%f28,%f46		! (6_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (4_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	fdtos	%f12,%f12		! (3_1) ftmp0 = (float)res;

	srl	%o7,28,%i2		! (5_1) ux >>= 28;
	st	%f12,[%g1]		! (3_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (4_1) dtmp0 += K0;

	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%o0,%o1

	fmuld	K2,%f50,%f4		! (5_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%g1	! (5_1) ax = ux & 0x7fffffff;

	fmuld	%f40,%f46,%f26		! (6_1) y0 *= dtmp1;
	add	%g1,MASK_0x100000,%g1	! (5_1) ax += 0x00100000;
	and	%i2,-8,%i2		! (5_1) ux &= -8;

	fmuld	%f42,%f14,%f44		! (4_1) dtmp0 *= xx;

	faddd	%f4,K1,%f4		! (5_1) dtmp0 += K1;

	fmuld	%f10,%f26,%f10		! (6_1) xx *= y0;
	srl	%g1,18,%o7		! (5_1) ax >>= 18;

	and	%o7,-8,%o7		! (5_1) ax &= -8;

	faddd	%f48,%f44,%f12		! (4_1) res += dtmp0;
	add	%o7,%l7,%o7		! (5_1) (char*)parr1 + ax;
	ldd	[%l0+%i2],%f48		! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (5_1) dtmp0 *= x2;
	ldd	[%o7],%f0		! (5_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f10,%f10,%f50		! (6_1) x2 = xx * xx;

	mov	%l2,%o7

	fmuld	%f0,%f48,%f48		! (5_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	fdtos	%f12,%f12		! (4_1) ftmp0 = (float)res;

	srl	%o7,28,%l2		! (6_1) ux >>= 28;
	st	%f12,[%o0]		! (4_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (5_1) dtmp0 += K0;

	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%g1,%o1

	fmuld	K2,%f50,%f4		! (6_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o0	! (6_1) ax = ux & 0x7fffffff;

	add	%o0,MASK_0x100000,%o0	! (6_1) ax += 0x00100000;
	and	%l2,-8,%l2		! (6_1) ux &= -8;

	fmuld	%f42,%f36,%f44		! (5_1) dtmp0 *= xx;

	faddd	%f4,K1,%f4		! (6_1) dtmp0 += K1;

	srl	%o0,18,%o7		! (6_1) ax >>= 18;

	and	%o7,-8,%o7		! (6_1) ax &= -8;

	faddd	%f48,%f44,%f12		! (5_1) res += dtmp0;
	add	%o7,%l7,%o7		! (6_1) (char*)parr1 + ax;
	ldd	[%l0+%l2],%f48		! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (6_1) dtmp0 *= x2;
	ldd	[%o7],%f0		! (6_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f0,%f48,%f48		! (6_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	fdtos	%f12,%f12		! (5_1) ftmp0 = (float)res;

	st	%f12,[%g1]		! (5_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (6_1) dtmp0 += K0;

	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%o0,%o1

	fmuld	%f42,%f10,%f44		! (6_1) dtmp0 *= xx;

	faddd	%f48,%f44,%f12		! (6_1) res += dtmp0;

	add	%o0,stridey,%g1		! py += stridey;
	fdtos	%f12,%f12		! (6_1) ftmp0 = (float)res;

	st	%f12,[%o0]		! (6_1) py[0] = ftmp0;

	ba	.begin
	or	%g0,%g1,%o1		! py += stridey;

.exit:
	ret
	restore	%g0,%g0,%g0

	.align	16
.spec0:
	add	%i3,stridex,%i3		! px += stridex;
	sub	counter,1,counter
	st	%l6,[%o1]		! *(int*)py = ux;

	ba	.begin1
	add	%o1,stridey,%o1		! py += stridey;

	.align	16
.spec1:
	sethi	%hi(0x7f800000),%l3
	sethi	%hi(0x3fc90c00),%l4	! pi_2

	sethi	%hi(0x80000000),%o0
	add	%l4,0x3db,%l4		! pi_2

	cmp	%l5,%l3			! if ( ax > 0x7f800000 )
	bg,a,pn	%icc,1f
	fabss	%f0,%f0			! fpx = fabsf(*px);

	and	%l6,%o0,%l6		! sign = ux & 0x80000000;

	or	%l6,%l4,%l6		! sign |= pi_2;

	add	%i3,stridex,%i3		! px += stridex;
	sub	counter,1,counter
	st	%l6,[%o1]		! *(int*)py = sign;

	ba	.begin1
	add	%o1,stridey,%o1		! py += stridey;

1:
	fmuls	%f0,%f0,%f0		! fpx *= fpx;

	add	%i3,stridex,%i3		! px += stridex;
	sub	counter,1,counter
	st	%f0,[%o1]		! *py = fpx;

	ba	.begin1
	add	%o1,stridey,%o1		! py += stridey;

	.align	16
.update0:
	cmp	counter,1
	fzeros	%f0
	ble,a	.cont0
	sethi	%hi(0x3fffffff),%l6

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont0
	or	%g0,1,counter

	.align	16
.update1:
	cmp	counter,1
	fzeros	%f0
	ble,a	.cont1
	sethi	%hi(0x3fffffff),%l6

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont1
	or	%g0,1,counter

	.align	16
.update2:
	cmp	counter,2
	fzeros	%f0
	ble,a	.cont2
	sethi	%hi(0x3fffffff),%l6

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%l4,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont2
	or	%g0,2,counter

	.align	16
.update3:
	cmp	counter,2
	fzeros	%f0
	ble,a	.cont3
	sethi	%hi(0x3fffffff),%l6

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%l4,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont3
	or	%g0,2,counter

	.align	16
.update4:
	cmp	counter,3
	fzeros	%f0
	ble,a	.cont4
	sethi	%hi(0x3fffffff),%l6

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%l3,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont4
	or	%g0,3,counter

	.align	16
.update5:
	cmp	counter,3
	fzeros	%f0
	ble,a	.cont5
	sethi	%hi(0x3fffffff),%l6

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%l3,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont5
	or	%g0,3,counter

	.align	16
.update6:
	cmp	counter,4
	fzeros	%f0
	ble,a	.cont6
	sethi	%hi(0x3fffffff),%l6

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i0,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont6
	or	%g0,4,counter

	.align	16
.update7:
	cmp	counter,4
	fzeros	%f0
	ble,a	.cont7
	sethi	%hi(0x3fffffff),%l6

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i0,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont7
	or	%g0,4,counter

	.align	16
.update8:
	cmp	counter,5
	fzeros	%f0
	ble,a	.cont8
	sethi	%hi(0x3fffffff),%l6

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont8
	or	%g0,5,counter

	.align	16
.update9:
	cmp	counter,5
	fzeros	%f0
	ble,a	.cont9
	sethi	%hi(0x3fffffff),%l6

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont9
	or	%g0,5,counter

	.align	16
.update10:
	cmp	counter,6
	fzeros	%f0
	ble,a	.cont10
	sethi	%hi(0x3fffffff),%l6

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont10
	or	%g0,6,counter

	.align	16
.update11:
	cmp	counter,6
	fzeros	%f0
	ble,a	.cont11
	sethi	%hi(0x3fffffff),%l6

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont11
	or	%g0,6,counter

	.align	16
.update12:
	cmp	counter,7
	fzeros	%f0
	ble,a	.cont12
	sethi	%hi(0x3fffffff),%l6

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont12
	or	%g0,7,counter

	.align	16
.update13:
	cmp	counter,7
	fzeros	%f0
	ble,a	.cont13
	sethi	%hi(0x3fffffff),%l6

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont13
	or	%g0,7,counter

	.align	16
.update14:
	cmp	counter,0
	fzeros	%f0
	ble,a	.cont14
	sethi	%hi(0x3fffffff),%l6

	sub	counter,0,counter
	st	counter,[%fp+tmp_counter]

	stx	%i3,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont14
	or	%g0,0,counter

	.align	16
.update15:
	cmp	counter,0
	fzeros	%f0
	ble,a	.cont15
	sethi	%hi(0x3fffffff),%l6

	sub	counter,0,counter
	st	counter,[%fp+tmp_counter]

	stx	%i3,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont15
	or	%g0,0,counter

	.align	16
.update16:
	cmp	counter,1
	fzeros	%f0
	ble,a	.cont16
	sethi	%hi(0x3fffffff),%l6

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont16
	or	%g0,1,counter

	.align	16
.update17:
	cmp	counter,1
	fzeros	%f0
	ble,a	.cont17
	sethi	%hi(0x3fffffff),%l6

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont17
	or	%g0,1,counter

	.align	16
.update18:
	cmp	counter,2
	fzeros	%f0
	ble,a	.cont18
	sethi	%hi(0x3fffffff),%l6

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%l4,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont18
	or	%g0,2,counter

	.align	16
.update19:
	cmp	counter,2
	fzeros	%f0
	ble,a	.cont19
	sethi	%hi(0x3fffffff),%l6

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%l4,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont19
	or	%g0,2,counter

	.align	16
.update20:
	cmp	counter,3
	fzeros	%f0
	ble,a	.cont20
	sethi	%hi(0x3fffffff),%l6

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%l3,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont20
	or	%g0,3,counter

	.align	16
.update21:
	cmp	counter,3
	fzeros	%f0
	ble,a	.cont21
	sethi	%hi(0x3fffffff),%l6

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%l3,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont21
	or	%g0,3,counter

	.align	16
.update22:
	cmp	counter,4
	fzeros	%f0
	ble,a	.cont22
	sethi	%hi(0x3fffffff),%l6

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i0,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont22
	or	%g0,4,counter

	.align	16
.update23:
	cmp	counter,4
	fzeros	%f0
	ble,a	.cont23
	sethi	%hi(0x3fffffff),%l6

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i0,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont23
	or	%g0,4,counter

	.align	16
.update24:
	cmp	counter,5
	fzeros	%f0
	ble,a	.cont24
	sethi	%hi(0x3fffffff),%l6

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont24
	or	%g0,5,counter

	.align	16
.update25:
	cmp	counter,5
	fzeros	%f0
	ble,a	.cont25
	sethi	%hi(0x3fffffff),%l6

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont25
	or	%g0,5,counter

	.align	16
.update26:
	cmp	counter,6
	fzeros	%f0
	ble,a	.cont26
	sethi	%hi(0x3fffffff),%l6

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont26
	or	%g0,6,counter

	.align	16
.update27:
	cmp	counter,6
	fzeros	%f0
	ble,a	.cont27
	sethi	%hi(0x3fffffff),%l6

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l2,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont27
	or	%g0,6,counter

	.align	16
.update28:
	cmp	counter,7
	fzeros	%f0
	ble,a	.cont28
	sethi	%hi(0x3fffffff),%l6

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont28
	or	%g0,7,counter

	.align	16
.update29:
	cmp	counter,7
	fzeros	%f0
	ble,a	.cont29
	sethi	%hi(0x3fffffff),%l6

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]
	sethi	%hi(0x3fffffff),%l6
	ba	.cont29
	or	%g0,7,counter

	SET_SIZE(__vatanf)