/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatan2.S"

#include "libm.h"

	RO_DATA
	.align	64
! Constant pool.  Each entry is one double written as two 32-bit words,
! more significant word first.  The setup code reads the entries by byte
! offset from this label (0x00 through 0x48), so the order matters.
constants:
	.word	0x3ff921fb,0x54442d18	! 0x00 pio2: pi/2, leading part
	.word	0x3c91a626,0x33145c07	! 0x08 pio2_lo: pi/2, trailing part
	.word	0xbfd55555,0x555554ee	! 0x10 p1: about -1/3
	.word	0x3fc99999,0x997a1559	! 0x18 p2: about 1/5
	.word	0xbfc24923,0x158dfe02	! 0x20 p3: about -1/7
	.word	0x3fbc639d,0x0ed1347b	! 0x28 p4: about 0.1109
	.word	0xffffffff,0x00000000	! 0x30 mask: keeps the upper 32 bits
	.word	0x3fc00000,0x00000000	! 0x38 twom3: 2**-3
	.word	0x46d00000,0x00000000	! 0x40 two110: 2**110
	.word	0x3fe921fb,0x54442d18	! 0x48 pio4: pi/4

! local storage indices
!
! Offsets from the frame pointer of six 8-byte scratch slots:
!   xscl, yscl	used by the small-argument paths to copy the upper word
!		of a rescaled operand into an integer register
!   twom3, two110, pio4
!		stack copies of the rarely used constants of the same
!		names in the constant pool
!   junk	sink for result stores that must be discarded

#define xscl		STACK_BIAS-0x8
#define yscl		STACK_BIAS-0x10
#define twom3		STACK_BIAS-0x18
#define two110		STACK_BIAS-0x20
#define pio4		STACK_BIAS-0x28
#define junk		STACK_BIAS-0x30
! sizeof temp storage - must be a multiple of 16 for V9
! (six slots of 8 bytes each)
#define tmps		0x30

! register use

! i0  n
! i1  y
! i2  stridey
! i3  x
! i4  stridex
! i5  z

! l0  k0
! l1  k1
! l2  k2
! l3  hx
! l4  pz0
! l5  pz1
! l6  pz2
! l7  stridez

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_atan2
! g5  

! o0  hy
! o1  0x00004000
! o2  0x1420
! o3  0x7fe00000
! o4  0x03600000
! o5  0x00100000
! o7  

! f0  y0
! f2  x0
! f4  t0
! f6  ah0
! f8  al0
! f10 y1
! f12 x1
! f14 t1
! f16 ah1
! f18 al1
! f20 y2
! f22 x2
! f24 t2
! f26 ah2
! f28 al2
! f30 
! f32 
! f34 
! f36 sx0
! f38 sx1
! f40 sx2
! f42 sy0
! f44 sy1
! f46 sy2

! Floating point registers that are loaded once at entry and then only
! read: six of them come from the constant pool, and the one at f50 is
! built at entry as negative zero (only the sign bit set).
#define mask	%f48
#define signbit	%f50
#define pio2	%f52
#define pio2_lo	%f54
#define p1	%f56
#define p2	%f58
#define p3	%f60
#define p4	%f62

	ENTRY(__vatan2)
! Vector atan2 entry and one-time setup.
!
! Arguments, following the register-use notes earlier in this file:
!   i0 = n, i1 = y, i2 = stridey, i3 = x, i4 = stridex, i5 = z,
!   plus stridez, which is fetched from the caller frame below.
! Every element is accessed as two 32-bit words (an 8-byte double) and
! the strides are multiplied by 8 before use, so they count elements.
! Going by the routine name and the constants used, each result is
! presumably z[i] = atan2(y[i], x[i]).
!
! NOTE(review): the first y and x are loaded with ordinary loads and
! one result is stored before the count is ever tested, so this code
! appears to assume n >= 1 - confirm against the callers.
	save	%sp,-SA(MINFRAME)-tmps,%sp	! new frame plus scratch slots
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)		! o0 = constant pool (presumably its address)
	PIC_SET(l7,__vlibm_TBL_atan2,o1)	! o1 = lookup table (presumably its address)
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
	mov	%o1, %g1		! table pointer is kept in g1
! stridez is not in an input register; read it from the caller frame
#ifdef __sparcv9
	ldx	[%fp+STACK_BIAS+0xb0],%l7
#else
	ld	[%fp+0x5c],%l7
#endif
	ldd	[%o0+0x00],pio2		! load/set up constants
	ldd	[%o0+0x08],pio2_lo
	ldd	[%o0+0x10],p1
	ldd	[%o0+0x18],p2
	ldd	[%o0+0x20],p3
	ldd	[%o0+0x28],p4
	ldd	[%o0+0x30],mask
	fzero	signbit
	fnegd	signbit,signbit		! negative zero: sign bit only
! integer constants used by the range tests and the table index
	sethi	%hi(0x00004000),%o1	! added to the index before shifting
	sethi	%hi(0x1420),%o2
	or	%o2,%lo(0x1420),%o2	! largest table byte offset used
	sethi	%hi(0x7fe00000),%o3	! threshold for a big larger operand
	sethi	%hi(0x03600000),%o4	! threshold for a big upper word gap
	sethi	%hi(0x00100000),%o5	! below this the smaller operand is small
	ldd	[%o0+0x38],%f0		! copy rarely used constants to stack
	ldd	[%o0+0x40],%f2
	ldd	[%o0+0x48],%f4
	std	%f0,[%fp+twom3]
	std	%f2,[%fp+two110]
	std	%f4,[%fp+pio4]
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	sll	%l7,3,%l7
! The top of the loop always completes and stores the third element of
! the previous trip.  Clear those registers and aim that store at the
! junk slot so that the first trip writes a zero nowhere useful.
	fzero	%f20			! loop prologue
	fzero	%f22
	fzero	%f24
	fzero	%f26
	fzero	%f46
	add	%fp,junk,%l6	! pz2 = junk slot
	ld	[%i1],%f0		! *y
	ld	[%i1+4],%f1
	ld	[%i3],%f8		! *x
	ld	[%i3+4],%f9
	ld	[%i1],%o0		! hy
	ba	.loop
	ld	[%i3],%l3		! hx

! 16-byte aligned
	.align	16
! Main loop.  One trip works on up to three elements at once.  The
! amount of extra indentation after the leading tab shows which element
! an instruction belongs to:
!   none        element 0 (f0-f8,   fcc0, table pointer l0, result at l4)
!   one space   element 1 (f10-f18, fcc1, table pointer l1, result at l5)
!   two spaces  element 2 (f20-f28, fcc2, table pointer l2, result at l6)
! The last additions and the store for element 2 are carried over into
! the start of the following trip.
!
! This first part classifies element 0.  After the conditional swap
! f0/o0 describe the smaller magnitude and f2/l3 the larger one; the
! comments that say hx and hy below refer to these swapped values.
.loop:
	fabsd	%f0,%f4			! |y|
	mov	%i5,%l4			! result address for element 0
	add	%i1,%i2,%i1		! y += stridey

	fabsd	%f8,%f2			! |x|
	add	%i3,%i4,%i3		! x += stridex
	add	%i5,%l7,%i5		! z += stridez

	fand	%f0,signbit,%f42	! sign of y
	sethi	%hi(0x80000000),%g5

	fand	%f8,signbit,%f36	! sign of x
	andn	%o0,%g5,%o0		! upper word of |y|
	andn	%l3,%g5,%l3		! upper word of |x|

	fcmpd	%fcc0,%f4,%f2

	fmovd	%f4,%f0

	fmovdg	%fcc0,%f2,%f0		! swap if |y| > |x|

	fmovdg	%fcc0,%f4,%f2
	mov	%o0,%o7
	 lda	[%i1]%asi,%f10		! preload next argument

	  faddd	%f26,%f20,%f26
	 lda	[%i1+4]%asi,%f11

	  faddd	%f22,%f24,%f22
	movg	%fcc0,%l3,%o0		! swap the upper words the same way

	movg	%fcc0,%o7,%l3

	fbu,pn	%fcc0,.nan0		! if x or y is nan
! delay slot
	 lda	[%i3]%asi,%f18

	sub	%l3,%o0,%l0		! hx - hy
	sub	%l3,%o3,%g5		! hx - 0x7fe00000
	 fabsd	%f10,%f14
	 lda	[%i3+4]%asi,%f19

	sub	%l0,%o4,%o7		! (hx - hy) - 0x03600000
	  faddd	%f22,%f26,%f26

! the AND is negative only when both differences are negative
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big0		! if |x| or |x/y| is big
! delay slot
	nop

	 fabsd	%f18,%f12
	cmp	%o0,%o5
	bl,pn	%icc,.small0		! if |y| is small
! delay slot
	 lda	[%i1]%asi,%o0

	add	%l0,%o1,%l0		! k
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1		! no element left to act as element 1
! delay slot
	 lda	[%i3]%asi,%l3

! Second part of the loop body.
!   element 0: turn k into a table pointer and start the reduction.
!	With t = the double at table offset 0x10, x = f2 (larger
!	magnitude) and y = f0 (smaller magnitude) the code builds
!	    numerator   f4 = (y - hi(x)*t) - lo(x)*t
!	    denominator f8 = x + y*t
!	where hi(x) is x with its lower 32 bits cleared by the mask.
!   element 1: same classification as element 0 received in .loop.
!   element 2: store the result carried over from the previous trip
!	and preload the next arguments.
.cont1:
	srl	%l0,10,%l0
	 mov	%i5,%l5			! result address for element 1
	  fxor	%f26,%f46,%f26		! apply the saved sign
	  st	%f26,[%l6]

	 fand	%f10,signbit,%f44
	andn	%l0,0x1f,%l0		! byte offset, a multiple of 32
	 add	%i1,%i2,%i1
	  st	%f27,[%l6+4]

	 fand	%f18,signbit,%f38
	cmp	%l0,%o2
	movg	%icc,%o2,%l0		! clamp the offset to 0x1420

	 fcmpd	%fcc1,%f14,%f12
	 add	%i3,%i4,%i3
	 add	%i5,%l7,%i5

	 fmovd	%f14,%f10
	add	%l0,%g1,%l0		! l0 = address of the table entry
	 sethi	%hi(0x80000000),%g5

	ldd	[%l0+0x10],%f4		! t
	fand	%f2,mask,%f6		! hi(x)
	 andn	%o0,%g5,%o0
	 andn	%l3,%g5,%l3

	 fmovdg	%fcc1,%f12,%f10

	 fmovdg	%fcc1,%f14,%f12
	 mov	%o0,%o7
	  lda	[%i1]%asi,%f20

	fsubd	%f2,%f6,%f30		! lo(x)
	fmuld	%f6,%f4,%f6		! hi(x)*t
	 movg	%fcc1,%l3,%o0

	fmuld	%f0,%f4,%f8		! y*t
	 movg	%fcc1,%o7,%l3

	  lda	[%i1+4]%asi,%f21
	 fbu,pn	%fcc1,.nan1
! delay slot
	 nop

	  lda	[%i3]%asi,%f28
	 sub	%l3,%o0,%l1
	 sub	%l3,%o3,%g5

	  lda	[%i3+4]%asi,%f29
	fmuld	%f30,%f4,%f30		! lo(x)*t
	fsubd	%f0,%f6,%f4		! y - hi(x)*t
	 sub	%l1,%o4,%o7

	  fabsd	%f20,%f24
	 andcc	%g5,%o7,%g0
	 bge,pn	%icc,.big1
! delay slot
	 nop

	faddd	%f2,%f8,%f8		! denominator
	 cmp	%o0,%o5
	 bl,pn	%icc,.small1
! delay slot
	  lda	[%i1]%asi,%o0

	  fabsd	%f28,%f22
	 add	%l1,%o1,%l1
	 addcc	%i0,-1,%i0
	  lda	[%i3]%asi,%l3

	fsubd	%f4,%f30,%f4		! numerator
	 srl	%l1,10,%l1
	 ble,pn	%icc,.last2		! no element left to act as element 2
! delay slot
	  mov	%i5,%l6			! result address for element 2

! Third part of the loop body.
!   element 0: start the divide numerator/denominator and begin the
!	ah/al terms (f8 = -pio2_lo, f0 = 0 or negative zero depending
!	on whether the operands were swapped).
!   element 1: table pointer and reduction, as element 0 did in .cont1.
!   element 2: classification, as element 0 did in .loop.
.cont2:
	  fand	%f20,signbit,%f46
	 andn	%l1,0x1f,%l1
	  add	%i1,%i2,%i1

	  fand	%f28,signbit,%f40
	 cmp	%l1,%o2
	 movg	%icc,%o2,%l1

	  fcmpd	%fcc2,%f24,%f22
	  add	%i3,%i4,%i3
	  add	%i5,%l7,%i5

	fdivd	%f4,%f8,%f4		! reduced argument for element 0
	  fmovd	%f24,%f20
	 add	%l1,%g1,%l1
	  sethi	%hi(0x80000000),%g5

	 ldd	[%l1+0x10],%f14
	 fand	%f12,mask,%f16
	  andn	%o0,%g5,%o0
	  andn	%l3,%g5,%l3

	  fmovdg %fcc2,%f22,%f20

	  fmovdg %fcc2,%f24,%f22
	  mov	%o0,%o7

	 fsubd	%f12,%f16,%f32
	 fmuld	%f16,%f14,%f16
	  movg	%fcc2,%l3,%o0

	fnegd	pio2_lo,%f8		! al
	 fmuld	%f10,%f14,%f18
	  movg	%fcc2,%o7,%l3

	fzero	%f0
	  fbu,pn %fcc2,.nan2
! delay slot
	  nop

	fmovdg	%fcc0,signbit,%f0	! negative zero if swapped
	  sub	%l3,%o0,%l2
	  sub	%l3,%o3,%g5

	 fmuld	%f32,%f14,%f32
	 fsubd	%f10,%f16,%f14
	  sub	%l2,%o4,%o7

	 faddd	%f12,%f18,%f18
	  andcc	%g5,%o7,%g0
	  bge,pn %icc,.big2
! delay slot
	  nop

	fxor	%f36,%f0,%f36		! sign of x, flipped if swapped
	  cmp	%o0,%o5
	  bl,pn	%icc,.small2
! delay slot
	  nop

! Last part of the loop body: evaluation and stores.
!
! For element 0, with s = f4 (the quotient) and sx = f36 as adjusted at
! the end of .cont2, the instructions below compute
!	al  = (-pio2_lo, or -0 if swapped) + (sx xor pio2_lo)
!	      + the double at table offset 0x08
!	ah  = (-pio2, or -0 if swapped) + (sx xor pio2)
!	      + the double at table offset 0x00
!	sum = s**3 * (p1 + p2*s**2) + al
!	      + s**6 * (p3*s + p4*s**3) + s
!	result = (sum + ah) xor (sign of y xor sx)
! Elements 1 and 2 go through the same steps one and two stages later;
! element 2 is completed after the loop branch or at the top of the
! next trip.  The arguments for the next trip are preloaded here with
! non-faulting loads.
.cont3:
	fmovdg	%fcc0,signbit,%f8
	  add	%l2,%o1,%l2

	 fsubd	%f14,%f32,%f14
	  srl	%l2,10,%l2

	fxor	%f36,pio2_lo,%f30	! al
	  andn	%l2,0x1f,%l2

	fxor	%f36,pio2,%f0		! ah
	  cmp	%l2,%o2
	  movg	%icc,%o2,%l2

	fxor	%f42,%f36,%f42		! sy

	faddd	%f8,%f30,%f8
	ldd	[%l0+0x8],%f30
	  add	%l2,%g1,%l2

	 fdivd	%f14,%f18,%f14
	 fzero	%f10

	  ldd	[%l2+0x10],%f24
	  fand	%f22,mask,%f26

	 fmovdg	%fcc1,signbit,%f10

	fmuld	%f4,%f4,%f36		! s**2
	faddd	%f8,%f30,%f8

	  fsubd	%f22,%f26,%f34
	  fmuld	%f26,%f24,%f26

	  fmuld	%f20,%f24,%f28
	 fxor	%f38,%f10,%f38

	fmuld	%f4,p3,%f6		! p3*s
	 fnegd	pio2_lo,%f18

	fmuld	%f36,p2,%f2		! p2*s**2
	 fmovdg	%fcc1,signbit,%f18

	fmuld	%f36,%f4,%f36		! s**3
	 fxor	%f38,pio2,%f10

	  fmuld	%f34,%f24,%f34
	  fsubd	%f20,%f26,%f24

	  faddd	%f22,%f28,%f28

	faddd	%f2,p1,%f2		! p1 + p2*s**2

	fmuld	%f36,p4,%f30		! p4*s**3
	 fxor	%f38,pio2_lo,%f32

	  fsubd	%f24,%f34,%f24

	 fxor	%f44,%f38,%f44

	fmuld	%f36,%f2,%f2		! s**3 * (p1 + p2*s**2)
	 faddd	%f18,%f32,%f18
	 ldd	[%l1+0x8],%f32

	fmuld	%f36,%f36,%f36		! s**6
	faddd	%f6,%f30,%f30		! p3*s + p4*s**3

	  fdivd	%f24,%f28,%f24
	  fzero	%f20

	  fmovdg %fcc2,signbit,%f20

	faddd	%f2,%f8,%f2

	 fmuld	%f14,%f14,%f38
	 faddd	%f18,%f32,%f18

	fmuld	%f36,%f30,%f36
	  fxor	%f40,%f20,%f40

	fnegd	pio2,%f6		! ah
	 fmuld	%f14,p3,%f16

	fmovdg	%fcc0,signbit,%f6

	 fmuld	%f38,p2,%f12
	  fnegd	pio2_lo,%f28

	faddd	%f2,%f36,%f2
	 fmuld	%f38,%f14,%f38

	faddd	%f6,%f0,%f6
	ldd	[%l0],%f0

	  fmovdg %fcc2,signbit,%f28

	 faddd	%f12,p1,%f12

	 fmuld	%f38,p4,%f32
	  fxor	%f40,pio2_lo,%f34

	  fxor	%f40,pio2,%f20

	faddd	%f2,%f4,%f2

	 fmuld	%f38,%f12,%f12
	  fxor	%f46,%f40,%f46

	 fmuld	%f38,%f38,%f38
	 faddd	%f16,%f32,%f32

	  faddd	%f28,%f34,%f28
	  ldd	[%l2+0x8],%f34

	faddd	%f6,%f0,%f6
	lda	[%i1]%asi,%f0		! preload next argument

	 faddd	%f12,%f18,%f12
	lda	[%i1+4]%asi,%f1

	  fmuld	%f24,%f24,%f40
	lda	[%i3]%asi,%f8

	 fmuld	%f38,%f32,%f38
	  faddd	%f28,%f34,%f28
	lda	[%i3+4]%asi,%f9

	 fnegd	pio2,%f16
	  fmuld	%f24,p3,%f26
	lda	[%i1]%asi,%o0

	 fmovdg	%fcc1,signbit,%f16
	lda	[%i3]%asi,%l3

	  fmuld	%f40,p2,%f22

	 faddd	%f12,%f38,%f12
	  fmuld	%f40,%f24,%f40

	faddd	%f2,%f6,%f6

	 faddd	%f16,%f10,%f16
	 ldd	[%l1],%f10

	  faddd	%f22,p1,%f22

	 faddd	%f12,%f14,%f12
	  fmuld	%f40,p4,%f34

	fxor	%f6,%f42,%f6		! element 0: apply sign and store
	st	%f6,[%l4]

	 faddd	%f16,%f10,%f16
	st	%f7,[%l4+4]

	  fmuld	%f40,%f22,%f22

	  fmuld	%f40,%f40,%f40
	  faddd	%f26,%f34,%f34

	  fnegd	pio2,%f26

	 faddd	%f12,%f16,%f16

	  faddd	%f22,%f28,%f22

	  fmuld	%f40,%f34,%f40
	  fmovdg %fcc2,signbit,%f26

! -

	 fxor	%f16,%f44,%f16		! element 1: apply sign and store
	 st	%f16,[%l5]

	  faddd	%f26,%f20,%f26
	 st	%f17,[%l5+4]
	addcc	%i0,-1,%i0

	  faddd	%f22,%f40,%f22
	bg,pt	%icc,.loop
! delay slot
	  ldd	[%l2],%f20


! Count exhausted: finish element 2 here, doing the same three
! additions that the top of .loop would otherwise perform.
	  faddd	%f26,%f20,%f26
	  faddd	%f22,%f24,%f22
	  faddd	%f22,%f26,%f26
! .special0 branches here when it has just stored the last element;
! the pending element 2 sum is already complete in f26 at that point.
.done_from_special0:
	  fxor	%f26,%f46,%f26
	  st	%f26,[%l6]
	  st	%f27,[%l6+4]
	  ret
	  restore



	.align	16
! The count ran out before an element 1 could be started.  Substitute
! the pi/2 constant for both of its arguments (registers and upper
! words alike) and make the z cursor refer to the junk slot, so that
! the pipeline can run to the end without storing a stray result.
.last1:
	sethi	%hi(0x3ff921fb),%l3	! upper word of the pi/2 constant
	or	%l3,%lo(0x3ff921fb),%l3
	mov	%l3,%o0			! same upper word for both operands
	fmovd	pio2,%f18		! dummy x argument and its magnitude
	fabsd	%f18,%f12
	fmovd	pio2,%f10		! dummy y argument and its magnitude
	fabsd	%f10,%f14
	ba,pt	%icc,.cont1
! delay slot
	add	%fp,junk,%i5		! z cursor now refers to the junk slot



	.align	16
! The count ran out before an element 2 could be started.  Substitute
! the pi/2 constant for both of its arguments (registers and upper
! words alike) and send its result to the junk slot.
.last2:
	sethi	%hi(0x3ff921fb),%l3	! upper word of the pi/2 constant
	or	%l3,%lo(0x3ff921fb),%l3
	mov	%l3,%o0			! same upper word for both operands
	fmovd	pio2,%f28		! dummy x argument and its magnitude
	fabsd	%f28,%f22
	fmovd	pio2,%f20		! dummy y argument and its magnitude
	fabsd	%f20,%f24
	ba,pt	%icc,.cont2
! delay slot
	add	%fp,junk,%l6		! element 2 result goes to the junk slot



	.align	16
! Element 0 compared unordered (x or y is a NaN).
! First perform the element 2 addition that the main path would have
! done after the branch, then finish the preload of the next
! arguments.  The value handed to .special0 is the product of the two
! operand magnitudes, which is a NaN when either operand is one.
.nan0:
	  faddd	%f22,%f26,%f26
! entry used by .special0, where no element 2 addition is pending
.nan0_from_special0:
	 fabsd	%f10,%f14
	 lda	[%i3+4]%asi,%f19
	 fabsd	%f18,%f12
	 lda	[%i1]%asi,%o0		! upper word of the next y
	 lda	[%i3]%asi,%l3		! upper word of the next x
	ba,pt	%icc,.special0
! delay slot
	fmuld	%f0,%f2,%f6


	.align	16
! Element 0: the larger operand has an upper word of at least
! 0x7fe00000, or the upper words differ by at least 0x03600000.
!   - upper word >= 0x7ff00000: result comes from .return_ah0
!   - gap below 0x03600000: multiply both operands by 2**-3 and
!     rejoin the main path at .cont1
!   - otherwise (label 1): .return_ah0 when the operands were swapped
!     or the original x is negative; else the result is the plain
!     quotient smaller/larger, stored by .special0
.big0:
	 fabsd	%f18,%f12
	 lda	[%i1]%asi,%o0		! upper words of the next y and x
	 lda	[%i3]%asi,%l3
	cmp	%g5,%o5	! g5 is hx - 0x7fe00000
	bge,pn	%icc,.return_ah0	! if hx >= 0x7ff00000
! delay slot
	nop
	cmp	%l0,%o4
	bge,pn	%icc,1f			! if hx - hy >= 0x03600000
! delay slot
	nop
	ldd	[%fp+twom3],%f6
	fmuld	%f0,%f6,%f0		! scale both operands by 2**-3
	fmuld	%f2,%f6,%f2
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	nop
	ba,pt	%icc,.cont1
! delay slot
	nop
1:
	fbg,pn	%fcc0,.return_ah0	! operands were swapped
! delay slot
	nop
	fcmpd	%fcc3,%f8,signbit	! original x against negative zero
	fbl,pn	%fcc3,.return_ah0
! delay slot
	nop
	ba,pt	%icc,.special0
! delay slot
	fdivd	%f0,%f2,%f6


	.align	16
! Element 0: the upper word of the smaller operand is below 0x00100000.
! If that operand equals zero the result comes from .return_ah0.
! Otherwise both operands are multiplied by 2**110, the upper word
! difference is recomputed from the scaled values by way of the stack
! scratch slots, and the main path is rejoined at .cont1.
.small0:
	 lda	[%i3]%asi,%l3		! upper word of the next x
	fcmpd	%fcc3,%f0,signbit
	fbe,pt	%fcc3,.return_ah0	! smaller operand is zero
! delay slot
	nop
	ldd	[%fp+two110],%f6
	fmuld	%f0,%f6,%f0
	fmuld	%f2,%f6,%f2
	st	%f0,[%fp+yscl]		! upper words of the scaled operands
	ld	[%fp+yscl],%o7
	st	%f2,[%fp+xscl]
	ld	[%fp+xscl],%l0
	sub	%l0,%o7,%l0
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	nop
	ba,pt	%icc,.cont1
! delay slot
	nop


	.align	16
! Element 0 result without table or polynomial terms.
! The first eight instructions form the same quantity that the main
! path uses as the start of ah:
!	(-pio2, or -0 if swapped) + (adjusted sign of x xor pio2)
! and fold the sign adjustments into f42.  When the smaller operand
! also has an upper word of at least 0x7ff00000, pi/4 is added.
.return_ah0:
	fzero	%f0
	fmovdg	%fcc0,signbit,%f0
	fxor	%f36,%f0,%f36
	fxor	%f36,pio2,%f0
	fxor	%f42,%f36,%f42
	fnegd	pio2,%f6
	fmovdg	%fcc0,signbit,%f6
	faddd	%f6,%f0,%f6
	sub	%g5,%l0,%o7		! equals hy - 0x7fe00000
	cmp	%o7,%o5
	bl,pt	%icc,1f			! if hy < 0x7ff00000
! delay slot
	nop
	ldd	[%fp+pio4],%f0
	faddd	%f6,%f0,%f6
1:
! NOTE(review): the converted value in f4 is overwritten before it is
! read; the conversion is presumably here only for its effect on the
! floating point exception flags - confirm.
	fdtoi	%f6,%f4
! Common exit for every special case of element 0: apply the sign,
! store the result, then either finish or promote the element that was
! preloaded as element 1 into the element 0 registers and repeat the
! classification done at the top of .loop before rejoining at .cont1.
.special0:
	fxor	%f6,%f42,%f6
	st	%f6,[%l4]
	st	%f7,[%l4+4]
	addcc	%i0,-1,%i0
	ble,pn	%icc,.done_from_special0
! delay slot
	nop
	fmovd	%f10,%f0		! next y, x and their magnitudes
	fmovd	%f18,%f8
	fmovd	%f14,%f4
	fmovd	%f12,%f2
	mov	%i5,%l4
	add	%i1,%i2,%i1
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fand	%f0,signbit,%f42
	sethi	%hi(0x80000000),%g5
	fand	%f8,signbit,%f36
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fcmpd	%fcc0,%f4,%f2
	fmovd	%f4,%f0
	fmovdg	%fcc0,%f2,%f0
	fmovdg	%fcc0,%f4,%f2
	mov	%o0,%o7
	movg	%fcc0,%l3,%o0
	movg	%fcc0,%o7,%l3
	 lda	[%i1]%asi,%f10
	 lda	[%i1+4]%asi,%f11
	fbu,pn	%fcc0,.nan0_from_special0
! delay slot
	 lda	[%i3]%asi,%f18
	 fabsd	%f10,%f14
	 lda	[%i3+4]%asi,%f19
	sub	%l3,%o0,%l0
	sub	%l3,%o3,%g5
	sub	%l0,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big0
! delay slot
	nop
	 fabsd	%f18,%f12
	cmp	%o0,%o5
	bl,pn	%icc,.small0
! delay slot
	 lda	[%i1]%asi,%o0
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	 lda	[%i3]%asi,%l3
	ba,pt	%icc,.cont1
! delay slot
	nop



	.align	16
! Element 1 compared unordered (one of its operands is a NaN).
! The first four instructions are the element 0 reduction steps that
! the main path performs after the branch point in .cont1.  Then the
! element 2 arguments are loaded and a NaN is produced for element 1
! by multiplying its two operand magnitudes.
.nan1:
	fmuld	%f30,%f4,%f30
	fsubd	%f0,%f6,%f4
	faddd	%f2,%f8,%f8
	fsubd	%f4,%f30,%f4
! entry used by .special1, where no element 0 work is pending
.nan1_from_special1:
	 lda	[%i3]%asi,%f28
	 lda	[%i3+4]%asi,%f29
	 fabsd	%f20,%f24
	 lda	[%i1]%asi,%o0
	 fabsd	%f28,%f22
	 lda	[%i3]%asi,%l3
	 mov	%i5,%l6
	ba,pt	%icc,.special1
! delay slot
	fmuld	%f10,%f12,%f16


	.align	16
! Element 1 counterpart of .big0.  The first two instructions finish
! the element 0 reduction steps that the main path performs after the
! branch point in .cont1.  The rescaled case rejoins at .cont2, so the
! shift that normally precedes .cont2 is done here as well.
.big1:
	faddd	%f2,%f8,%f8
	fsubd	%f4,%f30,%f4
! entry used by .special1, where no element 0 work is pending
.big1_from_special1:
	 lda	[%i1]%asi,%o0
	 fabsd	%f28,%f22
	 lda	[%i3]%asi,%l3
	 mov	%i5,%l6
	cmp	%g5,%o5	! g5 is (larger upper word) - 0x7fe00000
	bge,pn	%icc,.return_ah1
! delay slot
	nop
	cmp	%l1,%o4
	bge,pn	%icc,1f
! delay slot
	nop
	ldd	[%fp+twom3],%f16
	fmuld	%f10,%f16,%f10		! scale both operands by 2**-3
	fmuld	%f12,%f16,%f12
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	nop
	ba,pt	%icc,.cont2
! delay slot
	nop
1:
	fbg,pn	%fcc1,.return_ah1	! operands were swapped
! delay slot
	nop
	fcmpd	%fcc3,%f18,signbit	! original x against negative zero
	fbl,pn	%fcc3,.return_ah1
! delay slot
	nop
	ba,pt	%icc,.special1
! delay slot
	fdivd	%f10,%f12,%f16


	.align	16
! Element 1 counterpart of .small0.  The first instruction finishes the
! element 0 numerator that the main path completes after the branch
! point in .cont1.  The rescaled case rejoins at .cont2, so the shift
! that normally precedes .cont2 is done here as well.
.small1:
	fsubd	%f4,%f30,%f4
! entry used by .special1, where no element 0 work is pending
.small1_from_special1:
	 fabsd	%f28,%f22
	 lda	[%i3]%asi,%l3
	 mov	%i5,%l6
	fcmpd	%fcc3,%f10,signbit
	fbe,pt	%fcc3,.return_ah1	! smaller operand is zero
! delay slot
	nop
	ldd	[%fp+two110],%f16
	fmuld	%f10,%f16,%f10		! scale both operands by 2**110
	fmuld	%f12,%f16,%f12
	st	%f10,[%fp+yscl]		! upper words of the scaled operands
	ld	[%fp+yscl],%o7
	st	%f12,[%fp+xscl]
	ld	[%fp+xscl],%l1
	sub	%l1,%o7,%l1
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	nop
	ba,pt	%icc,.cont2
! delay slot
	nop


	.align	16
! Element 1 counterpart of .return_ah0: the result is formed from
!	(-pio2, or -0 if swapped) + (adjusted sign of x xor pio2)
! with pi/4 added when the smaller operand also has an upper word of
! at least 0x7ff00000.
.return_ah1:
	fzero	%f10
	fmovdg	%fcc1,signbit,%f10
	fxor	%f38,%f10,%f38
	fxor	%f38,pio2,%f10
	fxor	%f44,%f38,%f44
	fnegd	pio2,%f16
	fmovdg	%fcc1,signbit,%f16
	faddd	%f16,%f10,%f16
	sub	%g5,%l1,%o7		! smaller upper word - 0x7fe00000
	cmp	%o7,%o5
	bl,pt	%icc,1f	! if smaller upper word < 0x7ff00000
! delay slot
	nop
	ldd	[%fp+pio4],%f10
	faddd	%f16,%f10,%f16
1:
! NOTE(review): f14 is overwritten before it is read; the conversion is
! presumably here only for its effect on the exception flags - confirm.
	fdtoi	%f16,%f14
! Common exit for every special case of element 1: apply the sign and
! store.  If no element remains, dummy pi/2 arguments are substituted
! and the z cursor is pointed at the junk slot.  Then the element that
! was preloaded as element 2 is promoted into the element 1 registers
! and classified again before rejoining the main path at .cont2.
.special1:
	fxor	%f16,%f44,%f16
	st	%f16,[%l5]
	st	%f17,[%l5+4]
	addcc	%i0,-1,%i0
	bg,pn	%icc,1f
! delay slot
	nop
	fmovd	pio2,%f20		! set up dummy argument
	fmovd	pio2,%f28
	fabsd	%f20,%f24
	fabsd	%f28,%f22
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	add	%fp,junk,%i5
1:
	fmovd	%f20,%f10
	fmovd	%f28,%f18
	fmovd	%f24,%f14
	fmovd	%f22,%f12
	mov	%i5,%l5
	add	%i1,%i2,%i1
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fand	%f10,signbit,%f44
	sethi	%hi(0x80000000),%g5
	fand	%f18,signbit,%f38
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fcmpd	%fcc1,%f14,%f12
	fmovd	%f14,%f10
	fmovdg	%fcc1,%f12,%f10
	fmovdg	%fcc1,%f14,%f12
	mov	%o0,%o7
	movg	%fcc1,%l3,%o0
	movg	%fcc1,%o7,%l3
	 lda	[%i1]%asi,%f20
	 lda	[%i1+4]%asi,%f21
	fbu,pn	%fcc1,.nan1_from_special1
! delay slot
	nop
	 lda	[%i3]%asi,%f28
	 lda	[%i3+4]%asi,%f29
	 fabsd	%f20,%f24
	sub	%l3,%o0,%l1
	sub	%l3,%o3,%g5
	sub	%l1,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big1_from_special1
! delay slot
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.small1_from_special1
! delay slot
	 lda	[%i1]%asi,%o0
	 fabsd	%f28,%f22
	 lda	[%i3]%asi,%l3
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	 mov	%i5,%l6
	ba,pt	%icc,.cont2
! delay slot
	nop



	.align	16
! Element 2 compared unordered (one of its operands is a NaN).
! The first five instructions are the element 0 and element 1 steps
! that the main path performs between the branch point in .cont2 and
! .cont3.  A NaN is then produced for element 2 by multiplying its two
! operand magnitudes.
.nan2:
	fmovdg	%fcc0,signbit,%f0
	 fmuld	%f32,%f14,%f32
	 fsubd	%f10,%f16,%f14
	 faddd	%f12,%f18,%f18
	fxor	%f36,%f0,%f36
! entry used by .special2, where no element 0 or 1 work is pending
.nan2_from_special2:
	ba,pt	%icc,.special2
! delay slot
	fmuld	%f20,%f22,%f26


	.align	16
! Element 2 counterpart of .big0.  The first instruction is the
! element 0 sign adjustment that the main path performs after the
! branch point in .cont2.  The rescaled case rejoins at .cont3, which
! derives the table pointer from the unchanged value in l2.
.big2:
	fxor	%f36,%f0,%f36
! entry used by .special2, where no element 0 work is pending
.big2_from_special2:
	cmp	%g5,%o5	! g5 is (larger upper word) - 0x7fe00000
	bge,pn	%icc,.return_ah2
! delay slot
	nop
	cmp	%l2,%o4
	bge,pn	%icc,1f
! delay slot
	nop
	ldd	[%fp+twom3],%f26
	fmuld	%f20,%f26,%f20		! scale both operands by 2**-3
	fmuld	%f22,%f26,%f22
	ba,pt	%icc,.cont3
! delay slot
	nop
1:
	fbg,pn	%fcc2,.return_ah2	! operands were swapped
! delay slot
	nop
	fcmpd	%fcc3,%f28,signbit	! original x against negative zero
	fbl,pn	%fcc3,.return_ah2
! delay slot
	nop
	ba,pt	%icc,.special2
! delay slot
	fdivd	%f20,%f22,%f26


	.align	16
! Element 2 counterpart of .small0.  A zero smaller operand goes to
! .return_ah2.  Otherwise both operands are multiplied by 2**110 and
! the upper word difference is recomputed into l2 by way of the stack
! scratch slots before rejoining at .cont3.
.small2:
	fcmpd	%fcc3,%f20,signbit
	fbe,pt	%fcc3,.return_ah2	! smaller operand is zero
! delay slot
	nop
	ldd	[%fp+two110],%f26
	fmuld	%f20,%f26,%f20
	fmuld	%f22,%f26,%f22
	st	%f20,[%fp+yscl]		! upper words of the scaled operands
	ld	[%fp+yscl],%o7
	st	%f22,[%fp+xscl]
	ld	[%fp+xscl],%l2
	sub	%l2,%o7,%l2
	ba,pt	%icc,.cont3
! delay slot
	nop


	.align	16
! Element 2 counterpart of .return_ah0: the result is formed from
!	(-pio2, or -0 if swapped) + (adjusted sign of x xor pio2)
! with pi/4 added when the smaller operand also has an upper word of
! at least 0x7ff00000.
.return_ah2:
	fzero	%f20
	fmovdg	%fcc2,signbit,%f20
	fxor	%f40,%f20,%f40
	fxor	%f40,pio2,%f20
	fxor	%f46,%f40,%f46
	fnegd	pio2,%f26
	fmovdg	%fcc2,signbit,%f26
	faddd	%f26,%f20,%f26
	sub	%g5,%l2,%o7		! smaller upper word - 0x7fe00000
	cmp	%o7,%o5
	bl,pt	%icc,1f	! if smaller upper word < 0x7ff00000
! delay slot
	nop
	ldd	[%fp+pio4],%f20
	faddd	%f26,%f20,%f26
1:
! NOTE(review): f24 is overwritten before it is read; the conversion is
! presumably here only for its effect on the exception flags - confirm.
	fdtoi	%f26,%f24
! Common exit for every special case of element 2: apply the sign and
! store.  If no element remains, dummy values are set up (pi/2
! operands, zero signs, zero table index, result sent to the junk
! slot) and the main path is rejoined at .cont3.  Otherwise the next
! element is loaded into the element 2 registers and classified again.
.special2:
	fxor	%f26,%f46,%f26
	st	%f26,[%l6]
	st	%f27,[%l6+4]
	addcc	%i0,-1,%i0
	bg,pn	%icc,1f
! delay slot
	nop
	fmovd	pio2,%f20		! set up dummy argument
	fmovd	pio2,%f22
	fzero	%f40
	fzero	%f46
	mov	0,%l2
	ba,pt	%icc,.cont3
! delay slot
	add	%fp,junk,%l6
1:
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	fand	%f20,signbit,%f46
	add	%i1,%i2,%i1
	fand	%f28,signbit,%f40
	fcmpd	%fcc2,%f24,%f22
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fmovd	%f24,%f20
	sethi	%hi(0x80000000),%g5
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fmovdg	%fcc2,%f22,%f20
	fmovdg	%fcc2,%f24,%f22
	mov	%o0,%o7
	movg	%fcc2,%l3,%o0
	movg	%fcc2,%o7,%l3
	fbu,pn	%fcc2,.nan2_from_special2
! delay slot
	nop
	sub	%l3,%o0,%l2
	sub	%l3,%o3,%g5
	sub	%l2,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big2_from_special2
! delay slot
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.small2
! delay slot
	nop
	ba,pt	%icc,.cont3
! delay slot
	nop

	SET_SIZE(__vatan2)