xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vatan2.S (revision 3ba944265c4ae1fcf23ef758537c2e4f4feec16e)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vatan2.S"
30
31#include "libm.h"
32
! Read-only table of 8-byte constants.  Each .word pair is the high and
! low 32-bit half of one entry.  The entry code loads every entry with
! ldd using the byte offsets noted below.
33	RO_DATA
34	.align	64
35constants:
36	.word	0x3ff921fb,0x54442d18	! pio2     (offset 0x00)
37	.word	0x3c91a626,0x33145c07	! pio2_lo  (offset 0x08)
38	.word	0xbfd55555,0x555554ee	! p1       (offset 0x10) polynomial coefficient
39	.word	0x3fc99999,0x997a1559	! p2       (offset 0x18) polynomial coefficient
40	.word	0xbfc24923,0x158dfe02	! p3       (offset 0x20) polynomial coefficient
41	.word	0x3fbc639d,0x0ed1347b	! p4       (offset 0x28) polynomial coefficient
42	.word	0xffffffff,0x00000000	! mask     (offset 0x30) fand with this keeps only the upper 32 bits
43	.word	0x3fc00000,0x00000000	! twom3    (offset 0x38) 2**-3, scale factor used by the .bigN handlers
44	.word	0x46d00000,0x00000000	! two110   (offset 0x40) 2**110, scale factor used by the .smallN handlers
45	.word	0x3fe921fb,0x54442d18	! pio4     (offset 0x48) added by the .return_ahN handlers
46
47! local storage indices
! Each name below is an offset from %fp to an 8-byte scratch slot:
!   xscl, yscl  - used by the .smallN handlers to move the high word of a
!                 scaled x or y from an FP register to an integer register
!   twom3, two110, pio4 - stack copies of the rarely used constants
!   junk        - store target for pipeline stages that hold no real element
48
49#define xscl		STACK_BIAS-0x8
50#define yscl		STACK_BIAS-0x10
51#define twom3		STACK_BIAS-0x18
52#define two110		STACK_BIAS-0x20
53#define pio4		STACK_BIAS-0x28
54#define junk		STACK_BIAS-0x30
55! sizeof temp storage - must be a multiple of 16 for V9
! 0x30 covers the six 8-byte slots above; it is added to the frame size
! in the save instruction at the routine entry.
56#define tmps		0x30
57
58! register use
59
60! i0  n
61! i1  y
62! i2  stridey
63! i3  x
64! i4  stridex
65! i5  z
66
67! l0  k0
68! l1  k1
69! l2  k2
70! l3  hx
71! l4  pz0
72! l5  pz1
73! l6  pz2
74! l7  stridez
75
76! the following are 64-bit registers in both V8+ and V9
77
78! g1  __vlibm_TBL_atan2
79! g5
80
81! o0  hy
82! o1  0x00004000
83! o2  0x1420
84! o3  0x7fe00000
85! o4  0x03600000
86! o5  0x00100000
87! o7
88
89! f0  y0
90! f2  x0
91! f4  t0
92! f6  ah0
93! f8  al0
94! f10 y1
95! f12 x1
96! f14 t1
97! f16 ah1
98! f18 al1
99! f20 y2
100! f22 x2
101! f24 t2
102! f26 ah2
103! f28 al2
104! f30
105! f32
106! f34
107! f36 sx0
108! f38 sx1
109! f40 sx2
110! f42 sy0
111! f44 sy1
112! f46 sy2
113
! Aliases for the FP registers that hold loop-invariant values.
! All except signbit are loaded from the constants table at entry;
! signbit is built at entry with fzero followed by fnegd, so it holds
! only the sign bit set.
114#define mask	%f48
115#define signbit	%f50
116#define pio2	%f52
117#define pio2_lo	%f54
118#define p1	%f56
119#define p2	%f58
120#define p3	%f60
121#define p4	%f62
122
! __vatan2 entry and loop prologue.
! Per the register-use list above: i0 = n, i1 = y, i2 = stridey, i3 = x,
! i4 = stridex, i5 = z; stridez is fetched from the caller frame into l7.
! This section loads the constants, builds the integer thresholds in
! o1-o5, scales the strides, primes the pipeline registers and loads the
! first x/y pair before branching into .loop.
! NOTE(review): there is no check of n before the first loads of *y and
! *x below; presumably callers guarantee n >= 1 - confirm.
123	ENTRY(__vatan2)
124	save	%sp,-SA(MINFRAME)-tmps,%sp	! new frame with tmps bytes of scratch
125	PIC_SETUP(l7)
126	PIC_SET(l7,constants,o0)
127	PIC_SET(l7,__vlibm_TBL_atan2,o1)
128	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
129	mov	%o1, %g1		! g1 = __vlibm_TBL_atan2 (see register-use list)
130#ifdef __sparcv9
131	ldx	[%fp+STACK_BIAS+0xb0],%l7	! l7 = stridez, from the caller frame
132#else
133	ld	[%fp+0x5c],%l7		! l7 = stridez, from the caller frame
134#endif
135	ldd	[%o0+0x00],pio2		! load/set up constants
136	ldd	[%o0+0x08],pio2_lo
137	ldd	[%o0+0x10],p1
138	ldd	[%o0+0x18],p2
139	ldd	[%o0+0x20],p3
140	ldd	[%o0+0x28],p4
141	ldd	[%o0+0x30],mask
142	fzero	signbit
143	fnegd	signbit,signbit		! negated zero: only the sign bit is set
144	sethi	%hi(0x00004000),%o1	! o1 = 0x00004000, added to hx - hy to form k
145	sethi	%hi(0x1420),%o2
146	or	%o2,%lo(0x1420),%o2	! o2 = 0x1420, upper limit for the table offset
147	sethi	%hi(0x7fe00000),%o3	! o3 = 0x7fe00000
148	sethi	%hi(0x03600000),%o4	! o4 = 0x03600000
149	sethi	%hi(0x00100000),%o5	! o5 = 0x00100000
150	ldd	[%o0+0x38],%f0		! copy rarely used constants to stack
151	ldd	[%o0+0x40],%f2
152	ldd	[%o0+0x48],%f4
153	std	%f0,[%fp+twom3]
154	std	%f2,[%fp+two110]
155	std	%f4,[%fp+pio4]
156	sll	%i2,3,%i2		! scale strides
157	sll	%i4,3,%i4
158	sll	%l7,3,%l7
! The first pass through .loop and .cont1 finishes and stores a stage-2
! result that does not exist yet.  Zero the registers that computation
! reads and aim its store pointer (l6) at the junk slot.
159	fzero	%f20			! loop prologue
160	fzero	%f22
161	fzero	%f24
162	fzero	%f26
163	fzero	%f46
164	add	%fp,junk,%l6
165	ld	[%i1],%f0		! *y
166	ld	[%i1+4],%f1
167	ld	[%i3],%f8		! *x
168	ld	[%i3+4],%f9
169	ld	[%i1],%o0		! hy
170	ba	.loop
171	ld	[%i3],%l3		! hx
172
173! 16-byte aligned
! Top of the main loop.  Three elements are in flight per iteration; the
! register-use list above names them 0, 1 and 2.  Instructions with no
! extra indent work on element 0, those indented by one space on
! element 1 and those indented by two spaces on element 2.
!
! On entry: f0 = y0, f8 = x0, o0 = high word of y0, l3 = high word of x0.
! This section takes absolute values and sign bits of element 0, orders
! the pair so that f0 holds the smaller magnitude and f2 the larger
! (o0/l3 are swapped the same way), preloads element 1 with non-faulting
! loads, finishes the sum for element 2 of the previous iteration, and
! branches to .nan0/.big0/.small0/.last1 for the unusual cases.
174	.align	16
175.loop:
176	fabsd	%f0,%f4			! f4 = |y0|
177	mov	%i5,%l4			! pz0 = z
178	add	%i1,%i2,%i1		! y += stridey
179
180	fabsd	%f8,%f2			! f2 = |x0|
181	add	%i3,%i4,%i3		! x += stridex
182	add	%i5,%l7,%i5		! z += stridez
183
184	fand	%f0,signbit,%f42	! sy0 = sign bit of y0
185	sethi	%hi(0x80000000),%g5
186
187	fand	%f8,signbit,%f36	! sx0 = sign bit of x0
188	andn	%o0,%g5,%o0		! hy &= 0x7fffffff
189	andn	%l3,%g5,%l3		! hx &= 0x7fffffff
190
191	fcmpd	%fcc0,%f4,%f2		! compare |y0| with |x0|
192
193	fmovd	%f4,%f0
194
195	fmovdg	%fcc0,%f2,%f0		! swap if |y| > |x|
196
197	fmovdg	%fcc0,%f4,%f2
198	mov	%o0,%o7
199	 lda	[%i1]%asi,%f10		! preload next argument
200
201	  faddd	%f26,%f20,%f26
202	 lda	[%i1+4]%asi,%f11
203
204	  faddd	%f22,%f24,%f22
205	movg	%fcc0,%l3,%o0		! swap hy and hx when the values were swapped
206
207	movg	%fcc0,%o7,%l3
208
209	fbu,pn	%fcc0,.nan0		! if x or y is nan
210! delay slot
211	 lda	[%i3]%asi,%f18
212
213	sub	%l3,%o0,%l0		! hx - hy
214	sub	%l3,%o3,%g5		! g5 = hx - 0x7fe00000
215	 fabsd	%f10,%f14
216	 lda	[%i3+4]%asi,%f19
217
218	sub	%l0,%o4,%o7		! o7 = (hx - hy) - 0x03600000
219	  faddd	%f22,%f26,%f26		! element 2 result, still unsigned
220
! The AND below is negative only when both differences are negative, so
! the branch is taken when either one is non-negative.
221	andcc	%g5,%o7,%g0
222	bge,pn	%icc,.big0		! if |x| or |x/y| is big
223! delay slot
224	nop
225
226	 fabsd	%f18,%f12
227	cmp	%o0,%o5			! hy against 0x00100000
228	bl,pn	%icc,.small0		! if |y| is small
229! delay slot
230	 lda	[%i1]%asi,%o0
231
232	add	%l0,%o1,%l0		! k
233	addcc	%i0,-1,%i0		! one element consumed
234	ble,pn	%icc,.last1		! nothing left: element 1 becomes a dummy
235! delay slot
236	 lda	[%i3]%asi,%l3
237
! .cont1: element 0 is a normal case and l0 holds k.
! This section
!   - applies the sign in f46 to the finished element 2 result and
!     stores it through l6,
!   - turns k into a table offset (shift right by 10, clear the low five
!     bits, cap at 0x1420) and adds the table base in g1,
!   - starts the argument reduction for element 0 using the table value
!     at offset 0x10 of the selected entry,
!   - classifies element 1 the same way .loop classified element 0 and
!     preloads element 2.
238.cont1:
239	srl	%l0,10,%l0
240	 mov	%i5,%l5			! pz1 = z
241	  fxor	%f26,%f46,%f26		! apply sign to element 2 result
242	  st	%f26,[%l6]
243
244	 fand	%f10,signbit,%f44	! sy1
245	andn	%l0,0x1f,%l0
246	 add	%i1,%i2,%i1
247	  st	%f27,[%l6+4]
248
249	 fand	%f18,signbit,%f38	! sx1
250	cmp	%l0,%o2
251	movg	%icc,%o2,%l0		! cap the offset at 0x1420
252
253	 fcmpd	%fcc1,%f14,%f12
254	 add	%i3,%i4,%i3
255	 add	%i5,%l7,%i5
256
257	 fmovd	%f14,%f10
258	add	%l0,%g1,%l0		! l0 = address of the table entry
259	 sethi	%hi(0x80000000),%g5
260
261	ldd	[%l0+0x10],%f4		! table value c used in the reduction
262	fand	%f2,mask,%f6		! f6 = f2 with the low 32 bits cleared
263	 andn	%o0,%g5,%o0
264	 andn	%l3,%g5,%l3
265
266	 fmovdg	%fcc1,%f12,%f10
267
268	 fmovdg	%fcc1,%f14,%f12
269	 mov	%o0,%o7
270	  lda	[%i1]%asi,%f20
271
272	fsubd	%f2,%f6,%f30		! f30 = low part of f2
273	fmuld	%f6,%f4,%f6		! f6 = high part of f2 times c
274	 movg	%fcc1,%l3,%o0
275
276	fmuld	%f0,%f4,%f8		! f8 = f0 * c
277	 movg	%fcc1,%o7,%l3
278
279	  lda	[%i1+4]%asi,%f21
280	 fbu,pn	%fcc1,.nan1
281! delay slot
282	 nop
283
284	  lda	[%i3]%asi,%f28
285	 sub	%l3,%o0,%l1
286	 sub	%l3,%o3,%g5
287
288	  lda	[%i3+4]%asi,%f29
289	fmuld	%f30,%f4,%f30		! f30 = low part of f2 times c
290	fsubd	%f0,%f6,%f4
291	 sub	%l1,%o4,%o7
292
293	  fabsd	%f20,%f24
294	 andcc	%g5,%o7,%g0
295	 bge,pn	%icc,.big1
296! delay slot
297	 nop
298
299	faddd	%f2,%f8,%f8		! denominator: f2 + f0 * c
300	 cmp	%o0,%o5
301	 bl,pn	%icc,.small1
302! delay slot
303	  lda	[%i1]%asi,%o0
304
305	  fabsd	%f28,%f22
306	 add	%l1,%o1,%l1
307	 addcc	%i0,-1,%i0
308	  lda	[%i3]%asi,%l3
309
310	fsubd	%f4,%f30,%f4		! numerator: f0 - f2 * c
311	 srl	%l1,10,%l1
312	 ble,pn	%icc,.last2		! nothing left: element 2 becomes a dummy
313! delay slot
314	  mov	%i5,%l6			! pz2 = z
315
! .cont2: element 1 is a normal case and l1 already holds k shifted
! right by 10.  This section starts the divide for element 0, performs
! the table lookup and argument reduction for element 1, and classifies
! element 2 (NaN, big, small) the same way the earlier stages were
! classified.  It also begins forming the al and sign terms of element 0.
316.cont2:
317	  fand	%f20,signbit,%f46	! sy2
318	 andn	%l1,0x1f,%l1
319	  add	%i1,%i2,%i1
320
321	  fand	%f28,signbit,%f40	! sx2
322	 cmp	%l1,%o2
323	 movg	%icc,%o2,%l1
324
325	  fcmpd	%fcc2,%f24,%f22
326	  add	%i3,%i4,%i3
327	  add	%i5,%l7,%i5
328
329	fdivd	%f4,%f8,%f4		! t0 = numerator / denominator
330	  fmovd	%f24,%f20
331	 add	%l1,%g1,%l1
332	  sethi	%hi(0x80000000),%g5
333
334	 ldd	[%l1+0x10],%f14
335	 fand	%f12,mask,%f16
336	  andn	%o0,%g5,%o0
337	  andn	%l3,%g5,%l3
338
339	  fmovdg %fcc2,%f22,%f20
340
341	  fmovdg %fcc2,%f24,%f22
342	  mov	%o0,%o7
343
344	 fsubd	%f12,%f16,%f32
345	 fmuld	%f16,%f14,%f16
346	  movg	%fcc2,%l3,%o0
347
348	fnegd	pio2_lo,%f8		! al
349	 fmuld	%f10,%f14,%f18
350	  movg	%fcc2,%o7,%l3
351
352	fzero	%f0
353	  fbu,pn %fcc2,.nan2
354! delay slot
355	  nop
356
357	fmovdg	%fcc0,signbit,%f0	! f0 = sign bit if element 0 was swapped, else zero
358	  sub	%l3,%o0,%l2
359	  sub	%l3,%o3,%g5
360
361	 fmuld	%f32,%f14,%f32
362	 fsubd	%f10,%f16,%f14
363	  sub	%l2,%o4,%o7
364
365	 faddd	%f12,%f18,%f18
366	  andcc	%g5,%o7,%g0
367	  bge,pn %icc,.big2
368! delay slot
369	  nop
370
371	fxor	%f36,%f0,%f36		! flip sx0 when element 0 was swapped
372	  cmp	%o0,%o5
373	  bl,pn	%icc,.small2
374! delay slot
375	  nop
376
! .cont3: all three elements are normal cases (or dummies).  On entry l2
! holds hx - hy for element 2; k and the table offset are formed here.
!
! For each element, with t the quotient from fdivd and t2 = t * t, the
! code below evaluates
!     t + t*t2*(p1 + p2*t2) + (t*t2)*(t*t2)*(p3*t + p4*t*t2) + al
! and adds ah, where al and ah are built from pio2_lo/pio2, the sign
! register of x, the swap flag in the fcc register of that element, and
! the values at offsets 0x8 and 0x0 of the selected table entry.  The
! sum is then xored with the sign register of that element and stored.
!
! Element 0 is stored through l4 and element 1 through l5 inside the
! loop body.  Element 2 is finished at the top of the next iteration
! (.loop and .cont1) or in the epilogue after the loop branch.  The next
! x/y pair for element 0 is preloaded with non-faulting loads.
377.cont3:
378	fmovdg	%fcc0,signbit,%f8
379	  add	%l2,%o1,%l2
380
381	 fsubd	%f14,%f32,%f14
382	  srl	%l2,10,%l2
383
384	fxor	%f36,pio2_lo,%f30	! al
385	  andn	%l2,0x1f,%l2
386
387	fxor	%f36,pio2,%f0		! ah
388	  cmp	%l2,%o2
389	  movg	%icc,%o2,%l2
390
391	fxor	%f42,%f36,%f42		! sy
392
393	faddd	%f8,%f30,%f8
394	ldd	[%l0+0x8],%f30
395	  add	%l2,%g1,%l2
396
397	 fdivd	%f14,%f18,%f14
398	 fzero	%f10
399
400	  ldd	[%l2+0x10],%f24
401	  fand	%f22,mask,%f26
402
403	 fmovdg	%fcc1,signbit,%f10
404
405	fmuld	%f4,%f4,%f36
406	faddd	%f8,%f30,%f8
407
408	  fsubd	%f22,%f26,%f34
409	  fmuld	%f26,%f24,%f26
410
411	  fmuld	%f20,%f24,%f28
412	 fxor	%f38,%f10,%f38
413
414	fmuld	%f4,p3,%f6
415	 fnegd	pio2_lo,%f18
416
417	fmuld	%f36,p2,%f2
418	 fmovdg	%fcc1,signbit,%f18
419
420	fmuld	%f36,%f4,%f36
421	 fxor	%f38,pio2,%f10
422
423	  fmuld	%f34,%f24,%f34
424	  fsubd	%f20,%f26,%f24
425
426	  faddd	%f22,%f28,%f28
427
428	faddd	%f2,p1,%f2
429
430	fmuld	%f36,p4,%f30
431	 fxor	%f38,pio2_lo,%f32
432
433	  fsubd	%f24,%f34,%f24
434
435	 fxor	%f44,%f38,%f44
436
437	fmuld	%f36,%f2,%f2
438	 faddd	%f18,%f32,%f18
439	 ldd	[%l1+0x8],%f32
440
441	fmuld	%f36,%f36,%f36
442	faddd	%f6,%f30,%f30
443
444	  fdivd	%f24,%f28,%f24
445	  fzero	%f20
446
447	  fmovdg %fcc2,signbit,%f20
448
449	faddd	%f2,%f8,%f2
450
451	 fmuld	%f14,%f14,%f38
452	 faddd	%f18,%f32,%f18
453
454	fmuld	%f36,%f30,%f36
455	  fxor	%f40,%f20,%f40
456
457	fnegd	pio2,%f6		! ah
458	 fmuld	%f14,p3,%f16
459
460	fmovdg	%fcc0,signbit,%f6
461
462	 fmuld	%f38,p2,%f12
463	  fnegd	pio2_lo,%f28
464
465	faddd	%f2,%f36,%f2
466	 fmuld	%f38,%f14,%f38
467
468	faddd	%f6,%f0,%f6
469	ldd	[%l0],%f0
470
471	  fmovdg %fcc2,signbit,%f28
472
473	 faddd	%f12,p1,%f12
474
475	 fmuld	%f38,p4,%f32
476	  fxor	%f40,pio2_lo,%f34
477
478	  fxor	%f40,pio2,%f20
479
480	faddd	%f2,%f4,%f2
481
482	 fmuld	%f38,%f12,%f12
483	  fxor	%f46,%f40,%f46
484
485	 fmuld	%f38,%f38,%f38
486	 faddd	%f16,%f32,%f32
487
488	  faddd	%f28,%f34,%f28
489	  ldd	[%l2+0x8],%f34
490
491	faddd	%f6,%f0,%f6
492	lda	[%i1]%asi,%f0		! preload next argument
493
494	 faddd	%f12,%f18,%f12
495	lda	[%i1+4]%asi,%f1
496
497	  fmuld	%f24,%f24,%f40
498	lda	[%i3]%asi,%f8
499
500	 fmuld	%f38,%f32,%f38
501	  faddd	%f28,%f34,%f28
502	lda	[%i3+4]%asi,%f9
503
504	 fnegd	pio2,%f16
505	  fmuld	%f24,p3,%f26
506	lda	[%i1]%asi,%o0
507
508	 fmovdg	%fcc1,signbit,%f16
509	lda	[%i3]%asi,%l3
510
511	  fmuld	%f40,p2,%f22
512
513	 faddd	%f12,%f38,%f12
514	  fmuld	%f40,%f24,%f40
515
516	faddd	%f2,%f6,%f6
517
518	 faddd	%f16,%f10,%f16
519	 ldd	[%l1],%f10
520
521	  faddd	%f22,p1,%f22
522
523	 faddd	%f12,%f14,%f12
524	  fmuld	%f40,p4,%f34
525
526	fxor	%f6,%f42,%f6		! apply sign to element 0 result
527	st	%f6,[%l4]
528
529	 faddd	%f16,%f10,%f16
530	st	%f7,[%l4+4]
531
532	  fmuld	%f40,%f22,%f22
533
534	  fmuld	%f40,%f40,%f40
535	  faddd	%f26,%f34,%f34
536
537	  fnegd	pio2,%f26
538
539	 faddd	%f12,%f16,%f16
540
541	  faddd	%f22,%f28,%f22
542
543	  fmuld	%f40,%f34,%f40
544	  fmovdg %fcc2,signbit,%f26
545
546! -
547
548	 fxor	%f16,%f44,%f16		! apply sign to element 1 result
549	 st	%f16,[%l5]
550
551	  faddd	%f26,%f20,%f26
552	 st	%f17,[%l5+4]
553	addcc	%i0,-1,%i0
554
555	  faddd	%f22,%f40,%f22
556	bg,pt	%icc,.loop		! more elements remain
557! delay slot
558	  ldd	[%l2],%f20
559
560
! Loop epilogue: finish element 2 with the same three adds that .loop
! performs, then store it and return.  The .special0 handler jumps to
! .done_from_special0 when its element was the last one; f26 then
! already holds the finished, unsigned element 2 sum.
561	  faddd	%f26,%f20,%f26
562	  faddd	%f22,%f24,%f22
563	  faddd	%f22,%f26,%f26
564.done_from_special0:
565	  fxor	%f26,%f46,%f26
566	  st	%f26,[%l6]
567	  st	%f27,[%l6+4]
568	  ret
569	  restore
570
571
572
! .last1: reached when the element count ran out after element 0.
! Replaces element 1 with dummy arguments (x = y = pio2, with matching
! high words in o0 and l3) and points i5 at the junk slot so that .cont1
! copies it into the element 1 store pointer.
573	.align	16
574.last1:
575	fmovd	pio2,%f10		! set up dummy arguments
576	fmovd	pio2,%f18
577	fabsd	%f10,%f14
578	fabsd	%f18,%f12
579	sethi	%hi(0x3ff921fb),%o0
580	or	%o0,%lo(0x3ff921fb),%o0	! o0 = high word of pio2
581	mov	%o0,%l3
582	ba,pt	%icc,.cont1
583! delay slot
584	add	%fp,junk,%i5
585
586
587
! .last2: reached when the element count ran out after element 1.
! Replaces element 2 with dummy arguments (x = y = pio2, with matching
! high words in o0 and l3) and points the element 2 store pointer (l6)
! at the junk slot.
588	.align	16
589.last2:
590	fmovd	pio2,%f20
591	fmovd	pio2,%f28
592	fabsd	%f20,%f24
593	fabsd	%f28,%f22
594	sethi	%hi(0x3ff921fb),%o0
595	or	%o0,%lo(0x3ff921fb),%o0	! o0 = high word of pio2
596	mov	%o0,%l3
597	ba,pt	%icc,.cont2
598! delay slot
599	add	%fp,junk,%l6
600
601
602
! .nan0: the compare of |y0| with |x0| was unordered (x or y is NaN).
! First completes the element 2 add that .loop had not reached, then
! performs the element 1 preload steps that .loop would have done, and
! produces the result as the product of the two operands before joining
! .special0.  .nan0_from_special0 is the entry used by .special0, where
! the element 2 add is already done.
603	.align	16
604.nan0:
605	  faddd	%f22,%f26,%f26
606.nan0_from_special0:
607	 fabsd	%f10,%f14
608	 lda	[%i3+4]%asi,%f19
609	 fabsd	%f18,%f12
610	 lda	[%i1]%asi,%o0
611	 lda	[%i3]%asi,%l3
612	ba,pt	%icc,.special0
613! delay slot
614	fmuld	%f0,%f2,%f6		! result = f0 * f2
615
616
! .big0: element 0 has a large hx or a large hx - hy.
! On entry g5 = hx - 0x7fe00000 and l0 = hx - hy (set in .loop or
! .special0).  Three outcomes:
!   - g5 >= 0x00100000: go to .return_ah0
!   - hx - hy below 0x03600000: scale both operands by twom3 and rejoin
!     the normal path at .cont1 (or .last1 when no elements remain)
!   - otherwise (local label 1): go to .return_ah0 if the operands were
!     swapped or if f8 compares below signbit; else the result is the
!     plain quotient f0 / f2, finished in .special0
617	.align	16
618.big0:
619	 fabsd	%f18,%f12
620	 lda	[%i1]%asi,%o0
621	 lda	[%i3]%asi,%l3
622	cmp	%g5,%o5
623	bge,pn	%icc,.return_ah0	! if hx >= 0x7ff00000
624! delay slot
625	nop
626	cmp	%l0,%o4
627	bge,pn	%icc,1f			! if hx - hy >= 0x03600000
628! delay slot
629	nop
630	ldd	[%fp+twom3],%f6
631	fmuld	%f0,%f6,%f0		! scale both operands by twom3
632	fmuld	%f2,%f6,%f2
633	add	%l0,%o1,%l0		! k, as on the normal path
634	addcc	%i0,-1,%i0
635	ble,pn	%icc,.last1
636! delay slot
637	nop
638	ba,pt	%icc,.cont1
639! delay slot
640	nop
6411:
642	fbg,pn	%fcc0,.return_ah0	! operands were swapped
643! delay slot
644	nop
645	fcmpd	%fcc3,%f8,signbit
646	fbl,pn	%fcc3,.return_ah0	! f8 compares below signbit
647! delay slot
648	nop
649	ba,pt	%icc,.special0
650! delay slot
651	fdivd	%f0,%f2,%f6		! result = f0 / f2
652
653
! .small0: element 0 has hy below 0x00100000.
! If f0 compares equal to signbit (a zero of either sign) the result
! comes from .return_ah0.  Otherwise both operands are scaled by two110,
! their new high words are moved to integer registers through the
! yscl/xscl stack slots, k is recomputed from them, and the normal path
! is rejoined at .cont1 (or .last1 when no elements remain).
654	.align	16
655.small0:
656	 lda	[%i3]%asi,%l3
657	fcmpd	%fcc3,%f0,signbit
658	fbe,pt	%fcc3,.return_ah0
659! delay slot
660	nop
661	ldd	[%fp+two110],%f6
662	fmuld	%f0,%f6,%f0		! scale both operands by two110
663	fmuld	%f2,%f6,%f2
664	st	%f0,[%fp+yscl]
665	ld	[%fp+yscl],%o7		! o7 = high word of scaled f0
666	st	%f2,[%fp+xscl]
667	ld	[%fp+xscl],%l0		! l0 = high word of scaled f2
668	sub	%l0,%o7,%l0
669	add	%l0,%o1,%l0		! k
670	addcc	%i0,-1,%i0
671	ble,pn	%icc,.last1
672! delay slot
673	nop
674	ba,pt	%icc,.cont1
675! delay slot
676	nop
677
678
! .return_ah0: element 0 result is formed from the ah terms only, with
! no table lookup or polynomial.  The sign and ah computation mirrors
! the one in the main loop.  g5 - l0 equals hy - 0x7fe00000, so the
! compare against 0x00100000 tests hy against 0x7ff00000; when hy is
! that large pio4 is added as well.
!
! .special0: common tail for every special case of element 0.  Applies
! the sign in f42, stores the result through l4 and decrements n.  If no
! elements remain it jumps to .done_from_special0.  Otherwise the
! preloaded element 1 values (f10, f18, f14, f12) become the new element
! 0, the classification from .loop is repeated for it, and control
! rejoins the pipeline at .cont1 or one of the element 0 handlers.
679	.align	16
680.return_ah0:
681	fzero	%f0
682	fmovdg	%fcc0,signbit,%f0
683	fxor	%f36,%f0,%f36
684	fxor	%f36,pio2,%f0
685	fxor	%f42,%f36,%f42
686	fnegd	pio2,%f6
687	fmovdg	%fcc0,signbit,%f6
688	faddd	%f6,%f0,%f6
689	sub	%g5,%l0,%o7
690	cmp	%o7,%o5
691	bl,pt	%icc,1f			! if hy < 0x7ff00000
692! delay slot
693	nop
694	ldd	[%fp+pio4],%f0
695	faddd	%f6,%f0,%f6
6961:
! NOTE(review): the f4 result of this conversion is overwritten or left
! unread on every path from here; presumably it is executed only for
! its floating-point exception side effect - confirm.
697	fdtoi	%f6,%f4
698.special0:
699	fxor	%f6,%f42,%f6
700	st	%f6,[%l4]
701	st	%f7,[%l4+4]
702	addcc	%i0,-1,%i0
703	ble,pn	%icc,.done_from_special0
704! delay slot
705	nop
706	fmovd	%f10,%f0		! preloaded element 1 becomes element 0
707	fmovd	%f18,%f8
708	fmovd	%f14,%f4
709	fmovd	%f12,%f2
710	mov	%i5,%l4
711	add	%i1,%i2,%i1
712	add	%i3,%i4,%i3
713	add	%i5,%l7,%i5
714	fand	%f0,signbit,%f42
715	sethi	%hi(0x80000000),%g5
716	fand	%f8,signbit,%f36
717	andn	%o0,%g5,%o0
718	andn	%l3,%g5,%l3
719	fcmpd	%fcc0,%f4,%f2
720	fmovd	%f4,%f0
721	fmovdg	%fcc0,%f2,%f0
722	fmovdg	%fcc0,%f4,%f2
723	mov	%o0,%o7
724	movg	%fcc0,%l3,%o0
725	movg	%fcc0,%o7,%l3
726	 lda	[%i1]%asi,%f10
727	 lda	[%i1+4]%asi,%f11
728	fbu,pn	%fcc0,.nan0_from_special0
729! delay slot
730	 lda	[%i3]%asi,%f18
731	 fabsd	%f10,%f14
732	 lda	[%i3+4]%asi,%f19
733	sub	%l3,%o0,%l0
734	sub	%l3,%o3,%g5
735	sub	%l0,%o4,%o7
736	andcc	%g5,%o7,%g0
737	bge,pn	%icc,.big0
738! delay slot
739	nop
740	 fabsd	%f18,%f12
741	cmp	%o0,%o5
742	bl,pn	%icc,.small0
743! delay slot
744	 lda	[%i1]%asi,%o0
745	add	%l0,%o1,%l0
746	addcc	%i0,-1,%i0
747	ble,pn	%icc,.last1
748! delay slot
749	 lda	[%i3]%asi,%l3
750	ba,pt	%icc,.cont1
751! delay slot
752	nop
753
754
755
! .nan1: the element 1 compare was unordered (x or y is NaN).
! The first four instructions complete the element 0 reduction steps
! that .cont1 had not yet executed when it branched here.
! .nan1_from_special1 then performs the element 2 preload steps, sets
! the element 2 store pointer, and produces the element 1 result as the
! product of its two operands before joining .special1.
756	.align	16
757.nan1:
758	fmuld	%f30,%f4,%f30
759	fsubd	%f0,%f6,%f4
760	faddd	%f2,%f8,%f8
761	fsubd	%f4,%f30,%f4
762.nan1_from_special1:
763	 lda	[%i3]%asi,%f28
764	 lda	[%i3+4]%asi,%f29
765	 fabsd	%f20,%f24
766	 lda	[%i1]%asi,%o0
767	 fabsd	%f28,%f22
768	 lda	[%i3]%asi,%l3
769	 mov	%i5,%l6
770	ba,pt	%icc,.special1
771! delay slot
772	fmuld	%f10,%f12,%f16		! result = f10 * f12
773
774
! .big1: element 1 counterpart of .big0.
! The first two instructions complete the element 0 reduction steps
! that .cont1 had not yet executed.  .big1_from_special1 is the entry
! used by .special1, where those steps are not pending.
! On entry g5 = hx - 0x7fe00000 and l1 = hx - hy for element 1.  The
! three outcomes match .big0: .return_ah1, rescale by twom3 and rejoin
! at .cont2 (or .last2), or the plain quotient f10 / f12 via .special1.
775	.align	16
776.big1:
777	faddd	%f2,%f8,%f8
778	fsubd	%f4,%f30,%f4
779.big1_from_special1:
780	 lda	[%i1]%asi,%o0
781	 fabsd	%f28,%f22
782	 lda	[%i3]%asi,%l3
783	 mov	%i5,%l6
784	cmp	%g5,%o5
785	bge,pn	%icc,.return_ah1
786! delay slot
787	nop
788	cmp	%l1,%o4
789	bge,pn	%icc,1f
790! delay slot
791	nop
792	ldd	[%fp+twom3],%f16
793	fmuld	%f10,%f16,%f10		! scale both operands by twom3
794	fmuld	%f12,%f16,%f12
795	add	%l1,%o1,%l1
796	srl	%l1,10,%l1		! .cont2 expects the shifted value
797	addcc	%i0,-1,%i0
798	ble,pn	%icc,.last2
799! delay slot
800	nop
801	ba,pt	%icc,.cont2
802! delay slot
803	nop
8041:
805	fbg,pn	%fcc1,.return_ah1
806! delay slot
807	nop
808	fcmpd	%fcc3,%f18,signbit
809	fbl,pn	%fcc3,.return_ah1
810! delay slot
811	nop
812	ba,pt	%icc,.special1
813! delay slot
814	fdivd	%f10,%f12,%f16		! result = f10 / f12
815
816
! .small1: element 1 counterpart of .small0.
! The first instruction completes the element 0 numerator that .cont1
! had not yet finished.  .small1_from_special1 is the entry used by
! .special1.  If f10 compares equal to signbit the result comes from
! .return_ah1; otherwise both operands are scaled by two110, k is
! recomputed from the new high words (moved through the yscl/xscl
! slots), shifted right by 10, and control rejoins at .cont2 or .last2.
817	.align	16
818.small1:
819	fsubd	%f4,%f30,%f4
820.small1_from_special1:
821	 fabsd	%f28,%f22
822	 lda	[%i3]%asi,%l3
823	 mov	%i5,%l6
824	fcmpd	%fcc3,%f10,signbit
825	fbe,pt	%fcc3,.return_ah1
826! delay slot
827	nop
828	ldd	[%fp+two110],%f16
829	fmuld	%f10,%f16,%f10		! scale both operands by two110
830	fmuld	%f12,%f16,%f12
831	st	%f10,[%fp+yscl]
832	ld	[%fp+yscl],%o7
833	st	%f12,[%fp+xscl]
834	ld	[%fp+xscl],%l1
835	sub	%l1,%o7,%l1
836	add	%l1,%o1,%l1
837	srl	%l1,10,%l1
838	addcc	%i0,-1,%i0
839	ble,pn	%icc,.last2
840! delay slot
841	nop
842	ba,pt	%icc,.cont2
843! delay slot
844	nop
845
846
! .return_ah1: element 1 counterpart of .return_ah0.  Builds the result
! from the ah terms only; pio4 is added when g5 - l1 is at least
! 0x00100000.
!
! .special1: common tail for every special case of element 1.  Applies
! the sign in f44, stores the result through l5 and decrements n.  If
! elements remain (local label 1) the preloaded element 2 values become
! the new element 1; otherwise dummy pio2 arguments are set up first and
! i5 is pointed at the junk slot.  The element 1 classification is then
! repeated and control rejoins at .cont2, .last2 or an element 1 handler.
847	.align	16
848.return_ah1:
849	fzero	%f10
850	fmovdg	%fcc1,signbit,%f10
851	fxor	%f38,%f10,%f38
852	fxor	%f38,pio2,%f10
853	fxor	%f44,%f38,%f44
854	fnegd	pio2,%f16
855	fmovdg	%fcc1,signbit,%f16
856	faddd	%f16,%f10,%f16
857	sub	%g5,%l1,%o7
858	cmp	%o7,%o5
859	bl,pt	%icc,1f
860! delay slot
861	nop
862	ldd	[%fp+pio4],%f10
863	faddd	%f16,%f10,%f16
8641:
! NOTE(review): f14 is overwritten below before it is read; presumably
! this conversion is executed only for its floating-point exception
! side effect - confirm.
865	fdtoi	%f16,%f14
866.special1:
867	fxor	%f16,%f44,%f16
868	st	%f16,[%l5]
869	st	%f17,[%l5+4]
870	addcc	%i0,-1,%i0
871	bg,pn	%icc,1f
872! delay slot
873	nop
874	fmovd	pio2,%f20		! set up dummy argument
875	fmovd	pio2,%f28
876	fabsd	%f20,%f24
877	fabsd	%f28,%f22
878	sethi	%hi(0x3ff921fb),%o0
879	or	%o0,%lo(0x3ff921fb),%o0
880	mov	%o0,%l3
881	add	%fp,junk,%i5
8821:
883	fmovd	%f20,%f10		! element 2 values become element 1
884	fmovd	%f28,%f18
885	fmovd	%f24,%f14
886	fmovd	%f22,%f12
887	mov	%i5,%l5
888	add	%i1,%i2,%i1
889	add	%i3,%i4,%i3
890	add	%i5,%l7,%i5
891	fand	%f10,signbit,%f44
892	sethi	%hi(0x80000000),%g5
893	fand	%f18,signbit,%f38
894	andn	%o0,%g5,%o0
895	andn	%l3,%g5,%l3
896	fcmpd	%fcc1,%f14,%f12
897	fmovd	%f14,%f10
898	fmovdg	%fcc1,%f12,%f10
899	fmovdg	%fcc1,%f14,%f12
900	mov	%o0,%o7
901	movg	%fcc1,%l3,%o0
902	movg	%fcc1,%o7,%l3
903	 lda	[%i1]%asi,%f20
904	 lda	[%i1+4]%asi,%f21
905	fbu,pn	%fcc1,.nan1_from_special1
906! delay slot
907	nop
908	 lda	[%i3]%asi,%f28
909	 lda	[%i3+4]%asi,%f29
910	 fabsd	%f20,%f24
911	sub	%l3,%o0,%l1
912	sub	%l3,%o3,%g5
913	sub	%l1,%o4,%o7
914	andcc	%g5,%o7,%g0
915	bge,pn	%icc,.big1_from_special1
916! delay slot
917	nop
918	cmp	%o0,%o5
919	bl,pn	%icc,.small1_from_special1
920! delay slot
921	 lda	[%i1]%asi,%o0
922	 fabsd	%f28,%f22
923	 lda	[%i3]%asi,%l3
924	add	%l1,%o1,%l1
925	srl	%l1,10,%l1
926	addcc	%i0,-1,%i0
927	ble,pn	%icc,.last2
928! delay slot
929	 mov	%i5,%l6
930	ba,pt	%icc,.cont2
931! delay slot
932	nop
933
934
935
! .nan2: the element 2 compare was unordered (x or y is NaN).
! The first five instructions complete the element 0 and element 1
! steps that .cont2 had not yet executed when it branched here.
! .nan2_from_special2 is the entry used by .special2.  The element 2
! result is the product of its two operands, finished in .special2.
936	.align	16
937.nan2:
938	fmovdg	%fcc0,signbit,%f0
939	 fmuld	%f32,%f14,%f32
940	 fsubd	%f10,%f16,%f14
941	 faddd	%f12,%f18,%f18
942	fxor	%f36,%f0,%f36
943.nan2_from_special2:
944	ba,pt	%icc,.special2
945! delay slot
946	fmuld	%f20,%f22,%f26		! result = f20 * f22
947
948
! .big2: element 2 counterpart of .big0.
! The first instruction completes the element 0 sign step that .cont2
! had not yet executed.  .big2_from_special2 is the entry used by
! .special2.  On entry g5 = hx - 0x7fe00000 and l2 = hx - hy for
! element 2.  Outcomes: .return_ah2, rescale both operands by twom3 and
! rejoin at .cont3 (which forms k from l2), or the plain quotient
! f20 / f22 via .special2.
949	.align	16
950.big2:
951	fxor	%f36,%f0,%f36
952.big2_from_special2:
953	cmp	%g5,%o5
954	bge,pn	%icc,.return_ah2
955! delay slot
956	nop
957	cmp	%l2,%o4
958	bge,pn	%icc,1f
959! delay slot
960	nop
961	ldd	[%fp+twom3],%f26
962	fmuld	%f20,%f26,%f20		! scale both operands by twom3
963	fmuld	%f22,%f26,%f22
964	ba,pt	%icc,.cont3
965! delay slot
966	nop
9671:
968	fbg,pn	%fcc2,.return_ah2
969! delay slot
970	nop
971	fcmpd	%fcc3,%f28,signbit
972	fbl,pn	%fcc3,.return_ah2
973! delay slot
974	nop
975	ba,pt	%icc,.special2
976! delay slot
977	fdivd	%f20,%f22,%f26		! result = f20 / f22
978
979
! .small2: element 2 counterpart of .small0.
! If f20 compares equal to signbit the result comes from .return_ah2.
! Otherwise both operands are scaled by two110 and l2 is recomputed as
! the difference of their new high words (moved through the yscl/xscl
! slots); .cont3 then forms k from l2.
980	.align	16
981.small2:
982	fcmpd	%fcc3,%f20,signbit
983	fbe,pt	%fcc3,.return_ah2
984! delay slot
985	nop
986	ldd	[%fp+two110],%f26
987	fmuld	%f20,%f26,%f20		! scale both operands by two110
988	fmuld	%f22,%f26,%f22
989	st	%f20,[%fp+yscl]
990	ld	[%fp+yscl],%o7
991	st	%f22,[%fp+xscl]
992	ld	[%fp+xscl],%l2
993	sub	%l2,%o7,%l2
994	ba,pt	%icc,.cont3
995! delay slot
996	nop
997
998
! .return_ah2: element 2 counterpart of .return_ah0.  Builds the result
! from the ah terms only; pio4 is added when g5 - l2 is at least
! 0x00100000.
!
! .special2: common tail for every special case of element 2.  Applies
! the sign in f46, stores the result through l6 and decrements n.
!   - No elements left: set up dummy operands (pio2), clear the sign
!     registers and l2, point l6 at the junk slot and rejoin at .cont3.
!   - Otherwise (local label 1): load the next x/y pair into the element
!     2 registers, repeat the element 2 classification, and rejoin at
!     .cont3 or an element 2 handler.
999	.align	16
1000.return_ah2:
1001	fzero	%f20
1002	fmovdg	%fcc2,signbit,%f20
1003	fxor	%f40,%f20,%f40
1004	fxor	%f40,pio2,%f20
1005	fxor	%f46,%f40,%f46
1006	fnegd	pio2,%f26
1007	fmovdg	%fcc2,signbit,%f26
1008	faddd	%f26,%f20,%f26
1009	sub	%g5,%l2,%o7
1010	cmp	%o7,%o5
1011	bl,pt	%icc,1f
1012! delay slot
1013	nop
1014	ldd	[%fp+pio4],%f20
1015	faddd	%f26,%f20,%f26
10161:
! NOTE(review): f24 is overwritten or unused on the paths from here;
! presumably this conversion is executed only for its floating-point
! exception side effect - confirm.
1017	fdtoi	%f26,%f24
1018.special2:
1019	fxor	%f26,%f46,%f26
1020	st	%f26,[%l6]
1021	st	%f27,[%l6+4]
1022	addcc	%i0,-1,%i0
1023	bg,pn	%icc,1f
1024! delay slot
1025	nop
1026	fmovd	pio2,%f20		! set up dummy argument
1027	fmovd	pio2,%f22
1028	fzero	%f40
1029	fzero	%f46
1030	mov	0,%l2
1031	ba,pt	%icc,.cont3
1032! delay slot
1033	add	%fp,junk,%l6
10341:
1035	lda	[%i1]%asi,%f20		! load the next pair as element 2
1036	lda	[%i1+4]%asi,%f21
1037	lda	[%i3]%asi,%f28
1038	lda	[%i3+4]%asi,%f29
1039	fabsd	%f20,%f24
1040	lda	[%i1]%asi,%o0
1041	fabsd	%f28,%f22
1042	lda	[%i3]%asi,%l3
1043	mov	%i5,%l6
1044	fand	%f20,signbit,%f46
1045	add	%i1,%i2,%i1
1046	fand	%f28,signbit,%f40
1047	fcmpd	%fcc2,%f24,%f22
1048	add	%i3,%i4,%i3
1049	add	%i5,%l7,%i5
1050	fmovd	%f24,%f20
1051	sethi	%hi(0x80000000),%g5
1052	andn	%o0,%g5,%o0
1053	andn	%l3,%g5,%l3
1054	fmovdg	%fcc2,%f22,%f20
1055	fmovdg	%fcc2,%f24,%f22
1056	mov	%o0,%o7
1057	movg	%fcc2,%l3,%o0
1058	movg	%fcc2,%o7,%l3
1059	fbu,pn	%fcc2,.nan2_from_special2
1060! delay slot
1061	nop
1062	sub	%l3,%o0,%l2
1063	sub	%l3,%o3,%g5
1064	sub	%l2,%o4,%o7
1065	andcc	%g5,%o7,%g0
1066	bge,pn	%icc,.big2_from_special2
1067! delay slot
1068	nop
1069	cmp	%o0,%o5
1070	bl,pn	%icc,.small2
1071! delay slot
1072	nop
1073	ba,pt	%icc,.cont3
1074! delay slot
1075	nop
1076
1077	SET_SIZE(__vatan2)
1078
1079