1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vcosf.S"
30
31#include "libm.h"
32
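! __vcosf(n, x, stridex, y, stridey): single-precision vector cosine.
! Elements are processed four at a time in double precision:
!   |x| <= pi/4              direct cosine polynomial
!   pi/4 < |x| <= 2^19*pi    reduce modulo pi/2, then sin/cos polynomial
!   |x| > 2^19*pi (finite)   flagged and finished by __vlibm_vcos_bigf
!   inf or NaN               a NaN is stored immediately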
33	RO_DATA
34	.align	64
35constants:
36	.word	0xbfc55554,0x60000000
37	.word	0x3f811077,0xe0000000
38	.word	0xbf29956b,0x60000000
39	.word	0x3ff00000,0x00000000
40	.word	0xbfe00000,0x00000000
41	.word	0x3fa55554,0xa0000000
42	.word	0xbf56c0c1,0xe0000000
43	.word	0x3ef99e24,0xe0000000
44	.word	0x3fe45f30,0x6dc9c883
45	.word	0x43380000,0x00000000
46	.word	0x3ff921fb,0x54400000
47	.word	0x3dd0b461,0x1a626331
48	.word	0x3f490fdb,0
49	.word	0x49c90fdb,0
50	.word	0x7f800000,0
51	.word	0x80000000,0
52
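! offsets into the table above: S0-S2 are the coefficients in
! sin(x) ~ x*(1 + S0*x^2 + S1*x^4 + S2*x^6) and C0-C2 those in
! cos(x) ~ 1 - x*x/2 + x^4*(C0 + C1*x^2 + C2*x^4), both for
! |x| <= pi/4; round is 2^52+2^51, pio2_1+pio2_t is a two-piece
! pi/2, thresh1 and thresh2 are pi/4 and 2^19*pi in single precision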
53#define S0		0x0
54#define S1		0x08
55#define S2		0x10
56#define one		0x18
57#define mhalf		0x20
58#define C0		0x28
59#define C1		0x30
60#define C2		0x38
61#define invpio2		0x40
62#define round		0x48
63#define pio2_1		0x50
64#define pio2_t		0x58
65#define thresh1		0x60
66#define thresh2		0x68
67#define inf		0x70
68#define signbit		0x78
69
70! local storage indices
71
72#define xsave		STACK_BIAS-0x8
73#define ysave		STACK_BIAS-0x10
74#define nsave		STACK_BIAS-0x14
75#define sxsave		STACK_BIAS-0x18
76#define sysave		STACK_BIAS-0x1c
77#define junk		STACK_BIAS-0x20
78#define n3		STACK_BIAS-0x24
79#define n2		STACK_BIAS-0x28
80#define n1		STACK_BIAS-0x2c
81#define n0		STACK_BIAS-0x30
82! sizeof temp storage - must be a multiple of 16 for V9
83#define tmps		0x30
84
85! register use
86
87! i0  n
88! i1  x
89! i2  stridex
90! i3  y
91! i4  stridey
92! i5  biguns
93
94! l0  n0
95! l1  n1
96! l2  n2
97! l3  n3
98! l4
99! l5
100! l6
101! l7
102
103! the following are 64-bit registers in both V8+ and V9
104
105! g1
106! g5
107
108! o0  py0
109! o1  py1
110! o2  py2
111! o3  py3
112! o4
113! o5
114! o7
115
116! f0  x0
117! f2  x1
118! f4  x2
119! f6  x3
120! f8  thresh1 (pi/4)
121! f10 y0
122! f12 y1
123! f14 y2
124! f16 y3
125! f18 thresh2 (2^19 pi)
126! f20
127! f22
128! f24
129! f26
130! f28 signbit
131! f30
132! f32
133! f34
134! f36
135! f38 inf
136! f40 S0
137! f42 S1
138! f44 S2
139! f46 one
140! f48 mhalf
141! f50 C0
142! f52 C1
143! f54 C2
144! f56 invpio2
145! f58 round
146! f60 pio2_1
147! f62 pio2_t
148
149	ENTRY(__vcosf)
150	save	%sp,-SA(MINFRAME)-tmps,%sp
151	PIC_SETUP(l7)
152	PIC_SET(l7,constants,l0)
153	mov	%l0,%g1
154	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
155#ifdef __sparcv9
156	stx	%i1,[%fp+xsave]		! save arguments
157	stx	%i3,[%fp+ysave]
158#else
159	st	%i1,[%fp+xsave]		! save arguments
160	st	%i3,[%fp+ysave]
161#endif
162	st	%i0,[%fp+nsave]
163	st	%i2,[%fp+sxsave]
164	st	%i4,[%fp+sysave]
165	mov	0,%i5			! biguns = 0
166	ldd	[%g1+S0],%f40		! load constants
167	ldd	[%g1+S1],%f42
168	ldd	[%g1+S2],%f44
169	ldd	[%g1+one],%f46
170	ldd	[%g1+mhalf],%f48
171	ldd	[%g1+C0],%f50
172	ldd	[%g1+C1],%f52
173	ldd	[%g1+C2],%f54
174	ldd	[%g1+invpio2],%f56
175	ldd	[%g1+round],%f58
176	ldd	[%g1+pio2_1],%f60
177	ldd	[%g1+pio2_t],%f62
178	ldd	[%g1+thresh1],%f8
179	ldd	[%g1+thresh2],%f18
180	ldd	[%g1+inf],%f38
181	ldd	[%g1+signbit],%f28
182	sll	%i2,2,%i2		! scale strides
183	sll	%i4,2,%i4
184	fzero	%f10			! loop prologue
185	add	%fp,junk,%o0
186	fzero	%f12
187	add	%fp,junk,%o1
188	fzero	%f14
189	add	%fp,junk,%o2
190	fzero	%f16
191	ba	.start
192	add	%fp,junk,%o3
193
194	.align	16
195! 16-byte aligned
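! main loop, software pipelined: each pass loads the next four
! arguments into f0/f2/f4/f6 while converting and storing the four
! results the previous pass left in f10-f16; the prologue above
! seeded f10-f16 with zeros and pointed o0-o3 at the scratch slot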
196.start:
197	ld	[%i1],%f0		! *x
198	add	%i1,%i2,%i1		! x += stridex
199	addcc	%i0,-1,%i0
200	fdtos	%f10,%f10
201
202	st	%f10,[%o0]
203	mov	%i3,%o0			! py0 = y
204	ble,pn	%icc,.last1
205! delay slot
206	add	%i3,%i4,%i3		! y += stridey
207
208	ld	[%i1],%f2		! *x
209	add	%i1,%i2,%i1		! x += stridex
210	addcc	%i0,-1,%i0
211	fdtos	%f12,%f12
212
213	st	%f12,[%o1]
214	mov	%i3,%o1			! py1 = y
215	ble,pn	%icc,.last2
216! delay slot
217	add	%i3,%i4,%i3		! y += stridey
218
219	ld	[%i1],%f4		! *x
220	add	%i1,%i2,%i1		! x += stridex
221	addcc	%i0,-1,%i0
222	fdtos	%f14,%f14
223
224	st	%f14,[%o2]
225	mov	%i3,%o2			! py2 = y
226	ble,pn	%icc,.last3
227! delay slot
228	add	%i3,%i4,%i3		! y += stridey
229
230	ld	[%i1],%f6		! *x
231	add	%i1,%i2,%i1		! x += stridex
232	nop
233	fdtos	%f16,%f16
234
235	st	%f16,[%o3]
236	mov	%i3,%o3			! py3 = y
237	add	%i3,%i4,%i3		! y += stridey
238.cont:
239	fabsd	%f0,%f30
240
241	fabsd	%f2,%f32
242
243	fabsd	%f4,%f34
244
245	fabsd	%f6,%f36
246	fcmple32 %f30,%f18,%l0
247
248	fcmple32 %f32,%f18,%l1
249
250	fcmple32 %f34,%f18,%l2
251
252	fcmple32 %f36,%f18,%l3
253	nop
254
255! 16-byte aligned
256	andcc	%l0,2,%g0
257	bz,pn	%icc,.range0		! branch if > 2^19 pi
258! delay slot
259	fcmple32 %f30,%f8,%l0
260
261.check1:
262	andcc	%l1,2,%g0
263	bz,pn	%icc,.range1		! branch if > 2^19 pi
264! delay slot
265	fcmple32 %f32,%f8,%l1
266
267.check2:
268	andcc	%l2,2,%g0
269	bz,pn	%icc,.range2		! branch if > 2^19 pi
270! delay slot
271	fcmple32 %f34,%f8,%l2
272
273.check3:
274	andcc	%l3,2,%g0
275	bz,pn	%icc,.range3		! branch if > 2^19 pi
276! delay slot
277	fcmple32 %f36,%f8,%l3
278
279.checkprimary:
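! square each argument (fsmuld) and widen it to double; combine the
! pi/4 comparisons computed above and, if every |x| <= pi/4, fall
! through to evaluate cos(x) ~ 1 - x*x/2 + x^4*(C0 + C1*x*x + C2*x^4)
! per lane, otherwise branch to .medium for pi/2 argument reduction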
280	fsmuld	%f0,%f0,%f30
281	fstod	%f0,%f0
282
283	fsmuld	%f2,%f2,%f32
284	fstod	%f2,%f2
285	and	%l0,%l1,%o4
286
287	fsmuld	%f4,%f4,%f34
288	fstod	%f4,%f4
289
290	fsmuld	%f6,%f6,%f36
291	fstod	%f6,%f6
292	and	%l2,%l3,%o5
293
294	fmuld	%f30,%f54,%f10
295	and	%o4,%o5,%o5
296
297	fmuld	%f32,%f54,%f12
298	andcc	%o5,2,%g0
299	bz,pn	%icc,.medium		! branch if any argument is > pi/4
300! delay slot
301	nop
302
303	fmuld	%f34,%f54,%f14
304
305	fmuld	%f36,%f54,%f16
306
307	fmuld	%f30,%f48,%f20
308	faddd	%f10,%f52,%f10
309
310	fmuld	%f32,%f48,%f22
311	faddd	%f12,%f52,%f12
312
313	fmuld	%f34,%f48,%f24
314	faddd	%f14,%f52,%f14
315
316	fmuld	%f36,%f48,%f26
317	faddd	%f16,%f52,%f16
318
319	fmuld	%f30,%f10,%f10
320	faddd	%f20,%f46,%f20
321
322	fmuld	%f32,%f12,%f12
323	faddd	%f22,%f46,%f22
324
325	fmuld	%f34,%f14,%f14
326	faddd	%f24,%f46,%f24
327
328	fmuld	%f36,%f16,%f16
329	faddd	%f26,%f46,%f26
330
331	fmuld	%f30,%f30,%f30
332	faddd	%f10,%f50,%f10
333
334	fmuld	%f32,%f32,%f32
335	faddd	%f12,%f50,%f12
336
337	fmuld	%f34,%f34,%f34
338	faddd	%f14,%f50,%f14
339
340	fmuld	%f36,%f36,%f36
341	faddd	%f16,%f50,%f16
342
343	fmuld	%f30,%f10,%f10
344
345	fmuld	%f32,%f12,%f12
346
347	fmuld	%f34,%f14,%f14
348
349	fmuld	%f36,%f16,%f16
350
351	faddd	%f10,%f20,%f10
352
353	faddd	%f12,%f22,%f12
354
355	faddd	%f14,%f24,%f14
356
357	addcc	%i0,-1,%i0
358	bg,pt	%icc,.start
359! delay slot
360	faddd	%f16,%f26,%f16
361
362	ba,pt	%icc,.end
363! delay slot
364	nop
365
366
367	.align	16
368.medium:
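! at least one |x| exceeds pi/4: compute n = nearest integer to
! x*2/pi (by adding and subtracting 2^52+2^51), reduce x by n*pi/2
! using the two-piece pi/2 (pio2_1 + pio2_t), then let bit 0 of n+1
! select between the sin and cos polynomials and bit 1 the sign;
! the 16 case blocks below cover every combination across the lanes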
369	fmuld	%f0,%f56,%f10
370
371	fmuld	%f2,%f56,%f12
372
373	fmuld	%f4,%f56,%f14
374
375	fmuld	%f6,%f56,%f16
376
377	faddd	%f10,%f58,%f10
378	st	%f11,[%fp+n0]
379
380	faddd	%f12,%f58,%f12
381	st	%f13,[%fp+n1]
382
383	faddd	%f14,%f58,%f14
384	st	%f15,[%fp+n2]
385
386	faddd	%f16,%f58,%f16
387	st	%f17,[%fp+n3]
388
389	fsubd	%f10,%f58,%f10
390
391	fsubd	%f12,%f58,%f12
392
393	fsubd	%f14,%f58,%f14
394
395	fsubd	%f16,%f58,%f16
396
397	fmuld	%f10,%f60,%f20
398	ld	[%fp+n0],%l0
399
400	fmuld	%f12,%f60,%f22
401	ld	[%fp+n1],%l1
402
403	fmuld	%f14,%f60,%f24
404	ld	[%fp+n2],%l2
405
406	fmuld	%f16,%f60,%f26
407	ld	[%fp+n3],%l3
408
409	fsubd	%f0,%f20,%f0
410	fmuld	%f10,%f62,%f30
411	add	%l0,1,%l0
412
413	fsubd	%f2,%f22,%f2
414	fmuld	%f12,%f62,%f32
415	add	%l1,1,%l1
416
417	fsubd	%f4,%f24,%f4
418	fmuld	%f14,%f62,%f34
419	add	%l2,1,%l2
420
421	fsubd	%f6,%f26,%f6
422	fmuld	%f16,%f62,%f36
423	add	%l3,1,%l3
424
425	fsubd	%f0,%f30,%f0
426
427	fsubd	%f2,%f32,%f2
428
429	fsubd	%f4,%f34,%f4
430
431	fsubd	%f6,%f36,%f6
432	andcc	%l0,1,%g0
433
434	fmuld	%f0,%f0,%f30
435	bz,pn	%icc,.case8
436! delay slot
437	andcc	%l1,1,%g0
438
439	fmuld	%f2,%f2,%f32
440	bz,pn	%icc,.case4
441! delay slot
442	andcc	%l2,1,%g0
443
444	fmuld	%f4,%f4,%f34
445	bz,pn	%icc,.case2
446! delay slot
447	andcc	%l3,1,%g0
448
449	fmuld	%f6,%f6,%f36
450	bz,pn	%icc,.case1
451! delay slot
452	nop
453
454!.case0:
455	fmuld	%f30,%f54,%f10		! cos(x0)
456	fzero	%f0
457
458	fmuld	%f32,%f54,%f12		! cos(x1)
459	fzero	%f2
460
461	fmuld	%f34,%f54,%f14		! cos(x2)
462	fzero	%f4
463
464	fmuld	%f36,%f54,%f16		! cos(x3)
465	fzero	%f6
466
467	fmuld	%f30,%f48,%f20
468	faddd	%f10,%f52,%f10
469
470	fmuld	%f32,%f48,%f22
471	faddd	%f12,%f52,%f12
472
473	fmuld	%f34,%f48,%f24
474	faddd	%f14,%f52,%f14
475
476	fmuld	%f36,%f48,%f26
477	faddd	%f16,%f52,%f16
478
479	fmuld	%f30,%f10,%f10
480	faddd	%f20,%f46,%f20
481
482	fmuld	%f32,%f12,%f12
483	faddd	%f22,%f46,%f22
484
485	fmuld	%f34,%f14,%f14
486	faddd	%f24,%f46,%f24
487
488	fmuld	%f36,%f16,%f16
489	faddd	%f26,%f46,%f26
490
491	fmuld	%f30,%f30,%f30
492	faddd	%f10,%f50,%f10
493	and	%l0,2,%g1
494
495	fmuld	%f32,%f32,%f32
496	faddd	%f12,%f50,%f12
497	and	%l1,2,%g5
498
499	fmuld	%f34,%f34,%f34
500	faddd	%f14,%f50,%f14
501	and	%l2,2,%o4
502
503	fmuld	%f36,%f36,%f36
504	faddd	%f16,%f50,%f16
505	and	%l3,2,%o5
506
507	fmuld	%f30,%f10,%f10
508	fmovrdnz %g1,%f28,%f0
509
510	fmuld	%f32,%f12,%f12
511	fmovrdnz %g5,%f28,%f2
512
513	fmuld	%f34,%f14,%f14
514	fmovrdnz %o4,%f28,%f4
515
516	fmuld	%f36,%f16,%f16
517	fmovrdnz %o5,%f28,%f6
518
519	faddd	%f10,%f20,%f10
520
521	faddd	%f12,%f22,%f12
522
523	faddd	%f14,%f24,%f14
524
525	faddd	%f16,%f26,%f16
526
527	fxor	%f10,%f0,%f10
528
529	fxor	%f12,%f2,%f12
530
531	fxor	%f14,%f4,%f14
532
533	addcc	%i0,-1,%i0
534	bg,pt	%icc,.start
535! delay slot
536	fxor	%f16,%f6,%f16
537
538	ba,pt	%icc,.end
539! delay slot
540	nop
541
542	.align	16
543.case1:
544	fmuld	%f30,%f54,%f10		! cos(x0)
545	fzero	%f0
546
547	fmuld	%f32,%f54,%f12		! cos(x1)
548	fzero	%f2
549
550	fmuld	%f34,%f54,%f14		! cos(x2)
551	fzero	%f4
552
553	fmuld	%f36,%f44,%f16		! sin(x3)
554
555	fmuld	%f30,%f48,%f20
556	faddd	%f10,%f52,%f10
557
558	fmuld	%f32,%f48,%f22
559	faddd	%f12,%f52,%f12
560
561	fmuld	%f34,%f48,%f24
562	faddd	%f14,%f52,%f14
563
564	fmuld	%f36,%f40,%f26
565	faddd	%f16,%f42,%f16
566
567	fmuld	%f30,%f10,%f10
568	faddd	%f20,%f46,%f20
569
570	fmuld	%f32,%f12,%f12
571	faddd	%f22,%f46,%f22
572
573	fmuld	%f34,%f14,%f14
574	faddd	%f24,%f46,%f24
575
576	fmuld	%f36,%f36,%f36
577	faddd	%f26,%f46,%f26
578
579	fmuld	%f30,%f30,%f30
580	faddd	%f10,%f50,%f10
581	and	%l0,2,%g1
582
583	fmuld	%f32,%f32,%f32
584	faddd	%f12,%f50,%f12
585	and	%l1,2,%g5
586
587	fmuld	%f34,%f34,%f34
588	faddd	%f14,%f50,%f14
589	and	%l2,2,%o4
590
591	fmuld	%f36,%f16,%f16
592	fzero	%f36
593
594	fmuld	%f30,%f10,%f10
595	fmovrdnz %g1,%f28,%f0
596
597	fmuld	%f32,%f12,%f12
598	fmovrdnz %g5,%f28,%f2
599
600	fmuld	%f34,%f14,%f14
601	fmovrdnz %o4,%f28,%f4
602
603	faddd	%f16,%f26,%f16
604	and	%l3,2,%o5
605
606	faddd	%f10,%f20,%f10
607
608	faddd	%f12,%f22,%f12
609
610	faddd	%f14,%f24,%f14
611
612	fmuld	%f6,%f16,%f16
613	fmovrdnz %o5,%f28,%f36
614
615	fxor	%f10,%f0,%f10
616
617	fxor	%f12,%f2,%f12
618
619	fxor	%f14,%f4,%f14
620
621	addcc	%i0,-1,%i0
622	bg,pt	%icc,.start
623! delay slot
624	fxor	%f16,%f36,%f16
625
626	ba,pt	%icc,.end
627! delay slot
628	nop
629
630	.align	16
631.case2:
632	fmuld	%f6,%f6,%f36
633	bz,pn	%icc,.case3
634! delay slot
635	nop
636
637	fmuld	%f30,%f54,%f10		! cos(x0)
638	fzero	%f0
639
640	fmuld	%f32,%f54,%f12		! cos(x1)
641	fzero	%f2
642
643	fmuld	%f34,%f44,%f14		! sin(x2)
644
645	fmuld	%f36,%f54,%f16		! cos(x3)
646	fzero	%f6
647
648	fmuld	%f30,%f48,%f20
649	faddd	%f10,%f52,%f10
650
651	fmuld	%f32,%f48,%f22
652	faddd	%f12,%f52,%f12
653
654	fmuld	%f34,%f40,%f24
655	faddd	%f14,%f42,%f14
656
657	fmuld	%f36,%f48,%f26
658	faddd	%f16,%f52,%f16
659
660	fmuld	%f30,%f10,%f10
661	faddd	%f20,%f46,%f20
662
663	fmuld	%f32,%f12,%f12
664	faddd	%f22,%f46,%f22
665
666	fmuld	%f34,%f34,%f34
667	faddd	%f24,%f46,%f24
668
669	fmuld	%f36,%f16,%f16
670	faddd	%f26,%f46,%f26
671
672	fmuld	%f30,%f30,%f30
673	faddd	%f10,%f50,%f10
674	and	%l0,2,%g1
675
676	fmuld	%f32,%f32,%f32
677	faddd	%f12,%f50,%f12
678	and	%l1,2,%g5
679
680	fmuld	%f34,%f14,%f14
681	fzero	%f34
682
683	fmuld	%f36,%f36,%f36
684	faddd	%f16,%f50,%f16
685	and	%l3,2,%o5
686
687	fmuld	%f30,%f10,%f10
688	fmovrdnz %g1,%f28,%f0
689
690	fmuld	%f32,%f12,%f12
691	fmovrdnz %g5,%f28,%f2
692
693	faddd	%f14,%f24,%f14
694	and	%l2,2,%o4
695
696	fmuld	%f36,%f16,%f16
697	fmovrdnz %o5,%f28,%f6
698
699	faddd	%f10,%f20,%f10
700
701	faddd	%f12,%f22,%f12
702
703	fmuld	%f4,%f14,%f14
704	fmovrdnz %o4,%f28,%f34
705
706	faddd	%f16,%f26,%f16
707
708	fxor	%f10,%f0,%f10
709
710	fxor	%f12,%f2,%f12
711
712	fxor	%f14,%f34,%f14
713
714	addcc	%i0,-1,%i0
715	bg,pt	%icc,.start
716! delay slot
717	fxor	%f16,%f6,%f16
718
719	ba,pt	%icc,.end
720! delay slot
721	nop
722
723	.align	16
724.case3:
725	fmuld	%f30,%f54,%f10		! cos(x0)
726	fzero	%f0
727
728	fmuld	%f32,%f54,%f12		! cos(x1)
729	fzero	%f2
730
731	fmuld	%f34,%f44,%f14		! sin(x2)
732
733	fmuld	%f36,%f44,%f16		! sin(x3)
734
735	fmuld	%f30,%f48,%f20
736	faddd	%f10,%f52,%f10
737
738	fmuld	%f32,%f48,%f22
739	faddd	%f12,%f52,%f12
740
741	fmuld	%f34,%f40,%f24
742	faddd	%f14,%f42,%f14
743
744	fmuld	%f36,%f40,%f26
745	faddd	%f16,%f42,%f16
746
747	fmuld	%f30,%f10,%f10
748	faddd	%f20,%f46,%f20
749
750	fmuld	%f32,%f12,%f12
751	faddd	%f22,%f46,%f22
752
753	fmuld	%f34,%f34,%f34
754	faddd	%f24,%f46,%f24
755
756	fmuld	%f36,%f36,%f36
757	faddd	%f26,%f46,%f26
758
759	fmuld	%f30,%f30,%f30
760	faddd	%f10,%f50,%f10
761	and	%l0,2,%g1
762
763	fmuld	%f32,%f32,%f32
764	faddd	%f12,%f50,%f12
765	and	%l1,2,%g5
766
767	fmuld	%f34,%f14,%f14
768	fzero	%f34
769
770	fmuld	%f36,%f16,%f16
771	fzero	%f36
772
773	fmuld	%f30,%f10,%f10
774	fmovrdnz %g1,%f28,%f0
775
776	fmuld	%f32,%f12,%f12
777	fmovrdnz %g5,%f28,%f2
778
779	faddd	%f14,%f24,%f14
780	and	%l2,2,%o4
781
782	faddd	%f16,%f26,%f16
783	and	%l3,2,%o5
784
785	faddd	%f10,%f20,%f10
786
787	faddd	%f12,%f22,%f12
788
789	fmuld	%f4,%f14,%f14
790	fmovrdnz %o4,%f28,%f34
791
792	fmuld	%f6,%f16,%f16
793	fmovrdnz %o5,%f28,%f36
794
795	fxor	%f10,%f0,%f10
796
797	fxor	%f12,%f2,%f12
798
799	fxor	%f14,%f34,%f14
800
801	addcc	%i0,-1,%i0
802	bg,pt	%icc,.start
803! delay slot
804	fxor	%f16,%f36,%f16
805
806	ba,pt	%icc,.end
807! delay slot
808	nop
809
810	.align	16
811.case4:
812	fmuld	%f4,%f4,%f34
813	bz,pn	%icc,.case6
814! delay slot
815	andcc	%l3,1,%g0
816
817	fmuld	%f6,%f6,%f36
818	bz,pn	%icc,.case5
819! delay slot
820	nop
821
822	fmuld	%f30,%f54,%f10		! cos(x0)
823	fzero	%f0
824
825	fmuld	%f32,%f44,%f12		! sin(x1)
826
827	fmuld	%f34,%f54,%f14		! cos(x2)
828	fzero	%f4
829
830	fmuld	%f36,%f54,%f16		! cos(x3)
831	fzero	%f6
832
833	fmuld	%f30,%f48,%f20
834	faddd	%f10,%f52,%f10
835
836	fmuld	%f32,%f40,%f22
837	faddd	%f12,%f42,%f12
838
839	fmuld	%f34,%f48,%f24
840	faddd	%f14,%f52,%f14
841
842	fmuld	%f36,%f48,%f26
843	faddd	%f16,%f52,%f16
844
845	fmuld	%f30,%f10,%f10
846	faddd	%f20,%f46,%f20
847
848	fmuld	%f32,%f32,%f32
849	faddd	%f22,%f46,%f22
850
851	fmuld	%f34,%f14,%f14
852	faddd	%f24,%f46,%f24
853
854	fmuld	%f36,%f16,%f16
855	faddd	%f26,%f46,%f26
856
857	fmuld	%f30,%f30,%f30
858	faddd	%f10,%f50,%f10
859	and	%l0,2,%g1
860
861	fmuld	%f32,%f12,%f12
862	fzero	%f32
863
864	fmuld	%f34,%f34,%f34
865	faddd	%f14,%f50,%f14
866	and	%l2,2,%o4
867
868	fmuld	%f36,%f36,%f36
869	faddd	%f16,%f50,%f16
870	and	%l3,2,%o5
871
872	fmuld	%f30,%f10,%f10
873	fmovrdnz %g1,%f28,%f0
874
875	faddd	%f12,%f22,%f12
876	and	%l1,2,%g5
877
878	fmuld	%f34,%f14,%f14
879	fmovrdnz %o4,%f28,%f4
880
881	fmuld	%f36,%f16,%f16
882	fmovrdnz %o5,%f28,%f6
883
884	faddd	%f10,%f20,%f10
885
886	fmuld	%f2,%f12,%f12
887	fmovrdnz %g5,%f28,%f32
888
889	faddd	%f14,%f24,%f14
890
891	faddd	%f16,%f26,%f16
892
893	fxor	%f10,%f0,%f10
894
895	fxor	%f12,%f32,%f12
896
897	fxor	%f14,%f4,%f14
898
899	addcc	%i0,-1,%i0
900	bg,pt	%icc,.start
901! delay slot
902	fxor	%f16,%f6,%f16
903
904	ba,pt	%icc,.end
905! delay slot
906	nop
907
908	.align	16
909.case5:
910	fmuld	%f30,%f54,%f10		! cos(x0)
911	fzero	%f0
912
913	fmuld	%f32,%f44,%f12		! sin(x1)
914
915	fmuld	%f34,%f54,%f14		! cos(x2)
916	fzero	%f4
917
918	fmuld	%f36,%f44,%f16		! sin(x3)
919
920	fmuld	%f30,%f48,%f20
921	faddd	%f10,%f52,%f10
922
923	fmuld	%f32,%f40,%f22
924	faddd	%f12,%f42,%f12
925
926	fmuld	%f34,%f48,%f24
927	faddd	%f14,%f52,%f14
928
929	fmuld	%f36,%f40,%f26
930	faddd	%f16,%f42,%f16
931
932	fmuld	%f30,%f10,%f10
933	faddd	%f20,%f46,%f20
934
935	fmuld	%f32,%f32,%f32
936	faddd	%f22,%f46,%f22
937
938	fmuld	%f34,%f14,%f14
939	faddd	%f24,%f46,%f24
940
941	fmuld	%f36,%f36,%f36
942	faddd	%f26,%f46,%f26
943
944	fmuld	%f30,%f30,%f30
945	faddd	%f10,%f50,%f10
946	and	%l0,2,%g1
947
948	fmuld	%f32,%f12,%f12
949	fzero	%f32
950
951	fmuld	%f34,%f34,%f34
952	faddd	%f14,%f50,%f14
953	and	%l2,2,%o4
954
955	fmuld	%f36,%f16,%f16
956	fzero	%f36
957
958	fmuld	%f30,%f10,%f10
959	fmovrdnz %g1,%f28,%f0
960
961	faddd	%f12,%f22,%f12
962	and	%l1,2,%g5
963
964	fmuld	%f34,%f14,%f14
965	fmovrdnz %o4,%f28,%f4
966
967	faddd	%f16,%f26,%f16
968	and	%l3,2,%o5
969
970	faddd	%f10,%f20,%f10
971
972	fmuld	%f2,%f12,%f12
973	fmovrdnz %g5,%f28,%f32
974
975	faddd	%f14,%f24,%f14
976
977	fmuld	%f6,%f16,%f16
978	fmovrdnz %o5,%f28,%f36
979
980	fxor	%f10,%f0,%f10
981
982	fxor	%f12,%f32,%f12
983
984	fxor	%f14,%f4,%f14
985
986	addcc	%i0,-1,%i0
987	bg,pt	%icc,.start
988! delay slot
989	fxor	%f16,%f36,%f16
990
991	ba,pt	%icc,.end
992! delay slot
993	nop
994
995	.align	16
996.case6:
997	fmuld	%f6,%f6,%f36
998	bz,pn	%icc,.case7
999! delay slot
1000	nop
1001
1002	fmuld	%f30,%f54,%f10		! cos(x0)
1003	fzero	%f0
1004
1005	fmuld	%f32,%f44,%f12		! sin(x1)
1006
1007	fmuld	%f34,%f44,%f14		! sin(x2)
1008
1009	fmuld	%f36,%f54,%f16		! cos(x3)
1010	fzero	%f6
1011
1012	fmuld	%f30,%f48,%f20
1013	faddd	%f10,%f52,%f10
1014
1015	fmuld	%f32,%f40,%f22
1016	faddd	%f12,%f42,%f12
1017
1018	fmuld	%f34,%f40,%f24
1019	faddd	%f14,%f42,%f14
1020
1021	fmuld	%f36,%f48,%f26
1022	faddd	%f16,%f52,%f16
1023
1024	fmuld	%f30,%f10,%f10
1025	faddd	%f20,%f46,%f20
1026
1027	fmuld	%f32,%f32,%f32
1028	faddd	%f22,%f46,%f22
1029
1030	fmuld	%f34,%f34,%f34
1031	faddd	%f24,%f46,%f24
1032
1033	fmuld	%f36,%f16,%f16
1034	faddd	%f26,%f46,%f26
1035
1036	fmuld	%f30,%f30,%f30
1037	faddd	%f10,%f50,%f10
1038	and	%l0,2,%g1
1039
1040	fmuld	%f32,%f12,%f12
1041	fzero	%f32
1042
1043	fmuld	%f34,%f14,%f14
1044	fzero	%f34
1045
1046	fmuld	%f36,%f36,%f36
1047	faddd	%f16,%f50,%f16
1048	and	%l3,2,%o5
1049
1050	fmuld	%f30,%f10,%f10
1051	fmovrdnz %g1,%f28,%f0
1052
1053	faddd	%f12,%f22,%f12
1054	and	%l1,2,%g5
1055
1056	faddd	%f14,%f24,%f14
1057	and	%l2,2,%o4
1058
1059	fmuld	%f36,%f16,%f16
1060	fmovrdnz %o5,%f28,%f6
1061
1062	faddd	%f10,%f20,%f10
1063
1064	fmuld	%f2,%f12,%f12
1065	fmovrdnz %g5,%f28,%f32
1066
1067	fmuld	%f4,%f14,%f14
1068	fmovrdnz %o4,%f28,%f34
1069
1070	faddd	%f16,%f26,%f16
1071
1072	fxor	%f10,%f0,%f10
1073
1074	fxor	%f12,%f32,%f12
1075
1076	fxor	%f14,%f34,%f14
1077
1078	addcc	%i0,-1,%i0
1079	bg,pt	%icc,.start
1080! delay slot
1081	fxor	%f16,%f6,%f16
1082
1083	ba,pt	%icc,.end
1084! delay slot
1085	nop
1086
1087	.align	16
1088.case7:
1089	fmuld	%f30,%f54,%f10		! cos(x0)
1090	fzero	%f0
1091
1092	fmuld	%f32,%f44,%f12		! sin(x1)
1093
1094	fmuld	%f34,%f44,%f14		! sin(x2)
1095
1096	fmuld	%f36,%f44,%f16		! sin(x3)
1097
1098	fmuld	%f30,%f48,%f20
1099	faddd	%f10,%f52,%f10
1100
1101	fmuld	%f32,%f40,%f22
1102	faddd	%f12,%f42,%f12
1103
1104	fmuld	%f34,%f40,%f24
1105	faddd	%f14,%f42,%f14
1106
1107	fmuld	%f36,%f40,%f26
1108	faddd	%f16,%f42,%f16
1109
1110	fmuld	%f30,%f10,%f10
1111	faddd	%f20,%f46,%f20
1112
1113	fmuld	%f32,%f32,%f32
1114	faddd	%f22,%f46,%f22
1115
1116	fmuld	%f34,%f34,%f34
1117	faddd	%f24,%f46,%f24
1118
1119	fmuld	%f36,%f36,%f36
1120	faddd	%f26,%f46,%f26
1121
1122	fmuld	%f30,%f30,%f30
1123	faddd	%f10,%f50,%f10
1124	and	%l0,2,%g1
1125
1126	fmuld	%f32,%f12,%f12
1127	fzero	%f32
1128
1129	fmuld	%f34,%f14,%f14
1130	fzero	%f34
1131
1132	fmuld	%f36,%f16,%f16
1133	fzero	%f36
1134
1135	fmuld	%f30,%f10,%f10
1136	fmovrdnz %g1,%f28,%f0
1137
1138	faddd	%f12,%f22,%f12
1139	and	%l1,2,%g5
1140
1141	faddd	%f14,%f24,%f14
1142	and	%l2,2,%o4
1143
1144	faddd	%f16,%f26,%f16
1145	and	%l3,2,%o5
1146
1147	faddd	%f10,%f20,%f10
1148
1149	fmuld	%f2,%f12,%f12
1150	fmovrdnz %g5,%f28,%f32
1151
1152	fmuld	%f4,%f14,%f14
1153	fmovrdnz %o4,%f28,%f34
1154
1155	fmuld	%f6,%f16,%f16
1156	fmovrdnz %o5,%f28,%f36
1157
1158	fxor	%f10,%f0,%f10
1159
1160	fxor	%f12,%f32,%f12
1161
1162	fxor	%f14,%f34,%f14
1163
1164	addcc	%i0,-1,%i0
1165	bg,pt	%icc,.start
1166! delay slot
1167	fxor	%f16,%f36,%f16
1168
1169	ba,pt	%icc,.end
1170! delay slot
1171	nop
1172
1173
1174	.align	16
1175.case8:
1176	fmuld	%f2,%f2,%f32
1177	bz,pn	%icc,.case12
1178! delay slot
1179	andcc	%l2,1,%g0
1180
1181	fmuld	%f4,%f4,%f34
1182	bz,pn	%icc,.case10
1183! delay slot
1184	andcc	%l3,1,%g0
1185
1186	fmuld	%f6,%f6,%f36
1187	bz,pn	%icc,.case9
1188! delay slot
1189	nop
1190
1191	fmuld	%f30,%f44,%f10		! sin(x0)
1192
1193	fmuld	%f32,%f54,%f12		! cos(x1)
1194	fzero	%f2
1195
1196	fmuld	%f34,%f54,%f14		! cos(x2)
1197	fzero	%f4
1198
1199	fmuld	%f36,%f54,%f16		! cos(x3)
1200	fzero	%f6
1201
1202	fmuld	%f30,%f40,%f20
1203	faddd	%f10,%f42,%f10
1204
1205	fmuld	%f32,%f48,%f22
1206	faddd	%f12,%f52,%f12
1207
1208	fmuld	%f34,%f48,%f24
1209	faddd	%f14,%f52,%f14
1210
1211	fmuld	%f36,%f48,%f26
1212	faddd	%f16,%f52,%f16
1213
1214	fmuld	%f30,%f30,%f30
1215	faddd	%f20,%f46,%f20
1216
1217	fmuld	%f32,%f12,%f12
1218	faddd	%f22,%f46,%f22
1219
1220	fmuld	%f34,%f14,%f14
1221	faddd	%f24,%f46,%f24
1222
1223	fmuld	%f36,%f16,%f16
1224	faddd	%f26,%f46,%f26
1225
1226	fmuld	%f30,%f10,%f10
1227	fzero	%f30
1228
1229	fmuld	%f32,%f32,%f32
1230	faddd	%f12,%f50,%f12
1231	and	%l1,2,%g5
1232
1233	fmuld	%f34,%f34,%f34
1234	faddd	%f14,%f50,%f14
1235	and	%l2,2,%o4
1236
1237	fmuld	%f36,%f36,%f36
1238	faddd	%f16,%f50,%f16
1239	and	%l3,2,%o5
1240
1241	faddd	%f10,%f20,%f10
1242	and	%l0,2,%g1
1243
1244	fmuld	%f32,%f12,%f12
1245	fmovrdnz %g5,%f28,%f2
1246
1247	fmuld	%f34,%f14,%f14
1248	fmovrdnz %o4,%f28,%f4
1249
1250	fmuld	%f36,%f16,%f16
1251	fmovrdnz %o5,%f28,%f6
1252
1253	fmuld	%f0,%f10,%f10
1254	fmovrdnz %g1,%f28,%f30
1255
1256	faddd	%f12,%f22,%f12
1257
1258	faddd	%f14,%f24,%f14
1259
1260	faddd	%f16,%f26,%f16
1261
1262	fxor	%f10,%f30,%f10
1263
1264	fxor	%f12,%f2,%f12
1265
1266	fxor	%f14,%f4,%f14
1267
1268	addcc	%i0,-1,%i0
1269	bg,pt	%icc,.start
1270! delay slot
1271	fxor	%f16,%f6,%f16
1272
1273	ba,pt	%icc,.end
1274! delay slot
1275	nop
1276
1277	.align	16
1278.case9:
1279	fmuld	%f30,%f44,%f10		! sin(x0)
1280
1281	fmuld	%f32,%f54,%f12		! cos(x1)
1282	fzero	%f2
1283
1284	fmuld	%f34,%f54,%f14		! cos(x2)
1285	fzero	%f4
1286
1287	fmuld	%f36,%f44,%f16		! sin(x3)
1288
1289	fmuld	%f30,%f40,%f20
1290	faddd	%f10,%f42,%f10
1291
1292	fmuld	%f32,%f48,%f22
1293	faddd	%f12,%f52,%f12
1294
1295	fmuld	%f34,%f48,%f24
1296	faddd	%f14,%f52,%f14
1297
1298	fmuld	%f36,%f40,%f26
1299	faddd	%f16,%f42,%f16
1300
1301	fmuld	%f30,%f30,%f30
1302	faddd	%f20,%f46,%f20
1303
1304	fmuld	%f32,%f12,%f12
1305	faddd	%f22,%f46,%f22
1306
1307	fmuld	%f34,%f14,%f14
1308	faddd	%f24,%f46,%f24
1309
1310	fmuld	%f36,%f36,%f36
1311	faddd	%f26,%f46,%f26
1312
1313	fmuld	%f30,%f10,%f10
1314	fzero	%f30
1315
1316	fmuld	%f32,%f32,%f32
1317	faddd	%f12,%f50,%f12
1318	and	%l1,2,%g5
1319
1320	fmuld	%f34,%f34,%f34
1321	faddd	%f14,%f50,%f14
1322	and	%l2,2,%o4
1323
1324	fmuld	%f36,%f16,%f16
1325	fzero	%f36
1326
1327	faddd	%f10,%f20,%f10
1328	and	%l0,2,%g1
1329
1330	fmuld	%f32,%f12,%f12
1331	fmovrdnz %g5,%f28,%f2
1332
1333	fmuld	%f34,%f14,%f14
1334	fmovrdnz %o4,%f28,%f4
1335
1336	faddd	%f16,%f26,%f16
1337	and	%l3,2,%o5
1338
1339	fmuld	%f0,%f10,%f10
1340	fmovrdnz %g1,%f28,%f30
1341
1342	faddd	%f12,%f22,%f12
1343
1344	faddd	%f14,%f24,%f14
1345
1346	fmuld	%f6,%f16,%f16
1347	fmovrdnz %o5,%f28,%f36
1348
1349	fxor	%f10,%f30,%f10
1350
1351	fxor	%f12,%f2,%f12
1352
1353	fxor	%f14,%f4,%f14
1354
1355	addcc	%i0,-1,%i0
1356	bg,pt	%icc,.start
1357! delay slot
1358	fxor	%f16,%f36,%f16
1359
1360	ba,pt	%icc,.end
1361! delay slot
1362	nop
1363
1364	.align	16
1365.case10:
1366	fmuld	%f6,%f6,%f36
1367	bz,pn	%icc,.case11
1368! delay slot
1369	nop
1370
1371	fmuld	%f30,%f44,%f10		! sin(x0)
1372
1373	fmuld	%f32,%f54,%f12		! cos(x1)
1374	fzero	%f2
1375
1376	fmuld	%f34,%f44,%f14		! sin(x2)
1377
1378	fmuld	%f36,%f54,%f16		! cos(x3)
1379	fzero	%f6
1380
1381	fmuld	%f30,%f40,%f20
1382	faddd	%f10,%f42,%f10
1383
1384	fmuld	%f32,%f48,%f22
1385	faddd	%f12,%f52,%f12
1386
1387	fmuld	%f34,%f40,%f24
1388	faddd	%f14,%f42,%f14
1389
1390	fmuld	%f36,%f48,%f26
1391	faddd	%f16,%f52,%f16
1392
1393	fmuld	%f30,%f30,%f30
1394	faddd	%f20,%f46,%f20
1395
1396	fmuld	%f32,%f12,%f12
1397	faddd	%f22,%f46,%f22
1398
1399	fmuld	%f34,%f34,%f34
1400	faddd	%f24,%f46,%f24
1401
1402	fmuld	%f36,%f16,%f16
1403	faddd	%f26,%f46,%f26
1404
1405	fmuld	%f30,%f10,%f10
1406	fzero	%f30
1407
1408	fmuld	%f32,%f32,%f32
1409	faddd	%f12,%f50,%f12
1410	and	%l1,2,%g5
1411
1412	fmuld	%f34,%f14,%f14
1413	fzero	%f34
1414
1415	fmuld	%f36,%f36,%f36
1416	faddd	%f16,%f50,%f16
1417	and	%l3,2,%o5
1418
1419	faddd	%f10,%f20,%f10
1420	and	%l0,2,%g1
1421
1422	fmuld	%f32,%f12,%f12
1423	fmovrdnz %g5,%f28,%f2
1424
1425	faddd	%f14,%f24,%f14
1426	and	%l2,2,%o4
1427
1428	fmuld	%f36,%f16,%f16
1429	fmovrdnz %o5,%f28,%f6
1430
1431	fmuld	%f0,%f10,%f10
1432	fmovrdnz %g1,%f28,%f30
1433
1434	faddd	%f12,%f22,%f12
1435
1436	fmuld	%f4,%f14,%f14
1437	fmovrdnz %o4,%f28,%f34
1438
1439	faddd	%f16,%f26,%f16
1440
1441	fxor	%f10,%f30,%f10
1442
1443	fxor	%f12,%f2,%f12
1444
1445	fxor	%f14,%f34,%f14
1446
1447	addcc	%i0,-1,%i0
1448	bg,pt	%icc,.start
1449! delay slot
1450	fxor	%f16,%f6,%f16
1451
1452	ba,pt	%icc,.end
1453! delay slot
1454	nop
1455
1456	.align	16
1457.case11:
1458	fmuld	%f30,%f44,%f10		! sin(x0)
1459
1460	fmuld	%f32,%f54,%f12		! cos(x1)
1461	fzero	%f2
1462
1463	fmuld	%f34,%f44,%f14		! sin(x2)
1464
1465	fmuld	%f36,%f44,%f16		! sin(x3)
1466
1467	fmuld	%f30,%f40,%f20
1468	faddd	%f10,%f42,%f10
1469
1470	fmuld	%f32,%f48,%f22
1471	faddd	%f12,%f52,%f12
1472
1473	fmuld	%f34,%f40,%f24
1474	faddd	%f14,%f42,%f14
1475
1476	fmuld	%f36,%f40,%f26
1477	faddd	%f16,%f42,%f16
1478
1479	fmuld	%f30,%f30,%f30
1480	faddd	%f20,%f46,%f20
1481
1482	fmuld	%f32,%f12,%f12
1483	faddd	%f22,%f46,%f22
1484
1485	fmuld	%f34,%f34,%f34
1486	faddd	%f24,%f46,%f24
1487
1488	fmuld	%f36,%f36,%f36
1489	faddd	%f26,%f46,%f26
1490
1491	fmuld	%f30,%f10,%f10
1492	fzero	%f30
1493
1494	fmuld	%f32,%f32,%f32
1495	faddd	%f12,%f50,%f12
1496	and	%l1,2,%g5
1497
1498	fmuld	%f34,%f14,%f14
1499	fzero	%f34
1500
1501	fmuld	%f36,%f16,%f16
1502	fzero	%f36
1503
1504	faddd	%f10,%f20,%f10
1505	and	%l0,2,%g1
1506
1507	fmuld	%f32,%f12,%f12
1508	fmovrdnz %g5,%f28,%f2
1509
1510	faddd	%f14,%f24,%f14
1511	and	%l2,2,%o4
1512
1513	faddd	%f16,%f26,%f16
1514	and	%l3,2,%o5
1515
1516	fmuld	%f0,%f10,%f10
1517	fmovrdnz %g1,%f28,%f30
1518
1519	faddd	%f12,%f22,%f12
1520
1521	fmuld	%f4,%f14,%f14
1522	fmovrdnz %o4,%f28,%f34
1523
1524	fmuld	%f6,%f16,%f16
1525	fmovrdnz %o5,%f28,%f36
1526
1527	fxor	%f10,%f30,%f10
1528
1529	fxor	%f12,%f2,%f12
1530
1531	fxor	%f14,%f34,%f14
1532
1533	addcc	%i0,-1,%i0
1534	bg,pt	%icc,.start
1535! delay slot
1536	fxor	%f16,%f36,%f16
1537
1538	ba,pt	%icc,.end
1539! delay slot
1540	nop
1541
1542	.align	16
1543.case12:
1544	fmuld	%f4,%f4,%f34
1545	bz,pn	%icc,.case14
1546! delay slot
1547	andcc	%l3,1,%g0
1548
1549	fmuld	%f6,%f6,%f36
1550	bz,pn	%icc,.case13
1551! delay slot
1552	nop
1553
1554	fmuld	%f30,%f44,%f10		! sin(x0)
1555
1556	fmuld	%f32,%f44,%f12		! sin(x1)
1557
1558	fmuld	%f34,%f54,%f14		! cos(x2)
1559	fzero	%f4
1560
1561	fmuld	%f36,%f54,%f16		! cos(x3)
1562	fzero	%f6
1563
1564	fmuld	%f30,%f40,%f20
1565	faddd	%f10,%f42,%f10
1566
1567	fmuld	%f32,%f40,%f22
1568	faddd	%f12,%f42,%f12
1569
1570	fmuld	%f34,%f48,%f24
1571	faddd	%f14,%f52,%f14
1572
1573	fmuld	%f36,%f48,%f26
1574	faddd	%f16,%f52,%f16
1575
1576	fmuld	%f30,%f30,%f30
1577	faddd	%f20,%f46,%f20
1578
1579	fmuld	%f32,%f32,%f32
1580	faddd	%f22,%f46,%f22
1581
1582	fmuld	%f34,%f14,%f14
1583	faddd	%f24,%f46,%f24
1584
1585	fmuld	%f36,%f16,%f16
1586	faddd	%f26,%f46,%f26
1587
1588	fmuld	%f30,%f10,%f10
1589	fzero	%f30
1590
1591	fmuld	%f32,%f12,%f12
1592	fzero	%f32
1593
1594	fmuld	%f34,%f34,%f34
1595	faddd	%f14,%f50,%f14
1596	and	%l2,2,%o4
1597
1598	fmuld	%f36,%f36,%f36
1599	faddd	%f16,%f50,%f16
1600	and	%l3,2,%o5
1601
1602	faddd	%f10,%f20,%f10
1603	and	%l0,2,%g1
1604
1605	faddd	%f12,%f22,%f12
1606	and	%l1,2,%g5
1607
1608	fmuld	%f34,%f14,%f14
1609	fmovrdnz %o4,%f28,%f4
1610
1611	fmuld	%f36,%f16,%f16
1612	fmovrdnz %o5,%f28,%f6
1613
1614	fmuld	%f0,%f10,%f10
1615	fmovrdnz %g1,%f28,%f30
1616
1617	fmuld	%f2,%f12,%f12
1618	fmovrdnz %g5,%f28,%f32
1619
1620	faddd	%f14,%f24,%f14
1621
1622	faddd	%f16,%f26,%f16
1623
1624	fxor	%f10,%f30,%f10
1625
1626	fxor	%f12,%f32,%f12
1627
1628	fxor	%f14,%f4,%f14
1629
1630	addcc	%i0,-1,%i0
1631	bg,pt	%icc,.start
1632! delay slot
1633	fxor	%f16,%f6,%f16
1634
1635	ba,pt	%icc,.end
1636! delay slot
1637	nop
1638
1639	.align	16
1640.case13:
1641	fmuld	%f30,%f44,%f10		! sin(x0)
1642
1643	fmuld	%f32,%f44,%f12		! sin(x1)
1644
1645	fmuld	%f34,%f54,%f14		! cos(x2)
1646	fzero	%f4
1647
1648	fmuld	%f36,%f44,%f16		! sin(x3)
1649
1650	fmuld	%f30,%f40,%f20
1651	faddd	%f10,%f42,%f10
1652
1653	fmuld	%f32,%f40,%f22
1654	faddd	%f12,%f42,%f12
1655
1656	fmuld	%f34,%f48,%f24
1657	faddd	%f14,%f52,%f14
1658
1659	fmuld	%f36,%f40,%f26
1660	faddd	%f16,%f42,%f16
1661
1662	fmuld	%f30,%f30,%f30
1663	faddd	%f20,%f46,%f20
1664
1665	fmuld	%f32,%f32,%f32
1666	faddd	%f22,%f46,%f22
1667
1668	fmuld	%f34,%f14,%f14
1669	faddd	%f24,%f46,%f24
1670
1671	fmuld	%f36,%f36,%f36
1672	faddd	%f26,%f46,%f26
1673
1674	fmuld	%f30,%f10,%f10
1675	fzero	%f30
1676
1677	fmuld	%f32,%f12,%f12
1678	fzero	%f32
1679
1680	fmuld	%f34,%f34,%f34
1681	faddd	%f14,%f50,%f14
1682	and	%l2,2,%o4
1683
1684	fmuld	%f36,%f16,%f16
1685	fzero	%f36
1686
1687	faddd	%f10,%f20,%f10
1688	and	%l0,2,%g1
1689
1690	faddd	%f12,%f22,%f12
1691	and	%l1,2,%g5
1692
1693	fmuld	%f34,%f14,%f14
1694	fmovrdnz %o4,%f28,%f4
1695
1696	faddd	%f16,%f26,%f16
1697	and	%l3,2,%o5
1698
1699	fmuld	%f0,%f10,%f10
1700	fmovrdnz %g1,%f28,%f30
1701
1702	fmuld	%f2,%f12,%f12
1703	fmovrdnz %g5,%f28,%f32
1704
1705	faddd	%f14,%f24,%f14
1706
1707	fmuld	%f6,%f16,%f16
1708	fmovrdnz %o5,%f28,%f36
1709
1710	fxor	%f10,%f30,%f10
1711
1712	fxor	%f12,%f32,%f12
1713
1714	fxor	%f14,%f4,%f14
1715
1716	addcc	%i0,-1,%i0
1717	bg,pt	%icc,.start
1718! delay slot
1719	fxor	%f16,%f36,%f16
1720
1721	ba,pt	%icc,.end
1722! delay slot
1723	nop
1724
1725	.align	16
1726.case14:
1727	fmuld	%f6,%f6,%f36
1728	bz,pn	%icc,.case15
1729! delay slot
1730	nop
1731
1732	fmuld	%f30,%f44,%f10		! sin(x0)
1733
1734	fmuld	%f32,%f44,%f12		! sin(x1)
1735
1736	fmuld	%f34,%f44,%f14		! sin(x2)
1737
1738	fmuld	%f36,%f54,%f16		! cos(x3)
1739	fzero	%f6
1740
1741	fmuld	%f30,%f40,%f20
1742	faddd	%f10,%f42,%f10
1743
1744	fmuld	%f32,%f40,%f22
1745	faddd	%f12,%f42,%f12
1746
1747	fmuld	%f34,%f40,%f24
1748	faddd	%f14,%f42,%f14
1749
1750	fmuld	%f36,%f48,%f26
1751	faddd	%f16,%f52,%f16
1752
1753	fmuld	%f30,%f30,%f30
1754	faddd	%f20,%f46,%f20
1755
1756	fmuld	%f32,%f32,%f32
1757	faddd	%f22,%f46,%f22
1758
1759	fmuld	%f34,%f34,%f34
1760	faddd	%f24,%f46,%f24
1761
1762	fmuld	%f36,%f16,%f16
1763	faddd	%f26,%f46,%f26
1764
1765	fmuld	%f30,%f10,%f10
1766	fzero	%f30
1767
1768	fmuld	%f32,%f12,%f12
1769	fzero	%f32
1770
1771	fmuld	%f34,%f14,%f14
1772	fzero	%f34
1773
1774	fmuld	%f36,%f36,%f36
1775	faddd	%f16,%f50,%f16
1776	and	%l3,2,%o5
1777
1778	faddd	%f10,%f20,%f10
1779	and	%l0,2,%g1
1780
1781	faddd	%f12,%f22,%f12
1782	and	%l1,2,%g5
1783
1784	faddd	%f14,%f24,%f14
1785	and	%l2,2,%o4
1786
1787	fmuld	%f36,%f16,%f16
1788	fmovrdnz %o5,%f28,%f6
1789
1790	fmuld	%f0,%f10,%f10
1791	fmovrdnz %g1,%f28,%f30
1792
1793	fmuld	%f2,%f12,%f12
1794	fmovrdnz %g5,%f28,%f32
1795
1796	fmuld	%f4,%f14,%f14
1797	fmovrdnz %o4,%f28,%f34
1798
1799	faddd	%f16,%f26,%f16
1800
1801	fxor	%f10,%f30,%f10
1802
1803	fxor	%f12,%f32,%f12
1804
1805	fxor	%f14,%f34,%f14
1806
1807	addcc	%i0,-1,%i0
1808	bg,pt	%icc,.start
1809! delay slot
1810	fxor	%f16,%f6,%f16
1811
1812	ba,pt	%icc,.end
1813! delay slot
1814	nop
1815
1816	.align	16
1817.case15:
1818	fmuld	%f30,%f44,%f10		! sin(x0)
1819
1820	fmuld	%f32,%f44,%f12		! sin(x1)
1821
1822	fmuld	%f34,%f44,%f14		! sin(x2)
1823
1824	fmuld	%f36,%f44,%f16		! sin(x3)
1825
1826	fmuld	%f30,%f40,%f20
1827	faddd	%f10,%f42,%f10
1828
1829	fmuld	%f32,%f40,%f22
1830	faddd	%f12,%f42,%f12
1831
1832	fmuld	%f34,%f40,%f24
1833	faddd	%f14,%f42,%f14
1834
1835	fmuld	%f36,%f40,%f26
1836	faddd	%f16,%f42,%f16
1837
1838	fmuld	%f30,%f30,%f30
1839	faddd	%f20,%f46,%f20
1840
1841	fmuld	%f32,%f32,%f32
1842	faddd	%f22,%f46,%f22
1843
1844	fmuld	%f34,%f34,%f34
1845	faddd	%f24,%f46,%f24
1846
1847	fmuld	%f36,%f36,%f36
1848	faddd	%f26,%f46,%f26
1849
1850	fmuld	%f30,%f10,%f10
1851	fzero	%f30
1852
1853	fmuld	%f32,%f12,%f12
1854	fzero	%f32
1855
1856	fmuld	%f34,%f14,%f14
1857	fzero	%f34
1858
1859	fmuld	%f36,%f16,%f16
1860	fzero	%f36
1861
1862	faddd	%f10,%f20,%f10
1863	and	%l0,2,%g1
1864
1865	faddd	%f12,%f22,%f12
1866	and	%l1,2,%g5
1867
1868	faddd	%f14,%f24,%f14
1869	and	%l2,2,%o4
1870
1871	faddd	%f16,%f26,%f16
1872	and	%l3,2,%o5
1873
1874	fmuld	%f0,%f10,%f10
1875	fmovrdnz %g1,%f28,%f30
1876
1877	fmuld	%f2,%f12,%f12
1878	fmovrdnz %g5,%f28,%f32
1879
1880	fmuld	%f4,%f14,%f14
1881	fmovrdnz %o4,%f28,%f34
1882
1883	fmuld	%f6,%f16,%f16
1884	fmovrdnz %o5,%f28,%f36
1885
1886	fxor	%f10,%f30,%f10
1887
1888	fxor	%f12,%f32,%f12
1889
1890	fxor	%f14,%f34,%f14
1891
1892	addcc	%i0,-1,%i0
1893	bg,pt	%icc,.start
1894! delay slot
1895	fxor	%f16,%f36,%f16
1896
1897	ba,pt	%icc,.end
1898! delay slot
1899	nop
1900
1901
1902	.align	32
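! epilogue: convert and store the last four results; if biguns was
! set (some |x| > 2^19*pi), reload the saved arguments and call
! __vlibm_vcos_bigf to handle those elements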
1903.end:
1904	fdtos	%f10,%f10
1905	st	%f10,[%o0]
1906	fdtos	%f12,%f12
1907	st	%f12,[%o1]
1908	fdtos	%f14,%f14
1909	st	%f14,[%o2]
1910	fdtos	%f16,%f16
1911	tst	%i5			! check for huge arguments remaining
1912	be,pt	%icc,.exit
1913! delay slot
1914	st	%f16,[%o3]
1915#ifdef __sparcv9
1916	ldx	[%fp+xsave],%o1
1917	ldx	[%fp+ysave],%o3
1918#else
1919	ld	[%fp+xsave],%o1
1920	ld	[%fp+ysave],%o3
1921#endif
1922	ld	[%fp+nsave],%o0
1923	ld	[%fp+sxsave],%o2
1924	ld	[%fp+sysave],%o4
1925	sra	%o2,0,%o2		! sign-extend for V9
1926	call	__vlibm_vcos_bigf
1927	sra	%o4,0,%o4		! delay slot
1928
1929.exit:
1930	ret
1931	restore
1932
1933
1934	.align	32
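! the element count ran out partway through a pass: flush the pending
! result for each unused lane, substitute a zero argument, and point
! its store at the scratch slot, then rejoin the main path at .cont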
1935.last1:
1936	fdtos	%f12,%f12
1937	st	%f12,[%o1]
1938	fzeros	%f2
1939	add	%fp,junk,%o1
1940.last2:
1941	fdtos	%f14,%f14
1942	st	%f14,[%o2]
1943	fzeros	%f4
1944	add	%fp,junk,%o2
1945.last3:
1946	fdtos	%f16,%f16
1947	st	%f16,[%o3]
1948	fzeros	%f6
1949	ba,pt	%icc,.cont
1950! delay slot
1951	add	%fp,junk,%o3
1952
1953
1954	.align	16
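! .range0-.range3: this lane's |x| exceeds 2^19*pi.  A finite argument
! just sets biguns so .end hands it to __vlibm_vcos_bigf; for inf or
! NaN the NaN from 0*x is stored now.  Either way, fetch the next
! element for this lane (or a zero dummy if none remain) and rejoin
! the main path.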
1955.range0:
1956	fcmpgt32 %f38,%f30,%l0
1957	andcc	%l0,2,%g0
1958	bnz,a,pt %icc,1f		! branch if finite
1959! delay slot, squashed if branch not taken
1960	mov	1,%i5			! set biguns
1961	fzeros	%f1
1962	fmuls	%f0,%f1,%f0
1963	st	%f0,[%o0]
19641:
1965	addcc	%i0,-1,%i0
1966	ble,pn	%icc,1f
1967! delay slot
1968	nop
1969	ld	[%i1],%f0
1970	add	%i1,%i2,%i1
1971	mov	%i3,%o0
1972	add	%i3,%i4,%i3
1973	fabsd	%f0,%f30
1974	fcmple32 %f30,%f18,%l0
1975	andcc	%l0,2,%g0
1976	bz,pn	%icc,.range0
1977! delay slot
1978	nop
1979	ba,pt	%icc,.check1
1980! delay slot
1981	fcmple32 %f30,%f8,%l0
19821:
1983	fzero	%f0			! set up dummy argument
1984	add	%fp,junk,%o0
1985	mov	2,%l0
1986	ba,pt	%icc,.check1
1987! delay slot
1988	fzero	%f30
1989
1990
1991	.align	16
1992.range1:
1993	fcmpgt32 %f38,%f32,%l1
1994	andcc	%l1,2,%g0
1995	bnz,a,pt %icc,1f		! branch if finite
1996! delay slot, squashed if branch not taken
1997	mov	1,%i5			! set biguns
1998	fzeros	%f3
1999	fmuls	%f2,%f3,%f2
2000	st	%f2,[%o1]
20011:
2002	addcc	%i0,-1,%i0
2003	ble,pn	%icc,1f
2004! delay slot
2005	nop
2006	ld	[%i1],%f2
2007	add	%i1,%i2,%i1
2008	mov	%i3,%o1
2009	add	%i3,%i4,%i3
2010	fabsd	%f2,%f32
2011	fcmple32 %f32,%f18,%l1
2012	andcc	%l1,2,%g0
2013	bz,pn	%icc,.range1
2014! delay slot
2015	nop
2016	ba,pt	%icc,.check2
2017! delay slot
2018	fcmple32 %f32,%f8,%l1
20191:
2020	fzero	%f2			! set up dummy argument
2021	add	%fp,junk,%o1
2022	mov	2,%l1
2023	ba,pt	%icc,.check2
2024! delay slot
2025	fzero	%f32
2026
2027
2028	.align	16
2029.range2:
2030	fcmpgt32 %f38,%f34,%l2
2031	andcc	%l2,2,%g0
2032	bnz,a,pt %icc,1f		! branch if finite
2033! delay slot, squashed if branch not taken
2034	mov	1,%i5			! set biguns
2035	fzeros	%f5
2036	fmuls	%f4,%f5,%f4
2037	st	%f4,[%o2]
20381:
2039	addcc	%i0,-1,%i0
2040	ble,pn	%icc,1f
2041! delay slot
2042	nop
2043	ld	[%i1],%f4
2044	add	%i1,%i2,%i1
2045	mov	%i3,%o2
2046	add	%i3,%i4,%i3
2047	fabsd	%f4,%f34
2048	fcmple32 %f34,%f18,%l2
2049	andcc	%l2,2,%g0
2050	bz,pn	%icc,.range2
2051! delay slot
2052	nop
2053	ba,pt	%icc,.check3
2054! delay slot
2055	fcmple32 %f34,%f8,%l2
20561:
2057	fzero	%f4			! set up dummy argument
2058	add	%fp,junk,%o2
2059	mov	2,%l2
2060	ba,pt	%icc,.check3
2061! delay slot
2062	fzero	%f34
2063
2064
2065	.align	16
2066.range3:
2067	fcmpgt32 %f38,%f36,%l3
2068	andcc	%l3,2,%g0
2069	bnz,a,pt %icc,1f		! branch if finite
2070! delay slot, squashed if branch not taken
2071	mov	1,%i5			! set biguns
2072	fzeros	%f7
2073	fmuls	%f6,%f7,%f6
2074	st	%f6,[%o3]
20751:
2076	addcc	%i0,-1,%i0
2077	ble,pn	%icc,1f
2078! delay slot
2079	nop
2080	ld	[%i1],%f6
2081	add	%i1,%i2,%i1
2082	mov	%i3,%o3
2083	add	%i3,%i4,%i3
2084	fabsd	%f6,%f36
2085	fcmple32 %f36,%f18,%l3
2086	andcc	%l3,2,%g0
2087	bz,pn	%icc,.range3
2088! delay slot
2089	nop
2090	ba,pt	%icc,.checkprimary
2091! delay slot
2092	fcmple32 %f36,%f8,%l3
20931:
2094	fzero	%f6			! set up dummy argument
2095	add	%fp,junk,%o3
2096	mov	2,%l3
2097	ba,pt	%icc,.checkprimary
2098! delay slot
2099	fzero	%f36
2100
2101	SET_SIZE(__vcosf)
2102
2103