xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vcos_ultra3.S (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vcos_ultra3.S"
30
31#include "libm.h"
32	.weak	__vcos
! These three directives publish __vcos as a weak, function-typed
! symbol whose value is the address of __vcos_ultra3 defined below.
33	.type	__vcos,#function
34	__vcos = __vcos_ultra3
35
36	RO_DATA
! Table of twelve doubles, each written as two 32-bit words.  The
! prologue of __vcos_ultra3 loads the entry at byte offset 0x00, 0x08,
! ... 0x58 into the FP register whose symbolic name matches the
! trailing comment on that entry.
! NOTE(review): RO_DATA is a macro defined outside this file; presumably
! it selects a read-only data section - confirm in the included header.
37	.align	64
38constants:
! Rounding constants: added to a value so that its integer part lands
! in the low word of the sum (used to form the table index k and the
! quadrant count n).
39	.word	0x42c80000,0x00000000	! 3 * 2^44
40	.word	0x43380000,0x00000000	! 3 * 2^51
! Multiplier and three-part subtrahend used by the medium-range
! argument reduction.
41	.word	0x3fe45f30,0x6dc9c883	! invpio2
42	.word	0x3ff921fb,0x54442c00	! pio2_1
43	.word	0x3d318469,0x898cc400	! pio2_2
44	.word	0x3a71701b,0x839a2520	! pio2_3
! Polynomial coefficients consumed by the evaluation code.
45	.word	0xbfc55555,0x55555533	! pp1
46	.word	0x3f811111,0x10e7d53b	! pp2
47	.word	0xbf2a0167,0xe6b3cf9b	! pp3
48	.word	0xbfdfffff,0xffffff65	! qq1
49	.word	0x3fa55555,0x54f88ed0	! qq2
50	.word	0xbf56c12c,0xdd185f60	! qq3
51
52! local storage indices
! Each name below is an offset (biased by STACK_BIAS) that the code
! adds to the frame pointer to reach a scratch slot in the frame.
!   xsave, ysave   : 8-byte slots holding the incoming x and y pointers
!   nsave          : word holding the incoming n
!   sxsave, sysave : words holding stridex and stridey as passed in,
!                    i.e. before the prologue scales them
!   biguns         : word cleared to 0 by the prologue
!   nk0 .. nk3     : words used to pass the low half of an FP register
!                    to an integer register (st from FP, ld to integer)
!   junk           : 8-byte dummy store target; py1..py3 point here
!                    until they are given real output addresses
53
54#define xsave		STACK_BIAS-0x8
55#define ysave		STACK_BIAS-0x10
56#define nsave		STACK_BIAS-0x14
57#define sxsave		STACK_BIAS-0x18
58#define sysave		STACK_BIAS-0x1c
59#define biguns		STACK_BIAS-0x20
60#define nk3		STACK_BIAS-0x24
61#define nk2		STACK_BIAS-0x28
62#define nk1		STACK_BIAS-0x2c
63#define nk0		STACK_BIAS-0x30
64#define junk		STACK_BIAS-0x38
65! sizeof temp storage - must be a multiple of 16 for V9
! (this amount is added to the frame size in the save instruction)
66#define tmps		0x40
67
68! register use
69
70! i0  n
71! i1  x
72! i2  stridex
73! i3  y
74! i4  stridey
75! i5  0x80000000
76
77! l0  hx0
78! l1  hx1
79! l2  hx2
80! l3  hx3
81! l4  k0
82! l5  k1
83! l6  k2
84! l7  k3
85
86! the following are 64-bit registers in both V8+ and V9
87
88! g1  __vlibm_TBL_sincos2
89! g5  scratch
90
91! o0  py0
92! o1  py1
93! o2  py2
94! o3  py3
95! o4  0x3e400000
96! o5  0x3fe921fb,0x4099251e
97! o7  scratch
98
99! f0  hx0
100! f2
101! f4
102! f6
103! f8  hx1
104! f10
105! f12
106! f14
107! f16 hx2
108! f18
109! f20
110! f22
111! f24 hx3
112! f26
113! f28
114! f30
115! f32
116! f34
117! f36
118! f38
119
120#define c3two44	%f40
! The names in this group bind FP registers f40 through f62 to the
! entries of the constants table, in table order.  The prologue loads
! each of them with one ldd from the table.
121#define c3two51	%f42
122#define invpio2	%f44
123#define pio2_1	%f46
124#define pio2_2	%f48
125#define pio2_3	%f50
126#define pp1	%f52
127#define pp2	%f54
128#define pp3	%f56
129#define qq1	%f58
130#define qq2	%f60
131#define qq3	%f62
132
133	ENTRY(__vcos_ultra3)
! Function entry.  Per the register-use table above, the incoming
! arguments are i0 = n, i1 = x, i2 = stridex, i3 = y, i4 = stridey.
! This prologue reserves the scratch area, saves the arguments, loads
! the constants, builds the integer range thresholds, primes the first
! argument and jumps into the four-way unrolled loop.
134	save	%sp,-SA(MINFRAME)-tmps,%sp
! NOTE(review): PIC_SETUP and PIC_SET are macros defined outside this
! file; from the uses below they are expected to leave the address of
! the constants table in o0 and of __vlibm_TBL_sincos2 in o1.
135	PIC_SETUP(l7)
136	PIC_SET(l7,constants,o0)
137	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
138	mov	%o1,%g1
139	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
! Pointers are stored with stx (64-bit) on sparcv9 and with st otherwise.
140#ifdef __sparcv9
141	stx	%i1,[%fp+xsave]		! save arguments
142	stx	%i3,[%fp+ysave]
143#else
144	st	%i1,[%fp+xsave]		! save arguments
145	st	%i3,[%fp+ysave]
146#endif
147	st	%i0,[%fp+nsave]
148	st	%i2,[%fp+sxsave]
149	st	%i4,[%fp+sysave]
150	st	%g0,[%fp+biguns]	! biguns = 0
151	ldd	[%o0+0x00],c3two44	! load/set up constants
152	ldd	[%o0+0x08],c3two51
153	ldd	[%o0+0x10],invpio2
154	ldd	[%o0+0x18],pio2_1
155	ldd	[%o0+0x20],pio2_2
156	ldd	[%o0+0x28],pio2_3
157	ldd	[%o0+0x30],pp1
158	ldd	[%o0+0x38],pp2
159	ldd	[%o0+0x40],pp3
160	ldd	[%o0+0x48],qq1
161	ldd	[%o0+0x50],qq2
162	ldd	[%o0+0x58],qq3
! i5 = sign-bit mask, o4 = lower bound for hx.
163	sethi	%hi(0x80000000),%i5
164	sethi	%hi(0x3e400000),%o4
! o5 = 0x3fe921fb in its upper 32 bits and 0x4099251e in its lower 32
! bits.  The loop tests the lower half through icc; the upper half is
! extracted later with srlx to pick out the medium range.
165	sethi	%hi(0x3fe921fb),%o5
166	or	%o5,%lo(0x3fe921fb),%o5
167	sllx	%o5,32,%o5
168	sethi	%hi(0x4099251e),%o7
169	or	%o7,%lo(0x4099251e),%o7
170	or	%o5,%o7,%o5
! Multiply both strides by 8 so they can be added to the pointers.
171	sll	%i2,3,%i2		! scale strides
172	sll	%i4,3,%i4
! py1..py3 start out pointing at the junk slot, so the stores of the
! low result words at the ends of the loop stages are harmless on the
! first pass.
173	add	%fp,junk,%o1		! loop prologue
174	add	%fp,junk,%o2
175	add	%fp,junk,%o3
! First argument: its first word goes to both l0 and f0, its second
! word to f3 (copied into f1 later, in .cont).
176	ld	[%i1],%l0		! *x
177	ld	[%i1],%f0
178	ld	[%i1+4],%f3
179	andn	%l0,%i5,%l0		! mask off sign
180	add	%i1,%i2,%i1		! x += stridex
181	ba	.loop0
182	nop
183
184! 16-byte aligned
185	.align	16
186.loop0:
! Stage 0 of the unrolled loop: range-check element 0 while the next
! argument is fetched with non-faulting loads (asi set in the prologue).
187	lda	[%i1]%asi,%l1		! preload next argument
! g5 is negative when hx0 < 0x3e400000; the low 32 bits of o7 are
! negative when hx0 > 0x4099251e.  orcc merges the two sign bits.
188	sub	%l0,%o4,%g5
189	sub	%o5,%l0,%o7
! f2 = first word of x0 with the sign cleared; together with the second
! word already in f3 this makes f2:f3 = |x0|.
190	fabss	%f0,%f2
191
192	lda	[%i1]%asi,%f8
193	orcc	%o7,%g5,%g0
194	mov	%i3,%o0			! py0 = y
195	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e
196
197! delay slot
198	lda	[%i1+4]%asi,%f11
! One element consumed; leave through .last1 (defined elsewhere in the
! file) when the count drops to zero or below.
199	addcc	%i0,-1,%i0
200	add	%i3,%i4,%i3		! y += stridey
201	ble,pn	%icc,.last1
202
203! delay slot
204	andn	%l1,%i5,%l1
205	add	%i1,%i2,%i1		! x += stridex
! Adding 3 * 2^44 leaves the table index k0 in the low word (f5), which
! .loop3 stores to nk0.
206	faddd	%f2,c3two44,%f4
! Store the low word of the element-1 result register pair (f14:f15)
! through py1 before .loop1 overwrites py1; on the first pass py1 still
! points at the junk slot.
207	st	%f15,[%o1+4]
208
209.loop1:
! Stage 1: same steps as stage 0, applied to element 1 (hx in l1,
! value in f8/f11, |x1| in f10:f11, output pointer py1).
210	lda	[%i1]%asi,%l2		! preload next argument
211	sub	%l1,%o4,%g5
212	sub	%o5,%l1,%o7
213	fabss	%f8,%f10
214
215	lda	[%i1]%asi,%f16
216	orcc	%o7,%g5,%g0
217	mov	%i3,%o1			! py1 = y
218	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e
219
220! delay slot
221	lda	[%i1+4]%asi,%f19
222	addcc	%i0,-1,%i0
223	add	%i3,%i4,%i3		! y += stridey
224	ble,pn	%icc,.last2
225
226! delay slot
227	andn	%l2,%i5,%l2
228	add	%i1,%i2,%i1		! x += stridex
! Low word f13 of this sum is the table index k1 (stored to nk1 in the
! delay slot at the end of .loop3).
229	faddd	%f10,c3two44,%f12
! Store the low word of the element-2 result pair (f22:f23) through py2
! before .loop2 overwrites py2.
230	st	%f23,[%o2+4]
231
232.loop2:
! Stage 2: same steps as stage 0, applied to element 2 (hx in l2,
! value in f16/f19, |x2| in f18:f19, output pointer py2).
233	lda	[%i1]%asi,%l3		! preload next argument
234	sub	%l2,%o4,%g5
235	sub	%o5,%l2,%o7
236	fabss	%f16,%f18
237
238	lda	[%i1]%asi,%f24
239	orcc	%o7,%g5,%g0
240	mov	%i3,%o2			! py2 = y
241	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
242
243! delay slot
244	lda	[%i1+4]%asi,%f27
245	addcc	%i0,-1,%i0
246	add	%i3,%i4,%i3		! y += stridey
247	ble,pn	%icc,.last3
248
249! delay slot
250	andn	%l3,%i5,%l3
251	add	%i1,%i2,%i1		! x += stridex
! Low word f21 of this sum is the table index k2 (stored to nk2 in .cont).
252	faddd	%f18,c3two44,%f20
! Store the low word of the element-3 result pair (f30:f31) through py3
! before .loop3 overwrites py3.
253	st	%f31,[%o3+4]
254
255.loop3:
! Stage 3: range-check element 3.  Unlike the earlier stages this one
! does not preload a further argument or decrement n here; the tails of
! the evaluation paths do both.
256	sub	%l3,%o4,%g5
257	sub	%o5,%l3,%o7
258	fabss	%f24,%f26
! Park k0 (low word of |x0| + 3 * 2^44) in memory so it can be reloaded
! into an integer register.
259	st	%f5,[%fp+nk0]
260
261	orcc	%o7,%g5,%g0
262	mov	%i3,%o3			! py3 = y
263	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
264! delay slot
265	st	%f13,[%fp+nk1]
267!!! DONE?
268.cont:
! All four elements passed the range check.  This block
!   - finishes forming the four doubles x0..x3 and the indices k0..k3,
!   - branches to .medium if any hx exceeds 0x3fe921fb,
!   - otherwise evaluates the four results directly from the table.
! o7 = upper half of o5 = 0x3fe921fb.
269	srlx	%o5,32,%o7
270	add	%i3,%i4,%i3		! y += stridey
! Copy the second word of each argument next to its first word so that
! f0, f8, f16 and f24 hold complete doubles.
271	fmovs	%f3,%f1
272	st	%f21,[%fp+nk2]
273
! 0x3fe921fb - hx is negative for any element above the primary range.
274	sub	%o7,%l0,%l0
275	sub	%o7,%l1,%l1
276	faddd	%f26,c3two44,%f28
277	st	%f29,[%fp+nk3]
278
279	sub	%o7,%l2,%l2
280	sub	%o7,%l3,%l3
281	fmovs	%f11,%f9
282
283	or	%l0,%l1,%l0
284	or	%l2,%l3,%l2
285	fmovs	%f19,%f17
286
287	fmovs	%f27,%f25
288	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range
289
290	fmuld	%f8,invpio2,%f14
291	ld	[%fp+nk0],%l4
292
293	fmuld	%f16,invpio2,%f22
294	ld	[%fp+nk1],%l5
295
! The OR of the four differences is negative if any element needs the
! medium-range path.
296	orcc	%l0,%l2,%g0
297	bl,pn	%icc,.medium
298! delay slot
299	fmuld	%f24,invpio2,%f30
300	ld	[%fp+nk2],%l6
301
! Primary range.  Each index is shifted left by 5, so table entries are
! 32 bytes apart; the double at offset 0 of an entry is subtracted
! from |x|, leaving a small remainder in f2, f10, f18, f26.
302	ld	[%fp+nk3],%l7
303	sll	%l4,5,%l4		! k
304	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0
305
306	sll	%l5,5,%l5
307	ldd	[%l4+%g1],%f4
308	fcmpd	%fcc1,%f8,pio2_3
309
310	sll	%l6,5,%l6
311	ldd	[%l5+%g1],%f12
312	fcmpd	%fcc2,%f16,pio2_3
313
314	sll	%l7,5,%l7
315	ldd	[%l6+%g1],%f20
316	fcmpd	%fcc3,%f24,pio2_3
317
318	ldd	[%l7+%g1],%f28
319	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
320
321	fsubd	%f10,%f12,%f10
322
323	fsubd	%f18,%f20,%f18
324
325	fsubd	%f26,%f28,%f26
326
327	fmuld	%f2,%f2,%f0		! z = x * x
328
329	fmuld	%f10,%f10,%f8
330
331	fmuld	%f18,%f18,%f16
332
333	fmuld	%f26,%f26,%f24
334
! Two polynomials per lane, interleaved across the four lanes:
!   f6 (f14, f22, f30) = z * (qq1 + z * (qq2 + z * qq3))
!   f4 (f12, f20, f28) = x + x * z * (pp1 + z * pp2)
335	fmuld	%f0,qq3,%f6
336
337	fmuld	%f8,qq3,%f14
338
339	fmuld	%f16,qq3,%f22
340
341	fmuld	%f24,qq3,%f30
342
343	faddd	%f6,qq2,%f6
344	fmuld	%f0,pp2,%f4
345
346	faddd	%f14,qq2,%f14
347	fmuld	%f8,pp2,%f12
348
349	faddd	%f22,qq2,%f22
350	fmuld	%f16,pp2,%f20
351
352	faddd	%f30,qq2,%f30
353	fmuld	%f24,pp2,%f28
354
355	fmuld	%f0,%f6,%f6
356	faddd	%f4,pp1,%f4
357
358	fmuld	%f8,%f14,%f14
359	faddd	%f12,pp1,%f12
360
361	fmuld	%f16,%f22,%f22
362	faddd	%f20,pp1,%f20
363
364	fmuld	%f24,%f30,%f30
365	faddd	%f28,pp1,%f28
366
! l4..l7 become the addresses of the selected table entries.
367	faddd	%f6,qq1,%f6
368	fmuld	%f0,%f4,%f4
369	add	%l4,%g1,%l4
370
371	faddd	%f14,qq1,%f14
372	fmuld	%f8,%f12,%f12
373	add	%l5,%g1,%l5
374
375	faddd	%f22,qq1,%f22
376	fmuld	%f16,%f20,%f20
377	add	%l6,%g1,%l6
378
379	faddd	%f30,qq1,%f30
380	fmuld	%f24,%f28,%f28
381	add	%l7,%g1,%l7
382
383	fmuld	%f2,%f4,%f4
384
385	fmuld	%f10,%f12,%f12
386
387	fmuld	%f18,%f20,%f20
388
389	fmuld	%f26,%f28,%f28
390
! Combine with the doubles at offsets 16 and 8 of the table entry:
!   result = [entry+16] * f6 - [entry+8] * f4 + [entry+16]
! NOTE(review): this has the shape cos(a)cos(d) - sin(a)sin(d), so the
! entry presumably holds (a, sin a, cos a) - confirm against the
! definition of __vlibm_TBL_sincos2.
391	fmuld	%f0,%f6,%f6
392	faddd	%f4,%f2,%f4
393	ldd	[%l4+16],%f32
394
395	fmuld	%f8,%f14,%f14
396	faddd	%f12,%f10,%f12
397	ldd	[%l5+16],%f34
398
399	fmuld	%f16,%f22,%f22
400	faddd	%f20,%f18,%f20
401	ldd	[%l6+16],%f36
402
403	fmuld	%f24,%f30,%f30
404	faddd	%f28,%f26,%f28
405	ldd	[%l7+16],%f38
406
407	fmuld	%f32,%f6,%f6
408	ldd	[%l4+8],%f2
409
410	fmuld	%f34,%f14,%f14
411	ldd	[%l5+8],%f10
412
413	fmuld	%f36,%f22,%f22
414	ldd	[%l6+8],%f18
415
416	fmuld	%f38,%f30,%f30
417	ldd	[%l7+8],%f26
418
419	fmuld	%f2,%f4,%f4
420
421	fmuld	%f10,%f12,%f12
422
423	fmuld	%f18,%f20,%f20
424
425	fmuld	%f26,%f28,%f28
426
! While the final subtractions and additions complete, fetch the
! argument for the next pass of .loop0 (non-faulting loads) and step x.
427	fsubd	%f6,%f4,%f6
428	lda	[%i1]%asi,%l0		! preload next argument
429
430	fsubd	%f14,%f12,%f14
431	lda	[%i1]%asi,%f0
432
433	fsubd	%f22,%f20,%f22
434	lda	[%i1+4]%asi,%f3
435
436	fsubd	%f30,%f28,%f30
437	andn	%l0,%i5,%l0
438	add	%i1,%i2,%i1
439
! Store the first word of each result.  The second word of result 0 is
! stored in the branch delay slot below; those of results 1..3 (f15,
! f23, f31) are stored at the ends of the .loop0/.loop1/.loop2 stages.
440	faddd	%f6,%f32,%f6
441	st	%f6,[%o0]
442
443	faddd	%f14,%f34,%f14
444	st	%f14,[%o1]
445
446	faddd	%f22,%f36,%f22
447	st	%f22,[%o2]
448
449	faddd	%f30,%f38,%f30
450	st	%f30,[%o3]
! Account for element 3 and loop while elements remain.
451	addcc	%i0,-1,%i0
452
453	bg,pt	%icc,.loop0
454! delay slot
455	st	%f7,[%o0+4]
456
457	ba,pt	%icc,.end
458! delay slot
459	nop
460
461
462	.align	16
463.medium:
! Medium range: reached from .cont when at least one hx exceeds
! 0x3fe921fb.  On entry f6, f14, f22, f30 hold x * invpio2 and
! f0, f8, f16, f24 hold x.  This block
!   1. extracts n from x * invpio2 using the 3 * 2^51 constant,
!   2. subtracts n times pio2_1, pio2_2 and pio2_3 from x, keeping a
!      correction term w,
!   3. adjusts n, forms |reduced x| and a new table index k,
!   4. dispatches on bit 0 of each n to one of the .caseN blocks.
! Adding 3 * 2^51 leaves n in the low word of the sum; subtracting the
! constant again leaves the integer-valued multiplier in f6 etc.
464	faddd	%f6,c3two51,%f4
465	st	%f5,[%fp+nk0]
466
467	faddd	%f14,c3two51,%f12
468	st	%f13,[%fp+nk1]
469
470	faddd	%f22,c3two51,%f20
471	st	%f21,[%fp+nk2]
472
473	faddd	%f30,c3two51,%f28
474	st	%f29,[%fp+nk3]
475
476	fsubd	%f4,c3two51,%f6
477
478	fsubd	%f12,c3two51,%f14
479
480	fsubd	%f20,c3two51,%f22
481
482	fsubd	%f28,c3two51,%f30
483
484	fmuld	%f6,pio2_1,%f2
485	ld	[%fp+nk0],%l0		! n
486
487	fmuld	%f14,pio2_1,%f10
488	ld	[%fp+nk1],%l1
489
490	fmuld	%f22,pio2_1,%f18
491	ld	[%fp+nk2],%l2
492
493	fmuld	%f30,pio2_1,%f26
494	ld	[%fp+nk3],%l3
495
! x -= fn * pio2_1, and n is bumped by one.
! NOTE(review): the increment presumably reflects cos(x) = sin(x + pi/2)
! so that the quadrant logic can be shared with the sine code - confirm.
496	fsubd	%f0,%f2,%f0
497	fmuld	%f6,pio2_2,%f4
498	add	%l0,1,%l0
499
500	fsubd	%f8,%f10,%f8
501	fmuld	%f14,pio2_2,%f12
502	add	%l1,1,%l1
503
504	fsubd	%f16,%f18,%f16
505	fmuld	%f22,pio2_2,%f20
506	add	%l2,1,%l2
507
508	fsubd	%f24,%f26,%f24
509	fmuld	%f30,pio2_2,%f28
510	add	%l3,1,%l3
511
! f32.. = x - fn * pio2_2; the next two subtractions recover the
! rounding error of that step into f0, f8, f16, f24.
512	fsubd	%f0,%f4,%f32
513
514	fsubd	%f8,%f12,%f34
515
516	fsubd	%f16,%f20,%f36
517
518	fsubd	%f24,%f28,%f38
519
520	fsubd	%f0,%f32,%f0
521	fcmple32 %f32,pio2_3,%l4	! x <= pio2_3 iff x < 0
522
523	fsubd	%f8,%f34,%f8
524	fcmple32 %f34,pio2_3,%l5
525
526	fsubd	%f16,%f36,%f16
527	fcmple32 %f36,pio2_3,%l6
528
529	fsubd	%f24,%f38,%f24
530	fcmple32 %f38,pio2_3,%l7
531
! sll by 30 moves bit 1 of the compare result to bit 31; the sra by 31
! further down spreads it into an all-ones or all-zero mask.
532	fsubd	%f0,%f4,%f0
533	fmuld	%f6,pio2_3,%f6
534	sll	%l4,30,%l4		! if (x < 0) n = -n ^ 2
535
536	fsubd	%f8,%f12,%f8
537	fmuld	%f14,pio2_3,%f14
538	sll	%l5,30,%l5
539
540	fsubd	%f16,%f20,%f16
541	fmuld	%f22,pio2_3,%f22
542	sll	%l6,30,%l6
543
544	fsubd	%f24,%f28,%f24
545	fmuld	%f30,pio2_3,%f30
546	sll	%l7,30,%l7
547
! f6.. = fn * pio2_3 minus the recovered rounding error.
548	fsubd	%f6,%f0,%f6
549	sra	%l4,31,%l4
550
551	fsubd	%f14,%f8,%f14
552	sra	%l5,31,%l5
553
554	fsubd	%f22,%f16,%f22
555	sra	%l6,31,%l6
556
557	fsubd	%f30,%f24,%f30
558	sra	%l7,31,%l7
559
! xor with the mask followed by sub of the mask (below) negates n when
! the mask is all ones and leaves it unchanged when the mask is zero.
560	fsubd	%f32,%f6,%f0		! reduced x
561	xor	%l0,%l4,%l0
562
563	fsubd	%f34,%f14,%f8
564	xor	%l1,%l5,%l1
565
566	fsubd	%f36,%f22,%f16
567	xor	%l2,%l6,%l2
568
569	fsubd	%f38,%f30,%f24
570	xor	%l3,%l7,%l3
571
572	fabsd	%f0,%f2
573	sub	%l0,%l4,%l0
574
575	fabsd	%f8,%f10
576	sub	%l1,%l5,%l1
577
578	fabsd	%f16,%f18
579	sub	%l2,%l6,%l2
580
581	fabsd	%f24,%f26
582	sub	%l3,%l7,%l3
583
! New table index k from |reduced x| + 3 * 2^44 (low word, via nk0..nk3).
! The mask is narrowed to the value 2 so the xor below flips bit 1 of n
! for the masked lanes.
584	faddd	%f2,c3two44,%f4
585	st	%f5,[%fp+nk0]
586	and	%l4,2,%l4
587
588	faddd	%f10,c3two44,%f12
589	st	%f13,[%fp+nk1]
590	and	%l5,2,%l5
591
592	faddd	%f18,c3two44,%f20
593	st	%f21,[%fp+nk2]
594	and	%l6,2,%l6
595
596	faddd	%f26,c3two44,%f28
597	st	%f29,[%fp+nk3]
598	and	%l7,2,%l7
599
600	fsubd	%f32,%f0,%f4
601	xor	%l0,%l4,%l0
602
603	fsubd	%f34,%f8,%f12
604	xor	%l1,%l5,%l1
605
606	fsubd	%f36,%f16,%f20
607	xor	%l2,%l6,%l2
608
609	fsubd	%f38,%f24,%f28
610	xor	%l3,%l7,%l3
611
! fzero followed by fnegd (below) leaves only the sign bit set in f38,
! which then serves as the mask for extracting the sign of reduced x.
612	fzero	%f38
613	ld	[%fp+nk0],%l4
614
615	fsubd	%f4,%f6,%f6		! w
616	ld	[%fp+nk1],%l5
617
618	fsubd	%f12,%f14,%f14
619	ld	[%fp+nk2],%l6
620
621	fnegd	%f38,%f38
622	ld	[%fp+nk3],%l7
623	sll	%l4,5,%l4		! k
624
625	fsubd	%f20,%f22,%f22
626	sll	%l5,5,%l5
627
628	fsubd	%f28,%f30,%f30
629	sll	%l6,5,%l6
630
631	fand	%f0,%f38,%f32		! sign bit of x
632	ldd	[%l4+%g1],%f4
633	sll	%l7,5,%l7
634
635	fand	%f8,%f38,%f34
636	ldd	[%l5+%g1],%f12
637
638	fand	%f16,%f38,%f36
639	ldd	[%l6+%g1],%f20
640
641	fand	%f24,%f38,%f38
642	ldd	[%l7+%g1],%f28
643
644	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
645
646	fsubd	%f10,%f12,%f10
647
648	fsubd	%f18,%f20,%f18
649	nop
650
651	fsubd	%f26,%f28,%f26
652	nop
653
! Dispatch.  For each lane: compute z = x * x, test bit 0 of n, and in
! the delay slot give w the sign of reduced x (fxor with the extracted
! sign bit; the result replaces the sign word in f32, f34, f36, f38).
! A lane whose n has bit 0 clear takes the stream marked sin(...) in
! the target block; .case8, .case4, .case2 and .case1 correspond to
! lane 0, 1, 2 and 3 being the first such lane.  Each target finishes
! the z and w steps for the lanes not yet handled here.  If every n
! has bit 0 set, control falls through to the .case0 code.
654! 16-byte aligned
655	fmuld	%f2,%f2,%f0		! z = x * x
656	andcc	%l0,1,%g0
657	bz,pn	%icc,.case8
658! delay slot
659	fxor	%f6,%f32,%f32
660
661	fmuld	%f10,%f10,%f8
662	andcc	%l1,1,%g0
663	bz,pn	%icc,.case4
664! delay slot
665	fxor	%f14,%f34,%f34
666
667	fmuld	%f18,%f18,%f16
668	andcc	%l2,1,%g0
669	bz,pn	%icc,.case2
670! delay slot
671	fxor	%f22,%f36,%f36
672
673	fmuld	%f26,%f26,%f24
674	andcc	%l3,1,%g0
675	bz,pn	%icc,.case1
676! delay slot
677	fxor	%f30,%f38,%f38
678
679!.case0:
! Fall-through target of the .medium dispatch: bit 0 of n is set for
! all four lanes, so every lane uses the stream marked cos(...).
! On entry, per lane: z in f0/f8/f16/f24, table-relative x in
! f2/f10/f18/f26, signed w in f32/f34/f36/f38, scaled index k in
! l4..l7, n in l0..l3.
! Per lane this computes
!   f6 = z * (qq1 + z * (qq2 + z * qq3))
!   f4 = x + w + x * z * (pp1 + z * pp2)
!   result = [entry+16] * f6 - [entry+8] * f4 + [entry+16]
680	fmuld	%f0,qq3,%f6		! cos(x0)
681
682	fmuld	%f8,qq3,%f14		! cos(x1)
683
684	fmuld	%f16,qq3,%f22		! cos(x2)
685
686	fmuld	%f24,qq3,%f30		! cos(x3)
687
688	faddd	%f6,qq2,%f6
689	fmuld	%f0,pp2,%f4
690
691	faddd	%f14,qq2,%f14
692	fmuld	%f8,pp2,%f12
693
694	faddd	%f22,qq2,%f22
695	fmuld	%f16,pp2,%f20
696
697	faddd	%f30,qq2,%f30
698	fmuld	%f24,pp2,%f28
699
700	fmuld	%f0,%f6,%f6
701	faddd	%f4,pp1,%f4
702
703	fmuld	%f8,%f14,%f14
704	faddd	%f12,pp1,%f12
705
706	fmuld	%f16,%f22,%f22
707	faddd	%f20,pp1,%f20
708
709	fmuld	%f24,%f30,%f30
710	faddd	%f28,pp1,%f28
711
! l4..l7 become the addresses of the selected table entries.
712	faddd	%f6,qq1,%f6
713	fmuld	%f0,%f4,%f4
714	add	%l4,%g1,%l4
715
716	faddd	%f14,qq1,%f14
717	fmuld	%f8,%f12,%f12
718	add	%l5,%g1,%l5
719
720	faddd	%f22,qq1,%f22
721	fmuld	%f16,%f20,%f20
722	add	%l6,%g1,%l6
723
724	faddd	%f30,qq1,%f30
725	fmuld	%f24,%f28,%f28
726	add	%l7,%g1,%l7
727
728	fmuld	%f2,%f4,%f4
729
730	fmuld	%f10,%f12,%f12
731
732	fmuld	%f18,%f20,%f20
733
734	fmuld	%f26,%f28,%f28
735
! Add w, then x, to the odd polynomial; z is dead after the last
! multiply, so its register is reused for the table word at offset 16.
736	fmuld	%f0,%f6,%f6
737	faddd	%f4,%f32,%f4
738	ldd	[%l4+16],%f0
739
740	fmuld	%f8,%f14,%f14
741	faddd	%f12,%f34,%f12
742	ldd	[%l5+16],%f8
743
744	fmuld	%f16,%f22,%f22
745	faddd	%f20,%f36,%f20
746	ldd	[%l6+16],%f16
747
748	fmuld	%f24,%f30,%f30
749	faddd	%f28,%f38,%f28
750	ldd	[%l7+16],%f24
751
752	fmuld	%f0,%f6,%f6
753	faddd	%f4,%f2,%f4
754	ldd	[%l4+8],%f32
755
756	fmuld	%f8,%f14,%f14
757	faddd	%f12,%f10,%f12
758	ldd	[%l5+8],%f34
759
760	fmuld	%f16,%f22,%f22
761	faddd	%f20,%f18,%f20
762	ldd	[%l6+8],%f36
763
764	fmuld	%f24,%f30,%f30
765	faddd	%f28,%f26,%f28
766	ldd	[%l7+8],%f38
767
768	fmuld	%f32,%f4,%f4
769
770	fmuld	%f34,%f12,%f12
771
772	fmuld	%f36,%f20,%f20
773
774	fmuld	%f38,%f28,%f28
775
776	fsubd	%f6,%f4,%f6
777
778	fsubd	%f14,%f12,%f14
779
780	fsubd	%f22,%f20,%f22
781
782	fsubd	%f30,%f28,%f30
783
784	faddd	%f6,%f0,%f6
785
786	faddd	%f14,%f8,%f14
787
788	faddd	%f22,%f16,%f22
789
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
790	faddd	%f30,%f24,%f30
791	mov	%l0,%l4
792
! Build the negated results while preloading the next argument.
793	fnegd	%f6,%f4
794	lda	[%i1]%asi,%l0		! preload next argument
795
796	fnegd	%f14,%f12
797	lda	[%i1]%asi,%f0
798
799	fnegd	%f22,%f20
800	lda	[%i1+4]%asi,%f3
801
802	fnegd	%f30,%f28
803	andn	%l0,%i5,%l0
804	add	%i1,%i2,%i1
805
! For each lane, take the negated result when bit 1 of n is set, then
! store the first word of the result.
806	andcc	%l4,2,%g0
807	fmovdnz	%icc,%f4,%f6
808	st	%f6,[%o0]
809
810	andcc	%l1,2,%g0
811	fmovdnz	%icc,%f12,%f14
812	st	%f14,[%o1]
813
814	andcc	%l2,2,%g0
815	fmovdnz	%icc,%f20,%f22
816	st	%f22,[%o2]
817
818	andcc	%l3,2,%g0
819	fmovdnz	%icc,%f28,%f30
820	st	%f30,[%o3]
821
! Account for element 3 and loop while elements remain; the second
! word of result 0 is stored in the delay slot.
822	addcc	%i0,-1,%i0
823	bg,pt	%icc,.loop0
824! delay slot
825	st	%f7,[%o0+4]
826
827	ba,pt	%icc,.end
828! delay slot
829	nop
830
831	.align	16
832.case1:
! Reached from the .medium dispatch when bit 0 of n is set for lanes
! 0..2 and clear for lane 3: lanes 0..2 use the cos(...) stream, lane 3
! the sin(...) stream.  Entry state is as left by .medium (z, x,
! signed w, scaled k and n per lane).
! cos(...) lanes:
!   f6 = z * (qq1 + z * (qq2 + z * qq3))
!   f4 = x + w + x * z * (pp1 + z * pp2)
!   result = [entry+16] * f6 - [entry+8] * f4 + [entry+16]
! sin(...) lane 3:
!   f30 = x + w + x * z * (pp1 + z * (pp2 + z * pp3))
!   f28 = z * (qq1 + z * qq2)
!   result = [entry+16] * f30 + [entry+8] * f28 + [entry+8]
833	fmuld	%f24,pp3,%f30		! sin(x3)
834
835	fmuld	%f0,qq3,%f6		! cos(x0)
836
837	fmuld	%f8,qq3,%f14		! cos(x1)
838
839	fmuld	%f16,qq3,%f22		! cos(x2)
840
841	faddd	%f30,pp2,%f30
842	fmuld	%f24,qq2,%f28
843
844	faddd	%f6,qq2,%f6
845	fmuld	%f0,pp2,%f4
846
847	faddd	%f14,qq2,%f14
848	fmuld	%f8,pp2,%f12
849
850	faddd	%f22,qq2,%f22
851	fmuld	%f16,pp2,%f20
852
853	fmuld	%f24,%f30,%f30
854	faddd	%f28,qq1,%f28
855
856	fmuld	%f0,%f6,%f6
857	faddd	%f4,pp1,%f4
858
859	fmuld	%f8,%f14,%f14
860	faddd	%f12,pp1,%f12
861
862	fmuld	%f16,%f22,%f22
863	faddd	%f20,pp1,%f20
864
! l4..l7 become the addresses of the selected table entries.
865	faddd	%f30,pp1,%f30
866	fmuld	%f24,%f28,%f28
867	add	%l7,%g1,%l7
868
869	faddd	%f6,qq1,%f6
870	fmuld	%f0,%f4,%f4
871	add	%l4,%g1,%l4
872
873	faddd	%f14,qq1,%f14
874	fmuld	%f8,%f12,%f12
875	add	%l5,%g1,%l5
876
877	faddd	%f22,qq1,%f22
878	fmuld	%f16,%f20,%f20
879	add	%l6,%g1,%l6
880
881	fmuld	%f24,%f30,%f30
882
883	fmuld	%f2,%f4,%f4
884
885	fmuld	%f10,%f12,%f12
886
887	fmuld	%f18,%f20,%f20
888
! Lane 3 reuses the z register (f24) for the table word at offset 8.
889	fmuld	%f26,%f30,%f30
890	ldd	[%l7+8],%f24
891
892	fmuld	%f0,%f6,%f6
893	faddd	%f4,%f32,%f4
894	ldd	[%l4+16],%f0
895
896	fmuld	%f8,%f14,%f14
897	faddd	%f12,%f34,%f12
898	ldd	[%l5+16],%f8
899
900	fmuld	%f16,%f22,%f22
901	faddd	%f20,%f36,%f20
902	ldd	[%l6+16],%f16
903
904	fmuld	%f24,%f28,%f28
905	faddd	%f38,%f30,%f30
906
907	fmuld	%f0,%f6,%f6
908	faddd	%f4,%f2,%f4
909	ldd	[%l4+8],%f32
910
911	fmuld	%f8,%f14,%f14
912	faddd	%f12,%f10,%f12
913	ldd	[%l5+8],%f34
914
915	fmuld	%f16,%f22,%f22
916	faddd	%f20,%f18,%f20
917	ldd	[%l6+8],%f36
918
919	faddd	%f26,%f30,%f30
920	ldd	[%l7+16],%f38
921
922	fmuld	%f32,%f4,%f4
923
924	fmuld	%f34,%f12,%f12
925
926	fmuld	%f36,%f20,%f20
927
928	fmuld	%f38,%f30,%f30
929
! cos(...) lanes subtract the two products; the sin(...) lane adds them.
930	fsubd	%f6,%f4,%f6
931
932	fsubd	%f14,%f12,%f14
933
934	fsubd	%f22,%f20,%f22
935
936	faddd	%f30,%f28,%f30
937
938	faddd	%f6,%f0,%f6
939
940	faddd	%f14,%f8,%f14
941
942	faddd	%f22,%f16,%f22
943
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
944	faddd	%f30,%f24,%f30
945	mov	%l0,%l4
946
947	fnegd	%f6,%f4
948	lda	[%i1]%asi,%l0		! preload next argument
949
950	fnegd	%f14,%f12
951	lda	[%i1]%asi,%f0
952
953	fnegd	%f22,%f20
954	lda	[%i1+4]%asi,%f3
955
956	fnegd	%f30,%f28
957	andn	%l0,%i5,%l0
958	add	%i1,%i2,%i1
959
! Take the negated result for lanes whose n has bit 1 set, then store.
960	andcc	%l4,2,%g0
961	fmovdnz	%icc,%f4,%f6
962	st	%f6,[%o0]
963
964	andcc	%l1,2,%g0
965	fmovdnz	%icc,%f12,%f14
966	st	%f14,[%o1]
967
968	andcc	%l2,2,%g0
969	fmovdnz	%icc,%f20,%f22
970	st	%f22,[%o2]
971
972	andcc	%l3,2,%g0
973	fmovdnz	%icc,%f28,%f30
974	st	%f30,[%o3]
975
976	addcc	%i0,-1,%i0
977	bg,pt	%icc,.loop0
978! delay slot
979	st	%f7,[%o0+4]
980
981	ba,pt	%icc,.end
982! delay slot
983	nop
984
985	.align	16
986.case2:
! Reached from the .medium dispatch when bit 0 of n is set for lanes 0
! and 1 and clear for lane 2.  The dispatch had not yet handled lane 3,
! so this block first computes z3, tests bit 0 of n3 (clear: go to
! .case3) and sign-adjusts w3 in the delay slot.
! Falling through: lanes 0, 1, 3 use the cos(...) stream and lane 2
! the sin(...) stream.
! cos(...) lanes:
!   result = [entry+16] * z * (qq1 + z * (qq2 + z * qq3))
!            - [entry+8] * (x + w + x * z * (pp1 + z * pp2)) + [entry+16]
! sin(...) lane:
!   result = [entry+16] * (x + w + x * z * (pp1 + z * (pp2 + z * pp3)))
!            + [entry+8] * z * (qq1 + z * qq2) + [entry+8]
987	fmuld	%f26,%f26,%f24
988	andcc	%l3,1,%g0
989	bz,pn	%icc,.case3
990! delay slot
991	fxor	%f30,%f38,%f38
992
993	fmuld	%f16,pp3,%f22		! sin(x2)
994
995	fmuld	%f0,qq3,%f6		! cos(x0)
996
997	fmuld	%f8,qq3,%f14		! cos(x1)
998
999	faddd	%f22,pp2,%f22
1000	fmuld	%f16,qq2,%f20
1001
1002	fmuld	%f24,qq3,%f30		! cos(x3)
1003
1004	faddd	%f6,qq2,%f6
1005	fmuld	%f0,pp2,%f4
1006
1007	faddd	%f14,qq2,%f14
1008	fmuld	%f8,pp2,%f12
1009
1010	fmuld	%f16,%f22,%f22
1011	faddd	%f20,qq1,%f20
1012
1013	faddd	%f30,qq2,%f30
1014	fmuld	%f24,pp2,%f28
1015
1016	fmuld	%f0,%f6,%f6
1017	faddd	%f4,pp1,%f4
1018
1019	fmuld	%f8,%f14,%f14
1020	faddd	%f12,pp1,%f12
1021
! l4..l7 become the addresses of the selected table entries.
1022	faddd	%f22,pp1,%f22
1023	fmuld	%f16,%f20,%f20
1024	add	%l6,%g1,%l6
1025
1026	fmuld	%f24,%f30,%f30
1027	faddd	%f28,pp1,%f28
1028
1029	faddd	%f6,qq1,%f6
1030	fmuld	%f0,%f4,%f4
1031	add	%l4,%g1,%l4
1032
1033	faddd	%f14,qq1,%f14
1034	fmuld	%f8,%f12,%f12
1035	add	%l5,%g1,%l5
1036
1037	fmuld	%f16,%f22,%f22
1038
1039	faddd	%f30,qq1,%f30
1040	fmuld	%f24,%f28,%f28
1041	add	%l7,%g1,%l7
1042
1043	fmuld	%f2,%f4,%f4
1044
1045	fmuld	%f10,%f12,%f12
1046
! Lane 2 reuses the z register (f16) for the table word at offset 8.
1047	fmuld	%f18,%f22,%f22
1048	ldd	[%l6+8],%f16
1049
1050	fmuld	%f26,%f28,%f28
1051
1052	fmuld	%f0,%f6,%f6
1053	faddd	%f4,%f32,%f4
1054	ldd	[%l4+16],%f0
1055
1056	fmuld	%f8,%f14,%f14
1057	faddd	%f12,%f34,%f12
1058	ldd	[%l5+16],%f8
1059
1060	fmuld	%f16,%f20,%f20
1061	faddd	%f36,%f22,%f22
1062
1063	fmuld	%f24,%f30,%f30
1064	faddd	%f28,%f38,%f28
1065	ldd	[%l7+16],%f24
1066
1067	fmuld	%f0,%f6,%f6
1068	faddd	%f4,%f2,%f4
1069	ldd	[%l4+8],%f32
1070
1071	fmuld	%f8,%f14,%f14
1072	faddd	%f12,%f10,%f12
1073	ldd	[%l5+8],%f34
1074
1075	faddd	%f18,%f22,%f22
1076	ldd	[%l6+16],%f36
1077
1078	fmuld	%f24,%f30,%f30
1079	faddd	%f28,%f26,%f28
1080	ldd	[%l7+8],%f38
1081
1082	fmuld	%f32,%f4,%f4
1083
1084	fmuld	%f34,%f12,%f12
1085
1086	fmuld	%f36,%f22,%f22
1087
1088	fmuld	%f38,%f28,%f28
1089
! cos(...) lanes subtract the two products; the sin(...) lane adds them.
1090	fsubd	%f6,%f4,%f6
1091
1092	fsubd	%f14,%f12,%f14
1093
1094	faddd	%f22,%f20,%f22
1095
1096	fsubd	%f30,%f28,%f30
1097
1098	faddd	%f6,%f0,%f6
1099
1100	faddd	%f14,%f8,%f14
1101
1102	faddd	%f22,%f16,%f22
1103
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
1104	faddd	%f30,%f24,%f30
1105	mov	%l0,%l4
1106
1107	fnegd	%f6,%f4
1108	lda	[%i1]%asi,%l0		! preload next argument
1109
1110	fnegd	%f14,%f12
1111	lda	[%i1]%asi,%f0
1112
1113	fnegd	%f22,%f20
1114	lda	[%i1+4]%asi,%f3
1115
1116	fnegd	%f30,%f28
1117	andn	%l0,%i5,%l0
1118	add	%i1,%i2,%i1
1119
! Take the negated result for lanes whose n has bit 1 set, then store.
1120	andcc	%l4,2,%g0
1121	fmovdnz	%icc,%f4,%f6
1122	st	%f6,[%o0]
1123
1124	andcc	%l1,2,%g0
1125	fmovdnz	%icc,%f12,%f14
1126	st	%f14,[%o1]
1127
1128	andcc	%l2,2,%g0
1129	fmovdnz	%icc,%f20,%f22
1130	st	%f22,[%o2]
1131
1132	andcc	%l3,2,%g0
1133	fmovdnz	%icc,%f28,%f30
1134	st	%f30,[%o3]
1135
1136	addcc	%i0,-1,%i0
1137	bg,pt	%icc,.loop0
1138! delay slot
1139	st	%f7,[%o0+4]
1140
1141	ba,pt	%icc,.end
1142! delay slot
1143	nop
1144
1145	.align	16
1146.case3:
! Reached from .case2 when bit 0 of n is clear for lanes 2 and 3 (and
! set for lanes 0 and 1): lanes 0 and 1 use the cos(...) stream,
! lanes 2 and 3 the sin(...) stream.
! cos(...) lanes:
!   result = [entry+16] * z * (qq1 + z * (qq2 + z * qq3))
!            - [entry+8] * (x + w + x * z * (pp1 + z * pp2)) + [entry+16]
! sin(...) lanes:
!   result = [entry+16] * (x + w + x * z * (pp1 + z * (pp2 + z * pp3)))
!            + [entry+8] * z * (qq1 + z * qq2) + [entry+8]
1147	fmuld	%f16,pp3,%f22		! sin(x2)
1148
1149	fmuld	%f24,pp3,%f30		! sin(x3)
1150
1151	fmuld	%f0,qq3,%f6		! cos(x0)
1152
1153	fmuld	%f8,qq3,%f14		! cos(x1)
1154
1155	faddd	%f22,pp2,%f22
1156	fmuld	%f16,qq2,%f20
1157
1158	faddd	%f30,pp2,%f30
1159	fmuld	%f24,qq2,%f28
1160
1161	faddd	%f6,qq2,%f6
1162	fmuld	%f0,pp2,%f4
1163
1164	faddd	%f14,qq2,%f14
1165	fmuld	%f8,pp2,%f12
1166
1167	fmuld	%f16,%f22,%f22
1168	faddd	%f20,qq1,%f20
1169
1170	fmuld	%f24,%f30,%f30
1171	faddd	%f28,qq1,%f28
1172
1173	fmuld	%f0,%f6,%f6
1174	faddd	%f4,pp1,%f4
1175
1176	fmuld	%f8,%f14,%f14
1177	faddd	%f12,pp1,%f12
1178
! l4..l7 become the addresses of the selected table entries.
1179	faddd	%f22,pp1,%f22
1180	fmuld	%f16,%f20,%f20
1181	add	%l6,%g1,%l6
1182
1183	faddd	%f30,pp1,%f30
1184	fmuld	%f24,%f28,%f28
1185	add	%l7,%g1,%l7
1186
1187	faddd	%f6,qq1,%f6
1188	fmuld	%f0,%f4,%f4
1189	add	%l4,%g1,%l4
1190
1191	faddd	%f14,qq1,%f14
1192	fmuld	%f8,%f12,%f12
1193	add	%l5,%g1,%l5
1194
1195	fmuld	%f16,%f22,%f22
1196
1197	fmuld	%f24,%f30,%f30
1198
1199	fmuld	%f2,%f4,%f4
1200
1201	fmuld	%f10,%f12,%f12
1202
! The sin(...) lanes reuse their z registers for the table word at
! offset 8.
1203	fmuld	%f18,%f22,%f22
1204	ldd	[%l6+8],%f16
1205
1206	fmuld	%f26,%f30,%f30
1207	ldd	[%l7+8],%f24
1208
1209	fmuld	%f0,%f6,%f6
1210	faddd	%f4,%f32,%f4
1211	ldd	[%l4+16],%f0
1212
1213	fmuld	%f8,%f14,%f14
1214	faddd	%f12,%f34,%f12
1215	ldd	[%l5+16],%f8
1216
1217	fmuld	%f16,%f20,%f20
1218	faddd	%f36,%f22,%f22
1219
1220	fmuld	%f24,%f28,%f28
1221	faddd	%f38,%f30,%f30
1222
1223	fmuld	%f0,%f6,%f6
1224	faddd	%f4,%f2,%f4
1225	ldd	[%l4+8],%f32
1226
1227	fmuld	%f8,%f14,%f14
1228	faddd	%f12,%f10,%f12
1229	ldd	[%l5+8],%f34
1230
1231	faddd	%f18,%f22,%f22
1232	ldd	[%l6+16],%f36
1233
1234	faddd	%f26,%f30,%f30
1235	ldd	[%l7+16],%f38
1236
1237	fmuld	%f32,%f4,%f4
1238
1239	fmuld	%f34,%f12,%f12
1240
1241	fmuld	%f36,%f22,%f22
1242
1243	fmuld	%f38,%f30,%f30
1244
! cos(...) lanes subtract the two products; sin(...) lanes add them.
1245	fsubd	%f6,%f4,%f6
1246
1247	fsubd	%f14,%f12,%f14
1248
1249	faddd	%f22,%f20,%f22
1250
1251	faddd	%f30,%f28,%f30
1252
1253	faddd	%f6,%f0,%f6
1254
1255	faddd	%f14,%f8,%f14
1256
1257	faddd	%f22,%f16,%f22
1258
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
1259	faddd	%f30,%f24,%f30
1260	mov	%l0,%l4
1261
1262	fnegd	%f6,%f4
1263	lda	[%i1]%asi,%l0		! preload next argument
1264
1265	fnegd	%f14,%f12
1266	lda	[%i1]%asi,%f0
1267
1268	fnegd	%f22,%f20
1269	lda	[%i1+4]%asi,%f3
1270
1271	fnegd	%f30,%f28
1272	andn	%l0,%i5,%l0
1273	add	%i1,%i2,%i1
1274
! Take the negated result for lanes whose n has bit 1 set, then store.
1275	andcc	%l4,2,%g0
1276	fmovdnz	%icc,%f4,%f6
1277	st	%f6,[%o0]
1278
1279	andcc	%l1,2,%g0
1280	fmovdnz	%icc,%f12,%f14
1281	st	%f14,[%o1]
1282
1283	andcc	%l2,2,%g0
1284	fmovdnz	%icc,%f20,%f22
1285	st	%f22,[%o2]
1286
1287	andcc	%l3,2,%g0
1288	fmovdnz	%icc,%f28,%f30
1289	st	%f30,[%o3]
1290
1291	addcc	%i0,-1,%i0
1292	bg,pt	%icc,.loop0
1293! delay slot
1294	st	%f7,[%o0+4]
1295
1296	ba,pt	%icc,.end
1297! delay slot
1298	nop
1299
1300	.align	16
1301.case4:
! Reached from the .medium dispatch when bit 0 of n is set for lane 0
! and clear for lane 1.  Lanes 2 and 3 were not yet handled, so this
! block computes z2 and z3, sign-adjusts w2 and w3 in the delay slots,
! and branches to .case6 (lane 2 bit clear) or .case5 (lane 3 bit
! clear).  Falling through: lanes 0, 2, 3 use the cos(...) stream and
! lane 1 the sin(...) stream.
! cos(...) lanes:
!   result = [entry+16] * z * (qq1 + z * (qq2 + z * qq3))
!            - [entry+8] * (x + w + x * z * (pp1 + z * pp2)) + [entry+16]
! sin(...) lane:
!   result = [entry+16] * (x + w + x * z * (pp1 + z * (pp2 + z * pp3)))
!            + [entry+8] * z * (qq1 + z * qq2) + [entry+8]
1302	fmuld	%f18,%f18,%f16
1303	andcc	%l2,1,%g0
1304	bz,pn	%icc,.case6
1305! delay slot
1306	fxor	%f22,%f36,%f36
1307
1308	fmuld	%f26,%f26,%f24
1309	andcc	%l3,1,%g0
1310	bz,pn	%icc,.case5
1311! delay slot
1312	fxor	%f30,%f38,%f38
1313
1314	fmuld	%f8,pp3,%f14		! sin(x1)
1315
1316	fmuld	%f0,qq3,%f6		! cos(x0)
1317
1318	faddd	%f14,pp2,%f14
1319	fmuld	%f8,qq2,%f12
1320
1321	fmuld	%f16,qq3,%f22		! cos(x2)
1322
1323	fmuld	%f24,qq3,%f30		! cos(x3)
1324
1325	faddd	%f6,qq2,%f6
1326	fmuld	%f0,pp2,%f4
1327
1328	fmuld	%f8,%f14,%f14
1329	faddd	%f12,qq1,%f12
1330
1331	faddd	%f22,qq2,%f22
1332	fmuld	%f16,pp2,%f20
1333
1334	faddd	%f30,qq2,%f30
1335	fmuld	%f24,pp2,%f28
1336
1337	fmuld	%f0,%f6,%f6
1338	faddd	%f4,pp1,%f4
1339
! l4..l7 become the addresses of the selected table entries.
1340	faddd	%f14,pp1,%f14
1341	fmuld	%f8,%f12,%f12
1342	add	%l5,%g1,%l5
1343
1344	fmuld	%f16,%f22,%f22
1345	faddd	%f20,pp1,%f20
1346
1347	fmuld	%f24,%f30,%f30
1348	faddd	%f28,pp1,%f28
1349
1350	faddd	%f6,qq1,%f6
1351	fmuld	%f0,%f4,%f4
1352	add	%l4,%g1,%l4
1353
1354	fmuld	%f8,%f14,%f14
1355
1356	faddd	%f22,qq1,%f22
1357	fmuld	%f16,%f20,%f20
1358	add	%l6,%g1,%l6
1359
1360	faddd	%f30,qq1,%f30
1361	fmuld	%f24,%f28,%f28
1362	add	%l7,%g1,%l7
1363
1364	fmuld	%f2,%f4,%f4
1365
! Lane 1 reuses the z register (f8) for the table word at offset 8.
1366	fmuld	%f10,%f14,%f14
1367	ldd	[%l5+8],%f8
1368
1369	fmuld	%f18,%f20,%f20
1370
1371	fmuld	%f26,%f28,%f28
1372
1373	fmuld	%f0,%f6,%f6
1374	faddd	%f4,%f32,%f4
1375	ldd	[%l4+16],%f0
1376
1377	fmuld	%f8,%f12,%f12
1378	faddd	%f34,%f14,%f14
1379
1380	fmuld	%f16,%f22,%f22
1381	faddd	%f20,%f36,%f20
1382	ldd	[%l6+16],%f16
1383
1384	fmuld	%f24,%f30,%f30
1385	faddd	%f28,%f38,%f28
1386	ldd	[%l7+16],%f24
1387
1388	fmuld	%f0,%f6,%f6
1389	faddd	%f4,%f2,%f4
1390	ldd	[%l4+8],%f32
1391
1392	faddd	%f10,%f14,%f14
1393	ldd	[%l5+16],%f34
1394
1395	fmuld	%f16,%f22,%f22
1396	faddd	%f20,%f18,%f20
1397	ldd	[%l6+8],%f36
1398
1399	fmuld	%f24,%f30,%f30
1400	faddd	%f28,%f26,%f28
1401	ldd	[%l7+8],%f38
1402
1403	fmuld	%f32,%f4,%f4
1404
1405	fmuld	%f34,%f14,%f14
1406
1407	fmuld	%f36,%f20,%f20
1408
1409	fmuld	%f38,%f28,%f28
1410
! cos(...) lanes subtract the two products; the sin(...) lane adds them.
1411	fsubd	%f6,%f4,%f6
1412
1413	faddd	%f14,%f12,%f14
1414
1415	fsubd	%f22,%f20,%f22
1416
1417	fsubd	%f30,%f28,%f30
1418
1419	faddd	%f6,%f0,%f6
1420
1421	faddd	%f14,%f8,%f14
1422
1423	faddd	%f22,%f16,%f22
1424
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
1425	faddd	%f30,%f24,%f30
1426	mov	%l0,%l4
1427
1428	fnegd	%f6,%f4
1429	lda	[%i1]%asi,%l0		! preload next argument
1430
1431	fnegd	%f14,%f12
1432	lda	[%i1]%asi,%f0
1433
1434	fnegd	%f22,%f20
1435	lda	[%i1+4]%asi,%f3
1436
1437	fnegd	%f30,%f28
1438	andn	%l0,%i5,%l0
1439	add	%i1,%i2,%i1
1440
! Take the negated result for lanes whose n has bit 1 set, then store.
1441	andcc	%l4,2,%g0
1442	fmovdnz	%icc,%f4,%f6
1443	st	%f6,[%o0]
1444
1445	andcc	%l1,2,%g0
1446	fmovdnz	%icc,%f12,%f14
1447	st	%f14,[%o1]
1448
1449	andcc	%l2,2,%g0
1450	fmovdnz	%icc,%f20,%f22
1451	st	%f22,[%o2]
1452
1453	andcc	%l3,2,%g0
1454	fmovdnz	%icc,%f28,%f30
1455	st	%f30,[%o3]
1456
1457	addcc	%i0,-1,%i0
1458	bg,pt	%icc,.loop0
1459! delay slot
1460	st	%f7,[%o0+4]
1461
1462	ba,pt	%icc,.end
1463! delay slot
1464	nop
1465
1466	.align	16
1467.case5:
! Reached from .case4 when bit 0 of n is clear for lanes 1 and 3 and
! set for lanes 0 and 2: lanes 0 and 2 use the cos(...) stream, lanes 1
! and 3 the sin(...) stream.
! cos(...) lanes:
!   result = [entry+16] * z * (qq1 + z * (qq2 + z * qq3))
!            - [entry+8] * (x + w + x * z * (pp1 + z * pp2)) + [entry+16]
! sin(...) lanes:
!   result = [entry+16] * (x + w + x * z * (pp1 + z * (pp2 + z * pp3)))
!            + [entry+8] * z * (qq1 + z * qq2) + [entry+8]
1468	fmuld	%f8,pp3,%f14		! sin(x1)
1469
1470	fmuld	%f24,pp3,%f30		! sin(x3)
1471
1472	fmuld	%f0,qq3,%f6		! cos(x0)
1473
1474	faddd	%f14,pp2,%f14
1475	fmuld	%f8,qq2,%f12
1476
1477	fmuld	%f16,qq3,%f22		! cos(x2)
1478
1479	faddd	%f30,pp2,%f30
1480	fmuld	%f24,qq2,%f28
1481
1482	faddd	%f6,qq2,%f6
1483	fmuld	%f0,pp2,%f4
1484
1485	fmuld	%f8,%f14,%f14
1486	faddd	%f12,qq1,%f12
1487
1488	faddd	%f22,qq2,%f22
1489	fmuld	%f16,pp2,%f20
1490
1491	fmuld	%f24,%f30,%f30
1492	faddd	%f28,qq1,%f28
1493
1494	fmuld	%f0,%f6,%f6
1495	faddd	%f4,pp1,%f4
1496
! l4..l7 become the addresses of the selected table entries.
1497	faddd	%f14,pp1,%f14
1498	fmuld	%f8,%f12,%f12
1499	add	%l5,%g1,%l5
1500
1501	fmuld	%f16,%f22,%f22
1502	faddd	%f20,pp1,%f20
1503
1504	faddd	%f30,pp1,%f30
1505	fmuld	%f24,%f28,%f28
1506	add	%l7,%g1,%l7
1507
1508	faddd	%f6,qq1,%f6
1509	fmuld	%f0,%f4,%f4
1510	add	%l4,%g1,%l4
1511
1512	fmuld	%f8,%f14,%f14
1513
1514	faddd	%f22,qq1,%f22
1515	fmuld	%f16,%f20,%f20
1516	add	%l6,%g1,%l6
1517
1518	fmuld	%f24,%f30,%f30
1519
1520	fmuld	%f2,%f4,%f4
1521
! The sin(...) lanes reuse their z registers for the table word at
! offset 8.
1522	fmuld	%f10,%f14,%f14
1523	ldd	[%l5+8],%f8
1524
1525	fmuld	%f18,%f20,%f20
1526
1527	fmuld	%f26,%f30,%f30
1528	ldd	[%l7+8],%f24
1529
1530	fmuld	%f0,%f6,%f6
1531	faddd	%f4,%f32,%f4
1532	ldd	[%l4+16],%f0
1533
1534	fmuld	%f8,%f12,%f12
1535	faddd	%f34,%f14,%f14
1536
1537	fmuld	%f16,%f22,%f22
1538	faddd	%f20,%f36,%f20
1539	ldd	[%l6+16],%f16
1540
1541	fmuld	%f24,%f28,%f28
1542	faddd	%f38,%f30,%f30
1543
1544	fmuld	%f0,%f6,%f6
1545	faddd	%f4,%f2,%f4
1546	ldd	[%l4+8],%f32
1547
1548	faddd	%f10,%f14,%f14
1549	ldd	[%l5+16],%f34
1550
1551	fmuld	%f16,%f22,%f22
1552	faddd	%f20,%f18,%f20
1553	ldd	[%l6+8],%f36
1554
1555	faddd	%f26,%f30,%f30
1556	ldd	[%l7+16],%f38
1557
1558	fmuld	%f32,%f4,%f4
1559
1560	fmuld	%f34,%f14,%f14
1561
1562	fmuld	%f36,%f20,%f20
1563
1564	fmuld	%f38,%f30,%f30
1565
! cos(...) lanes subtract the two products; sin(...) lanes add them.
1566	fsubd	%f6,%f4,%f6
1567
1568	faddd	%f14,%f12,%f14
1569
1570	fsubd	%f22,%f20,%f22
1571
1572	faddd	%f30,%f28,%f30
1573
1574	faddd	%f6,%f0,%f6
1575
1576	faddd	%f14,%f8,%f14
1577
1578	faddd	%f22,%f16,%f22
1579
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
1580	faddd	%f30,%f24,%f30
1581	mov	%l0,%l4
1582
1583	fnegd	%f6,%f4
1584	lda	[%i1]%asi,%l0		! preload next argument
1585
1586	fnegd	%f14,%f12
1587	lda	[%i1]%asi,%f0
1588
1589	fnegd	%f22,%f20
1590	lda	[%i1+4]%asi,%f3
1591
1592	fnegd	%f30,%f28
1593	andn	%l0,%i5,%l0
1594	add	%i1,%i2,%i1
1595
! Take the negated result for lanes whose n has bit 1 set, then store.
1596	andcc	%l4,2,%g0
1597	fmovdnz	%icc,%f4,%f6
1598	st	%f6,[%o0]
1599
1600	andcc	%l1,2,%g0
1601	fmovdnz	%icc,%f12,%f14
1602	st	%f14,[%o1]
1603
1604	andcc	%l2,2,%g0
1605	fmovdnz	%icc,%f20,%f22
1606	st	%f22,[%o2]
1607
1608	andcc	%l3,2,%g0
1609	fmovdnz	%icc,%f28,%f30
1610	st	%f30,[%o3]
1611
1612	addcc	%i0,-1,%i0
1613	bg,pt	%icc,.loop0
1614! delay slot
1615	st	%f7,[%o0+4]
1616
1617	ba,pt	%icc,.end
1618! delay slot
1619	nop
1620
1621	.align	16
1622.case6:
! Reached from .case4 when bit 0 of n is clear for lanes 1 and 2 (and
! set for lane 0).  Lane 3 was not yet handled, so this block computes
! z3, tests bit 0 of n3 (clear: go to .case7) and sign-adjusts w3 in
! the delay slot.  Falling through: lanes 0 and 3 use the cos(...)
! stream, lanes 1 and 2 the sin(...) stream.
! cos(...) lanes:
!   result = [entry+16] * z * (qq1 + z * (qq2 + z * qq3))
!            - [entry+8] * (x + w + x * z * (pp1 + z * pp2)) + [entry+16]
! sin(...) lanes:
!   result = [entry+16] * (x + w + x * z * (pp1 + z * (pp2 + z * pp3)))
!            + [entry+8] * z * (qq1 + z * qq2) + [entry+8]
1623	fmuld	%f26,%f26,%f24
1624	andcc	%l3,1,%g0
1625	bz,pn	%icc,.case7
1626! delay slot
1627	fxor	%f30,%f38,%f38
1628
1629	fmuld	%f8,pp3,%f14		! sin(x1)
1630
1631	fmuld	%f16,pp3,%f22		! sin(x2)
1632
1633	fmuld	%f0,qq3,%f6		! cos(x0)
1634
1635	faddd	%f14,pp2,%f14
1636	fmuld	%f8,qq2,%f12
1637
1638	faddd	%f22,pp2,%f22
1639	fmuld	%f16,qq2,%f20
1640
1641	fmuld	%f24,qq3,%f30		! cos(x3)
1642
1643	faddd	%f6,qq2,%f6
1644	fmuld	%f0,pp2,%f4
1645
1646	fmuld	%f8,%f14,%f14
1647	faddd	%f12,qq1,%f12
1648
1649	fmuld	%f16,%f22,%f22
1650	faddd	%f20,qq1,%f20
1651
1652	faddd	%f30,qq2,%f30
1653	fmuld	%f24,pp2,%f28
1654
1655	fmuld	%f0,%f6,%f6
1656	faddd	%f4,pp1,%f4
1657
! l4..l7 become the addresses of the selected table entries.
1658	faddd	%f14,pp1,%f14
1659	fmuld	%f8,%f12,%f12
1660	add	%l5,%g1,%l5
1661
1662	faddd	%f22,pp1,%f22
1663	fmuld	%f16,%f20,%f20
1664	add	%l6,%g1,%l6
1665
1666	fmuld	%f24,%f30,%f30
1667	faddd	%f28,pp1,%f28
1668
1669	faddd	%f6,qq1,%f6
1670	fmuld	%f0,%f4,%f4
1671	add	%l4,%g1,%l4
1672
1673	fmuld	%f8,%f14,%f14
1674
1675	fmuld	%f16,%f22,%f22
1676
1677	faddd	%f30,qq1,%f30
1678	fmuld	%f24,%f28,%f28
1679	add	%l7,%g1,%l7
1680
1681	fmuld	%f2,%f4,%f4
1682
! The sin(...) lanes reuse their z registers for the table word at
! offset 8.
1683	fmuld	%f10,%f14,%f14
1684	ldd	[%l5+8],%f8
1685
1686	fmuld	%f18,%f22,%f22
1687	ldd	[%l6+8],%f16
1688
1689	fmuld	%f26,%f28,%f28
1690
1691	fmuld	%f0,%f6,%f6
1692	faddd	%f4,%f32,%f4
1693	ldd	[%l4+16],%f0
1694
1695	fmuld	%f8,%f12,%f12
1696	faddd	%f34,%f14,%f14
1697
1698	fmuld	%f16,%f20,%f20
1699	faddd	%f36,%f22,%f22
1700
1701	fmuld	%f24,%f30,%f30
1702	faddd	%f28,%f38,%f28
1703	ldd	[%l7+16],%f24
1704
1705	fmuld	%f0,%f6,%f6
1706	faddd	%f4,%f2,%f4
1707	ldd	[%l4+8],%f32
1708
1709	faddd	%f10,%f14,%f14
1710	ldd	[%l5+16],%f34
1711
1712	faddd	%f18,%f22,%f22
1713	ldd	[%l6+16],%f36
1714
1715	fmuld	%f24,%f30,%f30
1716	faddd	%f28,%f26,%f28
1717	ldd	[%l7+8],%f38
1718
1719	fmuld	%f32,%f4,%f4
1720
1721	fmuld	%f34,%f14,%f14
1722
1723	fmuld	%f36,%f22,%f22
1724
1725	fmuld	%f38,%f28,%f28
1726
! cos(...) lanes subtract the two products; sin(...) lanes add them.
1727	fsubd	%f6,%f4,%f6
1728
1729	faddd	%f14,%f12,%f14
1730
1731	faddd	%f22,%f20,%f22
1732
1733	fsubd	%f30,%f28,%f30
1734
1735	faddd	%f6,%f0,%f6
1736
1737	faddd	%f14,%f8,%f14
1738
1739	faddd	%f22,%f16,%f22
1740
! n0 is copied to l4 because l0 is about to receive the preloaded hx.
1741	faddd	%f30,%f24,%f30
1742	mov	%l0,%l4
1743
1744	fnegd	%f6,%f4
1745	lda	[%i1]%asi,%l0		! preload next argument
1746
1747	fnegd	%f14,%f12
1748	lda	[%i1]%asi,%f0
1749
1750	fnegd	%f22,%f20
1751	lda	[%i1+4]%asi,%f3
1752
1753	fnegd	%f30,%f28
1754	andn	%l0,%i5,%l0
1755	add	%i1,%i2,%i1
1756
! Take the negated result for lanes whose n has bit 1 set, then store.
1757	andcc	%l4,2,%g0
1758	fmovdnz	%icc,%f4,%f6
1759	st	%f6,[%o0]
1760
1761	andcc	%l1,2,%g0
1762	fmovdnz	%icc,%f12,%f14
1763	st	%f14,[%o1]
1764
1765	andcc	%l2,2,%g0
1766	fmovdnz	%icc,%f20,%f22
1767	st	%f22,[%o2]
1768
1769	andcc	%l3,2,%g0
1770	fmovdnz	%icc,%f28,%f30
1771	st	%f30,[%o3]
1772
1773	addcc	%i0,-1,%i0
1774	bg,pt	%icc,.loop0
1775! delay slot
1776	st	%f7,[%o0+4]
1777
1778	ba,pt	%icc,.end
1779! delay slot
1780	nop
1781
1782	.align	16
/*
 * .case7: lane 0 (x0) uses the cos-style evaluation (qq3 first) and
 * lanes 1-3 (x1, x2, x3) use the sin-style evaluation (pp3 first), as
 * marked on the first multiplies below.  The four lanes are interleaved
 * instruction by instruction; registers are reloaded from memory right
 * after their last use as a polynomial variable, so the order matters.
 *
 * Registers as used in this block (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 *   %o0  %o1  %o2  %o3    where the results are stored
 * NOTE(review): the polynomial variables are presumably the squares of
 * %f2/%f10/%f18/%f26 computed before entry (as .case8 does for its
 * lanes), and the words at +8/+16 presumably table sin/cos values; the
 * arithmetic matches the angle-addition formulas -- confirm.
 */
1783.case7:
1784	fmuld	%f8,pp3,%f14		! sin(x1)
1785
1786	fmuld	%f16,pp3,%f22		! sin(x2)
1787
1788	fmuld	%f24,pp3,%f30		! sin(x3)
1789
1790	fmuld	%f0,qq3,%f6		! cos(x0)
1791
1792	faddd	%f14,pp2,%f14
1793	fmuld	%f8,qq2,%f12
1794
1795	faddd	%f22,pp2,%f22
1796	fmuld	%f16,qq2,%f20
1797
1798	faddd	%f30,pp2,%f30
1799	fmuld	%f24,qq2,%f28
1800
1801	faddd	%f6,qq2,%f6
1802	fmuld	%f0,pp2,%f4
1803
1804	fmuld	%f8,%f14,%f14
1805	faddd	%f12,qq1,%f12
1806
1807	fmuld	%f16,%f22,%f22
1808	faddd	%f20,qq1,%f20
1809
1810	fmuld	%f24,%f30,%f30
1811	faddd	%f28,qq1,%f28
1812
1813	fmuld	%f0,%f6,%f6
1814	faddd	%f4,pp1,%f4
1815
1816	faddd	%f14,pp1,%f14
1817	fmuld	%f8,%f12,%f12
1818	add	%l5,%g1,%l5
1819
1820	faddd	%f22,pp1,%f22
1821	fmuld	%f16,%f20,%f20
1822	add	%l6,%g1,%l6
1823
1824	faddd	%f30,pp1,%f30
1825	fmuld	%f24,%f28,%f28
1826	add	%l7,%g1,%l7
1827
1828	faddd	%f6,qq1,%f6
1829	fmuld	%f0,%f4,%f4
1830	add	%l4,%g1,%l4
1831
1832	fmuld	%f8,%f14,%f14
1833
1834	fmuld	%f16,%f22,%f22
1835
1836	fmuld	%f24,%f30,%f30
1837
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
1838	fmuld	%f2,%f4,%f4
1839
1840	fmuld	%f10,%f14,%f14
1841	ldd	[%l5+8],%f8
1842
1843	fmuld	%f18,%f22,%f22
1844	ldd	[%l6+8],%f16
1845
1846	fmuld	%f26,%f30,%f30
1847	ldd	[%l7+8],%f24
1848
1849	fmuld	%f0,%f6,%f6
1850	faddd	%f4,%f32,%f4
1851	ldd	[%l4+16],%f0
1852
1853	fmuld	%f8,%f12,%f12
1854	faddd	%f34,%f14,%f14
1855
1856	fmuld	%f16,%f20,%f20
1857	faddd	%f36,%f22,%f22
1858
1859	fmuld	%f24,%f28,%f28
1860	faddd	%f38,%f30,%f30
1861
1862	fmuld	%f0,%f6,%f6
1863	faddd	%f4,%f2,%f4
1864	ldd	[%l4+8],%f32
1865
1866	faddd	%f10,%f14,%f14
1867	ldd	[%l5+16],%f34
1868
1869	faddd	%f18,%f22,%f22
1870	ldd	[%l6+16],%f36
1871
1872	faddd	%f26,%f30,%f30
1873	ldd	[%l7+16],%f38
1874
1875	fmuld	%f32,%f4,%f4
1876
1877	fmuld	%f34,%f14,%f14
1878
1879	fmuld	%f36,%f22,%f22
1880
1881	fmuld	%f38,%f30,%f30
1882
/*
 * Combine per lane: the cos lane subtracts the pp product from the qq
 * product, the sin lanes add the two; the last add brings in the word
 * that was reloaded into %f0/%f8/%f16/%f24.
 */
1883	fsubd	%f6,%f4,%f6
1884
1885	faddd	%f14,%f12,%f14
1886
1887	faddd	%f22,%f20,%f22
1888
1889	faddd	%f30,%f28,%f30
1890
1891	faddd	%f6,%f0,%f6
1892
1893	faddd	%f14,%f8,%f14
1894
1895	faddd	%f22,%f16,%f22
1896
1897	faddd	%f30,%f24,%f30
1898	mov	%l0,%l4
1899
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
1900	fnegd	%f6,%f4
1901	lda	[%i1]%asi,%l0		! preload next argument
1902
1903	fnegd	%f14,%f12
1904	lda	[%i1]%asi,%f0
1905
1906	fnegd	%f22,%f20
1907	lda	[%i1+4]%asi,%f3
1908
1909	fnegd	%f30,%f28
1910	andn	%l0,%i5,%l0
1911	add	%i1,%i2,%i1
1912
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
1913	andcc	%l4,2,%g0
1914	fmovdnz	%icc,%f4,%f6
1915	st	%f6,[%o0]
1916
1917	andcc	%l1,2,%g0
1918	fmovdnz	%icc,%f12,%f14
1919	st	%f14,[%o1]
1920
1921	andcc	%l2,2,%g0
1922	fmovdnz	%icc,%f20,%f22
1923	st	%f22,[%o2]
1924
1925	andcc	%l3,2,%g0
1926	fmovdnz	%icc,%f28,%f30
1927	st	%f30,[%o3]
1928
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
1929	addcc	%i0,-1,%i0
1930	bg,pt	%icc,.loop0
1931! delay slot
1932	st	%f7,[%o0+4]
1933
1934	ba,pt	%icc,.end
1935! delay slot
1936	nop
1937
1938	.align	16
/*
 * .case8: lane 0 (x0) uses the sin-style evaluation.  The entry code
 * squares the lane 1-3 arguments (%f10, %f18, %f26) into %f8, %f16,
 * %f24 and tests bit 0 of %l1, %l2, %l3 in turn: a clear bit means that
 * lane needs the sin-style evaluation, handled at .case12, .case10 and
 * .case9 respectively.  Falling through all three tests leaves lanes
 * 1-3 on the cos-style evaluation.
 *
 * Each fxor sits in a non-annulled delay slot, so it executes whether
 * or not the branch is taken.
 * NOTE(review): %f14/%f22/%f30 presumably hold sign masks prepared
 * before entry -- confirm.
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 */
1939.case8:
1940	fmuld	%f10,%f10,%f8
1941	andcc	%l1,1,%g0
1942	bz,pn	%icc,.case12
1943! delay slot
1944	fxor	%f14,%f34,%f34
1945
1946	fmuld	%f18,%f18,%f16
1947	andcc	%l2,1,%g0
1948	bz,pn	%icc,.case10
1949! delay slot
1950	fxor	%f22,%f36,%f36
1951
1952	fmuld	%f26,%f26,%f24
1953	andcc	%l3,1,%g0
1954	bz,pn	%icc,.case9
1955! delay slot
1956	fxor	%f30,%f38,%f38
1957
1958	fmuld	%f0,pp3,%f6		! sin(x0)
1959
1960	faddd	%f6,pp2,%f6
1961	fmuld	%f0,qq2,%f4
1962
1963	fmuld	%f8,qq3,%f14		! cos(x1)
1964
1965	fmuld	%f16,qq3,%f22		! cos(x2)
1966
1967	fmuld	%f24,qq3,%f30		! cos(x3)
1968
1969	fmuld	%f0,%f6,%f6
1970	faddd	%f4,qq1,%f4
1971
1972	faddd	%f14,qq2,%f14
1973	fmuld	%f8,pp2,%f12
1974
1975	faddd	%f22,qq2,%f22
1976	fmuld	%f16,pp2,%f20
1977
1978	faddd	%f30,qq2,%f30
1979	fmuld	%f24,pp2,%f28
1980
1981	faddd	%f6,pp1,%f6
1982	fmuld	%f0,%f4,%f4
1983	add	%l4,%g1,%l4
1984
1985	fmuld	%f8,%f14,%f14
1986	faddd	%f12,pp1,%f12
1987
1988	fmuld	%f16,%f22,%f22
1989	faddd	%f20,pp1,%f20
1990
1991	fmuld	%f24,%f30,%f30
1992	faddd	%f28,pp1,%f28
1993
1994	fmuld	%f0,%f6,%f6
1995
1996	faddd	%f14,qq1,%f14
1997	fmuld	%f8,%f12,%f12
1998	add	%l5,%g1,%l5
1999
2000	faddd	%f22,qq1,%f22
2001	fmuld	%f16,%f20,%f20
2002	add	%l6,%g1,%l6
2003
2004	faddd	%f30,qq1,%f30
2005	fmuld	%f24,%f28,%f28
2006	add	%l7,%g1,%l7
2007
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2008	fmuld	%f2,%f6,%f6
2009	ldd	[%l4+8],%f0
2010
2011	fmuld	%f10,%f12,%f12
2012
2013	fmuld	%f18,%f20,%f20
2014
2015	fmuld	%f26,%f28,%f28
2016
2017	fmuld	%f0,%f4,%f4
2018	faddd	%f32,%f6,%f6
2019
2020	fmuld	%f8,%f14,%f14
2021	faddd	%f12,%f34,%f12
2022	ldd	[%l5+16],%f8
2023
2024	fmuld	%f16,%f22,%f22
2025	faddd	%f20,%f36,%f20
2026	ldd	[%l6+16],%f16
2027
2028	fmuld	%f24,%f30,%f30
2029	faddd	%f28,%f38,%f28
2030	ldd	[%l7+16],%f24
2031
2032	faddd	%f2,%f6,%f6
2033	ldd	[%l4+16],%f32
2034
2035	fmuld	%f8,%f14,%f14
2036	faddd	%f12,%f10,%f12
2037	ldd	[%l5+8],%f34
2038
2039	fmuld	%f16,%f22,%f22
2040	faddd	%f20,%f18,%f20
2041	ldd	[%l6+8],%f36
2042
2043	fmuld	%f24,%f30,%f30
2044	faddd	%f28,%f26,%f28
2045	ldd	[%l7+8],%f38
2046
2047	fmuld	%f32,%f6,%f6
2048
2049	fmuld	%f34,%f12,%f12
2050
2051	fmuld	%f36,%f20,%f20
2052
2053	fmuld	%f38,%f28,%f28
2054
/*
 * Combine per lane: the sin lane adds the qq product to the pp product,
 * the cos lanes subtract the pp product from the qq product; the last
 * add brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
2055	faddd	%f6,%f4,%f6
2056
2057	fsubd	%f14,%f12,%f14
2058
2059	fsubd	%f22,%f20,%f22
2060
2061	fsubd	%f30,%f28,%f30
2062
2063	faddd	%f6,%f0,%f6
2064
2065	faddd	%f14,%f8,%f14
2066
2067	faddd	%f22,%f16,%f22
2068
2069	faddd	%f30,%f24,%f30
2070	mov	%l0,%l4
2071
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
2072	fnegd	%f6,%f4
2073	lda	[%i1]%asi,%l0		! preload next argument
2074
2075	fnegd	%f14,%f12
2076	lda	[%i1]%asi,%f0
2077
2078	fnegd	%f22,%f20
2079	lda	[%i1+4]%asi,%f3
2080
2081	fnegd	%f30,%f28
2082	andn	%l0,%i5,%l0
2083	add	%i1,%i2,%i1
2084
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
2085	andcc	%l4,2,%g0
2086	fmovdnz	%icc,%f4,%f6
2087	st	%f6,[%o0]
2088
2089	andcc	%l1,2,%g0
2090	fmovdnz	%icc,%f12,%f14
2091	st	%f14,[%o1]
2092
2093	andcc	%l2,2,%g0
2094	fmovdnz	%icc,%f20,%f22
2095	st	%f22,[%o2]
2096
2097	andcc	%l3,2,%g0
2098	fmovdnz	%icc,%f28,%f30
2099	st	%f30,[%o3]
2100
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
2101	addcc	%i0,-1,%i0
2102	bg,pt	%icc,.loop0
2103! delay slot
2104	st	%f7,[%o0+4]
2105
2106	ba,pt	%icc,.end
2107! delay slot
2108	nop
2109
2110	.align	16
/*
 * .case9: reached from .case8 when bit 0 of %l3 is clear.  Lanes 0 and
 * 3 (x0, x3) use the sin-style evaluation (pp3 first); lanes 1 and 2
 * (x1, x2) use the cos-style evaluation (qq3 first).  The squares in
 * %f8/%f16/%f24 were formed by .case8 before branching here.
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 * The lanes are interleaved and registers are reloaded right after
 * their last use, so instruction order matters.
 */
2111.case9:
2112	fmuld	%f0,pp3,%f6		! sin(x0)
2113
2114	fmuld	%f24,pp3,%f30		! sin(x3)
2115
2116	faddd	%f6,pp2,%f6
2117	fmuld	%f0,qq2,%f4
2118
2119	fmuld	%f8,qq3,%f14		! cos(x1)
2120
2121	fmuld	%f16,qq3,%f22		! cos(x2)
2122
2123	faddd	%f30,pp2,%f30
2124	fmuld	%f24,qq2,%f28
2125
2126	fmuld	%f0,%f6,%f6
2127	faddd	%f4,qq1,%f4
2128
2129	faddd	%f14,qq2,%f14
2130	fmuld	%f8,pp2,%f12
2131
2132	faddd	%f22,qq2,%f22
2133	fmuld	%f16,pp2,%f20
2134
2135	fmuld	%f24,%f30,%f30
2136	faddd	%f28,qq1,%f28
2137
2138	faddd	%f6,pp1,%f6
2139	fmuld	%f0,%f4,%f4
2140	add	%l4,%g1,%l4
2141
2142	fmuld	%f8,%f14,%f14
2143	faddd	%f12,pp1,%f12
2144
2145	fmuld	%f16,%f22,%f22
2146	faddd	%f20,pp1,%f20
2147
2148	faddd	%f30,pp1,%f30
2149	fmuld	%f24,%f28,%f28
2150	add	%l7,%g1,%l7
2151
2152	fmuld	%f0,%f6,%f6
2153
2154	faddd	%f14,qq1,%f14
2155	fmuld	%f8,%f12,%f12
2156	add	%l5,%g1,%l5
2157
2158	faddd	%f22,qq1,%f22
2159	fmuld	%f16,%f20,%f20
2160	add	%l6,%g1,%l6
2161
2162	fmuld	%f24,%f30,%f30
2163
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2164	fmuld	%f2,%f6,%f6
2165	ldd	[%l4+8],%f0
2166
2167	fmuld	%f10,%f12,%f12
2168
2169	fmuld	%f18,%f20,%f20
2170
2171	fmuld	%f26,%f30,%f30
2172	ldd	[%l7+8],%f24
2173
2174	fmuld	%f0,%f4,%f4
2175	faddd	%f32,%f6,%f6
2176
2177	fmuld	%f8,%f14,%f14
2178	faddd	%f12,%f34,%f12
2179	ldd	[%l5+16],%f8
2180
2181	fmuld	%f16,%f22,%f22
2182	faddd	%f20,%f36,%f20
2183	ldd	[%l6+16],%f16
2184
2185	fmuld	%f24,%f28,%f28
2186	faddd	%f38,%f30,%f30
2187
2188	faddd	%f2,%f6,%f6
2189	ldd	[%l4+16],%f32
2190
2191	fmuld	%f8,%f14,%f14
2192	faddd	%f12,%f10,%f12
2193	ldd	[%l5+8],%f34
2194
2195	fmuld	%f16,%f22,%f22
2196	faddd	%f20,%f18,%f20
2197	ldd	[%l6+8],%f36
2198
2199	faddd	%f26,%f30,%f30
2200	ldd	[%l7+16],%f38
2201
2202	fmuld	%f32,%f6,%f6
2203
2204	fmuld	%f34,%f12,%f12
2205
2206	fmuld	%f36,%f20,%f20
2207
2208	fmuld	%f38,%f30,%f30
2209
/*
 * Combine per lane: sin lanes add the qq product to the pp product, cos
 * lanes subtract the pp product from the qq product; the last add
 * brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
2210	faddd	%f6,%f4,%f6
2211
2212	fsubd	%f14,%f12,%f14
2213
2214	fsubd	%f22,%f20,%f22
2215
2216	faddd	%f30,%f28,%f30
2217
2218	faddd	%f6,%f0,%f6
2219
2220	faddd	%f14,%f8,%f14
2221
2222	faddd	%f22,%f16,%f22
2223
2224	faddd	%f30,%f24,%f30
2225	mov	%l0,%l4
2226
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
2227	fnegd	%f6,%f4
2228	lda	[%i1]%asi,%l0		! preload next argument
2229
2230	fnegd	%f14,%f12
2231	lda	[%i1]%asi,%f0
2232
2233	fnegd	%f22,%f20
2234	lda	[%i1+4]%asi,%f3
2235
2236	fnegd	%f30,%f28
2237	andn	%l0,%i5,%l0
2238	add	%i1,%i2,%i1
2239
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
2240	andcc	%l4,2,%g0
2241	fmovdnz	%icc,%f4,%f6
2242	st	%f6,[%o0]
2243
2244	andcc	%l1,2,%g0
2245	fmovdnz	%icc,%f12,%f14
2246	st	%f14,[%o1]
2247
2248	andcc	%l2,2,%g0
2249	fmovdnz	%icc,%f20,%f22
2250	st	%f22,[%o2]
2251
2252	andcc	%l3,2,%g0
2253	fmovdnz	%icc,%f28,%f30
2254	st	%f30,[%o3]
2255
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
2256	addcc	%i0,-1,%i0
2257	bg,pt	%icc,.loop0
2258! delay slot
2259	st	%f7,[%o0+4]
2260
2261	ba,pt	%icc,.end
2262! delay slot
2263	nop
2264
2265	.align	16
/*
 * .case10: reached from .case8 when bit 0 of %l2 is clear.  The entry
 * code squares lane 3's argument (%f26) into %f24 and tests bit 0 of
 * %l3: a clear bit sends control to .case11.  Falling through, lanes 0
 * and 2 (x0, x2) use the sin-style evaluation (pp3 first) and lanes 1
 * and 3 (x1, x3) use the cos-style evaluation (qq3 first).
 *
 * The fxor sits in a non-annulled delay slot, so it executes whether or
 * not the branch is taken.
 * NOTE(review): %f30 presumably holds a sign mask prepared before
 * entry -- confirm.
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 */
2266.case10:
2267	fmuld	%f26,%f26,%f24
2268	andcc	%l3,1,%g0
2269	bz,pn	%icc,.case11
2270! delay slot
2271	fxor	%f30,%f38,%f38
2272
2273	fmuld	%f0,pp3,%f6		! sin(x0)
2274
2275	fmuld	%f16,pp3,%f22		! sin(x2)
2276
2277	faddd	%f6,pp2,%f6
2278	fmuld	%f0,qq2,%f4
2279
2280	fmuld	%f8,qq3,%f14		! cos(x1)
2281
2282	faddd	%f22,pp2,%f22
2283	fmuld	%f16,qq2,%f20
2284
2285	fmuld	%f24,qq3,%f30		! cos(x3)
2286
2287	fmuld	%f0,%f6,%f6
2288	faddd	%f4,qq1,%f4
2289
2290	faddd	%f14,qq2,%f14
2291	fmuld	%f8,pp2,%f12
2292
2293	fmuld	%f16,%f22,%f22
2294	faddd	%f20,qq1,%f20
2295
2296	faddd	%f30,qq2,%f30
2297	fmuld	%f24,pp2,%f28
2298
2299	faddd	%f6,pp1,%f6
2300	fmuld	%f0,%f4,%f4
2301	add	%l4,%g1,%l4
2302
2303	fmuld	%f8,%f14,%f14
2304	faddd	%f12,pp1,%f12
2305
2306	faddd	%f22,pp1,%f22
2307	fmuld	%f16,%f20,%f20
2308	add	%l6,%g1,%l6
2309
2310	fmuld	%f24,%f30,%f30
2311	faddd	%f28,pp1,%f28
2312
2313	fmuld	%f0,%f6,%f6
2314
2315	faddd	%f14,qq1,%f14
2316	fmuld	%f8,%f12,%f12
2317	add	%l5,%g1,%l5
2318
2319	fmuld	%f16,%f22,%f22
2320
2321	faddd	%f30,qq1,%f30
2322	fmuld	%f24,%f28,%f28
2323	add	%l7,%g1,%l7
2324
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2325	fmuld	%f2,%f6,%f6
2326	ldd	[%l4+8],%f0
2327
2328	fmuld	%f10,%f12,%f12
2329
2330	fmuld	%f18,%f22,%f22
2331	ldd	[%l6+8],%f16
2332
2333	fmuld	%f26,%f28,%f28
2334
2335	fmuld	%f0,%f4,%f4
2336	faddd	%f32,%f6,%f6
2337
2338	fmuld	%f8,%f14,%f14
2339	faddd	%f12,%f34,%f12
2340	ldd	[%l5+16],%f8
2341
2342	fmuld	%f16,%f20,%f20
2343	faddd	%f36,%f22,%f22
2344
2345	fmuld	%f24,%f30,%f30
2346	faddd	%f28,%f38,%f28
2347	ldd	[%l7+16],%f24
2348
2349	faddd	%f2,%f6,%f6
2350	ldd	[%l4+16],%f32
2351
2352	fmuld	%f8,%f14,%f14
2353	faddd	%f12,%f10,%f12
2354	ldd	[%l5+8],%f34
2355
2356	faddd	%f18,%f22,%f22
2357	ldd	[%l6+16],%f36
2358
2359	fmuld	%f24,%f30,%f30
2360	faddd	%f28,%f26,%f28
2361	ldd	[%l7+8],%f38
2362
2363	fmuld	%f32,%f6,%f6
2364
2365	fmuld	%f34,%f12,%f12
2366
2367	fmuld	%f36,%f22,%f22
2368
2369	fmuld	%f38,%f28,%f28
2370
/*
 * Combine per lane: sin lanes add the qq product to the pp product, cos
 * lanes subtract the pp product from the qq product; the last add
 * brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
2371	faddd	%f6,%f4,%f6
2372
2373	fsubd	%f14,%f12,%f14
2374
2375	faddd	%f22,%f20,%f22
2376
2377	fsubd	%f30,%f28,%f30
2378
2379	faddd	%f6,%f0,%f6
2380
2381	faddd	%f14,%f8,%f14
2382
2383	faddd	%f22,%f16,%f22
2384
2385	faddd	%f30,%f24,%f30
2386	mov	%l0,%l4
2387
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
2388	fnegd	%f6,%f4
2389	lda	[%i1]%asi,%l0		! preload next argument
2390
2391	fnegd	%f14,%f12
2392	lda	[%i1]%asi,%f0
2393
2394	fnegd	%f22,%f20
2395	lda	[%i1+4]%asi,%f3
2396
2397	fnegd	%f30,%f28
2398	andn	%l0,%i5,%l0
2399	add	%i1,%i2,%i1
2400
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
2401	andcc	%l4,2,%g0
2402	fmovdnz	%icc,%f4,%f6
2403	st	%f6,[%o0]
2404
2405	andcc	%l1,2,%g0
2406	fmovdnz	%icc,%f12,%f14
2407	st	%f14,[%o1]
2408
2409	andcc	%l2,2,%g0
2410	fmovdnz	%icc,%f20,%f22
2411	st	%f22,[%o2]
2412
2413	andcc	%l3,2,%g0
2414	fmovdnz	%icc,%f28,%f30
2415	st	%f30,[%o3]
2416
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
2417	addcc	%i0,-1,%i0
2418	bg,pt	%icc,.loop0
2419! delay slot
2420	st	%f7,[%o0+4]
2421
2422	ba,pt	%icc,.end
2423! delay slot
2424	nop
2425
2426	.align	16
/*
 * .case11: reached from .case10 when bit 0 of %l3 is clear.  Lanes 0,
 * 2 and 3 (x0, x2, x3) use the sin-style evaluation (pp3 first); lane 1
 * (x1) uses the cos-style evaluation (qq3 first).  The squares in
 * %f8/%f16/%f24 were formed on the way here (.case8 and .case10).
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 * The lanes are interleaved and registers are reloaded right after
 * their last use, so instruction order matters.
 */
2427.case11:
2428	fmuld	%f0,pp3,%f6		! sin(x0)
2429
2430	fmuld	%f16,pp3,%f22		! sin(x2)
2431
2432	fmuld	%f24,pp3,%f30		! sin(x3)
2433
2434	faddd	%f6,pp2,%f6
2435	fmuld	%f0,qq2,%f4
2436
2437	fmuld	%f8,qq3,%f14		! cos(x1)
2438
2439	faddd	%f22,pp2,%f22
2440	fmuld	%f16,qq2,%f20
2441
2442	faddd	%f30,pp2,%f30
2443	fmuld	%f24,qq2,%f28
2444
2445	fmuld	%f0,%f6,%f6
2446	faddd	%f4,qq1,%f4
2447
2448	faddd	%f14,qq2,%f14
2449	fmuld	%f8,pp2,%f12
2450
2451	fmuld	%f16,%f22,%f22
2452	faddd	%f20,qq1,%f20
2453
2454	fmuld	%f24,%f30,%f30
2455	faddd	%f28,qq1,%f28
2456
2457	faddd	%f6,pp1,%f6
2458	fmuld	%f0,%f4,%f4
2459	add	%l4,%g1,%l4
2460
2461	fmuld	%f8,%f14,%f14
2462	faddd	%f12,pp1,%f12
2463
2464	faddd	%f22,pp1,%f22
2465	fmuld	%f16,%f20,%f20
2466	add	%l6,%g1,%l6
2467
2468	faddd	%f30,pp1,%f30
2469	fmuld	%f24,%f28,%f28
2470	add	%l7,%g1,%l7
2471
2472	fmuld	%f0,%f6,%f6
2473
2474	faddd	%f14,qq1,%f14
2475	fmuld	%f8,%f12,%f12
2476	add	%l5,%g1,%l5
2477
2478	fmuld	%f16,%f22,%f22
2479
2480	fmuld	%f24,%f30,%f30
2481
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2482	fmuld	%f2,%f6,%f6
2483	ldd	[%l4+8],%f0
2484
2485	fmuld	%f10,%f12,%f12
2486
2487	fmuld	%f18,%f22,%f22
2488	ldd	[%l6+8],%f16
2489
2490	fmuld	%f26,%f30,%f30
2491	ldd	[%l7+8],%f24
2492
2493	fmuld	%f0,%f4,%f4
2494	faddd	%f32,%f6,%f6
2495
2496	fmuld	%f8,%f14,%f14
2497	faddd	%f12,%f34,%f12
2498	ldd	[%l5+16],%f8
2499
2500	fmuld	%f16,%f20,%f20
2501	faddd	%f36,%f22,%f22
2502
2503	fmuld	%f24,%f28,%f28
2504	faddd	%f38,%f30,%f30
2505
2506	faddd	%f2,%f6,%f6
2507	ldd	[%l4+16],%f32
2508
2509	fmuld	%f8,%f14,%f14
2510	faddd	%f12,%f10,%f12
2511	ldd	[%l5+8],%f34
2512
2513	faddd	%f18,%f22,%f22
2514	ldd	[%l6+16],%f36
2515
2516	faddd	%f26,%f30,%f30
2517	ldd	[%l7+16],%f38
2518
2519	fmuld	%f32,%f6,%f6
2520
2521	fmuld	%f34,%f12,%f12
2522
2523	fmuld	%f36,%f22,%f22
2524
2525	fmuld	%f38,%f30,%f30
2526
/*
 * Combine per lane: sin lanes add the qq product to the pp product, the
 * cos lane subtracts the pp product from the qq product; the last add
 * brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
2527	faddd	%f6,%f4,%f6
2528
2529	fsubd	%f14,%f12,%f14
2530
2531	faddd	%f22,%f20,%f22
2532
2533	faddd	%f30,%f28,%f30
2534
2535	faddd	%f6,%f0,%f6
2536
2537	faddd	%f14,%f8,%f14
2538
2539	faddd	%f22,%f16,%f22
2540
2541	faddd	%f30,%f24,%f30
2542	mov	%l0,%l4
2543
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
2544	fnegd	%f6,%f4
2545	lda	[%i1]%asi,%l0		! preload next argument
2546
2547	fnegd	%f14,%f12
2548	lda	[%i1]%asi,%f0
2549
2550	fnegd	%f22,%f20
2551	lda	[%i1+4]%asi,%f3
2552
2553	fnegd	%f30,%f28
2554	andn	%l0,%i5,%l0
2555	add	%i1,%i2,%i1
2556
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
2557	andcc	%l4,2,%g0
2558	fmovdnz	%icc,%f4,%f6
2559	st	%f6,[%o0]
2560
2561	andcc	%l1,2,%g0
2562	fmovdnz	%icc,%f12,%f14
2563	st	%f14,[%o1]
2564
2565	andcc	%l2,2,%g0
2566	fmovdnz	%icc,%f20,%f22
2567	st	%f22,[%o2]
2568
2569	andcc	%l3,2,%g0
2570	fmovdnz	%icc,%f28,%f30
2571	st	%f30,[%o3]
2572
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
2573	addcc	%i0,-1,%i0
2574	bg,pt	%icc,.loop0
2575! delay slot
2576	st	%f7,[%o0+4]
2577
2578	ba,pt	%icc,.end
2579! delay slot
2580	nop
2581
2582	.align	16
/*
 * .case12: reached from .case8 when bit 0 of %l1 is clear.  The entry
 * code squares the lane 2 and lane 3 arguments (%f18, %f26) into %f16
 * and %f24 and tests bit 0 of %l2 and %l3: a clear bit sends control to
 * .case14 or .case13 respectively.  Falling through, lanes 0 and 1
 * (x0, x1) use the sin-style evaluation (pp3 first) and lanes 2 and 3
 * (x2, x3) use the cos-style evaluation (qq3 first).
 *
 * Each fxor sits in a non-annulled delay slot, so it executes whether
 * or not the branch is taken.
 * NOTE(review): %f22/%f30 presumably hold sign masks prepared before
 * entry -- confirm.
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 */
2583.case12:
2584	fmuld	%f18,%f18,%f16
2585	andcc	%l2,1,%g0
2586	bz,pn	%icc,.case14
2587! delay slot
2588	fxor	%f22,%f36,%f36
2589
2590	fmuld	%f26,%f26,%f24
2591	andcc	%l3,1,%g0
2592	bz,pn	%icc,.case13
2593! delay slot
2594	fxor	%f30,%f38,%f38
2595
2596	fmuld	%f0,pp3,%f6		! sin(x0)
2597
2598	fmuld	%f8,pp3,%f14		! sin(x1)
2599
2600	faddd	%f6,pp2,%f6
2601	fmuld	%f0,qq2,%f4
2602
2603	faddd	%f14,pp2,%f14
2604	fmuld	%f8,qq2,%f12
2605
2606	fmuld	%f16,qq3,%f22		! cos(x2)
2607
2608	fmuld	%f24,qq3,%f30		! cos(x3)
2609
2610	fmuld	%f0,%f6,%f6
2611	faddd	%f4,qq1,%f4
2612
2613	fmuld	%f8,%f14,%f14
2614	faddd	%f12,qq1,%f12
2615
2616	faddd	%f22,qq2,%f22
2617	fmuld	%f16,pp2,%f20
2618
2619	faddd	%f30,qq2,%f30
2620	fmuld	%f24,pp2,%f28
2621
2622	faddd	%f6,pp1,%f6
2623	fmuld	%f0,%f4,%f4
2624	add	%l4,%g1,%l4
2625
2626	faddd	%f14,pp1,%f14
2627	fmuld	%f8,%f12,%f12
2628	add	%l5,%g1,%l5
2629
2630	fmuld	%f16,%f22,%f22
2631	faddd	%f20,pp1,%f20
2632
2633	fmuld	%f24,%f30,%f30
2634	faddd	%f28,pp1,%f28
2635
2636	fmuld	%f0,%f6,%f6
2637
2638	fmuld	%f8,%f14,%f14
2639
2640	faddd	%f22,qq1,%f22
2641	fmuld	%f16,%f20,%f20
2642	add	%l6,%g1,%l6
2643
2644	faddd	%f30,qq1,%f30
2645	fmuld	%f24,%f28,%f28
2646	add	%l7,%g1,%l7
2647
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2648	fmuld	%f2,%f6,%f6
2649	ldd	[%l4+8],%f0
2650
2651	fmuld	%f10,%f14,%f14
2652	ldd	[%l5+8],%f8
2653
2654	fmuld	%f18,%f20,%f20
2655
2656	fmuld	%f26,%f28,%f28
2657
2658	fmuld	%f0,%f4,%f4
2659	faddd	%f32,%f6,%f6
2660
2661	fmuld	%f8,%f12,%f12
2662	faddd	%f34,%f14,%f14
2663
2664	fmuld	%f16,%f22,%f22
2665	faddd	%f20,%f36,%f20
2666	ldd	[%l6+16],%f16
2667
2668	fmuld	%f24,%f30,%f30
2669	faddd	%f28,%f38,%f28
2670	ldd	[%l7+16],%f24
2671
2672	faddd	%f2,%f6,%f6
2673	ldd	[%l4+16],%f32
2674
2675	faddd	%f10,%f14,%f14
2676	ldd	[%l5+16],%f34
2677
2678	fmuld	%f16,%f22,%f22
2679	faddd	%f20,%f18,%f20
2680	ldd	[%l6+8],%f36
2681
2682	fmuld	%f24,%f30,%f30
2683	faddd	%f28,%f26,%f28
2684	ldd	[%l7+8],%f38
2685
2686	fmuld	%f32,%f6,%f6
2687
2688	fmuld	%f34,%f14,%f14
2689
2690	fmuld	%f36,%f20,%f20
2691
2692	fmuld	%f38,%f28,%f28
2693
/*
 * Combine per lane: sin lanes add the qq product to the pp product, cos
 * lanes subtract the pp product from the qq product; the last add
 * brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
2694	faddd	%f6,%f4,%f6
2695
2696	faddd	%f14,%f12,%f14
2697
2698	fsubd	%f22,%f20,%f22
2699
2700	fsubd	%f30,%f28,%f30
2701
2702	faddd	%f6,%f0,%f6
2703
2704	faddd	%f14,%f8,%f14
2705
2706	faddd	%f22,%f16,%f22
2707
2708	faddd	%f30,%f24,%f30
2709	mov	%l0,%l4
2710
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
2711	fnegd	%f6,%f4
2712	lda	[%i1]%asi,%l0		! preload next argument
2713
2714	fnegd	%f14,%f12
2715	lda	[%i1]%asi,%f0
2716
2717	fnegd	%f22,%f20
2718	lda	[%i1+4]%asi,%f3
2719
2720	fnegd	%f30,%f28
2721	andn	%l0,%i5,%l0
2722	add	%i1,%i2,%i1
2723
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
2724	andcc	%l4,2,%g0
2725	fmovdnz	%icc,%f4,%f6
2726	st	%f6,[%o0]
2727
2728	andcc	%l1,2,%g0
2729	fmovdnz	%icc,%f12,%f14
2730	st	%f14,[%o1]
2731
2732	andcc	%l2,2,%g0
2733	fmovdnz	%icc,%f20,%f22
2734	st	%f22,[%o2]
2735
2736	andcc	%l3,2,%g0
2737	fmovdnz	%icc,%f28,%f30
2738	st	%f30,[%o3]
2739
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
2740	addcc	%i0,-1,%i0
2741	bg,pt	%icc,.loop0
2742! delay slot
2743	st	%f7,[%o0+4]
2744
2745	ba,pt	%icc,.end
2746! delay slot
2747	nop
2748
2749	.align	16
/*
 * .case13: reached from .case12 when bit 0 of %l3 is clear.  Lanes 0,
 * 1 and 3 (x0, x1, x3) use the sin-style evaluation (pp3 first); lane 2
 * (x2) uses the cos-style evaluation (qq3 first).  The squares in
 * %f8/%f16/%f24 were formed on the way here (.case8 and .case12).
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 * The lanes are interleaved and registers are reloaded right after
 * their last use, so instruction order matters.
 */
2750.case13:
2751	fmuld	%f0,pp3,%f6		! sin(x0)
2752
2753	fmuld	%f8,pp3,%f14		! sin(x1)
2754
2755	fmuld	%f24,pp3,%f30		! sin(x3)
2756
2757	faddd	%f6,pp2,%f6
2758	fmuld	%f0,qq2,%f4
2759
2760	faddd	%f14,pp2,%f14
2761	fmuld	%f8,qq2,%f12
2762
2763	fmuld	%f16,qq3,%f22		! cos(x2)
2764
2765	faddd	%f30,pp2,%f30
2766	fmuld	%f24,qq2,%f28
2767
2768	fmuld	%f0,%f6,%f6
2769	faddd	%f4,qq1,%f4
2770
2771	fmuld	%f8,%f14,%f14
2772	faddd	%f12,qq1,%f12
2773
2774	faddd	%f22,qq2,%f22
2775	fmuld	%f16,pp2,%f20
2776
2777	fmuld	%f24,%f30,%f30
2778	faddd	%f28,qq1,%f28
2779
2780	faddd	%f6,pp1,%f6
2781	fmuld	%f0,%f4,%f4
2782	add	%l4,%g1,%l4
2783
2784	faddd	%f14,pp1,%f14
2785	fmuld	%f8,%f12,%f12
2786	add	%l5,%g1,%l5
2787
2788	fmuld	%f16,%f22,%f22
2789	faddd	%f20,pp1,%f20
2790
2791	faddd	%f30,pp1,%f30
2792	fmuld	%f24,%f28,%f28
2793	add	%l7,%g1,%l7
2794
2795	fmuld	%f0,%f6,%f6
2796
2797	fmuld	%f8,%f14,%f14
2798
2799	faddd	%f22,qq1,%f22
2800	fmuld	%f16,%f20,%f20
2801	add	%l6,%g1,%l6
2802
2803	fmuld	%f24,%f30,%f30
2804
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2805	fmuld	%f2,%f6,%f6
2806	ldd	[%l4+8],%f0
2807
2808	fmuld	%f10,%f14,%f14
2809	ldd	[%l5+8],%f8
2810
2811	fmuld	%f18,%f20,%f20
2812
2813	fmuld	%f26,%f30,%f30
2814	ldd	[%l7+8],%f24
2815
2816	fmuld	%f0,%f4,%f4
2817	faddd	%f32,%f6,%f6
2818
2819	fmuld	%f8,%f12,%f12
2820	faddd	%f34,%f14,%f14
2821
2822	fmuld	%f16,%f22,%f22
2823	faddd	%f20,%f36,%f20
2824	ldd	[%l6+16],%f16
2825
2826	fmuld	%f24,%f28,%f28
2827	faddd	%f38,%f30,%f30
2828
2829	faddd	%f2,%f6,%f6
2830	ldd	[%l4+16],%f32
2831
2832	faddd	%f10,%f14,%f14
2833	ldd	[%l5+16],%f34
2834
2835	fmuld	%f16,%f22,%f22
2836	faddd	%f20,%f18,%f20
2837	ldd	[%l6+8],%f36
2838
2839	faddd	%f26,%f30,%f30
2840	ldd	[%l7+16],%f38
2841
2842	fmuld	%f32,%f6,%f6
2843
2844	fmuld	%f34,%f14,%f14
2845
2846	fmuld	%f36,%f20,%f20
2847
2848	fmuld	%f38,%f30,%f30
2849
/*
 * Combine per lane: sin lanes add the qq product to the pp product, the
 * cos lane subtracts the pp product from the qq product; the last add
 * brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
2850	faddd	%f6,%f4,%f6
2851
2852	faddd	%f14,%f12,%f14
2853
2854	fsubd	%f22,%f20,%f22
2855
2856	faddd	%f30,%f28,%f30
2857
2858	faddd	%f6,%f0,%f6
2859
2860	faddd	%f14,%f8,%f14
2861
2862	faddd	%f22,%f16,%f22
2863
2864	faddd	%f30,%f24,%f30
2865	mov	%l0,%l4
2866
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
2867	fnegd	%f6,%f4
2868	lda	[%i1]%asi,%l0		! preload next argument
2869
2870	fnegd	%f14,%f12
2871	lda	[%i1]%asi,%f0
2872
2873	fnegd	%f22,%f20
2874	lda	[%i1+4]%asi,%f3
2875
2876	fnegd	%f30,%f28
2877	andn	%l0,%i5,%l0
2878	add	%i1,%i2,%i1
2879
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
2880	andcc	%l4,2,%g0
2881	fmovdnz	%icc,%f4,%f6
2882	st	%f6,[%o0]
2883
2884	andcc	%l1,2,%g0
2885	fmovdnz	%icc,%f12,%f14
2886	st	%f14,[%o1]
2887
2888	andcc	%l2,2,%g0
2889	fmovdnz	%icc,%f20,%f22
2890	st	%f22,[%o2]
2891
2892	andcc	%l3,2,%g0
2893	fmovdnz	%icc,%f28,%f30
2894	st	%f30,[%o3]
2895
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
2896	addcc	%i0,-1,%i0
2897	bg,pt	%icc,.loop0
2898! delay slot
2899	st	%f7,[%o0+4]
2900
2901	ba,pt	%icc,.end
2902! delay slot
2903	nop
2904
2905	.align	16
/*
 * .case14: reached from .case12 when bit 0 of %l2 is clear.  The entry
 * code squares lane 3's argument (%f26) into %f24 and tests bit 0 of
 * %l3: a clear bit sends control to .case15.  Falling through, lanes
 * 0-2 (x0, x1, x2) use the sin-style evaluation (pp3 first) and lane 3
 * (x3) uses the cos-style evaluation (qq3 first).
 *
 * The fxor sits in a non-annulled delay slot, so it executes whether or
 * not the branch is taken.
 * NOTE(review): %f30 presumably holds a sign mask prepared before
 * entry -- confirm.
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 */
2906.case14:
2907	fmuld	%f26,%f26,%f24
2908	andcc	%l3,1,%g0
2909	bz,pn	%icc,.case15
2910! delay slot
2911	fxor	%f30,%f38,%f38
2912
2913	fmuld	%f0,pp3,%f6		! sin(x0)
2914
2915	fmuld	%f8,pp3,%f14		! sin(x1)
2916
2917	fmuld	%f16,pp3,%f22		! sin(x2)
2918
2919	faddd	%f6,pp2,%f6
2920	fmuld	%f0,qq2,%f4
2921
2922	faddd	%f14,pp2,%f14
2923	fmuld	%f8,qq2,%f12
2924
2925	faddd	%f22,pp2,%f22
2926	fmuld	%f16,qq2,%f20
2927
2928	fmuld	%f24,qq3,%f30		! cos(x3)
2929
2930	fmuld	%f0,%f6,%f6
2931	faddd	%f4,qq1,%f4
2932
2933	fmuld	%f8,%f14,%f14
2934	faddd	%f12,qq1,%f12
2935
2936	fmuld	%f16,%f22,%f22
2937	faddd	%f20,qq1,%f20
2938
2939	faddd	%f30,qq2,%f30
2940	fmuld	%f24,pp2,%f28
2941
2942	faddd	%f6,pp1,%f6
2943	fmuld	%f0,%f4,%f4
2944	add	%l4,%g1,%l4
2945
2946	faddd	%f14,pp1,%f14
2947	fmuld	%f8,%f12,%f12
2948	add	%l5,%g1,%l5
2949
2950	faddd	%f22,pp1,%f22
2951	fmuld	%f16,%f20,%f20
2952	add	%l6,%g1,%l6
2953
2954	fmuld	%f24,%f30,%f30
2955	faddd	%f28,pp1,%f28
2956
2957	fmuld	%f0,%f6,%f6
2958
2959	fmuld	%f8,%f14,%f14
2960
2961	fmuld	%f16,%f22,%f22
2962
2963	faddd	%f30,qq1,%f30
2964	fmuld	%f24,%f28,%f28
2965	add	%l7,%g1,%l7
2966
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
2967	fmuld	%f2,%f6,%f6
2968	ldd	[%l4+8],%f0
2969
2970	fmuld	%f10,%f14,%f14
2971	ldd	[%l5+8],%f8
2972
2973	fmuld	%f18,%f22,%f22
2974	ldd	[%l6+8],%f16
2975
2976	fmuld	%f26,%f28,%f28
2977
2978	fmuld	%f0,%f4,%f4
2979	faddd	%f32,%f6,%f6
2980
2981	fmuld	%f8,%f12,%f12
2982	faddd	%f34,%f14,%f14
2983
2984	fmuld	%f16,%f20,%f20
2985	faddd	%f36,%f22,%f22
2986
2987	fmuld	%f24,%f30,%f30
2988	faddd	%f28,%f38,%f28
2989	ldd	[%l7+16],%f24
2990
2991	faddd	%f2,%f6,%f6
2992	ldd	[%l4+16],%f32
2993
2994	faddd	%f10,%f14,%f14
2995	ldd	[%l5+16],%f34
2996
2997	faddd	%f18,%f22,%f22
2998	ldd	[%l6+16],%f36
2999
3000	fmuld	%f24,%f30,%f30
3001	faddd	%f28,%f26,%f28
3002	ldd	[%l7+8],%f38
3003
3004	fmuld	%f32,%f6,%f6
3005
3006	fmuld	%f34,%f14,%f14
3007
3008	fmuld	%f36,%f22,%f22
3009
3010	fmuld	%f38,%f28,%f28
3011
/*
 * Combine per lane: sin lanes add the qq product to the pp product, the
 * cos lane subtracts the pp product from the qq product; the last add
 * brings in the word that was reloaded into %f0/%f8/%f16/%f24.
 */
3012	faddd	%f6,%f4,%f6
3013
3014	faddd	%f14,%f12,%f14
3015
3016	faddd	%f22,%f20,%f22
3017
3018	fsubd	%f30,%f28,%f30
3019
3020	faddd	%f6,%f0,%f6
3021
3022	faddd	%f14,%f8,%f14
3023
3024	faddd	%f22,%f16,%f22
3025
3026	faddd	%f30,%f24,%f30
3027	mov	%l0,%l4
3028
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
3029	fnegd	%f6,%f4
3030	lda	[%i1]%asi,%l0		! preload next argument
3031
3032	fnegd	%f14,%f12
3033	lda	[%i1]%asi,%f0
3034
3035	fnegd	%f22,%f20
3036	lda	[%i1+4]%asi,%f3
3037
3038	fnegd	%f30,%f28
3039	andn	%l0,%i5,%l0
3040	add	%i1,%i2,%i1
3041
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
3042	andcc	%l4,2,%g0
3043	fmovdnz	%icc,%f4,%f6
3044	st	%f6,[%o0]
3045
3046	andcc	%l1,2,%g0
3047	fmovdnz	%icc,%f12,%f14
3048	st	%f14,[%o1]
3049
3050	andcc	%l2,2,%g0
3051	fmovdnz	%icc,%f20,%f22
3052	st	%f22,[%o2]
3053
3054	andcc	%l3,2,%g0
3055	fmovdnz	%icc,%f28,%f30
3056	st	%f30,[%o3]
3057
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
3058	addcc	%i0,-1,%i0
3059	bg,pt	%icc,.loop0
3060! delay slot
3061	st	%f7,[%o0+4]
3062
3063	ba,pt	%icc,.end
3064! delay slot
3065	nop
3066
3067	.align	16
/*
 * .case15: reached from .case14 when bit 0 of %l3 is clear.  All four
 * lanes (x0..x3) use the sin-style evaluation (pp3 first).  The squares
 * in %f8/%f16/%f24 were formed on the way here (.case8, .case12,
 * .case14).
 *
 * Registers (lane 0 / 1 / 2 / 3):
 *   %f0  %f8  %f16 %f24   polynomial variable, later reloaded via %lN
 *   %f2  %f10 %f18 %f26   multiplied into the pp polynomial
 *   %f32 %f34 %f36 %f38   added to that product, then reloaded via %lN
 *   %l4  %l5  %l6  %l7    pointers, advanced by %g1 before the loads
 * The lanes are interleaved and registers are reloaded right after
 * their last use, so instruction order matters.
 */
3068.case15:
3069	fmuld	%f0,pp3,%f6		! sin(x0)
3070
3071	fmuld	%f8,pp3,%f14		! sin(x1)
3072
3073	fmuld	%f16,pp3,%f22		! sin(x2)
3074
3075	fmuld	%f24,pp3,%f30		! sin(x3)
3076
3077	faddd	%f6,pp2,%f6
3078	fmuld	%f0,qq2,%f4
3079
3080	faddd	%f14,pp2,%f14
3081	fmuld	%f8,qq2,%f12
3082
3083	faddd	%f22,pp2,%f22
3084	fmuld	%f16,qq2,%f20
3085
3086	faddd	%f30,pp2,%f30
3087	fmuld	%f24,qq2,%f28
3088
3089	fmuld	%f0,%f6,%f6
3090	faddd	%f4,qq1,%f4
3091
3092	fmuld	%f8,%f14,%f14
3093	faddd	%f12,qq1,%f12
3094
3095	fmuld	%f16,%f22,%f22
3096	faddd	%f20,qq1,%f20
3097
3098	fmuld	%f24,%f30,%f30
3099	faddd	%f28,qq1,%f28
3100
3101	faddd	%f6,pp1,%f6
3102	fmuld	%f0,%f4,%f4
3103	add	%l4,%g1,%l4
3104
3105	faddd	%f14,pp1,%f14
3106	fmuld	%f8,%f12,%f12
3107	add	%l5,%g1,%l5
3108
3109	faddd	%f22,pp1,%f22
3110	fmuld	%f16,%f20,%f20
3111	add	%l6,%g1,%l6
3112
3113	faddd	%f30,pp1,%f30
3114	fmuld	%f24,%f28,%f28
3115	add	%l7,%g1,%l7
3116
3117	fmuld	%f0,%f6,%f6
3118
3119	fmuld	%f8,%f14,%f14
3120
3121	fmuld	%f16,%f22,%f22
3122
3123	fmuld	%f24,%f30,%f30
3124
/*
 * Multiply each lane's pp polynomial by %f2/%f10/%f18/%f26 and start
 * loading the words at [%lN+8] and [%lN+16].
 */
3125	fmuld	%f2,%f6,%f6
3126	ldd	[%l4+8],%f0
3127
3128	fmuld	%f10,%f14,%f14
3129	ldd	[%l5+8],%f8
3130
3131	fmuld	%f18,%f22,%f22
3132	ldd	[%l6+8],%f16
3133
3134	fmuld	%f26,%f30,%f30
3135	ldd	[%l7+8],%f24
3136
3137	fmuld	%f0,%f4,%f4
3138	faddd	%f32,%f6,%f6
3139
3140	fmuld	%f8,%f12,%f12
3141	faddd	%f34,%f14,%f14
3142
3143	fmuld	%f16,%f20,%f20
3144	faddd	%f36,%f22,%f22
3145
3146	fmuld	%f24,%f28,%f28
3147	faddd	%f38,%f30,%f30
3148
3149	faddd	%f2,%f6,%f6
3150	ldd	[%l4+16],%f32
3151
3152	faddd	%f10,%f14,%f14
3153	ldd	[%l5+16],%f34
3154
3155	faddd	%f18,%f22,%f22
3156	ldd	[%l6+16],%f36
3157
3158	faddd	%f26,%f30,%f30
3159	ldd	[%l7+16],%f38
3160
3161	fmuld	%f32,%f6,%f6
3162
3163	fmuld	%f34,%f14,%f14
3164
3165	fmuld	%f36,%f22,%f22
3166
3167	fmuld	%f38,%f30,%f30
3168
/*
 * Combine per lane: every lane adds the qq product to the pp product;
 * the last add brings in the word that was reloaded into
 * %f0/%f8/%f16/%f24.
 */
3169	faddd	%f6,%f4,%f6
3170
3171	faddd	%f14,%f12,%f14
3172
3173	faddd	%f22,%f20,%f22
3174
3175	faddd	%f30,%f28,%f30
3176
3177	faddd	%f6,%f0,%f6
3178
3179	faddd	%f14,%f8,%f14
3180
3181	faddd	%f22,%f16,%f22
3182
3183	faddd	%f30,%f24,%f30
3184	mov	%l0,%l4
3185
/*
 * Build negated copies of the four results while preloading the next
 * argument: the word at [%i1] goes to %l0 and %f0, the word at [%i1+4]
 * to %f3.  %l0 was copied to %l4 just above so that its bit 1 can still
 * be tested after the reload.
 */
3186	fnegd	%f6,%f4
3187	lda	[%i1]%asi,%l0		! preload next argument
3188
3189	fnegd	%f14,%f12
3190	lda	[%i1]%asi,%f0
3191
3192	fnegd	%f22,%f20
3193	lda	[%i1+4]%asi,%f3
3194
3195	fnegd	%f30,%f28
3196	andn	%l0,%i5,%l0
3197	add	%i1,%i2,%i1
3198
/*
 * Per lane: if bit 1 of %l4/%l1/%l2/%l3 is set, replace the result with
 * its negation, then store the first word of the result.
 */
3199	andcc	%l4,2,%g0
3200	fmovdnz	%icc,%f4,%f6
3201	st	%f6,[%o0]
3202
3203	andcc	%l1,2,%g0
3204	fmovdnz	%icc,%f12,%f14
3205	st	%f14,[%o1]
3206
3207	andcc	%l2,2,%g0
3208	fmovdnz	%icc,%f20,%f22
3209	st	%f22,[%o2]
3210
3211	andcc	%l3,2,%g0
3212	fmovdnz	%icc,%f28,%f30
3213	st	%f30,[%o3]
3214
/*
 * Decrement %i0 and go back to .loop0 while it stays positive; the
 * (non-annulled) delay slot stores lane 0's second word on both paths.
 * Otherwise fall through to the branch to .end, which stores the second
 * words of lanes 1-3.
 */
3215	addcc	%i0,-1,%i0
3216	bg,pt	%icc,.loop0
3217! delay slot
3218	st	%f7,[%o0+4]
3219
3220	ba,pt	%icc,.end
3221! delay slot
3222	nop
3223
3224
3225	.align	16
/*
 * .end: common exit of the vector loop.  Stores the still-pending
 * second words of the lane 1-3 results (%f15, %f23, %f31) through
 * %o1-%o3, then reads the biguns slot from the frame.  If it is zero
 * the routine returns through .exit.  Otherwise the values saved in
 * the xsave/ysave/nsave/sxsave/sysave frame slots are reloaded into the
 * outgoing argument registers and __vlibm_vcos_big_ultra3 is called.
 * NOTE(review): the slot names suggest these are the original x and y
 * pointers, element count and strides saved at entry -- confirm.
 */
3226.end:
3227	st	%f15,[%o1+4]
3228	st	%f23,[%o2+4]
3229	st	%f31,[%o3+4]
3230	ld	[%fp+biguns],%i5
3231	tst	%i5			! check for huge arguments remaining
3232	be,pt	%icc,.exit
3233! delay slot
3234	nop
/* Pointer-sized slots: 64-bit loads under __sparcv9, 32-bit otherwise. */
3235#ifdef __sparcv9
3236	ldx	[%fp+xsave],%o1
3237	ldx	[%fp+ysave],%o3
3238#else
3239	ld	[%fp+xsave],%o1
3240	ld	[%fp+ysave],%o3
3241#endif
3242	ld	[%fp+nsave],%o0
3243	ld	[%fp+sxsave],%o2
3244	ld	[%fp+sysave],%o4
3245	sra	%o2,0,%o2		! sign-extend for V9
3246	sra	%o4,0,%o4
/*
 * %o5 is not loaded in this block; it is only sign-extended in the
 * call's delay slot.
 * NOTE(review): it must still hold the sixth argument set up earlier
 * in the function -- confirm.
 */
3247	call	__vlibm_vcos_big_ultra3
3248	sra	%o5,0,%o5		! delay slot
3249
/* .exit: return to the caller; restore executes in ret's delay slot. */
3250.exit:
3251	ret
3252	restore
3253
3254
3255	.align	16
/*
 * .last1 / .last2 / .last3: fill the unused lanes with dummy work.
 * NOTE(review): presumably entered when the element count runs out
 * before all four lanes are loaded -- confirm against the main loop.
 *
 * The three stages fall through into one another, so entering at
 * .lastK neutralises lanes K..3.  For each such lane the code zeroes
 * the lane's integer register (%l1/%l2/%l3) and argument registers
 * (%f8,%f10 / %f16,%f18 / %f24,%f26) and points the lane's output
 * pointer (%o1/%o2/%o3) at the frame slot `junk`, so the lane's later
 * stores land in scratch space.  Before a pointer is redirected, the
 * second word of that lane's previous result is stored through it.
 *
 * The .lastK_from_rangeK labels are the entry points used by the
 * .rangeK handlers; they skip the faddd and the pending store.
 *
 * Each faddd adds c3two44 to the previous lane's argument; .last3 also
 * stores %f5 and %f13 to the nk0 and nk1 frame slots.
 * NOTE(review): this presumably replays the argument-reduction step
 * those lanes would have received in the main loop -- confirm.
 */
3256.last1:
3257	faddd	%f2,c3two44,%f4
3258	st	%f15,[%o1+4]
3259.last1_from_range1:
3260	mov	0,%l1
3261	fzeros	%f8
3262	fzero	%f10
3263	add	%fp,junk,%o1
3264.last2:
3265	faddd	%f10,c3two44,%f12
3266	st	%f23,[%o2+4]
3267.last2_from_range2:
3268	mov	0,%l2
3269	fzeros	%f16
3270	fzero	%f18
3271	add	%fp,junk,%o2
3272.last3:
3273	faddd	%f18,c3two44,%f20
3274	st	%f31,[%o3+4]
3275	st	%f5,[%fp+nk0]
3276	st	%f13,[%fp+nk1]
3277.last3_from_range3:
3278	mov	0,%l3
3279	fzeros	%f24
3280	fzero	%f26
/* Rejoin the main path at .cont; %o3 is redirected in the delay slot. */
3281	ba,pt	%icc,.cont
3282! delay slot
3283	add	%fp,junk,%o3
3284
3285
3286	.align	16
/*
 * .range0: lane 0's argument needs special handling, decided from the
 * word in %l0 (sign already cleared, per the hx comments below):
 *   - below %o4 (0x3e400000 per the comment): store 1.0 (0x3ff00000, 0)
 *     through %o0; fdtoi raises inexact when the argument is nonzero.
 *   - otherwise below 0x7ff00000 (finite): store the nonzero %o4 into
 *     the biguns frame slot, which .end tests, and write no result
 *     here.  The store is in an annulled delay slot, so it happens only
 *     when the branch is taken.
 *   - otherwise (infinity or NaN): multiply the argument by zero, which
 *     yields NaN, and store both words through %o0.
 * Then decrement %i0; if it is not positive, go to .end.  Otherwise
 * move lane 1's already-loaded argument into lane 0 (%l1 -> %l0,
 * %f8 -> %f0, %f11 -> %f3), step x and y, and re-enter at .loop0.
 */
3287.range0:
3288	cmp	%l0,%o4
3289	bl,pt	%icc,1f			! hx < 0x3e400000
3290! delay slot, harmless if branch taken
3291	sethi	%hi(0x7ff00000),%o7
3292	cmp	%l0,%o7
3293	bl,a,pt	%icc,2f			! branch if finite
3294! delay slot, squashed if branch not taken
3295	st	%o4,[%fp+biguns]	! set biguns
3296	fzero	%f0
3297	fmuld	%f2,%f0,%f2
3298	st	%f2,[%o0]
3299	ba,pt	%icc,2f
3300! delay slot
3301	st	%f3,[%o0+4]
33021:
3303	fdtoi	%f2,%f4			! raise inexact if not zero
3304	sethi	%hi(0x3ff00000),%o7
3305	st	%o7,[%o0]
3306	st	%g0,[%o0+4]
33072:
3308	addcc	%i0,-1,%i0
3309	ble,pn	%icc,.end
3310! delay slot, harmless if branch taken
3311	add	%i3,%i4,%i3		! y += stridey
3312	andn	%l1,%i5,%l0		! hx &= ~0x80000000
3313	fmovs	%f8,%f0
3314	fmovs	%f11,%f3
3315	ba,pt	%icc,.loop0
3316! delay slot
3317	add	%i1,%i2,%i1		! x += stridex
3318
3319
3320	.align	16
/*
 * .range1: lane 1's argument needs special handling, decided from the
 * word in %l1 (sign already cleared, per the hx comments below):
 *   - below %o4 (0x3e400000 per the comment): store 1.0 (0x3ff00000, 0)
 *     through %o1; fdtoi raises inexact when the argument is nonzero.
 *   - otherwise below 0x7ff00000 (finite): store the nonzero %o4 into
 *     the biguns frame slot, which .end tests, and write no result
 *     here.  The store is in an annulled delay slot, so it happens only
 *     when the branch is taken.
 *   - otherwise (infinity or NaN): multiply the argument by zero, which
 *     yields NaN, and store both words through %o1.
 * Then decrement %i0; if it is not positive, go to .last1_from_range1
 * so the remaining lanes are filled with dummies.  Otherwise move lane
 * 2's already-loaded argument into lane 1 (%l2 -> %l1, %f16 -> %f8,
 * %f19 -> %f11), step x and y, and re-enter at .loop1.
 */
3321.range1:
3322	cmp	%l1,%o4
3323	bl,pt	%icc,1f			! hx < 0x3e400000
3324! delay slot, harmless if branch taken
3325	sethi	%hi(0x7ff00000),%o7
3326	cmp	%l1,%o7
3327	bl,a,pt	%icc,2f			! branch if finite
3328! delay slot, squashed if branch not taken
3329	st	%o4,[%fp+biguns]	! set biguns
3330	fzero	%f8
3331	fmuld	%f10,%f8,%f10
3332	st	%f10,[%o1]
3333	ba,pt	%icc,2f
3334! delay slot
3335	st	%f11,[%o1+4]
33361:
3337	fdtoi	%f10,%f12		! raise inexact if not zero
3338	sethi	%hi(0x3ff00000),%o7
3339	st	%o7,[%o1]
3340	st	%g0,[%o1+4]
33412:
3342	addcc	%i0,-1,%i0
3343	ble,pn	%icc,.last1_from_range1
3344! delay slot, harmless if branch taken
3345	add	%i3,%i4,%i3		! y += stridey
3346	andn	%l2,%i5,%l1		! hx &= ~0x80000000
3347	fmovs	%f16,%f8
3348	fmovs	%f19,%f11
3349	ba,pt	%icc,.loop1
3350! delay slot
3351	add	%i1,%i2,%i1		! x += stridex
3352
3353
3354	.align	16
/*
 * .range2: lane 2's argument needs special handling, decided from the
 * word in %l2 (sign already cleared, per the hx comments below):
 *   - below %o4 (0x3e400000 per the comment): store 1.0 (0x3ff00000, 0)
 *     through %o2; fdtoi raises inexact when the argument is nonzero.
 *   - otherwise below 0x7ff00000 (finite): store the nonzero %o4 into
 *     the biguns frame slot, which .end tests, and write no result
 *     here.  The store is in an annulled delay slot, so it happens only
 *     when the branch is taken.
 *   - otherwise (infinity or NaN): multiply the argument by zero, which
 *     yields NaN, and store both words through %o2.
 * Then decrement %i0; if it is not positive, go to .last2_from_range2
 * so the remaining lanes are filled with dummies.  Otherwise move lane
 * 3's already-loaded argument into lane 2 (%l3 -> %l2, %f24 -> %f16,
 * %f27 -> %f19), step x and y, and re-enter at .loop2.
 */
3355.range2:
3356	cmp	%l2,%o4
3357	bl,pt	%icc,1f			! hx < 0x3e400000
3358! delay slot, harmless if branch taken
3359	sethi	%hi(0x7ff00000),%o7
3360	cmp	%l2,%o7
3361	bl,a,pt	%icc,2f			! branch if finite
3362! delay slot, squashed if branch not taken
3363	st	%o4,[%fp+biguns]	! set biguns
3364	fzero	%f16
3365	fmuld	%f18,%f16,%f18
3366	st	%f18,[%o2]
3367	ba,pt	%icc,2f
3368! delay slot
3369	st	%f19,[%o2+4]
33701:
3371	fdtoi	%f18,%f20		! raise inexact if not zero
3372	sethi	%hi(0x3ff00000),%o7
3373	st	%o7,[%o2]
3374	st	%g0,[%o2+4]
33752:
3376	addcc	%i0,-1,%i0
3377	ble,pn	%icc,.last2_from_range2
3378! delay slot, harmless if branch taken
3379	add	%i3,%i4,%i3		! y += stridey
3380	andn	%l3,%i5,%l2		! hx &= ~0x80000000
3381	fmovs	%f24,%f16
3382	fmovs	%f27,%f19
3383	ba,pt	%icc,.loop2
3384! delay slot
3385	add	%i1,%i2,%i1		! x += stridex
3386
3387
3388	.align	16
/*
 * .range3: lane 3's argument needs special handling, decided from the
 * word in %l3 (sign already cleared, per the hx comments below):
 *   - below %o4 (0x3e400000 per the comment): store 1.0 (0x3ff00000, 0)
 *     through %o3; fdtoi raises inexact when the argument is nonzero.
 *   - otherwise below 0x7ff00000 (finite): store the nonzero %o4 into
 *     the biguns frame slot, which .end tests, and write no result
 *     here.  The store is in an annulled delay slot, so it happens only
 *     when the branch is taken.
 *   - otherwise (infinity or NaN): multiply the argument by zero, which
 *     yields NaN, and store both words through %o3.
 * Then decrement %i0; if it is not positive, go to .last3_from_range3
 * so lane 3 is filled with a dummy.  Otherwise there is no later lane
 * to borrow from, so the next argument is loaded straight from [%i1]
 * (first word to %l3 and %f24, second word to %f27), its sign bit is
 * cleared in %l3, x and y are stepped, and control re-enters at .loop3.
 */
3389.range3:
3390	cmp	%l3,%o4
3391	bl,pt	%icc,1f			! hx < 0x3e400000
3392! delay slot, harmless if branch taken
3393	sethi	%hi(0x7ff00000),%o7
3394	cmp	%l3,%o7
3395	bl,a,pt	%icc,2f			! branch if finite
3396! delay slot, squashed if branch not taken
3397	st	%o4,[%fp+biguns]	! set biguns
3398	fzero	%f24
3399	fmuld	%f26,%f24,%f26
3400	st	%f26,[%o3]
3401	ba,pt	%icc,2f
3402! delay slot
3403	st	%f27,[%o3+4]
34041:
3405	fdtoi	%f26,%f28		! raise inexact if not zero
3406	sethi	%hi(0x3ff00000),%o7
3407	st	%o7,[%o3]
3408	st	%g0,[%o3+4]
34092:
3410	addcc	%i0,-1,%i0
3411	ble,pn	%icc,.last3_from_range3
3412! delay slot, harmless if branch taken
3413	add	%i3,%i4,%i3		! y += stridey
3414	ld	[%i1],%l3
3415	ld	[%i1],%f24
3416	ld	[%i1+4],%f27
3417	andn	%l3,%i5,%l3		! hx &= ~0x80000000
3418	ba,pt	%icc,.loop3
3419! delay slot
3420	add	%i1,%i2,%i1		! x += stridex
3421
3422	SET_SIZE(__vcos_ultra3)
3423
3424