xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vsin_ultra3.S"
30
31#include "libm.h"
32	.weak	__vsin			! export __vsin as a weak symbol ...
33	.type	__vsin,#function	! ... typed as a function ...
34	__vsin = __vsin_ultra3		! ... aliased to the ENTRY(__vsin_ultra3) routine in this file
35
36	RO_DATA
37	.align	64
! Table of twelve doubles, each written as a (high word, low word) pair.
! The entry code loads them with ldd from offsets 0x00 through 0x58 into
! %f40-%f62 in exactly this order, so entries must not be reordered or
! inserted without updating those loads.
! NOTE(review): RO_DATA comes from libm.h; presumably it selects a
! read-only data section - confirm against the header.
38constants:
39	.word	0x42c80000,0x00000000	! 3 * 2^44: added to |x| so the low word of the sum is the table index k
40	.word	0x43380000,0x00000000	! 3 * 2^51: added to x * invpio2 so the low word of the sum is the integer n
41	.word	0x3fe45f30,0x6dc9c883	! invpio2: multiplied by x in the medium range path
42	.word	0x3ff921fb,0x54442c00	! pio2_1: first piece of pi/2; n * pio2_1 is subtracted from x first
43	.word	0x3d318469,0x898cc400	! pio2_2: second piece, n * pio2_2 is subtracted next
44	.word	0x3a71701b,0x839a2520	! pio2_3: third piece; also the tiny positive threshold used for sign tests
45	.word	0xbfc55555,0x55555533	! pp1: sin polynomial coefficient in z = x * x (about -1/6)
46	.word	0x3f811111,0x10e7d53b	! pp2: sin polynomial coefficient (about 1/120)
47	.word	0xbf2a0167,0xe6b3cf9b	! pp3: sin polynomial coefficient (about -1/5040)
48	.word	0xbfdfffff,0xffffff65	! qq1: cos polynomial coefficient in z = x * x (about -1/2)
49	.word	0x3fa55555,0x54f88ed0	! qq2: cos polynomial coefficient (about 1/24)
50	.word	0xbf56c12c,0xdd185f60	! qq3: cos polynomial coefficient (about -1/720)
51
52! local storage indices
! Each name is an offset that is added to %fp (STACK_BIAS is already
! folded in) to address the tmps bytes of scratch space reserved by the
! save instruction at function entry.
!   xsave, ysave    x and y pointer arguments (%i1, %i3) as passed in
!   nsave           element count n (%i0) as passed in
!   sxsave, sysave  stridex and stridey (%i2, %i4), stored before they
!                   are scaled by 8
!   biguns          cleared to 0 at entry (NOTE(review): where it is set
!                   and read is outside the visible part of the file)
!   nk0-nk3         one word each; the low word of an FP sum is stored
!                   here and reloaded into an integer register as the
!                   table index k or the quadrant count n
!   junk            scratch address placed in py1-py3 by the loop
!                   prologue, so stores through them made before a real
!                   y pointer is assigned land here
! Do not put a trailing "!" comment on a #define line: cpp would paste
! it into every use of the macro.
53
54#define xsave		STACK_BIAS-0x8
55#define ysave		STACK_BIAS-0x10
56#define nsave		STACK_BIAS-0x14
57#define sxsave		STACK_BIAS-0x18
58#define sysave		STACK_BIAS-0x1c
59#define biguns		STACK_BIAS-0x20
60#define nk3		STACK_BIAS-0x24
61#define nk2		STACK_BIAS-0x28
62#define nk1		STACK_BIAS-0x2c
63#define nk0		STACK_BIAS-0x30
64#define junk		STACK_BIAS-0x38
65! sizeof temp storage - must be a multiple of 16 for V9
! (the deepest slot above is junk at -0x38, which fits inside 0x40)
66#define tmps		0x40
67
68! register use
69
70! i0  n
71! i1  x
72! i2  stridex
73! i3  y
74! i4  stridey
75! i5  0x80000000
76
77! l0  hx0
78! l1  hx1
79! l2  hx2
80! l3  hx3
81! l4  k0
82! l5  k1
83! l6  k2
84! l7  k3
85
86! the following are 64-bit registers in both V8+ and V9
87
88! g1  __vlibm_TBL_sincos2
89! g5  scratch
90
91! o0  py0
92! o1  py1
93! o2  py2
94! o3  py3
95! o4  0x3e400000
96! o5  0x3fe921fb,0x4099251e
97! o7  scratch
98
99! f0  hx0
100! f2
101! f4
102! f6
103! f8  hx1
104! f10
105! f12
106! f14
107! f16 hx2
108! f18
109! f20
110! f22
111! f24 hx3
112! f26
113! f28
114! f30
115! f32
116! f34
117! f36
118! f38
119
! Names for the double-precision FP registers that hold the constants
! table.  The entry code fills them with ldd from constants+0x00 through
! constants+0x58, one register per table entry, in this same order, so
! this list and the table must stay in step.
120#define c3two44	%f40
121#define c3two51	%f42
122#define invpio2	%f44
123#define pio2_1	%f46
124#define pio2_2	%f48
125#define pio2_3	%f50
126#define pp1	%f52
127#define pp2	%f54
128#define pp3	%f56
129#define qq1	%f58
130#define qq2	%f60
131#define qq3	%f62
132
133	ENTRY(__vsin_ultra3)
134	save	%sp,-SA(MINFRAME)-tmps,%sp
135	PIC_SETUP(l7)
136	PIC_SET(l7,constants,o0)
137	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
138	mov	%o1,%g1
139	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
140#ifdef __sparcv9
141	stx	%i1,[%fp+xsave]		! save arguments
142	stx	%i3,[%fp+ysave]
143#else
144	st	%i1,[%fp+xsave]		! save arguments
145	st	%i3,[%fp+ysave]
146#endif
147	st	%i0,[%fp+nsave]
148	st	%i2,[%fp+sxsave]
149	st	%i4,[%fp+sysave]
150	st	%g0,[%fp+biguns]	! biguns = 0
151	ldd	[%o0+0x00],c3two44	! load/set up constants
152	ldd	[%o0+0x08],c3two51
153	ldd	[%o0+0x10],invpio2
154	ldd	[%o0+0x18],pio2_1
155	ldd	[%o0+0x20],pio2_2
156	ldd	[%o0+0x28],pio2_3
157	ldd	[%o0+0x30],pp1
158	ldd	[%o0+0x38],pp2
159	ldd	[%o0+0x40],pp3
160	ldd	[%o0+0x48],qq1
161	ldd	[%o0+0x50],qq2
162	ldd	[%o0+0x58],qq3
163	sethi	%hi(0x80000000),%i5
164	sethi	%hi(0x3e400000),%o4
165	sethi	%hi(0x3fe921fb),%o5
166	or	%o5,%lo(0x3fe921fb),%o5
167	sllx	%o5,32,%o5
168	sethi	%hi(0x4099251e),%o7
169	or	%o7,%lo(0x4099251e),%o7
170	or	%o5,%o7,%o5
171	sll	%i2,3,%i2		! scale strides
172	sll	%i4,3,%i4
173	add	%fp,junk,%o1		! loop prologue
174	add	%fp,junk,%o2
175	add	%fp,junk,%o3
176	ld	[%i1],%l0		! *x
177	ld	[%i1],%f0
178	ld	[%i1+4],%f3
179	andn	%l0,%i5,%l0		! mask off sign
180	ba	.loop0
181	add	%i1,%i2,%i1		! x += stridex
182
183! 16-byte aligned
184	.align	16
185.loop0:
186	lda	[%i1]%asi,%l1		! preload next argument
187	sub	%l0,%o4,%g5
188	sub	%o5,%l0,%o7
189	fabss	%f0,%f2
190
191	lda	[%i1]%asi,%f8
192	orcc	%o7,%g5,%g0
193	mov	%i3,%o0			! py0 = y
194	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e
195
196! delay slot
197	lda	[%i1+4]%asi,%f11
198	addcc	%i0,-1,%i0
199	add	%i3,%i4,%i3		! y += stridey
200	ble,pn	%icc,.last1
201
202! delay slot
203	andn	%l1,%i5,%l1
204	add	%i1,%i2,%i1		! x += stridex
205	faddd	%f2,c3two44,%f4
206	st	%f15,[%o1+4]
207
208.loop1:
209	lda	[%i1]%asi,%l2		! preload next argument
210	sub	%l1,%o4,%g5
211	sub	%o5,%l1,%o7
212	fabss	%f8,%f10
213
214	lda	[%i1]%asi,%f16
215	orcc	%o7,%g5,%g0
216	mov	%i3,%o1			! py1 = y
217	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e
218
219! delay slot
220	lda	[%i1+4]%asi,%f19
221	addcc	%i0,-1,%i0
222	add	%i3,%i4,%i3		! y += stridey
223	ble,pn	%icc,.last2
224
225! delay slot
226	andn	%l2,%i5,%l2
227	add	%i1,%i2,%i1		! x += stridex
228	faddd	%f10,c3two44,%f12
229	st	%f23,[%o2+4]
230
231.loop2:
232	lda	[%i1]%asi,%l3		! preload next argument
233	sub	%l2,%o4,%g5
234	sub	%o5,%l2,%o7
235	fabss	%f16,%f18
236
237	lda	[%i1]%asi,%f24
238	orcc	%o7,%g5,%g0
239	mov	%i3,%o2			! py2 = y
240	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
241
242! delay slot
243	lda	[%i1+4]%asi,%f27
244	addcc	%i0,-1,%i0
245	add	%i3,%i4,%i3		! y += stridey
246	ble,pn	%icc,.last3
247
248! delay slot
249	andn	%l3,%i5,%l3
250	add	%i1,%i2,%i1		! x += stridex
251	faddd	%f18,c3two44,%f20
252	st	%f31,[%o3+4]
253
254.loop3:
255	sub	%l3,%o4,%g5
256	sub	%o5,%l3,%o7
257	fabss	%f24,%f26
258	st	%f5,[%fp+nk0]
259
260	orcc	%o7,%g5,%g0
261	mov	%i3,%o3			! py3 = y
262	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
263! delay slot
264	st	%f13,[%fp+nk1]
265
266!!! DONE?
267.cont:
268	srlx	%o5,32,%o7
269	add	%i3,%i4,%i3		! y += stridey
270	fmovs	%f3,%f1
271	st	%f21,[%fp+nk2]
272
273	sub	%o7,%l0,%l0
274	sub	%o7,%l1,%l1
275	faddd	%f26,c3two44,%f28
276	st	%f29,[%fp+nk3]
277
278	sub	%o7,%l2,%l2
279	sub	%o7,%l3,%l3
280	fmovs	%f11,%f9
281
282	or	%l0,%l1,%l0
283	or	%l2,%l3,%l2
284	fmovs	%f19,%f17
285
286	fmovs	%f27,%f25
287	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range
288
289	fmuld	%f8,invpio2,%f14
290	ld	[%fp+nk0],%l4
291
292	fmuld	%f16,invpio2,%f22
293	ld	[%fp+nk1],%l5
294
295	orcc	%l0,%l2,%g0
296	bl,pn	%icc,.medium
297! delay slot
298	fmuld	%f24,invpio2,%f30
299	ld	[%fp+nk2],%l6
300
301	ld	[%fp+nk3],%l7
302	sll	%l4,5,%l4		! k
303	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0
304
305	sll	%l5,5,%l5
306	ldd	[%l4+%g1],%f4
307	fcmpd	%fcc1,%f8,pio2_3
308
309	sll	%l6,5,%l6
310	ldd	[%l5+%g1],%f12
311	fcmpd	%fcc2,%f16,pio2_3
312
313	sll	%l7,5,%l7
314	ldd	[%l6+%g1],%f20
315	fcmpd	%fcc3,%f24,pio2_3
316
317	ldd	[%l7+%g1],%f28
318	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
319
320	fsubd	%f10,%f12,%f10
321
322	fsubd	%f18,%f20,%f18
323
324	fsubd	%f26,%f28,%f26
325
326	fmuld	%f2,%f2,%f0		! z = x * x
327
328	fmuld	%f10,%f10,%f8
329
330	fmuld	%f18,%f18,%f16
331
332	fmuld	%f26,%f26,%f24
333
334	fmuld	%f0,pp3,%f6
335
336	fmuld	%f8,pp3,%f14
337
338	fmuld	%f16,pp3,%f22
339
340	fmuld	%f24,pp3,%f30
341
342	faddd	%f6,pp2,%f6
343	fmuld	%f0,qq2,%f4
344
345	faddd	%f14,pp2,%f14
346	fmuld	%f8,qq2,%f12
347
348	faddd	%f22,pp2,%f22
349	fmuld	%f16,qq2,%f20
350
351	faddd	%f30,pp2,%f30
352	fmuld	%f24,qq2,%f28
353
354	fmuld	%f0,%f6,%f6
355	faddd	%f4,qq1,%f4
356
357	fmuld	%f8,%f14,%f14
358	faddd	%f12,qq1,%f12
359
360	fmuld	%f16,%f22,%f22
361	faddd	%f20,qq1,%f20
362
363	fmuld	%f24,%f30,%f30
364	faddd	%f28,qq1,%f28
365
366	faddd	%f6,pp1,%f6
367	fmuld	%f0,%f4,%f4
368	add	%l4,%g1,%l4
369
370	faddd	%f14,pp1,%f14
371	fmuld	%f8,%f12,%f12
372	add	%l5,%g1,%l5
373
374	faddd	%f22,pp1,%f22
375	fmuld	%f16,%f20,%f20
376	add	%l6,%g1,%l6
377
378	faddd	%f30,pp1,%f30
379	fmuld	%f24,%f28,%f28
380	add	%l7,%g1,%l7
381
382	fmuld	%f0,%f6,%f6
383	ldd	[%l4+8],%f0
384
385	fmuld	%f8,%f14,%f14
386	ldd	[%l5+8],%f8
387
388	fmuld	%f16,%f22,%f22
389	ldd	[%l6+8],%f16
390
391	fmuld	%f24,%f30,%f30
392	ldd	[%l7+8],%f24
393
394	fmuld	%f2,%f6,%f6
395
396	fmuld	%f10,%f14,%f14
397
398	fmuld	%f18,%f22,%f22
399
400	fmuld	%f26,%f30,%f30
401
402	faddd	%f6,%f2,%f6
403	fmuld	%f0,%f4,%f4
404	ldd	[%l4+16],%f2
405
406	faddd	%f14,%f10,%f14
407	fmuld	%f8,%f12,%f12
408	ldd	[%l5+16],%f10
409
410	faddd	%f22,%f18,%f22
411	fmuld	%f16,%f20,%f20
412	ldd	[%l6+16],%f18
413
414	faddd	%f30,%f26,%f30
415	fmuld	%f24,%f28,%f28
416	ldd	[%l7+16],%f26
417
418	fmuld	%f2,%f6,%f6
419
420	fmuld	%f10,%f14,%f14
421
422	fmuld	%f18,%f22,%f22
423
424	fmuld	%f26,%f30,%f30
425
426	faddd	%f6,%f4,%f6
427
428	faddd	%f14,%f12,%f14
429
430	faddd	%f22,%f20,%f22
431
432	faddd	%f30,%f28,%f30
433
434	faddd	%f6,%f0,%f6
435
436	faddd	%f14,%f8,%f14
437
438	faddd	%f22,%f16,%f22
439
440	faddd	%f30,%f24,%f30
441
442	fnegd	%f6,%f4
443	lda	[%i1]%asi,%l0		! preload next argument
444
445	fnegd	%f14,%f12
446	lda	[%i1]%asi,%f0
447
448	fnegd	%f22,%f20
449	lda	[%i1+4]%asi,%f3
450
451	fnegd	%f30,%f28
452	andn	%l0,%i5,%l0
453	add	%i1,%i2,%i1
454
455	fmovdl	%fcc0,%f4,%f6		! (hx < -0)? -s : s
456	st	%f6,[%o0]
457
458	fmovdl	%fcc1,%f12,%f14
459	st	%f14,[%o1]
460
461	fmovdl	%fcc2,%f20,%f22
462	st	%f22,[%o2]
463
464	fmovdl	%fcc3,%f28,%f30
465	st	%f30,[%o3]
466	addcc	%i0,-1,%i0
467
468	bg,pt	%icc,.loop0
469! delay slot
470	st	%f7,[%o0+4]
471
472	ba,pt	%icc,.end
473! delay slot
474	nop
475
476
477	.align	16
478.medium:
479	faddd	%f6,c3two51,%f4
480	st	%f5,[%fp+nk0]
481
482	faddd	%f14,c3two51,%f12
483	st	%f13,[%fp+nk1]
484
485	faddd	%f22,c3two51,%f20
486	st	%f21,[%fp+nk2]
487
488	faddd	%f30,c3two51,%f28
489	st	%f29,[%fp+nk3]
490
491	fsubd	%f4,c3two51,%f6
492
493	fsubd	%f12,c3two51,%f14
494
495	fsubd	%f20,c3two51,%f22
496
497	fsubd	%f28,c3two51,%f30
498
499	fmuld	%f6,pio2_1,%f2
500	ld	[%fp+nk0],%l0		! n
501
502	fmuld	%f14,pio2_1,%f10
503	ld	[%fp+nk1],%l1
504
505	fmuld	%f22,pio2_1,%f18
506	ld	[%fp+nk2],%l2
507
508	fmuld	%f30,pio2_1,%f26
509	ld	[%fp+nk3],%l3
510
511	fsubd	%f0,%f2,%f0
512	fmuld	%f6,pio2_2,%f4
513
514	fsubd	%f8,%f10,%f8
515	fmuld	%f14,pio2_2,%f12
516
517	fsubd	%f16,%f18,%f16
518	fmuld	%f22,pio2_2,%f20
519
520	fsubd	%f24,%f26,%f24
521	fmuld	%f30,pio2_2,%f28
522
523	fsubd	%f0,%f4,%f32
524
525	fsubd	%f8,%f12,%f34
526
527	fsubd	%f16,%f20,%f36
528
529	fsubd	%f24,%f28,%f38
530
531	fsubd	%f0,%f32,%f0
532	fcmple32 %f32,pio2_3,%l4	! x <= pio2_3 iff x < 0
533
534	fsubd	%f8,%f34,%f8
535	fcmple32 %f34,pio2_3,%l5
536
537	fsubd	%f16,%f36,%f16
538	fcmple32 %f36,pio2_3,%l6
539
540	fsubd	%f24,%f38,%f24
541	fcmple32 %f38,pio2_3,%l7
542
543	fsubd	%f0,%f4,%f0
544	fmuld	%f6,pio2_3,%f6
545	sll	%l4,30,%l4		! if (x < 0) n = -n ^ 2
546
547	fsubd	%f8,%f12,%f8
548	fmuld	%f14,pio2_3,%f14
549	sll	%l5,30,%l5
550
551	fsubd	%f16,%f20,%f16
552	fmuld	%f22,pio2_3,%f22
553	sll	%l6,30,%l6
554
555	fsubd	%f24,%f28,%f24
556	fmuld	%f30,pio2_3,%f30
557	sll	%l7,30,%l7
558
559	fsubd	%f6,%f0,%f6
560	sra	%l4,31,%l4
561
562	fsubd	%f14,%f8,%f14
563	sra	%l5,31,%l5
564
565	fsubd	%f22,%f16,%f22
566	sra	%l6,31,%l6
567
568	fsubd	%f30,%f24,%f30
569	sra	%l7,31,%l7
570
571	fsubd	%f32,%f6,%f0		! reduced x
572	xor	%l0,%l4,%l0
573
574	fsubd	%f34,%f14,%f8
575	xor	%l1,%l5,%l1
576
577	fsubd	%f36,%f22,%f16
578	xor	%l2,%l6,%l2
579
580	fsubd	%f38,%f30,%f24
581	xor	%l3,%l7,%l3
582
583	fabsd	%f0,%f2
584	sub	%l0,%l4,%l0
585
586	fabsd	%f8,%f10
587	sub	%l1,%l5,%l1
588
589	fabsd	%f16,%f18
590	sub	%l2,%l6,%l2
591
592	fabsd	%f24,%f26
593	sub	%l3,%l7,%l3
594
595	faddd	%f2,c3two44,%f4
596	st	%f5,[%fp+nk0]
597	and	%l4,2,%l4
598
599	faddd	%f10,c3two44,%f12
600	st	%f13,[%fp+nk1]
601	and	%l5,2,%l5
602
603	faddd	%f18,c3two44,%f20
604	st	%f21,[%fp+nk2]
605	and	%l6,2,%l6
606
607	faddd	%f26,c3two44,%f28
608	st	%f29,[%fp+nk3]
609	and	%l7,2,%l7
610
611	fsubd	%f32,%f0,%f4
612	xor	%l0,%l4,%l0
613
614	fsubd	%f34,%f8,%f12
615	xor	%l1,%l5,%l1
616
617	fsubd	%f36,%f16,%f20
618	xor	%l2,%l6,%l2
619
620	fsubd	%f38,%f24,%f28
621	xor	%l3,%l7,%l3
622
623	fzero	%f38
624	ld	[%fp+nk0],%l4
625
626	fsubd	%f4,%f6,%f6		! w
627	ld	[%fp+nk1],%l5
628
629	fsubd	%f12,%f14,%f14
630	ld	[%fp+nk2],%l6
631
632	fnegd	%f38,%f38
633	ld	[%fp+nk3],%l7
634	sll	%l4,5,%l4		! k
635
636	fsubd	%f20,%f22,%f22
637	sll	%l5,5,%l5
638
639	fsubd	%f28,%f30,%f30
640	sll	%l6,5,%l6
641
642	fand	%f0,%f38,%f32		! sign bit of x
643	ldd	[%l4+%g1],%f4
644	sll	%l7,5,%l7
645
646	fand	%f8,%f38,%f34
647	ldd	[%l5+%g1],%f12
648
649	fand	%f16,%f38,%f36
650	ldd	[%l6+%g1],%f20
651
652	fand	%f24,%f38,%f38
653	ldd	[%l7+%g1],%f28
654
655	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
656
657	fsubd	%f10,%f12,%f10
658
659	fsubd	%f18,%f20,%f18
660	nop
661
662	fsubd	%f26,%f28,%f26
663	nop
664
665! 16-byte aligned
666	fmuld	%f2,%f2,%f0		! z = x * x
667	andcc	%l0,1,%g0
668	bz,pn	%icc,.case8
669! delay slot
670	fxor	%f6,%f32,%f32
671
672	fmuld	%f10,%f10,%f8
673	andcc	%l1,1,%g0
674	bz,pn	%icc,.case4
675! delay slot
676	fxor	%f14,%f34,%f34
677
678	fmuld	%f18,%f18,%f16
679	andcc	%l2,1,%g0
680	bz,pn	%icc,.case2
681! delay slot
682	fxor	%f22,%f36,%f36
683
684	fmuld	%f26,%f26,%f24
685	andcc	%l3,1,%g0
686	bz,pn	%icc,.case1
687! delay slot
688	fxor	%f30,%f38,%f38
689
690!.case0:
691	fmuld	%f0,qq3,%f6		! cos(x0)
692
693	fmuld	%f8,qq3,%f14		! cos(x1)
694
695	fmuld	%f16,qq3,%f22		! cos(x2)
696
697	fmuld	%f24,qq3,%f30		! cos(x3)
698
699	faddd	%f6,qq2,%f6
700	fmuld	%f0,pp2,%f4
701
702	faddd	%f14,qq2,%f14
703	fmuld	%f8,pp2,%f12
704
705	faddd	%f22,qq2,%f22
706	fmuld	%f16,pp2,%f20
707
708	faddd	%f30,qq2,%f30
709	fmuld	%f24,pp2,%f28
710
711	fmuld	%f0,%f6,%f6
712	faddd	%f4,pp1,%f4
713
714	fmuld	%f8,%f14,%f14
715	faddd	%f12,pp1,%f12
716
717	fmuld	%f16,%f22,%f22
718	faddd	%f20,pp1,%f20
719
720	fmuld	%f24,%f30,%f30
721	faddd	%f28,pp1,%f28
722
723	faddd	%f6,qq1,%f6
724	fmuld	%f0,%f4,%f4
725	add	%l4,%g1,%l4
726
727	faddd	%f14,qq1,%f14
728	fmuld	%f8,%f12,%f12
729	add	%l5,%g1,%l5
730
731	faddd	%f22,qq1,%f22
732	fmuld	%f16,%f20,%f20
733	add	%l6,%g1,%l6
734
735	faddd	%f30,qq1,%f30
736	fmuld	%f24,%f28,%f28
737	add	%l7,%g1,%l7
738
739	fmuld	%f2,%f4,%f4
740
741	fmuld	%f10,%f12,%f12
742
743	fmuld	%f18,%f20,%f20
744
745	fmuld	%f26,%f28,%f28
746
747	fmuld	%f0,%f6,%f6
748	faddd	%f4,%f32,%f4
749	ldd	[%l4+16],%f0
750
751	fmuld	%f8,%f14,%f14
752	faddd	%f12,%f34,%f12
753	ldd	[%l5+16],%f8
754
755	fmuld	%f16,%f22,%f22
756	faddd	%f20,%f36,%f20
757	ldd	[%l6+16],%f16
758
759	fmuld	%f24,%f30,%f30
760	faddd	%f28,%f38,%f28
761	ldd	[%l7+16],%f24
762
763	fmuld	%f0,%f6,%f6
764	faddd	%f4,%f2,%f4
765	ldd	[%l4+8],%f32
766
767	fmuld	%f8,%f14,%f14
768	faddd	%f12,%f10,%f12
769	ldd	[%l5+8],%f34
770
771	fmuld	%f16,%f22,%f22
772	faddd	%f20,%f18,%f20
773	ldd	[%l6+8],%f36
774
775	fmuld	%f24,%f30,%f30
776	faddd	%f28,%f26,%f28
777	ldd	[%l7+8],%f38
778
779	fmuld	%f32,%f4,%f4
780
781	fmuld	%f34,%f12,%f12
782
783	fmuld	%f36,%f20,%f20
784
785	fmuld	%f38,%f28,%f28
786
787	fsubd	%f6,%f4,%f6
788
789	fsubd	%f14,%f12,%f14
790
791	fsubd	%f22,%f20,%f22
792
793	fsubd	%f30,%f28,%f30
794
795	faddd	%f6,%f0,%f6
796
797	faddd	%f14,%f8,%f14
798
799	faddd	%f22,%f16,%f22
800
801	faddd	%f30,%f24,%f30
802	mov	%l0,%l4
803
804	fnegd	%f6,%f4
805	lda	[%i1]%asi,%l0		! preload next argument
806
807	fnegd	%f14,%f12
808	lda	[%i1]%asi,%f0
809
810	fnegd	%f22,%f20
811	lda	[%i1+4]%asi,%f3
812
813	fnegd	%f30,%f28
814	andn	%l0,%i5,%l0
815	add	%i1,%i2,%i1
816
817	andcc	%l4,2,%g0
818	fmovdnz	%icc,%f4,%f6
819	st	%f6,[%o0]
820
821	andcc	%l1,2,%g0
822	fmovdnz	%icc,%f12,%f14
823	st	%f14,[%o1]
824
825	andcc	%l2,2,%g0
826	fmovdnz	%icc,%f20,%f22
827	st	%f22,[%o2]
828
829	andcc	%l3,2,%g0
830	fmovdnz	%icc,%f28,%f30
831	st	%f30,[%o3]
832
833	addcc	%i0,-1,%i0
834	bg,pt	%icc,.loop0
835! delay slot
836	st	%f7,[%o0+4]
837
838	ba,pt	%icc,.end
839! delay slot
840	nop
841
842	.align	16
843.case1:
844	fmuld	%f24,pp3,%f30		! sin(x3)
845
846	fmuld	%f0,qq3,%f6		! cos(x0)
847
848	fmuld	%f8,qq3,%f14		! cos(x1)
849
850	fmuld	%f16,qq3,%f22		! cos(x2)
851
852	faddd	%f30,pp2,%f30
853	fmuld	%f24,qq2,%f28
854
855	faddd	%f6,qq2,%f6
856	fmuld	%f0,pp2,%f4
857
858	faddd	%f14,qq2,%f14
859	fmuld	%f8,pp2,%f12
860
861	faddd	%f22,qq2,%f22
862	fmuld	%f16,pp2,%f20
863
864	fmuld	%f24,%f30,%f30
865	faddd	%f28,qq1,%f28
866
867	fmuld	%f0,%f6,%f6
868	faddd	%f4,pp1,%f4
869
870	fmuld	%f8,%f14,%f14
871	faddd	%f12,pp1,%f12
872
873	fmuld	%f16,%f22,%f22
874	faddd	%f20,pp1,%f20
875
876	faddd	%f30,pp1,%f30
877	fmuld	%f24,%f28,%f28
878	add	%l7,%g1,%l7
879
880	faddd	%f6,qq1,%f6
881	fmuld	%f0,%f4,%f4
882	add	%l4,%g1,%l4
883
884	faddd	%f14,qq1,%f14
885	fmuld	%f8,%f12,%f12
886	add	%l5,%g1,%l5
887
888	faddd	%f22,qq1,%f22
889	fmuld	%f16,%f20,%f20
890	add	%l6,%g1,%l6
891
892	fmuld	%f24,%f30,%f30
893
894	fmuld	%f2,%f4,%f4
895
896	fmuld	%f10,%f12,%f12
897
898	fmuld	%f18,%f20,%f20
899
900	fmuld	%f26,%f30,%f30
901	ldd	[%l7+8],%f24
902
903	fmuld	%f0,%f6,%f6
904	faddd	%f4,%f32,%f4
905	ldd	[%l4+16],%f0
906
907	fmuld	%f8,%f14,%f14
908	faddd	%f12,%f34,%f12
909	ldd	[%l5+16],%f8
910
911	fmuld	%f16,%f22,%f22
912	faddd	%f20,%f36,%f20
913	ldd	[%l6+16],%f16
914
915	fmuld	%f24,%f28,%f28
916	faddd	%f38,%f30,%f30
917
918	fmuld	%f0,%f6,%f6
919	faddd	%f4,%f2,%f4
920	ldd	[%l4+8],%f32
921
922	fmuld	%f8,%f14,%f14
923	faddd	%f12,%f10,%f12
924	ldd	[%l5+8],%f34
925
926	fmuld	%f16,%f22,%f22
927	faddd	%f20,%f18,%f20
928	ldd	[%l6+8],%f36
929
930	faddd	%f26,%f30,%f30
931	ldd	[%l7+16],%f38
932
933	fmuld	%f32,%f4,%f4
934
935	fmuld	%f34,%f12,%f12
936
937	fmuld	%f36,%f20,%f20
938
939	fmuld	%f38,%f30,%f30
940
941	fsubd	%f6,%f4,%f6
942
943	fsubd	%f14,%f12,%f14
944
945	fsubd	%f22,%f20,%f22
946
947	faddd	%f30,%f28,%f30
948
949	faddd	%f6,%f0,%f6
950
951	faddd	%f14,%f8,%f14
952
953	faddd	%f22,%f16,%f22
954
955	faddd	%f30,%f24,%f30
956	mov	%l0,%l4
957
958	fnegd	%f6,%f4
959	lda	[%i1]%asi,%l0		! preload next argument
960
961	fnegd	%f14,%f12
962	lda	[%i1]%asi,%f0
963
964	fnegd	%f22,%f20
965	lda	[%i1+4]%asi,%f3
966
967	fnegd	%f30,%f28
968	andn	%l0,%i5,%l0
969	add	%i1,%i2,%i1
970
971	andcc	%l4,2,%g0
972	fmovdnz	%icc,%f4,%f6
973	st	%f6,[%o0]
974
975	andcc	%l1,2,%g0
976	fmovdnz	%icc,%f12,%f14
977	st	%f14,[%o1]
978
979	andcc	%l2,2,%g0
980	fmovdnz	%icc,%f20,%f22
981	st	%f22,[%o2]
982
983	andcc	%l3,2,%g0
984	fmovdnz	%icc,%f28,%f30
985	st	%f30,[%o3]
986
987	addcc	%i0,-1,%i0
988	bg,pt	%icc,.loop0
989! delay slot
990	st	%f7,[%o0+4]
991
992	ba,pt	%icc,.end
993! delay slot
994	nop
995
996	.align	16
997.case2:
998	fmuld	%f26,%f26,%f24
999	andcc	%l3,1,%g0
1000	bz,pn	%icc,.case3
1001! delay slot
1002	fxor	%f30,%f38,%f38
1003
1004	fmuld	%f16,pp3,%f22		! sin(x2)
1005
1006	fmuld	%f0,qq3,%f6		! cos(x0)
1007
1008	fmuld	%f8,qq3,%f14		! cos(x1)
1009
1010	faddd	%f22,pp2,%f22
1011	fmuld	%f16,qq2,%f20
1012
1013	fmuld	%f24,qq3,%f30		! cos(x3)
1014
1015	faddd	%f6,qq2,%f6
1016	fmuld	%f0,pp2,%f4
1017
1018	faddd	%f14,qq2,%f14
1019	fmuld	%f8,pp2,%f12
1020
1021	fmuld	%f16,%f22,%f22
1022	faddd	%f20,qq1,%f20
1023
1024	faddd	%f30,qq2,%f30
1025	fmuld	%f24,pp2,%f28
1026
1027	fmuld	%f0,%f6,%f6
1028	faddd	%f4,pp1,%f4
1029
1030	fmuld	%f8,%f14,%f14
1031	faddd	%f12,pp1,%f12
1032
1033	faddd	%f22,pp1,%f22
1034	fmuld	%f16,%f20,%f20
1035	add	%l6,%g1,%l6
1036
1037	fmuld	%f24,%f30,%f30
1038	faddd	%f28,pp1,%f28
1039
1040	faddd	%f6,qq1,%f6
1041	fmuld	%f0,%f4,%f4
1042	add	%l4,%g1,%l4
1043
1044	faddd	%f14,qq1,%f14
1045	fmuld	%f8,%f12,%f12
1046	add	%l5,%g1,%l5
1047
1048	fmuld	%f16,%f22,%f22
1049
1050	faddd	%f30,qq1,%f30
1051	fmuld	%f24,%f28,%f28
1052	add	%l7,%g1,%l7
1053
1054	fmuld	%f2,%f4,%f4
1055
1056	fmuld	%f10,%f12,%f12
1057
1058	fmuld	%f18,%f22,%f22
1059	ldd	[%l6+8],%f16
1060
1061	fmuld	%f26,%f28,%f28
1062
1063	fmuld	%f0,%f6,%f6
1064	faddd	%f4,%f32,%f4
1065	ldd	[%l4+16],%f0
1066
1067	fmuld	%f8,%f14,%f14
1068	faddd	%f12,%f34,%f12
1069	ldd	[%l5+16],%f8
1070
1071	fmuld	%f16,%f20,%f20
1072	faddd	%f36,%f22,%f22
1073
1074	fmuld	%f24,%f30,%f30
1075	faddd	%f28,%f38,%f28
1076	ldd	[%l7+16],%f24
1077
1078	fmuld	%f0,%f6,%f6
1079	faddd	%f4,%f2,%f4
1080	ldd	[%l4+8],%f32
1081
1082	fmuld	%f8,%f14,%f14
1083	faddd	%f12,%f10,%f12
1084	ldd	[%l5+8],%f34
1085
1086	faddd	%f18,%f22,%f22
1087	ldd	[%l6+16],%f36
1088
1089	fmuld	%f24,%f30,%f30
1090	faddd	%f28,%f26,%f28
1091	ldd	[%l7+8],%f38
1092
1093	fmuld	%f32,%f4,%f4
1094
1095	fmuld	%f34,%f12,%f12
1096
1097	fmuld	%f36,%f22,%f22
1098
1099	fmuld	%f38,%f28,%f28
1100
1101	fsubd	%f6,%f4,%f6
1102
1103	fsubd	%f14,%f12,%f14
1104
1105	faddd	%f22,%f20,%f22
1106
1107	fsubd	%f30,%f28,%f30
1108
1109	faddd	%f6,%f0,%f6
1110
1111	faddd	%f14,%f8,%f14
1112
1113	faddd	%f22,%f16,%f22
1114
1115	faddd	%f30,%f24,%f30
1116	mov	%l0,%l4
1117
1118	fnegd	%f6,%f4
1119	lda	[%i1]%asi,%l0		! preload next argument
1120
1121	fnegd	%f14,%f12
1122	lda	[%i1]%asi,%f0
1123
1124	fnegd	%f22,%f20
1125	lda	[%i1+4]%asi,%f3
1126
1127	fnegd	%f30,%f28
1128	andn	%l0,%i5,%l0
1129	add	%i1,%i2,%i1
1130
1131	andcc	%l4,2,%g0
1132	fmovdnz	%icc,%f4,%f6
1133	st	%f6,[%o0]
1134
1135	andcc	%l1,2,%g0
1136	fmovdnz	%icc,%f12,%f14
1137	st	%f14,[%o1]
1138
1139	andcc	%l2,2,%g0
1140	fmovdnz	%icc,%f20,%f22
1141	st	%f22,[%o2]
1142
1143	andcc	%l3,2,%g0
1144	fmovdnz	%icc,%f28,%f30
1145	st	%f30,[%o3]
1146
1147	addcc	%i0,-1,%i0
1148	bg,pt	%icc,.loop0
1149! delay slot
1150	st	%f7,[%o0+4]
1151
1152	ba,pt	%icc,.end
1153! delay slot
1154	nop
1155
1156	.align	16
1157.case3:
1158	fmuld	%f16,pp3,%f22		! sin(x2)
1159
1160	fmuld	%f24,pp3,%f30		! sin(x3)
1161
1162	fmuld	%f0,qq3,%f6		! cos(x0)
1163
1164	fmuld	%f8,qq3,%f14		! cos(x1)
1165
1166	faddd	%f22,pp2,%f22
1167	fmuld	%f16,qq2,%f20
1168
1169	faddd	%f30,pp2,%f30
1170	fmuld	%f24,qq2,%f28
1171
1172	faddd	%f6,qq2,%f6
1173	fmuld	%f0,pp2,%f4
1174
1175	faddd	%f14,qq2,%f14
1176	fmuld	%f8,pp2,%f12
1177
1178	fmuld	%f16,%f22,%f22
1179	faddd	%f20,qq1,%f20
1180
1181	fmuld	%f24,%f30,%f30
1182	faddd	%f28,qq1,%f28
1183
1184	fmuld	%f0,%f6,%f6
1185	faddd	%f4,pp1,%f4
1186
1187	fmuld	%f8,%f14,%f14
1188	faddd	%f12,pp1,%f12
1189
1190	faddd	%f22,pp1,%f22
1191	fmuld	%f16,%f20,%f20
1192	add	%l6,%g1,%l6
1193
1194	faddd	%f30,pp1,%f30
1195	fmuld	%f24,%f28,%f28
1196	add	%l7,%g1,%l7
1197
1198	faddd	%f6,qq1,%f6
1199	fmuld	%f0,%f4,%f4
1200	add	%l4,%g1,%l4
1201
1202	faddd	%f14,qq1,%f14
1203	fmuld	%f8,%f12,%f12
1204	add	%l5,%g1,%l5
1205
1206	fmuld	%f16,%f22,%f22
1207
1208	fmuld	%f24,%f30,%f30
1209
1210	fmuld	%f2,%f4,%f4
1211
1212	fmuld	%f10,%f12,%f12
1213
1214	fmuld	%f18,%f22,%f22
1215	ldd	[%l6+8],%f16
1216
1217	fmuld	%f26,%f30,%f30
1218	ldd	[%l7+8],%f24
1219
1220	fmuld	%f0,%f6,%f6
1221	faddd	%f4,%f32,%f4
1222	ldd	[%l4+16],%f0
1223
1224	fmuld	%f8,%f14,%f14
1225	faddd	%f12,%f34,%f12
1226	ldd	[%l5+16],%f8
1227
1228	fmuld	%f16,%f20,%f20
1229	faddd	%f36,%f22,%f22
1230
1231	fmuld	%f24,%f28,%f28
1232	faddd	%f38,%f30,%f30
1233
1234	fmuld	%f0,%f6,%f6
1235	faddd	%f4,%f2,%f4
1236	ldd	[%l4+8],%f32
1237
1238	fmuld	%f8,%f14,%f14
1239	faddd	%f12,%f10,%f12
1240	ldd	[%l5+8],%f34
1241
1242	faddd	%f18,%f22,%f22
1243	ldd	[%l6+16],%f36
1244
1245	faddd	%f26,%f30,%f30
1246	ldd	[%l7+16],%f38
1247
1248	fmuld	%f32,%f4,%f4
1249
1250	fmuld	%f34,%f12,%f12
1251
1252	fmuld	%f36,%f22,%f22
1253
1254	fmuld	%f38,%f30,%f30
1255
1256	fsubd	%f6,%f4,%f6
1257
1258	fsubd	%f14,%f12,%f14
1259
1260	faddd	%f22,%f20,%f22
1261
1262	faddd	%f30,%f28,%f30
1263
1264	faddd	%f6,%f0,%f6
1265
1266	faddd	%f14,%f8,%f14
1267
1268	faddd	%f22,%f16,%f22
1269
1270	faddd	%f30,%f24,%f30
1271	mov	%l0,%l4
1272
1273	fnegd	%f6,%f4
1274	lda	[%i1]%asi,%l0		! preload next argument
1275
1276	fnegd	%f14,%f12
1277	lda	[%i1]%asi,%f0
1278
1279	fnegd	%f22,%f20
1280	lda	[%i1+4]%asi,%f3
1281
1282	fnegd	%f30,%f28
1283	andn	%l0,%i5,%l0
1284	add	%i1,%i2,%i1
1285
1286	andcc	%l4,2,%g0
1287	fmovdnz	%icc,%f4,%f6
1288	st	%f6,[%o0]
1289
1290	andcc	%l1,2,%g0
1291	fmovdnz	%icc,%f12,%f14
1292	st	%f14,[%o1]
1293
1294	andcc	%l2,2,%g0
1295	fmovdnz	%icc,%f20,%f22
1296	st	%f22,[%o2]
1297
1298	andcc	%l3,2,%g0
1299	fmovdnz	%icc,%f28,%f30
1300	st	%f30,[%o3]
1301
1302	addcc	%i0,-1,%i0
1303	bg,pt	%icc,.loop0
1304! delay slot
1305	st	%f7,[%o0+4]
1306
1307	ba,pt	%icc,.end
1308! delay slot
1309	nop
1310
1311	.align	16
1312.case4:
1313	fmuld	%f18,%f18,%f16
1314	andcc	%l2,1,%g0
1315	bz,pn	%icc,.case6
1316! delay slot
1317	fxor	%f22,%f36,%f36
1318
1319	fmuld	%f26,%f26,%f24
1320	andcc	%l3,1,%g0
1321	bz,pn	%icc,.case5
1322! delay slot
1323	fxor	%f30,%f38,%f38
1324
1325	fmuld	%f8,pp3,%f14		! sin(x1)
1326
1327	fmuld	%f0,qq3,%f6		! cos(x0)
1328
1329	faddd	%f14,pp2,%f14
1330	fmuld	%f8,qq2,%f12
1331
1332	fmuld	%f16,qq3,%f22		! cos(x2)
1333
1334	fmuld	%f24,qq3,%f30		! cos(x3)
1335
1336	faddd	%f6,qq2,%f6
1337	fmuld	%f0,pp2,%f4
1338
1339	fmuld	%f8,%f14,%f14
1340	faddd	%f12,qq1,%f12
1341
1342	faddd	%f22,qq2,%f22
1343	fmuld	%f16,pp2,%f20
1344
1345	faddd	%f30,qq2,%f30
1346	fmuld	%f24,pp2,%f28
1347
1348	fmuld	%f0,%f6,%f6
1349	faddd	%f4,pp1,%f4
1350
1351	faddd	%f14,pp1,%f14
1352	fmuld	%f8,%f12,%f12
1353	add	%l5,%g1,%l5
1354
1355	fmuld	%f16,%f22,%f22
1356	faddd	%f20,pp1,%f20
1357
1358	fmuld	%f24,%f30,%f30
1359	faddd	%f28,pp1,%f28
1360
1361	faddd	%f6,qq1,%f6
1362	fmuld	%f0,%f4,%f4
1363	add	%l4,%g1,%l4
1364
1365	fmuld	%f8,%f14,%f14
1366
1367	faddd	%f22,qq1,%f22
1368	fmuld	%f16,%f20,%f20
1369	add	%l6,%g1,%l6
1370
1371	faddd	%f30,qq1,%f30
1372	fmuld	%f24,%f28,%f28
1373	add	%l7,%g1,%l7
1374
1375	fmuld	%f2,%f4,%f4
1376
1377	fmuld	%f10,%f14,%f14
1378	ldd	[%l5+8],%f8
1379
1380	fmuld	%f18,%f20,%f20
1381
1382	fmuld	%f26,%f28,%f28
1383
1384	fmuld	%f0,%f6,%f6
1385	faddd	%f4,%f32,%f4
1386	ldd	[%l4+16],%f0
1387
1388	fmuld	%f8,%f12,%f12
1389	faddd	%f34,%f14,%f14
1390
1391	fmuld	%f16,%f22,%f22
1392	faddd	%f20,%f36,%f20
1393	ldd	[%l6+16],%f16
1394
1395	fmuld	%f24,%f30,%f30
1396	faddd	%f28,%f38,%f28
1397	ldd	[%l7+16],%f24
1398
1399	fmuld	%f0,%f6,%f6
1400	faddd	%f4,%f2,%f4
1401	ldd	[%l4+8],%f32
1402
1403	faddd	%f10,%f14,%f14
1404	ldd	[%l5+16],%f34
1405
1406	fmuld	%f16,%f22,%f22
1407	faddd	%f20,%f18,%f20
1408	ldd	[%l6+8],%f36
1409
1410	fmuld	%f24,%f30,%f30
1411	faddd	%f28,%f26,%f28
1412	ldd	[%l7+8],%f38
1413
1414	fmuld	%f32,%f4,%f4
1415
1416	fmuld	%f34,%f14,%f14
1417
1418	fmuld	%f36,%f20,%f20
1419
1420	fmuld	%f38,%f28,%f28
1421
1422	fsubd	%f6,%f4,%f6
1423
1424	faddd	%f14,%f12,%f14
1425
1426	fsubd	%f22,%f20,%f22
1427
1428	fsubd	%f30,%f28,%f30
1429
1430	faddd	%f6,%f0,%f6
1431
1432	faddd	%f14,%f8,%f14
1433
1434	faddd	%f22,%f16,%f22
1435
1436	faddd	%f30,%f24,%f30
1437	mov	%l0,%l4
1438
1439	fnegd	%f6,%f4
1440	lda	[%i1]%asi,%l0		! preload next argument
1441
1442	fnegd	%f14,%f12
1443	lda	[%i1]%asi,%f0
1444
1445	fnegd	%f22,%f20
1446	lda	[%i1+4]%asi,%f3
1447
1448	fnegd	%f30,%f28
1449	andn	%l0,%i5,%l0
1450	add	%i1,%i2,%i1
1451
1452	andcc	%l4,2,%g0
1453	fmovdnz	%icc,%f4,%f6
1454	st	%f6,[%o0]
1455
1456	andcc	%l1,2,%g0
1457	fmovdnz	%icc,%f12,%f14
1458	st	%f14,[%o1]
1459
1460	andcc	%l2,2,%g0
1461	fmovdnz	%icc,%f20,%f22
1462	st	%f22,[%o2]
1463
1464	andcc	%l3,2,%g0
1465	fmovdnz	%icc,%f28,%f30
1466	st	%f30,[%o3]
1467
1468	addcc	%i0,-1,%i0
1469	bg,pt	%icc,.loop0
1470! delay slot
1471	st	%f7,[%o0+4]
1472
1473	ba,pt	%icc,.end
1474! delay slot
1475	nop
1476
1477	.align	16
1478.case5:
1479	fmuld	%f8,pp3,%f14		! sin(x1)
1480
1481	fmuld	%f24,pp3,%f30		! sin(x3)
1482
1483	fmuld	%f0,qq3,%f6		! cos(x0)
1484
1485	faddd	%f14,pp2,%f14
1486	fmuld	%f8,qq2,%f12
1487
1488	fmuld	%f16,qq3,%f22		! cos(x2)
1489
1490	faddd	%f30,pp2,%f30
1491	fmuld	%f24,qq2,%f28
1492
1493	faddd	%f6,qq2,%f6
1494	fmuld	%f0,pp2,%f4
1495
1496	fmuld	%f8,%f14,%f14
1497	faddd	%f12,qq1,%f12
1498
1499	faddd	%f22,qq2,%f22
1500	fmuld	%f16,pp2,%f20
1501
1502	fmuld	%f24,%f30,%f30
1503	faddd	%f28,qq1,%f28
1504
1505	fmuld	%f0,%f6,%f6
1506	faddd	%f4,pp1,%f4
1507
1508	faddd	%f14,pp1,%f14
1509	fmuld	%f8,%f12,%f12
1510	add	%l5,%g1,%l5
1511
1512	fmuld	%f16,%f22,%f22
1513	faddd	%f20,pp1,%f20
1514
1515	faddd	%f30,pp1,%f30
1516	fmuld	%f24,%f28,%f28
1517	add	%l7,%g1,%l7
1518
1519	faddd	%f6,qq1,%f6
1520	fmuld	%f0,%f4,%f4
1521	add	%l4,%g1,%l4
1522
1523	fmuld	%f8,%f14,%f14
1524
1525	faddd	%f22,qq1,%f22
1526	fmuld	%f16,%f20,%f20
1527	add	%l6,%g1,%l6
1528
1529	fmuld	%f24,%f30,%f30
1530
1531	fmuld	%f2,%f4,%f4
1532
1533	fmuld	%f10,%f14,%f14
1534	ldd	[%l5+8],%f8
1535
1536	fmuld	%f18,%f20,%f20
1537
1538	fmuld	%f26,%f30,%f30
1539	ldd	[%l7+8],%f24
1540
1541	fmuld	%f0,%f6,%f6
1542	faddd	%f4,%f32,%f4
1543	ldd	[%l4+16],%f0
1544
1545	fmuld	%f8,%f12,%f12
1546	faddd	%f34,%f14,%f14
1547
1548	fmuld	%f16,%f22,%f22
1549	faddd	%f20,%f36,%f20
1550	ldd	[%l6+16],%f16
1551
1552	fmuld	%f24,%f28,%f28
1553	faddd	%f38,%f30,%f30
1554
1555	fmuld	%f0,%f6,%f6
1556	faddd	%f4,%f2,%f4
1557	ldd	[%l4+8],%f32
1558
1559	faddd	%f10,%f14,%f14
1560	ldd	[%l5+16],%f34
1561
1562	fmuld	%f16,%f22,%f22
1563	faddd	%f20,%f18,%f20
1564	ldd	[%l6+8],%f36
1565
1566	faddd	%f26,%f30,%f30
1567	ldd	[%l7+16],%f38
1568
1569	fmuld	%f32,%f4,%f4
1570
1571	fmuld	%f34,%f14,%f14
1572
1573	fmuld	%f36,%f20,%f20
1574
1575	fmuld	%f38,%f30,%f30
1576
1577	fsubd	%f6,%f4,%f6
1578
1579	faddd	%f14,%f12,%f14
1580
1581	fsubd	%f22,%f20,%f22
1582
1583	faddd	%f30,%f28,%f30
1584
1585	faddd	%f6,%f0,%f6
1586
1587	faddd	%f14,%f8,%f14
1588
1589	faddd	%f22,%f16,%f22
1590
1591	faddd	%f30,%f24,%f30
1592	mov	%l0,%l4
1593
1594	fnegd	%f6,%f4
1595	lda	[%i1]%asi,%l0		! preload next argument
1596
1597	fnegd	%f14,%f12
1598	lda	[%i1]%asi,%f0
1599
1600	fnegd	%f22,%f20
1601	lda	[%i1+4]%asi,%f3
1602
1603	fnegd	%f30,%f28
1604	andn	%l0,%i5,%l0
1605	add	%i1,%i2,%i1
1606
1607	andcc	%l4,2,%g0
1608	fmovdnz	%icc,%f4,%f6
1609	st	%f6,[%o0]
1610
1611	andcc	%l1,2,%g0
1612	fmovdnz	%icc,%f12,%f14
1613	st	%f14,[%o1]
1614
1615	andcc	%l2,2,%g0
1616	fmovdnz	%icc,%f20,%f22
1617	st	%f22,[%o2]
1618
1619	andcc	%l3,2,%g0
1620	fmovdnz	%icc,%f28,%f30
1621	st	%f30,[%o3]
1622
1623	addcc	%i0,-1,%i0
1624	bg,pt	%icc,.loop0
1625! delay slot
1626	st	%f7,[%o0+4]
1627
1628	ba,pt	%icc,.end
1629! delay slot
1630	nop
1631
1632	.align	16
1633.case6:
1634	fmuld	%f26,%f26,%f24
1635	andcc	%l3,1,%g0
1636	bz,pn	%icc,.case7
1637! delay slot
1638	fxor	%f30,%f38,%f38
1639
1640	fmuld	%f8,pp3,%f14		! sin(x1)
1641
1642	fmuld	%f16,pp3,%f22		! sin(x2)
1643
1644	fmuld	%f0,qq3,%f6		! cos(x0)
1645
1646	faddd	%f14,pp2,%f14
1647	fmuld	%f8,qq2,%f12
1648
1649	faddd	%f22,pp2,%f22
1650	fmuld	%f16,qq2,%f20
1651
1652	fmuld	%f24,qq3,%f30		! cos(x3)
1653
1654	faddd	%f6,qq2,%f6
1655	fmuld	%f0,pp2,%f4
1656
1657	fmuld	%f8,%f14,%f14
1658	faddd	%f12,qq1,%f12
1659
1660	fmuld	%f16,%f22,%f22
1661	faddd	%f20,qq1,%f20
1662
1663	faddd	%f30,qq2,%f30
1664	fmuld	%f24,pp2,%f28
1665
1666	fmuld	%f0,%f6,%f6
1667	faddd	%f4,pp1,%f4
1668
1669	faddd	%f14,pp1,%f14
1670	fmuld	%f8,%f12,%f12
1671	add	%l5,%g1,%l5
1672
1673	faddd	%f22,pp1,%f22
1674	fmuld	%f16,%f20,%f20
1675	add	%l6,%g1,%l6
1676
1677	fmuld	%f24,%f30,%f30
1678	faddd	%f28,pp1,%f28
1679
1680	faddd	%f6,qq1,%f6
1681	fmuld	%f0,%f4,%f4
1682	add	%l4,%g1,%l4
1683
1684	fmuld	%f8,%f14,%f14
1685
1686	fmuld	%f16,%f22,%f22
1687
1688	faddd	%f30,qq1,%f30
1689	fmuld	%f24,%f28,%f28
1690	add	%l7,%g1,%l7
1691
1692	fmuld	%f2,%f4,%f4
1693
1694	fmuld	%f10,%f14,%f14
1695	ldd	[%l5+8],%f8
1696
1697	fmuld	%f18,%f22,%f22
1698	ldd	[%l6+8],%f16
1699
1700	fmuld	%f26,%f28,%f28
1701
1702	fmuld	%f0,%f6,%f6
1703	faddd	%f4,%f32,%f4
1704	ldd	[%l4+16],%f0
1705
1706	fmuld	%f8,%f12,%f12
1707	faddd	%f34,%f14,%f14
1708
1709	fmuld	%f16,%f20,%f20
1710	faddd	%f36,%f22,%f22
1711
1712	fmuld	%f24,%f30,%f30
1713	faddd	%f28,%f38,%f28
1714	ldd	[%l7+16],%f24
1715
1716	fmuld	%f0,%f6,%f6
1717	faddd	%f4,%f2,%f4
1718	ldd	[%l4+8],%f32
1719
1720	faddd	%f10,%f14,%f14
1721	ldd	[%l5+16],%f34
1722
1723	faddd	%f18,%f22,%f22
1724	ldd	[%l6+16],%f36
1725
1726	fmuld	%f24,%f30,%f30
1727	faddd	%f28,%f26,%f28
1728	ldd	[%l7+8],%f38
1729
1730	fmuld	%f32,%f4,%f4
1731
1732	fmuld	%f34,%f14,%f14
1733
1734	fmuld	%f36,%f22,%f22
1735
1736	fmuld	%f38,%f28,%f28
1737
1738	fsubd	%f6,%f4,%f6
1739
1740	faddd	%f14,%f12,%f14
1741
1742	faddd	%f22,%f20,%f22
1743
1744	fsubd	%f30,%f28,%f30
1745
1746	faddd	%f6,%f0,%f6
1747
1748	faddd	%f14,%f8,%f14
1749
1750	faddd	%f22,%f16,%f22
1751
1752	faddd	%f30,%f24,%f30
1753	mov	%l0,%l4
1754
1755	fnegd	%f6,%f4
1756	lda	[%i1]%asi,%l0		! preload next argument
1757
1758	fnegd	%f14,%f12
1759	lda	[%i1]%asi,%f0
1760
1761	fnegd	%f22,%f20
1762	lda	[%i1+4]%asi,%f3
1763
1764	fnegd	%f30,%f28
1765	andn	%l0,%i5,%l0
1766	add	%i1,%i2,%i1
1767
1768	andcc	%l4,2,%g0
1769	fmovdnz	%icc,%f4,%f6
1770	st	%f6,[%o0]
1771
1772	andcc	%l1,2,%g0
1773	fmovdnz	%icc,%f12,%f14
1774	st	%f14,[%o1]
1775
1776	andcc	%l2,2,%g0
1777	fmovdnz	%icc,%f20,%f22
1778	st	%f22,[%o2]
1779
1780	andcc	%l3,2,%g0
1781	fmovdnz	%icc,%f28,%f30
1782	st	%f30,[%o3]
1783
1784	addcc	%i0,-1,%i0
1785	bg,pt	%icc,.loop0
1786! delay slot
1787	st	%f7,[%o0+4]
1788
1789	ba,pt	%icc,.end
1790! delay slot
1791	nop
1792
1793	.align	16
! .case7 -- lane 0 takes the cos path; lanes 1, 2 and 3 take the sin path.
!
! Register roles as used below, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26    multiplier applied to the odd polynomial
!   z = %f0/%f8/%f16/%f24     polynomial variable; overwritten by a table
!                             load once its last use has issued
!   c = %f32/%f34/%f36/%f38   addend summed next to x; reloaded from the table
!   T8, T16                   doubles at offsets 8 and 16 from the lane's
!                             table pointer (%l4..%l7 after %g1 is added)
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! The lanes are interleaved instruction by instruction; register lifetimes
! depend on the exact order shown.
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
! NOTE(review): z is presumably x*x for every lane (set before this point).
1794.case7:
1795	fmuld	%f8,pp3,%f14		! sin(x1)
1796
1797	fmuld	%f16,pp3,%f22		! sin(x2)
1798
1799	fmuld	%f24,pp3,%f30		! sin(x3)
1800
1801	fmuld	%f0,qq3,%f6		! cos(x0)
1802
1803	faddd	%f14,pp2,%f14
1804	fmuld	%f8,qq2,%f12
1805
1806	faddd	%f22,pp2,%f22
1807	fmuld	%f16,qq2,%f20
1808
1809	faddd	%f30,pp2,%f30
1810	fmuld	%f24,qq2,%f28
1811
1812	faddd	%f6,qq2,%f6
1813	fmuld	%f0,pp2,%f4
1814
1815	fmuld	%f8,%f14,%f14
1816	faddd	%f12,qq1,%f12
1817
1818	fmuld	%f16,%f22,%f22
1819	faddd	%f20,qq1,%f20
1820
1821	fmuld	%f24,%f30,%f30
1822	faddd	%f28,qq1,%f28
1823
1824	fmuld	%f0,%f6,%f6
1825	faddd	%f4,pp1,%f4
1826
1827	faddd	%f14,pp1,%f14
1828	fmuld	%f8,%f12,%f12
1829	add	%l5,%g1,%l5		! lane-1 table pointer
1830
1831	faddd	%f22,pp1,%f22
1832	fmuld	%f16,%f20,%f20
1833	add	%l6,%g1,%l6		! lane-2 table pointer
1834
1835	faddd	%f30,pp1,%f30
1836	fmuld	%f24,%f28,%f28
1837	add	%l7,%g1,%l7		! lane-3 table pointer
1838
1839	faddd	%f6,qq1,%f6
1840	fmuld	%f0,%f4,%f4
1841	add	%l4,%g1,%l4		! lane-0 table pointer
1842
1843	fmuld	%f8,%f14,%f14
1844
1845	fmuld	%f16,%f22,%f22
1846
1847	fmuld	%f24,%f30,%f30
1848
1849	fmuld	%f2,%f4,%f4
1850
1851	fmuld	%f10,%f14,%f14
1852	ldd	[%l5+8],%f8
1853
1854	fmuld	%f18,%f22,%f22
1855	ldd	[%l6+8],%f16
1856
1857	fmuld	%f26,%f30,%f30
1858	ldd	[%l7+8],%f24
1859
1860	fmuld	%f0,%f6,%f6
1861	faddd	%f4,%f32,%f4
1862	ldd	[%l4+16],%f0
1863
1864	fmuld	%f8,%f12,%f12
1865	faddd	%f34,%f14,%f14
1866
1867	fmuld	%f16,%f20,%f20
1868	faddd	%f36,%f22,%f22
1869
1870	fmuld	%f24,%f28,%f28
1871	faddd	%f38,%f30,%f30
1872
1873	fmuld	%f0,%f6,%f6
1874	faddd	%f4,%f2,%f4
1875	ldd	[%l4+8],%f32
1876
1877	faddd	%f10,%f14,%f14
1878	ldd	[%l5+16],%f34
1879
1880	faddd	%f18,%f22,%f22
1881	ldd	[%l6+16],%f36
1882
1883	faddd	%f26,%f30,%f30
1884	ldd	[%l7+16],%f38
1885
1886	fmuld	%f32,%f4,%f4
1887
1888	fmuld	%f34,%f14,%f14
1889
1890	fmuld	%f36,%f22,%f22
1891
1892	fmuld	%f38,%f30,%f30
1893
1894	fsubd	%f6,%f4,%f6
1895
1896	faddd	%f14,%f12,%f14
1897
1898	faddd	%f22,%f20,%f22
1899
1900	faddd	%f30,%f28,%f30
1901
1902	faddd	%f6,%f0,%f6
1903
1904	faddd	%f14,%f8,%f14
1905
1906	faddd	%f22,%f16,%f22
1907
1908	faddd	%f30,%f24,%f30
1909	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
1910
1911	fnegd	%f6,%f4
1912	lda	[%i1]%asi,%l0		! preload next argument
1913
1914	fnegd	%f14,%f12
1915	lda	[%i1]%asi,%f0
1916
1917	fnegd	%f22,%f20
1918	lda	[%i1+4]%asi,%f3
1919
1920	fnegd	%f30,%f28
1921	andn	%l0,%i5,%l0
1922	add	%i1,%i2,%i1
1923
1924	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
1925	fmovdnz	%icc,%f4,%f6
1926	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
1927
1928	andcc	%l1,2,%g0
1929	fmovdnz	%icc,%f12,%f14
1930	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
1931
1932	andcc	%l2,2,%g0
1933	fmovdnz	%icc,%f20,%f22
1934	st	%f22,[%o2]		! high word only; %f23 is stored later
1935
1936	andcc	%l3,2,%g0
1937	fmovdnz	%icc,%f28,%f30
1938	st	%f30,[%o3]		! high word only; %f31 is stored later
1939
1940	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
1941	bg,pt	%icc,.loop0
1942! delay slot
1943	st	%f7,[%o0+4]		! low word of lane-0 result
1944
1945	ba,pt	%icc,.end
1946! delay slot
1947	nop
1948
1949	.align	16
! .case8 -- lane 0 takes the sin path.  This entry first finishes the
! dispatch for lanes 1..3 by testing bit 0 of %l1, %l2 and %l3 in turn:
!   bit 0 of %l1 clear -> .case12 (lane 1 on the sin path)
!   bit 0 of %l2 clear -> .case10 (lane 2 on the sin path)
!   bit 0 of %l3 clear -> .case9  (lane 3 on the sin path)
! otherwise it falls through with lanes 1, 2 and 3 all on the cos path.
! Each test is preceded by the lane's z = x*x and followed by an fxor in
! the (non-annulled) delay slot, which therefore executes on both outcomes.
! NOTE(review): %f14/%f22/%f30 are set before this chunk; the fxor
! presumably folds a sign into %f34/%f36/%f38 -- confirm.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added)
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
1950.case8:
1951	fmuld	%f10,%f10,%f8
1952	andcc	%l1,1,%g0
1953	bz,pn	%icc,.case12		! bit 0 clear: lane 1 takes the sin path
1954! delay slot
1955	fxor	%f14,%f34,%f34
1956
1957	fmuld	%f18,%f18,%f16
1958	andcc	%l2,1,%g0
1959	bz,pn	%icc,.case10		! bit 0 clear: lane 2 takes the sin path
1960! delay slot
1961	fxor	%f22,%f36,%f36
1962
1963	fmuld	%f26,%f26,%f24
1964	andcc	%l3,1,%g0
1965	bz,pn	%icc,.case9		! bit 0 clear: lane 3 takes the sin path
1966! delay slot
1967	fxor	%f30,%f38,%f38
1968
1969	fmuld	%f0,pp3,%f6		! sin(x0)
1970
1971	faddd	%f6,pp2,%f6
1972	fmuld	%f0,qq2,%f4
1973
1974	fmuld	%f8,qq3,%f14		! cos(x1)
1975
1976	fmuld	%f16,qq3,%f22		! cos(x2)
1977
1978	fmuld	%f24,qq3,%f30		! cos(x3)
1979
1980	fmuld	%f0,%f6,%f6
1981	faddd	%f4,qq1,%f4
1982
1983	faddd	%f14,qq2,%f14
1984	fmuld	%f8,pp2,%f12
1985
1986	faddd	%f22,qq2,%f22
1987	fmuld	%f16,pp2,%f20
1988
1989	faddd	%f30,qq2,%f30
1990	fmuld	%f24,pp2,%f28
1991
1992	faddd	%f6,pp1,%f6
1993	fmuld	%f0,%f4,%f4
1994	add	%l4,%g1,%l4		! lane-0 table pointer
1995
1996	fmuld	%f8,%f14,%f14
1997	faddd	%f12,pp1,%f12
1998
1999	fmuld	%f16,%f22,%f22
2000	faddd	%f20,pp1,%f20
2001
2002	fmuld	%f24,%f30,%f30
2003	faddd	%f28,pp1,%f28
2004
2005	fmuld	%f0,%f6,%f6
2006
2007	faddd	%f14,qq1,%f14
2008	fmuld	%f8,%f12,%f12
2009	add	%l5,%g1,%l5		! lane-1 table pointer
2010
2011	faddd	%f22,qq1,%f22
2012	fmuld	%f16,%f20,%f20
2013	add	%l6,%g1,%l6		! lane-2 table pointer
2014
2015	faddd	%f30,qq1,%f30
2016	fmuld	%f24,%f28,%f28
2017	add	%l7,%g1,%l7		! lane-3 table pointer
2018
2019	fmuld	%f2,%f6,%f6
2020	ldd	[%l4+8],%f0
2021
2022	fmuld	%f10,%f12,%f12
2023
2024	fmuld	%f18,%f20,%f20
2025
2026	fmuld	%f26,%f28,%f28
2027
2028	fmuld	%f0,%f4,%f4
2029	faddd	%f32,%f6,%f6
2030
2031	fmuld	%f8,%f14,%f14
2032	faddd	%f12,%f34,%f12
2033	ldd	[%l5+16],%f8
2034
2035	fmuld	%f16,%f22,%f22
2036	faddd	%f20,%f36,%f20
2037	ldd	[%l6+16],%f16
2038
2039	fmuld	%f24,%f30,%f30
2040	faddd	%f28,%f38,%f28
2041	ldd	[%l7+16],%f24
2042
2043	faddd	%f2,%f6,%f6
2044	ldd	[%l4+16],%f32
2045
2046	fmuld	%f8,%f14,%f14
2047	faddd	%f12,%f10,%f12
2048	ldd	[%l5+8],%f34
2049
2050	fmuld	%f16,%f22,%f22
2051	faddd	%f20,%f18,%f20
2052	ldd	[%l6+8],%f36
2053
2054	fmuld	%f24,%f30,%f30
2055	faddd	%f28,%f26,%f28
2056	ldd	[%l7+8],%f38
2057
2058	fmuld	%f32,%f6,%f6
2059
2060	fmuld	%f34,%f12,%f12
2061
2062	fmuld	%f36,%f20,%f20
2063
2064	fmuld	%f38,%f28,%f28
2065
2066	faddd	%f6,%f4,%f6
2067
2068	fsubd	%f14,%f12,%f14
2069
2070	fsubd	%f22,%f20,%f22
2071
2072	fsubd	%f30,%f28,%f30
2073
2074	faddd	%f6,%f0,%f6
2075
2076	faddd	%f14,%f8,%f14
2077
2078	faddd	%f22,%f16,%f22
2079
2080	faddd	%f30,%f24,%f30
2081	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
2082
2083	fnegd	%f6,%f4
2084	lda	[%i1]%asi,%l0		! preload next argument
2085
2086	fnegd	%f14,%f12
2087	lda	[%i1]%asi,%f0
2088
2089	fnegd	%f22,%f20
2090	lda	[%i1+4]%asi,%f3
2091
2092	fnegd	%f30,%f28
2093	andn	%l0,%i5,%l0
2094	add	%i1,%i2,%i1
2095
2096	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
2097	fmovdnz	%icc,%f4,%f6
2098	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
2099
2100	andcc	%l1,2,%g0
2101	fmovdnz	%icc,%f12,%f14
2102	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
2103
2104	andcc	%l2,2,%g0
2105	fmovdnz	%icc,%f20,%f22
2106	st	%f22,[%o2]		! high word only; %f23 is stored later
2107
2108	andcc	%l3,2,%g0
2109	fmovdnz	%icc,%f28,%f30
2110	st	%f30,[%o3]		! high word only; %f31 is stored later
2111
2112	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
2113	bg,pt	%icc,.loop0
2114! delay slot
2115	st	%f7,[%o0+4]		! low word of lane-0 result
2116
2117	ba,pt	%icc,.end
2118! delay slot
2119	nop
2120
2121	.align	16
! .case9 -- lanes 0 and 3 take the sin path; lanes 1 and 2 take the cos path.
! Reached from .case8 once bit 0 of %l3 is found clear.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added); z is overwritten by a table load once
!   its last use has issued
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
2122.case9:
2123	fmuld	%f0,pp3,%f6		! sin(x0)
2124
2125	fmuld	%f24,pp3,%f30		! sin(x3)
2126
2127	faddd	%f6,pp2,%f6
2128	fmuld	%f0,qq2,%f4
2129
2130	fmuld	%f8,qq3,%f14		! cos(x1)
2131
2132	fmuld	%f16,qq3,%f22		! cos(x2)
2133
2134	faddd	%f30,pp2,%f30
2135	fmuld	%f24,qq2,%f28
2136
2137	fmuld	%f0,%f6,%f6
2138	faddd	%f4,qq1,%f4
2139
2140	faddd	%f14,qq2,%f14
2141	fmuld	%f8,pp2,%f12
2142
2143	faddd	%f22,qq2,%f22
2144	fmuld	%f16,pp2,%f20
2145
2146	fmuld	%f24,%f30,%f30
2147	faddd	%f28,qq1,%f28
2148
2149	faddd	%f6,pp1,%f6
2150	fmuld	%f0,%f4,%f4
2151	add	%l4,%g1,%l4		! lane-0 table pointer
2152
2153	fmuld	%f8,%f14,%f14
2154	faddd	%f12,pp1,%f12
2155
2156	fmuld	%f16,%f22,%f22
2157	faddd	%f20,pp1,%f20
2158
2159	faddd	%f30,pp1,%f30
2160	fmuld	%f24,%f28,%f28
2161	add	%l7,%g1,%l7		! lane-3 table pointer
2162
2163	fmuld	%f0,%f6,%f6
2164
2165	faddd	%f14,qq1,%f14
2166	fmuld	%f8,%f12,%f12
2167	add	%l5,%g1,%l5		! lane-1 table pointer
2168
2169	faddd	%f22,qq1,%f22
2170	fmuld	%f16,%f20,%f20
2171	add	%l6,%g1,%l6		! lane-2 table pointer
2172
2173	fmuld	%f24,%f30,%f30
2174
2175	fmuld	%f2,%f6,%f6
2176	ldd	[%l4+8],%f0
2177
2178	fmuld	%f10,%f12,%f12
2179
2180	fmuld	%f18,%f20,%f20
2181
2182	fmuld	%f26,%f30,%f30
2183	ldd	[%l7+8],%f24
2184
2185	fmuld	%f0,%f4,%f4
2186	faddd	%f32,%f6,%f6
2187
2188	fmuld	%f8,%f14,%f14
2189	faddd	%f12,%f34,%f12
2190	ldd	[%l5+16],%f8
2191
2192	fmuld	%f16,%f22,%f22
2193	faddd	%f20,%f36,%f20
2194	ldd	[%l6+16],%f16
2195
2196	fmuld	%f24,%f28,%f28
2197	faddd	%f38,%f30,%f30
2198
2199	faddd	%f2,%f6,%f6
2200	ldd	[%l4+16],%f32
2201
2202	fmuld	%f8,%f14,%f14
2203	faddd	%f12,%f10,%f12
2204	ldd	[%l5+8],%f34
2205
2206	fmuld	%f16,%f22,%f22
2207	faddd	%f20,%f18,%f20
2208	ldd	[%l6+8],%f36
2209
2210	faddd	%f26,%f30,%f30
2211	ldd	[%l7+16],%f38
2212
2213	fmuld	%f32,%f6,%f6
2214
2215	fmuld	%f34,%f12,%f12
2216
2217	fmuld	%f36,%f20,%f20
2218
2219	fmuld	%f38,%f30,%f30
2220
2221	faddd	%f6,%f4,%f6
2222
2223	fsubd	%f14,%f12,%f14
2224
2225	fsubd	%f22,%f20,%f22
2226
2227	faddd	%f30,%f28,%f30
2228
2229	faddd	%f6,%f0,%f6
2230
2231	faddd	%f14,%f8,%f14
2232
2233	faddd	%f22,%f16,%f22
2234
2235	faddd	%f30,%f24,%f30
2236	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
2237
2238	fnegd	%f6,%f4
2239	lda	[%i1]%asi,%l0		! preload next argument
2240
2241	fnegd	%f14,%f12
2242	lda	[%i1]%asi,%f0
2243
2244	fnegd	%f22,%f20
2245	lda	[%i1+4]%asi,%f3
2246
2247	fnegd	%f30,%f28
2248	andn	%l0,%i5,%l0
2249	add	%i1,%i2,%i1
2250
2251	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
2252	fmovdnz	%icc,%f4,%f6
2253	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
2254
2255	andcc	%l1,2,%g0
2256	fmovdnz	%icc,%f12,%f14
2257	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
2258
2259	andcc	%l2,2,%g0
2260	fmovdnz	%icc,%f20,%f22
2261	st	%f22,[%o2]		! high word only; %f23 is stored later
2262
2263	andcc	%l3,2,%g0
2264	fmovdnz	%icc,%f28,%f30
2265	st	%f30,[%o3]		! high word only; %f31 is stored later
2266
2267	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
2268	bg,pt	%icc,.loop0
2269! delay slot
2270	st	%f7,[%o0+4]		! low word of lane-0 result
2271
2272	ba,pt	%icc,.end
2273! delay slot
2274	nop
2275
2276	.align	16
! .case10 -- lanes 0 and 2 take the sin path, lane 1 the cos path.
! Reached from .case8 once bit 0 of %l2 is found clear.  It computes
! z3 = x3*x3 and tests bit 0 of %l3: clear -> .case11 (lane 3 on the sin
! path); otherwise it falls through with lane 3 on the cos path.  The fxor
! sits in a non-annulled delay slot and executes on both outcomes.
! NOTE(review): %f30 is set before this chunk; the fxor presumably folds a
! sign into %f38 -- confirm.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added)
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
2277.case10:
2278	fmuld	%f26,%f26,%f24
2279	andcc	%l3,1,%g0
2280	bz,pn	%icc,.case11		! bit 0 clear: lane 3 takes the sin path
2281! delay slot
2282	fxor	%f30,%f38,%f38
2283
2284	fmuld	%f0,pp3,%f6		! sin(x0)
2285
2286	fmuld	%f16,pp3,%f22		! sin(x2)
2287
2288	faddd	%f6,pp2,%f6
2289	fmuld	%f0,qq2,%f4
2290
2291	fmuld	%f8,qq3,%f14		! cos(x1)
2292
2293	faddd	%f22,pp2,%f22
2294	fmuld	%f16,qq2,%f20
2295
2296	fmuld	%f24,qq3,%f30		! cos(x3)
2297
2298	fmuld	%f0,%f6,%f6
2299	faddd	%f4,qq1,%f4
2300
2301	faddd	%f14,qq2,%f14
2302	fmuld	%f8,pp2,%f12
2303
2304	fmuld	%f16,%f22,%f22
2305	faddd	%f20,qq1,%f20
2306
2307	faddd	%f30,qq2,%f30
2308	fmuld	%f24,pp2,%f28
2309
2310	faddd	%f6,pp1,%f6
2311	fmuld	%f0,%f4,%f4
2312	add	%l4,%g1,%l4		! lane-0 table pointer
2313
2314	fmuld	%f8,%f14,%f14
2315	faddd	%f12,pp1,%f12
2316
2317	faddd	%f22,pp1,%f22
2318	fmuld	%f16,%f20,%f20
2319	add	%l6,%g1,%l6		! lane-2 table pointer
2320
2321	fmuld	%f24,%f30,%f30
2322	faddd	%f28,pp1,%f28
2323
2324	fmuld	%f0,%f6,%f6
2325
2326	faddd	%f14,qq1,%f14
2327	fmuld	%f8,%f12,%f12
2328	add	%l5,%g1,%l5		! lane-1 table pointer
2329
2330	fmuld	%f16,%f22,%f22
2331
2332	faddd	%f30,qq1,%f30
2333	fmuld	%f24,%f28,%f28
2334	add	%l7,%g1,%l7		! lane-3 table pointer
2335
2336	fmuld	%f2,%f6,%f6
2337	ldd	[%l4+8],%f0
2338
2339	fmuld	%f10,%f12,%f12
2340
2341	fmuld	%f18,%f22,%f22
2342	ldd	[%l6+8],%f16
2343
2344	fmuld	%f26,%f28,%f28
2345
2346	fmuld	%f0,%f4,%f4
2347	faddd	%f32,%f6,%f6
2348
2349	fmuld	%f8,%f14,%f14
2350	faddd	%f12,%f34,%f12
2351	ldd	[%l5+16],%f8
2352
2353	fmuld	%f16,%f20,%f20
2354	faddd	%f36,%f22,%f22
2355
2356	fmuld	%f24,%f30,%f30
2357	faddd	%f28,%f38,%f28
2358	ldd	[%l7+16],%f24
2359
2360	faddd	%f2,%f6,%f6
2361	ldd	[%l4+16],%f32
2362
2363	fmuld	%f8,%f14,%f14
2364	faddd	%f12,%f10,%f12
2365	ldd	[%l5+8],%f34
2366
2367	faddd	%f18,%f22,%f22
2368	ldd	[%l6+16],%f36
2369
2370	fmuld	%f24,%f30,%f30
2371	faddd	%f28,%f26,%f28
2372	ldd	[%l7+8],%f38
2373
2374	fmuld	%f32,%f6,%f6
2375
2376	fmuld	%f34,%f12,%f12
2377
2378	fmuld	%f36,%f22,%f22
2379
2380	fmuld	%f38,%f28,%f28
2381
2382	faddd	%f6,%f4,%f6
2383
2384	fsubd	%f14,%f12,%f14
2385
2386	faddd	%f22,%f20,%f22
2387
2388	fsubd	%f30,%f28,%f30
2389
2390	faddd	%f6,%f0,%f6
2391
2392	faddd	%f14,%f8,%f14
2393
2394	faddd	%f22,%f16,%f22
2395
2396	faddd	%f30,%f24,%f30
2397	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
2398
2399	fnegd	%f6,%f4
2400	lda	[%i1]%asi,%l0		! preload next argument
2401
2402	fnegd	%f14,%f12
2403	lda	[%i1]%asi,%f0
2404
2405	fnegd	%f22,%f20
2406	lda	[%i1+4]%asi,%f3
2407
2408	fnegd	%f30,%f28
2409	andn	%l0,%i5,%l0
2410	add	%i1,%i2,%i1
2411
2412	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
2413	fmovdnz	%icc,%f4,%f6
2414	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
2415
2416	andcc	%l1,2,%g0
2417	fmovdnz	%icc,%f12,%f14
2418	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
2419
2420	andcc	%l2,2,%g0
2421	fmovdnz	%icc,%f20,%f22
2422	st	%f22,[%o2]		! high word only; %f23 is stored later
2423
2424	andcc	%l3,2,%g0
2425	fmovdnz	%icc,%f28,%f30
2426	st	%f30,[%o3]		! high word only; %f31 is stored later
2427
2428	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
2429	bg,pt	%icc,.loop0
2430! delay slot
2431	st	%f7,[%o0+4]		! low word of lane-0 result
2432
2433	ba,pt	%icc,.end
2434! delay slot
2435	nop
2436
2437	.align	16
! .case11 -- lanes 0, 2 and 3 take the sin path; lane 1 takes the cos path.
! Reached from .case10 once bit 0 of %l3 is found clear.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added); z is overwritten by a table load once
!   its last use has issued
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
2438.case11:
2439	fmuld	%f0,pp3,%f6		! sin(x0)
2440
2441	fmuld	%f16,pp3,%f22		! sin(x2)
2442
2443	fmuld	%f24,pp3,%f30		! sin(x3)
2444
2445	faddd	%f6,pp2,%f6
2446	fmuld	%f0,qq2,%f4
2447
2448	fmuld	%f8,qq3,%f14		! cos(x1)
2449
2450	faddd	%f22,pp2,%f22
2451	fmuld	%f16,qq2,%f20
2452
2453	faddd	%f30,pp2,%f30
2454	fmuld	%f24,qq2,%f28
2455
2456	fmuld	%f0,%f6,%f6
2457	faddd	%f4,qq1,%f4
2458
2459	faddd	%f14,qq2,%f14
2460	fmuld	%f8,pp2,%f12
2461
2462	fmuld	%f16,%f22,%f22
2463	faddd	%f20,qq1,%f20
2464
2465	fmuld	%f24,%f30,%f30
2466	faddd	%f28,qq1,%f28
2467
2468	faddd	%f6,pp1,%f6
2469	fmuld	%f0,%f4,%f4
2470	add	%l4,%g1,%l4		! lane-0 table pointer
2471
2472	fmuld	%f8,%f14,%f14
2473	faddd	%f12,pp1,%f12
2474
2475	faddd	%f22,pp1,%f22
2476	fmuld	%f16,%f20,%f20
2477	add	%l6,%g1,%l6		! lane-2 table pointer
2478
2479	faddd	%f30,pp1,%f30
2480	fmuld	%f24,%f28,%f28
2481	add	%l7,%g1,%l7		! lane-3 table pointer
2482
2483	fmuld	%f0,%f6,%f6
2484
2485	faddd	%f14,qq1,%f14
2486	fmuld	%f8,%f12,%f12
2487	add	%l5,%g1,%l5		! lane-1 table pointer
2488
2489	fmuld	%f16,%f22,%f22
2490
2491	fmuld	%f24,%f30,%f30
2492
2493	fmuld	%f2,%f6,%f6
2494	ldd	[%l4+8],%f0
2495
2496	fmuld	%f10,%f12,%f12
2497
2498	fmuld	%f18,%f22,%f22
2499	ldd	[%l6+8],%f16
2500
2501	fmuld	%f26,%f30,%f30
2502	ldd	[%l7+8],%f24
2503
2504	fmuld	%f0,%f4,%f4
2505	faddd	%f32,%f6,%f6
2506
2507	fmuld	%f8,%f14,%f14
2508	faddd	%f12,%f34,%f12
2509	ldd	[%l5+16],%f8
2510
2511	fmuld	%f16,%f20,%f20
2512	faddd	%f36,%f22,%f22
2513
2514	fmuld	%f24,%f28,%f28
2515	faddd	%f38,%f30,%f30
2516
2517	faddd	%f2,%f6,%f6
2518	ldd	[%l4+16],%f32
2519
2520	fmuld	%f8,%f14,%f14
2521	faddd	%f12,%f10,%f12
2522	ldd	[%l5+8],%f34
2523
2524	faddd	%f18,%f22,%f22
2525	ldd	[%l6+16],%f36
2526
2527	faddd	%f26,%f30,%f30
2528	ldd	[%l7+16],%f38
2529
2530	fmuld	%f32,%f6,%f6
2531
2532	fmuld	%f34,%f12,%f12
2533
2534	fmuld	%f36,%f22,%f22
2535
2536	fmuld	%f38,%f30,%f30
2537
2538	faddd	%f6,%f4,%f6
2539
2540	fsubd	%f14,%f12,%f14
2541
2542	faddd	%f22,%f20,%f22
2543
2544	faddd	%f30,%f28,%f30
2545
2546	faddd	%f6,%f0,%f6
2547
2548	faddd	%f14,%f8,%f14
2549
2550	faddd	%f22,%f16,%f22
2551
2552	faddd	%f30,%f24,%f30
2553	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
2554
2555	fnegd	%f6,%f4
2556	lda	[%i1]%asi,%l0		! preload next argument
2557
2558	fnegd	%f14,%f12
2559	lda	[%i1]%asi,%f0
2560
2561	fnegd	%f22,%f20
2562	lda	[%i1+4]%asi,%f3
2563
2564	fnegd	%f30,%f28
2565	andn	%l0,%i5,%l0
2566	add	%i1,%i2,%i1
2567
2568	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
2569	fmovdnz	%icc,%f4,%f6
2570	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
2571
2572	andcc	%l1,2,%g0
2573	fmovdnz	%icc,%f12,%f14
2574	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
2575
2576	andcc	%l2,2,%g0
2577	fmovdnz	%icc,%f20,%f22
2578	st	%f22,[%o2]		! high word only; %f23 is stored later
2579
2580	andcc	%l3,2,%g0
2581	fmovdnz	%icc,%f28,%f30
2582	st	%f30,[%o3]		! high word only; %f31 is stored later
2583
2584	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
2585	bg,pt	%icc,.loop0
2586! delay slot
2587	st	%f7,[%o0+4]		! low word of lane-0 result
2588
2589	ba,pt	%icc,.end
2590! delay slot
2591	nop
2592
2593	.align	16
! .case12 -- lanes 0 and 1 take the sin path.  Reached from .case8 once
! bit 0 of %l1 is found clear.  It continues the dispatch for lanes 2 and 3:
!   bit 0 of %l2 clear -> .case14 (lane 2 on the sin path)
!   bit 0 of %l3 clear -> .case13 (lane 3 on the sin path)
! otherwise it falls through with lanes 2 and 3 on the cos path.
! Each test is preceded by the lane's z = x*x and followed by an fxor in
! the (non-annulled) delay slot, which executes on both outcomes.
! NOTE(review): %f22/%f30 are set before this chunk; the fxor presumably
! folds a sign into %f36/%f38 -- confirm.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added)
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
2594.case12:
2595	fmuld	%f18,%f18,%f16
2596	andcc	%l2,1,%g0
2597	bz,pn	%icc,.case14		! bit 0 clear: lane 2 takes the sin path
2598! delay slot
2599	fxor	%f22,%f36,%f36
2600
2601	fmuld	%f26,%f26,%f24
2602	andcc	%l3,1,%g0
2603	bz,pn	%icc,.case13		! bit 0 clear: lane 3 takes the sin path
2604! delay slot
2605	fxor	%f30,%f38,%f38
2606
2607	fmuld	%f0,pp3,%f6		! sin(x0)
2608
2609	fmuld	%f8,pp3,%f14		! sin(x1)
2610
2611	faddd	%f6,pp2,%f6
2612	fmuld	%f0,qq2,%f4
2613
2614	faddd	%f14,pp2,%f14
2615	fmuld	%f8,qq2,%f12
2616
2617	fmuld	%f16,qq3,%f22		! cos(x2)
2618
2619	fmuld	%f24,qq3,%f30		! cos(x3)
2620
2621	fmuld	%f0,%f6,%f6
2622	faddd	%f4,qq1,%f4
2623
2624	fmuld	%f8,%f14,%f14
2625	faddd	%f12,qq1,%f12
2626
2627	faddd	%f22,qq2,%f22
2628	fmuld	%f16,pp2,%f20
2629
2630	faddd	%f30,qq2,%f30
2631	fmuld	%f24,pp2,%f28
2632
2633	faddd	%f6,pp1,%f6
2634	fmuld	%f0,%f4,%f4
2635	add	%l4,%g1,%l4		! lane-0 table pointer
2636
2637	faddd	%f14,pp1,%f14
2638	fmuld	%f8,%f12,%f12
2639	add	%l5,%g1,%l5		! lane-1 table pointer
2640
2641	fmuld	%f16,%f22,%f22
2642	faddd	%f20,pp1,%f20
2643
2644	fmuld	%f24,%f30,%f30
2645	faddd	%f28,pp1,%f28
2646
2647	fmuld	%f0,%f6,%f6
2648
2649	fmuld	%f8,%f14,%f14
2650
2651	faddd	%f22,qq1,%f22
2652	fmuld	%f16,%f20,%f20
2653	add	%l6,%g1,%l6		! lane-2 table pointer
2654
2655	faddd	%f30,qq1,%f30
2656	fmuld	%f24,%f28,%f28
2657	add	%l7,%g1,%l7		! lane-3 table pointer
2658
2659	fmuld	%f2,%f6,%f6
2660	ldd	[%l4+8],%f0
2661
2662	fmuld	%f10,%f14,%f14
2663	ldd	[%l5+8],%f8
2664
2665	fmuld	%f18,%f20,%f20
2666
2667	fmuld	%f26,%f28,%f28
2668
2669	fmuld	%f0,%f4,%f4
2670	faddd	%f32,%f6,%f6
2671
2672	fmuld	%f8,%f12,%f12
2673	faddd	%f34,%f14,%f14
2674
2675	fmuld	%f16,%f22,%f22
2676	faddd	%f20,%f36,%f20
2677	ldd	[%l6+16],%f16
2678
2679	fmuld	%f24,%f30,%f30
2680	faddd	%f28,%f38,%f28
2681	ldd	[%l7+16],%f24
2682
2683	faddd	%f2,%f6,%f6
2684	ldd	[%l4+16],%f32
2685
2686	faddd	%f10,%f14,%f14
2687	ldd	[%l5+16],%f34
2688
2689	fmuld	%f16,%f22,%f22
2690	faddd	%f20,%f18,%f20
2691	ldd	[%l6+8],%f36
2692
2693	fmuld	%f24,%f30,%f30
2694	faddd	%f28,%f26,%f28
2695	ldd	[%l7+8],%f38
2696
2697	fmuld	%f32,%f6,%f6
2698
2699	fmuld	%f34,%f14,%f14
2700
2701	fmuld	%f36,%f20,%f20
2702
2703	fmuld	%f38,%f28,%f28
2704
2705	faddd	%f6,%f4,%f6
2706
2707	faddd	%f14,%f12,%f14
2708
2709	fsubd	%f22,%f20,%f22
2710
2711	fsubd	%f30,%f28,%f30
2712
2713	faddd	%f6,%f0,%f6
2714
2715	faddd	%f14,%f8,%f14
2716
2717	faddd	%f22,%f16,%f22
2718
2719	faddd	%f30,%f24,%f30
2720	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
2721
2722	fnegd	%f6,%f4
2723	lda	[%i1]%asi,%l0		! preload next argument
2724
2725	fnegd	%f14,%f12
2726	lda	[%i1]%asi,%f0
2727
2728	fnegd	%f22,%f20
2729	lda	[%i1+4]%asi,%f3
2730
2731	fnegd	%f30,%f28
2732	andn	%l0,%i5,%l0
2733	add	%i1,%i2,%i1
2734
2735	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
2736	fmovdnz	%icc,%f4,%f6
2737	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
2738
2739	andcc	%l1,2,%g0
2740	fmovdnz	%icc,%f12,%f14
2741	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
2742
2743	andcc	%l2,2,%g0
2744	fmovdnz	%icc,%f20,%f22
2745	st	%f22,[%o2]		! high word only; %f23 is stored later
2746
2747	andcc	%l3,2,%g0
2748	fmovdnz	%icc,%f28,%f30
2749	st	%f30,[%o3]		! high word only; %f31 is stored later
2750
2751	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
2752	bg,pt	%icc,.loop0
2753! delay slot
2754	st	%f7,[%o0+4]		! low word of lane-0 result
2755
2756	ba,pt	%icc,.end
2757! delay slot
2758	nop
2759
2760	.align	16
! .case13 -- lanes 0, 1 and 3 take the sin path; lane 2 takes the cos path.
! Reached from .case12 once bit 0 of %l3 is found clear.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added); z is overwritten by a table load once
!   its last use has issued
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
2761.case13:
2762	fmuld	%f0,pp3,%f6		! sin(x0)
2763
2764	fmuld	%f8,pp3,%f14		! sin(x1)
2765
2766	fmuld	%f24,pp3,%f30		! sin(x3)
2767
2768	faddd	%f6,pp2,%f6
2769	fmuld	%f0,qq2,%f4
2770
2771	faddd	%f14,pp2,%f14
2772	fmuld	%f8,qq2,%f12
2773
2774	fmuld	%f16,qq3,%f22		! cos(x2)
2775
2776	faddd	%f30,pp2,%f30
2777	fmuld	%f24,qq2,%f28
2778
2779	fmuld	%f0,%f6,%f6
2780	faddd	%f4,qq1,%f4
2781
2782	fmuld	%f8,%f14,%f14
2783	faddd	%f12,qq1,%f12
2784
2785	faddd	%f22,qq2,%f22
2786	fmuld	%f16,pp2,%f20
2787
2788	fmuld	%f24,%f30,%f30
2789	faddd	%f28,qq1,%f28
2790
2791	faddd	%f6,pp1,%f6
2792	fmuld	%f0,%f4,%f4
2793	add	%l4,%g1,%l4		! lane-0 table pointer
2794
2795	faddd	%f14,pp1,%f14
2796	fmuld	%f8,%f12,%f12
2797	add	%l5,%g1,%l5		! lane-1 table pointer
2798
2799	fmuld	%f16,%f22,%f22
2800	faddd	%f20,pp1,%f20
2801
2802	faddd	%f30,pp1,%f30
2803	fmuld	%f24,%f28,%f28
2804	add	%l7,%g1,%l7		! lane-3 table pointer
2805
2806	fmuld	%f0,%f6,%f6
2807
2808	fmuld	%f8,%f14,%f14
2809
2810	faddd	%f22,qq1,%f22
2811	fmuld	%f16,%f20,%f20
2812	add	%l6,%g1,%l6		! lane-2 table pointer
2813
2814	fmuld	%f24,%f30,%f30
2815
2816	fmuld	%f2,%f6,%f6
2817	ldd	[%l4+8],%f0
2818
2819	fmuld	%f10,%f14,%f14
2820	ldd	[%l5+8],%f8
2821
2822	fmuld	%f18,%f20,%f20
2823
2824	fmuld	%f26,%f30,%f30
2825	ldd	[%l7+8],%f24
2826
2827	fmuld	%f0,%f4,%f4
2828	faddd	%f32,%f6,%f6
2829
2830	fmuld	%f8,%f12,%f12
2831	faddd	%f34,%f14,%f14
2832
2833	fmuld	%f16,%f22,%f22
2834	faddd	%f20,%f36,%f20
2835	ldd	[%l6+16],%f16
2836
2837	fmuld	%f24,%f28,%f28
2838	faddd	%f38,%f30,%f30
2839
2840	faddd	%f2,%f6,%f6
2841	ldd	[%l4+16],%f32
2842
2843	faddd	%f10,%f14,%f14
2844	ldd	[%l5+16],%f34
2845
2846	fmuld	%f16,%f22,%f22
2847	faddd	%f20,%f18,%f20
2848	ldd	[%l6+8],%f36
2849
2850	faddd	%f26,%f30,%f30
2851	ldd	[%l7+16],%f38
2852
2853	fmuld	%f32,%f6,%f6
2854
2855	fmuld	%f34,%f14,%f14
2856
2857	fmuld	%f36,%f20,%f20
2858
2859	fmuld	%f38,%f30,%f30
2860
2861	faddd	%f6,%f4,%f6
2862
2863	faddd	%f14,%f12,%f14
2864
2865	fsubd	%f22,%f20,%f22
2866
2867	faddd	%f30,%f28,%f30
2868
2869	faddd	%f6,%f0,%f6
2870
2871	faddd	%f14,%f8,%f14
2872
2873	faddd	%f22,%f16,%f22
2874
2875	faddd	%f30,%f24,%f30
2876	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
2877
2878	fnegd	%f6,%f4
2879	lda	[%i1]%asi,%l0		! preload next argument
2880
2881	fnegd	%f14,%f12
2882	lda	[%i1]%asi,%f0
2883
2884	fnegd	%f22,%f20
2885	lda	[%i1+4]%asi,%f3
2886
2887	fnegd	%f30,%f28
2888	andn	%l0,%i5,%l0
2889	add	%i1,%i2,%i1
2890
2891	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
2892	fmovdnz	%icc,%f4,%f6
2893	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
2894
2895	andcc	%l1,2,%g0
2896	fmovdnz	%icc,%f12,%f14
2897	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
2898
2899	andcc	%l2,2,%g0
2900	fmovdnz	%icc,%f20,%f22
2901	st	%f22,[%o2]		! high word only; %f23 is stored later
2902
2903	andcc	%l3,2,%g0
2904	fmovdnz	%icc,%f28,%f30
2905	st	%f30,[%o3]		! high word only; %f31 is stored later
2906
2907	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
2908	bg,pt	%icc,.loop0
2909! delay slot
2910	st	%f7,[%o0+4]		! low word of lane-0 result
2911
2912	ba,pt	%icc,.end
2913! delay slot
2914	nop
2915
2916	.align	16
! .case14 -- lanes 0, 1 and 2 take the sin path.  Reached from .case12 once
! bit 0 of %l2 is found clear.  It computes z3 = x3*x3 and tests bit 0 of
! %l3: clear -> .case15 (lane 3 on the sin path); otherwise it falls through
! with lane 3 on the cos path.  The fxor sits in a non-annulled delay slot
! and executes on both outcomes.
! NOTE(review): %f30 is set before this chunk; the fxor presumably folds a
! sign into %f38 -- confirm.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added)
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! cos path: T16 + (T16*z*(qq1 + z*(qq2 + z*qq3)) - T8*((x*z*(pp1 + z*pp2) + c) + x))
! NOTE(review): pp1..qq3 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
2917.case14:
2918	fmuld	%f26,%f26,%f24
2919	andcc	%l3,1,%g0
2920	bz,pn	%icc,.case15		! bit 0 clear: lane 3 takes the sin path
2921! delay slot
2922	fxor	%f30,%f38,%f38
2923
2924	fmuld	%f0,pp3,%f6		! sin(x0)
2925
2926	fmuld	%f8,pp3,%f14		! sin(x1)
2927
2928	fmuld	%f16,pp3,%f22		! sin(x2)
2929
2930	faddd	%f6,pp2,%f6
2931	fmuld	%f0,qq2,%f4
2932
2933	faddd	%f14,pp2,%f14
2934	fmuld	%f8,qq2,%f12
2935
2936	faddd	%f22,pp2,%f22
2937	fmuld	%f16,qq2,%f20
2938
2939	fmuld	%f24,qq3,%f30		! cos(x3)
2940
2941	fmuld	%f0,%f6,%f6
2942	faddd	%f4,qq1,%f4
2943
2944	fmuld	%f8,%f14,%f14
2945	faddd	%f12,qq1,%f12
2946
2947	fmuld	%f16,%f22,%f22
2948	faddd	%f20,qq1,%f20
2949
2950	faddd	%f30,qq2,%f30
2951	fmuld	%f24,pp2,%f28
2952
2953	faddd	%f6,pp1,%f6
2954	fmuld	%f0,%f4,%f4
2955	add	%l4,%g1,%l4		! lane-0 table pointer
2956
2957	faddd	%f14,pp1,%f14
2958	fmuld	%f8,%f12,%f12
2959	add	%l5,%g1,%l5		! lane-1 table pointer
2960
2961	faddd	%f22,pp1,%f22
2962	fmuld	%f16,%f20,%f20
2963	add	%l6,%g1,%l6		! lane-2 table pointer
2964
2965	fmuld	%f24,%f30,%f30
2966	faddd	%f28,pp1,%f28
2967
2968	fmuld	%f0,%f6,%f6
2969
2970	fmuld	%f8,%f14,%f14
2971
2972	fmuld	%f16,%f22,%f22
2973
2974	faddd	%f30,qq1,%f30
2975	fmuld	%f24,%f28,%f28
2976	add	%l7,%g1,%l7		! lane-3 table pointer
2977
2978	fmuld	%f2,%f6,%f6
2979	ldd	[%l4+8],%f0
2980
2981	fmuld	%f10,%f14,%f14
2982	ldd	[%l5+8],%f8
2983
2984	fmuld	%f18,%f22,%f22
2985	ldd	[%l6+8],%f16
2986
2987	fmuld	%f26,%f28,%f28
2988
2989	fmuld	%f0,%f4,%f4
2990	faddd	%f32,%f6,%f6
2991
2992	fmuld	%f8,%f12,%f12
2993	faddd	%f34,%f14,%f14
2994
2995	fmuld	%f16,%f20,%f20
2996	faddd	%f36,%f22,%f22
2997
2998	fmuld	%f24,%f30,%f30
2999	faddd	%f28,%f38,%f28
3000	ldd	[%l7+16],%f24
3001
3002	faddd	%f2,%f6,%f6
3003	ldd	[%l4+16],%f32
3004
3005	faddd	%f10,%f14,%f14
3006	ldd	[%l5+16],%f34
3007
3008	faddd	%f18,%f22,%f22
3009	ldd	[%l6+16],%f36
3010
3011	fmuld	%f24,%f30,%f30
3012	faddd	%f28,%f26,%f28
3013	ldd	[%l7+8],%f38
3014
3015	fmuld	%f32,%f6,%f6
3016
3017	fmuld	%f34,%f14,%f14
3018
3019	fmuld	%f36,%f22,%f22
3020
3021	fmuld	%f38,%f28,%f28
3022
3023	faddd	%f6,%f4,%f6
3024
3025	faddd	%f14,%f12,%f14
3026
3027	faddd	%f22,%f20,%f22
3028
3029	fsubd	%f30,%f28,%f30
3030
3031	faddd	%f6,%f0,%f6
3032
3033	faddd	%f14,%f8,%f14
3034
3035	faddd	%f22,%f16,%f22
3036
3037	faddd	%f30,%f24,%f30
3038	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
3039
3040	fnegd	%f6,%f4
3041	lda	[%i1]%asi,%l0		! preload next argument
3042
3043	fnegd	%f14,%f12
3044	lda	[%i1]%asi,%f0
3045
3046	fnegd	%f22,%f20
3047	lda	[%i1+4]%asi,%f3
3048
3049	fnegd	%f30,%f28
3050	andn	%l0,%i5,%l0
3051	add	%i1,%i2,%i1
3052
3053	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
3054	fmovdnz	%icc,%f4,%f6
3055	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
3056
3057	andcc	%l1,2,%g0
3058	fmovdnz	%icc,%f12,%f14
3059	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
3060
3061	andcc	%l2,2,%g0
3062	fmovdnz	%icc,%f20,%f22
3063	st	%f22,[%o2]		! high word only; %f23 is stored later
3064
3065	andcc	%l3,2,%g0
3066	fmovdnz	%icc,%f28,%f30
3067	st	%f30,[%o3]		! high word only; %f31 is stored later
3068
3069	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
3070	bg,pt	%icc,.loop0
3071! delay slot
3072	st	%f7,[%o0+4]		! low word of lane-0 result
3073
3074	ba,pt	%icc,.end
3075! delay slot
3076	nop
3077
3078	.align	16
! .case15 -- all four lanes take the sin path.
! Reached from .case14 once bit 0 of %l3 is found clear.
!
! Register roles, listed for lanes 0/1/2/3:
!   x = %f2/%f10/%f18/%f26, z = %f0/%f8/%f16/%f24, c = %f32/%f34/%f36/%f38
!   T8, T16 = doubles at offsets 8 and 16 from the lane's table pointer
!   (%l4..%l7 after %g1 is added); z is overwritten by the T8 load once
!   its last use has issued
! sin path: T8 + (T16*(x + (c + x*z*(pp1 + z*(pp2 + z*pp3)))) + T8*z*(qq1 + z*qq2))
! NOTE(review): pp1..qq2 are defined before this chunk; presumably they name
! the registers holding the coefficients in the "constants" table -- confirm.
3079.case15:
3080	fmuld	%f0,pp3,%f6		! sin(x0)
3081
3082	fmuld	%f8,pp3,%f14		! sin(x1)
3083
3084	fmuld	%f16,pp3,%f22		! sin(x2)
3085
3086	fmuld	%f24,pp3,%f30		! sin(x3)
3087
3088	faddd	%f6,pp2,%f6
3089	fmuld	%f0,qq2,%f4
3090
3091	faddd	%f14,pp2,%f14
3092	fmuld	%f8,qq2,%f12
3093
3094	faddd	%f22,pp2,%f22
3095	fmuld	%f16,qq2,%f20
3096
3097	faddd	%f30,pp2,%f30
3098	fmuld	%f24,qq2,%f28
3099
3100	fmuld	%f0,%f6,%f6
3101	faddd	%f4,qq1,%f4
3102
3103	fmuld	%f8,%f14,%f14
3104	faddd	%f12,qq1,%f12
3105
3106	fmuld	%f16,%f22,%f22
3107	faddd	%f20,qq1,%f20
3108
3109	fmuld	%f24,%f30,%f30
3110	faddd	%f28,qq1,%f28
3111
3112	faddd	%f6,pp1,%f6
3113	fmuld	%f0,%f4,%f4
3114	add	%l4,%g1,%l4		! lane-0 table pointer
3115
3116	faddd	%f14,pp1,%f14
3117	fmuld	%f8,%f12,%f12
3118	add	%l5,%g1,%l5		! lane-1 table pointer
3119
3120	faddd	%f22,pp1,%f22
3121	fmuld	%f16,%f20,%f20
3122	add	%l6,%g1,%l6		! lane-2 table pointer
3123
3124	faddd	%f30,pp1,%f30
3125	fmuld	%f24,%f28,%f28
3126	add	%l7,%g1,%l7		! lane-3 table pointer
3127
3128	fmuld	%f0,%f6,%f6
3129
3130	fmuld	%f8,%f14,%f14
3131
3132	fmuld	%f16,%f22,%f22
3133
3134	fmuld	%f24,%f30,%f30
3135
3136	fmuld	%f2,%f6,%f6
3137	ldd	[%l4+8],%f0
3138
3139	fmuld	%f10,%f14,%f14
3140	ldd	[%l5+8],%f8
3141
3142	fmuld	%f18,%f22,%f22
3143	ldd	[%l6+8],%f16
3144
3145	fmuld	%f26,%f30,%f30
3146	ldd	[%l7+8],%f24
3147
3148	fmuld	%f0,%f4,%f4
3149	faddd	%f32,%f6,%f6
3150
3151	fmuld	%f8,%f12,%f12
3152	faddd	%f34,%f14,%f14
3153
3154	fmuld	%f16,%f20,%f20
3155	faddd	%f36,%f22,%f22
3156
3157	fmuld	%f24,%f28,%f28
3158	faddd	%f38,%f30,%f30
3159
3160	faddd	%f2,%f6,%f6
3161	ldd	[%l4+16],%f32
3162
3163	faddd	%f10,%f14,%f14
3164	ldd	[%l5+16],%f34
3165
3166	faddd	%f18,%f22,%f22
3167	ldd	[%l6+16],%f36
3168
3169	faddd	%f26,%f30,%f30
3170	ldd	[%l7+16],%f38
3171
3172	fmuld	%f32,%f6,%f6
3173
3174	fmuld	%f34,%f14,%f14
3175
3176	fmuld	%f36,%f22,%f22
3177
3178	fmuld	%f38,%f30,%f30
3179
3180	faddd	%f6,%f4,%f6
3181
3182	faddd	%f14,%f12,%f14
3183
3184	faddd	%f22,%f20,%f22
3185
3186	faddd	%f30,%f28,%f30
3187
3188	faddd	%f6,%f0,%f6
3189
3190	faddd	%f14,%f8,%f14
3191
3192	faddd	%f22,%f16,%f22
3193
3194	faddd	%f30,%f24,%f30
3195	mov	%l0,%l4			! keep lane-0 sign selector; %l0 is reloaded next
3196
3197	fnegd	%f6,%f4
3198	lda	[%i1]%asi,%l0		! preload next argument
3199
3200	fnegd	%f14,%f12
3201	lda	[%i1]%asi,%f0
3202
3203	fnegd	%f22,%f20
3204	lda	[%i1+4]%asi,%f3
3205
3206	fnegd	%f30,%f28
3207	andn	%l0,%i5,%l0
3208	add	%i1,%i2,%i1
3209
3210	andcc	%l4,2,%g0		! bit 1 set: use the negated lane-0 result
3211	fmovdnz	%icc,%f4,%f6
3212	st	%f6,[%o0]		! high word; low word is stored in the delay slot below
3213
3214	andcc	%l1,2,%g0
3215	fmovdnz	%icc,%f12,%f14
3216	st	%f14,[%o1]		! high word only; %f15 is stored later (see .end, .last1)
3217
3218	andcc	%l2,2,%g0
3219	fmovdnz	%icc,%f20,%f22
3220	st	%f22,[%o2]		! high word only; %f23 is stored later
3221
3222	andcc	%l3,2,%g0
3223	fmovdnz	%icc,%f28,%f30
3224	st	%f30,[%o3]		! high word only; %f31 is stored later
3225
3226	addcc	%i0,-1,%i0		! decrement remaining count; loop while > 0
3227	bg,pt	%icc,.loop0
3228! delay slot
3229	st	%f7,[%o0+4]		! low word of lane-0 result
3230
3231	ba,pt	%icc,.end
3232! delay slot
3233	nop
3234
3235
3236	.align	16
! .end -- common exit.  Writes the still-pending low words of the lane 1..3
! results, then checks the "biguns" flag in the frame.  If it is zero the
! routine returns; otherwise the saved arguments are reloaded from the frame
! into %o0..%o4 and __vlibm_vsin_big_ultra3 is called before returning.
! xsave/ysave are reloaded with ldx under __sparcv9 and ld otherwise; the
! 32-bit nsave/sxsave/sysave values are sign-extended with sra.
! NOTE(review): %o5 is not reloaded here -- it is passed as left by earlier
! code (presumably a threshold kept live in %o5) and only sign-extended in
! the call's delay slot; confirm against the callee's prototype.
3237.end:
3238	st	%f15,[%o1+4]		! low word of lane-1 result
3239	st	%f23,[%o2+4]		! low word of lane-2 result
3240	st	%f31,[%o3+4]		! low word of lane-3 result
3241	ld	[%fp+biguns],%i5
3242	tst	%i5			! check for huge arguments remaining
3243	be,pt	%icc,.exit
3244! delay slot
3245	nop
3246#ifdef __sparcv9
3247	ldx	[%fp+xsave],%o1
3248	ldx	[%fp+ysave],%o3
3249#else
3250	ld	[%fp+xsave],%o1
3251	ld	[%fp+ysave],%o3
3252#endif
3253	ld	[%fp+nsave],%o0
3254	ld	[%fp+sxsave],%o2
3255	ld	[%fp+sysave],%o4
3256	sra	%o2,0,%o2		! sign-extend for V9
3257	sra	%o4,0,%o4
3258	call	__vlibm_vsin_big_ultra3
3259	sra	%o5,0,%o5		! delay slot
3260
3261.exit:
3262	ret
3263	restore
3264
3265
3266	.align	16
! .last1 / .last2 / .last3 -- fall-through chain that fills lanes with no
! input left.  Entering at .lastN pads lanes N..3: each padded lane gets
! a zero selector (%l1/%l2/%l3 = 0), a zero argument (%f8,%f10 / %f16,%f18 /
! %f24,%f26) and has its output pointer (%o1/%o2/%o3) redirected to the
! "junk" scratch slot in the frame, so its stores are harmless.
! The .lastN entries first store the pending low word of that lane's
! previous result (%f15/%f23/%f31) through the still-valid output pointer
! and issue the c3two44 add for the preceding lane; the
! .lastN_from_rangeN entries (used by .range1/.range2/.range3) skip those
! two instructions.  Control then rejoins the main path at .cont.
! NOTE(review): c3two44, junk, nk0 and nk1 are defined before this chunk;
! the faddd/st pairs presumably reproduce work the skipped part of the main
! loop would have done before .cont -- confirm.
3267.last1:
3268	faddd	%f2,c3two44,%f4
3269	st	%f15,[%o1+4]		! pending low word of lane-1 result
3270.last1_from_range1:
3271	mov	0,%l1
3272	fzeros	%f8
3273	fzero	%f10
3274	add	%fp,junk,%o1		! lane-1 output now goes to the junk slot
3275.last2:
3276	faddd	%f10,c3two44,%f12
3277	st	%f23,[%o2+4]		! pending low word of lane-2 result
3278.last2_from_range2:
3279	mov	0,%l2
3280	fzeros	%f16
3281	fzero	%f18
3282	add	%fp,junk,%o2		! lane-2 output now goes to the junk slot
3283.last3:
3284	faddd	%f18,c3two44,%f20
3285	st	%f31,[%o3+4]		! pending low word of lane-3 result
3286	st	%f5,[%fp+nk0]
3287	st	%f13,[%fp+nk1]
3288.last3_from_range3:
3289	mov	0,%l3
3290	fzeros	%f24
3291	fzero	%f26
3292	ba,pt	%icc,.cont
3293! delay slot
3294	add	%fp,junk,%o3		! lane-3 output now goes to the junk slot
3295
3296
3297	.align	16
! .range0 -- lane-0 argument is outside the range the main loop handles.
! %l0 holds its high word with the sign bit cleared; three outcomes:
!   hx < %o4          : store the argument itself as the result (label 1);
!                       fdtoi is issued only for its inexact side effect
!   hx < 0x7ff00000   : finite but large -- store a non-zero value (%o4) in
!                       "biguns" so .end hands the array to the big-argument
!                       routine, and write nothing here
!   otherwise         : store x * 0 (inf or NaN input)
! Label 2 then decrements the count (-> .end when none remain), advances y,
! moves the already-loaded next argument from lane 1's registers
! (%l1, %f8, %f11) into lane 0's and re-enters .loop0.
! NOTE(review): %f2 is presumably the double whose words are %f0 (high) and
! %f3 (low) as loaded by the preload code -- confirm where %f2's high word
! is set.
3298.range0:
3299	cmp	%l0,%o4
3300	bl,pt	%icc,1f			! hx < 0x3e400000
3301! delay slot, harmless if branch taken
3302	sethi	%hi(0x7ff00000),%o7
3303	cmp	%l0,%o7
3304	bl,a,pt	%icc,2f			! branch if finite
3305! delay slot, squashed if branch not taken
3306	st	%o4,[%fp+biguns]	! set biguns
3307	fzero	%f0
3308	fmuld	%f2,%f0,%f2		! x * 0 for inf/NaN
3309	st	%f2,[%o0]
3310	ba,pt	%icc,2f
3311! delay slot
3312	st	%f3,[%o0+4]
33131:
3314	fdtoi	%f2,%f4			! raise inexact if not zero
3315	st	%f0,[%o0]		! result = x: high word
3316	st	%f3,[%o0+4]		! result = x: low word
33172:
3318	addcc	%i0,-1,%i0
3319	ble,pn	%icc,.end
3320! delay slot, harmless if branch taken
3321	add	%i3,%i4,%i3		! y += stridey
3322	andn	%l1,%i5,%l0		! hx &= ~0x80000000
3323	fmovs	%f8,%f0
3324	fmovs	%f11,%f3
3325	ba,pt	%icc,.loop0
3326! delay slot
3327	add	%i1,%i2,%i1		! x += stridex
3328
3329
3330	.align	16
! .range1 -- lane-1 argument is outside the range the main loop handles.
! %l1 holds its high word with the sign bit cleared; three outcomes:
!   hx < %o4          : store the argument itself as the result (label 1);
!                       fdtoi is issued only for its inexact side effect
!   hx < 0x7ff00000   : finite but large -- store a non-zero value (%o4) in
!                       "biguns" so .end hands the array to the big-argument
!                       routine, and write nothing here
!   otherwise         : store x * 0 (inf or NaN input)
! Label 2 then decrements the count (-> .last1_from_range1 when none remain,
! which pads lanes 1..3), advances y, moves the already-loaded next argument
! from lane 2's registers (%l2, %f16, %f19) into lane 1's and re-enters
! .loop1.
3331.range1:
3332	cmp	%l1,%o4
3333	bl,pt	%icc,1f			! hx < 0x3e400000
3334! delay slot, harmless if branch taken
3335	sethi	%hi(0x7ff00000),%o7
3336	cmp	%l1,%o7
3337	bl,a,pt	%icc,2f			! branch if finite
3338! delay slot, squashed if branch not taken
3339	st	%o4,[%fp+biguns]	! set biguns
3340	fzero	%f8
3341	fmuld	%f10,%f8,%f10		! x * 0 for inf/NaN
3342	st	%f10,[%o1]
3343	ba,pt	%icc,2f
3344! delay slot
3345	st	%f11,[%o1+4]
33461:
3347	fdtoi	%f10,%f12		! raise inexact if not zero
3348	st	%f8,[%o1]		! result = x: high word
3349	st	%f11,[%o1+4]		! result = x: low word
33502:
3351	addcc	%i0,-1,%i0
3352	ble,pn	%icc,.last1_from_range1
3353! delay slot, harmless if branch taken
3354	add	%i3,%i4,%i3		! y += stridey
3355	andn	%l2,%i5,%l1		! hx &= ~0x80000000
3356	fmovs	%f16,%f8
3357	fmovs	%f19,%f11
3358	ba,pt	%icc,.loop1
3359! delay slot
3360	add	%i1,%i2,%i1		! x += stridex
3361
3362
3363	.align	16
! .range2 -- lane-2 argument is outside the range the main loop handles.
! %l2 holds its high word with the sign bit cleared; three outcomes:
!   hx < %o4          : store the argument itself as the result (label 1);
!                       fdtoi is issued only for its inexact side effect
!   hx < 0x7ff00000   : finite but large -- store a non-zero value (%o4) in
!                       "biguns" so .end hands the array to the big-argument
!                       routine, and write nothing here
!   otherwise         : store x * 0 (inf or NaN input)
! Label 2 then decrements the count (-> .last2_from_range2 when none remain,
! which pads lanes 2..3), advances y, moves the already-loaded next argument
! from lane 3's registers (%l3, %f24, %f27) into lane 2's and re-enters
! .loop2.
3364.range2:
3365	cmp	%l2,%o4
3366	bl,pt	%icc,1f			! hx < 0x3e400000
3367! delay slot, harmless if branch taken
3368	sethi	%hi(0x7ff00000),%o7
3369	cmp	%l2,%o7
3370	bl,a,pt	%icc,2f			! branch if finite
3371! delay slot, squashed if branch not taken
3372	st	%o4,[%fp+biguns]	! set biguns
3373	fzero	%f16
3374	fmuld	%f18,%f16,%f18		! x * 0 for inf/NaN
3375	st	%f18,[%o2]
3376	ba,pt	%icc,2f
3377! delay slot
3378	st	%f19,[%o2+4]
33791:
3380	fdtoi	%f18,%f20		! raise inexact if not zero
3381	st	%f16,[%o2]		! result = x: high word
3382	st	%f19,[%o2+4]		! result = x: low word
33832:
3384	addcc	%i0,-1,%i0
3385	ble,pn	%icc,.last2_from_range2
3386! delay slot, harmless if branch taken
3387	add	%i3,%i4,%i3		! y += stridey
3388	andn	%l3,%i5,%l2		! hx &= ~0x80000000
3389	fmovs	%f24,%f16
3390	fmovs	%f27,%f19
3391	ba,pt	%icc,.loop2
3392! delay slot
3393	add	%i1,%i2,%i1		! x += stridex
3394
3395
3396	.align	16
! .range3 -- lane-3 argument is outside the range the main loop handles.
! %l3 holds its high word with the sign bit cleared; three outcomes:
!   hx < %o4          : store the argument itself as the result (label 1);
!                       fdtoi is issued only for its inexact side effect
!   hx < 0x7ff00000   : finite but large -- store a non-zero value (%o4) in
!                       "biguns" so .end hands the array to the big-argument
!                       routine, and write nothing here
!   otherwise         : store x * 0 (inf or NaN input)
! Label 2 then decrements the count (-> .last3_from_range3 when none remain,
! which pads lane 3) and advances y.  Unlike .range0-.range2 there is no
! later lane to take a preloaded argument from, so the next argument is
! loaded from [%i1] with plain ld (the count check has already shown one
! remains) before re-entering .loop3.
3397.range3:
3398	cmp	%l3,%o4
3399	bl,pt	%icc,1f			! hx < 0x3e400000
3400! delay slot, harmless if branch taken
3401	sethi	%hi(0x7ff00000),%o7
3402	cmp	%l3,%o7
3403	bl,a,pt	%icc,2f			! branch if finite
3404! delay slot, squashed if branch not taken
3405	st	%o4,[%fp+biguns]	! set biguns
3406	fzero	%f24
3407	fmuld	%f26,%f24,%f26		! x * 0 for inf/NaN
3408	st	%f26,[%o3]
3409	ba,pt	%icc,2f
3410! delay slot
3411	st	%f27,[%o3+4]
34121:
3413	fdtoi	%f26,%f28		! raise inexact if not zero
3414	st	%f24,[%o3]		! result = x: high word
3415	st	%f27,[%o3+4]		! result = x: low word
34162:
3417	addcc	%i0,-1,%i0
3418	ble,pn	%icc,.last3_from_range3
3419! delay slot, harmless if branch taken
3420	add	%i3,%i4,%i3		! y += stridey
3421	ld	[%i1],%l3		! next argument: high word as integer
3422	ld	[%i1],%f24		! next argument: high word
3423	ld	[%i1+4],%f27		! next argument: low word
3424	andn	%l3,%i5,%l3		! hx &= ~0x80000000
3425	ba,pt	%icc,.loop3
3426! delay slot
3427	add	%i1,%i2,%i1		! x += stridex
3428
3429	SET_SIZE(__vsin_ultra3)
3430
3431