xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S (revision cffcfaee1e6b29ef9ceb7d80e4e053ffd029906b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vsin_ultra3.S"
30
31#include "libm.h"
32#if defined(LIBMVEC_SO_BUILD)
33	.weak	__vsin
34	.type	__vsin,#function
35	__vsin = __vsin_ultra3
36#endif
37
/*
 * Double-precision constants, each stored high word first.  The function
 * prologue loads them, in this order, into %f40-%f62.
 *
 *   3 * 2^44, 3 * 2^51  rounding constants: after one is added to a value,
 *                       the low word of the sum is read back as an integer
 *                       (the table index k, or the quadrant count n)
 *   invpio2             2/pi
 *   pio2_1 .. pio2_3    pi/2 split into three pieces, subtracted in turn
 *                       during argument reduction
 *   pp1 .. pp3          coefficients used where the code comments say
 *                       sin(x); close to -1/6, 1/120, -1/5040
 *   qq1 .. qq3          coefficients used where the code comments say
 *                       cos(x); close to -1/2, 1/24, -1/720
 */
38	RO_DATA
39	.align	64
40constants:
41	.word	0x42c80000,0x00000000	! 3 * 2^44
42	.word	0x43380000,0x00000000	! 3 * 2^51
43	.word	0x3fe45f30,0x6dc9c883	! invpio2
44	.word	0x3ff921fb,0x54442c00	! pio2_1
45	.word	0x3d318469,0x898cc400	! pio2_2
46	.word	0x3a71701b,0x839a2520	! pio2_3
47	.word	0xbfc55555,0x55555533	! pp1
48	.word	0x3f811111,0x10e7d53b	! pp2
49	.word	0xbf2a0167,0xe6b3cf9b	! pp3
50	.word	0xbfdfffff,0xffffff65	! qq1
51	.word	0x3fa55555,0x54f88ed0	! qq2
52	.word	0xbf56c12c,0xdd185f60	! qq3
53
54! local storage indices
55
/*
 * Offsets from %fp of the scratch slots (used as [%fp+name]) inside the
 * tmps bytes that the save at function entry adds to the frame.
 *
 *   xsave, ysave    copies of the x and y pointer arguments
 *   nsave           copy of n
 *   sxsave, sysave  copies of stridex and stridey, stored before the
 *                   strides are scaled
 *   biguns          flag word, cleared on entry
 *   nk0 .. nk3      one word per lane, used to move the low word of a
 *                   floating point register into an integer register
 *   junk            dummy store target; py1..py3 point here until a real
 *                   output address has been assigned to them
 *
 * Do not put trailing ! comments on the #define lines: they would become
 * part of the macro body.
 */
56#define xsave		STACK_BIAS-0x8
57#define ysave		STACK_BIAS-0x10
58#define nsave		STACK_BIAS-0x14
59#define sxsave		STACK_BIAS-0x18
60#define sysave		STACK_BIAS-0x1c
61#define biguns		STACK_BIAS-0x20
62#define nk3		STACK_BIAS-0x24
63#define nk2		STACK_BIAS-0x28
64#define nk1		STACK_BIAS-0x2c
65#define nk0		STACK_BIAS-0x30
66#define junk		STACK_BIAS-0x38
67! sizeof temp storage - must be a multiple of 16 for V9
68#define tmps		0x40
69
70! register use
71
72! i0  n
73! i1  x
74! i2  stridex
75! i3  y
76! i4  stridey
77! i5  0x80000000
78
79! l0  hx0
80! l1  hx1
81! l2  hx2
82! l3  hx3
83! l4  k0
84! l5  k1
85! l6  k2
86! l7  k3
87
88! the following are 64-bit registers in both V8+ and V9
89
90! g1  __vlibm_TBL_sincos2
91! g5  scratch
92
93! o0  py0
94! o1  py1
95! o2  py2
96! o3  py3
97! o4  0x3e400000
98! o5  0x3fe921fb,0x4099251e
99! o7  scratch
100
101! f0  hx0
102! f2
103! f4
104! f6
105! f8  hx1
106! f10
107! f12
108! f14
109! f16 hx2
110! f18
111! f20
112! f22
113! f24 hx3
114! f26
115! f28
116! f30
117! f32
118! f34
119! f36
120! f38
121
/*
 * Names for the floating point registers that receive the twelve entries
 * of the constants table; the function prologue loads them with ldd in
 * table order (offsets 0x00 through 0x58).
 */
122#define c3two44	%f40
123#define c3two51	%f42
124#define invpio2	%f44
125#define pio2_1	%f46
126#define pio2_2	%f48
127#define pio2_3	%f50
128#define pp1	%f52
129#define pp2	%f54
130#define pp3	%f56
131#define qq1	%f58
132#define qq2	%f60
133#define qq3	%f62
134
/*
 * __vsin_ultra3: vector sine.
 *
 * Arguments, per the register-use table: n (%i0), x (%i1), stridex (%i2),
 * y (%i3), stridey (%i4).  Strides count doubles; they are scaled by 8
 * below.  Presumably computes y[i*stridey] = sin(x[i*stridex]) for
 * 0 <= i < n - confirm against the C prototype.
 *
 * Elements are processed four at a time (lanes 0-3).  This prologue saves
 * the arguments, loads the constants, loads the first argument and enters
 * the pipeline at .loop0.
 */
135	ENTRY(__vsin_ultra3)
136	save	%sp,-SA(MINFRAME)-tmps,%sp
/* %o0 = address of constants, %g1 = address of __vlibm_TBL_sincos2 */
137	PIC_SETUP(l7)
138	PIC_SET(l7,constants,o0)
139	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
140	mov	%o1,%g1
141	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
142#ifdef __sparcv9
143	stx	%i1,[%fp+xsave]		! save arguments
144	stx	%i3,[%fp+ysave]
145#else
146	st	%i1,[%fp+xsave]		! save arguments
147	st	%i3,[%fp+ysave]
148#endif
149	st	%i0,[%fp+nsave]
150	st	%i2,[%fp+sxsave]
151	st	%i4,[%fp+sysave]
152	st	%g0,[%fp+biguns]	! biguns = 0
153	ldd	[%o0+0x00],c3two44	! load/set up constants
154	ldd	[%o0+0x08],c3two51
155	ldd	[%o0+0x10],invpio2
156	ldd	[%o0+0x18],pio2_1
157	ldd	[%o0+0x20],pio2_2
158	ldd	[%o0+0x28],pio2_3
159	ldd	[%o0+0x30],pp1
160	ldd	[%o0+0x38],pp2
161	ldd	[%o0+0x40],pp3
162	ldd	[%o0+0x48],qq1
163	ldd	[%o0+0x50],qq2
164	ldd	[%o0+0x58],qq3
/*
 * Integer constants for the range checks on hx, the high word of |x|:
 *   %i5 = 0x80000000, the sign bit mask
 *   %o4 = 0x3e400000, the lower bound
 *   %o5 = 0x3fe921fb in the upper 32 bits (the primary/medium boundary)
 *         and 0x4099251e in the lower 32 bits (the upper bound)
 */
165	sethi	%hi(0x80000000),%i5
166	sethi	%hi(0x3e400000),%o4
167	sethi	%hi(0x3fe921fb),%o5
168	or	%o5,%lo(0x3fe921fb),%o5
169	sllx	%o5,32,%o5
170	sethi	%hi(0x4099251e),%o7
171	or	%o7,%lo(0x4099251e),%o7
172	or	%o5,%o7,%o5
173	sll	%i2,3,%i2		! scale strides
174	sll	%i4,3,%i4
/*
 * No results are pending yet, so py1..py3 point at the junk slot; the
 * deferred low-word stores in .loop0-.loop2 then land in scratch.
 */
175	add	%fp,junk,%o1		! loop prologue
176	add	%fp,junk,%o2
177	add	%fp,junk,%o3
/*
 * First argument: hx goes to both %l0 and %f0.  The low word is loaded
 * into %f3 so that %f2:%f3 is |x| once .loop0 writes %f2 with fabss; it
 * is copied to %f1 at .cont.
 */
178	ld	[%i1],%l0		! *x
179	ld	[%i1],%f0
180	ld	[%i1+4],%f3
181	andn	%l0,%i5,%l0		! mask off sign
182	ba	.loop0
183	add	%i1,%i2,%i1		! x += stridex
184
/*
 * .loop0 - .loop2: start lanes 0, 1 and 2 of a group.  Each stage
 *
 *   - reads the next argument ahead of time through %asi, which the
 *     prologue set up for non-faulting loads (the read happens before the
 *     element count is checked);
 *   - range-checks hx of the current lane: hx - 0x3e400000 and
 *     0x4099251e - hx are OR-ed, and the result is negative in %icc
 *     exactly when hx lies outside [0x3e400000, 0x4099251e], in which
 *     case control goes to .rangeN;
 *   - records the output pointer pyN and advances y;
 *   - decrements n and leaves through .last1/.last2/.last3 when it is
 *     then <= 0;
 *   - starts |x| + 3 * 2^44, whose low word becomes the table index k;
 *   - stores the low word of the previous result of the NEXT lane
 *     (%f15, %f23, %f31) through that lane's old py pointer, before the
 *     following stage overwrites the pointer.  On the first pass those
 *     pointers still address the junk slot.
 *
 * The instruction after each branch is its delay slot and is executed
 * whether or not the branch is taken (the branches are not annulled).
 */
185! 16-byte aligned
186	.align	16
187.loop0:
188	lda	[%i1]%asi,%l1		! preload next argument
189	sub	%l0,%o4,%g5
190	sub	%o5,%l0,%o7
191	fabss	%f0,%f2
192
193	lda	[%i1]%asi,%f8
194	orcc	%o7,%g5,%g0
195	mov	%i3,%o0			! py0 = y
196	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e
197
198! delay slot
199	lda	[%i1+4]%asi,%f11
200	addcc	%i0,-1,%i0
201	add	%i3,%i4,%i3		! y += stridey
202	ble,pn	%icc,.last1
203
204! delay slot
205	andn	%l1,%i5,%l1
206	add	%i1,%i2,%i1		! x += stridex
207	faddd	%f2,c3two44,%f4
208	st	%f15,[%o1+4]
209
210.loop1:
211	lda	[%i1]%asi,%l2		! preload next argument
212	sub	%l1,%o4,%g5
213	sub	%o5,%l1,%o7
214	fabss	%f8,%f10
215
216	lda	[%i1]%asi,%f16
217	orcc	%o7,%g5,%g0
218	mov	%i3,%o1			! py1 = y
219	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e
220
221! delay slot
222	lda	[%i1+4]%asi,%f19
223	addcc	%i0,-1,%i0
224	add	%i3,%i4,%i3		! y += stridey
225	ble,pn	%icc,.last2
226
227! delay slot
228	andn	%l2,%i5,%l2
229	add	%i1,%i2,%i1		! x += stridex
230	faddd	%f10,c3two44,%f12
231	st	%f23,[%o2+4]
232
233.loop2:
234	lda	[%i1]%asi,%l3		! preload next argument
235	sub	%l2,%o4,%g5
236	sub	%o5,%l2,%o7
237	fabss	%f16,%f18
238
239	lda	[%i1]%asi,%f24
240	orcc	%o7,%g5,%g0
241	mov	%i3,%o2			! py2 = y
242	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
243
244! delay slot
245	lda	[%i1+4]%asi,%f27
246	addcc	%i0,-1,%i0
247	add	%i3,%i4,%i3		! y += stridey
248	ble,pn	%icc,.last3
249
250! delay slot
251	andn	%l3,%i5,%l3
252	add	%i1,%i2,%i1		! x += stridex
253	faddd	%f18,c3two44,%f20
254	st	%f31,[%o3+4]
255
/*
 * .loop3: range check for lane 3.  No further argument is preloaded here
 * and n is not decremented; the count for lane 3 is taken at the end of
 * the group.  The low words of the lane 0 and lane 1 sums (%f5, %f13),
 * which hold the table indices, are spilled to nk0/nk1 so that .cont can
 * reload them into integer registers.
 */
256.loop3:
257	sub	%l3,%o4,%g5
258	sub	%o5,%l3,%o7
259	fabss	%f24,%f26
260	st	%f5,[%fp+nk0]
261
262	orcc	%o7,%g5,%g0
263	mov	%i3,%o3			! py3 = y
264	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
265! delay slot
266	st	%f13,[%fp+nk1]
267
268!!! DONE?
/*
 * .cont: all four lanes passed the range check.
 *
 * Finish assembling x0..x3 (the low words are copied into %f1, %f9, %f17
 * and %f25) and start x * invpio2 for each lane.  If any hx exceeds
 * 0x3fe921fb, branch to .medium, which uses those products for argument
 * reduction.
 *
 * Otherwise evaluate each lane directly.  With k the low word of
 * |x| + 3 * 2^44 and T the 32-byte entry __vlibm_TBL_sincos2 + 32*k
 * (numbers in brackets are byte offsets):
 *
 *   d = |x| - T[0],   z = d * d
 *   S = d + d * z * (pp1 + z * (pp2 + z * pp3))
 *   C = z * (qq1 + z * qq2)
 *   result = T[16] * S + T[8] * C + T[8]
 *
 * Presumably T[0] is a breakpoint x_k, T[8] is sin(x_k) and T[16] is
 * cos(x_k) - confirm against the table source.  The result is negated
 * when the original x compared less than pio2_3 (see the fcmpd comment).
 */
269.cont:
270	srlx	%o5,32,%o7
271	add	%i3,%i4,%i3		! y += stridey
272	fmovs	%f3,%f1
273	st	%f21,[%fp+nk2]
274
275	sub	%o7,%l0,%l0
276	sub	%o7,%l1,%l1
277	faddd	%f26,c3two44,%f28
278	st	%f29,[%fp+nk3]
279
280	sub	%o7,%l2,%l2
281	sub	%o7,%l3,%l3
282	fmovs	%f11,%f9
283
284	or	%l0,%l1,%l0
285	or	%l2,%l3,%l2
286	fmovs	%f19,%f17
287
288	fmovs	%f27,%f25
289	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range
290
291	fmuld	%f8,invpio2,%f14
292	ld	[%fp+nk0],%l4
293
294	fmuld	%f16,invpio2,%f22
295	ld	[%fp+nk1],%l5
296
/* negative iff 0x3fe921fb - hx is negative for at least one lane */
297	orcc	%l0,%l2,%g0
298	bl,pn	%icc,.medium
299! delay slot
300	fmuld	%f24,invpio2,%f30
301	ld	[%fp+nk2],%l6
302
303	ld	[%fp+nk3],%l7
304	sll	%l4,5,%l4		! k
305	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0
306
307	sll	%l5,5,%l5
308	ldd	[%l4+%g1],%f4
309	fcmpd	%fcc1,%f8,pio2_3
310
311	sll	%l6,5,%l6
312	ldd	[%l5+%g1],%f12
313	fcmpd	%fcc2,%f16,pio2_3
314
315	sll	%l7,5,%l7
316	ldd	[%l6+%g1],%f20
317	fcmpd	%fcc3,%f24,pio2_3
318
319	ldd	[%l7+%g1],%f28
320	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
321
322	fsubd	%f10,%f12,%f10
323
324	fsubd	%f18,%f20,%f18
325
326	fsubd	%f26,%f28,%f26
327
328	fmuld	%f2,%f2,%f0		! z = x * x
329
330	fmuld	%f10,%f10,%f8
331
332	fmuld	%f18,%f18,%f16
333
334	fmuld	%f26,%f26,%f24
335
336	fmuld	%f0,pp3,%f6
337
338	fmuld	%f8,pp3,%f14
339
340	fmuld	%f16,pp3,%f22
341
342	fmuld	%f24,pp3,%f30
343
344	faddd	%f6,pp2,%f6
345	fmuld	%f0,qq2,%f4
346
347	faddd	%f14,pp2,%f14
348	fmuld	%f8,qq2,%f12
349
350	faddd	%f22,pp2,%f22
351	fmuld	%f16,qq2,%f20
352
353	faddd	%f30,pp2,%f30
354	fmuld	%f24,qq2,%f28
355
356	fmuld	%f0,%f6,%f6
357	faddd	%f4,qq1,%f4
358
359	fmuld	%f8,%f14,%f14
360	faddd	%f12,qq1,%f12
361
362	fmuld	%f16,%f22,%f22
363	faddd	%f20,qq1,%f20
364
365	fmuld	%f24,%f30,%f30
366	faddd	%f28,qq1,%f28
367
368	faddd	%f6,pp1,%f6
369	fmuld	%f0,%f4,%f4
370	add	%l4,%g1,%l4
371
372	faddd	%f14,pp1,%f14
373	fmuld	%f8,%f12,%f12
374	add	%l5,%g1,%l5
375
376	faddd	%f22,pp1,%f22
377	fmuld	%f16,%f20,%f20
378	add	%l6,%g1,%l6
379
380	faddd	%f30,pp1,%f30
381	fmuld	%f24,%f28,%f28
382	add	%l7,%g1,%l7
383
384	fmuld	%f0,%f6,%f6
385	ldd	[%l4+8],%f0
386
387	fmuld	%f8,%f14,%f14
388	ldd	[%l5+8],%f8
389
390	fmuld	%f16,%f22,%f22
391	ldd	[%l6+8],%f16
392
393	fmuld	%f24,%f30,%f30
394	ldd	[%l7+8],%f24
395
396	fmuld	%f2,%f6,%f6
397
398	fmuld	%f10,%f14,%f14
399
400	fmuld	%f18,%f22,%f22
401
402	fmuld	%f26,%f30,%f30
403
404	faddd	%f6,%f2,%f6
405	fmuld	%f0,%f4,%f4
406	ldd	[%l4+16],%f2
407
408	faddd	%f14,%f10,%f14
409	fmuld	%f8,%f12,%f12
410	ldd	[%l5+16],%f10
411
412	faddd	%f22,%f18,%f22
413	fmuld	%f16,%f20,%f20
414	ldd	[%l6+16],%f18
415
416	faddd	%f30,%f26,%f30
417	fmuld	%f24,%f28,%f28
418	ldd	[%l7+16],%f26
419
420	fmuld	%f2,%f6,%f6
421
422	fmuld	%f10,%f14,%f14
423
424	fmuld	%f18,%f22,%f22
425
426	fmuld	%f26,%f30,%f30
427
428	faddd	%f6,%f4,%f6
429
430	faddd	%f14,%f12,%f14
431
432	faddd	%f22,%f20,%f22
433
434	faddd	%f30,%f28,%f30
435
436	faddd	%f6,%f0,%f6
437
438	faddd	%f14,%f8,%f14
439
440	faddd	%f22,%f16,%f22
441
442	faddd	%f30,%f24,%f30
443
/*
 * Form the negated results, and meanwhile load lane 0 of the next group
 * the same way the prologue loaded the first argument.
 */
444	fnegd	%f6,%f4
445	lda	[%i1]%asi,%l0		! preload next argument
446
447	fnegd	%f14,%f12
448	lda	[%i1]%asi,%f0
449
450	fnegd	%f22,%f20
451	lda	[%i1+4]%asi,%f3
452
453	fnegd	%f30,%f28
454	andn	%l0,%i5,%l0
455	add	%i1,%i2,%i1
456
/*
 * Store the high word of every result and the low word of lane 0 only.
 * The low words of lanes 1-3 stay in %f15, %f23 and %f31 and are stored
 * by the next pass through .loop0-.loop2.  The addcc counts the lane 3
 * element; loop again while n is still positive.
 */
457	fmovdl	%fcc0,%f4,%f6		! (hx < -0)? -s : s
458	st	%f6,[%o0]
459
460	fmovdl	%fcc1,%f12,%f14
461	st	%f14,[%o1]
462
463	fmovdl	%fcc2,%f20,%f22
464	st	%f22,[%o2]
465
466	fmovdl	%fcc3,%f28,%f30
467	st	%f30,[%o3]
468	addcc	%i0,-1,%i0
469
470	bg,pt	%icc,.loop0
471! delay slot
472	st	%f7,[%o0+4]
473
474	ba,pt	%icc,.end
475! delay slot
476	nop
477
478
/*
 * .medium: at least one lane has hx > 0x3fe921fb, so all four lanes go
 * through argument reduction.  On entry %f0/%f8/%f16/%f24 hold x and
 * %f6/%f14/%f22/%f30 hold x * invpio2.  For each lane:
 *
 *   1. x * invpio2 is rounded to an integer by adding and then
 *      subtracting 3 * 2^51; the low word of the sum is also kept as the
 *      integer n.
 *   2. n * pio2_1, n * pio2_2 and n * pio2_3 are subtracted from x in
 *      turn, and the rounding error of the result is kept as a
 *      correction term w.
 *   3. If the reduced x is negative, n is replaced by -n ^ 2 and the
 *      computation continues with |x|; w gets the sign of x.
 *   4. The table index k is taken from |x| + 3 * 2^44 and the table
 *      value at offset 0 is subtracted, as in the primary path.
 *   5. Bit 0 of n chooses the form used for the lane: the dispatch at
 *      the bottom goes to .case8 / .case4 / .case2 / .case1 as soon as
 *      lane 0 / 1 / 2 / 3 has an even n, and falls through to case 0
 *      when every n is odd.
 */
479	.align	16
480.medium:
/* step 1: the low word of x * invpio2 + 3 * 2^51 is n */
481	faddd	%f6,c3two51,%f4
482	st	%f5,[%fp+nk0]
483
484	faddd	%f14,c3two51,%f12
485	st	%f13,[%fp+nk1]
486
487	faddd	%f22,c3two51,%f20
488	st	%f21,[%fp+nk2]
489
490	faddd	%f30,c3two51,%f28
491	st	%f29,[%fp+nk3]
492
493	fsubd	%f4,c3two51,%f6
494
495	fsubd	%f12,c3two51,%f14
496
497	fsubd	%f20,c3two51,%f22
498
499	fsubd	%f28,c3two51,%f30
500
/* step 2: subtract the three pieces of n * pi/2 */
501	fmuld	%f6,pio2_1,%f2
502	ld	[%fp+nk0],%l0		! n
503
504	fmuld	%f14,pio2_1,%f10
505	ld	[%fp+nk1],%l1
506
507	fmuld	%f22,pio2_1,%f18
508	ld	[%fp+nk2],%l2
509
510	fmuld	%f30,pio2_1,%f26
511	ld	[%fp+nk3],%l3
512
513	fsubd	%f0,%f2,%f0
514	fmuld	%f6,pio2_2,%f4
515
516	fsubd	%f8,%f10,%f8
517	fmuld	%f14,pio2_2,%f12
518
519	fsubd	%f16,%f18,%f16
520	fmuld	%f22,pio2_2,%f20
521
522	fsubd	%f24,%f26,%f24
523	fmuld	%f30,pio2_2,%f28
524
525	fsubd	%f0,%f4,%f32
526
527	fsubd	%f8,%f12,%f34
528
529	fsubd	%f16,%f20,%f36
530
531	fsubd	%f24,%f28,%f38
532
533	fsubd	%f0,%f32,%f0
534	fcmple32 %f32,pio2_3,%l4	! x <= pio2_3 iff x < 0
535
536	fsubd	%f8,%f34,%f8
537	fcmple32 %f34,pio2_3,%l5
538
539	fsubd	%f16,%f36,%f16
540	fcmple32 %f36,pio2_3,%l6
541
542	fsubd	%f24,%f38,%f24
543	fcmple32 %f38,pio2_3,%l7
544
/*
 * The sll by 30 followed by sra by 31 turns bit 1 of each fcmple32 result
 * into a mask of all ones or all zeros.  (Bit 1 should be the comparison
 * of the high words - confirm against the VIS definition of fcmple32.)
 */
545	fsubd	%f0,%f4,%f0
546	fmuld	%f6,pio2_3,%f6
547	sll	%l4,30,%l4		! if (x < 0) n = -n ^ 2
548
549	fsubd	%f8,%f12,%f8
550	fmuld	%f14,pio2_3,%f14
551	sll	%l5,30,%l5
552
553	fsubd	%f16,%f20,%f16
554	fmuld	%f22,pio2_3,%f22
555	sll	%l6,30,%l6
556
557	fsubd	%f24,%f28,%f24
558	fmuld	%f30,pio2_3,%f30
559	sll	%l7,30,%l7
560
561	fsubd	%f6,%f0,%f6
562	sra	%l4,31,%l4
563
564	fsubd	%f14,%f8,%f14
565	sra	%l5,31,%l5
566
567	fsubd	%f22,%f16,%f22
568	sra	%l6,31,%l6
569
570	fsubd	%f30,%f24,%f30
571	sra	%l7,31,%l7
572
573	fsubd	%f32,%f6,%f0		! reduced x
574	xor	%l0,%l4,%l0
575
576	fsubd	%f34,%f14,%f8
577	xor	%l1,%l5,%l1
578
579	fsubd	%f36,%f22,%f16
580	xor	%l2,%l6,%l2
581
582	fsubd	%f38,%f30,%f24
583	xor	%l3,%l7,%l3
584
/* step 3: continue with |x|; (n ^ mask) - mask is -n when mask is all ones */
585	fabsd	%f0,%f2
586	sub	%l0,%l4,%l0
587
588	fabsd	%f8,%f10
589	sub	%l1,%l5,%l1
590
591	fabsd	%f16,%f18
592	sub	%l2,%l6,%l2
593
594	fabsd	%f24,%f26
595	sub	%l3,%l7,%l3
596
/* step 4: table index from |x| + 3 * 2^44; the masks shrink to 0 or 2 */
597	faddd	%f2,c3two44,%f4
598	st	%f5,[%fp+nk0]
599	and	%l4,2,%l4
600
601	faddd	%f10,c3two44,%f12
602	st	%f13,[%fp+nk1]
603	and	%l5,2,%l5
604
605	faddd	%f18,c3two44,%f20
606	st	%f21,[%fp+nk2]
607	and	%l6,2,%l6
608
609	faddd	%f26,c3two44,%f28
610	st	%f29,[%fp+nk3]
611	and	%l7,2,%l7
612
613	fsubd	%f32,%f0,%f4
614	xor	%l0,%l4,%l0
615
616	fsubd	%f34,%f8,%f12
617	xor	%l1,%l5,%l1
618
619	fsubd	%f36,%f16,%f20
620	xor	%l2,%l6,%l2
621
622	fsubd	%f38,%f24,%f28
623	xor	%l3,%l7,%l3
624
/* fzero followed by fnegd leaves only the sign bit set in %f38 */
625	fzero	%f38
626	ld	[%fp+nk0],%l4
627
628	fsubd	%f4,%f6,%f6		! w
629	ld	[%fp+nk1],%l5
630
631	fsubd	%f12,%f14,%f14
632	ld	[%fp+nk2],%l6
633
634	fnegd	%f38,%f38
635	ld	[%fp+nk3],%l7
636	sll	%l4,5,%l4		! k
637
638	fsubd	%f20,%f22,%f22
639	sll	%l5,5,%l5
640
641	fsubd	%f28,%f30,%f30
642	sll	%l6,5,%l6
643
644	fand	%f0,%f38,%f32		! sign bit of x
645	ldd	[%l4+%g1],%f4
646	sll	%l7,5,%l7
647
648	fand	%f8,%f38,%f34
649	ldd	[%l5+%g1],%f12
650
651	fand	%f16,%f38,%f36
652	ldd	[%l6+%g1],%f20
653
654	fand	%f24,%f38,%f38
655	ldd	[%l7+%g1],%f28
656
657	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]
658
659	fsubd	%f10,%f12,%f10
660
661	fsubd	%f18,%f20,%f18
662	nop
663
664	fsubd	%f26,%f28,%f26
665	nop
666
/*
 * step 5: dispatch on bit 0 of each n.  Every delay slot (executed on
 * both paths) applies the sign of x to w for that lane with fxor.  A
 * taken branch leaves z and the fxor of the later lanes to its target.
 */
667! 16-byte aligned
668	fmuld	%f2,%f2,%f0		! z = x * x
669	andcc	%l0,1,%g0
670	bz,pn	%icc,.case8
671! delay slot
672	fxor	%f6,%f32,%f32
673
674	fmuld	%f10,%f10,%f8
675	andcc	%l1,1,%g0
676	bz,pn	%icc,.case4
677! delay slot
678	fxor	%f14,%f34,%f34
679
680	fmuld	%f18,%f18,%f16
681	andcc	%l2,1,%g0
682	bz,pn	%icc,.case2
683! delay slot
684	fxor	%f22,%f36,%f36
685
686	fmuld	%f26,%f26,%f24
687	andcc	%l3,1,%g0
688	bz,pn	%icc,.case1
689! delay slot
690	fxor	%f30,%f38,%f38
691
/*
 * Case 0 (fall-through from the .medium dispatch): n is odd in all four
 * lanes, so every lane uses the cos form.  With d the reduced argument
 * minus the table value at offset 0, z = d * d, w the signed correction
 * term, and T the 32-byte table entry for the lane (byte offsets):
 *
 *   C = z * (qq1 + z * (qq2 + z * qq3))
 *   S = d + w + d * z * (pp1 + z * pp2)
 *   result = T[16] * C - T[8] * S + T[16]
 *
 * The result is negated when bit 1 of n is set.  n for lane 0 is copied
 * to %l4 first because %l0 is reused for the next argument.  The tail
 * stores the high words of all results plus the low word of lane 0; the
 * remaining low words are stored by the next pass through
 * .loop0-.loop2.
 */
692!.case0:
693	fmuld	%f0,qq3,%f6		! cos(x0)
694
695	fmuld	%f8,qq3,%f14		! cos(x1)
696
697	fmuld	%f16,qq3,%f22		! cos(x2)
698
699	fmuld	%f24,qq3,%f30		! cos(x3)
700
701	faddd	%f6,qq2,%f6
702	fmuld	%f0,pp2,%f4
703
704	faddd	%f14,qq2,%f14
705	fmuld	%f8,pp2,%f12
706
707	faddd	%f22,qq2,%f22
708	fmuld	%f16,pp2,%f20
709
710	faddd	%f30,qq2,%f30
711	fmuld	%f24,pp2,%f28
712
713	fmuld	%f0,%f6,%f6
714	faddd	%f4,pp1,%f4
715
716	fmuld	%f8,%f14,%f14
717	faddd	%f12,pp1,%f12
718
719	fmuld	%f16,%f22,%f22
720	faddd	%f20,pp1,%f20
721
722	fmuld	%f24,%f30,%f30
723	faddd	%f28,pp1,%f28
724
725	faddd	%f6,qq1,%f6
726	fmuld	%f0,%f4,%f4
727	add	%l4,%g1,%l4
728
729	faddd	%f14,qq1,%f14
730	fmuld	%f8,%f12,%f12
731	add	%l5,%g1,%l5
732
733	faddd	%f22,qq1,%f22
734	fmuld	%f16,%f20,%f20
735	add	%l6,%g1,%l6
736
737	faddd	%f30,qq1,%f30
738	fmuld	%f24,%f28,%f28
739	add	%l7,%g1,%l7
740
741	fmuld	%f2,%f4,%f4
742
743	fmuld	%f10,%f12,%f12
744
745	fmuld	%f18,%f20,%f20
746
747	fmuld	%f26,%f28,%f28
748
749	fmuld	%f0,%f6,%f6
750	faddd	%f4,%f32,%f4
751	ldd	[%l4+16],%f0
752
753	fmuld	%f8,%f14,%f14
754	faddd	%f12,%f34,%f12
755	ldd	[%l5+16],%f8
756
757	fmuld	%f16,%f22,%f22
758	faddd	%f20,%f36,%f20
759	ldd	[%l6+16],%f16
760
761	fmuld	%f24,%f30,%f30
762	faddd	%f28,%f38,%f28
763	ldd	[%l7+16],%f24
764
765	fmuld	%f0,%f6,%f6
766	faddd	%f4,%f2,%f4
767	ldd	[%l4+8],%f32
768
769	fmuld	%f8,%f14,%f14
770	faddd	%f12,%f10,%f12
771	ldd	[%l5+8],%f34
772
773	fmuld	%f16,%f22,%f22
774	faddd	%f20,%f18,%f20
775	ldd	[%l6+8],%f36
776
777	fmuld	%f24,%f30,%f30
778	faddd	%f28,%f26,%f28
779	ldd	[%l7+8],%f38
780
781	fmuld	%f32,%f4,%f4
782
783	fmuld	%f34,%f12,%f12
784
785	fmuld	%f36,%f20,%f20
786
787	fmuld	%f38,%f28,%f28
788
789	fsubd	%f6,%f4,%f6
790
791	fsubd	%f14,%f12,%f14
792
793	fsubd	%f22,%f20,%f22
794
795	fsubd	%f30,%f28,%f30
796
797	faddd	%f6,%f0,%f6
798
799	faddd	%f14,%f8,%f14
800
801	faddd	%f22,%f16,%f22
802
803	faddd	%f30,%f24,%f30
804	mov	%l0,%l4
805
806	fnegd	%f6,%f4
807	lda	[%i1]%asi,%l0		! preload next argument
808
809	fnegd	%f14,%f12
810	lda	[%i1]%asi,%f0
811
812	fnegd	%f22,%f20
813	lda	[%i1+4]%asi,%f3
814
815	fnegd	%f30,%f28
816	andn	%l0,%i5,%l0
817	add	%i1,%i2,%i1
818
819	andcc	%l4,2,%g0
820	fmovdnz	%icc,%f4,%f6
821	st	%f6,[%o0]
822
823	andcc	%l1,2,%g0
824	fmovdnz	%icc,%f12,%f14
825	st	%f14,[%o1]
826
827	andcc	%l2,2,%g0
828	fmovdnz	%icc,%f20,%f22
829	st	%f22,[%o2]
830
831	andcc	%l3,2,%g0
832	fmovdnz	%icc,%f28,%f30
833	st	%f30,[%o3]
834
835	addcc	%i0,-1,%i0
836	bg,pt	%icc,.loop0
837! delay slot
838	st	%f7,[%o0+4]
839
840	ba,pt	%icc,.end
841! delay slot
842	nop
843
/*
 * .case1: medium-range finish when n is odd in lanes 0-2 and even in
 * lane 3.  Lanes 0-2 use the cos form and lane 3 the sin form.  With T
 * the 32-byte table entry of the lane (byte offsets), d the reduced
 * argument minus T[0], z = d * d and w the signed correction term:
 *
 *   cos form:  C = z * (qq1 + z * (qq2 + z * qq3))
 *              S = d + w + d * z * (pp1 + z * pp2)
 *              result = T[16] * C - T[8] * S + T[16]
 *   sin form:  S = d + w + d * z * (pp1 + z * (pp2 + z * pp3))
 *              C = z * (qq1 + z * qq2)
 *              result = T[16] * S + T[8] * C + T[8]
 *
 * Each result is negated when bit 1 of its n is set.  The tail preloads
 * the next argument, stores the results and loops while n stays positive.
 */
844	.align	16
845.case1:
846	fmuld	%f24,pp3,%f30		! sin(x3)
847
848	fmuld	%f0,qq3,%f6		! cos(x0)
849
850	fmuld	%f8,qq3,%f14		! cos(x1)
851
852	fmuld	%f16,qq3,%f22		! cos(x2)
853
854	faddd	%f30,pp2,%f30
855	fmuld	%f24,qq2,%f28
856
857	faddd	%f6,qq2,%f6
858	fmuld	%f0,pp2,%f4
859
860	faddd	%f14,qq2,%f14
861	fmuld	%f8,pp2,%f12
862
863	faddd	%f22,qq2,%f22
864	fmuld	%f16,pp2,%f20
865
866	fmuld	%f24,%f30,%f30
867	faddd	%f28,qq1,%f28
868
869	fmuld	%f0,%f6,%f6
870	faddd	%f4,pp1,%f4
871
872	fmuld	%f8,%f14,%f14
873	faddd	%f12,pp1,%f12
874
875	fmuld	%f16,%f22,%f22
876	faddd	%f20,pp1,%f20
877
878	faddd	%f30,pp1,%f30
879	fmuld	%f24,%f28,%f28
880	add	%l7,%g1,%l7
881
882	faddd	%f6,qq1,%f6
883	fmuld	%f0,%f4,%f4
884	add	%l4,%g1,%l4
885
886	faddd	%f14,qq1,%f14
887	fmuld	%f8,%f12,%f12
888	add	%l5,%g1,%l5
889
890	faddd	%f22,qq1,%f22
891	fmuld	%f16,%f20,%f20
892	add	%l6,%g1,%l6
893
894	fmuld	%f24,%f30,%f30
895
896	fmuld	%f2,%f4,%f4
897
898	fmuld	%f10,%f12,%f12
899
900	fmuld	%f18,%f20,%f20
901
902	fmuld	%f26,%f30,%f30
903	ldd	[%l7+8],%f24
904
905	fmuld	%f0,%f6,%f6
906	faddd	%f4,%f32,%f4
907	ldd	[%l4+16],%f0
908
909	fmuld	%f8,%f14,%f14
910	faddd	%f12,%f34,%f12
911	ldd	[%l5+16],%f8
912
913	fmuld	%f16,%f22,%f22
914	faddd	%f20,%f36,%f20
915	ldd	[%l6+16],%f16
916
917	fmuld	%f24,%f28,%f28
918	faddd	%f38,%f30,%f30
919
920	fmuld	%f0,%f6,%f6
921	faddd	%f4,%f2,%f4
922	ldd	[%l4+8],%f32
923
924	fmuld	%f8,%f14,%f14
925	faddd	%f12,%f10,%f12
926	ldd	[%l5+8],%f34
927
928	fmuld	%f16,%f22,%f22
929	faddd	%f20,%f18,%f20
930	ldd	[%l6+8],%f36
931
932	faddd	%f26,%f30,%f30
933	ldd	[%l7+16],%f38
934
935	fmuld	%f32,%f4,%f4
936
937	fmuld	%f34,%f12,%f12
938
939	fmuld	%f36,%f20,%f20
940
941	fmuld	%f38,%f30,%f30
942
943	fsubd	%f6,%f4,%f6
944
945	fsubd	%f14,%f12,%f14
946
947	fsubd	%f22,%f20,%f22
948
949	faddd	%f30,%f28,%f30
950
951	faddd	%f6,%f0,%f6
952
953	faddd	%f14,%f8,%f14
954
955	faddd	%f22,%f16,%f22
956
957	faddd	%f30,%f24,%f30
958	mov	%l0,%l4
959
960	fnegd	%f6,%f4
961	lda	[%i1]%asi,%l0		! preload next argument
962
963	fnegd	%f14,%f12
964	lda	[%i1]%asi,%f0
965
966	fnegd	%f22,%f20
967	lda	[%i1+4]%asi,%f3
968
969	fnegd	%f30,%f28
970	andn	%l0,%i5,%l0
971	add	%i1,%i2,%i1
972
973	andcc	%l4,2,%g0
974	fmovdnz	%icc,%f4,%f6
975	st	%f6,[%o0]
976
977	andcc	%l1,2,%g0
978	fmovdnz	%icc,%f12,%f14
979	st	%f14,[%o1]
980
981	andcc	%l2,2,%g0
982	fmovdnz	%icc,%f20,%f22
983	st	%f22,[%o2]
984
985	andcc	%l3,2,%g0
986	fmovdnz	%icc,%f28,%f30
987	st	%f30,[%o3]
988
989	addcc	%i0,-1,%i0
990	bg,pt	%icc,.loop0
991! delay slot
992	st	%f7,[%o0+4]
993
994	ba,pt	%icc,.end
995! delay slot
996	nop
997
/*
 * .case2: reached from the .medium dispatch when n is odd in lanes 0 and
 * 1 and even in lane 2.  Lane 3 has not been examined yet, so this first
 * forms z for lane 3, applies the sign of x to its correction term w,
 * and goes to .case3 if n for lane 3 is even as well.
 *
 * Otherwise lane 2 uses the sin form and lanes 0, 1 and 3 the cos form.
 * With T the 32-byte table entry of the lane (byte offsets), d the
 * reduced argument minus T[0] and z = d * d:
 *
 *   cos form:  C = z * (qq1 + z * (qq2 + z * qq3))
 *              S = d + w + d * z * (pp1 + z * pp2)
 *              result = T[16] * C - T[8] * S + T[16]
 *   sin form:  S = d + w + d * z * (pp1 + z * (pp2 + z * pp3))
 *              C = z * (qq1 + z * qq2)
 *              result = T[16] * S + T[8] * C + T[8]
 *
 * Each result is negated when bit 1 of its n is set.  The tail preloads
 * the next argument, stores the results and loops while n stays positive.
 */
998	.align	16
999.case2:
1000	fmuld	%f26,%f26,%f24
1001	andcc	%l3,1,%g0
1002	bz,pn	%icc,.case3
1003! delay slot
1004	fxor	%f30,%f38,%f38
1005
1006	fmuld	%f16,pp3,%f22		! sin(x2)
1007
1008	fmuld	%f0,qq3,%f6		! cos(x0)
1009
1010	fmuld	%f8,qq3,%f14		! cos(x1)
1011
1012	faddd	%f22,pp2,%f22
1013	fmuld	%f16,qq2,%f20
1014
1015	fmuld	%f24,qq3,%f30		! cos(x3)
1016
1017	faddd	%f6,qq2,%f6
1018	fmuld	%f0,pp2,%f4
1019
1020	faddd	%f14,qq2,%f14
1021	fmuld	%f8,pp2,%f12
1022
1023	fmuld	%f16,%f22,%f22
1024	faddd	%f20,qq1,%f20
1025
1026	faddd	%f30,qq2,%f30
1027	fmuld	%f24,pp2,%f28
1028
1029	fmuld	%f0,%f6,%f6
1030	faddd	%f4,pp1,%f4
1031
1032	fmuld	%f8,%f14,%f14
1033	faddd	%f12,pp1,%f12
1034
1035	faddd	%f22,pp1,%f22
1036	fmuld	%f16,%f20,%f20
1037	add	%l6,%g1,%l6
1038
1039	fmuld	%f24,%f30,%f30
1040	faddd	%f28,pp1,%f28
1041
1042	faddd	%f6,qq1,%f6
1043	fmuld	%f0,%f4,%f4
1044	add	%l4,%g1,%l4
1045
1046	faddd	%f14,qq1,%f14
1047	fmuld	%f8,%f12,%f12
1048	add	%l5,%g1,%l5
1049
1050	fmuld	%f16,%f22,%f22
1051
1052	faddd	%f30,qq1,%f30
1053	fmuld	%f24,%f28,%f28
1054	add	%l7,%g1,%l7
1055
1056	fmuld	%f2,%f4,%f4
1057
1058	fmuld	%f10,%f12,%f12
1059
1060	fmuld	%f18,%f22,%f22
1061	ldd	[%l6+8],%f16
1062
1063	fmuld	%f26,%f28,%f28
1064
1065	fmuld	%f0,%f6,%f6
1066	faddd	%f4,%f32,%f4
1067	ldd	[%l4+16],%f0
1068
1069	fmuld	%f8,%f14,%f14
1070	faddd	%f12,%f34,%f12
1071	ldd	[%l5+16],%f8
1072
1073	fmuld	%f16,%f20,%f20
1074	faddd	%f36,%f22,%f22
1075
1076	fmuld	%f24,%f30,%f30
1077	faddd	%f28,%f38,%f28
1078	ldd	[%l7+16],%f24
1079
1080	fmuld	%f0,%f6,%f6
1081	faddd	%f4,%f2,%f4
1082	ldd	[%l4+8],%f32
1083
1084	fmuld	%f8,%f14,%f14
1085	faddd	%f12,%f10,%f12
1086	ldd	[%l5+8],%f34
1087
1088	faddd	%f18,%f22,%f22
1089	ldd	[%l6+16],%f36
1090
1091	fmuld	%f24,%f30,%f30
1092	faddd	%f28,%f26,%f28
1093	ldd	[%l7+8],%f38
1094
1095	fmuld	%f32,%f4,%f4
1096
1097	fmuld	%f34,%f12,%f12
1098
1099	fmuld	%f36,%f22,%f22
1100
1101	fmuld	%f38,%f28,%f28
1102
1103	fsubd	%f6,%f4,%f6
1104
1105	fsubd	%f14,%f12,%f14
1106
1107	faddd	%f22,%f20,%f22
1108
1109	fsubd	%f30,%f28,%f30
1110
1111	faddd	%f6,%f0,%f6
1112
1113	faddd	%f14,%f8,%f14
1114
1115	faddd	%f22,%f16,%f22
1116
1117	faddd	%f30,%f24,%f30
1118	mov	%l0,%l4
1119
1120	fnegd	%f6,%f4
1121	lda	[%i1]%asi,%l0		! preload next argument
1122
1123	fnegd	%f14,%f12
1124	lda	[%i1]%asi,%f0
1125
1126	fnegd	%f22,%f20
1127	lda	[%i1+4]%asi,%f3
1128
1129	fnegd	%f30,%f28
1130	andn	%l0,%i5,%l0
1131	add	%i1,%i2,%i1
1132
1133	andcc	%l4,2,%g0
1134	fmovdnz	%icc,%f4,%f6
1135	st	%f6,[%o0]
1136
1137	andcc	%l1,2,%g0
1138	fmovdnz	%icc,%f12,%f14
1139	st	%f14,[%o1]
1140
1141	andcc	%l2,2,%g0
1142	fmovdnz	%icc,%f20,%f22
1143	st	%f22,[%o2]
1144
1145	andcc	%l3,2,%g0
1146	fmovdnz	%icc,%f28,%f30
1147	st	%f30,[%o3]
1148
1149	addcc	%i0,-1,%i0
1150	bg,pt	%icc,.loop0
1151! delay slot
1152	st	%f7,[%o0+4]
1153
1154	ba,pt	%icc,.end
1155! delay slot
1156	nop
1157
/*
 * .case3: medium-range finish when n is odd in lanes 0 and 1 and even in
 * lanes 2 and 3 (entered from .case2).  Lanes 2 and 3 use the sin form,
 * lanes 0 and 1 the cos form.  With T the 32-byte table entry of the
 * lane (byte offsets), d the reduced argument minus T[0], z = d * d and
 * w the signed correction term:
 *
 *   cos form:  C = z * (qq1 + z * (qq2 + z * qq3))
 *              S = d + w + d * z * (pp1 + z * pp2)
 *              result = T[16] * C - T[8] * S + T[16]
 *   sin form:  S = d + w + d * z * (pp1 + z * (pp2 + z * pp3))
 *              C = z * (qq1 + z * qq2)
 *              result = T[16] * S + T[8] * C + T[8]
 *
 * Each result is negated when bit 1 of its n is set.  The tail preloads
 * the next argument, stores the results and loops while n stays positive.
 */
1158	.align	16
1159.case3:
1160	fmuld	%f16,pp3,%f22		! sin(x2)
1161
1162	fmuld	%f24,pp3,%f30		! sin(x3)
1163
1164	fmuld	%f0,qq3,%f6		! cos(x0)
1165
1166	fmuld	%f8,qq3,%f14		! cos(x1)
1167
1168	faddd	%f22,pp2,%f22
1169	fmuld	%f16,qq2,%f20
1170
1171	faddd	%f30,pp2,%f30
1172	fmuld	%f24,qq2,%f28
1173
1174	faddd	%f6,qq2,%f6
1175	fmuld	%f0,pp2,%f4
1176
1177	faddd	%f14,qq2,%f14
1178	fmuld	%f8,pp2,%f12
1179
1180	fmuld	%f16,%f22,%f22
1181	faddd	%f20,qq1,%f20
1182
1183	fmuld	%f24,%f30,%f30
1184	faddd	%f28,qq1,%f28
1185
1186	fmuld	%f0,%f6,%f6
1187	faddd	%f4,pp1,%f4
1188
1189	fmuld	%f8,%f14,%f14
1190	faddd	%f12,pp1,%f12
1191
1192	faddd	%f22,pp1,%f22
1193	fmuld	%f16,%f20,%f20
1194	add	%l6,%g1,%l6
1195
1196	faddd	%f30,pp1,%f30
1197	fmuld	%f24,%f28,%f28
1198	add	%l7,%g1,%l7
1199
1200	faddd	%f6,qq1,%f6
1201	fmuld	%f0,%f4,%f4
1202	add	%l4,%g1,%l4
1203
1204	faddd	%f14,qq1,%f14
1205	fmuld	%f8,%f12,%f12
1206	add	%l5,%g1,%l5
1207
1208	fmuld	%f16,%f22,%f22
1209
1210	fmuld	%f24,%f30,%f30
1211
1212	fmuld	%f2,%f4,%f4
1213
1214	fmuld	%f10,%f12,%f12
1215
1216	fmuld	%f18,%f22,%f22
1217	ldd	[%l6+8],%f16
1218
1219	fmuld	%f26,%f30,%f30
1220	ldd	[%l7+8],%f24
1221
1222	fmuld	%f0,%f6,%f6
1223	faddd	%f4,%f32,%f4
1224	ldd	[%l4+16],%f0
1225
1226	fmuld	%f8,%f14,%f14
1227	faddd	%f12,%f34,%f12
1228	ldd	[%l5+16],%f8
1229
1230	fmuld	%f16,%f20,%f20
1231	faddd	%f36,%f22,%f22
1232
1233	fmuld	%f24,%f28,%f28
1234	faddd	%f38,%f30,%f30
1235
1236	fmuld	%f0,%f6,%f6
1237	faddd	%f4,%f2,%f4
1238	ldd	[%l4+8],%f32
1239
1240	fmuld	%f8,%f14,%f14
1241	faddd	%f12,%f10,%f12
1242	ldd	[%l5+8],%f34
1243
1244	faddd	%f18,%f22,%f22
1245	ldd	[%l6+16],%f36
1246
1247	faddd	%f26,%f30,%f30
1248	ldd	[%l7+16],%f38
1249
1250	fmuld	%f32,%f4,%f4
1251
1252	fmuld	%f34,%f12,%f12
1253
1254	fmuld	%f36,%f22,%f22
1255
1256	fmuld	%f38,%f30,%f30
1257
1258	fsubd	%f6,%f4,%f6
1259
1260	fsubd	%f14,%f12,%f14
1261
1262	faddd	%f22,%f20,%f22
1263
1264	faddd	%f30,%f28,%f30
1265
1266	faddd	%f6,%f0,%f6
1267
1268	faddd	%f14,%f8,%f14
1269
1270	faddd	%f22,%f16,%f22
1271
1272	faddd	%f30,%f24,%f30
1273	mov	%l0,%l4
1274
1275	fnegd	%f6,%f4
1276	lda	[%i1]%asi,%l0		! preload next argument
1277
1278	fnegd	%f14,%f12
1279	lda	[%i1]%asi,%f0
1280
1281	fnegd	%f22,%f20
1282	lda	[%i1+4]%asi,%f3
1283
1284	fnegd	%f30,%f28
1285	andn	%l0,%i5,%l0
1286	add	%i1,%i2,%i1
1287
1288	andcc	%l4,2,%g0
1289	fmovdnz	%icc,%f4,%f6
1290	st	%f6,[%o0]
1291
1292	andcc	%l1,2,%g0
1293	fmovdnz	%icc,%f12,%f14
1294	st	%f14,[%o1]
1295
1296	andcc	%l2,2,%g0
1297	fmovdnz	%icc,%f20,%f22
1298	st	%f22,[%o2]
1299
1300	andcc	%l3,2,%g0
1301	fmovdnz	%icc,%f28,%f30
1302	st	%f30,[%o3]
1303
1304	addcc	%i0,-1,%i0
1305	bg,pt	%icc,.loop0
1306! delay slot
1307	st	%f7,[%o0+4]
1308
1309	ba,pt	%icc,.end
1310! delay slot
1311	nop
1312
/*
 * .case4: reached from the .medium dispatch when n is odd in lane 0 and
 * even in lane 1.  Lanes 2 and 3 have not been examined yet, so this
 * first forms z and the signed correction term w for each of them and
 * goes to .case6 if n for lane 2 is even, or to .case5 if n for lane 3
 * is even.
 *
 * Otherwise lane 1 uses the sin form and lanes 0, 2 and 3 the cos form.
 * With T the 32-byte table entry of the lane (byte offsets), d the
 * reduced argument minus T[0] and z = d * d:
 *
 *   cos form:  C = z * (qq1 + z * (qq2 + z * qq3))
 *              S = d + w + d * z * (pp1 + z * pp2)
 *              result = T[16] * C - T[8] * S + T[16]
 *   sin form:  S = d + w + d * z * (pp1 + z * (pp2 + z * pp3))
 *              C = z * (qq1 + z * qq2)
 *              result = T[16] * S + T[8] * C + T[8]
 *
 * Each result is negated when bit 1 of its n is set.  The tail preloads
 * the next argument, stores the results and loops while n stays positive.
 */
1313	.align	16
1314.case4:
1315	fmuld	%f18,%f18,%f16
1316	andcc	%l2,1,%g0
1317	bz,pn	%icc,.case6
1318! delay slot
1319	fxor	%f22,%f36,%f36
1320
1321	fmuld	%f26,%f26,%f24
1322	andcc	%l3,1,%g0
1323	bz,pn	%icc,.case5
1324! delay slot
1325	fxor	%f30,%f38,%f38
1326
1327	fmuld	%f8,pp3,%f14		! sin(x1)
1328
1329	fmuld	%f0,qq3,%f6		! cos(x0)
1330
1331	faddd	%f14,pp2,%f14
1332	fmuld	%f8,qq2,%f12
1333
1334	fmuld	%f16,qq3,%f22		! cos(x2)
1335
1336	fmuld	%f24,qq3,%f30		! cos(x3)
1337
1338	faddd	%f6,qq2,%f6
1339	fmuld	%f0,pp2,%f4
1340
1341	fmuld	%f8,%f14,%f14
1342	faddd	%f12,qq1,%f12
1343
1344	faddd	%f22,qq2,%f22
1345	fmuld	%f16,pp2,%f20
1346
1347	faddd	%f30,qq2,%f30
1348	fmuld	%f24,pp2,%f28
1349
1350	fmuld	%f0,%f6,%f6
1351	faddd	%f4,pp1,%f4
1352
1353	faddd	%f14,pp1,%f14
1354	fmuld	%f8,%f12,%f12
1355	add	%l5,%g1,%l5
1356
1357	fmuld	%f16,%f22,%f22
1358	faddd	%f20,pp1,%f20
1359
1360	fmuld	%f24,%f30,%f30
1361	faddd	%f28,pp1,%f28
1362
1363	faddd	%f6,qq1,%f6
1364	fmuld	%f0,%f4,%f4
1365	add	%l4,%g1,%l4
1366
1367	fmuld	%f8,%f14,%f14
1368
1369	faddd	%f22,qq1,%f22
1370	fmuld	%f16,%f20,%f20
1371	add	%l6,%g1,%l6
1372
1373	faddd	%f30,qq1,%f30
1374	fmuld	%f24,%f28,%f28
1375	add	%l7,%g1,%l7
1376
1377	fmuld	%f2,%f4,%f4
1378
1379	fmuld	%f10,%f14,%f14
1380	ldd	[%l5+8],%f8
1381
1382	fmuld	%f18,%f20,%f20
1383
1384	fmuld	%f26,%f28,%f28
1385
1386	fmuld	%f0,%f6,%f6
1387	faddd	%f4,%f32,%f4
1388	ldd	[%l4+16],%f0
1389
1390	fmuld	%f8,%f12,%f12
1391	faddd	%f34,%f14,%f14
1392
1393	fmuld	%f16,%f22,%f22
1394	faddd	%f20,%f36,%f20
1395	ldd	[%l6+16],%f16
1396
1397	fmuld	%f24,%f30,%f30
1398	faddd	%f28,%f38,%f28
1399	ldd	[%l7+16],%f24
1400
1401	fmuld	%f0,%f6,%f6
1402	faddd	%f4,%f2,%f4
1403	ldd	[%l4+8],%f32
1404
1405	faddd	%f10,%f14,%f14
1406	ldd	[%l5+16],%f34
1407
1408	fmuld	%f16,%f22,%f22
1409	faddd	%f20,%f18,%f20
1410	ldd	[%l6+8],%f36
1411
1412	fmuld	%f24,%f30,%f30
1413	faddd	%f28,%f26,%f28
1414	ldd	[%l7+8],%f38
1415
1416	fmuld	%f32,%f4,%f4
1417
1418	fmuld	%f34,%f14,%f14
1419
1420	fmuld	%f36,%f20,%f20
1421
1422	fmuld	%f38,%f28,%f28
1423
1424	fsubd	%f6,%f4,%f6
1425
1426	faddd	%f14,%f12,%f14
1427
1428	fsubd	%f22,%f20,%f22
1429
1430	fsubd	%f30,%f28,%f30
1431
1432	faddd	%f6,%f0,%f6
1433
1434	faddd	%f14,%f8,%f14
1435
1436	faddd	%f22,%f16,%f22
1437
1438	faddd	%f30,%f24,%f30
1439	mov	%l0,%l4
1440
1441	fnegd	%f6,%f4
1442	lda	[%i1]%asi,%l0		! preload next argument
1443
1444	fnegd	%f14,%f12
1445	lda	[%i1]%asi,%f0
1446
1447	fnegd	%f22,%f20
1448	lda	[%i1+4]%asi,%f3
1449
1450	fnegd	%f30,%f28
1451	andn	%l0,%i5,%l0
1452	add	%i1,%i2,%i1
1453
1454	andcc	%l4,2,%g0
1455	fmovdnz	%icc,%f4,%f6
1456	st	%f6,[%o0]
1457
1458	andcc	%l1,2,%g0
1459	fmovdnz	%icc,%f12,%f14
1460	st	%f14,[%o1]
1461
1462	andcc	%l2,2,%g0
1463	fmovdnz	%icc,%f20,%f22
1464	st	%f22,[%o2]
1465
1466	andcc	%l3,2,%g0
1467	fmovdnz	%icc,%f28,%f30
1468	st	%f30,[%o3]
1469
1470	addcc	%i0,-1,%i0
1471	bg,pt	%icc,.loop0
1472! delay slot
1473	st	%f7,[%o0+4]
1474
1475	ba,pt	%icc,.end
1476! delay slot
1477	nop
1478
/*
 * .case5: medium-range finish when n is odd in lanes 0 and 2 and even in
 * lanes 1 and 3 (entered from .case4).  Lanes 1 and 3 use the sin form,
 * lanes 0 and 2 the cos form.  With T the 32-byte table entry of the
 * lane (byte offsets), d the reduced argument minus T[0], z = d * d and
 * w the signed correction term:
 *
 *   cos form:  C = z * (qq1 + z * (qq2 + z * qq3))
 *              S = d + w + d * z * (pp1 + z * pp2)
 *              result = T[16] * C - T[8] * S + T[16]
 *   sin form:  S = d + w + d * z * (pp1 + z * (pp2 + z * pp3))
 *              C = z * (qq1 + z * qq2)
 *              result = T[16] * S + T[8] * C + T[8]
 *
 * Each result is negated when bit 1 of its n is set.  The tail preloads
 * the next argument, stores the results and loops while n stays positive.
 */
1479	.align	16
1480.case5:
1481	fmuld	%f8,pp3,%f14		! sin(x1)
1482
1483	fmuld	%f24,pp3,%f30		! sin(x3)
1484
1485	fmuld	%f0,qq3,%f6		! cos(x0)
1486
1487	faddd	%f14,pp2,%f14
1488	fmuld	%f8,qq2,%f12
1489
1490	fmuld	%f16,qq3,%f22		! cos(x2)
1491
1492	faddd	%f30,pp2,%f30
1493	fmuld	%f24,qq2,%f28
1494
1495	faddd	%f6,qq2,%f6
1496	fmuld	%f0,pp2,%f4
1497
1498	fmuld	%f8,%f14,%f14
1499	faddd	%f12,qq1,%f12
1500
1501	faddd	%f22,qq2,%f22
1502	fmuld	%f16,pp2,%f20
1503
1504	fmuld	%f24,%f30,%f30
1505	faddd	%f28,qq1,%f28
1506
1507	fmuld	%f0,%f6,%f6
1508	faddd	%f4,pp1,%f4
1509
1510	faddd	%f14,pp1,%f14
1511	fmuld	%f8,%f12,%f12
1512	add	%l5,%g1,%l5
1513
1514	fmuld	%f16,%f22,%f22
1515	faddd	%f20,pp1,%f20
1516
1517	faddd	%f30,pp1,%f30
1518	fmuld	%f24,%f28,%f28
1519	add	%l7,%g1,%l7
1520
1521	faddd	%f6,qq1,%f6
1522	fmuld	%f0,%f4,%f4
1523	add	%l4,%g1,%l4
1524
1525	fmuld	%f8,%f14,%f14
1526
1527	faddd	%f22,qq1,%f22
1528	fmuld	%f16,%f20,%f20
1529	add	%l6,%g1,%l6
1530
1531	fmuld	%f24,%f30,%f30
1532
1533	fmuld	%f2,%f4,%f4
1534
1535	fmuld	%f10,%f14,%f14
1536	ldd	[%l5+8],%f8
1537
1538	fmuld	%f18,%f20,%f20
1539
1540	fmuld	%f26,%f30,%f30
1541	ldd	[%l7+8],%f24
1542
1543	fmuld	%f0,%f6,%f6
1544	faddd	%f4,%f32,%f4
1545	ldd	[%l4+16],%f0
1546
1547	fmuld	%f8,%f12,%f12
1548	faddd	%f34,%f14,%f14
1549
1550	fmuld	%f16,%f22,%f22
1551	faddd	%f20,%f36,%f20
1552	ldd	[%l6+16],%f16
1553
1554	fmuld	%f24,%f28,%f28
1555	faddd	%f38,%f30,%f30
1556
1557	fmuld	%f0,%f6,%f6
1558	faddd	%f4,%f2,%f4
1559	ldd	[%l4+8],%f32
1560
1561	faddd	%f10,%f14,%f14
1562	ldd	[%l5+16],%f34
1563
1564	fmuld	%f16,%f22,%f22
1565	faddd	%f20,%f18,%f20
1566	ldd	[%l6+8],%f36
1567
1568	faddd	%f26,%f30,%f30
1569	ldd	[%l7+16],%f38
1570
1571	fmuld	%f32,%f4,%f4
1572
1573	fmuld	%f34,%f14,%f14
1574
1575	fmuld	%f36,%f20,%f20
1576
1577	fmuld	%f38,%f30,%f30
1578
1579	fsubd	%f6,%f4,%f6
1580
1581	faddd	%f14,%f12,%f14
1582
1583	fsubd	%f22,%f20,%f22
1584
1585	faddd	%f30,%f28,%f30
1586
1587	faddd	%f6,%f0,%f6
1588
1589	faddd	%f14,%f8,%f14
1590
1591	faddd	%f22,%f16,%f22
1592
1593	faddd	%f30,%f24,%f30
1594	mov	%l0,%l4
1595
1596	fnegd	%f6,%f4
1597	lda	[%i1]%asi,%l0		! preload next argument
1598
1599	fnegd	%f14,%f12
1600	lda	[%i1]%asi,%f0
1601
1602	fnegd	%f22,%f20
1603	lda	[%i1+4]%asi,%f3
1604
1605	fnegd	%f30,%f28
1606	andn	%l0,%i5,%l0
1607	add	%i1,%i2,%i1
1608
1609	andcc	%l4,2,%g0
1610	fmovdnz	%icc,%f4,%f6
1611	st	%f6,[%o0]
1612
1613	andcc	%l1,2,%g0
1614	fmovdnz	%icc,%f12,%f14
1615	st	%f14,[%o1]
1616
1617	andcc	%l2,2,%g0
1618	fmovdnz	%icc,%f20,%f22
1619	st	%f22,[%o2]
1620
1621	andcc	%l3,2,%g0
1622	fmovdnz	%icc,%f28,%f30
1623	st	%f30,[%o3]
1624
1625	addcc	%i0,-1,%i0
1626	bg,pt	%icc,.loop0
1627! delay slot
1628	st	%f7,[%o0+4]
1629
1630	ba,pt	%icc,.end
1631! delay slot
1632	nop
1633
/*
 * .case6: entered from .case4 when n is odd in lane 0 and even in lanes
 * 1 and 2.  Lane 3 has not been examined yet, so this first forms z for
 * lane 3, applies the sign of x to its correction term w, and goes to
 * .case7 if n for lane 3 is even as well.
 *
 * Otherwise lanes 1 and 2 use the sin form and lanes 0 and 3 the cos
 * form.  With T the 32-byte table entry of the lane (byte offsets), d
 * the reduced argument minus T[0] and z = d * d:
 *
 *   cos form:  C = z * (qq1 + z * (qq2 + z * qq3))
 *              S = d + w + d * z * (pp1 + z * pp2)
 *              result = T[16] * C - T[8] * S + T[16]
 *   sin form:  S = d + w + d * z * (pp1 + z * (pp2 + z * pp3))
 *              C = z * (qq1 + z * qq2)
 *              result = T[16] * S + T[8] * C + T[8]
 *
 * Each result is negated when bit 1 of its n is set.  The tail preloads
 * the next argument, stores the results and loops while n stays positive.
 */
1634	.align	16
1635.case6:
1636	fmuld	%f26,%f26,%f24
1637	andcc	%l3,1,%g0
1638	bz,pn	%icc,.case7
1639! delay slot
1640	fxor	%f30,%f38,%f38
1641
1642	fmuld	%f8,pp3,%f14		! sin(x1)
1643
1644	fmuld	%f16,pp3,%f22		! sin(x2)
1645
1646	fmuld	%f0,qq3,%f6		! cos(x0)
1647
1648	faddd	%f14,pp2,%f14
1649	fmuld	%f8,qq2,%f12
1650
1651	faddd	%f22,pp2,%f22
1652	fmuld	%f16,qq2,%f20
1653
1654	fmuld	%f24,qq3,%f30		! cos(x3)
1655
1656	faddd	%f6,qq2,%f6
1657	fmuld	%f0,pp2,%f4
1658
1659	fmuld	%f8,%f14,%f14
1660	faddd	%f12,qq1,%f12
1661
1662	fmuld	%f16,%f22,%f22
1663	faddd	%f20,qq1,%f20
1664
1665	faddd	%f30,qq2,%f30
1666	fmuld	%f24,pp2,%f28
1667
1668	fmuld	%f0,%f6,%f6
1669	faddd	%f4,pp1,%f4
1670
1671	faddd	%f14,pp1,%f14
1672	fmuld	%f8,%f12,%f12
1673	add	%l5,%g1,%l5
1674
1675	faddd	%f22,pp1,%f22
1676	fmuld	%f16,%f20,%f20
1677	add	%l6,%g1,%l6
1678
1679	fmuld	%f24,%f30,%f30
1680	faddd	%f28,pp1,%f28
1681
1682	faddd	%f6,qq1,%f6
1683	fmuld	%f0,%f4,%f4
1684	add	%l4,%g1,%l4
1685
1686	fmuld	%f8,%f14,%f14
1687
1688	fmuld	%f16,%f22,%f22
1689
1690	faddd	%f30,qq1,%f30
1691	fmuld	%f24,%f28,%f28
1692	add	%l7,%g1,%l7
1693
1694	fmuld	%f2,%f4,%f4
1695
1696	fmuld	%f10,%f14,%f14
1697	ldd	[%l5+8],%f8
1698
1699	fmuld	%f18,%f22,%f22
1700	ldd	[%l6+8],%f16
1701
1702	fmuld	%f26,%f28,%f28
1703
1704	fmuld	%f0,%f6,%f6
1705	faddd	%f4,%f32,%f4
1706	ldd	[%l4+16],%f0
1707
1708	fmuld	%f8,%f12,%f12
1709	faddd	%f34,%f14,%f14
1710
1711	fmuld	%f16,%f20,%f20
1712	faddd	%f36,%f22,%f22
1713
1714	fmuld	%f24,%f30,%f30
1715	faddd	%f28,%f38,%f28
1716	ldd	[%l7+16],%f24
1717
1718	fmuld	%f0,%f6,%f6
1719	faddd	%f4,%f2,%f4
1720	ldd	[%l4+8],%f32
1721
1722	faddd	%f10,%f14,%f14
1723	ldd	[%l5+16],%f34
1724
1725	faddd	%f18,%f22,%f22
1726	ldd	[%l6+16],%f36
1727
1728	fmuld	%f24,%f30,%f30
1729	faddd	%f28,%f26,%f28
1730	ldd	[%l7+8],%f38
1731
1732	fmuld	%f32,%f4,%f4
1733
1734	fmuld	%f34,%f14,%f14
1735
1736	fmuld	%f36,%f22,%f22
1737
1738	fmuld	%f38,%f28,%f28
1739
1740	fsubd	%f6,%f4,%f6
1741
1742	faddd	%f14,%f12,%f14
1743
1744	faddd	%f22,%f20,%f22
1745
1746	fsubd	%f30,%f28,%f30
1747
1748	faddd	%f6,%f0,%f6
1749
1750	faddd	%f14,%f8,%f14
1751
1752	faddd	%f22,%f16,%f22
1753
1754	faddd	%f30,%f24,%f30
1755	mov	%l0,%l4
1756
1757	fnegd	%f6,%f4
1758	lda	[%i1]%asi,%l0		! preload next argument
1759
1760	fnegd	%f14,%f12
1761	lda	[%i1]%asi,%f0
1762
1763	fnegd	%f22,%f20
1764	lda	[%i1+4]%asi,%f3
1765
1766	fnegd	%f30,%f28
1767	andn	%l0,%i5,%l0
1768	add	%i1,%i2,%i1
1769
1770	andcc	%l4,2,%g0
1771	fmovdnz	%icc,%f4,%f6
1772	st	%f6,[%o0]
1773
1774	andcc	%l1,2,%g0
1775	fmovdnz	%icc,%f12,%f14
1776	st	%f14,[%o1]
1777
1778	andcc	%l2,2,%g0
1779	fmovdnz	%icc,%f20,%f22
1780	st	%f22,[%o2]
1781
1782	andcc	%l3,2,%g0
1783	fmovdnz	%icc,%f28,%f30
1784	st	%f30,[%o3]
1785
1786	addcc	%i0,-1,%i0
1787	bg,pt	%icc,.loop0
1788! delay slot
1789	st	%f7,[%o0+4]
1790
1791	ba,pt	%icc,.end
1792! delay slot
1793	nop
1794
1795	.align	16
! .case7: lane 0 is evaluated with the cos-path coefficients (qq3, qq2,
! qq1 lead on %f0) and lanes 1-3 with the sin-path coefficients (pp3, pp2,
! pp1 lead on %f8, %f16, %f24), as the per-lane annotations below show.
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! The four evaluations are interleaved instruction by instruction.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
1796.case7:
1797	fmuld	%f8,pp3,%f14		! sin(x1)
1798
1799	fmuld	%f16,pp3,%f22		! sin(x2)
1800
1801	fmuld	%f24,pp3,%f30		! sin(x3)
1802
1803	fmuld	%f0,qq3,%f6		! cos(x0)
1804
1805	faddd	%f14,pp2,%f14
1806	fmuld	%f8,qq2,%f12
1807
1808	faddd	%f22,pp2,%f22
1809	fmuld	%f16,qq2,%f20
1810
1811	faddd	%f30,pp2,%f30
1812	fmuld	%f24,qq2,%f28
1813
1814	faddd	%f6,qq2,%f6
1815	fmuld	%f0,pp2,%f4
1816
1817	fmuld	%f8,%f14,%f14
1818	faddd	%f12,qq1,%f12
1819
1820	fmuld	%f16,%f22,%f22
1821	faddd	%f20,qq1,%f20
1822
1823	fmuld	%f24,%f30,%f30
1824	faddd	%f28,qq1,%f28
1825
1826	fmuld	%f0,%f6,%f6
1827	faddd	%f4,pp1,%f4
1828
1829	faddd	%f14,pp1,%f14
1830	fmuld	%f8,%f12,%f12
1831	add	%l5,%g1,%l5
1832
1833	faddd	%f22,pp1,%f22
1834	fmuld	%f16,%f20,%f20
1835	add	%l6,%g1,%l6
1836
1837	faddd	%f30,pp1,%f30
1838	fmuld	%f24,%f28,%f28
1839	add	%l7,%g1,%l7
1840
1841	faddd	%f6,qq1,%f6
1842	fmuld	%f0,%f4,%f4
1843	add	%l4,%g1,%l4
1844
1845	fmuld	%f8,%f14,%f14
1846
1847	fmuld	%f16,%f22,%f22
1848
1849	fmuld	%f24,%f30,%f30
1850
1851	fmuld	%f2,%f4,%f4
1852
1853	fmuld	%f10,%f14,%f14
1854	ldd	[%l5+8],%f8
1855
1856	fmuld	%f18,%f22,%f22
1857	ldd	[%l6+8],%f16
1858
1859	fmuld	%f26,%f30,%f30
1860	ldd	[%l7+8],%f24
1861
1862	fmuld	%f0,%f6,%f6
1863	faddd	%f4,%f32,%f4
1864	ldd	[%l4+16],%f0
1865
1866	fmuld	%f8,%f12,%f12
1867	faddd	%f34,%f14,%f14
1868
1869	fmuld	%f16,%f20,%f20
1870	faddd	%f36,%f22,%f22
1871
1872	fmuld	%f24,%f28,%f28
1873	faddd	%f38,%f30,%f30
1874
1875	fmuld	%f0,%f6,%f6
1876	faddd	%f4,%f2,%f4
1877	ldd	[%l4+8],%f32
1878
1879	faddd	%f10,%f14,%f14
1880	ldd	[%l5+16],%f34
1881
1882	faddd	%f18,%f22,%f22
1883	ldd	[%l6+16],%f36
1884
1885	faddd	%f26,%f30,%f30
1886	ldd	[%l7+16],%f38
1887
1888	fmuld	%f32,%f4,%f4
1889
1890	fmuld	%f34,%f14,%f14
1891
1892	fmuld	%f36,%f22,%f22
1893
1894	fmuld	%f38,%f30,%f30
1895
1896	fsubd	%f6,%f4,%f6
1897
1898	faddd	%f14,%f12,%f14
1899
1900	faddd	%f22,%f20,%f22
1901
1902	faddd	%f30,%f28,%f30
1903
1904	faddd	%f6,%f0,%f6
1905
1906	faddd	%f14,%f8,%f14
1907
1908	faddd	%f22,%f16,%f22
1909
1910	faddd	%f30,%f24,%f30
1911	mov	%l0,%l4
1912
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
1913	fnegd	%f6,%f4
1914	lda	[%i1]%asi,%l0		! preload next argument
1915
1916	fnegd	%f14,%f12
1917	lda	[%i1]%asi,%f0
1918
1919	fnegd	%f22,%f20
1920	lda	[%i1+4]%asi,%f3
1921
1922	fnegd	%f30,%f28
1923	andn	%l0,%i5,%l0
1924	add	%i1,%i2,%i1
1925
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
1926	andcc	%l4,2,%g0
1927	fmovdnz	%icc,%f4,%f6
1928	st	%f6,[%o0]
1929
1930	andcc	%l1,2,%g0
1931	fmovdnz	%icc,%f12,%f14
1932	st	%f14,[%o1]
1933
1934	andcc	%l2,2,%g0
1935	fmovdnz	%icc,%f20,%f22
1936	st	%f22,[%o2]
1937
1938	andcc	%l3,2,%g0
1939	fmovdnz	%icc,%f28,%f30
1940	st	%f30,[%o3]
1941
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
1942	addcc	%i0,-1,%i0
1943	bg,pt	%icc,.loop0
1944! delay slot
1945	st	%f7,[%o0+4]
1946
1947	ba,pt	%icc,.end
1948! delay slot
1949	nop
1950
1951	.align	16
! .case8: head of the dispatch chain in which lane 0 is always evaluated
! with the sin-path coefficients.  It squares the remaining lane arguments
! (%f10, %f18, %f26 into %f8, %f16, %f24) and tests bit 0 of %l1, %l2, %l3
! in turn; a clear bit leaves the chain for .case12, .case10 or .case9
! respectively.  Each branch delay slot XORs %f14, %f22 or %f30 into
! %f34, %f36 or %f38.  Falling through all three tests evaluates
! sin(x0), cos(x1), cos(x2), cos(x3).
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
1952.case8:
1953	fmuld	%f10,%f10,%f8
1954	andcc	%l1,1,%g0
1955	bz,pn	%icc,.case12
1956! delay slot
1957	fxor	%f14,%f34,%f34
1958
1959	fmuld	%f18,%f18,%f16
1960	andcc	%l2,1,%g0
1961	bz,pn	%icc,.case10
1962! delay slot
1963	fxor	%f22,%f36,%f36
1964
1965	fmuld	%f26,%f26,%f24
1966	andcc	%l3,1,%g0
1967	bz,pn	%icc,.case9
1968! delay slot
1969	fxor	%f30,%f38,%f38
1970
1971	fmuld	%f0,pp3,%f6		! sin(x0)
1972
1973	faddd	%f6,pp2,%f6
1974	fmuld	%f0,qq2,%f4
1975
1976	fmuld	%f8,qq3,%f14		! cos(x1)
1977
1978	fmuld	%f16,qq3,%f22		! cos(x2)
1979
1980	fmuld	%f24,qq3,%f30		! cos(x3)
1981
1982	fmuld	%f0,%f6,%f6
1983	faddd	%f4,qq1,%f4
1984
1985	faddd	%f14,qq2,%f14
1986	fmuld	%f8,pp2,%f12
1987
1988	faddd	%f22,qq2,%f22
1989	fmuld	%f16,pp2,%f20
1990
1991	faddd	%f30,qq2,%f30
1992	fmuld	%f24,pp2,%f28
1993
1994	faddd	%f6,pp1,%f6
1995	fmuld	%f0,%f4,%f4
1996	add	%l4,%g1,%l4
1997
1998	fmuld	%f8,%f14,%f14
1999	faddd	%f12,pp1,%f12
2000
2001	fmuld	%f16,%f22,%f22
2002	faddd	%f20,pp1,%f20
2003
2004	fmuld	%f24,%f30,%f30
2005	faddd	%f28,pp1,%f28
2006
2007	fmuld	%f0,%f6,%f6
2008
2009	faddd	%f14,qq1,%f14
2010	fmuld	%f8,%f12,%f12
2011	add	%l5,%g1,%l5
2012
2013	faddd	%f22,qq1,%f22
2014	fmuld	%f16,%f20,%f20
2015	add	%l6,%g1,%l6
2016
2017	faddd	%f30,qq1,%f30
2018	fmuld	%f24,%f28,%f28
2019	add	%l7,%g1,%l7
2020
2021	fmuld	%f2,%f6,%f6
2022	ldd	[%l4+8],%f0
2023
2024	fmuld	%f10,%f12,%f12
2025
2026	fmuld	%f18,%f20,%f20
2027
2028	fmuld	%f26,%f28,%f28
2029
2030	fmuld	%f0,%f4,%f4
2031	faddd	%f32,%f6,%f6
2032
2033	fmuld	%f8,%f14,%f14
2034	faddd	%f12,%f34,%f12
2035	ldd	[%l5+16],%f8
2036
2037	fmuld	%f16,%f22,%f22
2038	faddd	%f20,%f36,%f20
2039	ldd	[%l6+16],%f16
2040
2041	fmuld	%f24,%f30,%f30
2042	faddd	%f28,%f38,%f28
2043	ldd	[%l7+16],%f24
2044
2045	faddd	%f2,%f6,%f6
2046	ldd	[%l4+16],%f32
2047
2048	fmuld	%f8,%f14,%f14
2049	faddd	%f12,%f10,%f12
2050	ldd	[%l5+8],%f34
2051
2052	fmuld	%f16,%f22,%f22
2053	faddd	%f20,%f18,%f20
2054	ldd	[%l6+8],%f36
2055
2056	fmuld	%f24,%f30,%f30
2057	faddd	%f28,%f26,%f28
2058	ldd	[%l7+8],%f38
2059
2060	fmuld	%f32,%f6,%f6
2061
2062	fmuld	%f34,%f12,%f12
2063
2064	fmuld	%f36,%f20,%f20
2065
2066	fmuld	%f38,%f28,%f28
2067
2068	faddd	%f6,%f4,%f6
2069
2070	fsubd	%f14,%f12,%f14
2071
2072	fsubd	%f22,%f20,%f22
2073
2074	fsubd	%f30,%f28,%f30
2075
2076	faddd	%f6,%f0,%f6
2077
2078	faddd	%f14,%f8,%f14
2079
2080	faddd	%f22,%f16,%f22
2081
2082	faddd	%f30,%f24,%f30
2083	mov	%l0,%l4
2084
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
2085	fnegd	%f6,%f4
2086	lda	[%i1]%asi,%l0		! preload next argument
2087
2088	fnegd	%f14,%f12
2089	lda	[%i1]%asi,%f0
2090
2091	fnegd	%f22,%f20
2092	lda	[%i1+4]%asi,%f3
2093
2094	fnegd	%f30,%f28
2095	andn	%l0,%i5,%l0
2096	add	%i1,%i2,%i1
2097
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
2098	andcc	%l4,2,%g0
2099	fmovdnz	%icc,%f4,%f6
2100	st	%f6,[%o0]
2101
2102	andcc	%l1,2,%g0
2103	fmovdnz	%icc,%f12,%f14
2104	st	%f14,[%o1]
2105
2106	andcc	%l2,2,%g0
2107	fmovdnz	%icc,%f20,%f22
2108	st	%f22,[%o2]
2109
2110	andcc	%l3,2,%g0
2111	fmovdnz	%icc,%f28,%f30
2112	st	%f30,[%o3]
2113
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
2114	addcc	%i0,-1,%i0
2115	bg,pt	%icc,.loop0
2116! delay slot
2117	st	%f7,[%o0+4]
2118
2119	ba,pt	%icc,.end
2120! delay slot
2121	nop
2122
2123	.align	16
! .case9: reached from .case8 when bit 0 of %l3 is clear.  Evaluates
! sin(x0), cos(x1), cos(x2), sin(x3): lanes 0 and 3 lead with the pp
! coefficients, lanes 1 and 2 with the qq coefficients.
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
2124.case9:
2125	fmuld	%f0,pp3,%f6		! sin(x0)
2126
2127	fmuld	%f24,pp3,%f30		! sin(x3)
2128
2129	faddd	%f6,pp2,%f6
2130	fmuld	%f0,qq2,%f4
2131
2132	fmuld	%f8,qq3,%f14		! cos(x1)
2133
2134	fmuld	%f16,qq3,%f22		! cos(x2)
2135
2136	faddd	%f30,pp2,%f30
2137	fmuld	%f24,qq2,%f28
2138
2139	fmuld	%f0,%f6,%f6
2140	faddd	%f4,qq1,%f4
2141
2142	faddd	%f14,qq2,%f14
2143	fmuld	%f8,pp2,%f12
2144
2145	faddd	%f22,qq2,%f22
2146	fmuld	%f16,pp2,%f20
2147
2148	fmuld	%f24,%f30,%f30
2149	faddd	%f28,qq1,%f28
2150
2151	faddd	%f6,pp1,%f6
2152	fmuld	%f0,%f4,%f4
2153	add	%l4,%g1,%l4
2154
2155	fmuld	%f8,%f14,%f14
2156	faddd	%f12,pp1,%f12
2157
2158	fmuld	%f16,%f22,%f22
2159	faddd	%f20,pp1,%f20
2160
2161	faddd	%f30,pp1,%f30
2162	fmuld	%f24,%f28,%f28
2163	add	%l7,%g1,%l7
2164
2165	fmuld	%f0,%f6,%f6
2166
2167	faddd	%f14,qq1,%f14
2168	fmuld	%f8,%f12,%f12
2169	add	%l5,%g1,%l5
2170
2171	faddd	%f22,qq1,%f22
2172	fmuld	%f16,%f20,%f20
2173	add	%l6,%g1,%l6
2174
2175	fmuld	%f24,%f30,%f30
2176
2177	fmuld	%f2,%f6,%f6
2178	ldd	[%l4+8],%f0
2179
2180	fmuld	%f10,%f12,%f12
2181
2182	fmuld	%f18,%f20,%f20
2183
2184	fmuld	%f26,%f30,%f30
2185	ldd	[%l7+8],%f24
2186
2187	fmuld	%f0,%f4,%f4
2188	faddd	%f32,%f6,%f6
2189
2190	fmuld	%f8,%f14,%f14
2191	faddd	%f12,%f34,%f12
2192	ldd	[%l5+16],%f8
2193
2194	fmuld	%f16,%f22,%f22
2195	faddd	%f20,%f36,%f20
2196	ldd	[%l6+16],%f16
2197
2198	fmuld	%f24,%f28,%f28
2199	faddd	%f38,%f30,%f30
2200
2201	faddd	%f2,%f6,%f6
2202	ldd	[%l4+16],%f32
2203
2204	fmuld	%f8,%f14,%f14
2205	faddd	%f12,%f10,%f12
2206	ldd	[%l5+8],%f34
2207
2208	fmuld	%f16,%f22,%f22
2209	faddd	%f20,%f18,%f20
2210	ldd	[%l6+8],%f36
2211
2212	faddd	%f26,%f30,%f30
2213	ldd	[%l7+16],%f38
2214
2215	fmuld	%f32,%f6,%f6
2216
2217	fmuld	%f34,%f12,%f12
2218
2219	fmuld	%f36,%f20,%f20
2220
2221	fmuld	%f38,%f30,%f30
2222
2223	faddd	%f6,%f4,%f6
2224
2225	fsubd	%f14,%f12,%f14
2226
2227	fsubd	%f22,%f20,%f22
2228
2229	faddd	%f30,%f28,%f30
2230
2231	faddd	%f6,%f0,%f6
2232
2233	faddd	%f14,%f8,%f14
2234
2235	faddd	%f22,%f16,%f22
2236
2237	faddd	%f30,%f24,%f30
2238	mov	%l0,%l4
2239
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
2240	fnegd	%f6,%f4
2241	lda	[%i1]%asi,%l0		! preload next argument
2242
2243	fnegd	%f14,%f12
2244	lda	[%i1]%asi,%f0
2245
2246	fnegd	%f22,%f20
2247	lda	[%i1+4]%asi,%f3
2248
2249	fnegd	%f30,%f28
2250	andn	%l0,%i5,%l0
2251	add	%i1,%i2,%i1
2252
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
2253	andcc	%l4,2,%g0
2254	fmovdnz	%icc,%f4,%f6
2255	st	%f6,[%o0]
2256
2257	andcc	%l1,2,%g0
2258	fmovdnz	%icc,%f12,%f14
2259	st	%f14,[%o1]
2260
2261	andcc	%l2,2,%g0
2262	fmovdnz	%icc,%f20,%f22
2263	st	%f22,[%o2]
2264
2265	andcc	%l3,2,%g0
2266	fmovdnz	%icc,%f28,%f30
2267	st	%f30,[%o3]
2268
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
2269	addcc	%i0,-1,%i0
2270	bg,pt	%icc,.loop0
2271! delay slot
2272	st	%f7,[%o0+4]
2273
2274	ba,pt	%icc,.end
2275! delay slot
2276	nop
2277
2278	.align	16
! .case10: reached from .case8 when bit 0 of %l2 is clear.  Squares the
! lane 3 argument (%f26 into %f24) and tests bit 0 of %l3: clear goes to
! .case11, with the delay slot XORing %f30 into %f38.  Falling through
! evaluates sin(x0), cos(x1), sin(x2), cos(x3).
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
2279.case10:
2280	fmuld	%f26,%f26,%f24
2281	andcc	%l3,1,%g0
2282	bz,pn	%icc,.case11
2283! delay slot
2284	fxor	%f30,%f38,%f38
2285
2286	fmuld	%f0,pp3,%f6		! sin(x0)
2287
2288	fmuld	%f16,pp3,%f22		! sin(x2)
2289
2290	faddd	%f6,pp2,%f6
2291	fmuld	%f0,qq2,%f4
2292
2293	fmuld	%f8,qq3,%f14		! cos(x1)
2294
2295	faddd	%f22,pp2,%f22
2296	fmuld	%f16,qq2,%f20
2297
2298	fmuld	%f24,qq3,%f30		! cos(x3)
2299
2300	fmuld	%f0,%f6,%f6
2301	faddd	%f4,qq1,%f4
2302
2303	faddd	%f14,qq2,%f14
2304	fmuld	%f8,pp2,%f12
2305
2306	fmuld	%f16,%f22,%f22
2307	faddd	%f20,qq1,%f20
2308
2309	faddd	%f30,qq2,%f30
2310	fmuld	%f24,pp2,%f28
2311
2312	faddd	%f6,pp1,%f6
2313	fmuld	%f0,%f4,%f4
2314	add	%l4,%g1,%l4
2315
2316	fmuld	%f8,%f14,%f14
2317	faddd	%f12,pp1,%f12
2318
2319	faddd	%f22,pp1,%f22
2320	fmuld	%f16,%f20,%f20
2321	add	%l6,%g1,%l6
2322
2323	fmuld	%f24,%f30,%f30
2324	faddd	%f28,pp1,%f28
2325
2326	fmuld	%f0,%f6,%f6
2327
2328	faddd	%f14,qq1,%f14
2329	fmuld	%f8,%f12,%f12
2330	add	%l5,%g1,%l5
2331
2332	fmuld	%f16,%f22,%f22
2333
2334	faddd	%f30,qq1,%f30
2335	fmuld	%f24,%f28,%f28
2336	add	%l7,%g1,%l7
2337
2338	fmuld	%f2,%f6,%f6
2339	ldd	[%l4+8],%f0
2340
2341	fmuld	%f10,%f12,%f12
2342
2343	fmuld	%f18,%f22,%f22
2344	ldd	[%l6+8],%f16
2345
2346	fmuld	%f26,%f28,%f28
2347
2348	fmuld	%f0,%f4,%f4
2349	faddd	%f32,%f6,%f6
2350
2351	fmuld	%f8,%f14,%f14
2352	faddd	%f12,%f34,%f12
2353	ldd	[%l5+16],%f8
2354
2355	fmuld	%f16,%f20,%f20
2356	faddd	%f36,%f22,%f22
2357
2358	fmuld	%f24,%f30,%f30
2359	faddd	%f28,%f38,%f28
2360	ldd	[%l7+16],%f24
2361
2362	faddd	%f2,%f6,%f6
2363	ldd	[%l4+16],%f32
2364
2365	fmuld	%f8,%f14,%f14
2366	faddd	%f12,%f10,%f12
2367	ldd	[%l5+8],%f34
2368
2369	faddd	%f18,%f22,%f22
2370	ldd	[%l6+16],%f36
2371
2372	fmuld	%f24,%f30,%f30
2373	faddd	%f28,%f26,%f28
2374	ldd	[%l7+8],%f38
2375
2376	fmuld	%f32,%f6,%f6
2377
2378	fmuld	%f34,%f12,%f12
2379
2380	fmuld	%f36,%f22,%f22
2381
2382	fmuld	%f38,%f28,%f28
2383
2384	faddd	%f6,%f4,%f6
2385
2386	fsubd	%f14,%f12,%f14
2387
2388	faddd	%f22,%f20,%f22
2389
2390	fsubd	%f30,%f28,%f30
2391
2392	faddd	%f6,%f0,%f6
2393
2394	faddd	%f14,%f8,%f14
2395
2396	faddd	%f22,%f16,%f22
2397
2398	faddd	%f30,%f24,%f30
2399	mov	%l0,%l4
2400
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
2401	fnegd	%f6,%f4
2402	lda	[%i1]%asi,%l0		! preload next argument
2403
2404	fnegd	%f14,%f12
2405	lda	[%i1]%asi,%f0
2406
2407	fnegd	%f22,%f20
2408	lda	[%i1+4]%asi,%f3
2409
2410	fnegd	%f30,%f28
2411	andn	%l0,%i5,%l0
2412	add	%i1,%i2,%i1
2413
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
2414	andcc	%l4,2,%g0
2415	fmovdnz	%icc,%f4,%f6
2416	st	%f6,[%o0]
2417
2418	andcc	%l1,2,%g0
2419	fmovdnz	%icc,%f12,%f14
2420	st	%f14,[%o1]
2421
2422	andcc	%l2,2,%g0
2423	fmovdnz	%icc,%f20,%f22
2424	st	%f22,[%o2]
2425
2426	andcc	%l3,2,%g0
2427	fmovdnz	%icc,%f28,%f30
2428	st	%f30,[%o3]
2429
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
2430	addcc	%i0,-1,%i0
2431	bg,pt	%icc,.loop0
2432! delay slot
2433	st	%f7,[%o0+4]
2434
2435	ba,pt	%icc,.end
2436! delay slot
2437	nop
2438
2439	.align	16
! .case11: reached from .case10 when bit 0 of %l3 is clear.  Evaluates
! sin(x0), cos(x1), sin(x2), sin(x3): lanes 0, 2 and 3 lead with the pp
! coefficients, lane 1 with the qq coefficients.
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
2440.case11:
2441	fmuld	%f0,pp3,%f6		! sin(x0)
2442
2443	fmuld	%f16,pp3,%f22		! sin(x2)
2444
2445	fmuld	%f24,pp3,%f30		! sin(x3)
2446
2447	faddd	%f6,pp2,%f6
2448	fmuld	%f0,qq2,%f4
2449
2450	fmuld	%f8,qq3,%f14		! cos(x1)
2451
2452	faddd	%f22,pp2,%f22
2453	fmuld	%f16,qq2,%f20
2454
2455	faddd	%f30,pp2,%f30
2456	fmuld	%f24,qq2,%f28
2457
2458	fmuld	%f0,%f6,%f6
2459	faddd	%f4,qq1,%f4
2460
2461	faddd	%f14,qq2,%f14
2462	fmuld	%f8,pp2,%f12
2463
2464	fmuld	%f16,%f22,%f22
2465	faddd	%f20,qq1,%f20
2466
2467	fmuld	%f24,%f30,%f30
2468	faddd	%f28,qq1,%f28
2469
2470	faddd	%f6,pp1,%f6
2471	fmuld	%f0,%f4,%f4
2472	add	%l4,%g1,%l4
2473
2474	fmuld	%f8,%f14,%f14
2475	faddd	%f12,pp1,%f12
2476
2477	faddd	%f22,pp1,%f22
2478	fmuld	%f16,%f20,%f20
2479	add	%l6,%g1,%l6
2480
2481	faddd	%f30,pp1,%f30
2482	fmuld	%f24,%f28,%f28
2483	add	%l7,%g1,%l7
2484
2485	fmuld	%f0,%f6,%f6
2486
2487	faddd	%f14,qq1,%f14
2488	fmuld	%f8,%f12,%f12
2489	add	%l5,%g1,%l5
2490
2491	fmuld	%f16,%f22,%f22
2492
2493	fmuld	%f24,%f30,%f30
2494
2495	fmuld	%f2,%f6,%f6
2496	ldd	[%l4+8],%f0
2497
2498	fmuld	%f10,%f12,%f12
2499
2500	fmuld	%f18,%f22,%f22
2501	ldd	[%l6+8],%f16
2502
2503	fmuld	%f26,%f30,%f30
2504	ldd	[%l7+8],%f24
2505
2506	fmuld	%f0,%f4,%f4
2507	faddd	%f32,%f6,%f6
2508
2509	fmuld	%f8,%f14,%f14
2510	faddd	%f12,%f34,%f12
2511	ldd	[%l5+16],%f8
2512
2513	fmuld	%f16,%f20,%f20
2514	faddd	%f36,%f22,%f22
2515
2516	fmuld	%f24,%f28,%f28
2517	faddd	%f38,%f30,%f30
2518
2519	faddd	%f2,%f6,%f6
2520	ldd	[%l4+16],%f32
2521
2522	fmuld	%f8,%f14,%f14
2523	faddd	%f12,%f10,%f12
2524	ldd	[%l5+8],%f34
2525
2526	faddd	%f18,%f22,%f22
2527	ldd	[%l6+16],%f36
2528
2529	faddd	%f26,%f30,%f30
2530	ldd	[%l7+16],%f38
2531
2532	fmuld	%f32,%f6,%f6
2533
2534	fmuld	%f34,%f12,%f12
2535
2536	fmuld	%f36,%f22,%f22
2537
2538	fmuld	%f38,%f30,%f30
2539
2540	faddd	%f6,%f4,%f6
2541
2542	fsubd	%f14,%f12,%f14
2543
2544	faddd	%f22,%f20,%f22
2545
2546	faddd	%f30,%f28,%f30
2547
2548	faddd	%f6,%f0,%f6
2549
2550	faddd	%f14,%f8,%f14
2551
2552	faddd	%f22,%f16,%f22
2553
2554	faddd	%f30,%f24,%f30
2555	mov	%l0,%l4
2556
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
2557	fnegd	%f6,%f4
2558	lda	[%i1]%asi,%l0		! preload next argument
2559
2560	fnegd	%f14,%f12
2561	lda	[%i1]%asi,%f0
2562
2563	fnegd	%f22,%f20
2564	lda	[%i1+4]%asi,%f3
2565
2566	fnegd	%f30,%f28
2567	andn	%l0,%i5,%l0
2568	add	%i1,%i2,%i1
2569
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
2570	andcc	%l4,2,%g0
2571	fmovdnz	%icc,%f4,%f6
2572	st	%f6,[%o0]
2573
2574	andcc	%l1,2,%g0
2575	fmovdnz	%icc,%f12,%f14
2576	st	%f14,[%o1]
2577
2578	andcc	%l2,2,%g0
2579	fmovdnz	%icc,%f20,%f22
2580	st	%f22,[%o2]
2581
2582	andcc	%l3,2,%g0
2583	fmovdnz	%icc,%f28,%f30
2584	st	%f30,[%o3]
2585
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
2586	addcc	%i0,-1,%i0
2587	bg,pt	%icc,.loop0
2588! delay slot
2589	st	%f7,[%o0+4]
2590
2591	ba,pt	%icc,.end
2592! delay slot
2593	nop
2594
2595	.align	16
! .case12: reached from .case8 when bit 0 of %l1 is clear.  Squares the
! lane 2 and lane 3 arguments (%f18, %f26 into %f16, %f24) and tests bit 0
! of %l2 and %l3 in turn; a clear bit goes to .case14 or .case13.  The
! delay slots XOR %f22 into %f36 and %f30 into %f38.  Falling through
! evaluates sin(x0), sin(x1), cos(x2), cos(x3).
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
2596.case12:
2597	fmuld	%f18,%f18,%f16
2598	andcc	%l2,1,%g0
2599	bz,pn	%icc,.case14
2600! delay slot
2601	fxor	%f22,%f36,%f36
2602
2603	fmuld	%f26,%f26,%f24
2604	andcc	%l3,1,%g0
2605	bz,pn	%icc,.case13
2606! delay slot
2607	fxor	%f30,%f38,%f38
2608
2609	fmuld	%f0,pp3,%f6		! sin(x0)
2610
2611	fmuld	%f8,pp3,%f14		! sin(x1)
2612
2613	faddd	%f6,pp2,%f6
2614	fmuld	%f0,qq2,%f4
2615
2616	faddd	%f14,pp2,%f14
2617	fmuld	%f8,qq2,%f12
2618
2619	fmuld	%f16,qq3,%f22		! cos(x2)
2620
2621	fmuld	%f24,qq3,%f30		! cos(x3)
2622
2623	fmuld	%f0,%f6,%f6
2624	faddd	%f4,qq1,%f4
2625
2626	fmuld	%f8,%f14,%f14
2627	faddd	%f12,qq1,%f12
2628
2629	faddd	%f22,qq2,%f22
2630	fmuld	%f16,pp2,%f20
2631
2632	faddd	%f30,qq2,%f30
2633	fmuld	%f24,pp2,%f28
2634
2635	faddd	%f6,pp1,%f6
2636	fmuld	%f0,%f4,%f4
2637	add	%l4,%g1,%l4
2638
2639	faddd	%f14,pp1,%f14
2640	fmuld	%f8,%f12,%f12
2641	add	%l5,%g1,%l5
2642
2643	fmuld	%f16,%f22,%f22
2644	faddd	%f20,pp1,%f20
2645
2646	fmuld	%f24,%f30,%f30
2647	faddd	%f28,pp1,%f28
2648
2649	fmuld	%f0,%f6,%f6
2650
2651	fmuld	%f8,%f14,%f14
2652
2653	faddd	%f22,qq1,%f22
2654	fmuld	%f16,%f20,%f20
2655	add	%l6,%g1,%l6
2656
2657	faddd	%f30,qq1,%f30
2658	fmuld	%f24,%f28,%f28
2659	add	%l7,%g1,%l7
2660
2661	fmuld	%f2,%f6,%f6
2662	ldd	[%l4+8],%f0
2663
2664	fmuld	%f10,%f14,%f14
2665	ldd	[%l5+8],%f8
2666
2667	fmuld	%f18,%f20,%f20
2668
2669	fmuld	%f26,%f28,%f28
2670
2671	fmuld	%f0,%f4,%f4
2672	faddd	%f32,%f6,%f6
2673
2674	fmuld	%f8,%f12,%f12
2675	faddd	%f34,%f14,%f14
2676
2677	fmuld	%f16,%f22,%f22
2678	faddd	%f20,%f36,%f20
2679	ldd	[%l6+16],%f16
2680
2681	fmuld	%f24,%f30,%f30
2682	faddd	%f28,%f38,%f28
2683	ldd	[%l7+16],%f24
2684
2685	faddd	%f2,%f6,%f6
2686	ldd	[%l4+16],%f32
2687
2688	faddd	%f10,%f14,%f14
2689	ldd	[%l5+16],%f34
2690
2691	fmuld	%f16,%f22,%f22
2692	faddd	%f20,%f18,%f20
2693	ldd	[%l6+8],%f36
2694
2695	fmuld	%f24,%f30,%f30
2696	faddd	%f28,%f26,%f28
2697	ldd	[%l7+8],%f38
2698
2699	fmuld	%f32,%f6,%f6
2700
2701	fmuld	%f34,%f14,%f14
2702
2703	fmuld	%f36,%f20,%f20
2704
2705	fmuld	%f38,%f28,%f28
2706
2707	faddd	%f6,%f4,%f6
2708
2709	faddd	%f14,%f12,%f14
2710
2711	fsubd	%f22,%f20,%f22
2712
2713	fsubd	%f30,%f28,%f30
2714
2715	faddd	%f6,%f0,%f6
2716
2717	faddd	%f14,%f8,%f14
2718
2719	faddd	%f22,%f16,%f22
2720
2721	faddd	%f30,%f24,%f30
2722	mov	%l0,%l4
2723
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
2724	fnegd	%f6,%f4
2725	lda	[%i1]%asi,%l0		! preload next argument
2726
2727	fnegd	%f14,%f12
2728	lda	[%i1]%asi,%f0
2729
2730	fnegd	%f22,%f20
2731	lda	[%i1+4]%asi,%f3
2732
2733	fnegd	%f30,%f28
2734	andn	%l0,%i5,%l0
2735	add	%i1,%i2,%i1
2736
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
2737	andcc	%l4,2,%g0
2738	fmovdnz	%icc,%f4,%f6
2739	st	%f6,[%o0]
2740
2741	andcc	%l1,2,%g0
2742	fmovdnz	%icc,%f12,%f14
2743	st	%f14,[%o1]
2744
2745	andcc	%l2,2,%g0
2746	fmovdnz	%icc,%f20,%f22
2747	st	%f22,[%o2]
2748
2749	andcc	%l3,2,%g0
2750	fmovdnz	%icc,%f28,%f30
2751	st	%f30,[%o3]
2752
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
2753	addcc	%i0,-1,%i0
2754	bg,pt	%icc,.loop0
2755! delay slot
2756	st	%f7,[%o0+4]
2757
2758	ba,pt	%icc,.end
2759! delay slot
2760	nop
2761
2762	.align	16
! .case13: reached from .case12 when bit 0 of %l3 is clear.  Evaluates
! sin(x0), sin(x1), cos(x2), sin(x3): lanes 0, 1 and 3 lead with the pp
! coefficients, lane 2 with the qq coefficients.
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
2763.case13:
2764	fmuld	%f0,pp3,%f6		! sin(x0)
2765
2766	fmuld	%f8,pp3,%f14		! sin(x1)
2767
2768	fmuld	%f24,pp3,%f30		! sin(x3)
2769
2770	faddd	%f6,pp2,%f6
2771	fmuld	%f0,qq2,%f4
2772
2773	faddd	%f14,pp2,%f14
2774	fmuld	%f8,qq2,%f12
2775
2776	fmuld	%f16,qq3,%f22		! cos(x2)
2777
2778	faddd	%f30,pp2,%f30
2779	fmuld	%f24,qq2,%f28
2780
2781	fmuld	%f0,%f6,%f6
2782	faddd	%f4,qq1,%f4
2783
2784	fmuld	%f8,%f14,%f14
2785	faddd	%f12,qq1,%f12
2786
2787	faddd	%f22,qq2,%f22
2788	fmuld	%f16,pp2,%f20
2789
2790	fmuld	%f24,%f30,%f30
2791	faddd	%f28,qq1,%f28
2792
2793	faddd	%f6,pp1,%f6
2794	fmuld	%f0,%f4,%f4
2795	add	%l4,%g1,%l4
2796
2797	faddd	%f14,pp1,%f14
2798	fmuld	%f8,%f12,%f12
2799	add	%l5,%g1,%l5
2800
2801	fmuld	%f16,%f22,%f22
2802	faddd	%f20,pp1,%f20
2803
2804	faddd	%f30,pp1,%f30
2805	fmuld	%f24,%f28,%f28
2806	add	%l7,%g1,%l7
2807
2808	fmuld	%f0,%f6,%f6
2809
2810	fmuld	%f8,%f14,%f14
2811
2812	faddd	%f22,qq1,%f22
2813	fmuld	%f16,%f20,%f20
2814	add	%l6,%g1,%l6
2815
2816	fmuld	%f24,%f30,%f30
2817
2818	fmuld	%f2,%f6,%f6
2819	ldd	[%l4+8],%f0
2820
2821	fmuld	%f10,%f14,%f14
2822	ldd	[%l5+8],%f8
2823
2824	fmuld	%f18,%f20,%f20
2825
2826	fmuld	%f26,%f30,%f30
2827	ldd	[%l7+8],%f24
2828
2829	fmuld	%f0,%f4,%f4
2830	faddd	%f32,%f6,%f6
2831
2832	fmuld	%f8,%f12,%f12
2833	faddd	%f34,%f14,%f14
2834
2835	fmuld	%f16,%f22,%f22
2836	faddd	%f20,%f36,%f20
2837	ldd	[%l6+16],%f16
2838
2839	fmuld	%f24,%f28,%f28
2840	faddd	%f38,%f30,%f30
2841
2842	faddd	%f2,%f6,%f6
2843	ldd	[%l4+16],%f32
2844
2845	faddd	%f10,%f14,%f14
2846	ldd	[%l5+16],%f34
2847
2848	fmuld	%f16,%f22,%f22
2849	faddd	%f20,%f18,%f20
2850	ldd	[%l6+8],%f36
2851
2852	faddd	%f26,%f30,%f30
2853	ldd	[%l7+16],%f38
2854
2855	fmuld	%f32,%f6,%f6
2856
2857	fmuld	%f34,%f14,%f14
2858
2859	fmuld	%f36,%f20,%f20
2860
2861	fmuld	%f38,%f30,%f30
2862
2863	faddd	%f6,%f4,%f6
2864
2865	faddd	%f14,%f12,%f14
2866
2867	fsubd	%f22,%f20,%f22
2868
2869	faddd	%f30,%f28,%f30
2870
2871	faddd	%f6,%f0,%f6
2872
2873	faddd	%f14,%f8,%f14
2874
2875	faddd	%f22,%f16,%f22
2876
2877	faddd	%f30,%f24,%f30
2878	mov	%l0,%l4
2879
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
2880	fnegd	%f6,%f4
2881	lda	[%i1]%asi,%l0		! preload next argument
2882
2883	fnegd	%f14,%f12
2884	lda	[%i1]%asi,%f0
2885
2886	fnegd	%f22,%f20
2887	lda	[%i1+4]%asi,%f3
2888
2889	fnegd	%f30,%f28
2890	andn	%l0,%i5,%l0
2891	add	%i1,%i2,%i1
2892
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
2893	andcc	%l4,2,%g0
2894	fmovdnz	%icc,%f4,%f6
2895	st	%f6,[%o0]
2896
2897	andcc	%l1,2,%g0
2898	fmovdnz	%icc,%f12,%f14
2899	st	%f14,[%o1]
2900
2901	andcc	%l2,2,%g0
2902	fmovdnz	%icc,%f20,%f22
2903	st	%f22,[%o2]
2904
2905	andcc	%l3,2,%g0
2906	fmovdnz	%icc,%f28,%f30
2907	st	%f30,[%o3]
2908
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
2909	addcc	%i0,-1,%i0
2910	bg,pt	%icc,.loop0
2911! delay slot
2912	st	%f7,[%o0+4]
2913
2914	ba,pt	%icc,.end
2915! delay slot
2916	nop
2917
2918	.align	16
! .case14: reached from .case12 when bit 0 of %l2 is clear.  Squares the
! lane 3 argument (%f26 into %f24) and tests bit 0 of %l3: clear goes to
! .case15, with the delay slot XORing %f30 into %f38.  Falling through
! evaluates sin(x0), sin(x1), sin(x2), cos(x3).
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
2919.case14:
2920	fmuld	%f26,%f26,%f24
2921	andcc	%l3,1,%g0
2922	bz,pn	%icc,.case15
2923! delay slot
2924	fxor	%f30,%f38,%f38
2925
2926	fmuld	%f0,pp3,%f6		! sin(x0)
2927
2928	fmuld	%f8,pp3,%f14		! sin(x1)
2929
2930	fmuld	%f16,pp3,%f22		! sin(x2)
2931
2932	faddd	%f6,pp2,%f6
2933	fmuld	%f0,qq2,%f4
2934
2935	faddd	%f14,pp2,%f14
2936	fmuld	%f8,qq2,%f12
2937
2938	faddd	%f22,pp2,%f22
2939	fmuld	%f16,qq2,%f20
2940
2941	fmuld	%f24,qq3,%f30		! cos(x3)
2942
2943	fmuld	%f0,%f6,%f6
2944	faddd	%f4,qq1,%f4
2945
2946	fmuld	%f8,%f14,%f14
2947	faddd	%f12,qq1,%f12
2948
2949	fmuld	%f16,%f22,%f22
2950	faddd	%f20,qq1,%f20
2951
2952	faddd	%f30,qq2,%f30
2953	fmuld	%f24,pp2,%f28
2954
2955	faddd	%f6,pp1,%f6
2956	fmuld	%f0,%f4,%f4
2957	add	%l4,%g1,%l4
2958
2959	faddd	%f14,pp1,%f14
2960	fmuld	%f8,%f12,%f12
2961	add	%l5,%g1,%l5
2962
2963	faddd	%f22,pp1,%f22
2964	fmuld	%f16,%f20,%f20
2965	add	%l6,%g1,%l6
2966
2967	fmuld	%f24,%f30,%f30
2968	faddd	%f28,pp1,%f28
2969
2970	fmuld	%f0,%f6,%f6
2971
2972	fmuld	%f8,%f14,%f14
2973
2974	fmuld	%f16,%f22,%f22
2975
2976	faddd	%f30,qq1,%f30
2977	fmuld	%f24,%f28,%f28
2978	add	%l7,%g1,%l7
2979
2980	fmuld	%f2,%f6,%f6
2981	ldd	[%l4+8],%f0
2982
2983	fmuld	%f10,%f14,%f14
2984	ldd	[%l5+8],%f8
2985
2986	fmuld	%f18,%f22,%f22
2987	ldd	[%l6+8],%f16
2988
2989	fmuld	%f26,%f28,%f28
2990
2991	fmuld	%f0,%f4,%f4
2992	faddd	%f32,%f6,%f6
2993
2994	fmuld	%f8,%f12,%f12
2995	faddd	%f34,%f14,%f14
2996
2997	fmuld	%f16,%f20,%f20
2998	faddd	%f36,%f22,%f22
2999
3000	fmuld	%f24,%f30,%f30
3001	faddd	%f28,%f38,%f28
3002	ldd	[%l7+16],%f24
3003
3004	faddd	%f2,%f6,%f6
3005	ldd	[%l4+16],%f32
3006
3007	faddd	%f10,%f14,%f14
3008	ldd	[%l5+16],%f34
3009
3010	faddd	%f18,%f22,%f22
3011	ldd	[%l6+16],%f36
3012
3013	fmuld	%f24,%f30,%f30
3014	faddd	%f28,%f26,%f28
3015	ldd	[%l7+8],%f38
3016
3017	fmuld	%f32,%f6,%f6
3018
3019	fmuld	%f34,%f14,%f14
3020
3021	fmuld	%f36,%f22,%f22
3022
3023	fmuld	%f38,%f28,%f28
3024
3025	faddd	%f6,%f4,%f6
3026
3027	faddd	%f14,%f12,%f14
3028
3029	faddd	%f22,%f20,%f22
3030
3031	fsubd	%f30,%f28,%f30
3032
3033	faddd	%f6,%f0,%f6
3034
3035	faddd	%f14,%f8,%f14
3036
3037	faddd	%f22,%f16,%f22
3038
3039	faddd	%f30,%f24,%f30
3040	mov	%l0,%l4
3041
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
3042	fnegd	%f6,%f4
3043	lda	[%i1]%asi,%l0		! preload next argument
3044
3045	fnegd	%f14,%f12
3046	lda	[%i1]%asi,%f0
3047
3048	fnegd	%f22,%f20
3049	lda	[%i1+4]%asi,%f3
3050
3051	fnegd	%f30,%f28
3052	andn	%l0,%i5,%l0
3053	add	%i1,%i2,%i1
3054
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
3055	andcc	%l4,2,%g0
3056	fmovdnz	%icc,%f4,%f6
3057	st	%f6,[%o0]
3058
3059	andcc	%l1,2,%g0
3060	fmovdnz	%icc,%f12,%f14
3061	st	%f14,[%o1]
3062
3063	andcc	%l2,2,%g0
3064	fmovdnz	%icc,%f20,%f22
3065	st	%f22,[%o2]
3066
3067	andcc	%l3,2,%g0
3068	fmovdnz	%icc,%f28,%f30
3069	st	%f30,[%o3]
3070
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
3071	addcc	%i0,-1,%i0
3072	bg,pt	%icc,.loop0
3073! delay slot
3074	st	%f7,[%o0+4]
3075
3076	ba,pt	%icc,.end
3077! delay slot
3078	nop
3079
3080	.align	16
! .case15: reached from .case14 when bit 0 of %l3 is clear.  All four lanes
! lead with the pp coefficients: sin(x0), sin(x1), sin(x2), sin(x3).
! Register groups: lane 0 %f0-%f6 and %f32, lane 1 %f8-%f14 and %f34,
! lane 2 %f16-%f22 and %f36, lane 3 %f24-%f30 and %f38.
! NOTE(review): %l4-%l7 plus %g1 look like per-lane table addresses (doubles
! read at +8 and +16) -- confirm against the setup earlier in the loop.
3081.case15:
3082	fmuld	%f0,pp3,%f6		! sin(x0)
3083
3084	fmuld	%f8,pp3,%f14		! sin(x1)
3085
3086	fmuld	%f16,pp3,%f22		! sin(x2)
3087
3088	fmuld	%f24,pp3,%f30		! sin(x3)
3089
3090	faddd	%f6,pp2,%f6
3091	fmuld	%f0,qq2,%f4
3092
3093	faddd	%f14,pp2,%f14
3094	fmuld	%f8,qq2,%f12
3095
3096	faddd	%f22,pp2,%f22
3097	fmuld	%f16,qq2,%f20
3098
3099	faddd	%f30,pp2,%f30
3100	fmuld	%f24,qq2,%f28
3101
3102	fmuld	%f0,%f6,%f6
3103	faddd	%f4,qq1,%f4
3104
3105	fmuld	%f8,%f14,%f14
3106	faddd	%f12,qq1,%f12
3107
3108	fmuld	%f16,%f22,%f22
3109	faddd	%f20,qq1,%f20
3110
3111	fmuld	%f24,%f30,%f30
3112	faddd	%f28,qq1,%f28
3113
3114	faddd	%f6,pp1,%f6
3115	fmuld	%f0,%f4,%f4
3116	add	%l4,%g1,%l4
3117
3118	faddd	%f14,pp1,%f14
3119	fmuld	%f8,%f12,%f12
3120	add	%l5,%g1,%l5
3121
3122	faddd	%f22,pp1,%f22
3123	fmuld	%f16,%f20,%f20
3124	add	%l6,%g1,%l6
3125
3126	faddd	%f30,pp1,%f30
3127	fmuld	%f24,%f28,%f28
3128	add	%l7,%g1,%l7
3129
3130	fmuld	%f0,%f6,%f6
3131
3132	fmuld	%f8,%f14,%f14
3133
3134	fmuld	%f16,%f22,%f22
3135
3136	fmuld	%f24,%f30,%f30
3137
3138	fmuld	%f2,%f6,%f6
3139	ldd	[%l4+8],%f0
3140
3141	fmuld	%f10,%f14,%f14
3142	ldd	[%l5+8],%f8
3143
3144	fmuld	%f18,%f22,%f22
3145	ldd	[%l6+8],%f16
3146
3147	fmuld	%f26,%f30,%f30
3148	ldd	[%l7+8],%f24
3149
3150	fmuld	%f0,%f4,%f4
3151	faddd	%f32,%f6,%f6
3152
3153	fmuld	%f8,%f12,%f12
3154	faddd	%f34,%f14,%f14
3155
3156	fmuld	%f16,%f20,%f20
3157	faddd	%f36,%f22,%f22
3158
3159	fmuld	%f24,%f28,%f28
3160	faddd	%f38,%f30,%f30
3161
3162	faddd	%f2,%f6,%f6
3163	ldd	[%l4+16],%f32
3164
3165	faddd	%f10,%f14,%f14
3166	ldd	[%l5+16],%f34
3167
3168	faddd	%f18,%f22,%f22
3169	ldd	[%l6+16],%f36
3170
3171	faddd	%f26,%f30,%f30
3172	ldd	[%l7+16],%f38
3173
3174	fmuld	%f32,%f6,%f6
3175
3176	fmuld	%f34,%f14,%f14
3177
3178	fmuld	%f36,%f22,%f22
3179
3180	fmuld	%f38,%f30,%f30
3181
3182	faddd	%f6,%f4,%f6
3183
3184	faddd	%f14,%f12,%f14
3185
3186	faddd	%f22,%f20,%f22
3187
3188	faddd	%f30,%f28,%f30
3189
3190	faddd	%f6,%f0,%f6
3191
3192	faddd	%f14,%f8,%f14
3193
3194	faddd	%f22,%f16,%f22
3195
3196	faddd	%f30,%f24,%f30
3197	mov	%l0,%l4
3198
! Negated copies of the four results go to %f4, %f12, %f20, %f28 while the
! next argument is preloaded; %l4 now holds the previous %l0 for the test below.
3199	fnegd	%f6,%f4
3200	lda	[%i1]%asi,%l0		! preload next argument
3201
3202	fnegd	%f14,%f12
3203	lda	[%i1]%asi,%f0
3204
3205	fnegd	%f22,%f20
3206	lda	[%i1+4]%asi,%f3
3207
3208	fnegd	%f30,%f28
3209	andn	%l0,%i5,%l0
3210	add	%i1,%i2,%i1
3211
! Per lane: if bit 1 (mask 2) of its word is set, take the negated copy;
! then store the high word of the result.
3212	andcc	%l4,2,%g0
3213	fmovdnz	%icc,%f4,%f6
3214	st	%f6,[%o0]
3215
3216	andcc	%l1,2,%g0
3217	fmovdnz	%icc,%f12,%f14
3218	st	%f14,[%o1]
3219
3220	andcc	%l2,2,%g0
3221	fmovdnz	%icc,%f20,%f22
3222	st	%f22,[%o2]
3223
3224	andcc	%l3,2,%g0
3225	fmovdnz	%icc,%f28,%f30
3226	st	%f30,[%o3]
3227
! Count down %i0 and loop while positive.  The delay slot (always executed)
! stores the low word for lane 0; %f15, %f23, %f31 are stored later (see .end).
3228	addcc	%i0,-1,%i0
3229	bg,pt	%icc,.loop0
3230! delay slot
3231	st	%f7,[%o0+4]
3232
3233	ba,pt	%icc,.end
3234! delay slot
3235	nop
3236
3237
3238	.align	16
! .end: common exit path.  First writes the low result words for lanes 1-3
! (%f15, %f23, %f31) that the case blocks leave pending.  Then reads the
! biguns flag from the frame: if it is zero, return via .exit.  Otherwise
! reload the saved arguments (nsave, xsave, sxsave, ysave, sysave into
! %o0-%o4) and call __vlibm_vsin_big_ultra3; on return, execution falls
! through to .exit.
3239.end:
3240	st	%f15,[%o1+4]
3241	st	%f23,[%o2+4]
3242	st	%f31,[%o3+4]
3243	ld	[%fp+biguns],%i5
3244	tst	%i5			! check for huge arguments remaining
3245	be,pt	%icc,.exit
3246! delay slot
3247	nop
! Pointer arguments are 64-bit loads under __sparcv9, 32-bit otherwise.
3248#ifdef __sparcv9
3249	ldx	[%fp+xsave],%o1
3250	ldx	[%fp+ysave],%o3
3251#else
3252	ld	[%fp+xsave],%o1
3253	ld	[%fp+ysave],%o3
3254#endif
3255	ld	[%fp+nsave],%o0
3256	ld	[%fp+sxsave],%o2
3257	ld	[%fp+sysave],%o4
3258	sra	%o2,0,%o2		! sign-extend for V9
3259	sra	%o4,0,%o4
! NOTE(review): %o5 is not loaded in this block; it is presumably the sixth
! argument, still live from earlier in the function -- confirm.
3260	call	__vlibm_vsin_big_ultra3
3261	sra	%o5,0,%o5		! delay slot
3262
! .exit: return to the caller and restore the register window.
3263.exit:
3264	ret
3265	restore
3266
3267
3268	.align	16
! .last1 / .last2 / .last3: entry points that fall through one another and
! end by branching to .cont.  For each lane from the entry point onward the
! code zeroes the lane word (%l1, %l2, %l3) and its FP argument registers,
! and points the lane output pointer (%o1, %o2, %o3) at %fp+junk so later
! stores for that lane do not land in the result vector.
! NOTE(review): presumably used when fewer than four arguments remain --
! confirm against the branches in the main loop.
! The *_from_rangeN labels skip the pending low-word store (and the faddd)
! that the plain entry performs first.
3269.last1:
3270	faddd	%f2,c3two44,%f4
3271	st	%f15,[%o1+4]
3272.last1_from_range1:
3273	mov	0,%l1
3274	fzeros	%f8
3275	fzero	%f10
3276	add	%fp,junk,%o1
3277.last2:
3278	faddd	%f10,c3two44,%f12
3279	st	%f23,[%o2+4]
3280.last2_from_range2:
3281	mov	0,%l2
3282	fzeros	%f16
3283	fzero	%f18
3284	add	%fp,junk,%o2
3285.last3:
3286	faddd	%f18,c3two44,%f20
3287	st	%f31,[%o3+4]
! Spill the odd halves of %f4 and %f12 (computed above) to the nk0/nk1 slots.
3288	st	%f5,[%fp+nk0]
3289	st	%f13,[%fp+nk1]
3290.last3_from_range3:
3291	mov	0,%l3
3292	fzeros	%f24
3293	fzero	%f26
3294	ba,pt	%icc,.cont
3295! delay slot
3296	add	%fp,junk,%o3
3297
3298
3299	.align	16
! .range0: handles a slot 0 argument that the main path does not process
! (%l0 = hx, value in %f2:%f3, result pointer %o0).
!   hx <  %o4 (0x3e400000 per the note below): label 1 stores the words
!       %f0 and %f3 as the result; the fdtoi is there only for its flag.
!   hx >= 0x7ff00000 (not finite): stores x * 0.
!   otherwise: writes %o4 into biguns and stores nothing here; .end tests
!       biguns and calls __vlibm_vsin_big_ultra3.
! Label 2 then consumes one element.  If none remain it goes to .end;
! otherwise it copies the slot 1 argument (%l1, %f8, %f11) into slot 0 and
! re-enters .loop0.
3300.range0:
3301	cmp	%l0,%o4
3302	bl,pt	%icc,1f			! hx < 0x3e400000
3303! delay slot, harmless if branch taken
3304	sethi	%hi(0x7ff00000),%o7
3305	cmp	%l0,%o7
3306	bl,a,pt	%icc,2f			! branch if finite
3307! delay slot, squashed if branch not taken
3308	st	%o4,[%fp+biguns]	! set biguns
3309	fzero	%f0
3310	fmuld	%f2,%f0,%f2
3311	st	%f2,[%o0]
3312	ba,pt	%icc,2f
3313! delay slot
3314	st	%f3,[%o0+4]
33151:
3316	fdtoi	%f2,%f4			! raise inexact if not zero
3317	st	%f0,[%o0]
3318	st	%f3,[%o0+4]
33192:
3320	addcc	%i0,-1,%i0
3321	ble,pn	%icc,.end
3322! delay slot, harmless if branch taken
3323	add	%i3,%i4,%i3		! y += stridey
3324	andn	%l1,%i5,%l0		! hx &= ~0x80000000
3325	fmovs	%f8,%f0
3326	fmovs	%f11,%f3
3327	ba,pt	%icc,.loop0
3328! delay slot
3329	add	%i1,%i2,%i1		! x += stridex
3330
3331
3332	.align	16
! .range1: same handling as .range0, for slot 1 (%l1 = hx, value in
! %f10:%f11, result pointer %o1).
!   hx <  %o4: label 1 stores the words %f8 and %f11; the fdtoi is there
!       only for its flag.
!   hx >= 0x7ff00000 (not finite): stores x * 0.
!   otherwise: writes %o4 into biguns and stores nothing here.
! Label 2 then consumes one element.  If none remain it goes to
! .last1_from_range1; otherwise it copies the slot 2 argument
! (%l2, %f16, %f19) into slot 1 and re-enters .loop1.
3333.range1:
3334	cmp	%l1,%o4
3335	bl,pt	%icc,1f			! hx < 0x3e400000
3336! delay slot, harmless if branch taken
3337	sethi	%hi(0x7ff00000),%o7
3338	cmp	%l1,%o7
3339	bl,a,pt	%icc,2f			! branch if finite
3340! delay slot, squashed if branch not taken
3341	st	%o4,[%fp+biguns]	! set biguns
3342	fzero	%f8
3343	fmuld	%f10,%f8,%f10
3344	st	%f10,[%o1]
3345	ba,pt	%icc,2f
3346! delay slot
3347	st	%f11,[%o1+4]
33481:
3349	fdtoi	%f10,%f12		! raise inexact if not zero
3350	st	%f8,[%o1]
3351	st	%f11,[%o1+4]
33522:
3353	addcc	%i0,-1,%i0
3354	ble,pn	%icc,.last1_from_range1
3355! delay slot, harmless if branch taken
3356	add	%i3,%i4,%i3		! y += stridey
3357	andn	%l2,%i5,%l1		! hx &= ~0x80000000
3358	fmovs	%f16,%f8
3359	fmovs	%f19,%f11
3360	ba,pt	%icc,.loop1
3361! delay slot
3362	add	%i1,%i2,%i1		! x += stridex
3363
3364
3365	.align	16
! .range2: same handling as .range0, for slot 2 (%l2 = hx, value in
! %f18:%f19, result pointer %o2).
!   hx <  %o4: label 1 stores the words %f16 and %f19; the fdtoi is there
!       only for its flag.
!   hx >= 0x7ff00000 (not finite): stores x * 0.
!   otherwise: writes %o4 into biguns and stores nothing here.
! Label 2 then consumes one element.  If none remain it goes to
! .last2_from_range2; otherwise it copies the slot 3 argument
! (%l3, %f24, %f27) into slot 2 and re-enters .loop2.
3366.range2:
3367	cmp	%l2,%o4
3368	bl,pt	%icc,1f			! hx < 0x3e400000
3369! delay slot, harmless if branch taken
3370	sethi	%hi(0x7ff00000),%o7
3371	cmp	%l2,%o7
3372	bl,a,pt	%icc,2f			! branch if finite
3373! delay slot, squashed if branch not taken
3374	st	%o4,[%fp+biguns]	! set biguns
3375	fzero	%f16
3376	fmuld	%f18,%f16,%f18
3377	st	%f18,[%o2]
3378	ba,pt	%icc,2f
3379! delay slot
3380	st	%f19,[%o2+4]
33811:
3382	fdtoi	%f18,%f20		! raise inexact if not zero
3383	st	%f16,[%o2]
3384	st	%f19,[%o2+4]
33852:
3386	addcc	%i0,-1,%i0
3387	ble,pn	%icc,.last2_from_range2
3388! delay slot, harmless if branch taken
3389	add	%i3,%i4,%i3		! y += stridey
3390	andn	%l3,%i5,%l2		! hx &= ~0x80000000
3391	fmovs	%f24,%f16
3392	fmovs	%f27,%f19
3393	ba,pt	%icc,.loop2
3394! delay slot
3395	add	%i1,%i2,%i1		! x += stridex
3396
3397
3398	.align	16
! .range3: same handling as .range0, for slot 3 (%l3 = hx, value in
! %f26:%f27, result pointer %o3).
!   hx <  %o4: label 1 stores the words %f24 and %f27; the fdtoi is there
!       only for its flag.
!   hx >= 0x7ff00000 (not finite): stores x * 0.
!   otherwise: writes %o4 into biguns and stores nothing here.
! Label 2 then consumes one element.  If none remain it goes to
! .last3_from_range3.  Otherwise, unlike the other range blocks, there is
! no later slot to copy from, so the next argument is loaded from [%i1]
! with plain ld instructions before re-entering .loop3.
3399.range3:
3400	cmp	%l3,%o4
3401	bl,pt	%icc,1f			! hx < 0x3e400000
3402! delay slot, harmless if branch taken
3403	sethi	%hi(0x7ff00000),%o7
3404	cmp	%l3,%o7
3405	bl,a,pt	%icc,2f			! branch if finite
3406! delay slot, squashed if branch not taken
3407	st	%o4,[%fp+biguns]	! set biguns
3408	fzero	%f24
3409	fmuld	%f26,%f24,%f26
3410	st	%f26,[%o3]
3411	ba,pt	%icc,2f
3412! delay slot
3413	st	%f27,[%o3+4]
34141:
3415	fdtoi	%f26,%f28		! raise inexact if not zero
3416	st	%f24,[%o3]
3417	st	%f27,[%o3+4]
34182:
3419	addcc	%i0,-1,%i0
3420	ble,pn	%icc,.last3_from_range3
3421! delay slot, harmless if branch taken
3422	add	%i3,%i4,%i3		! y += stridey
3423	ld	[%i1],%l3
3424	ld	[%i1],%f24
3425	ld	[%i1+4],%f27
3426	andn	%l3,%i5,%l3		! hx &= ~0x80000000
3427	ba,pt	%icc,.loop3
3428! delay slot
3429	add	%i1,%i2,%i1		! x += stridex
3430
3431	SET_SIZE(__vsin_ultra3)
3432
3433