xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vexpf.S (revision 6bb6b5762ca4b17cd5fb3c6c123f17489d5635aa)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vexpf.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
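35!!  Entry i is the 64-bit pattern of 2^(i/256) - ((i & 0xf0) << 44), i = [0, 255]
!!  The subtracted term cancels the (k & 0xf0) << 44 that comes along when the
!!  fpackfix result (k >> 4, placed in bits 63:48 with the GSR scale of 12)
!!  is added to the entry by fpadd32, so that add raises the exponent field
!!  by exactly k >> 8.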
36.CONST_TBL:
37	.word	0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
38	.word	0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
39	.word	0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
40	.word	0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
41	.word	0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
42	.word	0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
43	.word	0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
44	.word	0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
45	.word	0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
46	.word	0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
47	.word	0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
48	.word	0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
49	.word	0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
50	.word	0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
51	.word	0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
52	.word	0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
53	.word	0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
54	.word	0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
55	.word	0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
56	.word	0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
57	.word	0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
58	.word	0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
59	.word	0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
60	.word	0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
61	.word	0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
62	.word	0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
63	.word	0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
64	.word	0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
65	.word	0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
66	.word	0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
67	.word	0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
68	.word	0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
69	.word	0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
70	.word	0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
71	.word	0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
72	.word	0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
73	.word	0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
74	.word	0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
75	.word	0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
76	.word	0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
77	.word	0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
78	.word	0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
79	.word	0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
80	.word	0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
81	.word	0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
82	.word	0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
83	.word	0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
84	.word	0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
85	.word	0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
86	.word	0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
87	.word	0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
88	.word	0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
89	.word	0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
90	.word	0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
91	.word	0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
92	.word	0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
93	.word	0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
94	.word	0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
95	.word	0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
96	.word	0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
97	.word	0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
98	.word	0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
99	.word	0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
100	.word	0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
101	.word	0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
102	.word	0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
103	.word	0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
104	.word	0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
105	.word	0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
106	.word	0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
107	.word	0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
108	.word	0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
109	.word	0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
110	.word	0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
111	.word	0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
112	.word	0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
113	.word	0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
114	.word	0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
115	.word	0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
116	.word	0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
117	.word	0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
118	.word	0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
119	.word	0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
120	.word	0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
121	.word	0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
122	.word	0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
123	.word	0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
124	.word	0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
125	.word	0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
126	.word	0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
127	.word	0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
128	.word	0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
129	.word	0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
130	.word	0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
131	.word	0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
132	.word	0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
133	.word	0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
134	.word	0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
135	.word	0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
136	.word	0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
137	.word	0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
138	.word	0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
139	.word	0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
140	.word	0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
141	.word	0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
142	.word	0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
143	.word	0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
144	.word	0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
145	.word	0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
146	.word	0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
147	.word	0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
148	.word	0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
149	.word	0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
150	.word	0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
151	.word	0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
152	.word	0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
153	.word	0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
154	.word	0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
155	.word	0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
156	.word	0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
157	.word	0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
158	.word	0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
159	.word	0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
160	.word	0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
161	.word	0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
162	.word	0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
163	.word	0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
164	.word	0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9
165
/*
 * Scalar constants placed directly after the 256-entry (2048-byte) table.
 * __vexpf reaches them through fixed byte offsets from .CONST_TBL:
 *   2048  1.0e30f    (squared by the special-case paths for large x)
 *   2052  1.0e-30f   (squared by the special-case paths for negative x)
 *   2056  KA2        (double)
 *   2064  KA1        (double)
 *   2072  K256ONLN2  (double)
 *   2080  THRESHOLD  (loaded into an integer register as a bit pattern)
 *   2084  THRESHOLDL (loaded into a float register)
 * Inserting or removing a .word ahead of any of these changes the offsets
 * hard-coded in the loads.
 */
166	.word	0x7149f2ca, 0x0da24260	! 1.0e30f, 1.0e-30f
167	.word	0x3ecebfbe, 0x9d182250	! KA2 = 3.66556671660783833261e-06
168	.word	0x3f662e43, 0xe2528362	! KA1 = 2.70760782821392980564e-03
169	.word	0x40771547, 0x652b82fe	! K256ONLN2 = 369.3299304675746271
170	.word	0x42aeac4f, 0x42b17218	! THRESHOLD = 87.3365402f
171					! THRESHOLDL = 88.7228394f
172! local storage indices
173
/*
 * Eight 4-byte scratch slots just below %fp+STACK_BIAS.  __vexpf stores each
 * truncated integer k here from a floating-point register and loads it back
 * into an integer register to build the table index.
 */
174#define tmp0		STACK_BIAS-32
175#define tmp1		STACK_BIAS-28
176#define tmp2		STACK_BIAS-24
177#define tmp3		STACK_BIAS-20
178#define tmp4		STACK_BIAS-16
179#define tmp5		STACK_BIAS-12
180#define tmp6		STACK_BIAS-8
181#define tmp7		STACK_BIAS-4
182
183! sizeof temp storage - must be a multiple of 16 for V9
184#define tmps 		0x20
185
/*
 * Register aliases used throughout __vexpf.
 * NOTE: the digit in two of the names does NOT match the register:
 *   G1_CONST_TBL is %g5 (base of .CONST_TBL, set by PIC_SET(l7,.CONST_TBL,g5))
 *   G5_CONST     is %g1 (the 0x7fffffff sign mask)
 * Trust the expansion, not the name.
 */
186#define I5_THRESHOLD	%i5
187#define G1_CONST_TBL	%g5
188#define G5_CONST	%g1
189
/* Double-precision constants, loaded once from .CONST_TBL+2072/2056/2064. */
190#define F62_K256ONLN2	%f62
191#define F60_KA2		%f60
192#define F58_KA1		%f58
193
/* Single-precision upper bound, loaded once from .CONST_TBL+2084. */
194#define THRESHOLDL	%f0
195
196! register use
197! i0  n
198! i1  x
199! i2  stridex
200! i3  y
201! i4  stridey
202
203! i5  0x42aeac4f (87.3365402f)
204
205! g5  CONST_TBL   (macro G1_CONST_TBL)
206! g1  0x7fffffff  (macro G5_CONST)
207
208! f62 K256ONLN2 = 369.3299304675746271
209! f60 KA2 = 3.66556671660783833261e-06
210! f58 KA1 = 2.70760782821392980564e-03
211
212
213!		!!!!!  Algorithm  !!!!!
214!
215!  double y, dtmp, drez;
216!  int k, sign, Xi;
217!  float X, Y;
218!  int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
219!  float THRESHOLDL = 88.7228394f;
220!  double KA2 = 3.66556671660783833261e-06;
221!  double KA1 = 2.70760782821392980564e-03;
222!  double K256ONLN2 = 369.3299304675746271;
223!  char *CONST_TBL;
224!
225!  X  = px[0];
226!  Xi = ((int*)px)[0];
227!  ax = Xi & 0x7fffffff;
228!
229!  if (ax > THRESHOLD) {
230!    sign = ((unsigned)Xi >> 29) & 4;
231!    if (ax >= 0x7f800000) {      /* Inf or NaN */
232!      if (ax > 0x7f800000) {     /* NaN */
233!        Y = X * X;               /* NaN -> NaN */
234!        return Y;
235!      }
236!      Y = (sign) ? zero : X;     /* +Inf -> +Inf , -Inf -> zero */
237!      return Y;
238!    }
239!
240!    if ( X < 0.0f || X >= THRESHOLDL ) {
241!      Y = ((float*)(CONST_TBL + 2048 + sign))[0];
242!         /* X >= THRESHOLDL : Y = 1.0e+30f */
243!         /* X < 0.0f        : Y = 1.0e-30f */
244!      Y =  Y * Y;
245!         /* X >= THRESHOLDL : +Inf + overflow  */
246!         /* X < 0.0f        : +0 + underflow */
247!      return Y;
248!    }
249!  }
250!  vis_write_gsr(12 << 3);
251!  y = (double) X;
252!  y = K256ONLN2 * y;
253!  k = (int) y;
254!  dtmp = (double) k;
255!  y -= dtmp;
256!  dtmp = y * KA2;
257!  dtmp += KA1;
258!  y *= dtmp;
259!  /* i.e. the three statements above compute y = (y * KA2 + KA1) * y, once */
260!  ((int*)&drez)[0] = k;
261!  ((int*)&drez)[1] = 0;
262!  ((float*)&drez)[0] = vis_fpackfix(drez);
263!  k &= 255;
264!  k <<= 3;
265!  dtmp = ((double*)(CONST_TBL + k))[0];
266!  drez = vis_fpadd32(drez,dtmp);
267!  y *= drez;
268!  y += drez;
269!  Y = (float) y;
270!
271!
272!  fstod %f16,%f40			! y = (double) X
273!  fmuld F62_K256ONLN2,%f40,%f40	! y *= K256ONLN2
274!  fdtoi %f40,%f16			! k = (int) y
275!  st  %f16,[%fp+tmp0]			! store k
276!  fitod %f16,%f34			! dtmp = (double) k
277!  fpackfix  %f16,%f16			! ((float*)&drez)[0] = vis_fpackfix(drez)
278!  fsubd %f40,%f34,%f40			! y -= dtmp
279!  fmuld F60_KA2,%f40,%f34		! dtmp = y * KA2
280!  faddd F58_KA1,%f34,%f34		! dtmp += KA1
281!  ld  [%fp+tmp0],%o0			! load k
282!  fmuld %f34,%f40,%f40			! y *= dtmp
283!  and %o0,255,%o0			! k &= 255
284!  sll  %o0,3,%o0			! k <<= 3
285!  ldd [G1_CONST_TBL+%o0],%f34		! dtmp = ((double*)(CONST_TBL + k))[0]
286!  fpadd32 %f16,%f34,%f34		! drez = vis_fpadd32(drez,dtmp)
287!  fmuld %f34,%f40,%f40			! y *= drez
288!  faddd %f34,%f40,%f40			! y += drez
289!  fdtos %f40,%f26			! (float) y
290!--------------------------------------------------------------------
291
/*
 * __vexpf: vector single-precision exp.
 * Incoming registers (see the "register use" comment above):
 *   %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey
 * NOTE(review): the matching C prototype is not visible in this file;
 * presumably (int n, float *x, int stridex, float *y, int stridey).
 *
 * This prologue sets up the frame (MINFRAME plus tmps bytes of scratch),
 * loads the constants into their dedicated registers and sends vectors
 * shorter than 8 elements straight to the one-at-a-time tail loop.
 */
292	ENTRY(__vexpf)
293	save	%sp,-SA(MINFRAME)-tmps,%sp
294	PIC_SETUP(l7)
295	PIC_SET(l7,.CONST_TBL,g5)
296
297	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
/* 0x60 == 12 << 3, the value the algorithm comment passes to vis_write_gsr(). */
298	wr	%g0,0x60,%gsr
299
/* Strides arrive in elements; scale by 4 to get byte strides. */
300	sll	%i2,2,%i2
301	sll	%i4,2,%i4
302
/*
 * Constant loads, interleaved with building the 0x7fffffff mask
 * (0x7ffffc00 from sethi, plus 1023).
 */
303	ldd	[G1_CONST_TBL+2056],F60_KA2
304	sethi	%hi(0x7ffffc00),G5_CONST
305	ldd	[G1_CONST_TBL+2064],F58_KA1
306	add	G5_CONST,1023,G5_CONST
307	ldd	[G1_CONST_TBL+2072],F62_K256ONLN2
308	ld	[G1_CONST_TBL+2080],I5_THRESHOLD
309	ld	[G1_CONST_TBL+2084],THRESHOLDL
310
/*
 * %i0 = n - 8.  Negative means fewer than 8 elements: go to .tail.
 * The delay slot always runs and zeroes %f3, which the rest of the
 * routine uses as the 0.0f constant.
 */
311	subcc	%i0,8,%i0
312	bneg,pn	%icc,.tail
313	fzeros	%f3
314
315.main_loop_preload:
/*
 * Fill the pipeline for the first pass of .main_loop.  Only reached when at
 * least 8 elements exist, so ordinary (faulting) loads are used.
 *   elements 0-3: bit pattern -> %l0-%l3 (masked to ax), value -> %f16,%f2,%f4,%f6
 *   elements 4-7: bit pattern -> %l4-%l7 (masked to ax); their addresses are
 *                 left in %o0-%o3 so .main_loop can load the float values
 * On exit %i1 holds the address of element 8.
 * The interleaved fzeros clear the odd registers %f5..%f17 (%f3 was cleared
 * in the prologue); presumably so that every even/odd pair later handed to
 * fpackfix/fpadd32 has a zero low word.
 */
316
317! preload 8 elements and get absolute values
318	ld	[%i1],%l0		! (0) Xi = ((int*)px)[0]
319	fzeros	%f5
320	ld	[%i1],%f16		! (0) X = px[0]
321	fzeros	%f7
322	add	%i1,%i2,%o5		! px += stridex
323	ld	[%o5],%l1		! (1) Xi = ((int*)px)[0]
324	and	%l0,G5_CONST,%l0	! (0) ax = Xi & 0x7fffffff
325	fzeros	%f9
326	ld	[%o5],%f2		! (1) X = px[0]
327	fzeros	%f11
328	add	%o5,%i2,%i1		! px += stridex
329	ld	[%i1],%l2		! (2) Xi = ((int*)px)[0]
330	and	%l1,G5_CONST,%l1	! (1) ax = Xi & 0x7fffffff
331	fzeros	%f13
332	ld	[%i1],%f4		! (2) X = px[0]
333	fzeros	%f15
334	add	%i1,%i2,%o5		! px += stridex
335	ld	[%o5],%l3		! (3) Xi = ((int*)px)[0]
336	and	%l2,G5_CONST,%l2	! (2) ax = Xi & 0x7fffffff
337	fzeros	%f17
338	ld	[%o5],%f6		! (3) X = px[0]
339	add	%o5,%i2,%o0		! px += stridex
340	ld	[%o0],%l4		! (4) Xi = ((int*)px)[0]
341	and	%l3,G5_CONST,%l3	! (3) ax = Xi & 0x7fffffff
342	add	%o0,%i2,%o1		! px += stridex
343	ld	[%o1],%l5		! (5) Xi = ((int*)px)[0]
344	add	%o1,%i2,%o2		! px += stridex
345	ld	[%o2],%l6		! (6) Xi = ((int*)px)[0]
346	and	%l4,G5_CONST,%l4	! (4) ax = Xi & 0x7fffffff
347	add	%o2,%i2,%o3		! px += stridex
348	ld	[%o3],%l7		! (7) Xi = ((int*)px)[0]
349	add	%o3,%i2,%i1		! px += stridex
350	and	%l5,G5_CONST,%l5	! (5) ax = Xi & 0x7fffffff
351	and	%l6,G5_CONST,%l6	! (6) ax = Xi & 0x7fffffff
352	ba	.main_loop
353	and	%l7,G5_CONST,%l7	! (7) ax = Xi & 0x7fffffff
354
355	.align	16
356.main_loop:
/*
 * Main loop: eight results per pass, with the loads for the following pass
 * overlapped with the arithmetic of the current one.  The "(N)" tags in the
 * comments are element numbers relative to the start of the current pass.
 *
 * State expected at the top of every pass:
 *   %l0-%l7            ax (bit pattern & 0x7fffffff) of elements 0-7
 *   %f16,%f2,%f4,%f6   float values of elements 0-3
 *   %o0-%o3            addresses of elements 4-7 (values loaded just below)
 *   %i1                address of element 8
 *   %i3                address of result 0
 *   %i0                elements still to do, minus 8 (loop runs while >= 0)
 *
 * Each range check branches to .specN when ax > THRESHOLD.  The instruction
 * after each bg is its delay slot and executes on both paths; the fstod that
 * follows is skipped on the branch and is redone by .specN when it resumes
 * at .specN_cont.
 */
357	cmp	%l0,I5_THRESHOLD
358	bg,pn	%icc,.spec0		! (0) if (ax > THRESHOLD)
359	lda	[%o0]%asi,%f8		! (4) X = px[0]
360	fstod	%f16,%f40		! (0) y = (double) X
361.spec0_cont:
362	cmp	%l1,I5_THRESHOLD
363	bg,pn	%icc,.spec1		! (1) if (ax > THRESHOLD)
364	lda	[%o1]%asi,%f10		! (5) X = px[0]
365	fstod	%f2,%f42		! (1) y = (double) X
366.spec1_cont:
367	cmp	%l2,I5_THRESHOLD
368	bg,pn	%icc,.spec2		! (2) if (ax > THRESHOLD)
369	lda	[%o2]%asi,%f12		! (6) X = px[0]
370	fstod	%f4,%f44		! (2) y = (double) X
371.spec2_cont:
372	cmp	%l3,I5_THRESHOLD
373	bg,pn	%icc,.spec3		! (3) if (ax > THRESHOLD)
374	lda	[%o3]%asi,%f14		! (7) X = px[0]
375	fstod	%f6,%f46		! (3) y = (double) X
376.spec3_cont:
377	cmp	%l4,I5_THRESHOLD
378	bg,pn	%icc,.spec4		! (4) if (ax > THRESHOLD)
379	fmuld	F62_K256ONLN2,%f40,%f40	! (0) y *= K256ONLN2
380	fstod	%f8,%f48		! (4) y = (double) X
381.spec4_cont:
382	cmp	%l5,I5_THRESHOLD
383	bg,pn	%icc,.spec5		! (5) if (ax > THRESHOLD)
384	fmuld	F62_K256ONLN2,%f42,%f42	! (1) y *= K256ONLN2
385	fstod	%f10,%f50		! (5) y = (double) X
386.spec5_cont:
387	cmp	%l6,I5_THRESHOLD
388	bg,pn	%icc,.spec6		! (6) if (ax > THRESHOLD)
389	fmuld	F62_K256ONLN2,%f44,%f44	! (2) y *= K256ONLN2
390	fstod	%f12,%f52		! (6) y = (double) X
391.spec6_cont:
392	cmp	%l7,I5_THRESHOLD
393	bg,pn	%icc,.spec7		! (7) if (ax > THRESHOLD)
394	fmuld	F62_K256ONLN2,%f46,%f46	! (3) y *= K256ONLN2
395	fstod	%f14,%f54		! (7) y = (double) X
396.spec7_cont:
/*
 * All eight inputs are in range from here on.  Each k is also written to
 * its tmpN stack slot so it can be reloaded into an integer register.
 */
397	fdtoi	%f40,%f16		! (0) k = (int) y
398	st	%f16,[%fp+tmp0]
399	fmuld	F62_K256ONLN2,%f48,%f48	! (4) y *= K256ONLN2
400
401	fdtoi	%f42,%f2		! (1) k = (int) y
402	st	%f2,[%fp+tmp1]
403	fmuld	F62_K256ONLN2,%f50,%f50	! (5) y *= K256ONLN2
404
405	fdtoi	%f44,%f4		! (2) k = (int) y
406	st	%f4,[%fp+tmp2]
407	fmuld	F62_K256ONLN2,%f52,%f52	! (6) y *= K256ONLN2
408
409	fdtoi	%f46,%f6		! (3) k = (int) y
410	st	%f6,[%fp+tmp3]
411	fmuld	F62_K256ONLN2,%f54,%f54	! (7) y *= K256ONLN2
412
413	fdtoi	%f48,%f8		! (4) k = (int) y
414	st	%f8,[%fp+tmp4]
415
416	fdtoi	%f50,%f10		! (5) k = (int) y
417	st	%f10,[%fp+tmp5]
418
419	fitod	%f16,%f34		! (0) dtmp = (double) k
420	fpackfix	%f16,%f16	! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
421	nop
422	nop
423
424	fdtoi	%f52,%f12		! (6) k = (int) y
425	st	%f12,[%fp+tmp6]
426
427	fdtoi	%f54,%f14		! (7) k = (int) y
428	st	%f14,[%fp+tmp7]
429
/*
 * Start fetching the next batch.  These go through %asi (non-faulting);
 * presumably because elements 8-15 may lie past the end of the input when
 * this is the last pass.  %i1 and %o5 alternate as the running pointer.
 */
430	lda	[%i1]%asi,%l0		! (8) Xi = ((int*)px)[0]
431	add	%i1,%i2,%o5		! px += stridex
432	fitod	%f2,%f18		! (1) dtmp = (double) k
433	fpackfix	%f2,%f2		! (1) ((float*)&drez)[0] = vis_fpackfix(drez)
434
435	lda	[%o5]%asi,%l1		! (9) Xi = ((int*)px)[0]
436	add	%o5,%i2,%i1		! px += stridex
437	fitod	%f4,%f20		! (2) dtmp = (double) k
438	fpackfix	%f4,%f4		! (2) ((float*)&drez)[0] = vis_fpackfix(drez)
439
440	lda	[%i1]%asi,%l2		! (10) Xi = ((int*)px)[0]
441	add	%i1,%i2,%o5		! px += stridex
442	fitod	%f6,%f22		! (3) dtmp = (double) k
443	fpackfix	%f6,%f6		! (3) ((float*)&drez)[0] = vis_fpackfix(drez)
444
445	lda	[%o5]%asi,%l3		! (11) Xi = ((int*)px)[0]
446	add	%o5,%i2,%i1		! px += stridex
447	fitod	%f8,%f24		! (4) dtmp = (double) k
448	fpackfix	%f8,%f8		! (4) ((float*)&drez)[0] = vis_fpackfix(drez)
449
450	fitod	%f10,%f26		! (5) dtmp = (double) k
451	fpackfix	%f10,%f10	! (5) ((float*)&drez)[0] = vis_fpackfix(drez)
452
453	fitod	%f12,%f28		! (6) dtmp = (double) k
454	fpackfix	%f12,%f12	! (6) ((float*)&drez)[0] = vis_fpackfix(drez)
455
456	fitod	%f14,%f30		! (7) dtmp = (double) k
457	fpackfix	%f14,%f14	! (7) ((float*)&drez)[0] = vis_fpackfix(drez)
458
/*
 * Reload each k into an integer register (%o0-%o5, %o7, %l4) and turn it
 * into a byte offset into the table: (k & 255) << 3.
 */
459	ld	[%fp+tmp0],%o0		! (0) load k
460	and	%l0,G5_CONST,%l0	! (8) ax = Xi & 0x7fffffff
461	fsubd	%f40,%f34,%f40		! (0) y -= dtmp
462
463	ld	[%fp+tmp1],%o1		! (1) load k
464	and	%l1,G5_CONST,%l1	! (9) ax = Xi & 0x7fffffff
465	fsubd	%f42,%f18,%f42		! (1) y -= dtmp
466
467	ld	[%fp+tmp2],%o2		! (2) load k
468	and	%l2,G5_CONST,%l2	! (10) ax = Xi & 0x7fffffff
469	and	%o0,255,%o0		! (0) k &= 255
470	fsubd	%f44,%f20,%f44		! (2) y -= dtmp
471
472	ld	[%fp+tmp3],%o3		! (3) load k
473	and	%o1,255,%o1		! (1) k &= 255
474	fsubd	%f46,%f22,%f46		! (3) y -= dtmp
475
476	sll	%o0,3,%o0		! (0) k <<= 3
477	sll	%o1,3,%o1		! (1) k <<= 3
478	fmuld	F60_KA2,%f40,%f34	! (0) dtmp = y * KA2
479	fsubd	%f48,%f24,%f48		! (4) y -= dtmp
480
481	and	%l3,G5_CONST,%l3	! (11) ax = Xi & 0x7fffffff
482	and	%o2,255,%o2		! (2) k &= 255
483	fmuld	F60_KA2,%f42,%f18	! (1) dtmp = y * KA2
484	fsubd	%f50,%f26,%f50		! (5) y -= dtmp
485
486	sll	%o2,3,%o2		! (2) k <<= 3
487	fmuld	F60_KA2,%f44,%f20	! (2) dtmp = y * KA2
488	fsubd	%f52,%f28,%f52		! (6) y -= dtmp
489
490	ld	[%fp+tmp4],%o4		! (4) load k
491	and	%o3,255,%o3		! (3) k &= 255
492	fmuld	F60_KA2,%f46,%f22	! (3) dtmp = y * KA2
493	fsubd	%f54,%f30,%f54		! (7) y -= dtmp
494
495	ld	[%fp+tmp5],%o5		! (5) load k
496	sll	%o3,3,%o3		! (3) k <<= 3
497	fmuld	F60_KA2,%f48,%f24	! (4) dtmp = y * KA2
498	faddd	F58_KA1,%f34,%f34	! (0) dtmp += KA1
499
500	ld	[%fp+tmp6],%o7		! (6) load k
501	and	%o4,255,%o4		! (4) k &= 255
502	fmuld	F60_KA2,%f50,%f26	! (5) dtmp = y * KA2
503	faddd	F58_KA1,%f18,%f18	! (1) dtmp += KA1
504
505	ld	[%fp+tmp7],%l4		! (7) load k
506	and	%o5,255,%o5		! (5) k &= 255
507	fmuld	F60_KA2,%f52,%f28	! (6) dtmp = y * KA2
508	faddd	F58_KA1,%f20,%f20	! (2) dtmp += KA1
509
510	sll	%o5,3,%o5		! (5) k <<= 3
511	fmuld	F60_KA2,%f54,%f30	! (7) dtmp = y * KA2
512	faddd	F58_KA1,%f22,%f22	! (3) dtmp += KA1
513
514	fmuld	%f34,%f40,%f40		! (0) y *= dtmp
515	ldd	[G1_CONST_TBL+%o0],%f34	! (0) dtmp = ((double*)(CONST_TBL + k))[0]
516	and	%l4,255,%l4		! (7) k &= 255
517	faddd	F58_KA1,%f24,%f24	! (4) dtmp += KA1
518
519	fmuld	%f18,%f42,%f42		! (1) y *= dtmp
520	ldd	[G1_CONST_TBL+%o1],%f18	! (1) dtmp = ((double*)(CONST_TBL + k))[0]
521	sll	%l4,3,%l4		! (7) k <<= 3
522	faddd	F58_KA1,%f26,%f26	! (5) dtmp += KA1
523
524	fmuld	%f20,%f44,%f44		! (2) y *= dtmp
525	ldd	[G1_CONST_TBL+%o2],%f20	! (2) dtmp = ((double*)(CONST_TBL + k))[0]
526	faddd	F58_KA1,%f28,%f28	! (6) dtmp += KA1
527
528	fmuld	%f22,%f46,%f46		! (3) y *= dtmp
529	ldd	[G1_CONST_TBL+%o3],%f22	! (3) dtmp = ((double*)(CONST_TBL + k))[0]
530	sll	%o4,3,%o4		! (4) k <<= 3
531	faddd	F58_KA1,%f30,%f30	! (7) dtmp += KA1
532
533	fmuld	%f24,%f48,%f48		! (4) y *= dtmp
534	ldd	[G1_CONST_TBL+%o4],%f24	! (4) dtmp = ((double*)(CONST_TBL + k))[0]
535	and	%o7,255,%o7		! (6) k &= 255
536	fpadd32 %f16,%f34,%f34		! (0) drez = vis_fpadd32(drez,dtmp)
537
538	fmuld	%f26,%f50,%f50		! (5) y *= dtmp
539	ldd	[G1_CONST_TBL+%o5],%f26	! (5) dtmp = ((double*)(CONST_TBL + k))[0]
540	sll	%o7,3,%o7		! (6) k <<= 3
541	fpadd32 %f2,%f18,%f18		! (1) drez = vis_fpadd32(drez,dtmp)
542
543	fmuld	%f28,%f52,%f52		! (6) y *= dtmp
544	ldd	[G1_CONST_TBL+%o7],%f28	! (6) dtmp = ((double*)(CONST_TBL + k))[0]
/*
 * %i1 was advanced past elements 8-11 above.  This sll and the sub three
 * instructions below set %o0 = %i1 - 4*stridex, the address of element 8
 * again, so its float value can be loaded once %f16 is free.
 */
545	sll	%i2,2,%o0
546	fpadd32 %f4,%f20,%f20		! (2) drez = vis_fpadd32(drez,dtmp)
547
548	fmuld	%f30,%f54,%f54		! (7) y *= dtmp
549	ldd	[G1_CONST_TBL+%l4],%f30	! (7) dtmp = ((double*)(CONST_TBL + k))[0]
550	sub	%i1,%o0,%o0
551	fpadd32 %f6,%f22,%f22		! (3) drez = vis_fpadd32(drez,dtmp)
552
/*
 * Bit patterns of elements 12-15.  Their addresses stay in %i1 (before the
 * final add), %o1, %o2 and %o3; after the last add %i1 points at element 16.
 */
553	lda	[%i1]%asi,%l4		! (12) Xi = ((int*)px)[0]
554	add	%i1,%i2,%o1		! px += stridex
555	fpadd32 %f8,%f24,%f24		! (4) drez = vis_fpadd32(drez,dtmp)
556	fmuld	%f34,%f40,%f40		! (0) y *= drez
557
558	lda	[%o1]%asi,%l5		! (13) Xi = ((int*)px)[0]
559	add	%o1,%i2,%o2		! px += stridex
560	fpadd32 %f10,%f26,%f26		! (5)  drez = vis_fpadd32(drez,dtmp)
561	fmuld	%f18,%f42,%f42		! (1)  y *= drez
562
563	lda	[%o2]%asi,%l6		! (14) Xi = ((int*)px)[0]
564	add	%o2,%i2,%o3		! px += stridex
565	fpadd32 %f12,%f28,%f28		! (6)  drez = vis_fpadd32(drez,dtmp)
566	fmuld	%f20,%f44,%f44		! (2)  y *= drez
567
568	lda	[%o3]%asi,%l7		! (15) Xi = ((int*)px)[0]
569	add	%o3,%i2,%i1		! px += stridex
570	fpadd32 %f14,%f30,%f30		! (7)  drez = vis_fpadd32(drez,dtmp)
571	fmuld	%f22,%f46,%f46		! (3)  y *= drez
572
/*
 * Float values of elements 8-11.  %o0 and %o5 alternate as the pointer; the
 * last add leaves %o0 at element 12, which is where the top of the loop
 * expects the address of its element 4.
 */
573	lda	[%o0]%asi,%f16		! (8)  X = px[0]
574	add	%o0,%i2,%o5
575	fmuld	%f24,%f48,%f48		! (4)  y *= drez
576	faddd	%f34,%f40,%f40		! (0)  y += drez
577
578	lda	[%o5]%asi,%f2		! (9)  X = px[0]
579	add	%o5,%i2,%o0
580	fmuld	%f26,%f50,%f50		! (5)  y *= drez
581	faddd	%f18,%f42,%f42		! (1)  y += drez
582
583	lda	[%o0]%asi,%f4		! (10) X = px[0]
584	add	%o0,%i2,%o5
585	fmuld	%f28,%f52,%f52		! (6)  y *= drez
586	faddd	%f20,%f44,%f44		! (2)  y += drez
587
588	lda	[%o5]%asi,%f6		! (11) X = px[0]
589	add	%o5,%i2,%o0
590	fmuld	%f30,%f54,%f54		! (7)  y *= drez
591	faddd	%f22,%f46,%f46		! (3)  y += drez
592
593	and	%l4,G5_CONST,%l4	! (12) ax = Xi & 0x7fffffff
594	faddd	%f24,%f48,%f48		! (4)  y += drez
595
596	and	%l5,G5_CONST,%l5	! (13) ax = Xi & 0x7fffffff
597	faddd	%f26,%f50,%f50		! (5)  y += drez
598
599	and	%l6,G5_CONST,%l6	! (14) ax = Xi & 0x7fffffff
600	faddd	%f28,%f52,%f52		! (6)  y += drez
601
602	and	%l7,G5_CONST,%l7	! (15) ax = Xi & 0x7fffffff
603	faddd	%f30,%f54,%f54		! (7)  y += drez
604
/* Convert to float and store the eight results; %i3/%o4/%o5 walk py. */
605	fdtos	%f40,%f26		! (0) (float) y
606	st	%f26,[%i3]
607	add	%i3,%i4,%o4		! py += stridey
608
609	fdtos	%f42,%f18		! (1) (float) y
610	st	%f18,[%o4]
611	add	%o4,%i4,%i3		! py += stridey
612
613	fdtos	%f44,%f20		! (2) (float) y
614	st	%f20,[%i3]
615	add	%i3,%i4,%o4		! py += stridey
616
617	fdtos	%f46,%f22		! (3) (float) y
618	st	%f22,[%o4]
619	add	%o4,%i4,%i3		! py += stridey
620
621	fdtos	%f48,%f24		! (4) (float) y
622	st	%f24,[%i3]
/* Loop counter: sets the condition codes tested by the bpos at the bottom. */
623	subcc	%i0,8,%i0
624	add	%i3,%i4,%o4		! py += stridey
625
626	fdtos	%f50,%f26		! (5) (float) y
627	st	%f26,[%o4]
628	add	%o4,%i4,%o5		! py += stridey
/* %o7 = 2*stridey, used below to step %i3 from result 6 to result 8. */
629	add	%i4,%i4,%o7
630
631	fdtos	%f52,%f28		! (6) (float) y
632	st	%f28,[%o5]
633	add	%o5,%i4,%o4		! py += stridey
634	add	%o5,%o7,%i3		! py += stridey
635
636	fdtos	%f54,%f30		! (7) (float) y
637	st	%f30,[%o4]
638	bpos,pt	%icc,.main_loop
639	nop
640.after_main_loop:
/*
 * The pipeline has already fetched the next eight elements, so %i1 is
 * 8 strides beyond the first element that still needs a result.  Step it
 * back before falling into the one-at-a-time tail.
 */
641	sll	%i2,3,%o2
642	sub	%i1,%o2,%i1
643
/*
 * Tail: handles the 0-7 elements left over (or the whole vector when
 * n < 8) one element per pass.
 *   %i0  on entry: remaining count minus 8; turned into (remaining - 1) here
 *   %i1  address of the next input, %i3 address of the next result
 *   %l0  raw bit pattern of the current input, %f2 its float value
 */
644.tail:
645	add	%i0,8,%i0
646	subcc	%i0,1,%i0
647	bneg,pn	%icc,.exit
648
/*
 * Delay slot of the bneg above: it also executes when nothing is left, in
 * which case %i1 addresses x[n] (or x[0] for n <= 0), which the caller never
 * promised to be readable.  Use the non-faulting %asi form, like every other
 * speculative load in this routine, so that case cannot trap.
 */
649	lda	[%i1]%asi,%l0
650	ld	[%i1],%f2
651	add	%i1,%i2,%i1
652
653.tail_loop:
/* ax = Xi & 0x7fffffff; anything above THRESHOLD goes to .tail_spec. */
654	and	%l0,G5_CONST,%l1
655	cmp	%l1,I5_THRESHOLD
656	bg,pn	%icc,.tail_spec
657	nop
658.tail_spec_cont:
/*
 * Same sequence as the listing in the algorithm comment, with %f2 holding
 * X, then k, then the fpackfix result, and %f16 as dtmp/drez.
 */
659	fstod	%f2,%f40
660	fmuld	F62_K256ONLN2,%f40,%f40
661	fdtoi	%f40,%f2
662	st	%f2,[%fp+tmp0]
663	fitod	%f2,%f16
664	fpackfix	%f2,%f2
665	fsubd	%f40,%f16,%f40
666	fmuld	F60_KA2,%f40,%f16
667	faddd	F58_KA1,%f16,%f16
668	ld	[%fp+tmp0],%o0
669	fmuld	%f16,%f40,%f40
670	and	%o0,255,%o0
671	sll	%o0,3,%o0
672	ldd	[G1_CONST_TBL+%o0],%f16
673	fpadd32 %f2,%f16,%f16
/* Fetch the next element early; non-faulting because it may not exist. */
674	lda	[%i1]%asi,%l0
675	fmuld	%f16,%f40,%f40
676	lda	[%i1]%asi,%f2
677	faddd	%f16,%f40,%f40
678	add	%i1,%i2,%i1
679	fdtos	%f40,%f16
680	st	%f16,[%i3]
681	add	%i3,%i4,%i3
682	subcc	%i0,1,%i0
683	bpos,pt	%icc,.tail_loop
684	nop
685
686.exit:
687	ret
688	restore
689
/*
 * Special-case path of the tail loop.  Entered with
 *   %l0 = raw bit pattern of x, %l1 = %l0 & 0x7fffffff (> THRESHOLD), %f2 = x
 * Outcomes:
 *   NaN                          -> x * x
 *   +Inf                         -> x
 *   -Inf                         -> 0.0f (%f3)
 *   finite, 0 <= x < THRESHOLDL  -> still in range, back to .tail_spec_cont
 *   finite, x >= THRESHOLDL      -> 1.0e30f * 1.0e30f
 *   finite, x < 0                -> 1.0e-30f * 1.0e-30f
 * (the algorithm comment describes the last two as +Inf with overflow and
 * +0 with underflow).
 */
690.tail_spec:
691	sethi	%hi(0x7f800000),%o4
692	cmp	%l1,%o4
693	bl,pt	%icc,.tail_spec_out_of_range
694	nop
695
/*
 * srl leaves the condition codes from the cmp intact, so ble still tests
 * ax against 0x7f800000.  After the shift the sign bit sits at bit 2; the
 * andcc in the delay slot sets the codes that .tail_spec_inf tests.
 */
696	srl	%l0,29,%l0
697	ble,pn	%icc,.tail_spec_inf
698	andcc	%l0,4,%g0
699
700! NaN -> NaN
701
702	fmuls	%f2,%f2,%f2
703	ba	.tail_spec_exit
704	st	%f2,[%i3]
705
706.tail_spec_inf:
/* Sign clear: annulled-delay store of x itself.  Sign set: store 0.0f. */
707	be,a,pn	%icc,.tail_spec_exit
708	st	%f2,[%i3]
709
710	ba	.tail_spec_exit
711	st	%f3,[%i3]
712
713.tail_spec_out_of_range:
714	fcmpes	%fcc0,%f2,%f3
715	fcmpes	%fcc1,%f2,THRESHOLDL
716	fbl,pn	%fcc0,1f		! if ( X < 0.0f )
717	nop
718	fbl,pt	%fcc1,.tail_spec_cont	! if ( X < THRESHOLDL )
719	nop
7201:
/*
 * Offset 2048 + (sign ? 4 : 0) into .CONST_TBL selects 1.0e30f for a
 * positive x and 1.0e-30f for a negative one; the value is then squared.
 */
721	srl	%l0,29,%l0
722	and	%l0,4,%l0
723	add	%l0,2048,%l0
724	ld	[G1_CONST_TBL+%l0],%f2
725	fmuls	%f2,%f2,%f2
726	st	%f2,[%i3]
727
728.tail_spec_exit:
/*
 * Result already stored.  Fetch the next element (non-faulting), count this
 * one off and either loop or leave; the delay slot advances py on both paths.
 */
729	lda	[%i1]%asi,%l0
730	lda	[%i1]%asi,%f2
731	add	%i1,%i2,%i1
732
733	subcc	%i0,1,%i0
734	bpos,pt	%icc,.tail_loop
735	add	%i3,%i4,%i3
736	ba	.exit
737	nop
738
739	.align	16
/*
 * Special-case path for element 0 of a main-loop pass (ax > THRESHOLD).
 * On entry %l0 = ax, %f16 = x, and %i1 is 8 strides past element 0.
 * If x is finite and 0 <= x < THRESHOLDL it is still in range: control
 * returns to .spec0_cont with the fstod the main loop skipped done in the
 * annulled delay slot.  Otherwise the special result is stored here, the
 * pipeline is shifted down by one element and the main loop (or the tail)
 * is re-entered.
 */
740.spec0:
741	sethi	%hi(0x7f800000),%o5
742	cmp	%l0,%o5
743	bl,pt	%icc,.spec0_out_of_range
744	sll	%i2,3,%o4
745
/* Delay slot: %o4 = %i1 - 8*stridex = address of element 0. */
746	ble,pn	%icc,.spec0_inf
747	sub	%i1,%o4,%o4
748
749! NaN -> NaN
750
751	fmuls	%f16,%f16,%f16
752	ba	.spec0_exit
753	st	%f16,[%i3]
754
755.spec0_inf:
/*
 * %l0 only holds ax, so reload the raw word to recover the sign.
 * +Inf stores x itself, -Inf stores 0.0f (%f3).
 */
756	ld	[%o4],%l0
757	srl	%l0,29,%l0
758	andcc	%l0,4,%l0
759	be,a,pn	%icc,.spec0_exit
760	st	%f16,[%i3]
761
762	ba	.spec0_exit
763	st	%f3,[%i3]
764
765.spec0_out_of_range:
/* Reached through the bl above, so %o4 is still 8*stridex here. */
766	fcmpes	%fcc0,%f16,%f3
767	fcmpes	%fcc1,%f16,THRESHOLDL
768	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
769	fstod	%f16,%f40			! (0) y = (double) X
770	fbl,a,pt	%fcc1,.spec0_cont	! if ( X < THRESHOLDL )
771	fstod	%f16,%f40			! (0) y = (double) X
7721:
/*
 * Reload the raw word for its sign and pick 1.0e30f (offset 2048) or
 * 1.0e-30f (offset 2052); the squared value is the result.
 */
773	sub	%i1,%o4,%o4
774	ld	[%o4],%l0
775	srl	%l0,29,%l0
776	and	%l0,4,%l0
777	add	%l0,2048,%l0
778	ld	[G1_CONST_TBL+%l0],%f16
779	fmuls	%f16,%f16,%f16
780	st	%f16,[%i3]
781
782.spec0_exit:
/*
 * Shift the pipeline down by one: elements 1-7 become 0-6.  %f8 was
 * already loaded by the delay slot of the branch that came here.  The new
 * element 7 is fetched from %i1 (non-faulting) and %o0-%o3 are rebuilt to
 * address the new elements 4-7.
 */
783	fmovs	%f2,%f16
784	mov	%l1,%l0
785	fmovs	%f4,%f2
786	mov	%l2,%l1
787	fmovs	%f6,%f4
788	mov	%l3,%l2
789	fmovs	%f8,%f6
790	mov	%l4,%l3
791	mov	%l5,%l4
792	mov	%l6,%l5
793	mov	%l7,%l6
794	lda	[%i1]%asi,%l7
795	add	%i1,%i2,%i1
796	mov	%o1,%o0
797	mov	%o2,%o1
798	mov	%o3,%o2
799	and	%l7,G5_CONST,%l7
800	add	%o2,%i2,%o3
801
/* One element consumed; the delay slot advances py on both paths. */
802	subcc	%i0,1,%i0
803	bpos,pt	%icc,.main_loop
804	add	%i3,%i4,%i3
805	ba	.after_main_loop
806	nop
807
808	.align	16
/*
 * Special-case path for element 1 of a main-loop pass (ax > THRESHOLD).
 * On entry %l1 = ax, %f2 = x, %f40 = (double) of element 0 (not yet scaled
 * by K256ONLN2), and %i1 is 8 strides past element 0.
 * A finite x with 0 <= x < THRESHOLDL resumes at .spec1_cont.  Otherwise
 * element 0 is finished here on its own, the special result for element 1
 * is stored, and the pipeline is shifted down by two elements.
 */
809.spec1:
810	sethi	%hi(0x7f800000),%o5
811	cmp	%l1,%o5
812	bge,pn	%icc,1f
813	nop
814	fcmpes	%fcc0,%f2,%f3
815	fcmpes	%fcc1,%f2,THRESHOLDL
816	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
817	fstod	%f2,%f42			! (1) y = (double) X
818	fbl,a,pt	%fcc1,.spec1_cont	! if ( X < THRESHOLDL )
819	fstod	%f2,%f42			! (1) y = (double) X
8201:
/*
 * Element 0, computed with the sequence from the algorithm comment.
 * %o0 is overwritten with k here; .spec1_exit rebuilds it.
 */
821	fmuld	F62_K256ONLN2,%f40,%f40
822	fdtoi	%f40,%f16
823	st	%f16,[%fp+tmp0]
824	fitod	%f16,%f34
825	fpackfix	%f16,%f16
826	fsubd	%f40,%f34,%f40
827	fmuld	F60_KA2,%f40,%f34
828	faddd	F58_KA1,%f34,%f34
829	ld	[%fp+tmp0],%o0
830	fmuld	%f34,%f40,%f40
831	and	%o0,255,%o0
832	sll	%o0,3,%o0
833	ldd	[G1_CONST_TBL+%o0],%f34
834	fpadd32 %f16,%f34,%f34
835	fmuld	%f34,%f40,%f40
836	faddd	%f34,%f40,%f40
837	fdtos	%f40,%f26
838	st	%f26,[%i3]
839	add	%i3,%i4,%i3
840
/*
 * Classify element 1; %o5 still holds 0x7f800000.
 * The two delay slots build %o4 = %i1 - 8*stridex (address of element 0)
 * for the Inf path; the out-of-range path sees only the sll.
 */
841	cmp	%l1,%o5
842	bl,pt	%icc,.spec1_out_of_range
843	sll	%i2,3,%o4
844
845	ble,pn	%icc,.spec1_inf
846	sub	%i1,%o4,%o4
847
848! NaN -> NaN
849
850	fmuls	%f2,%f2,%f2
851	ba	.spec1_exit
852	st	%f2,[%i3]
853
854.spec1_inf:
/* Step to element 1 and reload its raw word for the sign. */
855	add	%o4,%i2,%o4
856	ld	[%o4],%l0
857	srl	%l0,29,%l0
858	andcc	%l0,4,%l0
859	be,a,pn	%icc,.spec1_exit
860	st	%f2,[%i3]
861
862	ba	.spec1_exit
863	st	%f3,[%i3]
864
865.spec1_out_of_range:
/*
 * %o4 = address of element 1; its sign selects 1.0e30f (offset 2048) or
 * 1.0e-30f (offset 2052), which is squared to give the result.
 */
866	sub	%i1,%o4,%o4
867	add	%o4,%i2,%o4
868	ld	[%o4],%l0
869	srl	%l0,29,%l0
870	and	%l0,4,%l0
871	add	%l0,2048,%l0
872	ld	[G1_CONST_TBL+%l0],%f2
873	fmuls	%f2,%f2,%f2
874	st	%f2,[%i3]
875
876.spec1_exit:
/*
 * Shift the pipeline down by two: elements 2-7 become 0-5 (%f8 and %f10
 * were loaded by the range-check delay slots before the branch here).  The
 * new elements 6 and 7 are fetched from %i1 (non-faulting) and %o0-%o3 are
 * rebuilt to address the new elements 4-7.
 */
877	fmovs	%f4,%f16
878	mov	%l2,%l0
879	fmovs	%f6,%f2
880	mov	%l3,%l1
881	fmovs	%f8,%f4
882	mov	%l4,%l2
883	fmovs	%f10,%f6
884	mov	%l5,%l3
885	mov	%l6,%l4
886	mov	%l7,%l5
887	lda	[%i1]%asi,%l6
888	add	%i1,%i2,%i1
889	lda	[%i1]%asi,%l7
890	add	%i1,%i2,%i1
891	and	%l6,G5_CONST,%l6
892	and	%l7,G5_CONST,%l7
893	mov	%o2,%o0
894	mov	%o3,%o1
895	add	%o1,%i2,%o2
896	add	%o2,%i2,%o3
897
/* Two elements consumed; the delay slot advances py past element 1. */
898	subcc	%i0,2,%i0
899	bpos,pt	%icc,.main_loop
900	add	%i3,%i4,%i3
901	ba	.after_main_loop
902	nop
903
904	.align	16
/*
 * Special-case path for element 2 of a main-loop pass (ax > THRESHOLD).
 * On entry %l2 = ax, %f4 = x, %f40/%f42 = (double) of elements 0 and 1
 * (not yet scaled by K256ONLN2), and %i1 is 8 strides past element 0.
 * A finite x with 0 <= x < THRESHOLDL resumes at .spec2_cont.  Otherwise
 * elements 0 and 1 are finished here, the special result for element 2 is
 * stored, and the pipeline is shifted down by three elements.
 */
905.spec2:
906	sethi	%hi(0x7f800000),%o5
907	cmp	%l2,%o5
908	bge,pn	%icc,1f
909	nop
910	fcmpes	%fcc0,%f4,%f3
911	fcmpes	%fcc1,%f4,THRESHOLDL
912	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
913	fstod	%f4,%f44			! (2) y = (double) X
914	fbl,a,pt	%fcc1,.spec2_cont	! if ( X < THRESHOLDL )
915	fstod	%f4,%f44			! (2) y = (double) X
9161:
/*
 * Elements 0 and 1, interleaved, using the sequence from the algorithm
 * comment.  %o0 and %o1 are overwritten with k values here; .spec2_exit
 * rebuilds them.
 */
917	fmuld	F62_K256ONLN2,%f40,%f40
918
919	fmuld	F62_K256ONLN2,%f42,%f42
920
921	fdtoi	%f40,%f16
922	st	%f16,[%fp+tmp0]
923
924	fdtoi	%f42,%f2
925	st	%f2,[%fp+tmp1]
926
927	fitod	%f16,%f34
928	fpackfix	%f16,%f16
929
930	fitod	%f2,%f18
931	fpackfix	%f2,%f2
932
933	fsubd	%f40,%f34,%f40
934
935	fsubd	%f42,%f18,%f42
936
937	fmuld	F60_KA2,%f40,%f34
938
939	fmuld	F60_KA2,%f42,%f18
940
941	faddd	F58_KA1,%f34,%f34
942
943	faddd	F58_KA1,%f18,%f18
944
945	ld	[%fp+tmp0],%o0
946	fmuld	%f34,%f40,%f40
947
948	ld	[%fp+tmp1],%o1
949	fmuld	%f18,%f42,%f42
950
951	and	%o0,255,%o0
952
953	and	%o1,255,%o1
954
955	sll	%o0,3,%o0
956
957	sll	%o1,3,%o1
958
959	ldd	[G1_CONST_TBL+%o0],%f34
960
961	ldd	[G1_CONST_TBL+%o1],%f18
962
963	fpadd32 %f16,%f34,%f34
964
965	fpadd32 %f2,%f18,%f18
966
967	fmuld	%f34,%f40,%f40
968
969	fmuld	%f18,%f42,%f42
970
971	faddd	%f34,%f40,%f40
972
973	faddd	%f18,%f42,%f42
974
975	fdtos	%f40,%f26
976	st	%f26,[%i3]
977	add	%i3,%i4,%o4
978
979	fdtos	%f42,%f18
980	st	%f18,[%o4]
981	add	%o4,%i4,%i3
982
/*
 * Classify element 2.  The cmp reads %o5 (0x7f800000) before the next
 * instruction reuses %o5 for 2*stridex; neither sll changes the condition
 * codes.  %o4 becomes 4*stridex and, on the Inf path, 6*stridex, the
 * distance from element 2 back from %i1.
 */
983	cmp	%l2,%o5
984	sll	%i2,1,%o5
985	bl,pt	%icc,.spec2_out_of_range
986	sll	%i2,2,%o4
987
988	ble,pn	%icc,.spec2_inf
989	add	%o4,%o5,%o4
990
991! NaN -> NaN
992
993	fmuls	%f4,%f4,%f4
994	ba	.spec2_exit
995	st	%f4,[%i3]
996
997.spec2_inf:
/* %o4 = address of element 2; reload its raw word for the sign. */
998	sub	%i1,%o4,%o4
999	ld	[%o4],%l0
1000	srl	%l0,29,%l0
1001	andcc	%l0,4,%l0
1002	be,a,pn	%icc,.spec2_exit
1003	st	%f4,[%i3]
1004
1005	ba	.spec2_exit
1006	st	%f3,[%i3]
1007
1008.spec2_out_of_range:
/*
 * Form the same 6*stridex offset as the Inf path, then use the sign of
 * element 2 to pick 1.0e30f (offset 2048) or 1.0e-30f (offset 2052).
 * %f2 is free to use as a temporary because .spec2_exit reloads it.
 */
1009	add	%o4,%o5,%o4
1010	sub	%i1,%o4,%o4
1011	ld	[%o4],%l0
1012	srl	%l0,29,%l0
1013	and	%l0,4,%l0
1014	add	%l0,2048,%l0
1015	ld	[G1_CONST_TBL+%l0],%f2
1016	fmuls	%f2,%f2,%f2
1017	st	%f2,[%i3]
1018
1019.spec2_exit:
/*
 * Shift the pipeline down by three: elements 3-7 become 0-4 (%f8, %f10 and
 * %f12 were loaded by the range-check delay slots before the branch here).
 * The old %o3 (element 7) becomes the new %o0 (element 4) and %o1-%o3 are
 * derived from it; the new elements 5-7 are fetched from %i1 (non-faulting).
 */
1020	fmovs	%f6,%f16
1021	mov	%l3,%l0
1022	mov	%o3,%o0
1023	fmovs	%f8,%f2
1024	mov	%l4,%l1
1025	add	%o0,%i2,%o1
1026	fmovs	%f10,%f4
1027	mov	%l5,%l2
1028	add	%o1,%i2,%o2
1029	fmovs	%f12,%f6
1030	mov	%l6,%l3
1031	mov	%l7,%l4
1032	lda	[%i1]%asi,%l5
1033	add	%i1,%i2,%i1
1034	add	%o2,%i2,%o3
1035	lda	[%i1]%asi,%l6
1036	add	%i1,%i2,%i1
1037	lda	[%i1]%asi,%l7
1038	add	%i1,%i2,%i1
1039	and	%l5,G5_CONST,%l5
1040	and	%l6,G5_CONST,%l6
1041	and	%l7,G5_CONST,%l7
1042
/* Three elements consumed; the delay slot advances py past element 2. */
1043	subcc	%i0,3,%i0
1044	bpos,pt	%icc,.main_loop
1045	add	%i3,%i4,%i3
1046	ba	.after_main_loop
1047	nop
1047	nop
1048.spec3:
! Special-case path for the element in %f6 (%l3 holds its bits masked with
! G5_CONST). If the value is in range after all, control goes back to
! .spec3_cont. Otherwise the three pending elements in %f40/%f42/%f44 are
! finished and stored, and then the special element is classified as
! out-of-range, Inf or NaN.
! 0x7f800000 is the single precision +Inf bit pattern.
1049	sethi	%hi(0x7f800000),%o5
1050	cmp	%l3,%o5
1051	bge,pn	%icc,1f
1052	nop
1053	fcmpes	%fcc0,%f6,%f3
1054	fcmpes	%fcc1,%f6,THRESHOLDL
1055	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
1056	fstod	%f6,%f46			! (3) y = (double) X
1057	fbl,a,pt	%fcc1,.spec3_cont	! if ( X < THRESHOLDL )
1058	fstod	%f6,%f46			! (3) y = (double) X
10591:
! y *= F62_K256ONLN2 for the three pending elements
! (the name suggests 256/ln(2) - confirm against the constant's definition).
1060	fmuld	F62_K256ONLN2,%f40,%f40
1061
1062	fmuld	F62_K256ONLN2,%f42,%f42
1063
1064	fmuld	F62_K256ONLN2,%f44,%f44
1065
! k = (int) y, spilled to the stack so it can be reloaded into integer regs.
1066	fdtoi	%f40,%f16
1067	st	%f16,[%fp+tmp0]
1068
1069	fdtoi	%f42,%f2
1070	st	%f2,[%fp+tmp1]
1071
1072	fdtoi	%f44,%f4
1073	st	%f4,[%fp+tmp2]
1074
! (double) k for the remainder, plus an fpackfix-ed copy of k that fpadd32
! later adds into the table entry.
1075	fitod	%f16,%f34
1076	fpackfix	%f16,%f16
1077
1078	fitod	%f2,%f18
1079	fpackfix	%f2,%f2
1080
1081	fitod	%f4,%f20
1082	fpackfix	%f4,%f4
1083
! y -= (double) k
1084	fsubd	%f40,%f34,%f40
1085
1086	fsubd	%f42,%f18,%f42
1087
1088	fsubd	%f44,%f20,%f44
1089
! p = KA2 * y + KA1
1090	fmuld	F60_KA2,%f40,%f34
1091
1092	fmuld	F60_KA2,%f42,%f18
1093
1094	fmuld	F60_KA2,%f44,%f20
1095
1096	faddd	F58_KA1,%f34,%f34
1097
1098	faddd	F58_KA1,%f18,%f18
1099
1100	faddd	F58_KA1,%f20,%f20
1101
! y = p * y, overlapped with reloading k into %o0..%o2
1102	ld	[%fp+tmp0],%o0
1103	fmuld	%f34,%f40,%f40
1104
1105	ld	[%fp+tmp1],%o1
1106	fmuld	%f18,%f42,%f42
1107
1108	ld	[%fp+tmp2],%o2
1109	fmuld	%f20,%f44,%f44
1110
! table byte offset = (k & 255) * 8
1111	and	%o0,255,%o0
1112	and	%o1,255,%o1
1113
1114	and	%o2,255,%o2
1115	sll	%o0,3,%o0
1116
1117	sll	%o1,3,%o1
1118	sll	%o2,3,%o2
1119
1120	ldd	[G1_CONST_TBL+%o0],%f34
1121
1122	ldd	[G1_CONST_TBL+%o1],%f18
1123
1124	ldd	[G1_CONST_TBL+%o2],%f20
1125
! T = table entry with the packed k added in
! (presumably adjusts the exponent field - confirm GSR scale setup).
1126	fpadd32 %f16,%f34,%f34
1127
1128	fpadd32 %f2,%f18,%f18
1129
1130	fpadd32 %f4,%f20,%f20
1131
! result = T + T * y
1132	fmuld	%f34,%f40,%f40
1133
1134	fmuld	%f18,%f42,%f42
1135
1136	fmuld	%f20,%f44,%f44
1137
1138	faddd	%f34,%f40,%f40
1139
1140	faddd	%f18,%f42,%f42
1141
1142	faddd	%f20,%f44,%f44
1143
! Round to single and store, stepping the output pointer by %i4 each time.
1144	fdtos	%f40,%f26
1145	st	%f26,[%i3]
1146	add	%i3,%i4,%o4
1147
1148	fdtos	%f42,%f18
1149	st	%f18,[%o4]
1150	add	%o4,%i4,%i3
1151
1152	fdtos	%f44,%f20
1153	st	%f20,[%i3]
1154	add	%i3,%i4,%i3
1155
! Classify the special element: below the Inf pattern -> out of range,
! equal -> Inf, above -> NaN. The delay slots build %o4 = 5 * %i2.
1156	cmp	%l3,%o5
1157	bl,pt	%icc,.spec3_out_of_range
1158	sll	%i2,2,%o4
1159
1160	ble,pn	%icc,.spec3_inf
1161	add	%o4,%i2,%o4
1162
1163! NaN -> NaN
1164
1165	fmuls	%f6,%f6,%f6
1166	ba	.spec3_exit
1167	st	%f6,[%i3]
1168
1169.spec3_inf:
! Infinite input. %o4 arrives holding 5 * %i2; the source word at %i1 - %o4
! is re-read so that its sign bit can be tested.
1170	sub	%i1,%o4,%o4
1171	ld	[%o4],%l0
! After the shift bit 31 (sign) sits in bit 2; the mask keeps only that bit.
1172	srl	%l0,29,%l0
1173	andcc	%l0,4,%l0
! Sign clear: store %f6 as is (annulled slot runs only when taken).
1174	be,a,pn	%icc,.spec3_exit
1175	st	%f6,[%i3]
1176
! Sign set: store %f3 (the register the range checks compare against as 0.0f).
1177	ba	.spec3_exit
1178	st	%f3,[%i3]
1179
1180.spec3_out_of_range:
! Finite input outside the supported range: choose one of two single
! precision constants by the sign of the input and store its square.
! NOTE(review): presumably a huge/tiny pair, so the square overflows or
! underflows - confirm in the table data.
! %o4 arrives as 4 * %i2; one more stride gives the 5 * %i2 back-offset.
1181	add	%o4,%i2,%o4
1182	sub	%i1,%o4,%o4
1183	ld	[%o4],%l0
! %l0 = 4 when the sign bit is set, 0 otherwise.
1184	srl	%l0,29,%l0
1185	and	%l0,4,%l0
! 2048 = 256 entries * 8 bytes, i.e. just past the 2^(i/256) table
! (assuming G1_CONST_TBL addresses .CONST_TBL - TODO confirm).
1186	add	%l0,2048,%l0
1187	ld	[G1_CONST_TBL+%l0],%f2
1188	fmuls	%f2,%f2,%f2
1189	st	%f2,[%i3]
1190
1191.spec3_exit:
! Four elements have been retired. Slide the remaining state down by four
! slots: %f8..%f14 -> %f16,%f2,%f4,%f6 and %l4..%l7 -> %l0..%l3.
1192	fmovs	%f8,%f16
1193	mov	%l4,%l0
1194	fmovs	%f10,%f2
1195	mov	%l5,%l1
1196	fmovs	%f12,%f4
1197	mov	%l6,%l2
1198	fmovs	%f14,%f6
1199	mov	%l7,%l3
! Fetch four new input words; %o0..%o3 keep their addresses and %i1 ends up
! one stride past the last one.
1200	mov	%i1,%o0
1201	lda	[%o0]%asi,%l4
1202	add	%o0,%i2,%o1
1203	lda	[%o1]%asi,%l5
1204	add	%o1,%i2,%o2
1205	lda	[%o2]%asi,%l6
1206	add	%o2,%i2,%o3
1207	lda	[%o3]%asi,%l7
1208	add	%o3,%i2,%i1
! Mask the new words with G5_CONST (presumably clears the sign bit - confirm).
1209	and	%l4,G5_CONST,%l4
1210	and	%l5,G5_CONST,%l5
1211	and	%l6,G5_CONST,%l6
1212	and	%l7,G5_CONST,%l7
1213
! Counter -= 4; keep looping while it stays non-negative. The delay slot
! moves the output pointer past the special element stored above.
1214	subcc	%i0,4,%i0
1215	bpos,pt	%icc,.main_loop
1216	add	%i3,%i4,%i3
1217	ba	.after_main_loop
1218	nop
1219
1220	.align	16
1221.spec4:
! Special-case path for the element in %f8 (%l4 holds its bits masked with
! G5_CONST). If the value is in range after all, control goes back to
! .spec4_cont. Otherwise the four pending elements in %f40..%f46 are
! finished and stored, and then the special element is classified as
! out-of-range, Inf or NaN.
! 0x7f800000 is the single precision +Inf bit pattern.
1222	sethi	%hi(0x7f800000),%o5
1223	cmp	%l4,%o5
1224	bge,pn	%icc,1f
1225	nop
1226	fcmpes	%fcc0,%f8,%f3
1227	fcmpes	%fcc1,%f8,THRESHOLDL
1228	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
1229	fstod	%f8,%f48			! (4) y = (double) X
1230	fbl,a,pt	%fcc1,.spec4_cont	! if ( X < THRESHOLDL )
1231	fstod	%f8,%f48			! (4) y = (double) X
12321:
! y *= F62_K256ONLN2 for %f42/%f44/%f46. %f40 is not scaled on this path
! (presumably already done before the branch here - confirm in .main_loop).
1233	fmuld	F62_K256ONLN2,%f42,%f42
1234
1235	fmuld	F62_K256ONLN2,%f44,%f44
1236
1237	fmuld	F62_K256ONLN2,%f46,%f46
1238
! k = (int) y, spilled to the stack so it can be reloaded into integer regs.
1239	fdtoi	%f40,%f16
1240	st	%f16,[%fp+tmp0]
1241
1242	fdtoi	%f42,%f2
1243	st	%f2,[%fp+tmp1]
1244
1245	fdtoi	%f44,%f4
1246	st	%f4,[%fp+tmp2]
1247
1248	fdtoi	%f46,%f6
1249	st	%f6,[%fp+tmp3]
1250
! (double) k for the remainder, plus an fpackfix-ed copy of k that fpadd32
! later adds into the table entry.
1251	fitod	%f16,%f34
1252	fpackfix	%f16,%f16
1253
1254	fitod	%f2,%f18
1255	fpackfix	%f2,%f2
1256
1257	fitod	%f4,%f20
1258	fpackfix	%f4,%f4
1259
1260	fitod	%f6,%f22
1261	fpackfix	%f6,%f6
1262
! y -= (double) k
1263	fsubd	%f40,%f34,%f40
1264
1265	fsubd	%f42,%f18,%f42
1266
1267	fsubd	%f44,%f20,%f44
1268
1269	fsubd	%f46,%f22,%f46
1270
! p = KA2 * y + KA1
1271	fmuld	F60_KA2,%f40,%f34
1272
1273	fmuld	F60_KA2,%f42,%f18
1274
1275	fmuld	F60_KA2,%f44,%f20
1276
1277	fmuld	F60_KA2,%f46,%f22
1278
1279	faddd	F58_KA1,%f34,%f34
1280
1281	faddd	F58_KA1,%f18,%f18
1282
1283	faddd	F58_KA1,%f20,%f20
1284
1285	faddd	F58_KA1,%f22,%f22
1286
! y = p * y, overlapped with reloading k into %o0..%o3
1287	ld	[%fp+tmp0],%o0
1288	fmuld	%f34,%f40,%f40
1289
1290	ld	[%fp+tmp1],%o1
1291	fmuld	%f18,%f42,%f42
1292
1293	ld	[%fp+tmp2],%o2
1294	fmuld	%f20,%f44,%f44
1295
1296	ld	[%fp+tmp3],%o3
1297	fmuld	%f22,%f46,%f46
1298
! table byte offset = (k & 255) * 8
1299	and	%o0,255,%o0
1300	and	%o1,255,%o1
1301
1302	and	%o2,255,%o2
1303	and	%o3,255,%o3
1304
1305	sll	%o0,3,%o0
1306	sll	%o1,3,%o1
1307
1308	sll	%o2,3,%o2
1309	sll	%o3,3,%o3
1310
1311	ldd	[G1_CONST_TBL+%o0],%f34
1312
1313	ldd	[G1_CONST_TBL+%o1],%f18
1314
1315	ldd	[G1_CONST_TBL+%o2],%f20
1316
1317	ldd	[G1_CONST_TBL+%o3],%f22
1318
! T = table entry with the packed k added in
! (presumably adjusts the exponent field - confirm GSR scale setup).
1319	fpadd32 %f16,%f34,%f34
1320
1321	fpadd32 %f2,%f18,%f18
1322
1323	fpadd32 %f4,%f20,%f20
1324
1325	fpadd32 %f6,%f22,%f22
1326
! result = T + T * y
1327	fmuld	%f34,%f40,%f40
1328
1329	fmuld	%f18,%f42,%f42
1330
1331	fmuld	%f20,%f44,%f44
1332
1333	fmuld	%f22,%f46,%f46
1334
1335	faddd	%f34,%f40,%f40
1336
1337	faddd	%f18,%f42,%f42
1338
1339	faddd	%f20,%f44,%f44
1340
1341	faddd	%f22,%f46,%f46
1342
! Round to single and store, stepping the output pointer by %i4 each time.
1343	fdtos	%f40,%f26
1344	st	%f26,[%i3]
1345	add	%i3,%i4,%o4
1346
1347	fdtos	%f42,%f18
1348	st	%f18,[%o4]
1349	add	%o4,%i4,%i3
1350
1351	fdtos	%f44,%f20
1352	st	%f20,[%i3]
1353	add	%i3,%i4,%o4
1354
1355	fdtos	%f46,%f22
1356	st	%f22,[%o4]
1357	add	%o4,%i4,%i3
1358
! Classify the special element: below the Inf pattern -> out of range,
! equal -> Inf, above -> NaN. %o4 = 4 * %i2 from the first delay slot; the
! second delay slot already turns it into the address %i1 - 4 * %i2.
1359	cmp	%l4,%o5
1360	bl,pt	%icc,.spec4_out_of_range
1361	sll	%i2,2,%o4
1362
1363	ble,pn	%icc,.spec4_inf
1364	sub	%i1,%o4,%o4
1365
1366! NaN -> NaN
1367
1368	fmuls	%f8,%f8,%f8
1369	ba	.spec4_exit
1370	st	%f8,[%i3]
1371
1372.spec4_inf:
! Infinite input. %o4 already holds the source address (%i1 - 4 * %i2, set
! in the delay slot of the branch that came here); re-read the word and test
! its sign bit.
1373	ld	[%o4],%l0
! After the shift bit 31 (sign) sits in bit 2; the mask keeps only that bit.
1374	srl	%l0,29,%l0
1375	andcc	%l0,4,%l0
! Sign clear: store %f8 as is (annulled slot runs only when taken).
1376	be,a,pn	%icc,.spec4_exit
1377	st	%f8,[%i3]
1378
! Sign set: store %f3 (the register the range checks compare against as 0.0f).
1379	ba	.spec4_exit
1380	st	%f3,[%i3]
1381
1382.spec4_out_of_range:
! Finite input outside the supported range: choose one of two single
! precision constants by the sign of the input and store its square.
! NOTE(review): presumably a huge/tiny pair, so the square overflows or
! underflows - confirm in the table data.
! %o4 arrives as 4 * %i2; the source word is at %i1 - %o4.
1383	sub	%i1,%o4,%o4
1384	ld	[%o4],%l0
! %l0 = 4 when the sign bit is set, 0 otherwise.
1385	srl	%l0,29,%l0
1386	and	%l0,4,%l0
! 2048 = 256 entries * 8 bytes, i.e. just past the 2^(i/256) table
! (assuming G1_CONST_TBL addresses .CONST_TBL - TODO confirm).
1387	add	%l0,2048,%l0
1388	ld	[G1_CONST_TBL+%l0],%f2
1389	fmuls	%f2,%f2,%f2
1390	st	%f2,[%i3]
1391
1392.spec4_exit:
! Five elements have been retired. Slide the remaining state down by five
! slots: %f10,%f12,%f14 -> %f16,%f2,%f4 and %l5..%l7 -> %l0..%l2.
1393	fmovs	%f10,%f16
1394	mov	%l5,%l0
1395	fmovs	%f12,%f2
1396	mov	%l6,%l1
1397	fmovs	%f14,%f4
1398	mov	%l7,%l2
! Slot 3 is refilled from memory twice: once as integer bits (%l3) and once
! as a float (%f6).
1399	lda	[%i1]%asi,%l3
1400	lda	[%i1]%asi,%f6
! Slots 4..7: fetch the integer bits only; %o0..%o3 keep their addresses.
1401	add	%i1,%i2,%o0
1402	lda	[%o0]%asi,%l4
1403	add	%o0,%i2,%o1
1404	lda	[%o1]%asi,%l5
1405	add	%o1,%i2,%o2
1406	lda	[%o2]%asi,%l6
1407	add	%o2,%i2,%o3
1408	lda	[%o3]%asi,%l7
1409	add	%o3,%i2,%i1
! Mask the new words with G5_CONST (presumably clears the sign bit - confirm).
1410	and	%l3,G5_CONST,%l3
1411	and	%l4,G5_CONST,%l4
1412	and	%l5,G5_CONST,%l5
1413	and	%l6,G5_CONST,%l6
1414	and	%l7,G5_CONST,%l7
1415
! Counter -= 5; keep looping while it stays non-negative. The delay slot
! moves the output pointer past the special element stored above.
1416	subcc	%i0,5,%i0
1417	bpos,pt	%icc,.main_loop
1418	add	%i3,%i4,%i3
1419	ba	.after_main_loop
1420	nop
1421
1422	.align 16
1423.spec5:
! Special-case path for the element in %f10 (%l5 holds its bits masked with
! G5_CONST). If the value is in range after all, control goes back to
! .spec5_cont. Otherwise the five pending elements in %f40..%f48 are
! finished and stored, and then the special element is classified as
! out-of-range, Inf or NaN.
!
! Per element the interleaved code below computes:
!   k = (int) y;  y -= (double) k;  y = (KA2 * y + KA1) * y;
!   T = table[(k & 255) * 8] with fpackfix(k) added by fpadd32;
!   result = (float) (T + T * y)
! 0x7f800000 is the single precision +Inf bit pattern.
1424	sethi	%hi(0x7f800000),%o5
1425	cmp	%l5,%o5
1426	bge,pn	%icc,1f
1427	nop
1428	fcmpes	%fcc0,%f10,%f3
1429	fcmpes	%fcc1,%f10,THRESHOLDL
1430	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
1431	fstod	%f10,%f50			! (5) y = (double) X
1432	fbl,a,pt	%fcc1,.spec5_cont	! if ( X < THRESHOLDL )
1433	fstod	%f10,%f50			! (5) y = (double) X
14341:
! Scale %f44/%f46/%f48 by F62_K256ONLN2. %f40 and %f42 are not scaled on
! this path (presumably already done before the branch here - confirm).
1435	fmuld	F62_K256ONLN2,%f44,%f44
1436
1437	fmuld	F62_K256ONLN2,%f46,%f46
1438
1439	fdtoi	%f40,%f16
1440	st	%f16,[%fp+tmp0]
1441	fmuld	F62_K256ONLN2,%f48,%f48
1442
1443	fdtoi	%f42,%f2
1444	st	%f2,[%fp+tmp1]
1445
1446	fdtoi	%f44,%f4
1447	st	%f4,[%fp+tmp2]
1448
1449	fdtoi	%f46,%f6
1450	st	%f6,[%fp+tmp3]
1451
1452	fdtoi	%f48,%f8
1453	st	%f8,[%fp+tmp4]
1454
1455	fitod	%f16,%f34
1456	fpackfix	%f16,%f16
1457
1458	fitod	%f2,%f18
1459	fpackfix	%f2,%f2
1460
1461	fitod	%f4,%f20
1462	fpackfix	%f4,%f4
1463
1464	fitod	%f6,%f22
1465	fpackfix	%f6,%f6
1466
1467	fitod	%f8,%f24
1468	fpackfix	%f8,%f8
1469
! From here the integer work (reload k, mask to 8 bits, scale by 8) is
! interleaved with the floating point remainder/polynomial steps.
1470	ld	[%fp+tmp0],%o0
1471	fsubd	%f40,%f34,%f40
1472
1473	ld	[%fp+tmp1],%o1
1474	fsubd	%f42,%f18,%f42
1475
1476	ld	[%fp+tmp2],%o2
1477	and	%o0,255,%o0
1478	fsubd	%f44,%f20,%f44
1479
1480	ld	[%fp+tmp3],%o3
1481	and	%o1,255,%o1
1482	fsubd	%f46,%f22,%f46
1483
1484	sll	%o0,3,%o0
1485	sll	%o1,3,%o1
1486	fmuld	F60_KA2,%f40,%f34
1487	fsubd	%f48,%f24,%f48
1488
1489	and	%o2,255,%o2
1490	fmuld	F60_KA2,%f42,%f18
1491
1492	sll	%o2,3,%o2
1493	fmuld	F60_KA2,%f44,%f20
1494
! %o4 is reused as the fifth table index register.
1495	ld	[%fp+tmp4],%o4
1496	and	%o3,255,%o3
1497	fmuld	F60_KA2,%f46,%f22
1498
1499	sll	%o3,3,%o3
1500	fmuld	F60_KA2,%f48,%f24
1501	faddd	F58_KA1,%f34,%f34
1502
1503	and	%o4,255,%o4
1504	faddd	F58_KA1,%f18,%f18
1505
1506	faddd	F58_KA1,%f20,%f20
1507
1508	faddd	F58_KA1,%f22,%f22
1509
1510	fmuld	%f34,%f40,%f40
1511	ldd	[G1_CONST_TBL+%o0],%f34
1512	faddd	F58_KA1,%f24,%f24
1513
1514	fmuld	%f18,%f42,%f42
1515	ldd	[G1_CONST_TBL+%o1],%f18
1516
1517	fmuld	%f20,%f44,%f44
1518	ldd	[G1_CONST_TBL+%o2],%f20
1519
1520	fmuld	%f22,%f46,%f46
1521	ldd	[G1_CONST_TBL+%o3],%f22
1522	sll	%o4,3,%o4
1523
1524	fmuld	%f24,%f48,%f48
1525	ldd	[G1_CONST_TBL+%o4],%f24
1526	fpadd32 %f16,%f34,%f34
1527
1528	fpadd32 %f2,%f18,%f18
1529
1530	fpadd32 %f4,%f20,%f20
1531
1532	fpadd32 %f6,%f22,%f22
1533
1534	fpadd32 %f8,%f24,%f24
1535	fmuld	%f34,%f40,%f40
1536
1537	fmuld	%f18,%f42,%f42
1538
1539	fmuld	%f20,%f44,%f44
1540
1541	fmuld	%f22,%f46,%f46
1542
1543	fmuld	%f24,%f48,%f48
1544	faddd	%f34,%f40,%f40
1545
1546	faddd	%f18,%f42,%f42
1547
1548	faddd	%f20,%f44,%f44
1549
1550	faddd	%f22,%f46,%f46
1551
1552	faddd	%f24,%f48,%f48
1553
! Round to single and store, stepping the output pointer by %i4 each time.
1554	fdtos	%f40,%f26
1555	st	%f26,[%i3]
1556	add	%i3,%i4,%o4
1557
1558	fdtos	%f42,%f18
1559	st	%f18,[%o4]
1560	add	%o4,%i4,%i3
1561
1562	fdtos	%f44,%f20
1563	st	%f20,[%i3]
1564	add	%i3,%i4,%o4
1565
1566	fdtos	%f46,%f22
1567	st	%f22,[%o4]
1568	add	%o4,%i4,%i3
1569
1570	fdtos	%f48,%f24
1571	st	%f24,[%i3]
1572	add	%i3,%i4,%i3
1573
! Classify the special element: below the Inf pattern -> out of range,
! equal -> Inf, above -> NaN. The delay slots build %o4 = 3 * %i2.
1574	cmp	%l5,%o5
1575	bl,pt	%icc,.spec5_out_of_range
1576	sll	%i2,2,%o4
1577
1578	ble,pn	%icc,.spec5_inf
1579	sub	%o4,%i2,%o4
1580
1581! NaN -> NaN
1582
1583	fmuls	%f10,%f10,%f10
1584	ba	.spec5_exit
1585	st	%f10,[%i3]
1586
1587.spec5_inf:
! Infinite input. %o4 arrives holding 3 * %i2; the source word at %i1 - %o4
! is re-read so that its sign bit can be tested.
1588	sub	%i1,%o4,%o4
1589	ld	[%o4],%l0
! After the shift bit 31 (sign) sits in bit 2; the mask keeps only that bit.
1590	srl	%l0,29,%l0
1591	andcc	%l0,4,%l0
! Sign clear: store %f10 as is (annulled slot runs only when taken).
1592	be,a,pn	%icc,.spec5_exit
1593	st	%f10,[%i3]
1594
! Sign set: store %f3 (the register the range checks compare against as 0.0f).
1595	ba	.spec5_exit
1596	st	%f3,[%i3]
1597
1598.spec5_out_of_range:
! Finite input outside the supported range: choose one of two single
! precision constants by the sign of the input and store its square.
! NOTE(review): presumably a huge/tiny pair, so the square overflows or
! underflows - confirm in the table data.
! %o4 arrives as 4 * %i2; removing one stride gives the 3 * %i2 back-offset.
1599	sub	%o4,%i2,%o4
1600	sub	%i1,%o4,%o4
1601	ld	[%o4],%l0
! %l0 = 4 when the sign bit is set, 0 otherwise.
1602	srl	%l0,29,%l0
1603	and	%l0,4,%l0
! 2048 = 256 entries * 8 bytes, i.e. just past the 2^(i/256) table
! (assuming G1_CONST_TBL addresses .CONST_TBL - TODO confirm).
1604	add	%l0,2048,%l0
1605	ld	[G1_CONST_TBL+%l0],%f2
1606	fmuls	%f2,%f2,%f2
1607	st	%f2,[%i3]
1608
1609.spec5_exit:
! Six elements have been retired. Slide the remaining state down by six
! slots: %f12,%f14 -> %f16,%f2 and %l6,%l7 -> %l0,%l1.
1610	fmovs	%f12,%f16
1611	mov	%l6,%l0
1612	fmovs	%f14,%f2
1613	mov	%l7,%l1
! Slots 2 and 3 are refilled from memory both as integer bits and as floats.
1614	lda	[%i1]%asi,%l2
1615	lda	[%i1]%asi,%f4
1616	add	%i1,%i2,%i1
1617	lda	[%i1]%asi,%l3
1618	lda	[%i1]%asi,%f6
! Slots 4..7: fetch the integer bits only; %o0..%o3 keep their addresses.
1619	add	%i1,%i2,%o0
1620	lda	[%o0]%asi,%l4
1621	add	%o0,%i2,%o1
1622	lda	[%o1]%asi,%l5
1623	add	%o1,%i2,%o2
1624	lda	[%o2]%asi,%l6
1625	add	%o2,%i2,%o3
1626	lda	[%o3]%asi,%l7
1627	add	%o3,%i2,%i1
! Mask the new words with G5_CONST (presumably clears the sign bit - confirm).
1628	and	%l2,G5_CONST,%l2
1629	and	%l3,G5_CONST,%l3
1630	and	%l4,G5_CONST,%l4
1631	and	%l5,G5_CONST,%l5
1632	and	%l6,G5_CONST,%l6
1633	and	%l7,G5_CONST,%l7
1634
! Counter -= 6; keep looping while it stays non-negative. The delay slot
! moves the output pointer past the special element stored above.
1635	subcc	%i0,6,%i0
1636	bpos,pt	%icc,.main_loop
1637	add	%i3,%i4,%i3
1638	ba	.after_main_loop
1639	nop
1640.spec6:
! Special-case path for the element in %f12 (%l6 holds its bits masked with
! G5_CONST). If the value is in range after all, control goes back to
! .spec6_cont. Otherwise the six pending elements in %f40..%f50 are
! finished and stored, and then the special element is classified as
! out-of-range, Inf or NaN.
!
! Per element the interleaved code below computes:
!   k = (int) y;  y -= (double) k;  y = (KA2 * y + KA1) * y;
!   T = table[(k & 255) * 8] with fpackfix(k) added by fpadd32;
!   result = (float) (T + T * y)
! 0x7f800000 is the single precision +Inf bit pattern.
1641	sethi	%hi(0x7f800000),%o5
1642	cmp	%l6,%o5
1643	bge,pn	%icc,1f
1644	nop
1645	fcmpes	%fcc0,%f12,%f3
1646	fcmpes	%fcc1,%f12,THRESHOLDL
1647	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
1648	fstod	%f12,%f52			! (6) y = (double) X
1649	fbl,a,pt	%fcc1,.spec6_cont	! if ( X < THRESHOLDL )
1650	fstod	%f12,%f52			! (6) y = (double) X
16511:
! Scale %f46/%f48/%f50 by F62_K256ONLN2. %f40..%f44 are not scaled on this
! path (presumably already done before the branch here - confirm).
1652	fmuld	F62_K256ONLN2,%f46,%f46
1653
1654	fdtoi	%f40,%f16
1655	st	%f16,[%fp+tmp0]
1656	fmuld	F62_K256ONLN2,%f48,%f48
1657
1658	fdtoi	%f42,%f2
1659	st	%f2,[%fp+tmp1]
1660	fmuld	F62_K256ONLN2,%f50,%f50
1661
1662	fdtoi	%f44,%f4
1663	st	%f4,[%fp+tmp2]
1664
1665	fdtoi	%f46,%f6
1666	st	%f6,[%fp+tmp3]
1667
1668	fdtoi	%f48,%f8
1669	st	%f8,[%fp+tmp4]
1670
1671	fdtoi	%f50,%f10
1672	st	%f10,[%fp+tmp5]
1673
1674	fitod	%f16,%f34
1675	fpackfix	%f16,%f16
1676
1677	fitod	%f2,%f18
1678	fpackfix	%f2,%f2
1679
1680	fitod	%f4,%f20
1681	fpackfix	%f4,%f4
1682
1683	fitod	%f6,%f22
1684	fpackfix	%f6,%f6
1685
1686	fitod	%f8,%f24
1687	fpackfix	%f8,%f8
1688
1689	fitod	%f10,%f26
1690	fpackfix	%f10,%f10
1691
! From here the integer work (reload k, mask to 8 bits, scale by 8) is
! interleaved with the floating point remainder/polynomial steps.
1692	ld	[%fp+tmp0],%o0
1693	fsubd	%f40,%f34,%f40
1694
1695	ld	[%fp+tmp1],%o1
1696	fsubd	%f42,%f18,%f42
1697
1698	ld	[%fp+tmp2],%o2
1699	and	%o0,255,%o0
1700	fsubd	%f44,%f20,%f44
1701
1702	ld	[%fp+tmp3],%o3
1703	and	%o1,255,%o1
1704	fsubd	%f46,%f22,%f46
1705
1706	sll	%o0,3,%o0
1707	sll	%o1,3,%o1
1708	fmuld	F60_KA2,%f40,%f34
1709	fsubd	%f48,%f24,%f48
1710
1711	and	%o2,255,%o2
1712	fmuld	F60_KA2,%f42,%f18
1713	fsubd	%f50,%f26,%f50
1714
1715	sll	%o2,3,%o2
1716	fmuld	F60_KA2,%f44,%f20
1717
1718	ld	[%fp+tmp4],%o4
1719	and	%o3,255,%o3
1720	fmuld	F60_KA2,%f46,%f22
1721
! %o5 is reused as the sixth table index, which destroys the Inf pattern;
! it is rebuilt with sethi before the classification compare below.
1722	ld	[%fp+tmp5],%o5
1723	sll	%o3,3,%o3
1724	fmuld	F60_KA2,%f48,%f24
1725	faddd	F58_KA1,%f34,%f34
1726
1727	and	%o4,255,%o4
1728	fmuld	F60_KA2,%f50,%f26
1729	faddd	F58_KA1,%f18,%f18
1730
1731	and	%o5,255,%o5
1732	faddd	F58_KA1,%f20,%f20
1733
1734	sll	%o5,3,%o5
1735	faddd	F58_KA1,%f22,%f22
1736
1737	fmuld	%f34,%f40,%f40
1738	ldd	[G1_CONST_TBL+%o0],%f34
1739	faddd	F58_KA1,%f24,%f24
1740
1741	fmuld	%f18,%f42,%f42
1742	ldd	[G1_CONST_TBL+%o1],%f18
1743	faddd	F58_KA1,%f26,%f26
1744
1745	fmuld	%f20,%f44,%f44
1746	ldd	[G1_CONST_TBL+%o2],%f20
1747
1748	fmuld	%f22,%f46,%f46
1749	ldd	[G1_CONST_TBL+%o3],%f22
1750	sll	%o4,3,%o4
1751
1752	fmuld	%f24,%f48,%f48
1753	ldd	[G1_CONST_TBL+%o4],%f24
1754	fpadd32 %f16,%f34,%f34
1755
1756	fmuld	%f26,%f50,%f50
1757	ldd	[G1_CONST_TBL+%o5],%f26
1758	fpadd32 %f2,%f18,%f18
1759
1760	fpadd32 %f4,%f20,%f20
1761
1762	fpadd32 %f6,%f22,%f22
1763
1764	fpadd32 %f8,%f24,%f24
1765	fmuld	%f34,%f40,%f40
1766
1767	fpadd32 %f10,%f26,%f26
1768	fmuld	%f18,%f42,%f42
1769
1770	fmuld	%f20,%f44,%f44
1771
1772	fmuld	%f22,%f46,%f46
1773
1774	fmuld	%f24,%f48,%f48
1775	faddd	%f34,%f40,%f40
1776
1777	fmuld	%f26,%f50,%f50
1778	faddd	%f18,%f42,%f42
1779
1780	faddd	%f20,%f44,%f44
1781
1782	faddd	%f22,%f46,%f46
1783
1784	faddd	%f24,%f48,%f48
1785
1786	faddd	%f26,%f50,%f50
1787
! Round to single and store, stepping the output pointer by %i4 each time.
! %f26 is free to serve as the first temporary because %f50 is final by now.
1788	fdtos	%f40,%f26
1789	st	%f26,[%i3]
1790	add	%i3,%i4,%o4
1791
1792	fdtos	%f42,%f18
1793	st	%f18,[%o4]
1794	add	%o4,%i4,%i3
1795
1796	fdtos	%f44,%f20
1797	st	%f20,[%i3]
1798	add	%i3,%i4,%o4
1799
1800	fdtos	%f46,%f22
1801	st	%f22,[%o4]
1802	add	%o4,%i4,%i3
1803
1804	fdtos	%f48,%f24
1805	st	%f24,[%i3]
1806	add	%i3,%i4,%o4
1807
1808	fdtos	%f50,%f26
1809	st	%f26,[%o4]
1810	add	%o4,%i4,%i3
1811
! Classify the special element: below the Inf pattern -> out of range,
! equal -> Inf, above -> NaN. %o4 = 2 * %i2 from the first delay slot; the
! second delay slot already turns it into the address %i1 - 2 * %i2.
1812	sethi	%hi(0x7f800000),%o5
1813	cmp	%l6,%o5
1814	bl,pt	%icc,.spec6_out_of_range
1815	sll	%i2,1,%o4
1816
1817	ble,pn	%icc,.spec6_inf
1818	sub	%i1,%o4,%o4
1819
1820! NaN -> NaN
1821
1822	fmuls	%f12,%f12,%f12
1823	ba	.spec6_exit
1824	st	%f12,[%i3]
1825
1826.spec6_inf:
! Infinite input. %o4 already holds the source address (%i1 - 2 * %i2, set
! in the delay slot of the branch that came here); re-read the word and test
! its sign bit.
1827	ld	[%o4],%l0
! After the shift bit 31 (sign) sits in bit 2; the mask keeps only that bit.
1828	srl	%l0,29,%l0
1829	andcc	%l0,4,%l0
! Sign clear: store %f12 as is (annulled slot runs only when taken).
1830	be,a,pn	%icc,.spec6_exit
1831	st	%f12,[%i3]
1832
! Sign set: store %f3 (the register the range checks compare against as 0.0f).
1833	ba	.spec6_exit
1834	st	%f3,[%i3]
1835
1836.spec6_out_of_range:
! Finite input outside the supported range: choose one of two single
! precision constants by the sign of the input and store its square.
! NOTE(review): presumably a huge/tiny pair, so the square overflows or
! underflows - confirm in the table data.
! %o4 arrives as 2 * %i2; the source word is at %i1 - %o4.
1837	sub	%i1,%o4,%o4
1838	ld	[%o4],%l0
! %l0 = 4 when the sign bit is set, 0 otherwise.
1839	srl	%l0,29,%l0
1840	and	%l0,4,%l0
! 2048 = 256 entries * 8 bytes, i.e. just past the 2^(i/256) table
! (assuming G1_CONST_TBL addresses .CONST_TBL - TODO confirm).
1841	add	%l0,2048,%l0
1842	ld	[G1_CONST_TBL+%l0],%f2
1843	fmuls	%f2,%f2,%f2
1844	st	%f2,[%i3]
1845
1846.spec6_exit:
! Seven elements have been retired. Only one pending input remains; it
! moves to slot 0 (%f14 -> %f16, %l7 -> %l0).
1847	fmovs	%f14,%f16
1848	mov	%l7,%l0
! Slots 1..3 are refilled from memory both as integer bits and as floats.
1849	lda	[%i1]%asi,%l1
1850	lda	[%i1]%asi,%f2
1851	add	%i1,%i2,%i1
1852	lda	[%i1]%asi,%l2
1853	lda	[%i1]%asi,%f4
1854	add	%i1,%i2,%i1
1855	lda	[%i1]%asi,%l3
1856	lda	[%i1]%asi,%f6
! Slots 4..7: fetch the integer bits only; %o0..%o3 keep their addresses.
1857	add	%i1,%i2,%o0
1858	lda	[%o0]%asi,%l4
1859	add	%o0,%i2,%o1
1860	lda	[%o1]%asi,%l5
1861	add	%o1,%i2,%o2
1862	lda	[%o2]%asi,%l6
1863	add	%o2,%i2,%o3
1864	lda	[%o3]%asi,%l7
1865	add	%o3,%i2,%i1
! Mask the new words with G5_CONST (presumably clears the sign bit - confirm).
1866	and	%l1,G5_CONST,%l1
1867	and	%l2,G5_CONST,%l2
1868	and	%l3,G5_CONST,%l3
1869	and	%l4,G5_CONST,%l4
1870	and	%l5,G5_CONST,%l5
1871	and	%l6,G5_CONST,%l6
1872	and	%l7,G5_CONST,%l7
1873
! Counter -= 7; keep looping while it stays non-negative. The delay slot
! moves the output pointer past the special element stored above.
1874	subcc	%i0,7,%i0
1875	bpos,pt	%icc,.main_loop
1876	add	%i3,%i4,%i3
1877	ba	.after_main_loop
1878	nop
1879
1880	.align	16
1881.spec7:
! Special-case path for the element in %f14 (%l7 holds its bits masked with
! G5_CONST). If the value is in range after all, control goes back to
! .spec7_cont. Otherwise the seven pending elements in %f40..%f52 are
! finished and stored, and then the special element is classified as
! out-of-range, Inf or NaN.
!
! Per element the interleaved code below computes:
!   k = (int) y;  y -= (double) k;  y = (KA2 * y + KA1) * y;
!   T = table[(k & 255) * 8] with fpackfix(k) added by fpadd32;
!   result = (float) (T + T * y)
! 0x7f800000 is the single precision +Inf bit pattern.
1882	sethi	%hi(0x7f800000),%o5
1883	cmp	%l7,%o5
1884	bge,pn	%icc,1f
1885	nop
1886	fcmpes	%fcc0,%f14,%f3
1887	fcmpes	%fcc1,%f14,THRESHOLDL
1888	fbl,a,pn	%fcc0,1f		! if ( X < 0.0f )
1889	fstod	%f14,%f54			! (7) y = (double) X
1890	fbl,a,pt	%fcc1,.spec7_cont	! if ( X < THRESHOLDL )
1891	fstod	%f14,%f54			! (7) y = (double) X
18921:
! Only %f48/%f50/%f52 are scaled by F62_K256ONLN2 here. %f40..%f46 are
! converted directly (presumably already scaled before the branch - confirm).
1893	fdtoi	%f40,%f16
1894	st	%f16,[%fp+tmp0]
1895	fmuld	F62_K256ONLN2,%f48,%f48
1896
1897	fdtoi	%f42,%f2
1898	st	%f2,[%fp+tmp1]
1899	fmuld	F62_K256ONLN2,%f50,%f50
1900
1901	fdtoi	%f44,%f4
1902	st	%f4,[%fp+tmp2]
1903	fmuld	F62_K256ONLN2,%f52,%f52
1904
1905	fdtoi	%f46,%f6
1906	st	%f6,[%fp+tmp3]
1907
1908	fdtoi	%f48,%f8
1909	st	%f8,[%fp+tmp4]
1910
1911	fdtoi	%f50,%f10
1912	st	%f10,[%fp+tmp5]
1913
1914	fdtoi	%f52,%f12
1915	st	%f12,[%fp+tmp6]
1916
1917	fitod	%f16,%f34
1918	fpackfix	%f16,%f16
1919
1920	fitod	%f2,%f18
1921	fpackfix	%f2,%f2
1922
1923	fitod	%f4,%f20
1924	fpackfix	%f4,%f4
1925
1926	fitod	%f6,%f22
1927	fpackfix	%f6,%f6
1928
1929	fitod	%f8,%f24
1930	fpackfix	%f8,%f8
1931
1932	fitod	%f10,%f26
1933	fpackfix	%f10,%f10
1934
1935	fitod	%f12,%f28
1936	fpackfix	%f12,%f12
1937
! From here the integer work (reload k, mask to 8 bits, scale by 8) is
! interleaved with the floating point remainder/polynomial steps.
1938	ld	[%fp+tmp0],%o0
1939	fsubd	%f40,%f34,%f40
1940
1941	ld	[%fp+tmp1],%o1
1942	fsubd	%f42,%f18,%f42
1943
1944	ld	[%fp+tmp2],%o2
1945	and	%o0,255,%o0
1946	fsubd	%f44,%f20,%f44
1947
1948	ld	[%fp+tmp3],%o3
1949	and	%o1,255,%o1
1950	fsubd	%f46,%f22,%f46
1951
1952	sll	%o0,3,%o0
1953	sll	%o1,3,%o1
1954	fmuld	F60_KA2,%f40,%f34
1955	fsubd	%f48,%f24,%f48
1956
1957	and	%o2,255,%o2
1958	fmuld	F60_KA2,%f42,%f18
1959	fsubd	%f50,%f26,%f50
1960
1961	sll	%o2,3,%o2
1962	fmuld	F60_KA2,%f44,%f20
1963	fsubd	%f52,%f28,%f52
1964
1965	ld	[%fp+tmp4],%o4
1966	and	%o3,255,%o3
1967	fmuld	F60_KA2,%f46,%f22
1968
! %o5 is reused as the sixth table index, which destroys the Inf pattern;
! it is rebuilt with sethi before the classification compare below.
1969	ld	[%fp+tmp5],%o5
1970	sll	%o3,3,%o3
1971	fmuld	F60_KA2,%f48,%f24
1972	faddd	F58_KA1,%f34,%f34
1973
! %o7 serves as scratch for the seventh table index.
1974	ld	[%fp+tmp6],%o7
1975	and	%o4,255,%o4
1976	fmuld	F60_KA2,%f50,%f26
1977	faddd	F58_KA1,%f18,%f18
1978
1979	and	%o5,255,%o5
1980	fmuld	F60_KA2,%f52,%f28
1981	faddd	F58_KA1,%f20,%f20
1982
1983	sll	%o5,3,%o5
1984	faddd	F58_KA1,%f22,%f22
1985
1986	fmuld	%f34,%f40,%f40
1987	ldd	[G1_CONST_TBL+%o0],%f34
1988	faddd	F58_KA1,%f24,%f24
1989
1990	fmuld	%f18,%f42,%f42
1991	ldd	[G1_CONST_TBL+%o1],%f18
1992	faddd	F58_KA1,%f26,%f26
1993
1994	fmuld	%f20,%f44,%f44
1995	ldd	[G1_CONST_TBL+%o2],%f20
1996	faddd	F58_KA1,%f28,%f28
1997
1998	fmuld	%f22,%f46,%f46
1999	ldd	[G1_CONST_TBL+%o3],%f22
2000	sll	%o4,3,%o4
2001
2002	fmuld	%f24,%f48,%f48
2003	ldd	[G1_CONST_TBL+%o4],%f24
2004	and	%o7,255,%o7
2005	fpadd32 %f16,%f34,%f34
2006
2007	fmuld	%f26,%f50,%f50
2008	ldd	[G1_CONST_TBL+%o5],%f26
2009	sll	%o7,3,%o7
2010	fpadd32 %f2,%f18,%f18
2011
2012	fmuld	%f28,%f52,%f52
2013	ldd	[G1_CONST_TBL+%o7],%f28
2014	fpadd32 %f4,%f20,%f20
2015
2016	fpadd32 %f6,%f22,%f22
2017
2018	fpadd32 %f8,%f24,%f24
2019	fmuld	%f34,%f40,%f40
2020
2021	fpadd32 %f10,%f26,%f26
2022	fmuld	%f18,%f42,%f42
2023
2024	fpadd32 %f12,%f28,%f28
2025	fmuld	%f20,%f44,%f44
2026
2027	fmuld	%f22,%f46,%f46
2028
2029	fmuld	%f24,%f48,%f48
2030	faddd	%f34,%f40,%f40
2031
2032	fmuld	%f26,%f50,%f50
2033	faddd	%f18,%f42,%f42
2034
2035	fmuld	%f28,%f52,%f52
2036	faddd	%f20,%f44,%f44
2037
2038	faddd	%f22,%f46,%f46
2039
2040	faddd	%f24,%f48,%f48
2041
2042	faddd	%f26,%f50,%f50
2043
2044	faddd	%f28,%f52,%f52
2045
! Round to single and store, stepping the output pointer by %i4 each time.
! %f26 is free to serve as the first temporary because %f50 is final by now.
2046	fdtos	%f40,%f26
2047	st	%f26,[%i3]
2048	add	%i3,%i4,%o4
2049
2050	fdtos	%f42,%f18
2051	st	%f18,[%o4]
2052	add	%o4,%i4,%i3
2053
2054	fdtos	%f44,%f20
2055	st	%f20,[%i3]
2056	add	%i3,%i4,%o4
2057
2058	fdtos	%f46,%f22
2059	st	%f22,[%o4]
2060	add	%o4,%i4,%i3
2061
2062	fdtos	%f48,%f24
2063	st	%f24,[%i3]
2064	add	%i3,%i4,%o4
2065
2066	fdtos	%f50,%f26
2067	st	%f26,[%o4]
2068	add	%o4,%i4,%i3
2069
2070	fdtos	%f52,%f28
2071	st	%f28,[%i3]
2072	add	%i3,%i4,%i3
2073
! Classify the special element: below the Inf pattern -> out of range,
! equal -> Inf, above -> NaN. The first delay slot forms the source address
! %i1 - %i2; the second one pre-loads the source word into %l0.
2074	sethi	%hi(0x7f800000),%o5
2075	cmp	%l7,%o5
2076	bl,pt	%icc,.spec7_out_of_range
2077	sub	%i1,%i2,%o4
2078
2079	ble,pn	%icc,.spec7_inf
2080	ld	[%o4],%l0
2081
2082! NaN -> NaN
2083
2084	fmuls	%f14,%f14,%f14
2085	ba	.spec7_exit
2086	st	%f14,[%i3]
2087
2088.spec7_inf:
! Infinite input. %l0 already holds the source word (loaded in the delay
! slot of the branch that came here); test its sign bit.
! After the shift bit 31 (sign) sits in bit 2; the mask keeps only that bit.
2089	srl	%l0,29,%l0
2090	andcc	%l0,4,%l0
! Sign clear: store %f14 as is (annulled slot runs only when taken).
2091	be,a,pn	%icc,.spec7_exit
2092	st	%f14,[%i3]
2093
! Sign set: store %f3 (the register the range checks compare against as 0.0f).
2094	ba	.spec7_exit
2095	st	%f3,[%i3]
2096
2097.spec7_out_of_range:
! Finite input outside the supported range: choose one of two single
! precision constants by the sign of the input and store its square.
! NOTE(review): presumably a huge/tiny pair, so the square overflows or
! underflows - confirm in the table data.
! %o4 arrives as the source address %i1 - %i2 (set in the branch delay slot).
2098	ld	[%o4],%l0
! %l0 = 4 when the sign bit is set, 0 otherwise.
2099	srl	%l0,29,%l0
2100	and	%l0,4,%l0
! 2048 = 256 entries * 8 bytes, i.e. just past the 2^(i/256) table
! (assuming G1_CONST_TBL addresses .CONST_TBL - TODO confirm).
2101	add	%l0,2048,%l0
2102	ld	[G1_CONST_TBL+%l0],%f2
2103	fmuls	%f2,%f2,%f2
2104	st	%f2,[%i3]
2105
2106.spec7_exit:
! All eight elements of the group have been retired, so there is no register
! state to slide down. Counter -= 8; while it stays non-negative re-enter
! through .main_loop_preload, otherwise go to .tail. The delay slot moves
! the output pointer past the special element stored above.
2107	subcc	%i0,8,%i0
2108	bpos,pt	%icc,.main_loop_preload
2109	add	%i3,%i4,%i3
2110
2111	ba	.tail
2112	nop
2113	SET_SIZE(__vexpf)
2114
2115