xref: /titanic_44/usr/src/lib/libmvec/common/vis/__vexp.S (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vexp.S"
30
31#include "libm.h"
32
33	RO_DATA
34
35/********************************************************************
36 * vexp() algorithm is from mopt:f_exp.c.  Basics are included here
37 * to supplement comments within this file.  vexp() has been unrolled
38 * to a depth of 3.  Only element 0 is documented.
39 *
40 * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
41 *	2^44 to allow *2^k w/o shifting within the FP registers.  These
42 *	had to be removed for CHEETAH to avoid the fdtox of a very large
43 *	number, which would trap to kernel (2^52).
44 *
45 * Let 	x = (k + j/256)*ln2 + r,  k and j integers, 0 <= j < 256
46 * then	exp(x) = exp((k+j/256)*ln2) * exp(r)
47 *	       = 2^k * 2^(j/256) * exp(r)
48 * where exp(r) is computed by the polynomial approximation
49 *	exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
50 *	       = 1 + r*(1+r*(B1+r*(B2+r*B3)))
51 *	let
52 *	p = r*(1+r*(B1+r*(B2+r*B3)))	! notice, not quite exp(r)
53 *	q = 2^(j/256) (high 64 bits)
54 *	t = 2^(j/256) (extra precision)	! both from _TBL_exp_z[]
55 *	then
56 *	2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
57 *	then actual computation is 2^k * ( q + ( t + q*p ) )
58 *
59 ********************************************************************/
60
/*
 * TBL: 256 entries of 16 bytes each.  Every entry is two doubles, each
 * written as a pair of .word values with the high word first.  The code
 * below indexes the table with a byte offset of the form j*16 (an
 * offset masked with 0xff0) and reads the double at +0 and the double
 * at +8.  Per the algorithm notes above, the first double of entry j is
 * q = 2^(j/256) and the second is t, the extra-precision correction to
 * q.  Entry 0 is {1.0, 0.0}.
 */
61	.align	16
62TBL:
63	.word	0x3ff00000,0x00000000
64	.word	0x00000000,0x00000000
65	.word	0x3ff00b1a,0xfa5abcbf
66	.word	0xbc84f6b2,0xa7609f71
67	.word	0x3ff0163d,0xa9fb3335
68	.word	0x3c9b6129,0x9ab8cdb7
69	.word	0x3ff02168,0x143b0281
70	.word	0xbc82bf31,0x0fc54eb6
71	.word	0x3ff02c9a,0x3e778061
72	.word	0xbc719083,0x535b085d
73	.word	0x3ff037d4,0x2e11bbcc
74	.word	0x3c656811,0xeeade11a
75	.word	0x3ff04315,0xe86e7f85
76	.word	0xbc90a31c,0x1977c96e
77	.word	0x3ff04e5f,0x72f654b1
78	.word	0x3c84c379,0x3aa0d08c
79	.word	0x3ff059b0,0xd3158574
80	.word	0x3c8d73e2,0xa475b465
81	.word	0x3ff0650a,0x0e3c1f89
82	.word	0xbc95cb7b,0x5799c396
83	.word	0x3ff0706b,0x29ddf6de
84	.word	0xbc8c91df,0xe2b13c26
85	.word	0x3ff07bd4,0x2b72a836
86	.word	0x3c832334,0x54458700
87	.word	0x3ff08745,0x18759bc8
88	.word	0x3c6186be,0x4bb284ff
89	.word	0x3ff092bd,0xf66607e0
90	.word	0xbc968063,0x800a3fd1
91	.word	0x3ff09e3e,0xcac6f383
92	.word	0x3c914878,0x18316136
93	.word	0x3ff0a9c7,0x9b1f3919
94	.word	0x3c85d16c,0x873d1d38
95	.word	0x3ff0b558,0x6cf9890f
96	.word	0x3c98a62e,0x4adc610a
97	.word	0x3ff0c0f1,0x45e46c85
98	.word	0x3c94f989,0x06d21cef
99	.word	0x3ff0cc92,0x2b7247f7
100	.word	0x3c901edc,0x16e24f71
101	.word	0x3ff0d83b,0x23395dec
102	.word	0xbc9bc14d,0xe43f316a
103	.word	0x3ff0e3ec,0x32d3d1a2
104	.word	0x3c403a17,0x27c57b53
105	.word	0x3ff0efa5,0x5fdfa9c5
106	.word	0xbc949db9,0xbc54021b
107	.word	0x3ff0fb66,0xaffed31b
108	.word	0xbc6b9bed,0xc44ebd7b
109	.word	0x3ff10730,0x28d7233e
110	.word	0x3c8d46eb,0x1692fdd5
111	.word	0x3ff11301,0xd0125b51
112	.word	0xbc96c510,0x39449b3a
113	.word	0x3ff11edb,0xab5e2ab6
114	.word	0xbc9ca454,0xf703fb72
115	.word	0x3ff12abd,0xc06c31cc
116	.word	0xbc51b514,0xb36ca5c7
117	.word	0x3ff136a8,0x14f204ab
118	.word	0xbc67108f,0xba48dcf0
119	.word	0x3ff1429a,0xaea92de0
120	.word	0xbc932fbf,0x9af1369e
121	.word	0x3ff14e95,0x934f312e
122	.word	0xbc8b91e8,0x39bf44ab
123	.word	0x3ff15a98,0xc8a58e51
124	.word	0x3c82406a,0xb9eeab0a
125	.word	0x3ff166a4,0x5471c3c2
126	.word	0x3c58f23b,0x82ea1a32
127	.word	0x3ff172b8,0x3c7d517b
128	.word	0xbc819041,0xb9d78a76
129	.word	0x3ff17ed4,0x8695bbc0
130	.word	0x3c709e3f,0xe2ac5a64
131	.word	0x3ff18af9,0x388c8dea
132	.word	0xbc911023,0xd1970f6c
133	.word	0x3ff19726,0x58375d2f
134	.word	0x3c94aadd,0x85f17e08
135	.word	0x3ff1a35b,0xeb6fcb75
136	.word	0x3c8e5b4c,0x7b4968e4
137	.word	0x3ff1af99,0xf8138a1c
138	.word	0x3c97bf85,0xa4b69280
139	.word	0x3ff1bbe0,0x84045cd4
140	.word	0xbc995386,0x352ef607
141	.word	0x3ff1c82f,0x95281c6b
142	.word	0x3c900977,0x8010f8c9
143	.word	0x3ff1d487,0x3168b9aa
144	.word	0x3c9e016e,0x00a2643c
145	.word	0x3ff1e0e7,0x5eb44027
146	.word	0xbc96fdd8,0x088cb6de
147	.word	0x3ff1ed50,0x22fcd91d
148	.word	0xbc91df98,0x027bb78c
149	.word	0x3ff1f9c1,0x8438ce4d
150	.word	0xbc9bf524,0xa097af5c
151	.word	0x3ff2063b,0x88628cd6
152	.word	0x3c8dc775,0x814a8494
153	.word	0x3ff212be,0x3578a819
154	.word	0x3c93592d,0x2cfcaac9
155	.word	0x3ff21f49,0x917ddc96
156	.word	0x3c82a97e,0x9494a5ee
157	.word	0x3ff22bdd,0xa27912d1
158	.word	0x3c8d34fb,0x5577d69e
159	.word	0x3ff2387a,0x6e756238
160	.word	0x3c99b07e,0xb6c70573
161	.word	0x3ff2451f,0xfb82140a
162	.word	0x3c8acfcc,0x911ca996
163	.word	0x3ff251ce,0x4fb2a63f
164	.word	0x3c8ac155,0xbef4f4a4
165	.word	0x3ff25e85,0x711ece75
166	.word	0x3c93e1a2,0x4ac31b2c
167	.word	0x3ff26b45,0x65e27cdd
168	.word	0x3c82bd33,0x9940e9d9
169	.word	0x3ff2780e,0x341ddf29
170	.word	0x3c9e067c,0x05f9e76c
171	.word	0x3ff284df,0xe1f56381
172	.word	0xbc9a4c3a,0x8c3f0d7e
173	.word	0x3ff291ba,0x7591bb70
174	.word	0xbc82cc72,0x28401cbc
175	.word	0x3ff29e9d,0xf51fdee1
176	.word	0x3c8612e8,0xafad1255
177	.word	0x3ff2ab8a,0x66d10f13
178	.word	0xbc995743,0x191690a7
179	.word	0x3ff2b87f,0xd0dad990
180	.word	0xbc410adc,0xd6381aa4
181	.word	0x3ff2c57e,0x39771b2f
182	.word	0xbc950145,0xa6eb5124
183	.word	0x3ff2d285,0xa6e4030b
184	.word	0x3c900247,0x54db41d5
185	.word	0x3ff2df96,0x1f641589
186	.word	0x3c9d16cf,0xfbbce198
187	.word	0x3ff2ecaf,0xa93e2f56
188	.word	0x3c71ca0f,0x45d52383
189	.word	0x3ff2f9d2,0x4abd886b
190	.word	0xbc653c55,0x532bda93
191	.word	0x3ff306fe,0x0a31b715
192	.word	0x3c86f46a,0xd23182e4
193	.word	0x3ff31432,0xedeeb2fd
194	.word	0x3c8959a3,0xf3f3fcd0
195	.word	0x3ff32170,0xfc4cd831
196	.word	0x3c8a9ce7,0x8e18047c
197	.word	0x3ff32eb8,0x3ba8ea32
198	.word	0xbc9c45e8,0x3cb4f318
199	.word	0x3ff33c08,0xb26416ff
200	.word	0x3c932721,0x843659a6
201	.word	0x3ff34962,0x66e3fa2d
202	.word	0xbc835a75,0x930881a4
203	.word	0x3ff356c5,0x5f929ff1
204	.word	0xbc8b5cee,0x5c4e4628
205	.word	0x3ff36431,0xa2de883b
206	.word	0xbc8c3144,0xa06cb85e
207	.word	0x3ff371a7,0x373aa9cb
208	.word	0xbc963aea,0xbf42eae2
209	.word	0x3ff37f26,0x231e754a
210	.word	0xbc99f5ca,0x9eceb23c
211	.word	0x3ff38cae,0x6d05d866
212	.word	0xbc9e958d,0x3c9904bd
213	.word	0x3ff39a40,0x1b7140ef
214	.word	0xbc99a9a5,0xfc8e2934
215	.word	0x3ff3a7db,0x34e59ff7
216	.word	0xbc75e436,0xd661f5e3
217	.word	0x3ff3b57f,0xbfec6cf4
218	.word	0x3c954c66,0xe26fff18
219	.word	0x3ff3c32d,0xc313a8e5
220	.word	0xbc9efff8,0x375d29c3
221	.word	0x3ff3d0e5,0x44ede173
222	.word	0x3c7fe8d0,0x8c284c71
223	.word	0x3ff3dea6,0x4c123422
224	.word	0x3c8ada09,0x11f09ebc
225	.word	0x3ff3ec70,0xdf1c5175
226	.word	0xbc8af663,0x7b8c9bca
227	.word	0x3ff3fa45,0x04ac801c
228	.word	0xbc97d023,0xf956f9f3
229	.word	0x3ff40822,0xc367a024
230	.word	0x3c8bddf8,0xb6f4d048
231	.word	0x3ff4160a,0x21f72e2a
232	.word	0xbc5ef369,0x1c309278
233	.word	0x3ff423fb,0x2709468a
234	.word	0xbc98462d,0xc0b314dd
235	.word	0x3ff431f5,0xd950a897
236	.word	0xbc81c7dd,0xe35f7998
237	.word	0x3ff43ffa,0x3f84b9d4
238	.word	0x3c8880be,0x9704c002
239	.word	0x3ff44e08,0x6061892d
240	.word	0x3c489b7a,0x04ef80d0
241	.word	0x3ff45c20,0x42a7d232
242	.word	0xbc686419,0x82fb1f8e
243	.word	0x3ff46a41,0xed1d0057
244	.word	0x3c9c944b,0xd1648a76
245	.word	0x3ff4786d,0x668b3237
246	.word	0xbc9c20f0,0xed445733
247	.word	0x3ff486a2,0xb5c13cd0
248	.word	0x3c73c1a3,0xb69062f0
249	.word	0x3ff494e1,0xe192aed2
250	.word	0xbc83b289,0x5e499ea0
251	.word	0x3ff4a32a,0xf0d7d3de
252	.word	0x3c99cb62,0xf3d1be56
253	.word	0x3ff4b17d,0xea6db7d7
254	.word	0xbc8125b8,0x7f2897f0
255	.word	0x3ff4bfda,0xd5362a27
256	.word	0x3c7d4397,0xafec42e2
257	.word	0x3ff4ce41,0xb817c114
258	.word	0x3c905e29,0x690abd5d
259	.word	0x3ff4dcb2,0x99fddd0d
260	.word	0x3c98ecdb,0xbc6a7833
261	.word	0x3ff4eb2d,0x81d8abff
262	.word	0xbc95257d,0x2e5d7a52
263	.word	0x3ff4f9b2,0x769d2ca7
264	.word	0xbc94b309,0xd25957e3
265	.word	0x3ff50841,0x7f4531ee
266	.word	0x3c7a249b,0x49b7465f
267	.word	0x3ff516da,0xa2cf6642
268	.word	0xbc8f7685,0x69bd93ee
269	.word	0x3ff5257d,0xe83f4eef
270	.word	0xbc7c998d,0x43efef71
271	.word	0x3ff5342b,0x569d4f82
272	.word	0xbc807abe,0x1db13cac
273	.word	0x3ff542e2,0xf4f6ad27
274	.word	0x3c87926d,0x192d5f7e
275	.word	0x3ff551a4,0xca5d920f
276	.word	0xbc8d689c,0xefede59a
277	.word	0x3ff56070,0xdde910d2
278	.word	0xbc90fb6e,0x168eebf0
279	.word	0x3ff56f47,0x36b527da
280	.word	0x3c99bb2c,0x011d93ad
281	.word	0x3ff57e27,0xdbe2c4cf
282	.word	0xbc90b98c,0x8a57b9c4
283	.word	0x3ff58d12,0xd497c7fd
284	.word	0x3c8295e1,0x5b9a1de8
285	.word	0x3ff59c08,0x27ff07cc
286	.word	0xbc97e2ce,0xe467e60f
287	.word	0x3ff5ab07,0xdd485429
288	.word	0x3c96324c,0x054647ad
289	.word	0x3ff5ba11,0xfba87a03
290	.word	0xbc9b77a1,0x4c233e1a
291	.word	0x3ff5c926,0x8a5946b7
292	.word	0x3c3c4b1b,0x816986a2
293	.word	0x3ff5d845,0x90998b93
294	.word	0xbc9cd6a7,0xa8b45642
295	.word	0x3ff5e76f,0x15ad2148
296	.word	0x3c9ba6f9,0x3080e65e
297	.word	0x3ff5f6a3,0x20dceb71
298	.word	0xbc89eadd,0xe3cdcf92
299	.word	0x3ff605e1,0xb976dc09
300	.word	0xbc93e242,0x9b56de47
301	.word	0x3ff6152a,0xe6cdf6f4
302	.word	0x3c9e4b3e,0x4ab84c27
303	.word	0x3ff6247e,0xb03a5585
304	.word	0xbc9383c1,0x7e40b497
305	.word	0x3ff633dd,0x1d1929fd
306	.word	0x3c984710,0xbeb964e5
307	.word	0x3ff64346,0x34ccc320
308	.word	0xbc8c483c,0x759d8932
309	.word	0x3ff652b9,0xfebc8fb7
310	.word	0xbc9ae3d5,0xc9a73e08
311	.word	0x3ff66238,0x82552225
312	.word	0xbc9bb609,0x87591c34
313	.word	0x3ff671c1,0xc70833f6
314	.word	0xbc8e8732,0x586c6134
315	.word	0x3ff68155,0xd44ca973
316	.word	0x3c6038ae,0x44f73e65
317	.word	0x3ff690f4,0xb19e9538
318	.word	0x3c8804bd,0x9aeb445c
319	.word	0x3ff6a09e,0x667f3bcd
320	.word	0xbc9bdd34,0x13b26456
321	.word	0x3ff6b052,0xfa75173e
322	.word	0x3c7a38f5,0x2c9a9d0e
323	.word	0x3ff6c012,0x750bdabf
324	.word	0xbc728956,0x67ff0b0d
325	.word	0x3ff6cfdc,0xddd47645
326	.word	0x3c9c7aa9,0xb6f17309
327	.word	0x3ff6dfb2,0x3c651a2f
328	.word	0xbc6bbe3a,0x683c88ab
329	.word	0x3ff6ef92,0x98593ae5
330	.word	0xbc90b974,0x9e1ac8b2
331	.word	0x3ff6ff7d,0xf9519484
332	.word	0xbc883c0f,0x25860ef6
333	.word	0x3ff70f74,0x66f42e87
334	.word	0x3c59d644,0xd45aa65f
335	.word	0x3ff71f75,0xe8ec5f74
336	.word	0xbc816e47,0x86887a99
337	.word	0x3ff72f82,0x86ead08a
338	.word	0xbc920aa0,0x2cd62c72
339	.word	0x3ff73f9a,0x48a58174
340	.word	0xbc90a8d9,0x6c65d53c
341	.word	0x3ff74fbd,0x35d7cbfd
342	.word	0x3c9047fd,0x618a6e1c
343	.word	0x3ff75feb,0x564267c9
344	.word	0xbc902459,0x57316dd3
345	.word	0x3ff77024,0xb1ab6e09
346	.word	0x3c9b7877,0x169147f8
347	.word	0x3ff78069,0x4fde5d3f
348	.word	0x3c9866b8,0x0a02162c
349	.word	0x3ff790b9,0x38ac1cf6
350	.word	0x3c9349a8,0x62aadd3e
351	.word	0x3ff7a114,0x73eb0187
352	.word	0xbc841577,0xee04992f
353	.word	0x3ff7b17b,0x0976cfdb
354	.word	0xbc9bebb5,0x8468dc88
355	.word	0x3ff7c1ed,0x0130c132
356	.word	0x3c9f124c,0xd1164dd6
357	.word	0x3ff7d26a,0x62ff86f0
358	.word	0x3c91bddb,0xfb72b8b4
359	.word	0x3ff7e2f3,0x36cf4e62
360	.word	0x3c705d02,0xba15797e
361	.word	0x3ff7f387,0x8491c491
362	.word	0xbc807f11,0xcf9311ae
363	.word	0x3ff80427,0x543e1a12
364	.word	0xbc927c86,0x626d972b
365	.word	0x3ff814d2,0xadd106d9
366	.word	0x3c946437,0x0d151d4d
367	.word	0x3ff82589,0x994cce13
368	.word	0xbc9d4c1d,0xd41532d8
369	.word	0x3ff8364c,0x1eb941f7
370	.word	0x3c999b9a,0x31df2bd5
371	.word	0x3ff8471a,0x4623c7ad
372	.word	0xbc88d684,0xa341cdfb
373	.word	0x3ff857f4,0x179f5b21
374	.word	0xbc5ba748,0xf8b216d0
375	.word	0x3ff868d9,0x9b4492ec
376	.word	0x3ca01c83,0xb21584a3
377	.word	0x3ff879ca,0xd931a436
378	.word	0x3c85d2d7,0xd2db47bc
379	.word	0x3ff88ac7,0xd98a6699
380	.word	0x3c9994c2,0xf37cb53a
381	.word	0x3ff89bd0,0xa478580f
382	.word	0x3c9d5395,0x4475202a
383	.word	0x3ff8ace5,0x422aa0db
384	.word	0x3c96e9f1,0x56864b27
385	.word	0x3ff8be05,0xbad61778
386	.word	0x3c9ecb5e,0xfc43446e
387	.word	0x3ff8cf32,0x16b5448c
388	.word	0xbc70d55e,0x32e9e3aa
389	.word	0x3ff8e06a,0x5e0866d9
390	.word	0xbc97114a,0x6fc9b2e6
391	.word	0x3ff8f1ae,0x99157736
392	.word	0x3c85cc13,0xa2e3976c
393	.word	0x3ff902fe,0xd0282c8a
394	.word	0x3c9592ca,0x85fe3fd2
395	.word	0x3ff9145b,0x0b91ffc6
396	.word	0xbc9dd679,0x2e582524
397	.word	0x3ff925c3,0x53aa2fe2
398	.word	0xbc83455f,0xa639db7f
399	.word	0x3ff93737,0xb0cdc5e5
400	.word	0xbc675fc7,0x81b57ebc
401	.word	0x3ff948b8,0x2b5f98e5
402	.word	0xbc8dc3d6,0x797d2d99
403	.word	0x3ff95a44,0xcbc8520f
404	.word	0xbc764b7c,0x96a5f039
405	.word	0x3ff96bdd,0x9a7670b3
406	.word	0xbc5ba596,0x7f19c896
407	.word	0x3ff97d82,0x9fde4e50
408	.word	0xbc9d185b,0x7c1b85d0
409	.word	0x3ff98f33,0xe47a22a2
410	.word	0x3c7cabda,0xa24c78ed
411	.word	0x3ff9a0f1,0x70ca07ba
412	.word	0xbc9173bd,0x91cee632
413	.word	0x3ff9b2bb,0x4d53fe0d
414	.word	0xbc9dd84e,0x4df6d518
415	.word	0x3ff9c491,0x82a3f090
416	.word	0x3c7c7c46,0xb071f2be
417	.word	0x3ff9d674,0x194bb8d5
418	.word	0xbc9516be,0xa3dd8233
419	.word	0x3ff9e863,0x19e32323
420	.word	0x3c7824ca,0x78e64c6e
421	.word	0x3ff9fa5e,0x8d07f29e
422	.word	0xbc84a9ce,0xaaf1face
423	.word	0x3ffa0c66,0x7b5de565
424	.word	0xbc935949,0x5d1cd533
425	.word	0x3ffa1e7a,0xed8eb8bb
426	.word	0x3c9c6618,0xee8be70e
427	.word	0x3ffa309b,0xec4a2d33
428	.word	0x3c96305c,0x7ddc36ab
429	.word	0x3ffa42c9,0x80460ad8
430	.word	0xbc9aa780,0x589fb120
431	.word	0x3ffa5503,0xb23e255d
432	.word	0xbc9d2f6e,0xdb8d41e1
433	.word	0x3ffa674a,0x8af46052
434	.word	0x3c650f56,0x30670366
435	.word	0x3ffa799e,0x1330b358
436	.word	0x3c9bcb7e,0xcac563c6
437	.word	0x3ffa8bfe,0x53c12e59
438	.word	0xbc94f867,0xb2ba15a8
439	.word	0x3ffa9e6b,0x5579fdbf
440	.word	0x3c90fac9,0x0ef7fd31
441	.word	0x3ffab0e5,0x21356eba
442	.word	0x3c889c31,0xdae94544
443	.word	0x3ffac36b,0xbfd3f37a
444	.word	0xbc8f9234,0xcae76cd0
445	.word	0x3ffad5ff,0x3a3c2774
446	.word	0x3c97ef3b,0xb6b1b8e4
447	.word	0x3ffae89f,0x995ad3ad
448	.word	0x3c97a1cd,0x345dcc81
449	.word	0x3ffafb4c,0xe622f2ff
450	.word	0xbc94b2fc,0x0f315ecc
451	.word	0x3ffb0e07,0x298db666
452	.word	0xbc9bdef5,0x4c80e425
453	.word	0x3ffb20ce,0x6c9a8952
454	.word	0x3c94dd02,0x4a0756cc
455	.word	0x3ffb33a2,0xb84f15fb
456	.word	0xbc62805e,0x3084d708
457	.word	0x3ffb4684,0x15b749b1
458	.word	0xbc7f763d,0xe9df7c90
459	.word	0x3ffb5972,0x8de5593a
460	.word	0xbc9c71df,0xbbba6de3
461	.word	0x3ffb6c6e,0x29f1c52a
462	.word	0x3c92a8f3,0x52883f6e
463	.word	0x3ffb7f76,0xf2fb5e47
464	.word	0xbc75584f,0x7e54ac3b
465	.word	0x3ffb928c,0xf22749e4
466	.word	0xbc9b7216,0x54cb65c6
467	.word	0x3ffba5b0,0x30a1064a
468	.word	0xbc9efcd3,0x0e54292e
469	.word	0x3ffbb8e0,0xb79a6f1f
470	.word	0xbc3f52d1,0xc9696205
471	.word	0x3ffbcc1e,0x904bc1d2
472	.word	0x3c823dd0,0x7a2d9e84
473	.word	0x3ffbdf69,0xc3f3a207
474	.word	0xbc3c2623,0x60ea5b52
475	.word	0x3ffbf2c2,0x5bd71e09
476	.word	0xbc9efdca,0x3f6b9c73
477	.word	0x3ffc0628,0x6141b33d
478	.word	0xbc8d8a5a,0xa1fbca34
479	.word	0x3ffc199b,0xdd85529c
480	.word	0x3c811065,0x895048dd
481	.word	0x3ffc2d1c,0xd9fa652c
482	.word	0xbc96e516,0x17c8a5d7
483	.word	0x3ffc40ab,0x5fffd07a
484	.word	0x3c9b4537,0xe083c60a
485	.word	0x3ffc5447,0x78fafb22
486	.word	0x3c912f07,0x2493b5af
487	.word	0x3ffc67f1,0x2e57d14b
488	.word	0x3c92884d,0xff483cad
489	.word	0x3ffc7ba8,0x8988c933
490	.word	0xbc8e76bb,0xbe255559
491	.word	0x3ffc8f6d,0x9406e7b5
492	.word	0x3c71acbc,0x48805c44
493	.word	0x3ffca340,0x5751c4db
494	.word	0xbc87f2be,0xd10d08f4
495	.word	0x3ffcb720,0xdcef9069
496	.word	0x3c7503cb,0xd1e949db
497	.word	0x3ffccb0f,0x2e6d1675
498	.word	0xbc7d220f,0x86009093
499	.word	0x3ffcdf0b,0x555dc3fa
500	.word	0xbc8dd83b,0x53829d72
501	.word	0x3ffcf315,0x5b5bab74
502	.word	0xbc9a08e9,0xb86dff57
503	.word	0x3ffd072d,0x4a07897c
504	.word	0xbc9cbc37,0x43797a9c
505	.word	0x3ffd1b53,0x2b08c968
506	.word	0x3c955636,0x219a36ee
507	.word	0x3ffd2f87,0x080d89f2
508	.word	0xbc9d487b,0x719d8578
509	.word	0x3ffd43c8,0xeacaa1d6
510	.word	0x3c93db53,0xbf5a1614
511	.word	0x3ffd5818,0xdcfba487
512	.word	0x3c82ed02,0xd75b3706
513	.word	0x3ffd6c76,0xe862e6d3
514	.word	0x3c5fe87a,0x4a8165a0
515	.word	0x3ffd80e3,0x16c98398
516	.word	0xbc911ec1,0x8beddfe8
517	.word	0x3ffd955d,0x71ff6075
518	.word	0x3c9a052d,0xbb9af6be
519	.word	0x3ffda9e6,0x03db3285
520	.word	0x3c9c2300,0x696db532
521	.word	0x3ffdbe7c,0xd63a8315
522	.word	0xbc9b76f1,0x926b8be4
523	.word	0x3ffdd321,0xf301b460
524	.word	0x3c92da57,0x78f018c2
525	.word	0x3ffde7d5,0x641c0658
526	.word	0xbc9ca552,0x8e79ba8f
527	.word	0x3ffdfc97,0x337b9b5f
528	.word	0xbc91a5cd,0x4f184b5c
529	.word	0x3ffe1167,0x6b197d17
530	.word	0xbc72b529,0xbd5c7f44
531	.word	0x3ffe2646,0x14f5a129
532	.word	0xbc97b627,0x817a1496
533	.word	0x3ffe3b33,0x3b16ee12
534	.word	0xbc99f4a4,0x31fdc68a
535	.word	0x3ffe502e,0xe78b3ff6
536	.word	0x3c839e89,0x80a9cc8f
537	.word	0x3ffe6539,0x24676d76
538	.word	0xbc863ff8,0x7522b734
539	.word	0x3ffe7a51,0xfbc74c83
540	.word	0x3c92d522,0xca0c8de2
541	.word	0x3ffe8f79,0x77cdb740
542	.word	0xbc910894,0x80b054b1
543	.word	0x3ffea4af,0xa2a490da
544	.word	0xbc9e9c23,0x179c2893
545	.word	0x3ffeb9f4,0x867cca6e
546	.word	0x3c94832f,0x2293e4f2
547	.word	0x3ffecf48,0x2d8e67f1
548	.word	0xbc9c93f3,0xb411ad8c
549	.word	0x3ffee4aa,0xa2188510
550	.word	0x3c91c68d,0xa487568d
551	.word	0x3ffefa1b,0xee615a27
552	.word	0x3c9dc7f4,0x86a4b6b0
553	.word	0x3fff0f9c,0x1cb6412a
554	.word	0xbc932200,0x65181d45
555	.word	0x3fff252b,0x376bba97
556	.word	0x3c93a1a5,0xbf0d8e43
557	.word	0x3fff3ac9,0x48dd7274
558	.word	0xbc795a5a,0x3ed837de
559	.word	0x3fff5076,0x5b6e4540
560	.word	0x3c99d3e1,0x2dd8a18b
561	.word	0x3fff6632,0x798844f8
562	.word	0x3c9fa37b,0x3539343e
563	.word	0x3fff7bfd,0xad9cbe14
564	.word	0xbc9dbb12,0xd006350a
565	.word	0x3fff91d8,0x02243c89
566	.word	0xbc612ea8,0xa779f689
567	.word	0x3fffa7c1,0x819e90d8
568	.word	0x3c874853,0xf3a5931e
569	.word	0x3fffbdba,0x3692d514
570	.word	0xbc796773,0x15098eb6
571	.word	0x3fffd3c2,0x2b8f71f1
572	.word	0x3c62eb74,0x966579e7
573	.word	0x3fffe9d9,0x6b2a23d9
574	.word	0x3c74a603,0x7442fde3
575
/*
 * constants: fourteen doubles (high word first), loaded into %f36-%f62
 * at function entry through the byte offsets defined further down.
 *
 *   +0x00  BOUNDRY     0x3ef00000 in the high word; added with fpadd32
 *                      on the underflow path to raise the exponent field
 *                      by 0x3ef (1007)
 *   +0x08  THRESH      1024*ln2 (about 709.78); larger inputs overflow
 *   +0x10  TINY        2^-1007; undoes the BOUNDRY exponent offset
 *   +0x18  HUGE        2^1009; HUGE*HUGE produces the overflow result
 *   +0x20  SIGNBIT     mask for the sign bit of a double
 *   +0x28  TWO96       2^64 in this scaled set (2^52 in the unscaled
 *                      set that is commented out below)
 *   +0x30  NEGINF      -infinity; also used as a mask for the top
 *                      12 bits (sign and exponent field)
 *   +0x38  ONE         1.0
 *   +0x40  B1, B2, B3  polynomial coefficients, close to 1/2, 1/6, 1/24
 *   +0x58  INVLN2_256  2^12 * 256/ln2
 *   +0x60  LN2_256H    2^-12 * ln2/256, high part
 *   +0x68  LN2_256L    2^-12 * ln2/256, low part
 */
576	.align	16
577constants:
578	.word	0x3ef00000,0x00000000
579	.word	0x40862e42,0xfefa39ef
580	.word	0x01000000,0x00000000
581	.word	0x7f000000,0x00000000
582	.word	0x80000000,0x00000000
583	.word	0x43f00000,0x00000000 ! scaling 2^12 two96
584	.word	0xfff00000,0x00000000
585	.word	0x3ff00000,0x00000000
586	.word	0x3fdfffff,0xfffffff6
587	.word	0x3fc55555,0x721a1d14
588	.word	0x3fa55555,0x6e0896af
589	.word	0x41371547,0x652b82fe ! scaling 2^12 invln2_256
590	.word	0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h
591	.word	0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l
592
593	! base set w/o scaling
594	! .word	0x43300000,0x00000000 ! scaling  two96
595	! .word	0x40771547,0x652b82fe ! scaling  invln2_256
596	! .word	0x3f662e42,0xfee00000 ! scaling  ln2_256h
597	! .word	0x3d6a39ef,0x35793c76 ! scaling  ln2_256l
598
/*
 * Byte offsets of the fourteen doubles in the constants table, in table
 * order and 8 bytes apart.  They are used as [%o3+name] by the ldd
 * sequence at the start of __vexp, so they must stay in step with the
 * order of the .word pairs in the table.
 */
599#define ox3ef		0x0
600#define thresh		0x8
601#define tiny		0x10
602#define huge		0x18
603#define signbit		0x20
604#define two96		0x28
605#define neginf		0x30
606#define one		0x38
607#define B1OFF		0x40
608#define B2OFF		0x48
609#define B3OFF		0x50
610#define invln2_256	0x58
611#define ln2_256h	0x60
612#define ln2_256l	0x68
613
614! local storage indices
615
/*
 * Scratch slots addressed as [%fp+name], inside the tmps bytes that the
 * save at function entry reserves on top of the minimum frame.
 * m0/m1/m2 are 4-byte slots: each holds the fdtoi result of one of the
 * three unrolled elements while it moves from an FP register to an
 * integer register.  jnk is an 8-byte slot that absorbs the stores of
 * "previous results" on the first pass, before any real result exists.
 */
616#define m2		STACK_BIAS-0x4
617#define m1		STACK_BIAS-0x8
618#define m0		STACK_BIAS-0xc
619#define jnk		STACK_BIAS-0x20
620! sizeof temp storage - must be a multiple of 16 for V9
621#define tmps		0x20
622
623! register use
624
625! i0  n
626! i1  x
627! i2  stridex
628! i3  y
629! i4  stridey
630! i5  0x80000000
631
632! g1  TBL
633
634! l0  m0
635! l1  m1
636! l2  m2
637! l3  j0,oy0
638! l4  j1,oy1
639! l5  j2,oy2
640! l6  0x3e300000
641! l7  0x40862e41
642
643! o0  py0
644! o1  py1
645! o2  py2
646! o3  scratch
647! o4  scratch
648! o5  0x40874910
649! o7  0x7ff00000
650
651! f0  x0
652! f2
653! f4
654! f6
655! f8
656! f10 x1
657! f12
658! f14
659! f16
660! f18
661! f20 x2
662! f22
663! f24
664! f26
665! f28
666! f30
667! f32
668! f34
669! f36 0x3ef0...
670! f38 thresh
671! f40 tiny
672! f42 huge
673! f44 signbit
674! f46 two96
675! f48 neginf
676! f50 one
677! f52 B1
678! f54 B2
679! f56 B3
680! f58 invln2_256
681! f60 ln2_256h
682! f62 ln2_256l
/*
 * Symbolic names for the double-precision registers %f36-%f62.  Each is
 * loaded once from the constants table by the ldd sequence at the start
 * of __vexp and is only read afterwards.  The order here matches the
 * order of the doubles in the table.
 */
683#define BOUNDRY %f36
684#define THRESH %f38
685#define TINY %f40
686#define HUGE %f42
687#define SIGNBIT %f44
688#define TWO96 %f46
689#define NEGINF %f48
690#define ONE %f50
691#define B1 %f52
692#define B2 %f54
693#define B3 %f56
694#define INVLN2_256 %f58
695#define LN2_256H %f60
696#define LN2_256L %f62
697
/*
 * __vexp: vector exp().  Inputs, per the register list above:
 * %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey.  For each
 * element it stores exp(*x) through y, stepping x and y by their
 * strides (counted in doubles; both are scaled by 8 below).
 * Presumed C prototype, not shown in this file -- confirm against the
 * caller:
 *   void __vexp(int n, double *x, int stridex, double *y, int stridey)
 * NOTE(review): the first argument is loaded before n is looked at, so
 * the code appears to assume n >= 1 -- confirm with the callers.
 *
 * This first part builds the frame, fetches the table addresses, sets
 * the integer thresholds and FP constants, loads the first argument and
 * branches into the main loop.
 */
698	ENTRY(__vexp)
/* frame = minimum frame plus tmps bytes of scratch (m0, m1, m2, jnk) */
699	save	%sp,-SA(MINFRAME)-tmps,%sp
/*
 * PIC_SETUP/PIC_SET come from libm.h (not visible here); from the way
 * the results are used below, %o3 receives the address of constants and
 * %o0 (copied to %g1) the address of TBL.
 */
700	PIC_SETUP(l7)
701	PIC_SET(l7,constants,o3)
702	PIC_SET(l7,TBL,o0)
703	mov	%o0,%g1
704	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
705
/*
 * Integer values compared against the high word of |x|:
 *   %i5 = sign-bit mask, %l6 = lower bound of the fast path,
 *   %l7 = upper bound of the fast path, %o5 and %o7 = limits used by
 *   the .rangeN handlers (%o7 is the exponent field of inf/NaN).
 * %l7 is overwritten here, after its last use by the PIC macros.
 */
706	sethi	%hi(0x80000000),%i5
707	sethi	%hi(0x3e300000),%l6
708	sethi	%hi(0x40862e41),%l7
709	or	%l7,%lo(0x40862e41),%l7
710	sethi	%hi(0x40874910),%o5
711	or	%o5,%lo(0x40874910),%o5
712	sethi	%hi(0x7ff00000),%o7
/* load all fourteen FP constants; %o3 is free for scratch use afterwards */
713	ldd	[%o3+ox3ef],BOUNDRY
714	ldd	[%o3+thresh],THRESH
715	ldd	[%o3+tiny],TINY
716	ldd	[%o3+huge],HUGE
717	ldd	[%o3+signbit],SIGNBIT
718	ldd	[%o3+two96],TWO96
719	ldd	[%o3+neginf],NEGINF
720	ldd	[%o3+one],ONE
721	ldd	[%o3+B1OFF],B1
722	ldd	[%o3+B2OFF],B2
723	ldd	[%o3+B3OFF],B3
724	ldd	[%o3+invln2_256],INVLN2_256
725	ldd	[%o3+ln2_256h],LN2_256H
726	ldd	[%o3+ln2_256l],LN2_256L
727	sll	%i2,3,%i2		! scale strides
728	sll	%i4,3,%i4
/*
 * %l3/%l4/%l5 are where the main loop stores the previous pass's three
 * results.  Nothing has been computed yet, so aim them at the jnk
 * scratch slot.
 */
729	add	%fp,jnk,%l3		! precondition loop
730	add	%fp,jnk,%l4
731	add	%fp,jnk,%l5
/* first argument: high word into %l0 for the range test, value into %f0 */
732	ld	[%i1],%l0		! hx = *x
733	ld	[%i1],%f0
734	ld	[%i1+4],%f1
735	andn	%l0,%i5,%l0		! hx &= ~0x80000000
/* the add below sits in the delay slot of the ba and always executes */
736	ba	.loop0
737	add	%i1,%i2,%i1		! x += stridex
738
/*
 * Main loop, unrolled three elements deep.  .loop0/.loop1/.loop2 do the
 * per-element range test and .cont is the shared computation.
 *
 * State on entry to .loop0: %f0 = x0, %l0 = high word of x0 with the
 * sign bit cleared, %i1 points at the next argument, %i3 at the output
 * slot for element 0, and %l3/%l4/%l5 point at the slots that still
 * have to receive the previous pass's results (%f6/%f16/%f26).
 *
 * Each range test forms (hx - 0x3e300000) | (0x40862e41 - hx).  The
 * result is negative exactly when hx lies outside that interval, and
 * the element is then handed to .rangeN.  The next argument is
 * preloaded with lda through %asi (set up at entry for non-faulting
 * loads); the preload is issued before the element count is checked.
 */
739	.align	16
740! -- 16 byte aligned
741.loop0:
742	lda	[%i1]%asi,%l1		! preload next argument
743	sub	%l0,%l6,%o3
744	sub	%l7,%l0,%o4
745	fand	%f0,SIGNBIT,%f2		! get sign bit
746
747	lda	[%i1]%asi,%f10
748	orcc	%o3,%o4,%g0
749	mov	%i3,%o0			! py0 = y
750	bl,pn	%icc,.range0		! if hx < 0x3e300000 or > 0x40862e41
751
752! delay slot
753	lda	[%i1+4]%asi,%f11
754	addcc	%i0,-1,%i0
755	add	%i3,%i4,%i3		! y += stridey
/* count used up: element 0 is the last one, finish it in .endloop1 */
756	ble,pn	%icc,.endloop1
757
758! delay slot
759	andn	%l1,%i5,%l1
760	add	%i1,%i2,%i1		! x += stridex
761	for	%f2,TWO96,%f2		! used to strip least sig bits
762	fmuld	%f0,INVLN2_256,%f4	! x/ (ln2/256)  , creating k
763
/* element 1: same test on %l1/%f10 while element 2 is preloaded into %l2/%f20 */
764.loop1:
765	lda	[%i1]%asi,%l2		! preload next argument
766	sub	%l1,%l6,%o3
767	sub	%l7,%l1,%o4
768	fand	%f10,SIGNBIT,%f12
769
770	lda	[%i1]%asi,%f20
771	orcc	%o3,%o4,%g0
772	mov	%i3,%o1			! py1 = y
773	bl,pn	%icc,.range1		! if hx < 0x3e300000 or > 0x40862e41
774
775! delay slot
776	lda	[%i1+4]%asi,%f21
777	addcc	%i0,-1,%i0
778	add	%i3,%i4,%i3		! y += stridey
/* count used up: elements 0 and 1 are the last ones, finish them in .endloop2 */
779	ble,pn	%icc,.endloop2
780
781! delay slot
782	andn	%l2,%i5,%l2
783	add	%i1,%i2,%i1		! x += stridex
784	for	%f12,TWO96,%f12
785	fmuld	%f10,INVLN2_256,%f14
786
/*
 * element 2: range test only.  There is no preload here; the argument
 * for the next pass is fetched near the end of .cont.
 */
787.loop2:
788	sub	%l2,%l6,%o3
789	sub	%l7,%l2,%o4
790	fand	%f20,SIGNBIT,%f22
791	fmuld	%f20,INVLN2_256,%f24		! okay to put this here; for alignment
792
793	orcc	%o3,%o4,%g0
794	bl,pn	%icc,.range2		! if hx < 0x3e300000 or > 0x40862e41
795! delay slot
796	for	%f22,TWO96,%f22
797	faddd	%f4,%f2,%f4		! creating k+j/256, sra to zero bits
798
/*
 * .cont: all three elements are in range.  Per element,
 * %f4/%f14/%f24 = x * INVLN2_256 and %f2/%f12/%f22 = TWO96 carrying the
 * sign of x.  Adding that constant and subtracting it again leaves the
 * product rounded to the spacing of doubles of TWO96's magnitude (2^12
 * for the 2^64 stored in the table) in %f8/%f18/%f28.  The add for
 * element 0 has already been issued just before this label.
 * The interleaved st instructions write the previous pass's results
 * (%f6/%f16/%f26) through %l3/%l4/%l5.
 */
799.cont:
800	faddd	%f14,%f12,%f14
801	mov	%i3,%o2			! py2 = y
802
803	faddd	%f24,%f22,%f24
804	add	%i3,%i4,%i3		! y += stridey
805
806	! BUBBLE USIII
807
808	fsubd	%f4,%f2,%f8		! creating k+j/256: sll
809	st	%f6,[%l3]		! store previous loop x0
810
811	fsubd	%f14,%f12,%f18
812	st	%f7,[%l3+4]		! store previous loop x0
813
814	fsubd	%f24,%f22,%f28
815	st	%f16,[%l4]
816
817	! BUBBLE USIII
818
819	fmuld	%f8,LN2_256H,%f2	! closest LN2_256 to x
820	st	%f17,[%l4+4]
821
822	fmuld	%f18,LN2_256H,%f12
823	st	%f26,[%l5]
824
825	fmuld	%f28,LN2_256H,%f22
826	st	%f27,[%l5+4]
827
828	! BUBBLE USIII
829
/* reduced argument r = x - n*LN2_256H - n*LN2_256L, n being the rounded product */
830	fsubd	%f0,%f2,%f0		! r = x - p*LN2_256H
831	fmuld	%f8,LN2_256L,%f4	! closest LN2_256 to x , added prec
832
833	fsubd	%f10,%f12,%f10
834	fmuld	%f18,LN2_256L,%f14
835
836	fsubd	%f20,%f22,%f20
837	fmuld	%f28,LN2_256L,%f24
838
839	! BUBBLE USIII
840
841	fsubd	%f0,%f4,%f0		! r -= p*LN2_256L
842
843	fsubd	%f10,%f14,%f10
844
845	fsubd	%f20,%f24,%f20
846
847!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here
848
849	! Alternate polynomial grouping allowing non-sequential calc of p
850	! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
851	! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
852	!
853	! let               SLi        Ri           SRi		be accumulators
854
/*
 * fdtoi converts each rounded product to a 32-bit integer, which is
 * spilled to m0/m1/m2 and reloaded into %l0/%l1/%l2 further down.  The
 * shifts and masks below treat that integer as (k << 20) | (j << 12).
 */
855	fmuld	%f0,B3,%f2	! SR1 = r1 * B3
856	fdtoi	%f8,%f8				! convert k+j/256 to int
857	st      %f8,[%fp+m0]			! store k, to shift return/use
858
859	fmuld	%f10,B3,%f12	! SR2 = r2 * B3
860	fdtoi	%f18,%f18			! convert k+j/256 to int
861	st      %f18,[%fp+m1]			! store k, to shift return/use
862
863	fmuld	%f20,B3,%f22	! SR3 = r3 * B3
864	fdtoi	%f28,%f28			! convert k+j/256 to int
865	st      %f28,[%fp+m2]			! store k, to shift return/use
866
867	fmuld	%f0,%f0,%f4	! R1 = r1 * r1
868
869	fmuld	%f10,%f10,%f14	! R2 = r2 * r2
870	faddd	%f2,B2,%f2	! SR1 += B2
871
872	fmuld	%f20,%f20,%f24	! R3 = r3 * r3
873	faddd	%f12,B2,%f12	! SR2 += B2
874
875	faddd	%f22,B2,%f22	! SR3 += B2
876	fmuld	%f0,B1,%f6	! SL1 = r1 * B1
877
/*
 * The fand with NEGINF keeps only the top 12 bits of the high word
 * (k << 20) and clears the low word, leaving the value that fpadd32
 * later adds into the exponent field.
 */
878	fmuld	%f10,B1,%f32	! SL2 = r2 * B1
879	fand	%f8,NEGINF,%f8
880	! best here for RAW BYPASS
881	ld	[%fp+m0],%l0			! get nonshifted k into intreg
882
883	fmuld	%f20,B1,%f34	! SL3 = r3 * B1
884	fand	%f18,NEGINF,%f18
885	ld	[%fp+m1],%l1			! get nonshifted k into intreg
886
887	fmuld	%f4,%f2,%f4	! R1 = R1 * SR1
888	fand	%f28,NEGINF,%f28
889	ld	[%fp+m2],%l2			! get nonshifted k into intreg
890
891	fmuld	%f14,%f12,%f14	! R2 = R2 * SR2
892	faddd	%f6,ONE,%f6	! SL1 += 1
893
894	fmuld	%f24,%f22,%f24	! R3 = R3 * SR3
895	faddd	%f32,ONE,%f32	! SL2 += 1
896	sra	%l0,8,%l3			! shift k tobe offset 256-8byte
897
898	faddd	%f34,ONE,%f34	! SL3 += 1
899	sra	%l1,8,%l4			! shift k tobe offset 256-8byte
900	sra	%l2,8,%l5			! shift k tobe offset 256-8byte
901
/*
 * (integer >> 8) & 0xff0 = j * 16, the byte offset of entry j in TBL.
 * The first ldd of each pair reads the double at +0, the second (after
 * the add of 8) the double at +8 of the same entry.
 */
902	! BUBBLE in USIII
903	and	%l3,0xff0,%l3
904	and	%l4,0xff0,%l4
905
906
907
908	faddd	%f6,%f4,%f6	! R1 = SL1 + R1
909	ldd     [%g1+%l3],%f4			! tbl[j]
910	add     %l3,8,%l3			! inc j
911	and	%l5,0xff0,%l5
912
913
/*
 * integer >> 20 = k.  %o3/%l1/%l2 end up holding k + 1021 for elements
 * 0/1/2; a negative value marks a result that needs the .small path.
 * (The "inc j" remarks on the add ...,1021 lines do not describe them.)
 */
914	faddd	%f32,%f14,%f32	! R2 = SL2 + R2
915	ldd     [%g1+%l4],%f14			! tbl[j]
916	add     %l4,8,%l4			! inc j
917	sra	%l0,20,%o3
918
919	faddd	%f34,%f24,%f34	! R3 = SL3 + R3
920	ldd     [%g1+%l5],%f24			! tbl[j]
921	add     %l5,8,%l5			! inc j
922	sra	%l1,20,%l1
923
924	! BUBBLE in USIII
925	ldd     [%g1+%l4],%f16		! tbl[j+1]
926	add     %o3,1021,%o3		! inc j
927
928	fmuld	%f0,%f6,%f0	! p1 = r1 * R1
929	ldd     [%g1+%l3],%f6           ! tbl[j+1]
930	add     %l1,1021,%l1		! inc j
931	sra	%l2,20,%l2
932
933	fmuld	%f10,%f32,%f10	! p2 = r2 * R2
934	ldd     [%g1+%l5],%f26		! tbl[j+1]
935	add     %l2,1021,%l2		! inc j
936
937	fmuld	%f20,%f34,%f20	! p3 = r3 * R3
938
939
940
941
942
943!!!!!!!!!!!!!!!!!!! poly-reorder - ends here
944
/*
 * With q = first double and t = second double of the TBL entry, the
 * sequence below forms q + (t + q*p), the result without its 2^k
 * factor.  Meanwhile %l3/%l4/%l5 are repointed at this pass's output
 * slots (%o0/%o1/%o2) and the first argument of the next pass is
 * preloaded into %l0 and %f0.
 */
945	fmuld	%f0,%f4,%f0		! start exp(x) = exp(r) * tbl[j]
946	mov	%o0,%l3
947
948	fmuld	%f10,%f14,%f10
949	mov	%o1,%l4
950
951	fmuld	%f20,%f24,%f20
952	mov	%o2,%l5
953
954	faddd	%f0,%f6,%f6		! cont exp(x) : apply tbl[j] high bits
955	lda	[%i1]%asi,%l0		! preload next argument
956
957	faddd	%f10,%f16,%f16
958	lda	[%i1]%asi,%f0
959
960	faddd	%f20,%f26,%f26
961	lda	[%i1+4]%asi,%f1
962
963	faddd	%f6,%f4,%f6		! cont exp(x) : apply tbl[j+1] low bits
964	add	%i1,%i2,%i1		! x += stridex
965
966	faddd	%f16,%f14,%f16
967	andn	%l0,%i5,%l0
968	or	%o3,%l1,%o4
969
/* the OR of the three k + 1021 values is negative if any one of them is */
970! -- 16 byte aligned
971	orcc	%o4,%l2,%o4
972	bl,pn	%icc,.small
973! delay slot
974	faddd	%f26,%f24,%f26
975
/* fpadd32 adds k << 20 into the high word, i.e. k into the exponent field */
976	fpadd32	%f6,%f8,%f6		! done exp(x) : apply 2^k
977	fpadd32	%f16,%f18,%f16
978
979
/* more elements left: next pass; otherwise store the three results and return */
980	addcc	%i0,-1,%i0
981	bg,pn	%icc,.loop0
982! delay slot
983	fpadd32	%f26,%f28,%f26
984
985	ba,pt	%icc,.endloop0
986! delay slot
987	nop
988
989
/*
 * .small: reached from .cont when at least one of the three k + 1021
 * values (%o3, %l1, %l2) is negative.  Each element is handled in turn.
 * The fpadd32 in the delay slot always applies k to the exponent field.
 * For a negative k + 1021 the exponent field is additionally raised by
 * BOUNDRY (0x3ef) and the value is then multiplied by TINY (2^-1007),
 * so the final scaling is done by a real FP multiply.
 * Afterwards the loop count is updated exactly as at the end of .cont.
 */
990	.align	16
991.small:
992	tst	%o3
993	bge,pt	%icc,1f
994! delay slot
995	fpadd32	%f6,%f8,%f6
996	fpadd32	%f6,BOUNDRY,%f6
997	fmuld	%f6,TINY,%f6
9981:
999	tst	%l1
1000	bge,pt	%icc,1f
1001! delay slot
1002	fpadd32	%f16,%f18,%f16
1003	fpadd32	%f16,BOUNDRY,%f16
1004	fmuld	%f16,TINY,%f16
10051:
1006	tst	%l2
1007	bge,pt	%icc,1f
1008! delay slot
1009	fpadd32	%f26,%f28,%f26
1010	fpadd32	%f26,BOUNDRY,%f26
1011	fmuld	%f26,TINY,%f26
10121:
1013	addcc	%i0,-1,%i0
1014	bg,pn	%icc,.loop0
1015! delay slot
1016	nop
1017	ba,pt	%icc,.endloop0
1018! delay slot
1019	nop
1020
1021
/*
 * .endloop2: tail for the case where the vector ends after element 1.
 * It runs the same computation as .cont for element 1 alone
 * (x in %f10, sign bit in %f12), using %f14/%f18/%f30/%f32, %o4 and
 * %l1 as temporaries, stores the result through %o1 and then falls
 * through into .endloop1 to finish element 0.
 * The for/fmuld pair at the top is repeated here because some paths
 * branch in before it has been executed for this element.
 */
1022.endloop2:
1023	for	%f12,TWO96,%f12
1024	fmuld	%f10,INVLN2_256,%f14
1025	faddd	%f14,%f12,%f14
1026	fsubd	%f14,%f12,%f18
1027	fmuld	%f18,LN2_256H,%f12
1028	fsubd	%f10,%f12,%f10
1029	fmuld	%f18,LN2_256L,%f14
1030	fsubd	%f10,%f14,%f10
1031	fmuld	%f10,B3,%f12
1032	fdtoi	%f18,%f18
1033	st      %f18,[%fp+m1]
1034	fmuld	%f10,%f10,%f14
1035	faddd	%f12,B2,%f12
1036	fmuld	%f10,B1,%f32
1037	fand	%f18,NEGINF,%f18
1038	ld	[%fp+m1],%l1
1039	fmuld	%f14,%f12,%f14
1040	faddd	%f32,ONE,%f32
1041	sra	%l1,8,%o4
1042	and	%o4,0xff0,%o4
1043	faddd	%f32,%f14,%f32
1044	ldd     [%g1+%o4],%f14
1045	add     %o4,8,%o4
1046	sra	%l1,20,%l1
1047	ldd     [%g1+%o4],%f30
/* the condition codes set here are still intact at the bge: only FP ops intervene */
1048	addcc	%l1,1021,%l1
1049	fmuld	%f10,%f32,%f10
1050	fmuld	%f10,%f14,%f10
1051	faddd	%f10,%f30,%f30
1052	faddd	%f30,%f14,%f30
/* k + 1021 >= 0: plain exponent add; otherwise also apply BOUNDRY and TINY */
1053	bge,pt	%icc,1f
1054! delay slot
1055	fpadd32	%f30,%f18,%f30
1056	fpadd32	%f30,BOUNDRY,%f30
1057	fmuld	%f30,TINY,%f30
10581:
1059	st	%f30,[%o1]
1060	st	%f31,[%o1+4]
1061
/*
 * .endloop1: tail for the case where the vector ends after element 0
 * (also entered by fall-through from .endloop2).  It runs the same
 * computation as .cont for element 0 alone (x in %f0, sign bit in
 * %f2), using %f4/%f8/%f30/%f32, %o3, %o4 and %l0 as temporaries,
 * stores the result through %o0 and then falls through into .endloop0.
 * %f6 is not touched, so the previous pass's result stays available
 * for .endloop0.
 */
1062.endloop1:
1063	for	%f2,TWO96,%f2
1064	fmuld	%f0,INVLN2_256,%f4
1065	faddd	%f4,%f2,%f4
1066	fsubd	%f4,%f2,%f8
1067	fmuld	%f8,LN2_256H,%f2
1068	fsubd	%f0,%f2,%f0
1069	fmuld	%f8,LN2_256L,%f4
1070	fsubd	%f0,%f4,%f0
1071	fmuld	%f0,B3,%f2
1072	fdtoi	%f8,%f8
1073	st	%f8,[%fp+m0]
1074	fmuld	%f0,%f0,%f4
1075	faddd	%f2,B2,%f2
1076	fmuld	%f0,B1,%f32
1077	fand	%f8,NEGINF,%f8
1078	ld	[%fp+m0],%l0
1079	fmuld	%f4,%f2,%f4
1080	faddd	%f32,ONE,%f32
1081	sra	%l0,8,%o4
1082	and	%o4,0xff0,%o4
1083	faddd	%f32,%f4,%f32
1084	ldd     [%g1+%o4],%f4
1085	add     %o4,8,%o4
1086	sra	%l0,20,%o3
1087	ldd     [%g1+%o4],%f30
/* the condition codes set here are still intact at the bge: only FP ops intervene */
1088	addcc	%o3,1021,%o3
1089	fmuld	%f0,%f32,%f0
1090	fmuld	%f0,%f4,%f0
1091	faddd	%f0,%f30,%f30
1092	faddd	%f30,%f4,%f30
/* k + 1021 >= 0: plain exponent add; otherwise also apply BOUNDRY and TINY */
1093	bge,pt	%icc,1f
1094! delay slot
1095	fpadd32	%f30,%f8,%f30
1096	fpadd32	%f30,BOUNDRY,%f30
1097	fmuld	%f30,TINY,%f30
10981:
1099	st	%f30,[%o0]
1100	st	%f31,[%o0+4]
1101
/*
 * .endloop0: common exit.  Writes the three pending results
 * (%f6/%f16/%f26) through %l3/%l4/%l5 and returns.  If no full pass
 * through .cont has completed, the pointers still address the jnk
 * scratch slot and the stores are harmless.  The store order is the
 * element order; NOTE(review): it looks significant when the output
 * slots coincide (for example a zero stridey) -- confirm before
 * reordering.
 */
1102.endloop0:
1103	st	%f6,[%l3]
1104	st	%f7,[%l3+4]
1105	st	%f16,[%l4]
1106	st	%f17,[%l4+4]
1107	st	%f26,[%l5]
1108	st	%f27,[%l5+4]
1109	ret
1110	restore
1111
1112
/*
 * .range0: element 0 failed the fast-path range test in .loop0.
 * On entry %f0 = x, %l0 = high word of |x|, %o0 = output slot, and the
 * next argument is already preloaded (%l1 with its sign bit still in
 * place, %f10).  The count, x and y have not been stepped yet.
 *
 *   hx < 0x3e300000                  result = x + 1
 *   hx <= 0x40874910, x > THRESH     result = HUGE*HUGE
 *   hx <= 0x40874910, otherwise      back to the normal path at .loop1
 *                                    (or .endloop1 if this was the last
 *                                    element), repeating the tail of
 *                                    .loop0
 *   hx >= 0x7ff00000 (inf or NaN)    result = x*x, or 0*0 when x
 *                                    compares equal to NEGINF
 *   remaining finite x               result = HUGE*HUGE, or TINY*TINY
 *                                    when x < 1
 */
1113.range0:
1114	cmp	%l0,%l6
1115	bl,a,pt	%icc,3f			! if x is tiny
1116! delay slot, annulled if branch not taken
1117	faddd	%f0,ONE,%f4
1118
1119	cmp	%l0,%o5
1120	bg,pt	%icc,1f			! if x is huge, inf, nan
1121! delay slot
1122	nop
1123
1124	fcmpd	%fcc0,%f0,THRESH
1125	fbg,a,pt %fcc0,3f		! if x is huge and positive
1126! delay slot, annulled if branch not taken
1127	fmuld	HUGE,HUGE,%f4
1128
1129! x is near the extremes but within range; return to the loop
1130	addcc	%i0,-1,%i0
1131	add	%i3,%i4,%i3		! y += stridey
1132	ble,pn	%icc,.endloop1
1133! delay slot
1134	andn	%l1,%i5,%l1
1135	add	%i1,%i2,%i1		! x += stridex
1136	for	%f2,TWO96,%f2
1137	ba,pt	%icc,.loop1
1138! delay slot
1139	fmuld	%f0,INVLN2_256,%f4
1140
11411:
1142	cmp	%l0,%o7
1143	bl,pn	%icc,2f			! if x is finite
1144! delay slot
1145	nop
1146	fzero	%f4
1147	fcmpd	%fcc0,%f0,NEGINF
1148	fmovdne	%fcc0,%f0,%f4
1149	ba,pt	%icc,3f
1150	fmuld	%f4,%f4,%f4		! x*x or zero*zero
11512:
1152	fmovd	HUGE,%f4
1153	fcmpd	%fcc0,%f0,ONE
1154	fmovdl	%fcc0,TINY,%f4
1155	fmuld	%f4,%f4,%f4		! huge*huge or tiny*tiny
/*
 * 3: store the special result directly, then make the preloaded
 * argument the new element 0 (%l0 = its high word without the sign,
 * %f0 = its value) and restart at .loop0 while elements remain.
 * The add in the bg delay slot executes on both outcomes.
 */
11563:
1157	st	%f4,[%o0]
1158	andn	%l1,%i5,%l0
1159	add	%i1,%i2,%i1		! x += stridex
1160	fmovd	%f10,%f0
1161	st	%f5,[%o0+4]
1162	addcc	%i0,-1,%i0
1163	bg,pt	%icc,.loop0
1164! delay slot
1165	add	%i3,%i4,%i3		! y += stridey
1166	ba,pt	%icc,.endloop0
1167! delay slot
1168	nop
1169
1170
/*
 * .range1: element 1 failed the fast-path range test in .loop1.
 * Same decision tree as .range0, applied to %f10/%l1 with the result
 * built in %f14 and stored through %o1.  The next argument is already
 * preloaded (%l2 with its sign bit still in place, %f20).
 *
 *   hx < 0x3e300000                  result = x + 1
 *   hx <= 0x40874910, x > THRESH     result = HUGE*HUGE
 *   hx <= 0x40874910, otherwise      back to the normal path at .loop2
 *                                    (or .endloop2 if this was the last
 *                                    element), repeating the tail of
 *                                    .loop1
 *   hx >= 0x7ff00000 (inf or NaN)    result = x*x, or 0*0 when x
 *                                    compares equal to NEGINF
 *   remaining finite x               result = HUGE*HUGE, or TINY*TINY
 *                                    when x < 1
 */
1171.range1:
1172	cmp	%l1,%l6
1173	bl,a,pt	%icc,3f			! if x is tiny
1174! delay slot, annulled if branch not taken
1175	faddd	%f10,ONE,%f14
1176
1177	cmp	%l1,%o5
1178	bg,pt	%icc,1f			! if x is huge, inf, nan
1179! delay slot
1180	nop
1181
1182	fcmpd	%fcc0,%f10,THRESH
1183	fbg,a,pt %fcc0,3f		! if x is huge and positive
1184! delay slot, annulled if branch not taken
1185	fmuld	HUGE,HUGE,%f14
1186
1187! x is near the extremes but within range; return to the loop
1188	addcc	%i0,-1,%i0
1189	add	%i3,%i4,%i3		! y += stridey
1190	ble,pn	%icc,.endloop2
1191! delay slot
1192	andn	%l2,%i5,%l2
1193	add	%i1,%i2,%i1		! x += stridex
1194	for	%f12,TWO96,%f12
1195	ba,pt	%icc,.loop2
1196! delay slot
1197	fmuld	%f10,INVLN2_256,%f14
1198
11991:
1200	cmp	%l1,%o7
1201	bl,pn	%icc,2f			! if x is finite
1202! delay slot
1203	nop
1204	fzero	%f14
1205	fcmpd	%fcc0,%f10,NEGINF
1206	fmovdne	%fcc0,%f10,%f14
1207	ba,pt	%icc,3f
1208	fmuld	%f14,%f14,%f14		! x*x or zero*zero
12092:
1210	fmovd	HUGE,%f14
1211	fcmpd	%fcc0,%f10,ONE
1212	fmovdl	%fcc0,TINY,%f14
1213	fmuld	%f14,%f14,%f14		! huge*huge or tiny*tiny
/*
 * 3: store the special result directly, then make the preloaded
 * argument the new element 1 (%l1 = its high word without the sign,
 * %f10 = its value) and restart at .loop1 while elements remain;
 * otherwise .endloop1 finishes the pending element 0.
 * The add in the bg delay slot executes on both outcomes.
 */
12143:
1215	st	%f14,[%o1]
1216	andn	%l2,%i5,%l1
1217	add	%i1,%i2,%i1		! x += stridex
1218	fmovd	%f20,%f10
1219	st	%f15,[%o1+4]
1220	addcc	%i0,-1,%i0
1221	bg,pt	%icc,.loop1
1222! delay slot
1223	add	%i3,%i4,%i3		! y += stridey
1224	ba,pt	%icc,.endloop1
1225! delay slot
1226	nop
1227
1228
/*
 * .range2: element 2 failed the fast-path range test in .loop2.
 * Same decision tree as .range0, applied to %f20/%l2 with the result
 * built in %f24.  Differences from the other two handlers:
 *   - the in-range case goes straight back to .cont, with the faddd
 *     from the .loop2 branch delay slot repeated in the ba delay slot;
 *   - the output pointer for element 2 (%o2) has not been set yet, so
 *     the special result is stored through %i3;
 *   - .loop2 does no preload, so the replacement argument is loaded
 *     here.
 *
 *   hx < 0x3e300000                  result = x + 1
 *   hx <= 0x40874910, x > THRESH     result = HUGE*HUGE
 *   hx <= 0x40874910, otherwise      continue at .cont
 *   hx >= 0x7ff00000 (inf or NaN)    result = x*x, or 0*0 when x
 *                                    compares equal to NEGINF
 *   remaining finite x               result = HUGE*HUGE, or TINY*TINY
 *                                    when x < 1
 */
1229.range2:
1230	cmp	%l2,%l6
1231	bl,a,pt	%icc,3f			! if x is tiny
1232! delay slot, annulled if branch not taken
1233	faddd	%f20,ONE,%f24
1234
1235	cmp	%l2,%o5
1236	bg,pt	%icc,1f			! if x is huge, inf, nan
1237! delay slot
1238	nop
1239
1240	fcmpd	%fcc0,%f20,THRESH
1241	fbg,a,pt %fcc0,3f		! if x is huge and positive
1242! delay slot, annulled if branch not taken
1243	fmuld	HUGE,HUGE,%f24
1244
1245! x is near the extremes but within range; return to the loop
1246	ba,pt	%icc,.cont
1247! delay slot
1248	faddd	%f4,%f2,%f4
1249
12501:
1251	cmp	%l2,%o7
1252	bl,pn	%icc,2f			! if x is finite
1253! delay slot
1254	nop
1255	fzero	%f24
1256	fcmpd	%fcc0,%f20,NEGINF
1257	fmovdne	%fcc0,%f20,%f24
1258	ba,pt	%icc,3f
1259	fmuld	%f24,%f24,%f24		! x*x or zero*zero
12602:
1261	fmovd	HUGE,%f24
1262	fcmpd	%fcc0,%f20,ONE
1263	fmovdl	%fcc0,TINY,%f24
1264	fmuld	%f24,%f24,%f24		! huge*huge or tiny*tiny
/*
 * 3: store the special result, load the next argument as the new
 * element 2 and retry at .loop2 while elements remain; otherwise
 * .endloop2 finishes the pending elements 1 and 0.
 * The add in the bg delay slot executes on both outcomes.
 */
12653:
1266	st	%f24,[%i3]
1267	st	%f25,[%i3+4]
1268	lda	[%i1]%asi,%l2		! preload next argument
1269	lda	[%i1]%asi,%f20
1270	lda	[%i1+4]%asi,%f21
1271	andn	%l2,%i5,%l2
1272	add	%i1,%i2,%i1		! x += stridex
1273	addcc	%i0,-1,%i0
1274	bg,pt	%icc,.loop2
1275! delay slot
1276	add	%i3,%i4,%i3		! y += stridey
1277	ba,pt	%icc,.endloop2
1278! delay slot
1279	nop
1280
/* SET_SIZE is a macro from the included headers; it closes the symbol opened by ENTRY(__vexp) */
1281	SET_SIZE(__vexp)
1282
1283