xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vlogf.S (revision eb9a1df2aeb866bf1de4494433b6d7e5fa07b3ae)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vlogf.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	64
35!! CONST_TBL[2*i]   = 127*log(2) - log(1+i/32), i = [0, 32]
36!! CONST_TBL[2*i+1] = 2**(-23)/(1+i/32),        i = [0, 32]
37
38.CONST_TBL:
39	.word  	0x405601e6, 	0x78fc457b, 	0x3e800000, 	0x00000000,
40	.word  	0x4055ffee, 	0x4f4b5df8,	0x3e7f07c1,	0xf07c1f08,
41	.word	0x4055fe05,	0x32e4434f,	0x3e7e1e1e,	0x1e1e1e1e,
42	.word	0x4055fc2a,	0x44598c21,	0x3e7d41d4,	0x1d41d41d,
43	.word	0x4055fa5c,	0xb720babf,	0x3e7c71c7,	0x1c71c71c,
44	.word	0x4055f89b,	0xcf803581,	0x3e7bacf9,	0x14c1bad0,
45	.word	0x4055f6e6,	0xe0c3f1b1,	0x3e7af286,	0xbca1af28,
46	.word	0x4055f53d,	0x4badcb50,	0x3e7a41a4,	0x1a41a41a,
47	.word	0x4055f39e,	0x7d18782e,	0x3e799999,	0x9999999a,
48	.word	0x4055f209,	0xecc5965c,	0x3e78f9c1,	0x8f9c18fa,
49	.word	0x4055f07f,	0x1c5099d5,	0x3e786186,	0x18618618,
50	.word	0x4055eefd,	0x9641645e,	0x3e77d05f,	0x417d05f4,
51	.word	0x4055ed84,	0xed3a291d,	0x3e7745d1,	0x745d1746,
52	.word	0x4055ec14,	0xbb3ced72,	0x3e76c16c,	0x16c16c17,
53	.word	0x4055eaac,	0xa10589ab,	0x3e7642c8,	0x590b2164,
54	.word	0x4055e94c,	0x45758439,	0x3e75c988,	0x2b931057,
55	.word	0x4055e7f3,	0x550f85e3,	0x3e755555,	0x55555555,
56	.word	0x4055e6a1,	0x818078ec,	0x3e74e5e0,	0xa72f0539,
57	.word	0x4055e556,	0x8134aae1,	0x3e747ae1,	0x47ae147b,
58	.word	0x4055e412,	0x0ef783b7,	0x3e741414,	0x14141414,
59	.word	0x4055e2d3,	0xe99c9674,	0x3e73b13b,	0x13b13b14,
60	.word	0x4055e19b,	0xd3b0f9d9,	0x3e73521c,	0xfb2b78c1,
61	.word	0x4055e069,	0x9333fb26,	0x3e72f684,	0xbda12f68,
62	.word	0x4055df3c,	0xf1565bd0,	0x3e729e41,	0x29e4129e,
63	.word	0x4055de15,	0xba3f64fa,	0x3e724924,	0x92492492,
64	.word	0x4055dcf3,	0xbcd73219,	0x3e71f704,	0x7dc11f70,
65	.word	0x4055dbd6,	0xca95a75a,	0x3e71a7b9,	0x611a7b96,
66	.word	0x4055dabe,	0xb7559927,	0x3e715b1e,	0x5f75270d,
67	.word	0x4055d9ab,	0x592bb896,	0x3e711111,	0x11111111,
68	.word	0x4055d89c,	0x8840e4fe,	0x3e70c971,	0x4fbcda3b,
69	.word	0x4055d792,	0x1eaf8df0,	0x3e708421,	0x08421084,
70	.word	0x4055d68b,	0xf863da3d,	0x3e704104,	0x10410410,
71	.word	0x4055d589,	0xf2fe5107,	0x3e700000,	0x00000000,
72	.word	0xbfcffb16,	0xbfa3db6e,	! K3 = -2.49850123953105416108e-01
73	.word	0x3fd5561b,	0xa4b3110b,	! K2 =  3.33380614127478394992e-01
74	.word	0xbfe00000,	0x0b666d0b,	! K1 = -5.00000021234343492201e-01
75	.word	0x3fefffff,	0xff3fd118,	! K0 =  9.99999998601683029714e-01
76	.word	0x3fe62e42,	0xfefa39ef,	! LN2 = 6.931471805599452862e-01
77	.word	0xbf800000,	0x7f800000,	! MONE = -1.0f ;    INF
78
79! local storage indices
80#define tmp0		STACK_BIAS-0x8
81#define tmp1		STACK_BIAS-0x10
82#define tmp2		STACK_BIAS-0x18
83#define tmp3		STACK_BIAS-0x20
84#define tmp4		STACK_BIAS-0x28
85#define tmp5		STACK_BIAS-0x30
86! sizeof temp storage - must be a multiple of 16 for V9
87#define tmps		0x30
88
89#define ZERO		%f28
90#define K3		%f30
91#define K2		%f32
92#define K1		%f34
93#define K0		%f36
94#define LN2		%f38
95
96#define stridex		%o0
97#define stridex2	%o1
98#define stridey		%o2
99#define x0		%o3
100#define x1		%o4
101#define y		%o5
102
103#define ind0		%i0
104#define ind1		%i1
105#define ind2		%i2
106#define ind3		%i3
107#define MASK_0x007fffff	%i4
108#define MASK_0xfffc0000	%i5
109#define CONST_0x20000	%o7
110#define MASK_0x7f800000	%l3
111
112#define ival0		%l0
113#define iy0		%l1
114#define ival1		%l2
115#define iy1		%l1
116#define ival2		%l4
117#define iy2		%l5
118#define ival3		%l6
119#define iy3		%l2
120#define counter		%l7
121
122#define LOGFTBL		%g5
123#define LOGFTBL_P8	%g1
124
125! register use
126
127! i0  ind0
128! i1  ind1
129! i2  ind2
130! i3  ind3
131! i4  0x007fffff
132! i5  0xfffc0000
133
134! l0  ival0
135! l1  iy0, iy1
136! l2  ival1, iy3
137! l3  0x7f800000
138! l4  ival2
139! l5  iy2
140! l6  ival3
141! l7  cycle counter
142
143! o0  stridex
144! o1  stridex * 2
145! o2  stridey
146! o3  x0
147! o4  x1
148! o5  y
149! o7  0x20000
150
151! g1  CONST_TBL + 8
152! g5  CONST_TBL
153
154! f2
155! f4
156! f6
157! f8
158! f9
159! f10
160! f12
161! f14
162! f16
163! f18
164! f19
165! f20
166! f22
167! f24
168! f26
169! f28 ZERO = 0
170! f30 K3 = -2.49850123953105416108e-01
171! f32 K2 =  3.33380614127478394992e-01
172! f34 K1 = -5.00000021234343492201e-01
173! f36 K0 =  9.99999998601683029714e-01
174! f38 LN2 = 6.931471805599452862e-01
175! f40
176! f42
177! f44
178! f46
179! f48
180! f50
181! f52
182! f54
183! f56
184! f58
185! f60
186! f62
187
188
189!  !!!!!  Algorithm   !!!!!
190!
191!  double exp, ty, yy, ldtmp0, ldtmp1;
192!  double dtmp0, dtmp1, dtmp2, dtmp3, dtmp4, dtmp5;
193!  float  value;
194!  int    ival, iy, i, ind, iexp;
195!  double K3   = -2.49850123953105416108e-01;
196!  double K2   =  3.33380614127478394992e-01;
197!  double K1   = -5.00000021234343492201e-01;
198!  double K0   =  9.99999998601683029714e-01;
199!  double LN2  =  6.931471805599452862e-01;
200!  double ZERO =  0;
201!  float  INF;
202!
203!  ival = *(int*)(x);
204!  if (ival >= 0x7f800000) goto spec;
205!  if (ival <= 0x7fffff) goto spec;
206!  *(float*)&exp = *(float*)(x);
207!  exp = vis_fpack32(ZERO, exp);
208!  iy = ival & 0x007fffff;
209!  ival = iy + 0x20000;
210!  ival = ival & 0xfffc0000;
211!  i  = ival >> 14;
212!  ind  = i & (-8);
213!  iy = iy - ival;
214!  ty = LN2 * (double)(*(int*)&exp);
215!  ldtmp0 = *(double*)((char*)CONST_TBL+ind);
216!  ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
217!  ty = ty - ldtmp0;
218!  yy = (double) iy;
219!  yy = yy * ldtmp1;
220!  dtmp0 = K3 * yy;
221!  dtmp1 = dtmp0 + K2;
222!  dtmp2 = dtmp1 * yy;
223!  dtmp3 = dtmp2 + K1;
224!  dtmp4 = dtmp3 * yy;
225!  dtmp5 = dtmp4 + K0;
226!  yy    = dtmp5 * yy;
227!  yy    = yy + ty;
228!  y[0] = (float)(yy);
229!  return;
230!
231!spec:
232!  if ((ival & 0x7fffffff) >= 0x7f800000) { /* X = NaN or Inf */
233!    value = *(float*) &ival;
234!    y[0] = (value < 0.0f? 0.0f : value) * value;
235!    return;
236!  } else if (ival <= 0) {
237!    y[0] = ((ival & 0x7fffffff) == 0) ?
238!      -1.0f / 0.0f : 0.0f / 0.0f;  /* X = +-0 : X < 0 */
239!    return;
240!  } else { /* Denormal number */
241!    value = (float) ival;
242!    ival = *(int*) &value;
243!    iexp = (ival >> 23) - 149;
244!    iy = ival & 0x007fffff;
245!    ival = iy + 0x20000;
246!    ival = ival & 0xfffc0000;
247!    i  = ival >> 14;
248!    ind  = i & (-8);
249!    iy = iy - ival;
250!    ty = LN2 * (double)iexp;
251!    ldtmp0 = *(double*)((char*)CONST_TBL+ind);
252!    ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
253!    ty = ty - ldtmp0;
254!    yy = (double) iy;
255!    yy = yy * ldtmp1;
256!    dtmp0 = K3 * yy;
257!    dtmp1 = dtmp0 + K2;
258!    dtmp2 = dtmp1 * yy;
259!    dtmp3 = dtmp2 + K1;
260!    dtmp4 = dtmp3 * yy;
261!    dtmp5 = dtmp4 + K0;
262!    yy    = dtmp5 * yy;
263!    yy    = yy + ty;
264!    y[0] = (float)(yy);
265!    return;
266!  }
267!--------------------------------------------------------------------
268
269	ENTRY(__vlogf)
270	save	%sp,-SA(MINFRAME)-tmps,%sp
271	PIC_SETUP(l7)
272	PIC_SET(l7,.CONST_TBL,g5)
273	wr	%g0,0,%gsr
274
275	st	%i0,[%fp+tmp0]
276	stx	%i1,[%fp+tmp5]
277
278	sra	%i2,0,%l4
279	ldd	[LOGFTBL+528],K3
280	add	%i3,0,y
281	sllx	%l4,2,stridex
282	sllx	%l4,3,stridex2
283	ldd	[LOGFTBL+536],K2
284	sra	%i4,0,%l3
285	ldd	[LOGFTBL+544],K1
286	sllx	%l3,2,stridey
287	sethi	%hi(0x7ffc00),MASK_0x007fffff
288	add	MASK_0x007fffff,1023,MASK_0x007fffff
289	ldd	[LOGFTBL+552],K0
290	sethi	%hi(0xfffc0000),MASK_0xfffc0000
291	ldd	[LOGFTBL+560],LN2
292	sethi	%hi(0x20000),CONST_0x20000
293	fzero	ZERO
294	sethi	%hi(0x7f800000),MASK_0x7f800000
295	sub	y,stridey,y
296
297.begin:
298	ld	[%fp+tmp0],counter
299	ldx	[%fp+tmp5],x0
300	st	%g0,[%fp+tmp0]
301.begin1:
302	add	x0,stridex2,x1! x += 2*stridex
303	subcc	counter,1,counter
304	bneg,pn	%icc,.end
305	lda	[x0]0x82,ival0			! (Y0_0) ival = *(int*)(x)
306
307	add	LOGFTBL,8,LOGFTBL_P8
308	lda	[stridex+x0]0x82,ival1		! (Y1_0) ival = *(int*)(x)
309
310	cmp	ival0,MASK_0x7f800000		! (Y0_0) if (ival >= 0x7f800000)
311	lda	[x1]0x82,ival2			! (Y2_0) ival = *(int*)(x);
312
313	bge,pn	%icc,.spec			! (Y0_0) if (ival >= 0x7f800000)
314	nop
315
316	cmp	ival0,MASK_0x007fffff		! (Y0_0) if (ival <= 0x7fffff)
317	ble,pn	%icc,.spec			! (Y0_0) if (ival <= 0x7fffff)
318	nop
319
320	cmp	ival1,MASK_0x7f800000		! (Y1_0) if (ival >= 0x7f800000)
321	and	ival0,MASK_0x007fffff,iy0	! (Y0_0) iy = ival & 0x007fffff
322
323
324	add	iy0,CONST_0x20000,ival0		! (Y0_0) ival = iy + 0x20000
325
326	and	ival0,MASK_0xfffc0000,ival0	! (Y0_0) ival = ival & 0xfffc0000
327	bge,pn	%icc,.update2			! (Y1_0) if (ival >= 0x7f800000)
328	nop
329.cont2:
330	sub	iy0,ival0,iy0			! (Y0_0) iy = iy - ival
331	cmp	ival1,MASK_0x007fffff		! (Y1_0) if (ival <= 0x7fffff)
332	lda	[stridex+x1]0x82,ival3		! (Y3_0) ival = *(int*)(x)
333
334	st	iy0,[%fp+tmp1]			! (Y0_0) (double) iy
335	ble,pn	%icc,.update3			! (Y1_0) if (ival <= 0x7fffff)
336	nop
337.cont3:
338	cmp	ival2,MASK_0x7f800000		! (Y2_0) if (ival >= 0x7f800000)
339	and	ival1,MASK_0x007fffff,iy1	! (Y1_0) iy = ival & 0x007fffff
340	bge,pn	%icc,.update4			! (Y2_0) if (ival >= 0x7f800000)
341	nop
342.cont4:
343	cmp	ival2,MASK_0x007fffff		! (Y2_0) if (ival <= 0x7fffff)
344	ble,pn	%icc,.update5			! (Y2_0) if (ival <= 0x7fffff)
345	nop
346.cont5:
347	add	iy1,CONST_0x20000,ival1		! (Y1_0) ival = iy + 0x20000
348	and	ival2,MASK_0x007fffff,iy2	! (Y2_0) iy = ival & 0x007fffff
349
350	and	ival1,MASK_0xfffc0000,ival1	! (Y1_0) ival = ival & 0xfffc0000
351	add	iy2,CONST_0x20000,ival2		! (Y2_0) ival = iy + 0x20000
352
353	sub	iy1,ival1,iy1			! (Y1_0) iy = iy - ival
354	and	ival2,MASK_0xfffc0000,ival2	! (Y2_0) ival = ival & 0xfffc0000
355
356	cmp	ival3,MASK_0x7f800000		! (Y3_0) (ival >= 0x7f800000)
357	sub	iy2,ival2,iy2			! (Y2_0) iy = iy - ival
358	st 	iy1,[%fp+tmp3]			! (Y1_0) (double) iy
359
360	st	iy2,[%fp+tmp2]			! (Y2_0) (double) iy
361	bge,pn	%icc,.update6			! (Y3_0) (ival >= 0x7f800000)
362	nop
363.cont6:
364	cmp	ival3,MASK_0x007fffff		! (Y3_0) if (ival <= 0x7fffff)
365	ld	[%fp+tmp1],%f2			! (Y0_0) (double) iy
366	ble,pn	%icc,.update7			! (Y3_0) if (ival <= 0x7fffff)
367	sra	ival0,14,ival0			! (Y0_0) i  = ival >> 14;
368.cont7:
369	sra	ival1,14,ind1			! (Y1_0) i  = ival >> 14;
370	ld	[%fp+tmp3],%f4			! (Y1_0) (double) iy
371
372	sra	ival2,14,ival2			! (Y2_0) i  = ival >> 14;
373	and	ival0,-8,ind0			! (Y0_0) ind  = i & (-8)
374	lda	[x0]0x82,%f6			! (Y0_0) *(float*)&exp = *(float*)(x)
375
376	and	ind1,-8,ind1			! (Y1_0) ind  = i & (-8)
377	ldd	[LOGFTBL_P8+ind0],%f14		! (Y0_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
378	fitod	%f2,%f48			! (Y0_0) yy = (double) iy
379
380	and	ival3,MASK_0x007fffff,iy3	! (Y3_0) iy = ival & 0x007fffff
381	lda	[stridex+x0]0x82,%f8		! (Y1_0) *(float*)&exp = *(float*)(x)
382
383	add	iy3,CONST_0x20000,ival3		! (Y3_0) iy + 0x20000
384	ldd	[LOGFTBL_P8+ind1],%f16		! (Y1_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
385	fitod	%f4,%f26			! (Y1_0) yy = (double) iy
386
387	sub	y,stridey,y			! y += stridey
388	and	ival3,MASK_0xfffc0000,ival3	! (Y3_0) ival = ival & 0xfffc0000
389	lda	[x1]0x82,%f10			! (Y2_0) *(float*)&exp = *(float*)(x)
390
391	add	x1,stridex2,x0			! x += 2*stridex
392	sub	iy3,ival3,iy3			! (Y3_0) iy = iy - ival
393	ld	[%fp+tmp2],%f2			! (Y2_0) (double) iy
394	fmuld	%f48,%f14,%f46			! (Y0_0) yy = yy * ldtmp1
395
396	lda	[stridex+x1]0x82,%f12		! (Y3_0) *(float*)&exp = *(float*)(x)
397	fmuld	%f26,%f16,%f62			! (Y1_0) yy = yy * ldtmp1
398
399	sra	ival3,14,ival3			! (Y3_0) i  = ival >> 14;
400	lda	[x0]0x82,ival0			! (Y0_1) ival = *(int*)(x)
401
402	add	x0,stridex2,x1			! x += 2*stridex
403	st	iy3,[%fp+tmp3]			! (Y3_0) (double) iy
404	fmuld	K3,%f46,%f22			! (Y0_0) dtmp0 = K3 * yy
405
406	and	ival2,-8,ind2			! (Y2_0) ind  = i & (-8)
407	lda	[stridex+x0]0x82,ival1		! (Y1_1) ival = *(int*)(x)
408
409	cmp	ival0,MASK_0x7f800000		! (Y0_1) if (ival >= 0x7f800000)
410	lda	[x1]0x82,ival2			! (Y2_1) ival = *(int*)(x);
411	fmuld	K3,%f62,%f50			! (Y1_0) dtmp0 = K3 * yy
412
413	bge,pn  %icc,.update8			! (Y0_1) if (ival >= 0x7f800000)
414	nop
415.cont8:
416	cmp	ival0,MASK_0x007fffff		! (Y0_1) if (ival <= 0x7fffff)
417	ble,pn	%icc,.update9			! (Y0_1) if (ival <= 0x7fffff)
418	faddd	%f22,K2,%f48			! (Y0_0) dtmp1 = dtmp0 + K2
419
420.cont9:
421	cmp	ival1,MASK_0x7f800000		! (Y1_1) if (ival >= 0x7f800000)
422	and	ival0,MASK_0x007fffff,iy0	! (Y0_1) iy = ival & 0x007fffff
423
424	add	iy0,CONST_0x20000,ival0		! (Y0_1) ival = iy + 0x20000
425	ldd	[LOGFTBL_P8+ind2],%f14		! (Y2_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
426	fpack32	ZERO,%f6,%f6			! (Y0_0) exp = vis_fpack32(ZERO, exp)
427
428	and	ival0,MASK_0xfffc0000,ival0	! (Y0_1) ival = ival & 0xfffc0000
429	faddd	%f50,K2,%f26			! (Y1_0) dtmp1 = dtmp0 + K2
430	bge,pn	%icc,.update10			! (Y1_1) if (ival >= 0x7f800000)
431	nop
432.cont10:
433	sub	iy0,ival0,iy0			! (Y0_1) iy = iy - ival
434	and	ival3,-8,ind3			! (Y3_0) ind  = i & (-8)
435	ld	[%fp+tmp3],%f4			! (Y3_0) (double) iy
436
437	cmp	ival1,MASK_0x007fffff		! (Y1_1) if (ival <= 0x7fffff)
438	lda	[stridex+x1]0x82,ival3		! (Y3_1) ival = *(int*)(x)
439	fmuld	%f48,%f46,%f50			! (Y0_0) dtmp2 = dtmp1 * yy
440	fitod	%f2,%f48			! (Y2_0) yy = (double) iy
441
442	st	iy0,[%fp+tmp1]			! (Y0_1) (double) iy
443	ble,pn	%icc,.update11			! (Y1_1) if (ival <= 0x7fffff)
444	nop
445.cont11:
446	cmp	ival2,MASK_0x7f800000		! (Y2_1) if (ival >= 0x7f800000)
447	and	ival1,MASK_0x007fffff,iy1	! (Y1_1) iy = ival & 0x007fffff
448	bge,pn	%icc,.update12			! (Y2_1) if (ival >= 0x7f800000)
449	fmuld	%f26,%f62,%f42			! (Y1_0) dtmp2 = dtmp1 * yy
450.cont12:
451	cmp	ival2,MASK_0x007fffff		! (Y2_1) if (ival <= 0x7fffff)
452	ldd	[LOGFTBL_P8+ind3],%f16		! (Y3_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
453	ble,pn 	%icc,.update13			! (Y2_1) if (ival <= 0x7fffff)
454	fitod	%f4,%f26			! (Y3_0) yy = (double) iy
455.cont13:
456	add	iy1,CONST_0x20000,ival1		! (Y1_1) ival = iy + 0x20000
457	and	ival2,MASK_0x007fffff,iy2	! (Y2_1) iy = ival & 0x007fffff
458
459	and	ival1,MASK_0xfffc0000,ival1	! (Y1_1) ival = ival & 0xfffc0000
460	add	iy2,CONST_0x20000,ival2		! (Y2_1) ival = iy + 0x20000
461	fmuld	%f48,%f14,%f44			! (Y2_0) yy = yy * ldtmp1
462	faddd	%f50,K1,%f50			! (Y0_0) dtmp3 = dtmp2 + K1
463
464	cmp	ival3,MASK_0x7f800000		! (Y3_1) if (ival >= 0x7f800000)
465	sub	iy1,ival1,iy1			! (Y1_1) iy = iy - ival
466	and	ival2,MASK_0xfffc0000,ival2	! (Y2_1) ival = ival & 0xfffc0000
467	fpack32	ZERO,%f8,%f8			! (Y1_0) exp = vis_fpack32(ZERO, exp)
468
469	sub	iy2,ival2,iy2			! (Y2_1) iy = iy - ival
470	st	iy1,[%fp+tmp3]			! (Y1_1) (double) iy
471	fmuld	%f26,%f16,%f60			! (Y3_0) yy = yy * ldtmp1
472	faddd	%f42,K1,%f54			! (Y1_0) dtmp3 = dtmp2 + K1
473
474	st	iy2,[%fp+tmp2]			! (Y2_1) (double) iy
475	fmuld	K3,%f44,%f22			! (Y2_0) dtmp0 = K3 * yy
476	bge,pn	%icc,.update14			! (Y3_1) if (ival >= 0x7f800000)
477	fitod	%f6,%f40			! (Y0_0) (double)(*(int*)&exp)
478.cont14:
479	cmp	ival3,MASK_0x007fffff		! (Y3_1) if (ival <= 0x7fffff)
480	ldd	[LOGFTBL+ind1],%f58		! (Y1_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
481	fmuld	%f50,%f46,%f52			! (Y0_0) dtmp4 = dtmp3 * yy
482	fitod	%f8,%f56			! (Y1_0) (double)(*(int*)&exp)
483
484	ld	[%fp+tmp1],%f2			! (Y0_1) (double) iy
485	fmuld	K3,%f60,%f50			! (Y3_0) dtmp0 = K3 * yy
486	ble,pn	%icc,.update15			! (Y3_1) if (ival <= 0x7fffff)
487	nop
488.cont15:
489	subcc	counter,7,counter
490	fmuld	%f54,%f62,%f54			! (Y1_0) dtmp4 = dtmp3 * yy
491
492	sra	ival0,14,ival0			! (Y0_1) i  = ival >> 14;
493	bneg,pn	%icc,.tail
494	faddd	%f22,K2,%f48			! (Y2_0) dtmp1 = dtmp0 + K2
495
496	ba	.main_loop
497	nop
498
499	.align	16
500.main_loop:
501	sra	ival2,14,ival2			! (Y2_1) i  = ival >> 14;
502	ldd	[LOGFTBL+ind0],%f42		! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
503	fmuld	LN2,%f40,%f40			! (Y0_0) ty = LN2 * (double)(*(int*)&exp)
504	faddd	%f52,K0,%f22			! (Y0_0) dtmp5 = dtmp4 + K0
505
506	sra	ival1,14,ind1			! (Y1_1) i  = ival >> 14;
507	ld	[%fp+tmp3],%f4			! (Y1_1) (double) iy
508	fpack32	ZERO,%f10,%f18			! (Y2_0) exp = vis_fpack32(ZERO, exp)
509	faddd	%f50,K2,%f26			! (Y3_0) dtmp1 = dtmp0 + K2
510
511	and	ival0,-8,ind0			! (Y0_1) ind  = i & (-8)
512	lda	[x0]0x82,%f6			! (Y0_1) *(float*)&exp = *(float*)(x)
513	fmuld	LN2,%f56,%f56			! (Y1_0) LN2 * (double)(*(int*)&exp)
514	faddd	%f54,K0,%f24			! (Y1_0) dtmp5 = dtmp4 + K0
515
516	and	ind1,-8,ind1			! (Y1_1) ind  = i & (-8)
517	ldd	[LOGFTBL_P8+ind0],%f14		! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
518	fmuld	%f48,%f44,%f50			! (Y2_0) dtmp2 = dtmp1 * yy
519	fitod	%f2,%f48			! (Y0_1) yy = (double) iy
520
521	and	ival3,MASK_0x007fffff,iy3	! (Y3_1) iy = ival & 0x007fffff
522	lda	[stridex+x0]0x82,%f8		! (Y1_1) *(float*)&exp = *(float*)(x)
523	fmuld	%f22,%f46,%f22			! (Y0_0) yy = dtmp5 * yy
524	fsubd	%f40,%f42,%f40			! (Y0_0) ty = ty - ldtmp0
525
526	add	iy3,CONST_0x20000,ival3		! (Y3_1) iy + 0x20000
527	ldd	[LOGFTBL_P8+ind1],%f16		! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
528	fmuld	%f26,%f60,%f42			! (Y3_0) dtmp2 = dtmp1 * yy
529	fitod	%f4,%f26			! (Y1_1) yy = (double) iy
530
531	and	ival3,MASK_0xfffc0000,ival3	! (Y3_1) ival = ival & 0xfffc0000
532	lda	[x1]0x82,%f10			! (Y2_1) *(float*)&exp = *(float*)(x)
533	fmuld	%f24,%f62,%f24			! (Y1_0) yy = dtmp5 * yy
534	fsubd	%f56,%f58,%f58			! (Y1_0) ty = ty - ldtmp0
535
536	sub	iy3,ival3,iy3			! (Y3_1) iy = iy - ival
537	ld	[%fp+tmp2],%f2			! (Y2_1) (double) iy
538	fmuld	%f48,%f14,%f46			! (Y0_1) yy = yy * ldtmp1
539	faddd	%f50,K1,%f50			! (Y2_0) dtmp3 = dtmp2 + K1
540
541	add	x1,stridex2,x0			! x += 2*stridex
542	st	iy3,[%fp+tmp3]			! (Y3_1) (double) iy
543	fpack32	ZERO,%f12,%f20			! (Y3_0) exp = vis_fpack32(ZERO, exp)
544	faddd	%f22,%f40,%f48			! (Y0_0) yy = yy + ty
545
546	add	y,stridey,y			! y += stridey
547	lda	[stridex+x1]0x82,%f12		! (Y3_1) *(float*)&exp = *(float*)(x)
548	fmuld	%f26,%f16,%f62			! (Y1_1) yy = yy * ldtmp1
549	faddd	%f42,K1,%f54			! (Y3_0) dtmp3 = dtmp2 + K1
550
551	sra	ival3,14,ival3			! (Y3_1) i  = ival >> 14;
552	add	y,stridey,y			! y += stridey
553	lda	[x0]0x82,ival0			! (Y0_2) ival = *(int*)(x)
554	faddd	%f24,%f58,%f24			! (Y1_0) yy = yy + ty
555
556	add	x0,stridex2,x1			! x += 2*stridex
557	ldd	[LOGFTBL+ind2],%f42		! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
558	fmuld	K3,%f46,%f22			! (Y0_1) dtmp0 = K3 * yy
559	fitod	%f18,%f40			! (Y2_0) (double)(*(int*)&exp)
560
561	and	ival2,-8,ind2			! (Y2_1) ind  = i & (-8)
562	lda	[stridex+x0]0x82,ival1		! (Y1_2) ival = *(int*)(x)
563	fmuld	%f50,%f44,%f52			! (Y2_0) dtmp4 = dtmp3 * yy
564	fitod	%f20,%f56			! (Y3_0) (double)(*(int*)&exp)
565
566	cmp	ival0,MASK_0x7f800000		! (Y0_2) if (ival >= 0x7f800000)
567	lda	[x1]0x82,ival2			! (Y2_2) ival = *(int*)(x);
568	fmuld	K3,%f62,%f50			! (Y1_1) dtmp0 = K3 * yy
569	fdtos	%f48,%f4			! (Y0_0) (float)(yy)
570
571	st	%f4,[y]				! (Y0_0) write into memory
572	fmuld	%f54,%f60,%f54			! (Y3_0) dtmp4 = dtmp3 * yy
573	bge,pn 	%icc,.update16			! (Y0_2) if (ival >= 0x7f800000)
574	fdtos	%f24,%f4			! (Y1_0) (float)(yy)
575.cont16:
576	cmp	ival0,MASK_0x007fffff		! (Y0_2) if (ival <= 0x7fffff
577	ldd	[LOGFTBL+ind3],%f58		! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
578	ble,pn	%icc,.update17			! (Y0_2) if (ival <= 0x7fffff
579	faddd	%f22,K2,%f48			! (Y0_1) dtmp1 = dtmp0 + K2
580.cont17:
581	cmp	ival1,MASK_0x7f800000		! (Y1_2) if (ival >= 0x7f800000)
582	and	ival0,MASK_0x007fffff,iy0	! (Y0_2) iy = ival & 0x007fffff
583	st	%f4,[stridey+y]			! (Y1_0) write into memory
584	fmuld	LN2,%f40,%f40			! (Y2_0) ty = LN2 * (double)(*(int*)&exp)
585
586	add	iy0,CONST_0x20000,ival0		! (Y0_2) ival = iy + 0x20000
587	ldd	[LOGFTBL_P8+ind2],%f14		! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
588	faddd	%f52,K0,%f22			! (Y2_0) dtmp5 = dtmp4 + K0
589	fpack32	ZERO,%f6,%f6			! (Y0_1) exp = vis_fpack32(ZERO, exp)
590
591	and	ival0,MASK_0xfffc0000,ival0	! (Y0_2) ival = ival & 0xfffc0000
592	faddd	%f50,K2,%f26			! (Y1_1) dtmp1 = dtmp0 + K2
593	bge,pn	%icc,.update18			! (Y1_2) if (ival >= 0x7f800000)
594	fmuld	LN2,%f56,%f56			! (Y3_0) ty = LN2 * (double)(*(int*)&exp)
595.cont18:
596	sub	iy0,ival0,iy0			! (Y0_2) iy = iy - ival
597	and	ival3,-8,ind3			! (Y3_1) ind  = i & (-8)
598	ld	[%fp+tmp3],%f4			! (Y3_1) (double) iy
599	faddd	%f54,K0,%f24			! (Y3_0) dtmp5 = dtmp4 + K0
600
601	cmp	ival1,MASK_0x007fffff		! (Y1_2) if (ival <= 0x7fffff)
602	lda	[stridex+x1]0x82,ival3		! (Y3_2) ival = *(int*)(x)
603	fmuld	%f48,%f46,%f50			! (Y0_1) dtmp2 = dtmp1 * yy
604	fitod	%f2,%f48			! (Y2_1) yy = (double) iy
605
606	st	iy0,[%fp+tmp1]			! (Y0_2) (double) iy
607	fmuld	%f22,%f44,%f22			! (Y2_0) yy = dtmp5 * yy
608	ble,pn	%icc,.update19			! (Y1_2) if (ival <= 0x7fffff)
609	fsubd	%f40,%f42,%f40			! (Y2_0) ty = ty - ldtmp0
610.cont19:
611	cmp	ival2,MASK_0x7f800000		! (Y2_2) if (ival >= 0x7f800000)
612	and	ival1,MASK_0x007fffff,iy1	! (Y1_2) iy = ival & 0x007fffff
613	bge,pn	%icc,.update20			! (Y2_2) if (ival >= 0x7f800000)
614	fmuld	%f26,%f62,%f42			! (Y1_1) dtmp2 = dtmp1 * yy
615.cont20:
616	cmp	ival2,MASK_0x007fffff		! (Y2_2) if (ival <= 0x7fffff)
617	ldd	[LOGFTBL_P8+ind3],%f16		! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
618	ble,pn	%icc,.update21			! (Y2_2) if (ival <= 0x7fffff)
619	fitod	%f4,%f26			! (Y3_1) yy = (double) iy
620.cont21:
621	add	iy1,CONST_0x20000,ival1		! (Y1_2) ival = iy + 0x20000
622	and	ival2,MASK_0x007fffff,iy2	! (Y2_2) iy = ival & 0x007fffff
623	fmuld	%f24,%f60,%f24			! (Y3_0) yy = dtmp5 * yy
624	fsubd	%f56,%f58,%f58			! (Y3_0) ty = ty - ldtmp0
625
626	and	ival1,MASK_0xfffc0000,ival1	! (Y1_2) ival = ival & 0xfffc0000
627	add	iy2,CONST_0x20000,ival2		! (Y2_2) ival = iy + 0x20000
628	fmuld	%f48,%f14,%f44			! (Y2_1) yy = yy * ldtmp1
629	faddd	%f50,K1,%f50			! (Y0_1) dtmp3 = dtmp2 + K1
630
631	sub	iy1,ival1,iy1			! (Y1_2) iy = iy - ival
632	and	ival2,MASK_0xfffc0000,ival2	! (Y2_2) ival = ival & 0xfffc0000
633	fpack32	ZERO,%f8,%f8			! (Y1_1) exp = vis_fpack32(ZERO, exp)
634	faddd	%f22,%f40,%f48			! (Y2_0) yy = yy + ty
635
636	sub	iy2,ival2,iy2			! (Y2_2) iy = iy - ival
637	st	iy1,[%fp+tmp3]			! (Y1_2) (double) iy
638	fmuld	%f26,%f16,%f60			! (Y3_1) yy = yy * ldtmp1
639	faddd	%f42,K1,%f54			! (Y1_1) dtmp3 = dtmp2 + K1
640
641	cmp	ival3,MASK_0x7f800000		! (Y3_2) if (ival >= 0x7f800000)
642	add	y,stridey,y			! y += stridey
643	st	iy2,[%fp+tmp2]			! (Y2_2) (double) iy
644	faddd	%f24,%f58,%f24			! (Y3_0) yy = yy + ty
645
646	add	y,stridey,y			! y += stridey
647	fmuld	K3,%f44,%f22			! (Y2_1) dtmp0 = K3 * yy
648	bge,pn	%icc,.update22			! (Y3_2) if (ival >= 0x7f800000)
649	fitod	%f6,%f40			! (Y0_1)(double)(*(int*)&exp)
650.cont22:
651	cmp	ival3,MASK_0x007fffff		! (Y3_2) if (ival <= 0x7fffff)
652	ldd	[LOGFTBL+ind1],%f58		! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
653	fmuld	%f50,%f46,%f52			! (Y0_1) dtmp4 = dtmp3 * yy
654	fitod	%f8,%f56			! (Y1_1) (double)(*(int*)&exp)
655
656	ld	[%fp+tmp1],%f2			! (Y0_2) (double) iy
657	fmuld	K3,%f60,%f50			! (Y3_1) dtmp0 = K3 * yy
658	ble,pn	%icc,.update23			! (Y3_2) if (ival <= 0x7fffff)
659	fdtos	%f48,%f4			! (Y2_0) (float)(yy)
660.cont23:
661	subcc	counter,4,counter		! update cycle counter
662	st	%f4,[y]				! (Y2_0) write into memory
663	fmuld	%f54,%f62,%f54			! (Y1_1) dtmp4 = dtmp3 * yy
664	fdtos	%f24,%f4			! (Y3_0)(float)(yy)
665
666	sra	ival0,14,ival0			! (Y0_2) i  = ival >> 14;
667	st	%f4,[stridey+y]			! (Y3_0) write into memory
668	bpos,pt	%icc,.main_loop
669	faddd	%f22,K2,%f48			! (Y2_1) dtmp1 = dtmp0 + K2
670
671.tail:
672	addcc	counter,7,counter
673	add	y,stridey,y			! y += stridey
674	bneg,pn	%icc,.end_loop
675
676	sra	ival2,14,ival2			! (Y2_1) i  = ival >> 14;
677	ldd	[LOGFTBL+ind0],%f42		! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
678	fmuld	LN2,%f40,%f40			! (Y0_0) ty = LN2 * (double)(*(int*)&exp)
679	faddd	%f52,K0,%f22			! (Y0_0) dtmp5 = dtmp4 + K0
680
681	sra	ival1,14,ind1			! (Y1_1) i  = ival >> 14;
682	ld	[%fp+tmp3],%f4			! (Y1_1) (double) iy
683	fpack32	ZERO,%f10,%f18			! (Y2_0) exp = vis_fpack32(ZERO, exp)
684	faddd	%f50,K2,%f26			! (Y3_0) dtmp1 = dtmp0 + K2
685
686	and	ival0,-8,ind0			! (Y0_1) ind  = i & (-8)
687	lda	[x0]0x82,%f6			! (Y0_1) *(float*)&exp = *(float*)(x)
688	fmuld	LN2,%f56,%f56			! (Y1_0) LN2 * (double)(*(int*)&exp)
689	faddd	%f54,K0,%f24			! (Y1_0) dtmp5 = dtmp4 + K0
690
691	and	ind1,-8,ind1			! (Y1_1) ind  = i & (-8)
692	ldd	[LOGFTBL_P8+ind0],%f14		! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
693	fmuld	%f48,%f44,%f50			! (Y2_0) dtmp2 = dtmp1 * yy
694	fitod	%f2,%f48			! (Y0_1) yy = (double) iy
695
696	and	ival3,MASK_0x007fffff,ival1	! (Y3_1) iy = ival & 0x007fffff
697	lda	[stridex+x0]0x82,%f8		! (Y1_1) *(float*)&exp = *(float*)(x)
698	fmuld	%f22,%f46,%f22			! (Y0_0) yy = dtmp5 * yy
699	fsubd	%f40,%f42,%f40			! (Y0_0) ty = ty - ldtmp0
700
701	add	iy3,CONST_0x20000,ival3		! (Y3_1) iy + 0x20000
702	ldd	[LOGFTBL_P8+ind1],%f16		! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
703	fmuld	%f26,%f60,%f42			! (Y3_0) dtmp2 = dtmp1 * yy
704	fitod	%f4,%f26			! (Y1_1) yy = (double) iy
705
706	and	ival3,MASK_0xfffc0000,ival3	! (Y3_1) ival = ival & 0xfffc0000
707	lda	[x1]0x82,%f10			! (Y2_1) *(float*)&exp = *(float*)(x)
708	fmuld	%f24,%f62,%f24			! (Y1_0) yy = dtmp5 * yy
709	fsubd	%f56,%f58,%f58			! (Y1_0) ty = ty - ldtmp0
710
711	sub	iy3,ival3,iy3			! (Y3_1) iy = iy - ival
712	ld	[%fp+tmp2],%f2			! (Y2_1) (double) iy
713	fmuld	%f48,%f14,%f46			! (Y0_1) yy = yy * ldtmp1
714	faddd	%f50,K1,%f50			! (Y2_0) dtmp3 = dtmp2 + K1
715
716	add	x1,stridex2,x0			! x += 2*stridex
717	st	iy3,[%fp+tmp3]			! (Y3_1) (double) iy
718	fpack32	ZERO,%f12,%f20			! (Y3_0) exp = vis_fpack32(ZERO, exp)
719	faddd	%f22,%f40,%f48			! (Y0_0) yy = yy + ty
720
721	lda	[stridex+x1]0x82,%f12		! (Y3_1) *(float*)&exp = *(float*)(x)
722	fmuld	%f26,%f16,%f62			! (Y1_1) yy = yy * ldtmp1
723	faddd	%f42,K1,%f54			! (Y3_0) dtmp3 = dtmp2 + K1
724
725	sra	ival3,14,ival3			! (Y3_1) i  = ival >> 14;
726	add	y,stridey,y			! y += stridey
727	faddd	%f24,%f58,%f24			! (Y1_0) yy = yy + ty
728
729	subcc	counter,1,counter
730	ldd	[LOGFTBL+ind2],%f42		! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
731	fmuld	K3,%f46,%f22			! (Y0_1) dtmp0 = K3 * yy
732	fitod	%f18,%f40			! (Y2_0) (double)(*(int*)&exp)
733
734	and	ival2,-8,ind2			! (Y2_1) ind  = i & (-8)
735	fmuld	%f50,%f44,%f52			! (Y2_0) dtmp4 = dtmp3 * yy
736	fitod	%f20,%f56			! (Y3_0) (double)(*(int*)&exp)
737
738	fmuld	K3,%f62,%f50			! (Y1_1) dtmp0 = K3 * yy
739	fdtos	%f48,%f4			! (Y0_0) (float)(yy)
740
741	st	%f4,[y]				! (Y0_0) write into memory
742	fmuld	%f54,%f60,%f54			! (Y3_0) dtmp4 = dtmp3 * yy
743	bneg,pn	%icc,.end_loop
744	fdtos	%f24,%f4			! (Y1_0) (float)(yy)
745
746	add	y,stridey,y			! y += stridey
747	subcc	counter,1,counter
748	ldd	[LOGFTBL+ind3],%f58		! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
749	faddd	%f22,K2,%f48			! (Y0_1) dtmp1 = dtmp0 + K2
750
751	st	%f4,[y]				! (Y1_0) write into memory
752	bneg,pn	%icc,.end_loop
753	fmuld	LN2,%f40,%f40			! (Y2_0) ty = LN2 * (double)(*(int*)&exp)
754
755	ldd	[LOGFTBL_P8+ind2],%f14		! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
756	faddd	%f52,K0,%f22			! (Y2_0) dtmp5 = dtmp4 + K0
757	fpack32	ZERO,%f6,%f6			! (Y0_1) exp = vis_fpack32(ZERO, exp)
758
759	faddd	%f50,K2,%f26			! (Y1_1) dtmp1 = dtmp0 + K2
760	fmuld	LN2,%f56,%f56			! (Y3_0) ty = LN2 * (double)(*(int*)&exp)
761
762	and	ival3,-8,ind3			! (Y3_1) ind  = i & (-8)
763	ld	[%fp+tmp3],%f4			! (Y3_1) (double) iy
764	faddd	%f54,K0,%f24			! (Y3_0) dtmp5 = dtmp4 + K0
765
766	fmuld	%f48,%f46,%f50			! (Y0_1) dtmp2 = dtmp1 * yy
767	fitod	%f2,%f48			! (Y2_1) yy = (double) iy
768
769	fmuld	%f22,%f44,%f22			! (Y2_0) yy = dtmp5 * yy
770	fsubd	%f40,%f42,%f40			! (Y2_0) ty = ty - ldtmp0
771
772	fmuld	%f26,%f62,%f42			! (Y1_1) dtmp2 = dtmp1 * yy
773
774	ldd	[LOGFTBL_P8+ind3],%f16		! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
775	fitod	%f4,%f26			! (Y3_1) yy = (double) iy
776
777	fmuld	%f24,%f60,%f24			! (Y3_0) yy = dtmp5 * yy
778	fsubd	%f56,%f58,%f58			! (Y3_0) ty = ty - ldtmp0
779
780	fmuld	%f48,%f14,%f44			! (Y2_1) yy = yy * ldtmp1
781	faddd	%f50,K1,%f50			! (Y0_1) dtmp3 = dtmp2 + K1
782
783	fpack32	ZERO,%f8,%f8			! (Y1_1) exp = vis_fpack32(ZERO, exp)
784	faddd	%f22,%f40,%f48			! (Y2_0) yy = yy + ty
785
786	fmuld	%f26,%f16,%f60			! (Y3_1) yy = yy * ldtmp1
787	faddd	%f42,K1,%f54			! (Y1_1) dtmp3 = dtmp2 + K1
788
789	add	y,stridey,y			! y += stridey
790	faddd	%f24,%f58,%f24			! (Y3_0) yy = yy + ty
791
792	subcc	counter,1,counter
793	fmuld	K3,%f44,%f22			! (Y2_1) dtmp0 = K3 * yy
794	fitod	%f6,%f40			! (Y0_1)(double)(*(int*)&exp)
795
796	ldd	[LOGFTBL+ind1],%f58		! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
797	fmuld	%f50,%f46,%f52			! (Y0_1) dtmp4 = dtmp3 * yy
798	fitod	%f8,%f56			! (Y1_1) (double)(*(int*)&exp)
799
800	fmuld	K3,%f60,%f50			! (Y3_1) dtmp0 = K3 * yy
801	fdtos	%f48,%f4			! (Y2_0) (float)(yy)
802
803	st	%f4,[y]				! (Y2_0) write into memory
804	fmuld	%f54,%f62,%f54			! (Y1_1) dtmp4 = dtmp3 * yy
805	bneg,pn	%icc,.end_loop
806	fdtos	%f24,%f4			! (Y3_0)(float)(yy)
807
808	subcc	counter,1,counter		! update cycle counter
809	add	y,stridey,y
810
811	st	%f4,[y]				! (Y3_0) write into memory
812	bneg,pn	%icc,.end_loop
813	faddd	%f22,K2,%f48			! (Y2_1) dtmp1 = dtmp0 + K2
814
815	ldd	[LOGFTBL+ind0],%f42		! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
816	fmuld	LN2,%f40,%f40			! (Y0_0) ty = LN2 * (double)(*(int*)&exp)
817	faddd	%f52,K0,%f22			! (Y0_0) dtmp5 = dtmp4 + K0
818
819	fpack32	ZERO,%f10,%f18			! (Y2_0) exp = vis_fpack32(ZERO, exp)
820
821	fmuld	LN2,%f56,%f56			! (Y1_0) LN2 * (double)(*(int*)&exp)
822	faddd	%f54,K0,%f24			! (Y1_0) dtmp5 = dtmp4 + K0
823
824	fmuld	%f48,%f44,%f50			! (Y2_0) dtmp2 = dtmp1 * yy
825
826	fmuld	%f22,%f46,%f22			! (Y0_0) yy = dtmp5 * yy
827	fsubd	%f40,%f42,%f40			! (Y0_0) ty = ty - ldtmp0
828
829	fmuld	%f24,%f62,%f24			! (Y1_0) yy = dtmp5 * yy
830	fsubd	%f56,%f58,%f58			! (Y1_0) ty = ty - ldtmp0
831
832	subcc	counter,1,counter
833	faddd	%f50,K1,%f50			! (Y2_0) dtmp3 = dtmp2 + K1
834
835	faddd	%f22,%f40,%f48			! (Y0_0) yy = yy + ty
836
837	add	y,stridey,y			! y += stridey
838	faddd	%f24,%f58,%f24			! (Y1_0) yy = yy + ty
839
840	ldd	[LOGFTBL+ind2],%f42		! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
841	fitod	%f18,%f40			! (Y2_0) (double)(*(int*)&exp)
842
843	fmuld	%f50,%f44,%f52			! (Y2_0) dtmp4 = dtmp3 * yy
844
845	fdtos	%f48,%f4			! (Y0_0) (float)(yy)
846
847	st	%f4,[y]				! (Y0_0) write into memory
848	bneg,pn	%icc,.end_loop
849	fdtos	%f24,%f4			! (Y1_0) (float)(yy)
850
851	add	y,stridey,y			! y += stridey
852	subcc	counter,1,counter
853	st	%f4,[y]				! (Y1_0) write into memory
854	bneg,pn	%icc,.end_loop
855	fmuld	LN2,%f40,%f40			! (Y2_0) ty = LN2 * (double)(*(int*)&exp)
856
857	faddd	%f52,K0,%f22			! (Y2_0) dtmp5 = dtmp4 + K0
858
859	fmuld	%f22,%f44,%f22			! (Y2_0) yy = dtmp5 * yy
860	fsubd	%f40,%f42,%f40			! (Y2_0) ty = ty - ldtmp0
861
862	add	y,stridey,y			! y += stridey
863	faddd	%f22,%f40,%f48			! (Y2_0) yy = yy + ty
864
865	fdtos	%f48,%f4			! (Y2_0) (float)(yy)
866
867	st	%f4,[y]				! (Y2_0) write into memory
! .end_loop: reached by fall-through after the last store of the unrolled
! body and by the bneg,pn exits taken when a subcc drives counter negative.
! Control goes back to .begin (defined outside this chunk).
868.end_loop:
869	ba	.begin
870	nop					! (delay slot)
871
! .end: function exit.  The restore executes in the delay slot of ret; its
! result (%g0 + 0) is written to %o0, so 0 is left in %o0.
! NOTE(review): the branches that reach .end are outside this chunk.
872.end:
873	ret
874	restore	%g0,0,%o0			! (delay slot) pop window, %o0 = 0
875
876	.align	16
! .update2: side exit paired with .cont2.
!   counter <= 0 (signed): return to .cont2 with nothing changed.
!   otherwise: tmp5 <- x0 + stridex, tmp0 <- counter, counter <- 0,
!   then return to .cont2.  x0 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update2:
	cmp	counter,0
	ble	.cont2				! counter <= 0: leave state alone
	nop					! (delay slot)

	add	x0,stridex,x0			! temporarily advance x0 by stridex
	stx	x0,[%fp+tmp5]			! tmp5 = x0 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = counter
	sub	x0,stridex,x0			! undo the temporary advance
	ba	.cont2
	clr	counter				! (delay slot) counter = 0
889
890	.align	16
! .update3: side exit paired with .cont3.
!   counter <= 0 (signed): return to .cont3 with nothing changed.
!   otherwise: tmp5 <- x0 + stridex, tmp0 <- counter, counter <- 0,
!   then return to .cont3.  x0 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update3:
	cmp	counter,0
	ble	.cont3				! counter <= 0: leave state alone
	nop					! (delay slot)

	add	x0,stridex,x0			! temporarily advance x0 by stridex
	stx	x0,[%fp+tmp5]			! tmp5 = x0 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = counter
	sub	x0,stridex,x0			! undo the temporary advance
	ba	.cont3
	clr	counter				! (delay slot) counter = 0
903
904	.align	16
! .update4: side exit paired with .cont4.
!   counter <= 1 (signed): return to .cont4 with nothing changed.
!   otherwise: tmp5 <- x1, tmp0 <- counter - 1, counter <- 1,
!   then return to .cont4.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update4:
	cmp	counter,1
	ble	.cont4				! counter <= 1: leave state alone
	nop					! (delay slot)

	sub	counter,1,counter		! counter - 1, value saved below
	stx	x1,[%fp+tmp5]			! tmp5 = x1
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 1
	ba	.cont4
	mov	1,counter			! (delay slot) counter = 1
916
917	.align	16
! .update5: side exit paired with .cont5.
!   counter <= 1 (signed): return to .cont5 with nothing changed.
!   otherwise: tmp5 <- x1, tmp0 <- counter - 1, counter <- 1,
!   then return to .cont5.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update5:
	cmp	counter,1
	ble	.cont5				! counter <= 1: leave state alone
	nop					! (delay slot)

	sub	counter,1,counter		! counter - 1, value saved below
	stx	x1,[%fp+tmp5]			! tmp5 = x1
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 1
	ba	.cont5
	mov	1,counter			! (delay slot) counter = 1
929
930	.align	16
! .update6: side exit paired with .cont6.
!   counter <= 2 (signed): return to .cont6 with nothing changed.
!   otherwise: tmp5 <- x1 + stridex, tmp0 <- counter - 2, counter <- 2,
!   then return to .cont6.  x1 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update6:
	cmp	counter,2
	ble	.cont6				! counter <= 2: leave state alone
	nop					! (delay slot)

	sub	counter,2,counter		! counter - 2, value saved below
	add	x1,stridex,x1			! temporarily advance x1 by stridex
	stx	x1,[%fp+tmp5]			! tmp5 = x1 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 2
	sub	x1,stridex,x1			! undo the temporary advance
	ba	.cont6
	mov	2,counter			! (delay slot) counter = 2
944
945	.align	16
! .update7: side exit paired with .cont7.
!   counter <= 2 (signed): return to .cont7 with nothing changed.
!   otherwise: tmp5 <- x1 + stridex, tmp0 <- counter - 2, counter <- 2,
!   then return to .cont7.  x1 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update7:
	cmp	counter,2
	ble	.cont7				! counter <= 2: leave state alone
	nop					! (delay slot)

	sub	counter,2,counter		! counter - 2, value saved below
	add	x1,stridex,x1			! temporarily advance x1 by stridex
	stx	x1,[%fp+tmp5]			! tmp5 = x1 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 2
	sub	x1,stridex,x1			! undo the temporary advance
	ba	.cont7
	mov	2,counter			! (delay slot) counter = 2
959
960	.align	16
! .update8: side exit paired with .cont8.
!   counter <= 3 (signed): return to .cont8 with nothing changed.
!   otherwise: tmp5 <- x0, tmp0 <- counter - 3, counter <- 3,
!   then return to .cont8.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update8:
	cmp	counter,3
	ble	.cont8				! counter <= 3: leave state alone
	nop					! (delay slot)

	sub	counter,3,counter		! counter - 3, value saved below
	stx	x0,[%fp+tmp5]			! tmp5 = x0
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 3
	ba	.cont8
	mov	3,counter			! (delay slot) counter = 3
972
973	.align	16
! .update9: side exit paired with .cont9.
!   counter <= 3 (signed): return to .cont9 with nothing changed.
!   otherwise: tmp5 <- x0, tmp0 <- counter - 3, counter <- 3,
!   then return to .cont9.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update9:
	cmp	counter,3
	ble	.cont9				! counter <= 3: leave state alone
	nop					! (delay slot)

	sub	counter,3,counter		! counter - 3, value saved below
	stx	x0,[%fp+tmp5]			! tmp5 = x0
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 3
	ba	.cont9
	mov	3,counter			! (delay slot) counter = 3
985
986	.align	16
! .update10: side exit paired with .cont10.
!   counter <= 4 (signed): return to .cont10 with nothing changed.
!   otherwise: tmp5 <- x0 + stridex, tmp0 <- counter - 4, counter <- 4,
!   then return to .cont10.  x0 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update10:
	cmp	counter,4
	ble	.cont10				! counter <= 4: leave state alone
	nop					! (delay slot)

	sub	counter,4,counter		! counter - 4, value saved below
	add	x0,stridex,x0			! temporarily advance x0 by stridex
	stx	x0,[%fp+tmp5]			! tmp5 = x0 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 4
	sub	x0,stridex,x0			! undo the temporary advance
	ba	.cont10
	mov	4,counter			! (delay slot) counter = 4
1000
1001	.align	16
! .update11: side exit paired with .cont11.
!   counter <= 4 (signed): return to .cont11 with nothing changed.
!   otherwise: tmp5 <- x0 + stridex, tmp0 <- counter - 4, counter <- 4,
!   then return to .cont11.  x0 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update11:
	cmp	counter,4
	ble	.cont11				! counter <= 4: leave state alone
	nop					! (delay slot)

	sub	counter,4,counter		! counter - 4, value saved below
	add	x0,stridex,x0			! temporarily advance x0 by stridex
	stx	x0,[%fp+tmp5]			! tmp5 = x0 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 4
	sub	x0,stridex,x0			! undo the temporary advance
	ba	.cont11
	mov	4,counter			! (delay slot) counter = 4
1015
1016	.align	16
! .update12: side exit paired with .cont12.
!   counter <= 5 (signed): return to .cont12 with nothing changed.
!   otherwise: tmp5 <- x1, tmp0 <- counter - 5, counter <- 5,
!   then return to .cont12.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update12:
	cmp	counter,5
	ble	.cont12				! counter <= 5: leave state alone
	nop					! (delay slot)

	sub	counter,5,counter		! counter - 5, value saved below
	stx	x1,[%fp+tmp5]			! tmp5 = x1
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 5
	ba	.cont12
	mov	5,counter			! (delay slot) counter = 5
1028
1029	.align	16
! .update13: side exit paired with .cont13.
!   counter <= 5 (signed): return to .cont13 with nothing changed.
!   otherwise: tmp5 <- x1, tmp0 <- counter - 5, counter <- 5,
!   then return to .cont13.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update13:
	cmp	counter,5
	ble	.cont13				! counter <= 5: leave state alone
	nop					! (delay slot)

	sub	counter,5,counter		! counter - 5, value saved below
	stx	x1,[%fp+tmp5]			! tmp5 = x1
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 5
	ba	.cont13
	mov	5,counter			! (delay slot) counter = 5
1041
1042	.align	16
! .update14: side exit paired with .cont14.
!   counter <= 6 (signed): return to .cont14 with nothing changed.
!   otherwise: tmp5 <- x1 + stridex, tmp0 <- counter - 6, counter <- 6,
!   then return to .cont14.  x1 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update14:
	cmp	counter,6
	ble	.cont14				! counter <= 6: leave state alone
	nop					! (delay slot)

	sub	counter,6,counter		! counter - 6, value saved below
	add	x1,stridex,x1			! temporarily advance x1 by stridex
	stx	x1,[%fp+tmp5]			! tmp5 = x1 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 6
	sub	x1,stridex,x1			! undo the temporary advance
	ba	.cont14
	mov	6,counter			! (delay slot) counter = 6
1056
1057	.align	16
! .update15: side exit paired with .cont15.
!   counter <= 6 (signed): return to .cont15 with nothing changed.
!   otherwise: tmp5 <- x1 + stridex, tmp0 <- counter - 6, counter <- 6,
!   then return to .cont15.  x1 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update15:
	cmp	counter,6
	ble	.cont15				! counter <= 6: leave state alone
	nop					! (delay slot)

	sub	counter,6,counter		! counter - 6, value saved below
	add	x1,stridex,x1			! temporarily advance x1 by stridex
	stx	x1,[%fp+tmp5]			! tmp5 = x1 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 6
	sub	x1,stridex,x1			! undo the temporary advance
	ba	.cont15
	mov	6,counter			! (delay slot) counter = 6
1071
1072	.align	16
! .update16: side exit paired with .cont16.
!   counter <= 0 (signed, predicted taken): return to .cont16 unchanged.
!   otherwise: tmp5 <- x0, tmp0 <- counter, counter <- 0,
!   then return to .cont16.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update16:
	cmp	counter,0
	ble,pt	%icc,.cont16			! counter <= 0: leave state alone
	nop					! (delay slot)

	stx	x0,[%fp+tmp5]			! tmp5 = x0
	st	counter,[%fp+tmp0]		! tmp0 = counter
	ba	.cont16
	clr	counter				! (delay slot) counter = 0
1083
1084	.align	16
! .update17: side exit paired with .cont17.
!   counter <= 0 (signed, predicted taken): return to .cont17 unchanged.
!   otherwise: tmp5 <- x0, tmp0 <- counter, counter <- 0,
!   then return to .cont17.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update17:
	cmp	counter,0
	ble,pt	%icc,.cont17			! counter <= 0: leave state alone
	nop					! (delay slot)

	stx	x0,[%fp+tmp5]			! tmp5 = x0
	st	counter,[%fp+tmp0]		! tmp0 = counter
	ba	.cont17
	clr	counter				! (delay slot) counter = 0
1095
1096	.align	16
! .update18: side exit paired with .cont18.
!   counter <= 1 (signed, predicted taken): return to .cont18 unchanged.
!   otherwise: tmp5 <- x0 + stridex, tmp0 <- counter - 1, counter <- 1,
!   then return to .cont18.  x0 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update18:
	cmp	counter,1
	ble,pt	%icc,.cont18			! counter <= 1: leave state alone
	nop					! (delay slot)

	sub	counter,1,counter		! counter - 1, value saved below
	add	x0,stridex,x0			! temporarily advance x0 by stridex
	stx	x0,[%fp+tmp5]			! tmp5 = x0 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 1
	sub	x0,stridex,x0			! undo the temporary advance
	ba	.cont18
	mov	1,counter			! (delay slot) counter = 1
1110
1111	.align	16
! .update19: side exit paired with .cont19.
!   counter <= 1 (signed, predicted taken): return to .cont19 unchanged.
!   otherwise: tmp5 <- x0 + stridex, tmp0 <- counter - 1, counter <- 1,
!   then return to .cont19.  x0 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update19:
	cmp	counter,1
	ble,pt	%icc,.cont19			! counter <= 1: leave state alone
	nop					! (delay slot)

	sub	counter,1,counter		! counter - 1, value saved below
	add	x0,stridex,x0			! temporarily advance x0 by stridex
	stx	x0,[%fp+tmp5]			! tmp5 = x0 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 1
	sub	x0,stridex,x0			! undo the temporary advance
	ba	.cont19
	mov	1,counter			! (delay slot) counter = 1
1125
1126	.align	16
! .update20: side exit paired with .cont20.
!   counter <= 2 (signed, predicted taken): return to .cont20 unchanged.
!   otherwise: tmp5 <- x1, tmp0 <- counter - 2, counter <- 2,
!   then return to .cont20.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update20:
	cmp	counter,2
	ble,pt	%icc,.cont20			! counter <= 2: leave state alone
	nop					! (delay slot)

	sub	counter,2,counter		! counter - 2, value saved below
	stx	x1,[%fp+tmp5]			! tmp5 = x1
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 2
	ba	.cont20
	mov	2,counter			! (delay slot) counter = 2
1138
1139	.align	16
! .update21: side exit paired with .cont21.
!   counter <= 2 (signed, predicted taken): return to .cont21 unchanged.
!   otherwise: tmp5 <- x1, tmp0 <- counter - 2, counter <- 2,
!   then return to .cont21.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update21:
	cmp	counter,2
	ble,pt	%icc,.cont21			! counter <= 2: leave state alone
	nop					! (delay slot)

	sub	counter,2,counter		! counter - 2, value saved below
	stx	x1,[%fp+tmp5]			! tmp5 = x1
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 2
	ba	.cont21
	mov	2,counter			! (delay slot) counter = 2
1151
1152	.align	16
! .update22: side exit paired with .cont22.
!   counter <= 3 (signed, predicted taken): return to .cont22 unchanged.
!   otherwise: tmp5 <- x1 + stridex, tmp0 <- counter - 3, counter <- 3,
!   then return to .cont22.  x1 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update22:
	cmp	counter,3
	ble,pt	%icc,.cont22			! counter <= 3: leave state alone
	nop					! (delay slot)

	sub	counter,3,counter		! counter - 3, value saved below
	add	x1,stridex,x1			! temporarily advance x1 by stridex
	stx	x1,[%fp+tmp5]			! tmp5 = x1 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 3
	sub	x1,stridex,x1			! undo the temporary advance
	ba	.cont22
	mov	3,counter			! (delay slot) counter = 3
1166
1167	.align	16
! .update23: side exit paired with .cont23.
!   counter <= 3 (signed, predicted taken): return to .cont23 unchanged.
!   otherwise: tmp5 <- x1 + stridex, tmp0 <- counter - 3, counter <- 3,
!   then return to .cont23.  x1 itself is left unchanged.
! NOTE(review): tmp0/tmp5 are read back outside this chunk -- presumably by
! the restart code; confirm before changing the slot usage.
.update23:
	cmp	counter,3
	ble,pt	%icc,.cont23			! counter <= 3: leave state alone
	nop					! (delay slot)

	sub	counter,3,counter		! counter - 3, value saved below
	add	x1,stridex,x1			! temporarily advance x1 by stridex
	stx	x1,[%fp+tmp5]			! tmp5 = x1 + stridex
	st	counter,[%fp+tmp0]		! tmp0 = old counter - 3
	sub	x1,stridex,x1			! undo the temporary advance
	ba	.cont23
	mov	3,counter			! (delay slot) counter = 3
1181
1182	.align	16
! .spec: scalar handling of one element whose raw bits are in ival0.
!   Advances x0 by stridex and y by stridey first, then dispatches:
!     (ival & 0x7fffffff) >= MASK_0x7f800000  -> .spec0
!     ival <= 0 (signed)                      -> .spec1
!   Otherwise the bit pattern is converted as an integer with fitos and the
!   table/polynomial evaluation is run on that float, with the exponent
!   rebased by 149.  The result is stored at the already-advanced y and
!   control goes to .begin1 (outside this chunk).
! NOTE(review): the inline path presumably serves positive subnormals
! (x = ival * 2**-149, hence the 149); the dispatch into .spec is outside
! this chunk -- confirm.
1183.spec:
1184	or	%g0,1,ind3			! ind3 = 1
1185	sll	ind3,31,ind3			! ind3 = 0x80000000
1186	add	x0,stridex,x0			! x += stridex
1187	sub	ind3,1,ind3			! ind3 = 0x7fffffff
1188	add	y,stridey,y			! y += stridey
1189	and	ival0,ind3,iy0			! ival & 0x7fffffff
1190	cmp	iy0,MASK_0x7f800000		! if ((ival & 0x7fffffff) >= 0x7f800000)
1191	bge,pn	%icc, .spec0			! if ((ival & 0x7fffffff) >= 0x7f800000)
1192	st	ival0,[%fp+tmp1]		! (delay slot, runs on both paths) tmp1 = ival
1193	cmp	ival0,0				! if (ival <= 0)
1194	ble,pn	%icc,.spec1			! if (ival <= 0)
1195	nop					! (delay slot)
1196
1197	ld	[%fp+tmp1],%f12			! reload ival bits into an FP register
1198	fitos	%f12,%f14			! value = (float) ival
1199	st	%f14,[%fp+tmp2]			! ival = *(int*) &value
1200	ld	[%fp+tmp2],ival0		! ival = *(int*) &value
1201
1202	and	ival0,MASK_0x007fffff,iy0	!  iy = ival & 0x007fffff
1203	sra	ival0,23,ival2			!  iexp = ival >> 23
1204
1205	add	iy0,CONST_0x20000,ival0		!  ival = iy + 0x20000
1206	sub	ival2,149,ival2			!  iexp = iexp - 149
1207
1208	and	ival0,MASK_0xfffc0000,ival0	!  ival = ival & 0xfffc0000
1209	st 	ival2,[%fp+tmp2]		!  (double) iexp
1210
1211	sub	iy0,ival0,iy0			!  iy = iy - ival
1212
1213	sra	ival0,14,ival0			!  i  = ival >> 14;
1214	st	iy0,[%fp+tmp1]			!  (double) iy
1215
1216	and	ival0,-8,ind0			!  ind  = i & (-8)
1217	ld	[%fp+tmp1],%f2			!  (double) iy
1218
1219	ldd	[LOGFTBL_P8+ind0],%f14		!  ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
1220	fitod	%f2,%f48			!  yy = (double) iy
1221
1222	fmuld	%f48,%f14,%f46			!  yy = yy * ldtmp1
1223
1224	ld	[%fp+tmp2],%f6			!  (double) iexp
1225	fmuld	K3,%f46,%f22			!  dtmp0 = K3 * yy
1226
1227	ldd	[LOGFTBL+ind0],%f42		!  ldtmp0 = *(double*)((char*)CONST_TBL+ind)
1228	faddd	%f22,K2,%f48			!  dtmp1 = dtmp0 + K2
1229
1230	fmuld	%f48,%f46,%f50			!  dtmp2 = dtmp1 * yy
1231
1232	faddd	%f50,K1,%f50			!  dtmp3 = dtmp2 + K1
1233
1234	fitod	%f6,%f40			!  (double) iexp
1235	fmuld	%f50,%f46,%f52			!  dtmp4 = dtmp3 * yy
1236
1237	fmuld	LN2,%f40,%f40			!  ty = LN2 * (double) iexp
1238	faddd	%f52,K0,%f22			!  dtmp5 = dtmp4 + K0
1239
1240	fmuld	%f22,%f46,%f22			!  yy = dtmp5 * yy
1241	fsubd	%f40,%f42,%f40			!  ty = ty - ldtmp0
1242
1243	faddd	%f22,%f40,%f48			!  yy = yy + ty
1244
1245	fdtos	%f48,%f4			!  (float)(yy)
1246
1247	ba	.begin1
1248	st	%f4,[y]				! (delay slot) write into memory at the advanced y
1249
1250	.align	16
! .spec0: reached from .spec when (ival & 0x7fffffff) >= MASK_0x7f800000.
! Reloads the input bits that .spec spilled to tmp1 and stores
! (value < 0.0f ? 0.0f : value) * value at y, then goes to .begin1.
! NOTE(review): by the mask's name this is the Inf/NaN case; the macro value
! is defined outside this chunk.
1250.spec0:
1251	ld	[%fp+tmp1],%f12			! value = *(float*) &ival
1252	fzeros	%f2				! y[0] = (value < 0.0f?
1253	fcmps	%fcc0,%f12,%f2			!   0.0f : value) * value
1254	fmovsug	%fcc0,%f12,%f2			! %f2 = value if value > 0.0f or unordered, else stays 0.0f
1255	fmuls	%f12,%f2,%f2			! %f2 = value * %f2
1256	ba	.begin1
1257	st	%f2,[y]				! (delay slot) write into memory
1259
1260	.align	16
! .spec1: reached from .spec when ival <= 0 (signed).  iy0 still holds
! ival & 0x7fffffff.  If that is nonzero, go to .spec2; otherwise store
! (single at LOGFTBL+568) / ZERO at y and go to .begin1.
1260.spec1:
1261	cmp	iy0,0				! if ((ival & 0x7fffffff) == 0)
1262	bne,pn	%icc,.spec2			! if ((ival & 0x7fffffff) == 0)
1263	nop					! (delay slot)
1264	ld	[LOGFTBL+568],%f4		! presumably -1.0f (table contents at +568 are outside this view)
1265	fdivs	%f4,ZERO,%f6			! y[0] = -1.0f / 0f
1266	ba	.begin1
1267	st	%f6,[y]				! (delay slot) write into memory
1269
1270	.align	16
! .spec2: reached from .spec1 when ival <= 0 and (ival & 0x7fffffff) != 0.
! Stores ZERO / ZERO at y and goes to .begin1.
1270.spec2:
1271	fdivs	ZERO,ZERO,%f6			! y[0] = 0f / 0f
1272	ba	.begin1
1273	st	%f6,[y]				! (delay slot) write into memory
1275
1276	SET_SIZE(__vlogf)
1277
1278