xref: /illumos-gate/usr/src/lib/libmvec/common/vis/__vlog.S (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23 */
24/*
25 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26 * Use is subject to license terms.
27 */
28
29	.file	"__vlog.S"
30
31#include "libm.h"
32
33	RO_DATA
34	.align	32
! TBL holds 32 entries of 16 bytes each (byte offsets 0x000-0x1ff).
! Every entry is a pair of doubles: the main loop adds the first one to
! n * ln2hi (forming h) and the second one to n * ln2lo (forming t), so
! the pair behaves as a leading part plus a low-order correction.
! The entry is selected with the byte offset
!	j = ((ix + 0x94000) >> 11) & 0x1f0
! where ix is the high word of the argument.
! NOTE(review): the values look like log(v) for the 32 breakpoints v
! used by the argument reduction (the all-zero entry would then be
! v = 1) - confirm against whatever generated this table.
35TBL:
36	.word	0xbfd522ae, 0x0738a000
37	.word	0xbd2ebe70, 0x8164c759
38	.word	0xbfd3c252, 0x77333000
39	.word	0xbd183b54, 0xb606bd5c
40	.word	0xbfd26962, 0x1134e000
41	.word	0x3d31b61f, 0x10522625
42	.word	0xbfd1178e, 0x8227e000
43	.word	0xbd31ef78, 0xce2d07f2
44	.word	0xbfcf991c, 0x6cb3c000
45	.word	0x3d390d04, 0xcd7cc834
46	.word	0xbfcd1037, 0xf2656000
47	.word	0x3d084a7e, 0x75b6f6e4
48	.word	0xbfca93ed, 0x3c8ae000
49	.word	0x3d287243, 0x50562169
50	.word	0xbfc823c1, 0x6551a000
51	.word	0xbd1e0ddb, 0x9a631e83
52	.word	0xbfc5bf40, 0x6b544000
53	.word	0x3d127023, 0xeb68981c
54	.word	0xbfc365fc, 0xb015a000
55	.word	0x3d3fd3a0, 0xafb9691b
56	.word	0xbfc1178e, 0x8227e000
57	.word	0xbd21ef78, 0xce2d07f2
58	.word	0xbfbda727, 0x63844000
59	.word	0xbd1a8940, 0x1fa71733
60	.word	0xbfb9335e, 0x5d594000
61	.word	0xbd23115c, 0x3abd47da
62	.word	0xbfb4d311, 0x5d208000
63	.word	0x3cf53a25, 0x82f4e1ef
64	.word	0xbfb08598, 0xb59e4000
65	.word	0x3d17e5dd, 0x7009902c
66	.word	0xbfa894aa, 0x149f8000
67	.word	0xbd39a19a, 0x8be97661
68	.word	0xbfa0415d, 0x89e78000
69	.word	0x3d3dddc7, 0xf461c516
70	.word	0xbf902056, 0x58930000
71	.word	0xbd3611d2, 0x7c8e8417
72	.word	0x00000000, 0x00000000
73	.word	0x00000000, 0x00000000
74	.word	0x3f9f829b, 0x0e780000
75	.word	0x3d298026, 0x7c7e09e4
76	.word	0x3faf0a30, 0xc0110000
77	.word	0x3d48a998, 0x5f325c5c
78	.word	0x3fb6f0d2, 0x8ae58000
79	.word	0xbd34b464, 0x1b664613
80	.word	0x3fbe2707, 0x6e2b0000
81	.word	0xbd2a342c, 0x2af0003c
82	.word	0x3fc29552, 0xf8200000
83	.word	0xbd35b967, 0xf4471dfc
84	.word	0x3fc5ff30, 0x70a78000
85	.word	0x3d43d3c8, 0x73e20a07
86	.word	0x3fc9525a, 0x9cf44000
87	.word	0x3d46b476, 0x41307539
88	.word	0x3fcc8ff7, 0xc79a8000
89	.word	0x3d4a21ac, 0x25d81ef3
90	.word	0x3fcfb918, 0x6d5e4000
91	.word	0xbd0d572a, 0xab993c87
92	.word	0x3fd1675c, 0xababa000
93	.word	0x3d38380e, 0x731f55c4
94	.word	0x3fd2e8e2, 0xbae12000
95	.word	0xbd267b1e, 0x99b72bd8
96	.word	0x3fd4618b, 0xc21c6000
97	.word	0xbd13d82f, 0x484c84cc
98	.word	0x3fd5d1bd, 0xbf580000
99	.word	0x3d4394a1, 0x1b1c1ee4
100! constants:
! These follow the table directly, so they sit at fixed byte offsets
! from TBL starting at 0x200.  The defines further down hard-code those
! offsets; any change to the layout here must be mirrored there.
101	.word	0x40000000,0x00000000	! 0x200: 2.0
102	.word	0x3fe55555,0x555571da	! 0x208: A1 (roughly 2/3)
103	.word	0x3fd99999,0x8702be3a	! 0x210: A2 (roughly 2/5)
104	.word	0x3fd24af7,0x3f4569b1	! 0x218: A3 (roughly 2/7)
! n is kept as an integer with the exponent in bits 20 and up, so the
! two halves of ln2 are pre-scaled to cancel that factor of 2**20.
105	.word	0x3ea62e42,0xfee00000	! scaled by 2**-20
106	.word	0x3caa39ef,0x35793c76	! scaled by 2**-20
107	.word	0xffff8000,0x00000000	! 0x230: keeps sign, exponent, top 5 fraction bits
108	.word	0x43200000		! 0x238: removed from n after the 2**1074 rescale
109	.word	0xfff00000		! 0x23c: mask for n; also the high word of -inf
110	.word	0xc0194000		! 0x240: added to ix before masking to get n
111	.word	0x4000			! 0x244: rounding increment applied before the mask
112
! Byte offsets of the constants above, relative to TBL.
113#define two		0x200
114#define A1		0x208
115#define A2		0x210
116#define A3		0x218
117#define ln2hi		0x220
118#define ln2lo		0x228
119#define mask		0x230
120#define ox43200000	0x238
121#define oxfff00000	0x23c
122#define oxc0194000	0x240
123#define ox4000		0x244
124
125! local storage indices
126
! Offsets, relative to %fp, of the four 8-byte scratch slots that the
! save instruction reserves on top of the minimum frame:
!   jnk  - throw-away store target; the three result pointers start out
!          aimed here so the stores issued while the pipeline is still
!          filling land somewhere harmless
!   tmp0 - written in .loop0, read back in .loop2; also used as a
!          float-to-integer bounce slot by .range0
!   tmp1 - written in .loop1, read back in .loop0; bounce slot for .range1
!   tmp2 - written in .loop2, read back in .loop1; bounce slot for .range2
127#define jnk		STACK_BIAS-0x8
128#define tmp2		STACK_BIAS-0x10
129#define tmp1		STACK_BIAS-0x18
130#define tmp0		STACK_BIAS-0x20
131! sizeof temp storage - must be a multiple of 16 for V9
132#define tmps		0x20
133
134! register use
135
136! i0  n
137! i1  x
138! i2  stridex
139! i3  y
140! i4  stridey
141! i5
142
143! g1  TBL
144
145! l0  j0
146! l1  j1
147! l2  j2
148! l3
149! l4  0x94000
150! l5
151! l6  0x000fffff
152! l7  0x7ff00000
153
154! o0  py0
155! o1  py1
156! o2  py2
157! o3
158! o4
159! o5
160! o7
161
162! f0  u0,q0
163! f2  v0,(two-v0)-u0,z0
164! f4  n0,f0,q0
165! f6  s0
166! f8  q
167! f10 u1,q1
168! f12 v1,(two-v1)-u1,z1
169! f14 n1,f1,q1
170! f16 s1
171! f18 t
172! f20 u2,q2
173! f22 v2,(two-v2)-u2,z2
174! f24 n2,f2,q2
175! f26 s2
176! f28 0xfff00000
177! f29 0x43200000
178! f30 0x4000
179! f31 0xc0194000
180! f32 t0
181! f34 h0,f0-(c0-h0)
182! f36 c0
183! f38 A1
184! f40 two
185! f42 t1
186! f44 h1,f1-(c1-h1)
187! f46 c1
188! f48 A2
189! f50 0xffff8000...
190! f52 t2
191! f54 h2,f2-(c2-h2)
192! f56 c2
193! f58 A3
194! f60 ln2hi
195! f62 ln2lo
196
! __vlog - natural logarithm of each element of a strided vector of
! doubles.
!
! Inputs (see the register-use table above):
!   %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey
! Both strides are given in elements and are turned into byte strides
! (shifted left by 3) on entry.
! NOTE(review): the C prototype is not visible in this file; presumably
! (int n, double *x, int stridex, double *y, int stridey) - confirm.
! NOTE(review): the first argument is fetched with an ordinary load
! before n is tested, so the caller presumably guarantees n >= 1.
!
! Per element, in the notation of the inline comments (ix = high word
! of x):
!   n = (ix + 0xc0194000) & 0xfff00000	exponent part, kept in place
!   u = x with n subtracted from its high word
!   v = u rounded to 5 fraction bits (add 0x4000, then mask)
!   j = ((ix + 0x94000) >> 11) & 0x1f0	byte offset into TBL
!   f = u - v,  s = f / (u + v),  z = s * s
!   h = n * ln2hi + TBL[j],  t = n * ln2lo + TBL[j+1]
!   c = h + f,  t += f - (c - h)
!   q = s * (z * (A1 + z * (A2 + z * A3)) + ((two - v) - u))
!   *y = c + (t + q)
!
! The loop is unrolled three ways and software pipelined.  .loop0,
! .loop1 and .loop2 each start one new element and, interleaved with
! that, advance the elements started by the other two sections.  In the
! added comments e0, e1 and e2 name the element currently in flight that
! was started by .loop0, .loop1 and .loop2 respectively.  The result of
! an element is stored through py0/py1/py2 (%o0/%o1/%o2) the next time
! its own section runs, i.e. one full trip later.
197	ENTRY(__vlog)
198	save	%sp,-SA(MINFRAME)-tmps,%sp
! PIC_SETUP and PIC_SET come from libm.h; the result left in %o0 is
! used as the address of TBL from here on.
199	PIC_SETUP(l7)
200	PIC_SET(l7,TBL,o0)
201	mov	%o0,%g1
202	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
! Integer constants for the range test and the table index.  %l7 was
! only a scratch register for the PIC macros and is reloaded here.
203	sethi	%hi(0x94000),%l4
204	sethi	%hi(0x000fffff),%l6
205	or	%l6,%lo(0x000fffff),%l6
206	sethi	%hi(0x7ff00000),%l7
! Floating point constants, see the register-use table.
207	ldd	[%g1+two],%f40
208	ldd	[%g1+A1],%f38
209	ldd	[%g1+A2],%f48
210	ldd	[%g1+A3],%f58
211	ldd	[%g1+ln2hi],%f60
212	ldd	[%g1+ln2lo],%f62
213	ldd	[%g1+mask],%f50
214	ld	[%g1+ox43200000],%f29
215	ld	[%g1+oxfff00000],%f28
216	ld	[%g1+oxc0194000],%f31
217	ld	[%g1+ox4000],%f30
218	sll	%i2,3,%i2		! scale strides
219	sll	%i4,3,%i4
! Until real elements reach the store stage, all three result pointers
! aim at the junk slot.
220	add	%fp,jnk,%o0		! precondition loop
221	add	%fp,jnk,%o1
222	add	%fp,jnk,%o2
! Zero every register (and tmp1/tmp2) that a pipeline stage reads before
! a real element has written it, so the warm-up passes only do
! arithmetic on zeros.  tmp0 needs no initial value: .loop0 writes it
! before .loop2 reads it.
223	fzero	%f2
224	fzero	%f6
225	fzero	%f18
226	fzero	%f36
227	fzero	%f12
228	fzero	%f14
229	fzero	%f16
230	fzero	%f42
231	fzero	%f44
232	fzero	%f46
233	std	%f46,[%fp+tmp1]
234	fzero	%f24
235	fzero	%f26
236	fzero	%f52
237	fzero	%f54
238	std	%f54,[%fp+tmp2]
! Every section does y += stridey before it captures the pointer, so
! start one stride early.
239	sub	%i3,%i4,%i3
240	ld	[%i1],%l0		! ix
241	ld	[%i1],%f0		! u.l[0] = *x
242	ba	.loop0
243	ld	[%i1+4],%f1		! u.l[1] = *(1+x)
244
245	.align	16
246! -- 16 byte aligned
! Section 0: starts e0 (ix in %l0, x in %f0), retires the previous e0,
! and advances e1 and e2.
247.loop0:
248	sub	%l0,%l7,%o3		! ix - 0x7ff00000
249	sub	%l6,%l0,%o4		! 0x000fffff - ix
250	fpadd32s %f0,%f31,%f4		! n = (ix + 0xc0194000) & 0xfff00000
251	fmuld	%f6,%f2,%f8		! (previous iteration)
252
! Both differences are negative only for 0x000fffff < ix < 0x7ff00000;
! anything else leaves the sign bit of the AND clear and is handled
! out of line.
253	andcc	%o3,%o4,%o4
254	bge,pn	%icc,.range0		! ix <= 0x000fffff or >= 0x7ff00000
255! delay slot
256	fands	%f4,%f28,%f4
257
258	add	%i1,%i2,%i1		! x += stridex
259	add	%i3,%i4,%i3		! y += stridey
260	fpsub32s %f0,%f4,%f0		! u.l[0] -= n
261
! .range0 re-enters here after rescaling a subnormal argument.
262.cont0:
263	lda	[%i1]%asi,%l1		! preload next argument
264	add	%l0,%l4,%l0		! j = ix + 0x94000
265	fpadd32s %f0,%f30,%f2		! v.l[0] = u.l[0] + 0x4000
266
267	lda	[%i1]%asi,%f10
268	srl	%l0,11,%l0		! j = (j >> 11) & 0x1f0
269	fand	%f2,%f50,%f2		! v.l &= 0xffff8000...
270
271	lda	[%i1+4]%asi,%f11
272	and	%l0,0x1f0,%l0
273	fitod	%f4,%f32		! (double) n
274
275	add	%l0,8,%l3		! offset of TBL[j+1]
276	fsubd	%f0,%f2,%f4		! f = u.d - v.d
277
278	faddd	%f0,%f2,%f6		! s = f / (u.d + v.d)
279
280	fsubd	%f40,%f2,%f2		! two - v.d
281	fmuld	%f32,%f60,%f34		! h = n * ln2hi + TBL[j]
282
283	faddd	%f8,%f18,%f8		! y = c + (t + q)
284	fmuld	%f32,%f62,%f32		! t = n * ln2lo + TBL[j+1]
285
286	fdivd	%f4,%f6,%f6		! e0: s
287
288	faddd	%f54,%f24,%f56		! c = h + f
289	fmuld	%f26,%f26,%f22		! z = s * s
290
! Store the finished previous e0 (the junk slot on the first pass), then
! point py0 at the destination of the e0 just started.
291	faddd	%f8,%f36,%f8		! previous e0: add c
292	st	%f8,[%o0]
293
294	st	%f9,[%o0+4]
295	mov	%i3,%o0
296	faddd	%f14,%f38,%f14		! e1: A1 + z * (A2 + z * A3)
297
298	fsubd	%f56,%f54,%f54		! t += f - (c - h)
299	fmuld	%f22,%f58,%f20		! q = ...
300
301	fsubd	%f2,%f0,%f2		! (two - v.d) - u.d
302	ldd	[%g1+%l0],%f36		! e0: TBL[j]
303
304	faddd	%f42,%f44,%f18		! e1: t + (f - (c - h))
305	fmuld	%f12,%f14,%f14		! e1: z * (A1 + ...)
306	ldd	[%fp+tmp1],%f12		! e1: saved (two - v) - u
307
308	faddd	%f20,%f48,%f20		! e2: A2 + z * A3
309	nop
310
311	faddd	%f34,%f36,%f34		! e0: h += TBL[j]
312	ldd	[%g1+%l3],%f0		! e0: TBL[j+1]
313
314	faddd	%f14,%f12,%f12		! e1: z * (...) + ((two - v) - u)
315
316	fsubd	%f24,%f54,%f54		! e2: f - (c - h)
317	fmuld	%f22,%f20,%f24		! e2: z * (A2 + z * A3)
318
319	std	%f2,[%fp+tmp0]		! e0: park (two - v) - u for .loop2
320	addcc	%i0,-1,%i0
321	ble,pn	%icc,.endloop0
322! delay slot
323	faddd	%f32,%f0,%f32		! e0: t += TBL[j+1]
324
325! -- 16 byte aligned
! Section 1: same schedule as section 0 with the roles rotated - starts
! e1 (ix in %l1, x in %f10), retires the previous e1, advances e2 and e0.
326.loop1:
327	sub	%l1,%l7,%o3		! ix - 0x7ff00000
328	sub	%l6,%l1,%o4		! 0x000fffff - ix
329	fpadd32s %f10,%f31,%f14		! n = (ix + 0xc0194000) & 0xfff00000
330	fmuld	%f16,%f12,%f8		! (previous iteration)
331
332	andcc	%o3,%o4,%o4
333	bge,pn	%icc,.range1		! ix <= 0x000fffff or >= 0x7ff00000
334! delay slot
335	fands	%f14,%f28,%f14
336
337	add	%i1,%i2,%i1		! x += stridex
338	add	%i3,%i4,%i3		! y += stridey
339	fpsub32s %f10,%f14,%f10		! u.l[0] -= n
340
341.cont1:
342	lda	[%i1]%asi,%l2		! preload next argument
343	add	%l1,%l4,%l1		! j = ix + 0x94000
344	fpadd32s %f10,%f30,%f12		! v.l[0] = u.l[0] + 0x4000
345
346	lda	[%i1]%asi,%f20
347	srl	%l1,11,%l1		! j = (j >> 11) & 0x1f0
348	fand	%f12,%f50,%f12		! v.l &= 0xffff8000...
349
350	lda	[%i1+4]%asi,%f21
351	and	%l1,0x1f0,%l1
352	fitod	%f14,%f42		! (double) n
353
354	add	%l1,8,%l3		! offset of TBL[j+1]
355	fsubd	%f10,%f12,%f14		! f = u.d - v.d
356
357	faddd	%f10,%f12,%f16		! s = f / (u.d + v.d)
358
359	fsubd	%f40,%f12,%f12		! two - v.d
360	fmuld	%f42,%f60,%f44		! h = n * ln2hi + TBL[j]
361
362	faddd	%f8,%f18,%f8		! y = c + (t + q)
363	fmuld	%f42,%f62,%f42		! t = n * ln2lo + TBL[j+1]
364
365	fdivd	%f14,%f16,%f16		! e1: s
366
367	faddd	%f34,%f4,%f36		! c = h + f
368	fmuld	%f6,%f6,%f2		! z = s * s
369
370	faddd	%f8,%f46,%f8		! previous e1: add c
371	st	%f8,[%o1]
372
373	st	%f9,[%o1+4]
374	mov	%i3,%o1
375	faddd	%f24,%f38,%f24		! e2: A1 + z * (A2 + z * A3)
376
377	fsubd	%f36,%f34,%f34		! t += f - (c - h)
378	fmuld	%f2,%f58,%f0		! q = ...
379
380	fsubd	%f12,%f10,%f12		! (two - v.d) - u.d
381	ldd	[%g1+%l1],%f46		! e1: TBL[j]
382
383	faddd	%f52,%f54,%f18		! e2: t + (f - (c - h))
384	fmuld	%f22,%f24,%f24		! e2: z * (A1 + ...)
385	ldd	[%fp+tmp2],%f22		! e2: saved (two - v) - u
386
387	faddd	%f0,%f48,%f0		! e0: A2 + z * A3
388	nop
389
390	faddd	%f44,%f46,%f44		! e1: h += TBL[j]
391	ldd	[%g1+%l3],%f10		! e1: TBL[j+1]
392
393	faddd	%f24,%f22,%f22		! e2: z * (...) + ((two - v) - u)
394
395	fsubd	%f4,%f34,%f34		! e0: f - (c - h)
396	fmuld	%f2,%f0,%f4		! e0: z * (A2 + z * A3)
397
398	std	%f12,[%fp+tmp1]		! e1: park (two - v) - u for .loop0
399	addcc	%i0,-1,%i0
400	ble,pn	%icc,.endloop1
401! delay slot
402	faddd	%f42,%f10,%f42		! e1: t += TBL[j+1]
403
404! -- 16 byte aligned
! Section 2: starts e2 (ix in %l2, x in %f20), retires the previous e2,
! advances e0 and e1.
405.loop2:
406	sub	%l2,%l7,%o3		! ix - 0x7ff00000
407	sub	%l6,%l2,%o4		! 0x000fffff - ix
408	fpadd32s %f20,%f31,%f24		! n = (ix + 0xc0194000) & 0xfff00000
409	fmuld	%f26,%f22,%f8		! (previous iteration)
410
411	andcc	%o3,%o4,%o4
412	bge,pn	%icc,.range2		! ix <= 0x000fffff or >= 0x7ff00000
413! delay slot
414	fands	%f24,%f28,%f24
415
416	add	%i1,%i2,%i1		! x += stridex
417	add	%i3,%i4,%i3		! y += stridey
418	fpsub32s %f20,%f24,%f20		! u.l[0] -= n
419
420.cont2:
421	lda	[%i1]%asi,%l0		! preload next argument
422	add	%l2,%l4,%l2		! j = ix + 0x94000
423	fpadd32s %f20,%f30,%f22		! v.l[0] = u.l[0] + 0x4000
424
425	lda	[%i1]%asi,%f0
426	srl	%l2,11,%l2		! j = (j >> 11) & 0x1f0
427	fand	%f22,%f50,%f22		! v.l &= 0xffff8000...
428
429	lda	[%i1+4]%asi,%f1
430	and	%l2,0x1f0,%l2
431	fitod	%f24,%f52		! (double) n
432
433	add	%l2,8,%l3		! offset of TBL[j+1]
434	fsubd	%f20,%f22,%f24		! f = u.d - v.d
435
436	faddd	%f20,%f22,%f26		! s = f / (u.d + v.d)
437
438	fsubd	%f40,%f22,%f22		! two - v.d
439	fmuld	%f52,%f60,%f54		! h = n * ln2hi + TBL[j]
440
441	faddd	%f8,%f18,%f8		! y = c + (t + q)
442	fmuld	%f52,%f62,%f52		! t = n * ln2lo + TBL[j+1]
443
444	fdivd	%f24,%f26,%f26		! e2: s
445
446	faddd	%f44,%f14,%f46		! c = h + f
447	fmuld	%f16,%f16,%f12		! z = s * s
448
449	faddd	%f8,%f56,%f8		! previous e2: add c
450	st	%f8,[%o2]
451
452	st	%f9,[%o2+4]
453	mov	%i3,%o2
454	faddd	%f4,%f38,%f4		! e0: A1 + z * (A2 + z * A3)
455
456	fsubd	%f46,%f44,%f44		! t += f - (c - h)
457	fmuld	%f12,%f58,%f10		! q = ...
458
459	fsubd	%f22,%f20,%f22		! (two - v.d) - u.d
460	ldd	[%g1+%l2],%f56		! e2: TBL[j]
461
462	faddd	%f32,%f34,%f18		! e0: t + (f - (c - h))
463	fmuld	%f2,%f4,%f4		! e0: z * (A1 + ...)
464	ldd	[%fp+tmp0],%f2		! e0: saved (two - v) - u
465
466	faddd	%f10,%f48,%f10		! e1: A2 + z * A3
467	nop
468
469	faddd	%f54,%f56,%f54		! e2: h += TBL[j]
470	ldd	[%g1+%l3],%f20		! e2: TBL[j+1]
471
472	faddd	%f4,%f2,%f2		! e0: z * (...) + ((two - v) - u)
473
474	fsubd	%f14,%f44,%f44		! e1: f - (c - h)
475	fmuld	%f12,%f10,%f14		! e1: z * (A2 + z * A3)
476
477	std	%f22,[%fp+tmp2]		! e2: park (two - v) - u for .loop1
478	addcc	%i0,-1,%i0
479	bg,pt	%icc,.loop0
480! delay slot
481	faddd	%f52,%f20,%f52		! e2: t += TBL[j+1]
482
483
484! Once we get to the last element, we loop three more times to finish
485! the computations in progress.  This means we will load past the end
486! of the argument vector, but since we use non-faulting loads and never
487! use the data, the only potential problem is cache miss.  (Note that
488! when the argument is 2, the only exception that occurs in the compu-
489! tation is an inexact result in the final addition, and we break out
490! of the "extra" iterations before then.)
!
! Drain logic shared by the three .endloop blocks: %i0 keeps counting
! down through 0, -1 and -2, and each of those passes feeds the next
! section the constant two in place of a real argument.  The branch is
! annulled, so the fmovd in its delay slot runs only when the branch is
! taken.  At -3 the routine returns; the dummy elements never reach
! their store stage.
491.endloop2:
492	sethi	%hi(0x40000000),%l0	! "next argument" = two
493	cmp	%i0,-3
494	bg,a,pt	%icc,.loop0
495! delay slot
496	fmovd	%f40,%f0
497	ret
498	restore
499
500	.align	16
501.endloop0:
502	sethi	%hi(0x40000000),%l1	! "next argument" = two
503	cmp	%i0,-3
504	bg,a,pt	%icc,.loop1
505! delay slot
506	fmovd	%f40,%f10
507	ret
508	restore
509
510	.align	16
511.endloop1:
512	sethi	%hi(0x40000000),%l2	! "next argument" = two
513	cmp	%i0,-3
514	bg,a,pt	%icc,.loop2
515! delay slot
516	fmovd	%f40,%f20
517	ret
518	restore
519
520
521	.align	16
! .range0: the argument for section 0 failed the fast-path range test.
! On entry %l0 = ix, %f0 = x, and %i1/%i3 have not been advanced yet.
!   ix >= 0x7ff00000 unsigned (negative, infinite or NaN): label 2
!   x == 0: label 1, result is -inf with division-by-zero raised
!   otherwise x is subnormal: rescale it and rejoin the pipeline at
!   .cont0
! Labels 1 and 2 write the result directly and then restart section 0
! with the following argument, so the pipeline slot is simply reused.
522.range0:
523	cmp	%l0,%l7
524	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
525! delay slot
526	ld	[%i1+4],%o5		! low word of x
527	fxtod	%f0,%f0			! scale by 2**1074 w/o trapping
528	st	%f0,[%fp+tmp0]		! high word of scaled x, reloaded into %l0 below
529	add	%i1,%i2,%i1		! x += stridex
530	orcc	%l0,%o5,%g0		! all bits of the original x clear?
531	be,pn	%icc,1f			! if x == 0
532! delay slot
533	add	%i3,%i4,%i3		! y += stridey
534	fpadd32s %f0,%f31,%f4		! n = (ix + 0xc0194000) & 0xfff00000
535	fands	%f4,%f28,%f4
536	fpsub32s %f0,%f4,%f0		! u.l[0] -= n
537	ld	[%fp+tmp0],%l0		! ix of the scaled value, for the table index
538	ba,pt	%icc,.cont0
539! delay slot
540	fpsub32s %f4,%f29,%f4		! n -= 0x43200000
! x is +0 or -0; %f1 (the low word) is zero here and is stored as the
! low word of the result at label 3.
5411:
542	fdivs	%f29,%f1,%f4		! raise div-by-zero
543	ba,pt	%icc,3f
544! delay slot
545	st	%f28,[%i3]		! store -inf
5462:
547	sll	%l0,1,%l0		! lop off sign bit
548	add	%i1,%i2,%i1		! x += stridex
549	orcc	%l0,%o5,%g0
550	be,pn	%icc,1b			! if x == -0
551! delay slot
552	add	%i3,%i4,%i3		! y += stridey
! x + |x| is zero for negative x, so the product below is NaN; +inf
! stays +inf and a NaN argument propagates.
553	fabsd	%f0,%f4			! *y = (x + |x|) * inf
554	faddd	%f0,%f4,%f0
555	fand	%f28,%f50,%f4		! (%f28,%f29) under the mask = -inf
556	fnegd	%f4,%f4			! +inf
557	fmuld	%f0,%f4,%f0
558	st	%f0,[%i3]
! Count the element and store the low word of the result (the delay slot
! executes whether or not the branch is taken).  The next argument is
! fetched with ordinary loads only when elements remain.
5593:
560	addcc	%i0,-1,%i0
561	ble,pn	%icc,.endloop2
562! delay slot
563	st	%f1,[%i3+4]
564	ld	[%i1],%l0		! get next argument
565	ld	[%i1],%f0
566	ba,pt	%icc,.loop0
567! delay slot
568	ld	[%i1+4],%f1
569
570
571	.align	16
! .range1: same special-case handling as .range0, for section 1
! (%l1 = ix, %f10 = x, scratch slot tmp1, drains through .endloop0).
572.range1:
573	cmp	%l1,%l7
574	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
575! delay slot
576	ld	[%i1+4],%o5		! low word of x
577	fxtod	%f10,%f10		! scale by 2**1074 w/o trapping
578	st	%f10,[%fp+tmp1]		! high word of scaled x, reloaded into %l1 below
579	add	%i1,%i2,%i1		! x += stridex
580	orcc	%l1,%o5,%g0		! all bits of the original x clear?
581	be,pn	%icc,1f			! if x == 0
582! delay slot
583	add	%i3,%i4,%i3		! y += stridey
584	fpadd32s %f10,%f31,%f14		! n = (ix + 0xc0194000) & 0xfff00000
585	fands	%f14,%f28,%f14
586	fpsub32s %f10,%f14,%f10		! u.l[0] -= n
587	ld	[%fp+tmp1],%l1		! ix of the scaled value, for the table index
588	ba,pt	%icc,.cont1
589! delay slot
590	fpsub32s %f14,%f29,%f14		! n -= 0x43200000
5911:
592	fdivs	%f29,%f11,%f14		! raise div-by-zero
593	ba,pt	%icc,3f
594! delay slot
595	st	%f28,[%i3]		! store -inf
5962:
597	sll	%l1,1,%l1		! lop off sign bit
598	add	%i1,%i2,%i1		! x += stridex
599	orcc	%l1,%o5,%g0
600	be,pn	%icc,1b			! if x == -0
601! delay slot
602	add	%i3,%i4,%i3		! y += stridey
603	fabsd	%f10,%f14		! *y = (x + |x|) * inf
604	faddd	%f10,%f14,%f10
605	fand	%f28,%f50,%f14		! (%f28,%f29) under the mask = -inf
606	fnegd	%f14,%f14		! +inf
607	fmuld	%f10,%f14,%f10
608	st	%f10,[%i3]
6093:
610	addcc	%i0,-1,%i0
611	ble,pn	%icc,.endloop0
612! delay slot
613	st	%f11,[%i3+4]
614	ld	[%i1],%l1		! get next argument
615	ld	[%i1],%f10
616	ba,pt	%icc,.loop1
617! delay slot
618	ld	[%i1+4],%f11
619
620
621	.align	16
! .range2: same special-case handling as .range0, for section 2
! (%l2 = ix, %f20 = x, scratch slot tmp2, drains through .endloop1).
622.range2:
623	cmp	%l2,%l7
624	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
625! delay slot
626	ld	[%i1+4],%o5		! low word of x
627	fxtod	%f20,%f20		! scale by 2**1074 w/o trapping
628	st	%f20,[%fp+tmp2]		! high word of scaled x, reloaded into %l2 below
629	add	%i1,%i2,%i1		! x += stridex
630	orcc	%l2,%o5,%g0		! all bits of the original x clear?
631	be,pn	%icc,1f			! if x == 0
632! delay slot
633	add	%i3,%i4,%i3		! y += stridey
634	fpadd32s %f20,%f31,%f24		! n = (ix + 0xc0194000) & 0xfff00000
635	fands	%f24,%f28,%f24
636	fpsub32s %f20,%f24,%f20		! u.l[0] -= n
637	ld	[%fp+tmp2],%l2		! ix of the scaled value, for the table index
638	ba,pt	%icc,.cont2
639! delay slot
640	fpsub32s %f24,%f29,%f24		! n -= 0x43200000
6411:
642	fdivs	%f29,%f21,%f24		! raise div-by-zero
643	ba,pt	%icc,3f
644! delay slot
645	st	%f28,[%i3]		! store -inf
6462:
647	sll	%l2,1,%l2		! lop off sign bit
648	add	%i1,%i2,%i1		! x += stridex
649	orcc	%l2,%o5,%g0
650	be,pn	%icc,1b			! if x == -0
651! delay slot
652	add	%i3,%i4,%i3		! y += stridey
653	fabsd	%f20,%f24		! *y = (x + |x|) * inf
654	faddd	%f20,%f24,%f20
655	fand	%f28,%f50,%f24		! (%f28,%f29) under the mask = -inf
656	fnegd	%f24,%f24		! +inf
657	fmuld	%f20,%f24,%f20
658	st	%f20,[%i3]
6593:
660	addcc	%i0,-1,%i0
661	ble,pn	%icc,.endloop1
662! delay slot
663	st	%f21,[%i3+4]
664	ld	[%i1],%l2		! get next argument
665	ld	[%i1],%f20
666	ba,pt	%icc,.loop2
667! delay slot
668	ld	[%i1+4],%f21
669
670	SET_SIZE(__vlog)
671
672