xref: /titanic_44/usr/src/lib/libmvec/common/vis/__vlog.S (revision 25c28e83beb90e7c80452a7c818c5e6f73a07dc8)
1*25c28e83SPiotr Jasiukajtis/*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis/*
22*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
23*25c28e83SPiotr Jasiukajtis */
24*25c28e83SPiotr Jasiukajtis/*
25*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
26*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
27*25c28e83SPiotr Jasiukajtis */
28*25c28e83SPiotr Jasiukajtis
29*25c28e83SPiotr Jasiukajtis	.file	"__vlog.S"
30*25c28e83SPiotr Jasiukajtis
31*25c28e83SPiotr Jasiukajtis#include "libm.h"
32*25c28e83SPiotr Jasiukajtis
/*
 * Read-only data addressed through %g1 by __vlog.
 *
 * TBL holds 32 entries of two doubles each (0x200 bytes in all).  The
 * code forms a byte offset j = ((ix + 0x94000) >> 11) & 0x1f0 from the
 * high word of the argument, then adds the first double of that entry
 * to n * ln2hi and the second double to n * ln2lo.  The entry at offset
 * 0x120 is zero; that is the offset produced by a high word of
 * 0x3ff00000.  (Presumably each pair is a high/low split of the log of
 * a breakpoint value -- confirm against the table generator.)
 *
 * The constants after the table start at offset 0x200 and are reached
 * through the "two" ... "ox4000" offset macros.  The first seven are
 * doubles; the last four are single words.
 */
33*25c28e83SPiotr Jasiukajtis	RO_DATA
34*25c28e83SPiotr Jasiukajtis	.align	32
35*25c28e83SPiotr JasiukajtisTBL:
36*25c28e83SPiotr Jasiukajtis	.word	0xbfd522ae, 0x0738a000
37*25c28e83SPiotr Jasiukajtis	.word	0xbd2ebe70, 0x8164c759
38*25c28e83SPiotr Jasiukajtis	.word	0xbfd3c252, 0x77333000
39*25c28e83SPiotr Jasiukajtis	.word	0xbd183b54, 0xb606bd5c
40*25c28e83SPiotr Jasiukajtis	.word	0xbfd26962, 0x1134e000
41*25c28e83SPiotr Jasiukajtis	.word	0x3d31b61f, 0x10522625
42*25c28e83SPiotr Jasiukajtis	.word	0xbfd1178e, 0x8227e000
43*25c28e83SPiotr Jasiukajtis	.word	0xbd31ef78, 0xce2d07f2
44*25c28e83SPiotr Jasiukajtis	.word	0xbfcf991c, 0x6cb3c000
45*25c28e83SPiotr Jasiukajtis	.word	0x3d390d04, 0xcd7cc834
46*25c28e83SPiotr Jasiukajtis	.word	0xbfcd1037, 0xf2656000
47*25c28e83SPiotr Jasiukajtis	.word	0x3d084a7e, 0x75b6f6e4
48*25c28e83SPiotr Jasiukajtis	.word	0xbfca93ed, 0x3c8ae000
49*25c28e83SPiotr Jasiukajtis	.word	0x3d287243, 0x50562169
50*25c28e83SPiotr Jasiukajtis	.word	0xbfc823c1, 0x6551a000
51*25c28e83SPiotr Jasiukajtis	.word	0xbd1e0ddb, 0x9a631e83
52*25c28e83SPiotr Jasiukajtis	.word	0xbfc5bf40, 0x6b544000
53*25c28e83SPiotr Jasiukajtis	.word	0x3d127023, 0xeb68981c
54*25c28e83SPiotr Jasiukajtis	.word	0xbfc365fc, 0xb015a000
55*25c28e83SPiotr Jasiukajtis	.word	0x3d3fd3a0, 0xafb9691b
56*25c28e83SPiotr Jasiukajtis	.word	0xbfc1178e, 0x8227e000
57*25c28e83SPiotr Jasiukajtis	.word	0xbd21ef78, 0xce2d07f2
58*25c28e83SPiotr Jasiukajtis	.word	0xbfbda727, 0x63844000
59*25c28e83SPiotr Jasiukajtis	.word	0xbd1a8940, 0x1fa71733
60*25c28e83SPiotr Jasiukajtis	.word	0xbfb9335e, 0x5d594000
61*25c28e83SPiotr Jasiukajtis	.word	0xbd23115c, 0x3abd47da
62*25c28e83SPiotr Jasiukajtis	.word	0xbfb4d311, 0x5d208000
63*25c28e83SPiotr Jasiukajtis	.word	0x3cf53a25, 0x82f4e1ef
64*25c28e83SPiotr Jasiukajtis	.word	0xbfb08598, 0xb59e4000
65*25c28e83SPiotr Jasiukajtis	.word	0x3d17e5dd, 0x7009902c
66*25c28e83SPiotr Jasiukajtis	.word	0xbfa894aa, 0x149f8000
67*25c28e83SPiotr Jasiukajtis	.word	0xbd39a19a, 0x8be97661
68*25c28e83SPiotr Jasiukajtis	.word	0xbfa0415d, 0x89e78000
69*25c28e83SPiotr Jasiukajtis	.word	0x3d3dddc7, 0xf461c516
70*25c28e83SPiotr Jasiukajtis	.word	0xbf902056, 0x58930000
71*25c28e83SPiotr Jasiukajtis	.word	0xbd3611d2, 0x7c8e8417
72*25c28e83SPiotr Jasiukajtis	.word	0x00000000, 0x00000000
73*25c28e83SPiotr Jasiukajtis	.word	0x00000000, 0x00000000
74*25c28e83SPiotr Jasiukajtis	.word	0x3f9f829b, 0x0e780000
75*25c28e83SPiotr Jasiukajtis	.word	0x3d298026, 0x7c7e09e4
76*25c28e83SPiotr Jasiukajtis	.word	0x3faf0a30, 0xc0110000
77*25c28e83SPiotr Jasiukajtis	.word	0x3d48a998, 0x5f325c5c
78*25c28e83SPiotr Jasiukajtis	.word	0x3fb6f0d2, 0x8ae58000
79*25c28e83SPiotr Jasiukajtis	.word	0xbd34b464, 0x1b664613
80*25c28e83SPiotr Jasiukajtis	.word	0x3fbe2707, 0x6e2b0000
81*25c28e83SPiotr Jasiukajtis	.word	0xbd2a342c, 0x2af0003c
82*25c28e83SPiotr Jasiukajtis	.word	0x3fc29552, 0xf8200000
83*25c28e83SPiotr Jasiukajtis	.word	0xbd35b967, 0xf4471dfc
84*25c28e83SPiotr Jasiukajtis	.word	0x3fc5ff30, 0x70a78000
85*25c28e83SPiotr Jasiukajtis	.word	0x3d43d3c8, 0x73e20a07
86*25c28e83SPiotr Jasiukajtis	.word	0x3fc9525a, 0x9cf44000
87*25c28e83SPiotr Jasiukajtis	.word	0x3d46b476, 0x41307539
88*25c28e83SPiotr Jasiukajtis	.word	0x3fcc8ff7, 0xc79a8000
89*25c28e83SPiotr Jasiukajtis	.word	0x3d4a21ac, 0x25d81ef3
90*25c28e83SPiotr Jasiukajtis	.word	0x3fcfb918, 0x6d5e4000
91*25c28e83SPiotr Jasiukajtis	.word	0xbd0d572a, 0xab993c87
92*25c28e83SPiotr Jasiukajtis	.word	0x3fd1675c, 0xababa000
93*25c28e83SPiotr Jasiukajtis	.word	0x3d38380e, 0x731f55c4
94*25c28e83SPiotr Jasiukajtis	.word	0x3fd2e8e2, 0xbae12000
95*25c28e83SPiotr Jasiukajtis	.word	0xbd267b1e, 0x99b72bd8
96*25c28e83SPiotr Jasiukajtis	.word	0x3fd4618b, 0xc21c6000
97*25c28e83SPiotr Jasiukajtis	.word	0xbd13d82f, 0x484c84cc
98*25c28e83SPiotr Jasiukajtis	.word	0x3fd5d1bd, 0xbf580000
99*25c28e83SPiotr Jasiukajtis	.word	0x3d4394a1, 0x1b1c1ee4
100*25c28e83SPiotr Jasiukajtis! constants:
101*25c28e83SPiotr Jasiukajtis	.word	0x40000000,0x00000000
102*25c28e83SPiotr Jasiukajtis	.word	0x3fe55555,0x555571da
103*25c28e83SPiotr Jasiukajtis	.word	0x3fd99999,0x8702be3a
104*25c28e83SPiotr Jasiukajtis	.word	0x3fd24af7,0x3f4569b1
105*25c28e83SPiotr Jasiukajtis	.word	0x3ea62e42,0xfee00000	! scaled by 2**-20
106*25c28e83SPiotr Jasiukajtis	.word	0x3caa39ef,0x35793c76	! scaled by 2**-20
107*25c28e83SPiotr Jasiukajtis	.word	0xffff8000,0x00000000
108*25c28e83SPiotr Jasiukajtis	.word	0x43200000
109*25c28e83SPiotr Jasiukajtis	.word	0xfff00000
110*25c28e83SPiotr Jasiukajtis	.word	0xc0194000
111*25c28e83SPiotr Jasiukajtis	.word	0x4000
112*25c28e83SPiotr Jasiukajtis
/*
 * Byte offsets, relative to TBL, of the constants stored after the
 * 0x200-byte table.  They must stay in step with the order and size of
 * the .word directives under "constants:": seven doubles (two through
 * mask) followed by four single words.
 */
113*25c28e83SPiotr Jasiukajtis#define two		0x200
114*25c28e83SPiotr Jasiukajtis#define A1		0x208
115*25c28e83SPiotr Jasiukajtis#define A2		0x210
116*25c28e83SPiotr Jasiukajtis#define A3		0x218
117*25c28e83SPiotr Jasiukajtis#define ln2hi		0x220
118*25c28e83SPiotr Jasiukajtis#define ln2lo		0x228
119*25c28e83SPiotr Jasiukajtis#define mask		0x230
120*25c28e83SPiotr Jasiukajtis#define ox43200000	0x238
121*25c28e83SPiotr Jasiukajtis#define oxfff00000	0x23c
122*25c28e83SPiotr Jasiukajtis#define oxc0194000	0x240
123*25c28e83SPiotr Jasiukajtis#define ox4000		0x244
124*25c28e83SPiotr Jasiukajtis
125*25c28e83SPiotr Jasiukajtis! local storage indices
126*25c28e83SPiotr Jasiukajtis
/*
 * Offsets from %fp of the scratch slots that the save instruction
 * reserves (tmps bytes) in addition to the minimum frame.  jnk receives
 * the stores made before any real result is ready.  tmp0-tmp2 carry
 * (two - v) - u from one pipeline stage to a later one, and are also
 * used as scratch by the .range0-.range2 paths.
 */
127*25c28e83SPiotr Jasiukajtis#define jnk		STACK_BIAS-0x8
128*25c28e83SPiotr Jasiukajtis#define tmp2		STACK_BIAS-0x10
129*25c28e83SPiotr Jasiukajtis#define tmp1		STACK_BIAS-0x18
130*25c28e83SPiotr Jasiukajtis#define tmp0		STACK_BIAS-0x20
131*25c28e83SPiotr Jasiukajtis! sizeof temp storage - must be a multiple of 16 for V9
132*25c28e83SPiotr Jasiukajtis#define tmps		0x20
133*25c28e83SPiotr Jasiukajtis
134*25c28e83SPiotr Jasiukajtis! register use
135*25c28e83SPiotr Jasiukajtis
136*25c28e83SPiotr Jasiukajtis! i0  n
137*25c28e83SPiotr Jasiukajtis! i1  x
138*25c28e83SPiotr Jasiukajtis! i2  stridex
139*25c28e83SPiotr Jasiukajtis! i3  y
140*25c28e83SPiotr Jasiukajtis! i4  stridey
141*25c28e83SPiotr Jasiukajtis! i5
142*25c28e83SPiotr Jasiukajtis
143*25c28e83SPiotr Jasiukajtis! g1  TBL
144*25c28e83SPiotr Jasiukajtis
145*25c28e83SPiotr Jasiukajtis! l0  j0
146*25c28e83SPiotr Jasiukajtis! l1  j1
147*25c28e83SPiotr Jasiukajtis! l2  j2
148*25c28e83SPiotr Jasiukajtis! l3
149*25c28e83SPiotr Jasiukajtis! l4  0x94000
150*25c28e83SPiotr Jasiukajtis! l5
151*25c28e83SPiotr Jasiukajtis! l6  0x000fffff
152*25c28e83SPiotr Jasiukajtis! l7  0x7ff00000
153*25c28e83SPiotr Jasiukajtis
154*25c28e83SPiotr Jasiukajtis! o0  py0
155*25c28e83SPiotr Jasiukajtis! o1  py1
156*25c28e83SPiotr Jasiukajtis! o2  py2
157*25c28e83SPiotr Jasiukajtis! o3
158*25c28e83SPiotr Jasiukajtis! o4
159*25c28e83SPiotr Jasiukajtis! o5
160*25c28e83SPiotr Jasiukajtis! o7
161*25c28e83SPiotr Jasiukajtis
162*25c28e83SPiotr Jasiukajtis! f0  u0,q0
163*25c28e83SPiotr Jasiukajtis! f2  v0,(two-v0)-u0,z0
164*25c28e83SPiotr Jasiukajtis! f4  n0,f0,q0
165*25c28e83SPiotr Jasiukajtis! f6  s0
166*25c28e83SPiotr Jasiukajtis! f8  q
167*25c28e83SPiotr Jasiukajtis! f10 u1,q1
168*25c28e83SPiotr Jasiukajtis! f12 v1,(two-v1)-u1,z1
169*25c28e83SPiotr Jasiukajtis! f14 n1,f1,q1
170*25c28e83SPiotr Jasiukajtis! f16 s1
171*25c28e83SPiotr Jasiukajtis! f18 t
172*25c28e83SPiotr Jasiukajtis! f20 u2,q2
173*25c28e83SPiotr Jasiukajtis! f22 v2,(two-v2)-u2,z2
174*25c28e83SPiotr Jasiukajtis! f24 n2,f2,q2
175*25c28e83SPiotr Jasiukajtis! f26 s2
176*25c28e83SPiotr Jasiukajtis! f28 0xfff00000
177*25c28e83SPiotr Jasiukajtis! f29 0x43200000
178*25c28e83SPiotr Jasiukajtis! f30 0x4000
179*25c28e83SPiotr Jasiukajtis! f31 0xc0194000
180*25c28e83SPiotr Jasiukajtis! f32 t0
181*25c28e83SPiotr Jasiukajtis! f34 h0,f0-(c0-h0)
182*25c28e83SPiotr Jasiukajtis! f36 c0
183*25c28e83SPiotr Jasiukajtis! f38 A1
184*25c28e83SPiotr Jasiukajtis! f40 two
185*25c28e83SPiotr Jasiukajtis! f42 t1
186*25c28e83SPiotr Jasiukajtis! f44 h1,f1-(c1-h1)
187*25c28e83SPiotr Jasiukajtis! f46 c1
188*25c28e83SPiotr Jasiukajtis! f48 A2
189*25c28e83SPiotr Jasiukajtis! f50 0xffff8000...
190*25c28e83SPiotr Jasiukajtis! f52 t2
191*25c28e83SPiotr Jasiukajtis! f54 h2,f2-(c2-h2)
192*25c28e83SPiotr Jasiukajtis! f56 c2
193*25c28e83SPiotr Jasiukajtis! f58 A3
194*25c28e83SPiotr Jasiukajtis! f60 ln2hi
195*25c28e83SPiotr Jasiukajtis! f62 ln2lo
196*25c28e83SPiotr Jasiukajtis
/*
 * __vlog: natural logarithm of each element of a strided vector of
 * doubles, using VIS instructions.
 *
 * Arguments, per the register notes in this file:
 *   %i0  n        number of elements
 *   %i1  x        address of the first argument
 *   %i2  stridex  distance between arguments, in doubles
 *   %i3  y        address of the first result
 *   %i4  stridey  distance between results, in doubles
 * NOTE(review): the C prototype is not visible here; presumably
 * void __vlog(int n, double *x, int stridex, double *y, int stridey).
 *
 * The work is done by a three-stage software pipeline (.loop0, .loop1,
 * .loop2) that keeps three elements in flight, each with its own set
 * of floating point registers.  This entry sequence loads the constants
 * and primes the pipeline so that the first pass through each stage
 * runs on zeros and stores to a stack junk slot instead of to y.
 *
 * NOTE(review): the first argument is fetched below with ordinary
 * (faulting) loads before n is tested, so callers presumably guarantee
 * n >= 1 -- confirm.
 */
197*25c28e83SPiotr Jasiukajtis	ENTRY(__vlog)
198*25c28e83SPiotr Jasiukajtis	save	%sp,-SA(MINFRAME)-tmps,%sp
/* Put the address of TBL in %g1. */
199*25c28e83SPiotr Jasiukajtis	PIC_SETUP(l7)
200*25c28e83SPiotr Jasiukajtis	PIC_SET(l7,TBL,o0)
201*25c28e83SPiotr Jasiukajtis	mov	%o0,%g1
202*25c28e83SPiotr Jasiukajtis	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
/* Integer constants used by the range test and the table index. */
203*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x94000),%l4
204*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x000fffff),%l6
205*25c28e83SPiotr Jasiukajtis	or	%l6,%lo(0x000fffff),%l6
206*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x7ff00000),%l7
/* Floating point constants, kept in registers for the whole call. */
207*25c28e83SPiotr Jasiukajtis	ldd	[%g1+two],%f40
208*25c28e83SPiotr Jasiukajtis	ldd	[%g1+A1],%f38
209*25c28e83SPiotr Jasiukajtis	ldd	[%g1+A2],%f48
210*25c28e83SPiotr Jasiukajtis	ldd	[%g1+A3],%f58
211*25c28e83SPiotr Jasiukajtis	ldd	[%g1+ln2hi],%f60
212*25c28e83SPiotr Jasiukajtis	ldd	[%g1+ln2lo],%f62
213*25c28e83SPiotr Jasiukajtis	ldd	[%g1+mask],%f50
214*25c28e83SPiotr Jasiukajtis	ld	[%g1+ox43200000],%f29
215*25c28e83SPiotr Jasiukajtis	ld	[%g1+oxfff00000],%f28
216*25c28e83SPiotr Jasiukajtis	ld	[%g1+oxc0194000],%f31
217*25c28e83SPiotr Jasiukajtis	ld	[%g1+ox4000],%f30
218*25c28e83SPiotr Jasiukajtis	sll	%i2,3,%i2		! scale strides
219*25c28e83SPiotr Jasiukajtis	sll	%i4,3,%i4
/*
 * Point all three result pointers at the junk slot and zero the
 * registers and stack temporaries that the stages read before they
 * have been written by a real element.
 */
220*25c28e83SPiotr Jasiukajtis	add	%fp,jnk,%o0		! precondition loop
221*25c28e83SPiotr Jasiukajtis	add	%fp,jnk,%o1
222*25c28e83SPiotr Jasiukajtis	add	%fp,jnk,%o2
223*25c28e83SPiotr Jasiukajtis	fzero	%f2
224*25c28e83SPiotr Jasiukajtis	fzero	%f6
225*25c28e83SPiotr Jasiukajtis	fzero	%f18
226*25c28e83SPiotr Jasiukajtis	fzero	%f36
227*25c28e83SPiotr Jasiukajtis	fzero	%f12
228*25c28e83SPiotr Jasiukajtis	fzero	%f14
229*25c28e83SPiotr Jasiukajtis	fzero	%f16
230*25c28e83SPiotr Jasiukajtis	fzero	%f42
231*25c28e83SPiotr Jasiukajtis	fzero	%f44
232*25c28e83SPiotr Jasiukajtis	fzero	%f46
233*25c28e83SPiotr Jasiukajtis	std	%f46,[%fp+tmp1]
234*25c28e83SPiotr Jasiukajtis	fzero	%f24
235*25c28e83SPiotr Jasiukajtis	fzero	%f26
236*25c28e83SPiotr Jasiukajtis	fzero	%f52
237*25c28e83SPiotr Jasiukajtis	fzero	%f54
238*25c28e83SPiotr Jasiukajtis	std	%f54,[%fp+tmp2]
/*
 * Each stage adds stridey to %i3 before using it, so back y up by one
 * stride here.  Then fetch the first argument: high word into both %l0
 * and %f0, low word into %f1 (in the delay slot of the branch).
 */
239*25c28e83SPiotr Jasiukajtis	sub	%i3,%i4,%i3
240*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l0		! ix
241*25c28e83SPiotr Jasiukajtis	ld	[%i1],%f0		! u.l[0] = *x
242*25c28e83SPiotr Jasiukajtis	ba	.loop0
243*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f1		! u.l[1] = *(1+x)
244*25c28e83SPiotr Jasiukajtis
/*
 * Pipeline stage 0.
 *
 * Starts work on the argument whose high word is in %l0 and whose bits
 * are in %f0:%f1 (register set f0-f6, f32-f36).  It also finishes, in
 * %f8, the result begun on the previous pass through this stage and
 * stores it through %o0; on the first pass %o0 is the stack junk slot.
 * Interleaved with this are intermediate steps for the elements started
 * in .loop1 and .loop2.
 *
 * An argument whose high word is <= 0x000fffff or, as unsigned,
 * >= 0x7ff00000 is diverted to .range0.  That path either stores a
 * result itself or rejoins at .cont0 with a rescaled argument.
 *
 * The next argument is fetched through %asi into %l1 and %f10:%f11.
 * The stage ends by decrementing the element count and going to
 * .endloop0 when it is exhausted; otherwise it falls into stage 1.
 */
245*25c28e83SPiotr Jasiukajtis	.align	16
246*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned
247*25c28e83SPiotr Jasiukajtis.loop0:
248*25c28e83SPiotr Jasiukajtis	sub	%l0,%l7,%o3
249*25c28e83SPiotr Jasiukajtis	sub	%l6,%l0,%o4
250*25c28e83SPiotr Jasiukajtis	fpadd32s %f0,%f31,%f4		! n = (ix + 0xc0194000) & 0xfff00000
251*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f2,%f8		! (previous iteration)
252*25c28e83SPiotr Jasiukajtis
253*25c28e83SPiotr Jasiukajtis	andcc	%o3,%o4,%o4
254*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.range0		! ix <= 0x000fffff or >= 0x7ff00000
255*25c28e83SPiotr Jasiukajtis! delay slot
256*25c28e83SPiotr Jasiukajtis	fands	%f4,%f28,%f4
257*25c28e83SPiotr Jasiukajtis
258*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
259*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
260*25c28e83SPiotr Jasiukajtis	fpsub32s %f0,%f4,%f0		! u.l[0] -= n
261*25c28e83SPiotr Jasiukajtis
262*25c28e83SPiotr Jasiukajtis.cont0:
/*
 * Fetch the next argument with non-faulting loads while forming the
 * table offset j in %l0 and the truncated value v in %f2.
 */
263*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l1		! preload next argument
264*25c28e83SPiotr Jasiukajtis	add	%l0,%l4,%l0		! j = ix + 0x94000
265*25c28e83SPiotr Jasiukajtis	fpadd32s %f0,%f30,%f2		! v.l[0] = u.l[0] + 0x4000
266*25c28e83SPiotr Jasiukajtis
267*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f10
268*25c28e83SPiotr Jasiukajtis	srl	%l0,11,%l0		! j = (j >> 11) & 0x1f0
269*25c28e83SPiotr Jasiukajtis	fand	%f2,%f50,%f2		! v.l &= 0xffff8000...
270*25c28e83SPiotr Jasiukajtis
271*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f11
272*25c28e83SPiotr Jasiukajtis	and	%l0,0x1f0,%l0
273*25c28e83SPiotr Jasiukajtis	fitod	%f4,%f32		! (double) n
274*25c28e83SPiotr Jasiukajtis
275*25c28e83SPiotr Jasiukajtis	add	%l0,8,%l3
276*25c28e83SPiotr Jasiukajtis	fsubd	%f0,%f2,%f4		! f = u.d - v.d
277*25c28e83SPiotr Jasiukajtis
278*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f2,%f6		! s = f / (u.d + v.d)
279*25c28e83SPiotr Jasiukajtis
280*25c28e83SPiotr Jasiukajtis	fsubd	%f40,%f2,%f2		! two - v.d
281*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f60,%f34		! h = n * ln2hi + TBL[j]
282*25c28e83SPiotr Jasiukajtis
283*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f18,%f8		! y = c + (t + q)
284*25c28e83SPiotr Jasiukajtis	fmuld	%f32,%f62,%f32		! t = n * ln2lo + TBL[j+1]
285*25c28e83SPiotr Jasiukajtis
286*25c28e83SPiotr Jasiukajtis	fdivd	%f4,%f6,%f6
287*25c28e83SPiotr Jasiukajtis
288*25c28e83SPiotr Jasiukajtis	faddd	%f54,%f24,%f56		! c = h + f
289*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f26,%f22		! z = s * s
290*25c28e83SPiotr Jasiukajtis
/*
 * Finish the result from the previous pass and store it as two words
 * through %o0, then point %o0 at the y slot of the current element.
 */
291*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f36,%f8
292*25c28e83SPiotr Jasiukajtis	st	%f8,[%o0]
293*25c28e83SPiotr Jasiukajtis
294*25c28e83SPiotr Jasiukajtis	st	%f9,[%o0+4]
295*25c28e83SPiotr Jasiukajtis	mov	%i3,%o0
296*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f38,%f14
297*25c28e83SPiotr Jasiukajtis
298*25c28e83SPiotr Jasiukajtis	fsubd	%f56,%f54,%f54		! t += f - (c - h)
299*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f58,%f20		! q = ...
300*25c28e83SPiotr Jasiukajtis
301*25c28e83SPiotr Jasiukajtis	fsubd	%f2,%f0,%f2		! (two - v.d) - u.d
302*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l0],%f36
303*25c28e83SPiotr Jasiukajtis
304*25c28e83SPiotr Jasiukajtis	faddd	%f42,%f44,%f18
305*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f14,%f14
306*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp1],%f12
307*25c28e83SPiotr Jasiukajtis
308*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f48,%f20
309*25c28e83SPiotr Jasiukajtis	nop
310*25c28e83SPiotr Jasiukajtis
311*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f36,%f34
312*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l3],%f0
313*25c28e83SPiotr Jasiukajtis
314*25c28e83SPiotr Jasiukajtis	faddd	%f14,%f12,%f12
315*25c28e83SPiotr Jasiukajtis
316*25c28e83SPiotr Jasiukajtis	fsubd	%f24,%f54,%f54
317*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f20,%f24
318*25c28e83SPiotr Jasiukajtis
/*
 * Park (two - v) - u in tmp0 for a later stage, then count the
 * element.
 */
319*25c28e83SPiotr Jasiukajtis	std	%f2,[%fp+tmp0]
320*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
321*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop0
322*25c28e83SPiotr Jasiukajtis! delay slot
323*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f0,%f32
324*25c28e83SPiotr Jasiukajtis
/*
 * Pipeline stage 1.
 *
 * Same schedule as stage 0 with the register roles rotated.  It starts
 * work on the argument whose high word is in %l1 and whose bits are in
 * %f10:%f11 (register set f10-f16, f42-f46).  It finishes, in %f8, the
 * result begun on the previous pass through this stage and stores it
 * through %o1; on the first pass %o1 is the stack junk slot.
 * Interleaved with this are intermediate steps for the elements started
 * in .loop2 and .loop0.
 *
 * Out-of-range arguments go to .range1, which may rejoin at .cont1.
 * The next argument is fetched through %asi into %l2 and %f20:%f21.
 * The stage ends by decrementing the element count and going to
 * .endloop1 when it is exhausted; otherwise it falls into stage 2.
 */
325*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned
326*25c28e83SPiotr Jasiukajtis.loop1:
327*25c28e83SPiotr Jasiukajtis	sub	%l1,%l7,%o3
328*25c28e83SPiotr Jasiukajtis	sub	%l6,%l1,%o4
329*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f31,%f14		! n = (ix + 0xc0194000) & 0xfff00000
330*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f12,%f8		! (previous iteration)
331*25c28e83SPiotr Jasiukajtis
332*25c28e83SPiotr Jasiukajtis	andcc	%o3,%o4,%o4
333*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.range1		! ix <= 0x000fffff or >= 0x7ff00000
334*25c28e83SPiotr Jasiukajtis! delay slot
335*25c28e83SPiotr Jasiukajtis	fands	%f14,%f28,%f14
336*25c28e83SPiotr Jasiukajtis
337*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
338*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
339*25c28e83SPiotr Jasiukajtis	fpsub32s %f10,%f14,%f10		! u.l[0] -= n
340*25c28e83SPiotr Jasiukajtis
341*25c28e83SPiotr Jasiukajtis.cont1:
/*
 * Fetch the next argument with non-faulting loads while forming the
 * table offset j in %l1 and the truncated value v in %f12.
 */
342*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l2		! preload next argument
343*25c28e83SPiotr Jasiukajtis	add	%l1,%l4,%l1		! j = ix + 0x94000
344*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f30,%f12		! v.l[0] = u.l[0] + 0x4000
345*25c28e83SPiotr Jasiukajtis
346*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f20
347*25c28e83SPiotr Jasiukajtis	srl	%l1,11,%l1		! j = (j >> 11) & 0x1f0
348*25c28e83SPiotr Jasiukajtis	fand	%f12,%f50,%f12		! v.l &= 0xffff8000...
349*25c28e83SPiotr Jasiukajtis
350*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f21
351*25c28e83SPiotr Jasiukajtis	and	%l1,0x1f0,%l1
352*25c28e83SPiotr Jasiukajtis	fitod	%f14,%f42		! (double) n
353*25c28e83SPiotr Jasiukajtis
354*25c28e83SPiotr Jasiukajtis	add	%l1,8,%l3
355*25c28e83SPiotr Jasiukajtis	fsubd	%f10,%f12,%f14		! f = u.d - v.d
356*25c28e83SPiotr Jasiukajtis
357*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f12,%f16		! s = f / (u.d + v.d)
358*25c28e83SPiotr Jasiukajtis
359*25c28e83SPiotr Jasiukajtis	fsubd	%f40,%f12,%f12		! two - v.d
360*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f60,%f44		! h = n * ln2hi + TBL[j]
361*25c28e83SPiotr Jasiukajtis
362*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f18,%f8		! y = c + (t + q)
363*25c28e83SPiotr Jasiukajtis	fmuld	%f42,%f62,%f42		! t = n * ln2lo + TBL[j+1]
364*25c28e83SPiotr Jasiukajtis
365*25c28e83SPiotr Jasiukajtis	fdivd	%f14,%f16,%f16
366*25c28e83SPiotr Jasiukajtis
367*25c28e83SPiotr Jasiukajtis	faddd	%f34,%f4,%f36		! c = h + f
368*25c28e83SPiotr Jasiukajtis	fmuld	%f6,%f6,%f2		! z = s * s
369*25c28e83SPiotr Jasiukajtis
/*
 * Finish the result from the previous pass and store it as two words
 * through %o1, then point %o1 at the y slot of the current element.
 */
370*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f46,%f8
371*25c28e83SPiotr Jasiukajtis	st	%f8,[%o1]
372*25c28e83SPiotr Jasiukajtis
373*25c28e83SPiotr Jasiukajtis	st	%f9,[%o1+4]
374*25c28e83SPiotr Jasiukajtis	mov	%i3,%o1
375*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f38,%f24
376*25c28e83SPiotr Jasiukajtis
377*25c28e83SPiotr Jasiukajtis	fsubd	%f36,%f34,%f34		! t += f - (c - h)
378*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f58,%f0		! q = ...
379*25c28e83SPiotr Jasiukajtis
380*25c28e83SPiotr Jasiukajtis	fsubd	%f12,%f10,%f12		! (two - v.d) - u.d
381*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l1],%f46
382*25c28e83SPiotr Jasiukajtis
383*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f54,%f18
384*25c28e83SPiotr Jasiukajtis	fmuld	%f22,%f24,%f24
385*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp2],%f22
386*25c28e83SPiotr Jasiukajtis
387*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f48,%f0
388*25c28e83SPiotr Jasiukajtis	nop
389*25c28e83SPiotr Jasiukajtis
390*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f46,%f44
391*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l3],%f10
392*25c28e83SPiotr Jasiukajtis
393*25c28e83SPiotr Jasiukajtis	faddd	%f24,%f22,%f22
394*25c28e83SPiotr Jasiukajtis
395*25c28e83SPiotr Jasiukajtis	fsubd	%f4,%f34,%f34
396*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f0,%f4
397*25c28e83SPiotr Jasiukajtis
/*
 * Park (two - v) - u in tmp1 for a later stage, then count the
 * element.
 */
398*25c28e83SPiotr Jasiukajtis	std	%f12,[%fp+tmp1]
399*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
400*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop1
401*25c28e83SPiotr Jasiukajtis! delay slot
402*25c28e83SPiotr Jasiukajtis	faddd	%f42,%f10,%f42
403*25c28e83SPiotr Jasiukajtis
/*
 * Pipeline stage 2.
 *
 * Same schedule as stages 0 and 1 with the register roles rotated.  It
 * starts work on the argument whose high word is in %l2 and whose bits
 * are in %f20:%f21 (register set f20-f26, f52-f56).  It finishes, in
 * %f8, the result begun on the previous pass through this stage and
 * stores it through %o2; on the first pass %o2 is the stack junk slot.
 * Interleaved with this are intermediate steps for the elements started
 * in .loop0 and .loop1.
 *
 * Out-of-range arguments go to .range2, which may rejoin at .cont2.
 * The next argument is fetched through %asi into %l0 and %f0:%f1.  The
 * stage ends by decrementing the element count and branching back to
 * .loop0 while elements remain; otherwise it falls through to
 * .endloop2.
 */
404*25c28e83SPiotr Jasiukajtis! -- 16 byte aligned
405*25c28e83SPiotr Jasiukajtis.loop2:
406*25c28e83SPiotr Jasiukajtis	sub	%l2,%l7,%o3
407*25c28e83SPiotr Jasiukajtis	sub	%l6,%l2,%o4
408*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f31,%f24		! n = (ix + 0xc0194000) & 0xfff00000
409*25c28e83SPiotr Jasiukajtis	fmuld	%f26,%f22,%f8		! (previous iteration)
410*25c28e83SPiotr Jasiukajtis
411*25c28e83SPiotr Jasiukajtis	andcc	%o3,%o4,%o4
412*25c28e83SPiotr Jasiukajtis	bge,pn	%icc,.range2		! ix <= 0x000fffff or >= 0x7ff00000
413*25c28e83SPiotr Jasiukajtis! delay slot
414*25c28e83SPiotr Jasiukajtis	fands	%f24,%f28,%f24
415*25c28e83SPiotr Jasiukajtis
416*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
417*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
418*25c28e83SPiotr Jasiukajtis	fpsub32s %f20,%f24,%f20		! u.l[0] -= n
419*25c28e83SPiotr Jasiukajtis
420*25c28e83SPiotr Jasiukajtis.cont2:
/*
 * Fetch the next argument with non-faulting loads while forming the
 * table offset j in %l2 and the truncated value v in %f22.
 */
421*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%l0		! preload next argument
422*25c28e83SPiotr Jasiukajtis	add	%l2,%l4,%l2		! j = ix + 0x94000
423*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f30,%f22		! v.l[0] = u.l[0] + 0x4000
424*25c28e83SPiotr Jasiukajtis
425*25c28e83SPiotr Jasiukajtis	lda	[%i1]%asi,%f0
426*25c28e83SPiotr Jasiukajtis	srl	%l2,11,%l2		! j = (j >> 11) & 0x1f0
427*25c28e83SPiotr Jasiukajtis	fand	%f22,%f50,%f22		! v.l &= 0xffff8000...
428*25c28e83SPiotr Jasiukajtis
429*25c28e83SPiotr Jasiukajtis	lda	[%i1+4]%asi,%f1
430*25c28e83SPiotr Jasiukajtis	and	%l2,0x1f0,%l2
431*25c28e83SPiotr Jasiukajtis	fitod	%f24,%f52		! (double) n
432*25c28e83SPiotr Jasiukajtis
433*25c28e83SPiotr Jasiukajtis	add	%l2,8,%l3
434*25c28e83SPiotr Jasiukajtis	fsubd	%f20,%f22,%f24		! f = u.d - v.d
435*25c28e83SPiotr Jasiukajtis
436*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f22,%f26		! s = f / (u.d + v.d)
437*25c28e83SPiotr Jasiukajtis
438*25c28e83SPiotr Jasiukajtis	fsubd	%f40,%f22,%f22		! two - v.d
439*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f60,%f54		! h = n * ln2hi + TBL[j]
440*25c28e83SPiotr Jasiukajtis
441*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f18,%f8		! y = c + (t + q)
442*25c28e83SPiotr Jasiukajtis	fmuld	%f52,%f62,%f52		! t = n * ln2lo + TBL[j+1]
443*25c28e83SPiotr Jasiukajtis
444*25c28e83SPiotr Jasiukajtis	fdivd	%f24,%f26,%f26
445*25c28e83SPiotr Jasiukajtis
446*25c28e83SPiotr Jasiukajtis	faddd	%f44,%f14,%f46		! c = h + f
447*25c28e83SPiotr Jasiukajtis	fmuld	%f16,%f16,%f12		! z = s * s
448*25c28e83SPiotr Jasiukajtis
/*
 * Finish the result from the previous pass and store it as two words
 * through %o2, then point %o2 at the y slot of the current element.
 */
449*25c28e83SPiotr Jasiukajtis	faddd	%f8,%f56,%f8
450*25c28e83SPiotr Jasiukajtis	st	%f8,[%o2]
451*25c28e83SPiotr Jasiukajtis
452*25c28e83SPiotr Jasiukajtis	st	%f9,[%o2+4]
453*25c28e83SPiotr Jasiukajtis	mov	%i3,%o2
454*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f38,%f4
455*25c28e83SPiotr Jasiukajtis
456*25c28e83SPiotr Jasiukajtis	fsubd	%f46,%f44,%f44		! t += f - (c - h)
457*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f58,%f10		! q = ...
458*25c28e83SPiotr Jasiukajtis
459*25c28e83SPiotr Jasiukajtis	fsubd	%f22,%f20,%f22		! (two - v.d) - u.d
460*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l2],%f56
461*25c28e83SPiotr Jasiukajtis
462*25c28e83SPiotr Jasiukajtis	faddd	%f32,%f34,%f18
463*25c28e83SPiotr Jasiukajtis	fmuld	%f2,%f4,%f4
464*25c28e83SPiotr Jasiukajtis	ldd	[%fp+tmp0],%f2
465*25c28e83SPiotr Jasiukajtis
466*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f48,%f10
467*25c28e83SPiotr Jasiukajtis	nop
468*25c28e83SPiotr Jasiukajtis
469*25c28e83SPiotr Jasiukajtis	faddd	%f54,%f56,%f54
470*25c28e83SPiotr Jasiukajtis	ldd	[%g1+%l3],%f20
471*25c28e83SPiotr Jasiukajtis
472*25c28e83SPiotr Jasiukajtis	faddd	%f4,%f2,%f2
473*25c28e83SPiotr Jasiukajtis
474*25c28e83SPiotr Jasiukajtis	fsubd	%f14,%f44,%f44
475*25c28e83SPiotr Jasiukajtis	fmuld	%f12,%f10,%f14
476*25c28e83SPiotr Jasiukajtis
/*
 * Park (two - v) - u in tmp2 for a later stage, then count the
 * element.
 */
477*25c28e83SPiotr Jasiukajtis	std	%f22,[%fp+tmp2]
478*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
479*25c28e83SPiotr Jasiukajtis	bg,pt	%icc,.loop0
480*25c28e83SPiotr Jasiukajtis! delay slot
481*25c28e83SPiotr Jasiukajtis	faddd	%f52,%f20,%f52
482*25c28e83SPiotr Jasiukajtis
483*25c28e83SPiotr Jasiukajtis
484*25c28e83SPiotr Jasiukajtis! Once we get to the last element, we loop three more times to finish
485*25c28e83SPiotr Jasiukajtis! the computations in progress.  This means we will load past the end
486*25c28e83SPiotr Jasiukajtis! of the argument vector, but since we use non-faulting loads and never
487*25c28e83SPiotr Jasiukajtis! use the data, the only potential problem is cache miss.  (Note that
488*25c28e83SPiotr Jasiukajtis! when the argument is 2, the only exception that occurs in the compu-
489*25c28e83SPiotr Jasiukajtis! tation is an inexact result in the final addition, and we break out
490*25c28e83SPiotr Jasiukajtis! of the "extra" iterations before then.)
/*
 * Pipeline drain.  Each .endloopN is entered once the element count in
 * %i0 has dropped to zero or below.  It supplies 2.0 as the next
 * argument for the following stage: high word 0x40000000 in the integer
 * register, and the constant "two" from %f40 in the floating point
 * register.  It re-enters that stage while %i0 > -3 and otherwise
 * returns.  The branches are annulled, so the fmovd in the delay slot
 * runs only when the branch is taken.
 *
 *   .endloop2 feeds .loop0 (%l0, %f0)
 *   .endloop0 feeds .loop1 (%l1, %f10)
 *   .endloop1 feeds .loop2 (%l2, %f20)
 */
491*25c28e83SPiotr Jasiukajtis.endloop2:
492*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x40000000),%l0	! "next argument" = two
493*25c28e83SPiotr Jasiukajtis	cmp	%i0,-3
494*25c28e83SPiotr Jasiukajtis	bg,a,pt	%icc,.loop0
495*25c28e83SPiotr Jasiukajtis! delay slot
496*25c28e83SPiotr Jasiukajtis	fmovd	%f40,%f0
497*25c28e83SPiotr Jasiukajtis	ret
498*25c28e83SPiotr Jasiukajtis	restore
499*25c28e83SPiotr Jasiukajtis
500*25c28e83SPiotr Jasiukajtis	.align	16
501*25c28e83SPiotr Jasiukajtis.endloop0:
502*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x40000000),%l1	! "next argument" = two
503*25c28e83SPiotr Jasiukajtis	cmp	%i0,-3
504*25c28e83SPiotr Jasiukajtis	bg,a,pt	%icc,.loop1
505*25c28e83SPiotr Jasiukajtis! delay slot
506*25c28e83SPiotr Jasiukajtis	fmovd	%f40,%f10
507*25c28e83SPiotr Jasiukajtis	ret
508*25c28e83SPiotr Jasiukajtis	restore
509*25c28e83SPiotr Jasiukajtis
510*25c28e83SPiotr Jasiukajtis	.align	16
511*25c28e83SPiotr Jasiukajtis.endloop1:
512*25c28e83SPiotr Jasiukajtis	sethi	%hi(0x40000000),%l2	! "next argument" = two
513*25c28e83SPiotr Jasiukajtis	cmp	%i0,-3
514*25c28e83SPiotr Jasiukajtis	bg,a,pt	%icc,.loop2
515*25c28e83SPiotr Jasiukajtis! delay slot
516*25c28e83SPiotr Jasiukajtis	fmovd	%f40,%f20
517*25c28e83SPiotr Jasiukajtis	ret
518*25c28e83SPiotr Jasiukajtis	restore
519*25c28e83SPiotr Jasiukajtis
520*25c28e83SPiotr Jasiukajtis
/*
 * Out-of-range argument for stage 0 (high word in %l0, bits in
 * %f0:%f1).  The low word is loaded into %o5 in the first delay slot.
 *
 *  - Unsigned high word below 0x7ff00000 (so <= 0x000fffff here): the
 *    bit pattern is converted with fxtod, which the existing note
 *    describes as scaling by 2**1074 without trapping.  A zero argument
 *    goes to 1.  Otherwise n and u are recomputed from the scaled
 *    value, the scaled high word is reloaded into %l0 through tmp0,
 *    0x43200000 is subtracted from n to cancel the scaling, and the
 *    pipeline is rejoined at .cont0.
 *  - 1: zero argument of either sign.  The fdivs by the zero low word
 *    raises division-by-zero, and 0xfff00000 is stored as the high
 *    result word, giving -infinity.
 *  - 2: unsigned high word >= 0x7ff00000.  -0 is sent to 1; otherwise
 *    (x + |x|) * +infinity is stored, with +infinity built from the
 *    %f28 pair masked by %f50 and then negated.
 *  - 3: store the low result word (in the delay slot), count the
 *    element, and restart stage 0 with the next argument, or through
 *    .endloop2 when no elements remain.
 */
521*25c28e83SPiotr Jasiukajtis	.align	16
522*25c28e83SPiotr Jasiukajtis.range0:
523*25c28e83SPiotr Jasiukajtis	cmp	%l0,%l7
524*25c28e83SPiotr Jasiukajtis	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
525*25c28e83SPiotr Jasiukajtis! delay slot
526*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%o5
527*25c28e83SPiotr Jasiukajtis	fxtod	%f0,%f0			! scale by 2**1074 w/o trapping
528*25c28e83SPiotr Jasiukajtis	st	%f0,[%fp+tmp0]
529*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
530*25c28e83SPiotr Jasiukajtis	orcc	%l0,%o5,%g0
531*25c28e83SPiotr Jasiukajtis	be,pn	%icc,1f			! if x == 0
532*25c28e83SPiotr Jasiukajtis! delay slot
533*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
534*25c28e83SPiotr Jasiukajtis	fpadd32s %f0,%f31,%f4		! n = (ix + 0xc0194000) & 0xfff00000
535*25c28e83SPiotr Jasiukajtis	fands	%f4,%f28,%f4
536*25c28e83SPiotr Jasiukajtis	fpsub32s %f0,%f4,%f0		! u.l[0] -= n
537*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp0],%l0
538*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.cont0
539*25c28e83SPiotr Jasiukajtis! delay slot
540*25c28e83SPiotr Jasiukajtis	fpsub32s %f4,%f29,%f4		! n -= 0x43200000
541*25c28e83SPiotr Jasiukajtis1:
542*25c28e83SPiotr Jasiukajtis	fdivs	%f29,%f1,%f4		! raise div-by-zero
543*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,3f
544*25c28e83SPiotr Jasiukajtis! delay slot
545*25c28e83SPiotr Jasiukajtis	st	%f28,[%i3]		! store -inf
546*25c28e83SPiotr Jasiukajtis2:
547*25c28e83SPiotr Jasiukajtis	sll	%l0,1,%l0		! lop off sign bit
548*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
549*25c28e83SPiotr Jasiukajtis	orcc	%l0,%o5,%g0
550*25c28e83SPiotr Jasiukajtis	be,pn	%icc,1b			! if x == -0
551*25c28e83SPiotr Jasiukajtis! delay slot
552*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
553*25c28e83SPiotr Jasiukajtis	fabsd	%f0,%f4			! *y = (x + |x|) * inf
554*25c28e83SPiotr Jasiukajtis	faddd	%f0,%f4,%f0
555*25c28e83SPiotr Jasiukajtis	fand	%f28,%f50,%f4
556*25c28e83SPiotr Jasiukajtis	fnegd	%f4,%f4
557*25c28e83SPiotr Jasiukajtis	fmuld	%f0,%f4,%f0
558*25c28e83SPiotr Jasiukajtis	st	%f0,[%i3]
559*25c28e83SPiotr Jasiukajtis3:
560*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
561*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop2
562*25c28e83SPiotr Jasiukajtis! delay slot
563*25c28e83SPiotr Jasiukajtis	st	%f1,[%i3+4]
564*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l0		! get next argument
565*25c28e83SPiotr Jasiukajtis	ld	[%i1],%f0
566*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop0
567*25c28e83SPiotr Jasiukajtis! delay slot
568*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f1
569*25c28e83SPiotr Jasiukajtis
570*25c28e83SPiotr Jasiukajtis
/*
 * Out-of-range argument for stage 1 (high word in %l1, bits in
 * %f10:%f11).  The low word is loaded into %o5 in the first delay slot.
 *
 *  - Unsigned high word below 0x7ff00000 (so <= 0x000fffff here): the
 *    bit pattern is converted with fxtod, which the existing note
 *    describes as scaling by 2**1074 without trapping.  A zero argument
 *    goes to 1.  Otherwise n and u are recomputed from the scaled
 *    value, the scaled high word is reloaded into %l1 through tmp1,
 *    0x43200000 is subtracted from n to cancel the scaling, and the
 *    pipeline is rejoined at .cont1.
 *  - 1: zero argument of either sign.  The fdivs by the zero low word
 *    raises division-by-zero, and 0xfff00000 is stored as the high
 *    result word, giving -infinity.
 *  - 2: unsigned high word >= 0x7ff00000.  -0 is sent to 1; otherwise
 *    (x + |x|) * +infinity is stored, with +infinity built from the
 *    %f28 pair masked by %f50 and then negated.
 *  - 3: store the low result word (in the delay slot), count the
 *    element, and restart stage 1 with the next argument, or through
 *    .endloop0 when no elements remain.
 */
571*25c28e83SPiotr Jasiukajtis	.align	16
572*25c28e83SPiotr Jasiukajtis.range1:
573*25c28e83SPiotr Jasiukajtis	cmp	%l1,%l7
574*25c28e83SPiotr Jasiukajtis	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
575*25c28e83SPiotr Jasiukajtis! delay slot
576*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%o5
577*25c28e83SPiotr Jasiukajtis	fxtod	%f10,%f10		! scale by 2**1074 w/o trapping
578*25c28e83SPiotr Jasiukajtis	st	%f10,[%fp+tmp1]
579*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
580*25c28e83SPiotr Jasiukajtis	orcc	%l1,%o5,%g0
581*25c28e83SPiotr Jasiukajtis	be,pn	%icc,1f			! if x == 0
582*25c28e83SPiotr Jasiukajtis! delay slot
583*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
584*25c28e83SPiotr Jasiukajtis	fpadd32s %f10,%f31,%f14		! n = (ix + 0xc0194000) & 0xfff00000
585*25c28e83SPiotr Jasiukajtis	fands	%f14,%f28,%f14
586*25c28e83SPiotr Jasiukajtis	fpsub32s %f10,%f14,%f10		! u.l[0] -= n
587*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp1],%l1
588*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.cont1
589*25c28e83SPiotr Jasiukajtis! delay slot
590*25c28e83SPiotr Jasiukajtis	fpsub32s %f14,%f29,%f14		! n -= 0x43200000
591*25c28e83SPiotr Jasiukajtis1:
592*25c28e83SPiotr Jasiukajtis	fdivs	%f29,%f11,%f14		! raise div-by-zero
593*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,3f
594*25c28e83SPiotr Jasiukajtis! delay slot
595*25c28e83SPiotr Jasiukajtis	st	%f28,[%i3]		! store -inf
596*25c28e83SPiotr Jasiukajtis2:
597*25c28e83SPiotr Jasiukajtis	sll	%l1,1,%l1		! lop off sign bit
598*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
599*25c28e83SPiotr Jasiukajtis	orcc	%l1,%o5,%g0
600*25c28e83SPiotr Jasiukajtis	be,pn	%icc,1b			! if x == -0
601*25c28e83SPiotr Jasiukajtis! delay slot
602*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
603*25c28e83SPiotr Jasiukajtis	fabsd	%f10,%f14		! *y = (x + |x|) * inf
604*25c28e83SPiotr Jasiukajtis	faddd	%f10,%f14,%f10
605*25c28e83SPiotr Jasiukajtis	fand	%f28,%f50,%f14
606*25c28e83SPiotr Jasiukajtis	fnegd	%f14,%f14
607*25c28e83SPiotr Jasiukajtis	fmuld	%f10,%f14,%f10
608*25c28e83SPiotr Jasiukajtis	st	%f10,[%i3]
609*25c28e83SPiotr Jasiukajtis3:
610*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
611*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop0
612*25c28e83SPiotr Jasiukajtis! delay slot
613*25c28e83SPiotr Jasiukajtis	st	%f11,[%i3+4]
614*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l1		! get next argument
615*25c28e83SPiotr Jasiukajtis	ld	[%i1],%f10
616*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop1
617*25c28e83SPiotr Jasiukajtis! delay slot
618*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f11
619*25c28e83SPiotr Jasiukajtis
620*25c28e83SPiotr Jasiukajtis
/*
 * Out-of-range argument for stage 2 (high word in %l2, bits in
 * %f20:%f21).  The low word is loaded into %o5 in the first delay slot.
 *
 *  - Unsigned high word below 0x7ff00000 (so <= 0x000fffff here): the
 *    bit pattern is converted with fxtod, which the existing note
 *    describes as scaling by 2**1074 without trapping.  A zero argument
 *    goes to 1.  Otherwise n and u are recomputed from the scaled
 *    value, the scaled high word is reloaded into %l2 through tmp2,
 *    0x43200000 is subtracted from n to cancel the scaling, and the
 *    pipeline is rejoined at .cont2.
 *  - 1: zero argument of either sign.  The fdivs by the zero low word
 *    raises division-by-zero, and 0xfff00000 is stored as the high
 *    result word, giving -infinity.
 *  - 2: unsigned high word >= 0x7ff00000.  -0 is sent to 1; otherwise
 *    (x + |x|) * +infinity is stored, with +infinity built from the
 *    %f28 pair masked by %f50 and then negated.
 *  - 3: store the low result word (in the delay slot), count the
 *    element, and restart stage 2 with the next argument, or through
 *    .endloop1 when no elements remain.
 *
 * SET_SIZE closes the __vlog function opened by ENTRY.
 */
621*25c28e83SPiotr Jasiukajtis	.align	16
622*25c28e83SPiotr Jasiukajtis.range2:
623*25c28e83SPiotr Jasiukajtis	cmp	%l2,%l7
624*25c28e83SPiotr Jasiukajtis	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
625*25c28e83SPiotr Jasiukajtis! delay slot
626*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%o5
627*25c28e83SPiotr Jasiukajtis	fxtod	%f20,%f20		! scale by 2**1074 w/o trapping
628*25c28e83SPiotr Jasiukajtis	st	%f20,[%fp+tmp2]
629*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
630*25c28e83SPiotr Jasiukajtis	orcc	%l2,%o5,%g0
631*25c28e83SPiotr Jasiukajtis	be,pn	%icc,1f			! if x == 0
632*25c28e83SPiotr Jasiukajtis! delay slot
633*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
634*25c28e83SPiotr Jasiukajtis	fpadd32s %f20,%f31,%f24		! n = (ix + 0xc0194000) & 0xfff00000
635*25c28e83SPiotr Jasiukajtis	fands	%f24,%f28,%f24
636*25c28e83SPiotr Jasiukajtis	fpsub32s %f20,%f24,%f20		! u.l[0] -= n
637*25c28e83SPiotr Jasiukajtis	ld	[%fp+tmp2],%l2
638*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.cont2
639*25c28e83SPiotr Jasiukajtis! delay slot
640*25c28e83SPiotr Jasiukajtis	fpsub32s %f24,%f29,%f24		! n -= 0x43200000
641*25c28e83SPiotr Jasiukajtis1:
642*25c28e83SPiotr Jasiukajtis	fdivs	%f29,%f21,%f24		! raise div-by-zero
643*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,3f
644*25c28e83SPiotr Jasiukajtis! delay slot
645*25c28e83SPiotr Jasiukajtis	st	%f28,[%i3]		! store -inf
646*25c28e83SPiotr Jasiukajtis2:
647*25c28e83SPiotr Jasiukajtis	sll	%l2,1,%l2		! lop off sign bit
648*25c28e83SPiotr Jasiukajtis	add	%i1,%i2,%i1		! x += stridex
649*25c28e83SPiotr Jasiukajtis	orcc	%l2,%o5,%g0
650*25c28e83SPiotr Jasiukajtis	be,pn	%icc,1b			! if x == -0
651*25c28e83SPiotr Jasiukajtis! delay slot
652*25c28e83SPiotr Jasiukajtis	add	%i3,%i4,%i3		! y += stridey
653*25c28e83SPiotr Jasiukajtis	fabsd	%f20,%f24		! *y = (x + |x|) * inf
654*25c28e83SPiotr Jasiukajtis	faddd	%f20,%f24,%f20
655*25c28e83SPiotr Jasiukajtis	fand	%f28,%f50,%f24
656*25c28e83SPiotr Jasiukajtis	fnegd	%f24,%f24
657*25c28e83SPiotr Jasiukajtis	fmuld	%f20,%f24,%f20
658*25c28e83SPiotr Jasiukajtis	st	%f20,[%i3]
659*25c28e83SPiotr Jasiukajtis3:
660*25c28e83SPiotr Jasiukajtis	addcc	%i0,-1,%i0
661*25c28e83SPiotr Jasiukajtis	ble,pn	%icc,.endloop1
662*25c28e83SPiotr Jasiukajtis! delay slot
663*25c28e83SPiotr Jasiukajtis	st	%f21,[%i3+4]
664*25c28e83SPiotr Jasiukajtis	ld	[%i1],%l2		! get next argument
665*25c28e83SPiotr Jasiukajtis	ld	[%i1],%f20
666*25c28e83SPiotr Jasiukajtis	ba,pt	%icc,.loop2
667*25c28e83SPiotr Jasiukajtis! delay slot
668*25c28e83SPiotr Jasiukajtis	ld	[%i1+4],%f21
669*25c28e83SPiotr Jasiukajtis
670*25c28e83SPiotr Jasiukajtis	SET_SIZE(__vlog)
671*25c28e83SPiotr Jasiukajtis
672