/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vlog.S"

#include "libm.h"

/*
 * __vlog - vector logarithm over strided arrays of doubles.
 *
 * Arguments (see the register-use notes below):
 *	%i0	n	 element count (tested as a 32-bit signed value)
 *	%i1	x	 input vector
 *	%i2	stridex	 input stride, in doubles (scaled by 8 on entry)
 *	%i3	y	 output vector
 *	%i4	stridey	 output stride, in doubles (scaled by 8 on entry)
 * NOTE(review): the C prototype is presumably
 *	void __vlog(int n, double *x, int stridex, double *y, int stridey)
 * -- confirm against the caller.
 *
 * Structure: the main loop is unrolled three ways (.loop0/.loop1/.loop2)
 * and software-pipelined; each stage begins work on a new element and
 * stores, through its own pointer (%o0/%o1/%o2), the result of the element
 * it began on its previous pass.  The pointers start out aimed at a scratch
 * slot (jnk) so that the priming passes store harmlessly.
 *
 * Arguments whose high word is <= 0x000fffff or (unsigned) >= 0x7ff00000
 * leave the pipeline through .range0/.range1/.range2:
 *	+0 or -0	 -inf is stored and division-by-zero is raised
 *	other (unsigned) ix >= 0x7ff00000
 *			 (x + |x|) * inf is stored
 *	remaining cases  x is rescaled with fxtod, n is corrected by
 *			 0x43200000, and the element rejoins the pipeline
 */

	RO_DATA
	.align	32
/*
 * TBL: 64 doublewords (0x200 bytes).  The code reads them as 16-byte
 * entries selected by j = ((ix + 0x94000) >> 11) & 0x1f0: the doubleword at
 * TBL+j is added to h and the one at TBL+j+8 is added to t.
 * NOTE(review): the pairs look like high/low parts of a logarithm table --
 * confirm against the reference C implementation.
 */
TBL:
	.word	0xbfd522ae, 0x0738a000
	.word	0xbd2ebe70, 0x8164c759
	.word	0xbfd3c252, 0x77333000
	.word	0xbd183b54, 0xb606bd5c
	.word	0xbfd26962, 0x1134e000
	.word	0x3d31b61f, 0x10522625
	.word	0xbfd1178e, 0x8227e000
	.word	0xbd31ef78, 0xce2d07f2
	.word	0xbfcf991c, 0x6cb3c000
	.word	0x3d390d04, 0xcd7cc834
	.word	0xbfcd1037, 0xf2656000
	.word	0x3d084a7e, 0x75b6f6e4
	.word	0xbfca93ed, 0x3c8ae000
	.word	0x3d287243, 0x50562169
	.word	0xbfc823c1, 0x6551a000
	.word	0xbd1e0ddb, 0x9a631e83
	.word	0xbfc5bf40, 0x6b544000
	.word	0x3d127023, 0xeb68981c
	.word	0xbfc365fc, 0xb015a000
	.word	0x3d3fd3a0, 0xafb9691b
	.word	0xbfc1178e, 0x8227e000
	.word	0xbd21ef78, 0xce2d07f2
	.word	0xbfbda727, 0x63844000
	.word	0xbd1a8940, 0x1fa71733
	.word	0xbfb9335e, 0x5d594000
	.word	0xbd23115c, 0x3abd47da
	.word	0xbfb4d311, 0x5d208000
	.word	0x3cf53a25, 0x82f4e1ef
	.word	0xbfb08598, 0xb59e4000
	.word	0x3d17e5dd, 0x7009902c
	.word	0xbfa894aa, 0x149f8000
	.word	0xbd39a19a, 0x8be97661
	.word	0xbfa0415d, 0x89e78000
	.word	0x3d3dddc7, 0xf461c516
	.word	0xbf902056, 0x58930000
	.word	0xbd3611d2, 0x7c8e8417
	.word	0x00000000, 0x00000000
	.word	0x00000000, 0x00000000
	.word	0x3f9f829b, 0x0e780000
	.word	0x3d298026, 0x7c7e09e4
	.word	0x3faf0a30, 0xc0110000
	.word	0x3d48a998, 0x5f325c5c
	.word	0x3fb6f0d2, 0x8ae58000
	.word	0xbd34b464, 0x1b664613
	.word	0x3fbe2707, 0x6e2b0000
	.word	0xbd2a342c, 0x2af0003c
	.word	0x3fc29552, 0xf8200000
	.word	0xbd35b967, 0xf4471dfc
	.word	0x3fc5ff30, 0x70a78000
	.word	0x3d43d3c8, 0x73e20a07
	.word	0x3fc9525a, 0x9cf44000
	.word	0x3d46b476, 0x41307539
	.word	0x3fcc8ff7, 0xc79a8000
	.word	0x3d4a21ac, 0x25d81ef3
	.word	0x3fcfb918, 0x6d5e4000
	.word	0xbd0d572a, 0xab993c87
	.word	0x3fd1675c, 0xababa000
	.word	0x3d38380e, 0x731f55c4
	.word	0x3fd2e8e2, 0xbae12000
	.word	0xbd267b1e, 0x99b72bd8
	.word	0x3fd4618b, 0xc21c6000
	.word	0xbd13d82f, 0x484c84cc
	.word	0x3fd5d1bd, 0xbf580000
	.word	0x3d4394a1, 0x1b1c1ee4
! constants:
! These follow the table directly, so the offsets defined below are
! relative to TBL and start at 0x200.
	.word	0x40000000,0x00000000
	.word	0x3fe55555,0x555571da
	.word	0x3fd99999,0x8702be3a
	.word	0x3fd24af7,0x3f4569b1
! n is kept as (exponent << 20) and converted with fitod, so the two
! ln2 parts carry a compensating factor.
	.word	0x3ea62e42,0xfee00000	! scaled by 2**-20
	.word	0x3caa39ef,0x35793c76	! scaled by 2**-20
	.word	0xffff8000,0x00000000
	.word	0x43200000
	.word	0xfff00000
	.word	0xc0194000
	.word	0x4000

! byte offsets of the constants above, relative to TBL
#define two		0x200
#define A1		0x208
#define A2		0x210
#define A3		0x218
#define ln2hi		0x220
#define ln2lo		0x228
#define mask		0x230
#define ox43200000	0x238
#define oxfff00000	0x23c
#define oxc0194000	0x240
#define ox4000		0x244

! local storage indices
! jnk is the scratch slot that absorbs the stores made while the
! pipeline is being primed; tmp0..tmp2 hold one doubleword per stage.

#define jnk		STACK_BIAS-0x8
#define tmp2		STACK_BIAS-0x10
#define tmp1		STACK_BIAS-0x18
#define tmp0		STACK_BIAS-0x20
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5

! g1  TBL

! l0  j0
! l1  j1
! l2  j2
! l3
! l4  0x94000
! l5
! l6  0x000fffff
! l7  0x7ff00000

! o0  py0
! o1  py1
! o2  py2
! o3
! o4
! o5
! o7

! f0  u0,q0
! f2  v0,(two-v0)-u0,z0
! f4  n0,f0,q0
! f6  s0
! f8  q
! f10 u1,q1
! f12 v1,(two-v1)-u1,z1
! f14 n1,f1,q1
! f16 s1
! f18 t
! f20 u2,q2
! f22 v2,(two-v2)-u2,z2
! f24 n2,f2,q2
! f26 s2
! f28 0xfff00000
! f29 0x43200000
! f30 0x4000
! f31 0xc0194000
! f32 t0
! f34 h0,f0-(c0-h0)
! f36 c0
! f38 A1
! f40 two
! f42 t1
! f44 h1,f1-(c1-h1)
! f46 c1
! f48 A2
! f50 0xffff8000...
! f52 t2
! f54 h2,f2-(c2-h2)
! f56 c2
! f58 A3
! f60 ln2hi
! f62 ln2lo

	ENTRY(__vlog)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,TBL,o0)
	mov	%o0,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
	sethi	%hi(0x94000),%l4
	sethi	%hi(0x000fffff),%l6
	or	%l6,%lo(0x000fffff),%l6
	sethi	%hi(0x7ff00000),%l7
	ldd	[%g1+two],%f40
	ldd	[%g1+A1],%f38
	ldd	[%g1+A2],%f48
	ldd	[%g1+A3],%f58
	ldd	[%g1+ln2hi],%f60
	ldd	[%g1+ln2lo],%f62
	ldd	[%g1+mask],%f50
	ld	[%g1+ox43200000],%f29
	ld	[%g1+oxfff00000],%f28
	ld	[%g1+oxc0194000],%f31
	ld	[%g1+ox4000],%f30
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	add	%fp,jnk,%o0		! precondition loop
	add	%fp,jnk,%o1
	add	%fp,jnk,%o2
	fzero	%f2
	fzero	%f6
	fzero	%f18
	fzero	%f36
	fzero	%f12
	fzero	%f14
	fzero	%f16
	fzero	%f42
	fzero	%f44
	fzero	%f46
	std	%f46,[%fp+tmp1]
	fzero	%f24
	fzero	%f26
	fzero	%f52
	fzero	%f54
	std	%f54,[%fp+tmp2]
	sub	%i3,%i4,%i3		! y is pre-decremented; each stage adds stridey first
	ld	[%i1],%l0		! ix
	ld	[%i1],%f0		! u.l[0] = *x
	ba	.loop0
	ld	[%i1+4],%f1		! u.l[1] = *(1+x)

	.align	16
! -- 16 byte aligned
.loop0:
	sub	%l0,%l7,%o3
	sub	%l6,%l0,%o4
	fpadd32s	%f0,%f31,%f4	! n = (ix + 0xc0194000) & 0xfff00000
	fmuld	%f6,%f2,%f8		! (previous iteration)

	andcc	%o3,%o4,%o4
	bge,pn	%icc,.range0		! ix <= 0x000fffff or >= 0x7ff00000
! delay slot
	fands	%f4,%f28,%f4

	add	%i1,%i2,%i1		! x += stridex
	add	%i3,%i4,%i3		! y += stridey
	fpsub32s	%f0,%f4,%f0	! u.l[0] -= n

.cont0:
	lda	[%i1]%asi,%l1		! preload next argument
	add	%l0,%l4,%l0		! j = ix + 0x94000
	fpadd32s	%f0,%f30,%f2	! v.l[0] = u.l[0] + 0x4000

	lda	[%i1]%asi,%f10
	srl	%l0,11,%l0		! j = (j >> 11) & 0x1f0
	fand	%f2,%f50,%f2		! v.l &= 0xffff8000...

	lda	[%i1+4]%asi,%f11
	and	%l0,0x1f0,%l0
	fitod	%f4,%f32		! (double) n

	add	%l0,8,%l3
	fsubd	%f0,%f2,%f4		! f = u.d - v.d

	faddd	%f0,%f2,%f6		! s = f / (u.d + v.d)

	fsubd	%f40,%f2,%f2		! two - v.d
	fmuld	%f32,%f60,%f34		! h = n * ln2hi + TBL[j]

	faddd	%f8,%f18,%f8		! y = c + (t + q)
	fmuld	%f32,%f62,%f32		! t = n * ln2lo + TBL[j+1]

	fdivd	%f4,%f6,%f6

	faddd	%f54,%f24,%f56		! c = h + f
	fmuld	%f26,%f26,%f22		! z = s * s

	faddd	%f8,%f36,%f8
	st	%f8,[%o0]

	st	%f9,[%o0+4]
	mov	%i3,%o0			! destination for the element begun in this pass
	faddd	%f14,%f38,%f14

	fsubd	%f56,%f54,%f54		! t += f - (c - h)
	fmuld	%f22,%f58,%f20		! q = ...

	fsubd	%f2,%f0,%f2		! (two - v.d) - u.d
	ldd	[%g1+%l0],%f36

	faddd	%f42,%f44,%f18
	fmuld	%f12,%f14,%f14
	ldd	[%fp+tmp1],%f12

	faddd	%f20,%f48,%f20
	nop

	faddd	%f34,%f36,%f34
	ldd	[%g1+%l3],%f0

	faddd	%f14,%f12,%f12

	fsubd	%f24,%f54,%f54
	fmuld	%f22,%f20,%f24

	std	%f2,[%fp+tmp0]
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop0
! delay slot
	faddd	%f32,%f0,%f32

! -- 16 byte aligned
.loop1:
	sub	%l1,%l7,%o3
	sub	%l6,%l1,%o4
	fpadd32s	%f10,%f31,%f14	! n = (ix + 0xc0194000) & 0xfff00000
	fmuld	%f16,%f12,%f8		! (previous iteration)

	andcc	%o3,%o4,%o4
	bge,pn	%icc,.range1		! ix <= 0x000fffff or >= 0x7ff00000
! delay slot
	fands	%f14,%f28,%f14

	add	%i1,%i2,%i1		! x += stridex
	add	%i3,%i4,%i3		! y += stridey
	fpsub32s	%f10,%f14,%f10	! u.l[0] -= n

.cont1:
	lda	[%i1]%asi,%l2		! preload next argument
	add	%l1,%l4,%l1		! j = ix + 0x94000
	fpadd32s	%f10,%f30,%f12	! v.l[0] = u.l[0] + 0x4000

	lda	[%i1]%asi,%f20
	srl	%l1,11,%l1		! j = (j >> 11) & 0x1f0
	fand	%f12,%f50,%f12		! v.l &= 0xffff8000...

	lda	[%i1+4]%asi,%f21
	and	%l1,0x1f0,%l1
	fitod	%f14,%f42		! (double) n

	add	%l1,8,%l3
	fsubd	%f10,%f12,%f14		! f = u.d - v.d

	faddd	%f10,%f12,%f16		! s = f / (u.d + v.d)

	fsubd	%f40,%f12,%f12		! two - v.d
	fmuld	%f42,%f60,%f44		! h = n * ln2hi + TBL[j]

	faddd	%f8,%f18,%f8		! y = c + (t + q)
	fmuld	%f42,%f62,%f42		! t = n * ln2lo + TBL[j+1]

	fdivd	%f14,%f16,%f16

	faddd	%f34,%f4,%f36		! c = h + f
	fmuld	%f6,%f6,%f2		! z = s * s

	faddd	%f8,%f46,%f8
	st	%f8,[%o1]

	st	%f9,[%o1+4]
	mov	%i3,%o1			! destination for the element begun in this pass
	faddd	%f24,%f38,%f24

	fsubd	%f36,%f34,%f34		! t += f - (c - h)
	fmuld	%f2,%f58,%f0		! q = ...

	fsubd	%f12,%f10,%f12		! (two - v.d) - u.d
	ldd	[%g1+%l1],%f46

	faddd	%f52,%f54,%f18
	fmuld	%f22,%f24,%f24
	ldd	[%fp+tmp2],%f22

	faddd	%f0,%f48,%f0
	nop

	faddd	%f44,%f46,%f44
	ldd	[%g1+%l3],%f10

	faddd	%f24,%f22,%f22

	fsubd	%f4,%f34,%f34
	fmuld	%f2,%f0,%f4

	std	%f12,[%fp+tmp1]
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop1
! delay slot
	faddd	%f42,%f10,%f42

! -- 16 byte aligned
.loop2:
	sub	%l2,%l7,%o3
	sub	%l6,%l2,%o4
	fpadd32s	%f20,%f31,%f24	! n = (ix + 0xc0194000) & 0xfff00000
	fmuld	%f26,%f22,%f8		! (previous iteration)

	andcc	%o3,%o4,%o4
	bge,pn	%icc,.range2		! ix <= 0x000fffff or >= 0x7ff00000
! delay slot
	fands	%f24,%f28,%f24

	add	%i1,%i2,%i1		! x += stridex
	add	%i3,%i4,%i3		! y += stridey
	fpsub32s	%f20,%f24,%f20	! u.l[0] -= n

.cont2:
	lda	[%i1]%asi,%l0		! preload next argument
	add	%l2,%l4,%l2		! j = ix + 0x94000
	fpadd32s	%f20,%f30,%f22	! v.l[0] = u.l[0] + 0x4000

	lda	[%i1]%asi,%f0
	srl	%l2,11,%l2		! j = (j >> 11) & 0x1f0
	fand	%f22,%f50,%f22		! v.l &= 0xffff8000...

	lda	[%i1+4]%asi,%f1
	and	%l2,0x1f0,%l2
	fitod	%f24,%f52		! (double) n

	add	%l2,8,%l3
	fsubd	%f20,%f22,%f24		! f = u.d - v.d

	faddd	%f20,%f22,%f26		! s = f / (u.d + v.d)

	fsubd	%f40,%f22,%f22		! two - v.d
	fmuld	%f52,%f60,%f54		! h = n * ln2hi + TBL[j]

	faddd	%f8,%f18,%f8		! y = c + (t + q)
	fmuld	%f52,%f62,%f52		! t = n * ln2lo + TBL[j+1]

	fdivd	%f24,%f26,%f26

	faddd	%f44,%f14,%f46		! c = h + f
	fmuld	%f16,%f16,%f12		! z = s * s

	faddd	%f8,%f56,%f8
	st	%f8,[%o2]

	st	%f9,[%o2+4]
	mov	%i3,%o2			! destination for the element begun in this pass
	faddd	%f4,%f38,%f4

	fsubd	%f46,%f44,%f44		! t += f - (c - h)
	fmuld	%f12,%f58,%f10		! q = ...

	fsubd	%f22,%f20,%f22		! (two - v.d) - u.d
	ldd	[%g1+%l2],%f56

	faddd	%f32,%f34,%f18
	fmuld	%f2,%f4,%f4
	ldd	[%fp+tmp0],%f2

	faddd	%f10,%f48,%f10
	nop

	faddd	%f54,%f56,%f54
	ldd	[%g1+%l3],%f20

	faddd	%f4,%f2,%f2

	fsubd	%f14,%f44,%f44
	fmuld	%f12,%f10,%f14

	std	%f22,[%fp+tmp2]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	faddd	%f52,%f20,%f52


! Once we get to the last element, we loop three more times to finish
! the computations in progress.  This means we will load past the end
! of the argument vector, but since we use non-faulting loads and never
! use the data, the only potential problem is cache miss.  (Note that
! when the argument is 2, the only exception that occurs in the compu-
! tation is an inexact result in the final addition, and we break out
! of the "extra" iterations before then.)
.endloop2:
	sethi	%hi(0x40000000),%l0	! "next argument" = two
	cmp	%i0,-3
	bg,a,pt	%icc,.loop0
! delay slot
	fmovd	%f40,%f0
	ret
	restore

	.align	16
.endloop0:
	sethi	%hi(0x40000000),%l1	! "next argument" = two
	cmp	%i0,-3
	bg,a,pt	%icc,.loop1
! delay slot
	fmovd	%f40,%f10
	ret
	restore

	.align	16
.endloop1:
	sethi	%hi(0x40000000),%l2	! "next argument" = two
	cmp	%i0,-3
	bg,a,pt	%icc,.loop2
! delay slot
	fmovd	%f40,%f20
	ret
	restore


! Out-of-range handling for the element entering stage 0.  The result of
! a zero, negative, infinite or NaN argument is stored here directly and
! the stage is restarted with the next argument (label 3); any other
! argument is rescaled and rejoins the pipeline at .cont0.
	.align	16
.range0:
	cmp	%l0,%l7
	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
! delay slot
	ld	[%i1+4],%o5
	fxtod	%f0,%f0			! scale by 2**1074 w/o trapping
	st	%f0,[%fp+tmp0]
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l0,%o5,%g0
	be,pn	%icc,1f			! if x == 0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fpadd32s	%f0,%f31,%f4	! n = (ix + 0xc0194000) & 0xfff00000
	fands	%f4,%f28,%f4
	fpsub32s	%f0,%f4,%f0	! u.l[0] -= n
	ld	[%fp+tmp0],%l0
	ba,pt	%icc,.cont0
! delay slot
	fpsub32s	%f4,%f29,%f4	! n -= 0x43200000
1:
	fdivs	%f29,%f1,%f4		! raise div-by-zero
	ba,pt	%icc,3f
! delay slot
	st	%f28,[%i3]		! store -inf
2:
	sll	%l0,1,%l0		! lop off sign bit
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l0,%o5,%g0
	be,pn	%icc,1b			! if x == -0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fabsd	%f0,%f4			! *y = (x + |x|) * inf
	faddd	%f0,%f4,%f0
	fand	%f28,%f50,%f4
	fnegd	%f4,%f4
	fmuld	%f0,%f4,%f0
	st	%f0,[%i3]
3:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop2
! delay slot
	st	%f1,[%i3+4]
	ld	[%i1],%l0		! get next argument
	ld	[%i1],%f0
	ba,pt	%icc,.loop0
! delay slot
	ld	[%i1+4],%f1


! Same as .range0, for the element entering stage 1.
	.align	16
.range1:
	cmp	%l1,%l7
	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
! delay slot
	ld	[%i1+4],%o5
	fxtod	%f10,%f10		! scale by 2**1074 w/o trapping
	st	%f10,[%fp+tmp1]
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l1,%o5,%g0
	be,pn	%icc,1f			! if x == 0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fpadd32s	%f10,%f31,%f14	! n = (ix + 0xc0194000) & 0xfff00000
	fands	%f14,%f28,%f14
	fpsub32s	%f10,%f14,%f10	! u.l[0] -= n
	ld	[%fp+tmp1],%l1
	ba,pt	%icc,.cont1
! delay slot
	fpsub32s	%f14,%f29,%f14	! n -= 0x43200000
1:
	fdivs	%f29,%f11,%f14		! raise div-by-zero
	ba,pt	%icc,3f
! delay slot
	st	%f28,[%i3]		! store -inf
2:
	sll	%l1,1,%l1		! lop off sign bit
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l1,%o5,%g0
	be,pn	%icc,1b			! if x == -0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fabsd	%f10,%f14		! *y = (x + |x|) * inf
	faddd	%f10,%f14,%f10
	fand	%f28,%f50,%f14
	fnegd	%f14,%f14
	fmuld	%f10,%f14,%f10
	st	%f10,[%i3]
3:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop0
! delay slot
	st	%f11,[%i3+4]
	ld	[%i1],%l1		! get next argument
	ld	[%i1],%f10
	ba,pt	%icc,.loop1
! delay slot
	ld	[%i1+4],%f11


! Same as .range0, for the element entering stage 2.
	.align	16
.range2:
	cmp	%l2,%l7
	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
! delay slot
	ld	[%i1+4],%o5
	fxtod	%f20,%f20		! scale by 2**1074 w/o trapping
	st	%f20,[%fp+tmp2]
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l2,%o5,%g0
	be,pn	%icc,1f			! if x == 0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fpadd32s	%f20,%f31,%f24	! n = (ix + 0xc0194000) & 0xfff00000
	fands	%f24,%f28,%f24
	fpsub32s	%f20,%f24,%f20	! u.l[0] -= n
	ld	[%fp+tmp2],%l2
	ba,pt	%icc,.cont2
! delay slot
	fpsub32s	%f24,%f29,%f24	! n -= 0x43200000
1:
	fdivs	%f29,%f21,%f24		! raise div-by-zero
	ba,pt	%icc,3f
! delay slot
	st	%f28,[%i3]		! store -inf
2:
	sll	%l2,1,%l2		! lop off sign bit
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l2,%o5,%g0
	be,pn	%icc,1b			! if x == -0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fabsd	%f20,%f24		! *y = (x + |x|) * inf
	faddd	%f20,%f24,%f20
	fand	%f28,%f50,%f24
	fnegd	%f24,%f24
	fmuld	%f20,%f24,%f20
	st	%f20,[%i3]
3:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop1
! delay slot
	st	%f21,[%i3+4]
	ld	[%i1],%l2		! get next argument
	ld	[%i1],%f20
	ba,pt	%icc,.loop2
! delay slot
	ld	[%i1+4],%f21

	SET_SIZE(__vlog)
