/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"expm1.s"

#include "libm.h"
LIBM_ANSI_PRAGMA_WEAK(expm1,function)

	.data
	.align	4
.mhundred:	.float	-100.0

/*
 * double expm1(double x)  --  e**x - 1, computed on the x87 FPU.
 *
 * In:    4(%esp) = low 32 bits of x, 8(%esp) = high 32 bits of x
 * Out:   result is left on top of the x87 stack
 * Uses:  %eax, %ecx, %edx, flags, and 8 bytes of scratch stack on the
 *        general path.  The PIC helper macros come from libm.h and are
 *        not visible here; any extra registers they touch are theirs
 *        to save and restore.
 *
 * Dispatch on |x| (decided from the raw bit pattern of x):
 *   |x| <= ln(2)        f2xm1 applied directly to x*log2(e)
 *   ln(2) < |x| < INF   z = x*log2(e) is split into [z] and z-[z];
 *                       2**(z-[z]) is scaled by 2**max([z],-100) and
 *                       one is subtracted
 *   x = -INF            returns -1
 *   x = +INF or NaN     returns x
 */
	ENTRY(expm1)
	movl	8(%esp),%ecx		/ ecx = high word of x
	andl	$0x7fffffff,%ecx	/ drop the sign: high word of |x|
	cmpl	$0x3fe62e42,%ecx	/ compare with the high word of ln(2)
	jb	.direct_f2xm1		/ below: |x| < ln(2) for certain
	je	.hi_matches_ln2		/ equal: the low word must decide
	cmpl	$0x7ff00000,%ecx	/ compare with the high word of INF
	jae	.inf_or_nan		/ at or above: x is INF or NaN
.reduce_and_scale:			/ ln(2) < |x| < INF from here on
	fldl	4(%esp)			/ x

	/ Keep the caller control word in 4(%esp) and run with the
	/ precision-control field forced to 64-bit significands.
	subl	$8,%esp
	fstcw	4(%esp)			/ saved control word
	movw	4(%esp),%ax
	orw	$0x0300,%ax		/ precision control = 64 bits
	movw	%ax,(%esp)
	fldcw	(%esp)

	fldl2e				/ log2e, x
	fmulp	%st,%st(1)		/ z = x*log2e
	fld	%st(0)			/ z, z
	frndint				/ [z], z
	/ [z] is nonzero here: form exp(x) first, then take away one
	fxch				/ z, [z]
	fsub	%st(1),%st		/ z-[z], [z]
	f2xm1				/ 2**(z-[z])-1, [z]
	/ Clamp the scale exponent at -100 so that the scaling step
	/ does not raise a spurious underflow.
	PIC_SETUP(1)
	flds	PIC_L(.mhundred)	/ -100, 2**(z-[z])-1, [z]
	PIC_WRAPUP
	fucom	%st(2)			/ compare -100 against [z]
	fstsw	%ax
	sahf
	jb	.exponent_ready		/ -100 < [z]: keep [z] as it is
	fxch	%st(2)			/ otherwise -100 takes the place of [z]
.exponent_ready:
	fstp	%st(0)			/ 2**(z-[z])-1, max([z],-100)
	fld1				/ 1, 2**(z-[z])-1, max([z],-100)
	faddp	%st,%st(1)		/ 2**(z-[z]), max([z],-100)
	fscale				/ exp(x), max([z],-100)
	fld1				/ 1, exp(x), max([z],-100)
	fxch				/ exp(x), 1, max([z],-100)
	/ NOTE(review): the next line depends on how this assembler maps
	/ the fsubp mnemonic; check the encoding before rewriting it.
	fsubp	%st,%st(1)		/ exp(x)-1, max([z],-100)
	fstp	%st(1)			/ exp(x)-1

	/ Put back the saved precision-control field while keeping
	/ every other bit of the current control word.
	fstcw	(%esp)
	movw	4(%esp),%cx
	andw	$0x0300,%cx		/ saved precision-control bits
	movw	(%esp),%dx
	andw	$0xfcff,%dx		/ current word minus those bits
	orw	%dx,%cx
	movw	%cx,(%esp)
	fldcw	(%esp)
	addl	$8,%esp

	ret

.hi_matches_ln2:
	/ The high word equals that of ln(2); look at the low word.
	movl	4(%esp),%edx		/ edx = low word of x
	cmpl	$0xfefa39ef,%edx	/ low word of ln(2)
	ja	.reduce_and_scale	/ above: |x| is just over ln(2)
.direct_f2xm1:
	/ |x| <= ln(2), hence |x*log2(e)| stays within the range that
	/ f2xm1 accepts and no reduction is needed.
	fldl	4(%esp)			/ x
	fldl2e				/ log2e, x
	fmulp	%st,%st(1)		/ z = x*log2e
	f2xm1				/ 2**z - 1 = e**x - 1
	ret

.inf_or_nan:
	/ The flags are still those of the compare against the high
	/ word of INF made just before branching here.
	ja	.return_x		/ above the INF pattern: x is NaN
	movl	4(%esp),%edx		/ edx = low word of x
	testl	%edx,%edx
	jne	.return_x		/ nonzero low word: x is NaN
	movl	8(%esp),%eax		/ eax = high word of x
	andl	$0x80000000,%eax	/ isolate the sign of the infinity
	jz	.return_x		/ sign clear: x = +INF
	fld1				/ x = -INF: the answer is -1
	fchs
	ret

.return_x:
	/ x is NaN or +INF: hand x back unchanged.
	fldl	4(%esp)
	fwait
	ret
	.align	4
	SET_SIZE(expm1)