//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision square root
//
// Entry points: __hexagon_sqrtdf2, __hexagon_sqrt (same address), plus the
// __qdsp_*, __hexagon_fast_* and __hexagon_fast2_* aliases created with .set.
//
//   In:      r1:0 = double-precision operand (A)
//   Out:     r1:0 = result
//   Return:  jumpr r31
//   Written: r0-r15, r28, p0-p3 (see the register aliases below)
//
// Outline of the normal path:
//   1. Build a single-precision radicand from the top mantissa bits and the
//      exponent LSB, and get a reciprocal-square-root estimate (sfinvsqrta).
//   2. Refine the estimate with single-precision multiply-accumulate steps.
//   3. Run three fixed-point correction iterations on a 64-bit root.
//   4. Adjust the root by at most two units using the exact remainder, OR in
//      a sticky bit when the remainder is non-zero, and convert to double.
// Zero, NaN, negative, infinite and denormal operands go to .Lsqrt_abnormal.
//
// Register aliases are redefined (#undef/#define) as registers are recycled
// through the routine; each alias is only valid between its #define and the
// matching #undef.

#define EXP r28

#define A r1:0
#define AH r1
#define AL r0

#define SFSH r3:2
#define SF_S r3
#define SF_H r2

#define SFHALF_SONE r5:4
#define S_ONE r4
#define SFHALF r5
#define SF_D r6
#define SF_E r7
#define RECIPEST r8
#define SFRAD r9

#define FRACRAD r11:10
#define FRACRADH r11
#define FRACRADL r10

#define ROOT r13:12
#define ROOTHI r13
#define ROOTLO r12

#define PROD r15:14
#define PRODHI r15
#define PRODLO r14

#define P_TMP p0
#define P_EXP1 p1
#define NORMAL p2

#define SF_EXPBITS 8
#define SF_MANTBITS 23

#define DF_EXPBITS 11
#define DF_MANTBITS 52

#define DF_BIAS 0x3ff

// Class-selection masks for dfclass(); one bit per class.
#define DFCLASS_ZERO 0x01
#define DFCLASS_NORMAL 0x02
#define DFCLASS_DENORMAL 0x04
#define DFCLASS_INFINITE 0x08
#define DFCLASS_NAN 0x10

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
#define END(TAG) .size TAG,.-TAG

	.text
	.global __hexagon_sqrtdf2
	.type __hexagon_sqrtdf2,@function
	.global __hexagon_sqrt
	.type __hexagon_sqrt,@function
	Q6_ALIAS(sqrtdf2)
	Q6_ALIAS(sqrt)
	FAST_ALIAS(sqrtdf2)
	FAST_ALIAS(sqrt)
	FAST2_ALIAS(sqrtdf2)
	FAST2_ALIAS(sqrt)
	// NOTE(review): no 'sqrt' label is defined in this file; this directive is
	// kept as-is to leave the emitted symbol table unchanged -- confirm intent.
	.type sqrt,@function
	.p2align 5
__hexagon_sqrtdf2:
__hexagon_sqrt:
	{
		// Bits [52:29] of A: the top 23 mantissa bits plus the exponent LSB.
		PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
		// Biased 11-bit exponent field of A.
		EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
		// SFHALF = 0x3f000004, S_ONE = 1.
		SFHALF_SONE = combine(##0x3f000004,#1)
	}
	{
		NORMAL = dfclass(A,#DFCLASS_NORMAL)	// Is it normal
		NORMAL = cmp.gt(AH,#-1)			// and positive?
		if (!NORMAL.new) jump:nt .Lsqrt_abnormal
		SFRAD = or(SFHALF,PRODLO)		// single-precision radicand
	}
#undef NORMAL
	// The denormal path re-enters here with A, EXP, SFRAD and SFHALF rebuilt.
.Ldenormal_restart:
	{
		FRACRAD = A
		SF_E,P_TMP = sfinvsqrta(SFRAD)
		SFHALF = and(SFHALF,#-16)		// clear low 4 bits: 0x3f000000 (0.5)
		SFSH = #0				// SF_S = SF_H = 0 before accumulating
	}
#undef A
#undef AH
#undef AL
#define ERROR r1:0
#define ERRORHI r1
#define ERRORLO r0
	// SF_E : reciprocal square root
	// SF_H : half rsqrt
	// SF_S : square root
	// SF_D : error term
	// SFHALF: 0.5
	{
		SF_S += sfmpy(SF_E,SFRAD):lib		// s0: root
		SF_H += sfmpy(SF_E,SFHALF):lib		// h0: 0.5*y0. Could also decrement exponent...
		SF_D = SFHALF
#undef SFRAD
#define SHIFTAMT r9
		SHIFTAMT = and(EXP,#1)			// LSB of the biased exponent
	}
	{
		SF_D -= sfmpy(SF_S,SF_H):lib		// d0: 0.5-H*S = 0.5-0.5*~1
		FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)	// replace upper bits with hidden
		P_EXP1 = cmp.gtu(SHIFTAMT,#0)		// true when the exponent LSB is set
	}
	{
		SF_S += sfmpy(SF_S,SF_D):lib		// s1: refine sqrt
		SF_H += sfmpy(SF_H,SF_D):lib		// h1: refine half-recip
		SF_D = SFHALF
		SHIFTAMT = mux(P_EXP1,#8,#9)
	}
	{
		SF_D -= sfmpy(SF_S,SF_H):lib		// d1: error term
		FRACRAD = asl(FRACRAD,SHIFTAMT)		// Move fracrad bits to right place
		SHIFTAMT = mux(P_EXP1,#3,#2)
	}
	{
		SF_H += sfmpy(SF_H,SF_D):lib		// d2: rsqrt
		// cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
		PROD = asl(FRACRAD,SHIFTAMT)		// fracrad<<(2+exp1)
	}
	{
		SF_H = and(SF_H,##0x007fffff)		// keep the 23 mantissa bits
	}
	{
		SF_H = add(SF_H,##0x00800000 - 3)	// set bit 23, then subtract 3
		SHIFTAMT = mux(P_EXP1,#7,#8)
	}
	{
		RECIPEST = asl(SF_H,SHIFTAMT)		// fixed-point reciprocal estimate
		SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
	}
	{
		ROOT = mpyu(RECIPEST,PRODHI)		// root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
	}

#undef SFSH	// r3:2
#undef SF_H	// r2
#undef SF_S	// r3
#undef S_ONE	// r4
#undef SFHALF	// r5
#undef SFHALF_SONE	// r5:4
#undef SF_D	// r6
#undef SF_E	// r7

#define HL r3:2
#define LL r5:4
#define HH r7:6

#undef P_EXP1
#define P_CARRY0 p1
#define P_CARRY1 p2
#define P_CARRY2 p3

	// Iteration 0
	// Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply
	// We can shift and subtract instead of shift and add?
	{
		ERROR = asl(FRACRAD,#15)
		PROD = mpyu(ROOTHI,ROOTHI)
		P_CARRY0 = cmp.eq(r0,r0)		// always true: preset carry-in
	}
	{
		ERROR -= asl(PROD,#15)
		PROD = mpyu(ROOTHI,ROOTLO)
		P_CARRY1 = cmp.eq(r0,r0)		// always true: preset carry-in
	}
	{
		ERROR -= lsr(PROD,#16)
		P_CARRY2 = cmp.eq(r0,r0)		// always true: preset carry-in
	}
	{
		ERROR = mpyu(ERRORHI,RECIPEST)
	}
	{
		ROOT += lsr(ERROR,SHIFTAMT)
		SHIFTAMT = add(SHIFTAMT,#16)
		ERROR = asl(FRACRAD,#31)		// for next iter
	}
	// Iteration 1
	{
		PROD = mpyu(ROOTHI,ROOTHI)
		ERROR -= mpyu(ROOTHI,ROOTLO)		// amount is 31, no shift needed
	}
	{
		ERROR -= asl(PROD,#31)
		PROD = mpyu(ROOTLO,ROOTLO)
	}
	{
		ERROR -= lsr(PROD,#33)
	}
	{
		ERROR = mpyu(ERRORHI,RECIPEST)
	}
	{
		ROOT += lsr(ERROR,SHIFTAMT)
		SHIFTAMT = add(SHIFTAMT,#16)
		ERROR = asl(FRACRAD,#47)		// for next iter
	}
	// Iteration 2
	{
		PROD = mpyu(ROOTHI,ROOTHI)
	}
	{
		ERROR -= asl(PROD,#47)
		PROD = mpyu(ROOTHI,ROOTLO)
	}
	{
		ERROR -= asl(PROD,#16)			// bidir shr 31-47
		PROD = mpyu(ROOTLO,ROOTLO)
	}
	{
		ERROR -= lsr(PROD,#17)			// 64-47
	}
	{
		ERROR = mpyu(ERRORHI,RECIPEST)
	}
	{
		ROOT += lsr(ERROR,SHIFTAMT)
	}
#undef ERROR
#undef PROD
#undef PRODHI
#undef PRODLO
#define REM_HI r15:14
#define REM_HI_HI r15
#define REM_LO r1:0
#undef RECIPEST
#undef SHIFTAMT
#define TWOROOT_LO r9:8
	// Adjust Root
	// Form ROOT*ROOT from its 32x32 partial products (HH, HL, LL), subtract it
	// from the radicand to get the remainder REM_HI:REM_LO, then try up to
	// two increments of ROOT, each costing 2*ROOT+1 (TWOROOT_LO).
	{
		HL = mpyu(ROOTHI,ROOTLO)
		LL = mpyu(ROOTLO,ROOTLO)
		REM_HI = #0
		REM_LO = #0
	}
	{
		HL += lsr(LL,#33)
		LL += asl(HL,#33)
		P_CARRY0 = cmp.eq(r0,r0)		// always true: preset carry-in
	}
	{
		HH = mpyu(ROOTHI,ROOTHI)
		REM_LO = sub(REM_LO,LL,P_CARRY0):carry
		TWOROOT_LO = #1
	}
	{
		HH += lsr(HL,#31)
		TWOROOT_LO += asl(ROOT,#1)		// 2*ROOT + 1
	}
#undef HL
#undef LL
#define REM_HI_TMP r3:2
#define REM_HI_TMP_HI r3
#define REM_LO_TMP r5:4
	{
		REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
		REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
#undef FRACRAD
#undef HH
#define ZERO r11:10
#define ONE r7:6
		ONE = #1
		ZERO = #0
	}
	{
		REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
		ONE = add(ROOT,ONE)			// candidate ROOT+1
		EXP = add(EXP,#-DF_BIAS)		// subtract bias --> signed exp
	}
	{
		// If carry set, no borrow: result was still positive
		if (P_CARRY1) ROOT = ONE
		if (P_CARRY1) REM_LO = REM_LO_TMP
		if (P_CARRY1) REM_HI = REM_HI_TMP
	}
	{
		REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
		ONE = #1
		EXP = asr(EXP,#1)			// divide signed exp by 2
	}
	{
		REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
		ONE = add(ROOT,ONE)			// candidate ROOT+1
	}
	{
		if (P_CARRY2) ROOT = ONE
		if (P_CARRY2) REM_LO = REM_LO_TMP
		// since tworoot <= 2^32, remhi must be zero
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#define S_ONE r2
#define ADJ r3
		S_ONE = #1
	}
	{
		P_TMP = cmp.eq(REM_LO,ZERO)		// is the low part zero
		if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE)	// if so, it's exact... hopefully
		ADJ = cl0(ROOT)				// leading-zero count of ROOT
		EXP = add(EXP,#-63)
	}
#undef REM_LO
#define RET r1:0
#define RETHI r1
	{
		RET = convert_ud2df(ROOT)		// set up mantissa, maybe set inexact flag
		EXP = add(EXP,ADJ)			// add back bias
	}
	{
		RETHI += asl(EXP,#DF_MANTBITS-32)	// add exponent adjust
		jumpr r31
	}
#undef REM_LO_TMP
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#undef REM_LO
#undef REM_HI
#undef TWOROOT_LO

#undef RET
#define A r1:0
#define AH r1
#define AL r0
#undef S_ONE
#define TMP r3:2
#define TMPHI r3
#define TMPLO r2
#undef P_CARRY0
#define P_NEG p1


#define SFHALF r5
#define SFRAD r9
	// Operand was not a positive normal number. Handled in order:
	// zero, NaN, negative, infinity, and finally denormal.
.Lsqrt_abnormal:
	{
		P_TMP = dfclass(A,#DFCLASS_ZERO)	// zero?
		if (P_TMP.new) jumpr:t r31		// return the operand unchanged
	}
	{
		P_TMP = dfclass(A,#DFCLASS_NAN)
		if (P_TMP.new) jump:nt .Lsqrt_nan
	}
	{
		P_TMP = cmp.gt(AH,#-1)
		if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
		if (!P_TMP.new) EXP = ##0x7F800001	// sNaN
	}
	{
		P_TMP = dfclass(A,#DFCLASS_INFINITE)
		if (P_TMP.new) jumpr:nt r31		// return the operand unchanged
	}
	// If we got here, we're denormal
	// prepare to restart
	{
		A = extractu(A,#DF_MANTBITS,#0)		// Extract mantissa
	}
	{
		EXP = add(clb(A),#-DF_EXPBITS)		// how much to normalize?
	}
	{
		A = asl(A,EXP)				// Shift mantissa
		EXP = sub(#1,EXP)			// Form exponent
	}
	{
		AH = insert(EXP,#1,#DF_MANTBITS-32)	// insert lsb of exponent
	}
	{
		TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)	// get sf value (mant+exp1)
		SFHALF = ##0x3f000004			// form half constant
	}
	{
		SFRAD = or(SFHALF,TMPLO)		// form sf value
		SFHALF = and(SFHALF,#-16)
		jump .Ldenormal_restart			// restart
	}
.Lsqrt_nan:
	{
		EXP = convert_df2sf(A)			// if sNaN, get invalid
		A = #-1					// qNaN
		jumpr r31
	}
.Lsqrt_invalid_neg:
	{
		A = convert_sf2df(EXP)			// Invalid,NaNval
		jumpr r31
	}
END(__hexagon_sqrt)
END(__hexagon_sqrtdf2)