1//===----------------------Hexagon builtin routine ------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9// Double Precision square root 10 11#define EXP r28 12 13#define A r1:0 14#define AH r1 15#define AL r0 16 17#define SFSH r3:2 18#define SF_S r3 19#define SF_H r2 20 21#define SFHALF_SONE r5:4 22#define S_ONE r4 23#define SFHALF r5 24#define SF_D r6 25#define SF_E r7 26#define RECIPEST r8 27#define SFRAD r9 28 29#define FRACRAD r11:10 30#define FRACRADH r11 31#define FRACRADL r10 32 33#define ROOT r13:12 34#define ROOTHI r13 35#define ROOTLO r12 36 37#define PROD r15:14 38#define PRODHI r15 39#define PRODLO r14 40 41#define P_TMP p0 42#define P_EXP1 p1 43#define NORMAL p2 44 45#define SF_EXPBITS 8 46#define SF_MANTBITS 23 47 48#define DF_EXPBITS 11 49#define DF_MANTBITS 52 50 51#define DF_BIAS 0x3ff 52 53#define DFCLASS_ZERO 0x01 54#define DFCLASS_NORMAL 0x02 55#define DFCLASS_DENORMAL 0x02 56#define DFCLASS_INFINITE 0x08 57#define DFCLASS_NAN 0x10 58 59#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function 60#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function 61#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function 62#define END(TAG) .size TAG,.-TAG 63 64 .text 65 .global __hexagon_sqrtdf2 66 .type __hexagon_sqrtdf2,@function 67 .global __hexagon_sqrt 68 .type __hexagon_sqrt,@function 69 Q6_ALIAS(sqrtdf2) 70 Q6_ALIAS(sqrt) 71 FAST_ALIAS(sqrtdf2) 72 FAST_ALIAS(sqrt) 73 FAST2_ALIAS(sqrtdf2) 74 FAST2_ALIAS(sqrt) 75 .type sqrt,@function 76 .p2align 5 77__hexagon_sqrtdf2: 78__hexagon_sqrt: 79 { 80 PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) 81 EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) 82 SFHALF_SONE = combine(##0x3f000004,#1) 83 } 84 { 85 NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal 86 NORMAL = cmp.gt(AH,#-1) // and positive? 87 if (!NORMAL.new) jump:nt .Lsqrt_abnormal 88 SFRAD = or(SFHALF,PRODLO) 89 } 90#undef NORMAL 91.Ldenormal_restart: 92 { 93 FRACRAD = A 94 SF_E,P_TMP = sfinvsqrta(SFRAD) 95 SFHALF = and(SFHALF,#-16) 96 SFSH = #0 97 } 98#undef A 99#undef AH 100#undef AL 101#define ERROR r1:0 102#define ERRORHI r1 103#define ERRORLO r0 104 // SF_E : reciprocal square root 105 // SF_H : half rsqrt 106 // sf_S : square root 107 // SF_D : error term 108 // SFHALF: 0.5 109 { 110 SF_S += sfmpy(SF_E,SFRAD):lib // s0: root 111 SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent... 112 SF_D = SFHALF 113#undef SFRAD 114#define SHIFTAMT r9 115 SHIFTAMT = and(EXP,#1) 116 } 117 { 118 SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1 119 FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden 120 P_EXP1 = cmp.gtu(SHIFTAMT,#0) 121 } 122 { 123 SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt 124 SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip 125 SF_D = SFHALF 126 SHIFTAMT = mux(P_EXP1,#8,#9) 127 } 128 { 129 SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term 130 FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place 131 SHIFTAMT = mux(P_EXP1,#3,#2) 132 } 133 { 134 SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt 135 // cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x). 136 PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1) 137 } 138 { 139 SF_H = and(SF_H,##0x007fffff) 140 } 141 { 142 SF_H = add(SF_H,##0x00800000 - 3) 143 SHIFTAMT = mux(P_EXP1,#7,#8) 144 } 145 { 146 RECIPEST = asl(SF_H,SHIFTAMT) 147 SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0)) 148 } 149 { 150 ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1))) 151 } 152 153#undef SFSH // r3:2 154#undef SF_H // r2 155#undef SF_S // r3 156#undef S_ONE // r4 157#undef SFHALF // r5 158#undef SFHALF_SONE // r5:4 159#undef SF_D // r6 160#undef SF_E // r7 161 162#define HL r3:2 163#define LL r5:4 164#define HH r7:6 165 166#undef P_EXP1 167#define P_CARRY0 p1 168#define P_CARRY1 p2 169#define P_CARRY2 p3 170 171 // Iteration 0 172 // Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply 173 // We can shift and subtract instead of shift and add? 174 { 175 ERROR = asl(FRACRAD,#15) 176 PROD = mpyu(ROOTHI,ROOTHI) 177 P_CARRY0 = cmp.eq(r0,r0) 178 } 179 { 180 ERROR -= asl(PROD,#15) 181 PROD = mpyu(ROOTHI,ROOTLO) 182 P_CARRY1 = cmp.eq(r0,r0) 183 } 184 { 185 ERROR -= lsr(PROD,#16) 186 P_CARRY2 = cmp.eq(r0,r0) 187 } 188 { 189 ERROR = mpyu(ERRORHI,RECIPEST) 190 } 191 { 192 ROOT += lsr(ERROR,SHIFTAMT) 193 SHIFTAMT = add(SHIFTAMT,#16) 194 ERROR = asl(FRACRAD,#31) // for next iter 195 } 196 // Iteration 1 197 { 198 PROD = mpyu(ROOTHI,ROOTHI) 199 ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed 200 } 201 { 202 ERROR -= asl(PROD,#31) 203 PROD = mpyu(ROOTLO,ROOTLO) 204 } 205 { 206 ERROR -= lsr(PROD,#33) 207 } 208 { 209 ERROR = mpyu(ERRORHI,RECIPEST) 210 } 211 { 212 ROOT += lsr(ERROR,SHIFTAMT) 213 SHIFTAMT = add(SHIFTAMT,#16) 214 ERROR = asl(FRACRAD,#47) // for next iter 215 } 216 // Iteration 2 217 { 218 PROD = mpyu(ROOTHI,ROOTHI) 219 } 220 { 221 ERROR -= asl(PROD,#47) 222 PROD = mpyu(ROOTHI,ROOTLO) 223 } 224 { 225 ERROR -= asl(PROD,#16) // bidir shr 31-47 226 PROD = mpyu(ROOTLO,ROOTLO) 227 } 228 { 229 ERROR -= lsr(PROD,#17) // 64-47 230 } 231 { 232 ERROR = mpyu(ERRORHI,RECIPEST) 233 } 234 { 235 ROOT += lsr(ERROR,SHIFTAMT) 236 } 237#undef ERROR 238#undef PROD 239#undef PRODHI 240#undef PRODLO 241#define REM_HI r15:14 242#define REM_HI_HI r15 243#define REM_LO r1:0 244#undef RECIPEST 245#undef SHIFTAMT 246#define TWOROOT_LO r9:8 247 // Adjust Root 248 { 249 HL = mpyu(ROOTHI,ROOTLO) 250 LL = mpyu(ROOTLO,ROOTLO) 251 REM_HI = #0 252 REM_LO = #0 253 } 254 { 255 HL += lsr(LL,#33) 256 LL += asl(HL,#33) 257 P_CARRY0 = cmp.eq(r0,r0) 258 } 259 { 260 HH = mpyu(ROOTHI,ROOTHI) 261 REM_LO = sub(REM_LO,LL,P_CARRY0):carry 262 TWOROOT_LO = #1 263 } 264 { 265 HH += lsr(HL,#31) 266 TWOROOT_LO += asl(ROOT,#1) 267 } 268#undef HL 269#undef LL 270#define REM_HI_TMP r3:2 271#define REM_HI_TMP_HI r3 272#define REM_LO_TMP r5:4 273 { 274 REM_HI = sub(FRACRAD,HH,P_CARRY0):carry 275 REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry 276#undef FRACRAD 277#undef HH 278#define ZERO r11:10 279#define ONE r7:6 280 ONE = #1 281 ZERO = #0 282 } 283 { 284 REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry 285 ONE = add(ROOT,ONE) 286 EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp 287 } 288 { 289 // If carry set, no borrow: result was still positive 290 if (P_CARRY1) ROOT = ONE 291 if (P_CARRY1) REM_LO = REM_LO_TMP 292 if (P_CARRY1) REM_HI = REM_HI_TMP 293 } 294 { 295 REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry 296 ONE = #1 297 EXP = asr(EXP,#1) // divide signed exp by 2 298 } 299 { 300 REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry 301 ONE = add(ROOT,ONE) 302 } 303 { 304 if (P_CARRY2) ROOT = ONE 305 if (P_CARRY2) REM_LO = REM_LO_TMP 306 // since tworoot <= 2^32, remhi must be zero 307#undef REM_HI_TMP 308#undef REM_HI_TMP_HI 309#define S_ONE r2 310#define ADJ r3 311 S_ONE = #1 312 } 313 { 314 P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero 315 if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully 316 ADJ = cl0(ROOT) 317 EXP = add(EXP,#-63) 318 } 319#undef REM_LO 320#define RET r1:0 321#define RETHI r1 322 { 323 RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag 324 EXP = add(EXP,ADJ) // add back bias 325 } 326 { 327 RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust 328 jumpr r31 329 } 330#undef REM_LO_TMP 331#undef REM_HI_TMP 332#undef REM_HI_TMP_HI 333#undef REM_LO 334#undef REM_HI 335#undef TWOROOT_LO 336 337#undef RET 338#define A r1:0 339#define AH r1 340#define AL r1 341#undef S_ONE 342#define TMP r3:2 343#define TMPHI r3 344#define TMPLO r2 345#undef P_CARRY0 346#define P_NEG p1 347 348 349#define SFHALF r5 350#define SFRAD r9 351.Lsqrt_abnormal: 352 { 353 P_TMP = dfclass(A,#DFCLASS_ZERO) // zero? 354 if (P_TMP.new) jumpr:t r31 355 } 356 { 357 P_TMP = dfclass(A,#DFCLASS_NAN) 358 if (P_TMP.new) jump:nt .Lsqrt_nan 359 } 360 { 361 P_TMP = cmp.gt(AH,#-1) 362 if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg 363 if (!P_TMP.new) EXP = ##0x7F800001 // sNaN 364 } 365 { 366 P_TMP = dfclass(A,#DFCLASS_INFINITE) 367 if (P_TMP.new) jumpr:nt r31 368 } 369 // If we got here, we're denormal 370 // prepare to restart 371 { 372 A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa 373 } 374 { 375 EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize? 376 } 377 { 378 A = asl(A,EXP) // Shift mantissa 379 EXP = sub(#1,EXP) // Form exponent 380 } 381 { 382 AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent 383 } 384 { 385 TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1) 386 SFHALF = ##0x3f000004 // form half constant 387 } 388 { 389 SFRAD = or(SFHALF,TMPLO) // form sf value 390 SFHALF = and(SFHALF,#-16) 391 jump .Ldenormal_restart // restart 392 } 393.Lsqrt_nan: 394 { 395 EXP = convert_df2sf(A) // if sNaN, get invalid 396 A = #-1 // qNaN 397 jumpr r31 398 } 399.Lsqrt_invalid_neg: 400 { 401 A = convert_sf2df(EXP) // Invalid,NaNval 402 jumpr r31 403 } 404END(__hexagon_sqrt) 405END(__hexagon_sqrtdf2) 406