//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Add / Subtract
//
// Entry points:
//   __hexagon_adddf3   A = A + B
//   __hexagon_subdf3   A = A - B   (flips the sign bit of B, then adds)
// plus the __qdsp_*, __hexagon_fast_* and __hexagon_fast2_* aliases created
// by the Q6_ALIAS / FAST_ALIAS / FAST2_ALIAS macros below.
//
//   In:     A (r1:0) and B (r3:2) hold the two double-precision operands.
//   Out:    A (r1:0) holds the result; every path returns with "jumpr r31".
//   Writes: r0-r15, r28 and p0-p3 are used as scratch.  USR is read for the
//           rounding mode and is written on the overflow path (.Ladd_ovf).
//   NOTE(review): the scratch registers are presumably all caller-saved under
//   the Hexagon ABI -- confirm before relying on it.

// Operands / result
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

// Biased exponent fields of A and B
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4

#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6

// 64-bit fixed-point working copies of the two significands
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8

#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10

// EXTRACTAMT is the register pair {EXPDIFF, EXTRACTOFF} = {width, offset}
// handed to the register form of extractu in .Ladd_continue.
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14

#define TMP r28

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5

// Bit offset of the 2-bit rounding-mode field inside USR.  Overridable.
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

#define NORMAL p3
#define BIGB p2

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

	.text
	.global __hexagon_adddf3
	.global __hexagon_subdf3
	.type __hexagon_adddf3, @function
	.type __hexagon_subdf3, @function

Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)

	.p2align 5
__hexagon_adddf3:
	// Pull out both exponent fields and preload ATMP with the constant that
	// supplies the bit just above the 52 mantissa bits inserted below.
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
		ATMP = combine(##0x20000000,#0)
	}
	// NORMAL is written twice in this packet (class test on A and on B).
	// NOTE(review): this relies on same-packet predicate writes being ANDed,
	// i.e. NORMAL == "both operands are normal" -- see the Hexagon PRM.
	{
		NORMAL = dfclass(A,#2)
		NORMAL = dfclass(B,#2)
		BTMP = ATMP
		BIGB = cmp.gtu(EXPB,EXPA)		// Is B substantially greater than A?
	}
	// All reads in a packet see the pre-packet values, so the two conditional
	// moves below really do exchange A and B.
	{
		if (!NORMAL) jump .Ladd_abnormal	// If abnormal, go to special code
		if (BIGB) A = B				// if B >> A, swap A and B
		if (BIGB) B = A				// If B >> A, swap A and B
		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// swap exponents
	}
	{
		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62
		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62
		EXPDIFF = sub(EXPA,EXPB)
		ZTMP = combine(#62,#1)
	}
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
	// Shared by the normal path and the "normal + subnormal" path.
	// On entry: ATMP/BTMP = aligned magnitudes, EXPDIFF = EXPA-EXPB,
	// ZTMP = {62, 1}, A/B still carry the operand signs in bit 31 of AH/BH.
.Ladd_continue:
	{
		EXPDIFF = min(EXPDIFF,ZTMPH)		// If exponent difference >= ~60,
							// will collapse to sticky bit
		ATMP2 = neg(ATMP)
		A_POS = cmp.gt(AH,#-1)
		EXTRACTOFF = #0
	}
	// ATMP2 receives the low EXPDIFF bits of BTMP, i.e. exactly the bits the
	// arithmetic shift right is about to discard.
	{
		if (!A_POS) ATMP = ATMP2
		ATMP2 = extractu(BTMP,EXTRACTAMT)
		BTMP = ASR(BTMP,EXPDIFF)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
		ZERO = #0
	}
	// If any discarded bit was set, OR ZTMPL (== 1) into BTMP as a sticky bit.
	{
		NO_STICKIES = cmp.eq(ATMP2,ZERO)
		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
		EXPB = add(EXPA,#-BIAS-60)
		B_POS = cmp.gt(BH,#-1)
	}
	// Compute both candidates; the right one is selected by B's sign below.
	{
		ATMP = add(ATMP,BTMP)			// ADD!!!
		ATMP2 = sub(ATMP,BTMP)			// Negate and ADD --> SUB!!!
		ZTMP = combine(#54,##2045)
	}
	// Fast path only when 54 < EXPA <= 2045; otherwise handle possible
	// overflow / denormal results out of line.
	{
		p0 = cmp.gtu(EXPA,ZTMPH)		// must be pretty high in case of large cancellation
		p0 = !cmp.gtu(EXPA,ZTMPL)
		if (!p0.new) jump:nt .Ladd_ovf_unf
		if (!B_POS) ATMP = ATMP2		// if B neg, pick difference
	}
	{
		A = convert_d2df(ATMP)			// Convert to Double Precision, taking care of flags, etc. So nice!
		p0 = cmp.eq(ATMPH,#0)
		p0 = cmp.eq(ATMPL,#0)
		if (p0.new) jump:nt .Ladd_zero		// or maybe conversion handles zero case correctly?
	}
	// Add the exponent adjustment (EXPA-BIAS-60) into the exponent field.
	{
		AH += asl(EXPB,#HI_MANTBITS)
		jumpr r31
	}

	// __hexagon_subdf3: A - B is computed as A + (-B).
	.falign
__hexagon_subdf3:
	{
		BH = togglebit(BH,#31)
		jump __qdsp_adddf3
	}


	.falign
.Ladd_zero:
	// True zero, full cancellation
	// +0 unless round towards negative infinity
	{
		TMP = USR
		A = #0
		BH = #1
	}
	// Read the rounding-mode field through SR_ROUND_OFF, exactly as
	// .Ladd_ovf and .Lzero_plus_zero do, so that an overriding definition of
	// SR_ROUND_OFF is honoured on this path as well.
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
		BH = asl(BH,#31)
	}
	// Rounding mode 2 is the one that yields -0 here: flip the sign bit.
	{
		p0 = cmp.eq(TMP,#2)
		if (p0.new) AH = xor(AH,BH)
		jumpr r31
	}
	.falign
.Ladd_ovf_unf:
	// Overflow or Denormal is possible
	// Good news: Underflow flag is not possible!

	// ATMP has 2's complement value
	//
	// EXPA has A's exponent, EXPB has EXPA-BIAS-60
	//
	// Convert, extract exponent, add adjustment.
	// If > 2046, overflow
	// If <= 0, denormal
	//
	// Note that we've not done our zero check yet, so do that too

	{
		A = convert_d2df(ATMP)
		p0 = cmp.eq(ATMPH,#0)
		p0 = cmp.eq(ATMPL,#0)
		if (p0.new) jump:nt .Ladd_zero
	}
	// TMP = exponent field produced by the conversion (read before AH is
	// adjusted, since packet reads see pre-packet values).
	{
		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
		AH += asl(EXPB,#HI_MANTBITS)
	}
	// EXPB = final biased exponent; B = the implicit-one bit (bit 52).
	{
		EXPB = add(EXPB,TMP)
		B = combine(##0x00100000,#0)
	}
	{
		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
		if (p0.new) jump:nt .Ladd_ovf
	}
	// Positive exponent: the adjusted A is already the answer.
	// Otherwise TMP = 1 - EXPB is the denormalising right-shift amount.
	{
		p0 = cmp.gt(EXPB,#0)
		if (p0.new) jumpr:t r31
		TMP = sub(#1,EXPB)
	}
	// B = implicit one + mantissa of the converted value; A = ATMP so that
	// its top bit supplies the sign kept by the final insert.
	{
		B = insert(A,#MANTBITS,#0)
		A = ATMP
	}
	{
		B = lsr(B,TMP)
	}
	// Replace the low 63 bits of A with the shifted value, keep bit 63.
	{
		A = insert(B,#63,#0)
		jumpr r31
	}
	.falign
.Ladd_ovf:
	// We get either max finite value or infinity. Either way, overflow+inexact
	{
		A = ATMP				// 2's complement value
		TMP = USR
		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
	}
	{
		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
		TMP = or(TMP,#0x28)			// inexact + overflow
		BTMP = combine(##0x7ff00000,#0)		// positive infinity
	}
	{
		USR = TMP
		EXPB ^= lsr(AH,#31)			// Does sign match rounding?
		TMP = EXPB				// unmodified rounding mode
	}
	{
		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
		p0 = !cmp.eq(EXPB,#2)			// Not rounding the other way,
		if (p0.new) ATMP = BTMP			// we should get infinity
	}
	{
		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
	}
	// NOTE(review): the compare result in p0 is not consumed here; presumably
	// it is issued for its floating-point side effects -- confirm.
	{
		p0 = dfcmp.eq(A,A)
		jumpr r31
	}

	// At least one operand is not a normal number (zero, subnormal, inf, NaN).
.Ladd_abnormal:
	{
		ATMP = extractu(A,#63,#0)		// strip off sign
		BTMP = extractu(B,#63,#0)		// strip off sign
	}
	// Order the operands so that A holds the one with the larger magnitude
	// bits (signs included in A/B, stripped in ATMP/BTMP).
	{
		p3 = cmp.gtu(ATMP,BTMP)
		if (!p3.new) A = B			// sort values
		if (!p3.new) B = A			// sort values
	}
	{
		// Any NaN --> NaN, possibly raise invalid if sNaN
		p0 = dfclass(A,#0x0f)			// A not NaN?
		if (!p0.new) jump:nt .Linvalid_nan_add
		if (!p3) ATMP = BTMP
		if (!p3) BTMP = ATMP
	}
	{
		// Infinity + non-infinity number is infinity
		// Infinity + infinity --> inf or nan
		p1 = dfclass(A,#0x08)			// A is infinity
		if (p1.new) jump:nt .Linf_add
	}
	// ATMP is zeroed here because .LB_zero compares A against it.
	{
		p2 = dfclass(B,#0x01)			// B is zero
		if (p2.new) jump:nt .LB_zero		// so return A or special 0+0
		ATMP = #0
	}
	// We are left with adding one or more subnormals
	{
		p0 = dfclass(A,#4)
		if (p0.new) jump:nt .Ladd_two_subnormal
		ATMP = combine(##0x20000000,#0)
	}
	// A is normal, B is subnormal: give B an exponent of 1 and align its
	// magnitude to the same fixed-point position as A (no implicit one).
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = #1
		// BTMP already ABS(B)
		BTMP = asl(BTMP,#EXPBITS-2)
	}
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
	// Recreate the state expected at .Ladd_continue and rejoin the main path.
	{
		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
		EXPDIFF = sub(EXPA,EXPB)
		ZTMP = combine(#62,#1)
		jump .Ladd_continue
	}

	// Both operands are subnormal: add them as signed 64-bit integers.
.Ladd_two_subnormal:
	{
		ATMP = extractu(A,#63,#0)
		BTMP = extractu(B,#63,#0)
	}
	// Negated magnitudes, used when the corresponding operand is negative.
	{
		ATMP = neg(ATMP)
		BTMP = neg(BTMP)
		p0 = cmp.gt(AH,#-1)
		p1 = cmp.gt(BH,#-1)
	}
	// For a non-negative operand the raw bits already equal its magnitude.
	{
		if (p0) ATMP = A
		if (p1) BTMP = B
	}
	{
		ATMP = add(ATMP,BTMP)
	}
	{
		BTMP = neg(ATMP)
		p0 = cmp.gt(ATMPH,#-1)
		B = #0
	}
	// A = |sum|; B becomes -0.0 (high word 0x80000000, low word 0).
	{
		if (!p0) A = BTMP
		if (p0) A = ATMP
		BH = ##0x80000000
	}
	// Re-attach the sign for a negative sum; a zero sum needs the signed-zero
	// rules in .Lzero_plus_zero.
	{
		if (!p0) AH = or(AH,BH)
		p0 = dfcmp.eq(A,B)
		if (p0.new) jump:nt .Lzero_plus_zero
	}
	{
		jumpr r31
	}

.Linvalid_nan_add:
	{
		TMP = convert_df2sf(A)			// will generate invalid if sNaN
		p0 = dfclass(B,#0x0f)			// if B is not NaN
		if (p0.new) B = A			// make it whatever A is
	}
	// The result is the all-ones bit pattern.
	{
		BL = convert_df2sf(B)			// will generate invalid if sNaN
		A = #-1
		jumpr r31
	}
	.falign
.LB_zero:
	{
		p0 = dfcmp.eq(ATMP,A)			// is A also zero?
		if (!p0.new) jumpr:t r31		// If not, just return A
	}
	// 0 + 0 is special
	// if equal integral values, they have the same sign, which is fine for all rounding
	// modes.
	// If unequal in sign, we get +0 for all rounding modes except round down
.Lzero_plus_zero:
	{
		p0 = cmp.eq(A,B)
		if (p0.new) jumpr:t r31
	}
	{
		TMP = USR
	}
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
		A = #0
	}
	{
		p0 = cmp.eq(TMP,#2)
		if (p0.new) AH = ##0x80000000
		jumpr r31
	}
.Linf_add:
	// adding infinities is only OK if they are equal
	{
		p0 = !cmp.eq(AH,BH)			// Do they have different signs
		p0 = dfclass(B,#8)			// And is B also infinite?
		if (!p0.new) jumpr:t r31		// If not, just a normal inf
	}
	{
		BL = ##0x7f800001			// sNAN
	}
	{
		A = convert_sf2df(BL)			// trigger invalid, set NaN
		jumpr r31
	}
END(__hexagon_adddf3)