//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass expands certain floating point instructions at the IR level.
//
// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp
// .. to’ instructions with a bitwidth above a threshold.  This is
// useful for targets like x86_64 that cannot lower fp conversions
// with more than 128 bits.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandFp.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

// Hidden command-line override for the expansion threshold. runImpl() only
// honours it when it differs from its initial value
// (IntegerType::MAX_INT_BITS); otherwise the width reported by the target's
// TargetLowering is used.
static cl::opt<unsigned>
    ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
                     cl::init(llvm::IntegerType::MAX_INT_BITS),
                     cl::desc("fp convert instructions on integers with "
                              "more than <N> bits are expanded."));

// clang-format off: preserve formatting of the following example

/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
/// the generated code. This currently generates code similarly to compiler-rt's
/// implementations.
///
/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
/// entry:
///   %0 = bitcast float %a to i32
///   %conv.i = zext i32 %0 to i64
///   %tobool.not = icmp sgt i32 %0, -1
///   %conv = select i1 %tobool.not, i64 1, i64 -1
///   %and = lshr i64 %conv.i, 23
///   %shr = and i64 %and, 255
///   %and2 = and i64 %conv.i, 8388607
///   %or = or i64 %and2, 8388608
///   %cmp = icmp ult i64 %shr, 127
///   br i1 %cmp, label %cleanup, label %if.end
///
/// if.end:                                           ; preds = %entry
///   %sub = add nuw nsw i64 %shr, 4294967169
///   %conv5 = and i64 %sub, 4294967232
///   %cmp6.not = icmp eq i64 %conv5, 0
///   br i1 %cmp6.not, label %if.end12, label %if.then8
///
/// if.then8:                                         ; preds = %if.end
///   %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
///   br label %cleanup
///
/// if.end12:                                         ; preds = %if.end
///   %cmp13 = icmp ult i64 %shr, 150
///   br i1 %cmp13, label %if.then15, label %if.else
///
/// if.then15:                                        ; preds = %if.end12
///   %sub16 = sub nuw nsw i64 150, %shr
///   %shr17 = lshr i64 %or, %sub16
///   %mul = mul nsw i64 %shr17, %conv
///   br label %cleanup
///
/// if.else:                                          ; preds = %if.end12
///   %sub18 = add nsw i64 %shr, -150
///   %shl = shl i64 %or, %sub18
///   %mul19 = mul nsw i64 %shl, %conv
///   br label %cleanup
///
/// cleanup:                                          ; preds = %entry, %if.else, %if.then15, %if.then8
///   %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
///   ret i64 %retval.0
/// }
///
/// Replace fp to integer with generated code.
92 static void expandFPToI(Instruction *FPToI) { 93 // clang-format on 94 IRBuilder<> Builder(FPToI); 95 auto *FloatVal = FPToI->getOperand(0); 96 IntegerType *IntTy = cast<IntegerType>(FPToI->getType()); 97 98 unsigned BitWidth = FPToI->getType()->getIntegerBitWidth(); 99 unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1; 100 101 // FIXME: fp16's range is covered by i32. So `fptoi half` can convert 102 // to i32 first following a sext/zext to target integer type. 103 Value *A1 = nullptr; 104 if (FloatVal->getType()->isHalfTy()) { 105 if (FPToI->getOpcode() == Instruction::FPToUI) { 106 Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32)); 107 A1 = Builder.CreateZExt(A0, IntTy); 108 } else { // FPToSI 109 Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32)); 110 A1 = Builder.CreateSExt(A0, IntTy); 111 } 112 FPToI->replaceAllUsesWith(A1); 113 FPToI->dropAllReferences(); 114 FPToI->eraseFromParent(); 115 return; 116 } 117 118 // fp80 conversion is implemented by fpext to fp128 first then do the 119 // conversion. 120 FPMantissaWidth = FPMantissaWidth == 63 ? 
112 : FPMantissaWidth; 121 unsigned FloatWidth = 122 PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits()); 123 unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; 124 unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1; 125 Value *ImplicitBit = Builder.CreateShl( 126 Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth)); 127 Value *SignificandMask = 128 Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1)); 129 Value *NegOne = Builder.CreateSExt( 130 ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy); 131 Value *NegInf = 132 Builder.CreateShl(ConstantInt::getSigned(IntTy, 1), 133 ConstantInt::getSigned(IntTy, BitWidth - 1)); 134 135 BasicBlock *Entry = Builder.GetInsertBlock(); 136 Function *F = Entry->getParent(); 137 Entry->setName(Twine(Entry->getName(), "fp-to-i-entry")); 138 BasicBlock *End = 139 Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup"); 140 BasicBlock *IfEnd = 141 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End); 142 BasicBlock *IfThen5 = 143 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End); 144 BasicBlock *IfEnd9 = 145 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End); 146 BasicBlock *IfThen12 = 147 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End); 148 BasicBlock *IfElse = 149 BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End); 150 151 Entry->getTerminator()->eraseFromParent(); 152 153 // entry: 154 Builder.SetInsertPoint(Entry); 155 Value *FloatVal0 = FloatVal; 156 // fp80 conversion is implemented by fpext to fp128 first then do the 157 // conversion. 
158 if (FloatVal->getType()->isX86_FP80Ty()) 159 FloatVal0 = 160 Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext())); 161 Value *ARep0 = 162 Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth)); 163 Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType()); 164 Value *PosOrNeg = Builder.CreateICmpSGT( 165 ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1)); 166 Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1), 167 ConstantInt::getSigned(IntTy, -1)); 168 Value *And = 169 Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth)); 170 Value *And2 = Builder.CreateAnd( 171 And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1)); 172 Value *Abs = Builder.CreateAnd(ARep, SignificandMask); 173 Value *Or = Builder.CreateOr(Abs, ImplicitBit); 174 Value *Cmp = 175 Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias)); 176 Builder.CreateCondBr(Cmp, End, IfEnd); 177 178 // if.end: 179 Builder.SetInsertPoint(IfEnd); 180 Value *Add1 = Builder.CreateAdd( 181 And2, ConstantInt::getSigned( 182 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth))); 183 Value *Cmp3 = Builder.CreateICmpULT( 184 Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth))); 185 Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9); 186 187 // if.then5: 188 Builder.SetInsertPoint(IfThen5); 189 Value *PosInf = Builder.CreateXor(NegOne, NegInf); 190 Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf); 191 Builder.CreateBr(End); 192 193 // if.end9: 194 Builder.SetInsertPoint(IfEnd9); 195 Value *Cmp10 = Builder.CreateICmpULT( 196 And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth)); 197 Builder.CreateCondBr(Cmp10, IfThen12, IfElse); 198 199 // if.then12: 200 Builder.SetInsertPoint(IfThen12); 201 Value *Sub13 = Builder.CreateSub( 202 Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2); 203 Value *Shr14 = Builder.CreateLShr(Or, Sub13); 204 Value *Mul = Builder.CreateMul(Shr14, 
Sign); 205 Builder.CreateBr(End); 206 207 // if.else: 208 Builder.SetInsertPoint(IfElse); 209 Value *Sub15 = Builder.CreateAdd( 210 And2, ConstantInt::getSigned( 211 IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth))); 212 Value *Shl = Builder.CreateShl(Or, Sub15); 213 Value *Mul16 = Builder.CreateMul(Shl, Sign); 214 Builder.CreateBr(End); 215 216 // cleanup: 217 Builder.SetInsertPoint(End, End->begin()); 218 PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4); 219 220 Retval0->addIncoming(Cond8, IfThen5); 221 Retval0->addIncoming(Mul, IfThen12); 222 Retval0->addIncoming(Mul16, IfElse); 223 Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry); 224 225 FPToI->replaceAllUsesWith(Retval0); 226 FPToI->dropAllReferences(); 227 FPToI->eraseFromParent(); 228 } 229 230 // clang-format off: preserve formatting of the following example 231 232 /// Generate code to convert a fp number to integer, replacing S(U)IToFP with 233 /// the generated code. This currently generates code similarly to compiler-rt's 234 /// implementations. This implementation has an implicit assumption that integer 235 /// width is larger than fp. 
236 /// 237 /// An example IR generated from compiler-rt/floatdisf.c looks like below: 238 /// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 { 239 /// entry: 240 /// %cmp = icmp eq i64 %a, 0 241 /// br i1 %cmp, label %return, label %if.end 242 /// 243 /// if.end: ; preds = %entry 244 /// %shr = ashr i64 %a, 63 245 /// %xor = xor i64 %shr, %a 246 /// %sub = sub nsw i64 %xor, %shr 247 /// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5 248 /// %cast = trunc i64 %0 to i32 249 /// %sub1 = sub nuw nsw i32 64, %cast 250 /// %sub2 = xor i32 %cast, 63 251 /// %cmp3 = icmp ult i32 %cast, 40 252 /// br i1 %cmp3, label %if.then4, label %if.else 253 /// 254 /// if.then4: ; preds = %if.end 255 /// switch i32 %sub1, label %sw.default [ 256 /// i32 25, label %sw.bb 257 /// i32 26, label %sw.epilog 258 /// ] 259 /// 260 /// sw.bb: ; preds = %if.then4 261 /// %shl = shl i64 %sub, 1 262 /// br label %sw.epilog 263 /// 264 /// sw.default: ; preds = %if.then4 265 /// %sub5 = sub nsw i64 38, %0 266 /// %sh_prom = and i64 %sub5, 4294967295 267 /// %shr6 = lshr i64 %sub, %sh_prom 268 /// %shr9 = lshr i64 274877906943, %0 269 /// %and = and i64 %shr9, %sub 270 /// %cmp10 = icmp ne i64 %and, 0 271 /// %conv11 = zext i1 %cmp10 to i64 272 /// %or = or i64 %shr6, %conv11 273 /// br label %sw.epilog 274 /// 275 /// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb 276 /// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ] 277 /// %1 = lshr i64 %a.addr.0, 2 278 /// %2 = and i64 %1, 1 279 /// %or16 = or i64 %2, %a.addr.0 280 /// %inc = add nsw i64 %or16, 1 281 /// %3 = and i64 %inc, 67108864 282 /// %tobool.not = icmp eq i64 %3, 0 283 /// %spec.select.v = select i1 %tobool.not, i64 2, i64 3 284 /// %spec.select = ashr i64 %inc, %spec.select.v 285 /// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1 286 /// br label %if.end26 287 /// 288 /// if.else: ; preds = %if.end 289 /// %sub23 = add nuw nsw i64 %0, 
4294967256 290 /// %sh_prom24 = and i64 %sub23, 4294967295 291 /// %shl25 = shl i64 %sub, %sh_prom24 292 /// br label %if.end26 293 /// 294 /// if.end26: ; preds = %sw.epilog, %if.else 295 /// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ] 296 /// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ] 297 /// %conv27 = trunc i64 %shr to i32 298 /// %and28 = and i32 %conv27, -2147483648 299 /// %add = shl nuw nsw i32 %e.0, 23 300 /// %shl29 = add nuw nsw i32 %add, 1065353216 301 /// %conv31 = trunc i64 %a.addr.1 to i32 302 /// %and32 = and i32 %conv31, 8388607 303 /// %or30 = or i32 %and32, %and28 304 /// %or33 = or i32 %or30, %shl29 305 /// %4 = bitcast i32 %or33 to float 306 /// br label %return 307 /// 308 /// return: ; preds = %entry, %if.end26 309 /// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ] 310 /// ret float %retval.0 311 /// } 312 /// 313 /// Replace integer to fp with generated code. 314 static void expandIToFP(Instruction *IToFP) { 315 // clang-format on 316 IRBuilder<> Builder(IToFP); 317 auto *IntVal = IToFP->getOperand(0); 318 IntegerType *IntTy = cast<IntegerType>(IntVal->getType()); 319 320 unsigned BitWidth = IntVal->getType()->getIntegerBitWidth(); 321 unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1; 322 // fp80 conversion is implemented by conversion tp fp128 first following 323 // a fptrunc to fp80. 324 FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth; 325 // FIXME: As there is no related builtins added in compliler-rt, 326 // here currently utilized the fp32 <-> fp16 lib calls to implement. 327 FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth; 328 FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth; 329 unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth); 330 bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP; 331 332 assert(BitWidth > FloatWidth && "Unexpected conversion. 
expandIToFP() " 333 "assumes integer width is larger than fp."); 334 335 Value *Temp1 = 336 Builder.CreateShl(Builder.getIntN(BitWidth, 1), 337 Builder.getIntN(BitWidth, FPMantissaWidth + 3)); 338 339 BasicBlock *Entry = Builder.GetInsertBlock(); 340 Function *F = Entry->getParent(); 341 Entry->setName(Twine(Entry->getName(), "itofp-entry")); 342 BasicBlock *End = 343 Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return"); 344 BasicBlock *IfEnd = 345 BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End); 346 BasicBlock *IfThen4 = 347 BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End); 348 BasicBlock *SwBB = 349 BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End); 350 BasicBlock *SwDefault = 351 BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End); 352 BasicBlock *SwEpilog = 353 BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End); 354 BasicBlock *IfThen20 = 355 BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End); 356 BasicBlock *IfElse = 357 BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End); 358 BasicBlock *IfEnd26 = 359 BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End); 360 361 Entry->getTerminator()->eraseFromParent(); 362 363 Function *CTLZ = 364 Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy); 365 ConstantInt *True = Builder.getTrue(); 366 367 // entry: 368 Builder.SetInsertPoint(Entry); 369 Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0)); 370 Builder.CreateCondBr(Cmp, End, IfEnd); 371 372 // if.end: 373 Builder.SetInsertPoint(IfEnd); 374 Value *Shr = 375 Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1)); 376 Value *Xor = Builder.CreateXor(Shr, IntVal); 377 Value *Sub = Builder.CreateSub(Xor, Shr); 378 Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? 
Sub : IntVal, True}); 379 Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty()); 380 int BitWidthNew = FloatWidth == 128 ? BitWidth : 32; 381 Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth), 382 FloatWidth == 128 ? Call : Cast); 383 Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1), 384 FloatWidth == 128 ? Call : Cast); 385 Value *Cmp3 = Builder.CreateICmpSGT( 386 Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1)); 387 Builder.CreateCondBr(Cmp3, IfThen4, IfElse); 388 389 // if.then4: 390 Builder.SetInsertPoint(IfThen4); 391 llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault); 392 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB); 393 SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog); 394 395 // sw.bb: 396 Builder.SetInsertPoint(SwBB); 397 Value *Shl = 398 Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1)); 399 Builder.CreateBr(SwEpilog); 400 401 // sw.default: 402 Builder.SetInsertPoint(SwDefault); 403 Value *Sub5 = Builder.CreateSub( 404 Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3), 405 FloatWidth == 128 ? Call : Cast); 406 Value *ShProm = Builder.CreateZExt(Sub5, IntTy); 407 Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal, 408 FloatWidth == 128 ? Sub5 : ShProm); 409 Value *Sub8 = 410 Builder.CreateAdd(FloatWidth == 128 ? Call : Cast, 411 Builder.getIntN(BitWidthNew, FPMantissaWidth + 3)); 412 Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy); 413 Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1), 414 FloatWidth == 128 ? Sub8 : ShProm9); 415 Value *And = Builder.CreateAnd(Shr9, IsSigned ? 
Sub : IntVal); 416 Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0)); 417 Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy); 418 Value *Or = Builder.CreateOr(Shr6, Conv11); 419 Builder.CreateBr(SwEpilog); 420 421 // sw.epilog: 422 Builder.SetInsertPoint(SwEpilog); 423 PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3); 424 AAddr0->addIncoming(Or, SwDefault); 425 AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4); 426 AAddr0->addIncoming(Shl, SwBB); 427 Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty()); 428 Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2)); 429 Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1)); 430 Value *Conv16 = Builder.CreateZExt(A2, IntTy); 431 Value *Or17 = Builder.CreateOr(AAddr0, Conv16); 432 Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1)); 433 Value *Shr18 = nullptr; 434 if (IsSigned) 435 Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2)); 436 else 437 Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2)); 438 Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3"); 439 Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0)); 440 Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth)); 441 Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32)); 442 Value *ExtractT64 = nullptr; 443 if (FloatWidth > 80) 444 ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty()); 445 else 446 ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty()); 447 Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20); 448 449 // if.then20 450 Builder.SetInsertPoint(IfThen20); 451 Value *Shr21 = nullptr; 452 if (IsSigned) 453 Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3)); 454 else 455 Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3)); 456 Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth)); 457 Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32)); 458 
Value *ExtractT62 = nullptr; 459 if (FloatWidth > 80) 460 ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64)); 461 else 462 ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32)); 463 Builder.CreateBr(IfEnd26); 464 465 // if.else: 466 Builder.SetInsertPoint(IfElse); 467 Value *Sub24 = Builder.CreateAdd( 468 FloatWidth == 128 ? Call : Cast, 469 ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew), 470 -(BitWidth - FPMantissaWidth - 1))); 471 Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy); 472 Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal, 473 FloatWidth == 128 ? Sub24 : ShProm25); 474 Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth)); 475 Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32)); 476 Value *ExtractT66 = nullptr; 477 if (FloatWidth > 80) 478 ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64)); 479 else 480 ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty()); 481 Builder.CreateBr(IfEnd26); 482 483 // if.end26: 484 Builder.SetInsertPoint(IfEnd26); 485 PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3); 486 AAddr1Off0->addIncoming(ExtractT, IfThen20); 487 AAddr1Off0->addIncoming(ExtractT60, SwEpilog); 488 AAddr1Off0->addIncoming(ExtractT61, IfElse); 489 PHINode *AAddr1Off32 = nullptr; 490 if (FloatWidth > 32) { 491 AAddr1Off32 = 492 Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 
64 : 32), 3); 493 AAddr1Off32->addIncoming(ExtractT62, IfThen20); 494 AAddr1Off32->addIncoming(ExtractT64, SwEpilog); 495 AAddr1Off32->addIncoming(ExtractT66, IfElse); 496 } 497 PHINode *E0 = nullptr; 498 if (FloatWidth <= 80) { 499 E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3); 500 E0->addIncoming(Sub1, IfThen20); 501 E0->addIncoming(Sub2, SwEpilog); 502 E0->addIncoming(Sub2, IfElse); 503 } 504 Value *And29 = nullptr; 505 if (FloatWidth > 80) { 506 Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1), 507 Builder.getIntN(BitWidth, 63)); 508 And29 = Builder.CreateAnd(Shr, Temp2, "and29"); 509 } else { 510 Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32)); 511 And29 = Builder.CreateAnd( 512 Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000)); 513 } 514 unsigned TempMod = FPMantissaWidth % 32; 515 Value *And34 = nullptr; 516 Value *Shl30 = nullptr; 517 if (FloatWidth > 80) { 518 TempMod += 32; 519 Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod)); 520 Shl30 = Builder.CreateAdd( 521 Add, 522 Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod)); 523 And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128)); 524 } else { 525 Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod)); 526 Shl30 = Builder.CreateAdd( 527 Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod)); 528 And34 = Builder.CreateAnd(FloatWidth > 32 ? 
AAddr1Off32 : AAddr1Off0, 529 Builder.getIntN(32, (1 << TempMod) - 1)); 530 } 531 Value *Or35 = nullptr; 532 if (FloatWidth > 80) { 533 Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128)); 534 Value *Or31 = Builder.CreateOr(And29Trunc, And34); 535 Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64)); 536 Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1), 537 Builder.getIntN(128, FPMantissaWidth)); 538 Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1)); 539 Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4); 540 Or35 = Builder.CreateOr(Or34, A6); 541 } else { 542 Value *Or31 = Builder.CreateOr(And34, And29); 543 Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30); 544 } 545 Value *A4 = nullptr; 546 if (IToFP->getType()->isDoubleTy()) { 547 Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth)); 548 Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32)); 549 Value *And1 = 550 Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF)); 551 Value *Or1 = Builder.CreateOr(Shl1, And1); 552 A4 = Builder.CreateBitCast(Or1, IToFP->getType()); 553 } else if (IToFP->getType()->isX86_FP80Ty()) { 554 Value *A40 = 555 Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext())); 556 A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); 557 } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) { 558 // Deal with "half" situation. This is a workaround since we don't have 559 // floattihf.c currently as referring. 
560 Value *A40 = 561 Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext())); 562 A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); 563 } else // float type 564 A4 = Builder.CreateBitCast(Or35, IToFP->getType()); 565 Builder.CreateBr(End); 566 567 // return: 568 Builder.SetInsertPoint(End, End->begin()); 569 PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2); 570 Retval0->addIncoming(A4, IfEnd26); 571 Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry); 572 573 IToFP->replaceAllUsesWith(Retval0); 574 IToFP->dropAllReferences(); 575 IToFP->eraseFromParent(); 576 } 577 578 static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) { 579 VectorType *VTy = cast<FixedVectorType>(I->getType()); 580 581 IRBuilder<> Builder(I); 582 583 unsigned NumElements = VTy->getElementCount().getFixedValue(); 584 Value *Result = PoisonValue::get(VTy); 585 for (unsigned Idx = 0; Idx < NumElements; ++Idx) { 586 Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); 587 Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext, 588 I->getType()->getScalarType()); 589 Result = Builder.CreateInsertElement(Result, Cast, Idx); 590 if (isa<Instruction>(Cast)) 591 Replace.push_back(cast<Instruction>(Cast)); 592 } 593 I->replaceAllUsesWith(Result); 594 I->dropAllReferences(); 595 I->eraseFromParent(); 596 } 597 598 static bool runImpl(Function &F, const TargetLowering &TLI) { 599 SmallVector<Instruction *, 4> Replace; 600 SmallVector<Instruction *, 4> ReplaceVector; 601 bool Modified = false; 602 603 unsigned MaxLegalFpConvertBitWidth = 604 TLI.getMaxLargeFPConvertBitWidthSupported(); 605 if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS) 606 MaxLegalFpConvertBitWidth = ExpandFpConvertBits; 607 608 if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS) 609 return false; 610 611 for (auto &I : instructions(F)) { 612 switch (I.getOpcode()) { 613 case Instruction::FPToUI: 614 case 
Instruction::FPToSI: { 615 // TODO: This pass doesn't handle scalable vectors. 616 if (I.getOperand(0)->getType()->isScalableTy()) 617 continue; 618 619 auto *IntTy = cast<IntegerType>(I.getType()->getScalarType()); 620 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) 621 continue; 622 623 if (I.getOperand(0)->getType()->isVectorTy()) 624 ReplaceVector.push_back(&I); 625 else 626 Replace.push_back(&I); 627 Modified = true; 628 break; 629 } 630 case Instruction::UIToFP: 631 case Instruction::SIToFP: { 632 // TODO: This pass doesn't handle scalable vectors. 633 if (I.getOperand(0)->getType()->isScalableTy()) 634 continue; 635 636 auto *IntTy = 637 cast<IntegerType>(I.getOperand(0)->getType()->getScalarType()); 638 if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) 639 continue; 640 641 if (I.getOperand(0)->getType()->isVectorTy()) 642 ReplaceVector.push_back(&I); 643 else 644 Replace.push_back(&I); 645 Modified = true; 646 break; 647 } 648 default: 649 break; 650 } 651 } 652 653 while (!ReplaceVector.empty()) { 654 Instruction *I = ReplaceVector.pop_back_val(); 655 scalarize(I, Replace); 656 } 657 658 if (Replace.empty()) 659 return false; 660 661 while (!Replace.empty()) { 662 Instruction *I = Replace.pop_back_val(); 663 if (I->getOpcode() == Instruction::FPToUI || 664 I->getOpcode() == Instruction::FPToSI) { 665 expandFPToI(I); 666 } else { 667 expandIToFP(I); 668 } 669 } 670 671 return Modified; 672 } 673 674 namespace { 675 class ExpandFpLegacyPass : public FunctionPass { 676 public: 677 static char ID; 678 679 ExpandFpLegacyPass() : FunctionPass(ID) { 680 initializeExpandFpLegacyPassPass(*PassRegistry::getPassRegistry()); 681 } 682 683 bool runOnFunction(Function &F) override { 684 auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 685 auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); 686 return runImpl(F, *TLI); 687 } 688 689 void getAnalysisUsage(AnalysisUsage &AU) const override { 690 
AU.addRequired<TargetPassConfig>(); 691 AU.addPreserved<AAResultsWrapperPass>(); 692 AU.addPreserved<GlobalsAAWrapperPass>(); 693 } 694 }; 695 } // namespace 696 697 PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) { 698 const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F); 699 return runImpl(F, *STI->getTargetLowering()) ? PreservedAnalyses::none() 700 : PreservedAnalyses::all(); 701 } 702 703 char ExpandFpLegacyPass::ID = 0; 704 INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp", 705 "Expand certain fp instructions", false, false) 706 INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false) 707 708 FunctionPass *llvm::createExpandFpPass() { return new ExpandFpLegacyPass(); } 709