1 //===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the IRTranslator class.
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
13 #include "llvm/ADT/PostOrderIterator.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/ScopeExit.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/BranchProbabilityInfo.h"
19 #include "llvm/Analysis/Loads.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/Analysis.h"
23 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
24 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
25 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
26 #include "llvm/CodeGen/LowLevelType.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineMemOperand.h"
32 #include "llvm/CodeGen/MachineModuleInfo.h"
33 #include "llvm/CodeGen/MachineOperand.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/StackProtector.h"
36 #include "llvm/CodeGen/SwitchLoweringUtils.h"
37 #include "llvm/CodeGen/TargetFrameLowering.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetLowering.h"
40 #include "llvm/CodeGen/TargetPassConfig.h"
41 #include "llvm/CodeGen/TargetRegisterInfo.h"
42 #include "llvm/CodeGen/TargetSubtargetInfo.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CFG.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DebugInfo.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GetElementPtrTypeIterator.h"
52 #include "llvm/IR/InlineAsm.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/LLVMContext.h"
58 #include "llvm/IR/Metadata.h"
59 #include "llvm/IR/PatternMatch.h"
60 #include "llvm/IR/Type.h"
61 #include "llvm/IR/User.h"
62 #include "llvm/IR/Value.h"
63 #include "llvm/InitializePasses.h"
64 #include "llvm/MC/MCContext.h"
65 #include "llvm/Pass.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/CodeGen.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/ErrorHandling.h"
70 #include "llvm/Support/LowLevelTypeImpl.h"
71 #include "llvm/Support/MathExtras.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Target/TargetIntrinsicInfo.h"
74 #include "llvm/Target/TargetMachine.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <string>
81 #include <utility>
82 #include <vector>
83 
84 #define DEBUG_TYPE "irtranslator"
85 
86 using namespace llvm;
87 
88 static cl::opt<bool>
89     EnableCSEInIRTranslator("enable-cse-in-irtranslator",
90                             cl::desc("Should enable CSE in irtranslator"),
91                             cl::Optional, cl::init(false));
92 char IRTranslator::ID = 0;
93 
94 INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
95                 false, false)
96 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
97 INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
98 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
99 INITIALIZE_PASS_DEPENDENCY(StackProtector)
100 INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
101                 false, false)
102 
103 static void reportTranslationError(MachineFunction &MF,
104                                    const TargetPassConfig &TPC,
105                                    OptimizationRemarkEmitter &ORE,
106                                    OptimizationRemarkMissed &R) {
107   MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
108 
109   // Print the function name explicitly if we don't have a debug location (which
110   // makes the diagnostic less useful) or if we're going to emit a raw error.
111   if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
112     R << (" (in function: " + MF.getName() + ")").str();
113 
114   if (TPC.isGlobalISelAbortEnabled())
115     report_fatal_error(R.getMsg());
116   else
117     ORE.emit(R);
118 }
119 
120 IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
121     : MachineFunctionPass(ID), OptLevel(optlevel) {}
122 
123 #ifndef NDEBUG
124 namespace {
125 /// Verify that every instruction created has the same DILocation as the
126 /// instruction being translated.
127 class DILocationVerifier : public GISelChangeObserver {
128   const Instruction *CurrInst = nullptr;
129 
130 public:
131   DILocationVerifier() = default;
132   ~DILocationVerifier() = default;
133 
134   const Instruction *getCurrentInst() const { return CurrInst; }
135   void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
136 
137   void erasingInstr(MachineInstr &MI) override {}
138   void changingInstr(MachineInstr &MI) override {}
139   void changedInstr(MachineInstr &MI) override {}
140 
141   void createdInstr(MachineInstr &MI) override {
142     assert(getCurrentInst() && "Inserted instruction without a current MI");
143 
144     // Only print the check message if we're actually checking it.
145 #ifndef NDEBUG
146     LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
147                       << " was copied to " << MI);
148 #endif
149     // We allow insts in the entry block to have a debug loc line of 0 because
150     // they could have originated from constants, and we don't want a jumpy
151     // debug experience.
152     assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
153             MI.getDebugLoc().getLine() == 0) &&
154            "Line info was not transferred to all instructions");
155   }
156 };
157 } // namespace
158 #endif // ifndef NDEBUG
159 
160 
161 void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
162   AU.addRequired<StackProtector>();
163   AU.addRequired<TargetPassConfig>();
164   AU.addRequired<GISelCSEAnalysisWrapperPass>();
165   if (OptLevel != CodeGenOpt::None)
166     AU.addRequired<BranchProbabilityInfoWrapperPass>();
167   getSelectionDAGFallbackAnalysisUsage(AU);
168   MachineFunctionPass::getAnalysisUsage(AU);
169 }
170 
171 IRTranslator::ValueToVRegInfo::VRegListT &
172 IRTranslator::allocateVRegs(const Value &Val) {
173   auto VRegsIt = VMap.findVRegs(Val);
174   if (VRegsIt != VMap.vregs_end())
175     return *VRegsIt->second;
176   auto *Regs = VMap.getVRegs(Val);
177   auto *Offsets = VMap.getOffsets(Val);
178   SmallVector<LLT, 4> SplitTys;
179   computeValueLLTs(*DL, *Val.getType(), SplitTys,
180                    Offsets->empty() ? Offsets : nullptr);
181   for (unsigned i = 0; i < SplitTys.size(); ++i)
182     Regs->push_back(0);
183   return *Regs;
184 }
185 
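// Note: a single IR value can map to several virtual registers when
// computeValueLLTs splits its type. For example, a value of type {i32, i64}
// is typically represented by an s32 and an s64 vreg, with the bit offset of
// each piece recorded alongside the registers in the VMap.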
186 ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
187   auto VRegsIt = VMap.findVRegs(Val);
188   if (VRegsIt != VMap.vregs_end())
189     return *VRegsIt->second;
190 
191   if (Val.getType()->isVoidTy())
192     return *VMap.getVRegs(Val);
193 
194   // Create entry for this type.
195   auto *VRegs = VMap.getVRegs(Val);
196   auto *Offsets = VMap.getOffsets(Val);
197 
198   assert(Val.getType()->isSized() &&
199          "Don't know how to create an empty vreg");
200 
201   SmallVector<LLT, 4> SplitTys;
202   computeValueLLTs(*DL, *Val.getType(), SplitTys,
203                    Offsets->empty() ? Offsets : nullptr);
204 
205   if (!isa<Constant>(Val)) {
206     for (auto Ty : SplitTys)
207       VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
208     return *VRegs;
209   }
210 
211   if (Val.getType()->isAggregateType()) {
212     // UndefValue, ConstantAggregateZero
213     auto &C = cast<Constant>(Val);
214     unsigned Idx = 0;
215     while (auto Elt = C.getAggregateElement(Idx++)) {
216       auto EltRegs = getOrCreateVRegs(*Elt);
217       llvm::copy(EltRegs, std::back_inserter(*VRegs));
218     }
219   } else {
220     assert(SplitTys.size() == 1 && "unexpectedly split LLT");
221     VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
222     bool Success = translate(cast<Constant>(Val), VRegs->front());
223     if (!Success) {
224       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
225                                  MF->getFunction().getSubprogram(),
226                                  &MF->getFunction().getEntryBlock());
227       R << "unable to translate constant: " << ore::NV("Type", Val.getType());
228       reportTranslationError(*MF, *TPC, *ORE, R);
229       return *VRegs;
230     }
231   }
232 
233   return *VRegs;
234 }
235 
236 int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
237   auto MapEntry = FrameIndices.find(&AI);
238   if (MapEntry != FrameIndices.end())
239     return MapEntry->second;
240 
241   uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
242   uint64_t Size =
243       ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
244 
245   // Always allocate at least one byte.
246   Size = std::max<uint64_t>(Size, 1u);
247 
248   int &FI = FrameIndices[&AI];
249   FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
250   return FI;
251 }
252 
253 Align IRTranslator::getMemOpAlign(const Instruction &I) {
254   if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
255     return SI->getAlign();
256   if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
257     return LI->getAlign();
258   }
259   if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
260     // TODO(PR27168): This instruction has no alignment attribute, but unlike
261     // the default alignment for load/store, the default here is to assume
262     // it has NATURAL alignment, not DataLayout-specified alignment.
263     const DataLayout &DL = AI->getModule()->getDataLayout();
264     return Align(DL.getTypeStoreSize(AI->getCompareOperand()->getType()));
265   }
266   if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
267     // TODO(PR27168): This instruction has no alignment attribute, but unlike
268     // the default alignment for load/store, the default here is to assume
269     // it has NATURAL alignment, not DataLayout-specified alignment.
270     const DataLayout &DL = AI->getModule()->getDataLayout();
271     return Align(DL.getTypeStoreSize(AI->getValOperand()->getType()));
272   }
273   OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
274   R << "unable to translate memop: " << ore::NV("Opcode", &I);
275   reportTranslationError(*MF, *TPC, *ORE, R);
276   return Align(1);
277 }
278 
279 MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
280   MachineBasicBlock *&MBB = BBToMBB[&BB];
281   assert(MBB && "BasicBlock was not encountered before");
282   return *MBB;
283 }
284 
285 void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
286   assert(NewPred && "new predecessor must be a real MachineBasicBlock");
287   MachinePreds[Edge].push_back(NewPred);
288 }
289 
290 bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
291                                      MachineIRBuilder &MIRBuilder) {
292   // Get or create a virtual register for each value.
293   // Unless the value is a Constant => loadimm cst?
294   // or inline constant each time?
295   // Creation of a virtual register needs to have a size.
296   Register Op0 = getOrCreateVReg(*U.getOperand(0));
297   Register Op1 = getOrCreateVReg(*U.getOperand(1));
298   Register Res = getOrCreateVReg(U);
299   uint16_t Flags = 0;
300   if (isa<Instruction>(U)) {
301     const Instruction &I = cast<Instruction>(U);
302     Flags = MachineInstr::copyFlagsFromInstruction(I);
303   }
304 
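  // E.g. for `%z = add nsw i32 %x, %y` this builds roughly
  // `%z:_(s32) = nsw G_ADD %x, %y` in generic MIR.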
305   MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
306   return true;
307 }
308 
309 bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
310                                     MachineIRBuilder &MIRBuilder) {
311   Register Op0 = getOrCreateVReg(*U.getOperand(0));
312   Register Res = getOrCreateVReg(U);
313   uint16_t Flags = 0;
314   if (isa<Instruction>(U)) {
315     const Instruction &I = cast<Instruction>(U);
316     Flags = MachineInstr::copyFlagsFromInstruction(I);
317   }
318   MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
319   return true;
320 }
321 
322 bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
323   return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
324 }
325 
326 bool IRTranslator::translateCompare(const User &U,
327                                     MachineIRBuilder &MIRBuilder) {
328   auto *CI = dyn_cast<CmpInst>(&U);
329   Register Op0 = getOrCreateVReg(*U.getOperand(0));
330   Register Op1 = getOrCreateVReg(*U.getOperand(1));
331   Register Res = getOrCreateVReg(U);
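  // The comparison may come from a CmpInst or from a compare constant
  // expression; take the predicate from whichever form we have.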
332   CmpInst::Predicate Pred =
333       CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
334                                     cast<ConstantExpr>(U).getPredicate());
335   if (CmpInst::isIntPredicate(Pred))
336     MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
337   else if (Pred == CmpInst::FCMP_FALSE)
338     MIRBuilder.buildCopy(
339         Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
340   else if (Pred == CmpInst::FCMP_TRUE)
341     MIRBuilder.buildCopy(
342         Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
343   else {
344     assert(CI && "Instruction should be CmpInst");
345     MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
346                          MachineInstr::copyFlagsFromInstruction(*CI));
347   }
348 
349   return true;
350 }
351 
352 bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
353   const ReturnInst &RI = cast<ReturnInst>(U);
354   const Value *Ret = RI.getReturnValue();
355   if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
356     Ret = nullptr;
357 
358   ArrayRef<Register> VRegs;
359   if (Ret)
360     VRegs = getOrCreateVRegs(*Ret);
361 
362   Register SwiftErrorVReg = 0;
363   if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
364     SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
365         &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
366   }
367 
368   // The target may mess with the insertion point, but
369   // that is not important since a return is the last instruction
370   // of the block anyway.
371   return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
372 }
373 
374 void IRTranslator::emitBranchForMergedCondition(
375     const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
376     MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
377     BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
378   // If the leaf of the tree is a comparison, merge the condition into
379   // the caseblock.
380   if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
381     CmpInst::Predicate Condition;
382     if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
383       Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
384     } else {
385       const FCmpInst *FC = cast<FCmpInst>(Cond);
386       Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
387     }
388 
389     SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
390                            BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
391                            CurBuilder->getDebugLoc(), TProb, FProb);
392     SL->SwitchCases.push_back(CB);
393     return;
394   }
395 
396   // Create a CaseBlock record representing this branch.
397   CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
398   SwitchCG::CaseBlock CB(
399       Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
400       nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
401   SL->SwitchCases.push_back(CB);
402 }
403 
404 static bool isValInBlock(const Value *V, const BasicBlock *BB) {
405   if (const Instruction *I = dyn_cast<Instruction>(V))
406     return I->getParent() == BB;
407   return true;
408 }
409 
410 void IRTranslator::findMergedConditions(
411     const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
412     MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
413     Instruction::BinaryOps Opc, BranchProbability TProb,
414     BranchProbability FProb, bool InvertCond) {
415   using namespace PatternMatch;
416   assert((Opc == Instruction::And || Opc == Instruction::Or) &&
417          "Expected Opc to be AND/OR");
418   // Skip over a NOT that is not part of the tree and remember to invert the op
419   // and operands at the next level.
420   Value *NotCond;
421   if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
422       isValInBlock(NotCond, CurBB->getBasicBlock())) {
423     findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
424                          !InvertCond);
425     return;
426   }
427 
428   const Instruction *BOp = dyn_cast<Instruction>(Cond);
429   const Value *BOpOp0, *BOpOp1;
430   // Compute the effective opcode for Cond, taking into account whether it needs
431   // to be inverted, e.g.
432   //   and (not (or A, B)), C
433   // gets lowered as
434   //   and (and (not A, not B), C)
435   Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
436   if (BOp) {
437     BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
438                ? Instruction::And
439                : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
440                       ? Instruction::Or
441                       : (Instruction::BinaryOps)0);
442     if (InvertCond) {
443       if (BOpc == Instruction::And)
444         BOpc = Instruction::Or;
445       else if (BOpc == Instruction::Or)
446         BOpc = Instruction::And;
447     }
448   }
449 
450   // If this node is not part of the or/and tree, emit it as a branch.
451   // Note that all nodes in the tree should have the same opcode.
452   bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
453   if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
454       !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
455       !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
456     emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
457                                  InvertCond);
458     return;
459   }
460 
461   //  Create TmpBB after CurBB.
462   MachineFunction::iterator BBI(CurBB);
463   MachineBasicBlock *TmpBB =
464       MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
465   CurBB->getParent()->insert(++BBI, TmpBB);
466 
467   if (Opc == Instruction::Or) {
468     // Codegen X | Y as:
469     // BB1:
470     //   jmp_if_X TBB
471     //   jmp TmpBB
472     // TmpBB:
473     //   jmp_if_Y TBB
474     //   jmp FBB
475     //
476 
477     // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
478     // The requirement is that
479     //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
480     //     = TrueProb for original BB.
481     // Assuming the original probabilities are A and B, one choice is to set
482     // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
483     // A/(1+B) and 2B/(1+B). This choice assumes that
484     //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
485     // Another choice is to assume TrueProb for BB1 equals to TrueProb for
486     // TmpBB, but the math is more complicated.
487 
488     auto NewTrueProb = TProb / 2;
489     auto NewFalseProb = TProb / 2 + FProb;
490     // Emit the LHS condition.
491     findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
492                          NewFalseProb, InvertCond);
493 
494     // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
495     SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
496     BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
497     // Emit the RHS condition into TmpBB.
498     findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
499                          Probs[1], InvertCond);
500   } else {
501     assert(Opc == Instruction::And && "Unknown merge op!");
502     // Codegen X & Y as:
503     // BB1:
504     //   jmp_if_X TmpBB
505     //   jmp FBB
506     // TmpBB:
507     //   jmp_if_Y TBB
508     //   jmp FBB
509     //
510     //  This requires creation of TmpBB after CurBB.
511 
512     // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
513     // The requirement is that
514     //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
515     //     = FalseProb for original BB.
516     // Assuming the original probabilities are A and B, one choice is to set
517     // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
518     // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
519     // TrueProb for BB1 * FalseProb for TmpBB.
520 
521     auto NewTrueProb = TProb + FProb / 2;
522     auto NewFalseProb = FProb / 2;
523     // Emit the LHS condition.
524     findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
525                          NewFalseProb, InvertCond);
526 
527     // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
528     SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
529     BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
530     // Emit the RHS condition into TmpBB.
531     findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
532                          Probs[1], InvertCond);
533   }
534 }
535 
536 bool IRTranslator::shouldEmitAsBranches(
537     const std::vector<SwitchCG::CaseBlock> &Cases) {
538   // For multiple cases, it's better to emit as branches.
539   if (Cases.size() != 2)
540     return true;
541 
542   // If this is two comparisons of the same values or'd or and'd together, they
543   // will get folded into a single comparison, so don't emit two blocks.
544   if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
545        Cases[0].CmpRHS == Cases[1].CmpRHS) ||
546       (Cases[0].CmpRHS == Cases[1].CmpLHS &&
547        Cases[0].CmpLHS == Cases[1].CmpRHS)) {
548     return false;
549   }
550 
551   // Handle: (X != null) | (Y != null) --> (X|Y) != 0
552   // Handle: (X == null) & (Y == null) --> (X|Y) == 0
553   if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
554       Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
555       isa<Constant>(Cases[0].CmpRHS) &&
556       cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
557     if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
558         Cases[0].TrueBB == Cases[1].ThisBB)
559       return false;
560     if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
561         Cases[0].FalseBB == Cases[1].ThisBB)
562       return false;
563   }
564 
565   return true;
566 }
567 
568 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
569   const BranchInst &BrInst = cast<BranchInst>(U);
570   auto &CurMBB = MIRBuilder.getMBB();
571   auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
572 
573   if (BrInst.isUnconditional()) {
574     // If the unconditional target is the layout successor, fallthrough.
575     if (!CurMBB.isLayoutSuccessor(Succ0MBB))
576       MIRBuilder.buildBr(*Succ0MBB);
577 
578     // Link successors.
579     for (const BasicBlock *Succ : successors(&BrInst))
580       CurMBB.addSuccessor(&getMBB(*Succ));
581     return true;
582   }
583 
584   // If this condition is one of the special cases we handle, do special stuff
585   // now.
586   const Value *CondVal = BrInst.getCondition();
587   MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
588 
589   const auto &TLI = *MF->getSubtarget().getTargetLowering();
590 
591   // If this is a series of conditions that are or'd or and'd together, emit
592   // this as a sequence of branches instead of setcc's with and/or operations.
593   // As long as jumps are not expensive (exceptions for multi-use logic ops,
594   // unpredictable branches, and vector extracts because those jumps are likely
595   // expensive for any target), this should improve performance.
596   // For example, instead of something like:
597   //     cmp A, B
598   //     C = seteq
599   //     cmp D, E
600   //     F = setle
601   //     or C, F
602   //     jnz foo
603   // Emit:
604   //     cmp A, B
605   //     je foo
606   //     cmp D, E
607   //     jle foo
608   using namespace PatternMatch;
609   const Instruction *CondI = dyn_cast<Instruction>(CondVal);
610   if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
611       !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
612     Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
613     Value *Vec;
614     const Value *BOp0, *BOp1;
615     if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
616       Opcode = Instruction::And;
617     else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
618       Opcode = Instruction::Or;
619 
620     if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
621                     match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
622       findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
623                            getEdgeProbability(&CurMBB, Succ0MBB),
624                            getEdgeProbability(&CurMBB, Succ1MBB),
625                            /*InvertCond=*/false);
626       assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
627 
628       // Allow some cases to be rejected.
629       if (shouldEmitAsBranches(SL->SwitchCases)) {
630         // Emit the branch for this block.
631         emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
632         SL->SwitchCases.erase(SL->SwitchCases.begin());
633         return true;
634       }
635 
636       // Okay, we decided not to do this, remove any inserted MBB's and clear
637       // SwitchCases.
638       for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
639         MF->erase(SL->SwitchCases[I].ThisBB);
640 
641       SL->SwitchCases.clear();
642     }
643   }
644 
645   // Create a CaseBlock record representing this branch.
646   SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
647                          ConstantInt::getTrue(MF->getFunction().getContext()),
648                          nullptr, Succ0MBB, Succ1MBB, &CurMBB,
649                          CurBuilder->getDebugLoc());
650 
651   // Use emitSwitchCase to actually insert the fast branch sequence for this
652   // cond branch.
653   emitSwitchCase(CB, &CurMBB, *CurBuilder);
654   return true;
655 }
656 
657 void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
658                                         MachineBasicBlock *Dst,
659                                         BranchProbability Prob) {
660   if (!FuncInfo.BPI) {
661     Src->addSuccessorWithoutProb(Dst);
662     return;
663   }
664   if (Prob.isUnknown())
665     Prob = getEdgeProbability(Src, Dst);
666   Src->addSuccessor(Dst, Prob);
667 }
668 
669 BranchProbability
670 IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
671                                  const MachineBasicBlock *Dst) const {
672   const BasicBlock *SrcBB = Src->getBasicBlock();
673   const BasicBlock *DstBB = Dst->getBasicBlock();
674   if (!FuncInfo.BPI) {
675     // If BPI is not available, set the default probability as 1 / N, where N is
676     // the number of successors.
677     auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
678     return BranchProbability(1, SuccSize);
679   }
680   return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
681 }
682 
683 bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
684   using namespace SwitchCG;
685   // Extract cases from the switch.
686   const SwitchInst &SI = cast<SwitchInst>(U);
687   BranchProbabilityInfo *BPI = FuncInfo.BPI;
688   CaseClusterVector Clusters;
689   Clusters.reserve(SI.getNumCases());
690   for (auto &I : SI.cases()) {
691     MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
692     assert(Succ && "Could not find successor mbb in mapping");
693     const ConstantInt *CaseVal = I.getCaseValue();
694     BranchProbability Prob =
695         BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
696             : BranchProbability(1, SI.getNumCases() + 1);
697     Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
698   }
699 
700   MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
701 
702   // Cluster adjacent cases with the same destination. We do this at all
703   // optimization levels because it's cheap to do and will make codegen faster
704   // if there are many clusters.
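  // E.g. adjacent cases 1, 2 and 3 that all branch to the same block are
  // merged into a single [1, 3] range cluster here.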
705   sortAndRangeify(Clusters);
706 
707   MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
708 
709   // If there is only the default destination, jump there directly.
710   if (Clusters.empty()) {
711     SwitchMBB->addSuccessor(DefaultMBB);
712     if (DefaultMBB != SwitchMBB->getNextNode())
713       MIB.buildBr(*DefaultMBB);
714     return true;
715   }
716 
717   SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
718   SL->findBitTestClusters(Clusters, &SI);
719 
720   LLVM_DEBUG({
721     dbgs() << "Case clusters: ";
722     for (const CaseCluster &C : Clusters) {
723       if (C.Kind == CC_JumpTable)
724         dbgs() << "JT:";
725       if (C.Kind == CC_BitTests)
726         dbgs() << "BT:";
727 
728       C.Low->getValue().print(dbgs(), true);
729       if (C.Low != C.High) {
730         dbgs() << '-';
731         C.High->getValue().print(dbgs(), true);
732       }
733       dbgs() << ' ';
734     }
735     dbgs() << '\n';
736   });
737 
738   assert(!Clusters.empty());
739   SwitchWorkList WorkList;
740   CaseClusterIt First = Clusters.begin();
741   CaseClusterIt Last = Clusters.end() - 1;
742   auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
743   WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
744 
745   // FIXME: At the moment we don't do any splitting optimizations here like
746   // SelectionDAG does, so this worklist only has one entry.
747   while (!WorkList.empty()) {
748     SwitchWorkListItem W = WorkList.back();
749     WorkList.pop_back();
750     if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
751       return false;
752   }
753   return true;
754 }
755 
756 void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
757                                  MachineBasicBlock *MBB) {
758   // Emit the code for the jump table
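  // G_JUMP_TABLE materializes the address of the table and G_BRJT performs
  // the indexed branch through it, using the index computed in the header.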
759   assert(JT.Reg != -1U && "Should lower JT Header first!");
760   MachineIRBuilder MIB(*MBB->getParent());
761   MIB.setMBB(*MBB);
762   MIB.setDebugLoc(CurBuilder->getDebugLoc());
763 
764   Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
765   const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
766 
767   auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
768   MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
769 }
770 
771 bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
772                                        SwitchCG::JumpTableHeader &JTH,
773                                        MachineBasicBlock *HeaderBB) {
774   MachineIRBuilder MIB(*HeaderBB->getParent());
775   MIB.setMBB(*HeaderBB);
776   MIB.setDebugLoc(CurBuilder->getDebugLoc());
777 
778   const Value &SValue = *JTH.SValue;
779   // Subtract the lowest switch case value from the value being switched on.
780   const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
781   Register SwitchOpReg = getOrCreateVReg(SValue);
782   auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
783   auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
784 
785   // This value may be smaller or larger than the target's pointer type, and
786   // therefore require extension or truncation.
787   Type *PtrIRTy = SValue.getType()->getPointerTo();
788   const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
789   Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
790 
791   JT.Reg = Sub.getReg(0);
792 
793   if (JTH.OmitRangeCheck) {
794     if (JT.MBB != HeaderBB->getNextNode())
795       MIB.buildBr(*JT.MBB);
796     return true;
797   }
798 
799   // Emit the range check for the jump table, and branch to the default block
800   // for the switch statement if the value being switched on exceeds the
801   // largest case in the switch.
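  // Because Sub already has the lowest case value subtracted out, a single
  // unsigned compare against (Last - First) also catches values below First,
  // which wrap around to large unsigned numbers.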
802   auto Cst = getOrCreateVReg(
803       *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
804   Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
805   auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
806 
807   auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
808 
809   // Avoid emitting unnecessary branches to the next block.
810   if (JT.MBB != HeaderBB->getNextNode())
811     BrCond = MIB.buildBr(*JT.MBB);
812   return true;
813 }
814 
815 void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
816                                   MachineBasicBlock *SwitchBB,
817                                   MachineIRBuilder &MIB) {
818   Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
819   Register Cond;
820   DebugLoc OldDbgLoc = MIB.getDebugLoc();
821   MIB.setDebugLoc(CB.DbgLoc);
822   MIB.setMBB(*CB.ThisBB);
823 
824   if (CB.PredInfo.NoCmp) {
825     // Branch or fall through to TrueBB.
826     addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
827     addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
828                       CB.ThisBB);
829     CB.ThisBB->normalizeSuccProbs();
830     if (CB.TrueBB != CB.ThisBB->getNextNode())
831       MIB.buildBr(*CB.TrueBB);
832     MIB.setDebugLoc(OldDbgLoc);
833     return;
834   }
835 
836   const LLT i1Ty = LLT::scalar(1);
837   // Build the compare.
838   if (!CB.CmpMHS) {
839     const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
840     // For conditional branch lowering, we might try to do something silly like
841     // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
842     // just re-use the existing condition vreg.
843     if (CI && CI->getZExtValue() == 1 &&
844         MRI->getType(CondLHS).getSizeInBits() == 1 &&
845         CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
846       Cond = CondLHS;
847     } else {
848       Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
849       if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
850         Cond =
851             MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
852       else
853         Cond =
854             MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
855     }
856   } else {
857     assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
858            "Can only handle SLE ranges");
859 
860     const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
861     const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
862 
863     Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
864     if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
865       Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
866       Cond =
867           MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
868     } else {
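      // Emit (CmpMHS - Low) u<= (High - Low); the unsigned comparison on the
      // shifted value covers Low <= CmpMHS <= High in a single check.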
869       const LLT CmpTy = MRI->getType(CmpOpReg);
870       auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
871       auto Diff = MIB.buildConstant(CmpTy, High - Low);
872       Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
873     }
874   }
875 
876   // Update successor info
877   addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
878 
879   addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
880                     CB.ThisBB);
881 
882   // TrueBB and FalseBB are always different unless the incoming IR is
883   // degenerate. This only happens when running llc on weird IR.
884   if (CB.TrueBB != CB.FalseBB)
885     addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
886   CB.ThisBB->normalizeSuccProbs();
887 
888   addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
889                     CB.ThisBB);
890 
891   MIB.buildBrCond(Cond, *CB.TrueBB);
892   MIB.buildBr(*CB.FalseBB);
893   MIB.setDebugLoc(OldDbgLoc);
894 }
895 
896 bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
897                                           MachineBasicBlock *SwitchMBB,
898                                           MachineBasicBlock *CurMBB,
899                                           MachineBasicBlock *DefaultMBB,
900                                           MachineIRBuilder &MIB,
901                                           MachineFunction::iterator BBI,
902                                           BranchProbability UnhandledProbs,
903                                           SwitchCG::CaseClusterIt I,
904                                           MachineBasicBlock *Fallthrough,
905                                           bool FallthroughUnreachable) {
906   using namespace SwitchCG;
907   MachineFunction *CurMF = SwitchMBB->getParent();
908   // FIXME: Optimize away range check based on pivot comparisons.
909   JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
910   SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
911   BranchProbability DefaultProb = W.DefaultProb;
912 
913   // The jump block hasn't been inserted yet; insert it here.
914   MachineBasicBlock *JumpMBB = JT->MBB;
915   CurMF->insert(BBI, JumpMBB);
916 
917   // Since the jump table block is separate from the switch block, we need
918   // to keep track of it as a machine predecessor to the default block,
919   // otherwise we lose the phi edges.
920   addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
921                     CurMBB);
922   addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
923                     JumpMBB);
924 
925   auto JumpProb = I->Prob;
926   auto FallthroughProb = UnhandledProbs;
927 
928   // If the default statement is a target of the jump table, we evenly
929   // distribute the default probability to successors of CurMBB. Also
930   // update the probability on the edge from JumpMBB to Fallthrough.
931   for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
932                                         SE = JumpMBB->succ_end();
933        SI != SE; ++SI) {
934     if (*SI == DefaultMBB) {
935       JumpProb += DefaultProb / 2;
936       FallthroughProb -= DefaultProb / 2;
937       JumpMBB->setSuccProbability(SI, DefaultProb / 2);
938       JumpMBB->normalizeSuccProbs();
939     } else {
940       // Also record edges from the jump table block to its successors.
941       addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
942                         JumpMBB);
943     }
944   }
945 
946   // Skip the range check if the fallthrough block is unreachable.
947   if (FallthroughUnreachable)
948     JTH->OmitRangeCheck = true;
949 
950   if (!JTH->OmitRangeCheck)
951     addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
952   addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
953   CurMBB->normalizeSuccProbs();
954 
955   // The jump table header will be inserted in our current block, do the
956   // range check, and fall through to our fallthrough block.
957   JTH->HeaderBB = CurMBB;
958   JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
959 
960   // If we're in the right place, emit the jump table header right now.
961   if (CurMBB == SwitchMBB) {
962     if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
963       return false;
964     JTH->Emitted = true;
965   }
966   return true;
967 }
968 bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
969                                             Value *Cond,
970                                             MachineBasicBlock *Fallthrough,
971                                             bool FallthroughUnreachable,
972                                             BranchProbability UnhandledProbs,
973                                             MachineBasicBlock *CurMBB,
974                                             MachineIRBuilder &MIB,
975                                             MachineBasicBlock *SwitchMBB) {
976   using namespace SwitchCG;
977   const Value *RHS, *LHS, *MHS;
978   CmpInst::Predicate Pred;
979   if (I->Low == I->High) {
980     // Check Cond == I->Low.
981     Pred = CmpInst::ICMP_EQ;
982     LHS = Cond;
983     RHS = I->Low;
984     MHS = nullptr;
985   } else {
986     // Check I->Low <= Cond <= I->High.
987     Pred = CmpInst::ICMP_SLE;
988     LHS = I->Low;
989     MHS = Cond;
990     RHS = I->High;
991   }
992 
993   // If Fallthrough is unreachable, fold away the comparison.
994   // The false probability is the sum of all unhandled cases.
995   CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
996                CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
997 
998   emitSwitchCase(CB, SwitchMBB, MIB);
999   return true;
1000 }
1001 
1002 void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
1003                                      MachineBasicBlock *SwitchBB) {
1004   MachineIRBuilder &MIB = *CurBuilder;
1005   MIB.setMBB(*SwitchBB);
1006 
1007   // Subtract the minimum value.
1008   Register SwitchOpReg = getOrCreateVReg(*B.SValue);
1009 
1010   LLT SwitchOpTy = MRI->getType(SwitchOpReg);
1011   Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
1012   auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
1013 
1014   // Ensure that the type will fit the mask value.
1015   LLT MaskTy = SwitchOpTy;
1016   for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
1017     if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
1018       // Switch table case ranges are encoded into a series of masks.
1019       // Just use a 64-bit scalar; it's guaranteed to fit.
1020       MaskTy = LLT::scalar(64);
1021       break;
1022     }
1023   }
1024   Register SubReg = RangeSub.getReg(0);
1025   if (SwitchOpTy != MaskTy)
1026     SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
1027 
1028   B.RegVT = getMVTForLLT(MaskTy);
1029   B.Reg = SubReg;
1030 
1031   MachineBasicBlock *MBB = B.Cases[0].ThisBB;
1032 
1033   if (!B.OmitRangeCheck)
1034     addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
1035   addSuccessorWithProb(SwitchBB, MBB, B.Prob);
1036 
1037   SwitchBB->normalizeSuccProbs();
1038 
1039   if (!B.OmitRangeCheck) {
1040     // Conditional branch to the default block.
1041     auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
1042     auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
1043                                   RangeSub, RangeCst);
1044     MIB.buildBrCond(RangeCmp, *B.Default);
1045   }
1046 
1047   // Avoid emitting unnecessary branches to the next block.
1048   if (MBB != SwitchBB->getNextNode())
1049     MIB.buildBr(*MBB);
1050 }
1051 
1052 void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
1053                                    MachineBasicBlock *NextMBB,
1054                                    BranchProbability BranchProbToNext,
1055                                    Register Reg, SwitchCG::BitTestCase &B,
1056                                    MachineBasicBlock *SwitchBB) {
1057   MachineIRBuilder &MIB = *CurBuilder;
1058   MIB.setMBB(*SwitchBB);
1059 
1060   LLT SwitchTy = getLLTForMVT(BB.RegVT);
1061   Register Cmp;
1062   unsigned PopCount = countPopulation(B.Mask);
1063   if (PopCount == 1) {
1064     // Testing for a single bit; just compare the shift count with what it
1065     // would need to be to shift a 1 bit in that position.
1066     auto MaskTrailingZeros =
1067         MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
1068     Cmp =
1069         MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
1070             .getReg(0);
1071   } else if (PopCount == BB.Range) {
1072     // There is only one zero bit in the range, test for it directly.
1073     auto MaskTrailingOnes =
1074         MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
1075     Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
1076               .getReg(0);
1077   } else {
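    // General case: test whether ((1 << Reg) & Mask) is non-zero, i.e. whether
    // the bit selected by the switch value is set in this cluster's mask.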
1078     // Make desired shift.
1079     auto CstOne = MIB.buildConstant(SwitchTy, 1);
1080     auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
1081 
1082     // Emit bit tests and jumps.
1083     auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
1084     auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
1085     auto CstZero = MIB.buildConstant(SwitchTy, 0);
1086     Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
1087               .getReg(0);
1088   }
1089 
1090   // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
1091   addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
1092   // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
1093   addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
1094   // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
1095   // one as they are relative probabilities (and thus work more like weights),
1096   // and hence we need to normalize them so that they sum to one.
1097   SwitchBB->normalizeSuccProbs();
1098 
1099   // Record the fact that the IR edge from the header to the bit test target
1100   // will go through our new block. Needed for PHIs to have nodes added.
1101   addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
1102                     SwitchBB);
1103 
1104   MIB.buildBrCond(Cmp, *B.TargetBB);
1105 
1106   // Avoid emitting unnecessary branches to the next block.
1107   if (NextMBB != SwitchBB->getNextNode())
1108     MIB.buildBr(*NextMBB);
1109 }
1110 
1111 bool IRTranslator::lowerBitTestWorkItem(
1112     SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
1113     MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
1114     MachineIRBuilder &MIB, MachineFunction::iterator BBI,
1115     BranchProbability DefaultProb, BranchProbability UnhandledProbs,
1116     SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
1117     bool FallthroughUnreachable) {
1118   using namespace SwitchCG;
1119   MachineFunction *CurMF = SwitchMBB->getParent();
1120   // FIXME: Optimize away range check based on pivot comparisons.
1121   BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
1122   // The bit test blocks haven't been inserted yet; insert them here.
1123   for (BitTestCase &BTC : BTB->Cases)
1124     CurMF->insert(BBI, BTC.ThisBB);
1125 
1126   // Fill in fields of the BitTestBlock.
1127   BTB->Parent = CurMBB;
1128   BTB->Default = Fallthrough;
1129 
1130   BTB->DefaultProb = UnhandledProbs;
1131   // If the cases in the bit test don't form a contiguous range, we evenly
1132   // distribute the probability on the edge to Fallthrough between the two
1133   // successors of CurMBB.
1134   if (!BTB->ContiguousRange) {
1135     BTB->Prob += DefaultProb / 2;
1136     BTB->DefaultProb -= DefaultProb / 2;
1137   }
1138 
1139   if (FallthroughUnreachable) {
1140     // Skip the range check if the fallthrough block is unreachable.
1141     BTB->OmitRangeCheck = true;
1142   }
1143 
1144   // If we're in the right place, emit the bit test header right now.
1145   if (CurMBB == SwitchMBB) {
1146     emitBitTestHeader(*BTB, SwitchMBB);
1147     BTB->Emitted = true;
1148   }
1149   return true;
1150 }
1151 
1152 bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
1153                                        Value *Cond,
1154                                        MachineBasicBlock *SwitchMBB,
1155                                        MachineBasicBlock *DefaultMBB,
1156                                        MachineIRBuilder &MIB) {
1157   using namespace SwitchCG;
1158   MachineFunction *CurMF = FuncInfo.MF;
1159   MachineBasicBlock *NextMBB = nullptr;
1160   MachineFunction::iterator BBI(W.MBB);
1161   if (++BBI != FuncInfo.MF->end())
1162     NextMBB = &*BBI;
1163 
1164   if (EnableOpts) {
1165     // Here, we order cases by probability so the most likely case will be
1166     // checked first. However, two clusters can have the same probability in
1167     // which case their relative ordering is non-deterministic. So we use Low
1168     // as a tie-breaker as clusters are guaranteed to never overlap.
1169     llvm::sort(W.FirstCluster, W.LastCluster + 1,
1170                [](const CaseCluster &a, const CaseCluster &b) {
1171                  return a.Prob != b.Prob
1172                             ? a.Prob > b.Prob
1173                             : a.Low->getValue().slt(b.Low->getValue());
1174                });
1175 
1176     // Rearrange the case blocks so that the last one falls through if possible
1177     // without changing the order of probabilities.
1178     for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
1179       --I;
1180       if (I->Prob > W.LastCluster->Prob)
1181         break;
1182       if (I->Kind == CC_Range && I->MBB == NextMBB) {
1183         std::swap(*I, *W.LastCluster);
1184         break;
1185       }
1186     }
1187   }
1188 
1189   // Compute total probability.
1190   BranchProbability DefaultProb = W.DefaultProb;
1191   BranchProbability UnhandledProbs = DefaultProb;
1192   for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
1193     UnhandledProbs += I->Prob;
1194 
1195   MachineBasicBlock *CurMBB = W.MBB;
1196   for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
1197     bool FallthroughUnreachable = false;
1198     MachineBasicBlock *Fallthrough;
1199     if (I == W.LastCluster) {
1200       // For the last cluster, fall through to the default destination.
1201       Fallthrough = DefaultMBB;
1202       FallthroughUnreachable = isa<UnreachableInst>(
1203           DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
1204     } else {
1205       Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
1206       CurMF->insert(BBI, Fallthrough);
1207     }
1208     UnhandledProbs -= I->Prob;
1209 
1210     switch (I->Kind) {
1211     case CC_BitTests: {
1212       if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
1213                                 DefaultProb, UnhandledProbs, I, Fallthrough,
1214                                 FallthroughUnreachable)) {
1215         LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
1216         return false;
1217       }
1218       break;
1219     }
1220 
1221     case CC_JumpTable: {
1222       if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
1223                                   UnhandledProbs, I, Fallthrough,
1224                                   FallthroughUnreachable)) {
1225         LLVM_DEBUG(dbgs() << "Failed to lower jump table");
1226         return false;
1227       }
1228       break;
1229     }
1230     case CC_Range: {
1231       if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
1232                                     FallthroughUnreachable, UnhandledProbs,
1233                                     CurMBB, MIB, SwitchMBB)) {
1234         LLVM_DEBUG(dbgs() << "Failed to lower switch range");
1235         return false;
1236       }
1237       break;
1238     }
1239     }
1240     CurMBB = Fallthrough;
1241   }
1242 
1243   return true;
1244 }
1245 
1246 bool IRTranslator::translateIndirectBr(const User &U,
1247                                        MachineIRBuilder &MIRBuilder) {
1248   const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
1249 
1250   const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
1251   MIRBuilder.buildBrIndirect(Tgt);
1252 
1253   // Link successors.
1254   SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
1255   MachineBasicBlock &CurBB = MIRBuilder.getMBB();
1256   for (const BasicBlock *Succ : successors(&BrInst)) {
1257     // It's legal for indirectbr instructions to have duplicate blocks in the
1258     // destination list. We don't allow this in MIR. Skip anything that's
1259     // already a successor.
1260     if (!AddedSuccessors.insert(Succ).second)
1261       continue;
1262     CurBB.addSuccessor(&getMBB(*Succ));
1263   }
1264 
1265   return true;
1266 }
1267 
1268 static bool isSwiftError(const Value *V) {
1269   if (auto Arg = dyn_cast<Argument>(V))
1270     return Arg->hasSwiftErrorAttr();
1271   if (auto AI = dyn_cast<AllocaInst>(V))
1272     return AI->isSwiftError();
1273   return false;
1274 }
1275 
1276 bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
1277   const LoadInst &LI = cast<LoadInst>(U);
1278   if (DL->getTypeStoreSize(LI.getType()) == 0)
1279     return true;
1280 
1281   ArrayRef<Register> Regs = getOrCreateVRegs(LI);
1282   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
1283   Register Base = getOrCreateVReg(*LI.getPointerOperand());
1284 
1285   Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
1286   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1287 
1288   if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
1289     assert(Regs.size() == 1 && "swifterror should be single pointer");
1290     Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
1291                                                     LI.getPointerOperand());
1292     MIRBuilder.buildCopy(Regs[0], VReg);
1293     return true;
1294   }
1295 
1296   auto &TLI = *MF->getSubtarget().getTargetLowering();
1297   MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
1298 
1299   const MDNode *Ranges =
1300       Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
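  // Values that were split into several vregs get one G_LOAD per piece, each
  // addressed at Base plus the piece's byte offset (Offsets are in bits).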
1301   for (unsigned i = 0; i < Regs.size(); ++i) {
1302     Register Addr;
1303     MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
1304 
1305     MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
1306     Align BaseAlign = getMemOpAlign(LI);
1307     AAMDNodes AAMetadata;
1308     LI.getAAMetadata(AAMetadata);
1309     auto MMO = MF->getMachineMemOperand(
1310         Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(),
1311         commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
1312         LI.getSyncScopeID(), LI.getOrdering());
1313     MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
1314   }
1315 
1316   return true;
1317 }
1318 
1319 bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
1320   const StoreInst &SI = cast<StoreInst>(U);
1321   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
1322     return true;
1323 
1324   ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
1325   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
1326   Register Base = getOrCreateVReg(*SI.getPointerOperand());
1327 
1328   Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
1329   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1330 
1331   if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
1332     assert(Vals.size() == 1 && "swifterror should be single pointer");
1333 
1334     Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
1335                                                     SI.getPointerOperand());
1336     MIRBuilder.buildCopy(VReg, Vals[0]);
1337     return true;
1338   }
1339 
1340   auto &TLI = *MF->getSubtarget().getTargetLowering();
1341   MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
1342 
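  // As with loads, a value split into several vregs is stored with one
  // G_STORE per piece at Base plus the piece's byte offset.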
1343   for (unsigned i = 0; i < Vals.size(); ++i) {
1344     Register Addr;
1345     MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
1346 
1347     MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
1348     Align BaseAlign = getMemOpAlign(SI);
1349     AAMDNodes AAMetadata;
1350     SI.getAAMetadata(AAMetadata);
1351     auto MMO = MF->getMachineMemOperand(
1352         Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(),
1353         commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
1354         SI.getSyncScopeID(), SI.getOrdering());
1355     MIRBuilder.buildStore(Vals[i], Addr, *MMO);
1356   }
1357   return true;
1358 }
1359 
1360 static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
1361   const Value *Src = U.getOperand(0);
1362   Type *Int32Ty = Type::getInt32Ty(U.getContext());
1363 
1364   // getIndexedOffsetInType is designed for GEPs: its first index steps over
1365   // whole objects of the type rather than indexing into the aggregate itself.
1366   SmallVector<Value *, 1> Indices;
1367   Indices.push_back(ConstantInt::get(Int32Ty, 0));
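       // For example, for `extractvalue {i32, i64} %agg, 1` the index list
       // becomes {0, 1}; with a data layout that places the i64 at byte
       // offset 8 (typical, though not guaranteed) the result is 8 * 8 = 64.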
1368 
1369   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
1370     for (auto Idx : EVI->indices())
1371       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
1372   } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
1373     for (auto Idx : IVI->indices())
1374       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
1375   } else {
1376     for (unsigned i = 1; i < U.getNumOperands(); ++i)
1377       Indices.push_back(U.getOperand(i));
1378   }
1379 
1380   return 8 * static_cast<uint64_t>(
1381                  DL.getIndexedOffsetInType(Src->getType(), Indices));
1382 }
1383 
1384 bool IRTranslator::translateExtractValue(const User &U,
1385                                          MachineIRBuilder &MIRBuilder) {
1386   const Value *Src = U.getOperand(0);
1387   uint64_t Offset = getOffsetFromIndices(U, *DL);
1388   ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
1389   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
1390   unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
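       // Offset is a bit offset computed from the indices above; lower_bound
       // finds the first source part that starts there, and the destination
       // below simply reuses that contiguous run of source registers.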
1391   auto &DstRegs = allocateVRegs(U);
1392 
1393   for (unsigned i = 0; i < DstRegs.size(); ++i)
1394     DstRegs[i] = SrcRegs[Idx++];
1395 
1396   return true;
1397 }
1398 
1399 bool IRTranslator::translateInsertValue(const User &U,
1400                                         MachineIRBuilder &MIRBuilder) {
1401   const Value *Src = U.getOperand(0);
1402   uint64_t Offset = getOffsetFromIndices(U, *DL);
1403   auto &DstRegs = allocateVRegs(U);
1404   ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
1405   ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
1406   ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
1407   auto InsertedIt = InsertedRegs.begin();
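       // Destination parts whose offsets lie at or past the insertion point
       // take the inserted value's registers in order (until they run out);
       // all other parts are copied through from the source aggregate.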
1408 
1409   for (unsigned i = 0; i < DstRegs.size(); ++i) {
1410     if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
1411       DstRegs[i] = *InsertedIt++;
1412     else
1413       DstRegs[i] = SrcRegs[i];
1414   }
1415 
1416   return true;
1417 }
1418 
1419 bool IRTranslator::translateSelect(const User &U,
1420                                    MachineIRBuilder &MIRBuilder) {
1421   Register Tst = getOrCreateVReg(*U.getOperand(0));
1422   ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
1423   ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
1424   ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
1425 
1426   uint16_t Flags = 0;
1427   if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
1428     Flags = MachineInstr::copyFlagsFromInstruction(*SI);
1429 
1430   for (unsigned i = 0; i < ResRegs.size(); ++i) {
1431     MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
1432   }
1433 
1434   return true;
1435 }
1436 
1437 bool IRTranslator::translateCopy(const User &U, const Value &V,
1438                                  MachineIRBuilder &MIRBuilder) {
1439   Register Src = getOrCreateVReg(V);
1440   auto &Regs = *VMap.getVRegs(U);
1441   if (Regs.empty()) {
1442     Regs.push_back(Src);
1443     VMap.getOffsets(U)->push_back(0);
1444   } else {
1445     // If we already assigned a vreg for this instruction, we can't change that.
1446     // Emit a copy to satisfy the users we already emitted.
1447     MIRBuilder.buildCopy(Regs[0], Src);
1448   }
1449   return true;
1450 }
1451 
1452 bool IRTranslator::translateBitCast(const User &U,
1453                                     MachineIRBuilder &MIRBuilder) {
1454   // If we're bitcasting to the source type, we can reuse the source vreg.
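       // With LLT this is common for pointer-to-pointer bitcasts, e.g.
       // `bitcast i8* %p to i32*`, where both sides have the same p0 type.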
1455   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
1456       getLLTForType(*U.getType(), *DL))
1457     return translateCopy(U, *U.getOperand(0), MIRBuilder);
1458 
1459   return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
1460 }
1461 
1462 bool IRTranslator::translateCast(unsigned Opcode, const User &U,
1463                                  MachineIRBuilder &MIRBuilder) {
1464   Register Op = getOrCreateVReg(*U.getOperand(0));
1465   Register Res = getOrCreateVReg(U);
1466   MIRBuilder.buildInstr(Opcode, {Res}, {Op});
1467   return true;
1468 }
1469 
1470 bool IRTranslator::translateGetElementPtr(const User &U,
1471                                           MachineIRBuilder &MIRBuilder) {
1472   Value &Op0 = *U.getOperand(0);
1473   Register BaseReg = getOrCreateVReg(Op0);
1474   Type *PtrIRTy = Op0.getType();
1475   LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
1476   Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
1477   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
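       // Roughly speaking, constant indices are folded into a single running
       // byte offset, while each variable index is sign-extended or truncated
       // to the offset type, scaled by the element size with a G_MUL, and
       // applied with a G_PTR_ADD; e.g. `getelementptr i32, i32* %p, i64 %i`
       // becomes a multiply by 4 followed by a single G_PTR_ADD off %p.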
1478 
1479   // Normalize a vector GEP: all scalar operands should be converted to
1480   // splat vectors.
1481   unsigned VectorWidth = 0;
1482   if (auto *VT = dyn_cast<VectorType>(U.getType()))
1483     VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
1484 
1485   // We might need to splat the base pointer into a vector if the offsets
1486   // are vectors.
1487   if (VectorWidth && !PtrTy.isVector()) {
1488     BaseReg =
1489         MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg)
1490             .getReg(0);
1491     PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
1492     PtrTy = getLLTForType(*PtrIRTy, *DL);
1493     OffsetIRTy = DL->getIntPtrType(PtrIRTy);
1494     OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1495   }
1496 
1497   int64_t Offset = 0;
1498   for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
1499        GTI != E; ++GTI) {
1500     const Value *Idx = GTI.getOperand();
1501     if (StructType *StTy = GTI.getStructTypeOrNull()) {
1502       unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
1503       Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
1504       continue;
1505     } else {
1506       uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
1507 
1508       // If this is a scalar constant or a splat vector of constants,
1509       // handle it quickly.
1510       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
1511         Offset += ElementSize * CI->getSExtValue();
1512         continue;
1513       }
1514 
1515       if (Offset != 0) {
1516         auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
1517         BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
1518                       .getReg(0);
1519         Offset = 0;
1520       }
1521 
1522       Register IdxReg = getOrCreateVReg(*Idx);
1523       LLT IdxTy = MRI->getType(IdxReg);
1524       if (IdxTy != OffsetTy) {
1525         if (!IdxTy.isVector() && VectorWidth) {
1526           IdxReg = MIRBuilder.buildSplatVector(
1527             OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
1528         }
1529 
1530         IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
1531       }
1532 
1533       // N = N + Idx * ElementSize;
1534       // Avoid doing it for ElementSize of 1.
1535       Register GepOffsetReg;
1536       if (ElementSize != 1) {
1537         auto ElementSizeMIB = MIRBuilder.buildConstant(
1538             getLLTForType(*OffsetIRTy, *DL), ElementSize);
1539         GepOffsetReg =
1540             MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
1541       } else
1542         GepOffsetReg = IdxReg;
1543 
1544       BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
1545     }
1546   }
1547 
1548   if (Offset != 0) {
1549     auto OffsetMIB =
1550         MIRBuilder.buildConstant(OffsetTy, Offset);
1551     MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
1552     return true;
1553   }
1554 
1555   MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
1556   return true;
1557 }
1558 
1559 bool IRTranslator::translateMemFunc(const CallInst &CI,
1560                                     MachineIRBuilder &MIRBuilder,
1561                                     unsigned Opcode) {
1562 
1563   // If the source is undef, then just emit a nop.
1564   if (isa<UndefValue>(CI.getArgOperand(1)))
1565     return true;
1566 
1567   SmallVector<Register, 3> SrcRegs;
1568 
1569   unsigned MinPtrSize = UINT_MAX;
1570   for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
1571     Register SrcReg = getOrCreateVReg(**AI);
1572     LLT SrcTy = MRI->getType(SrcReg);
1573     if (SrcTy.isPointer())
1574       MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
1575     SrcRegs.push_back(SrcReg);
1576   }
1577 
1578   LLT SizeTy = LLT::scalar(MinPtrSize);
1579 
1580   // The size operand should be the minimum of the pointer sizes.
1581   Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
1582   if (MRI->getType(SizeOpReg) != SizeTy)
1583     SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
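       // At this point SrcRegs holds (dst, src, size) for G_MEMCPY/G_MEMMOVE
       // or (dst, value, size) for G_MEMSET, with the size operand normalized
       // to the narrowest pointer width seen among the pointer arguments.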
1584 
1585   auto ICall = MIRBuilder.buildInstr(Opcode);
1586   for (Register SrcReg : SrcRegs)
1587     ICall.addUse(SrcReg);
1588 
1589   Align DstAlign;
1590   Align SrcAlign;
1591   unsigned IsVol =
1592       cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
1593           ->getZExtValue();
1594 
1595   if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
1596     DstAlign = MCI->getDestAlign().valueOrOne();
1597     SrcAlign = MCI->getSourceAlign().valueOrOne();
1598   } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
1599     DstAlign = MMI->getDestAlign().valueOrOne();
1600     SrcAlign = MMI->getSourceAlign().valueOrOne();
1601   } else {
1602     auto *MSI = cast<MemSetInst>(&CI);
1603     DstAlign = MSI->getDestAlign().valueOrOne();
1604   }
1605 
1606   // We need to propagate the tail call flag from the IR inst as an argument.
1607   // Otherwise, we have to pessimize and assume later that we cannot tail call
1608   // any memory intrinsics.
1609   ICall.addImm(CI.isTailCall() ? 1 : 0);
1610 
1611   // Create mem operands to store the alignment and volatile info.
1612   auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
1613   ICall.addMemOperand(MF->getMachineMemOperand(
1614       MachinePointerInfo(CI.getArgOperand(0)),
1615       MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
1616   if (Opcode != TargetOpcode::G_MEMSET)
1617     ICall.addMemOperand(MF->getMachineMemOperand(
1618         MachinePointerInfo(CI.getArgOperand(1)),
1619         MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
1620 
1621   return true;
1622 }
1623 
1624 void IRTranslator::getStackGuard(Register DstReg,
1625                                  MachineIRBuilder &MIRBuilder) {
1626   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1627   MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
1628   auto MIB =
1629       MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
1630 
1631   auto &TLI = *MF->getSubtarget().getTargetLowering();
1632   Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
1633   if (!Global)
1634     return;
1635 
1636   MachinePointerInfo MPInfo(Global);
1637   auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
1638                MachineMemOperand::MODereferenceable;
1639   MachineMemOperand *MemRef =
1640       MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
1641                                DL->getPointerABIAlignment(0));
1642   MIB.setMemRefs({MemRef});
1643 }
1644 
1645 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
1646                                               MachineIRBuilder &MIRBuilder) {
1647   ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
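       // The *.with.overflow intrinsics return a {result, overflow} pair, so
       // ResRegs is expected to have exactly two entries; e.g.
       // llvm.uadd.with.overflow.i32 maps onto a single G_UADDO defining an
       // s32 result and an s1 carry-out.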
1648   MIRBuilder.buildInstr(
1649       Op, {ResRegs[0], ResRegs[1]},
1650       {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});
1651 
1652   return true;
1653 }
1654 
1655 bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
1656                                                 MachineIRBuilder &MIRBuilder) {
1657   Register Dst = getOrCreateVReg(CI);
1658   Register Src0 = getOrCreateVReg(*CI.getOperand(0));
1659   Register Src1 = getOrCreateVReg(*CI.getOperand(1));
1660   uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
1661   MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
1662   return true;
1663 }
1664 
1665 unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
1666   switch (ID) {
1667     default:
1668       break;
1669     case Intrinsic::bswap:
1670       return TargetOpcode::G_BSWAP;
1671     case Intrinsic::bitreverse:
1672       return TargetOpcode::G_BITREVERSE;
1673     case Intrinsic::fshl:
1674       return TargetOpcode::G_FSHL;
1675     case Intrinsic::fshr:
1676       return TargetOpcode::G_FSHR;
1677     case Intrinsic::ceil:
1678       return TargetOpcode::G_FCEIL;
1679     case Intrinsic::cos:
1680       return TargetOpcode::G_FCOS;
1681     case Intrinsic::ctpop:
1682       return TargetOpcode::G_CTPOP;
1683     case Intrinsic::exp:
1684       return TargetOpcode::G_FEXP;
1685     case Intrinsic::exp2:
1686       return TargetOpcode::G_FEXP2;
1687     case Intrinsic::fabs:
1688       return TargetOpcode::G_FABS;
1689     case Intrinsic::copysign:
1690       return TargetOpcode::G_FCOPYSIGN;
1691     case Intrinsic::minnum:
1692       return TargetOpcode::G_FMINNUM;
1693     case Intrinsic::maxnum:
1694       return TargetOpcode::G_FMAXNUM;
1695     case Intrinsic::minimum:
1696       return TargetOpcode::G_FMINIMUM;
1697     case Intrinsic::maximum:
1698       return TargetOpcode::G_FMAXIMUM;
1699     case Intrinsic::canonicalize:
1700       return TargetOpcode::G_FCANONICALIZE;
1701     case Intrinsic::floor:
1702       return TargetOpcode::G_FFLOOR;
1703     case Intrinsic::fma:
1704       return TargetOpcode::G_FMA;
1705     case Intrinsic::log:
1706       return TargetOpcode::G_FLOG;
1707     case Intrinsic::log2:
1708       return TargetOpcode::G_FLOG2;
1709     case Intrinsic::log10:
1710       return TargetOpcode::G_FLOG10;
1711     case Intrinsic::nearbyint:
1712       return TargetOpcode::G_FNEARBYINT;
1713     case Intrinsic::pow:
1714       return TargetOpcode::G_FPOW;
1715     case Intrinsic::powi:
1716       return TargetOpcode::G_FPOWI;
1717     case Intrinsic::rint:
1718       return TargetOpcode::G_FRINT;
1719     case Intrinsic::round:
1720       return TargetOpcode::G_INTRINSIC_ROUND;
1721     case Intrinsic::roundeven:
1722       return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
1723     case Intrinsic::sin:
1724       return TargetOpcode::G_FSIN;
1725     case Intrinsic::sqrt:
1726       return TargetOpcode::G_FSQRT;
1727     case Intrinsic::trunc:
1728       return TargetOpcode::G_INTRINSIC_TRUNC;
1729     case Intrinsic::readcyclecounter:
1730       return TargetOpcode::G_READCYCLECOUNTER;
1731     case Intrinsic::ptrmask:
1732       return TargetOpcode::G_PTRMASK;
1733     case Intrinsic::lrint:
1734       return TargetOpcode::G_INTRINSIC_LRINT;
1735     // FADD/FMUL require checking the FMF, so are handled elsewhere.
1736     case Intrinsic::vector_reduce_fmin:
1737       return TargetOpcode::G_VECREDUCE_FMIN;
1738     case Intrinsic::vector_reduce_fmax:
1739       return TargetOpcode::G_VECREDUCE_FMAX;
1740     case Intrinsic::vector_reduce_add:
1741       return TargetOpcode::G_VECREDUCE_ADD;
1742     case Intrinsic::vector_reduce_mul:
1743       return TargetOpcode::G_VECREDUCE_MUL;
1744     case Intrinsic::vector_reduce_and:
1745       return TargetOpcode::G_VECREDUCE_AND;
1746     case Intrinsic::vector_reduce_or:
1747       return TargetOpcode::G_VECREDUCE_OR;
1748     case Intrinsic::vector_reduce_xor:
1749       return TargetOpcode::G_VECREDUCE_XOR;
1750     case Intrinsic::vector_reduce_smax:
1751       return TargetOpcode::G_VECREDUCE_SMAX;
1752     case Intrinsic::vector_reduce_smin:
1753       return TargetOpcode::G_VECREDUCE_SMIN;
1754     case Intrinsic::vector_reduce_umax:
1755       return TargetOpcode::G_VECREDUCE_UMAX;
1756     case Intrinsic::vector_reduce_umin:
1757       return TargetOpcode::G_VECREDUCE_UMIN;
1758   }
1759   return Intrinsic::not_intrinsic;
1760 }
1761 
1762 bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
1763                                             Intrinsic::ID ID,
1764                                             MachineIRBuilder &MIRBuilder) {
1765 
1766   unsigned Op = getSimpleIntrinsicOpcode(ID);
1767 
1768   // Is this a simple intrinsic?
1769   if (Op == Intrinsic::not_intrinsic)
1770     return false;
1771 
1772   // Yes. Let's translate it.
1773   SmallVector<llvm::SrcOp, 4> VRegs;
1774   for (auto &Arg : CI.arg_operands())
1775     VRegs.push_back(getOrCreateVReg(*Arg));
1776 
1777   MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
1778                         MachineInstr::copyFlagsFromInstruction(CI));
1779   return true;
1780 }
1781 
1782 // TODO: Include ConstrainedOps.def when all strict instructions are defined.
1783 static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
1784   switch (ID) {
1785   case Intrinsic::experimental_constrained_fadd:
1786     return TargetOpcode::G_STRICT_FADD;
1787   case Intrinsic::experimental_constrained_fsub:
1788     return TargetOpcode::G_STRICT_FSUB;
1789   case Intrinsic::experimental_constrained_fmul:
1790     return TargetOpcode::G_STRICT_FMUL;
1791   case Intrinsic::experimental_constrained_fdiv:
1792     return TargetOpcode::G_STRICT_FDIV;
1793   case Intrinsic::experimental_constrained_frem:
1794     return TargetOpcode::G_STRICT_FREM;
1795   case Intrinsic::experimental_constrained_fma:
1796     return TargetOpcode::G_STRICT_FMA;
1797   case Intrinsic::experimental_constrained_sqrt:
1798     return TargetOpcode::G_STRICT_FSQRT;
1799   default:
1800     return 0;
1801   }
1802 }
1803 
1804 bool IRTranslator::translateConstrainedFPIntrinsic(
1805   const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
1806   fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
1807 
1808   unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
1809   if (!Opcode)
1810     return false;
1811 
1812   unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
1813   if (EB == fp::ExceptionBehavior::ebIgnore)
1814     Flags |= MachineInstr::NoFPExcept;
1815 
1816   SmallVector<llvm::SrcOp, 4> VRegs;
1817   VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
1818   if (!FPI.isUnaryOp())
1819     VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
1820   if (FPI.isTernaryOp())
1821     VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
1822 
1823   MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
1824   return true;
1825 }
1826 
1827 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
1828                                            MachineIRBuilder &MIRBuilder) {
1829 
1830   // If this is a simple intrinsic (that is, we just need to add a def of
1831   // a vreg and uses for each arg operand), then translate it.
1832   if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
1833     return true;
1834 
1835   switch (ID) {
1836   default:
1837     break;
1838   case Intrinsic::lifetime_start:
1839   case Intrinsic::lifetime_end: {
1840     // No stack colouring in O0; discard region information.
1841     if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
1842       return true;
1843 
1844     unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
1845                                                   : TargetOpcode::LIFETIME_END;
1846 
1847     // Get the underlying objects for the location passed on the lifetime
1848     // marker.
1849     SmallVector<const Value *, 4> Allocas;
1850     getUnderlyingObjects(CI.getArgOperand(1), Allocas);
1851 
1852     // Iterate over each underlying object, creating lifetime markers for each
1853     // static alloca. Quit if we find a non-static alloca.
1854     for (const Value *V : Allocas) {
1855       const AllocaInst *AI = dyn_cast<AllocaInst>(V);
1856       if (!AI)
1857         continue;
1858 
1859       if (!AI->isStaticAlloca())
1860         return true;
1861 
1862       MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
1863     }
1864     return true;
1865   }
1866   case Intrinsic::dbg_declare: {
1867     const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
1868     assert(DI.getVariable() && "Missing variable");
1869 
1870     const Value *Address = DI.getAddress();
1871     if (!Address || isa<UndefValue>(Address)) {
1872       LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
1873       return true;
1874     }
1875 
1876     assert(DI.getVariable()->isValidLocationForIntrinsic(
1877                MIRBuilder.getDebugLoc()) &&
1878            "Expected inlined-at fields to agree");
1879     auto AI = dyn_cast<AllocaInst>(Address);
1880     if (AI && AI->isStaticAlloca()) {
1881       // Static allocas are tracked at the MF level, no need for DBG_VALUE
1882       // instructions (in fact, they get ignored if they *do* exist).
1883       MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
1884                              getOrCreateFrameIndex(*AI), DI.getDebugLoc());
1885     } else {
1886       // A dbg.declare describes the address of a source variable, so lower it
1887       // into an indirect DBG_VALUE.
1888       MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
1889                                        DI.getVariable(), DI.getExpression());
1890     }
1891     return true;
1892   }
1893   case Intrinsic::dbg_label: {
1894     const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
1895     assert(DI.getLabel() && "Missing label");
1896 
1897     assert(DI.getLabel()->isValidLocationForIntrinsic(
1898                MIRBuilder.getDebugLoc()) &&
1899            "Expected inlined-at fields to agree");
1900 
1901     MIRBuilder.buildDbgLabel(DI.getLabel());
1902     return true;
1903   }
1904   case Intrinsic::vaend:
1905     // No target I know of cares about va_end. Certainly no in-tree target
1906     // does. Simplest intrinsic ever!
1907     return true;
1908   case Intrinsic::vastart: {
1909     auto &TLI = *MF->getSubtarget().getTargetLowering();
1910     Value *Ptr = CI.getArgOperand(0);
1911     unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
1912 
1913     // FIXME: Get alignment
1914     MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
1915         .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
1916                                                 MachineMemOperand::MOStore,
1917                                                 ListSize, Align(1)));
1918     return true;
1919   }
1920   case Intrinsic::dbg_value: {
1921     // This form of DBG_VALUE is target-independent.
1922     const DbgValueInst &DI = cast<DbgValueInst>(CI);
1923     const Value *V = DI.getValue();
1924     assert(DI.getVariable()->isValidLocationForIntrinsic(
1925                MIRBuilder.getDebugLoc()) &&
1926            "Expected inlined-at fields to agree");
1927     if (!V) {
1928       // Currently the optimizer can produce this; insert an undef to
1929       // help debugging.  Probably the optimizer should not do this.
1930       MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
1931     } else if (const auto *CI = dyn_cast<Constant>(V)) {
1932       MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
1933     } else {
1934       for (Register Reg : getOrCreateVRegs(*V)) {
1935         // FIXME: This does not handle register-indirect values at offset 0. The
1936         // direct/indirect thing shouldn't really be handled by something as
1937         // implicit as reg+noreg vs reg+imm in the first place, but it seems
1938         // pretty baked in right now.
1939         MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
1940       }
1941     }
1942     return true;
1943   }
1944   case Intrinsic::uadd_with_overflow:
1945     return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
1946   case Intrinsic::sadd_with_overflow:
1947     return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
1948   case Intrinsic::usub_with_overflow:
1949     return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
1950   case Intrinsic::ssub_with_overflow:
1951     return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
1952   case Intrinsic::umul_with_overflow:
1953     return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
1954   case Intrinsic::smul_with_overflow:
1955     return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
1956   case Intrinsic::uadd_sat:
1957     return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
1958   case Intrinsic::sadd_sat:
1959     return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
1960   case Intrinsic::usub_sat:
1961     return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
1962   case Intrinsic::ssub_sat:
1963     return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
1964   case Intrinsic::ushl_sat:
1965     return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
1966   case Intrinsic::sshl_sat:
1967     return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
1968   case Intrinsic::umin:
1969     return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
1970   case Intrinsic::umax:
1971     return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
1972   case Intrinsic::smin:
1973     return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
1974   case Intrinsic::smax:
1975     return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
1976   case Intrinsic::abs:
1977     // TODO: Preserve "int min is poison" arg in GMIR?
1978     return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
1979   case Intrinsic::smul_fix:
1980     return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
1981   case Intrinsic::umul_fix:
1982     return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
1983   case Intrinsic::smul_fix_sat:
1984     return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
1985   case Intrinsic::umul_fix_sat:
1986     return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
1987   case Intrinsic::sdiv_fix:
1988     return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
1989   case Intrinsic::udiv_fix:
1990     return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
1991   case Intrinsic::sdiv_fix_sat:
1992     return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
1993   case Intrinsic::udiv_fix_sat:
1994     return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
1995   case Intrinsic::fmuladd: {
1996     const TargetMachine &TM = MF->getTarget();
1997     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
1998     Register Dst = getOrCreateVReg(CI);
1999     Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
2000     Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
2001     Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
2002     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
2003         TLI.isFMAFasterThanFMulAndFAdd(*MF,
2004                                        TLI.getValueType(*DL, CI.getType()))) {
2005       // TODO: Revisit this to see if we should move this part of the
2006       // lowering to the combiner.
2007       MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
2008                           MachineInstr::copyFlagsFromInstruction(CI));
2009     } else {
2010       LLT Ty = getLLTForType(*CI.getType(), *DL);
2011       auto FMul = MIRBuilder.buildFMul(
2012           Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
2013       MIRBuilder.buildFAdd(Dst, FMul, Op2,
2014                            MachineInstr::copyFlagsFromInstruction(CI));
2015     }
2016     return true;
2017   }
2018   case Intrinsic::convert_from_fp16:
2019     // FIXME: This intrinsic should probably be removed from the IR.
2020     MIRBuilder.buildFPExt(getOrCreateVReg(CI),
2021                           getOrCreateVReg(*CI.getArgOperand(0)),
2022                           MachineInstr::copyFlagsFromInstruction(CI));
2023     return true;
2024   case Intrinsic::convert_to_fp16:
2025     // FIXME: This intrinsic should probably be removed from the IR.
2026     MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
2027                             getOrCreateVReg(*CI.getArgOperand(0)),
2028                             MachineInstr::copyFlagsFromInstruction(CI));
2029     return true;
2030   case Intrinsic::memcpy:
2031     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
2032   case Intrinsic::memmove:
2033     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
2034   case Intrinsic::memset:
2035     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
2036   case Intrinsic::eh_typeid_for: {
2037     GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
2038     Register Reg = getOrCreateVReg(CI);
2039     unsigned TypeID = MF->getTypeIDFor(GV);
2040     MIRBuilder.buildConstant(Reg, TypeID);
2041     return true;
2042   }
2043   case Intrinsic::objectsize:
2044     llvm_unreachable("llvm.objectsize.* should have been lowered already");
2045 
2046   case Intrinsic::is_constant:
2047     llvm_unreachable("llvm.is.constant.* should have been lowered already");
2048 
2049   case Intrinsic::stackguard:
2050     getStackGuard(getOrCreateVReg(CI), MIRBuilder);
2051     return true;
2052   case Intrinsic::stackprotector: {
2053     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
2054     Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
2055     getStackGuard(GuardVal, MIRBuilder);
2056 
2057     AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
2058     int FI = getOrCreateFrameIndex(*Slot);
2059     MF->getFrameInfo().setStackProtectorIndex(FI);
2060 
2061     MIRBuilder.buildStore(
2062         GuardVal, getOrCreateVReg(*Slot),
2063         *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
2064                                   MachineMemOperand::MOStore |
2065                                       MachineMemOperand::MOVolatile,
2066                                   PtrTy.getSizeInBits() / 8, Align(8)));
2067     return true;
2068   }
2069   case Intrinsic::stacksave: {
2070     // Save the stack pointer to the location provided by the intrinsic.
2071     Register Reg = getOrCreateVReg(CI);
2072     Register StackPtr = MF->getSubtarget()
2073                             .getTargetLowering()
2074                             ->getStackPointerRegisterToSaveRestore();
2075 
2076     // If the target doesn't specify a stack pointer, then fall back.
2077     if (!StackPtr)
2078       return false;
2079 
2080     MIRBuilder.buildCopy(Reg, StackPtr);
2081     return true;
2082   }
2083   case Intrinsic::stackrestore: {
2084     // Restore the stack pointer from the location provided by the intrinsic.
2085     Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
2086     Register StackPtr = MF->getSubtarget()
2087                             .getTargetLowering()
2088                             ->getStackPointerRegisterToSaveRestore();
2089 
2090     // If the target doesn't specify a stack pointer, then fall back.
2091     if (!StackPtr)
2092       return false;
2093 
2094     MIRBuilder.buildCopy(StackPtr, Reg);
2095     return true;
2096   }
2097   case Intrinsic::cttz:
2098   case Intrinsic::ctlz: {
2099     ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
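         // The second operand is the "is zero undef/poison" flag: a false
         // (zero) constant selects the fully-defined G_CTTZ/G_CTLZ form, a
         // true constant the *_ZERO_UNDEF variant.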
2100     bool isTrailing = ID == Intrinsic::cttz;
2101     unsigned Opcode = isTrailing
2102                           ? Cst->isZero() ? TargetOpcode::G_CTTZ
2103                                           : TargetOpcode::G_CTTZ_ZERO_UNDEF
2104                           : Cst->isZero() ? TargetOpcode::G_CTLZ
2105                                           : TargetOpcode::G_CTLZ_ZERO_UNDEF;
2106     MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
2107                           {getOrCreateVReg(*CI.getArgOperand(0))});
2108     return true;
2109   }
2110   case Intrinsic::invariant_start: {
2111     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
2112     Register Undef = MRI->createGenericVirtualRegister(PtrTy);
2113     MIRBuilder.buildUndef(Undef);
2114     return true;
2115   }
2116   case Intrinsic::invariant_end:
2117     return true;
2118   case Intrinsic::expect:
2119   case Intrinsic::annotation:
2120   case Intrinsic::ptr_annotation:
2121   case Intrinsic::launder_invariant_group:
2122   case Intrinsic::strip_invariant_group: {
2123     // Drop the intrinsic, but forward the value.
2124     MIRBuilder.buildCopy(getOrCreateVReg(CI),
2125                          getOrCreateVReg(*CI.getArgOperand(0)));
2126     return true;
2127   }
2128   case Intrinsic::assume:
2129   case Intrinsic::experimental_noalias_scope_decl:
2130   case Intrinsic::var_annotation:
2131   case Intrinsic::sideeffect:
2132     // Discard annotate attributes, assumptions, and artificial side-effects.
2133     return true;
2134   case Intrinsic::read_volatile_register:
2135   case Intrinsic::read_register: {
2136     Value *Arg = CI.getArgOperand(0);
2137     MIRBuilder
2138         .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
2139         .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
2140     return true;
2141   }
2142   case Intrinsic::write_register: {
2143     Value *Arg = CI.getArgOperand(0);
2144     MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
2145       .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
2146       .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
2147     return true;
2148   }
2149   case Intrinsic::localescape: {
2150     MachineBasicBlock &EntryMBB = MF->front();
2151     StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
2152 
2153     // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
2154     // is the same on all targets.
2155     for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
2156       Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
2157       if (isa<ConstantPointerNull>(Arg))
2158         continue; // Skip null pointers. They represent a hole in index space.
2159 
2160       int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
2161       MCSymbol *FrameAllocSym =
2162           MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
2163                                                                 Idx);
2164 
2165       // This should be inserted at the start of the entry block.
2166       auto LocalEscape =
2167           MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
2168               .addSym(FrameAllocSym)
2169               .addFrameIndex(FI);
2170 
2171       EntryMBB.insert(EntryMBB.begin(), LocalEscape);
2172     }
2173 
2174     return true;
2175   }
2176   case Intrinsic::vector_reduce_fadd:
2177   case Intrinsic::vector_reduce_fmul: {
2178     // Need to check for the reassoc flag to decide whether we want a
2179     // sequential reduction opcode or not.
2180     Register Dst = getOrCreateVReg(CI);
2181     Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
2182     Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
2183     unsigned Opc = 0;
2184     if (!CI.hasAllowReassoc()) {
2185       // The sequential ordering case.
2186       Opc = ID == Intrinsic::vector_reduce_fadd
2187                 ? TargetOpcode::G_VECREDUCE_SEQ_FADD
2188                 : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
2189       MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
2190                             MachineInstr::copyFlagsFromInstruction(CI));
2191       return true;
2192     }
2193     // We split the operation into a separate G_FADD/G_FMUL + the reduce,
2194     // since the associativity doesn't matter.
2195     unsigned ScalarOpc;
2196     if (ID == Intrinsic::vector_reduce_fadd) {
2197       Opc = TargetOpcode::G_VECREDUCE_FADD;
2198       ScalarOpc = TargetOpcode::G_FADD;
2199     } else {
2200       Opc = TargetOpcode::G_VECREDUCE_FMUL;
2201       ScalarOpc = TargetOpcode::G_FMUL;
2202     }
2203     LLT DstTy = MRI->getType(Dst);
2204     auto Rdx = MIRBuilder.buildInstr(
2205         Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
2206     MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
2207                           MachineInstr::copyFlagsFromInstruction(CI));
2208 
2209     return true;
2210   }
2211 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \
2212   case Intrinsic::INTRINSIC:
2213 #include "llvm/IR/ConstrainedOps.def"
2214     return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
2215                                            MIRBuilder);
2216 
2217   }
2218   return false;
2219 }
2220 
2221 bool IRTranslator::translateInlineAsm(const CallBase &CB,
2222                                       MachineIRBuilder &MIRBuilder) {
2223 
2224   const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
2225 
2226   if (!ALI) {
2227     LLVM_DEBUG(
2228         dbgs() << "Inline asm lowering is not supported for this target yet\n");
2229     return false;
2230   }
2231 
2232   return ALI->lowerInlineAsm(
2233       MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
2234 }
2235 
2236 bool IRTranslator::translateCallBase(const CallBase &CB,
2237                                      MachineIRBuilder &MIRBuilder) {
2238   ArrayRef<Register> Res = getOrCreateVRegs(CB);
2239 
2240   SmallVector<ArrayRef<Register>, 8> Args;
2241   Register SwiftInVReg = 0;
2242   Register SwiftErrorVReg = 0;
2243   for (auto &Arg : CB.args()) {
2244     if (CLI->supportSwiftError() && isSwiftError(Arg)) {
2245       assert(SwiftInVReg == 0 && "Expected only one swift error argument");
2246       LLT Ty = getLLTForType(*Arg->getType(), *DL);
2247       SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
2248       MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
2249                                             &CB, &MIRBuilder.getMBB(), Arg));
2250       Args.emplace_back(makeArrayRef(SwiftInVReg));
2251       SwiftErrorVReg =
2252           SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
2253       continue;
2254     }
2255     Args.push_back(getOrCreateVRegs(*Arg));
2256   }
2257 
2258   // We don't set HasCalls on MFI here yet because call lowering may decide to
2259   // optimize into tail calls. Instead, we defer that to selection where a final
2260   // scan is done to check if any instructions are calls.
2261   bool Success =
2262       CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
2263                      [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
2264 
2265   // Check if we just inserted a tail call.
2266   if (Success) {
2267     assert(!HasTailCall && "Can't tail call return twice from block?");
2268     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2269     HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
2270   }
2271 
2272   return Success;
2273 }
2274 
2275 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
2276   const CallInst &CI = cast<CallInst>(U);
2277   auto TII = MF->getTarget().getIntrinsicInfo();
2278   const Function *F = CI.getCalledFunction();
2279 
2280   // FIXME: support Windows dllimport function calls.
2281   if (F && (F->hasDLLImportStorageClass() ||
2282             (MF->getTarget().getTargetTriple().isOSWindows() &&
2283              F->hasExternalWeakLinkage())))
2284     return false;
2285 
2286   // FIXME: support control flow guard targets.
2287   if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
2288     return false;
2289 
2290   if (CI.isInlineAsm())
2291     return translateInlineAsm(CI, MIRBuilder);
2292 
2293   Intrinsic::ID ID = Intrinsic::not_intrinsic;
2294   if (F && F->isIntrinsic()) {
2295     ID = F->getIntrinsicID();
2296     if (TII && ID == Intrinsic::not_intrinsic)
2297       ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
2298   }
2299 
2300   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
2301     return translateCallBase(CI, MIRBuilder);
2302 
2303   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
2304 
2305   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
2306     return true;
2307 
2308   ArrayRef<Register> ResultRegs;
2309   if (!CI.getType()->isVoidTy())
2310     ResultRegs = getOrCreateVRegs(CI);
2311 
2312   // Ignore the callsite attributes. Backend code is most likely not expecting
2313   // an intrinsic to sometimes have side effects and sometimes not.
2314   MachineInstrBuilder MIB =
2315       MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
2316   if (isa<FPMathOperator>(CI))
2317     MIB->copyIRFlags(CI);
2318 
2319   for (auto &Arg : enumerate(CI.arg_operands())) {
2320     // If this is required to be an immediate, don't materialize it in a
2321     // register.
2322     if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
2323       if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
2324         // imm arguments are more convenient than cimm (and realistically
2325         // probably sufficient), so use them.
2326         assert(CI->getBitWidth() <= 64 &&
2327                "large intrinsic immediates not handled");
2328         MIB.addImm(CI->getSExtValue());
2329       } else {
2330         MIB.addFPImm(cast<ConstantFP>(Arg.value()));
2331       }
2332     } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
2333       auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
2334       if (!MDN) // This was probably an MDString.
2335         return false;
2336       MIB.addMetadata(MDN);
2337     } else {
2338       ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
2339       if (VRegs.size() > 1)
2340         return false;
2341       MIB.addUse(VRegs[0]);
2342     }
2343   }
2344 
2345   // Add a MachineMemOperand if it is a target mem intrinsic.
2346   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
2347   TargetLowering::IntrinsicInfo Info;
2348   // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
2349   if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
2350     Align Alignment = Info.align.getValueOr(
2351         DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
2352 
2353     uint64_t Size = Info.memVT.getStoreSize();
2354     MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
2355                                                Info.flags, Size, Alignment));
2356   }
2357 
2358   return true;
2359 }
2360 
2361 bool IRTranslator::findUnwindDestinations(
2362     const BasicBlock *EHPadBB,
2363     BranchProbability Prob,
2364     SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
2365         &UnwindDests) {
2366   EHPersonality Personality = classifyEHPersonality(
2367       EHPadBB->getParent()->getFunction().getPersonalityFn());
2368   bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
2369   bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
2370   bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
2371   bool IsSEH = isAsynchronousEHPersonality(Personality);
2372 
2373   if (IsWasmCXX) {
2374     // Ignore this for now.
2375     return false;
2376   }
2377 
2378   while (EHPadBB) {
2379     const Instruction *Pad = EHPadBB->getFirstNonPHI();
2380     BasicBlock *NewEHPadBB = nullptr;
2381     if (isa<LandingPadInst>(Pad)) {
2382       // Stop on landingpads. They are not funclets.
2383       UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
2384       break;
2385     }
2386     if (isa<CleanupPadInst>(Pad)) {
2387       // Stop on cleanup pads. Cleanups are always funclet entries for all known
2388       // personalities.
2389       UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
2390       UnwindDests.back().first->setIsEHScopeEntry();
2391       UnwindDests.back().first->setIsEHFuncletEntry();
2392       break;
2393     }
2394     if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
2395       // Add the catchpad handlers to the possible destinations.
2396       for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
2397         UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
2398         // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
2399         if (IsMSVCCXX || IsCoreCLR)
2400           UnwindDests.back().first->setIsEHFuncletEntry();
2401         if (!IsSEH)
2402           UnwindDests.back().first->setIsEHScopeEntry();
2403       }
2404       NewEHPadBB = CatchSwitch->getUnwindDest();
2405     } else {
2406       continue;
2407     }
2408 
2409     BranchProbabilityInfo *BPI = FuncInfo.BPI;
2410     if (BPI && NewEHPadBB)
2411       Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
2412     EHPadBB = NewEHPadBB;
2413   }
2414   return true;
2415 }
2416 
2417 bool IRTranslator::translateInvoke(const User &U,
2418                                    MachineIRBuilder &MIRBuilder) {
2419   const InvokeInst &I = cast<InvokeInst>(U);
2420   MCContext &Context = MF->getContext();
2421 
2422   const BasicBlock *ReturnBB = I.getSuccessor(0);
2423   const BasicBlock *EHPadBB = I.getSuccessor(1);
2424 
2425   const Function *Fn = I.getCalledFunction();
2426   if (I.isInlineAsm())
2427     return false;
2428 
2429   // FIXME: support invoking patchpoint and statepoint intrinsics.
2430   if (Fn && Fn->isIntrinsic())
2431     return false;
2432 
2433   // FIXME: support whatever these are.
2434   if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
2435     return false;
2436 
2437   // FIXME: support control flow guard targets.
2438   if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
2439     return false;
2440 
2441   // FIXME: support Windows exception handling.
2442   if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
2443     return false;
2444 
2445   // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
2446   // the region covered by the try.
2447   MCSymbol *BeginSymbol = Context.createTempSymbol();
2448   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
2449 
2450   if (!translateCallBase(I, MIRBuilder))
2451     return false;
2452 
2453   MCSymbol *EndSymbol = Context.createTempSymbol();
2454   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
2455 
2456   SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2457   BranchProbabilityInfo *BPI = FuncInfo.BPI;
2458   MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
2459   BranchProbability EHPadBBProb =
2460       BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2461           : BranchProbability::getZero();
2462 
2463   if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
2464     return false;
2465 
2466   MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
2467                     &ReturnMBB = getMBB(*ReturnBB);
2468   // Update successor info.
2469   addSuccessorWithProb(InvokeMBB, &ReturnMBB);
2470   for (auto &UnwindDest : UnwindDests) {
2471     UnwindDest.first->setIsEHPad();
2472     addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2473   }
2474   InvokeMBB->normalizeSuccProbs();
2475 
2476   MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
2477   MIRBuilder.buildBr(ReturnMBB);
2478   return true;
2479 }
2480 
2481 bool IRTranslator::translateCallBr(const User &U,
2482                                    MachineIRBuilder &MIRBuilder) {
2483   // FIXME: Implement this.
2484   return false;
2485 }
2486 
2487 bool IRTranslator::translateLandingPad(const User &U,
2488                                        MachineIRBuilder &MIRBuilder) {
2489   const LandingPadInst &LP = cast<LandingPadInst>(U);
2490 
2491   MachineBasicBlock &MBB = MIRBuilder.getMBB();
2492 
2493   MBB.setIsEHPad();
2494 
2495   // If there aren't registers to copy the values into (e.g., during SjLj
2496   // exceptions), then don't bother.
2497   auto &TLI = *MF->getSubtarget().getTargetLowering();
2498   const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
2499   if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2500       TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2501     return true;
2502 
2503   // If the landingpad's return type is a token type, we don't emit code
2504   // for its exception pointer and selector value. The extraction of exception
2505   // pointer or selector value from token type landingpads is not currently
2506   // supported.
2507   if (LP.getType()->isTokenTy())
2508     return true;
2509 
2510   // Add a label to mark the beginning of the landing pad.  Deletion of the
2511   // landing pad can thus be detected via the MachineModuleInfo.
2512   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
2513     .addSym(MF->addLandingPad(&MBB));
2514 
2515   // If the unwinder does not preserve all registers, ensure that the
2516   // function marks the clobbered registers as used.
2517   const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
2518   if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
2519     MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
2520 
2521   LLT Ty = getLLTForType(*LP.getType(), *DL);
2522   Register Undef = MRI->createGenericVirtualRegister(Ty);
2523   MIRBuilder.buildUndef(Undef);
2524 
2525   SmallVector<LLT, 2> Tys;
2526   for (Type *Ty : cast<StructType>(LP.getType())->elements())
2527     Tys.push_back(getLLTForType(*Ty, *DL));
2528   assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
2529 
2530   // Mark exception register as live in.
2531   Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
2532   if (!ExceptionReg)
2533     return false;
2534 
2535   MBB.addLiveIn(ExceptionReg);
2536   ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
2537   MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
2538 
2539   Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
2540   if (!SelectorReg)
2541     return false;
2542 
2543   MBB.addLiveIn(SelectorReg);
2544   Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
2545   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
2546   MIRBuilder.buildCast(ResRegs[1], PtrVReg);
2547 
2548   return true;
2549 }
2550 
2551 bool IRTranslator::translateAlloca(const User &U,
2552                                    MachineIRBuilder &MIRBuilder) {
2553   auto &AI = cast<AllocaInst>(U);
2554 
2555   if (AI.isSwiftError())
2556     return true;
2557 
2558   if (AI.isStaticAlloca()) {
2559     Register Res = getOrCreateVReg(AI);
2560     int FI = getOrCreateFrameIndex(AI);
2561     MIRBuilder.buildFrameIndex(Res, FI);
2562     return true;
2563   }
2564 
2565   // FIXME: support stack probing for Windows.
2566   if (MF->getTarget().getTargetTriple().isOSWindows())
2567     return false;
2568 
2569   // Now we're in the harder dynamic case.
2570   Register NumElts = getOrCreateVReg(*AI.getArraySize());
2571   Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
2572   LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
2573   if (MRI->getType(NumElts) != IntPtrTy) {
2574     Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
2575     MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
2576     NumElts = ExtElts;
2577   }
2578 
2579   Type *Ty = AI.getAllocatedType();
2580 
2581   Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
2582   Register TySize =
2583       getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
2584   MIRBuilder.buildMul(AllocSize, NumElts, TySize);
2585 
2586   // Round the size of the allocation up to the stack alignment size
2587   // by adding SA-1 to the size. This doesn't overflow because we're computing
2588   // an address inside an alloca.
2589   Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
2590   auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
2591   auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
2592                                       MachineInstr::NoUWrap);
2593   auto AlignCst =
2594       MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
2595   auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
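       // E.g. with a 16-byte stack alignment this computes
       // (AllocSize + 15) & ~(uint64_t)15 before the G_DYN_STACKALLOC below.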
2596 
2597   Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
2598   if (Alignment <= StackAlign)
2599     Alignment = Align(1);
2600   MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
2601 
2602   MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
2603   assert(MF->getFrameInfo().hasVarSizedObjects());
2604   return true;
2605 }
2606 
2607 bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
2608   // FIXME: We may need more info about the type. Because of how LLT works,
2609   // we're completely discarding the i64/double distinction here (amongst
2610   // others). Fortunately the ABIs I know of where that matters don't use va_arg
2611   // anyway but that's not guaranteed.
2612   MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
2613                         {getOrCreateVReg(*U.getOperand(0)),
2614                          DL->getABITypeAlign(U.getType()).value()});
2615   return true;
2616 }
2617 
2618 bool IRTranslator::translateInsertElement(const User &U,
2619                                           MachineIRBuilder &MIRBuilder) {
2620   // If it is a <1 x Ty> vector, use the scalar directly, since a
2621   // single-element vector is not a legal vector type in LLT.
2622   if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
2623     return translateCopy(U, *U.getOperand(1), MIRBuilder);
2624 
2625   Register Res = getOrCreateVReg(U);
2626   Register Val = getOrCreateVReg(*U.getOperand(0));
2627   Register Elt = getOrCreateVReg(*U.getOperand(1));
2628   Register Idx = getOrCreateVReg(*U.getOperand(2));
2629   MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
2630   return true;
2631 }
2632 
2633 bool IRTranslator::translateExtractElement(const User &U,
2634                                            MachineIRBuilder &MIRBuilder) {
2635   // If it is a <1 x Ty> vector, use the scalar directly, since a
2636   // single-element vector is not a legal vector type in LLT.
2637   if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
2638     return translateCopy(U, *U.getOperand(0), MIRBuilder);
2639 
2640   Register Res = getOrCreateVReg(U);
2641   Register Val = getOrCreateVReg(*U.getOperand(0));
2642   const auto &TLI = *MF->getSubtarget().getTargetLowering();
2643   unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
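       // Normalize the index to the target's preferred vector-index width up
       // front: constant indices are rebuilt at that width here, and any
       // remaining mismatch is fixed below with a sign-extend or truncate.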
2644   Register Idx;
2645   if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
2646     if (CI->getBitWidth() != PreferredVecIdxWidth) {
2647       APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
2648       auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
2649       Idx = getOrCreateVReg(*NewIdxCI);
2650     }
2651   }
2652   if (!Idx)
2653     Idx = getOrCreateVReg(*U.getOperand(1));
2654   if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
2655     const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
2656     Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
2657   }
2658   MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
2659   return true;
2660 }
2661 
2662 bool IRTranslator::translateShuffleVector(const User &U,
2663                                           MachineIRBuilder &MIRBuilder) {
2664   ArrayRef<int> Mask;
2665   if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
2666     Mask = SVI->getShuffleMask();
2667   else
2668     Mask = cast<ConstantExpr>(U).getShuffleMask();
2669   ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
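  // The mask is copied into storage owned by the MachineFunction, so the
  // G_SHUFFLE_VECTOR operand does not depend on the lifetime of the IR
  // constant it came from.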
2670   MIRBuilder
2671       .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
2672                   {getOrCreateVReg(*U.getOperand(0)),
2673                    getOrCreateVReg(*U.getOperand(1))})
2674       .addShuffleMask(MaskAlloc);
2675   return true;
2676 }
2677 
2678 bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
2679   const PHINode &PI = cast<PHINode>(U);
2680 
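  // Create one G_PHI per component of the (possibly split) PHI value now, but
  // leave the incoming (value, predecessor) operands empty. They are filled
  // in by finishPendingPhis() once every block has been translated and vregs
  // exist for all incoming values.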
2681   SmallVector<MachineInstr *, 4> Insts;
2682   for (auto Reg : getOrCreateVRegs(PI)) {
2683     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
2684     Insts.push_back(MIB.getInstr());
2685   }
2686 
2687   PendingPHIs.emplace_back(&PI, std::move(Insts));
2688   return true;
2689 }
2690 
2691 bool IRTranslator::translateAtomicCmpXchg(const User &U,
2692                                           MachineIRBuilder &MIRBuilder) {
2693   const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
2694 
2695   auto &TLI = *MF->getSubtarget().getTargetLowering();
2696   auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
2697 
2698   Type *ResType = I.getType();
2699   Type *ValType = ResType->getStructElementType(0);
2700 
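  // cmpxchg produces a {ValType, i1} struct, so getOrCreateVRegs yields two
  // registers: the loaded value and the success flag, matching the two
  // results of G_ATOMIC_CMPXCHG_WITH_SUCCESS.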
2701   auto Res = getOrCreateVRegs(I);
2702   Register OldValRes = Res[0];
2703   Register SuccessRes = Res[1];
2704   Register Addr = getOrCreateVReg(*I.getPointerOperand());
2705   Register Cmp = getOrCreateVReg(*I.getCompareOperand());
2706   Register NewVal = getOrCreateVReg(*I.getNewValOperand());
2707 
2708   AAMDNodes AAMetadata;
2709   I.getAAMetadata(AAMetadata);
2710 
2711   MIRBuilder.buildAtomicCmpXchgWithSuccess(
2712       OldValRes, SuccessRes, Addr, Cmp, NewVal,
2713       *MF->getMachineMemOperand(
2714           MachinePointerInfo(I.getPointerOperand()), Flags,
2715           DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr,
2716           I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering()));
2717   return true;
2718 }
2719 
2720 bool IRTranslator::translateAtomicRMW(const User &U,
2721                                       MachineIRBuilder &MIRBuilder) {
2722   const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
2723   auto &TLI = *MF->getSubtarget().getTargetLowering();
2724   auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
2725 
2726   Type *ResType = I.getType();
2727 
2728   Register Res = getOrCreateVReg(I);
2729   Register Addr = getOrCreateVReg(*I.getPointerOperand());
2730   Register Val = getOrCreateVReg(*I.getValOperand());
2731 
2732   unsigned Opcode = 0;
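  // Map the IR atomicrmw operation onto the corresponding generic opcode.
  // Anything unhandled returns false and is reported as a failed translation
  // by the caller.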
2733   switch (I.getOperation()) {
2734   default:
2735     return false;
2736   case AtomicRMWInst::Xchg:
2737     Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
2738     break;
2739   case AtomicRMWInst::Add:
2740     Opcode = TargetOpcode::G_ATOMICRMW_ADD;
2741     break;
2742   case AtomicRMWInst::Sub:
2743     Opcode = TargetOpcode::G_ATOMICRMW_SUB;
2744     break;
2745   case AtomicRMWInst::And:
2746     Opcode = TargetOpcode::G_ATOMICRMW_AND;
2747     break;
2748   case AtomicRMWInst::Nand:
2749     Opcode = TargetOpcode::G_ATOMICRMW_NAND;
2750     break;
2751   case AtomicRMWInst::Or:
2752     Opcode = TargetOpcode::G_ATOMICRMW_OR;
2753     break;
2754   case AtomicRMWInst::Xor:
2755     Opcode = TargetOpcode::G_ATOMICRMW_XOR;
2756     break;
2757   case AtomicRMWInst::Max:
2758     Opcode = TargetOpcode::G_ATOMICRMW_MAX;
2759     break;
2760   case AtomicRMWInst::Min:
2761     Opcode = TargetOpcode::G_ATOMICRMW_MIN;
2762     break;
2763   case AtomicRMWInst::UMax:
2764     Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
2765     break;
2766   case AtomicRMWInst::UMin:
2767     Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
2768     break;
2769   case AtomicRMWInst::FAdd:
2770     Opcode = TargetOpcode::G_ATOMICRMW_FADD;
2771     break;
2772   case AtomicRMWInst::FSub:
2773     Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
2774     break;
2775   }
2776 
2777   AAMDNodes AAMetadata;
2778   I.getAAMetadata(AAMetadata);
2779 
2780   MIRBuilder.buildAtomicRMW(
2781       Opcode, Res, Addr, Val,
2782       *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
2783                                 Flags, DL->getTypeStoreSize(ResType),
2784                                 getMemOpAlign(I), AAMetadata, nullptr,
2785                                 I.getSyncScopeID(), I.getOrdering()));
2786   return true;
2787 }
2788 
2789 bool IRTranslator::translateFence(const User &U,
2790                                   MachineIRBuilder &MIRBuilder) {
2791   const FenceInst &Fence = cast<FenceInst>(U);
2792   MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
2793                         Fence.getSyncScopeID());
2794   return true;
2795 }
2796 
2797 bool IRTranslator::translateFreeze(const User &U,
2798                                    MachineIRBuilder &MIRBuilder) {
2799   const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
2800   const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));
2801 
2802   assert(DstRegs.size() == SrcRegs.size() &&
2803          "Freeze with different source and destination type?");
2804 
2805   for (unsigned I = 0; I < DstRegs.size(); ++I) {
2806     MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
2807   }
2808 
2809   return true;
2810 }
2811 
2812 void IRTranslator::finishPendingPhis() {
2813 #ifndef NDEBUG
2814   DILocationVerifier Verifier;
2815   GISelObserverWrapper WrapperObserver(&Verifier);
2816   RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
2817 #endif // ifndef NDEBUG
2818   for (auto &Phi : PendingPHIs) {
2819     const PHINode *PI = Phi.first;
2820     ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
2821     MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
2822     EntryBuilder->setDebugLoc(PI->getDebugLoc());
2823 #ifndef NDEBUG
2824     Verifier.setCurrentInst(PI);
2825 #endif // ifndef NDEBUG
2826 
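    // A single IR-level CFG edge may correspond to several machine basic
    // blocks (e.g. after switch lowering), and different IR predecessors can
    // map to the same machine block. SeenPreds makes sure each machine
    // predecessor contributes exactly one set of operands per G_PHI.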
2827     SmallSet<const MachineBasicBlock *, 16> SeenPreds;
2828     for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
2829       auto IRPred = PI->getIncomingBlock(i);
2830       ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
2831       for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
2832         if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
2833           continue;
2834         SeenPreds.insert(Pred);
2835         for (unsigned j = 0; j < ValRegs.size(); ++j) {
2836           MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
2837           MIB.addUse(ValRegs[j]);
2838           MIB.addMBB(Pred);
2839         }
2840       }
2841     }
2842   }
2843 }
2844 
2845 bool IRTranslator::valueIsSplit(const Value &V,
2846                                 SmallVectorImpl<uint64_t> *Offsets) {
2847   SmallVector<LLT, 4> SplitTys;
2848   if (Offsets && !Offsets->empty())
2849     Offsets->clear();
2850   computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
2851   return SplitTys.size() > 1;
2852 }
2853 
2854 bool IRTranslator::translate(const Instruction &Inst) {
2855   CurBuilder->setDebugLoc(Inst.getDebugLoc());
2856   // We only emit constants into the entry block from here. To prevent jumpy
2857   // debug behaviour, set the line to 0.
2858   if (const DebugLoc &DL = Inst.getDebugLoc())
2859     EntryBuilder->setDebugLoc(DILocation::get(
2860         Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
2861   else
2862     EntryBuilder->setDebugLoc(DebugLoc());
2863 
2864   auto &TLI = *MF->getSubtarget().getTargetLowering();
2865   if (TLI.fallBackToDAGISel(Inst))
2866     return false;
2867 
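  // Instruction.def expands HANDLE_INST into one case per IR opcode, so this
  // switch dispatches to the matching translate##OPCODE method (translateAdd,
  // translateRet, ...); anything without a case returns false.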
2868   switch (Inst.getOpcode()) {
2869 #define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
2870   case Instruction::OPCODE:                                                    \
2871     return translate##OPCODE(Inst, *CurBuilder.get());
2872 #include "llvm/IR/Instruction.def"
2873   default:
2874     return false;
2875   }
2876 }
2877 
2878 bool IRTranslator::translate(const Constant &C, Register Reg) {
2879   if (auto CI = dyn_cast<ConstantInt>(&C))
2880     EntryBuilder->buildConstant(Reg, *CI);
2881   else if (auto CF = dyn_cast<ConstantFP>(&C))
2882     EntryBuilder->buildFConstant(Reg, *CF);
2883   else if (isa<UndefValue>(C))
2884     EntryBuilder->buildUndef(Reg);
2885   else if (isa<ConstantPointerNull>(C))
2886     EntryBuilder->buildConstant(Reg, 0);
2887   else if (auto GV = dyn_cast<GlobalValue>(&C))
2888     EntryBuilder->buildGlobalValue(Reg, GV);
2889   else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
2890     if (!CAZ->getType()->isVectorTy())
2891       return false;
2892     // Return the scalar if it is a <1 x Ty> vector.
2893     if (CAZ->getNumElements() == 1)
2894       return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
2895     SmallVector<Register, 4> Ops;
2896     for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
2897       Constant &Elt = *CAZ->getElementValue(i);
2898       Ops.push_back(getOrCreateVReg(Elt));
2899     }
2900     EntryBuilder->buildBuildVector(Reg, Ops);
2901   } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
2902     // Return the scalar if it is a <1 x Ty> vector.
2903     if (CV->getNumElements() == 1)
2904       return translateCopy(C, *CV->getElementAsConstant(0),
2905                            *EntryBuilder.get());
2906     SmallVector<Register, 4> Ops;
2907     for (unsigned i = 0; i < CV->getNumElements(); ++i) {
2908       Constant &Elt = *CV->getElementAsConstant(i);
2909       Ops.push_back(getOrCreateVReg(Elt));
2910     }
2911     EntryBuilder->buildBuildVector(Reg, Ops);
2912   } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
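    // Constant expressions reuse the ordinary instruction translators, but
    // via EntryBuilder, so the expansion is emitted in the entry block and
    // dominates every use of the constant.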
2913     switch(CE->getOpcode()) {
2914 #define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
2915   case Instruction::OPCODE:                                                    \
2916     return translate##OPCODE(*CE, *EntryBuilder.get());
2917 #include "llvm/IR/Instruction.def"
2918     default:
2919       return false;
2920     }
2921   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
2922     if (CV->getNumOperands() == 1)
2923       return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
2924     SmallVector<Register, 4> Ops;
2925     for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
2926       Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
2927     }
2928     EntryBuilder->buildBuildVector(Reg, Ops);
2929   } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
2930     EntryBuilder->buildBlockAddress(Reg, BA);
2931   } else
2932     return false;
2933 
2934   return true;
2935 }
2936 
2937 void IRTranslator::finalizeBasicBlock() {
2938   for (auto &BTB : SL->BitTestCases) {
2939     // Emit header first, if it wasn't already emitted.
2940     if (!BTB.Emitted)
2941       emitBitTestHeader(BTB, BTB.Parent);
2942 
2943     BranchProbability UnhandledProb = BTB.Prob;
2944     for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
2945       UnhandledProb -= BTB.Cases[j].ExtraProb;
2946       // Set the current basic block to the MBB we wish to insert the code into.
2947       MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
2948       // If all cases cover a contiguous range, it is not necessary to jump to
2949       // the default block after the last bit test fails. This is because the
2950       // range check during bit test header creation has guaranteed that every
2951       // case here doesn't go outside the range. In this case, there is no need
2952       // to perform the last bit test, as it will always be true. Instead, make
2953       // the second-to-last bit-test fall through to the target of the last bit
2954       // test, and delete the last bit test.
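      // For example, with cases covering the contiguous values {0,1}, {2,3}
      // and {4,5}: once the {0,1} and {2,3} tests have failed, the header's
      // range check already guarantees a value in {4,5}, so the {2,3} test
      // falls through straight to that target and the final test is removed.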
2955 
2956       MachineBasicBlock *NextMBB;
2957       if (BTB.ContiguousRange && j + 2 == ej) {
2958         // Second-to-last bit-test with contiguous range: fall through to the
2959         // target of the final bit test.
2960         NextMBB = BTB.Cases[j + 1].TargetBB;
2961       } else if (j + 1 == ej) {
2962         // For the last bit test, fall through to Default.
2963         NextMBB = BTB.Default;
2964       } else {
2965         // Otherwise, fall through to the next bit test.
2966         NextMBB = BTB.Cases[j + 1].ThisBB;
2967       }
2968 
2969       emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
2970 
2971       // FIXME: delete this block below?
2972       if (BTB.ContiguousRange && j + 2 == ej) {
2973         // Since we're not going to use the final bit test, remove it.
2974         BTB.Cases.pop_back();
2975         break;
2976       }
2977     }
2978     // This is the "default" BB. We have two jumps to it: from the "header" BB
2979     // and from the last "case" BB, unless the latter was skipped.
2980     CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
2981                                    BTB.Default->getBasicBlock()};
2982     addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
2983     if (!BTB.ContiguousRange) {
2984       addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
2985     }
2986   }
2987   SL->BitTestCases.clear();
2988 
2989   for (auto &JTCase : SL->JTCases) {
2990     // Emit header first, if it wasn't already emitted.
2991     if (!JTCase.first.Emitted)
2992       emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
2993 
2994     emitJumpTable(JTCase.second, JTCase.second.MBB);
2995   }
2996   SL->JTCases.clear();
2997 
2998   for (auto &SwCase : SL->SwitchCases)
2999     emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
3000   SL->SwitchCases.clear();
3001 }
3002 
3003 void IRTranslator::finalizeFunction() {
3004   // Release the memory used by the different maps we
3005   // needed during the translation.
3006   PendingPHIs.clear();
3007   VMap.reset();
3008   FrameIndices.clear();
3009   MachinePreds.clear();
3010   // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
3011   // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
3012   // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
3013   EntryBuilder.reset();
3014   CurBuilder.reset();
3015   FuncInfo.clear();
3016 }
3017 
3018 /// Returns true if a BasicBlock \p BB within a variadic function contains a
3019 /// variadic musttail call.
3020 static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
3021   if (!IsVarArg)
3022     return false;
3023 
3024   // Walk the block backwards, because tail calls usually only appear at the end
3025   // of a block.
3026   return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
3027     const auto *CI = dyn_cast<CallInst>(&I);
3028     return CI && CI->isMustTailCall();
3029   });
3030 }
3031 
3032 bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
3033   MF = &CurMF;
3034   const Function &F = MF->getFunction();
3035   if (F.empty())
3036     return false;
3037   GISelCSEAnalysisWrapper &Wrapper =
3038       getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
3039   // Set the CSEConfig and run the analysis.
3040   GISelCSEInfo *CSEInfo = nullptr;
3041   TPC = &getAnalysis<TargetPassConfig>();
3042   bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
3043                        ? EnableCSEInIRTranslator
3044                        : TPC->isGISelCSEEnabled();
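  // An explicitly specified EnableCSEInIRTranslator option overrides the
  // target's default; otherwise TargetPassConfig decides whether the builders
  // below CSE the generic instructions they emit.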
3045 
3046   if (EnableCSE) {
3047     EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
3048     CSEInfo = &Wrapper.get(TPC->getCSEConfig());
3049     EntryBuilder->setCSEInfo(CSEInfo);
3050     CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
3051     CurBuilder->setCSEInfo(CSEInfo);
3052   } else {
3053     EntryBuilder = std::make_unique<MachineIRBuilder>();
3054     CurBuilder = std::make_unique<MachineIRBuilder>();
3055   }
3056   CLI = MF->getSubtarget().getCallLowering();
3057   CurBuilder->setMF(*MF);
3058   EntryBuilder->setMF(*MF);
3059   MRI = &MF->getRegInfo();
3060   DL = &F.getParent()->getDataLayout();
3061   ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
3062   const TargetMachine &TM = MF->getTarget();
3063   TM.resetTargetOptions(F);
3064   EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
3065   FuncInfo.MF = MF;
3066   if (EnableOpts)
3067     FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
3068   else
3069     FuncInfo.BPI = nullptr;
3070 
3071   FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
3072 
3073   const auto &TLI = *MF->getSubtarget().getTargetLowering();
3074 
3075   SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
3076   SL->init(TLI, TM, *DL);
3077 
3080   assert(PendingPHIs.empty() && "stale PHIs");
3081 
3082   if (!DL->isLittleEndian()) {
3083     // Currently we don't properly handle big endian code.
3084     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
3085                                F.getSubprogram(), &F.getEntryBlock());
3086     R << "unable to translate in big endian mode";
3087     reportTranslationError(*MF, *TPC, *ORE, R);
3088   }
3089 
3090   // Release the per-function state when we return, whether we succeeded or not.
3091   auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
3092 
3093   // Set up a separate basic block for the arguments and constants.
3094   MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
3095   MF->push_back(EntryBB);
3096   EntryBuilder->setMBB(*EntryBB);
3097 
3098   DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
3099   SwiftError.setFunction(CurMF);
3100   SwiftError.createEntriesInEntryBlock(DbgLoc);
3101 
3102   bool IsVarArg = F.isVarArg();
3103   bool HasMustTailInVarArgFn = false;
3104 
3105   // Create all blocks, in IR order, to preserve the layout.
3106   for (const BasicBlock &BB: F) {
3107     auto *&MBB = BBToMBB[&BB];
3108 
3109     MBB = MF->CreateMachineBasicBlock(&BB);
3110     MF->push_back(MBB);
3111 
3112     if (BB.hasAddressTaken())
3113       MBB->setHasAddressTaken();
3114 
3115     if (!HasMustTailInVarArgFn)
3116       HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
3117   }
3118 
3119   MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
3120 
3121   // Make our arguments/constants entry block fall through to the IR entry block.
3122   EntryBB->addSuccessor(&getMBB(F.front()));
3123 
3124   if (CLI->fallBackToDAGISel(F)) {
3125     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
3126                                F.getSubprogram(), &F.getEntryBlock());
3127     R << "unable to lower function: " << ore::NV("Prototype", F.getType());
3128     reportTranslationError(*MF, *TPC, *ORE, R);
3129     return false;
3130   }
3131 
3132   // Lower the actual args into this basic block.
3133   SmallVector<ArrayRef<Register>, 8> VRegArgs;
3134   for (const Argument &Arg: F.args()) {
3135     if (DL->getTypeStoreSize(Arg.getType()).isZero())
3136       continue; // Don't handle zero sized types.
3137     ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
3138     VRegArgs.push_back(VRegs);
3139 
3140     if (Arg.hasSwiftErrorAttr()) {
3141       assert(VRegs.size() == 1 && "Too many vregs for Swift error");
3142       SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
3143     }
3144   }
3145 
3146   if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
3147     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
3148                                F.getSubprogram(), &F.getEntryBlock());
3149     R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
3150     reportTranslationError(*MF, *TPC, *ORE, R);
3151     return false;
3152   }
3153 
3154   // Need to visit defs before uses when translating instructions.
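  // A reverse post-order traversal visits a block before its successors along
  // forward edges, so a value's defining block is translated before any block
  // that uses it; values flowing around loop back-edges only enter a block
  // through PHIs, which are completed later in finishPendingPhis().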
3155   GISelObserverWrapper WrapperObserver;
3156   if (EnableCSE && CSEInfo)
3157     WrapperObserver.addObserver(CSEInfo);
3158   {
3159     ReversePostOrderTraversal<const Function *> RPOT(&F);
3160 #ifndef NDEBUG
3161     DILocationVerifier Verifier;
3162     WrapperObserver.addObserver(&Verifier);
3163 #endif // ifndef NDEBUG
3164     RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
3165     RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
3166     for (const BasicBlock *BB : RPOT) {
3167       MachineBasicBlock &MBB = getMBB(*BB);
3168       // Set the insertion point of all the following translations to
3169       // the end of this basic block.
3170       CurBuilder->setMBB(MBB);
3171       HasTailCall = false;
3172       for (const Instruction &Inst : *BB) {
3173         // If we translated a tail call in the last step, then we know
3174         // everything after the call is either a return, or something that is
3175         // handled by the call itself. (E.g. a lifetime marker or assume
3176         // intrinsic.) In this case, we should stop translating the block and
3177         // move on.
3178         if (HasTailCall)
3179           break;
3180 #ifndef NDEBUG
3181         Verifier.setCurrentInst(&Inst);
3182 #endif // ifndef NDEBUG
3183         if (translate(Inst))
3184           continue;
3185 
3186         OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
3187                                    Inst.getDebugLoc(), BB);
3188         R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
3189 
3190         if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
3191           std::string InstStrStorage;
3192           raw_string_ostream InstStr(InstStrStorage);
3193           InstStr << Inst;
3194 
3195           R << ": '" << InstStr.str() << "'";
3196         }
3197 
3198         reportTranslationError(*MF, *TPC, *ORE, R);
3199         return false;
3200       }
3201 
3202       finalizeBasicBlock();
3203     }
3204 #ifndef NDEBUG
3205     WrapperObserver.removeObserver(&Verifier);
3206 #endif
3207   }
3208 
3209   finishPendingPhis();
3210 
3211   SwiftError.propagateVRegs();
3212 
3213   // Merge the argument lowering and constants block with its single
3214   // successor, the LLVM-IR entry block.  We want the basic block to
3215   // be maximal.
3216   assert(EntryBB->succ_size() == 1 &&
3217          "Custom BB used for lowering should have only one successor");
3218   // Get the successor of the current entry block.
3219   MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
3220   assert(NewEntryBB.pred_size() == 1 &&
3221          "LLVM-IR entry block has a predecessor!?");
3222   // Move all the instructions from the current entry block to the
3223   // new entry block.
3224   NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
3225                     EntryBB->end());
3226 
3227   // Update the live-in information for the new entry block.
3228   for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
3229     NewEntryBB.addLiveIn(LiveIn);
3230   NewEntryBB.sortUniqueLiveIns();
3231 
3232   // Get rid of the now empty basic block.
3233   EntryBB->removeSuccessor(&NewEntryBB);
3234   MF->remove(EntryBB);
3235   MF->DeleteMachineBasicBlock(EntryBB);
3236 
3237   assert(&MF->front() == &NewEntryBB &&
3238          "New entry wasn't next in the list of basic blocks!");
3239 
3240   // Initialize stack protector information.
3241   StackProtector &SP = getAnalysis<StackProtector>();
3242   SP.copyToMachineFrameInfo(MF->getFrameInfo());
3243 
3244   return false;
3245 }
3246