xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1 //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass converts selects to conditional jumps when profitable.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/SelectOptimize.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/Analysis/BlockFrequencyInfo.h"
17 #include "llvm/Analysis/BranchProbabilityInfo.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
20 #include "llvm/Analysis/ProfileSummaryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSchedule.h"
26 #include "llvm/CodeGen/TargetSubtargetInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/IR/ProfDataUtils.h"
34 #include "llvm/InitializePasses.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/ScaledNumber.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Transforms/Utils/SizeOpts.h"
39 #include <algorithm>
40 #include <memory>
41 #include <queue>
42 #include <stack>
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "select-optimize"
47 
// Pass statistics, reported with -stats.
STATISTIC(NumSelectOptAnalyzed,
          "Number of select groups considered for conversion to branch");
STATISTIC(NumSelectConvertedExpColdOperand,
          "Number of select groups converted due to expensive cold operand");
STATISTIC(NumSelectConvertedHighPred,
          "Number of select groups converted due to high-predictability");
STATISTIC(NumSelectUnPred,
          "Number of select groups not converted due to unpredictability");
STATISTIC(NumSelectColdBB,
          "Number of select groups not converted due to cold basic block");
STATISTIC(NumSelectConvertedLoop,
          "Number of select groups converted due to loop-level analysis");
STATISTIC(NumSelectsConverted, "Number of selects converted");

// A select operand is treated as "cold" when, per branch-weight metadata, its
// path is taken in fewer than this percentage of executions.
static cl::opt<unsigned> ColdOperandThreshold(
    "cold-operand-threshold",
    cl::desc("Maximum frequency of path for an operand to be considered cold."),
    cl::init(20), cl::Hidden);

// Budget (as a multiple of TCC_Expensive) for the dependence slice of a cold
// operand before the slice is deemed expensive enough to justify a branch.
static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
    "cold-operand-max-cost-multiplier",
    cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
             "slice of a cold operand to be considered inexpensive."),
    cl::init(1), cl::Hidden);

// Thresholds used by the inner-loop heuristics (checkLoopHeuristics): the
// gain of the branch form over the select form must clear these bars.
static cl::opt<unsigned>
    GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
                          cl::desc("Gradient gain threshold (%)."),
                          cl::init(25), cl::Hidden);

static cl::opt<unsigned>
    GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

static cl::opt<unsigned> GainRelativeThreshold(
    "select-opti-loop-relative-gain-threshold",
    cl::desc(
        "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
    cl::init(8), cl::Hidden);

// Assumed branch-misprediction rate when no profile information pins it down.
static cl::opt<unsigned> MispredictDefaultRate(
    "mispredict-default-rate", cl::Hidden, cl::init(25),
    cl::desc("Default mispredict rate (initialized to 25%)."));

// Escape hatch to fall back to base heuristics everywhere.
static cl::opt<bool>
    DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
                               cl::init(false),
                               cl::desc("Disable loop-level heuristics."));
98 namespace {
99 
100 class SelectOptimizeImpl {
101   const TargetMachine *TM = nullptr;
102   const TargetSubtargetInfo *TSI = nullptr;
103   const TargetLowering *TLI = nullptr;
104   const TargetTransformInfo *TTI = nullptr;
105   const LoopInfo *LI = nullptr;
106   BlockFrequencyInfo *BFI;
107   ProfileSummaryInfo *PSI = nullptr;
108   OptimizationRemarkEmitter *ORE = nullptr;
109   TargetSchedModel TSchedModel;
110 
111 public:
112   SelectOptimizeImpl() = default;
113   SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
114   PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
115   bool runOnFunction(Function &F, Pass &P);
116 
117 private:
118   // Select groups consist of consecutive select instructions with the same
119   // condition.
120   using SelectGroup = SmallVector<SelectInst *, 2>;
121   using SelectGroups = SmallVector<SelectGroup, 2>;
122 
123   using Scaled64 = ScaledNumber<uint64_t>;
124 
125   struct CostInfo {
126     /// Predicated cost (with selects as conditional moves).
127     Scaled64 PredCost;
128     /// Non-predicated cost (with selects converted to branches).
129     Scaled64 NonPredCost;
130   };
131 
132   // Converts select instructions of a function to conditional jumps when deemed
133   // profitable. Returns true if at least one select was converted.
134   bool optimizeSelects(Function &F);
135 
136   // Heuristics for determining which select instructions can be profitably
137   // conveted to branches. Separate heuristics for selects in inner-most loops
138   // and the rest of code regions (base heuristics for non-inner-most loop
139   // regions).
140   void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
141   void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
142 
143   // Converts to branches the select groups that were deemed
144   // profitable-to-convert.
145   void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
146 
147   // Splits selects of a given basic block into select groups.
148   void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
149 
150   // Determines for which select groups it is profitable converting to branches
151   // (base and inner-most-loop heuristics).
152   void findProfitableSIGroupsBase(SelectGroups &SIGroups,
153                                   SelectGroups &ProfSIGroups);
154   void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
155                                         SelectGroups &ProfSIGroups);
156 
157   // Determines if a select group should be converted to a branch (base
158   // heuristics).
159   bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
160 
161   // Returns true if there are expensive instructions in the cold value
162   // operand's (if any) dependence slice of any of the selects of the given
163   // group.
164   bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
165 
166   // For a given source instruction, collect its backwards dependence slice
167   // consisting of instructions exclusively computed for producing the operands
168   // of the source instruction.
169   void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
170                              Instruction *SI, bool ForSinking = false);
171 
172   // Returns true if the condition of the select is highly predictable.
173   bool isSelectHighlyPredictable(const SelectInst *SI);
174 
175   // Loop-level checks to determine if a non-predicated version (with branches)
176   // of the given loop is more profitable than its predicated version.
177   bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
178 
179   // Computes instruction and loop-critical-path costs for both the predicated
180   // and non-predicated version of the given loop.
181   bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
182                         DenseMap<const Instruction *, CostInfo> &InstCostMap,
183                         CostInfo *LoopCost);
184 
185   // Returns a set of all the select instructions in the given select groups.
186   SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
187 
188   // Returns the latency cost of a given instruction.
189   std::optional<uint64_t> computeInstCost(const Instruction *I);
190 
191   // Returns the misprediction cost of a given select when converted to branch.
192   Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
193 
194   // Returns the cost of a branch when the prediction is correct.
195   Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
196                                 const SelectInst *SI);
197 
198   // Returns true if the target architecture supports lowering a given select.
199   bool isSelectKindSupported(SelectInst *SI);
200 };
201 
/// Legacy pass manager wrapper: forwards to SelectOptimizeImpl and declares
/// the analyses it consumes.
class SelectOptimize : public FunctionPass {
  SelectOptimizeImpl Impl;

public:
  static char ID; // Pass identification.

  SelectOptimize() : FunctionPass(ID) {
    initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    // The Pass reference lets the impl pull analyses via getAnalysis<>().
    return Impl.runOnFunction(F, *this);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  }
};
225 
226 } // namespace
227 
228 PreservedAnalyses SelectOptimizePass::run(Function &F,
229                                           FunctionAnalysisManager &FAM) {
230   SelectOptimizeImpl Impl(TM);
231   return Impl.run(F, FAM);
232 }
233 
234 char SelectOptimize::ID = 0;
235 
236 INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
237                       false)
238 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
239 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
240 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
241 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
242 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
243 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
244 INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
245                     false)
246 
247 FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
248 
// New pass manager driver: gather the required analyses, run a few cheap
// early-exit checks, then perform the optimization.
PreservedAnalyses SelectOptimizeImpl::run(Function &F,
                                          FunctionAnalysisManager &FAM) {
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

  // If none of the select types are supported then skip this pass.
  // This is an optimization pass. Legality issues will be handled by
  // instruction selection.
  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
    return PreservedAnalyses::all();

  TTI = &FAM.getResult<TargetIRAnalysis>(F);
  if (!TTI->enableSelectOptimize())
    return PreservedAnalyses::all();

  // ProfileSummary is a module analysis, so from a function pass it can only
  // be obtained as a cached result through the module proxy.
  PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
            .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  assert(PSI && "This pass requires module analysis pass `profile-summary`!");
  BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);

  // When optimizing for size, selects are preferable over branches.
  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
    return PreservedAnalyses::all();

  LI = &FAM.getResult<LoopAnalysis>(F);
  ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  TSchedModel.init(TSI);

  // Conservatively report nothing preserved if any select was converted.
  bool Changed = optimizeSelects(F);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
282 
// Legacy pass manager driver: mirrors run() above but pulls the analyses via
// Pass::getAnalysis<>() instead of the FunctionAnalysisManager.
bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
  TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  TSI = TM->getSubtargetImpl(F);
  TLI = TSI->getTargetLowering();

  // If none of the select types are supported then skip this pass.
  // This is an optimization pass. Legality issues will be handled by
  // instruction selection.
  if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
      !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
      !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
    return false;

  TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  if (!TTI->enableSelectOptimize())
    return false;

  LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
  PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
  TSchedModel.init(TSI);

  // When optimizing for size, selects are preferable over branches.
  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
    return false;

  return optimizeSelects(F);
}
313 
314 bool SelectOptimizeImpl::optimizeSelects(Function &F) {
315   // Determine for which select groups it is profitable converting to branches.
316   SelectGroups ProfSIGroups;
317   // Base heuristics apply only to non-loops and outer loops.
318   optimizeSelectsBase(F, ProfSIGroups);
319   // Separate heuristics for inner-most loops.
320   optimizeSelectsInnerLoops(F, ProfSIGroups);
321 
322   // Convert to branches the select groups that were deemed
323   // profitable-to-convert.
324   convertProfitableSIGroups(ProfSIGroups);
325 
326   // Code modified if at least one select group was converted.
327   return !ProfSIGroups.empty();
328 }
329 
330 void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
331                                              SelectGroups &ProfSIGroups) {
332   // Collect all the select groups.
333   SelectGroups SIGroups;
334   for (BasicBlock &BB : F) {
335     // Base heuristics apply only to non-loops and outer loops.
336     Loop *L = LI->getLoopFor(&BB);
337     if (L && L->isInnermost())
338       continue;
339     collectSelectGroups(BB, SIGroups);
340   }
341 
342   // Determine for which select groups it is profitable converting to branches.
343   findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
344 }
345 
346 void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
347                                                    SelectGroups &ProfSIGroups) {
348   SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
349   // Need to check size on each iteration as we accumulate child loops.
350   for (unsigned long i = 0; i < Loops.size(); ++i)
351     for (Loop *ChildL : Loops[i]->getSubLoops())
352       Loops.push_back(ChildL);
353 
354   for (Loop *L : Loops) {
355     if (!L->isInnermost())
356       continue;
357 
358     SelectGroups SIGroups;
359     for (BasicBlock *BB : L->getBlocks())
360       collectSelectGroups(*BB, SIGroups);
361 
362     findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
363   }
364 }
365 
366 /// If \p isTrue is true, return the true value of \p SI, otherwise return
367 /// false value of \p SI. If the true/false value of \p SI is defined by any
368 /// select instructions in \p Selects, look through the defining select
369 /// instruction until the true/false value is not defined in \p Selects.
370 static Value *
371 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
372                     const SmallPtrSet<const Instruction *, 2> &Selects) {
373   Value *V = nullptr;
374   for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
375        DefSI = dyn_cast<SelectInst>(V)) {
376     assert(DefSI->getCondition() == SI->getCondition() &&
377            "The condition of DefSI does not match with SI");
378     V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
379   }
380   assert(V && "Failed to get select true/false value");
381   return V;
382 }
383 
/// Lower each profitable select group to an explicit conditional branch,
/// sinking the exclusive dependence slices of the true/false operands into
/// the new blocks and replacing each select with a PHI in the merge block.
void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
  for (SelectGroup &ASI : ProfSIGroups) {
    // The code transformation here is a modified version of the sinking
    // transformation in CodeGenPrepare::optimizeSelectInst with a more
    // aggressive strategy of which instructions to sink.
    //
    // TODO: eliminate the redundancy of logic transforming selects to branches
    // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
    // selects for all cases (with and without profile information).

    // Transform a sequence like this:
    //    start:
    //       %cmp = cmp uge i32 %a, %b
    //       %sel = select i1 %cmp, i32 %c, i32 %d
    //
    // Into:
    //    start:
    //       %cmp = cmp uge i32 %a, %b
    //       %cmp.frozen = freeze %cmp
    //       br i1 %cmp.frozen, label %select.true, label %select.false
    //    select.true:
    //       br label %select.end
    //    select.false:
    //       br label %select.end
    //    select.end:
    //       %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
    //
    // %cmp should be frozen, otherwise it may introduce undefined behavior.
    // In addition, we may sink instructions that produce %c or %d into the
    // destination(s) of the new branch.
    // If the true or false blocks do not contain a sunken instruction, that
    // block and its branch may be optimized away. In that case, one side of the
    // first branch will point directly to select.end, and the corresponding PHI
    // predecessor block will be the start block.

    // Find all the instructions that can be soundly sunk to the true/false
    // blocks. These are instructions that are computed solely for producing the
    // operands of the select instructions in the group and can be sunk without
    // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
    // with side effects).
    SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
    typedef std::stack<Instruction *>::size_type StackSizeType;
    StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
    for (SelectInst *SI : ASI) {
      // For each select, compute the sinkable dependence chains of the true and
      // false operands.
      if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
        std::stack<Instruction *> TrueSlice;
        getExclBackwardsSlice(TI, TrueSlice, SI, true);
        maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
        TrueSlices.push_back(TrueSlice);
      }
      if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
        std::stack<Instruction *> FalseSlice;
        getExclBackwardsSlice(FI, FalseSlice, SI, true);
        maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
        FalseSlices.push_back(FalseSlice);
      }
    }
    // In the case of multiple select instructions in the same group, the order
    // of non-dependent instructions (instructions of different dependence
    // slices) in the true/false blocks appears to affect performance.
    // Interleaving the slices seems to experimentally be the optimal approach.
    // This interleaving scheduling allows for more ILP (with a natural downside
    // of increasing a bit register pressure) compared to a simple ordering of
    // one whole chain after another. One would expect that this ordering would
    // not matter since the scheduling in the backend of the compiler  would
    // take care of it, but apparently the scheduler fails to deliver optimal
    // ILP with a naive ordering here.
    SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
    for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
      for (auto &S : TrueSlices) {
        if (!S.empty()) {
          TrueSlicesInterleaved.push_back(S.top());
          S.pop();
        }
      }
    }
    for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
      for (auto &S : FalseSlices) {
        if (!S.empty()) {
          FalseSlicesInterleaved.push_back(S.top());
          S.pop();
        }
      }
    }

    // We split the block containing the select(s) into two blocks.
    SelectInst *SI = ASI.front();
    SelectInst *LastSI = ASI.back();
    BasicBlock *StartBlock = SI->getParent();
    // Split right after the last select of the group.
    BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
    BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
    BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
    // Delete the unconditional branch that was just created by the split.
    StartBlock->getTerminator()->eraseFromParent();

    // Move any debug/pseudo instructions that were in-between the select
    // group to the newly-created end block.
    SmallVector<Instruction *, 2> DebugPseudoINS;
    auto DIt = SI->getIterator();
    while (&*DIt != LastSI) {
      if (DIt->isDebugOrPseudoInst())
        DebugPseudoINS.push_back(&*DIt);
      DIt++;
    }
    for (auto *DI : DebugPseudoINS) {
      DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt());
    }

    // These are the new basic blocks for the conditional branch.
    // At least one will become an actual new basic block.
    BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
    BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
    if (!TrueSlicesInterleaved.empty()) {
      TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
                                     EndBlock->getParent(), EndBlock);
      TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
      TrueBranch->setDebugLoc(LastSI->getDebugLoc());
      for (Instruction *TrueInst : TrueSlicesInterleaved)
        TrueInst->moveBefore(TrueBranch);
    }
    if (!FalseSlicesInterleaved.empty()) {
      FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
                                      EndBlock->getParent(), EndBlock);
      FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
      FalseBranch->setDebugLoc(LastSI->getDebugLoc());
      for (Instruction *FalseInst : FalseSlicesInterleaved)
        FalseInst->moveBefore(FalseBranch);
    }
    // If there was nothing to sink, then arbitrarily choose the 'false' side
    // for a new input value to the PHI.
    if (TrueBlock == FalseBlock) {
      assert(TrueBlock == nullptr &&
             "Unexpected basic block transform while optimizing select");

      FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
                                      EndBlock->getParent(), EndBlock);
      auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
      FalseBranch->setDebugLoc(SI->getDebugLoc());
    }

    // Insert the real conditional branch based on the original condition.
    // If we did not create a new block for one of the 'true' or 'false' paths
    // of the condition, it means that side of the branch goes to the end block
    // directly and the path originates from the start block from the point of
    // view of the new PHI.
    BasicBlock *TT, *FT;
    if (TrueBlock == nullptr) {
      TT = EndBlock;
      FT = FalseBlock;
      TrueBlock = StartBlock;
    } else if (FalseBlock == nullptr) {
      TT = TrueBlock;
      FT = EndBlock;
      FalseBlock = StartBlock;
    } else {
      TT = TrueBlock;
      FT = FalseBlock;
    }
    // Freeze the condition so the branch does not observe undef/poison.
    IRBuilder<> IB(SI);
    auto *CondFr =
        IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
    IB.CreateCondBr(CondFr, TT, FT, SI);

    SmallPtrSet<const Instruction *, 2> INS;
    INS.insert(ASI.begin(), ASI.end());
    // Use reverse iterator because later select may use the value of the
    // earlier select, and we need to propagate value through earlier select
    // to get the PHI operand.
    for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
      SelectInst *SI = *It;
      // The select itself is replaced with a PHI Node.
      PHINode *PN = PHINode::Create(SI->getType(), 2, "");
      PN->insertBefore(EndBlock->begin());
      PN->takeName(SI);
      PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
      PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
      PN->setDebugLoc(SI->getDebugLoc());

      SI->replaceAllUsesWith(PN);
      SI->eraseFromParent();
      INS.erase(SI);
      ++NumSelectsConverted;
    }
  }
}
571 
572 static bool isSpecialSelect(SelectInst *SI) {
573   using namespace llvm::PatternMatch;
574 
575   // If the select is a logical-and/logical-or then it is better treated as a
576   // and/or by the backend.
577   if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
578                             m_LogicalOr(m_Value(), m_Value()))))
579     return true;
580 
581   return false;
582 }
583 
/// Scan a basic block and group consecutive selects that share the same
/// condition (debug/pseudo instructions in between are tolerated).
void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
                                             SelectGroups &SIGroups) {
  BasicBlock::iterator BBIt = BB.begin();
  while (BBIt != BB.end()) {
    Instruction *I = &*BBIt++;
    if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
      if (isSpecialSelect(SI))
        continue;

      // Start a new group at this select and greedily extend it forward.
      // Note: BBIt is shared with the outer loop, so instructions consumed
      // here are not revisited.
      SelectGroup SIGroup;
      SIGroup.push_back(SI);
      while (BBIt != BB.end()) {
        Instruction *NI = &*BBIt;
        SelectInst *NSI = dyn_cast<SelectInst>(NI);
        if (NSI && SI->getCondition() == NSI->getCondition()) {
          SIGroup.push_back(NSI);
        } else if (!NI->isDebugOrPseudoInst()) {
          // Debug/pseudo instructions should be skipped and not prevent the
          // formation of a select group.
          break;
        }
        ++BBIt;
      }

      // If the select type is not supported, no point optimizing it.
      // Instruction selection will take care of it.
      if (!isSelectKindSupported(SI))
        continue;

      SIGroups.push_back(SIGroup);
    }
  }
}
617 
618 void SelectOptimizeImpl::findProfitableSIGroupsBase(
619     SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
620   for (SelectGroup &ASI : SIGroups) {
621     ++NumSelectOptAnalyzed;
622     if (isConvertToBranchProfitableBase(ASI))
623       ProfSIGroups.push_back(ASI);
624   }
625 }
626 
627 static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
628                                DiagnosticInfoOptimizationBase &Rem) {
629   LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
630   ORE->emit(Rem);
631 }
632 
/// Decide, per inner-most loop, which select groups to convert: first the
/// whole-loop cost model must favor branches, then each group individually
/// must be cheaper in branch form than in select form.
void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
    const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
  NumSelectOptAnalyzed += SIGroups.size();
  // For each select group in an inner-most loop,
  // a branch is more preferable than a select/conditional-move if:
  // i) conversion to branches for all the select groups of the loop satisfies
  //    loop-level heuristics including reducing the loop's critical path by
  //    some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
  // ii) the total cost of the select group is cheaper with a branch compared
  //     to its predicated version. The cost is in terms of latency and the cost
  //     of a select group is the cost of its most expensive select instruction
  //     (assuming infinite resources and thus fully leveraging available ILP).

  // LoopCost[0]/[1] hold the costs at loop depths 0 and 1 respectively.
  DenseMap<const Instruction *, CostInfo> InstCostMap;
  CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
                          {Scaled64::getZero(), Scaled64::getZero()}};
  if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
      !checkLoopHeuristics(L, LoopCost)) {
    return;
  }

  for (SelectGroup &ASI : SIGroups) {
    // Assuming infinite resources, the cost of a group of instructions is the
    // cost of the most expensive instruction of the group.
    Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
    for (SelectInst *SI : ASI) {
      SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
      BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
    }
    if (BranchCost < SelectCost) {
      OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
      OR << "Profitable to convert to branch (loop analysis). BranchCost="
         << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
         << ". ";
      EmitAndPrintRemark(ORE, OR);
      ++NumSelectConvertedLoop;
      ProfSIGroups.push_back(ASI);
    } else {
      OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
      ORmiss << "Select is more profitable (loop analysis). BranchCost="
             << BranchCost.toString()
             << ", SelectCost=" << SelectCost.toString() << ". ";
      EmitAndPrintRemark(ORE, ORmiss);
    }
  }
}
679 
/// Base (non-loop) heuristic: a group is profitable to convert when its
/// block is not cold, its condition is not marked unpredictable, and either
/// the branch is highly predictable or a cold operand has an expensive
/// dependence slice. Emits a remark describing the decision either way.
bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
    const SmallVector<SelectInst *, 2> &ASI) {
  SelectInst *SI = ASI.front();
  LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
  OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
  OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);

  // Skip cold basic blocks. Better to optimize for size for cold blocks.
  if (PSI->isColdBlock(SI->getParent(), BFI)) {
    ++NumSelectColdBB;
    ORmiss << "Not converted to branch because of cold basic block. ";
    EmitAndPrintRemark(ORE, ORmiss);
    return false;
  }

  // If unpredictable, branch form is less profitable.
  if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
    ++NumSelectUnPred;
    ORmiss << "Not converted to branch because of unpredictable branch. ";
    EmitAndPrintRemark(ORE, ORmiss);
    return false;
  }

  // If highly predictable, branch form is more profitable, unless a
  // predictable select is inexpensive in the target architecture.
  if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
    ++NumSelectConvertedHighPred;
    OR << "Converted to branch because of highly predictable branch. ";
    EmitAndPrintRemark(ORE, OR);
    return true;
  }

  // Look for expensive instructions in the cold operand's (if any) dependence
  // slice of any of the selects in the group.
  if (hasExpensiveColdOperand(ASI)) {
    ++NumSelectConvertedExpColdOperand;
    OR << "Converted to branch because of expensive cold operand.";
    EmitAndPrintRemark(ORE, OR);
    return true;
  }

  ORmiss << "Not profitable to convert to branch (base heuristic).";
  EmitAndPrintRemark(ORE, ORmiss);
  return false;
}
725 
726 static InstructionCost divideNearest(InstructionCost Numerator,
727                                      uint64_t Denominator) {
728   return (Numerator + (Denominator / 2)) / Denominator;
729 }
730 
731 bool SelectOptimizeImpl::hasExpensiveColdOperand(
732     const SmallVector<SelectInst *, 2> &ASI) {
733   bool ColdOperand = false;
734   uint64_t TrueWeight, FalseWeight, TotalWeight;
735   if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
736     uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
737     TotalWeight = TrueWeight + FalseWeight;
738     // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
739     ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
740   } else if (PSI->hasProfileSummary()) {
741     OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
742     ORmiss << "Profile data available but missing branch-weights metadata for "
743               "select instruction. ";
744     EmitAndPrintRemark(ORE, ORmiss);
745   }
746   if (!ColdOperand)
747     return false;
748   // Check if the cold path's dependence slice is expensive for any of the
749   // selects of the group.
750   for (SelectInst *SI : ASI) {
751     Instruction *ColdI = nullptr;
752     uint64_t HotWeight;
753     if (TrueWeight < FalseWeight) {
754       ColdI = dyn_cast<Instruction>(SI->getTrueValue());
755       HotWeight = FalseWeight;
756     } else {
757       ColdI = dyn_cast<Instruction>(SI->getFalseValue());
758       HotWeight = TrueWeight;
759     }
760     if (ColdI) {
761       std::stack<Instruction *> ColdSlice;
762       getExclBackwardsSlice(ColdI, ColdSlice, SI);
763       InstructionCost SliceCost = 0;
764       while (!ColdSlice.empty()) {
765         SliceCost += TTI->getInstructionCost(ColdSlice.top(),
766                                              TargetTransformInfo::TCK_Latency);
767         ColdSlice.pop();
768       }
769       // The colder the cold value operand of the select is the more expensive
770       // the cmov becomes for computing the cold value operand every time. Thus,
771       // the colder the cold operand is the more its cost counts.
772       // Get nearest integer cost adjusted for coldness.
773       InstructionCost AdjSliceCost =
774           divideNearest(SliceCost * HotWeight, TotalWeight);
775       if (AdjSliceCost >=
776           ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
777         return true;
778     }
779   }
780   return false;
781 }
782 
783 // Check if it is safe to move LoadI next to the SI.
784 // Conservatively assume it is safe only if there is no instruction
785 // modifying memory in-between the load and the select instruction.
786 static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
787   // Assume loads from different basic blocks are unsafe to move.
788   if (LoadI->getParent() != SI->getParent())
789     return false;
790   auto It = LoadI->getIterator();
791   while (&*It != SI) {
792     if (It->mayWriteToMemory())
793       return false;
794     It++;
795   }
796   return true;
797 }
798 
799 // For a given source instruction, collect its backwards dependence slice
800 // consisting of instructions exclusively computed for the purpose of producing
801 // the operands of the source instruction. As an approximation
802 // (sufficiently-accurate in practice), we populate this set with the
803 // instructions of the backwards dependence slice that only have one-use and
804 // form an one-use chain that leads to the source instruction.
805 void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
806                                                std::stack<Instruction *> &Slice,
807                                                Instruction *SI,
808                                                bool ForSinking) {
809   SmallPtrSet<Instruction *, 2> Visited;
810   std::queue<Instruction *> Worklist;
811   Worklist.push(I);
812   while (!Worklist.empty()) {
813     Instruction *II = Worklist.front();
814     Worklist.pop();
815 
816     // Avoid cycles.
817     if (!Visited.insert(II).second)
818       continue;
819 
820     if (!II->hasOneUse())
821       continue;
822 
823     // Cannot soundly sink instructions with side-effects.
824     // Terminator or phi instructions cannot be sunk.
825     // Avoid sinking other select instructions (should be handled separetely).
826     if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
827                        isa<SelectInst>(II) || isa<PHINode>(II)))
828       continue;
829 
830     // Avoid sinking loads in order not to skip state-modifying instructions,
831     // that may alias with the loaded address.
832     // Only allow sinking of loads within the same basic block that are
833     // conservatively proven to be safe.
834     if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
835       continue;
836 
837     // Avoid considering instructions with less frequency than the source
838     // instruction (i.e., avoid colder code regions of the dependence slice).
839     if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
840       continue;
841 
842     // Eligible one-use instruction added to the dependence slice.
843     Slice.push(II);
844 
845     // Explore all the operands of the current instruction to expand the slice.
846     for (unsigned k = 0; k < II->getNumOperands(); ++k)
847       if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
848         Worklist.push(OpI);
849   }
850 }
851 
852 bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {
853   uint64_t TrueWeight, FalseWeight;
854   if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
855     uint64_t Max = std::max(TrueWeight, FalseWeight);
856     uint64_t Sum = TrueWeight + FalseWeight;
857     if (Sum != 0) {
858       auto Probability = BranchProbability::getBranchProbability(Max, Sum);
859       if (Probability > TTI->getPredictableBranchThreshold())
860         return true;
861     }
862   }
863   return false;
864 }
865 
866 bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
867                                              const CostInfo LoopCost[2]) {
868   // Loop-level checks to determine if a non-predicated version (with branches)
869   // of the loop is more profitable than its predicated version.
870 
871   if (DisableLoopLevelHeuristics)
872     return true;
873 
874   OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
875                                    L->getHeader()->getFirstNonPHI());
876 
877   if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
878       LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
879     ORmissL << "No select conversion in the loop due to no reduction of loop's "
880                "critical path. ";
881     EmitAndPrintRemark(ORE, ORmissL);
882     return false;
883   }
884 
885   Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
886                       LoopCost[1].PredCost - LoopCost[1].NonPredCost};
887 
888   // Profitably converting to branches need to reduce the loop's critical path
889   // by at least some threshold (absolute gain of GainCycleThreshold cycles and
890   // relative gain of 12.5%).
891   if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
892       Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
893     Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
894     ORmissL << "No select conversion in the loop due to small reduction of "
895                "loop's critical path. Gain="
896             << Gain[1].toString()
897             << ", RelativeGain=" << RelativeGain.toString() << "%. ";
898     EmitAndPrintRemark(ORE, ORmissL);
899     return false;
900   }
901 
902   // If the loop's critical path involves loop-carried dependences, the gradient
903   // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
904   // This check ensures that the latency reduction for the loop's critical path
905   // keeps decreasing with sufficient rate beyond the two analyzed loop
906   // iterations.
907   if (Gain[1] > Gain[0]) {
908     Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
909                             (LoopCost[1].PredCost - LoopCost[0].PredCost);
910     if (GradientGain < Scaled64::get(GainGradientThreshold)) {
911       ORmissL << "No select conversion in the loop due to small gradient gain. "
912                  "GradientGain="
913               << GradientGain.toString() << "%. ";
914       EmitAndPrintRemark(ORE, ORmissL);
915       return false;
916     }
917   }
918   // If the gain decreases it is not profitable to convert.
919   else if (Gain[1] < Gain[0]) {
920     ORmissL
921         << "No select conversion in the loop due to negative gradient gain. ";
922     EmitAndPrintRemark(ORE, ORmissL);
923     return false;
924   }
925 
926   // Non-predicated version of the loop is more profitable than its
927   // predicated version.
928   return true;
929 }
930 
// Computes instruction and loop-critical-path costs for both the predicated
// and non-predicated version of the given loop.
// Returns false if unable to compute these costs due to invalid cost of loop
// instruction(s).
bool SelectOptimizeImpl::computeLoopCosts(
    const Loop *L, const SelectGroups &SIGroups,
    DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
  LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
                    << L->getHeader()->getName() << "\n");
  // Set of select instructions that belong to one of the select groups; these
  // are the candidates whose non-predicated cost is modeled as a branch.
  const auto &SIset = getSIset(SIGroups);
  // Compute instruction and loop-critical-path costs across two iterations for
  // both predicated and non-predicated version.
  const unsigned Iterations = 2;
  for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
    // Cost of the loop's critical path.
    CostInfo &MaxCost = LoopCost[Iter];
    for (BasicBlock *BB : L->getBlocks()) {
      for (const Instruction &I : *BB) {
        if (I.isDebugOrPseudoInst())
          continue;
        // Compute the predicated and non-predicated cost of the instruction.
        Scaled64 IPredCost = Scaled64::getZero(),
                 INonPredCost = Scaled64::getZero();

        // Assume infinite resources that allow to fully exploit the available
        // instruction-level parallelism.
        // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
        // InstCostMap persists across the two iterations, so operand costs
        // from the previous iteration model loop-carried dependences.
        for (const Use &U : I.operands()) {
          auto UI = dyn_cast<Instruction>(U.get());
          if (!UI)
            continue;
          if (InstCostMap.count(UI)) {
            IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
            INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
          }
        }
        auto ILatency = computeInstCost(&I);
        if (!ILatency) {
          // Without a valid latency for every instruction, the loop-level
          // cost comparison would be meaningless; abandon the analysis.
          OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
          ORmissL << "Invalid instruction cost preventing analysis and "
                     "optimization of the inner-most loop containing this "
                     "instruction. ";
          EmitAndPrintRemark(ORE, ORmissL);
          return false;
        }
        IPredCost += Scaled64::get(*ILatency);
        INonPredCost += Scaled64::get(*ILatency);

        // For a select that can be converted to branch,
        // compute its cost as a branch (non-predicated cost).
        //
        // BranchCost = PredictedPathCost + MispredictCost
        // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
        // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
        if (SIset.contains(&I)) {
          auto SI = cast<SelectInst>(&I);

          Scaled64 TrueOpCost = Scaled64::getZero(),
                   FalseOpCost = Scaled64::getZero();
          if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
            if (InstCostMap.count(TI))
              TrueOpCost = InstCostMap[TI].NonPredCost;
          if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
            if (InstCostMap.count(FI))
              FalseOpCost = InstCostMap[FI].NonPredCost;
          Scaled64 PredictedPathCost =
              getPredictedPathCost(TrueOpCost, FalseOpCost, SI);

          Scaled64 CondCost = Scaled64::getZero();
          if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
            if (InstCostMap.count(CI))
              CondCost = InstCostMap[CI].NonPredCost;
          Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);

          INonPredCost = PredictedPathCost + MispredictCost;
        }
        LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
                          << INonPredCost << " for " << I << "\n");

        // Record this instruction's costs and fold them into the
        // critical-path (maximum) costs for the current iteration.
        InstCostMap[&I] = {IPredCost, INonPredCost};
        MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
        MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
      }
    }
    LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
                      << " MaxCost = " << MaxCost.PredCost << " "
                      << MaxCost.NonPredCost << "\n");
  }
  return true;
}
1021 
1022 SmallPtrSet<const Instruction *, 2>
1023 SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {
1024   SmallPtrSet<const Instruction *, 2> SIset;
1025   for (const SelectGroup &ASI : SIGroups)
1026     for (const SelectInst *SI : ASI)
1027       SIset.insert(SI);
1028   return SIset;
1029 }
1030 
1031 std::optional<uint64_t>
1032 SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1033   InstructionCost ICost =
1034       TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1035   if (auto OC = ICost.getValue())
1036     return std::optional<uint64_t>(*OC);
1037   return std::nullopt;
1038 }
1039 
1040 ScaledNumber<uint64_t>
1041 SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI,
1042                                          const Scaled64 CondCost) {
1043   uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1044 
1045   // Account for the default misprediction rate when using a branch
1046   // (conservatively set to 25% by default).
1047   uint64_t MispredictRate = MispredictDefaultRate;
1048   // If the select condition is obviously predictable, then the misprediction
1049   // rate is zero.
1050   if (isSelectHighlyPredictable(SI))
1051     MispredictRate = 0;
1052 
1053   // CondCost is included to account for cases where the computation of the
1054   // condition is part of a long dependence chain (potentially loop-carried)
1055   // that would delay detection of a misprediction and increase its cost.
1056   Scaled64 MispredictCost =
1057       std::max(Scaled64::get(MispredictPenalty), CondCost) *
1058       Scaled64::get(MispredictRate);
1059   MispredictCost /= Scaled64::get(100);
1060 
1061   return MispredictCost;
1062 }
1063 
1064 // Returns the cost of a branch when the prediction is correct.
1065 // TrueCost * TrueProbability + FalseCost * FalseProbability.
1066 ScaledNumber<uint64_t>
1067 SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1068                                          const SelectInst *SI) {
1069   Scaled64 PredPathCost;
1070   uint64_t TrueWeight, FalseWeight;
1071   if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
1072     uint64_t SumWeight = TrueWeight + FalseWeight;
1073     if (SumWeight != 0) {
1074       PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1075                      FalseCost * Scaled64::get(FalseWeight);
1076       PredPathCost /= Scaled64::get(SumWeight);
1077       return PredPathCost;
1078     }
1079   }
1080   // Without branch weight metadata, we assume 75% for the one path and 25% for
1081   // the other, and pick the result with the biggest cost.
1082   PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1083                           FalseCost * Scaled64::get(3) + TrueCost);
1084   PredPathCost /= Scaled64::get(4);
1085   return PredPathCost;
1086 }
1087 
1088 bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {
1089   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1090   if (VectorCond)
1091     return false;
1092   TargetLowering::SelectSupportKind SelectKind;
1093   if (SI->getType()->isVectorTy())
1094     SelectKind = TargetLowering::ScalarCondVectorVal;
1095   else
1096     SelectKind = TargetLowering::ScalarValSelect;
1097   return TLI->isSelectSupported(SelectKind);
1098 }
1099