Lines Matching +full:oc +full:- +full:level +full:- +full:select
1 //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
47 #define DEBUG_TYPE "select-optimize"
50 "Number of select groups considered for conversion to branch");
52 "Number of select groups converted due to expensive cold operand");
54 "Number of select groups converted due to high-predictability");
56 "Number of select groups not converted due to unpredictability");
58 "Number of select groups not converted due to cold basic block");
60 "Number of select groups converted due to loop-level analysis");
64 "cold-operand-threshold",
69 "cold-operand-max-cost-multiplier",
75 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
80 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
85 "select-opti-loop-relative-gain-threshold",
91 "mispredict-default-rate", cl::Hidden, cl::init(25),
95 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
97 cl::desc("Disable loop-level heuristics."));
123 /// Non-predicated cost (with selects converted to branches).
129 /// select(icmp, X|1, X).
133 /// The select (/or) instruction.
135 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
140 /// Match a select or select-like instruction, returning a SelectLike.
142 // Select instruction are what we are usually looking for. in match()
146 // An Or(zext(i1 X), Y) can also be treated like a select, with condition in match()
151 X->getType()->isIntegerTy(1)) in match()
160 /// Invert the select by inverting the condition and switching the operands.
164 cast<Instruction>(getCondition())->getOpcode() == in setInverted()
173 Type *getType() const { return I->getType(); } in getType()
177 return Sel->getCondition(); in getNonInvertedCondition()
181 if (PatternMatch::match(BO->getOperand(0), in getNonInvertedCondition()
184 if (PatternMatch::match(BO->getOperand(1), in getNonInvertedCondition()
193 /// condition of a select or c in `or(zext(c), x)`
199 return cast<Instruction>(CC)->getOperand(0); in getCondition()
211 return Sel->getTrueValue(); in getTrueValue()
212 // Or(zext) case - The true value is Or(X), so return nullptr as the value in getTrueValue()
221 /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
222 /// `select(c, x|1, x)`)
227 return Sel->getFalseValue(); in getFalseValue()
228 // Or(zext) case - return the operand which is not the zext. in getFalseValue()
231 if (PatternMatch::match(BO->getOperand(0), in getFalseValue()
233 return BO->getOperand(1); in getFalseValue()
234 if (PatternMatch::match(BO->getOperand(1), in getFalseValue()
236 return BO->getOperand(0); in getFalseValue()
243 /// InstCostMap. This may need to be generated for select-like instructions.
251 // Or case - add the cost of an extra Or to the cost of the False case. in getTrueOpCost()
255 InstructionCost OrCost = TTI->getArithmeticInstrCost( in getTrueOpCost()
256 Instruction::Or, I->getType(), TargetTransformInfo::TCK_Latency, in getTrueOpCost()
268 /// InstCostMap. This may need to be generated for select-like instructions.
277 // Or case - return the cost of the false case in getFalseOpCost()
288 // Select groups consist of consecutive select instructions with the same
293 // Converts select instructions of a function to conditional jumps when deemed
294 // profitable. Returns true if at least one select was converted.
297 // Heuristics for determining which select instructions can be profitably
298 // conveted to branches. Separate heuristics for selects in inner-most loops
299 // and the rest of code regions (base heuristics for non-inner-most loop
304 // Converts to branches the select groups that were deemed
305 // profitable-to-convert.
308 // Splits selects of a given basic block into select groups.
311 // Determines for which select groups it is profitable converting to branches
312 // (base and inner-most-loop heuristics).
318 // Determines if a select group should be converted to a branch (base
333 // Returns true if the condition of the select is highly predictable.
336 // Loop-level checks to determine if a non-predicated version (with branches)
340 // Computes instruction and loop-critical-path costs for both the predicated
341 // and non-predicated version of the given loop.
346 // Returns a set of all the select instructions in the given select groups.
353 // Returns the misprediction cost of a given select when converted to branch.
360 // Returns true if the target architecture supports lowering a given select.
413 TSI = TM->getSubtargetImpl(F); in run()
414 TLI = TSI->getTargetLowering(); in run()
416 // If none of the select types are supported then skip this pass. in run()
419 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && in run()
420 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && in run()
421 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) in run()
425 if (!TTI->enableSelectOptimize()) in run()
430 assert(PSI && "This pass requires module analysis pass `profile-summary`!"); in run()
447 TSI = TM->getSubtargetImpl(F); in runOnFunction()
448 TLI = TSI->getTargetLowering(); in runOnFunction()
450 // If none of the select types are supported then skip this pass. in runOnFunction()
453 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && in runOnFunction()
454 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && in runOnFunction()
455 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) in runOnFunction()
460 if (!TTI->enableSelectOptimize()) in runOnFunction()
477 // Determine for which select groups it is profitable converting to branches. in optimizeSelects()
479 // Base heuristics apply only to non-loops and outer loops. in optimizeSelects()
481 // Separate heuristics for inner-most loops. in optimizeSelects()
484 // Convert to branches the select groups that were deemed in optimizeSelects()
485 // profitable-to-convert. in optimizeSelects()
488 // Code modified if at least one select group was converted. in optimizeSelects()
494 // Collect all the select groups. in optimizeSelectsBase()
497 // Base heuristics apply only to non-loops and outer loops. in optimizeSelectsBase()
498 Loop *L = LI->getLoopFor(&BB); in optimizeSelectsBase()
499 if (L && L->isInnermost()) in optimizeSelectsBase()
504 // Determine for which select groups it is profitable converting to branches. in optimizeSelectsBase()
510 SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); in optimizeSelectsInnerLoops()
513 for (Loop *ChildL : Loops[i]->getSubLoops()) in optimizeSelectsInnerLoops()
517 if (!L->isInnermost()) in optimizeSelectsInnerLoops()
521 for (BasicBlock *BB : L->getBlocks()) in optimizeSelectsInnerLoops()
530 /// select instructions in \p Selects, look through the defining select
540 if (DefSI->getCondition() == SI.getCondition()) in getTrueOrFalseValue()
541 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); in getTrueOrFalseValue()
543 V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); in getTrueOrFalseValue()
547 assert(SI.getI()->getOpcode() == Instruction::Or && in getTrueOrFalseValue()
551 V = IB.CreateOr(V, ConstantInt::get(V->getType(), 1)); in getTrueOrFalseValue()
554 assert(V && "Failed to get select true/false value"); in getTrueOrFalseValue()
571 // %sel = select i1 %cmp, i32 %c, i32 %d in convertProfitableSIGroups()
577 // br i1 %cmp.frozen, label %select.true, label %select.false in convertProfitableSIGroups()
578 // select.true: in convertProfitableSIGroups()
579 // br label %select.end in convertProfitableSIGroups()
580 // select.false: in convertProfitableSIGroups()
581 // br label %select.end in convertProfitableSIGroups()
582 // select.end: in convertProfitableSIGroups()
583 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] in convertProfitableSIGroups()
590 // first branch will point directly to select.end, and the corresponding PHI in convertProfitableSIGroups()
595 // operands of the select instructions in the group and can be sunk without in convertProfitableSIGroups()
602 // For each select, compute the sinkable dependence chains of the true and in convertProfitableSIGroups()
611 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) { in convertProfitableSIGroups()
619 // In the case of multiple select instructions in the same group, the order in convertProfitableSIGroups()
620 // of non-dependent instructions (instructions of different dependence in convertProfitableSIGroups()
647 // We split the block containing the select(s) into two blocks. in convertProfitableSIGroups()
650 BasicBlock *StartBlock = SI.getI()->getParent(); in convertProfitableSIGroups()
658 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); in convertProfitableSIGroups()
659 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock)); in convertProfitableSIGroups()
661 StartBlock->getTerminator()->eraseFromParent(); in convertProfitableSIGroups()
663 // Move any debug/pseudo instructions and not's that were in-between the in convertProfitableSIGroups()
664 // select group to the newly-created end block. in convertProfitableSIGroups()
666 auto DIt = SI.getI()->getIterator(); in convertProfitableSIGroups()
668 if (DIt->isDebugOrPseudoInst()) in convertProfitableSIGroups()
675 DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt()); in convertProfitableSIGroups()
677 // Duplicate implementation for DbgRecords, the non-instruction debug-info in convertProfitableSIGroups()
683 EndBlock->insertDbgRecordBefore(&DbgRecord, in convertProfitableSIGroups()
684 EndBlock->getFirstInsertionPt()); in convertProfitableSIGroups()
690 // middle" of the select group. in convertProfitableSIGroups()
691 auto R = make_range(std::next(SI.getI()->getIterator()), in convertProfitableSIGroups()
692 std::next(LastSI.getI()->getIterator())); in convertProfitableSIGroups()
700 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink", in convertProfitableSIGroups()
701 EndBlock->getParent(), EndBlock); in convertProfitableSIGroups()
703 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); in convertProfitableSIGroups()
705 TrueInst->moveBefore(TrueBranch); in convertProfitableSIGroups()
709 BasicBlock::Create(EndBlock->getContext(), "select.false.sink", in convertProfitableSIGroups()
710 EndBlock->getParent(), EndBlock); in convertProfitableSIGroups()
712 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); in convertProfitableSIGroups()
714 FalseInst->moveBefore(FalseBranch); in convertProfitableSIGroups()
720 "Unexpected basic block transform while optimizing select"); in convertProfitableSIGroups()
722 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false", in convertProfitableSIGroups()
723 EndBlock->getParent(), EndBlock); in convertProfitableSIGroups()
725 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc()); in convertProfitableSIGroups()
748 SI.getCondition()->getName() + ".frozen"); in convertProfitableSIGroups()
754 // Use reverse iterator because later select may use the value of the in convertProfitableSIGroups()
755 // earlier select, and we need to propagate value through earlier select in convertProfitableSIGroups()
759 // The select itself is replaced with a PHI Node. in convertProfitableSIGroups()
761 PN->insertBefore(EndBlock->begin()); in convertProfitableSIGroups()
762 PN->takeName(SI.getI()); in convertProfitableSIGroups()
763 PN->addIncoming(getTrueOrFalseValue(SI, true, INS, IB), TrueBlock); in convertProfitableSIGroups()
764 PN->addIncoming(getTrueOrFalseValue(SI, false, INS, IB), FalseBlock); in convertProfitableSIGroups()
765 PN->setDebugLoc(SI.getI()->getDebugLoc()); in convertProfitableSIGroups()
766 SI.getI()->replaceAllUsesWith(PN); in convertProfitableSIGroups()
772 // Remove the old select instructions, now that they are not longer used. in convertProfitableSIGroups()
774 SI.getI()->eraseFromParent(); in convertProfitableSIGroups()
784 if (!TTI->shouldTreatInstructionLikeSelect(I)) in collectSelectGroups()
792 // formation of a select group. in collectSelectGroups()
793 if (NI->isDebugOrPseudoInst()) { in collectSelectGroups()
798 // Skip not(select(..)), if the not is part of the same select group in collectSelectGroups()
804 // We only allow selects in the same group, not other select-like in collectSelectGroups()
821 // If the select type is not supported, no point optimizing it. in collectSelectGroups()
827 dbgs() << "New Select group with\n"; in collectSelectGroups()
849 ORE->emit(Rem); in EmitAndPrintRemark()
855 // For each select group in an inner-most loop, in findProfitableSIGroupsInnerLoops()
856 // a branch is more preferable than a select/conditional-move if: in findProfitableSIGroupsInnerLoops()
857 // i) conversion to branches for all the select groups of the loop satisfies in findProfitableSIGroupsInnerLoops()
858 // loop-level heuristics including reducing the loop's critical path by in findProfitableSIGroupsInnerLoops()
860 // ii) the total cost of the select group is cheaper with a branch compared in findProfitableSIGroupsInnerLoops()
862 // of a select group is the cost of its most expensive select instruction in findProfitableSIGroupsInnerLoops()
892 ORmiss << "Select is more profitable (loop analysis). BranchCost=" in findProfitableSIGroupsInnerLoops()
903 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI() in isConvertToBranchProfitableBase()
909 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) { in isConvertToBranchProfitableBase()
917 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) { in isConvertToBranchProfitableBase()
925 // predictable select is inexpensive in the target architecture. in isConvertToBranchProfitableBase()
926 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { in isConvertToBranchProfitableBase()
967 } else if (PSI->hasProfileSummary()) { in hasExpensiveColdOperand()
970 ORmiss << "Profile data available but missing branch-weights metadata for " in hasExpensiveColdOperand()
971 "select instruction. "; in hasExpensiveColdOperand()
993 SliceCost += TTI->getInstructionCost(ColdSlice.top(), in hasExpensiveColdOperand()
997 // The colder the cold value operand of the select is the more expensive in hasExpensiveColdOperand()
1013 // modifying memory in-between the load and the select instruction.
1016 if (LoadI->getParent() != SI->getParent()) in isSafeToSinkLoad()
1018 auto It = LoadI->getIterator(); in isSafeToSinkLoad()
1020 if (It->mayWriteToMemory()) in isSafeToSinkLoad()
1030 // (sufficiently-accurate in practice), we populate this set with the
1031 // instructions of the backwards dependence slice that only have one-use and
1032 // form an one-use chain that leads to the source instruction.
1048 if (!II->hasOneUse()) in getExclBackwardsSlice()
1051 // Cannot soundly sink instructions with side-effects. in getExclBackwardsSlice()
1053 // Avoid sinking other select instructions (should be handled separetely). in getExclBackwardsSlice()
1054 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || in getExclBackwardsSlice()
1058 // Avoid sinking loads in order not to skip state-modifying instructions, in getExclBackwardsSlice()
1062 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) in getExclBackwardsSlice()
1067 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) in getExclBackwardsSlice()
1070 // Eligible one-use instruction added to the dependence slice. in getExclBackwardsSlice()
1074 for (Value *Op : II->operand_values()) in getExclBackwardsSlice()
1087 if (Probability > TTI->getPredictableBranchThreshold()) in isSelectHighlyPredictable()
1096 // Loop-level checks to determine if a non-predicated version (with branches) in checkLoopHeuristics()
1103 L->getHeader()->getFirstNonPHI()); in checkLoopHeuristics()
1107 ORmissL << "No select conversion in the loop due to no reduction of loop's " in checkLoopHeuristics()
1113 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, in checkLoopHeuristics()
1114 LoopCost[1].PredCost - LoopCost[1].NonPredCost}; in checkLoopHeuristics()
1122 ORmissL << "No select conversion in the loop due to small reduction of " in checkLoopHeuristics()
1130 // If the loop's critical path involves loop-carried dependences, the gradient in checkLoopHeuristics()
1136 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / in checkLoopHeuristics()
1137 (LoopCost[1].PredCost - LoopCost[0].PredCost); in checkLoopHeuristics()
1139 ORmissL << "No select conversion in the loop due to small gradient gain. " in checkLoopHeuristics()
1149 << "No select conversion in the loop due to negative gradient gain. "; in checkLoopHeuristics()
1154 // Non-predicated version of the loop is more profitable than its in checkLoopHeuristics()
1159 // Computes instruction and loop-critical-path costs for both the predicated
1160 // and non-predicated version of the given loop.
1167 << L->getHeader()->getName() << "\n"); in computeLoopCosts()
1169 // Compute instruction and loop-critical-path costs across two iterations for in computeLoopCosts()
1170 // both predicated and non-predicated version. in computeLoopCosts()
1175 for (BasicBlock *BB : L->getBlocks()) { in computeLoopCosts()
1179 // Compute the predicated and non-predicated cost of the instruction. in computeLoopCosts()
1184 // instruction-level parallelism. in computeLoopCosts()
1199 "optimization of the inner-most loop containing this " in computeLoopCosts()
1207 // For a select that can be converted to branch, in computeLoopCosts()
1208 // compute its cost as a branch (non-predicated cost). in computeLoopCosts()
1255 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); in computeInstCost()
1256 if (auto OC = ICost.getValue()) in computeInstCost() local
1257 return std::optional<uint64_t>(*OC); in computeInstCost()
1264 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; in getMispredictionCost()
1269 // If the select condition is obviously predictable, then the misprediction in getMispredictionCost()
1275 // condition is part of a long dependence chain (potentially loop-carried) in getMispredictionCost()
1310 bool VectorCond = !SI.getCondition()->getType()->isIntegerTy(1); in isSelectKindSupported()
1314 if (SI.getType()->isVectorTy()) in isSelectKindSupported()
1318 return TLI->isSelectSupported(SelectKind); in isSelectKindSupported()