Lines Matching +full:non +full:- +full:inverted
1 //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
47 #define DEBUG_TYPE "select-optimize"
54 "Number of select groups converted due to high-predictability");
60 "Number of select groups converted due to loop-level analysis");
64 "cold-operand-threshold",
69 "cold-operand-max-cost-multiplier",
75 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
80 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
85 "select-opti-loop-relative-gain-threshold",
91 "mispredict-default-rate", cl::Hidden, cl::init(25),
95 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
97 cl::desc("Disable loop-level heuristics."));
123 /// Non-predicated cost (with selects converted to branches).
135 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
137 bool Inverted = false; member in __anon135b54a50111::SelectOptimizeImpl::SelectLike
140 /// Match a select or select-like instruction, returning a SelectLike.
151 X->getType()->isIntegerTy(1)) in match()
162 assert(!Inverted && "Trying to invert an inverted SelectLike"); in setInverted()
164 cast<Instruction>(getCondition())->getOpcode() == in setInverted()
166 Inverted = true; in setInverted()
168 bool isInverted() const { return Inverted; } in isInverted()
173 Type *getType() const { return I->getType(); } in getType()
177 return Sel->getCondition(); in getNonInvertedCondition()
181 if (PatternMatch::match(BO->getOperand(0), in getNonInvertedCondition()
184 if (PatternMatch::match(BO->getOperand(1), in getNonInvertedCondition()
196 // For inverted conditions the CC is checked when created to be a not in getCondition()
198 if (Inverted) in getCondition()
199 return cast<Instruction>(CC)->getOperand(0); in getCondition()
208 if (Inverted && HonorInverts) in getTrueValue()
211 return Sel->getTrueValue(); in getTrueValue()
212 // Or(zext) case - The true value is Or(X), so return nullptr as the value in getTrueValue()
224 if (Inverted && HonorInverts) in getFalseValue()
227 return Sel->getFalseValue(); in getFalseValue()
228 // Or(zext) case - return the operand which is not the zext. in getFalseValue()
231 if (PatternMatch::match(BO->getOperand(0), in getFalseValue()
233 return BO->getOperand(1); in getFalseValue()
234 if (PatternMatch::match(BO->getOperand(1), in getFalseValue()
236 return BO->getOperand(0); in getFalseValue()
243 /// InstCostMap. This may need to be generated for select-like instructions.
251 // Or case - add the cost of an extra Or to the cost of the False case. in getTrueOpCost()
255 InstructionCost OrCost = TTI->getArithmeticInstrCost( in getTrueOpCost()
256 Instruction::Or, I->getType(), TargetTransformInfo::TCK_Latency, in getTrueOpCost()
268 /// InstCostMap. This may need to be generated for select-like instructions.
277 // Or case - return the cost of the false case in getFalseOpCost()
298 // conveted to branches. Separate heuristics for selects in inner-most loops
299 // and the rest of code regions (base heuristics for non-inner-most loop
305 // profitable-to-convert.
312 // (base and inner-most-loop heuristics).
336 // Loop-level checks to determine if a non-predicated version (with branches)
340 // Computes instruction and loop-critical-path costs for both the predicated
341 // and non-predicated version of the given loop.
413 TSI = TM->getSubtargetImpl(F); in run()
414 TLI = TSI->getTargetLowering(); in run()
419 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && in run()
420 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && in run()
421 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) in run()
425 if (!TTI->enableSelectOptimize()) in run()
430 assert(PSI && "This pass requires module analysis pass `profile-summary`!"); in run()
447 TSI = TM->getSubtargetImpl(F); in runOnFunction()
448 TLI = TSI->getTargetLowering(); in runOnFunction()
453 if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && in runOnFunction()
454 !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && in runOnFunction()
455 !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) in runOnFunction()
460 if (!TTI->enableSelectOptimize()) in runOnFunction()
479 // Base heuristics apply only to non-loops and outer loops. in optimizeSelects()
481 // Separate heuristics for inner-most loops. in optimizeSelects()
485 // profitable-to-convert. in optimizeSelects()
497 // Base heuristics apply only to non-loops and outer loops. in optimizeSelectsBase()
498 Loop *L = LI->getLoopFor(&BB); in optimizeSelectsBase()
499 if (L && L->isInnermost()) in optimizeSelectsBase()
510 SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); in optimizeSelectsInnerLoops()
513 for (Loop *ChildL : Loops[i]->getSubLoops()) in optimizeSelectsInnerLoops()
517 if (!L->isInnermost()) in optimizeSelectsInnerLoops()
521 for (BasicBlock *BB : L->getBlocks()) in optimizeSelectsInnerLoops()
540 if (DefSI->getCondition() == SI.getCondition()) in getTrueOrFalseValue()
541 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); in getTrueOrFalseValue()
542 else // Handle inverted SI in getTrueOrFalseValue()
543 V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); in getTrueOrFalseValue()
547 assert(SI.getI()->getOpcode() == Instruction::Or && in getTrueOrFalseValue()
551 V = IB.CreateOr(V, ConstantInt::get(V->getType(), 1)); in getTrueOrFalseValue()
611 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) { in convertProfitableSIGroups()
620 // of non-dependent instructions (instructions of different dependence in convertProfitableSIGroups()
650 BasicBlock *StartBlock = SI.getI()->getParent(); in convertProfitableSIGroups()
658 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); in convertProfitableSIGroups()
659 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock)); in convertProfitableSIGroups()
661 StartBlock->getTerminator()->eraseFromParent(); in convertProfitableSIGroups()
663 // Move any debug/pseudo instructions and not's that were in-between the in convertProfitableSIGroups()
664 // select group to the newly-created end block. in convertProfitableSIGroups()
666 auto DIt = SI.getI()->getIterator(); in convertProfitableSIGroups()
668 if (DIt->isDebugOrPseudoInst()) in convertProfitableSIGroups()
675 DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt()); in convertProfitableSIGroups()
677 // Duplicate implementation for DbgRecords, the non-instruction debug-info in convertProfitableSIGroups()
683 EndBlock->insertDbgRecordBefore(&DbgRecord, in convertProfitableSIGroups()
684 EndBlock->getFirstInsertionPt()); in convertProfitableSIGroups()
691 auto R = make_range(std::next(SI.getI()->getIterator()), in convertProfitableSIGroups()
692 std::next(LastSI.getI()->getIterator())); in convertProfitableSIGroups()
700 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink", in convertProfitableSIGroups()
701 EndBlock->getParent(), EndBlock); in convertProfitableSIGroups()
703 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); in convertProfitableSIGroups()
705 TrueInst->moveBefore(TrueBranch); in convertProfitableSIGroups()
709 BasicBlock::Create(EndBlock->getContext(), "select.false.sink", in convertProfitableSIGroups()
710 EndBlock->getParent(), EndBlock); in convertProfitableSIGroups()
712 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); in convertProfitableSIGroups()
714 FalseInst->moveBefore(FalseBranch); in convertProfitableSIGroups()
722 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false", in convertProfitableSIGroups()
723 EndBlock->getParent(), EndBlock); in convertProfitableSIGroups()
725 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc()); in convertProfitableSIGroups()
748 SI.getCondition()->getName() + ".frozen"); in convertProfitableSIGroups()
761 PN->insertBefore(EndBlock->begin()); in convertProfitableSIGroups()
762 PN->takeName(SI.getI()); in convertProfitableSIGroups()
763 PN->addIncoming(getTrueOrFalseValue(SI, true, INS, IB), TrueBlock); in convertProfitableSIGroups()
764 PN->addIncoming(getTrueOrFalseValue(SI, false, INS, IB), FalseBlock); in convertProfitableSIGroups()
765 PN->setDebugLoc(SI.getI()->getDebugLoc()); in convertProfitableSIGroups()
766 SI.getI()->replaceAllUsesWith(PN); in convertProfitableSIGroups()
774 SI.getI()->eraseFromParent(); in convertProfitableSIGroups()
784 if (!TTI->shouldTreatInstructionLikeSelect(I)) in collectSelectGroups()
793 if (NI->isDebugOrPseudoInst()) { in collectSelectGroups()
804 // We only allow selects in the same group, not other select-like in collectSelectGroups()
849 ORE->emit(Rem); in EmitAndPrintRemark()
855 // For each select group in an inner-most loop, in findProfitableSIGroupsInnerLoops()
856 // a branch is more preferable than a select/conditional-move if: in findProfitableSIGroupsInnerLoops()
858 // loop-level heuristics including reducing the loop's critical path by in findProfitableSIGroupsInnerLoops()
909 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) { in isConvertToBranchProfitableBase()
917 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) { in isConvertToBranchProfitableBase()
926 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { in isConvertToBranchProfitableBase()
967 } else if (PSI->hasProfileSummary()) { in hasExpensiveColdOperand()
970 ORmiss << "Profile data available but missing branch-weights metadata for " in hasExpensiveColdOperand()
993 SliceCost += TTI->getInstructionCost(ColdSlice.top(), in hasExpensiveColdOperand()
1013 // modifying memory in-between the load and the select instruction.
1016 if (LoadI->getParent() != SI->getParent()) in isSafeToSinkLoad()
1018 auto It = LoadI->getIterator(); in isSafeToSinkLoad()
1020 if (It->mayWriteToMemory()) in isSafeToSinkLoad()
1030 // (sufficiently-accurate in practice), we populate this set with the
1031 // instructions of the backwards dependence slice that only have one-use and
1032 // form an one-use chain that leads to the source instruction.
1048 if (!II->hasOneUse()) in getExclBackwardsSlice()
1051 // Cannot soundly sink instructions with side-effects. in getExclBackwardsSlice()
1054 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || in getExclBackwardsSlice()
1058 // Avoid sinking loads in order not to skip state-modifying instructions, in getExclBackwardsSlice()
1062 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) in getExclBackwardsSlice()
1067 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) in getExclBackwardsSlice()
1070 // Eligible one-use instruction added to the dependence slice. in getExclBackwardsSlice()
1074 for (Value *Op : II->operand_values()) in getExclBackwardsSlice()
1087 if (Probability > TTI->getPredictableBranchThreshold()) in isSelectHighlyPredictable()
1096 // Loop-level checks to determine if a non-predicated version (with branches) in checkLoopHeuristics()
1103 L->getHeader()->getFirstNonPHI()); in checkLoopHeuristics()
1113 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, in checkLoopHeuristics()
1114 LoopCost[1].PredCost - LoopCost[1].NonPredCost}; in checkLoopHeuristics()
1130 // If the loop's critical path involves loop-carried dependences, the gradient in checkLoopHeuristics()
1136 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / in checkLoopHeuristics()
1137 (LoopCost[1].PredCost - LoopCost[0].PredCost); in checkLoopHeuristics()
1154 // Non-predicated version of the loop is more profitable than its in checkLoopHeuristics()
1159 // Computes instruction and loop-critical-path costs for both the predicated
1160 // and non-predicated version of the given loop.
1167 << L->getHeader()->getName() << "\n"); in computeLoopCosts()
1169 // Compute instruction and loop-critical-path costs across two iterations for in computeLoopCosts()
1170 // both predicated and non-predicated version. in computeLoopCosts()
1175 for (BasicBlock *BB : L->getBlocks()) { in computeLoopCosts()
1179 // Compute the predicated and non-predicated cost of the instruction. in computeLoopCosts()
1184 // instruction-level parallelism. in computeLoopCosts()
1199 "optimization of the inner-most loop containing this " in computeLoopCosts()
1208 // compute its cost as a branch (non-predicated cost). in computeLoopCosts()
1255 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); in computeInstCost()
1264 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; in getMispredictionCost()
1275 // condition is part of a long dependence chain (potentially loop-carried) in getMispredictionCost()
1310 bool VectorCond = !SI.getCondition()->getType()->isIntegerTy(1); in isSelectKindSupported()
1314 if (SI.getType()->isVectorTy()) in isSelectKindSupported()
1318 return TLI->isSelectSupported(SelectKind); in isSelectKindSupported()