Lines Matching +full:hot +full:- +full:swap

1 //====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
29 /// than the average cost of its true-value and false-value by 25% of
30 /// branch-misprediction-penalty. This assures no degradation even with
35 //===----------------------------------------------------------------------===//
41 //===----------------------------------------------------------------------===//
78 #define DEBUG_TYPE "x86-cmov-conversion"
80 STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
81 STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
82 STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
83 STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");
87 EnableCmovConverter("x86-cmov-converter",
88 cl::desc("Enable the X86 cmov-to-branch optimization."),
92 GainCycleThreshold("x86-cmov-converter-threshold",
97 "x86-cmov-converter-force-mem-operand",
102 "x86-cmov-converter-force-all",
131 /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
136 /// \returns true iff it found any CMOV-group-candidate.
141 /// Check if it is profitable to transform each CMOV-group-candidates into
146 /// \returns true iff any CMOV-group-candidate remain.
186 // Before we handle the more subtle cases of register-register CMOVs inside in runOnMachineFunction()
187 // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll) or in runOnMachineFunction()
200 llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); })) in runOnMachineFunction()
215 //===--------------------------------------------------------------------===// in runOnMachineFunction()
216 // Register-operand Conversion Algorithm in runOnMachineFunction()
217 // --------- in runOnMachineFunction()
220 // Find all CMOV-group-candidates. in runOnMachineFunction()
224 // * Calculate both loop-depth and optimized-loop-depth. in runOnMachineFunction()
226 // * Check for CMOV-group-candidate transformation profitability. in runOnMachineFunction()
229 // For each profitable CMOV-group-candidate in runOnMachineFunction()
236 //===--------------------------------------------------------------------===// in runOnMachineFunction()
238 // Build up the loops in pre-order. in runOnMachineFunction()
239 SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end()); in runOnMachineFunction()
243 for (MachineLoop *Child : Loops[i]->getSubLoops()) in runOnMachineFunction()
248 if (!CurrLoop->getSubLoops().empty()) in runOnMachineFunction()
254 if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups)) in runOnMachineFunction()
257 if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(), in runOnMachineFunction()
272 //===--------------------------------------------------------------------===// in collectCmovCandidates()
273 // Collect all CMOV-group-candidates and add them into CmovInstGroups. in collectCmovCandidates()
275 // CMOV-group: in collectCmovCandidates()
278 // CMOV-group-candidate: in collectCmovCandidates()
279 // CMOV-group where all the CMOV instructions are in collectCmovCandidates()
283 //===--------------------------------------------------------------------===// in collectCmovCandidates()
285 // -------------------------------------- in collectCmovCandidates()
288 // TODO: Add support for CMOV-groups with non consecutive CMOV instructions. in collectCmovCandidates()
289 //===--------------------------------------------------------------------===// in collectCmovCandidates()
291 // Current processed CMOV-Group. in collectCmovCandidates()
301 // Indicator for current processed CMOV-group if it should be skipped. in collectCmovCandidates()
325 // Check if it is a non-consecutive CMOV instruction or it has different in collectCmovCandidates()
328 // Mark the SKipGroup indicator to skip current processed CMOV-Group. in collectCmovCandidates()
338 // Check if we were relying on zero-extending behavior of the CMOV. in collectCmovCandidates()
341 MRI->use_nodbg_instructions(I.defs().begin()->getReg()), in collectCmovCandidates()
346 // the zero-extension rather than just refusing to handle this. in collectCmovCandidates()
359 // Check if current processed CMOV-group should not be skipped and add in collectCmovCandidates()
360 // it as a CMOV-group-candidate. in collectCmovCandidates()
369 // CMOV-group should not be skipped and add it as a CMOV-group-candidate. in collectCmovCandidates()
421 //===--------------------------------------------------------------------===// in checkForProfitableCmovCandidates()
423 // Optimized-Loop: in checkForProfitableCmovCandidates()
424 // loop with CMOV-group-candidates converted into branches. in checkForProfitableCmovCandidates()
426 // Instruction-Depth: in checkForProfitableCmovCandidates()
429 // CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth) in checkForProfitableCmovCandidates()
433 // Loop-Depth: in checkForProfitableCmovCandidates()
435 // Note: instruction with max depth represents the critical-path in the loop. in checkForProfitableCmovCandidates()
437 // Loop-Depth[i]: in checkForProfitableCmovCandidates()
438 // Loop-Depth calculated for first `i` iterations. in checkForProfitableCmovCandidates()
441 // Depth-Diff[i]: in checkForProfitableCmovCandidates()
443 //===--------------------------------------------------------------------===// in checkForProfitableCmovCandidates()
491 unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth, in checkForProfitableCmovCandidates()
492 LoopDepth[1].Depth - LoopDepth[1].OptDepth}; in checkForProfitableCmovCandidates()
494 //===--------------------------------------------------------------------===// in checkForProfitableCmovCandidates()
496 // Worth-Optimize-Loop: in checkForProfitableCmovCandidates()
498 // Critical-path is iteration independent - there is no dependency in checkForProfitableCmovCandidates()
499 // of critical-path instructions on critical-path instructions of in checkForProfitableCmovCandidates()
501 // Thus, it is enough to check gain percent of 1st iteration - in checkForProfitableCmovCandidates()
506 // Critical-path is iteration dependent - there is dependency of in checkForProfitableCmovCandidates()
507 // critical-path instructions on critical-path instructions of in checkForProfitableCmovCandidates()
511 // the gain - the change in Depth-Diff compared to the change in in checkForProfitableCmovCandidates()
512 // Loop-Depth between 1st and 2nd iterations. in checkForProfitableCmovCandidates()
519 // If loop is not worth optimizing, remove all CMOV-group-candidates. in checkForProfitableCmovCandidates()
520 //===--------------------------------------------------------------------===// in checkForProfitableCmovCandidates()
529 (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) && in checkForProfitableCmovCandidates()
537 //===--------------------------------------------------------------------===// in checkForProfitableCmovCandidates()
538 // Step 3: Check for each CMOV-group-candidate if it worth to be optimized. in checkForProfitableCmovCandidates()
539 // Worth-Optimize-Group: in checkForProfitableCmovCandidates()
542 // Worth-Optimize-CMOV: in checkForProfitableCmovCandidates()
546 // at least 25% of branch-misprediction-penalty. in checkForProfitableCmovCandidates()
547 //===--------------------------------------------------------------------===// in checkForProfitableCmovCandidates()
548 unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; in checkForProfitableCmovCandidates()
550 std::swap(TempGroups, CmovInstGroups); in checkForProfitableCmovCandidates()
556 // used with tree-search like algorithm, where the branch is unpredicted. in checkForProfitableCmovCandidates()
557 auto UIs = MRI->use_instructions(MI->defs().begin()->getReg()); in checkForProfitableCmovCandidates()
559 unsigned Op = UIs.begin()->getOpcode(); in checkForProfitableCmovCandidates()
567 DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth; in checkForProfitableCmovCandidates()
569 DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth, in checkForProfitableCmovCandidates()
570 DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth); in checkForProfitableCmovCandidates()
571 if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) { in checkForProfitableCmovCandidates()
585 if (MI->killsRegister(X86::EFLAGS, /*TRI=*/nullptr)) in checkEFLAGSLive()
590 MachineBasicBlock *BB = MI->getParent(); in checkEFLAGSLive()
594 for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) { in checkEFLAGSLive()
595 if (I->readsRegister(X86::EFLAGS, /*TRI=*/nullptr)) in checkEFLAGSLive()
597 if (I->definesRegister(X86::EFLAGS, /*TRI=*/nullptr)) in checkEFLAGSLive()
602 for (MachineBasicBlock *Succ : BB->successors()) in checkEFLAGSLive()
603 if (Succ->isLiveIn(X86::EFLAGS)) in checkEFLAGSLive()
618 for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) { in packCmovGroup()
619 if (I->isDebugInstr()) in packCmovGroup()
624 MachineBasicBlock *MBB = First->getParent(); in packCmovGroup()
626 MBB->insertAfter(Last, MI->removeFromParent()); in packCmovGroup()
640 // control-flow pattern. The incoming instruction knows the destination vreg in convertCmovInstsToBranches()
645 // ----- in convertCmovInstsToBranches()
653 // ----- in convertCmovInstsToBranches()
664 // ; true-value with false-value in convertCmovInstsToBranches()
674 // Potentially swap the condition codes so that any memory operand to a CMOV in convertCmovInstsToBranches()
676 // any non-memory operand CMOV instructions to cope with this and we ensure in convertCmovInstsToBranches()
679 return I->mayLoad() && X86::getCondFromCMov(*I) == CC; in convertCmovInstsToBranches()
681 std::swap(CC, OppCC); in convertCmovInstsToBranches()
684 MachineFunction::iterator It = ++MBB->getIterator(); in convertCmovInstsToBranches()
685 MachineFunction *F = MBB->getParent(); in convertCmovInstsToBranches()
686 const BasicBlock *BB = MBB->getBasicBlock(); in convertCmovInstsToBranches()
688 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB); in convertCmovInstsToBranches()
689 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB); in convertCmovInstsToBranches()
690 F->insert(It, FalseMBB); in convertCmovInstsToBranches()
691 F->insert(It, SinkMBB); in convertCmovInstsToBranches()
696 FalseMBB->addLiveIn(X86::EFLAGS); in convertCmovInstsToBranches()
697 SinkMBB->addLiveIn(X86::EFLAGS); in convertCmovInstsToBranches()
701 SinkMBB->splice(SinkMBB->begin(), MBB, in convertCmovInstsToBranches()
702 std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end()); in convertCmovInstsToBranches()
703 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB); in convertCmovInstsToBranches()
706 MBB->addSuccessor(FalseMBB); in convertCmovInstsToBranches()
707 MBB->addSuccessor(SinkMBB); in convertCmovInstsToBranches()
710 BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC); in convertCmovInstsToBranches()
713 FalseMBB->addSuccessor(SinkMBB); in convertCmovInstsToBranches()
719 MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin(); in convertCmovInstsToBranches()
720 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); in convertCmovInstsToBranches()
731 // Remember the false-side register input. in convertCmovInstsToBranches()
739 FalseReg = FRIt->second; in convertCmovInstsToBranches()
749 "Can only handle memory-operand cmov instructions with a condition " in convertCmovInstsToBranches()
774 const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg()); in convertCmovInstsToBranches()
775 Register TmpReg = MRI->createVirtualRegister(RC); in convertCmovInstsToBranches()
780 bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg, in convertCmovInstsToBranches()
791 LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump()); in convertCmovInstsToBranches()
792 MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV); in convertCmovInstsToBranches()
797 NewCMOV->setDebugInstrNum(OldDebugInstrNum); in convertCmovInstsToBranches()
802 LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump()); in convertCmovInstsToBranches()
803 FalseMBB->insert(FalseInsertionPoint, NewMI); in convertCmovInstsToBranches()
804 // Re-map any operands that are from other cmovs to the inputs for this block. in convertCmovInstsToBranches()
805 for (auto &MOp : NewMI->uses()) { in convertCmovInstsToBranches()
812 MOp.setReg(It->second); in convertCmovInstsToBranches()
821 MBB->erase(&MI); in convertCmovInstsToBranches()
824 FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg; in convertCmovInstsToBranches()
836 Register DestReg = MIIt->getOperand(0).getReg(); in convertCmovInstsToBranches()
837 Register Op1Reg = MIIt->getOperand(1).getReg(); in convertCmovInstsToBranches()
838 Register Op2Reg = MIIt->getOperand(2).getReg(); in convertCmovInstsToBranches()
841 // generated, then we have to swap the operands for the PHI that is going to in convertCmovInstsToBranches()
844 std::swap(Op1Reg, Op2Reg); in convertCmovInstsToBranches()
848 Op1Reg = Op1Itr->second.first; in convertCmovInstsToBranches()
852 Op2Reg = Op2Itr->second.second; in convertCmovInstsToBranches()
857 MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) in convertCmovInstsToBranches()
863 LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump()); in convertCmovInstsToBranches()
864 LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump()); in convertCmovInstsToBranches()
866 // debug-info: we can just copy the instr-ref number from one instruction in convertCmovInstsToBranches()
867 // to the other, seeing how it's a one-for-one substitution. in convertCmovInstsToBranches()
868 if (unsigned InstrNum = MIIt->peekDebugInstrNum()) in convertCmovInstsToBranches()
869 MIB->setDebugInstrNum(InstrNum); in convertCmovInstsToBranches()
878 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); in convertCmovInstsToBranches()
881 MBB->erase(MIItBegin, MIItEnd); in convertCmovInstsToBranches()
884 if (MachineLoop *L = MLI->getLoopFor(MBB)) { in convertCmovInstsToBranches()
885 L->addBasicBlockToLoop(FalseMBB, *MLI); in convertCmovInstsToBranches()
886 L->addBasicBlockToLoop(SinkMBB, *MLI); in convertCmovInstsToBranches()