1 //===- HexagonExpandCondsets.cpp ------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // Replace mux instructions with the corresponding legal instructions. 10 // It is meant to work post-SSA, but still on virtual registers. It was 11 // originally placed between register coalescing and machine instruction 12 // scheduler. 13 // In this place in the optimization sequence, live interval analysis had 14 // been performed, and the live intervals should be preserved. A large part 15 // of the code deals with preserving the liveness information. 16 // 17 // Liveness tracking aside, the main functionality of this pass is divided 18 // into two steps. The first step is to replace an instruction 19 // %0 = C2_mux %1, %2, %3 20 // with a pair of conditional transfers 21 // %0 = A2_tfrt %1, %2 22 // %0 = A2_tfrf %1, %3 23 // It is the intention that the execution of this pass could be terminated 24 // after this step, and the code generated would be functionally correct. 25 // 26 // If the uses of the source values %1 and %2 are kills, and their 27 // definitions are predicable, then in the second step, the conditional 28 // transfers will then be rewritten as predicated instructions. E.g. 29 // %0 = A2_or %1, %2 30 // %3 = A2_tfrt %99, killed %0 31 // will be rewritten as 32 // %3 = A2_port %99, %1, %2 33 // 34 // This replacement has two variants: "up" and "down". Consider this case: 35 // %0 = A2_or %1, %2 36 // ... [intervening instructions] ... 37 // %3 = A2_tfrt %99, killed %0 38 // variant "up": 39 // %3 = A2_port %99, %1, %2 40 // ... [intervening instructions, %0->vreg3] ... 41 // [deleted] 42 // variant "down": 43 // [deleted] 44 // ... [intervening instructions] ... 45 // %3 = A2_port %99, %1, %2 46 // 47 // Both, one or none of these variants may be valid, and checks are made 48 // to rule out inapplicable variants. 49 // 50 // As an additional optimization, before either of the two steps above is 51 // executed, the pass attempts to coalesce the target register with one of 52 // the source registers, e.g. given an instruction 53 // %3 = C2_mux %0, %1, %2 54 // %3 will be coalesced with either %1 or %2. If this succeeds, 55 // the instruction would then be (for example) 56 // %3 = C2_mux %0, %3, %2 57 // and, under certain circumstances, this could result in only one predicated 58 // instruction: 59 // %3 = A2_tfrf %0, %2 60 // 61 62 // Splitting a definition of a register into two predicated transfers 63 // creates a complication in liveness tracking. Live interval computation 64 // will see both instructions as actual definitions, and will mark the 65 // first one as dead. The definition is not actually dead, and this 66 // situation will need to be fixed. For example: 67 // dead %1 = A2_tfrt ... ; marked as dead 68 // %1 = A2_tfrf ... 69 // 70 // Since any of the individual predicated transfers may end up getting 71 // removed (in case it is an identity copy), some pre-existing def may 72 // be marked as dead after live interval recomputation: 73 // dead %1 = ... ; marked as dead 74 // ... 75 // %1 = A2_tfrf ... ; if A2_tfrt is removed 76 // This case happens if %1 was used as a source in A2_tfrt, which means 77 // that is it actually live at the A2_tfrf, and so the now dead definition 78 // of %1 will need to be updated to non-dead at some point. 79 // 80 // This issue could be remedied by adding implicit uses to the predicated 81 // transfers, but this will create a problem with subsequent predication, 82 // since the transfers will no longer be possible to reorder. To avoid 83 // that, the initial splitting will not add any implicit uses. These 84 // implicit uses will be added later, after predication. The extra price, 85 // however, is that finding the locations where the implicit uses need 86 // to be added, and updating the live ranges will be more involved. 87 88 #include "HexagonInstrInfo.h" 89 #include "HexagonRegisterInfo.h" 90 #include "llvm/ADT/DenseMap.h" 91 #include "llvm/ADT/SetVector.h" 92 #include "llvm/ADT/SmallVector.h" 93 #include "llvm/ADT/StringRef.h" 94 #include "llvm/CodeGen/LiveInterval.h" 95 #include "llvm/CodeGen/LiveIntervals.h" 96 #include "llvm/CodeGen/MachineBasicBlock.h" 97 #include "llvm/CodeGen/MachineDominators.h" 98 #include "llvm/CodeGen/MachineFunction.h" 99 #include "llvm/CodeGen/MachineFunctionPass.h" 100 #include "llvm/CodeGen/MachineInstr.h" 101 #include "llvm/CodeGen/MachineInstrBuilder.h" 102 #include "llvm/CodeGen/MachineOperand.h" 103 #include "llvm/CodeGen/MachineRegisterInfo.h" 104 #include "llvm/CodeGen/SlotIndexes.h" 105 #include "llvm/CodeGen/TargetRegisterInfo.h" 106 #include "llvm/CodeGen/TargetSubtargetInfo.h" 107 #include "llvm/IR/DebugLoc.h" 108 #include "llvm/IR/Function.h" 109 #include "llvm/MC/LaneBitmask.h" 110 #include "llvm/Pass.h" 111 #include "llvm/Support/CommandLine.h" 112 #include "llvm/Support/Debug.h" 113 #include "llvm/Support/ErrorHandling.h" 114 #include "llvm/Support/raw_ostream.h" 115 #include <cassert> 116 #include <iterator> 117 #include <set> 118 #include <utility> 119 120 #define DEBUG_TYPE "expand-condsets" 121 122 using namespace llvm; 123 124 static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", 125 cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); 126 static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", 127 cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); 128 129 namespace llvm { 130 131 void initializeHexagonExpandCondsetsPass(PassRegistry&); 132 FunctionPass *createHexagonExpandCondsets(); 133 134 } // end namespace llvm 135 136 namespace { 137 138 class HexagonExpandCondsets : public MachineFunctionPass { 139 public: 140 static char ID; 141 142 HexagonExpandCondsets() : MachineFunctionPass(ID) { 143 if (OptCoaLimit.getPosition()) 144 CoaLimitActive = true, CoaLimit = OptCoaLimit; 145 if (OptTfrLimit.getPosition()) 146 TfrLimitActive = true, TfrLimit = OptTfrLimit; 147 initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); 148 } 149 150 StringRef getPassName() const override { return "Hexagon Expand Condsets"; } 151 152 void getAnalysisUsage(AnalysisUsage &AU) const override { 153 AU.addRequired<LiveIntervals>(); 154 AU.addPreserved<LiveIntervals>(); 155 AU.addPreserved<SlotIndexes>(); 156 AU.addRequired<MachineDominatorTree>(); 157 AU.addPreserved<MachineDominatorTree>(); 158 MachineFunctionPass::getAnalysisUsage(AU); 159 } 160 161 bool runOnMachineFunction(MachineFunction &MF) override; 162 163 private: 164 const HexagonInstrInfo *HII = nullptr; 165 const TargetRegisterInfo *TRI = nullptr; 166 MachineDominatorTree *MDT; 167 MachineRegisterInfo *MRI = nullptr; 168 LiveIntervals *LIS = nullptr; 169 bool CoaLimitActive = false; 170 bool TfrLimitActive = false; 171 unsigned CoaLimit; 172 unsigned TfrLimit; 173 unsigned CoaCounter = 0; 174 unsigned TfrCounter = 0; 175 176 struct RegisterRef { 177 RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), 178 Sub(Op.getSubReg()) {} 179 RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} 180 181 bool operator== (RegisterRef RR) const { 182 return Reg == RR.Reg && Sub == RR.Sub; 183 } 184 bool operator!= (RegisterRef RR) const { return !operator==(RR); } 185 bool operator< (RegisterRef RR) const { 186 return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub); 187 } 188 189 unsigned Reg, Sub; 190 }; 191 192 using ReferenceMap = DenseMap<unsigned, unsigned>; 193 enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; 194 enum { Exec_Then = 0x10, Exec_Else = 0x20 }; 195 196 unsigned getMaskForSub(unsigned Sub); 197 bool isCondset(const MachineInstr &MI); 198 LaneBitmask getLaneMask(unsigned Reg, unsigned Sub); 199 200 void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); 201 bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); 202 203 void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range); 204 void updateKillFlags(unsigned Reg); 205 void updateDeadFlags(unsigned Reg); 206 void recalculateLiveInterval(unsigned Reg); 207 void removeInstr(MachineInstr &MI); 208 void updateLiveness(std::set<unsigned> &RegSet, bool Recalc, 209 bool UpdateKills, bool UpdateDeads); 210 211 unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); 212 MachineInstr *genCondTfrFor(MachineOperand &SrcOp, 213 MachineBasicBlock::iterator At, unsigned DstR, 214 unsigned DstSR, const MachineOperand &PredOp, bool PredSense, 215 bool ReadUndef, bool ImpUse); 216 bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs); 217 218 bool isPredicable(MachineInstr *MI); 219 MachineInstr *getReachingDefForPred(RegisterRef RD, 220 MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); 221 bool canMoveOver(MachineInstr &MI, ReferenceMap &Defs, ReferenceMap &Uses); 222 bool canMoveMemTo(MachineInstr &MI, MachineInstr &ToI, bool IsDown); 223 void predicateAt(const MachineOperand &DefOp, MachineInstr &MI, 224 MachineBasicBlock::iterator Where, 225 const MachineOperand &PredOp, bool Cond, 226 std::set<unsigned> &UpdRegs); 227 void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, 228 bool Cond, MachineBasicBlock::iterator First, 229 MachineBasicBlock::iterator Last); 230 bool predicate(MachineInstr &TfrI, bool Cond, std::set<unsigned> &UpdRegs); 231 bool predicateInBlock(MachineBasicBlock &B, 232 std::set<unsigned> &UpdRegs); 233 234 bool isIntReg(RegisterRef RR, unsigned &BW); 235 bool isIntraBlocks(LiveInterval &LI); 236 bool coalesceRegisters(RegisterRef R1, RegisterRef R2); 237 bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets, 238 std::set<unsigned> &UpdRegs); 239 }; 240 241 } // end anonymous namespace 242 243 char HexagonExpandCondsets::ID = 0; 244 245 namespace llvm { 246 247 char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID; 248 249 } // end namespace llvm 250 251 INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets", 252 "Hexagon Expand Condsets", false, false) 253 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 254 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 255 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 256 INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets", 257 "Hexagon Expand Condsets", false, false) 258 259 unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { 260 switch (Sub) { 261 case Hexagon::isub_lo: 262 case Hexagon::vsub_lo: 263 return Sub_Low; 264 case Hexagon::isub_hi: 265 case Hexagon::vsub_hi: 266 return Sub_High; 267 case Hexagon::NoSubRegister: 268 return Sub_None; 269 } 270 llvm_unreachable("Invalid subregister"); 271 } 272 273 bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) { 274 unsigned Opc = MI.getOpcode(); 275 switch (Opc) { 276 case Hexagon::C2_mux: 277 case Hexagon::C2_muxii: 278 case Hexagon::C2_muxir: 279 case Hexagon::C2_muxri: 280 case Hexagon::PS_pselect: 281 return true; 282 break; 283 } 284 return false; 285 } 286 287 LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) { 288 assert(Register::isVirtualRegister(Reg)); 289 return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) 290 : MRI->getMaxLaneMaskForVReg(Reg); 291 } 292 293 void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, 294 unsigned Exec) { 295 unsigned Mask = getMaskForSub(RR.Sub) | Exec; 296 ReferenceMap::iterator F = Map.find(RR.Reg); 297 if (F == Map.end()) 298 Map.insert(std::make_pair(RR.Reg, Mask)); 299 else 300 F->second |= Mask; 301 } 302 303 bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, 304 unsigned Exec) { 305 ReferenceMap::iterator F = Map.find(RR.Reg); 306 if (F == Map.end()) 307 return false; 308 unsigned Mask = getMaskForSub(RR.Sub) | Exec; 309 if (Mask & F->second) 310 return true; 311 return false; 312 } 313 314 void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { 315 auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { 316 // Set the <kill> flag on a use of Reg whose lane mask is contained in LM. 317 MachineInstr *MI = LIS->getInstructionFromIndex(K); 318 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 319 MachineOperand &Op = MI->getOperand(i); 320 if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg || 321 MI->isRegTiedToDefOperand(i)) 322 continue; 323 LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg()); 324 if ((SLM & LM) == SLM) { 325 // Only set the kill flag on the first encountered use of Reg in this 326 // instruction. 327 Op.setIsKill(true); 328 break; 329 } 330 } 331 }; 332 333 LiveInterval &LI = LIS->getInterval(Reg); 334 for (auto I = LI.begin(), E = LI.end(); I != E; ++I) { 335 if (!I->end.isRegister()) 336 continue; 337 // Do not mark the end of the segment as <kill>, if the next segment 338 // starts with a predicated instruction. 339 auto NextI = std::next(I); 340 if (NextI != E && NextI->start.isRegister()) { 341 MachineInstr *DefI = LIS->getInstructionFromIndex(NextI->start); 342 if (HII->isPredicated(*DefI)) 343 continue; 344 } 345 bool WholeReg = true; 346 if (LI.hasSubRanges()) { 347 auto EndsAtI = [I] (LiveInterval::SubRange &S) -> bool { 348 LiveRange::iterator F = S.find(I->end); 349 return F != S.end() && I->end == F->end; 350 }; 351 // Check if all subranges end at I->end. If so, make sure to kill 352 // the whole register. 353 for (LiveInterval::SubRange &S : LI.subranges()) { 354 if (EndsAtI(S)) 355 KillAt(I->end, S.LaneMask); 356 else 357 WholeReg = false; 358 } 359 } 360 if (WholeReg) 361 KillAt(I->end, MRI->getMaxLaneMaskForVReg(Reg)); 362 } 363 } 364 365 void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, 366 LiveRange &Range) { 367 assert(Register::isVirtualRegister(Reg)); 368 if (Range.empty()) 369 return; 370 371 // Return two booleans: { def-modifes-reg, def-covers-reg }. 372 auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> { 373 if (!Op.isReg() || !Op.isDef()) 374 return { false, false }; 375 Register DR = Op.getReg(), DSR = Op.getSubReg(); 376 if (!Register::isVirtualRegister(DR) || DR != Reg) 377 return { false, false }; 378 LaneBitmask SLM = getLaneMask(DR, DSR); 379 LaneBitmask A = SLM & LM; 380 return { A.any(), A == SLM }; 381 }; 382 383 // The splitting step will create pairs of predicated definitions without 384 // any implicit uses (since implicit uses would interfere with predication). 385 // This can cause the reaching defs to become dead after live range 386 // recomputation, even though they are not really dead. 387 // We need to identify predicated defs that need implicit uses, and 388 // dead defs that are not really dead, and correct both problems. 389 390 auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs, 391 MachineBasicBlock *Dest) -> bool { 392 for (MachineBasicBlock *D : Defs) 393 if (D != Dest && MDT->dominates(D, Dest)) 394 return true; 395 396 MachineBasicBlock *Entry = &Dest->getParent()->front(); 397 SetVector<MachineBasicBlock*> Work(Dest->pred_begin(), Dest->pred_end()); 398 for (unsigned i = 0; i < Work.size(); ++i) { 399 MachineBasicBlock *B = Work[i]; 400 if (Defs.count(B)) 401 continue; 402 if (B == Entry) 403 return false; 404 for (auto *P : B->predecessors()) 405 Work.insert(P); 406 } 407 return true; 408 }; 409 410 // First, try to extend live range within individual basic blocks. This 411 // will leave us only with dead defs that do not reach any predicated 412 // defs in the same block. 413 SetVector<MachineBasicBlock*> Defs; 414 SmallVector<SlotIndex,4> PredDefs; 415 for (auto &Seg : Range) { 416 if (!Seg.start.isRegister()) 417 continue; 418 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); 419 Defs.insert(DefI->getParent()); 420 if (HII->isPredicated(*DefI)) 421 PredDefs.push_back(Seg.start); 422 } 423 424 SmallVector<SlotIndex,8> Undefs; 425 LiveInterval &LI = LIS->getInterval(Reg); 426 LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes()); 427 428 for (auto &SI : PredDefs) { 429 MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); 430 auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI); 431 if (P.first != nullptr || P.second) 432 SI = SlotIndex(); 433 } 434 435 // Calculate reachability for those predicated defs that were not handled 436 // by the in-block extension. 437 SmallVector<SlotIndex,4> ExtTo; 438 for (auto &SI : PredDefs) { 439 if (!SI.isValid()) 440 continue; 441 MachineBasicBlock *BB = LIS->getMBBFromIndex(SI); 442 if (BB->pred_empty()) 443 continue; 444 // If the defs from this range reach SI via all predecessors, it is live. 445 // It can happen that SI is reached by the defs through some paths, but 446 // not all. In the IR coming into this optimization, SI would not be 447 // considered live, since the defs would then not jointly dominate SI. 448 // That means that SI is an overwriting def, and no implicit use is 449 // needed at this point. Do not add SI to the extension points, since 450 // extendToIndices will abort if there is no joint dominance. 451 // If the abort was avoided by adding extra undefs added to Undefs, 452 // extendToIndices could actually indicate that SI is live, contrary 453 // to the original IR. 454 if (Dominate(Defs, BB)) 455 ExtTo.push_back(SI); 456 } 457 458 if (!ExtTo.empty()) 459 LIS->extendToIndices(Range, ExtTo, Undefs); 460 461 // Remove <dead> flags from all defs that are not dead after live range 462 // extension, and collect all def operands. They will be used to generate 463 // the necessary implicit uses. 464 // At the same time, add <dead> flag to all defs that are actually dead. 465 // This can happen, for example, when a mux with identical inputs is 466 // replaced with a COPY: the use of the predicate register disappears and 467 // the dead can become dead. 468 std::set<RegisterRef> DefRegs; 469 for (auto &Seg : Range) { 470 if (!Seg.start.isRegister()) 471 continue; 472 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); 473 for (auto &Op : DefI->operands()) { 474 auto P = IsRegDef(Op); 475 if (P.second && Seg.end.isDead()) { 476 Op.setIsDead(true); 477 } else if (P.first) { 478 DefRegs.insert(Op); 479 Op.setIsDead(false); 480 } 481 } 482 } 483 484 // Now, add implicit uses to each predicated def that is reached 485 // by other defs. 486 for (auto &Seg : Range) { 487 if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot())) 488 continue; 489 MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); 490 if (!HII->isPredicated(*DefI)) 491 continue; 492 // Construct the set of all necessary implicit uses, based on the def 493 // operands in the instruction. We need to tie the implicit uses to 494 // the corresponding defs. 495 std::map<RegisterRef,unsigned> ImpUses; 496 for (unsigned i = 0, e = DefI->getNumOperands(); i != e; ++i) { 497 MachineOperand &Op = DefI->getOperand(i); 498 if (!Op.isReg() || !DefRegs.count(Op)) 499 continue; 500 if (Op.isDef()) { 501 // Tied defs will always have corresponding uses, so no extra 502 // implicit uses are needed. 503 if (!Op.isTied()) 504 ImpUses.insert({Op, i}); 505 } else { 506 // This function can be called for the same register with different 507 // lane masks. If the def in this instruction was for the whole 508 // register, we can get here more than once. Avoid adding multiple 509 // implicit uses (or adding an implicit use when an explicit one is 510 // present). 511 if (Op.isTied()) 512 ImpUses.erase(Op); 513 } 514 } 515 if (ImpUses.empty()) 516 continue; 517 MachineFunction &MF = *DefI->getParent()->getParent(); 518 for (std::pair<RegisterRef, unsigned> P : ImpUses) { 519 RegisterRef R = P.first; 520 MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub); 521 DefI->tieOperands(P.second, DefI->getNumOperands()-1); 522 } 523 } 524 } 525 526 void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { 527 LiveInterval &LI = LIS->getInterval(Reg); 528 if (LI.hasSubRanges()) { 529 for (LiveInterval::SubRange &S : LI.subranges()) { 530 updateDeadsInRange(Reg, S.LaneMask, S); 531 LIS->shrinkToUses(S, Reg); 532 } 533 LI.clear(); 534 LIS->constructMainRangeFromSubranges(LI); 535 } else { 536 updateDeadsInRange(Reg, MRI->getMaxLaneMaskForVReg(Reg), LI); 537 } 538 } 539 540 void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) { 541 LIS->removeInterval(Reg); 542 LIS->createAndComputeVirtRegInterval(Reg); 543 } 544 545 void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { 546 LIS->RemoveMachineInstrFromMaps(MI); 547 MI.eraseFromParent(); 548 } 549 550 void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet, 551 bool Recalc, bool UpdateKills, bool UpdateDeads) { 552 UpdateKills |= UpdateDeads; 553 for (unsigned R : RegSet) { 554 if (!Register::isVirtualRegister(R)) { 555 assert(Register::isPhysicalRegister(R)); 556 // There shouldn't be any physical registers as operands, except 557 // possibly reserved registers. 558 assert(MRI->isReserved(R)); 559 continue; 560 } 561 if (Recalc) 562 recalculateLiveInterval(R); 563 if (UpdateKills) 564 MRI->clearKillFlags(R); 565 if (UpdateDeads) 566 updateDeadFlags(R); 567 // Fixing <dead> flags may extend live ranges, so reset <kill> flags 568 // after that. 569 if (UpdateKills) 570 updateKillFlags(R); 571 LIS->getInterval(R).verify(); 572 } 573 } 574 575 /// Get the opcode for a conditional transfer of the value in SO (source 576 /// operand). The condition (true/false) is given in Cond. 577 unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, 578 bool IfTrue) { 579 using namespace Hexagon; 580 581 if (SO.isReg()) { 582 Register PhysR; 583 RegisterRef RS = SO; 584 if (Register::isVirtualRegister(RS.Reg)) { 585 const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); 586 assert(VC->begin() != VC->end() && "Empty register class"); 587 PhysR = *VC->begin(); 588 } else { 589 assert(Register::isPhysicalRegister(RS.Reg)); 590 PhysR = RS.Reg; 591 } 592 Register PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); 593 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); 594 switch (TRI->getRegSizeInBits(*RC)) { 595 case 32: 596 return IfTrue ? A2_tfrt : A2_tfrf; 597 case 64: 598 return IfTrue ? A2_tfrpt : A2_tfrpf; 599 } 600 llvm_unreachable("Invalid register operand"); 601 } 602 switch (SO.getType()) { 603 case MachineOperand::MO_Immediate: 604 case MachineOperand::MO_FPImmediate: 605 case MachineOperand::MO_ConstantPoolIndex: 606 case MachineOperand::MO_TargetIndex: 607 case MachineOperand::MO_JumpTableIndex: 608 case MachineOperand::MO_ExternalSymbol: 609 case MachineOperand::MO_GlobalAddress: 610 case MachineOperand::MO_BlockAddress: 611 return IfTrue ? C2_cmoveit : C2_cmoveif; 612 default: 613 break; 614 } 615 llvm_unreachable("Unexpected source operand"); 616 } 617 618 /// Generate a conditional transfer, copying the value SrcOp to the 619 /// destination register DstR:DstSR, and using the predicate register from 620 /// PredOp. The Cond argument specifies whether the predicate is to be 621 /// if(PredOp), or if(!PredOp). 622 MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, 623 MachineBasicBlock::iterator At, 624 unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, 625 bool PredSense, bool ReadUndef, bool ImpUse) { 626 MachineInstr *MI = SrcOp.getParent(); 627 MachineBasicBlock &B = *At->getParent(); 628 const DebugLoc &DL = MI->getDebugLoc(); 629 630 // Don't avoid identity copies here (i.e. if the source and the destination 631 // are the same registers). It is actually better to generate them here, 632 // since this would cause the copy to potentially be predicated in the next 633 // step. The predication will remove such a copy if it is unable to 634 /// predicate. 635 636 unsigned Opc = getCondTfrOpcode(SrcOp, PredSense); 637 unsigned DstState = RegState::Define | (ReadUndef ? RegState::Undef : 0); 638 unsigned PredState = getRegState(PredOp) & ~RegState::Kill; 639 MachineInstrBuilder MIB; 640 641 if (SrcOp.isReg()) { 642 unsigned SrcState = getRegState(SrcOp); 643 if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR)) 644 SrcState &= ~RegState::Kill; 645 MIB = BuildMI(B, At, DL, HII->get(Opc)) 646 .addReg(DstR, DstState, DstSR) 647 .addReg(PredOp.getReg(), PredState, PredOp.getSubReg()) 648 .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg()); 649 } else { 650 MIB = BuildMI(B, At, DL, HII->get(Opc)) 651 .addReg(DstR, DstState, DstSR) 652 .addReg(PredOp.getReg(), PredState, PredOp.getSubReg()) 653 .add(SrcOp); 654 } 655 656 LLVM_DEBUG(dbgs() << "created an initial copy: " << *MIB); 657 return &*MIB; 658 } 659 660 /// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function 661 /// performs all necessary changes to complete the replacement. 662 bool HexagonExpandCondsets::split(MachineInstr &MI, 663 std::set<unsigned> &UpdRegs) { 664 if (TfrLimitActive) { 665 if (TfrCounter >= TfrLimit) 666 return false; 667 TfrCounter++; 668 } 669 LLVM_DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent()) 670 << ": " << MI); 671 MachineOperand &MD = MI.getOperand(0); // Definition 672 MachineOperand &MP = MI.getOperand(1); // Predicate register 673 assert(MD.isDef()); 674 Register DR = MD.getReg(), DSR = MD.getSubReg(); 675 bool ReadUndef = MD.isUndef(); 676 MachineBasicBlock::iterator At = MI; 677 678 auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void { 679 for (auto &Op : MI.operands()) 680 if (Op.isReg()) 681 UpdRegs.insert(Op.getReg()); 682 }; 683 684 // If this is a mux of the same register, just replace it with COPY. 685 // Ideally, this would happen earlier, so that register coalescing would 686 // see it. 687 MachineOperand &ST = MI.getOperand(2); 688 MachineOperand &SF = MI.getOperand(3); 689 if (ST.isReg() && SF.isReg()) { 690 RegisterRef RT(ST); 691 if (RT == RegisterRef(SF)) { 692 // Copy regs to update first. 693 updateRegs(MI); 694 MI.setDesc(HII->get(TargetOpcode::COPY)); 695 unsigned S = getRegState(ST); 696 while (MI.getNumOperands() > 1) 697 MI.RemoveOperand(MI.getNumOperands()-1); 698 MachineFunction &MF = *MI.getParent()->getParent(); 699 MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub); 700 return true; 701 } 702 } 703 704 // First, create the two invididual conditional transfers, and add each 705 // of them to the live intervals information. Do that first and then remove 706 // the old instruction from live intervals. 707 MachineInstr *TfrT = 708 genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false); 709 MachineInstr *TfrF = 710 genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true); 711 LIS->InsertMachineInstrInMaps(*TfrT); 712 LIS->InsertMachineInstrInMaps(*TfrF); 713 714 // Will need to recalculate live intervals for all registers in MI. 715 updateRegs(MI); 716 717 removeInstr(MI); 718 return true; 719 } 720 721 bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { 722 if (HII->isPredicated(*MI) || !HII->isPredicable(*MI)) 723 return false; 724 if (MI->hasUnmodeledSideEffects() || MI->mayStore()) 725 return false; 726 // Reject instructions with multiple defs (e.g. post-increment loads). 727 bool HasDef = false; 728 for (auto &Op : MI->operands()) { 729 if (!Op.isReg() || !Op.isDef()) 730 continue; 731 if (HasDef) 732 return false; 733 HasDef = true; 734 } 735 for (auto &Mo : MI->memoperands()) 736 if (Mo->isVolatile() || Mo->isAtomic()) 737 return false; 738 return true; 739 } 740 741 /// Find the reaching definition for a predicated use of RD. The RD is used 742 /// under the conditions given by PredR and Cond, and this function will ignore 743 /// definitions that set RD under the opposite conditions. 744 MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, 745 MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { 746 MachineBasicBlock &B = *UseIt->getParent(); 747 MachineBasicBlock::iterator I = UseIt, S = B.begin(); 748 if (I == S) 749 return nullptr; 750 751 bool PredValid = true; 752 do { 753 --I; 754 MachineInstr *MI = &*I; 755 // Check if this instruction can be ignored, i.e. if it is predicated 756 // on the complementary condition. 757 if (PredValid && HII->isPredicated(*MI)) { 758 if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(*MI))) 759 continue; 760 } 761 762 // Check the defs. If the PredR is defined, invalidate it. If RD is 763 // defined, return the instruction or 0, depending on the circumstances. 764 for (auto &Op : MI->operands()) { 765 if (!Op.isReg() || !Op.isDef()) 766 continue; 767 RegisterRef RR = Op; 768 if (RR.Reg == PredR) { 769 PredValid = false; 770 continue; 771 } 772 if (RR.Reg != RD.Reg) 773 continue; 774 // If the "Reg" part agrees, there is still the subregister to check. 775 // If we are looking for %1:loreg, we can skip %1:hireg, but 776 // not %1 (w/o subregisters). 777 if (RR.Sub == RD.Sub) 778 return MI; 779 if (RR.Sub == 0 || RD.Sub == 0) 780 return nullptr; 781 // We have different subregisters, so we can continue looking. 782 } 783 } while (I != S); 784 785 return nullptr; 786 } 787 788 /// Check if the instruction MI can be safely moved over a set of instructions 789 /// whose side-effects (in terms of register defs and uses) are expressed in 790 /// the maps Defs and Uses. These maps reflect the conditional defs and uses 791 /// that depend on the same predicate register to allow moving instructions 792 /// over instructions predicated on the opposite condition. 793 bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs, 794 ReferenceMap &Uses) { 795 // In order to be able to safely move MI over instructions that define 796 // "Defs" and use "Uses", no def operand from MI can be defined or used 797 // and no use operand can be defined. 798 for (auto &Op : MI.operands()) { 799 if (!Op.isReg()) 800 continue; 801 RegisterRef RR = Op; 802 // For physical register we would need to check register aliases, etc. 803 // and we don't want to bother with that. It would be of little value 804 // before the actual register rewriting (from virtual to physical). 805 if (!Register::isVirtualRegister(RR.Reg)) 806 return false; 807 // No redefs for any operand. 808 if (isRefInMap(RR, Defs, Exec_Then)) 809 return false; 810 // For defs, there cannot be uses. 811 if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) 812 return false; 813 } 814 return true; 815 } 816 817 /// Check if the instruction accessing memory (TheI) can be moved to the 818 /// location ToI. 819 bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI, 820 bool IsDown) { 821 bool IsLoad = TheI.mayLoad(), IsStore = TheI.mayStore(); 822 if (!IsLoad && !IsStore) 823 return true; 824 if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) 825 return true; 826 if (TheI.hasUnmodeledSideEffects()) 827 return false; 828 829 MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; 830 MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; 831 bool Ordered = TheI.hasOrderedMemoryRef(); 832 833 // Search for aliased memory reference in (StartI, EndI). 834 for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { 835 MachineInstr *MI = &*I; 836 if (MI->hasUnmodeledSideEffects()) 837 return false; 838 bool L = MI->mayLoad(), S = MI->mayStore(); 839 if (!L && !S) 840 continue; 841 if (Ordered && MI->hasOrderedMemoryRef()) 842 return false; 843 844 bool Conflict = (L && IsStore) || S; 845 if (Conflict) 846 return false; 847 } 848 return true; 849 } 850 851 /// Generate a predicated version of MI (where the condition is given via 852 /// PredR and Cond) at the point indicated by Where. 853 void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp, 854 MachineInstr &MI, 855 MachineBasicBlock::iterator Where, 856 const MachineOperand &PredOp, bool Cond, 857 std::set<unsigned> &UpdRegs) { 858 // The problem with updating live intervals is that we can move one def 859 // past another def. In particular, this can happen when moving an A2_tfrt 860 // over an A2_tfrf defining the same register. From the point of view of 861 // live intervals, these two instructions are two separate definitions, 862 // and each one starts another live segment. LiveIntervals's "handleMove" 863 // does not allow such moves, so we need to handle it ourselves. To avoid 864 // invalidating liveness data while we are using it, the move will be 865 // implemented in 4 steps: (1) add a clone of the instruction MI at the 866 // target location, (2) update liveness, (3) delete the old instruction, 867 // and (4) update liveness again. 868 869 MachineBasicBlock &B = *MI.getParent(); 870 DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. 871 unsigned Opc = MI.getOpcode(); 872 unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); 873 MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); 874 unsigned Ox = 0, NP = MI.getNumOperands(); 875 // Skip all defs from MI first. 876 while (Ox < NP) { 877 MachineOperand &MO = MI.getOperand(Ox); 878 if (!MO.isReg() || !MO.isDef()) 879 break; 880 Ox++; 881 } 882 // Add the new def, then the predicate register, then the rest of the 883 // operands. 884 MB.addReg(DefOp.getReg(), getRegState(DefOp), DefOp.getSubReg()); 885 MB.addReg(PredOp.getReg(), PredOp.isUndef() ? RegState::Undef : 0, 886 PredOp.getSubReg()); 887 while (Ox < NP) { 888 MachineOperand &MO = MI.getOperand(Ox); 889 if (!MO.isReg() || !MO.isImplicit()) 890 MB.add(MO); 891 Ox++; 892 } 893 MB.cloneMemRefs(MI); 894 895 MachineInstr *NewI = MB; 896 NewI->clearKillInfo(); 897 LIS->InsertMachineInstrInMaps(*NewI); 898 899 for (auto &Op : NewI->operands()) 900 if (Op.isReg()) 901 UpdRegs.insert(Op.getReg()); 902 } 903 904 /// In the range [First, Last], rename all references to the "old" register RO 905 /// to the "new" register RN, but only in instructions predicated on the given 906 /// condition. 907 void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, 908 unsigned PredR, bool Cond, MachineBasicBlock::iterator First, 909 MachineBasicBlock::iterator Last) { 910 MachineBasicBlock::iterator End = std::next(Last); 911 for (MachineBasicBlock::iterator I = First; I != End; ++I) { 912 MachineInstr *MI = &*I; 913 // Do not touch instructions that are not predicated, or are predicated 914 // on the opposite condition. 915 if (!HII->isPredicated(*MI)) 916 continue; 917 if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(*MI))) 918 continue; 919 920 for (auto &Op : MI->operands()) { 921 if (!Op.isReg() || RO != RegisterRef(Op)) 922 continue; 923 Op.setReg(RN.Reg); 924 Op.setSubReg(RN.Sub); 925 // In practice, this isn't supposed to see any defs. 926 assert(!Op.isDef() && "Not expecting a def"); 927 } 928 } 929 } 930 931 /// For a given conditional copy, predicate the definition of the source of 932 /// the copy under the given condition (using the same predicate register as 933 /// the copy). 934 bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, 935 std::set<unsigned> &UpdRegs) { 936 // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). 937 unsigned Opc = TfrI.getOpcode(); 938 (void)Opc; 939 assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); 940 LLVM_DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") 941 << ": " << TfrI); 942 943 MachineOperand &MD = TfrI.getOperand(0); 944 MachineOperand &MP = TfrI.getOperand(1); 945 MachineOperand &MS = TfrI.getOperand(2); 946 // The source operand should be a <kill>. This is not strictly necessary, 947 // but it makes things a lot simpler. Otherwise, we would need to rename 948 // some registers, which would complicate the transformation considerably. 949 if (!MS.isKill()) 950 return false; 951 // Avoid predicating instructions that define a subregister if subregister 952 // liveness tracking is not enabled. 953 if (MD.getSubReg() && !MRI->shouldTrackSubRegLiveness(MD.getReg())) 954 return false; 955 956 RegisterRef RT(MS); 957 Register PredR = MP.getReg(); 958 MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); 959 if (!DefI || !isPredicable(DefI)) 960 return false; 961 962 LLVM_DEBUG(dbgs() << "Source def: " << *DefI); 963 964 // Collect the information about registers defined and used between the 965 // DefI and the TfrI. 966 // Map: reg -> bitmask of subregs 967 ReferenceMap Uses, Defs; 968 MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; 969 970 // Check if the predicate register is valid between DefI and TfrI. 971 // If it is, we can then ignore instructions predicated on the negated 972 // conditions when collecting def and use information. 973 bool PredValid = true; 974 for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { 975 if (!I->modifiesRegister(PredR, nullptr)) 976 continue; 977 PredValid = false; 978 break; 979 } 980 981 for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { 982 MachineInstr *MI = &*I; 983 // If this instruction is predicated on the same register, it could 984 // potentially be ignored. 985 // By default assume that the instruction executes on the same condition 986 // as TfrI (Exec_Then), and also on the opposite one (Exec_Else). 987 unsigned Exec = Exec_Then | Exec_Else; 988 if (PredValid && HII->isPredicated(*MI) && MI->readsRegister(PredR)) 989 Exec = (Cond == HII->isPredicatedTrue(*MI)) ? Exec_Then : Exec_Else; 990 991 for (auto &Op : MI->operands()) { 992 if (!Op.isReg()) 993 continue; 994 // We don't want to deal with physical registers. The reason is that 995 // they can be aliased with other physical registers. Aliased virtual 996 // registers must share the same register number, and can only differ 997 // in the subregisters, which we are keeping track of. Physical 998 // registers ters no longer have subregisters---their super- and 999 // subregisters are other physical registers, and we are not checking 1000 // that. 1001 RegisterRef RR = Op; 1002 if (!Register::isVirtualRegister(RR.Reg)) 1003 return false; 1004 1005 ReferenceMap &Map = Op.isDef() ? Defs : Uses; 1006 if (Op.isDef() && Op.isUndef()) { 1007 assert(RR.Sub && "Expecting a subregister on <def,read-undef>"); 1008 // If this is a <def,read-undef>, then it invalidates the non-written 1009 // part of the register. For the purpose of checking the validity of 1010 // the move, assume that it modifies the whole register. 1011 RR.Sub = 0; 1012 } 1013 addRefToMap(RR, Map, Exec); 1014 } 1015 } 1016 1017 // The situation: 1018 // RT = DefI 1019 // ... 1020 // RD = TfrI ..., RT 1021 1022 // If the register-in-the-middle (RT) is used or redefined between 1023 // DefI and TfrI, we may not be able proceed with this transformation. 1024 // We can ignore a def that will not execute together with TfrI, and a 1025 // use that will. If there is such a use (that does execute together with 1026 // TfrI), we will not be able to move DefI down. If there is a use that 1027 // executed if TfrI's condition is false, then RT must be available 1028 // unconditionally (cannot be predicated). 1029 // Essentially, we need to be able to rename RT to RD in this segment. 1030 if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else)) 1031 return false; 1032 RegisterRef RD = MD; 1033 // If the predicate register is defined between DefI and TfrI, the only 1034 // potential thing to do would be to move the DefI down to TfrI, and then 1035 // predicate. The reaching def (DefI) must be movable down to the location 1036 // of the TfrI. 1037 // If the target register of the TfrI (RD) is not used or defined between 1038 // DefI and TfrI, consider moving TfrI up to DefI. 1039 bool CanUp = canMoveOver(TfrI, Defs, Uses); 1040 bool CanDown = canMoveOver(*DefI, Defs, Uses); 1041 // The TfrI does not access memory, but DefI could. Check if it's safe 1042 // to move DefI down to TfrI. 1043 if (DefI->mayLoad() || DefI->mayStore()) 1044 if (!canMoveMemTo(*DefI, TfrI, true)) 1045 CanDown = false; 1046 1047 LLVM_DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") 1048 << ", can move down: " << (CanDown ? "yes\n" : "no\n")); 1049 MachineBasicBlock::iterator PastDefIt = std::next(DefIt); 1050 if (CanUp) 1051 predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs); 1052 else if (CanDown) 1053 predicateAt(MD, *DefI, TfrIt, MP, Cond, UpdRegs); 1054 else 1055 return false; 1056 1057 if (RT != RD) { 1058 renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt); 1059 UpdRegs.insert(RT.Reg); 1060 } 1061 1062 removeInstr(TfrI); 1063 removeInstr(*DefI); 1064 return true; 1065 } 1066 1067 /// Predicate all cases of conditional copies in the specified block. 1068 bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, 1069 std::set<unsigned> &UpdRegs) { 1070 bool Changed = false; 1071 MachineBasicBlock::iterator I, E, NextI; 1072 for (I = B.begin(), E = B.end(); I != E; I = NextI) { 1073 NextI = std::next(I); 1074 unsigned Opc = I->getOpcode(); 1075 if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { 1076 bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs); 1077 if (!Done) { 1078 // If we didn't predicate I, we may need to remove it in case it is 1079 // an "identity" copy, e.g. %1 = A2_tfrt %2, %1. 1080 if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) { 1081 for (auto &Op : I->operands()) 1082 if (Op.isReg()) 1083 UpdRegs.insert(Op.getReg()); 1084 removeInstr(*I); 1085 } 1086 } 1087 Changed |= Done; 1088 } 1089 } 1090 return Changed; 1091 } 1092 1093 bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { 1094 if (!Register::isVirtualRegister(RR.Reg)) 1095 return false; 1096 const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); 1097 if (RC == &Hexagon::IntRegsRegClass) { 1098 BW = 32; 1099 return true; 1100 } 1101 if (RC == &Hexagon::DoubleRegsRegClass) { 1102 BW = (RR.Sub != 0) ? 32 : 64; 1103 return true; 1104 } 1105 return false; 1106 } 1107 1108 bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { 1109 for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { 1110 LiveRange::Segment &LR = *I; 1111 // Range must start at a register... 1112 if (!LR.start.isRegister()) 1113 return false; 1114 // ...and end in a register or in a dead slot. 1115 if (!LR.end.isRegister() && !LR.end.isDead()) 1116 return false; 1117 } 1118 return true; 1119 } 1120 1121 bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { 1122 if (CoaLimitActive) { 1123 if (CoaCounter >= CoaLimit) 1124 return false; 1125 CoaCounter++; 1126 } 1127 unsigned BW1, BW2; 1128 if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) 1129 return false; 1130 if (MRI->isLiveIn(R1.Reg)) 1131 return false; 1132 if (MRI->isLiveIn(R2.Reg)) 1133 return false; 1134 1135 LiveInterval &L1 = LIS->getInterval(R1.Reg); 1136 LiveInterval &L2 = LIS->getInterval(R2.Reg); 1137 if (L2.empty()) 1138 return false; 1139 if (L1.hasSubRanges() || L2.hasSubRanges()) 1140 return false; 1141 bool Overlap = L1.overlaps(L2); 1142 1143 LLVM_DEBUG(dbgs() << "compatible registers: (" 1144 << (Overlap ? "overlap" : "disjoint") << ")\n " 1145 << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " 1146 << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); 1147 if (R1.Sub || R2.Sub) 1148 return false; 1149 if (Overlap) 1150 return false; 1151 1152 // Coalescing could have a negative impact on scheduling, so try to limit 1153 // to some reasonable extent. Only consider coalescing segments, when one 1154 // of them does not cross basic block boundaries. 1155 if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) 1156 return false; 1157 1158 MRI->replaceRegWith(R2.Reg, R1.Reg); 1159 1160 // Move all live segments from L2 to L1. 1161 using ValueInfoMap = DenseMap<VNInfo *, VNInfo *>; 1162 ValueInfoMap VM; 1163 for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { 1164 VNInfo *NewVN, *OldVN = I->valno; 1165 ValueInfoMap::iterator F = VM.find(OldVN); 1166 if (F == VM.end()) { 1167 NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); 1168 VM.insert(std::make_pair(OldVN, NewVN)); 1169 } else { 1170 NewVN = F->second; 1171 } 1172 L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); 1173 } 1174 while (L2.begin() != L2.end()) 1175 L2.removeSegment(*L2.begin()); 1176 LIS->removeInterval(R2.Reg); 1177 1178 updateKillFlags(R1.Reg); 1179 LLVM_DEBUG(dbgs() << "coalesced: " << L1 << "\n"); 1180 L1.verify(); 1181 1182 return true; 1183 } 1184 1185 /// Attempt to coalesce one of the source registers to a MUX instruction with 1186 /// the destination register. This could lead to having only one predicated 1187 /// instruction in the end instead of two. 1188 bool HexagonExpandCondsets::coalesceSegments( 1189 const SmallVectorImpl<MachineInstr*> &Condsets, 1190 std::set<unsigned> &UpdRegs) { 1191 SmallVector<MachineInstr*,16> TwoRegs; 1192 for (MachineInstr *MI : Condsets) { 1193 MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); 1194 if (!S1.isReg() && !S2.isReg()) 1195 continue; 1196 TwoRegs.push_back(MI); 1197 } 1198 1199 bool Changed = false; 1200 for (MachineInstr *CI : TwoRegs) { 1201 RegisterRef RD = CI->getOperand(0); 1202 RegisterRef RP = CI->getOperand(1); 1203 MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); 1204 bool Done = false; 1205 // Consider this case: 1206 // %1 = instr1 ... 1207 // %2 = instr2 ... 1208 // %0 = C2_mux ..., %1, %2 1209 // If %0 was coalesced with %1, we could end up with the following 1210 // code: 1211 // %0 = instr1 ... 1212 // %2 = instr2 ... 1213 // %0 = A2_tfrf ..., %2 1214 // which will later become: 1215 // %0 = instr1 ... 1216 // %0 = instr2_cNotPt ... 1217 // i.e. there will be an unconditional definition (instr1) of %0 1218 // followed by a conditional one. The output dependency was there before 1219 // and it unavoidable, but if instr1 is predicable, we will no longer be 1220 // able to predicate it here. 1221 // To avoid this scenario, don't coalesce the destination register with 1222 // a source register that is defined by a predicable instruction. 1223 if (S1.isReg()) { 1224 RegisterRef RS = S1; 1225 MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true); 1226 if (!RDef || !HII->isPredicable(*RDef)) { 1227 Done = coalesceRegisters(RD, RegisterRef(S1)); 1228 if (Done) { 1229 UpdRegs.insert(RD.Reg); 1230 UpdRegs.insert(S1.getReg()); 1231 } 1232 } 1233 } 1234 if (!Done && S2.isReg()) { 1235 RegisterRef RS = S2; 1236 MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false); 1237 if (!RDef || !HII->isPredicable(*RDef)) { 1238 Done = coalesceRegisters(RD, RegisterRef(S2)); 1239 if (Done) { 1240 UpdRegs.insert(RD.Reg); 1241 UpdRegs.insert(S2.getReg()); 1242 } 1243 } 1244 } 1245 Changed |= Done; 1246 } 1247 return Changed; 1248 } 1249 1250 bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { 1251 if (skipFunction(MF.getFunction())) 1252 return false; 1253 1254 HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo()); 1255 TRI = MF.getSubtarget().getRegisterInfo(); 1256 MDT = &getAnalysis<MachineDominatorTree>(); 1257 LIS = &getAnalysis<LiveIntervals>(); 1258 MRI = &MF.getRegInfo(); 1259 1260 LLVM_DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", 1261 MF.getFunction().getParent())); 1262 1263 bool Changed = false; 1264 std::set<unsigned> CoalUpd, PredUpd; 1265 1266 SmallVector<MachineInstr*,16> Condsets; 1267 for (auto &B : MF) 1268 for (auto &I : B) 1269 if (isCondset(I)) 1270 Condsets.push_back(&I); 1271 1272 // Try to coalesce the target of a mux with one of its sources. 1273 // This could eliminate a register copy in some circumstances. 1274 Changed |= coalesceSegments(Condsets, CoalUpd); 1275 1276 // Update kill flags on all source operands. This is done here because 1277 // at this moment (when expand-condsets runs), there are no kill flags 1278 // in the IR (they have been removed by live range analysis). 1279 // Updating them right before we split is the easiest, because splitting 1280 // adds definitions which would interfere with updating kills afterwards. 1281 std::set<unsigned> KillUpd; 1282 for (MachineInstr *MI : Condsets) 1283 for (MachineOperand &Op : MI->operands()) 1284 if (Op.isReg() && Op.isUse()) 1285 if (!CoalUpd.count(Op.getReg())) 1286 KillUpd.insert(Op.getReg()); 1287 updateLiveness(KillUpd, false, true, false); 1288 LLVM_DEBUG( 1289 LIS->print(dbgs() << "After coalescing\n", MF.getFunction().getParent())); 1290 1291 // First, simply split all muxes into a pair of conditional transfers 1292 // and update the live intervals to reflect the new arrangement. The 1293 // goal is to update the kill flags, since predication will rely on 1294 // them. 1295 for (MachineInstr *MI : Condsets) 1296 Changed |= split(*MI, PredUpd); 1297 Condsets.clear(); // The contents of Condsets are invalid here anyway. 1298 1299 // Do not update live ranges after splitting. Recalculation of live 1300 // intervals removes kill flags, which were preserved by splitting on 1301 // the source operands of condsets. These kill flags are needed by 1302 // predication, and after splitting they are difficult to recalculate 1303 // (because of predicated defs), so make sure they are left untouched. 1304 // Predication does not use live intervals. 1305 LLVM_DEBUG( 1306 LIS->print(dbgs() << "After splitting\n", MF.getFunction().getParent())); 1307 1308 // Traverse all blocks and collapse predicable instructions feeding 1309 // conditional transfers into predicated instructions. 1310 // Walk over all the instructions again, so we may catch pre-existing 1311 // cases that were not created in the previous step. 1312 for (auto &B : MF) 1313 Changed |= predicateInBlock(B, PredUpd); 1314 LLVM_DEBUG(LIS->print(dbgs() << "After predicating\n", 1315 MF.getFunction().getParent())); 1316 1317 PredUpd.insert(CoalUpd.begin(), CoalUpd.end()); 1318 updateLiveness(PredUpd, true, true, true); 1319 1320 LLVM_DEBUG({ 1321 if (Changed) 1322 LIS->print(dbgs() << "After expand-condsets\n", 1323 MF.getFunction().getParent()); 1324 }); 1325 1326 return Changed; 1327 } 1328 1329 //===----------------------------------------------------------------------===// 1330 // Public Constructor Functions 1331 //===----------------------------------------------------------------------===// 1332 FunctionPass *llvm::createHexagonExpandCondsets() { 1333 return new HexagonExpandCondsets(); 1334 } 1335