//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

static cl::opt<bool> DisableHazardRecognizer(
    "disable-sched-hazard", cl::Hidden, cl::init(false),
    cl::desc("Disable hazard detection during preRA scheduling"));

static cl::opt<bool> EnableAccReassociation(
    "acc-reassoc", cl::Hidden, cl::init(true),
    cl::desc("Enable reassociation of accumulation chains"));

static cl::opt<unsigned int>
    MinAccumulatorDepth("acc-min-depth", cl::Hidden, cl::init(8),
                        cl::desc("Minimum length of accumulator chains "
                                 "required for the optimization to kick in"));

static cl::opt<unsigned int> MaxAccumulatorWidth(
    "acc-max-width", cl::Hidden, cl::init(3),
    cl::desc("Maximum number of branches in the accumulator tree"));

TargetInstrInfo::~TargetInstrInfo() = default;

const TargetRegisterClass *
TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
                             const TargetRegisterInfo *TRI,
                             const MachineFunction &MF) const {
  if (OpNum >= MCID.getNumOperands())
    return nullptr;

  short RegClass = MCID.operands()[OpNum].RegClass;
  if (MCID.operands()[OpNum].isLookupPtrRegClass())
    return TRI->getPointerRegClass(MF, RegClass);

  // Instructions like INSERT_SUBREG do not have fixed register classes.
  if (RegClass < 0)
    return nullptr;

  // Otherwise just look it up normally.
  return TRI->getRegClass(RegClass);
}

/// insertNoop - Insert a noop into the instruction stream at the specified
/// point.
void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI) const {
  llvm_unreachable("Target didn't implement insertNoop!");
}

/// insertNoops - Insert noops into the instruction stream at the specified
/// point.
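/// For example, a pass that wants to pad a block might call
/// insertNoops(MBB, MI, 4) to emit four target noops before MI; the default
/// implementation below simply invokes insertNoop() Quantity times.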
void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  unsigned Quantity) const {
  for (unsigned i = 0; i < Quantity; ++i)
    insertNoop(MBB, MI);
}

static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
  return strncmp(Str, MAI.getCommentString().data(),
                 MAI.getCommentString().size()) == 0;
}

/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
/// count as an instruction.
/// Any other non-whitespace text is considered an instruction, with
/// multiple instructions separated by SeparatorString or newlines.
/// Variable-length instructions are not handled here; this function
/// may be overridden in the target code to do that.
/// We implement a special case of the .space directive which takes only a
/// single integer argument in base 10 that is the size in bytes. This is a
/// restricted form of the GAS directive in that we only interpret
/// simple--i.e. not a logical or arithmetic expression--size values without
/// the optional fill value. This is primarily used for creating arbitrarily
/// sized inline asm blocks for testing purposes.
unsigned TargetInstrInfo::getInlineAsmLength(
    const char *Str,
    const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
  // Count the number of instructions in the asm.
  bool AtInsnStart = true;
  unsigned Length = 0;
  const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
  for (; *Str; ++Str) {
    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
                                strlen(MAI.getSeparatorString())) == 0) {
      AtInsnStart = true;
    } else if (isAsmComment(Str, MAI)) {
      // Stop counting as an instruction after a comment until the next
      // separator.
      AtInsnStart = false;
    }

    if (AtInsnStart && !isSpace(static_cast<unsigned char>(*Str))) {
      unsigned AddLength = MaxInstLength;
      if (strncmp(Str, ".space", 6) == 0) {
        char *EStr;
        int SpaceSize;
        SpaceSize = strtol(Str + 6, &EStr, 10);
        SpaceSize = SpaceSize < 0 ? 0 : SpaceSize;
        while (*EStr != '\n' && isSpace(static_cast<unsigned char>(*EStr)))
          ++EStr;
        if (*EStr == '\0' || *EStr == '\n' ||
            isAsmComment(EStr, MAI)) // Successfully parsed .space argument
          AddLength = SpaceSize;
      }
      Length += AddLength;
      AtInsnStart = false;
    }
  }

  return Length;
}

/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything after
/// it, replacing it with an unconditional branch to NewDest.
void
TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                         MachineBasicBlock *NewDest) const {
  MachineBasicBlock *MBB = Tail->getParent();

  // Remove all the old successors of MBB from the CFG.
  while (!MBB->succ_empty())
    MBB->removeSuccessor(MBB->succ_begin());

  // Save off the debug loc before erasing the instruction.
  DebugLoc DL = Tail->getDebugLoc();

  // Update call info and remove all the dead instructions
  // from the end of MBB.
  while (Tail != MBB->end()) {
    auto MI = Tail++;
    if (MI->shouldUpdateAdditionalCallInfo())
      MBB->getParent()->eraseAdditionalCallInfo(&*MI);
    MBB->erase(MI);
  }

  // If NewDest isn't the block immediately after MBB in layout order (i.e. not
  // the fallthrough), insert an unconditional branch to it.
  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
    insertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
  MBB->addSuccessor(NewDest);
}

MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
                                                      bool NewMI, unsigned Idx1,
                                                      unsigned Idx2) const {
  const MCInstrDesc &MCID = MI.getDesc();
  bool HasDef = MCID.getNumDefs();
  if (HasDef && !MI.getOperand(0).isReg())
    // No idea how to commute this instruction. Target should implement its own.
    return nullptr;

  unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
  unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
  assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
         CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
         "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
  assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
         "This only knows how to commute register operands so far");

  Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
  Register Reg1 = MI.getOperand(Idx1).getReg();
  Register Reg2 = MI.getOperand(Idx2).getReg();
  unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
  unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
  unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
  bool Reg1IsKill = MI.getOperand(Idx1).isKill();
  bool Reg2IsKill = MI.getOperand(Idx2).isKill();
  bool Reg1IsUndef = MI.getOperand(Idx1).isUndef();
  bool Reg2IsUndef = MI.getOperand(Idx2).isUndef();
  bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead();
  bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
  // Avoid calling isRenamable for virtual registers since we assert that
  // renamable property is only queried/set for physical registers.
  bool Reg1IsRenamable =
      Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false;
  bool Reg2IsRenamable =
      Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false;

  // For a case like this:
  //   %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
  // we need to update the implicit-def after commuting to result in:
  //   %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
  SmallVector<unsigned> UpdateImplicitDefIdx;
  if (HasDef && MI.hasImplicitDef()) {
    const TargetRegisterInfo *TRI =
        MI.getMF()->getSubtarget().getRegisterInfo();
    for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) {
      Register ImplReg = MO.getReg();
      if ((ImplReg.isVirtual() && ImplReg == Reg0) ||
          (ImplReg.isPhysical() && Reg0.isPhysical() &&
           TRI->isSubRegisterEq(ImplReg, Reg0)))
        UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands());
    }
  }

  // If the destination is tied to either of the commuted source registers,
  // then it must be updated.
  if (HasDef && Reg0 == Reg1 &&
      MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
    Reg2IsKill = false;
    Reg0 = Reg2;
    SubReg0 = SubReg2;
  } else if (HasDef && Reg0 == Reg2 &&
             MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
    Reg1IsKill = false;
    Reg0 = Reg1;
    SubReg0 = SubReg1;
  }

  MachineInstr *CommutedMI = nullptr;
  if (NewMI) {
    // Create a new instruction.
    MachineFunction &MF = *MI.getMF();
    CommutedMI = MF.CloneMachineInstr(&MI);
  } else {
    CommutedMI = &MI;
  }

  if (HasDef) {
    CommutedMI->getOperand(0).setReg(Reg0);
    CommutedMI->getOperand(0).setSubReg(SubReg0);
    for (unsigned Idx : UpdateImplicitDefIdx)
      CommutedMI->getOperand(Idx).setReg(Reg0);
  }
  CommutedMI->getOperand(Idx2).setReg(Reg1);
  CommutedMI->getOperand(Idx1).setReg(Reg2);
  CommutedMI->getOperand(Idx2).setSubReg(SubReg1);
  CommutedMI->getOperand(Idx1).setSubReg(SubReg2);
  CommutedMI->getOperand(Idx2).setIsKill(Reg1IsKill);
  CommutedMI->getOperand(Idx1).setIsKill(Reg2IsKill);
  CommutedMI->getOperand(Idx2).setIsUndef(Reg1IsUndef);
  CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
  CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
  CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
  // Avoid calling setIsRenamable for virtual registers since we assert that
  // renamable property is only queried/set for physical registers.
  if (Reg1.isPhysical())
    CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable);
  if (Reg2.isPhysical())
    CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);
  return CommutedMI;
}

MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, bool NewMI,
                                                  unsigned OpIdx1,
                                                  unsigned OpIdx2) const {
  // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose
  // any commutable operand, which is done in the findCommutedOpIndices()
  // method called below.
  if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) &&
      !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
    assert(MI.isCommutable() &&
           "Precondition violation: MI must be commutable.");
    return nullptr;
  }
  return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}

bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
                                           unsigned &ResultIdx2,
                                           unsigned CommutableOpIdx1,
                                           unsigned CommutableOpIdx2) {
  if (ResultIdx1 == CommuteAnyOperandIndex &&
      ResultIdx2 == CommuteAnyOperandIndex) {
    ResultIdx1 = CommutableOpIdx1;
    ResultIdx2 = CommutableOpIdx2;
  } else if (ResultIdx1 == CommuteAnyOperandIndex) {
    if (ResultIdx2 == CommutableOpIdx1)
      ResultIdx1 = CommutableOpIdx2;
    else if (ResultIdx2 == CommutableOpIdx2)
      ResultIdx1 = CommutableOpIdx1;
    else
      return false;
  } else if (ResultIdx2 == CommuteAnyOperandIndex) {
    if (ResultIdx1 == CommutableOpIdx1)
      ResultIdx2 = CommutableOpIdx2;
    else if (ResultIdx1 == CommutableOpIdx2)
      ResultIdx2 = CommutableOpIdx1;
    else
      return false;
  } else
    // Check that the result operand indices match the given commutable
    // operand indices.
    return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) ||
           (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1);

  return true;
}

bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                            unsigned &SrcOpIdx1,
                                            unsigned &SrcOpIdx2) const {
  assert(!MI.isBundle() &&
         "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");

  const MCInstrDesc &MCID = MI.getDesc();
  if (!MCID.isCommutable())
    return false;

  // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
  // is not true, then the target must implement this.
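  // For example, for a three-address instruction %v0 = OP %v1, %v2 with a
  // single def, the commutable source operands are at index
  // getNumDefs() == 1 and at index 2, which is exactly what the defaults
  // below compute.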
  unsigned CommutableOpIdx1 = MCID.getNumDefs();
  unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
                            CommutableOpIdx1, CommutableOpIdx2))
    return false;

  if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
    // No idea.
    return false;
  return true;
}

bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
  if (!MI.isTerminator()) return false;

  // Conditional branch is a special case.
  if (MI.isBranch() && !MI.isBarrier())
    return true;
  if (!MI.isPredicable())
    return true;
  return !isPredicated(MI);
}

bool TargetInstrInfo::PredicateInstruction(
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
  bool MadeChange = false;

  assert(!MI.isBundle() &&
         "TargetInstrInfo::PredicateInstruction() can't handle bundles");

  const MCInstrDesc &MCID = MI.getDesc();
  if (!MI.isPredicable())
    return false;

  for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (MCID.operands()[i].isPredicate()) {
      MachineOperand &MO = MI.getOperand(i);
      if (MO.isReg()) {
        MO.setReg(Pred[j].getReg());
        MadeChange = true;
      } else if (MO.isImm()) {
        MO.setImm(Pred[j].getImm());
        MadeChange = true;
      } else if (MO.isMBB()) {
        MO.setMBB(Pred[j].getMBB());
        MadeChange = true;
      }
      ++j;
    }
  }
  return MadeChange;
}

bool TargetInstrInfo::hasLoadFromStackSlot(
    const MachineInstr &MI,
    SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
  size_t StartSize = Accesses.size();
  for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
                                  oe = MI.memoperands_end();
       o != oe; ++o) {
    if ((*o)->isLoad() &&
        isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
      Accesses.push_back(*o);
  }
  return Accesses.size() != StartSize;
}

bool TargetInstrInfo::hasStoreToStackSlot(
    const MachineInstr &MI,
    SmallVectorImpl<const MachineMemOperand *> &Accesses) const {
  size_t StartSize = Accesses.size();
  for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
                                  oe = MI.memoperands_end();
       o != oe; ++o) {
    if ((*o)->isStore() &&
        isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
      Accesses.push_back(*o);
  }
  return Accesses.size() != StartSize;
}

bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
                                        unsigned SubIdx, unsigned &Size,
                                        unsigned &Offset,
                                        const MachineFunction &MF) const {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (!SubIdx) {
    Size = TRI->getSpillSize(*RC);
    Offset = 0;
    return true;
  }
  unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
  // Convert bit size to byte size.
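  // For example, a 32-bit sub-register at bit offset 32 within a 128-bit
  // (16-byte) spill slot yields Size = 4 and Offset = 4; on a big-endian
  // target the offset is then mirrored below to 16 - (4 + 4) = 8.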
  if (BitSize % 8)
    return false;

  int BitOffset = TRI->getSubRegIdxOffset(SubIdx);
  if (BitOffset < 0 || BitOffset % 8)
    return false;

  Size = BitSize / 8;
  Offset = (unsigned)BitOffset / 8;

  assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range");

  if (!MF.getDataLayout().isLittleEndian()) {
    Offset = TRI->getSpillSize(*RC) - (Offset + Size);
  }
  return true;
}

void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    Register DestReg, unsigned SubIdx,
                                    const MachineInstr &Orig,
                                    const TargetRegisterInfo &TRI) const {
  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
  MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
  MBB.insert(I, MI);
}

bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
                                       const MachineInstr &MI1,
                                       const MachineRegisterInfo *MRI) const {
  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

MachineInstr &
TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator InsertBefore,
                           const MachineInstr &Orig) const {
  MachineFunction &MF = *MBB.getParent();
  // CFI instructions are marked as non-duplicable, because Darwin compact
  // unwind info emission can't handle multiple prologue setups.
  assert((!Orig.isNotDuplicable() ||
          (!MF.getTarget().getTargetTriple().isOSDarwin() &&
           Orig.isCFIInstruction())) &&
         "Instruction cannot be duplicated");

  return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig);
}

// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
                                              const TargetInstrInfo &TII,
                                              unsigned FoldIdx) {
  assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
  if (MI.getNumOperands() != 2)
    return nullptr;
  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");

  const MachineOperand &FoldOp = MI.getOperand(FoldIdx);
  const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx);

  if (FoldOp.getSubReg() || LiveOp.getSubReg())
    return nullptr;

  Register FoldReg = FoldOp.getReg();
  Register LiveReg = LiveOp.getReg();

  assert(FoldReg.isVirtual() && "Cannot fold physregs");

  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);

  if (LiveOp.getReg().isPhysical())
    return RC->contains(LiveOp.getReg()) ? RC : nullptr;

  if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
    return RC;

  // FIXME: Allow folding when register classes are memory compatible.
  return nullptr;
}

MCInst TargetInstrInfo::getNop() const { llvm_unreachable("Not implemented"); }

/// Try to remove the load by folding it to a register operand at the use.
/// We fold the load if it defines a virtual register, the virtual register
/// is used exactly once in the same BB, and the instructions in between do
/// not load or store and have no side effects.
MachineInstr *TargetInstrInfo::optimizeLoadInstr(MachineInstr &MI,
                                                 const MachineRegisterInfo *MRI,
                                                 Register &FoldAsLoadDefReg,
                                                 MachineInstr *&DefMI) const {
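  // An illustrative sketch (x86-flavored names, for illustration only):
  //   %v = MOV32rm <mem>      ; DefMI, the foldable load
  //   ...                     ; no intervening loads/stores/side effects
  //   %r = ADD32rr %x, %v     ; MI, the only use of %v
  // may be rewritten via foldMemoryOperand() into
  //   %r = ADD32rm %x, <mem>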
  // Check whether we can move DefMI here.
  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
  assert(DefMI);
  bool SawStore = false;
  if (!DefMI->isSafeToMove(SawStore))
    return nullptr;

  // Collect information about virtual register operands of MI.
  SmallVector<unsigned, 1> SrcOperandIds;
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg != FoldAsLoadDefReg)
      continue;
    // Do not fold if we have a subreg use or a def.
    if (MO.getSubReg() || MO.isDef())
      return nullptr;
    SrcOperandIds.push_back(i);
  }
  if (SrcOperandIds.empty())
    return nullptr;

  // Check whether we can fold the def into any of SrcOperandIds.
  if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
    FoldAsLoadDefReg = 0;
    return FoldMI;
  }

  return nullptr;
}

std::pair<unsigned, unsigned>
TargetInstrInfo::getPatchpointUnfoldableRange(const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::STACKMAP:
    // StackMapLiveValues are foldable.
    return std::make_pair(0, StackMapOpers(&MI).getVarIdx());
  case TargetOpcode::PATCHPOINT:
    // For PATCHPOINT, the call args are not foldable (even if reported in the
    // stackmap, e.g. via anyregcc).
    return std::make_pair(0, PatchPointOpers(&MI).getVarIdx());
  case TargetOpcode::STATEPOINT:
    // For statepoints, fold deopt and gc arguments, but not call arguments.
    return std::make_pair(MI.getNumDefs(), StatepointOpers(&MI).getVarIdx());
  default:
    llvm_unreachable("unexpected stackmap opcode");
  }
}

static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
                                    ArrayRef<unsigned> Ops, int FrameIndex,
                                    const TargetInstrInfo &TII) {
  unsigned StartIdx = 0;
  unsigned NumDefs = 0;
  // getPatchpointUnfoldableRange hits llvm_unreachable if MI is not a
  // stackmap, patchpoint, or statepoint.
  std::tie(NumDefs, StartIdx) = TII.getPatchpointUnfoldableRange(MI);

  unsigned DefToFoldIdx = MI.getNumOperands();

  // Return nullptr if any operands requested for folding are not foldable
  // (not part of the stackmap's live values).
  for (unsigned Op : Ops) {
    if (Op < NumDefs) {
      assert(DefToFoldIdx == MI.getNumOperands() && "Folding multiple defs");
      DefToFoldIdx = Op;
    } else if (Op < StartIdx) {
      return nullptr;
    }
    if (MI.getOperand(Op).isTied())
      return nullptr;
  }

  MachineInstr *NewMI =
      MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
  MachineInstrBuilder MIB(MF, NewMI);

  // There is no need to fold the return value, the metadata, or the function
  // arguments.
  for (unsigned i = 0; i < StartIdx; ++i)
    if (i != DefToFoldIdx)
      MIB.add(MI.getOperand(i));

  for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    unsigned TiedTo = e;
    (void)MI.isRegTiedToDefOperand(i, &TiedTo);

    if (is_contained(Ops, i)) {
      assert(TiedTo == e && "Cannot fold tied operands");
      unsigned SpillSize;
      unsigned SpillOffset;
      // Compute the spill slot size and offset.
      const TargetRegisterClass *RC =
          MF.getRegInfo().getRegClass(MO.getReg());
      bool Valid =
          TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
      if (!Valid)
        report_fatal_error("cannot spill patchpoint subregister operand");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(SpillSize);
      MIB.addFrameIndex(FrameIndex);
      MIB.addImm(SpillOffset);
    } else {
      MIB.add(MO);
      if (TiedTo < e) {
        assert(TiedTo < NumDefs && "Bad tied operand");
        if (TiedTo > DefToFoldIdx)
          --TiedTo;
        NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1);
      }
    }
  }
  return NewMI;
}

static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
                                    const TargetInstrInfo &TII) {
  // If the machine operand is tied, untie it first.
  if (MI->getOperand(OpNo).isTied()) {
    unsigned TiedTo = MI->findTiedOperandIdx(OpNo);
    MI->untieRegOperand(OpNo);
    // Intentional recursion!
    foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
  }

  SmallVector<MachineOperand, 5> NewOps;
  TII.getFrameIndexOperands(NewOps, FI);
  assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
  MI->removeOperand(OpNo);
  MI->insert(MI->operands_begin() + OpNo, NewOps);

  // Change the previous operand to a MemKind InlineAsm::Flag. The second param
  // is the per-target number of operands that represent the memory operand
  // excluding this one (MD). This includes MO.
  InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size());
  F.setMemConstraint(InlineAsm::ConstraintCode::m);
  MachineOperand &MD = MI->getOperand(OpNo - 1);
  MD.setImm(F);
}

// Returns nullptr if not possible to fold.
static MachineInstr *foldInlineAsmMemOperand(MachineInstr &MI,
                                             ArrayRef<unsigned> Ops, int FI,
                                             const TargetInstrInfo &TII) {
  assert(MI.isInlineAsm() && "wrong opcode");
  if (Ops.size() > 1)
    return nullptr;
  unsigned Op = Ops[0];
  assert(Op && "should never be first operand");
  assert(MI.getOperand(Op).isReg() && "shouldn't be folding non-reg operands");

  if (!MI.mayFoldInlineAsmRegOp(Op))
    return nullptr;

  MachineInstr &NewMI = TII.duplicate(*MI.getParent(), MI.getIterator(), MI);

  foldInlineAsmMemOperand(&NewMI, Op, FI, TII);

  // Update mayload/maystore metadata, and memoperands.
  const VirtRegInfo &RI =
      AnalyzeVirtRegInBundle(MI, MI.getOperand(Op).getReg());
  MachineOperand &ExtraMO = NewMI.getOperand(InlineAsm::MIOp_ExtraInfo);
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (RI.Reads) {
    ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayLoad);
    Flags |= MachineMemOperand::MOLoad;
  }
  if (RI.Writes) {
    ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayStore);
    Flags |= MachineMemOperand::MOStore;
  }
  MachineFunction *MF = NewMI.getMF();
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(*MF, FI), Flags, MFI.getObjectSize(FI),
      MFI.getObjectAlign(FI));
  NewMI.addMemOperand(*MF, MMO);

  return &NewMI;
}

MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
                                                 ArrayRef<unsigned> Ops, int FI,
                                                 LiveIntervals *LIS,
                                                 VirtRegMap *VRM) const {
  auto Flags = MachineMemOperand::MONone;
  for (unsigned OpIdx : Ops)
    Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
                                          : MachineMemOperand::MOLoad;

  MachineBasicBlock *MBB = MI.getParent();
  assert(MBB && "foldMemoryOperand needs an inserted instruction");
  MachineFunction &MF = *MBB->getParent();

  // If we're not folding a load into a subreg, the size of the load is the
  // size of the spill slot. But if we are, we need to figure out what the
  // actual load size is.
  int64_t MemSize = 0;
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  if (Flags & MachineMemOperand::MOStore) {
    MemSize = MFI.getObjectSize(FI);
  } else {
    for (unsigned OpIdx : Ops) {
      int64_t OpSize = MFI.getObjectSize(FI);

      if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) {
        unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
        if (SubRegSize > 0 && !(SubRegSize % 8))
          OpSize = SubRegSize / 8;
      }

      MemSize = std::max(MemSize, OpSize);
    }
  }

  assert(MemSize && "Did not expect a zero-sized stack slot");

  MachineInstr *NewMI = nullptr;

  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT ||
      MI.getOpcode() == TargetOpcode::STATEPOINT) {
    // Fold stackmap/patchpoint.
    NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
    if (NewMI)
      MBB->insert(MI, NewMI);
  } else if (MI.isInlineAsm()) {
    return foldInlineAsmMemOperand(MI, Ops, FI, *this);
  } else {
    // Ask the target to do the actual folding.
    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
  }

  if (NewMI) {
    NewMI->setMemRefs(MF, MI.memoperands());
    // Add a memory operand, foldMemoryOperandImpl doesn't do that.
    assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) &&
           "Folded a def to a non-store!");
    assert((!(Flags & MachineMemOperand::MOLoad) || NewMI->mayLoad()) &&
           "Folded a use to a non-load!");
    assert(MFI.getObjectOffset(FI) != -1);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
                                Flags, MemSize, MFI.getObjectAlign(FI));
    NewMI->addMemOperand(MF, MMO);

    // The pass "x86 speculative load hardening" always attaches symbols to
    // call instructions. We need to copy them from the old instruction.
    NewMI->cloneInstrSymbols(MF, MI);

    return NewMI;
  }

  // A straight COPY may fold as a load/store.
  if (!isCopyInstr(MI) || Ops.size() != 1)
    return nullptr;

  const TargetRegisterClass *RC = canFoldCopy(MI, *this, Ops[0]);
  if (!RC)
    return nullptr;

  const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
  MachineBasicBlock::iterator Pos = MI;

  if (Flags == MachineMemOperand::MOStore)
    storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
                        Register());
  else
    loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register());
  return &*--Pos;
}

MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
                                                 ArrayRef<unsigned> Ops,
                                                 MachineInstr &LoadMI,
                                                 LiveIntervals *LIS) const {
  assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
  for (unsigned OpIdx : Ops)
    assert(MI.getOperand(OpIdx).isUse() && "Folding load into def!");
#endif

  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineInstr *NewMI = nullptr;
  int FrameIndex = 0;

  if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
       MI.getOpcode() == TargetOpcode::PATCHPOINT ||
       MI.getOpcode() == TargetOpcode::STATEPOINT) &&
      isLoadFromStackSlot(LoadMI, FrameIndex)) {
    // Fold stackmap/patchpoint.
    NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
    if (NewMI)
      NewMI = &*MBB.insert(MI, NewMI);
  } else if (MI.isInlineAsm() && isLoadFromStackSlot(LoadMI, FrameIndex)) {
    return foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
  } else {
    // Ask the target to do the actual folding.
    NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
  }

  if (!NewMI)
    return nullptr;

  // Copy the memoperands from the load to the folded instruction.
  if (MI.memoperands_empty()) {
    NewMI->setMemRefs(MF, LoadMI.memoperands());
  } else {
    // Handle the rare case of folding multiple loads.
    NewMI->setMemRefs(MF, MI.memoperands());
    for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
                                    E = LoadMI.memoperands_end();
         I != E; ++I) {
      NewMI->addMemOperand(MF, *I);
    }
  }
  return NewMI;
}

/// transferImplicitOperands - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit
/// operands from MI to the replacement instruction.
static void transferImplicitOperands(MachineInstr *MI,
                                     const TargetRegisterInfo *TRI) {
  MachineBasicBlock::iterator CopyMI = MI;
  --CopyMI;

  Register DstReg = MI->getOperand(0).getReg();
  for (const MachineOperand &MO : MI->implicit_operands()) {
    CopyMI->addOperand(MO);

    // Be conservative about preserving kills when subregister defs are
    // involved. If there was an implicit kill of a super-register overlapping
    // the copy result, we would kill the subregisters previous copies defined.
    if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
      CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
  }
}

void TargetInstrInfo::lowerCopy(MachineInstr *MI,
                                const TargetRegisterInfo *TRI) const {
  if (MI->allDefsAreDead()) {
    MI->setDesc(get(TargetOpcode::KILL));
    return;
  }

  MachineOperand &DstMO = MI->getOperand(0);
  MachineOperand &SrcMO = MI->getOperand(1);

  bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
  if (IdentityCopy || SrcMO.isUndef()) {
    // No need to insert an identity copy instruction, but replace with a KILL
    // if liveness is changed.
    if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
      // We must make sure the super-register gets killed. Replace the
      // instruction with KILL.
      MI->setDesc(get(TargetOpcode::KILL));
      return;
    }
    // Vanilla identity copy.
    MI->eraseFromParent();
    return;
  }

  copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(),
              SrcMO.getReg(), SrcMO.isKill(),
              DstMO.getReg().isPhysical() ? DstMO.isRenamable() : false,
              SrcMO.getReg().isPhysical() ? SrcMO.isRenamable() : false);

  if (MI->getNumOperands() > 2)
    transferImplicitOperands(MI, TRI);
  MI->eraseFromParent();
}

bool TargetInstrInfo::hasReassociableOperands(
    const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
  const MachineOperand &Op1 = Inst.getOperand(1);
  const MachineOperand &Op2 = Inst.getOperand(2);
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  // We need virtual register definitions for the operands that we will
  // reassociate.
  MachineInstr *MI1 = nullptr;
  MachineInstr *MI2 = nullptr;
  if (Op1.isReg() && Op1.getReg().isVirtual())
    MI1 = MRI.getUniqueVRegDef(Op1.getReg());
  if (Op2.isReg() && Op2.getReg().isVirtual())
    MI2 = MRI.getUniqueVRegDef(Op2.getReg());

  // And at least one operand must be defined in MBB.
  return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
}

bool TargetInstrInfo::areOpcodesEqualOrInverse(unsigned Opcode1,
                                               unsigned Opcode2) const {
  return Opcode1 == Opcode2 || getInverseOpcode(Opcode1) == Opcode2;
}

bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
                                             bool &Commuted) const {
  const MachineBasicBlock *MBB = Inst.getParent();
  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
  MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
  unsigned Opcode = Inst.getOpcode();

  // If only one operand has the same or inverse opcode and it's the second
  // source operand, the operands must be commuted.
  Commuted = !areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
             areOpcodesEqualOrInverse(Opcode, MI2->getOpcode());
  if (Commuted)
    std::swap(MI1, MI2);

  // 1. The previous instruction must be the same type as Inst.
  // 2. The previous instruction must also be associative/commutative or be the
  //    inverse of such an operation (this can be different even for
  //    instructions with the same opcode if traits like fast-math-flags are
  //    included).
  // 3. The previous instruction must have virtual register definitions for its
  //    operands in the same basic block as Inst.
  // 4. The previous instruction's result must only be used by Inst.
  return areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
         (isAssociativeAndCommutative(*MI1) ||
          isAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
         hasReassociableOperands(*MI1, MBB) &&
         MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
}

// 1. The operation must be associative and commutative, or be the inverse of
//    such an operation.
// 2. The instruction must have virtual register definitions for its
//    operands in the same basic block.
// 3. The instruction must have a reassociable sibling.
bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
                                               bool &Commuted) const {
  return (isAssociativeAndCommutative(Inst) ||
          isAssociativeAndCommutative(Inst, /* Invert */ true)) &&
         hasReassociableOperands(Inst, Inst.getParent()) &&
         hasReassociableSibling(Inst, Commuted);
}

// Utility routine that checks if \param MO is defined by a
// \param CombineOpc instruction in the basic block \param MBB.
// If \param CombineOpc is not provided, the opcode check will
// be skipped.
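// For example, with a hypothetical accumulating opcode ACC, a call like
// canCombine(MBB, MI.getOperand(1), /*CombineOpc=*/ACC) returns true only if
// operand 1 is defined by another ACC in MBB whose result has a single
// non-debug use.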
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
                       unsigned CombineOpc = 0) {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineInstr *MI = nullptr;

  if (MO.isReg() && MO.getReg().isVirtual())
    MI = MRI.getUniqueVRegDef(MO.getReg());
  // And it needs to be in the trace (otherwise, it won't have a depth).
  if (!MI || MI->getParent() != &MBB ||
      ((unsigned)MI->getOpcode() != CombineOpc && CombineOpc != 0))
    return false;
  // It must only be used by the instruction we combine with.
  if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    return false;

  return true;
}

// A chain of accumulation instructions will be selected IFF:
//   1. All the accumulation instructions in the chain have the same opcode,
//      besides the first that has a slightly different opcode because it does
//      not accumulate into a register.
//   2. All the instructions in the chain are combinable (have a single use
//      which itself is part of the chain).
//   3. The chain meets the required minimum length.
void TargetInstrInfo::getAccumulatorChain(
    MachineInstr *CurrentInstr, SmallVectorImpl<Register> &Chain) const {
  // Walk up the chain of accumulation instructions and collect them in the
  // vector.
  MachineBasicBlock &MBB = *CurrentInstr->getParent();
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  unsigned AccumulatorOpcode = CurrentInstr->getOpcode();
  std::optional<unsigned> ChainStartOpCode =
      getAccumulationStartOpcode(AccumulatorOpcode);

  if (!ChainStartOpCode.has_value())
    return;

  // Push the first accumulator result to the start of the chain.
  Chain.push_back(CurrentInstr->getOperand(0).getReg());

  // Collect the accumulator input register from all instructions in the
  // chain.
  while (CurrentInstr &&
         canCombine(MBB, CurrentInstr->getOperand(1), AccumulatorOpcode)) {
    Chain.push_back(CurrentInstr->getOperand(1).getReg());
    CurrentInstr = MRI.getUniqueVRegDef(CurrentInstr->getOperand(1).getReg());
  }

  // Add the instruction at the top of the chain.
  if (CurrentInstr->getOpcode() == AccumulatorOpcode &&
      canCombine(MBB, CurrentInstr->getOperand(1)))
    Chain.push_back(CurrentInstr->getOperand(1).getReg());
}

/// Find chains of accumulations that can be rewritten as a tree for increased
/// ILP.
bool TargetInstrInfo::getAccumulatorReassociationPatterns(
    MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns) const {
  if (!EnableAccReassociation)
    return false;

  unsigned Opc = Root.getOpcode();
  if (!isAccumulationOpcode(Opc))
    return false;

  // Verify that this is the end of the chain.
  MachineBasicBlock &MBB = *Root.getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  if (!MRI.hasOneNonDBGUser(Root.getOperand(0).getReg()))
    return false;

  auto User = MRI.use_instr_begin(Root.getOperand(0).getReg());
  if (User->getOpcode() == Opc)
    return false;

  // Walk up the use chain and collect the reduction chain.
  SmallVector<Register, 32> Chain;
  getAccumulatorChain(&Root, Chain);

  // Reject chains which are too short to be worth modifying.
  if (Chain.size() < MinAccumulatorDepth)
    return false;

  // Check whether the MBB this instruction is a part of contains any other
  // chains; if so, don't apply the pattern.
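  // (Presumably, rewriting several chains in the same block would multiply
  // the number of live partial sums; restricting the transform to blocks with
  // a single chain avoids that extra register pressure.)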
  SmallSet<Register, 32> ReductionChain(llvm::from_range, Chain);
  for (const auto &I : MBB) {
    if (I.getOpcode() == Opc &&
        !ReductionChain.contains(I.getOperand(0).getReg()))
      return false;
  }

  Patterns.push_back(MachineCombinerPattern::ACC_CHAIN);
  return true;
}

// Reduce branches of the accumulator tree by adding them together.
void TargetInstrInfo::reduceAccumulatorTree(
    SmallVectorImpl<Register> &RegistersToReduce,
    SmallVectorImpl<MachineInstr *> &InsInstrs, MachineFunction &MF,
    MachineInstr &Root, MachineRegisterInfo &MRI,
    DenseMap<Register, unsigned> &InstrIdxForVirtReg,
    Register ResultReg) const {
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  SmallVector<Register, 8> NewRegs;

  // Get the opcode for the reduction instruction we will need to build.
  unsigned ReduceOpCode = getReduceOpcodeForAccumulator(Root.getOpcode());

  for (unsigned int i = 1; i <= (RegistersToReduce.size() / 2); i += 2) {
    auto RHS = RegistersToReduce[i - 1];
    auto LHS = RegistersToReduce[i];
    Register Dest;
    // If we are reducing 2 registers, reuse the original result register.
    if (RegistersToReduce.size() == 2)
      Dest = ResultReg;
    // Otherwise, create a new virtual register to hold the partial sum.
    else {
      auto NewVR = MRI.createVirtualRegister(
          MRI.getRegClass(Root.getOperand(0).getReg()));
      Dest = NewVR;
      NewRegs.push_back(Dest);
      InstrIdxForVirtReg.insert(std::make_pair(Dest, InsInstrs.size()));
    }

    // Create the new reduction instruction.
    MachineInstrBuilder MIB =
        BuildMI(MF, MIMetadata(Root), TII->get(ReduceOpCode), Dest)
            .addReg(RHS, getKillRegState(true))
            .addReg(LHS, getKillRegState(true));
    // Copy any flags needed from the original instruction.
    MIB->setFlags(Root.getFlags());
    InsInstrs.push_back(MIB);
  }

  // If the number of registers to reduce is odd, add the remaining register to
  // the vector of registers to reduce.
  if (RegistersToReduce.size() % 2 != 0)
    NewRegs.push_back(RegistersToReduce[RegistersToReduce.size() - 1]);

  RegistersToReduce = NewRegs;
}

// The concept of the reassociation pass is that these operations can benefit
// from this kind of transformation:
//
// A = ? op ?
// B = A op X (Prev)
// C = B op Y (Root)
// -->
// A = ? op ?
// B = X op Y
// C = A op B
//
// breaking the dependency between A and B, allowing them to be executed in
// parallel (or back-to-back in a pipeline) instead of depending on each other.

// FIXME: This has the potential to be expensive (compile time) while not
// improving the code at all. Some ways to limit the overhead:
// 1. Track successful transforms; bail out if hit rate gets too low.
// 2. Only enable at -O3 or some other non-default optimization level.
// 3. Pre-screen pattern candidates here: if an operand of the previous
//    instruction is known to not increase the critical path, then don't match
//    that pattern.
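// As a concrete illustration of the transformation above (generic opcode
// names, not tied to any particular target):
//   %b = FADD %a, %x    ; Prev
//   %c = FADD %b, %y    ; Root
// is rewritten to
//   %t = FADD %x, %y    ; computable as soon as %x and %y are ready
//   %c = FADD %a, %t
// so the new add no longer has to wait for %a's defining computation.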
bool TargetInstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
    bool DoRegPressureReduce) const {
  bool Commute;
  if (isReassociationCandidate(Root, Commute)) {
    // We found a sequence of instructions that may be suitable for a
    // reassociation of operands to increase ILP. Specify each commutation
    // possibility for the Prev instruction in the sequence and let the
    // machine combiner decide if changing the operands is worthwhile.
    if (Commute) {
      Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB);
      Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB);
    } else {
      Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY);
      Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY);
    }
    return true;
  }
  if (getAccumulatorReassociationPatterns(Root, Patterns))
    return true;

  return false;
}

/// Return true when a code sequence can improve loop throughput.
bool TargetInstrInfo::isThroughputPattern(unsigned Pattern) const {
  return false;
}

CombinerObjective
TargetInstrInfo::getCombinerObjective(unsigned Pattern) const {
  switch (Pattern) {
  case MachineCombinerPattern::ACC_CHAIN:
    return CombinerObjective::MustReduceDepth;
  default:
    return CombinerObjective::Default;
  }
}

std::pair<unsigned, unsigned>
TargetInstrInfo::getReassociationOpcodes(unsigned Pattern,
                                         const MachineInstr &Root,
                                         const MachineInstr &Prev) const {
  bool AssocCommutRoot = isAssociativeAndCommutative(Root);
  bool AssocCommutPrev = isAssociativeAndCommutative(Prev);

  // Early exit if both opcodes are associative and commutative. It's a trivial
  // reassociation when we only change the operand order. In this case the
  // opcodes are not required to have inverse versions.
  if (AssocCommutRoot && AssocCommutPrev) {
    assert(Root.getOpcode() == Prev.getOpcode() && "Expected to be equal");
    return std::make_pair(Root.getOpcode(), Root.getOpcode());
  }

  // At least one instruction is not associative or commutative.
  // Since we have matched one of the reassociation patterns, we expect that
  // the instructions' opcodes are equal or one of them is the inverse of the
  // other.
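  // (For example, an integer ADD and SUB, or an FADD and FSUB, might be
  // reported as inverses by a target's getInverseOpcode().)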
  assert(areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()) &&
         "Incorrectly matched pattern");
  unsigned AssocCommutOpcode = Root.getOpcode();
  unsigned InverseOpcode = *getInverseOpcode(Root.getOpcode());
  if (!AssocCommutRoot)
    std::swap(AssocCommutOpcode, InverseOpcode);

  // The transformation rule (`+` is any associative and commutative binary
  // operation, `-` is the inverse):
  // REASSOC_AX_BY:
  //   (A + X) + Y => A + (X + Y)
  //   (A + X) - Y => A + (X - Y)
  //   (A - X) + Y => A - (X - Y)
  //   (A - X) - Y => A - (X + Y)
  // REASSOC_XA_BY:
  //   (X + A) + Y => (X + Y) + A
  //   (X + A) - Y => (X - Y) + A
  //   (X - A) + Y => (X + Y) - A
  //   (X - A) - Y => (X - Y) - A
  // REASSOC_AX_YB:
  //   Y + (A + X) => (Y + X) + A
  //   Y - (A + X) => (Y - X) - A
  //   Y + (A - X) => (Y - X) + A
  //   Y - (A - X) => (Y + X) - A
  // REASSOC_XA_YB:
  //   Y + (X + A) => (Y + X) + A
  //   Y - (X + A) => (Y - X) - A
  //   Y + (X - A) => (Y + X) - A
  //   Y - (X - A) => (Y - X) + A
  switch (Pattern) {
  default:
    llvm_unreachable("Unexpected pattern");
  case MachineCombinerPattern::REASSOC_AX_BY:
    if (!AssocCommutRoot && AssocCommutPrev)
      return {AssocCommutOpcode, InverseOpcode};
    if (AssocCommutRoot && !AssocCommutPrev)
      return {InverseOpcode, InverseOpcode};
    if (!AssocCommutRoot && !AssocCommutPrev)
      return {InverseOpcode, AssocCommutOpcode};
    break;
  case MachineCombinerPattern::REASSOC_XA_BY:
    if (!AssocCommutRoot && AssocCommutPrev)
      return {AssocCommutOpcode, InverseOpcode};
    if (AssocCommutRoot && !AssocCommutPrev)
      return {InverseOpcode, AssocCommutOpcode};
    if (!AssocCommutRoot && !AssocCommutPrev)
      return {InverseOpcode, InverseOpcode};
    break;
  case MachineCombinerPattern::REASSOC_AX_YB:
    if (!AssocCommutRoot && AssocCommutPrev)
      return {InverseOpcode, InverseOpcode};
    if (AssocCommutRoot && !AssocCommutPrev)
      return {AssocCommutOpcode, InverseOpcode};
    if (!AssocCommutRoot && !AssocCommutPrev)
      return {InverseOpcode, AssocCommutOpcode};
    break;
  case MachineCombinerPattern::REASSOC_XA_YB:
    if (!AssocCommutRoot && AssocCommutPrev)
      return {InverseOpcode, InverseOpcode};
    if (AssocCommutRoot && !AssocCommutPrev)
      return {InverseOpcode, AssocCommutOpcode};
    if (!AssocCommutRoot && !AssocCommutPrev)
      return {AssocCommutOpcode, InverseOpcode};
    break;
  }
  llvm_unreachable("Unhandled combination");
}

// Return a pair of boolean flags showing if the new root and new prev operands
// must be swapped. See the visual example of the rule in
// TargetInstrInfo::getReassociationOpcodes.
static std::pair<bool, bool> mustSwapOperands(unsigned Pattern) {
  switch (Pattern) {
  default:
    llvm_unreachable("Unexpected pattern");
  case MachineCombinerPattern::REASSOC_AX_BY:
    return {false, false};
  case MachineCombinerPattern::REASSOC_XA_BY:
    return {true, false};
  case MachineCombinerPattern::REASSOC_AX_YB:
    return {true, true};
  case MachineCombinerPattern::REASSOC_XA_YB:
    return {true, true};
  }
}

void TargetInstrInfo::getReassociateOperandIndices(
    const MachineInstr &Root, unsigned Pattern,
    std::array<unsigned, 5> &OperandIndices) const {
  switch (Pattern) {
  case MachineCombinerPattern::REASSOC_AX_BY:
    OperandIndices = {1, 1, 1, 2, 2};
    break;
  case MachineCombinerPattern::REASSOC_AX_YB:
    OperandIndices = {2, 1, 2, 2, 1};
    break;
  case MachineCombinerPattern::REASSOC_XA_BY:
    OperandIndices = {1, 2, 1, 1, 2};
    break;
  case MachineCombinerPattern::REASSOC_XA_YB:
    OperandIndices = {2, 2, 2, 1, 1};
    break;
  default:
    llvm_unreachable("unexpected MachineCombinerPattern");
  }
}

/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
    MachineInstr &Root, MachineInstr &Prev, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    ArrayRef<unsigned> OperandIndices,
    DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);

  MachineOperand &OpA = Prev.getOperand(OperandIndices[1]);
  MachineOperand &OpB = Root.getOperand(OperandIndices[2]);
  MachineOperand &OpX = Prev.getOperand(OperandIndices[3]);
  MachineOperand &OpY = Root.getOperand(OperandIndices[4]);
  MachineOperand &OpC = Root.getOperand(0);

  Register RegA = OpA.getReg();
  Register RegB = OpB.getReg();
  Register RegX = OpX.getReg();
  Register RegY = OpY.getReg();
  Register RegC = OpC.getReg();

  if (RegA.isVirtual())
    MRI.constrainRegClass(RegA, RC);
  if (RegB.isVirtual())
    MRI.constrainRegClass(RegB, RC);
  if (RegX.isVirtual())
    MRI.constrainRegClass(RegX, RC);
  if (RegY.isVirtual())
    MRI.constrainRegClass(RegY, RC);
  if (RegC.isVirtual())
    MRI.constrainRegClass(RegC, RC);

  // Create a new virtual register for the result of (X op Y) instead of
  // recycling RegB because the MachineCombiner's computation of the critical
  // path requires a new register definition rather than an existing one.
  Register NewVR = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));

  auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev);
  bool KillA = OpA.isKill();
  bool KillX = OpX.isKill();
  bool KillY = OpY.isKill();
  bool KillNewVR = true;

  auto [SwapRootOperands, SwapPrevOperands] = mustSwapOperands(Pattern);

  if (SwapPrevOperands) {
    std::swap(RegX, RegY);
    std::swap(KillX, KillY);
  }

  unsigned PrevFirstOpIdx, PrevSecondOpIdx;
  unsigned RootFirstOpIdx, RootSecondOpIdx;
  switch (Pattern) {
  case MachineCombinerPattern::REASSOC_AX_BY:
    PrevFirstOpIdx = OperandIndices[1];
    PrevSecondOpIdx = OperandIndices[3];
    RootFirstOpIdx = OperandIndices[2];
    RootSecondOpIdx = OperandIndices[4];
    break;
  case MachineCombinerPattern::REASSOC_AX_YB:
    PrevFirstOpIdx = OperandIndices[1];
    PrevSecondOpIdx = OperandIndices[3];
    RootFirstOpIdx = OperandIndices[4];
    RootSecondOpIdx = OperandIndices[2];
    break;
  case MachineCombinerPattern::REASSOC_XA_BY:
    PrevFirstOpIdx = OperandIndices[3];
    PrevSecondOpIdx = OperandIndices[1];
    RootFirstOpIdx = OperandIndices[2];
    RootSecondOpIdx = OperandIndices[4];
    break;
  case MachineCombinerPattern::REASSOC_XA_YB:
    PrevFirstOpIdx = OperandIndices[3];
    PrevSecondOpIdx = OperandIndices[1];
    RootFirstOpIdx = OperandIndices[4];
    RootSecondOpIdx = OperandIndices[2];
    break;
  default:
    llvm_unreachable("unexpected MachineCombinerPattern");
  }

  // Basically BuildMI but doesn't add implicit operands by default.
  auto buildMINoImplicit = [](MachineFunction &MF, const MIMetadata &MIMD,
                              const MCInstrDesc &MCID, Register DestReg) {
    return MachineInstrBuilder(
               MF, MF.CreateMachineInstr(MCID, MIMD.getDL(), /*NoImpl=*/true))
        .setPCSections(MIMD.getPCSections())
        .addReg(DestReg, RegState::Define);
  };

  // Create new instructions for insertion.
  MachineInstrBuilder MIB1 =
      buildMINoImplicit(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR);
  for (const auto &MO : Prev.explicit_operands()) {
    unsigned Idx = MO.getOperandNo();
    // Skip the result operand we'd already added.
    if (Idx == 0)
      continue;
    if (Idx == PrevFirstOpIdx)
      MIB1.addReg(RegX, getKillRegState(KillX));
    else if (Idx == PrevSecondOpIdx)
      MIB1.addReg(RegY, getKillRegState(KillY));
    else
      MIB1.add(MO);
  }
  MIB1.copyImplicitOps(Prev);

  if (SwapRootOperands) {
    std::swap(RegA, NewVR);
    std::swap(KillA, KillNewVR);
  }

  MachineInstrBuilder MIB2 =
      buildMINoImplicit(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC);
  for (const auto &MO : Root.explicit_operands()) {
    unsigned Idx = MO.getOperandNo();
    // Skip the result operand.
    if (Idx == 0)
      continue;
    if (Idx == RootFirstOpIdx)
      MIB2 = MIB2.addReg(RegA, getKillRegState(KillA));
    else if (Idx == RootSecondOpIdx)
      MIB2 = MIB2.addReg(NewVR, getKillRegState(KillNewVR));
    else
      MIB2 = MIB2.add(MO);
  }
  MIB2.copyImplicitOps(Root);

  // Propagate FP flags from the original instructions.
  // But clear poison-generating flags because those may not be valid now.
  // TODO: There should be a helper function for copying only fast-math-flags.
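  // (For instance, an nsw/nuw flag proven for B = A op X need not hold for
  // the new intermediate value X op Y, so the wrap and exact flags are
  // cleared below.)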
  uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
  MIB1->setFlags(IntersectedFlags);
  MIB1->clearFlag(MachineInstr::MIFlag::NoSWrap);
  MIB1->clearFlag(MachineInstr::MIFlag::NoUWrap);
  MIB1->clearFlag(MachineInstr::MIFlag::IsExact);

  MIB2->setFlags(IntersectedFlags);
  MIB2->clearFlag(MachineInstr::MIFlag::NoSWrap);
  MIB2->clearFlag(MachineInstr::MIFlag::NoUWrap);
  MIB2->clearFlag(MachineInstr::MIFlag::IsExact);

  setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);

  // Record new instructions for insertion and old instructions for deletion.
  InsInstrs.push_back(MIB1);
  InsInstrs.push_back(MIB2);
  DelInstrs.push_back(&Prev);
  DelInstrs.push_back(&Root);

  // We transformed:
  //   B = A op X (Prev)
  //   C = B op Y (Root)
  // Into:
  //   B = X op Y (MIB1)
  //   C = A op B (MIB2)
  // C has the same value as before, B doesn't; as such, keep the debug number
  // of C but not of B.
  if (unsigned OldRootNum = Root.peekDebugInstrNum())
    MIB2.getInstr()->setDebugInstrNum(OldRootNum);
}

void TargetInstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, unsigned Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<Register, unsigned> &InstIdxForVirtReg) const {
  MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
  MachineBasicBlock &MBB = *Root.getParent();
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  switch (Pattern) {
  case MachineCombinerPattern::REASSOC_AX_BY:
  case MachineCombinerPattern::REASSOC_AX_YB:
  case MachineCombinerPattern::REASSOC_XA_BY:
  case MachineCombinerPattern::REASSOC_XA_YB: {
    // Select the previous instruction in the sequence based on the input
    // pattern.
    std::array<unsigned, 5> OperandIndices;
    getReassociateOperandIndices(Root, Pattern, OperandIndices);
    MachineInstr *Prev =
        MRI.getUniqueVRegDef(Root.getOperand(OperandIndices[0]).getReg());

    // Don't reassociate if Prev and Root are in different blocks.
    if (Prev->getParent() != Root.getParent())
      return;

    reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, OperandIndices,
                   InstIdxForVirtReg);
    break;
  }
  case MachineCombinerPattern::ACC_CHAIN: {
    SmallVector<Register, 32> ChainRegs;
    getAccumulatorChain(&Root, ChainRegs);
    unsigned int Depth = ChainRegs.size();
    assert(MaxAccumulatorWidth > 1 &&
           "Max accumulator width set to illegal value");
    unsigned int MaxWidth = Log2_32(Depth) < MaxAccumulatorWidth
                                ? Log2_32(Depth)
                                : MaxAccumulatorWidth;

    // Walk down the chain and rewrite it as a tree.
    for (auto IndexedReg : llvm::enumerate(llvm::reverse(ChainRegs))) {
      // No need to rewrite the first node; it is already perfect as it is.
      if (IndexedReg.index() == 0)
        continue;

      MachineInstr *Instr = MRI.getUniqueVRegDef(IndexedReg.value());
      MachineInstrBuilder MIB;
      Register AccReg;
      if (IndexedReg.index() < MaxWidth) {
        // Now we need to create new instructions for the first row.
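        // (Nodes with index < MaxWidth each start an independent sub-chain,
        // so they are rewritten with the non-accumulating start opcode; later
        // nodes continue one of these sub-chains via their accumulator
        // operand.)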
        AccReg = Instr->getOperand(0).getReg();
        unsigned OpCode = getAccumulationStartOpcode(Root.getOpcode());

        MIB = BuildMI(MF, MIMetadata(*Instr), TII->get(OpCode), AccReg)
                  .addReg(Instr->getOperand(2).getReg(),
                          getKillRegState(Instr->getOperand(2).isKill()))
                  .addReg(Instr->getOperand(3).getReg(),
                          getKillRegState(Instr->getOperand(3).isKill()));
      } else {
        // For the remaining cases, we need to use an output register of one
        // of the newly inserted instructions as operand 1.
        AccReg = Instr->getOperand(0).getReg() == Root.getOperand(0).getReg()
                     ? MRI.createVirtualRegister(
                           MRI.getRegClass(Root.getOperand(0).getReg()))
                     : Instr->getOperand(0).getReg();
        assert(IndexedReg.index() >= MaxWidth);
        auto AccumulatorInput =
            ChainRegs[Depth - (IndexedReg.index() - MaxWidth) - 1];
        MIB = BuildMI(MF, MIMetadata(*Instr), TII->get(Instr->getOpcode()),
                      AccReg)
                  .addReg(AccumulatorInput, getKillRegState(true))
                  .addReg(Instr->getOperand(2).getReg(),
                          getKillRegState(Instr->getOperand(2).isKill()))
                  .addReg(Instr->getOperand(3).getReg(),
                          getKillRegState(Instr->getOperand(3).isKill()));
      }

      MIB->setFlags(Instr->getFlags());
      InstIdxForVirtReg.insert(std::make_pair(AccReg, InsInstrs.size()));
      InsInstrs.push_back(MIB);
      DelInstrs.push_back(Instr);
    }

    SmallVector<Register, 8> RegistersToReduce;
    for (unsigned i = (InsInstrs.size() - MaxWidth); i < InsInstrs.size();
         ++i) {
      auto Reg = InsInstrs[i]->getOperand(0).getReg();
      RegistersToReduce.push_back(Reg);
    }

    while (RegistersToReduce.size() > 1)
      reduceAccumulatorTree(RegistersToReduce, InsInstrs, MF, Root, MRI,
                            InstIdxForVirtReg, Root.getOperand(0).getReg());

    break;
  }
  }
}

MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
  return MachineTraceStrategy::TS_MinInstrCount;
}

bool TargetInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getMF();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Remat clients assume operand 0 is the defined register.
  if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
    return false;
  Register DefReg = MI.getOperand(0).getReg();

  // A sub-register definition can only be rematerialized if the instruction
  // doesn't read the other parts of the register. Otherwise it is really a
  // read-modify-write operation on the full virtual register which cannot be
  // moved safely.
  if (DefReg.isVirtual() && MI.getOperand(0).getSubReg() &&
      MI.readsVirtualRegister(DefReg))
    return false;

  // A load from a fixed stack slot can be rematerialized. This may be
  // redundant with subsequent checks, but it's target-independent,
  // simple, and a common case.
  int FrameIdx = 0;
  if (isLoadFromStackSlot(MI, FrameIdx) &&
      MF.getFrameInfo().isImmutableObjectIndex(FrameIdx))
    return true;

  // Avoid instructions obviously unsafe for remat.
  if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
      MI.hasUnmodeledSideEffects())
    return false;

  // Don't remat inline asm. We have no idea how expensive it is
  // even if it's side effect free.
  if (MI.isInlineAsm())
    return false;

  // Avoid instructions which load from potentially varying memory.
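  // (A load is acceptable only when it is dereferenceable and invariant,
  // e.g. a constant-pool load; a load through an arbitrary pointer could
  // observe stores executed between the original position and the point
  // where the instruction is rematerialized.)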
  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad())
    return false;

  // If any of the registers accessed are non-constant, conservatively assume
  // the instruction is not rematerializable.
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg()) continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // Check for a well-behaved physical register.
    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI.isConstantPhysReg(Reg))
          return false;
      } else {
        // A physreg def. We can't remat it.
        return false;
      }
      continue;
    }

    // Only allow one virtual-register def. There may be multiple defs of the
    // same virtual register, though.
    if (MO.isDef() && Reg != DefReg)
      return false;

    // Don't allow any virtual-register uses. Rematting an instruction with
    // virtual register uses would lengthen the live ranges of the uses, which
    // is not necessarily a good idea, certainly not "trivial".
    if (MO.isUse())
      return false;
  }

  // Everything checked out.
  return true;
}

int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
  const MachineFunction *MF = MI.getMF();
  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
  bool StackGrowsDown =
      TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();

  if (!isFrameInstr(MI))
    return 0;

  int SPAdj = TFI->alignSPAdjust(getFrameSize(MI));

  if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
      (StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
    SPAdj = -SPAdj;

  return SPAdj;
}

/// isSchedulingBoundary - Test if the given instruction should be
/// considered a scheduling boundary. This primarily includes labels
/// and terminators.
bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                           const MachineBasicBlock *MBB,
                                           const MachineFunction &MF) const {
  // Terminators and labels can't be scheduled around.
  if (MI.isTerminator() || MI.isPosition())
    return true;

  // INLINEASM_BR can jump to another block.
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
}

// Provide a global flag for disabling the PreRA hazard recognizer that targets
// may choose to honor.
bool TargetInstrInfo::usePreRAHazardRecognizer() const {
  return !DisableHazardRecognizer;
}

// Default implementation of CreateTargetHazardRecognizer.
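// It never reports a hazard, so scheduling proceeds unconstrained. A target
// with pre-RA structural hazards would typically override this; a minimal
// sketch (hypothetical MyTargetInstrInfo, itinerary-based) might look like:
//
//   ScheduleHazardRecognizer *MyTargetInstrInfo::CreateTargetHazardRecognizer(
//       const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const {
//     return new ScoreboardHazardRecognizer(STI->getInstrItineraryData(), DAG,
//                                           "pre-RA-sched");
//   }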
ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetHazardRecognizer(
    const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const {
  // Dummy hazard recognizer allows all instructions to issue.
  return new ScheduleHazardRecognizer();
}

// Default implementation of CreateTargetMIHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
  return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
}

// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetPostRAHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAG *DAG) const {
  return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}

// Default implementation of getMemOperandWithOffset.
bool TargetInstrInfo::getMemOperandWithOffset(
    const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
    bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
  SmallVector<const MachineOperand *, 4> BaseOps;
  LocationSize Width = LocationSize::precise(0);
  if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
                                     Width, TRI) ||
      BaseOps.size() != 1)
    return false;
  BaseOp = BaseOps.front();
  return true;
}

//===----------------------------------------------------------------------===//
// SelectionDAG latency interface.
//===----------------------------------------------------------------------===//

std::optional<unsigned>
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                   SDNode *DefNode, unsigned DefIdx,
                                   SDNode *UseNode, unsigned UseIdx) const {
  if (!ItinData || ItinData->isEmpty())
    return std::nullopt;

  if (!DefNode->isMachineOpcode())
    return std::nullopt;

  unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
  if (!UseNode->isMachineOpcode())
    return ItinData->getOperandCycle(DefClass, DefIdx);
  unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}

unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                          SDNode *N) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  if (!N->isMachineOpcode())
    return 1;

  return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
}

//===----------------------------------------------------------------------===//
// MachineInstr latency interface.
//===----------------------------------------------------------------------===//

unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                         const MachineInstr &MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  unsigned Class = MI.getDesc().getSchedClass();
  int UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (UOps >= 0)
    return UOps;

  // The # of u-ops is dynamically determined. The specific target should
  // override this function to return the right number.
  return 1;
}

/// Return the default expected latency for a def based on its opcode.
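/// For instance (numbers purely illustrative), with a scheduling model where
/// LoadLatency is 4 and HighLatency is 10: a COPY (transient) gets 0, a load
/// gets 4, an opcode the target reports through isHighLatencyDef() gets 10,
/// and everything else defaults to 1.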
unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
                                            const MachineInstr &DefMI) const {
  if (DefMI.isTransient())
    return 0;
  if (DefMI.mayLoad())
    return SchedModel.LoadLatency;
  if (isHighLatencyDef(DefMI.getOpcode()))
    return SchedModel.HighLatency;
  return 1;
}

unsigned TargetInstrInfo::getPredicationCost(const MachineInstr &) const {
  return 0;
}

unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                          const MachineInstr &MI,
                                          unsigned *PredCost) const {
  // Default to one cycle for no itinerary. However, an "empty" itinerary may
  // still have a MinLatency property, which getStageLatency checks.
  if (!ItinData)
    return MI.mayLoad() ? 2 : 1;

  return ItinData->getStageLatency(MI.getDesc().getSchedClass());
}

bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
                                       const MachineInstr &DefMI,
                                       unsigned DefIdx) const {
  const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DefClass = DefMI.getDesc().getSchedClass();
  std::optional<unsigned> DefCycle =
      ItinData->getOperandCycle(DefClass, DefIdx);
  return DefCycle && DefCycle <= 1U;
}

bool TargetInstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
  // TODO: We don't split functions where a section attribute has been set
  // since the split part may not be placed in a contiguous region. It may also
  // be more beneficial to augment the linker to ensure contiguous layout of
  // split functions within the same section as specified by the attribute.
  if (MF.getFunction().hasSection())
    return false;

  // We don't want to proceed further for cold functions
  // or functions of unknown hotness. Lukewarm functions have no prefix.
  std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
  if (SectionPrefix &&
      (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
    return false;
  }

  return true;
}

std::optional<ParamLoadedValue>
TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
                                     Register Reg) const {
  const MachineFunction *MF = MI.getMF();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
  int64_t Offset;
  bool OffsetIsScalable;

  // To simplify the sub-register handling, verify that we only need to
  // consider physical registers.
  assert(MF->getProperties().hasNoVRegs());

  if (auto DestSrc = isCopyInstr(MI)) {
    Register DestReg = DestSrc->Destination->getReg();

    // If the copy destination is the forwarding reg, describe the forwarding
    // reg using the copy source as the backup location. Example:
    //
    //   x0 = MOV x7
    //   call callee(x0)      ; x0 described as x7
    if (Reg == DestReg)
      return ParamLoadedValue(*DestSrc->Source, Expr);

    // If the target's hook couldn't describe this copy, give up.
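    // (Reg is not the copy's destination here, so this instruction does not
    // produce a describable value for it.)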
    return std::nullopt;
  } else if (auto RegImm = isAddImmediate(MI, Reg)) {
    Register SrcReg = RegImm->Reg;
    Offset = RegImm->Imm;
    Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset);
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
  } else if (MI.hasOneMemOperand()) {
    // Only describe memory which provably does not escape the function. As
    // described in llvm.org/PR43343, escaped memory may be clobbered by the
    // callee (or by another thread).
    const auto &TII = MF->getSubtarget().getInstrInfo();
    const MachineFrameInfo &MFI = MF->getFrameInfo();
    const MachineMemOperand *MMO = MI.memoperands()[0];
    const PseudoSourceValue *PSV = MMO->getPseudoValue();

    // If the address points to "special" memory (e.g. a spill slot), it's
    // sufficient to check that it isn't aliased by any high-level IR value.
    if (!PSV || PSV->mayAlias(&MFI))
      return std::nullopt;

    const MachineOperand *BaseOp;
    if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
                                      TRI))
      return std::nullopt;

    // FIXME: Scalable offsets are not yet handled in the offset code below.
    if (OffsetIsScalable)
      return std::nullopt;

    // TODO: Can currently only handle mem instructions with a single define.
    // An example from the x86 target:
    //   ...
    //   DIV64m $rsp, 1, $noreg, 24, $noreg,
    //       implicit-def dead $rax, implicit-def $rdx
    //   ...
    //
    if (MI.getNumExplicitDefs() != 1)
      return std::nullopt;

    // TODO: In what way do we need to take Reg into consideration here?

    SmallVector<uint64_t, 8> Ops;
    DIExpression::appendOffset(Ops, Offset);
    Ops.push_back(dwarf::DW_OP_deref_size);
    Ops.push_back(MMO->getSize().hasValue() ? MMO->getSize().getValue()
                                            : ~UINT64_C(0));
    Expr = DIExpression::prependOpcodes(Expr, Ops);
    return ParamLoadedValue(*BaseOp, Expr);
  }

  return std::nullopt;
}

// Get the call frame size just before MI.
unsigned TargetInstrInfo::getCallFrameSizeAt(MachineInstr &MI) const {
  // Search backwards from MI for the most recent call frame instruction.
  MachineBasicBlock *MBB = MI.getParent();
  for (auto &AdjI : reverse(make_range(MBB->instr_begin(), MI.getIterator()))) {
    if (AdjI.getOpcode() == getCallFrameSetupOpcode())
      return getFrameTotalSize(AdjI);
    if (AdjI.getOpcode() == getCallFrameDestroyOpcode())
      return 0;
  }

  // If none was found, use the call frame size from the start of the basic
  // block.
  return MBB->getCallFrameSize();
}

/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overridden by the target.
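/// Note that, unlike the SDNode-based overload above, this variant does not
/// null-check ItinData; callers are expected to pass a valid itinerary.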
std::optional<unsigned> TargetInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
  unsigned DefClass = DefMI.getDesc().getSchedClass();
  unsigned UseClass = UseMI.getDesc().getSchedClass();
  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}

bool TargetInstrInfo::getRegSequenceInputs(
    const MachineInstr &MI, unsigned DefIdx,
    SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
  assert((MI.isRegSequence() || MI.isRegSequenceLike()) &&
         "Instruction does not have the proper type");

  if (!MI.isRegSequence())
    return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);

  // We are looking at:
  //   Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  assert(DefIdx == 0 && "REG_SEQUENCE only has one def");
  for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
       OpIdx += 2) {
    const MachineOperand &MOReg = MI.getOperand(OpIdx);
    if (MOReg.isUndef())
      continue;
    const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
    assert(MOSubIdx.isImm() &&
           "One of the subindices of the reg_sequence is not an immediate");
    // Record Reg:SubReg, SubIdx.
    InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(),
                                            (unsigned)MOSubIdx.getImm()));
  }
  return true;
}

bool TargetInstrInfo::getExtractSubregInputs(
    const MachineInstr &MI, unsigned DefIdx,
    RegSubRegPairAndIdx &InputReg) const {
  assert((MI.isExtractSubreg() || MI.isExtractSubregLike()) &&
         "Instruction does not have the proper type");

  if (!MI.isExtractSubreg())
    return getExtractSubregLikeInputs(MI, DefIdx, InputReg);

  // We are looking at:
  //   Def = EXTRACT_SUBREG v0.sub1, sub0
  assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
  const MachineOperand &MOReg = MI.getOperand(1);
  if (MOReg.isUndef())
    return false;
  const MachineOperand &MOSubIdx = MI.getOperand(2);
  assert(MOSubIdx.isImm() &&
         "The subindex of the extract_subreg is not an immediate");

  InputReg.Reg = MOReg.getReg();
  InputReg.SubReg = MOReg.getSubReg();
  InputReg.SubIdx = (unsigned)MOSubIdx.getImm();
  return true;
}

bool TargetInstrInfo::getInsertSubregInputs(
    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
    RegSubRegPairAndIdx &InsertedReg) const {
  assert((MI.isInsertSubreg() || MI.isInsertSubregLike()) &&
         "Instruction does not have the proper type");

  if (!MI.isInsertSubreg())
    return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);

  // We are looking at:
  //   Def = INSERT_SUBREG v0, v1, sub0
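  // where operand 1 (v0) is the register being inserted into, operand 2 (v1)
  // is the value being inserted, and operand 3 (sub0) is the subregister
  // index at which it lands.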
  assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
  const MachineOperand &MOBaseReg = MI.getOperand(1);
  const MachineOperand &MOInsertedReg = MI.getOperand(2);
  if (MOInsertedReg.isUndef())
    return false;
  const MachineOperand &MOSubIdx = MI.getOperand(3);
  assert(MOSubIdx.isImm() &&
         "The subindex of the insert_subreg is not an immediate");
  BaseReg.Reg = MOBaseReg.getReg();
  BaseReg.SubReg = MOBaseReg.getSubReg();

  InsertedReg.Reg = MOInsertedReg.getReg();
  InsertedReg.SubReg = MOInsertedReg.getSubReg();
  InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
  return true;
}

// Returns a MIRPrinter comment for this machine operand.
std::string TargetInstrInfo::createMIROperandComment(
    const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
    const TargetRegisterInfo *TRI) const {

  if (!MI.isInlineAsm())
    return "";

  std::string Flags;
  raw_string_ostream OS(Flags);

  if (OpIdx == InlineAsm::MIOp_ExtraInfo) {
    // Print HasSideEffects, MayLoad, MayStore, IsAlignStack.
    unsigned ExtraInfo = Op.getImm();
    bool First = true;
    for (StringRef Info : InlineAsm::getExtraInfoNames(ExtraInfo)) {
      if (!First)
        OS << " ";
      First = false;
      OS << Info;
    }

    return Flags;
  }

  int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx);
  if (FlagIdx < 0 || (unsigned)FlagIdx != OpIdx)
    return "";

  assert(Op.isImm() && "Expected flag operand to be an immediate");
  // Pretty print the inline asm operand descriptor.
  unsigned Flag = Op.getImm();
  const InlineAsm::Flag F(Flag);
  OS << F.getKindName();

  unsigned RCID;
  if (!F.isImmKind() && !F.isMemKind() && F.hasRegClassConstraint(RCID)) {
    if (TRI)
      OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
    else
      OS << ":RC" << RCID;
  }

  if (F.isMemKind()) {
    InlineAsm::ConstraintCode MCID = F.getMemoryConstraintID();
    OS << ":" << InlineAsm::getMemConstraintName(MCID);
  }

  unsigned TiedTo;
  if (F.isUseOperandTiedToDef(TiedTo))
    OS << " tiedto:$" << TiedTo;

  if ((F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isRegUseKind()) &&
      F.getRegMayBeFolded())
    OS << " foldable";

  return Flags;
}

TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default;

void TargetInstrInfo::mergeOutliningCandidateAttributes(
    Function &F, std::vector<outliner::Candidate> &Candidates) const {
  // Include target features from an arbitrary candidate for the outlined
  // function. This makes sure the outlined function knows what kinds of
  // instructions are going into it. This is fine, since all parent functions
  // must necessarily support the instructions that are in the outlined region.
  outliner::Candidate &FirstCand = Candidates.front();
  const Function &ParentFn = FirstCand.getMF()->getFunction();
  if (ParentFn.hasFnAttribute("target-features"))
    F.addFnAttr(ParentFn.getFnAttribute("target-features"));
  if (ParentFn.hasFnAttribute("target-cpu"))
    F.addFnAttr(ParentFn.getFnAttribute("target-cpu"));

  // Set nounwind, so we don't generate eh_frame.
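  // (This is only safe when every candidate's parent function is itself
  // nounwind, which is exactly what the check below verifies.)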
  if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
        return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
      }))
    F.addFnAttr(Attribute::NoUnwind);
}

outliner::InstrType
TargetInstrInfo::getOutliningType(const MachineModuleInfo &MMI,
                                  MachineBasicBlock::iterator &MIT,
                                  unsigned Flags) const {
  MachineInstr &MI = *MIT;

  // NOTE: MI.isMetaInstruction() will match CFI_INSTRUCTION, but some targets
  // have support for outlining those. Special-case that here.
  if (MI.isCFIInstruction())
    // Just go right to the target implementation.
    return getOutliningTypeImpl(MMI, MIT, Flags);

  // Be conservative about inline assembly.
  if (MI.isInlineAsm())
    return outliner::InstrType::Illegal;

  // Labels generally can't safely be outlined.
  if (MI.isLabel())
    return outliner::InstrType::Illegal;

  // Don't let debug instructions impact analysis.
  if (MI.isDebugInstr())
    return outliner::InstrType::Invisible;

  // Some other special cases.
  switch (MI.getOpcode()) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::LIFETIME_START:
  case TargetOpcode::LIFETIME_END:
    return outliner::InstrType::Invisible;
  default:
    break;
  }

  // Is this a terminator for a basic block?
  if (MI.isTerminator()) {
    // If this is a branch to another block, we can't outline it.
    if (!MI.getParent()->succ_empty())
      return outliner::InstrType::Illegal;

    // Don't outline if the branch is not unconditional.
    if (isPredicated(MI))
      return outliner::InstrType::Illegal;
  }

  // Make sure none of the operands of this instruction do anything that
  // might break if they're moved outside their current function.
  // This includes MachineBasicBlock references, BlockAddresses,
  // constant pool indices, and jump table indices.
  //
  // A quick note on MO_TargetIndex:
  // This doesn't seem to be used in any of the architectures that the
  // MachineOutliner supports, but it was still filtered out in all of them.
  // There was one exception (RISC-V), but MO_TargetIndex also isn't used
  // there. As such, this check is removed both here and in the
  // target-specific implementations. Instead, we assert to make sure this
  // doesn't catch anyone off-guard somewhere down the line.
  for (const MachineOperand &MOP : MI.operands()) {
    // If you hit this assertion, please remove it and adjust
    // `getOutliningTypeImpl` for your target appropriately if necessary.
    // Adding the assertion back to other supported architectures
    // would be nice too :)
    assert(!MOP.isTargetIndex() && "This isn't used quite yet!");

    // CFI instructions should already have been filtered out at this point.
    assert(!MOP.isCFIIndex() && "CFI instructions handled elsewhere!");

    // PrologEpilogInserter should've already run at this point.
    assert(!MOP.isFI() && "FrameIndex instructions should be gone by now!");

    if (MOP.isMBB() || MOP.isBlockAddress() || MOP.isCPI() || MOP.isJTI())
      return outliner::InstrType::Illegal;
  }

  // If we don't know, delegate to the target-specific hook.
  return getOutliningTypeImpl(MMI, MIT, Flags);
}

bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                             unsigned &Flags) const {
  // Some instrumentations create special TargetOpcodes at the start which
  // expand to special code sequences that must be present.
  auto First = MBB.getFirstNonDebugInstr();
  if (First == MBB.end())
    return true;

  if (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
      First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER)
    return false;

  // Some instrumentations create special pseudo-instructions at or just
  // before the end that must be present.
  auto Last = MBB.getLastNonDebugInstr();
  if (Last->getOpcode() == TargetOpcode::PATCHABLE_RET ||
      Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
    return false;

  if (Last != First && Last->isReturn()) {
    --Last;
    if (Last->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_EXIT ||
        Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
      return false;
  }
  return true;
}

bool TargetInstrInfo::isGlobalMemoryObject(const MachineInstr *MI) const {
  return MI->isCall() || MI->hasUnmodeledSideEffects() ||
         (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
}