//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Utility to store a worklist of machine instructions.
struct SIInstrWorklist {
  SIInstrWorklist() : InstrList() {}

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    auto iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    auto iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  void clear() {
    InstrList.clear();
    DeferredList.clear();
  }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// DeferredList contains MachineInstrs that the insert method has set
  /// aside to be added back later.
  SetVector<MachineInstr *> DeferredList;
};

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
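  // (A hedged illustration of that invariant: pairing each predicate with its
  // negation, e.g. SCC_TRUE (1) with SCC_FALSE (-1) and VCCNZ (2) with
  // VCCZ (-2), presumably lets a branch condition be inverted simply by
  // flipping the sign of its predicate value.)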
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;
private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  /// If the specific machine instruction is an instruction that moves or
  /// copies a value from one register to another, return the destination and
  /// source registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           unsigned NumLoads, unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;
  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;
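  // An illustrative caller-side sketch of the standard TargetInstrInfo branch
  // protocol implemented above (not part of this interface itself):
  //   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  //   SmallVector<MachineOperand, 4> Cond;
  //   if (!TII->analyzeBranch(MBB, TBB, FBB, Cond))
  //     ...; // analyzable: Cond/TBB/FBB can be fed back into insertBranch
  //          // or flipped via reverseBranchCondition.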
  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }
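  // These paired predicates are equivalent: both test the same TSFlags bit.
  // The member (opcode-based) form exists for when no MachineInstr is at
  // hand. An illustrative pair of queries (the opcode is only an example):
  //   if (SIInstrInfo::isVALU(MI)) ...             // from an instruction
  //   if (TII->isVALU(AMDGPU::V_ADD_U32_e32)) ...  // from a raw opcode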
  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }
  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI);
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool sopkIsZext(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  bool sopkIsZext(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const {
    return isInlineConstant(Imm.bitcastToAPInt());
  }
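  // A hedged caller-side sketch: instruction-size estimation typically keys
  // off the isInlineConstant overloads below when deciding whether an operand
  // costs an extra 32-bit literal dword, along the lines of
  //   if (MO.isImm() && !TII->isInlineConstant(MO, OpInfo))
  //     Size += 4; // operand must be encoded as a literal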
  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4-bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;
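  // An illustrative shrink pattern (a sketch of the assumed caller protocol
  // for the helpers above, pairing canShrink with buildShrunkInst and the
  // e64->e32 opcode mapping declared later in this file):
  //   if (TII->hasVALU32BitEncoding(MI.getOpcode()) &&
  //       TII->canShrink(MI, MRI))
  //     TII->buildShrunkInst(MI, AMDGPU::getVOPe32(MI.getOpcode()));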
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, inferring the correct register
  /// class from the other operands where necessary.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIdx operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;
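  // A sketch of the assumed SALU-to-VALU rewrite flow tying the helpers above
  // to legalizeOperands below (caller-side pseudocode, not a fixed contract):
  //   Inst.setDesc(TII->get(NewVALUOpcode));     // swap in the VALU opcode
  //   if (MachineBasicBlock *NewMBB =
  //           TII->legalizeOperands(Inst, MDT))  // may create control flow
  //     ...; // continue processing in NewMBB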
  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of the MachineInstrs
  /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p OperandName. If \p MI does not have an
  /// operand with that name, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }
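  // An illustrative named-operand lookup (the operand name is only an
  // example):
  //   if (const MachineOperand *SOff =
  //           TII->getNamedOperand(MI, AMDGPU::OpName::soffset))
  //     ...; // MI has an soffset operand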
  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;
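  // An illustrative use of getAddNoCarry (below); per its contract the caller
  // appends the two source operands (register names here are assumed):
  //   TII->getAddNoCarry(MBB, I, DL, DestReg)
  //       .addReg(LHSReg)
  //       .addReg(RHSReg);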
  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  static bool isLegalMUBUFImmOffset(unsigned Imm) {
    return isUInt<12>(Imm);
  }

  static unsigned getMaxMUBUFImmOffset();

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;

  /// Returns whether \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction of the given \p FlatVariant; for a variant that
  /// interprets the offset as signed, legality is checked against the signed
  /// range.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;
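  // A hedged example of splitting an out-of-range global offset with the
  // helper above (the variant and address-space constants are the ones this
  // interface already uses):
  //   auto [ImmField, Remainder] =
  //       TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
  //                            SIInstrFlags::FlatGlobal);
  //   // ImmField goes in the instruction; Remainder is added to the address.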
  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter.get())
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce the even alignment of operand \p OpName if required by the target.
  // This is used if an operand is a 32-bit register but needs to be aligned
  // regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};

/// \brief Returns true if the reg:subreg pair \p P is of the register class
/// \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
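// An illustrative guard built on the queries above (assumed SSA machine IR):
// a fold across EXEC-sensitive code is only safe when EXEC is provably
// unchanged between def and use:
//   if (!execMayBeModifiedBeforeUse(MRI, VReg, DefMI, UseMI))
//     ...; // EXEC unchanged; the fold is safe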
namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns earlyclobber version of a MAC MFMA if one exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H