//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

/// Utility to store a worklist of machine instructions.
struct SIInstrWorklist {
  SIInstrWorklist() = default;

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    auto iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    auto iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  void clear() {
    InstrList.clear();
    DeferredList.clear();
  }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// DeferredList contains the MachineInstrs that the insert method has
  /// postponed instead of adding to InstrList.
  SetVector<MachineInstr *> DeferredList;
};

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
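  // For example, the inverse of VCCNZ (2) is VCCZ (-2), so a predicate can be
  // inverted simply by negating its value (illustrative):
  //   BranchPredicate Inv = static_cast<BranchPredicate>(-VCCNZ); // == VCCZ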
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;

private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
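  /// A non-authoritative usage sketch: callers normally go through
  /// TargetInstrInfo::isCopyInstr, e.g.
  ///   if (auto DestSrc = TII->isCopyInstr(MI))
  ///     Register Dst = DestSrc->Destination->getReg();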
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
      unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
      SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint16_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
  }
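
  // Illustrative use of these TSFlags predicates (a sketch, not code from
  // this file): a pass collecting memory-clause candidates might test
  //   if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSMRD(MI))
  //     Candidates.push_back(&MI); // 'Candidates' is hypothetical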
  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  static bool isLDSDMA(const MachineInstr &MI) {
    return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
  }

  bool isLDSDMA(uint16_t Opcode) const {
    return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
  }

  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Returns true for a FLAT encoded instruction that accesses a specific
  // segment, i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
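  // For example (illustrative opcode pair), FLAT_LOAD_DWORD and
  // GLOBAL_LOAD_DWORD both satisfy isFLAT, but only the latter satisfies
  // isSegmentSpecificFLAT.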
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR are a special case:
  // they spill SGPRs to VGPRs but are implemented as VALU instructions.
  // Checking only the Spill bit together with the instruction type would
  // therefore misclassify them, so they need an explicit opcode check.
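  // For example, SI_SPILL_S32_TO_VGPR carries both the Spill flag and the
  // VALU flag, so without the explicit opcode test isVGPRSpill below would
  // mistakenly claim it.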
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }

  bool isSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Spill;
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  // Most SOPK instructions treat the immediate as signed 16-bit; however,
  // some use it as unsigned.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }

  // Check whether an opcode starts a barrier. Before gfx12 this is just
  // S_BARRIER; with the addition of S_BARRIER_SIGNAL* / S_BARRIER_WAIT, the
  // barrier start to check for is S_BARRIER_SIGNAL*.
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::DS_GWS_INIT ||
           Opcode == AMDGPU::DS_GWS_BARRIER;
  }

  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }

  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// This function is used to determine if an instruction can be safely
  /// executed under EXEC = 0 without hardware error, indeterminate results,
  /// and/or visible effects on future vector execution or outside the shader.
  /// Note: as of 2024 the only use of this is SIPreEmitPeephole, where it is
  /// used in removing branches over short EXEC = 0 sequences.
  /// As such it embeds certain assumptions which may not apply to every case
  /// of EXEC = 0 execution.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const;

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4 bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO in \p MI substituted with \p DefMO, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
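  /// (SGPR operands and literal constants are the typical constant-bus users;
  /// this predicate backs the constant-bus checks in verifyInstruction and
  /// legalizeOperandsVOP3.)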
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand \p OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIdx operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it were the \p OpIdx operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change the SADDR form of a FLAT \p Inst to its VADDR form if the saddr
  /// operand was moved to a VGPR. \returns true if this succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstrs
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Build instructions that simulate the behavior of an `s_trap 2`
  /// instruction for hardware (namely, gfx11) that runs in PRIV=1 mode, where
  /// s_trap is interpreted as a nop.
  MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
                                         MachineBasicBlock &MBB,
                                         MachineInstr &MI,
                                         const DebugLoc &DL) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \p Op, this function returns nullptr.
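  /// Illustrative use (assuming the named operand exists for this opcode):
  ///   if (const MachineOperand *Src0 =
  ///           TII->getNamedOperand(MI, AMDGPU::OpName::src0))
  ///     foldSrc(*Src0); // 'foldSrc' is a hypothetical consumer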
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get the required immediate operand.
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;

  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction with the given \p FlatVariant.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// Returns true if negative offsets are allowed for the given \p
  /// FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter)
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce even alignment of the register operand \p OpName if required by
  // the target. This is used when an operand is a 32-bit register but still
  // needs to be aligned.
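  // Illustrative call shape (hypothetical operand choice):
  //   TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::src0);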
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};

/// \brief Returns true if the reg:subreg pair \p P is of register class \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create a RegSubRegPair from a register MachineOperand.
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair,
/// skipping copy-like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns earlyclobber version of a MAC MFMA if it exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace AMDGPU {
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H