//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

// Forward declarations of types that are only named in this header.
class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
/// Stored in the first target-specific MachineMemOperand flag bit.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
/// Stored in the second target-specific MachineMemOperand flag bit.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

/// Utility to store a worklist of machine instructions.
49 struct SIInstrWorklist { 50 SIInstrWorklist() = default; 51 52 void insert(MachineInstr *MI); 53 54 MachineInstr *top() const { 55 auto iter = InstrList.begin(); 56 return *iter; 57 } 58 59 void erase_top() { 60 auto iter = InstrList.begin(); 61 InstrList.erase(iter); 62 } 63 64 bool empty() const { return InstrList.empty(); } 65 66 void clear() { 67 InstrList.clear(); 68 DeferredList.clear(); 69 } 70 71 bool isDeferred(MachineInstr *MI); 72 73 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; } 74 75 private: 76 /// InstrList contains the MachineInstrs. 77 SetVector<MachineInstr *> InstrList; 78 /// Deferred instructions are specific MachineInstr 79 /// that will be added by insert method. 80 SetVector<MachineInstr *> DeferredList; 81 }; 82 83 class SIInstrInfo final : public AMDGPUGenInstrInfo { 84 private: 85 const SIRegisterInfo RI; 86 const GCNSubtarget &ST; 87 TargetSchedModel SchedModel; 88 mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter; 89 90 // The inverse predicate should have the negative value. 
enum BranchPredicate {
    // Every predicate and its inverse are encoded as N and -N. Note that the
    // EXEC pair gives the negative value to the "NZ" member, which is the
    // opposite of how the VCC pair is laid out; the N / -N relationship still
    // holds.
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  // Small set-vector of instruction pointers with 32 inline elements.
  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  // Conversions between a BranchPredicate and a branch opcode. Declarations
  // only; the definitions are not part of this header.
  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  // NOTE(review): judging by the names, these build an extract of sub-register
  // \p SubIdx from \p SuperReg, the second form also accepting an immediate
  // operand -- confirm against the definitions.
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;

private:
  // Private helpers, all defined out of line. Those taking an SIInstrWorklist
  // receive the worklist they operate on; the optional MachineDominatorTree
  // argument defaults to nullptr where a default is given.
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  // \p OpIndices is declared as an array of three operand indices.
  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another register, return the destination
  /// and source registers as machine operands.
std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  // NOTE(review): presumably swaps the source-modifier operands named by
  // \p Src0OpName / \p Src1OpName when the two sources are commuted -- confirm
  // against the definition.
  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  // Target-specific operand flags. MO_MASK covers the low four bits. Two
  // names are plain aliases of their "_LO" variant: MO_GOTPCREL32 (== 2) and
  // MO_REL32 (== 4). The value 7 is not assigned.
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  /// Return the register info object owned by this instruction info.
  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  /// Return the subtarget this instruction info was constructed for.
  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  // TargetInstrInfo hooks and AMDGPU-specific helpers; all defined out of line.
  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                             unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register.  If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  // NOTE(review): the return type is a signed int, which suggests a negative
  // value may signal "no commuted opcode" -- confirm against the definition.
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  /// Convenience overload: forwards the opcode of \p MI to
  /// commuteOpcode(unsigned).
  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  // Non-virtual variant that works from an instruction descriptor instead of
  // a MachineInstr.
  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow instructions.
335 bool hasDivergentBranch(const MachineBasicBlock *MBB) const; 336 337 void insertIndirectBranch(MachineBasicBlock &MBB, 338 MachineBasicBlock &NewDestBB, 339 MachineBasicBlock &RestoreBB, const DebugLoc &DL, 340 int64_t BrOffset, RegScavenger *RS) const override; 341 342 bool analyzeBranchImpl(MachineBasicBlock &MBB, 343 MachineBasicBlock::iterator I, 344 MachineBasicBlock *&TBB, 345 MachineBasicBlock *&FBB, 346 SmallVectorImpl<MachineOperand> &Cond, 347 bool AllowModify) const; 348 349 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 350 MachineBasicBlock *&FBB, 351 SmallVectorImpl<MachineOperand> &Cond, 352 bool AllowModify = false) const override; 353 354 unsigned removeBranch(MachineBasicBlock &MBB, 355 int *BytesRemoved = nullptr) const override; 356 357 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 358 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, 359 const DebugLoc &DL, 360 int *BytesAdded = nullptr) const override; 361 362 bool reverseBranchCondition( 363 SmallVectorImpl<MachineOperand> &Cond) const override; 364 365 bool canInsertSelect(const MachineBasicBlock &MBB, 366 ArrayRef<MachineOperand> Cond, Register DstReg, 367 Register TrueReg, Register FalseReg, int &CondCycles, 368 int &TrueCycles, int &FalseCycles) const override; 369 370 void insertSelect(MachineBasicBlock &MBB, 371 MachineBasicBlock::iterator I, const DebugLoc &DL, 372 Register DstReg, ArrayRef<MachineOperand> Cond, 373 Register TrueReg, Register FalseReg) const override; 374 375 void insertVectorSelect(MachineBasicBlock &MBB, 376 MachineBasicBlock::iterator I, const DebugLoc &DL, 377 Register DstReg, ArrayRef<MachineOperand> Cond, 378 Register TrueReg, Register FalseReg) const; 379 380 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, 381 Register &SrcReg2, int64_t &CmpMask, 382 int64_t &CmpValue) const override; 383 384 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, 385 Register SrcReg2, int64_t 
CmpMask, int64_t CmpValue, 386 const MachineRegisterInfo *MRI) const override; 387 388 bool 389 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 390 const MachineInstr &MIb) const override; 391 392 static bool isFoldableCopy(const MachineInstr &MI); 393 394 void removeModOperands(MachineInstr &MI) const; 395 396 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, 397 MachineRegisterInfo *MRI) const final; 398 399 unsigned getMachineCSELookAheadLimit() const override { return 500; } 400 401 MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, 402 LiveIntervals *LIS) const override; 403 404 bool isSchedulingBoundary(const MachineInstr &MI, 405 const MachineBasicBlock *MBB, 406 const MachineFunction &MF) const override; 407 408 static bool isSALU(const MachineInstr &MI) { 409 return MI.getDesc().TSFlags & SIInstrFlags::SALU; 410 } 411 412 bool isSALU(uint16_t Opcode) const { 413 return get(Opcode).TSFlags & SIInstrFlags::SALU; 414 } 415 416 static bool isVALU(const MachineInstr &MI) { 417 return MI.getDesc().TSFlags & SIInstrFlags::VALU; 418 } 419 420 bool isVALU(uint16_t Opcode) const { 421 return get(Opcode).TSFlags & SIInstrFlags::VALU; 422 } 423 424 static bool isImage(const MachineInstr &MI) { 425 return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI); 426 } 427 428 bool isImage(uint16_t Opcode) const { 429 return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode); 430 } 431 432 static bool isVMEM(const MachineInstr &MI) { 433 return isMUBUF(MI) || isMTBUF(MI) || isImage(MI); 434 } 435 436 bool isVMEM(uint16_t Opcode) const { 437 return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode); 438 } 439 440 static bool isSOP1(const MachineInstr &MI) { 441 return MI.getDesc().TSFlags & SIInstrFlags::SOP1; 442 } 443 444 bool isSOP1(uint16_t Opcode) const { 445 return get(Opcode).TSFlags & SIInstrFlags::SOP1; 446 } 447 448 static bool isSOP2(const MachineInstr &MI) { 449 return MI.getDesc().TSFlags & 
SIInstrFlags::SOP2; 450 } 451 452 bool isSOP2(uint16_t Opcode) const { 453 return get(Opcode).TSFlags & SIInstrFlags::SOP2; 454 } 455 456 static bool isSOPC(const MachineInstr &MI) { 457 return MI.getDesc().TSFlags & SIInstrFlags::SOPC; 458 } 459 460 bool isSOPC(uint16_t Opcode) const { 461 return get(Opcode).TSFlags & SIInstrFlags::SOPC; 462 } 463 464 static bool isSOPK(const MachineInstr &MI) { 465 return MI.getDesc().TSFlags & SIInstrFlags::SOPK; 466 } 467 468 bool isSOPK(uint16_t Opcode) const { 469 return get(Opcode).TSFlags & SIInstrFlags::SOPK; 470 } 471 472 static bool isSOPP(const MachineInstr &MI) { 473 return MI.getDesc().TSFlags & SIInstrFlags::SOPP; 474 } 475 476 bool isSOPP(uint16_t Opcode) const { 477 return get(Opcode).TSFlags & SIInstrFlags::SOPP; 478 } 479 480 static bool isPacked(const MachineInstr &MI) { 481 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; 482 } 483 484 bool isPacked(uint16_t Opcode) const { 485 return get(Opcode).TSFlags & SIInstrFlags::IsPacked; 486 } 487 488 static bool isVOP1(const MachineInstr &MI) { 489 return MI.getDesc().TSFlags & SIInstrFlags::VOP1; 490 } 491 492 bool isVOP1(uint16_t Opcode) const { 493 return get(Opcode).TSFlags & SIInstrFlags::VOP1; 494 } 495 496 static bool isVOP2(const MachineInstr &MI) { 497 return MI.getDesc().TSFlags & SIInstrFlags::VOP2; 498 } 499 500 bool isVOP2(uint16_t Opcode) const { 501 return get(Opcode).TSFlags & SIInstrFlags::VOP2; 502 } 503 504 static bool isVOP3(const MachineInstr &MI) { 505 return MI.getDesc().TSFlags & SIInstrFlags::VOP3; 506 } 507 508 bool isVOP3(uint16_t Opcode) const { 509 return get(Opcode).TSFlags & SIInstrFlags::VOP3; 510 } 511 512 static bool isSDWA(const MachineInstr &MI) { 513 return MI.getDesc().TSFlags & SIInstrFlags::SDWA; 514 } 515 516 bool isSDWA(uint16_t Opcode) const { 517 return get(Opcode).TSFlags & SIInstrFlags::SDWA; 518 } 519 520 static bool isVOPC(const MachineInstr &MI) { 521 return MI.getDesc().TSFlags & SIInstrFlags::VOPC; 522 } 523 524 
bool isVOPC(uint16_t Opcode) const { 525 return get(Opcode).TSFlags & SIInstrFlags::VOPC; 526 } 527 528 static bool isMUBUF(const MachineInstr &MI) { 529 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; 530 } 531 532 bool isMUBUF(uint16_t Opcode) const { 533 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; 534 } 535 536 static bool isMTBUF(const MachineInstr &MI) { 537 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; 538 } 539 540 bool isMTBUF(uint16_t Opcode) const { 541 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; 542 } 543 544 static bool isSMRD(const MachineInstr &MI) { 545 return MI.getDesc().TSFlags & SIInstrFlags::SMRD; 546 } 547 548 bool isSMRD(uint16_t Opcode) const { 549 return get(Opcode).TSFlags & SIInstrFlags::SMRD; 550 } 551 552 bool isBufferSMRD(const MachineInstr &MI) const; 553 554 static bool isDS(const MachineInstr &MI) { 555 return MI.getDesc().TSFlags & SIInstrFlags::DS; 556 } 557 558 bool isDS(uint16_t Opcode) const { 559 return get(Opcode).TSFlags & SIInstrFlags::DS; 560 } 561 562 static bool isLDSDMA(const MachineInstr &MI) { 563 return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI)); 564 } 565 566 bool isLDSDMA(uint16_t Opcode) { 567 return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode)); 568 } 569 570 static bool isGWS(const MachineInstr &MI) { 571 return MI.getDesc().TSFlags & SIInstrFlags::GWS; 572 } 573 574 bool isGWS(uint16_t Opcode) const { 575 return get(Opcode).TSFlags & SIInstrFlags::GWS; 576 } 577 578 bool isAlwaysGDS(uint16_t Opcode) const; 579 580 static bool isMIMG(const MachineInstr &MI) { 581 return MI.getDesc().TSFlags & SIInstrFlags::MIMG; 582 } 583 584 bool isMIMG(uint16_t Opcode) const { 585 return get(Opcode).TSFlags & SIInstrFlags::MIMG; 586 } 587 588 static bool isVIMAGE(const MachineInstr &MI) { 589 return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE; 590 } 591 592 bool isVIMAGE(uint16_t Opcode) const { 593 return get(Opcode).TSFlags & SIInstrFlags::VIMAGE; 594 } 595 596 static bool isVSAMPLE(const 
MachineInstr &MI) { 597 return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE; 598 } 599 600 bool isVSAMPLE(uint16_t Opcode) const { 601 return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE; 602 } 603 604 static bool isGather4(const MachineInstr &MI) { 605 return MI.getDesc().TSFlags & SIInstrFlags::Gather4; 606 } 607 608 bool isGather4(uint16_t Opcode) const { 609 return get(Opcode).TSFlags & SIInstrFlags::Gather4; 610 } 611 612 static bool isFLAT(const MachineInstr &MI) { 613 return MI.getDesc().TSFlags & SIInstrFlags::FLAT; 614 } 615 616 // Is a FLAT encoded instruction which accesses a specific segment, 617 // i.e. global_* or scratch_*. 618 static bool isSegmentSpecificFLAT(const MachineInstr &MI) { 619 auto Flags = MI.getDesc().TSFlags; 620 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); 621 } 622 623 bool isSegmentSpecificFLAT(uint16_t Opcode) const { 624 auto Flags = get(Opcode).TSFlags; 625 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); 626 } 627 628 static bool isFLATGlobal(const MachineInstr &MI) { 629 return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal; 630 } 631 632 bool isFLATGlobal(uint16_t Opcode) const { 633 return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal; 634 } 635 636 static bool isFLATScratch(const MachineInstr &MI) { 637 return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch; 638 } 639 640 bool isFLATScratch(uint16_t Opcode) const { 641 return get(Opcode).TSFlags & SIInstrFlags::FlatScratch; 642 } 643 644 // Any FLAT encoded instruction, including global_* and scratch_*. 
645 bool isFLAT(uint16_t Opcode) const { 646 return get(Opcode).TSFlags & SIInstrFlags::FLAT; 647 } 648 649 static bool isEXP(const MachineInstr &MI) { 650 return MI.getDesc().TSFlags & SIInstrFlags::EXP; 651 } 652 653 static bool isDualSourceBlendEXP(const MachineInstr &MI) { 654 if (!isEXP(MI)) 655 return false; 656 unsigned Target = MI.getOperand(0).getImm(); 657 return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 || 658 Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1; 659 } 660 661 bool isEXP(uint16_t Opcode) const { 662 return get(Opcode).TSFlags & SIInstrFlags::EXP; 663 } 664 665 static bool isAtomicNoRet(const MachineInstr &MI) { 666 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet; 667 } 668 669 bool isAtomicNoRet(uint16_t Opcode) const { 670 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet; 671 } 672 673 static bool isAtomicRet(const MachineInstr &MI) { 674 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet; 675 } 676 677 bool isAtomicRet(uint16_t Opcode) const { 678 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet; 679 } 680 681 static bool isAtomic(const MachineInstr &MI) { 682 return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet | 683 SIInstrFlags::IsAtomicNoRet); 684 } 685 686 bool isAtomic(uint16_t Opcode) const { 687 return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet | 688 SIInstrFlags::IsAtomicNoRet); 689 } 690 691 static bool mayWriteLDSThroughDMA(const MachineInstr &MI) { 692 return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD; 693 } 694 695 static bool isWQM(const MachineInstr &MI) { 696 return MI.getDesc().TSFlags & SIInstrFlags::WQM; 697 } 698 699 bool isWQM(uint16_t Opcode) const { 700 return get(Opcode).TSFlags & SIInstrFlags::WQM; 701 } 702 703 static bool isDisableWQM(const MachineInstr &MI) { 704 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; 705 } 706 707 bool isDisableWQM(uint16_t Opcode) const { 708 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; 709 } 710 711 static bool 
isVGPRSpill(const MachineInstr &MI) { 712 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; 713 } 714 715 bool isVGPRSpill(uint16_t Opcode) const { 716 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; 717 } 718 719 static bool isSGPRSpill(const MachineInstr &MI) { 720 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill; 721 } 722 723 bool isSGPRSpill(uint16_t Opcode) const { 724 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill; 725 } 726 727 bool isSpillOpcode(uint16_t Opcode) const { 728 return get(Opcode).TSFlags & 729 (SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill); 730 } 731 732 static bool isWWMRegSpillOpcode(uint16_t Opcode) { 733 return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || 734 Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE || 735 Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE || 736 Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE; 737 } 738 739 static bool isChainCallOpcode(uint64_t Opcode) { 740 return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 || 741 Opcode == AMDGPU::SI_CS_CHAIN_TC_W64; 742 } 743 744 static bool isDPP(const MachineInstr &MI) { 745 return MI.getDesc().TSFlags & SIInstrFlags::DPP; 746 } 747 748 bool isDPP(uint16_t Opcode) const { 749 return get(Opcode).TSFlags & SIInstrFlags::DPP; 750 } 751 752 static bool isTRANS(const MachineInstr &MI) { 753 return MI.getDesc().TSFlags & SIInstrFlags::TRANS; 754 } 755 756 bool isTRANS(uint16_t Opcode) const { 757 return get(Opcode).TSFlags & SIInstrFlags::TRANS; 758 } 759 760 static bool isVOP3P(const MachineInstr &MI) { 761 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; 762 } 763 764 bool isVOP3P(uint16_t Opcode) const { 765 return get(Opcode).TSFlags & SIInstrFlags::VOP3P; 766 } 767 768 static bool isVINTRP(const MachineInstr &MI) { 769 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; 770 } 771 772 bool isVINTRP(uint16_t Opcode) const { 773 return get(Opcode).TSFlags & SIInstrFlags::VINTRP; 774 } 775 776 static bool isMAI(const MachineInstr &MI) { 777 return MI.getDesc().TSFlags & 
SIInstrFlags::IsMAI; 778 } 779 780 bool isMAI(uint16_t Opcode) const { 781 return get(Opcode).TSFlags & SIInstrFlags::IsMAI; 782 } 783 784 static bool isMFMA(const MachineInstr &MI) { 785 return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 786 MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; 787 } 788 789 static bool isDOT(const MachineInstr &MI) { 790 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; 791 } 792 793 static bool isWMMA(const MachineInstr &MI) { 794 return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA; 795 } 796 797 bool isWMMA(uint16_t Opcode) const { 798 return get(Opcode).TSFlags & SIInstrFlags::IsWMMA; 799 } 800 801 static bool isMFMAorWMMA(const MachineInstr &MI) { 802 return isMFMA(MI) || isWMMA(MI); 803 } 804 805 static bool isSWMMAC(const MachineInstr &MI) { 806 return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC; 807 } 808 809 bool isSWMMAC(uint16_t Opcode) const { 810 return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC; 811 } 812 813 bool isDOT(uint16_t Opcode) const { 814 return get(Opcode).TSFlags & SIInstrFlags::IsDOT; 815 } 816 817 static bool isLDSDIR(const MachineInstr &MI) { 818 return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; 819 } 820 821 bool isLDSDIR(uint16_t Opcode) const { 822 return get(Opcode).TSFlags & SIInstrFlags::LDSDIR; 823 } 824 825 static bool isVINTERP(const MachineInstr &MI) { 826 return MI.getDesc().TSFlags & SIInstrFlags::VINTERP; 827 } 828 829 bool isVINTERP(uint16_t Opcode) const { 830 return get(Opcode).TSFlags & SIInstrFlags::VINTERP; 831 } 832 833 static bool isScalarUnit(const MachineInstr &MI) { 834 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); 835 } 836 837 static bool usesVM_CNT(const MachineInstr &MI) { 838 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT; 839 } 840 841 static bool usesLGKM_CNT(const MachineInstr &MI) { 842 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; 843 } 844 845 static bool sopkIsZext(const MachineInstr &MI) { 846 return 
MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT; 847 } 848 849 bool sopkIsZext(uint16_t Opcode) const { 850 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT; 851 } 852 853 /// \returns true if this is an s_store_dword* instruction. This is more 854 /// specific than isSMEM && mayStore. 855 static bool isScalarStore(const MachineInstr &MI) { 856 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; 857 } 858 859 bool isScalarStore(uint16_t Opcode) const { 860 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; 861 } 862 863 static bool isFixedSize(const MachineInstr &MI) { 864 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; 865 } 866 867 bool isFixedSize(uint16_t Opcode) const { 868 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; 869 } 870 871 static bool hasFPClamp(const MachineInstr &MI) { 872 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; 873 } 874 875 bool hasFPClamp(uint16_t Opcode) const { 876 return get(Opcode).TSFlags & SIInstrFlags::FPClamp; 877 } 878 879 static bool hasIntClamp(const MachineInstr &MI) { 880 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp; 881 } 882 883 uint64_t getClampMask(const MachineInstr &MI) const { 884 const uint64_t ClampFlags = SIInstrFlags::FPClamp | 885 SIInstrFlags::IntClamp | 886 SIInstrFlags::ClampLo | 887 SIInstrFlags::ClampHi; 888 return MI.getDesc().TSFlags & ClampFlags; 889 } 890 891 static bool usesFPDPRounding(const MachineInstr &MI) { 892 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding; 893 } 894 895 bool usesFPDPRounding(uint16_t Opcode) const { 896 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; 897 } 898 899 static bool isFPAtomic(const MachineInstr &MI) { 900 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; 901 } 902 903 bool isFPAtomic(uint16_t Opcode) const { 904 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; 905 } 906 907 static bool isNeverUniform(const MachineInstr &MI) { 908 return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform; 909 } 910 
911 // Check to see if opcode is for a barrier start. Pre gfx12 this is just the 912 // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want 913 // to check for the barrier start (S_BARRIER_SIGNAL*) 914 bool isBarrierStart(unsigned Opcode) const { 915 return Opcode == AMDGPU::S_BARRIER || 916 Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 || 917 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 || 918 Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM || 919 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM; 920 } 921 922 static bool doesNotReadTiedSource(const MachineInstr &MI) { 923 return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; 924 } 925 926 bool doesNotReadTiedSource(uint16_t Opcode) const { 927 return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead; 928 } 929 930 static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) { 931 switch (Opcode) { 932 case AMDGPU::S_WAITCNT_soft: 933 return AMDGPU::S_WAITCNT; 934 case AMDGPU::S_WAITCNT_VSCNT_soft: 935 return AMDGPU::S_WAITCNT_VSCNT; 936 case AMDGPU::S_WAIT_LOADCNT_soft: 937 return AMDGPU::S_WAIT_LOADCNT; 938 case AMDGPU::S_WAIT_STORECNT_soft: 939 return AMDGPU::S_WAIT_STORECNT; 940 case AMDGPU::S_WAIT_SAMPLECNT_soft: 941 return AMDGPU::S_WAIT_SAMPLECNT; 942 case AMDGPU::S_WAIT_BVHCNT_soft: 943 return AMDGPU::S_WAIT_BVHCNT; 944 case AMDGPU::S_WAIT_DSCNT_soft: 945 return AMDGPU::S_WAIT_DSCNT; 946 default: 947 return Opcode; 948 } 949 } 950 951 bool isVGPRCopy(const MachineInstr &MI) const { 952 assert(isCopyInstr(MI)); 953 Register Dest = MI.getOperand(0).getReg(); 954 const MachineFunction &MF = *MI.getParent()->getParent(); 955 const MachineRegisterInfo &MRI = MF.getRegInfo(); 956 return !RI.isSGPRReg(MRI, Dest); 957 } 958 959 bool hasVGPRUses(const MachineInstr &MI) const { 960 const MachineFunction &MF = *MI.getParent()->getParent(); 961 const MachineRegisterInfo &MRI = MF.getRegInfo(); 962 return llvm::any_of(MI.explicit_uses(), 963 [&MRI, this](const MachineOperand &MO) { 964 return MO.isReg() 
&& RI.isVGPR(MRI, MO.getReg());}); 965 } 966 967 /// Return true if the instruction modifies the mode register.q 968 static bool modifiesModeRegister(const MachineInstr &MI); 969 970 /// Whether we must prevent this instruction from executing with EXEC = 0. 971 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; 972 973 /// Returns true if the instruction could potentially depend on the value of 974 /// exec. If false, exec dependencies may safely be ignored. 975 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; 976 977 bool isInlineConstant(const APInt &Imm) const; 978 979 bool isInlineConstant(const APFloat &Imm) const { 980 return isInlineConstant(Imm.bitcastToAPInt()); 981 } 982 983 // Returns true if this non-register operand definitely does not need to be 984 // encoded as a 32-bit literal. Note that this function handles all kinds of 985 // operands, not just immediates. 986 // 987 // Some operands like FrameIndexes could resolve to an inline immediate value 988 // that will not require an additional 4-bytes; this function assumes that it 989 // will. 990 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; 991 992 bool isInlineConstant(const MachineOperand &MO, 993 const MCOperandInfo &OpInfo) const { 994 return isInlineConstant(MO, OpInfo.OperandType); 995 } 996 997 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would 998 /// be an inline immediate. 999 bool isInlineConstant(const MachineInstr &MI, 1000 const MachineOperand &UseMO, 1001 const MachineOperand &DefMO) const { 1002 assert(UseMO.getParent() == &MI); 1003 int OpIdx = UseMO.getOperandNo(); 1004 if (OpIdx >= MI.getDesc().NumOperands) 1005 return false; 1006 1007 return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]); 1008 } 1009 1010 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline 1011 /// immediate. 
1012 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const { 1013 const MachineOperand &MO = MI.getOperand(OpIdx); 1014 return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType); 1015 } 1016 1017 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, 1018 const MachineOperand &MO) const { 1019 if (OpIdx >= MI.getDesc().NumOperands) 1020 return false; 1021 1022 if (isCopyInstr(MI)) { 1023 unsigned Size = getOpSize(MI, OpIdx); 1024 assert(Size == 8 || Size == 4); 1025 1026 uint8_t OpType = (Size == 8) ? 1027 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32; 1028 return isInlineConstant(MO, OpType); 1029 } 1030 1031 return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType); 1032 } 1033 1034 bool isInlineConstant(const MachineOperand &MO) const { 1035 return isInlineConstant(*MO.getParent(), MO.getOperandNo()); 1036 } 1037 1038 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 1039 const MachineOperand &MO) const; 1040 1041 /// Return true if this 64-bit VALU instruction has a 32-bit encoding. 1042 /// This function will return false if you pass it a 32-bit instruction. 1043 bool hasVALU32BitEncoding(unsigned Opcode) const; 1044 1045 /// Returns true if this operand uses the constant bus. 1046 bool usesConstantBus(const MachineRegisterInfo &MRI, 1047 const MachineOperand &MO, 1048 const MCOperandInfo &OpInfo) const; 1049 1050 /// Return true if this instruction has any modifiers. 1051 /// e.g. src[012]_mod, omod, clamp. 
1052 bool hasModifiers(unsigned Opcode) const; 1053 1054 bool hasModifiersSet(const MachineInstr &MI, 1055 unsigned OpName) const; 1056 bool hasAnyModifiersSet(const MachineInstr &MI) const; 1057 1058 bool canShrink(const MachineInstr &MI, 1059 const MachineRegisterInfo &MRI) const; 1060 1061 MachineInstr *buildShrunkInst(MachineInstr &MI, 1062 unsigned NewOpcode) const; 1063 1064 bool verifyInstruction(const MachineInstr &MI, 1065 StringRef &ErrInfo) const override; 1066 1067 unsigned getVALUOp(const MachineInstr &MI) const; 1068 1069 void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, 1070 MachineBasicBlock::iterator MBBI, 1071 const DebugLoc &DL, Register Reg, bool IsSCCLive, 1072 SlotIndexes *Indexes = nullptr) const; 1073 1074 void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, 1075 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 1076 Register Reg, SlotIndexes *Indexes = nullptr) const; 1077 1078 /// Return the correct register class for \p OpNo. For target-specific 1079 /// instructions, this will return the register class that has been defined 1080 /// in tablegen. For generic instructions, like REG_SEQUENCE it will return 1081 /// the register class of its machine operand. 1082 /// to infer the correct register class base on the other operands. 1083 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, 1084 unsigned OpNo) const; 1085 1086 /// Return the size in bytes of the operand OpNo on the given 1087 // instruction opcode. 1088 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { 1089 const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo]; 1090 1091 if (OpInfo.RegClass == -1) { 1092 // If this is an immediate operand, this must be a 32-bit literal. 
1093 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); 1094 return 4; 1095 } 1096 1097 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; 1098 } 1099 1100 /// This form should usually be preferred since it handles operands 1101 /// with unknown register classes. 1102 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { 1103 const MachineOperand &MO = MI.getOperand(OpNo); 1104 if (MO.isReg()) { 1105 if (unsigned SubReg = MO.getSubReg()) { 1106 return RI.getSubRegIdxSize(SubReg) / 8; 1107 } 1108 } 1109 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; 1110 } 1111 1112 /// Legalize the \p OpIndex operand of this instruction by inserting 1113 /// a MOV. For example: 1114 /// ADD_I32_e32 VGPR0, 15 1115 /// to 1116 /// MOV VGPR1, 15 1117 /// ADD_I32_e32 VGPR0, VGPR1 1118 /// 1119 /// If the operand being legalized is a register, then a COPY will be used 1120 /// instead of MOV. 1121 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; 1122 1123 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand 1124 /// for \p MI. 1125 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 1126 const MachineOperand *MO = nullptr) const; 1127 1128 /// Check if \p MO would be a valid operand for the given operand 1129 /// definition \p OpInfo. Note this does not attempt to validate constant bus 1130 /// restrictions (e.g. literal constant usage). 1131 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, 1132 const MCOperandInfo &OpInfo, 1133 const MachineOperand &MO) const; 1134 1135 /// Check if \p MO (a register operand) is a legal register for the 1136 /// given operand description. 1137 bool isLegalRegOperand(const MachineRegisterInfo &MRI, 1138 const MCOperandInfo &OpInfo, 1139 const MachineOperand &MO) const; 1140 1141 /// Legalize operands in \p MI by either commuting it or inserting a 1142 /// copy of src1. 
1143 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1144 1145 /// Fix operands in \p MI to satisfy constant bus requirements. 1146 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1147 1148 /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only 1149 /// be used when it is know that the value in SrcReg is same across all 1150 /// threads in the wave. 1151 /// \returns The SGPR register that \p SrcReg was copied to. 1152 Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, 1153 MachineRegisterInfo &MRI) const; 1154 1155 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1156 void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1157 1158 void legalizeGenericOperand(MachineBasicBlock &InsertMBB, 1159 MachineBasicBlock::iterator I, 1160 const TargetRegisterClass *DstRC, 1161 MachineOperand &Op, MachineRegisterInfo &MRI, 1162 const DebugLoc &DL) const; 1163 1164 /// Legalize all operands in this instruction. This function may create new 1165 /// instructions and control-flow around \p MI. If present, \p MDT is 1166 /// updated. 1167 /// \returns A new basic block that contains \p MI if new blocks were created. 1168 MachineBasicBlock * 1169 legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const; 1170 1171 /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand 1172 /// was moved to VGPR. \returns true if succeeded. 1173 bool moveFlatAddrToVGPR(MachineInstr &Inst) const; 1174 1175 /// Replace the instructions opcode with the equivalent VALU 1176 /// opcode. This function will also move the users of MachineInstruntions 1177 /// in the \p WorkList to the VALU if necessary. If present, \p MDT is 1178 /// updated. 
1179 void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const; 1180 1181 void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, 1182 MachineInstr &Inst) const; 1183 1184 void insertNoop(MachineBasicBlock &MBB, 1185 MachineBasicBlock::iterator MI) const override; 1186 1187 void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1188 unsigned Quantity) const override; 1189 1190 void insertReturn(MachineBasicBlock &MBB) const; 1191 /// Return the number of wait states that result from executing this 1192 /// instruction. 1193 static unsigned getNumWaitStates(const MachineInstr &MI); 1194 1195 /// Returns the operand named \p Op. If \p MI does not have an 1196 /// operand named \c Op, this function returns nullptr. 1197 LLVM_READONLY 1198 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; 1199 1200 LLVM_READONLY 1201 const MachineOperand *getNamedOperand(const MachineInstr &MI, 1202 unsigned OpName) const { 1203 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); 1204 } 1205 1206 /// Get required immediate operand 1207 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const { 1208 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); 1209 return MI.getOperand(Idx).getImm(); 1210 } 1211 1212 uint64_t getDefaultRsrcDataFormat() const; 1213 uint64_t getScratchRsrcWords23() const; 1214 1215 bool isLowLatencyInstruction(const MachineInstr &MI) const; 1216 bool isHighLatencyDef(int Opc) const override; 1217 1218 /// Return the descriptor of the target-specific machine instruction 1219 /// that corresponds to the specified pseudo or native opcode. 
1220 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { 1221 return get(pseudoToMCOpcode(Opcode)); 1222 } 1223 1224 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const; 1225 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; 1226 1227 unsigned isLoadFromStackSlot(const MachineInstr &MI, 1228 int &FrameIndex) const override; 1229 unsigned isStoreToStackSlot(const MachineInstr &MI, 1230 int &FrameIndex) const override; 1231 1232 unsigned getInstBundleSize(const MachineInstr &MI) const; 1233 unsigned getInstSizeInBytes(const MachineInstr &MI) const override; 1234 1235 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; 1236 1237 bool isNonUniformBranchInstr(MachineInstr &Instr) const; 1238 1239 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, 1240 MachineBasicBlock *IfEnd) const; 1241 1242 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, 1243 MachineBasicBlock *LoopEnd) const; 1244 1245 std::pair<unsigned, unsigned> 1246 decomposeMachineOperandsTargetFlags(unsigned TF) const override; 1247 1248 ArrayRef<std::pair<int, const char *>> 1249 getSerializableTargetIndices() const override; 1250 1251 ArrayRef<std::pair<unsigned, const char *>> 1252 getSerializableDirectMachineOperandTargetFlags() const override; 1253 1254 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 1255 getSerializableMachineMemOperandTargetFlags() const override; 1256 1257 ScheduleHazardRecognizer * 1258 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 1259 const ScheduleDAG *DAG) const override; 1260 1261 ScheduleHazardRecognizer * 1262 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; 1263 1264 ScheduleHazardRecognizer * 1265 CreateTargetMIHazardRecognizer(const InstrItineraryData *II, 1266 const ScheduleDAGMI *DAG) const override; 1267 1268 unsigned getLiveRangeSplitOpcode(Register Reg, 1269 const MachineFunction &MF) const override; 1270 1271 bool 
isBasicBlockPrologue(const MachineInstr &MI, 1272 Register Reg = Register()) const override; 1273 1274 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, 1275 MachineBasicBlock::iterator InsPt, 1276 const DebugLoc &DL, Register Src, 1277 Register Dst) const override; 1278 1279 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, 1280 MachineBasicBlock::iterator InsPt, 1281 const DebugLoc &DL, Register Src, 1282 unsigned SrcSubReg, 1283 Register Dst) const override; 1284 1285 bool isWave32() const; 1286 1287 /// Return a partially built integer add instruction without carry. 1288 /// Caller must add source operands. 1289 /// For pre-GFX9 it will generate unused carry destination operand. 1290 /// TODO: After GFX9 it should return a no-carry operation. 1291 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 1292 MachineBasicBlock::iterator I, 1293 const DebugLoc &DL, 1294 Register DestReg) const; 1295 1296 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 1297 MachineBasicBlock::iterator I, 1298 const DebugLoc &DL, 1299 Register DestReg, 1300 RegScavenger &RS) const; 1301 1302 static bool isKillTerminator(unsigned Opcode); 1303 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; 1304 1305 bool isLegalMUBUFImmOffset(unsigned Imm) const; 1306 1307 static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST); 1308 1309 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, 1310 Align Alignment = Align(4)) const; 1311 1312 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT 1313 /// encoded instruction. If \p Signed, this is for an instruction that 1314 /// interprets the offset as signed. 1315 bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, 1316 uint64_t FlatVariant) const; 1317 1318 /// Split \p COffsetVal into {immediate offset field, remainder offset} 1319 /// values. 
1320 std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal, 1321 unsigned AddrSpace, 1322 uint64_t FlatVariant) const; 1323 1324 /// Returns true if negative offsets are allowed for the given \p FlatVariant. 1325 bool allowNegativeFlatOffset(uint64_t FlatVariant) const; 1326 1327 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. 1328 /// Return -1 if the target-specific opcode for the pseudo instruction does 1329 /// not exist. If Opcode is not a pseudo instruction, this is identity. 1330 int pseudoToMCOpcode(int Opcode) const; 1331 1332 /// \brief Check if this instruction should only be used by assembler. 1333 /// Return true if this opcode should not be used by codegen. 1334 bool isAsmOnlyOpcode(int MCOp) const; 1335 1336 const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, 1337 const TargetRegisterInfo *TRI, 1338 const MachineFunction &MF) 1339 const override; 1340 1341 void fixImplicitOperands(MachineInstr &MI) const; 1342 1343 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, 1344 ArrayRef<unsigned> Ops, 1345 MachineBasicBlock::iterator InsertPt, 1346 int FrameIndex, 1347 LiveIntervals *LIS = nullptr, 1348 VirtRegMap *VRM = nullptr) const override; 1349 1350 unsigned getInstrLatency(const InstrItineraryData *ItinData, 1351 const MachineInstr &MI, 1352 unsigned *PredCost = nullptr) const override; 1353 1354 InstructionUniformity 1355 getInstructionUniformity(const MachineInstr &MI) const override final; 1356 1357 InstructionUniformity 1358 getGenericInstructionUniformity(const MachineInstr &MI) const; 1359 1360 const MIRFormatter *getMIRFormatter() const override { 1361 if (!Formatter.get()) 1362 Formatter = std::make_unique<AMDGPUMIRFormatter>(); 1363 return Formatter.get(); 1364 } 1365 1366 static unsigned getDSShaderTypeValue(const MachineFunction &MF); 1367 1368 const TargetSchedModel &getSchedModel() const { return SchedModel; } 1369 1370 // Enforce operand's \p OpName 
even alignment if required by target. 1371 // This is used if an operand is a 32 bit register but needs to be aligned 1372 // regardless. 1373 void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const; 1374 }; 1375 1376 /// \brief Returns true if a reg:subreg pair P has a TRC class 1377 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, 1378 const TargetRegisterClass &TRC, 1379 MachineRegisterInfo &MRI) { 1380 auto *RC = MRI.getRegClass(P.Reg); 1381 if (!P.SubReg) 1382 return RC == &TRC; 1383 auto *TRI = MRI.getTargetRegisterInfo(); 1384 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg); 1385 } 1386 1387 /// \brief Create RegSubRegPair from a register MachineOperand 1388 inline 1389 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) { 1390 assert(O.isReg()); 1391 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg()); 1392 } 1393 1394 /// \brief Return the SubReg component from REG_SEQUENCE 1395 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, 1396 unsigned SubReg); 1397 1398 /// \brief Return the defining instruction for a given reg:subreg pair 1399 /// skipping copy like instructions and subreg-manipulation pseudos. 1400 /// Following another subreg of a reg:subreg isn't supported. 1401 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, 1402 MachineRegisterInfo &MRI); 1403 1404 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p 1405 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not 1406 /// attempt to track between blocks. 1407 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, 1408 Register VReg, 1409 const MachineInstr &DefMI, 1410 const MachineInstr &UseMI); 1411 1412 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p 1413 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to 1414 /// track between blocks. 
1415 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, 1416 Register VReg, 1417 const MachineInstr &DefMI); 1418 1419 namespace AMDGPU { 1420 1421 LLVM_READONLY 1422 int getVOPe64(uint16_t Opcode); 1423 1424 LLVM_READONLY 1425 int getVOPe32(uint16_t Opcode); 1426 1427 LLVM_READONLY 1428 int getSDWAOp(uint16_t Opcode); 1429 1430 LLVM_READONLY 1431 int getDPPOp32(uint16_t Opcode); 1432 1433 LLVM_READONLY 1434 int getDPPOp64(uint16_t Opcode); 1435 1436 LLVM_READONLY 1437 int getBasicFromSDWAOp(uint16_t Opcode); 1438 1439 LLVM_READONLY 1440 int getCommuteRev(uint16_t Opcode); 1441 1442 LLVM_READONLY 1443 int getCommuteOrig(uint16_t Opcode); 1444 1445 LLVM_READONLY 1446 int getAddr64Inst(uint16_t Opcode); 1447 1448 /// Check if \p Opcode is an Addr64 opcode. 1449 /// 1450 /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1. 1451 LLVM_READONLY 1452 int getIfAddr64Inst(uint16_t Opcode); 1453 1454 LLVM_READONLY 1455 int getAtomicNoRetOp(uint16_t Opcode); 1456 1457 LLVM_READONLY 1458 int getSOPKOp(uint16_t Opcode); 1459 1460 /// \returns SADDR form of a FLAT Global instruction given an \p Opcode 1461 /// of a VADDR form. 1462 LLVM_READONLY 1463 int getGlobalSaddrOp(uint16_t Opcode); 1464 1465 /// \returns VADDR form of a FLAT Global instruction given an \p Opcode 1466 /// of a SADDR form. 1467 LLVM_READONLY 1468 int getGlobalVaddrOp(uint16_t Opcode); 1469 1470 LLVM_READONLY 1471 int getVCMPXNoSDstOp(uint16_t Opcode); 1472 1473 /// \returns ST form with only immediate offset of a FLAT Scratch instruction 1474 /// given an \p Opcode of an SS (SADDR) form. 1475 LLVM_READONLY 1476 int getFlatScratchInstSTfromSS(uint16_t Opcode); 1477 1478 /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode 1479 /// of an SVS (SADDR + VADDR) form. 1480 LLVM_READONLY 1481 int getFlatScratchInstSVfromSVS(uint16_t Opcode); 1482 1483 /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode 1484 /// of an SV (VADDR) form. 
1485 LLVM_READONLY 1486 int getFlatScratchInstSSfromSV(uint16_t Opcode); 1487 1488 /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode 1489 /// of an SS (SADDR) form. 1490 LLVM_READONLY 1491 int getFlatScratchInstSVfromSS(uint16_t Opcode); 1492 1493 /// \returns earlyclobber version of a MAC MFMA is exists. 1494 LLVM_READONLY 1495 int getMFMAEarlyClobberOp(uint16_t Opcode); 1496 1497 /// \returns v_cmpx version of a v_cmp instruction. 1498 LLVM_READONLY 1499 int getVCMPXOpFromVCMP(uint16_t Opcode); 1500 1501 const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; 1502 const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); 1503 const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); 1504 const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); 1505 1506 } // end namespace AMDGPU 1507 1508 namespace AMDGPU { 1509 enum AsmComments { 1510 // For sgpr to vgpr spill instructions 1511 SGPR_SPILL = MachineInstr::TAsmComments 1512 }; 1513 } // namespace AMDGPU 1514 1515 namespace SI { 1516 namespace KernelInputOffsets { 1517 1518 /// Offsets in bytes from the start of the input buffer 1519 enum Offsets { 1520 NGROUPS_X = 0, 1521 NGROUPS_Y = 4, 1522 NGROUPS_Z = 8, 1523 GLOBAL_SIZE_X = 12, 1524 GLOBAL_SIZE_Y = 16, 1525 GLOBAL_SIZE_Z = 20, 1526 LOCAL_SIZE_X = 24, 1527 LOCAL_SIZE_Y = 28, 1528 LOCAL_SIZE_Z = 32 1529 }; 1530 1531 } // end namespace KernelInputOffsets 1532 } // end namespace SI 1533 1534 } // end namespace llvm 1535 1536 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 1537