//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

/// Utility to store machine instructions worklist.
struct SIInstrWorklist {
  SIInstrWorklist() = default;

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    auto iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    auto iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  void clear() {
    InstrList.clear();
    DeferredList.clear();
  }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// Deferred instructions are specific MachineInstrs that will be added
  /// back by the insert method.
  SetVector<MachineInstr *> DeferredList;
};
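
// A minimal usage sketch (hypothetical driver loop; assumes the caller has
// already collected the seed instructions to process):
//
//   SIInstrWorklist Worklist;
//   Worklist.insert(&SeedMI);
//   while (!Worklist.empty()) {
//     MachineInstr *Inst = Worklist.top();
//     Worklist.erase_top();
//     // ... process Inst, inserting any newly affected users ...
//   }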

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;

private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist,
                           MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1,
                               unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask,
                            int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint16_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
  }
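
  // Usage sketch: each predicate below comes in a static form keyed off the
  // instruction's descriptor and a member form keyed off a raw opcode
  // (hypothetical call sites; assumes TII is a const SIInstrInfo *):
  //
  //   if (SIInstrInfo::isVALU(MI) && !SIInstrInfo::isVOP3(MI))
  //     ; // MI is a VALU instruction that is not VOP3-encoded.
  //   if (TII->isSALU(AMDGPU::S_ADD_U32))
  //     ; // Opcode-based query through the instruction table.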

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  static bool isLDSDMA(const MachineInstr &MI) {
    return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
  }

  bool isLDSDMA(uint16_t Opcode) const {
    return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
  }

  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }
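
  // Note the FLAT classification is hierarchical: a global_* or scratch_*
  // instruction satisfies both isFLAT() and its segment-specific predicate,
  // while a flat_* access satisfies isFLAT() alone (illustrative check):
  //
  //   if (isFLAT(MI) && !isSegmentSpecificFLAT(MI))
  //     ; // True FLAT address; the segment is not known statically.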

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
  }

  bool isSpillOpcode(uint16_t Opcode) const {
    return get(Opcode).TSFlags &
           (SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill);
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool sopkIsZext(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  bool sopkIsZext(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    default:
      return Opcode;
    }
  }
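
  // Illustrative use (sketch): a pass that decides a soft wait must be kept
  // can promote it to the hard form in place.
  //
  //   unsigned Hard = getNonSoftWaitcntOpcode(MI.getOpcode());
  //   if (Hard != MI.getOpcode())
  //     MI.setDesc(TII->get(Hard));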

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
      return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI,
                   const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const {
    return isInlineConstant(Imm.bitcastToAPInt());
  }

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate
  // value that will not require an additional 4 bytes; this function assumes
  // that it will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;
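
  // Illustrative decision tree (sketch; assumes MI has an immediate-like
  // source at operand index Idx and TII is a const SIInstrInfo *):
  //
  //   if (TII->isInlineConstant(MI, Idx))
  //     ; // Encodable directly in the source field; no literal needed.
  //   else if (TII->isImmOperandLegal(MI, Idx, MI.getOperand(Idx)))
  //     ; // Legal, but consumes the single 32-bit literal slot.
  //   else
  //     TII->legalizeOpWithMove(MI, Idx); // Materialize into a register.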

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands to
  /// infer the correct register class.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand \p OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIdx operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were
  /// created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI,
                   MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of the MachineInstrs
  /// in \p Worklist to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;
  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p OperandName. If \p MI does not have an
  /// operand with that name, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI,
                                  unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }
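
  // Typical named-operand access (sketch; assumes MI actually has these
  // operands for its opcode):
  //
  //   if (const MachineOperand *Src0 =
  //           TII->getNamedOperand(MI, AMDGPU::OpName::src0))
  //     ; // Operand lookup is by name, not by fixed index.
  //   int64_t Offset = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);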

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;
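
  // Usage sketch (hypothetical registers; the caller appends the two source
  // operands to the returned builder):
  //
  //   Register Dst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  //   TII->getAddNoCarry(MBB, I, DL, Dst)
  //       .addReg(Src0)
  //       .addImm(16);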

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;

  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction with the given \p FlatVariant.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// Returns true if negative offsets are allowed for the given
  /// \p FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID,
                                         unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter.get())
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce the even alignment of the operand \p OpName if required by the
  // target. This is used if an operand is a 32-bit register but needs to be
  // even-aligned regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};

/// \brief Returns true if the reg:subreg pair \p P is of the register class
/// \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
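
// Example sketch for the reg:subreg helpers above (checks whether the first
// source operand of MI is a 64-bit VGPR pair; the operand index is
// illustrative):
//
//   auto P = getRegSubRegPair(MI.getOperand(1));
//   if (isOfRegClass(P, AMDGPU::VReg_64RegClass, MRI))
//     ...;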

namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns earlyclobber version of a MAC MFMA if it exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace AMDGPU {
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H