//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;

private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SetVectorType &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SetVectorType &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SetVectorType &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SetVectorType &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SetVectorType &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitBFE(SetVectorType &Worklist, MachineInstr &Inst) const;
  void movePackToVALU(SetVectorType &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SetVectorType &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SetVectorType &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SetVectorType &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           unsigned NumLoads, unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1,
                               unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of the generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;
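
  // A minimal sketch of the pre-RA expansion described above (the register
  // names and elided DPP operands are hypothetical, for illustration only):
  //   %dst:vreg_64 = V_MOV_B64_DPP_PSEUDO %src:vreg_64, ...
  // becomes
  //   %lo:vgpr_32 = V_MOV_B32_dpp %src.sub0, ...
  //   %hi:vgpr_32 = V_MOV_B32_dpp %src.sub1, ...
  //   %dst:vreg_64 = REG_SEQUENCE %lo, %subreg.sub0, %hi, %subreg.sub1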

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }
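
  // These TSFlags-based predicates come in pairs: a static form that queries a
  // MachineInstr directly, and a member form for when only the opcode is at
  // hand. An illustrative (hypothetical) query:
  //   if (SIInstrInfo::isVALU(MI) && !SIInstrInfo::isVOP3(MI)) {
  //     // MI is a VALU instruction not using the VOP3 encoding.
  //   }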

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }
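
  // Note the distinction between the FLAT predicates: isFLAT() is true for
  // flat_*, global_*, and scratch_* instructions alike, while
  // isSegmentSpecificFLAT() excludes the plain flat_* forms, which may access
  // any segment.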

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI);
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool sopkIsZext(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  bool sopkIsZext(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(MI.isCopy());
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const {
    return isInlineConstant(Imm.bitcastToAPInt());
  }

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4 bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if \p UseMO would become an inline immediate if it were
  /// substituted with \p DefMO in \p MI.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = MI.getOperandNo(&UseMO);
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (MI.isCopy()) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    const MachineInstr *Parent = MO.getParent();
    return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
  }
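
  // An illustrative sketch of using the overloads above (`TII` and `MI` are
  // assumed to be a SIInstrInfo instance and a machine instruction in scope):
  // checking whether src0 can be encoded inline rather than as a literal:
  //   int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
  //                                            AMDGPU::OpName::src0);
  //   if (Src0Idx != -1 && TII->isInlineConstant(MI, Src0Idx)) {
  //     // No extra 32-bit literal dword is needed for this operand.
  //   }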

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand \p OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }
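
  // Illustrative values for the forms above: a VGPR_32 register operand yields
  // 4, a VReg_64 operand yields 8, and (in the opcode-based form) an immediate
  // operand with no register class is assumed to be a 32-bit literal and also
  // yields 4.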

  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change the SADDR form of a FLAT \p Inst to its VADDR form if the saddr
  /// operand was moved to a VGPR. \returns true if this succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace this instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of \p MI to the
  /// VALU if necessary. If present, \p MDT is updated.
  MachineBasicBlock *moveToVALU(MachineInstr &MI,
                                MachineDominatorTree *MDT = nullptr) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p OperandName. If \p MI does not have an
  /// operand with that name, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get the required immediate operand named \p OpName.
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }
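
  // Typical usage (an illustrative sketch; `TII` is assumed to be a
  // SIInstrInfo instance in scope):
  //   if (const MachineOperand *Soff =
  //           TII->getNamedOperand(MI, AMDGPU::OpName::soffset)) {
  //     // MI has an soffset operand.
  //   }
  // whereas getNamedImmOperand(MI, AMDGPU::OpName::offset) requires the
  // operand to be present and directly returns its immediate value.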

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;
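
  // Typical usage (an illustrative sketch; Offset and BaseReg are placeholder
  // names). The caller appends the two source operands to the returned
  // builder; the e64 add forms also expect a trailing clamp immediate:
  //   Register Sum = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  //   TII->getAddNoCarry(MBB, I, DL, Sum)
  //       .addImm(Offset)
  //       .addReg(BaseReg)
  //       .addImm(0); // clamp bit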

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  static bool isLegalMUBUFImmOffset(unsigned Imm) {
    return isUInt<12>(Imm);
  }

  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction of the given \p FlatVariant accessing address
  /// space \p AddrSpace.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter.get())
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce an even register alignment for operand \p OpName if required by
  // the target. This is used if an operand is a 32-bit register but needs to
  // be aligned regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};

/// \brief Returns true if the reg:subreg pair \p P is of register class
/// \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create a RegSubRegPair from a register MachineOperand.
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair,
/// skipping copy-like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns the SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns the VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);
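
// These tablegen-generated mappings return -1 when no counterpart opcode
// exists, so callers must check the result. A typical query (illustrative
// sketch):
//   int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
//   if (Op32 != -1) {
//     // A 32-bit encoding of this VALU instruction exists.
//   }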

/// \returns the ST form with only an immediate offset of a FLAT Scratch
/// instruction given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns the SV (VADDR) form of a FLAT Scratch instruction given an \p
/// Opcode of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns the SS (SADDR) form of a FLAT Scratch instruction given an \p
/// Opcode of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns the SV (VADDR) form of a FLAT Scratch instruction given an \p
/// Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns the earlyclobber version of a MAC MFMA if one exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns the v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer.
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H