1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Interface definition for SIInstrInfo. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 16 17 #include "AMDGPUMIRFormatter.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIRegisterInfo.h" 20 #include "Utils/AMDGPUBaseInfo.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/CodeGen/TargetInstrInfo.h" 23 #include "llvm/CodeGen/TargetSchedule.h" 24 25 #define GET_INSTRINFO_HEADER 26 #include "AMDGPUGenInstrInfo.inc" 27 28 namespace llvm { 29 30 class APInt; 31 class GCNSubtarget; 32 class LiveVariables; 33 class MachineDominatorTree; 34 class MachineRegisterInfo; 35 class RegScavenger; 36 class TargetRegisterClass; 37 class ScheduleHazardRecognizer; 38 39 /// Mark the MMO of a uniform load if there are no potentially clobbering stores 40 /// on any path from the start of an entry function to this load. 41 static const MachineMemOperand::Flags MONoClobber = 42 MachineMemOperand::MOTargetFlag1; 43 44 /// Utility to store machine instructions worklist. 45 struct SIInstrWorklist { 46 SIInstrWorklist() : InstrList() {} 47 48 void insert(MachineInstr *MI); 49 50 MachineInstr *top() const { 51 auto iter = InstrList.begin(); 52 return *iter; 53 } 54 55 void erase_top() { 56 auto iter = InstrList.begin(); 57 InstrList.erase(iter); 58 } 59 60 bool empty() const { return InstrList.empty(); } 61 62 void clear() { 63 InstrList.clear(); 64 DeferredList.clear(); 65 } 66 67 bool isDeferred(MachineInstr *MI); 68 69 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; } 70 71 private: 72 /// InstrList contains the MachineInstrs. 73 SetVector<MachineInstr *> InstrList; 74 /// Deferred instructions are specific MachineInstr 75 /// that will be added by insert method. 76 SetVector<MachineInstr *> DeferredList; 77 }; 78 79 class SIInstrInfo final : public AMDGPUGenInstrInfo { 80 private: 81 const SIRegisterInfo RI; 82 const GCNSubtarget &ST; 83 TargetSchedModel SchedModel; 84 mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter; 85 86 // The inverse predicate should have the negative value. 87 enum BranchPredicate { 88 INVALID_BR = 0, 89 SCC_TRUE = 1, 90 SCC_FALSE = -1, 91 VCCNZ = 2, 92 VCCZ = -2, 93 EXECNZ = -3, 94 EXECZ = 3 95 }; 96 97 using SetVectorType = SmallSetVector<MachineInstr *, 32>; 98 99 static unsigned getBranchOpcode(BranchPredicate Cond); 100 static BranchPredicate getBranchPredicate(unsigned Opcode); 101 102 public: 103 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, 104 MachineRegisterInfo &MRI, 105 MachineOperand &SuperReg, 106 const TargetRegisterClass *SuperRC, 107 unsigned SubIdx, 108 const TargetRegisterClass *SubRC) const; 109 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, 110 MachineRegisterInfo &MRI, 111 MachineOperand &SuperReg, 112 const TargetRegisterClass *SuperRC, 113 unsigned SubIdx, 114 const TargetRegisterClass *SubRC) const; 115 private: 116 void swapOperands(MachineInstr &Inst) const; 117 118 std::pair<bool, MachineBasicBlock *> 119 moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst, 120 MachineDominatorTree *MDT = nullptr) const; 121 122 void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, 123 MachineDominatorTree *MDT = nullptr) const; 124 125 void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const; 126 127 void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const; 128 129 void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst, 130 unsigned Opcode) const; 131 132 void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst, 133 unsigned Opcode) const; 134 135 void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, 136 unsigned Opcode, bool Swap = false) const; 137 138 void splitScalar64BitAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst, 139 MachineDominatorTree *MDT = nullptr) const; 140 141 void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, 142 unsigned Opcode, 143 MachineDominatorTree *MDT = nullptr) const; 144 145 void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst, 146 MachineDominatorTree *MDT = nullptr) const; 147 148 void splitScalar64BitBCNT(SIInstrWorklist &Worklist, 149 MachineInstr &Inst) const; 150 void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const; 151 void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI, 152 MachineInstr &Inst) const; 153 154 void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI, 155 SIInstrWorklist &Worklist) const; 156 157 void addSCCDefUsersToVALUWorklist(MachineOperand &Op, 158 MachineInstr &SCCDefInst, 159 SIInstrWorklist &Worklist, 160 Register NewCond = Register()) const; 161 void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, 162 SIInstrWorklist &Worklist) const; 163 164 const TargetRegisterClass * 165 getDestEquivalentVGPRClass(const MachineInstr &Inst) const; 166 167 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, 168 const MachineInstr &MIb) const; 169 170 Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; 171 172 protected: 173 bool swapSourceModifiers(MachineInstr &MI, 174 MachineOperand &Src0, unsigned Src0OpName, 175 MachineOperand &Src1, unsigned Src1OpName) const; 176 177 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, 178 unsigned OpIdx0, 179 unsigned OpIdx1) const override; 180 181 public: 182 enum TargetOperandFlags { 183 MO_MASK = 0xf, 184 185 MO_NONE = 0, 186 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 187 MO_GOTPCREL = 1, 188 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO. 189 MO_GOTPCREL32 = 2, 190 MO_GOTPCREL32_LO = 2, 191 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. 192 MO_GOTPCREL32_HI = 3, 193 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. 194 MO_REL32 = 4, 195 MO_REL32_LO = 4, 196 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. 197 MO_REL32_HI = 5, 198 199 MO_FAR_BRANCH_OFFSET = 6, 200 201 MO_ABS32_LO = 8, 202 MO_ABS32_HI = 9, 203 }; 204 205 explicit SIInstrInfo(const GCNSubtarget &ST); 206 207 const SIRegisterInfo &getRegisterInfo() const { 208 return RI; 209 } 210 211 const GCNSubtarget &getSubtarget() const { 212 return ST; 213 } 214 215 bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; 216 217 bool isIgnorableUse(const MachineOperand &MO) const override; 218 219 bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, 220 int64_t &Offset1) const override; 221 222 bool getMemOperandsWithOffsetWidth( 223 const MachineInstr &LdSt, 224 SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, 225 bool &OffsetIsScalable, unsigned &Width, 226 const TargetRegisterInfo *TRI) const final; 227 228 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, 229 ArrayRef<const MachineOperand *> BaseOps2, 230 unsigned NumLoads, unsigned NumBytes) const override; 231 232 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, 233 int64_t Offset1, unsigned NumLoads) const override; 234 235 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 236 const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, 237 bool KillSrc) const override; 238 239 void materializeImmediate(MachineBasicBlock &MBB, 240 MachineBasicBlock::iterator MI, const DebugLoc &DL, 241 Register DestReg, int64_t Value) const; 242 243 const TargetRegisterClass *getPreferredSelectRegClass( 244 unsigned Size) const; 245 246 Register insertNE(MachineBasicBlock *MBB, 247 MachineBasicBlock::iterator I, const DebugLoc &DL, 248 Register SrcReg, int Value) const; 249 250 Register insertEQ(MachineBasicBlock *MBB, 251 MachineBasicBlock::iterator I, const DebugLoc &DL, 252 Register SrcReg, int Value) const; 253 254 void storeRegToStackSlot(MachineBasicBlock &MBB, 255 MachineBasicBlock::iterator MI, Register SrcReg, 256 bool isKill, int FrameIndex, 257 const TargetRegisterClass *RC, 258 const TargetRegisterInfo *TRI, 259 Register VReg) const override; 260 261 void loadRegFromStackSlot(MachineBasicBlock &MBB, 262 MachineBasicBlock::iterator MI, Register DestReg, 263 int FrameIndex, const TargetRegisterClass *RC, 264 const TargetRegisterInfo *TRI, 265 Register VReg) const override; 266 267 bool expandPostRAPseudo(MachineInstr &MI) const override; 268 269 // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp 270 // instructions. Returns a pair of generated instructions. 271 // Can split either post-RA with physical registers or pre-RA with 272 // virtual registers. In latter case IR needs to be in SSA form and 273 // and a REG_SEQUENCE is produced to define original register. 274 std::pair<MachineInstr*, MachineInstr*> 275 expandMovDPP64(MachineInstr &MI) const; 276 277 // Returns an opcode that can be used to move a value to a \p DstRC 278 // register. If there is no hardware instruction that can store to \p 279 // DstRC, then AMDGPU::COPY is returned. 280 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; 281 282 const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize, 283 unsigned EltSize, 284 bool IsSGPR) const; 285 286 const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize, 287 bool IsIndirectSrc) const; 288 LLVM_READONLY 289 int commuteOpcode(unsigned Opc) const; 290 291 LLVM_READONLY 292 inline int commuteOpcode(const MachineInstr &MI) const { 293 return commuteOpcode(MI.getOpcode()); 294 } 295 296 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, 297 unsigned &SrcOpIdx1) const override; 298 299 bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0, 300 unsigned &SrcOpIdx1) const; 301 302 bool isBranchOffsetInRange(unsigned BranchOpc, 303 int64_t BrOffset) const override; 304 305 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; 306 307 /// Return whether the block terminate with divergent branch. 308 /// Note this only work before lowering the pseudo control flow instructions. 309 bool hasDivergentBranch(const MachineBasicBlock *MBB) const; 310 311 void insertIndirectBranch(MachineBasicBlock &MBB, 312 MachineBasicBlock &NewDestBB, 313 MachineBasicBlock &RestoreBB, const DebugLoc &DL, 314 int64_t BrOffset, RegScavenger *RS) const override; 315 316 bool analyzeBranchImpl(MachineBasicBlock &MBB, 317 MachineBasicBlock::iterator I, 318 MachineBasicBlock *&TBB, 319 MachineBasicBlock *&FBB, 320 SmallVectorImpl<MachineOperand> &Cond, 321 bool AllowModify) const; 322 323 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 324 MachineBasicBlock *&FBB, 325 SmallVectorImpl<MachineOperand> &Cond, 326 bool AllowModify = false) const override; 327 328 unsigned removeBranch(MachineBasicBlock &MBB, 329 int *BytesRemoved = nullptr) const override; 330 331 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 332 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, 333 const DebugLoc &DL, 334 int *BytesAdded = nullptr) const override; 335 336 bool reverseBranchCondition( 337 SmallVectorImpl<MachineOperand> &Cond) const override; 338 339 bool canInsertSelect(const MachineBasicBlock &MBB, 340 ArrayRef<MachineOperand> Cond, Register DstReg, 341 Register TrueReg, Register FalseReg, int &CondCycles, 342 int &TrueCycles, int &FalseCycles) const override; 343 344 void insertSelect(MachineBasicBlock &MBB, 345 MachineBasicBlock::iterator I, const DebugLoc &DL, 346 Register DstReg, ArrayRef<MachineOperand> Cond, 347 Register TrueReg, Register FalseReg) const override; 348 349 void insertVectorSelect(MachineBasicBlock &MBB, 350 MachineBasicBlock::iterator I, const DebugLoc &DL, 351 Register DstReg, ArrayRef<MachineOperand> Cond, 352 Register TrueReg, Register FalseReg) const; 353 354 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, 355 Register &SrcReg2, int64_t &CmpMask, 356 int64_t &CmpValue) const override; 357 358 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, 359 Register SrcReg2, int64_t CmpMask, int64_t CmpValue, 360 const MachineRegisterInfo *MRI) const override; 361 362 bool 363 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 364 const MachineInstr &MIb) const override; 365 366 static bool isFoldableCopy(const MachineInstr &MI); 367 368 void removeModOperands(MachineInstr &MI) const; 369 370 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, 371 MachineRegisterInfo *MRI) const final; 372 373 unsigned getMachineCSELookAheadLimit() const override { return 500; } 374 375 MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, 376 LiveIntervals *LIS) const override; 377 378 bool isSchedulingBoundary(const MachineInstr &MI, 379 const MachineBasicBlock *MBB, 380 const MachineFunction &MF) const override; 381 382 static bool isSALU(const MachineInstr &MI) { 383 return MI.getDesc().TSFlags & SIInstrFlags::SALU; 384 } 385 386 bool isSALU(uint16_t Opcode) const { 387 return get(Opcode).TSFlags & SIInstrFlags::SALU; 388 } 389 390 static bool isVALU(const MachineInstr &MI) { 391 return MI.getDesc().TSFlags & SIInstrFlags::VALU; 392 } 393 394 bool isVALU(uint16_t Opcode) const { 395 return get(Opcode).TSFlags & SIInstrFlags::VALU; 396 } 397 398 static bool isVMEM(const MachineInstr &MI) { 399 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI); 400 } 401 402 bool isVMEM(uint16_t Opcode) const { 403 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode); 404 } 405 406 static bool isSOP1(const MachineInstr &MI) { 407 return MI.getDesc().TSFlags & SIInstrFlags::SOP1; 408 } 409 410 bool isSOP1(uint16_t Opcode) const { 411 return get(Opcode).TSFlags & SIInstrFlags::SOP1; 412 } 413 414 static bool isSOP2(const MachineInstr &MI) { 415 return MI.getDesc().TSFlags & SIInstrFlags::SOP2; 416 } 417 418 bool isSOP2(uint16_t Opcode) const { 419 return get(Opcode).TSFlags & SIInstrFlags::SOP2; 420 } 421 422 static bool isSOPC(const MachineInstr &MI) { 423 return MI.getDesc().TSFlags & SIInstrFlags::SOPC; 424 } 425 426 bool isSOPC(uint16_t Opcode) const { 427 return get(Opcode).TSFlags & SIInstrFlags::SOPC; 428 } 429 430 static bool isSOPK(const MachineInstr &MI) { 431 return MI.getDesc().TSFlags & SIInstrFlags::SOPK; 432 } 433 434 bool isSOPK(uint16_t Opcode) const { 435 return get(Opcode).TSFlags & SIInstrFlags::SOPK; 436 } 437 438 static bool isSOPP(const MachineInstr &MI) { 439 return MI.getDesc().TSFlags & SIInstrFlags::SOPP; 440 } 441 442 bool isSOPP(uint16_t Opcode) const { 443 return get(Opcode).TSFlags & SIInstrFlags::SOPP; 444 } 445 446 static bool isPacked(const MachineInstr &MI) { 447 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; 448 } 449 450 bool isPacked(uint16_t Opcode) const { 451 return get(Opcode).TSFlags & SIInstrFlags::IsPacked; 452 } 453 454 static bool isVOP1(const MachineInstr &MI) { 455 return MI.getDesc().TSFlags & SIInstrFlags::VOP1; 456 } 457 458 bool isVOP1(uint16_t Opcode) const { 459 return get(Opcode).TSFlags & SIInstrFlags::VOP1; 460 } 461 462 static bool isVOP2(const MachineInstr &MI) { 463 return MI.getDesc().TSFlags & SIInstrFlags::VOP2; 464 } 465 466 bool isVOP2(uint16_t Opcode) const { 467 return get(Opcode).TSFlags & SIInstrFlags::VOP2; 468 } 469 470 static bool isVOP3(const MachineInstr &MI) { 471 return MI.getDesc().TSFlags & SIInstrFlags::VOP3; 472 } 473 474 bool isVOP3(uint16_t Opcode) const { 475 return get(Opcode).TSFlags & SIInstrFlags::VOP3; 476 } 477 478 static bool isSDWA(const MachineInstr &MI) { 479 return MI.getDesc().TSFlags & SIInstrFlags::SDWA; 480 } 481 482 bool isSDWA(uint16_t Opcode) const { 483 return get(Opcode).TSFlags & SIInstrFlags::SDWA; 484 } 485 486 static bool isVOPC(const MachineInstr &MI) { 487 return MI.getDesc().TSFlags & SIInstrFlags::VOPC; 488 } 489 490 bool isVOPC(uint16_t Opcode) const { 491 return get(Opcode).TSFlags & SIInstrFlags::VOPC; 492 } 493 494 static bool isMUBUF(const MachineInstr &MI) { 495 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; 496 } 497 498 bool isMUBUF(uint16_t Opcode) const { 499 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; 500 } 501 502 static bool isMTBUF(const MachineInstr &MI) { 503 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; 504 } 505 506 bool isMTBUF(uint16_t Opcode) const { 507 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; 508 } 509 510 static bool isSMRD(const MachineInstr &MI) { 511 return MI.getDesc().TSFlags & SIInstrFlags::SMRD; 512 } 513 514 bool isSMRD(uint16_t Opcode) const { 515 return get(Opcode).TSFlags & SIInstrFlags::SMRD; 516 } 517 518 bool isBufferSMRD(const MachineInstr &MI) const; 519 520 static bool isDS(const MachineInstr &MI) { 521 return MI.getDesc().TSFlags & SIInstrFlags::DS; 522 } 523 524 bool isDS(uint16_t Opcode) const { 525 return get(Opcode).TSFlags & SIInstrFlags::DS; 526 } 527 528 bool isAlwaysGDS(uint16_t Opcode) const; 529 530 static bool isMIMG(const MachineInstr &MI) { 531 return MI.getDesc().TSFlags & SIInstrFlags::MIMG; 532 } 533 534 bool isMIMG(uint16_t Opcode) const { 535 return get(Opcode).TSFlags & SIInstrFlags::MIMG; 536 } 537 538 static bool isGather4(const MachineInstr &MI) { 539 return MI.getDesc().TSFlags & SIInstrFlags::Gather4; 540 } 541 542 bool isGather4(uint16_t Opcode) const { 543 return get(Opcode).TSFlags & SIInstrFlags::Gather4; 544 } 545 546 static bool isFLAT(const MachineInstr &MI) { 547 return MI.getDesc().TSFlags & SIInstrFlags::FLAT; 548 } 549 550 // Is a FLAT encoded instruction which accesses a specific segment, 551 // i.e. global_* or scratch_*. 552 static bool isSegmentSpecificFLAT(const MachineInstr &MI) { 553 auto Flags = MI.getDesc().TSFlags; 554 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); 555 } 556 557 bool isSegmentSpecificFLAT(uint16_t Opcode) const { 558 auto Flags = get(Opcode).TSFlags; 559 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); 560 } 561 562 static bool isFLATGlobal(const MachineInstr &MI) { 563 return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal; 564 } 565 566 bool isFLATGlobal(uint16_t Opcode) const { 567 return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal; 568 } 569 570 static bool isFLATScratch(const MachineInstr &MI) { 571 return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch; 572 } 573 574 bool isFLATScratch(uint16_t Opcode) const { 575 return get(Opcode).TSFlags & SIInstrFlags::FlatScratch; 576 } 577 578 // Any FLAT encoded instruction, including global_* and scratch_*. 579 bool isFLAT(uint16_t Opcode) const { 580 return get(Opcode).TSFlags & SIInstrFlags::FLAT; 581 } 582 583 static bool isEXP(const MachineInstr &MI) { 584 return MI.getDesc().TSFlags & SIInstrFlags::EXP; 585 } 586 587 static bool isDualSourceBlendEXP(const MachineInstr &MI) { 588 if (!isEXP(MI)) 589 return false; 590 unsigned Target = MI.getOperand(0).getImm(); 591 return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 || 592 Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1; 593 } 594 595 bool isEXP(uint16_t Opcode) const { 596 return get(Opcode).TSFlags & SIInstrFlags::EXP; 597 } 598 599 static bool isAtomicNoRet(const MachineInstr &MI) { 600 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet; 601 } 602 603 bool isAtomicNoRet(uint16_t Opcode) const { 604 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet; 605 } 606 607 static bool isAtomicRet(const MachineInstr &MI) { 608 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet; 609 } 610 611 bool isAtomicRet(uint16_t Opcode) const { 612 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet; 613 } 614 615 static bool isAtomic(const MachineInstr &MI) { 616 return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet | 617 SIInstrFlags::IsAtomicNoRet); 618 } 619 620 bool isAtomic(uint16_t Opcode) const { 621 return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet | 622 SIInstrFlags::IsAtomicNoRet); 623 } 624 625 static bool isWQM(const MachineInstr &MI) { 626 return MI.getDesc().TSFlags & SIInstrFlags::WQM; 627 } 628 629 bool isWQM(uint16_t Opcode) const { 630 return get(Opcode).TSFlags & SIInstrFlags::WQM; 631 } 632 633 static bool isDisableWQM(const MachineInstr &MI) { 634 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; 635 } 636 637 bool isDisableWQM(uint16_t Opcode) const { 638 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; 639 } 640 641 static bool isVGPRSpill(const MachineInstr &MI) { 642 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; 643 } 644 645 bool isVGPRSpill(uint16_t Opcode) const { 646 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; 647 } 648 649 static bool isSGPRSpill(const MachineInstr &MI) { 650 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill; 651 } 652 653 bool isSGPRSpill(uint16_t Opcode) const { 654 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill; 655 } 656 657 static bool isWWMRegSpillOpcode(uint16_t Opcode) { 658 return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || 659 Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE; 660 } 661 662 static bool isDPP(const MachineInstr &MI) { 663 return MI.getDesc().TSFlags & SIInstrFlags::DPP; 664 } 665 666 bool isDPP(uint16_t Opcode) const { 667 return get(Opcode).TSFlags & SIInstrFlags::DPP; 668 } 669 670 static bool isTRANS(const MachineInstr &MI) { 671 return MI.getDesc().TSFlags & SIInstrFlags::TRANS; 672 } 673 674 bool isTRANS(uint16_t Opcode) const { 675 return get(Opcode).TSFlags & SIInstrFlags::TRANS; 676 } 677 678 static bool isVOP3P(const MachineInstr &MI) { 679 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; 680 } 681 682 bool isVOP3P(uint16_t Opcode) const { 683 return get(Opcode).TSFlags & SIInstrFlags::VOP3P; 684 } 685 686 static bool isVINTRP(const MachineInstr &MI) { 687 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; 688 } 689 690 bool isVINTRP(uint16_t Opcode) const { 691 return get(Opcode).TSFlags & SIInstrFlags::VINTRP; 692 } 693 694 static bool isMAI(const MachineInstr &MI) { 695 return MI.getDesc().TSFlags & SIInstrFlags::IsMAI; 696 } 697 698 bool isMAI(uint16_t Opcode) const { 699 return get(Opcode).TSFlags & SIInstrFlags::IsMAI; 700 } 701 702 static bool isMFMA(const MachineInstr &MI) { 703 return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && 704 MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; 705 } 706 707 static bool isDOT(const MachineInstr &MI) { 708 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; 709 } 710 711 static bool isWMMA(const MachineInstr &MI) { 712 return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA; 713 } 714 715 bool isWMMA(uint16_t Opcode) const { 716 return get(Opcode).TSFlags & SIInstrFlags::IsWMMA; 717 } 718 719 static bool isMFMAorWMMA(const MachineInstr &MI) { 720 return isMFMA(MI) || isWMMA(MI); 721 } 722 723 bool isDOT(uint16_t Opcode) const { 724 return get(Opcode).TSFlags & SIInstrFlags::IsDOT; 725 } 726 727 static bool isLDSDIR(const MachineInstr &MI) { 728 return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; 729 } 730 731 bool isLDSDIR(uint16_t Opcode) const { 732 return get(Opcode).TSFlags & SIInstrFlags::LDSDIR; 733 } 734 735 static bool isVINTERP(const MachineInstr &MI) { 736 return MI.getDesc().TSFlags & SIInstrFlags::VINTERP; 737 } 738 739 bool isVINTERP(uint16_t Opcode) const { 740 return get(Opcode).TSFlags & SIInstrFlags::VINTERP; 741 } 742 743 static bool isScalarUnit(const MachineInstr &MI) { 744 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); 745 } 746 747 static bool usesVM_CNT(const MachineInstr &MI) { 748 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT; 749 } 750 751 static bool usesLGKM_CNT(const MachineInstr &MI) { 752 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; 753 } 754 755 static bool sopkIsZext(const MachineInstr &MI) { 756 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT; 757 } 758 759 bool sopkIsZext(uint16_t Opcode) const { 760 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT; 761 } 762 763 /// \returns true if this is an s_store_dword* instruction. This is more 764 /// specific than isSMEM && mayStore. 765 static bool isScalarStore(const MachineInstr &MI) { 766 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; 767 } 768 769 bool isScalarStore(uint16_t Opcode) const { 770 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; 771 } 772 773 static bool isFixedSize(const MachineInstr &MI) { 774 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; 775 } 776 777 bool isFixedSize(uint16_t Opcode) const { 778 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; 779 } 780 781 static bool hasFPClamp(const MachineInstr &MI) { 782 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; 783 } 784 785 bool hasFPClamp(uint16_t Opcode) const { 786 return get(Opcode).TSFlags & SIInstrFlags::FPClamp; 787 } 788 789 static bool hasIntClamp(const MachineInstr &MI) { 790 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp; 791 } 792 793 uint64_t getClampMask(const MachineInstr &MI) const { 794 const uint64_t ClampFlags = SIInstrFlags::FPClamp | 795 SIInstrFlags::IntClamp | 796 SIInstrFlags::ClampLo | 797 SIInstrFlags::ClampHi; 798 return MI.getDesc().TSFlags & ClampFlags; 799 } 800 801 static bool usesFPDPRounding(const MachineInstr &MI) { 802 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding; 803 } 804 805 bool usesFPDPRounding(uint16_t Opcode) const { 806 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; 807 } 808 809 static bool isFPAtomic(const MachineInstr &MI) { 810 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; 811 } 812 813 bool isFPAtomic(uint16_t Opcode) const { 814 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; 815 } 816 817 static bool isNeverUniform(const MachineInstr &MI) { 818 return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform; 819 } 820 821 static bool doesNotReadTiedSource(const MachineInstr &MI) { 822 return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; 823 } 824 825 bool doesNotReadTiedSource(uint16_t Opcode) const { 826 return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead; 827 } 828 829 bool isVGPRCopy(const MachineInstr &MI) const { 830 assert(MI.isCopy()); 831 Register Dest = MI.getOperand(0).getReg(); 832 const MachineFunction &MF = *MI.getParent()->getParent(); 833 const MachineRegisterInfo &MRI = MF.getRegInfo(); 834 return !RI.isSGPRReg(MRI, Dest); 835 } 836 837 bool hasVGPRUses(const MachineInstr &MI) const { 838 const MachineFunction &MF = *MI.getParent()->getParent(); 839 const MachineRegisterInfo &MRI = MF.getRegInfo(); 840 return llvm::any_of(MI.explicit_uses(), 841 [&MRI, this](const MachineOperand &MO) { 842 return MO.isReg() && RI.isVGPR(MRI, MO.getReg());}); 843 } 844 845 /// Return true if the instruction modifies the mode register.q 846 static bool modifiesModeRegister(const MachineInstr &MI); 847 848 /// Whether we must prevent this instruction from executing with EXEC = 0. 849 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; 850 851 /// Returns true if the instruction could potentially depend on the value of 852 /// exec. If false, exec dependencies may safely be ignored. 853 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; 854 855 bool isInlineConstant(const APInt &Imm) const; 856 857 bool isInlineConstant(const APFloat &Imm) const { 858 return isInlineConstant(Imm.bitcastToAPInt()); 859 } 860 861 // Returns true if this non-register operand definitely does not need to be 862 // encoded as a 32-bit literal. Note that this function handles all kinds of 863 // operands, not just immediates. 864 // 865 // Some operands like FrameIndexes could resolve to an inline immediate value 866 // that will not require an additional 4-bytes; this function assumes that it 867 // will. 868 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; 869 870 bool isInlineConstant(const MachineOperand &MO, 871 const MCOperandInfo &OpInfo) const { 872 return isInlineConstant(MO, OpInfo.OperandType); 873 } 874 875 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would 876 /// be an inline immediate. 877 bool isInlineConstant(const MachineInstr &MI, 878 const MachineOperand &UseMO, 879 const MachineOperand &DefMO) const { 880 assert(UseMO.getParent() == &MI); 881 int OpIdx = UseMO.getOperandNo(); 882 if (OpIdx >= MI.getDesc().NumOperands) 883 return false; 884 885 return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]); 886 } 887 888 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline 889 /// immediate. 890 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const { 891 const MachineOperand &MO = MI.getOperand(OpIdx); 892 return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType); 893 } 894 895 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, 896 const MachineOperand &MO) const { 897 if (OpIdx >= MI.getDesc().NumOperands) 898 return false; 899 900 if (MI.isCopy()) { 901 unsigned Size = getOpSize(MI, OpIdx); 902 assert(Size == 8 || Size == 4); 903 904 uint8_t OpType = (Size == 8) ? 905 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32; 906 return isInlineConstant(MO, OpType); 907 } 908 909 return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType); 910 } 911 912 bool isInlineConstant(const MachineOperand &MO) const { 913 return isInlineConstant(*MO.getParent(), MO.getOperandNo()); 914 } 915 916 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 917 const MachineOperand &MO) const; 918 919 /// Return true if this 64-bit VALU instruction has a 32-bit encoding. 920 /// This function will return false if you pass it a 32-bit instruction. 921 bool hasVALU32BitEncoding(unsigned Opcode) const; 922 923 /// Returns true if this operand uses the constant bus. 924 bool usesConstantBus(const MachineRegisterInfo &MRI, 925 const MachineOperand &MO, 926 const MCOperandInfo &OpInfo) const; 927 928 /// Return true if this instruction has any modifiers. 929 /// e.g. src[012]_mod, omod, clamp. 930 bool hasModifiers(unsigned Opcode) const; 931 932 bool hasModifiersSet(const MachineInstr &MI, 933 unsigned OpName) const; 934 bool hasAnyModifiersSet(const MachineInstr &MI) const; 935 936 bool canShrink(const MachineInstr &MI, 937 const MachineRegisterInfo &MRI) const; 938 939 MachineInstr *buildShrunkInst(MachineInstr &MI, 940 unsigned NewOpcode) const; 941 942 bool verifyInstruction(const MachineInstr &MI, 943 StringRef &ErrInfo) const override; 944 945 unsigned getVALUOp(const MachineInstr &MI) const; 946 947 void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, 948 MachineBasicBlock::iterator MBBI, 949 const DebugLoc &DL, Register Reg, 950 bool IsSCCLive) const; 951 952 void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, 953 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 954 Register Reg) const; 955 956 /// Return the correct register class for \p OpNo. For target-specific 957 /// instructions, this will return the register class that has been defined 958 /// in tablegen. For generic instructions, like REG_SEQUENCE it will return 959 /// the register class of its machine operand. 960 /// to infer the correct register class base on the other operands. 961 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, 962 unsigned OpNo) const; 963 964 /// Return the size in bytes of the operand OpNo on the given 965 // instruction opcode. 966 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { 967 const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo]; 968 969 if (OpInfo.RegClass == -1) { 970 // If this is an immediate operand, this must be a 32-bit literal. 971 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); 972 return 4; 973 } 974 975 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; 976 } 977 978 /// This form should usually be preferred since it handles operands 979 /// with unknown register classes. 980 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { 981 const MachineOperand &MO = MI.getOperand(OpNo); 982 if (MO.isReg()) { 983 if (unsigned SubReg = MO.getSubReg()) { 984 return RI.getSubRegIdxSize(SubReg) / 8; 985 } 986 } 987 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; 988 } 989 990 /// Legalize the \p OpIndex operand of this instruction by inserting 991 /// a MOV. For example: 992 /// ADD_I32_e32 VGPR0, 15 993 /// to 994 /// MOV VGPR1, 15 995 /// ADD_I32_e32 VGPR0, VGPR1 996 /// 997 /// If the operand being legalized is a register, then a COPY will be used 998 /// instead of MOV. 999 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; 1000 1001 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand 1002 /// for \p MI. 1003 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 1004 const MachineOperand *MO = nullptr) const; 1005 1006 /// Check if \p MO would be a valid operand for the given operand 1007 /// definition \p OpInfo. Note this does not attempt to validate constant bus 1008 /// restrictions (e.g. literal constant usage). 1009 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, 1010 const MCOperandInfo &OpInfo, 1011 const MachineOperand &MO) const; 1012 1013 /// Check if \p MO (a register operand) is a legal register for the 1014 /// given operand description. 1015 bool isLegalRegOperand(const MachineRegisterInfo &MRI, 1016 const MCOperandInfo &OpInfo, 1017 const MachineOperand &MO) const; 1018 1019 /// Legalize operands in \p MI by either commuting it or inserting a 1020 /// copy of src1. 1021 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1022 1023 /// Fix operands in \p MI to satisfy constant bus requirements. 1024 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1025 1026 /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only 1027 /// be used when it is know that the value in SrcReg is same across all 1028 /// threads in the wave. 1029 /// \returns The SGPR register that \p SrcReg was copied to. 1030 Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, 1031 MachineRegisterInfo &MRI) const; 1032 1033 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1034 void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const; 1035 1036 void legalizeGenericOperand(MachineBasicBlock &InsertMBB, 1037 MachineBasicBlock::iterator I, 1038 const TargetRegisterClass *DstRC, 1039 MachineOperand &Op, MachineRegisterInfo &MRI, 1040 const DebugLoc &DL) const; 1041 1042 /// Legalize all operands in this instruction. This function may create new 1043 /// instructions and control-flow around \p MI. If present, \p MDT is 1044 /// updated. 1045 /// \returns A new basic block that contains \p MI if new blocks were created. 1046 MachineBasicBlock * 1047 legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const; 1048 1049 /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand 1050 /// was moved to VGPR. \returns true if succeeded. 1051 bool moveFlatAddrToVGPR(MachineInstr &Inst) const; 1052 1053 /// Replace the instructions opcode with the equivalent VALU 1054 /// opcode. This function will also move the users of MachineInstruntions 1055 /// in the \p WorkList to the VALU if necessary. If present, \p MDT is 1056 /// updated. 1057 void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const; 1058 1059 void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, 1060 MachineInstr &Inst) const; 1061 1062 void insertNoop(MachineBasicBlock &MBB, 1063 MachineBasicBlock::iterator MI) const override; 1064 1065 void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1066 unsigned Quantity) const override; 1067 1068 void insertReturn(MachineBasicBlock &MBB) const; 1069 /// Return the number of wait states that result from executing this 1070 /// instruction. 1071 static unsigned getNumWaitStates(const MachineInstr &MI); 1072 1073 /// Returns the operand named \p Op. If \p MI does not have an 1074 /// operand named \c Op, this function returns nullptr. 1075 LLVM_READONLY 1076 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; 1077 1078 LLVM_READONLY 1079 const MachineOperand *getNamedOperand(const MachineInstr &MI, 1080 unsigned OpName) const { 1081 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); 1082 } 1083 1084 /// Get required immediate operand 1085 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const { 1086 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); 1087 return MI.getOperand(Idx).getImm(); 1088 } 1089 1090 uint64_t getDefaultRsrcDataFormat() const; 1091 uint64_t getScratchRsrcWords23() const; 1092 1093 bool isLowLatencyInstruction(const MachineInstr &MI) const; 1094 bool isHighLatencyDef(int Opc) const override; 1095 1096 /// Return the descriptor of the target-specific machine instruction 1097 /// that corresponds to the specified pseudo or native opcode. 1098 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { 1099 return get(pseudoToMCOpcode(Opcode)); 1100 } 1101 1102 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const; 1103 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; 1104 1105 unsigned isLoadFromStackSlot(const MachineInstr &MI, 1106 int &FrameIndex) const override; 1107 unsigned isStoreToStackSlot(const MachineInstr &MI, 1108 int &FrameIndex) const override; 1109 1110 unsigned getInstBundleSize(const MachineInstr &MI) const; 1111 unsigned getInstSizeInBytes(const MachineInstr &MI) const override; 1112 1113 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; 1114 1115 bool isNonUniformBranchInstr(MachineInstr &Instr) const; 1116 1117 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, 1118 MachineBasicBlock *IfEnd) const; 1119 1120 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, 1121 MachineBasicBlock *LoopEnd) const; 1122 1123 std::pair<unsigned, unsigned> 1124 decomposeMachineOperandsTargetFlags(unsigned TF) const override; 1125 1126 ArrayRef<std::pair<int, const char *>> 1127 getSerializableTargetIndices() const override; 1128 1129 ArrayRef<std::pair<unsigned, const char *>> 1130 getSerializableDirectMachineOperandTargetFlags() const override; 1131 1132 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 1133 getSerializableMachineMemOperandTargetFlags() const override; 1134 1135 ScheduleHazardRecognizer * 1136 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 1137 const ScheduleDAG *DAG) const override; 1138 1139 ScheduleHazardRecognizer * 1140 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; 1141 1142 ScheduleHazardRecognizer * 1143 CreateTargetMIHazardRecognizer(const InstrItineraryData *II, 1144 const ScheduleDAGMI *DAG) const override; 1145 1146 bool isBasicBlockPrologue(const MachineInstr &MI) const override; 1147 1148 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, 1149 MachineBasicBlock::iterator InsPt, 1150 const DebugLoc &DL, Register Src, 1151 Register Dst) const override; 1152 1153 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, 1154 MachineBasicBlock::iterator InsPt, 1155 const DebugLoc &DL, Register Src, 1156 unsigned SrcSubReg, 1157 Register Dst) const override; 1158 1159 bool isWave32() const; 1160 1161 /// Return a partially built integer add instruction without carry. 1162 /// Caller must add source operands. 1163 /// For pre-GFX9 it will generate unused carry destination operand. 1164 /// TODO: After GFX9 it should return a no-carry operation. 1165 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 1166 MachineBasicBlock::iterator I, 1167 const DebugLoc &DL, 1168 Register DestReg) const; 1169 1170 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 1171 MachineBasicBlock::iterator I, 1172 const DebugLoc &DL, 1173 Register DestReg, 1174 RegScavenger &RS) const; 1175 1176 static bool isKillTerminator(unsigned Opcode); 1177 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; 1178 1179 static bool isLegalMUBUFImmOffset(unsigned Imm) { 1180 return isUInt<12>(Imm); 1181 } 1182 1183 static unsigned getMaxMUBUFImmOffset(); 1184 1185 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, 1186 Align Alignment = Align(4)) const; 1187 1188 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT 1189 /// encoded instruction. If \p Signed, this is for an instruction that 1190 /// interprets the offset as signed. 1191 bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, 1192 uint64_t FlatVariant) const; 1193 1194 /// Split \p COffsetVal into {immediate offset field, remainder offset} 1195 /// values. 1196 std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal, 1197 unsigned AddrSpace, 1198 uint64_t FlatVariant) const; 1199 1200 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. 1201 /// Return -1 if the target-specific opcode for the pseudo instruction does 1202 /// not exist. If Opcode is not a pseudo instruction, this is identity. 1203 int pseudoToMCOpcode(int Opcode) const; 1204 1205 /// \brief Check if this instruction should only be used by assembler. 1206 /// Return true if this opcode should not be used by codegen. 1207 bool isAsmOnlyOpcode(int MCOp) const; 1208 1209 const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, 1210 const TargetRegisterInfo *TRI, 1211 const MachineFunction &MF) 1212 const override; 1213 1214 void fixImplicitOperands(MachineInstr &MI) const; 1215 1216 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, 1217 ArrayRef<unsigned> Ops, 1218 MachineBasicBlock::iterator InsertPt, 1219 int FrameIndex, 1220 LiveIntervals *LIS = nullptr, 1221 VirtRegMap *VRM = nullptr) const override; 1222 1223 unsigned getInstrLatency(const InstrItineraryData *ItinData, 1224 const MachineInstr &MI, 1225 unsigned *PredCost = nullptr) const override; 1226 1227 InstructionUniformity 1228 getInstructionUniformity(const MachineInstr &MI) const override final; 1229 1230 InstructionUniformity 1231 getGenericInstructionUniformity(const MachineInstr &MI) const; 1232 1233 const MIRFormatter *getMIRFormatter() const override { 1234 if (!Formatter.get()) 1235 Formatter = std::make_unique<AMDGPUMIRFormatter>(); 1236 return Formatter.get(); 1237 } 1238 1239 static unsigned getDSShaderTypeValue(const MachineFunction &MF); 1240 1241 const TargetSchedModel &getSchedModel() const { return SchedModel; } 1242 1243 // Enforce operand's \p OpName even alignment if required by target. 1244 // This is used if an operand is a 32 bit register but needs to be aligned 1245 // regardless. 1246 void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const; 1247 }; 1248 1249 /// \brief Returns true if a reg:subreg pair P has a TRC class 1250 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, 1251 const TargetRegisterClass &TRC, 1252 MachineRegisterInfo &MRI) { 1253 auto *RC = MRI.getRegClass(P.Reg); 1254 if (!P.SubReg) 1255 return RC == &TRC; 1256 auto *TRI = MRI.getTargetRegisterInfo(); 1257 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg); 1258 } 1259 1260 /// \brief Create RegSubRegPair from a register MachineOperand 1261 inline 1262 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) { 1263 assert(O.isReg()); 1264 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg()); 1265 } 1266 1267 /// \brief Return the SubReg component from REG_SEQUENCE 1268 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, 1269 unsigned SubReg); 1270 1271 /// \brief Return the defining instruction for a given reg:subreg pair 1272 /// skipping copy like instructions and subreg-manipulation pseudos. 1273 /// Following another subreg of a reg:subreg isn't supported. 1274 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, 1275 MachineRegisterInfo &MRI); 1276 1277 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p 1278 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not 1279 /// attempt to track between blocks. 1280 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, 1281 Register VReg, 1282 const MachineInstr &DefMI, 1283 const MachineInstr &UseMI); 1284 1285 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p 1286 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to 1287 /// track between blocks. 1288 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, 1289 Register VReg, 1290 const MachineInstr &DefMI); 1291 1292 namespace AMDGPU { 1293 1294 LLVM_READONLY 1295 int getVOPe64(uint16_t Opcode); 1296 1297 LLVM_READONLY 1298 int getVOPe32(uint16_t Opcode); 1299 1300 LLVM_READONLY 1301 int getSDWAOp(uint16_t Opcode); 1302 1303 LLVM_READONLY 1304 int getDPPOp32(uint16_t Opcode); 1305 1306 LLVM_READONLY 1307 int getDPPOp64(uint16_t Opcode); 1308 1309 LLVM_READONLY 1310 int getBasicFromSDWAOp(uint16_t Opcode); 1311 1312 LLVM_READONLY 1313 int getCommuteRev(uint16_t Opcode); 1314 1315 LLVM_READONLY 1316 int getCommuteOrig(uint16_t Opcode); 1317 1318 LLVM_READONLY 1319 int getAddr64Inst(uint16_t Opcode); 1320 1321 /// Check if \p Opcode is an Addr64 opcode. 1322 /// 1323 /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1. 1324 LLVM_READONLY 1325 int getIfAddr64Inst(uint16_t Opcode); 1326 1327 LLVM_READONLY 1328 int getAtomicNoRetOp(uint16_t Opcode); 1329 1330 LLVM_READONLY 1331 int getSOPKOp(uint16_t Opcode); 1332 1333 /// \returns SADDR form of a FLAT Global instruction given an \p Opcode 1334 /// of a VADDR form. 1335 LLVM_READONLY 1336 int getGlobalSaddrOp(uint16_t Opcode); 1337 1338 /// \returns VADDR form of a FLAT Global instruction given an \p Opcode 1339 /// of a SADDR form. 1340 LLVM_READONLY 1341 int getGlobalVaddrOp(uint16_t Opcode); 1342 1343 LLVM_READONLY 1344 int getVCMPXNoSDstOp(uint16_t Opcode); 1345 1346 /// \returns ST form with only immediate offset of a FLAT Scratch instruction 1347 /// given an \p Opcode of an SS (SADDR) form. 1348 LLVM_READONLY 1349 int getFlatScratchInstSTfromSS(uint16_t Opcode); 1350 1351 /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode 1352 /// of an SVS (SADDR + VADDR) form. 1353 LLVM_READONLY 1354 int getFlatScratchInstSVfromSVS(uint16_t Opcode); 1355 1356 /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode 1357 /// of an SV (VADDR) form. 1358 LLVM_READONLY 1359 int getFlatScratchInstSSfromSV(uint16_t Opcode); 1360 1361 /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode 1362 /// of an SS (SADDR) form. 1363 LLVM_READONLY 1364 int getFlatScratchInstSVfromSS(uint16_t Opcode); 1365 1366 /// \returns earlyclobber version of a MAC MFMA is exists. 1367 LLVM_READONLY 1368 int getMFMAEarlyClobberOp(uint16_t Opcode); 1369 1370 /// \returns v_cmpx version of a v_cmp instruction. 1371 LLVM_READONLY 1372 int getVCMPXOpFromVCMP(uint16_t Opcode); 1373 1374 const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; 1375 const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); 1376 const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); 1377 const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); 1378 1379 } // end namespace AMDGPU 1380 1381 namespace SI { 1382 namespace KernelInputOffsets { 1383 1384 /// Offsets in bytes from the start of the input buffer 1385 enum Offsets { 1386 NGROUPS_X = 0, 1387 NGROUPS_Y = 4, 1388 NGROUPS_Z = 8, 1389 GLOBAL_SIZE_X = 12, 1390 GLOBAL_SIZE_Y = 16, 1391 GLOBAL_SIZE_Z = 20, 1392 LOCAL_SIZE_X = 24, 1393 LOCAL_SIZE_Y = 28, 1394 LOCAL_SIZE_Z = 32 1395 }; 1396 1397 } // end namespace KernelInputOffsets 1398 } // end namespace SI 1399 1400 } // end namespace llvm 1401 1402 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 1403