1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Interface definition for SIInstrInfo. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 16 17 #include "AMDGPUInstrInfo.h" 18 #include "SIDefines.h" 19 #include "SIRegisterInfo.h" 20 #include "Utils/AMDGPUBaseInfo.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/Support/Compiler.h" 30 #include <cassert> 31 #include <cstdint> 32 33 #define GET_INSTRINFO_HEADER 34 #include "AMDGPUGenInstrInfo.inc" 35 36 namespace llvm { 37 38 class APInt; 39 class MachineDominatorTree; 40 class MachineRegisterInfo; 41 class RegScavenger; 42 class GCNSubtarget; 43 class TargetRegisterClass; 44 45 class SIInstrInfo final : public AMDGPUGenInstrInfo { 46 private: 47 const SIRegisterInfo RI; 48 const GCNSubtarget &ST; 49 50 // The inverse predicate should have the negative value. 
51 enum BranchPredicate { 52 INVALID_BR = 0, 53 SCC_TRUE = 1, 54 SCC_FALSE = -1, 55 VCCNZ = 2, 56 VCCZ = -2, 57 EXECNZ = -3, 58 EXECZ = 3 59 }; 60 61 using SetVectorType = SmallSetVector<MachineInstr *, 32>; 62 63 static unsigned getBranchOpcode(BranchPredicate Cond); 64 static BranchPredicate getBranchPredicate(unsigned Opcode); 65 66 public: 67 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, 68 MachineRegisterInfo &MRI, 69 MachineOperand &SuperReg, 70 const TargetRegisterClass *SuperRC, 71 unsigned SubIdx, 72 const TargetRegisterClass *SubRC) const; 73 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, 74 MachineRegisterInfo &MRI, 75 MachineOperand &SuperReg, 76 const TargetRegisterClass *SuperRC, 77 unsigned SubIdx, 78 const TargetRegisterClass *SubRC) const; 79 private: 80 void swapOperands(MachineInstr &Inst) const; 81 82 bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst, 83 MachineDominatorTree *MDT = nullptr) const; 84 85 void lowerScalarAbs(SetVectorType &Worklist, 86 MachineInstr &Inst) const; 87 88 void lowerScalarXnor(SetVectorType &Worklist, 89 MachineInstr &Inst) const; 90 91 void splitScalarNotBinop(SetVectorType &Worklist, 92 MachineInstr &Inst, 93 unsigned Opcode) const; 94 95 void splitScalarBinOpN2(SetVectorType &Worklist, 96 MachineInstr &Inst, 97 unsigned Opcode) const; 98 99 void splitScalar64BitUnaryOp(SetVectorType &Worklist, 100 MachineInstr &Inst, unsigned Opcode) const; 101 102 void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst, 103 MachineDominatorTree *MDT = nullptr) const; 104 105 void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst, 106 unsigned Opcode, 107 MachineDominatorTree *MDT = nullptr) const; 108 109 void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst, 110 MachineDominatorTree *MDT = nullptr) const; 111 112 void splitScalar64BitBCNT(SetVectorType &Worklist, 113 MachineInstr &Inst) const; 114 void 
splitScalar64BitBFE(SetVectorType &Worklist, 115 MachineInstr &Inst) const; 116 void movePackToVALU(SetVectorType &Worklist, 117 MachineRegisterInfo &MRI, 118 MachineInstr &Inst) const; 119 120 void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI, 121 SetVectorType &Worklist) const; 122 123 void addSCCDefUsersToVALUWorklist(MachineOperand &Op, 124 MachineInstr &SCCDefInst, 125 SetVectorType &Worklist) const; 126 127 const TargetRegisterClass * 128 getDestEquivalentVGPRClass(const MachineInstr &Inst) const; 129 130 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, 131 const MachineInstr &MIb) const; 132 133 unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; 134 135 protected: 136 bool swapSourceModifiers(MachineInstr &MI, 137 MachineOperand &Src0, unsigned Src0OpName, 138 MachineOperand &Src1, unsigned Src1OpName) const; 139 140 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, 141 unsigned OpIdx0, 142 unsigned OpIdx1) const override; 143 144 public: 145 enum TargetOperandFlags { 146 MO_MASK = 0xf, 147 148 MO_NONE = 0, 149 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 150 MO_GOTPCREL = 1, 151 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO. 152 MO_GOTPCREL32 = 2, 153 MO_GOTPCREL32_LO = 2, 154 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. 155 MO_GOTPCREL32_HI = 3, 156 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. 157 MO_REL32 = 4, 158 MO_REL32_LO = 4, 159 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. 
160 MO_REL32_HI = 5, 161 162 MO_LONG_BRANCH_FORWARD = 6, 163 MO_LONG_BRANCH_BACKWARD = 7, 164 165 MO_ABS32_LO = 8, 166 MO_ABS32_HI = 9, 167 }; 168 169 explicit SIInstrInfo(const GCNSubtarget &ST); 170 171 const SIRegisterInfo &getRegisterInfo() const { 172 return RI; 173 } 174 175 bool isReallyTriviallyReMaterializable(const MachineInstr &MI, 176 AAResults *AA) const override; 177 178 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 179 int64_t &Offset1, 180 int64_t &Offset2) const override; 181 182 bool getMemOperandWithOffset(const MachineInstr &LdSt, 183 const MachineOperand *&BaseOp, 184 int64_t &Offset, 185 const TargetRegisterInfo *TRI) const final; 186 187 bool shouldClusterMemOps(const MachineOperand &BaseOp1, 188 const MachineOperand &BaseOp2, 189 unsigned NumLoads) const override; 190 191 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, 192 int64_t Offset1, unsigned NumLoads) const override; 193 194 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 195 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, 196 bool KillSrc) const override; 197 198 unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI, 199 RegScavenger *RS, unsigned TmpReg, 200 unsigned Offset, unsigned Size) const; 201 202 void materializeImmediate(MachineBasicBlock &MBB, 203 MachineBasicBlock::iterator MI, 204 const DebugLoc &DL, 205 unsigned DestReg, 206 int64_t Value) const; 207 208 const TargetRegisterClass *getPreferredSelectRegClass( 209 unsigned Size) const; 210 211 unsigned insertNE(MachineBasicBlock *MBB, 212 MachineBasicBlock::iterator I, const DebugLoc &DL, 213 unsigned SrcReg, int Value) const; 214 215 unsigned insertEQ(MachineBasicBlock *MBB, 216 MachineBasicBlock::iterator I, const DebugLoc &DL, 217 unsigned SrcReg, int Value) const; 218 219 void storeRegToStackSlot(MachineBasicBlock &MBB, 220 MachineBasicBlock::iterator MI, unsigned SrcReg, 221 bool isKill, int FrameIndex, 222 const 
TargetRegisterClass *RC, 223 const TargetRegisterInfo *TRI) const override; 224 225 void loadRegFromStackSlot(MachineBasicBlock &MBB, 226 MachineBasicBlock::iterator MI, unsigned DestReg, 227 int FrameIndex, const TargetRegisterClass *RC, 228 const TargetRegisterInfo *TRI) const override; 229 230 bool expandPostRAPseudo(MachineInstr &MI) const override; 231 232 // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp 233 // instructions. Returns a pair of generated instructions. 234 // Can split either post-RA with physical registers or pre-RA with 235 // virtual registers. In latter case IR needs to be in SSA form and 236 // and a REG_SEQUENCE is produced to define original register. 237 std::pair<MachineInstr*, MachineInstr*> 238 expandMovDPP64(MachineInstr &MI) const; 239 240 // Returns an opcode that can be used to move a value to a \p DstRC 241 // register. If there is no hardware instruction that can store to \p 242 // DstRC, then AMDGPU::COPY is returned. 243 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; 244 245 LLVM_READONLY 246 int commuteOpcode(unsigned Opc) const; 247 248 LLVM_READONLY 249 inline int commuteOpcode(const MachineInstr &MI) const { 250 return commuteOpcode(MI.getOpcode()); 251 } 252 253 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, 254 unsigned &SrcOpIdx2) const override; 255 256 bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0, 257 unsigned & SrcOpIdx1) const; 258 259 bool isBranchOffsetInRange(unsigned BranchOpc, 260 int64_t BrOffset) const override; 261 262 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; 263 264 unsigned insertIndirectBranch(MachineBasicBlock &MBB, 265 MachineBasicBlock &NewDestBB, 266 const DebugLoc &DL, 267 int64_t BrOffset, 268 RegScavenger *RS = nullptr) const override; 269 270 bool analyzeBranchImpl(MachineBasicBlock &MBB, 271 MachineBasicBlock::iterator I, 272 MachineBasicBlock *&TBB, 273 MachineBasicBlock *&FBB, 
274 SmallVectorImpl<MachineOperand> &Cond, 275 bool AllowModify) const; 276 277 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 278 MachineBasicBlock *&FBB, 279 SmallVectorImpl<MachineOperand> &Cond, 280 bool AllowModify = false) const override; 281 282 unsigned removeBranch(MachineBasicBlock &MBB, 283 int *BytesRemoved = nullptr) const override; 284 285 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 286 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, 287 const DebugLoc &DL, 288 int *BytesAdded = nullptr) const override; 289 290 bool reverseBranchCondition( 291 SmallVectorImpl<MachineOperand> &Cond) const override; 292 293 bool canInsertSelect(const MachineBasicBlock &MBB, 294 ArrayRef<MachineOperand> Cond, 295 unsigned TrueReg, unsigned FalseReg, 296 int &CondCycles, 297 int &TrueCycles, int &FalseCycles) const override; 298 299 void insertSelect(MachineBasicBlock &MBB, 300 MachineBasicBlock::iterator I, const DebugLoc &DL, 301 unsigned DstReg, ArrayRef<MachineOperand> Cond, 302 unsigned TrueReg, unsigned FalseReg) const override; 303 304 void insertVectorSelect(MachineBasicBlock &MBB, 305 MachineBasicBlock::iterator I, const DebugLoc &DL, 306 unsigned DstReg, ArrayRef<MachineOperand> Cond, 307 unsigned TrueReg, unsigned FalseReg) const; 308 309 unsigned getAddressSpaceForPseudoSourceKind( 310 unsigned Kind) const override; 311 312 bool 313 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 314 const MachineInstr &MIb) const override; 315 316 bool isFoldableCopy(const MachineInstr &MI) const; 317 318 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, 319 MachineRegisterInfo *MRI) const final; 320 321 unsigned getMachineCSELookAheadLimit() const override { return 500; } 322 323 MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, 324 MachineInstr &MI, 325 LiveVariables *LV) const override; 326 327 bool isSchedulingBoundary(const MachineInstr &MI, 328 const 
MachineBasicBlock *MBB, 329 const MachineFunction &MF) const override; 330 331 static bool isSALU(const MachineInstr &MI) { 332 return MI.getDesc().TSFlags & SIInstrFlags::SALU; 333 } 334 335 bool isSALU(uint16_t Opcode) const { 336 return get(Opcode).TSFlags & SIInstrFlags::SALU; 337 } 338 339 static bool isVALU(const MachineInstr &MI) { 340 return MI.getDesc().TSFlags & SIInstrFlags::VALU; 341 } 342 343 bool isVALU(uint16_t Opcode) const { 344 return get(Opcode).TSFlags & SIInstrFlags::VALU; 345 } 346 347 static bool isVMEM(const MachineInstr &MI) { 348 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI); 349 } 350 351 bool isVMEM(uint16_t Opcode) const { 352 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode); 353 } 354 355 static bool isSOP1(const MachineInstr &MI) { 356 return MI.getDesc().TSFlags & SIInstrFlags::SOP1; 357 } 358 359 bool isSOP1(uint16_t Opcode) const { 360 return get(Opcode).TSFlags & SIInstrFlags::SOP1; 361 } 362 363 static bool isSOP2(const MachineInstr &MI) { 364 return MI.getDesc().TSFlags & SIInstrFlags::SOP2; 365 } 366 367 bool isSOP2(uint16_t Opcode) const { 368 return get(Opcode).TSFlags & SIInstrFlags::SOP2; 369 } 370 371 static bool isSOPC(const MachineInstr &MI) { 372 return MI.getDesc().TSFlags & SIInstrFlags::SOPC; 373 } 374 375 bool isSOPC(uint16_t Opcode) const { 376 return get(Opcode).TSFlags & SIInstrFlags::SOPC; 377 } 378 379 static bool isSOPK(const MachineInstr &MI) { 380 return MI.getDesc().TSFlags & SIInstrFlags::SOPK; 381 } 382 383 bool isSOPK(uint16_t Opcode) const { 384 return get(Opcode).TSFlags & SIInstrFlags::SOPK; 385 } 386 387 static bool isSOPP(const MachineInstr &MI) { 388 return MI.getDesc().TSFlags & SIInstrFlags::SOPP; 389 } 390 391 bool isSOPP(uint16_t Opcode) const { 392 return get(Opcode).TSFlags & SIInstrFlags::SOPP; 393 } 394 395 static bool isPacked(const MachineInstr &MI) { 396 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; 397 } 398 399 bool isPacked(uint16_t Opcode) const { 400 return 
get(Opcode).TSFlags & SIInstrFlags::IsPacked; 401 } 402 403 static bool isVOP1(const MachineInstr &MI) { 404 return MI.getDesc().TSFlags & SIInstrFlags::VOP1; 405 } 406 407 bool isVOP1(uint16_t Opcode) const { 408 return get(Opcode).TSFlags & SIInstrFlags::VOP1; 409 } 410 411 static bool isVOP2(const MachineInstr &MI) { 412 return MI.getDesc().TSFlags & SIInstrFlags::VOP2; 413 } 414 415 bool isVOP2(uint16_t Opcode) const { 416 return get(Opcode).TSFlags & SIInstrFlags::VOP2; 417 } 418 419 static bool isVOP3(const MachineInstr &MI) { 420 return MI.getDesc().TSFlags & SIInstrFlags::VOP3; 421 } 422 423 bool isVOP3(uint16_t Opcode) const { 424 return get(Opcode).TSFlags & SIInstrFlags::VOP3; 425 } 426 427 static bool isSDWA(const MachineInstr &MI) { 428 return MI.getDesc().TSFlags & SIInstrFlags::SDWA; 429 } 430 431 bool isSDWA(uint16_t Opcode) const { 432 return get(Opcode).TSFlags & SIInstrFlags::SDWA; 433 } 434 435 static bool isVOPC(const MachineInstr &MI) { 436 return MI.getDesc().TSFlags & SIInstrFlags::VOPC; 437 } 438 439 bool isVOPC(uint16_t Opcode) const { 440 return get(Opcode).TSFlags & SIInstrFlags::VOPC; 441 } 442 443 static bool isMUBUF(const MachineInstr &MI) { 444 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; 445 } 446 447 bool isMUBUF(uint16_t Opcode) const { 448 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; 449 } 450 451 static bool isMTBUF(const MachineInstr &MI) { 452 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; 453 } 454 455 bool isMTBUF(uint16_t Opcode) const { 456 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; 457 } 458 459 static bool isSMRD(const MachineInstr &MI) { 460 return MI.getDesc().TSFlags & SIInstrFlags::SMRD; 461 } 462 463 bool isSMRD(uint16_t Opcode) const { 464 return get(Opcode).TSFlags & SIInstrFlags::SMRD; 465 } 466 467 bool isBufferSMRD(const MachineInstr &MI) const; 468 469 static bool isDS(const MachineInstr &MI) { 470 return MI.getDesc().TSFlags & SIInstrFlags::DS; 471 } 472 473 bool isDS(uint16_t Opcode) 
const { 474 return get(Opcode).TSFlags & SIInstrFlags::DS; 475 } 476 477 bool isAlwaysGDS(uint16_t Opcode) const; 478 479 static bool isMIMG(const MachineInstr &MI) { 480 return MI.getDesc().TSFlags & SIInstrFlags::MIMG; 481 } 482 483 bool isMIMG(uint16_t Opcode) const { 484 return get(Opcode).TSFlags & SIInstrFlags::MIMG; 485 } 486 487 static bool isGather4(const MachineInstr &MI) { 488 return MI.getDesc().TSFlags & SIInstrFlags::Gather4; 489 } 490 491 bool isGather4(uint16_t Opcode) const { 492 return get(Opcode).TSFlags & SIInstrFlags::Gather4; 493 } 494 495 static bool isFLAT(const MachineInstr &MI) { 496 return MI.getDesc().TSFlags & SIInstrFlags::FLAT; 497 } 498 499 // Is a FLAT encoded instruction which accesses a specific segment, 500 // i.e. global_* or scratch_*. 501 static bool isSegmentSpecificFLAT(const MachineInstr &MI) { 502 auto Flags = MI.getDesc().TSFlags; 503 return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT); 504 } 505 506 // FIXME: Make this more precise 507 static bool isFLATScratch(const MachineInstr &MI) { 508 return isSegmentSpecificFLAT(MI); 509 } 510 511 // Any FLAT encoded instruction, including global_* and scratch_*. 
512 bool isFLAT(uint16_t Opcode) const { 513 return get(Opcode).TSFlags & SIInstrFlags::FLAT; 514 } 515 516 static bool isEXP(const MachineInstr &MI) { 517 return MI.getDesc().TSFlags & SIInstrFlags::EXP; 518 } 519 520 bool isEXP(uint16_t Opcode) const { 521 return get(Opcode).TSFlags & SIInstrFlags::EXP; 522 } 523 524 static bool isWQM(const MachineInstr &MI) { 525 return MI.getDesc().TSFlags & SIInstrFlags::WQM; 526 } 527 528 bool isWQM(uint16_t Opcode) const { 529 return get(Opcode).TSFlags & SIInstrFlags::WQM; 530 } 531 532 static bool isDisableWQM(const MachineInstr &MI) { 533 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; 534 } 535 536 bool isDisableWQM(uint16_t Opcode) const { 537 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; 538 } 539 540 static bool isVGPRSpill(const MachineInstr &MI) { 541 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; 542 } 543 544 bool isVGPRSpill(uint16_t Opcode) const { 545 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; 546 } 547 548 static bool isSGPRSpill(const MachineInstr &MI) { 549 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill; 550 } 551 552 bool isSGPRSpill(uint16_t Opcode) const { 553 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill; 554 } 555 556 static bool isDPP(const MachineInstr &MI) { 557 return MI.getDesc().TSFlags & SIInstrFlags::DPP; 558 } 559 560 bool isDPP(uint16_t Opcode) const { 561 return get(Opcode).TSFlags & SIInstrFlags::DPP; 562 } 563 564 static bool isVOP3P(const MachineInstr &MI) { 565 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; 566 } 567 568 bool isVOP3P(uint16_t Opcode) const { 569 return get(Opcode).TSFlags & SIInstrFlags::VOP3P; 570 } 571 572 static bool isVINTRP(const MachineInstr &MI) { 573 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; 574 } 575 576 bool isVINTRP(uint16_t Opcode) const { 577 return get(Opcode).TSFlags & SIInstrFlags::VINTRP; 578 } 579 580 static bool isMAI(const MachineInstr &MI) { 581 return MI.getDesc().TSFlags & 
SIInstrFlags::IsMAI; 582 } 583 584 bool isMAI(uint16_t Opcode) const { 585 return get(Opcode).TSFlags & SIInstrFlags::IsMAI; 586 } 587 588 static bool isDOT(const MachineInstr &MI) { 589 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; 590 } 591 592 bool isDOT(uint16_t Opcode) const { 593 return get(Opcode).TSFlags & SIInstrFlags::IsDOT; 594 } 595 596 static bool isScalarUnit(const MachineInstr &MI) { 597 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); 598 } 599 600 static bool usesVM_CNT(const MachineInstr &MI) { 601 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT; 602 } 603 604 static bool usesLGKM_CNT(const MachineInstr &MI) { 605 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; 606 } 607 608 static bool sopkIsZext(const MachineInstr &MI) { 609 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT; 610 } 611 612 bool sopkIsZext(uint16_t Opcode) const { 613 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT; 614 } 615 616 /// \returns true if this is an s_store_dword* instruction. This is more 617 /// specific than than isSMEM && mayStore. 
618 static bool isScalarStore(const MachineInstr &MI) { 619 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; 620 } 621 622 bool isScalarStore(uint16_t Opcode) const { 623 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; 624 } 625 626 static bool isFixedSize(const MachineInstr &MI) { 627 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; 628 } 629 630 bool isFixedSize(uint16_t Opcode) const { 631 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; 632 } 633 634 static bool hasFPClamp(const MachineInstr &MI) { 635 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; 636 } 637 638 bool hasFPClamp(uint16_t Opcode) const { 639 return get(Opcode).TSFlags & SIInstrFlags::FPClamp; 640 } 641 642 static bool hasIntClamp(const MachineInstr &MI) { 643 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp; 644 } 645 646 uint64_t getClampMask(const MachineInstr &MI) const { 647 const uint64_t ClampFlags = SIInstrFlags::FPClamp | 648 SIInstrFlags::IntClamp | 649 SIInstrFlags::ClampLo | 650 SIInstrFlags::ClampHi; 651 return MI.getDesc().TSFlags & ClampFlags; 652 } 653 654 static bool usesFPDPRounding(const MachineInstr &MI) { 655 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding; 656 } 657 658 bool usesFPDPRounding(uint16_t Opcode) const { 659 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; 660 } 661 662 static bool isFPAtomic(const MachineInstr &MI) { 663 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; 664 } 665 666 bool isFPAtomic(uint16_t Opcode) const { 667 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; 668 } 669 670 bool isVGPRCopy(const MachineInstr &MI) const { 671 assert(MI.isCopy()); 672 unsigned Dest = MI.getOperand(0).getReg(); 673 const MachineFunction &MF = *MI.getParent()->getParent(); 674 const MachineRegisterInfo &MRI = MF.getRegInfo(); 675 return !RI.isSGPRReg(MRI, Dest); 676 } 677 678 bool hasVGPRUses(const MachineInstr &MI) const { 679 const MachineFunction &MF = *MI.getParent()->getParent(); 680 const 
MachineRegisterInfo &MRI = MF.getRegInfo(); 681 return llvm::any_of(MI.explicit_uses(), 682 [&MRI, this](const MachineOperand &MO) { 683 return MO.isReg() && RI.isVGPR(MRI, MO.getReg());}); 684 } 685 686 /// Whether we must prevent this instruction from executing with EXEC = 0. 687 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; 688 689 /// Returns true if the instruction could potentially depend on the value of 690 /// exec. If false, exec dependencies may safely be ignored. 691 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; 692 693 bool isInlineConstant(const APInt &Imm) const; 694 695 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; 696 697 bool isInlineConstant(const MachineOperand &MO, 698 const MCOperandInfo &OpInfo) const { 699 return isInlineConstant(MO, OpInfo.OperandType); 700 } 701 702 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would 703 /// be an inline immediate. 704 bool isInlineConstant(const MachineInstr &MI, 705 const MachineOperand &UseMO, 706 const MachineOperand &DefMO) const { 707 assert(UseMO.getParent() == &MI); 708 int OpIdx = MI.getOperandNo(&UseMO); 709 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) { 710 return false; 711 } 712 713 return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]); 714 } 715 716 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline 717 /// immediate. 
718 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const { 719 const MachineOperand &MO = MI.getOperand(OpIdx); 720 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType); 721 } 722 723 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, 724 const MachineOperand &MO) const { 725 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) 726 return false; 727 728 if (MI.isCopy()) { 729 unsigned Size = getOpSize(MI, OpIdx); 730 assert(Size == 8 || Size == 4); 731 732 uint8_t OpType = (Size == 8) ? 733 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32; 734 return isInlineConstant(MO, OpType); 735 } 736 737 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType); 738 } 739 740 bool isInlineConstant(const MachineOperand &MO) const { 741 const MachineInstr *Parent = MO.getParent(); 742 return isInlineConstant(*Parent, Parent->getOperandNo(&MO)); 743 } 744 745 bool isLiteralConstant(const MachineOperand &MO, 746 const MCOperandInfo &OpInfo) const { 747 return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType); 748 } 749 750 bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const { 751 const MachineOperand &MO = MI.getOperand(OpIdx); 752 return MO.isImm() && !isInlineConstant(MI, OpIdx); 753 } 754 755 // Returns true if this operand could potentially require a 32-bit literal 756 // operand, but not necessarily. A FrameIndex for example could resolve to an 757 // inline immediate value that will not require an additional 4-bytes; this 758 // assumes that it will. 759 bool isLiteralConstantLike(const MachineOperand &MO, 760 const MCOperandInfo &OpInfo) const; 761 762 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 763 const MachineOperand &MO) const; 764 765 /// Return true if this 64-bit VALU instruction has a 32-bit encoding. 766 /// This function will return false if you pass it a 32-bit instruction. 
767 bool hasVALU32BitEncoding(unsigned Opcode) const; 768 769 /// Returns true if this operand uses the constant bus. 770 bool usesConstantBus(const MachineRegisterInfo &MRI, 771 const MachineOperand &MO, 772 const MCOperandInfo &OpInfo) const; 773 774 /// Return true if this instruction has any modifiers. 775 /// e.g. src[012]_mod, omod, clamp. 776 bool hasModifiers(unsigned Opcode) const; 777 778 bool hasModifiersSet(const MachineInstr &MI, 779 unsigned OpName) const; 780 bool hasAnyModifiersSet(const MachineInstr &MI) const; 781 782 bool canShrink(const MachineInstr &MI, 783 const MachineRegisterInfo &MRI) const; 784 785 MachineInstr *buildShrunkInst(MachineInstr &MI, 786 unsigned NewOpcode) const; 787 788 bool verifyInstruction(const MachineInstr &MI, 789 StringRef &ErrInfo) const override; 790 791 unsigned getVALUOp(const MachineInstr &MI) const; 792 793 /// Return the correct register class for \p OpNo. For target-specific 794 /// instructions, this will return the register class that has been defined 795 /// in tablegen. For generic instructions, like REG_SEQUENCE it will return 796 /// the register class of its machine operand. 797 /// to infer the correct register class base on the other operands. 798 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, 799 unsigned OpNo) const; 800 801 /// Return the size in bytes of the operand OpNo on the given 802 // instruction opcode. 803 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { 804 const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo]; 805 806 if (OpInfo.RegClass == -1) { 807 // If this is an immediate operand, this must be a 32-bit literal. 808 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); 809 return 4; 810 } 811 812 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; 813 } 814 815 /// This form should usually be preferred since it handles operands 816 /// with unknown register classes. 
817 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { 818 const MachineOperand &MO = MI.getOperand(OpNo); 819 if (MO.isReg()) { 820 if (unsigned SubReg = MO.getSubReg()) { 821 assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg( 822 MI.getParent()->getParent()->getRegInfo(). 823 getRegClass(MO.getReg()), SubReg)) >= 32 && 824 "Sub-dword subregs are not supported"); 825 return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4; 826 } 827 } 828 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; 829 } 830 831 /// Legalize the \p OpIndex operand of this instruction by inserting 832 /// a MOV. For example: 833 /// ADD_I32_e32 VGPR0, 15 834 /// to 835 /// MOV VGPR1, 15 836 /// ADD_I32_e32 VGPR0, VGPR1 837 /// 838 /// If the operand being legalized is a register, then a COPY will be used 839 /// instead of MOV. 840 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; 841 842 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand 843 /// for \p MI. 844 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 845 const MachineOperand *MO = nullptr) const; 846 847 /// Check if \p MO would be a valid operand for the given operand 848 /// definition \p OpInfo. Note this does not attempt to validate constant bus 849 /// restrictions (e.g. literal constant usage). 850 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, 851 const MCOperandInfo &OpInfo, 852 const MachineOperand &MO) const; 853 854 /// Check if \p MO (a register operand) is a legal register for the 855 /// given operand description. 856 bool isLegalRegOperand(const MachineRegisterInfo &MRI, 857 const MCOperandInfo &OpInfo, 858 const MachineOperand &MO) const; 859 860 /// Legalize operands in \p MI by either commuting it or inserting a 861 /// copy of src1. 862 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; 863 864 /// Fix operands in \p MI to satisfy constant bus requirements. 
865 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; 866 867 /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only 868 /// be used when it is know that the value in SrcReg is same across all 869 /// threads in the wave. 870 /// \returns The SGPR register that \p SrcReg was copied to. 871 unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, 872 MachineRegisterInfo &MRI) const; 873 874 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; 875 876 void legalizeGenericOperand(MachineBasicBlock &InsertMBB, 877 MachineBasicBlock::iterator I, 878 const TargetRegisterClass *DstRC, 879 MachineOperand &Op, MachineRegisterInfo &MRI, 880 const DebugLoc &DL) const; 881 882 /// Legalize all operands in this instruction. This function may create new 883 /// instructions and control-flow around \p MI. If present, \p MDT is 884 /// updated. 885 void legalizeOperands(MachineInstr &MI, 886 MachineDominatorTree *MDT = nullptr) const; 887 888 /// Replace this instruction's opcode with the equivalent VALU 889 /// opcode. This function will also move the users of \p MI to the 890 /// VALU if necessary. If present, \p MDT is updated. 891 void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const; 892 893 void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI, 894 int Count) const; 895 896 void insertNoop(MachineBasicBlock &MBB, 897 MachineBasicBlock::iterator MI) const override; 898 899 void insertReturn(MachineBasicBlock &MBB) const; 900 /// Return the number of wait states that result from executing this 901 /// instruction. 902 static unsigned getNumWaitStates(const MachineInstr &MI); 903 904 /// Returns the operand named \p Op. If \p MI does not have an 905 /// operand named \c Op, this function returns nullptr. 
906 LLVM_READONLY 907 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; 908 909 LLVM_READONLY 910 const MachineOperand *getNamedOperand(const MachineInstr &MI, 911 unsigned OpName) const { 912 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); 913 } 914 915 /// Get required immediate operand 916 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const { 917 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); 918 return MI.getOperand(Idx).getImm(); 919 } 920 921 uint64_t getDefaultRsrcDataFormat() const; 922 uint64_t getScratchRsrcWords23() const; 923 924 bool isLowLatencyInstruction(const MachineInstr &MI) const; 925 bool isHighLatencyInstruction(const MachineInstr &MI) const; 926 927 /// Return the descriptor of the target-specific machine instruction 928 /// that corresponds to the specified pseudo or native opcode. 929 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { 930 return get(pseudoToMCOpcode(Opcode)); 931 } 932 933 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const; 934 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; 935 936 unsigned isLoadFromStackSlot(const MachineInstr &MI, 937 int &FrameIndex) const override; 938 unsigned isStoreToStackSlot(const MachineInstr &MI, 939 int &FrameIndex) const override; 940 941 unsigned getInstBundleSize(const MachineInstr &MI) const; 942 unsigned getInstSizeInBytes(const MachineInstr &MI) const override; 943 944 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; 945 946 bool isNonUniformBranchInstr(MachineInstr &Instr) const; 947 948 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, 949 MachineBasicBlock *IfEnd) const; 950 951 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, 952 MachineBasicBlock *LoopEnd) const; 953 954 std::pair<unsigned, unsigned> 955 decomposeMachineOperandsTargetFlags(unsigned TF) const override; 956 957 ArrayRef<std::pair<int, const 
char *>> 958 getSerializableTargetIndices() const override; 959 960 ArrayRef<std::pair<unsigned, const char *>> 961 getSerializableDirectMachineOperandTargetFlags() const override; 962 963 ScheduleHazardRecognizer * 964 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 965 const ScheduleDAG *DAG) const override; 966 967 ScheduleHazardRecognizer * 968 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; 969 970 bool isBasicBlockPrologue(const MachineInstr &MI) const override; 971 972 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, 973 MachineBasicBlock::iterator InsPt, 974 const DebugLoc &DL, Register Src, 975 Register Dst) const override; 976 977 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, 978 MachineBasicBlock::iterator InsPt, 979 const DebugLoc &DL, Register Src, 980 Register SrcSubReg, 981 Register Dst) const override; 982 983 bool isWave32() const; 984 985 /// Return a partially built integer add instruction without carry. 986 /// Caller must add source operands. 987 /// For pre-GFX9 it will generate unused carry destination operand. 988 /// TODO: After GFX9 it should return a no-carry operation. 989 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 990 MachineBasicBlock::iterator I, 991 const DebugLoc &DL, 992 unsigned DestReg) const; 993 994 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, 995 MachineBasicBlock::iterator I, 996 const DebugLoc &DL, 997 Register DestReg, 998 RegScavenger &RS) const; 999 1000 static bool isKillTerminator(unsigned Opcode); 1001 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; 1002 1003 static bool isLegalMUBUFImmOffset(unsigned Imm) { 1004 return isUInt<12>(Imm); 1005 } 1006 1007 unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const; 1008 1009 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT 1010 /// encoded instruction. 
If \p Signed, this is for an instruction that 1011 /// interprets the offset as signed. 1012 bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, 1013 bool Signed) const; 1014 1015 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. 1016 /// Return -1 if the target-specific opcode for the pseudo instruction does 1017 /// not exist. If Opcode is not a pseudo instruction, this is identity. 1018 int pseudoToMCOpcode(int Opcode) const; 1019 1020 const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, 1021 const TargetRegisterInfo *TRI, 1022 const MachineFunction &MF) 1023 const override { 1024 if (OpNum >= TID.getNumOperands()) 1025 return nullptr; 1026 return RI.getRegClass(TID.OpInfo[OpNum].RegClass); 1027 } 1028 1029 void fixImplicitOperands(MachineInstr &MI) const; 1030 }; 1031 1032 /// \brief Returns true if a reg:subreg pair P has a TRC class 1033 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, 1034 const TargetRegisterClass &TRC, 1035 MachineRegisterInfo &MRI) { 1036 auto *RC = MRI.getRegClass(P.Reg); 1037 if (!P.SubReg) 1038 return RC == &TRC; 1039 auto *TRI = MRI.getTargetRegisterInfo(); 1040 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg); 1041 } 1042 1043 /// \brief Create RegSubRegPair from a register MachineOperand 1044 inline 1045 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) { 1046 assert(O.isReg()); 1047 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg()); 1048 } 1049 1050 /// \brief Return the SubReg component from REG_SEQUENCE 1051 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, 1052 unsigned SubReg); 1053 1054 /// \brief Return the defining instruction for a given reg:subreg pair 1055 /// skipping copy like instructions and subreg-manipulation pseudos. 1056 /// Following another subreg of a reg:subreg isn't supported. 
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

// Opcode-to-opcode lookups. Each takes an opcode and returns an int.
// NOTE(review): these look like queries into instruction-mapping tables
// defined elsewhere, presumably returning -1 when no counterpart exists.
// Neither the tables nor the sentinel are visible here -- confirm against the
// definitions.

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

// Further opcode-to-opcode lookups with the same signature.
// NOTE(review): presumably each returns -1 when the opcode has no
// counterpart -- confirm against the definitions.

LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);

LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

// 64-bit masks and shift amounts. The "32 + N" spelling addresses bit N of the
// upper 32-bit half of a 64-bit value.
// NOTE(review): judging by the RSRC_ prefix these presumably describe fields
// of a buffer resource descriptor -- confirm against the ISA documentation.
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; // bits 44-47 (0xf << 44)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); // shift amount 51
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); // shift amount 53
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); // single bit 55

} // end namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
// The entries are laid out back to back, 4 bytes apart.
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H