1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Interface definition for SIInstrInfo. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H 16 17 #include "AMDGPUInstrInfo.h" 18 #include "SIDefines.h" 19 #include "SIRegisterInfo.h" 20 #include "Utils/AMDGPUBaseInfo.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/MC/MCInstrDesc.h" 29 #include "llvm/Support/Compiler.h" 30 #include <cassert> 31 #include <cstdint> 32 33 #define GET_INSTRINFO_HEADER 34 #include "AMDGPUGenInstrInfo.inc" 35 36 namespace llvm { 37 38 class APInt; 39 class MachineDominatorTree; 40 class MachineRegisterInfo; 41 class RegScavenger; 42 class GCNSubtarget; 43 class TargetRegisterClass; 44 45 class SIInstrInfo final : public AMDGPUGenInstrInfo { 46 private: 47 const SIRegisterInfo RI; 48 const GCNSubtarget &ST; 49 50 // The inverse predicate should have the negative value. 
  // Branch condition kinds used by analyzeBranch/insertBranch. Each predicate
  // and its inverse are encoded as value/-value pairs so negation flips the
  // condition (see the comment preceding this enum).
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  // Worklist of instructions still to be moved to the VALU by moveToVALU().
  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;
private:
  // Helpers below are used by moveToVALU() to rewrite individual scalar
  // instructions into their vector equivalents; each appends newly created
  // instructions' users to the worklist.
  void swapOperands(MachineInstr &Inst) const;

  bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                        MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SetVectorType &Worklist,
                      MachineInstr &Inst) const;

  void lowerScalarXnor(SetVectorType &Worklist,
                       MachineInstr &Inst) const;

  void splitScalarNotBinop(SetVectorType &Worklist,
                           MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SetVectorType &Worklist,
                          MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SetVectorType &Worklist,
                               MachineInstr &Inst, unsigned Opcode) const;

  void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SetVectorType &Worklist,
                            MachineInstr &Inst) const;
  void
splitScalar64BitBFE(SetVectorType &Worklist, 115 MachineInstr &Inst) const; 116 void movePackToVALU(SetVectorType &Worklist, 117 MachineRegisterInfo &MRI, 118 MachineInstr &Inst) const; 119 120 void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI, 121 SetVectorType &Worklist) const; 122 123 void addSCCDefUsersToVALUWorklist(MachineOperand &Op, 124 MachineInstr &SCCDefInst, 125 SetVectorType &Worklist) const; 126 127 const TargetRegisterClass * 128 getDestEquivalentVGPRClass(const MachineInstr &Inst) const; 129 130 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, 131 const MachineInstr &MIb) const; 132 133 unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; 134 135 protected: 136 bool swapSourceModifiers(MachineInstr &MI, 137 MachineOperand &Src0, unsigned Src0OpName, 138 MachineOperand &Src1, unsigned Src1OpName) const; 139 140 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, 141 unsigned OpIdx0, 142 unsigned OpIdx1) const override; 143 144 public: 145 enum TargetOperandFlags { 146 MO_MASK = 0xf, 147 148 MO_NONE = 0, 149 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 150 MO_GOTPCREL = 1, 151 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO. 152 MO_GOTPCREL32 = 2, 153 MO_GOTPCREL32_LO = 2, 154 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. 155 MO_GOTPCREL32_HI = 3, 156 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. 157 MO_REL32 = 4, 158 MO_REL32_LO = 4, 159 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. 
160 MO_REL32_HI = 5, 161 162 MO_LONG_BRANCH_FORWARD = 6, 163 MO_LONG_BRANCH_BACKWARD = 7, 164 165 MO_ABS32_LO = 8, 166 MO_ABS32_HI = 9, 167 }; 168 169 explicit SIInstrInfo(const GCNSubtarget &ST); 170 171 const SIRegisterInfo &getRegisterInfo() const { 172 return RI; 173 } 174 175 bool isReallyTriviallyReMaterializable(const MachineInstr &MI, 176 AliasAnalysis *AA) const override; 177 178 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 179 int64_t &Offset1, 180 int64_t &Offset2) const override; 181 182 bool getMemOperandWithOffset(const MachineInstr &LdSt, 183 const MachineOperand *&BaseOp, 184 int64_t &Offset, 185 const TargetRegisterInfo *TRI) const final; 186 187 bool shouldClusterMemOps(const MachineOperand &BaseOp1, 188 const MachineOperand &BaseOp2, 189 unsigned NumLoads) const override; 190 191 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, 192 int64_t Offset1, unsigned NumLoads) const override; 193 194 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 195 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, 196 bool KillSrc) const override; 197 198 unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI, 199 RegScavenger *RS, unsigned TmpReg, 200 unsigned Offset, unsigned Size) const; 201 202 void materializeImmediate(MachineBasicBlock &MBB, 203 MachineBasicBlock::iterator MI, 204 const DebugLoc &DL, 205 unsigned DestReg, 206 int64_t Value) const; 207 208 const TargetRegisterClass *getPreferredSelectRegClass( 209 unsigned Size) const; 210 211 unsigned insertNE(MachineBasicBlock *MBB, 212 MachineBasicBlock::iterator I, const DebugLoc &DL, 213 unsigned SrcReg, int Value) const; 214 215 unsigned insertEQ(MachineBasicBlock *MBB, 216 MachineBasicBlock::iterator I, const DebugLoc &DL, 217 unsigned SrcReg, int Value) const; 218 219 void storeRegToStackSlot(MachineBasicBlock &MBB, 220 MachineBasicBlock::iterator MI, unsigned SrcReg, 221 bool isKill, int FrameIndex, 222 const 
TargetRegisterClass *RC, 223 const TargetRegisterInfo *TRI) const override; 224 225 void loadRegFromStackSlot(MachineBasicBlock &MBB, 226 MachineBasicBlock::iterator MI, unsigned DestReg, 227 int FrameIndex, const TargetRegisterClass *RC, 228 const TargetRegisterInfo *TRI) const override; 229 230 bool expandPostRAPseudo(MachineInstr &MI) const override; 231 232 // Returns an opcode that can be used to move a value to a \p DstRC 233 // register. If there is no hardware instruction that can store to \p 234 // DstRC, then AMDGPU::COPY is returned. 235 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; 236 237 LLVM_READONLY 238 int commuteOpcode(unsigned Opc) const; 239 240 LLVM_READONLY 241 inline int commuteOpcode(const MachineInstr &MI) const { 242 return commuteOpcode(MI.getOpcode()); 243 } 244 245 bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, 246 unsigned &SrcOpIdx2) const override; 247 248 bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0, 249 unsigned & SrcOpIdx1) const; 250 251 bool isBranchOffsetInRange(unsigned BranchOpc, 252 int64_t BrOffset) const override; 253 254 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; 255 256 unsigned insertIndirectBranch(MachineBasicBlock &MBB, 257 MachineBasicBlock &NewDestBB, 258 const DebugLoc &DL, 259 int64_t BrOffset, 260 RegScavenger *RS = nullptr) const override; 261 262 bool analyzeBranchImpl(MachineBasicBlock &MBB, 263 MachineBasicBlock::iterator I, 264 MachineBasicBlock *&TBB, 265 MachineBasicBlock *&FBB, 266 SmallVectorImpl<MachineOperand> &Cond, 267 bool AllowModify) const; 268 269 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 270 MachineBasicBlock *&FBB, 271 SmallVectorImpl<MachineOperand> &Cond, 272 bool AllowModify = false) const override; 273 274 unsigned removeBranch(MachineBasicBlock &MBB, 275 int *BytesRemoved = nullptr) const override; 276 277 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock 
*TBB, 278 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, 279 const DebugLoc &DL, 280 int *BytesAdded = nullptr) const override; 281 282 bool reverseBranchCondition( 283 SmallVectorImpl<MachineOperand> &Cond) const override; 284 285 bool canInsertSelect(const MachineBasicBlock &MBB, 286 ArrayRef<MachineOperand> Cond, 287 unsigned TrueReg, unsigned FalseReg, 288 int &CondCycles, 289 int &TrueCycles, int &FalseCycles) const override; 290 291 void insertSelect(MachineBasicBlock &MBB, 292 MachineBasicBlock::iterator I, const DebugLoc &DL, 293 unsigned DstReg, ArrayRef<MachineOperand> Cond, 294 unsigned TrueReg, unsigned FalseReg) const override; 295 296 void insertVectorSelect(MachineBasicBlock &MBB, 297 MachineBasicBlock::iterator I, const DebugLoc &DL, 298 unsigned DstReg, ArrayRef<MachineOperand> Cond, 299 unsigned TrueReg, unsigned FalseReg) const; 300 301 unsigned getAddressSpaceForPseudoSourceKind( 302 unsigned Kind) const override; 303 304 bool 305 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, 306 const MachineInstr &MIb, 307 AliasAnalysis *AA = nullptr) const override; 308 309 bool isFoldableCopy(const MachineInstr &MI) const; 310 311 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, 312 MachineRegisterInfo *MRI) const final; 313 314 unsigned getMachineCSELookAheadLimit() const override { return 500; } 315 316 MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, 317 MachineInstr &MI, 318 LiveVariables *LV) const override; 319 320 bool isSchedulingBoundary(const MachineInstr &MI, 321 const MachineBasicBlock *MBB, 322 const MachineFunction &MF) const override; 323 324 static bool isSALU(const MachineInstr &MI) { 325 return MI.getDesc().TSFlags & SIInstrFlags::SALU; 326 } 327 328 bool isSALU(uint16_t Opcode) const { 329 return get(Opcode).TSFlags & SIInstrFlags::SALU; 330 } 331 332 static bool isVALU(const MachineInstr &MI) { 333 return MI.getDesc().TSFlags & SIInstrFlags::VALU; 334 } 335 336 bool 
isVALU(uint16_t Opcode) const { 337 return get(Opcode).TSFlags & SIInstrFlags::VALU; 338 } 339 340 static bool isVMEM(const MachineInstr &MI) { 341 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI); 342 } 343 344 bool isVMEM(uint16_t Opcode) const { 345 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode); 346 } 347 348 static bool isSOP1(const MachineInstr &MI) { 349 return MI.getDesc().TSFlags & SIInstrFlags::SOP1; 350 } 351 352 bool isSOP1(uint16_t Opcode) const { 353 return get(Opcode).TSFlags & SIInstrFlags::SOP1; 354 } 355 356 static bool isSOP2(const MachineInstr &MI) { 357 return MI.getDesc().TSFlags & SIInstrFlags::SOP2; 358 } 359 360 bool isSOP2(uint16_t Opcode) const { 361 return get(Opcode).TSFlags & SIInstrFlags::SOP2; 362 } 363 364 static bool isSOPC(const MachineInstr &MI) { 365 return MI.getDesc().TSFlags & SIInstrFlags::SOPC; 366 } 367 368 bool isSOPC(uint16_t Opcode) const { 369 return get(Opcode).TSFlags & SIInstrFlags::SOPC; 370 } 371 372 static bool isSOPK(const MachineInstr &MI) { 373 return MI.getDesc().TSFlags & SIInstrFlags::SOPK; 374 } 375 376 bool isSOPK(uint16_t Opcode) const { 377 return get(Opcode).TSFlags & SIInstrFlags::SOPK; 378 } 379 380 static bool isSOPP(const MachineInstr &MI) { 381 return MI.getDesc().TSFlags & SIInstrFlags::SOPP; 382 } 383 384 bool isSOPP(uint16_t Opcode) const { 385 return get(Opcode).TSFlags & SIInstrFlags::SOPP; 386 } 387 388 static bool isPacked(const MachineInstr &MI) { 389 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; 390 } 391 392 bool isPacked(uint16_t Opcode) const { 393 return get(Opcode).TSFlags & SIInstrFlags::IsPacked; 394 } 395 396 static bool isVOP1(const MachineInstr &MI) { 397 return MI.getDesc().TSFlags & SIInstrFlags::VOP1; 398 } 399 400 bool isVOP1(uint16_t Opcode) const { 401 return get(Opcode).TSFlags & SIInstrFlags::VOP1; 402 } 403 404 static bool isVOP2(const MachineInstr &MI) { 405 return MI.getDesc().TSFlags & SIInstrFlags::VOP2; 406 } 407 408 bool isVOP2(uint16_t Opcode) 
const { 409 return get(Opcode).TSFlags & SIInstrFlags::VOP2; 410 } 411 412 static bool isVOP3(const MachineInstr &MI) { 413 return MI.getDesc().TSFlags & SIInstrFlags::VOP3; 414 } 415 416 bool isVOP3(uint16_t Opcode) const { 417 return get(Opcode).TSFlags & SIInstrFlags::VOP3; 418 } 419 420 static bool isSDWA(const MachineInstr &MI) { 421 return MI.getDesc().TSFlags & SIInstrFlags::SDWA; 422 } 423 424 bool isSDWA(uint16_t Opcode) const { 425 return get(Opcode).TSFlags & SIInstrFlags::SDWA; 426 } 427 428 static bool isVOPC(const MachineInstr &MI) { 429 return MI.getDesc().TSFlags & SIInstrFlags::VOPC; 430 } 431 432 bool isVOPC(uint16_t Opcode) const { 433 return get(Opcode).TSFlags & SIInstrFlags::VOPC; 434 } 435 436 static bool isMUBUF(const MachineInstr &MI) { 437 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; 438 } 439 440 bool isMUBUF(uint16_t Opcode) const { 441 return get(Opcode).TSFlags & SIInstrFlags::MUBUF; 442 } 443 444 static bool isMTBUF(const MachineInstr &MI) { 445 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; 446 } 447 448 bool isMTBUF(uint16_t Opcode) const { 449 return get(Opcode).TSFlags & SIInstrFlags::MTBUF; 450 } 451 452 static bool isSMRD(const MachineInstr &MI) { 453 return MI.getDesc().TSFlags & SIInstrFlags::SMRD; 454 } 455 456 bool isSMRD(uint16_t Opcode) const { 457 return get(Opcode).TSFlags & SIInstrFlags::SMRD; 458 } 459 460 bool isBufferSMRD(const MachineInstr &MI) const; 461 462 static bool isDS(const MachineInstr &MI) { 463 return MI.getDesc().TSFlags & SIInstrFlags::DS; 464 } 465 466 bool isDS(uint16_t Opcode) const { 467 return get(Opcode).TSFlags & SIInstrFlags::DS; 468 } 469 470 bool isAlwaysGDS(uint16_t Opcode) const; 471 472 static bool isMIMG(const MachineInstr &MI) { 473 return MI.getDesc().TSFlags & SIInstrFlags::MIMG; 474 } 475 476 bool isMIMG(uint16_t Opcode) const { 477 return get(Opcode).TSFlags & SIInstrFlags::MIMG; 478 } 479 480 static bool isGather4(const MachineInstr &MI) { 481 return MI.getDesc().TSFlags & 
SIInstrFlags::Gather4; 482 } 483 484 bool isGather4(uint16_t Opcode) const { 485 return get(Opcode).TSFlags & SIInstrFlags::Gather4; 486 } 487 488 static bool isFLAT(const MachineInstr &MI) { 489 return MI.getDesc().TSFlags & SIInstrFlags::FLAT; 490 } 491 492 // Is a FLAT encoded instruction which accesses a specific segment, 493 // i.e. global_* or scratch_*. 494 static bool isSegmentSpecificFLAT(const MachineInstr &MI) { 495 auto Flags = MI.getDesc().TSFlags; 496 return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT); 497 } 498 499 // FIXME: Make this more precise 500 static bool isFLATScratch(const MachineInstr &MI) { 501 return isSegmentSpecificFLAT(MI); 502 } 503 504 // Any FLAT encoded instruction, including global_* and scratch_*. 505 bool isFLAT(uint16_t Opcode) const { 506 return get(Opcode).TSFlags & SIInstrFlags::FLAT; 507 } 508 509 static bool isEXP(const MachineInstr &MI) { 510 return MI.getDesc().TSFlags & SIInstrFlags::EXP; 511 } 512 513 bool isEXP(uint16_t Opcode) const { 514 return get(Opcode).TSFlags & SIInstrFlags::EXP; 515 } 516 517 static bool isWQM(const MachineInstr &MI) { 518 return MI.getDesc().TSFlags & SIInstrFlags::WQM; 519 } 520 521 bool isWQM(uint16_t Opcode) const { 522 return get(Opcode).TSFlags & SIInstrFlags::WQM; 523 } 524 525 static bool isDisableWQM(const MachineInstr &MI) { 526 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; 527 } 528 529 bool isDisableWQM(uint16_t Opcode) const { 530 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; 531 } 532 533 static bool isVGPRSpill(const MachineInstr &MI) { 534 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; 535 } 536 537 bool isVGPRSpill(uint16_t Opcode) const { 538 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; 539 } 540 541 static bool isSGPRSpill(const MachineInstr &MI) { 542 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill; 543 } 544 545 bool isSGPRSpill(uint16_t Opcode) const { 546 return get(Opcode).TSFlags & 
           SIInstrFlags::SGPRSpill;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  /// \returns true if \p MI executes on a scalar unit (SALU or SMRD).
  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool sopkIsZext(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  bool sopkIsZext(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  /// \returns the subset of \p MI's TSFlags covering all clamp variants
  /// (FP, integer, and the packed lo/hi clamps).
  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  /// \returns true if \p MI is a COPY whose destination is not an SGPR.
  /// \p MI must be a COPY (asserted).
  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(MI.isCopy());
    unsigned Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  /// \returns true if any explicit use operand of \p MI is a VGPR.
  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
      return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, the
  /// result would be an inline immediate for that operand slot.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = MI.getOperandNo(&UseMO);
    // Operands beyond the described ones (e.g. implicit operands) have no
    // MCOperandInfo to consult, so conservatively answer false.
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
      return false;
    }

    return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
  }

  /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }

  /// \returns true if \p MO would be a valid inline immediate if placed in
  /// operand slot \p OpIdx of \p MI.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    // No operand description available for this slot: cannot be inline.
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (MI.isCopy()) {
      // COPYs carry no operand-type info, so classify by operand size only.
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }

  /// \returns true if \p MO is an inline immediate in its current position
  /// within its parent instruction.
  bool isInlineConstant(const MachineOperand &MO) const {
    const MachineInstr *Parent = MO.getParent();
    return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
  }

  /// \returns true if \p MO is an immediate that must be encoded as a
  /// separate 32-bit literal rather than an inline constant.
  bool isLiteralConstant(const MachineOperand &MO,
                         const MCOperandInfo &OpInfo) const {
    return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
  }

  bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return MO.isImm() && !isInlineConstant(MI, OpIdx);
  }

  // Returns true if this operand could potentially require a 32-bit literal
  // operand, but not necessarily. A FrameIndex for example could resolve to an
  // inline immediate value that will not require an additional 4-bytes; this
  // assumes that it will.
  bool isLiteralConstantLike(const MachineOperand &MO,
                             const MCOperandInfo &OpInfo) const;

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands
  /// to infer the correct register class.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
                                   MI.getParent()->getParent()->getRegInfo().
                                     getRegClass(MO.getReg()), SubReg)) >= 32 &&
               "Sub-dword subregs are not supported");
        // Each subregister lane covers one dword (4 bytes).
        return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
  /// be used when it is known that the value in SrcReg is the same across all
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  void legalizeOperands(MachineInstr &MI,
                        MachineDominatorTree *MDT = nullptr) const;

  /// Replace this instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of \p MI to the
  /// VALU if necessary. If present, \p MDT is updated.
  void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  void insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                        int Count) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \c Op, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyInstruction(const MachineInstr &MI) const;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  /// NOTE(review): pseudoToMCOpcode() can return -1; callers appear to be
  /// expected to pass only opcodes with an MC equivalent -- confirm.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI) const override;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    unsigned DestReg) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  /// \returns true if \p Imm fits in the 12-bit unsigned MUBUF immediate
  /// offset field.
  static bool isLegalMUBUFImmOffset(unsigned Imm) {
    return isUInt<12>(Imm);
  }

  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction. If \p Signed, this is for an instruction that
  /// interprets the offset as signed.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         bool Signed) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
982 int pseudoToMCOpcode(int Opcode) const; 983 984 const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, 985 const TargetRegisterInfo *TRI, 986 const MachineFunction &MF) 987 const override { 988 if (OpNum >= TID.getNumOperands()) 989 return nullptr; 990 return RI.getRegClass(TID.OpInfo[OpNum].RegClass); 991 } 992 993 void fixImplicitOperands(MachineInstr &MI) const; 994 }; 995 996 /// \brief Returns true if a reg:subreg pair P has a TRC class 997 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, 998 const TargetRegisterClass &TRC, 999 MachineRegisterInfo &MRI) { 1000 auto *RC = MRI.getRegClass(P.Reg); 1001 if (!P.SubReg) 1002 return RC == &TRC; 1003 auto *TRI = MRI.getTargetRegisterInfo(); 1004 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg); 1005 } 1006 1007 /// \brief Create RegSubRegPair from a register MachineOperand 1008 inline 1009 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) { 1010 assert(O.isReg()); 1011 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg()); 1012 } 1013 1014 /// \brief Return the SubReg component from REG_SEQUENCE 1015 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, 1016 unsigned SubReg); 1017 1018 /// \brief Return the defining instruction for a given reg:subreg pair 1019 /// skipping copy like instructions and subreg-manipulation pseudos. 1020 /// Following another subreg of a reg:subreg isn't supported. 1021 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, 1022 MachineRegisterInfo &MRI); 1023 1024 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p 1025 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not 1026 /// attempt to track between blocks. 
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

// Opcode-to-opcode remapping accessors. Each returns the related opcode for
// the given one, or -1 style sentinels where no mapping exists (see
// getIfAddr64Inst's documented contract below). NOTE(review): these are
// presumably generated from TableGen instruction mappings in
// AMDGPUGenInstrInfo.inc — confirm against the .inc output.

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);

LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

// Buffer resource descriptor constants. NOTE(review): bit positions are in
// the upper 64 bits (words 2-3) of the 128-bit descriptor — confirm against
// the GCN ISA buffer resource layout.
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); // bit 51
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); // bit 53
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); // bit 55

} // end namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H