//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUInstrInfo.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstdint>

// Pull in the TableGen-generated instruction info base class declaration.
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class GCNSubtarget;
class TargetRegisterClass;

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  // Register info for the SI target; owned by value so it shares this
  // object's lifetime.
  const SIRegisterInfo RI;
  // Subtarget this instruction info was created for.
  const GCNSubtarget &ST;
  // Scheduling model used for latency queries (see getInstrLatency).
  TargetSchedModel SchedModel;

  // The inverse predicate should have the negative value.
  // Branch predicates for SALU/VALU conditional branches. The inverse
  // predicate is encoded as the negated value so that reversing a
  // condition is a sign flip.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  // Worklist type used while moving instructions (and their users) to the
  // VALU; deduplicates while preserving insertion order.
  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  // Extract sub-register \p SubIdx of \p SuperReg into a new virtual
  // register of class \p SubRC, inserting before \p MI.
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  // Same as buildExtractSubReg, but also handles immediate super operands,
  // returning the resulting (register or immediate) operand.
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;
private:
  // Helpers used by moveToVALU to lower individual scalar instructions to
  // vector equivalents. Each pushes newly created users onto \p Worklist.
  void swapOperands(MachineInstr &Inst) const;

  bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                        MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SetVectorType &Worklist,
                      MachineInstr &Inst) const;

  void lowerScalarXnor(SetVectorType &Worklist,
                       MachineInstr &Inst) const;

  void splitScalarNotBinop(SetVectorType &Worklist,
                           MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SetVectorType &Worklist,
                          MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SetVectorType &Worklist,
                               MachineInstr &Inst, unsigned Opcode) const;

  void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SetVectorType &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SetVectorType &Worklist,
                           MachineInstr &Inst) const;
  void movePackToVALU(SetVectorType &Worklist,
                      MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  // Queue all users of \p Reg so they are also moved to the VALU.
  void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
                                    SetVectorType &Worklist) const;

  // Queue instructions that consume the SCC def of \p SCCDefInst.
  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SetVectorType &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  // Target flags attached to machine operands; mapped to relocations or
  // long-branch markers as documented per enumerator.
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_LONG_BRANCH_FORWARD = 6,
    MO_LONG_BRANCH_BACKWARD = 7,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                         AAResults *AA) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                               int64_t &Offset1,
                               int64_t &Offset2) const override;

  bool getMemOperandWithOffset(const MachineInstr &LdSt,
                               const MachineOperand *&BaseOp,
                               int64_t &Offset,
                               const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(const MachineOperand &BaseOp1,
                           const MachineOperand &BaseOp2,
                           unsigned NumLoads) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
                                    RegScavenger *RS, unsigned TmpReg,
                                    unsigned Offset, unsigned Size) const;

  // Emit instructions that materialize \p Value into \p DestReg at \p MI.
  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL,
                            unsigned DestReg,
                            int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  // Emit a compare of \p SrcReg against immediate \p Value; insertNE/insertEQ
  // differ only in the comparison sense. Returns the condition result register.
  unsigned insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    unsigned SrcReg, int Value) const;

  unsigned insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    unsigned SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, unsigned SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, unsigned DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
                             unsigned &SrcOpIdx2) const override;

  bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
                             unsigned & SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
                                MachineBasicBlock &NewDestBB,
                                const DebugLoc &DL,
                                int64_t BrOffset,
                                RegScavenger *RS = nullptr) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond,
                       unsigned TrueReg, unsigned FalseReg,
                       int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    unsigned DstReg, ArrayRef<MachineOperand> Cond,
                    unsigned TrueReg, unsigned FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          unsigned DstReg, ArrayRef<MachineOperand> Cond,
                          unsigned TrueReg, unsigned FalseReg) const;

  unsigned getAddressSpaceForPseudoSourceKind(
             unsigned Kind) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  bool isFoldableCopy(const MachineInstr &MI) const;

  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
                                      MachineInstr &MI,
                                      LiveVariables *LV) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  // Instruction-format predicates below are driven by the TableGen TSFlags
  // bits. Each comes in a static form taking a MachineInstr and a member
  // form taking a raw opcode (usable without an instruction instance).

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  // VMEM here means buffer or image memory instructions (not FLAT/SMEM).
  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    // Segment-specific FLAT instructions are distinguished here by lacking
    // the LGKM_CNT flag that plain flat_* instructions carry.
    return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
  }

  // FIXME: Make this more precise
  static bool isFLATScratch(const MachineInstr &MI) {
    return isSegmentSpecificFLAT(MI);
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  // True for instructions executed on a scalar unit (SALU or scalar memory).
  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  // Wait-counter classification: which counter an instruction's completion
  // is tracked by.
  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  // True for SOPK instructions whose immediate is zero-extended (rather
  // than sign-extended).
  static bool sopkIsZext(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  bool sopkIsZext(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  // Returns the subset of clamp-related TSFlags set on \p MI.
  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  // True if the destination of this COPY is not an SGPR. \p MI must be a COPY.
  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(MI.isCopy());
    unsigned Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  // True if any explicit use operand of \p MI is a VGPR.
  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
      return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const {
    return isInlineConstant(Imm.bitcastToAPInt());
  }

  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO in \p MI substituted with \p DefMO, the
  /// result would be an inline immediate operand.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = MI.getOperandNo(&UseMO);
    // Operands past NumOperands (or with no operand info) have no encoding
    // constraints to check against, so they cannot be inline constants.
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
      return false;
    }

    return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }

  /// \returns true if \p MO would be a valid inline immediate if placed at
  /// operand slot \p OpIdx of \p MI.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (MI.isCopy()) {
      // COPY has no operand type info; derive it from the operand size.
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    const MachineInstr *Parent = MO.getParent();
    return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
  }

  // A literal constant is an immediate that cannot be encoded inline and
  // therefore needs an extra 32-bit literal dword in the encoding.
  bool isLiteralConstant(const MachineOperand &MO,
                         const MCOperandInfo &OpInfo) const {
    return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
  }

  bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return MO.isImm() && !isInlineConstant(MI, OpIdx);
  }

  // Returns true if this operand could potentially require a 32-bit literal
  // operand, but not necessarily. A FrameIndex for example could resolve to an
  // inline immediate value that will not require an additional 4-bytes; this
  // assumes that it will.
  bool isLiteralConstantLike(const MachineOperand &MO,
                             const MCOperandInfo &OpInfo) const;

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  // Whether \p MI can be rewritten to its 32-bit (e32) encoding.
  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  // Build the shrunk (e32) form of \p MI using \p NewOpcode.
  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  // Returns the VALU opcode equivalent to the (scalar) opcode of \p MI.
  unsigned getVALUOp(const MachineInstr &MI) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE it will return
  /// the register class of its machine operand.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        // Size of a subreg operand is derived from its lane mask; each lane
        // is one 32-bit dword, hence the "no sub-dword subregs" assertion.
        assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
                                   MI.getParent()->getParent()->getRegInfo().
                                     getRegClass(MO.getReg()), SubReg)) >= 32 &&
               "Sub-dword subregs are not supported");
        return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
  /// be used when it is known that the value in SrcReg is the same across all
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  // Make \p Op a legal operand of register class \p DstRC by inserting a
  // copy (or equivalent) before iterator \p I in \p InsertMBB.
  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  void legalizeOperands(MachineInstr &MI,
                        MachineDominatorTree *MDT = nullptr) const;

  /// Replace this instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of \p MI to the
  /// VALU if necessary. If present, \p MDT is updated.
  void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
                        int Count) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \c Op, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  /// Const overload of getNamedOperand. The const_cast is safe because the
  /// returned operand is handed back to the caller as const.
  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    // The operand is required: Idx is not checked against -1, so callers must
    // only pass opcodes that actually carry an operand named \p OpName.
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyInstruction(const MachineInstr &MI) const;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    // pseudoToMCOpcode may return -1 (see its declaration below); callers must
    // only pass opcodes that have an MC equivalent.
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    unsigned DestReg) const;

  // NOTE(review): variant taking a RegScavenger -- presumably for contexts
  // where a scratch carry register must be scavenged; confirm in the .cpp.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  /// MUBUF immediate offsets are legal when they fit in an unsigned 12-bit
  /// field.
  static bool isLegalMUBUFImmOffset(unsigned Imm) {
    return isUInt<12>(Imm);
  }

  unsigned getNumFlatOffsetBits(unsigned AddrSpace, bool Signed) const;

  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction. If \p Signed, this is for an instruction that
  /// interprets the offset as signed.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         bool Signed) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  /// Look up the register class required for operand \p OpNum of \p TID, or
  /// nullptr if \p OpNum is out of range. \p TRI and \p MF are unused here;
  /// the lookup goes through the target's own SIRegisterInfo instance.
  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
      const override {
    if (OpNum >= TID.getNumOperands())
      return nullptr;
    return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
  }

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;
};

/// \brief Returns true if the reg:subreg pair \p P is of register class
/// \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  // No subregister index: compare the register's class directly.
  if (!P.SubReg)
    return RC == &TRC;
  // With a subregister index, P is of class TRC exactly when RC is the
  // matching super-register class of TRC for that index.
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline TargetInstrInfo::RegSubRegPair
getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg()); // Only register operands carry a reg:subreg pair.
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

// NOTE(review): the functions below look like TableGen-generated opcode
// mapping tables (declared LLVM_READONLY, taking a raw opcode); judging from
// getIfAddr64Inst's documentation they return -1 when no mapping exists --
// confirm against AMDGPUGenInstrInfo.inc.
namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);

LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

// Buffer resource descriptor constants. NOTE(review): the (32 + N) shifts
// suggest bit positions within a 64-bit value holding descriptor words 2-3
// (cf. SIInstrInfo::getScratchRsrcWords23 above) -- confirm against the
// hardware SRD layout.
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H