xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 namespace llvm {
29 
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38 
39 constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;
40 
41 /// Mark the MMO of a uniform load if there are no potentially clobbering stores
42 /// on any path from the start of an entry function to this load.
43 static const MachineMemOperand::Flags MONoClobber =
44     MachineMemOperand::MOTargetFlag1;
45 
46 /// Mark the MMO of a load as the last use.
47 static const MachineMemOperand::Flags MOLastUse =
48     MachineMemOperand::MOTargetFlag2;
49 
50 /// Utility to store machine instructions worklist.
51 struct SIInstrWorklist {
52   SIInstrWorklist() = default;
53 
54   void insert(MachineInstr *MI);
55 
topSIInstrWorklist56   MachineInstr *top() const {
57     const auto *iter = InstrList.begin();
58     return *iter;
59   }
60 
erase_topSIInstrWorklist61   void erase_top() {
62     const auto *iter = InstrList.begin();
63     InstrList.erase(iter);
64   }
65 
emptySIInstrWorklist66   bool empty() const { return InstrList.empty(); }
67 
clearSIInstrWorklist68   void clear() {
69     InstrList.clear();
70     DeferredList.clear();
71   }
72 
73   bool isDeferred(MachineInstr *MI);
74 
getDeferredListSIInstrWorklist75   SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
76 
77 private:
78   /// InstrList contains the MachineInstrs.
79   SetVector<MachineInstr *> InstrList;
80   /// Deferred instructions are specific MachineInstr
81   /// that will be added by insert method.
82   SetVector<MachineInstr *> DeferredList;
83 };
84 
85 class SIInstrInfo final : public AMDGPUGenInstrInfo {
86 private:
87   const SIRegisterInfo RI;
88   const GCNSubtarget &ST;
89   TargetSchedModel SchedModel;
90   mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
91 
92   // The inverse predicate should have the negative value.
93   enum BranchPredicate {
94     INVALID_BR = 0,
95     SCC_TRUE = 1,
96     SCC_FALSE = -1,
97     VCCNZ = 2,
98     VCCZ = -2,
99     EXECNZ = -3,
100     EXECZ = 3
101   };
102 
103   using SetVectorType = SmallSetVector<MachineInstr *, 32>;
104 
105   static unsigned getBranchOpcode(BranchPredicate Cond);
106   static BranchPredicate getBranchPredicate(unsigned Opcode);
107 
108 public:
109   unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
110                               MachineRegisterInfo &MRI,
111                               const MachineOperand &SuperReg,
112                               const TargetRegisterClass *SuperRC,
113                               unsigned SubIdx,
114                               const TargetRegisterClass *SubRC) const;
115   MachineOperand buildExtractSubRegOrImm(
116       MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
117       const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
118       unsigned SubIdx, const TargetRegisterClass *SubRC) const;
119 
120 private:
121   void swapOperands(MachineInstr &Inst) const;
122 
123   std::pair<bool, MachineBasicBlock *>
124   moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
125                    MachineDominatorTree *MDT = nullptr) const;
126 
127   void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
128                    MachineDominatorTree *MDT = nullptr) const;
129 
130   void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
131 
132   void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
133 
134   void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
135                            unsigned Opcode) const;
136 
137   void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
138                           unsigned Opcode) const;
139 
140   void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
141                                unsigned Opcode, bool Swap = false) const;
142 
143   void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
144                                 unsigned Opcode,
145                                 MachineDominatorTree *MDT = nullptr) const;
146 
147   void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
148                           MachineDominatorTree *MDT) const;
149 
150   void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
151                              MachineDominatorTree *MDT) const;
152 
153   void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
154                             MachineDominatorTree *MDT = nullptr) const;
155 
156   void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
157                             MachineInstr &Inst) const;
158   void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
159   void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
160                                unsigned Opcode,
161                                MachineDominatorTree *MDT = nullptr) const;
162   void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
163                       MachineInstr &Inst) const;
164 
165   void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
166                                     SIInstrWorklist &Worklist) const;
167 
168   void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
169                                     MachineInstr &SCCDefInst,
170                                     SIInstrWorklist &Worklist,
171                                     Register NewCond = Register()) const;
172   void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
173                                 SIInstrWorklist &Worklist) const;
174 
175   const TargetRegisterClass *
176   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
177 
178   bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
179                                     const MachineInstr &MIb) const;
180 
181   Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
182 
183   bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
184                   StringRef &ErrInfo) const;
185 
186   bool resultDependsOnExec(const MachineInstr &MI) const;
187 
188 protected:
189   /// If the specific machine instruction is a instruction that moves/copies
190   /// value from one register to another register return destination and source
191   /// registers as machine operands.
192   std::optional<DestSourcePair>
193   isCopyInstrImpl(const MachineInstr &MI) const override;
194 
195   bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0,
196                            AMDGPU::OpName Src0OpName, MachineOperand &Src1,
197                            AMDGPU::OpName Src1OpName) const;
198   bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
199                      const MachineOperand *fromMO, unsigned toIdx,
200                      const MachineOperand *toMO) const;
201   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
202                                        unsigned OpIdx0,
203                                        unsigned OpIdx1) const override;
204 
205 public:
206   enum TargetOperandFlags {
207     MO_MASK = 0xf,
208 
209     MO_NONE = 0,
210     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
211     MO_GOTPCREL = 1,
212     // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
213     MO_GOTPCREL32 = 2,
214     MO_GOTPCREL32_LO = 2,
215     // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
216     MO_GOTPCREL32_HI = 3,
217     // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
218     MO_REL32 = 4,
219     MO_REL32_LO = 4,
220     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
221     MO_REL32_HI = 5,
222 
223     MO_FAR_BRANCH_OFFSET = 6,
224 
225     MO_ABS32_LO = 8,
226     MO_ABS32_HI = 9,
227   };
228 
229   explicit SIInstrInfo(const GCNSubtarget &ST);
230 
getRegisterInfo()231   const SIRegisterInfo &getRegisterInfo() const {
232     return RI;
233   }
234 
getSubtarget()235   const GCNSubtarget &getSubtarget() const {
236     return ST;
237   }
238 
239   bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
240 
241   bool isIgnorableUse(const MachineOperand &MO) const override;
242 
243   bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
244                     MachineCycleInfo *CI) const override;
245 
246   bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
247                                int64_t &Offset1) const override;
248 
249   bool isGlobalMemoryObject(const MachineInstr *MI) const override;
250 
251   bool getMemOperandsWithOffsetWidth(
252       const MachineInstr &LdSt,
253       SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
254       bool &OffsetIsScalable, LocationSize &Width,
255       const TargetRegisterInfo *TRI) const final;
256 
257   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
258                            int64_t Offset1, bool OffsetIsScalable1,
259                            ArrayRef<const MachineOperand *> BaseOps2,
260                            int64_t Offset2, bool OffsetIsScalable2,
261                            unsigned ClusterSize,
262                            unsigned NumBytes) const override;
263 
264   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
265                                int64_t Offset1, unsigned NumLoads) const override;
266 
267   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
268                    const DebugLoc &DL, Register DestReg, Register SrcReg,
269                    bool KillSrc, bool RenamableDest = false,
270                    bool RenamableSrc = false) const override;
271 
272   const TargetRegisterClass *getPreferredSelectRegClass(
273                                unsigned Size) const;
274 
275   Register insertNE(MachineBasicBlock *MBB,
276                     MachineBasicBlock::iterator I, const DebugLoc &DL,
277                     Register SrcReg, int Value) const;
278 
279   Register insertEQ(MachineBasicBlock *MBB,
280                     MachineBasicBlock::iterator I, const DebugLoc &DL,
281                     Register SrcReg, int Value)  const;
282 
283   bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
284                                int64_t &ImmVal) const override;
285 
286   void storeRegToStackSlot(
287       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
288       bool isKill, int FrameIndex, const TargetRegisterClass *RC,
289       const TargetRegisterInfo *TRI, Register VReg,
290       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
291 
292   void loadRegFromStackSlot(
293       MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
294       int FrameIndex, const TargetRegisterClass *RC,
295       const TargetRegisterInfo *TRI, Register VReg,
296       MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
297 
298   bool expandPostRAPseudo(MachineInstr &MI) const override;
299 
300   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
301                      Register DestReg, unsigned SubIdx,
302                      const MachineInstr &Orig,
303                      const TargetRegisterInfo &TRI) const override;
304 
305   // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
306   // instructions. Returns a pair of generated instructions.
307   // Can split either post-RA with physical registers or pre-RA with
308   // virtual registers. In latter case IR needs to be in SSA form and
309   // and a REG_SEQUENCE is produced to define original register.
310   std::pair<MachineInstr*, MachineInstr*>
311   expandMovDPP64(MachineInstr &MI) const;
312 
313   // Returns an opcode that can be used to move a value to a \p DstRC
314   // register.  If there is no hardware instruction that can store to \p
315   // DstRC, then AMDGPU::COPY is returned.
316   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
317 
318   const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
319                                                      unsigned EltSize,
320                                                      bool IsSGPR) const;
321 
322   const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
323                                              bool IsIndirectSrc) const;
324   LLVM_READONLY
325   int commuteOpcode(unsigned Opc) const;
326 
327   LLVM_READONLY
commuteOpcode(const MachineInstr & MI)328   inline int commuteOpcode(const MachineInstr &MI) const {
329     return commuteOpcode(MI.getOpcode());
330   }
331 
332   bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
333                              unsigned &SrcOpIdx1) const override;
334 
335   bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
336                              unsigned &SrcOpIdx1) const;
337 
338   bool isBranchOffsetInRange(unsigned BranchOpc,
339                              int64_t BrOffset) const override;
340 
341   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
342 
343   /// Return whether the block terminate with divergent branch.
344   /// Note this only work before lowering the pseudo control flow instructions.
345   bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
346 
347   void insertIndirectBranch(MachineBasicBlock &MBB,
348                             MachineBasicBlock &NewDestBB,
349                             MachineBasicBlock &RestoreBB, const DebugLoc &DL,
350                             int64_t BrOffset, RegScavenger *RS) const override;
351 
352   bool analyzeBranchImpl(MachineBasicBlock &MBB,
353                          MachineBasicBlock::iterator I,
354                          MachineBasicBlock *&TBB,
355                          MachineBasicBlock *&FBB,
356                          SmallVectorImpl<MachineOperand> &Cond,
357                          bool AllowModify) const;
358 
359   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
360                      MachineBasicBlock *&FBB,
361                      SmallVectorImpl<MachineOperand> &Cond,
362                      bool AllowModify = false) const override;
363 
364   unsigned removeBranch(MachineBasicBlock &MBB,
365                         int *BytesRemoved = nullptr) const override;
366 
367   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
368                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
369                         const DebugLoc &DL,
370                         int *BytesAdded = nullptr) const override;
371 
372   bool reverseBranchCondition(
373     SmallVectorImpl<MachineOperand> &Cond) const override;
374 
375   bool canInsertSelect(const MachineBasicBlock &MBB,
376                        ArrayRef<MachineOperand> Cond, Register DstReg,
377                        Register TrueReg, Register FalseReg, int &CondCycles,
378                        int &TrueCycles, int &FalseCycles) const override;
379 
380   void insertSelect(MachineBasicBlock &MBB,
381                     MachineBasicBlock::iterator I, const DebugLoc &DL,
382                     Register DstReg, ArrayRef<MachineOperand> Cond,
383                     Register TrueReg, Register FalseReg) const override;
384 
385   void insertVectorSelect(MachineBasicBlock &MBB,
386                           MachineBasicBlock::iterator I, const DebugLoc &DL,
387                           Register DstReg, ArrayRef<MachineOperand> Cond,
388                           Register TrueReg, Register FalseReg) const;
389 
390   bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
391                       Register &SrcReg2, int64_t &CmpMask,
392                       int64_t &CmpValue) const override;
393 
394   bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
395                             Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
396                             const MachineRegisterInfo *MRI) const override;
397 
398   bool
399   areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
400                                   const MachineInstr &MIb) const override;
401 
402   static bool isFoldableCopy(const MachineInstr &MI);
403 
404   void removeModOperands(MachineInstr &MI) const;
405 
406   /// Return the extracted immediate value in a subregister use from a constant
407   /// materialized in a super register.
408   ///
409   /// e.g. %imm = S_MOV_B64 K[0:63]
410   ///      USE %imm.sub1
411   /// This will return K[32:63]
412   static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
413                                                      unsigned SubRegIndex);
414 
415   bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
416                      MachineRegisterInfo *MRI) const final;
417 
getMachineCSELookAheadLimit()418   unsigned getMachineCSELookAheadLimit() const override { return 500; }
419 
420   MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
421                                       LiveIntervals *LIS) const override;
422 
423   bool isSchedulingBoundary(const MachineInstr &MI,
424                             const MachineBasicBlock *MBB,
425                             const MachineFunction &MF) const override;
426 
isSALU(const MachineInstr & MI)427   static bool isSALU(const MachineInstr &MI) {
428     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
429   }
430 
isSALU(uint16_t Opcode)431   bool isSALU(uint16_t Opcode) const {
432     return get(Opcode).TSFlags & SIInstrFlags::SALU;
433   }
434 
isVALU(const MachineInstr & MI)435   static bool isVALU(const MachineInstr &MI) {
436     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
437   }
438 
isVALU(uint16_t Opcode)439   bool isVALU(uint16_t Opcode) const {
440     return get(Opcode).TSFlags & SIInstrFlags::VALU;
441   }
442 
isImage(const MachineInstr & MI)443   static bool isImage(const MachineInstr &MI) {
444     return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
445   }
446 
isImage(uint16_t Opcode)447   bool isImage(uint16_t Opcode) const {
448     return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
449   }
450 
isVMEM(const MachineInstr & MI)451   static bool isVMEM(const MachineInstr &MI) {
452     return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
453   }
454 
isVMEM(uint16_t Opcode)455   bool isVMEM(uint16_t Opcode) const {
456     return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
457   }
458 
isSOP1(const MachineInstr & MI)459   static bool isSOP1(const MachineInstr &MI) {
460     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
461   }
462 
isSOP1(uint16_t Opcode)463   bool isSOP1(uint16_t Opcode) const {
464     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
465   }
466 
isSOP2(const MachineInstr & MI)467   static bool isSOP2(const MachineInstr &MI) {
468     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
469   }
470 
isSOP2(uint16_t Opcode)471   bool isSOP2(uint16_t Opcode) const {
472     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
473   }
474 
isSOPC(const MachineInstr & MI)475   static bool isSOPC(const MachineInstr &MI) {
476     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
477   }
478 
isSOPC(uint16_t Opcode)479   bool isSOPC(uint16_t Opcode) const {
480     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
481   }
482 
isSOPK(const MachineInstr & MI)483   static bool isSOPK(const MachineInstr &MI) {
484     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
485   }
486 
isSOPK(uint16_t Opcode)487   bool isSOPK(uint16_t Opcode) const {
488     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
489   }
490 
isSOPP(const MachineInstr & MI)491   static bool isSOPP(const MachineInstr &MI) {
492     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
493   }
494 
isSOPP(uint16_t Opcode)495   bool isSOPP(uint16_t Opcode) const {
496     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
497   }
498 
isPacked(const MachineInstr & MI)499   static bool isPacked(const MachineInstr &MI) {
500     return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
501   }
502 
isPacked(uint16_t Opcode)503   bool isPacked(uint16_t Opcode) const {
504     return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
505   }
506 
isVOP1(const MachineInstr & MI)507   static bool isVOP1(const MachineInstr &MI) {
508     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
509   }
510 
isVOP1(uint16_t Opcode)511   bool isVOP1(uint16_t Opcode) const {
512     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
513   }
514 
isVOP2(const MachineInstr & MI)515   static bool isVOP2(const MachineInstr &MI) {
516     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
517   }
518 
isVOP2(uint16_t Opcode)519   bool isVOP2(uint16_t Opcode) const {
520     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
521   }
522 
isVOP3(const MachineInstr & MI)523   static bool isVOP3(const MachineInstr &MI) {
524     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
525   }
526 
isVOP3(uint16_t Opcode)527   bool isVOP3(uint16_t Opcode) const {
528     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
529   }
530 
isSDWA(const MachineInstr & MI)531   static bool isSDWA(const MachineInstr &MI) {
532     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
533   }
534 
isSDWA(uint16_t Opcode)535   bool isSDWA(uint16_t Opcode) const {
536     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
537   }
538 
isVOPC(const MachineInstr & MI)539   static bool isVOPC(const MachineInstr &MI) {
540     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
541   }
542 
isVOPC(uint16_t Opcode)543   bool isVOPC(uint16_t Opcode) const {
544     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
545   }
546 
isMUBUF(const MachineInstr & MI)547   static bool isMUBUF(const MachineInstr &MI) {
548     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
549   }
550 
isMUBUF(uint16_t Opcode)551   bool isMUBUF(uint16_t Opcode) const {
552     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
553   }
554 
isMTBUF(const MachineInstr & MI)555   static bool isMTBUF(const MachineInstr &MI) {
556     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
557   }
558 
isMTBUF(uint16_t Opcode)559   bool isMTBUF(uint16_t Opcode) const {
560     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
561   }
562 
isSMRD(const MachineInstr & MI)563   static bool isSMRD(const MachineInstr &MI) {
564     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
565   }
566 
isSMRD(uint16_t Opcode)567   bool isSMRD(uint16_t Opcode) const {
568     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
569   }
570 
571   bool isBufferSMRD(const MachineInstr &MI) const;
572 
isDS(const MachineInstr & MI)573   static bool isDS(const MachineInstr &MI) {
574     return MI.getDesc().TSFlags & SIInstrFlags::DS;
575   }
576 
isDS(uint16_t Opcode)577   bool isDS(uint16_t Opcode) const {
578     return get(Opcode).TSFlags & SIInstrFlags::DS;
579   }
580 
isLDSDMA(const MachineInstr & MI)581   static bool isLDSDMA(const MachineInstr &MI) {
582     return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
583   }
584 
isLDSDMA(uint16_t Opcode)585   bool isLDSDMA(uint16_t Opcode) {
586     return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
587   }
588 
isGWS(const MachineInstr & MI)589   static bool isGWS(const MachineInstr &MI) {
590     return MI.getDesc().TSFlags & SIInstrFlags::GWS;
591   }
592 
isGWS(uint16_t Opcode)593   bool isGWS(uint16_t Opcode) const {
594     return get(Opcode).TSFlags & SIInstrFlags::GWS;
595   }
596 
597   bool isAlwaysGDS(uint16_t Opcode) const;
598 
isMIMG(const MachineInstr & MI)599   static bool isMIMG(const MachineInstr &MI) {
600     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
601   }
602 
isMIMG(uint16_t Opcode)603   bool isMIMG(uint16_t Opcode) const {
604     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
605   }
606 
isVIMAGE(const MachineInstr & MI)607   static bool isVIMAGE(const MachineInstr &MI) {
608     return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
609   }
610 
isVIMAGE(uint16_t Opcode)611   bool isVIMAGE(uint16_t Opcode) const {
612     return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
613   }
614 
isVSAMPLE(const MachineInstr & MI)615   static bool isVSAMPLE(const MachineInstr &MI) {
616     return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
617   }
618 
isVSAMPLE(uint16_t Opcode)619   bool isVSAMPLE(uint16_t Opcode) const {
620     return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
621   }
622 
isGather4(const MachineInstr & MI)623   static bool isGather4(const MachineInstr &MI) {
624     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
625   }
626 
isGather4(uint16_t Opcode)627   bool isGather4(uint16_t Opcode) const {
628     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
629   }
630 
isFLAT(const MachineInstr & MI)631   static bool isFLAT(const MachineInstr &MI) {
632     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
633   }
634 
635   // Is a FLAT encoded instruction which accesses a specific segment,
636   // i.e. global_* or scratch_*.
isSegmentSpecificFLAT(const MachineInstr & MI)637   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
638     auto Flags = MI.getDesc().TSFlags;
639     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
640   }
641 
isSegmentSpecificFLAT(uint16_t Opcode)642   bool isSegmentSpecificFLAT(uint16_t Opcode) const {
643     auto Flags = get(Opcode).TSFlags;
644     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
645   }
646 
isFLATGlobal(const MachineInstr & MI)647   static bool isFLATGlobal(const MachineInstr &MI) {
648     return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
649   }
650 
isFLATGlobal(uint16_t Opcode)651   bool isFLATGlobal(uint16_t Opcode) const {
652     return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
653   }
654 
isFLATScratch(const MachineInstr & MI)655   static bool isFLATScratch(const MachineInstr &MI) {
656     return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
657   }
658 
isFLATScratch(uint16_t Opcode)659   bool isFLATScratch(uint16_t Opcode) const {
660     return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
661   }
662 
663   // Any FLAT encoded instruction, including global_* and scratch_*.
isFLAT(uint16_t Opcode)664   bool isFLAT(uint16_t Opcode) const {
665     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
666   }
667 
isBlockLoadStore(uint16_t Opcode)668   static bool isBlockLoadStore(uint16_t Opcode) {
669     switch (Opcode) {
670     case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
671     case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
672     case AMDGPU::SCRATCH_STORE_BLOCK_SADDR:
673     case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR:
674     case AMDGPU::SCRATCH_STORE_BLOCK_SVS:
675     case AMDGPU::SCRATCH_LOAD_BLOCK_SVS:
676       return true;
677     default:
678       return false;
679     }
680   }
681 
isEXP(const MachineInstr & MI)682   static bool isEXP(const MachineInstr &MI) {
683     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
684   }
685 
isDualSourceBlendEXP(const MachineInstr & MI)686   static bool isDualSourceBlendEXP(const MachineInstr &MI) {
687     if (!isEXP(MI))
688       return false;
689     unsigned Target = MI.getOperand(0).getImm();
690     return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
691            Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
692   }
693 
isEXP(uint16_t Opcode)694   bool isEXP(uint16_t Opcode) const {
695     return get(Opcode).TSFlags & SIInstrFlags::EXP;
696   }
697 
isAtomicNoRet(const MachineInstr & MI)698   static bool isAtomicNoRet(const MachineInstr &MI) {
699     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
700   }
701 
isAtomicNoRet(uint16_t Opcode)702   bool isAtomicNoRet(uint16_t Opcode) const {
703     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
704   }
705 
isAtomicRet(const MachineInstr & MI)706   static bool isAtomicRet(const MachineInstr &MI) {
707     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
708   }
709 
isAtomicRet(uint16_t Opcode)710   bool isAtomicRet(uint16_t Opcode) const {
711     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
712   }
713 
isAtomic(const MachineInstr & MI)714   static bool isAtomic(const MachineInstr &MI) {
715     return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
716                                    SIInstrFlags::IsAtomicNoRet);
717   }
718 
isAtomic(uint16_t Opcode)719   bool isAtomic(uint16_t Opcode) const {
720     return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
721                                   SIInstrFlags::IsAtomicNoRet);
722   }
723 
mayWriteLDSThroughDMA(const MachineInstr & MI)724   static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
725     return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
726   }
727 
isWQM(const MachineInstr & MI)728   static bool isWQM(const MachineInstr &MI) {
729     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
730   }
731 
isWQM(uint16_t Opcode)732   bool isWQM(uint16_t Opcode) const {
733     return get(Opcode).TSFlags & SIInstrFlags::WQM;
734   }
735 
isDisableWQM(const MachineInstr & MI)736   static bool isDisableWQM(const MachineInstr &MI) {
737     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
738   }
739 
isDisableWQM(uint16_t Opcode)740   bool isDisableWQM(uint16_t Opcode) const {
741     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
742   }
743 
744   // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
745   // SGPRs spilling to VGPRs which are SGPR spills but from VALU instructions
746   // therefore we need an explicit check for them since just checking if the
747   // Spill bit is set and what instruction type it came from misclassifies
748   // them.
isVGPRSpill(const MachineInstr & MI)749   static bool isVGPRSpill(const MachineInstr &MI) {
750     return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
751            MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
752            (isSpill(MI) && isVALU(MI));
753   }
754 
isVGPRSpill(uint16_t Opcode)755   bool isVGPRSpill(uint16_t Opcode) const {
756     return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
757            Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
758            (isSpill(Opcode) && isVALU(Opcode));
759   }
760 
isSGPRSpill(const MachineInstr & MI)761   static bool isSGPRSpill(const MachineInstr &MI) {
762     return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
763            MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
764            (isSpill(MI) && isSALU(MI));
765   }
766 
isSGPRSpill(uint16_t Opcode)767   bool isSGPRSpill(uint16_t Opcode) const {
768     return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
769            Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
770            (isSpill(Opcode) && isSALU(Opcode));
771   }
772 
  /// \returns true if the Spill bit is set in \p Opcode's TSFlags.
  bool isSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  /// \returns true if the Spill bit is set in \p MI's TSFlags.
  static bool isSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Spill;
  }
780 
isWWMRegSpillOpcode(uint16_t Opcode)781   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
782     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
783            Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
784            Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
785            Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
786   }
787 
  /// \returns true if \p Opcode is one of the compute-shader chain
  /// tail-call pseudos (wave32 or wave64 variant).
  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }
792 
  /// \returns true if the DPP bit is set in the instruction's TSFlags.
  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  /// Opcode-based overload of the DPP check above.
  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }
800 
  /// \returns true if the TRANS bit is set in the instruction's TSFlags.
  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  /// Opcode-based overload of the TRANS check above.
  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }
808 
  /// \returns true if the VOP3P bit is set in the instruction's TSFlags.
  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  /// Opcode-based overload of the VOP3P check above.
  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }
816 
  /// \returns true if the VINTRP bit is set in the instruction's TSFlags.
  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  /// Opcode-based overload of the VINTRP check above.
  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }
824 
  /// \returns true if the IsMAI bit is set in the instruction's TSFlags.
  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  /// Opcode-based overload of the MAI check above.
  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }
832 
isMFMA(const MachineInstr & MI)833   static bool isMFMA(const MachineInstr &MI) {
834     return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
835            MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
836   }
837 
  /// \returns true if the IsDOT bit is set in \p MI's TSFlags.
  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }
841 
  /// \returns true if the IsWMMA bit is set in the instruction's TSFlags.
  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  /// Opcode-based overload of the WMMA check above.
  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }
849 
  /// \returns true if \p MI is any of the matrix-multiply instruction
  /// families: MFMA, WMMA, or SWMMAC.
  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }
853 
  /// \returns true if the IsSWMMAC bit is set in the instruction's TSFlags.
  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  /// Opcode-based overload of the SWMMAC check above.
  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }
861 
  /// Opcode-based overload of the DOT check above.
  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }
865 
866   bool isXDL(const MachineInstr &MI) const;
867 
  /// \returns true if \p Opcode is a DGEMM MAI, per the generated table.
  static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); }
869 
  /// \returns true if the LDSDIR bit is set in the instruction's TSFlags.
  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  /// Opcode-based overload of the LDSDIR check above.
  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }
877 
  /// \returns true if the VINTERP bit is set in the instruction's TSFlags.
  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  /// Opcode-based overload of the VINTERP check above.
  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }
885 
  /// \returns true if \p MI executes on the scalar unit (SALU or SMRD).
  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }
889 
  /// \returns true if the VM_CNT bit is set in \p MI's TSFlags.
  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }
893 
  /// \returns true if the LGKM_CNT bit is set in \p MI's TSFlags.
  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }
897 
  // Most SOPK instructions treat the immediate as signed 16-bit; however,
  // some treat it as unsigned.
sopkIsZext(unsigned Opcode)900   static bool sopkIsZext(unsigned Opcode) {
901     return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
902            Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
903            Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
904            Opcode == AMDGPU::S_GETREG_B32;
905   }
906 
907   /// \returns true if this is an s_store_dword* instruction. This is more
908   /// specific than isSMEM && mayStore.
  /// \returns true if the SCALAR_STORE bit is set in the instruction's
  /// TSFlags.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  /// Opcode-based overload of the scalar-store check above.
  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }
916 
  /// \returns true if the FIXED_SIZE bit is set in the instruction's TSFlags.
  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  /// Opcode-based overload of the fixed-size check above.
  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }
924 
  /// \returns true if the FPClamp bit is set in the instruction's TSFlags.
  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  /// Opcode-based overload of the FP-clamp check above.
  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }
932 
  /// \returns true if the IntClamp bit is set in \p MI's TSFlags.
  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }
936 
getClampMask(const MachineInstr & MI)937   uint64_t getClampMask(const MachineInstr &MI) const {
938     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
939                                 SIInstrFlags::IntClamp |
940                                 SIInstrFlags::ClampLo |
941                                 SIInstrFlags::ClampHi;
942       return MI.getDesc().TSFlags & ClampFlags;
943   }
944 
  /// \returns true if the FPDPRounding bit is set in the instruction's
  /// TSFlags.
  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  /// Opcode-based overload of the FP/DP-rounding check above.
  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }
952 
  /// \returns true if the FPAtomic bit is set in the instruction's TSFlags.
  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  /// Opcode-based overload of the FP-atomic check above.
  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }
960 
  /// \returns true if the IsNeverUniform bit is set in \p MI's TSFlags.
  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }
964 
965   // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
966   // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
967   // to check for the barrier start (S_BARRIER_SIGNAL*)
isBarrierStart(unsigned Opcode)968   bool isBarrierStart(unsigned Opcode) const {
969     return Opcode == AMDGPU::S_BARRIER ||
970            Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
971            Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
972            Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
973            Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
974   }
975 
  /// \returns true if \p Opcode is any barrier-related instruction: a barrier
  /// start, S_BARRIER_WAIT, or the GWS init/barrier instructions.
  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
  }
980 
isF16PseudoScalarTrans(unsigned Opcode)981   static bool isF16PseudoScalarTrans(unsigned Opcode) {
982     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
983            Opcode == AMDGPU::V_S_LOG_F16_e64 ||
984            Opcode == AMDGPU::V_S_RCP_F16_e64 ||
985            Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
986            Opcode == AMDGPU::V_S_SQRT_F16_e64;
987   }
988 
  /// \returns true if the TiedSourceNotRead bit is set in the instruction's
  /// TSFlags.
  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  /// Opcode-based overload of the tied-source check above.
  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }
996 
  /// \returns true if \p Opcode is one of the scheduling-directive pseudos:
  /// SCHED_BARRIER, SCHED_GROUP_BARRIER, or IGLP_OPT.
  bool isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }

  /// MachineInstr overload of the IGLP check above.
  bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
1003 
1004   // Return true if the instruction is mutually exclusive with all non-IGLP DAG
1005   // mutations, requiring all other mutations to be disabled.
  bool isIGLPMutationOnly(unsigned Opcode) const {
    // Note: unlike isIGLP, SCHED_BARRIER is deliberately not included here.
    return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }
1009 
  /// Map a *_soft wait-count opcode to its non-soft equivalent; any other
  /// \p Opcode is returned unchanged.
  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }
1032 
  /// \returns true if \p Opcode — after mapping any *_soft variant to its
  /// non-soft form — is one of the wait-count instructions.
  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }
1055 
isVGPRCopy(const MachineInstr & MI)1056   bool isVGPRCopy(const MachineInstr &MI) const {
1057     assert(isCopyInstr(MI));
1058     Register Dest = MI.getOperand(0).getReg();
1059     const MachineFunction &MF = *MI.getParent()->getParent();
1060     const MachineRegisterInfo &MRI = MF.getRegInfo();
1061     return !RI.isSGPRReg(MRI, Dest);
1062   }
1063 
hasVGPRUses(const MachineInstr & MI)1064   bool hasVGPRUses(const MachineInstr &MI) const {
1065     const MachineFunction &MF = *MI.getParent()->getParent();
1066     const MachineRegisterInfo &MRI = MF.getRegInfo();
1067     return llvm::any_of(MI.explicit_uses(),
1068                         [&MRI, this](const MachineOperand &MO) {
1069       return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
1070   }
1071 
  /// Return true if the instruction modifies the mode register.
1073   static bool modifiesModeRegister(const MachineInstr &MI);
1074 
1075   /// This function is used to determine if an instruction can be safely
1076   /// executed under EXEC = 0 without hardware error, indeterminate results,
1077   /// and/or visible effects on future vector execution or outside the shader.
1078   /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
1079   /// used in removing branches over short EXEC = 0 sequences.
1080   /// As such it embeds certain assumptions which may not apply to every case
1081   /// of EXEC = 0 execution.
1082   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
1083 
1084   /// Returns true if the instruction could potentially depend on the value of
1085   /// exec. If false, exec dependencies may safely be ignored.
1086   bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
1087 
1088   bool isInlineConstant(const APInt &Imm) const;
1089 
1090   bool isInlineConstant(const APFloat &Imm) const;
1091 
1092   // Returns true if this non-register operand definitely does not need to be
1093   // encoded as a 32-bit literal. Note that this function handles all kinds of
1094   // operands, not just immediates.
1095   //
1096   // Some operands like FrameIndexes could resolve to an inline immediate value
1097   // that will not require an additional 4-bytes; this function assumes that it
1098   // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const {
    assert(!MO.isReg() && "isInlineConstant called on register operand!");
    // Only plain immediates can be inline constants here; other non-register
    // operands (e.g. frame indexes) are conservatively treated as literals.
    if (!MO.isImm())
      return false;
    return isInlineConstant(MO.getImm(), OperandType);
  }
1105   bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const;
1106 
  /// Overload taking the operand's descriptor \p OpInfo; forwards its
  /// OperandType to the main check.
  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }
1111 
  /// \returns true if, were \p UseMO in \p MI substituted with \p DefMO, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    // Operands beyond those declared in the MCInstrDesc (e.g. implicit
    // operands) can never be inline immediates.
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }
1124 
  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    // Check the operand against the type declared for it in the MCInstrDesc.
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }
1131 
  /// \returns true if \p ImmVal would be inline-encodable in operand slot
  /// \p OpIdx of \p MI.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        int64_t ImmVal) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      // Copies carry no operand-type info, so infer an INT32/INT64 operand
      // type from the operand size.
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(ImmVal, OpType);
    }

    return isInlineConstant(ImmVal, MI.getDesc().operands()[OpIdx].OperandType);
  }
1148 
  /// Overload taking the candidate value as an immediate operand \p MO.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    return isInlineConstant(MI, OpIdx, MO.getImm());
  }
1153 
  /// \returns true if operand \p MO is an inline immediate in its current
  /// position within its parent instruction.
  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }
1157 
1158   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
1159                          const MachineOperand &MO) const;
1160 
1161   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
1162   /// This function will return false if you pass it a 32-bit instruction.
1163   bool hasVALU32BitEncoding(unsigned Opcode) const;
1164 
1165   /// Returns true if this operand uses the constant bus.
1166   bool usesConstantBus(const MachineRegisterInfo &MRI,
1167                        const MachineOperand &MO,
1168                        const MCOperandInfo &OpInfo) const;
1169 
  /// Overload resolving the operand and its descriptor from \p OpIdx of
  /// \p MI.
  bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
                       int OpIdx) const {
    return usesConstantBus(MRI, MI.getOperand(OpIdx),
                           MI.getDesc().operands()[OpIdx]);
  }
1175 
1176   /// Return true if this instruction has any modifiers.
1177   ///  e.g. src[012]_mod, omod, clamp.
1178   bool hasModifiers(unsigned Opcode) const;
1179 
1180   bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
1181   bool hasAnyModifiersSet(const MachineInstr &MI) const;
1182 
1183   bool canShrink(const MachineInstr &MI,
1184                  const MachineRegisterInfo &MRI) const;
1185 
1186   MachineInstr *buildShrunkInst(MachineInstr &MI,
1187                                 unsigned NewOpcode) const;
1188 
1189   bool verifyInstruction(const MachineInstr &MI,
1190                          StringRef &ErrInfo) const override;
1191 
1192   unsigned getVALUOp(const MachineInstr &MI) const;
1193 
1194   void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
1195                              MachineBasicBlock::iterator MBBI,
1196                              const DebugLoc &DL, Register Reg, bool IsSCCLive,
1197                              SlotIndexes *Indexes = nullptr) const;
1198 
1199   void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
1200                    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1201                    Register Reg, SlotIndexes *Indexes = nullptr) const;
1202 
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands
  /// to infer the correct register class where needed.
1208   const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
1209                                            unsigned OpNo) const;
1210 
  /// Return the size in bytes of the operand \p OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    // Otherwise derive the size from the operand's register class.
    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }
1224 
1225   /// This form should usually be preferred since it handles operands
1226   /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        // A subregister use is only as wide as the subregister index.
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }
1236 
1237   /// Legalize the \p OpIndex operand of this instruction by inserting
1238   /// a MOV.  For example:
1239   /// ADD_I32_e32 VGPR0, 15
1240   /// to
1241   /// MOV VGPR1, 15
1242   /// ADD_I32_e32 VGPR0, VGPR1
1243   ///
1244   /// If the operand being legalized is a register, then a COPY will be used
1245   /// instead of MOV.
1246   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1247 
1248   /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1249   /// for \p MI.
1250   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1251                       const MachineOperand *MO = nullptr) const;
1252 
1253   /// Check if \p MO would be a valid operand for the given operand
1254   /// definition \p OpInfo. Note this does not attempt to validate constant bus
1255   /// restrictions (e.g. literal constant usage).
1256   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1257                           const MCOperandInfo &OpInfo,
1258                           const MachineOperand &MO) const;
1259 
1260   /// Check if \p MO (a register operand) is a legal register for the
1261   /// given operand description or operand index.
1262   /// The operand index version provide more legality checks
1263   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1264                          const MCOperandInfo &OpInfo,
1265                          const MachineOperand &MO) const;
1266   bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
1267                          const MachineOperand &MO) const;
1268   /// Legalize operands in \p MI by either commuting it or inserting a
1269   /// copy of src1.
1270   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1271 
1272   /// Fix operands in \p MI to satisfy constant bus requirements.
1273   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1274 
1275   /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
1276   /// for the dst register (\p DstRC) can be optionally supplied. This function
  /// can only be used when it is known that the value in SrcReg is the same
  /// across
1278   /// all threads in the wave.
1279   /// \returns The SGPR register that \p SrcReg was copied to.
1280   Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1281                               MachineRegisterInfo &MRI,
1282                               const TargetRegisterClass *DstRC = nullptr) const;
1283 
1284   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1285   void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1286 
1287   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1288                               MachineBasicBlock::iterator I,
1289                               const TargetRegisterClass *DstRC,
1290                               MachineOperand &Op, MachineRegisterInfo &MRI,
1291                               const DebugLoc &DL) const;
1292 
1293   /// Legalize all operands in this instruction.  This function may create new
1294   /// instructions and control-flow around \p MI.  If present, \p MDT is
1295   /// updated.
1296   /// \returns A new basic block that contains \p MI if new blocks were created.
1297   MachineBasicBlock *
1298   legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1299 
1300   /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1301   /// was moved to VGPR. \returns true if succeeded.
1302   bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1303 
1304   /// Fix operands in Inst to fix 16bit SALU to VALU lowering.
1305   void legalizeOperandsVALUt16(MachineInstr &Inst,
1306                                MachineRegisterInfo &MRI) const;
1307   void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
1308                                MachineRegisterInfo &MRI) const;
1309 
1310   /// Replace the instructions opcode with the equivalent VALU
  /// opcode.  This function will also move the users of MachineInstrs
1312   /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
1313   /// updated.
1314   void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1315 
1316   void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1317                       MachineInstr &Inst) const;
1318 
1319   void insertNoop(MachineBasicBlock &MBB,
1320                   MachineBasicBlock::iterator MI) const override;
1321 
1322   void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1323                    unsigned Quantity) const override;
1324 
1325   void insertReturn(MachineBasicBlock &MBB) const;
1326 
  /// Build instructions that simulate the behavior of an `s_trap 2`
  /// instruction
1328   /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
1329   /// interpreted as a nop.
1330   MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
1331                                          MachineBasicBlock &MBB,
1332                                          MachineInstr &MI,
1333                                          const DebugLoc &DL) const;
1334 
1335   /// Return the number of wait states that result from executing this
1336   /// instruction.
1337   static unsigned getNumWaitStates(const MachineInstr &MI);
1338 
1339   /// Returns the operand named \p Op.  If \p MI does not have an
1340   /// operand named \c Op, this function returns nullptr.
1341   LLVM_READONLY
1342   MachineOperand *getNamedOperand(MachineInstr &MI,
1343                                   AMDGPU::OpName OperandName) const;
1344 
1345   LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        AMDGPU::OpName OperandName) const {
    // const_cast is safe: constness is re-added to the returned pointer.
    return getNamedOperand(const_cast<MachineInstr &>(MI), OperandName);
  }
1350 
1351   /// Get required immediate operand
getNamedImmOperand(const MachineInstr & MI,AMDGPU::OpName OperandName)1352   int64_t getNamedImmOperand(const MachineInstr &MI,
1353                              AMDGPU::OpName OperandName) const {
1354     int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
1355     return MI.getOperand(Idx).getImm();
1356   }
1357 
1358   uint64_t getDefaultRsrcDataFormat() const;
1359   uint64_t getScratchRsrcWords23() const;
1360 
1361   bool isLowLatencyInstruction(const MachineInstr &MI) const;
1362   bool isHighLatencyDef(int Opc) const override;
1363 
1364   /// Return the descriptor of the target-specific machine instruction
1365   /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    // NOTE(review): pseudoToMCOpcode may return -1 when no MC equivalent
    // exists; callers are expected to pass only mappable opcodes — confirm
    // at call sites.
    return get(pseudoToMCOpcode(Opcode));
  }
1369 
1370   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1371   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1372 
1373   Register isLoadFromStackSlot(const MachineInstr &MI,
1374                                int &FrameIndex) const override;
1375   Register isStoreToStackSlot(const MachineInstr &MI,
1376                               int &FrameIndex) const override;
1377 
1378   unsigned getInstBundleSize(const MachineInstr &MI) const;
1379   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1380 
1381   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1382 
1383   std::pair<unsigned, unsigned>
1384   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1385 
1386   ArrayRef<std::pair<int, const char *>>
1387   getSerializableTargetIndices() const override;
1388 
1389   ArrayRef<std::pair<unsigned, const char *>>
1390   getSerializableDirectMachineOperandTargetFlags() const override;
1391 
1392   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1393   getSerializableMachineMemOperandTargetFlags() const override;
1394 
1395   ScheduleHazardRecognizer *
1396   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1397                                  const ScheduleDAG *DAG) const override;
1398 
1399   ScheduleHazardRecognizer *
1400   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1401 
1402   ScheduleHazardRecognizer *
1403   CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1404                                  const ScheduleDAGMI *DAG) const override;
1405 
1406   unsigned getLiveRangeSplitOpcode(Register Reg,
1407                                    const MachineFunction &MF) const override;
1408 
  /// \returns true if \p MI belongs to the prologue of its basic block.
  /// NOTE(review): presumably covers exec-mask setup / spill-restore code
  /// emitted before the block body -- confirm in the implementation.
  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  /// TargetInstrInfo hook: create the copy that writes a PHI's destination.
  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  /// TargetInstrInfo hook: create the copy that reads a PHI's source
  /// (with subregister \p SrcSubReg).
  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  /// \returns true if the subtarget uses 32-lane wavefronts.
  /// NOTE(review): inferred from the name; confirm against GCNSubtarget.
  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  /// Overload of getAddNoCarry that can scavenge a register for the unused
  /// carry destination when needed.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  /// \returns true if \p Opcode is a kill-terminator instruction.
  static bool isKillTerminator(unsigned Opcode);

  /// \returns the terminator form corresponding to a kill pseudo \p Opcode.
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  /// \returns true if \p Imm fits the MUBUF immediate offset field on this
  /// subtarget.
  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  /// \returns the maximum encodable MUBUF immediate offset for \p ST.
  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  /// Split \p Imm into a legal immediate offset \p ImmOffset and a remainder
  /// \p SOffset (to be materialized in a register); \p Alignment constrains
  /// the split. \returns whether the split succeeded.
  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;

  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction with the given \p FlatVariant.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// Returns true if negative offsets are allowed for the given \p FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  /// TargetInstrInfo hook: register class required for operand \p OpNum of
  /// \p TID in function \p MF.
  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  /// Fix up \p MI's implicit operands.
  /// NOTE(review): presumably adjusts exec/vcc implicit uses for the
  /// wave32/wave64 mode -- confirm in the implementation.
  void fixImplicitOperands(MachineInstr &MI) const;

  /// TargetInstrInfo hook: fold a memory operand (stack slot \p FrameIndex)
  /// into \p MI, producing a new instruction when possible.
  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  /// TargetInstrInfo hook: latency of \p MI in cycles.
  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  /// \returns the uniformity (uniform/divergent) of \p MI; final override.
  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  /// Uniformity classification for target-independent generic (G_*)
  /// instructions; used by getInstructionUniformity.
  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;
1496 
  /// Return the AMDGPU-specific MIR formatter, constructing it lazily on
  /// first use.
  /// NOTE(review): the lazy initialization is unsynchronized and mutates
  /// state from a const member -- assumes single-threaded access; confirm.
  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter)
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }
1502 
  /// \returns the shader-type value used by DS instructions for \p MF.
  /// NOTE(review): exact encoding inferred from the name -- confirm.
  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  /// Accessor for the cached target scheduling model.
  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce operand's \p OpName even alignment if required by target.
  // This is used if an operand is a 32 bit register but needs to be aligned
  // regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const;
};
1512 
1513 /// \brief Returns true if a reg:subreg pair P has a TRC class
isOfRegClass(const TargetInstrInfo::RegSubRegPair & P,const TargetRegisterClass & TRC,MachineRegisterInfo & MRI)1514 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1515                          const TargetRegisterClass &TRC,
1516                          MachineRegisterInfo &MRI) {
1517   auto *RC = MRI.getRegClass(P.Reg);
1518   if (!P.SubReg)
1519     return RC == &TRC;
1520   auto *TRI = MRI.getTargetRegisterInfo();
1521   return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1522 }
1523 
1524 /// \brief Create RegSubRegPair from a register MachineOperand
1525 inline
getRegSubRegPair(const MachineOperand & O)1526 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1527   assert(O.isReg());
1528   return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1529 }
1530 
/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
/// (I.e. a true result means EXEC *may* have been modified -- conservative.)
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
/// (I.e. a true result means EXEC *may* have been modified -- conservative.)
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
1555 
namespace AMDGPU {

  // NOTE(review): the helpers below appear to be queries into
  // TableGen-generated opcode relation tables (cf. AMDGPUGenInstrInfo.inc
  // included at the top of this header), mapping an opcode to a related
  // encoding/variant -- confirm return conventions against the generated .inc.

  LLVM_READONLY
  int getVOPe64(uint16_t Opcode);

  LLVM_READONLY
  int getVOPe32(uint16_t Opcode);

  LLVM_READONLY
  int getSDWAOp(uint16_t Opcode);

  LLVM_READONLY
  int getDPPOp32(uint16_t Opcode);

  LLVM_READONLY
  int getDPPOp64(uint16_t Opcode);

  LLVM_READONLY
  int getBasicFromSDWAOp(uint16_t Opcode);

  LLVM_READONLY
  int getCommuteRev(uint16_t Opcode);

  LLVM_READONLY
  int getCommuteOrig(uint16_t Opcode);

  LLVM_READONLY
  int getAddr64Inst(uint16_t Opcode);

  /// Check if \p Opcode is an Addr64 opcode.
  ///
  /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
  LLVM_READONLY
  int getIfAddr64Inst(uint16_t Opcode);

  LLVM_READONLY
  int getSOPKOp(uint16_t Opcode);

  /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
  /// of a VADDR form.
  LLVM_READONLY
  int getGlobalSaddrOp(uint16_t Opcode);

  /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
  /// of a SADDR form.
  LLVM_READONLY
  int getGlobalVaddrOp(uint16_t Opcode);

  LLVM_READONLY
  int getVCMPXNoSDstOp(uint16_t Opcode);

  /// \returns ST form with only immediate offset of a FLAT Scratch instruction
  /// given an \p Opcode of an SS (SADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSTfromSS(uint16_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SVS (SADDR + VADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSVfromSVS(uint16_t Opcode);

  /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SV (VADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSSfromSV(uint16_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SS (SADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSVfromSS(uint16_t Opcode);

  /// \returns the earlyclobber version of a MAC MFMA, if one exists.
  LLVM_READONLY
  int getMFMAEarlyClobberOp(uint16_t Opcode);

  /// \returns Version of an MFMA instruction which uses AGPRs for srcC and
  /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
  LLVM_READONLY
  int getMFMASrcCVDstAGPROp(uint16_t Opcode);

  /// \returns v_cmpx version of a v_cmp instruction.
  LLVM_READONLY
  int getVCMPXOpFromVCMP(uint16_t Opcode);

  // Buffer resource descriptor field constants.
  // NOTE(review): bit positions (32 + N) address the high dword of the
  // 128-bit SRD -- confirm field names against the ISA documentation.
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU
1646 
namespace AMDGPU {
// Target-specific assembly-comment flag bits, allocated starting at the
// first bit TargetInstrInfo reserves for targets (MachineInstr::TAsmComments).
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU
1653 
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
/// (work-group counts, then global sizes, then local sizes; one 32-bit
/// value per dimension).
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1672 
1673 } // end namespace llvm
1674 
1675 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1676