xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 namespace llvm {
29 
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38 
39 /// Mark the MMO of a uniform load if there are no potentially clobbering stores
40 /// on any path from the start of an entry function to this load.
/// The marker is carried in the MOTargetFlag1 bit of the MMO flags.
41 static const MachineMemOperand::Flags MONoClobber =
42     MachineMemOperand::MOTargetFlag1;
43 
44 /// Mark the MMO of a load as the last use.
/// The marker is carried in the MOTargetFlag2 bit of the MMO flags.
45 static const MachineMemOperand::Flags MOLastUse =
46     MachineMemOperand::MOTargetFlag2;
47 
48 /// Utility to store machine instructions worklist.
49 struct SIInstrWorklist {
50   SIInstrWorklist() = default;
51 
52   void insert(MachineInstr *MI);
53 
54   MachineInstr *top() const {
55     auto iter = InstrList.begin();
56     return *iter;
57   }
58 
59   void erase_top() {
60     auto iter = InstrList.begin();
61     InstrList.erase(iter);
62   }
63 
64   bool empty() const { return InstrList.empty(); }
65 
66   void clear() {
67     InstrList.clear();
68     DeferredList.clear();
69   }
70 
71   bool isDeferred(MachineInstr *MI);
72 
73   SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
74 
75 private:
76   /// InstrList contains the MachineInstrs.
77   SetVector<MachineInstr *> InstrList;
78   /// Deferred instructions are specific MachineInstr
79   /// that will be added by insert method.
80   SetVector<MachineInstr *> DeferredList;
81 };
82 
83 class SIInstrInfo final : public AMDGPUGenInstrInfo {
84 private:
  // RI and ST are the objects handed out by getRegisterInfo() and
  // getSubtarget().
85   const SIRegisterInfo RI;
86   const GCNSubtarget &ST;
87   TargetSchedModel SchedModel;
  // Declared mutable, so const member functions are able to assign it.
  // NOTE(review): presumably created on demand -- confirm in the .cpp file.
88   mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
89 
90   // The inverse predicate should have the negative value.
  // Concretely: SCC_TRUE/SCC_FALSE are +1/-1, VCCNZ/VCCZ are +2/-2 and
  // EXECZ/EXECNZ are +3/-3. INVALID_BR is 0 and therefore has no inverse.
91   enum BranchPredicate {
92     INVALID_BR = 0,
93     SCC_TRUE = 1,
94     SCC_FALSE = -1,
95     VCCNZ = 2,
96     VCCZ = -2,
97     EXECNZ = -3,
98     EXECZ = 3
99   };
100 
  // SmallSetVector of MachineInstr pointers with a small-size parameter of 32.
101   using SetVectorType = SmallSetVector<MachineInstr *, 32>;
102 
  // Conversions between BranchPredicate values and branch opcodes; both are
  // defined out of line.
103   static unsigned getBranchOpcode(BranchPredicate Cond);
104   static BranchPredicate getBranchPredicate(unsigned Opcode);
105 
106 public:
  // Two out-of-line helpers with identical parameter lists. The first returns
  // a plain unsigned register value, the second a MachineOperand.
  // NOTE(review): judging by the names, the second may yield an immediate
  // operand instead of a register -- confirm against the definitions.
107   unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
108                               MachineRegisterInfo &MRI,
109                               const MachineOperand &SuperReg,
110                               const TargetRegisterClass *SuperRC,
111                               unsigned SubIdx,
112                               const TargetRegisterClass *SubRC) const;
113   MachineOperand buildExtractSubRegOrImm(
114       MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
115       const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
116       unsigned SubIdx, const TargetRegisterClass *SubRC) const;
117 
118 private:
  // Out-of-line rewriting helpers. The ones taking an SIInstrWorklist receive
  // it by non-const reference, so they are able to queue further instructions.
  // NOTE(review): presumably used while moving scalar instructions to the
  // VALU -- confirm in the implementation file.
119   void swapOperands(MachineInstr &Inst) const;
120 
  // Returns a (bool, MachineBasicBlock *) pair. MDT is optional and defaults
  // to nullptr.
121   std::pair<bool, MachineBasicBlock *>
122   moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
123                    MachineDominatorTree *MDT = nullptr) const;
124 
  // MDT is optional here as well (defaults to nullptr).
125   void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
126                    MachineDominatorTree *MDT = nullptr) const;
127 
128   void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
129 
130   void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
131 
  // Family of out-of-line "split" helpers. All take the worklist and the
  // instruction being rewritten; several also take the opcode to use.
  // NOTE(review): presumably each replaces Inst with narrower or differently
  // shaped instructions -- confirm in the implementation file.
132   void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
133                            unsigned Opcode) const;
134 
135   void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
136                           unsigned Opcode) const;
137 
  // Swap defaults to false.
138   void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
139                                unsigned Opcode, bool Swap = false) const;
140 
  // MDT is optional (defaults to nullptr).
141   void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
142                                 unsigned Opcode,
143                                 MachineDominatorTree *MDT = nullptr) const;
144 
  // Unlike their neighbours, the two SMul helpers have no default for MDT, so
  // callers must always pass one explicitly.
145   void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
146                           MachineDominatorTree *MDT) const;
147 
148   void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
149                              MachineDominatorTree *MDT) const;
150 
151   void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
152                             MachineDominatorTree *MDT = nullptr) const;
153 
154   void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
155                             MachineInstr &Inst) const;
156   void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
157   void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
158                                unsigned Opcode,
159                                MachineDominatorTree *MDT = nullptr) const;
  // Also receives the MachineRegisterInfo, by non-const reference.
160   void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
161                       MachineInstr &Inst) const;
162 
  // Out-of-line helpers that receive the worklist by non-const reference.
  // NOTE(review): going by the names they enqueue users/defs of a register
  // or of SCC -- confirm in the implementation file.
163   void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
164                                     SIInstrWorklist &Worklist) const;
165 
  // NewCond is optional and defaults to a default-constructed Register.
166   void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
167                                     MachineInstr &SCCDefInst,
168                                     SIInstrWorklist &Worklist,
169                                     Register NewCond = Register()) const;
170   void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
171                                 SIInstrWorklist &Worklist) const;
172 
  // Returns a register-class pointer for Inst; defined out of line.
173   const TargetRegisterClass *
174   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
175 
  // Takes two instructions by const reference; defined out of line.
176   bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
177                                     const MachineInstr &MIb) const;
178 
  // OpIndices is declared as an array of three ints (as a parameter it decays
  // to int *, so the extent is documentation only).
179   Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
180 
181 protected:
182   /// If the specific machine instruction is an instruction that moves/copies
183   /// value from one register to another register return destination and source
184   /// registers as machine operands.
185   std::optional<DestSourcePair>
186   isCopyInstrImpl(const MachineInstr &MI) const override;
187 
  /// Takes the two source operands together with their operand names and
  /// reports a bool; defined out of line.
188   bool swapSourceModifiers(MachineInstr &MI,
189                            MachineOperand &Src0, unsigned Src0OpName,
190                            MachineOperand &Src1, unsigned Src1OpName) const;
191 
  /// Override of the base-class commute hook for the operand pair
  /// (\p OpIdx0, \p OpIdx1); defined out of line.
192   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
193                                        unsigned OpIdx0,
194                                        unsigned OpIdx1) const override;
195 
196 public:
  // Target operand flag values. Every value defined here fits inside MO_MASK
  // (0xf). MO_GOTPCREL32 and MO_REL32 are aliases carrying the same values as
  // their _LO counterparts (2 and 4). The value 7 is not assigned.
197   enum TargetOperandFlags {
198     MO_MASK = 0xf,
199 
200     MO_NONE = 0,
201     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
202     MO_GOTPCREL = 1,
203     // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
204     MO_GOTPCREL32 = 2,
205     MO_GOTPCREL32_LO = 2,
206     // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
207     MO_GOTPCREL32_HI = 3,
208     // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
209     MO_REL32 = 4,
210     MO_REL32_LO = 4,
211     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
212     MO_REL32_HI = 5,
213 
214     MO_FAR_BRANCH_OFFSET = 6,
215 
216     MO_ABS32_LO = 8,
217     MO_ABS32_HI = 9,
218   };
219 
220   explicit SIInstrInfo(const GCNSubtarget &ST);
221 
222   const SIRegisterInfo &getRegisterInfo() const {
223     return RI;
224   }
225 
226   const GCNSubtarget &getSubtarget() const {
227     return ST;
228   }
229 
  // Declarations of base-class hooks implemented by this target. Each one is
  // marked override or final; the definitions live out of line.
230   bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
231 
232   bool isIgnorableUse(const MachineOperand &MO) const override;
233 
234   bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
235                     MachineCycleInfo *CI) const override;
236 
  // Offset0 and Offset1 are out-parameters (non-const references).
237   bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
238                                int64_t &Offset1) const override;
239 
  // BaseOps, Offset, OffsetIsScalable and Width are out-parameters. Marked
  // final, so this hook cannot be overridden further.
240   bool getMemOperandsWithOffsetWidth(
241       const MachineInstr &LdSt,
242       SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
243       bool &OffsetIsScalable, LocationSize &Width,
244       const TargetRegisterInfo *TRI) const final;
245 
246   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
247                            int64_t Offset1, bool OffsetIsScalable1,
248                            ArrayRef<const MachineOperand *> BaseOps2,
249                            int64_t Offset2, bool OffsetIsScalable2,
250                            unsigned ClusterSize,
251                            unsigned NumBytes) const override;
252 
253   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
254                                int64_t Offset1, unsigned NumLoads) const override;
255 
  // Base-class hook (marked override); defined out of line.
256   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
257                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
258                    bool KillSrc) const override;
259 
  // The remaining declarations in this group are target-specific helpers (no
  // override marker); all are defined out of line.
260   void materializeImmediate(MachineBasicBlock &MBB,
261                             MachineBasicBlock::iterator MI, const DebugLoc &DL,
262                             Register DestReg, int64_t Value) const;
263 
264   const TargetRegisterClass *getPreferredSelectRegClass(
265                                unsigned Size) const;
266 
  // insertNE and insertEQ share one signature and both return a Register.
  // NOTE(review): presumably the register holding the result of comparing
  // SrcReg against Value -- confirm in the implementation file.
267   Register insertNE(MachineBasicBlock *MBB,
268                     MachineBasicBlock::iterator I, const DebugLoc &DL,
269                     Register SrcReg, int Value) const;
270 
271   Register insertEQ(MachineBasicBlock *MBB,
272                     MachineBasicBlock::iterator I, const DebugLoc &DL,
273                     Register SrcReg, int Value)  const;
274 
  // Base-class hooks for spilling to and reloading from a frame index, for
  // post-RA pseudo expansion and for rematerialization. All are marked
  // override and defined out of line.
275   void storeRegToStackSlot(MachineBasicBlock &MBB,
276                            MachineBasicBlock::iterator MI, Register SrcReg,
277                            bool isKill, int FrameIndex,
278                            const TargetRegisterClass *RC,
279                            const TargetRegisterInfo *TRI,
280                            Register VReg) const override;
281 
282   void loadRegFromStackSlot(MachineBasicBlock &MBB,
283                             MachineBasicBlock::iterator MI, Register DestReg,
284                             int FrameIndex, const TargetRegisterClass *RC,
285                             const TargetRegisterInfo *TRI,
286                             Register VReg) const override;
287 
288   bool expandPostRAPseudo(MachineInstr &MI) const override;
289 
290   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
291                      Register DestReg, unsigned SubIdx,
292                      const MachineInstr &Orig,
293                      const TargetRegisterInfo &TRI) const override;
294 
295   // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
296   // instructions. Returns a pair of generated instructions.
297   // Can split either post-RA with physical registers or pre-RA with
298   // virtual registers. In the latter case the IR needs to be in SSA form,
299   // and a REG_SEQUENCE is produced to define the original register.
300   std::pair<MachineInstr*, MachineInstr*>
301   expandMovDPP64(MachineInstr &MI) const;
302 
303   // Returns an opcode that can be used to move a value to a \p DstRC
304   // register.  If there is no hardware instruction that can store to \p
305   // DstRC, then AMDGPU::COPY is returned.
306   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
307 
  // The two lookups below return a reference to an MCInstrDesc selected by
  // the given sizes and flags; both are defined out of line.
308   const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
309                                                      unsigned EltSize,
310                                                      bool IsSGPR) const;
311 
312   const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
313                                              bool IsIndirectSrc) const;
  // Opcode-based commute lookup; note the return type is a signed int.
  // NOTE(review): presumably a negative value signals "no commuted form" --
  // confirm in the implementation file.
314   LLVM_READONLY
315   int commuteOpcode(unsigned Opc) const;
316 
317   LLVM_READONLY
318   inline int commuteOpcode(const MachineInstr &MI) const {
319     return commuteOpcode(MI.getOpcode());
320   }
321 
  // Base-class hook: the operand indices are passed by reference and the
  // bool result reports success.
322   bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
323                              unsigned &SrcOpIdx1) const override;
324 
  // Target-specific overload of the same query working on an instruction
  // descriptor instead of an instruction (not an override).
325   bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
326                              unsigned &SrcOpIdx1) const;
327 
328   bool isBranchOffsetInRange(unsigned BranchOpc,
329                              int64_t BrOffset) const override;
330 
331   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
332 
333   /// Return whether the block terminates with a divergent branch.
334   /// Note this only works before lowering the pseudo control flow instructions.
335   bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
336 
337   void insertIndirectBranch(MachineBasicBlock &MBB,
338                             MachineBasicBlock &NewDestBB,
339                             MachineBasicBlock &RestoreBB, const DebugLoc &DL,
340                             int64_t BrOffset, RegScavenger *RS) const override;
341 
  // Target-specific variant of analyzeBranch that additionally takes a block
  // iterator \p I and has no default for AllowModify (not an override).
  // NOTE(review): presumably the worker that analyzeBranch delegates to --
  // confirm in the implementation file.
342   bool analyzeBranchImpl(MachineBasicBlock &MBB,
343                          MachineBasicBlock::iterator I,
344                          MachineBasicBlock *&TBB,
345                          MachineBasicBlock *&FBB,
346                          SmallVectorImpl<MachineOperand> &Cond,
347                          bool AllowModify) const;
348 
  // Base-class branch hooks. TBB, FBB and Cond are out-parameters of
  // analyzeBranch; AllowModify defaults to false.
349   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
350                      MachineBasicBlock *&FBB,
351                      SmallVectorImpl<MachineOperand> &Cond,
352                      bool AllowModify = false) const override;
353 
  // BytesRemoved is optional (defaults to nullptr).
354   unsigned removeBranch(MachineBasicBlock &MBB,
355                         int *BytesRemoved = nullptr) const override;
356 
  // BytesAdded is optional (defaults to nullptr).
357   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
358                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
359                         const DebugLoc &DL,
360                         int *BytesAdded = nullptr) const override;
361 
  // Cond is modified in place (non-const reference).
362   bool reverseBranchCondition(
363     SmallVectorImpl<MachineOperand> &Cond) const override;
364 
  // Base-class select hooks. The three *Cycles arguments of canInsertSelect
  // are out-parameters.
365   bool canInsertSelect(const MachineBasicBlock &MBB,
366                        ArrayRef<MachineOperand> Cond, Register DstReg,
367                        Register TrueReg, Register FalseReg, int &CondCycles,
368                        int &TrueCycles, int &FalseCycles) const override;
369 
370   void insertSelect(MachineBasicBlock &MBB,
371                     MachineBasicBlock::iterator I, const DebugLoc &DL,
372                     Register DstReg, ArrayRef<MachineOperand> Cond,
373                     Register TrueReg, Register FalseReg) const override;
374 
  // Same parameter list as insertSelect, but a target-specific helper (not
  // an override).
375   void insertVectorSelect(MachineBasicBlock &MBB,
376                           MachineBasicBlock::iterator I, const DebugLoc &DL,
377                           Register DstReg, ArrayRef<MachineOperand> Cond,
378                           Register TrueReg, Register FalseReg) const;
379 
  // Base-class compare hooks. analyzeCompare reports its findings through the
  // four reference parameters; optimizeCompareInstr takes them by value.
380   bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
381                       Register &SrcReg2, int64_t &CmpMask,
382                       int64_t &CmpValue) const override;
383 
384   bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
385                             Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
386                             const MachineRegisterInfo *MRI) const override;
387 
388   bool
389   areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
390                                   const MachineInstr &MIb) const override;
391 
  // Static: needs no SIInstrInfo instance.
392   static bool isFoldableCopy(const MachineInstr &MI);
393 
394   void removeModOperands(MachineInstr &MI) const;
395 
  // Base-class hook, marked final so it cannot be overridden further.
396   bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
397                      MachineRegisterInfo *MRI) const final;
398 
399   unsigned getMachineCSELookAheadLimit() const override { return 500; }
400 
  // Base-class hooks (marked override); both are defined out of line.
401   MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
402                                       LiveIntervals *LIS) const override;
403 
404   bool isSchedulingBoundary(const MachineInstr &MI,
405                             const MachineBasicBlock *MBB,
406                             const MachineFunction &MF) const override;
407 
408   static bool isSALU(const MachineInstr &MI) {
409     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
410   }
411 
412   bool isSALU(uint16_t Opcode) const {
413     return get(Opcode).TSFlags & SIInstrFlags::SALU;
414   }
415 
416   static bool isVALU(const MachineInstr &MI) {
417     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
418   }
419 
420   bool isVALU(uint16_t Opcode) const {
421     return get(Opcode).TSFlags & SIInstrFlags::VALU;
422   }
423 
424   static bool isImage(const MachineInstr &MI) {
425     return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
426   }
427 
428   bool isImage(uint16_t Opcode) const {
429     return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
430   }
431 
432   static bool isVMEM(const MachineInstr &MI) {
433     return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
434   }
435 
436   bool isVMEM(uint16_t Opcode) const {
437     return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
438   }
439 
440   static bool isSOP1(const MachineInstr &MI) {
441     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
442   }
443 
444   bool isSOP1(uint16_t Opcode) const {
445     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
446   }
447 
448   static bool isSOP2(const MachineInstr &MI) {
449     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
450   }
451 
452   bool isSOP2(uint16_t Opcode) const {
453     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
454   }
455 
456   static bool isSOPC(const MachineInstr &MI) {
457     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
458   }
459 
460   bool isSOPC(uint16_t Opcode) const {
461     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
462   }
463 
464   static bool isSOPK(const MachineInstr &MI) {
465     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
466   }
467 
468   bool isSOPK(uint16_t Opcode) const {
469     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
470   }
471 
472   static bool isSOPP(const MachineInstr &MI) {
473     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
474   }
475 
476   bool isSOPP(uint16_t Opcode) const {
477     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
478   }
479 
480   static bool isPacked(const MachineInstr &MI) {
481     return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
482   }
483 
484   bool isPacked(uint16_t Opcode) const {
485     return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
486   }
487 
488   static bool isVOP1(const MachineInstr &MI) {
489     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
490   }
491 
492   bool isVOP1(uint16_t Opcode) const {
493     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
494   }
495 
496   static bool isVOP2(const MachineInstr &MI) {
497     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
498   }
499 
500   bool isVOP2(uint16_t Opcode) const {
501     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
502   }
503 
504   static bool isVOP3(const MachineInstr &MI) {
505     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
506   }
507 
508   bool isVOP3(uint16_t Opcode) const {
509     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
510   }
511 
512   static bool isSDWA(const MachineInstr &MI) {
513     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
514   }
515 
516   bool isSDWA(uint16_t Opcode) const {
517     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
518   }
519 
520   static bool isVOPC(const MachineInstr &MI) {
521     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
522   }
523 
524   bool isVOPC(uint16_t Opcode) const {
525     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
526   }
527 
528   static bool isMUBUF(const MachineInstr &MI) {
529     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
530   }
531 
532   bool isMUBUF(uint16_t Opcode) const {
533     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
534   }
535 
536   static bool isMTBUF(const MachineInstr &MI) {
537     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
538   }
539 
540   bool isMTBUF(uint16_t Opcode) const {
541     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
542   }
543 
544   static bool isSMRD(const MachineInstr &MI) {
545     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
546   }
547 
548   bool isSMRD(uint16_t Opcode) const {
549     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
550   }
551 
552   bool isBufferSMRD(const MachineInstr &MI) const;
553 
554   static bool isDS(const MachineInstr &MI) {
555     return MI.getDesc().TSFlags & SIInstrFlags::DS;
556   }
557 
558   bool isDS(uint16_t Opcode) const {
559     return get(Opcode).TSFlags & SIInstrFlags::DS;
560   }
561 
562   static bool isLDSDMA(const MachineInstr &MI) {
563     return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
564   }
565 
566   bool isLDSDMA(uint16_t Opcode) {
567     return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
568   }
569 
570   static bool isGWS(const MachineInstr &MI) {
571     return MI.getDesc().TSFlags & SIInstrFlags::GWS;
572   }
573 
574   bool isGWS(uint16_t Opcode) const {
575     return get(Opcode).TSFlags & SIInstrFlags::GWS;
576   }
577 
578   bool isAlwaysGDS(uint16_t Opcode) const;
579 
580   static bool isMIMG(const MachineInstr &MI) {
581     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
582   }
583 
584   bool isMIMG(uint16_t Opcode) const {
585     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
586   }
587 
588   static bool isVIMAGE(const MachineInstr &MI) {
589     return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
590   }
591 
592   bool isVIMAGE(uint16_t Opcode) const {
593     return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
594   }
595 
596   static bool isVSAMPLE(const MachineInstr &MI) {
597     return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
598   }
599 
600   bool isVSAMPLE(uint16_t Opcode) const {
601     return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
602   }
603 
604   static bool isGather4(const MachineInstr &MI) {
605     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
606   }
607 
608   bool isGather4(uint16_t Opcode) const {
609     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
610   }
611 
612   static bool isFLAT(const MachineInstr &MI) {
613     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
614   }
615 
616   // Is a FLAT encoded instruction which accesses a specific segment,
617   // i.e. global_* or scratch_*.
618   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
619     auto Flags = MI.getDesc().TSFlags;
620     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
621   }
622 
623   bool isSegmentSpecificFLAT(uint16_t Opcode) const {
624     auto Flags = get(Opcode).TSFlags;
625     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
626   }
627 
628   static bool isFLATGlobal(const MachineInstr &MI) {
629     return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
630   }
631 
632   bool isFLATGlobal(uint16_t Opcode) const {
633     return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
634   }
635 
636   static bool isFLATScratch(const MachineInstr &MI) {
637     return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
638   }
639 
640   bool isFLATScratch(uint16_t Opcode) const {
641     return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
642   }
643 
644   // Any FLAT encoded instruction, including global_* and scratch_*.
645   bool isFLAT(uint16_t Opcode) const {
646     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
647   }
648 
649   static bool isEXP(const MachineInstr &MI) {
650     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
651   }
652 
653   static bool isDualSourceBlendEXP(const MachineInstr &MI) {
654     if (!isEXP(MI))
655       return false;
656     unsigned Target = MI.getOperand(0).getImm();
657     return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
658            Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
659   }
660 
661   bool isEXP(uint16_t Opcode) const {
662     return get(Opcode).TSFlags & SIInstrFlags::EXP;
663   }
664 
665   static bool isAtomicNoRet(const MachineInstr &MI) {
666     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
667   }
668 
669   bool isAtomicNoRet(uint16_t Opcode) const {
670     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
671   }
672 
673   static bool isAtomicRet(const MachineInstr &MI) {
674     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
675   }
676 
677   bool isAtomicRet(uint16_t Opcode) const {
678     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
679   }
680 
681   static bool isAtomic(const MachineInstr &MI) {
682     return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
683                                    SIInstrFlags::IsAtomicNoRet);
684   }
685 
686   bool isAtomic(uint16_t Opcode) const {
687     return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
688                                   SIInstrFlags::IsAtomicNoRet);
689   }
690 
691   static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
692     return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
693   }
694 
695   static bool isWQM(const MachineInstr &MI) {
696     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
697   }
698 
699   bool isWQM(uint16_t Opcode) const {
700     return get(Opcode).TSFlags & SIInstrFlags::WQM;
701   }
702 
703   static bool isDisableWQM(const MachineInstr &MI) {
704     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
705   }
706 
707   bool isDisableWQM(uint16_t Opcode) const {
708     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
709   }
710 
711   // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
712   // SGPRs spilling to VGPRs which are SGPR spills but from VALU instructions
713   // therefore we need an explicit check for them since just checking if the
714   // Spill bit is set and what instruction type it came from misclassifies
715   // them.
716   static bool isVGPRSpill(const MachineInstr &MI) {
717     return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
718            MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
719            (isSpill(MI) && isVALU(MI));
720   }
721 
722   bool isVGPRSpill(uint16_t Opcode) const {
723     return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
724            Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
725            (isSpill(Opcode) && isVALU(Opcode));
726   }
727 
728   static bool isSGPRSpill(const MachineInstr &MI) {
729     return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
730            MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
731            (isSpill(MI) && isSALU(MI));
732   }
733 
734   bool isSGPRSpill(uint16_t Opcode) const {
735     return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
736            Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
737            (isSpill(Opcode) && isSALU(Opcode));
738   }
739 
740   bool isSpill(uint16_t Opcode) const {
741     return get(Opcode).TSFlags & SIInstrFlags::Spill;
742   }
743 
744   static bool isSpill(const MachineInstr &MI) {
745     return MI.getDesc().TSFlags & SIInstrFlags::Spill;
746   }
747 
748   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
749     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
750            Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
751            Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
752            Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
753   }
754 
755   static bool isChainCallOpcode(uint64_t Opcode) {
756     return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
757            Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
758   }
759 
760   static bool isDPP(const MachineInstr &MI) {
761     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
762   }
763 
764   bool isDPP(uint16_t Opcode) const {
765     return get(Opcode).TSFlags & SIInstrFlags::DPP;
766   }
767 
768   static bool isTRANS(const MachineInstr &MI) {
769     return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
770   }
771 
772   bool isTRANS(uint16_t Opcode) const {
773     return get(Opcode).TSFlags & SIInstrFlags::TRANS;
774   }
775 
776   static bool isVOP3P(const MachineInstr &MI) {
777     return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
778   }
779 
780   bool isVOP3P(uint16_t Opcode) const {
781     return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
782   }
783 
784   static bool isVINTRP(const MachineInstr &MI) {
785     return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
786   }
787 
788   bool isVINTRP(uint16_t Opcode) const {
789     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
790   }
791 
792   static bool isMAI(const MachineInstr &MI) {
793     return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
794   }
795 
796   bool isMAI(uint16_t Opcode) const {
797     return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
798   }
799 
800   static bool isMFMA(const MachineInstr &MI) {
801     return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
802            MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
803   }
804 
805   static bool isDOT(const MachineInstr &MI) {
806     return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
807   }
808 
809   static bool isWMMA(const MachineInstr &MI) {
810     return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
811   }
812 
813   bool isWMMA(uint16_t Opcode) const {
814     return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
815   }
816 
817   static bool isMFMAorWMMA(const MachineInstr &MI) {
818     return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
819   }
820 
821   static bool isSWMMAC(const MachineInstr &MI) {
822     return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
823   }
824 
825   bool isSWMMAC(uint16_t Opcode) const {
826     return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
827   }
828 
829   bool isDOT(uint16_t Opcode) const {
830     return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
831   }
832 
833   static bool isLDSDIR(const MachineInstr &MI) {
834     return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
835   }
836 
837   bool isLDSDIR(uint16_t Opcode) const {
838     return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
839   }
840 
841   static bool isVINTERP(const MachineInstr &MI) {
842     return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
843   }
844 
845   bool isVINTERP(uint16_t Opcode) const {
846     return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
847   }
848 
849   static bool isScalarUnit(const MachineInstr &MI) {
850     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
851   }
852 
853   static bool usesVM_CNT(const MachineInstr &MI) {
854     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
855   }
856 
857   static bool usesLGKM_CNT(const MachineInstr &MI) {
858     return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
859   }
860 
861   // Most sopk treat the immediate as a signed 16-bit, however some
862   // use it as unsigned.
863   static bool sopkIsZext(unsigned Opcode) {
864     return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
865            Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
866            Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
867            Opcode == AMDGPU::S_GETREG_B32;
868   }
869 
870   /// \returns true if this is an s_store_dword* instruction. This is more
871   /// specific than isSMEM && mayStore.
872   static bool isScalarStore(const MachineInstr &MI) {
873     return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
874   }
875 
876   bool isScalarStore(uint16_t Opcode) const {
877     return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
878   }
879 
880   static bool isFixedSize(const MachineInstr &MI) {
881     return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
882   }
883 
884   bool isFixedSize(uint16_t Opcode) const {
885     return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
886   }
887 
888   static bool hasFPClamp(const MachineInstr &MI) {
889     return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
890   }
891 
892   bool hasFPClamp(uint16_t Opcode) const {
893     return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
894   }
895 
896   static bool hasIntClamp(const MachineInstr &MI) {
897     return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
898   }
899 
900   uint64_t getClampMask(const MachineInstr &MI) const {
901     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
902                                 SIInstrFlags::IntClamp |
903                                 SIInstrFlags::ClampLo |
904                                 SIInstrFlags::ClampHi;
905       return MI.getDesc().TSFlags & ClampFlags;
906   }
907 
908   static bool usesFPDPRounding(const MachineInstr &MI) {
909     return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
910   }
911 
912   bool usesFPDPRounding(uint16_t Opcode) const {
913     return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
914   }
915 
916   static bool isFPAtomic(const MachineInstr &MI) {
917     return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
918   }
919 
920   bool isFPAtomic(uint16_t Opcode) const {
921     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
922   }
923 
924   static bool isNeverUniform(const MachineInstr &MI) {
925     return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
926   }
927 
928   // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
929   // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
930   // to check for the barrier start (S_BARRIER_SIGNAL*)
931   bool isBarrierStart(unsigned Opcode) const {
932     return Opcode == AMDGPU::S_BARRIER ||
933            Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
934            Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
935            Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
936            Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
937   }
938 
939   bool isBarrier(unsigned Opcode) const {
940     return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
941            Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
942            Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
943            Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
944            Opcode == AMDGPU::S_BARRIER_LEAVE ||
945            Opcode == AMDGPU::DS_GWS_INIT ||
946            Opcode == AMDGPU::DS_GWS_BARRIER;
947   }
948 
949   static bool isF16PseudoScalarTrans(unsigned Opcode) {
950     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
951            Opcode == AMDGPU::V_S_LOG_F16_e64 ||
952            Opcode == AMDGPU::V_S_RCP_F16_e64 ||
953            Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
954            Opcode == AMDGPU::V_S_SQRT_F16_e64;
955   }
956 
957   static bool doesNotReadTiedSource(const MachineInstr &MI) {
958     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
959   }
960 
961   bool doesNotReadTiedSource(uint16_t Opcode) const {
962     return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
963   }
964 
965   static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
966     switch (Opcode) {
967     case AMDGPU::S_WAITCNT_soft:
968       return AMDGPU::S_WAITCNT;
969     case AMDGPU::S_WAITCNT_VSCNT_soft:
970       return AMDGPU::S_WAITCNT_VSCNT;
971     case AMDGPU::S_WAIT_LOADCNT_soft:
972       return AMDGPU::S_WAIT_LOADCNT;
973     case AMDGPU::S_WAIT_STORECNT_soft:
974       return AMDGPU::S_WAIT_STORECNT;
975     case AMDGPU::S_WAIT_SAMPLECNT_soft:
976       return AMDGPU::S_WAIT_SAMPLECNT;
977     case AMDGPU::S_WAIT_BVHCNT_soft:
978       return AMDGPU::S_WAIT_BVHCNT;
979     case AMDGPU::S_WAIT_DSCNT_soft:
980       return AMDGPU::S_WAIT_DSCNT;
981     case AMDGPU::S_WAIT_KMCNT_soft:
982       return AMDGPU::S_WAIT_KMCNT;
983     default:
984       return Opcode;
985     }
986   }
987 
988   bool isWaitcnt(unsigned Opcode) const {
989     switch (getNonSoftWaitcntOpcode(Opcode)) {
990     case AMDGPU::S_WAITCNT:
991     case AMDGPU::S_WAITCNT_VSCNT:
992     case AMDGPU::S_WAITCNT_VMCNT:
993     case AMDGPU::S_WAITCNT_EXPCNT:
994     case AMDGPU::S_WAITCNT_LGKMCNT:
995     case AMDGPU::S_WAIT_LOADCNT:
996     case AMDGPU::S_WAIT_LOADCNT_DSCNT:
997     case AMDGPU::S_WAIT_STORECNT:
998     case AMDGPU::S_WAIT_STORECNT_DSCNT:
999     case AMDGPU::S_WAIT_SAMPLECNT:
1000     case AMDGPU::S_WAIT_BVHCNT:
1001     case AMDGPU::S_WAIT_EXPCNT:
1002     case AMDGPU::S_WAIT_DSCNT:
1003     case AMDGPU::S_WAIT_KMCNT:
1004     case AMDGPU::S_WAIT_IDLE:
1005       return true;
1006     default:
1007       return false;
1008     }
1009   }
1010 
1011   bool isVGPRCopy(const MachineInstr &MI) const {
1012     assert(isCopyInstr(MI));
1013     Register Dest = MI.getOperand(0).getReg();
1014     const MachineFunction &MF = *MI.getParent()->getParent();
1015     const MachineRegisterInfo &MRI = MF.getRegInfo();
1016     return !RI.isSGPRReg(MRI, Dest);
1017   }
1018 
1019   bool hasVGPRUses(const MachineInstr &MI) const {
1020     const MachineFunction &MF = *MI.getParent()->getParent();
1021     const MachineRegisterInfo &MRI = MF.getRegInfo();
1022     return llvm::any_of(MI.explicit_uses(),
1023                         [&MRI, this](const MachineOperand &MO) {
1024       return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
1025   }
1026 
1027   /// Return true if the instruction modifies the mode register.
1028   static bool modifiesModeRegister(const MachineInstr &MI);
1029 
1030   /// This function is used to determine if an instruction can be safely
1031   /// executed under EXEC = 0 without hardware error, indeterminate results,
1032   /// and/or visible effects on future vector execution or outside the shader.
1033   /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
1034   /// used in removing branches over short EXEC = 0 sequences.
1035   /// As such it embeds certain assumptions which may not apply to every case
1036   /// of EXEC = 0 execution.
1037   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
1038 
1039   /// Returns true if the instruction could potentially depend on the value of
1040   /// exec. If false, exec dependencies may safely be ignored.
1041   bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
1042 
       /// Overload of isInlineConstant taking an APInt immediate. Only declared
       /// here; the definition is not part of this section of the file.
1043   bool isInlineConstant(const APInt &Imm) const;
1044 
       /// Overload of isInlineConstant taking an APFloat immediate. Only
       /// declared here; the definition is not part of this section of the file.
1045   bool isInlineConstant(const APFloat &Imm) const;
1046 
1047   // Returns true if this non-register operand definitely does not need to be
1048   // encoded as a 32-bit literal. Note that this function handles all kinds of
1049   // operands, not just immediates.
1050   //
1051   // Some operands like FrameIndexes could resolve to an inline immediate value
1052   // that will not require an additional 4-bytes; this function assumes that it
1053   // will.
       // The inline overloads of isInlineConstant defined in this header all
       // forward to this one.
1054   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
1055 
1056   bool isInlineConstant(const MachineOperand &MO,
1057                         const MCOperandInfo &OpInfo) const {
1058     return isInlineConstant(MO, OpInfo.OperandType);
1059   }
1060 
1061   /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
1062   /// be an inline immediate.
1063   bool isInlineConstant(const MachineInstr &MI,
1064                         const MachineOperand &UseMO,
1065                         const MachineOperand &DefMO) const {
1066     assert(UseMO.getParent() == &MI);
1067     int OpIdx = UseMO.getOperandNo();
1068     if (OpIdx >= MI.getDesc().NumOperands)
1069       return false;
1070 
1071     return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
1072   }
1073 
1074   /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
1075   /// immediate.
1076   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
1077     const MachineOperand &MO = MI.getOperand(OpIdx);
1078     return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
1079   }
1080 
1081   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1082                         const MachineOperand &MO) const {
1083     if (OpIdx >= MI.getDesc().NumOperands)
1084       return false;
1085 
1086     if (isCopyInstr(MI)) {
1087       unsigned Size = getOpSize(MI, OpIdx);
1088       assert(Size == 8 || Size == 4);
1089 
1090       uint8_t OpType = (Size == 8) ?
1091         AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
1092       return isInlineConstant(MO, OpType);
1093     }
1094 
1095     return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
1096   }
1097 
1098   bool isInlineConstant(const MachineOperand &MO) const {
1099     return isInlineConstant(*MO.getParent(), MO.getOperandNo());
1100   }
1101 
       /// Only declared here; the definition is not part of this section.
       /// NOTE(review): presumably reports whether \p MO is acceptable as
       /// operand \p OpNo of \p MI -- confirm against the definition.
1102   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
1103                          const MachineOperand &MO) const;
1104 
1105   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
1106   /// This function will return false if you pass it a 32-bit instruction.
1107   bool hasVALU32BitEncoding(unsigned Opcode) const;
1108 
1109   /// Returns true if this operand uses the constant bus.
1110   bool usesConstantBus(const MachineRegisterInfo &MRI,
1111                        const MachineOperand &MO,
1112                        const MCOperandInfo &OpInfo) const;
1113 
1114   /// Return true if this instruction has any modifiers.
1115   ///  e.g. src[012]_mod, omod, clamp.
1116   bool hasModifiers(unsigned Opcode) const;
1117 
       /// Per-instruction counterparts of hasModifiers, taking a MachineInstr
       /// rather than an opcode. NOTE(review): exact semantics of \p OpName are
       /// not visible here -- confirm against the definitions.
1118   bool hasModifiersSet(const MachineInstr &MI,
1119                        unsigned OpName) const;
1120   bool hasAnyModifiersSet(const MachineInstr &MI) const;
1121 
       /// NOTE(review): canShrink / buildShrunkInst look like a query/transform
       /// pair for replacing \p MI with \p NewOpcode -- confirm preconditions
       /// against the definitions.
1122   bool canShrink(const MachineInstr &MI,
1123                  const MachineRegisterInfo &MRI) const;
1124 
1125   MachineInstr *buildShrunkInst(MachineInstr &MI,
1126                                 unsigned NewOpcode) const;
1127 
       /// Override of a base-class virtual (see the `override` specifier);
       /// \p ErrInfo is passed by non-const reference.
1128   bool verifyInstruction(const MachineInstr &MI,
1129                          StringRef &ErrInfo) const override;
1130 
1131   unsigned getVALUOp(const MachineInstr &MI) const;
1132 
       /// NOTE(review): insertScratchExecCopy / restoreExec take the same
       /// insertion point and \p Reg parameters and look like a save/restore
       /// pair for EXEC -- confirm against the definitions. \p Indexes is
       /// optional and defaults to nullptr in both.
1133   void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
1134                              MachineBasicBlock::iterator MBBI,
1135                              const DebugLoc &DL, Register Reg, bool IsSCCLive,
1136                              SlotIndexes *Indexes = nullptr) const;
1137 
1138   void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
1139                    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1140                    Register Reg, SlotIndexes *Indexes = nullptr) const;
1141 
1142   /// Return the correct register class for \p OpNo.  For target-specific
1143   /// instructions, this will return the register class that has been defined
1144   /// in tablegen.  For generic instructions, like REG_SEQUENCE it will return
1145   /// the register class of its machine operand.
1146   /// NOTE(review): the original text continued with a sentence fragment about
       /// inferring the correct register class based on the other operands;
       /// the start of that sentence is missing, so its intent needs confirming.
1147   const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
1148                                            unsigned OpNo) const;
1149 
1150   /// Return the size in bytes of the operand OpNo on the given
1151   // instruction opcode.
1152   unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
1153     const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
1154 
1155     if (OpInfo.RegClass == -1) {
1156       // If this is an immediate operand, this must be a 32-bit literal.
1157       assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
1158       return 4;
1159     }
1160 
1161     return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
1162   }
1163 
1164   /// This form should usually be preferred since it handles operands
1165   /// with unknown register classes.
1166   unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
1167     const MachineOperand &MO = MI.getOperand(OpNo);
1168     if (MO.isReg()) {
1169       if (unsigned SubReg = MO.getSubReg()) {
1170         return RI.getSubRegIdxSize(SubReg) / 8;
1171       }
1172     }
1173     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
1174   }
1175 
1176   /// Legalize the \p OpIdx operand of this instruction by inserting
1177   /// a MOV.  For example:
1178   /// ADD_I32_e32 VGPR0, 15
1179   /// to
1180   /// MOV VGPR1, 15
1181   /// ADD_I32_e32 VGPR0, VGPR1
1182   ///
1183   /// If the operand being legalized is a register, then a COPY will be used
1184   /// instead of MOV.
1185   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1186 
1187   /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1188   /// for \p MI.
       /// \p MO defaults to nullptr. NOTE(review): presumably the operand
       /// currently at \p OpIdx is checked in that case -- confirm against the
       /// definition.
1189   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1190                       const MachineOperand *MO = nullptr) const;
1191 
1192   /// Check if \p MO would be a valid operand for the given operand
1193   /// definition \p OpInfo. Note this does not attempt to validate constant bus
1194   /// restrictions (e.g. literal constant usage).
1195   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1196                           const MCOperandInfo &OpInfo,
1197                           const MachineOperand &MO) const;
1198 
1199   /// Check if \p MO (a register operand) is a legal register for the
1200   /// given operand description.
1201   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1202                          const MCOperandInfo &OpInfo,
1203                          const MachineOperand &MO) const;
1204 
1205   /// Legalize operands in \p MI by either commuting it or inserting a
1206   /// copy of src1.
1207   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1208 
1209   /// Fix operands in \p MI to satisfy constant bus requirements.
1210   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1211 
1212   /// Copy a value from a VGPR (\p SrcReg) to SGPR.  This function can only
1213   /// be used when it is known that the value in \p SrcReg is the same across
1214   /// all threads in the wave.
1215   /// \returns The SGPR register that \p SrcReg was copied to.
1216   Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1217                               MachineRegisterInfo &MRI) const;
1218 
       /// Encoding-specific operand legalization entry points, declared with
       /// the same parameters as legalizeOperandsVOP2 / legalizeOperandsVOP3.
1219   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1220   void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1221 
       /// NOTE(review): presumably makes \p Op usable as a \p DstRC operand by
       /// inserting code at \p I in \p InsertMBB -- confirm against the
       /// definition.
1222   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1223                               MachineBasicBlock::iterator I,
1224                               const TargetRegisterClass *DstRC,
1225                               MachineOperand &Op, MachineRegisterInfo &MRI,
1226                               const DebugLoc &DL) const;
1227 
1228   /// Legalize all operands in this instruction.  This function may create new
1229   /// instructions and control-flow around \p MI.  If present, \p MDT is
1230   /// updated.
1231   /// \returns A new basic block that contains \p MI if new blocks were created.
1232   MachineBasicBlock *
1233   legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1234 
1235   /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1236   /// was moved to VGPR. \returns true if succeeded.
1237   bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1238 
1239   /// Replace the instruction's opcode with the equivalent VALU
1240   /// opcode.  This function will also move the users of MachineInstructions
1241   /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
1242   /// updated.
1243   void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1244 
       /// NOTE(review): presumably the per-instruction worker of moveToVALU,
       /// applied to \p Inst -- confirm against the definition.
1245   void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1246                       MachineInstr &Inst) const;
1247 
       /// Overrides of base-class virtuals (see the `override` specifiers).
1248   void insertNoop(MachineBasicBlock &MBB,
1249                   MachineBasicBlock::iterator MI) const override;
1250 
1251   void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1252                    unsigned Quantity) const override;
1253 
1254   void insertReturn(MachineBasicBlock &MBB) const;
1255 
1256   /// Build instructions that simulate the behavior of an `s_trap 2` instruction
1257   /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
1258   /// interpreted as a nop.
1259   MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
1260                                          MachineBasicBlock &MBB,
1261                                          MachineInstr &MI,
1262                                          const DebugLoc &DL) const;
1263 
1264   /// Return the number of wait states that result from executing this
1265   /// instruction.
1266   static unsigned getNumWaitStates(const MachineInstr &MI);
1267 
1268   /// Returns the operand named \p OperandName.  If \p MI does not have an
1269   /// operand with that name, this function returns nullptr.
1270   LLVM_READONLY
1271   MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
1272 
1273   LLVM_READONLY
1274   const MachineOperand *getNamedOperand(const MachineInstr &MI,
1275                                         unsigned OpName) const {
1276     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
1277   }
1278 
1279   /// Get required immediate operand
1280   int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
1281     int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
1282     return MI.getOperand(Idx).getImm();
1283   }
1284 
       /// Accessors returning 64-bit values; only declared here.
       /// NOTE(review): presumably words of a buffer resource descriptor --
       /// confirm against the definitions.
1285   uint64_t getDefaultRsrcDataFormat() const;
1286   uint64_t getScratchRsrcWords23() const;
1287 
       /// Latency queries; isHighLatencyDef overrides a base-class virtual (see
       /// the `override` specifier) and takes an opcode rather than an
       /// instruction.
1288   bool isLowLatencyInstruction(const MachineInstr &MI) const;
1289   bool isHighLatencyDef(int Opc) const override;
1290 
1291   /// Return the descriptor of the target-specific machine instruction
1292   /// that corresponds to the specified pseudo or native opcode.
1293   const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
1294     return get(pseudoToMCOpcode(Opcode));
1295   }
1296 
       /// Stack-access queries; \p FrameIndex is an output parameter (passed by
       /// non-const reference). NOTE(review): the meaning of the unsigned return
       /// value is not visible here -- confirm against the definitions.
1297   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1298   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1299 
       /// Overrides of base-class virtuals (see the `override` specifiers).
1300   Register isLoadFromStackSlot(const MachineInstr &MI,
1301                                int &FrameIndex) const override;
1302   Register isStoreToStackSlot(const MachineInstr &MI,
1303                               int &FrameIndex) const override;
1304 
1305   unsigned getInstBundleSize(const MachineInstr &MI) const;
1306   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1307 
1308   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1309 
1310   bool isNonUniformBranchInstr(MachineInstr &Instr) const;
1311 
1312   void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
1313                                  MachineBasicBlock *IfEnd) const;
1314 
1315   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
1316                                    MachineBasicBlock *LoopEnd) const;
1317 
       /// The remaining declarations in this group all override base-class
       /// virtuals (see the `override` specifiers).
1318   std::pair<unsigned, unsigned>
1319   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1320 
1321   ArrayRef<std::pair<int, const char *>>
1322   getSerializableTargetIndices() const override;
1323 
1324   ArrayRef<std::pair<unsigned, const char *>>
1325   getSerializableDirectMachineOperandTargetFlags() const override;
1326 
1327   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1328   getSerializableMachineMemOperandTargetFlags() const override;
1329 
1330   ScheduleHazardRecognizer *
1331   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1332                                  const ScheduleDAG *DAG) const override;
1333 
1334   ScheduleHazardRecognizer *
1335   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1336 
1337   ScheduleHazardRecognizer *
1338   CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1339                                  const ScheduleDAGMI *DAG) const override;
1340 
1341   unsigned getLiveRangeSplitOpcode(Register Reg,
1342                                    const MachineFunction &MF) const override;
1343 
       /// \p Reg defaults to a default-constructed Register.
1344   bool isBasicBlockPrologue(const MachineInstr &MI,
1345                             Register Reg = Register()) const override;
1346 
1347   MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1348                                          MachineBasicBlock::iterator InsPt,
1349                                          const DebugLoc &DL, Register Src,
1350                                          Register Dst) const override;
1351 
1352   MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1353                                     MachineBasicBlock::iterator InsPt,
1354                                     const DebugLoc &DL, Register Src,
1355                                     unsigned SrcSubReg,
1356                                     Register Dst) const override;
1357 
       /// Only declared here. NOTE(review): presumably reflects the subtarget's
       /// wavefront size -- confirm against the definition.
1358   bool isWave32() const;
1359 
1360   /// Return a partially built integer add instruction without carry.
1361   /// Caller must add source operands.
1362   /// For pre-GFX9 it will generate unused carry destination operand.
1363   /// TODO: After GFX9 it should return a no-carry operation.
1364   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1365                                     MachineBasicBlock::iterator I,
1366                                     const DebugLoc &DL,
1367                                     Register DestReg) const;
1368 
       /// Overload of getAddNoCarry that additionally takes a RegScavenger.
       /// NOTE(review): how \p RS is used is not visible here -- confirm.
1369   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1370                                     MachineBasicBlock::iterator I,
1371                                     const DebugLoc &DL,
1372                                     Register DestReg,
1373                                     RegScavenger &RS) const;
1374 
1375   static bool isKillTerminator(unsigned Opcode);
1376   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1377 
1378   bool isLegalMUBUFImmOffset(unsigned Imm) const;
1379 
1380   static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);
1381 
       /// \p SOffset and \p ImmOffset are output parameters (passed by non-const
       /// reference); \p Alignment defaults to Align(4).
       /// NOTE(review): presumably returns false when \p Imm cannot be split --
       /// confirm against the definition.
1382   bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1383                         Align Alignment = Align(4)) const;
1384 
1385   /// Returns true if \p Offset is legal for the subtarget as the offset to a
1386   /// FLAT encoded instruction of the given \p FlatVariant in address space
1387   /// \p AddrSpace. NOTE(review): earlier text referred to a \p Signed parameter, which this signature does not have.
1388   bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1389                          uint64_t FlatVariant) const;
1390 
1391   /// Split \p COffsetVal into {immediate offset field, remainder offset}
1392   /// values.
1393   std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1394                                               unsigned AddrSpace,
1395                                               uint64_t FlatVariant) const;
1396 
1397   /// Returns true if negative offsets are allowed for the given \p FlatVariant.
1398   bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
1399 
1400   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1401   /// Return -1 if the target-specific opcode for the pseudo instruction does
1402   /// not exist. If Opcode is not a pseudo instruction, this is identity.
1403   int pseudoToMCOpcode(int Opcode) const;
1404 
1405   /// \brief Check if this instruction should only be used by assembler.
1406   /// Return true if this opcode should not be used by codegen.
1407   bool isAsmOnlyOpcode(int MCOp) const;
1408 
       /// Override of a base-class virtual (see the `override` specifier).
1409   const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
1410                                          const TargetRegisterInfo *TRI,
1411                                          const MachineFunction &MF)
1412     const override;
1413 
1414   void fixImplicitOperands(MachineInstr &MI) const;
1415 
       /// Override of a base-class virtual (see the `override` specifier);
       /// \p LIS and \p VRM are optional and default to nullptr.
1416   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1417                                       ArrayRef<unsigned> Ops,
1418                                       MachineBasicBlock::iterator InsertPt,
1419                                       int FrameIndex,
1420                                       LiveIntervals *LIS = nullptr,
1421                                       VirtRegMap *VRM = nullptr) const override;
1422 
       /// Override of a base-class virtual; \p PredCost is an optional output
       /// pointer that defaults to nullptr.
1423   unsigned getInstrLatency(const InstrItineraryData *ItinData,
1424                            const MachineInstr &MI,
1425                            unsigned *PredCost = nullptr) const override;
1426 
       /// Final override (`override final`): further derived classes cannot
       /// replace it.
1427   InstructionUniformity
1428   getInstructionUniformity(const MachineInstr &MI) const override final;
1429 
       /// Non-virtual companion of getInstructionUniformity.
       /// NOTE(review): presumably handles generic opcodes -- confirm against
       /// the definition.
1430   InstructionUniformity
1431   getGenericInstructionUniformity(const MachineInstr &MI) const;
1432 
1433   const MIRFormatter *getMIRFormatter() const override {
1434     if (!Formatter)
1435       Formatter = std::make_unique<AMDGPUMIRFormatter>();
1436     return Formatter.get();
1437   }
1438 
       /// Static helper taking the MachineFunction; only declared here.
       /// NOTE(review): presumably maps the function's calling convention to a
       /// shader-type encoding -- confirm against the definition.
1439   static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1440 
1441   const TargetSchedModel &getSchedModel() const { return SchedModel; }
1442 
1443   // Enforce even alignment of the operand named \p OpName if required by the
1444   // target. This is used if an operand is a 32 bit register but needs to be
1445   // aligned regardless.
1446   void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1447 };
1448 
1449 /// \brief Returns true if a reg:subreg pair P has a TRC class
1450 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1451                          const TargetRegisterClass &TRC,
1452                          MachineRegisterInfo &MRI) {
1453   auto *RC = MRI.getRegClass(P.Reg);
1454   if (!P.SubReg)
1455     return RC == &TRC;
1456   auto *TRI = MRI.getTargetRegisterInfo();
1457   return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1458 }
1459 
1460 /// \brief Create RegSubRegPair from a register MachineOperand
1461 inline
1462 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1463   assert(O.isReg());
1464   return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1465 }
1466 
1467 /// \brief Return the SubReg component from REG_SEQUENCE
1468 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1469                                                     unsigned SubReg);
1470 
1471 /// \brief Return the defining instruction for a given reg:subreg pair
1472 /// skipping copy like instructions and subreg-manipulation pseudos.
1473 /// Following another subreg of a reg:subreg isn't supported.
1474 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1475                                MachineRegisterInfo &MRI);
1476 
1477 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1478 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1479 /// attempt to track between blocks.
     /// Put differently: a true result means EXEC may have been modified.
1480 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1481                                 Register VReg,
1482                                 const MachineInstr &DefMI,
1483                                 const MachineInstr &UseMI);
1484 
1485 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1486 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1487 /// track between blocks.
     /// Put differently: a true result means EXEC may have been modified before
     /// at least one use.
1488 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1489                                    Register VReg,
1490                                    const MachineInstr &DefMI);
1491 
     // Opcode-to-opcode mapping queries plus buffer-resource constants.
     // The mapping functions are only declared here. NOTE(review): the -1
     // "no mapping" convention is documented only for getIfAddr64Inst below;
     // confirm for the others.
1492 namespace AMDGPU {
1493 
1494   LLVM_READONLY
1495   int getVOPe64(uint16_t Opcode);
1496 
1497   LLVM_READONLY
1498   int getVOPe32(uint16_t Opcode);
1499 
1500   LLVM_READONLY
1501   int getSDWAOp(uint16_t Opcode);
1502 
1503   LLVM_READONLY
1504   int getDPPOp32(uint16_t Opcode);
1505 
1506   LLVM_READONLY
1507   int getDPPOp64(uint16_t Opcode);
1508 
1509   LLVM_READONLY
1510   int getBasicFromSDWAOp(uint16_t Opcode);
1511 
1512   LLVM_READONLY
1513   int getCommuteRev(uint16_t Opcode);
1514 
1515   LLVM_READONLY
1516   int getCommuteOrig(uint16_t Opcode);
1517 
1518   LLVM_READONLY
1519   int getAddr64Inst(uint16_t Opcode);
1520 
1521   /// Check if \p Opcode is an Addr64 opcode.
1522   ///
1523   /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
1524   LLVM_READONLY
1525   int getIfAddr64Inst(uint16_t Opcode);
1526 
1527   LLVM_READONLY
1528   int getSOPKOp(uint16_t Opcode);
1529 
1530   /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
1531   /// of a VADDR form.
1532   LLVM_READONLY
1533   int getGlobalSaddrOp(uint16_t Opcode);
1534 
1535   /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
1536   /// of a SADDR form.
1537   LLVM_READONLY
1538   int getGlobalVaddrOp(uint16_t Opcode);
1539 
1540   LLVM_READONLY
1541   int getVCMPXNoSDstOp(uint16_t Opcode);
1542 
1543   /// \returns ST form with only immediate offset of a FLAT Scratch instruction
1544   /// given an \p Opcode of an SS (SADDR) form.
1545   LLVM_READONLY
1546   int getFlatScratchInstSTfromSS(uint16_t Opcode);
1547 
1548   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1549   /// of an SVS (SADDR + VADDR) form.
1550   LLVM_READONLY
1551   int getFlatScratchInstSVfromSVS(uint16_t Opcode);
1552 
1553   /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
1554   /// of an SV (VADDR) form.
1555   LLVM_READONLY
1556   int getFlatScratchInstSSfromSV(uint16_t Opcode);
1557 
1558   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1559   /// of an SS (SADDR) form.
1560   LLVM_READONLY
1561   int getFlatScratchInstSVfromSS(uint16_t Opcode);
1562 
1563   /// \returns earlyclobber version of a MAC MFMA if it exists.
1564   LLVM_READONLY
1565   int getMFMAEarlyClobberOp(uint16_t Opcode);
1566 
1567   /// \returns v_cmpx version of a v_cmp instruction.
1568   LLVM_READONLY
1569   int getVCMPXOpFromVCMP(uint16_t Opcode);
1570 
       // 64-bit constants. RSRC_DATA_FORMAT has bits 44..47 set; the two
       // *_SHIFT values are bit positions 51 and 53 (32 + 19, 32 + 21);
       // RSRC_TID_ENABLE is the single bit 55 (32 + 23).
1571   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
1572   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
1573   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
1574   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
1575 
1576 } // end namespace AMDGPU
1577 
1578 namespace AMDGPU {
     /// Target asm-comment values. SGPR_SPILL is defined as
     /// MachineInstr::TAsmComments.
1579 enum AsmComments {
1580   // For sgpr to vgpr spill instructions
1581   SGPR_SPILL = MachineInstr::TAsmComments
1582 };
1583 } // namespace AMDGPU
1584 
1585 namespace SI {
1586 namespace KernelInputOffsets {
1587 
1588 /// Offsets in bytes from the start of the input buffer
     /// The nine entries are consecutive 4-byte slots (0, 4, ..., 32), ordered
     /// as NGROUPS_{X,Y,Z}, GLOBAL_SIZE_{X,Y,Z}, LOCAL_SIZE_{X,Y,Z}.
1589 enum Offsets {
1590   NGROUPS_X = 0,
1591   NGROUPS_Y = 4,
1592   NGROUPS_Z = 8,
1593   GLOBAL_SIZE_X = 12,
1594   GLOBAL_SIZE_Y = 16,
1595   GLOBAL_SIZE_Z = 20,
1596   LOCAL_SIZE_X = 24,
1597   LOCAL_SIZE_Y = 28,
1598   LOCAL_SIZE_Z = 32
1599 };
1600 
1601 } // end namespace KernelInputOffsets
1602 } // end namespace SI
1603 
1604 } // end namespace llvm
1605 
1606 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1607