xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 namespace llvm {
29 
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38 
39 /// Mark the MMO of a uniform load if there are no potentially clobbering stores
40 /// on any path from the start of an entry function to this load.
41 static const MachineMemOperand::Flags MONoClobber =
42     MachineMemOperand::MOTargetFlag1;
43 
44 /// Utility to store machine instructions worklist.
45 struct SIInstrWorklist {
46   SIInstrWorklist() : InstrList() {}
47 
48   void insert(MachineInstr *MI);
49 
50   MachineInstr *top() const {
51     auto iter = InstrList.begin();
52     return *iter;
53   }
54 
55   void erase_top() {
56     auto iter = InstrList.begin();
57     InstrList.erase(iter);
58   }
59 
60   bool empty() const { return InstrList.empty(); }
61 
62   void clear() {
63     InstrList.clear();
64     DeferredList.clear();
65   }
66 
67   bool isDeferred(MachineInstr *MI);
68 
69   SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
70 
71 private:
72   /// InstrList contains the MachineInstrs.
73   SetVector<MachineInstr *> InstrList;
74   /// Deferred instructions are specific MachineInstr
75   /// that will be added by insert method.
76   SetVector<MachineInstr *> DeferredList;
77 };
78 
79 class SIInstrInfo final : public AMDGPUGenInstrInfo {
80 private:
81   const SIRegisterInfo RI;
82   const GCNSubtarget &ST;
83   TargetSchedModel SchedModel;
84   mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
85 
86   // The inverse predicate should have the negative value.
87   enum BranchPredicate {
88     INVALID_BR = 0,
89     SCC_TRUE = 1,
90     SCC_FALSE = -1,
91     VCCNZ = 2,
92     VCCZ = -2,
93     EXECNZ = -3,
94     EXECZ = 3
95   };
96 
97   using SetVectorType = SmallSetVector<MachineInstr *, 32>;
98 
99   static unsigned getBranchOpcode(BranchPredicate Cond);
100   static BranchPredicate getBranchPredicate(unsigned Opcode);
101 
102 public:
103   unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
104                               MachineRegisterInfo &MRI,
105                               MachineOperand &SuperReg,
106                               const TargetRegisterClass *SuperRC,
107                               unsigned SubIdx,
108                               const TargetRegisterClass *SubRC) const;
109   MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
110                                          MachineRegisterInfo &MRI,
111                                          MachineOperand &SuperReg,
112                                          const TargetRegisterClass *SuperRC,
113                                          unsigned SubIdx,
114                                          const TargetRegisterClass *SubRC) const;
115 private:
116   void swapOperands(MachineInstr &Inst) const;
117 
118   std::pair<bool, MachineBasicBlock *>
119   moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
120                    MachineDominatorTree *MDT = nullptr) const;
121 
122   void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
123                    MachineDominatorTree *MDT = nullptr) const;
124 
125   void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
126 
127   void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
128 
129   void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
130                            unsigned Opcode) const;
131 
132   void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
133                           unsigned Opcode) const;
134 
135   void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
136                                unsigned Opcode, bool Swap = false) const;
137 
138   void splitScalar64BitAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
139                               MachineDominatorTree *MDT = nullptr) const;
140 
141   void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
142                                 unsigned Opcode,
143                                 MachineDominatorTree *MDT = nullptr) const;
144 
145   void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
146                             MachineDominatorTree *MDT = nullptr) const;
147 
148   void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
149                             MachineInstr &Inst) const;
150   void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
151   void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
152                       MachineInstr &Inst) const;
153 
154   void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
155                                     SIInstrWorklist &Worklist) const;
156 
157   void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
158                                     MachineInstr &SCCDefInst,
159                                     SIInstrWorklist &Worklist,
160                                     Register NewCond = Register()) const;
161   void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
162                                 SIInstrWorklist &Worklist) const;
163 
164   const TargetRegisterClass *
165   getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
166 
167   bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
168                                     const MachineInstr &MIb) const;
169 
170   Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
171 
172 protected:
173   /// If the specific machine instruction is a instruction that moves/copies
174   /// value from one register to another register return destination and source
175   /// registers as machine operands.
176   std::optional<DestSourcePair>
177   isCopyInstrImpl(const MachineInstr &MI) const override;
178 
179   bool swapSourceModifiers(MachineInstr &MI,
180                            MachineOperand &Src0, unsigned Src0OpName,
181                            MachineOperand &Src1, unsigned Src1OpName) const;
182 
183   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
184                                        unsigned OpIdx0,
185                                        unsigned OpIdx1) const override;
186 
187 public:
188   enum TargetOperandFlags {
189     MO_MASK = 0xf,
190 
191     MO_NONE = 0,
192     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
193     MO_GOTPCREL = 1,
194     // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
195     MO_GOTPCREL32 = 2,
196     MO_GOTPCREL32_LO = 2,
197     // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
198     MO_GOTPCREL32_HI = 3,
199     // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
200     MO_REL32 = 4,
201     MO_REL32_LO = 4,
202     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
203     MO_REL32_HI = 5,
204 
205     MO_FAR_BRANCH_OFFSET = 6,
206 
207     MO_ABS32_LO = 8,
208     MO_ABS32_HI = 9,
209   };
210 
211   explicit SIInstrInfo(const GCNSubtarget &ST);
212 
213   const SIRegisterInfo &getRegisterInfo() const {
214     return RI;
215   }
216 
217   const GCNSubtarget &getSubtarget() const {
218     return ST;
219   }
220 
221   bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
222 
223   bool isIgnorableUse(const MachineOperand &MO) const override;
224 
225   bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
226                                int64_t &Offset1) const override;
227 
228   bool getMemOperandsWithOffsetWidth(
229       const MachineInstr &LdSt,
230       SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
231       bool &OffsetIsScalable, unsigned &Width,
232       const TargetRegisterInfo *TRI) const final;
233 
234   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
235                            ArrayRef<const MachineOperand *> BaseOps2,
236                            unsigned NumLoads, unsigned NumBytes) const override;
237 
238   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
239                                int64_t Offset1, unsigned NumLoads) const override;
240 
241   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
242                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
243                    bool KillSrc) const override;
244 
245   void materializeImmediate(MachineBasicBlock &MBB,
246                             MachineBasicBlock::iterator MI, const DebugLoc &DL,
247                             Register DestReg, int64_t Value) const;
248 
249   const TargetRegisterClass *getPreferredSelectRegClass(
250                                unsigned Size) const;
251 
252   Register insertNE(MachineBasicBlock *MBB,
253                     MachineBasicBlock::iterator I, const DebugLoc &DL,
254                     Register SrcReg, int Value) const;
255 
256   Register insertEQ(MachineBasicBlock *MBB,
257                     MachineBasicBlock::iterator I, const DebugLoc &DL,
258                     Register SrcReg, int Value)  const;
259 
260   void storeRegToStackSlot(MachineBasicBlock &MBB,
261                            MachineBasicBlock::iterator MI, Register SrcReg,
262                            bool isKill, int FrameIndex,
263                            const TargetRegisterClass *RC,
264                            const TargetRegisterInfo *TRI,
265                            Register VReg) const override;
266 
267   void loadRegFromStackSlot(MachineBasicBlock &MBB,
268                             MachineBasicBlock::iterator MI, Register DestReg,
269                             int FrameIndex, const TargetRegisterClass *RC,
270                             const TargetRegisterInfo *TRI,
271                             Register VReg) const override;
272 
273   bool expandPostRAPseudo(MachineInstr &MI) const override;
274 
275   // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
276   // instructions. Returns a pair of generated instructions.
277   // Can split either post-RA with physical registers or pre-RA with
278   // virtual registers. In latter case IR needs to be in SSA form and
279   // and a REG_SEQUENCE is produced to define original register.
280   std::pair<MachineInstr*, MachineInstr*>
281   expandMovDPP64(MachineInstr &MI) const;
282 
283   // Returns an opcode that can be used to move a value to a \p DstRC
284   // register.  If there is no hardware instruction that can store to \p
285   // DstRC, then AMDGPU::COPY is returned.
286   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
287 
288   const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
289                                                      unsigned EltSize,
290                                                      bool IsSGPR) const;
291 
292   const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
293                                              bool IsIndirectSrc) const;
294   LLVM_READONLY
295   int commuteOpcode(unsigned Opc) const;
296 
297   LLVM_READONLY
298   inline int commuteOpcode(const MachineInstr &MI) const {
299     return commuteOpcode(MI.getOpcode());
300   }
301 
302   bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
303                              unsigned &SrcOpIdx1) const override;
304 
305   bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
306                              unsigned &SrcOpIdx1) const;
307 
308   bool isBranchOffsetInRange(unsigned BranchOpc,
309                              int64_t BrOffset) const override;
310 
311   MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
312 
313   /// Return whether the block terminate with divergent branch.
314   /// Note this only work before lowering the pseudo control flow instructions.
315   bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
316 
317   void insertIndirectBranch(MachineBasicBlock &MBB,
318                             MachineBasicBlock &NewDestBB,
319                             MachineBasicBlock &RestoreBB, const DebugLoc &DL,
320                             int64_t BrOffset, RegScavenger *RS) const override;
321 
322   bool analyzeBranchImpl(MachineBasicBlock &MBB,
323                          MachineBasicBlock::iterator I,
324                          MachineBasicBlock *&TBB,
325                          MachineBasicBlock *&FBB,
326                          SmallVectorImpl<MachineOperand> &Cond,
327                          bool AllowModify) const;
328 
329   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
330                      MachineBasicBlock *&FBB,
331                      SmallVectorImpl<MachineOperand> &Cond,
332                      bool AllowModify = false) const override;
333 
334   unsigned removeBranch(MachineBasicBlock &MBB,
335                         int *BytesRemoved = nullptr) const override;
336 
337   unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
338                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
339                         const DebugLoc &DL,
340                         int *BytesAdded = nullptr) const override;
341 
342   bool reverseBranchCondition(
343     SmallVectorImpl<MachineOperand> &Cond) const override;
344 
345   bool canInsertSelect(const MachineBasicBlock &MBB,
346                        ArrayRef<MachineOperand> Cond, Register DstReg,
347                        Register TrueReg, Register FalseReg, int &CondCycles,
348                        int &TrueCycles, int &FalseCycles) const override;
349 
350   void insertSelect(MachineBasicBlock &MBB,
351                     MachineBasicBlock::iterator I, const DebugLoc &DL,
352                     Register DstReg, ArrayRef<MachineOperand> Cond,
353                     Register TrueReg, Register FalseReg) const override;
354 
355   void insertVectorSelect(MachineBasicBlock &MBB,
356                           MachineBasicBlock::iterator I, const DebugLoc &DL,
357                           Register DstReg, ArrayRef<MachineOperand> Cond,
358                           Register TrueReg, Register FalseReg) const;
359 
360   bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
361                       Register &SrcReg2, int64_t &CmpMask,
362                       int64_t &CmpValue) const override;
363 
364   bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
365                             Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
366                             const MachineRegisterInfo *MRI) const override;
367 
368   bool
369   areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
370                                   const MachineInstr &MIb) const override;
371 
372   static bool isFoldableCopy(const MachineInstr &MI);
373 
374   void removeModOperands(MachineInstr &MI) const;
375 
376   bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
377                      MachineRegisterInfo *MRI) const final;
378 
379   unsigned getMachineCSELookAheadLimit() const override { return 500; }
380 
381   MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
382                                       LiveIntervals *LIS) const override;
383 
384   bool isSchedulingBoundary(const MachineInstr &MI,
385                             const MachineBasicBlock *MBB,
386                             const MachineFunction &MF) const override;
387 
388   static bool isSALU(const MachineInstr &MI) {
389     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
390   }
391 
392   bool isSALU(uint16_t Opcode) const {
393     return get(Opcode).TSFlags & SIInstrFlags::SALU;
394   }
395 
396   static bool isVALU(const MachineInstr &MI) {
397     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
398   }
399 
400   bool isVALU(uint16_t Opcode) const {
401     return get(Opcode).TSFlags & SIInstrFlags::VALU;
402   }
403 
404   static bool isVMEM(const MachineInstr &MI) {
405     return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
406   }
407 
408   bool isVMEM(uint16_t Opcode) const {
409     return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
410   }
411 
412   static bool isSOP1(const MachineInstr &MI) {
413     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
414   }
415 
416   bool isSOP1(uint16_t Opcode) const {
417     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
418   }
419 
420   static bool isSOP2(const MachineInstr &MI) {
421     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
422   }
423 
424   bool isSOP2(uint16_t Opcode) const {
425     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
426   }
427 
428   static bool isSOPC(const MachineInstr &MI) {
429     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
430   }
431 
432   bool isSOPC(uint16_t Opcode) const {
433     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
434   }
435 
436   static bool isSOPK(const MachineInstr &MI) {
437     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
438   }
439 
440   bool isSOPK(uint16_t Opcode) const {
441     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
442   }
443 
444   static bool isSOPP(const MachineInstr &MI) {
445     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
446   }
447 
448   bool isSOPP(uint16_t Opcode) const {
449     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
450   }
451 
452   static bool isPacked(const MachineInstr &MI) {
453     return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
454   }
455 
456   bool isPacked(uint16_t Opcode) const {
457     return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
458   }
459 
460   static bool isVOP1(const MachineInstr &MI) {
461     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
462   }
463 
464   bool isVOP1(uint16_t Opcode) const {
465     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
466   }
467 
468   static bool isVOP2(const MachineInstr &MI) {
469     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
470   }
471 
472   bool isVOP2(uint16_t Opcode) const {
473     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
474   }
475 
476   static bool isVOP3(const MachineInstr &MI) {
477     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
478   }
479 
480   bool isVOP3(uint16_t Opcode) const {
481     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
482   }
483 
484   static bool isSDWA(const MachineInstr &MI) {
485     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
486   }
487 
488   bool isSDWA(uint16_t Opcode) const {
489     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
490   }
491 
492   static bool isVOPC(const MachineInstr &MI) {
493     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
494   }
495 
496   bool isVOPC(uint16_t Opcode) const {
497     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
498   }
499 
500   static bool isMUBUF(const MachineInstr &MI) {
501     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
502   }
503 
504   bool isMUBUF(uint16_t Opcode) const {
505     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
506   }
507 
508   static bool isMTBUF(const MachineInstr &MI) {
509     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
510   }
511 
512   bool isMTBUF(uint16_t Opcode) const {
513     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
514   }
515 
516   static bool isSMRD(const MachineInstr &MI) {
517     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
518   }
519 
520   bool isSMRD(uint16_t Opcode) const {
521     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
522   }
523 
524   bool isBufferSMRD(const MachineInstr &MI) const;
525 
526   static bool isDS(const MachineInstr &MI) {
527     return MI.getDesc().TSFlags & SIInstrFlags::DS;
528   }
529 
530   bool isDS(uint16_t Opcode) const {
531     return get(Opcode).TSFlags & SIInstrFlags::DS;
532   }
533 
534   bool isAlwaysGDS(uint16_t Opcode) const;
535 
536   static bool isMIMG(const MachineInstr &MI) {
537     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
538   }
539 
540   bool isMIMG(uint16_t Opcode) const {
541     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
542   }
543 
544   static bool isGather4(const MachineInstr &MI) {
545     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
546   }
547 
548   bool isGather4(uint16_t Opcode) const {
549     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
550   }
551 
552   static bool isFLAT(const MachineInstr &MI) {
553     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
554   }
555 
556   // Is a FLAT encoded instruction which accesses a specific segment,
557   // i.e. global_* or scratch_*.
558   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
559     auto Flags = MI.getDesc().TSFlags;
560     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
561   }
562 
563   bool isSegmentSpecificFLAT(uint16_t Opcode) const {
564     auto Flags = get(Opcode).TSFlags;
565     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
566   }
567 
568   static bool isFLATGlobal(const MachineInstr &MI) {
569     return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
570   }
571 
572   bool isFLATGlobal(uint16_t Opcode) const {
573     return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
574   }
575 
576   static bool isFLATScratch(const MachineInstr &MI) {
577     return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
578   }
579 
580   bool isFLATScratch(uint16_t Opcode) const {
581     return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
582   }
583 
584   // Any FLAT encoded instruction, including global_* and scratch_*.
585   bool isFLAT(uint16_t Opcode) const {
586     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
587   }
588 
589   static bool isEXP(const MachineInstr &MI) {
590     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
591   }
592 
593   static bool isDualSourceBlendEXP(const MachineInstr &MI) {
594     if (!isEXP(MI))
595       return false;
596     unsigned Target = MI.getOperand(0).getImm();
597     return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
598            Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
599   }
600 
601   bool isEXP(uint16_t Opcode) const {
602     return get(Opcode).TSFlags & SIInstrFlags::EXP;
603   }
604 
605   static bool isAtomicNoRet(const MachineInstr &MI) {
606     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
607   }
608 
609   bool isAtomicNoRet(uint16_t Opcode) const {
610     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
611   }
612 
613   static bool isAtomicRet(const MachineInstr &MI) {
614     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
615   }
616 
617   bool isAtomicRet(uint16_t Opcode) const {
618     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
619   }
620 
621   static bool isAtomic(const MachineInstr &MI) {
622     return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
623                                    SIInstrFlags::IsAtomicNoRet);
624   }
625 
626   bool isAtomic(uint16_t Opcode) const {
627     return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
628                                   SIInstrFlags::IsAtomicNoRet);
629   }
630 
631   static bool isWQM(const MachineInstr &MI) {
632     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
633   }
634 
635   bool isWQM(uint16_t Opcode) const {
636     return get(Opcode).TSFlags & SIInstrFlags::WQM;
637   }
638 
639   static bool isDisableWQM(const MachineInstr &MI) {
640     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
641   }
642 
643   bool isDisableWQM(uint16_t Opcode) const {
644     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
645   }
646 
647   static bool isVGPRSpill(const MachineInstr &MI) {
648     return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
649   }
650 
651   bool isVGPRSpill(uint16_t Opcode) const {
652     return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
653   }
654 
655   static bool isSGPRSpill(const MachineInstr &MI) {
656     return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
657   }
658 
659   bool isSGPRSpill(uint16_t Opcode) const {
660     return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
661   }
662 
663   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
664     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
665            Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
666   }
667 
668   static bool isDPP(const MachineInstr &MI) {
669     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
670   }
671 
672   bool isDPP(uint16_t Opcode) const {
673     return get(Opcode).TSFlags & SIInstrFlags::DPP;
674   }
675 
676   static bool isTRANS(const MachineInstr &MI) {
677     return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
678   }
679 
680   bool isTRANS(uint16_t Opcode) const {
681     return get(Opcode).TSFlags & SIInstrFlags::TRANS;
682   }
683 
684   static bool isVOP3P(const MachineInstr &MI) {
685     return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
686   }
687 
688   bool isVOP3P(uint16_t Opcode) const {
689     return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
690   }
691 
692   static bool isVINTRP(const MachineInstr &MI) {
693     return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
694   }
695 
696   bool isVINTRP(uint16_t Opcode) const {
697     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
698   }
699 
700   static bool isMAI(const MachineInstr &MI) {
701     return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
702   }
703 
704   bool isMAI(uint16_t Opcode) const {
705     return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
706   }
707 
708   static bool isMFMA(const MachineInstr &MI) {
709     return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
710            MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
711   }
712 
713   static bool isDOT(const MachineInstr &MI) {
714     return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
715   }
716 
717   static bool isWMMA(const MachineInstr &MI) {
718     return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
719   }
720 
721   bool isWMMA(uint16_t Opcode) const {
722     return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
723   }
724 
725   static bool isMFMAorWMMA(const MachineInstr &MI) {
726     return isMFMA(MI) || isWMMA(MI);
727   }
728 
729   bool isDOT(uint16_t Opcode) const {
730     return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
731   }
732 
733   static bool isLDSDIR(const MachineInstr &MI) {
734     return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
735   }
736 
737   bool isLDSDIR(uint16_t Opcode) const {
738     return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
739   }
740 
741   static bool isVINTERP(const MachineInstr &MI) {
742     return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
743   }
744 
745   bool isVINTERP(uint16_t Opcode) const {
746     return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
747   }
748 
749   static bool isScalarUnit(const MachineInstr &MI) {
750     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
751   }
752 
753   static bool usesVM_CNT(const MachineInstr &MI) {
754     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
755   }
756 
757   static bool usesLGKM_CNT(const MachineInstr &MI) {
758     return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
759   }
760 
761   static bool sopkIsZext(const MachineInstr &MI) {
762     return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
763   }
764 
765   bool sopkIsZext(uint16_t Opcode) const {
766     return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
767   }
768 
769   /// \returns true if this is an s_store_dword* instruction. This is more
770   /// specific than isSMEM && mayStore.
771   static bool isScalarStore(const MachineInstr &MI) {
772     return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
773   }
774 
775   bool isScalarStore(uint16_t Opcode) const {
776     return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
777   }
778 
779   static bool isFixedSize(const MachineInstr &MI) {
780     return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
781   }
782 
783   bool isFixedSize(uint16_t Opcode) const {
784     return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
785   }
786 
787   static bool hasFPClamp(const MachineInstr &MI) {
788     return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
789   }
790 
791   bool hasFPClamp(uint16_t Opcode) const {
792     return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
793   }
794 
795   static bool hasIntClamp(const MachineInstr &MI) {
796     return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
797   }
798 
799   uint64_t getClampMask(const MachineInstr &MI) const {
800     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
801                                 SIInstrFlags::IntClamp |
802                                 SIInstrFlags::ClampLo |
803                                 SIInstrFlags::ClampHi;
804       return MI.getDesc().TSFlags & ClampFlags;
805   }
806 
807   static bool usesFPDPRounding(const MachineInstr &MI) {
808     return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
809   }
810 
811   bool usesFPDPRounding(uint16_t Opcode) const {
812     return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
813   }
814 
815   static bool isFPAtomic(const MachineInstr &MI) {
816     return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
817   }
818 
819   bool isFPAtomic(uint16_t Opcode) const {
820     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
821   }
822 
823   static bool isNeverUniform(const MachineInstr &MI) {
824     return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
825   }
826 
827   static bool doesNotReadTiedSource(const MachineInstr &MI) {
828     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
829   }
830 
831   bool doesNotReadTiedSource(uint16_t Opcode) const {
832     return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
833   }
834 
835   bool isVGPRCopy(const MachineInstr &MI) const {
836     assert(isCopyInstr(MI));
837     Register Dest = MI.getOperand(0).getReg();
838     const MachineFunction &MF = *MI.getParent()->getParent();
839     const MachineRegisterInfo &MRI = MF.getRegInfo();
840     return !RI.isSGPRReg(MRI, Dest);
841   }
842 
843   bool hasVGPRUses(const MachineInstr &MI) const {
844     const MachineFunction &MF = *MI.getParent()->getParent();
845     const MachineRegisterInfo &MRI = MF.getRegInfo();
846     return llvm::any_of(MI.explicit_uses(),
847                         [&MRI, this](const MachineOperand &MO) {
848       return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
849   }
850 
  /// Return true if the instruction modifies the mode register.
852   static bool modifiesModeRegister(const MachineInstr &MI);
853 
854   /// Whether we must prevent this instruction from executing with EXEC = 0.
855   bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
856 
857   /// Returns true if the instruction could potentially depend on the value of
858   /// exec. If false, exec dependencies may safely be ignored.
859   bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
860 
  /// \returns true if \p Imm is encodable as an inline constant.
  bool isInlineConstant(const APInt &Imm) const;

  /// \returns true if the bit pattern of \p Imm is encodable as an inline
  /// constant.
  bool isInlineConstant(const APFloat &Imm) const {
    return isInlineConstant(Imm.bitcastToAPInt());
  }

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4-bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  /// Convenience overload taking the operand descriptor instead of a raw
  /// operand type.
  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }
880 
  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, the
  /// resulting operand would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    // Operands past the descriptor's operand list (e.g. implicit operands)
    // carry no operand type information, so they cannot be inline immediates.
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }
893 
894   /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
895   /// immediate.
896   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
897     const MachineOperand &MO = MI.getOperand(OpIdx);
898     return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
899   }
900 
  /// \returns true if \p MO would be a valid inline immediate if placed at
  /// operand index \p OpIdx of \p MI.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    // Operands beyond the descriptor's operand list have no operand type
    // information and cannot be inline immediates.
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      // Copies carry no operand type info in their descriptor; derive a
      // suitable immediate operand type from the operand's size instead.
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }
917 
918   bool isInlineConstant(const MachineOperand &MO) const {
919     return isInlineConstant(*MO.getParent(), MO.getOperandNo());
920   }
921 
922   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
923                          const MachineOperand &MO) const;
924 
925   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
926   /// This function will return false if you pass it a 32-bit instruction.
927   bool hasVALU32BitEncoding(unsigned Opcode) const;
928 
929   /// Returns true if this operand uses the constant bus.
930   bool usesConstantBus(const MachineRegisterInfo &MRI,
931                        const MachineOperand &MO,
932                        const MCOperandInfo &OpInfo) const;
933 
934   /// Return true if this instruction has any modifiers.
935   ///  e.g. src[012]_mod, omod, clamp.
936   bool hasModifiers(unsigned Opcode) const;
937 
938   bool hasModifiersSet(const MachineInstr &MI,
939                        unsigned OpName) const;
940   bool hasAnyModifiersSet(const MachineInstr &MI) const;
941 
942   bool canShrink(const MachineInstr &MI,
943                  const MachineRegisterInfo &MRI) const;
944 
945   MachineInstr *buildShrunkInst(MachineInstr &MI,
946                                 unsigned NewOpcode) const;
947 
948   bool verifyInstruction(const MachineInstr &MI,
949                          StringRef &ErrInfo) const override;
950 
951   unsigned getVALUOp(const MachineInstr &MI) const;
952 
953   void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
954                              MachineBasicBlock::iterator MBBI,
955                              const DebugLoc &DL, Register Reg, bool IsSCCLive,
956                              SlotIndexes *Indexes = nullptr) const;
957 
958   void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
959                    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
960                    Register Reg, SlotIndexes *Indexes = nullptr) const;
961 
  /// Return the correct register class for \p OpNo.  For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen.  For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand (possibly inferred from the
  /// other operands).
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;
969 
970   /// Return the size in bytes of the operand OpNo on the given
971   // instruction opcode.
972   unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
973     const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
974 
975     if (OpInfo.RegClass == -1) {
976       // If this is an immediate operand, this must be a 32-bit literal.
977       assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
978       return 4;
979     }
980 
981     return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
982   }
983 
984   /// This form should usually be preferred since it handles operands
985   /// with unknown register classes.
986   unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
987     const MachineOperand &MO = MI.getOperand(OpNo);
988     if (MO.isReg()) {
989       if (unsigned SubReg = MO.getSubReg()) {
990         return RI.getSubRegIdxSize(SubReg) / 8;
991       }
992     }
993     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
994   }
995 
996   /// Legalize the \p OpIndex operand of this instruction by inserting
997   /// a MOV.  For example:
998   /// ADD_I32_e32 VGPR0, 15
999   /// to
1000   /// MOV VGPR1, 15
1001   /// ADD_I32_e32 VGPR0, VGPR1
1002   ///
1003   /// If the operand being legalized is a register, then a COPY will be used
1004   /// instead of MOV.
1005   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1006 
1007   /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1008   /// for \p MI.
1009   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1010                       const MachineOperand *MO = nullptr) const;
1011 
1012   /// Check if \p MO would be a valid operand for the given operand
1013   /// definition \p OpInfo. Note this does not attempt to validate constant bus
1014   /// restrictions (e.g. literal constant usage).
1015   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1016                           const MCOperandInfo &OpInfo,
1017                           const MachineOperand &MO) const;
1018 
1019   /// Check if \p MO (a register operand) is a legal register for the
1020   /// given operand description.
1021   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1022                          const MCOperandInfo &OpInfo,
1023                          const MachineOperand &MO) const;
1024 
1025   /// Legalize operands in \p MI by either commuting it or inserting a
1026   /// copy of src1.
1027   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1028 
1029   /// Fix operands in \p MI to satisfy constant bus requirements.
1030   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1031 
  /// Copy a value from a VGPR (\p SrcReg) to SGPR.  This function can only
  /// be used when it is known that the value in SrcReg is the same across all
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;
1038 
1039   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1040   void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1041 
1042   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1043                               MachineBasicBlock::iterator I,
1044                               const TargetRegisterClass *DstRC,
1045                               MachineOperand &Op, MachineRegisterInfo &MRI,
1046                               const DebugLoc &DL) const;
1047 
1048   /// Legalize all operands in this instruction.  This function may create new
1049   /// instructions and control-flow around \p MI.  If present, \p MDT is
1050   /// updated.
1051   /// \returns A new basic block that contains \p MI if new blocks were created.
1052   MachineBasicBlock *
1053   legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1054 
1055   /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1056   /// was moved to VGPR. \returns true if succeeded.
1057   bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1058 
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode.  This function will also move the users of MachineInstructions
  /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1064 
1065   void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1066                       MachineInstr &Inst) const;
1067 
1068   void insertNoop(MachineBasicBlock &MBB,
1069                   MachineBasicBlock::iterator MI) const override;
1070 
1071   void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1072                    unsigned Quantity) const override;
1073 
1074   void insertReturn(MachineBasicBlock &MBB) const;
1075   /// Return the number of wait states that result from executing this
1076   /// instruction.
1077   static unsigned getNumWaitStates(const MachineInstr &MI);
1078 
1079   /// Returns the operand named \p Op.  If \p MI does not have an
1080   /// operand named \c Op, this function returns nullptr.
1081   LLVM_READONLY
1082   MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
1083 
1084   LLVM_READONLY
1085   const MachineOperand *getNamedOperand(const MachineInstr &MI,
1086                                         unsigned OpName) const {
1087     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
1088   }
1089 
1090   /// Get required immediate operand
1091   int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
1092     int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
1093     return MI.getOperand(Idx).getImm();
1094   }
1095 
1096   uint64_t getDefaultRsrcDataFormat() const;
1097   uint64_t getScratchRsrcWords23() const;
1098 
1099   bool isLowLatencyInstruction(const MachineInstr &MI) const;
1100   bool isHighLatencyDef(int Opc) const override;
1101 
  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  /// \p Opcode must have an MC equivalent: pseudoToMCOpcode returns -1 when
  /// none exists (see its declaration below), and that value must not be
  /// passed to get().
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }
1107 
1108   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1109   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1110 
1111   unsigned isLoadFromStackSlot(const MachineInstr &MI,
1112                                int &FrameIndex) const override;
1113   unsigned isStoreToStackSlot(const MachineInstr &MI,
1114                               int &FrameIndex) const override;
1115 
1116   unsigned getInstBundleSize(const MachineInstr &MI) const;
1117   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1118 
1119   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1120 
1121   bool isNonUniformBranchInstr(MachineInstr &Instr) const;
1122 
1123   void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
1124                                  MachineBasicBlock *IfEnd) const;
1125 
1126   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
1127                                    MachineBasicBlock *LoopEnd) const;
1128 
1129   std::pair<unsigned, unsigned>
1130   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1131 
1132   ArrayRef<std::pair<int, const char *>>
1133   getSerializableTargetIndices() const override;
1134 
1135   ArrayRef<std::pair<unsigned, const char *>>
1136   getSerializableDirectMachineOperandTargetFlags() const override;
1137 
1138   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1139   getSerializableMachineMemOperandTargetFlags() const override;
1140 
1141   ScheduleHazardRecognizer *
1142   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1143                                  const ScheduleDAG *DAG) const override;
1144 
1145   ScheduleHazardRecognizer *
1146   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1147 
1148   ScheduleHazardRecognizer *
1149   CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1150                                  const ScheduleDAGMI *DAG) const override;
1151 
1152   unsigned getLiveRangeSplitOpcode(Register Reg,
1153                                    const MachineFunction &MF) const override;
1154 
1155   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
1156 
1157   MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1158                                          MachineBasicBlock::iterator InsPt,
1159                                          const DebugLoc &DL, Register Src,
1160                                          Register Dst) const override;
1161 
1162   MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1163                                     MachineBasicBlock::iterator InsPt,
1164                                     const DebugLoc &DL, Register Src,
1165                                     unsigned SrcSubReg,
1166                                     Register Dst) const override;
1167 
1168   bool isWave32() const;
1169 
1170   /// Return a partially built integer add instruction without carry.
1171   /// Caller must add source operands.
1172   /// For pre-GFX9 it will generate unused carry destination operand.
1173   /// TODO: After GFX9 it should return a no-carry operation.
1174   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1175                                     MachineBasicBlock::iterator I,
1176                                     const DebugLoc &DL,
1177                                     Register DestReg) const;
1178 
1179   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1180                                     MachineBasicBlock::iterator I,
1181                                     const DebugLoc &DL,
1182                                     Register DestReg,
1183                                     RegScavenger &RS) const;
1184 
1185   static bool isKillTerminator(unsigned Opcode);
1186   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1187 
1188   static bool isLegalMUBUFImmOffset(unsigned Imm) {
1189     return isUInt<12>(Imm);
1190   }
1191 
1192   static unsigned getMaxMUBUFImmOffset();
1193 
1194   bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1195                         Align Alignment = Align(4)) const;
1196 
1197   /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
1198   /// encoded instruction. If \p Signed, this is for an instruction that
1199   /// interprets the offset as signed.
1200   bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1201                          uint64_t FlatVariant) const;
1202 
1203   /// Split \p COffsetVal into {immediate offset field, remainder offset}
1204   /// values.
1205   std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1206                                               unsigned AddrSpace,
1207                                               uint64_t FlatVariant) const;
1208 
1209   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1210   /// Return -1 if the target-specific opcode for the pseudo instruction does
1211   /// not exist. If Opcode is not a pseudo instruction, this is identity.
1212   int pseudoToMCOpcode(int Opcode) const;
1213 
1214   /// \brief Check if this instruction should only be used by assembler.
1215   /// Return true if this opcode should not be used by codegen.
1216   bool isAsmOnlyOpcode(int MCOp) const;
1217 
1218   const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
1219                                          const TargetRegisterInfo *TRI,
1220                                          const MachineFunction &MF)
1221     const override;
1222 
1223   void fixImplicitOperands(MachineInstr &MI) const;
1224 
1225   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1226                                       ArrayRef<unsigned> Ops,
1227                                       MachineBasicBlock::iterator InsertPt,
1228                                       int FrameIndex,
1229                                       LiveIntervals *LIS = nullptr,
1230                                       VirtRegMap *VRM = nullptr) const override;
1231 
1232   unsigned getInstrLatency(const InstrItineraryData *ItinData,
1233                            const MachineInstr &MI,
1234                            unsigned *PredCost = nullptr) const override;
1235 
1236   InstructionUniformity
1237   getInstructionUniformity(const MachineInstr &MI) const override final;
1238 
1239   InstructionUniformity
1240   getGenericInstructionUniformity(const MachineInstr &MI) const;
1241 
1242   const MIRFormatter *getMIRFormatter() const override {
1243     if (!Formatter.get())
1244       Formatter = std::make_unique<AMDGPUMIRFormatter>();
1245     return Formatter.get();
1246   }
1247 
1248   static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1249 
1250   const TargetSchedModel &getSchedModel() const { return SchedModel; }
1251 
1252   // Enforce operand's \p OpName even alignment if required by target.
1253   // This is used if an operand is a 32 bit register but needs to be aligned
1254   // regardless.
1255   void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1256 };
1257 
1258 /// \brief Returns true if a reg:subreg pair P has a TRC class
1259 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1260                          const TargetRegisterClass &TRC,
1261                          MachineRegisterInfo &MRI) {
1262   auto *RC = MRI.getRegClass(P.Reg);
1263   if (!P.SubReg)
1264     return RC == &TRC;
1265   auto *TRI = MRI.getTargetRegisterInfo();
1266   return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1267 }
1268 
1269 /// \brief Create RegSubRegPair from a register MachineOperand
1270 inline
1271 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1272   assert(O.isReg());
1273   return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1274 }
1275 
1276 /// \brief Return the SubReg component from REG_SEQUENCE
1277 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1278                                                     unsigned SubReg);
1279 
1280 /// \brief Return the defining instruction for a given reg:subreg pair
1281 /// skipping copy like instructions and subreg-manipulation pseudos.
1282 /// Following another subreg of a reg:subreg isn't supported.
1283 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1284                                MachineRegisterInfo &MRI);
1285 
1286 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1287 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1288 /// attempt to track between blocks.
1289 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1290                                 Register VReg,
1291                                 const MachineInstr &DefMI,
1292                                 const MachineInstr &UseMI);
1293 
1294 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1295 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1296 /// track between blocks.
1297 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1298                                    Register VReg,
1299                                    const MachineInstr &DefMI);
1300 
1301 namespace AMDGPU {
1302 
1303   LLVM_READONLY
1304   int getVOPe64(uint16_t Opcode);
1305 
1306   LLVM_READONLY
1307   int getVOPe32(uint16_t Opcode);
1308 
1309   LLVM_READONLY
1310   int getSDWAOp(uint16_t Opcode);
1311 
1312   LLVM_READONLY
1313   int getDPPOp32(uint16_t Opcode);
1314 
1315   LLVM_READONLY
1316   int getDPPOp64(uint16_t Opcode);
1317 
1318   LLVM_READONLY
1319   int getBasicFromSDWAOp(uint16_t Opcode);
1320 
1321   LLVM_READONLY
1322   int getCommuteRev(uint16_t Opcode);
1323 
1324   LLVM_READONLY
1325   int getCommuteOrig(uint16_t Opcode);
1326 
1327   LLVM_READONLY
1328   int getAddr64Inst(uint16_t Opcode);
1329 
1330   /// Check if \p Opcode is an Addr64 opcode.
1331   ///
1332   /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
1333   LLVM_READONLY
1334   int getIfAddr64Inst(uint16_t Opcode);
1335 
1336   LLVM_READONLY
1337   int getAtomicNoRetOp(uint16_t Opcode);
1338 
1339   LLVM_READONLY
1340   int getSOPKOp(uint16_t Opcode);
1341 
1342   /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
1343   /// of a VADDR form.
1344   LLVM_READONLY
1345   int getGlobalSaddrOp(uint16_t Opcode);
1346 
1347   /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
1348   /// of a SADDR form.
1349   LLVM_READONLY
1350   int getGlobalVaddrOp(uint16_t Opcode);
1351 
1352   LLVM_READONLY
1353   int getVCMPXNoSDstOp(uint16_t Opcode);
1354 
1355   /// \returns ST form with only immediate offset of a FLAT Scratch instruction
1356   /// given an \p Opcode of an SS (SADDR) form.
1357   LLVM_READONLY
1358   int getFlatScratchInstSTfromSS(uint16_t Opcode);
1359 
1360   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1361   /// of an SVS (SADDR + VADDR) form.
1362   LLVM_READONLY
1363   int getFlatScratchInstSVfromSVS(uint16_t Opcode);
1364 
1365   /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
1366   /// of an SV (VADDR) form.
1367   LLVM_READONLY
1368   int getFlatScratchInstSSfromSV(uint16_t Opcode);
1369 
1370   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1371   /// of an SS (SADDR) form.
1372   LLVM_READONLY
1373   int getFlatScratchInstSVfromSS(uint16_t Opcode);
1374 
  /// \returns earlyclobber version of a MAC MFMA if it exists.
  LLVM_READONLY
  int getMFMAEarlyClobberOp(uint16_t Opcode);
1378 
1379   /// \returns v_cmpx version of a v_cmp instruction.
1380   LLVM_READONLY
1381   int getVCMPXOpFromVCMP(uint16_t Opcode);
1382 
1383   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
1384   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
1385   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
1386   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
1387 
1388 } // end namespace AMDGPU
1389 
1390 namespace SI {
1391 namespace KernelInputOffsets {
1392 
1393 /// Offsets in bytes from the start of the input buffer
1394 enum Offsets {
1395   NGROUPS_X = 0,
1396   NGROUPS_Y = 4,
1397   NGROUPS_Z = 8,
1398   GLOBAL_SIZE_X = 12,
1399   GLOBAL_SIZE_Y = 16,
1400   GLOBAL_SIZE_Z = 20,
1401   LOCAL_SIZE_X = 24,
1402   LOCAL_SIZE_Y = 28,
1403   LOCAL_SIZE_Z = 32
1404 };
1405 
1406 } // end namespace KernelInputOffsets
1407 } // end namespace SI
1408 
1409 } // end namespace llvm
1410 
1411 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1412