xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h (revision e0c4386e7e71d93b0edc0c8fa156263fc4a8b0b6)
1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 namespace llvm {
29 
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38 
/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
///
/// This is a named alias for the target-specific memory-operand flag
/// MachineMemOperand::MOTargetFlag1.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;
43 
44 /// Utility to store machine instructions worklist.
45 struct SIInstrWorklist {
46   SIInstrWorklist() : InstrList() {}
47 
48   void insert(MachineInstr *MI);
49 
50   MachineInstr *top() const {
51     auto iter = InstrList.begin();
52     return *iter;
53   }
54 
55   void erase_top() {
56     auto iter = InstrList.begin();
57     InstrList.erase(iter);
58   }
59 
60   bool empty() const { return InstrList.empty(); }
61 
62   void clear() {
63     InstrList.clear();
64     DeferredList.clear();
65   }
66 
67   bool isDeferred(MachineInstr *MI);
68 
69   SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
70 
71 private:
72   /// InstrList contains the MachineInstrs.
73   SetVector<MachineInstr *> InstrList;
74   /// Deferred instructions are specific MachineInstr
75   /// that will be added by insert method.
76   SetVector<MachineInstr *> DeferredList;
77 };
78 
79 class SIInstrInfo final : public AMDGPUGenInstrInfo {
80 private:
  /// Register info object held by value; exposed through getRegisterInfo().
  const SIRegisterInfo RI;
  /// Subtarget reference; exposed through getSubtarget().
  const GCNSubtarget &ST;
  /// Scheduling model. Its initialization and users are not visible in this
  /// part of the header.
  TargetSchedModel SchedModel;
  /// Declared mutable, so const member functions may assign it.
  /// NOTE(review): presumably created lazily on first use -- confirm in the
  /// implementation file.
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
85 
  // The inverse predicate should have the negative value.
  // The pairs defined here are SCC_TRUE (1) / SCC_FALSE (-1),
  // VCCNZ (2) / VCCZ (-2) and EXECZ (3) / EXECNZ (-3).
  // INVALID_BR is 0 and is therefore its own negation.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };
96 
  /// Set-vector of MachineInstr pointers with a small-size parameter of 32.
  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  // Conversions between a BranchPredicate and an opcode; defined out of line.
  // NOTE(review): presumably the two functions are inverses of each other --
  // confirm against the implementation.
  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);
101 
102 public:
  // Both helpers take an insertion point \p MI, the super-register operand
  // \p SuperReg with its class \p SuperRC, and the sub-register index
  // \p SubIdx with its class \p SubRC. The first returns an unsigned value,
  // the second a MachineOperand.
  // NOTE(review): by name the second variant may return an immediate operand
  // instead of a register -- confirm against the implementation.
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;
115 private:
  // Private helpers, all defined out of line. Most of them receive the
  // SIInstrWorklist being processed together with the instruction \p Inst.
  // Where present, the MachineDominatorTree argument is optional and defaults
  // to nullptr.
  // NOTE(review): the names suggest these implement the lowering of scalar
  // instructions to the VALU -- confirm against the implementation file.
  void swapOperands(MachineInstr &Inst) const;

  // Returns a (bool, MachineBasicBlock *) pair; the meaning of each element
  // is not visible from this declaration.
  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  // \p Swap defaults to false.
  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  // \p NewCond defaults to a default-constructed Register.
  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  // Note: the array parameter decays to `int *`; the bound of 3 documents the
  // expected length but is not enforced by the type system.
  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
171 
172 protected:
  // Takes two source operands of \p MI together with their operand names.
  // NOTE(review): presumably exchanges the modifier operands associated with
  // Src0 and Src1 -- confirm against the implementation.
  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  // Override of the base-class commutation hook; defined out of line.
  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;
180 
181 public:
  // Target-specific operand flag values. MO_MASK (0xf) covers the low four
  // bits, which is enough to hold every flag value defined below.
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    // MO_GOTPCREL32 has the same value (2) as MO_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    // MO_REL32 has the same value (4) as MO_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    // Note: the value 7 is not assigned to any enumerator.
    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };
204 
  /// Constructs the instruction info from subtarget \p ST. Marked explicit so
  /// a GCNSubtarget is never implicitly converted to an SIInstrInfo.
  explicit SIInstrInfo(const GCNSubtarget &ST);
206 
207   const SIRegisterInfo &getRegisterInfo() const {
208     return RI;
209   }
210 
211   const GCNSubtarget &getSubtarget() const {
212     return ST;
213   }
214 
  // The declarations below are defined out of line. Those marked `override`
  // or `final` implement hooks inherited from the base class.
  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  // Declared `final`: derived classes cannot override this hook further.
  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           unsigned NumLoads, unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  // Not an override: a target-specific helper taking a destination register
  // and a 64-bit signed immediate \p Value.
  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  // insertNE / insertEQ share one signature and both return a Register.
  // NOTE(review): presumably they emit a compare of \p SrcReg against
  // \p Value and return the result register -- confirm in the implementation.
  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value)  const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register.  If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  // Returns an int rather than an unsigned opcode.
  // NOTE(review): presumably a negative value signals that no commuted form
  // exists -- confirm against the implementation.
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;
290 
291   LLVM_READONLY
292   inline int commuteOpcode(const MachineInstr &MI) const {
293     return commuteOpcode(MI.getOpcode());
294   }
295 
  // The declarations below are defined out of line. Those marked `override`
  // or `final` implement hooks inherited from the base class.
  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  // Non-virtual overload that works from an instruction descriptor instead of
  // a MachineInstr.
  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  // Not an override; takes an explicit iterator \p I in addition to the
  // analyzeBranch parameters, and has no default for \p AllowModify.
  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  // Not an override; same parameter list as insertSelect.
  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  // Declared `final`: derived classes cannot override this hook further.
  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;
372 
373   unsigned getMachineCSELookAheadLimit() const override { return 500; }
374 
  // Both declarations override base-class hooks and are defined out of line.
  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;
381 
382   static bool isSALU(const MachineInstr &MI) {
383     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
384   }
385 
386   bool isSALU(uint16_t Opcode) const {
387     return get(Opcode).TSFlags & SIInstrFlags::SALU;
388   }
389 
390   static bool isVALU(const MachineInstr &MI) {
391     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
392   }
393 
394   bool isVALU(uint16_t Opcode) const {
395     return get(Opcode).TSFlags & SIInstrFlags::VALU;
396   }
397 
398   static bool isVMEM(const MachineInstr &MI) {
399     return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
400   }
401 
402   bool isVMEM(uint16_t Opcode) const {
403     return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
404   }
405 
406   static bool isSOP1(const MachineInstr &MI) {
407     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
408   }
409 
410   bool isSOP1(uint16_t Opcode) const {
411     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
412   }
413 
414   static bool isSOP2(const MachineInstr &MI) {
415     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
416   }
417 
418   bool isSOP2(uint16_t Opcode) const {
419     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
420   }
421 
422   static bool isSOPC(const MachineInstr &MI) {
423     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
424   }
425 
426   bool isSOPC(uint16_t Opcode) const {
427     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
428   }
429 
430   static bool isSOPK(const MachineInstr &MI) {
431     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
432   }
433 
434   bool isSOPK(uint16_t Opcode) const {
435     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
436   }
437 
438   static bool isSOPP(const MachineInstr &MI) {
439     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
440   }
441 
442   bool isSOPP(uint16_t Opcode) const {
443     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
444   }
445 
446   static bool isPacked(const MachineInstr &MI) {
447     return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
448   }
449 
450   bool isPacked(uint16_t Opcode) const {
451     return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
452   }
453 
454   static bool isVOP1(const MachineInstr &MI) {
455     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
456   }
457 
458   bool isVOP1(uint16_t Opcode) const {
459     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
460   }
461 
462   static bool isVOP2(const MachineInstr &MI) {
463     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
464   }
465 
466   bool isVOP2(uint16_t Opcode) const {
467     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
468   }
469 
470   static bool isVOP3(const MachineInstr &MI) {
471     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
472   }
473 
474   bool isVOP3(uint16_t Opcode) const {
475     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
476   }
477 
478   static bool isSDWA(const MachineInstr &MI) {
479     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
480   }
481 
482   bool isSDWA(uint16_t Opcode) const {
483     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
484   }
485 
486   static bool isVOPC(const MachineInstr &MI) {
487     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
488   }
489 
490   bool isVOPC(uint16_t Opcode) const {
491     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
492   }
493 
494   static bool isMUBUF(const MachineInstr &MI) {
495     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
496   }
497 
498   bool isMUBUF(uint16_t Opcode) const {
499     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
500   }
501 
502   static bool isMTBUF(const MachineInstr &MI) {
503     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
504   }
505 
506   bool isMTBUF(uint16_t Opcode) const {
507     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
508   }
509 
510   static bool isSMRD(const MachineInstr &MI) {
511     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
512   }
513 
514   bool isSMRD(uint16_t Opcode) const {
515     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
516   }
517 
  // Defined out of line. Unlike isSMRD(MI) this is a non-static member, so it
  // has access to instance state.
  bool isBufferSMRD(const MachineInstr &MI) const;
519 
520   static bool isDS(const MachineInstr &MI) {
521     return MI.getDesc().TSFlags & SIInstrFlags::DS;
522   }
523 
524   bool isDS(uint16_t Opcode) const {
525     return get(Opcode).TSFlags & SIInstrFlags::DS;
526   }
527 
  // Opcode-based query, defined out of line.
  bool isAlwaysGDS(uint16_t Opcode) const;
529 
530   static bool isMIMG(const MachineInstr &MI) {
531     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
532   }
533 
534   bool isMIMG(uint16_t Opcode) const {
535     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
536   }
537 
538   static bool isGather4(const MachineInstr &MI) {
539     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
540   }
541 
542   bool isGather4(uint16_t Opcode) const {
543     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
544   }
545 
546   static bool isFLAT(const MachineInstr &MI) {
547     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
548   }
549 
550   // Is a FLAT encoded instruction which accesses a specific segment,
551   // i.e. global_* or scratch_*.
552   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
553     auto Flags = MI.getDesc().TSFlags;
554     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
555   }
556 
557   bool isSegmentSpecificFLAT(uint16_t Opcode) const {
558     auto Flags = get(Opcode).TSFlags;
559     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
560   }
561 
562   static bool isFLATGlobal(const MachineInstr &MI) {
563     return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
564   }
565 
566   bool isFLATGlobal(uint16_t Opcode) const {
567     return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
568   }
569 
570   static bool isFLATScratch(const MachineInstr &MI) {
571     return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
572   }
573 
574   bool isFLATScratch(uint16_t Opcode) const {
575     return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
576   }
577 
578   // Any FLAT encoded instruction, including global_* and scratch_*.
579   bool isFLAT(uint16_t Opcode) const {
580     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
581   }
582 
583   static bool isEXP(const MachineInstr &MI) {
584     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
585   }
586 
587   static bool isDualSourceBlendEXP(const MachineInstr &MI) {
588     if (!isEXP(MI))
589       return false;
590     unsigned Target = MI.getOperand(0).getImm();
591     return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
592            Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
593   }
594 
595   bool isEXP(uint16_t Opcode) const {
596     return get(Opcode).TSFlags & SIInstrFlags::EXP;
597   }
598 
599   static bool isAtomicNoRet(const MachineInstr &MI) {
600     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
601   }
602 
603   bool isAtomicNoRet(uint16_t Opcode) const {
604     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
605   }
606 
607   static bool isAtomicRet(const MachineInstr &MI) {
608     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
609   }
610 
611   bool isAtomicRet(uint16_t Opcode) const {
612     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
613   }
614 
615   static bool isAtomic(const MachineInstr &MI) {
616     return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
617                                    SIInstrFlags::IsAtomicNoRet);
618   }
619 
620   bool isAtomic(uint16_t Opcode) const {
621     return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
622                                   SIInstrFlags::IsAtomicNoRet);
623   }
624 
625   static bool isWQM(const MachineInstr &MI) {
626     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
627   }
628 
629   bool isWQM(uint16_t Opcode) const {
630     return get(Opcode).TSFlags & SIInstrFlags::WQM;
631   }
632 
633   static bool isDisableWQM(const MachineInstr &MI) {
634     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
635   }
636 
637   bool isDisableWQM(uint16_t Opcode) const {
638     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
639   }
640 
641   static bool isVGPRSpill(const MachineInstr &MI) {
642     return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
643   }
644 
645   bool isVGPRSpill(uint16_t Opcode) const {
646     return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
647   }
648 
649   static bool isSGPRSpill(const MachineInstr &MI) {
650     return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
651   }
652 
653   bool isSGPRSpill(uint16_t Opcode) const {
654     return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
655   }
656 
657   static bool isWWMRegSpillOpcode(uint16_t Opcode) {
658     return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
659            Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
660   }
661 
662   static bool isDPP(const MachineInstr &MI) {
663     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
664   }
665 
666   bool isDPP(uint16_t Opcode) const {
667     return get(Opcode).TSFlags & SIInstrFlags::DPP;
668   }
669 
670   static bool isTRANS(const MachineInstr &MI) {
671     return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
672   }
673 
674   bool isTRANS(uint16_t Opcode) const {
675     return get(Opcode).TSFlags & SIInstrFlags::TRANS;
676   }
677 
678   static bool isVOP3P(const MachineInstr &MI) {
679     return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
680   }
681 
682   bool isVOP3P(uint16_t Opcode) const {
683     return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
684   }
685 
686   static bool isVINTRP(const MachineInstr &MI) {
687     return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
688   }
689 
690   bool isVINTRP(uint16_t Opcode) const {
691     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
692   }
693 
694   static bool isMAI(const MachineInstr &MI) {
695     return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
696   }
697 
698   bool isMAI(uint16_t Opcode) const {
699     return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
700   }
701 
702   static bool isMFMA(const MachineInstr &MI) {
703     return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
704            MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
705   }
706 
707   static bool isDOT(const MachineInstr &MI) {
708     return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
709   }
710 
711   static bool isWMMA(const MachineInstr &MI) {
712     return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
713   }
714 
715   bool isWMMA(uint16_t Opcode) const {
716     return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
717   }
718 
719   static bool isMFMAorWMMA(const MachineInstr &MI) {
720     return isMFMA(MI) || isWMMA(MI);
721   }
722 
723   bool isDOT(uint16_t Opcode) const {
724     return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
725   }
726 
727   static bool isLDSDIR(const MachineInstr &MI) {
728     return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
729   }
730 
731   bool isLDSDIR(uint16_t Opcode) const {
732     return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
733   }
734 
735   static bool isVINTERP(const MachineInstr &MI) {
736     return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
737   }
738 
739   bool isVINTERP(uint16_t Opcode) const {
740     return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
741   }
742 
743   static bool isScalarUnit(const MachineInstr &MI) {
744     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
745   }
746 
747   static bool usesVM_CNT(const MachineInstr &MI) {
748     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
749   }
750 
751   static bool usesLGKM_CNT(const MachineInstr &MI) {
752     return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
753   }
754 
755   static bool sopkIsZext(const MachineInstr &MI) {
756     return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
757   }
758 
759   bool sopkIsZext(uint16_t Opcode) const {
760     return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
761   }
762 
763   /// \returns true if this is an s_store_dword* instruction. This is more
764   /// specific than isSMEM && mayStore.
765   static bool isScalarStore(const MachineInstr &MI) {
766     return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
767   }
768 
769   bool isScalarStore(uint16_t Opcode) const {
770     return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
771   }
772 
773   static bool isFixedSize(const MachineInstr &MI) {
774     return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
775   }
776 
777   bool isFixedSize(uint16_t Opcode) const {
778     return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
779   }
780 
781   static bool hasFPClamp(const MachineInstr &MI) {
782     return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
783   }
784 
785   bool hasFPClamp(uint16_t Opcode) const {
786     return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
787   }
788 
789   static bool hasIntClamp(const MachineInstr &MI) {
790     return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
791   }
792 
793   uint64_t getClampMask(const MachineInstr &MI) const {
794     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
795                                 SIInstrFlags::IntClamp |
796                                 SIInstrFlags::ClampLo |
797                                 SIInstrFlags::ClampHi;
798       return MI.getDesc().TSFlags & ClampFlags;
799   }
800 
801   static bool usesFPDPRounding(const MachineInstr &MI) {
802     return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
803   }
804 
805   bool usesFPDPRounding(uint16_t Opcode) const {
806     return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
807   }
808 
809   static bool isFPAtomic(const MachineInstr &MI) {
810     return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
811   }
812 
813   bool isFPAtomic(uint16_t Opcode) const {
814     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
815   }
816 
817   static bool isNeverUniform(const MachineInstr &MI) {
818     return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
819   }
820 
821   static bool doesNotReadTiedSource(const MachineInstr &MI) {
822     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
823   }
824 
825   bool doesNotReadTiedSource(uint16_t Opcode) const {
826     return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
827   }
828 
829   bool isVGPRCopy(const MachineInstr &MI) const {
830     assert(MI.isCopy());
831     Register Dest = MI.getOperand(0).getReg();
832     const MachineFunction &MF = *MI.getParent()->getParent();
833     const MachineRegisterInfo &MRI = MF.getRegInfo();
834     return !RI.isSGPRReg(MRI, Dest);
835   }
836 
837   bool hasVGPRUses(const MachineInstr &MI) const {
838     const MachineFunction &MF = *MI.getParent()->getParent();
839     const MachineRegisterInfo &MRI = MF.getRegInfo();
840     return llvm::any_of(MI.explicit_uses(),
841                         [&MRI, this](const MachineOperand &MO) {
842       return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
843   }
844 
  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  /// APInt overload of isInlineConstant, defined out of line. The APFloat
  /// overload forwards to this one.
  bool isInlineConstant(const APInt &Imm) const;
856 
857   bool isInlineConstant(const APFloat &Imm) const {
858     return isInlineConstant(Imm.bitcastToAPInt());
859   }
860 
861   // Returns true if this non-register operand definitely does not need to be
862   // encoded as a 32-bit literal. Note that this function handles all kinds of
863   // operands, not just immediates.
864   //
865   // Some operands like FrameIndexes could resolve to an inline immediate value
866   // that will not require an additional 4-bytes; this function assumes that it
867   // will.
868   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
869 
870   bool isInlineConstant(const MachineOperand &MO,
871                         const MCOperandInfo &OpInfo) const {
872     return isInlineConstant(MO, OpInfo.OperandType);
873   }
874 
875   /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
876   /// be an inline immediate.
877   bool isInlineConstant(const MachineInstr &MI,
878                         const MachineOperand &UseMO,
879                         const MachineOperand &DefMO) const {
880     assert(UseMO.getParent() == &MI);
881     int OpIdx = UseMO.getOperandNo();
882     if (OpIdx >= MI.getDesc().NumOperands)
883       return false;
884 
885     return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
886   }
887 
888   /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
889   /// immediate.
890   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
891     const MachineOperand &MO = MI.getOperand(OpIdx);
892     return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
893   }
894 
895   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
896                         const MachineOperand &MO) const {
897     if (OpIdx >= MI.getDesc().NumOperands)
898       return false;
899 
900     if (MI.isCopy()) {
901       unsigned Size = getOpSize(MI, OpIdx);
902       assert(Size == 8 || Size == 4);
903 
904       uint8_t OpType = (Size == 8) ?
905         AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
906       return isInlineConstant(MO, OpType);
907     }
908 
909     return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
910   }
911 
912   bool isInlineConstant(const MachineOperand &MO) const {
913     return isInlineConstant(*MO.getParent(), MO.getOperandNo());
914   }
915 
916   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
917                          const MachineOperand &MO) const;
918 
919   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
920   /// This function will return false if you pass it a 32-bit instruction.
921   bool hasVALU32BitEncoding(unsigned Opcode) const;
922 
923   /// Returns true if this operand uses the constant bus.
924   bool usesConstantBus(const MachineRegisterInfo &MRI,
925                        const MachineOperand &MO,
926                        const MCOperandInfo &OpInfo) const;
927 
928   /// Return true if this instruction has any modifiers.
929   ///  e.g. src[012]_mod, omod, clamp.
930   bool hasModifiers(unsigned Opcode) const;
931 
932   bool hasModifiersSet(const MachineInstr &MI,
933                        unsigned OpName) const;
934   bool hasAnyModifiersSet(const MachineInstr &MI) const;
935 
936   bool canShrink(const MachineInstr &MI,
937                  const MachineRegisterInfo &MRI) const;
938 
939   MachineInstr *buildShrunkInst(MachineInstr &MI,
940                                 unsigned NewOpcode) const;
941 
942   bool verifyInstruction(const MachineInstr &MI,
943                          StringRef &ErrInfo) const override;
944 
945   unsigned getVALUOp(const MachineInstr &MI) const;
946 
947   void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
948                              MachineBasicBlock::iterator MBBI,
949                              const DebugLoc &DL, Register Reg,
950                              bool IsSCCLive) const;
951 
952   void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
953                    MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
954                    Register Reg) const;
955 
956   /// Return the correct register class for \p OpNo.  For target-specific
957   /// instructions, this will return the register class that has been defined
958   /// in tablegen.  For generic instructions, like REG_SEQUENCE it will return
  /// the register class of its machine operand, in order to infer the correct
  /// register class based on the other operands.
961   const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
962                                            unsigned OpNo) const;
963 
964   /// Return the size in bytes of the operand OpNo on the given
965   // instruction opcode.
966   unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
967     const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
968 
969     if (OpInfo.RegClass == -1) {
970       // If this is an immediate operand, this must be a 32-bit literal.
971       assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
972       return 4;
973     }
974 
975     return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
976   }
977 
978   /// This form should usually be preferred since it handles operands
979   /// with unknown register classes.
980   unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
981     const MachineOperand &MO = MI.getOperand(OpNo);
982     if (MO.isReg()) {
983       if (unsigned SubReg = MO.getSubReg()) {
984         return RI.getSubRegIdxSize(SubReg) / 8;
985       }
986     }
987     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
988   }
989 
990   /// Legalize the \p OpIndex operand of this instruction by inserting
991   /// a MOV.  For example:
992   /// ADD_I32_e32 VGPR0, 15
993   /// to
994   /// MOV VGPR1, 15
995   /// ADD_I32_e32 VGPR0, VGPR1
996   ///
997   /// If the operand being legalized is a register, then a COPY will be used
998   /// instead of MOV.
999   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1000 
1001   /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1002   /// for \p MI.
1003   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1004                       const MachineOperand *MO = nullptr) const;
1005 
1006   /// Check if \p MO would be a valid operand for the given operand
1007   /// definition \p OpInfo. Note this does not attempt to validate constant bus
1008   /// restrictions (e.g. literal constant usage).
1009   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1010                           const MCOperandInfo &OpInfo,
1011                           const MachineOperand &MO) const;
1012 
1013   /// Check if \p MO (a register operand) is a legal register for the
1014   /// given operand description.
1015   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1016                          const MCOperandInfo &OpInfo,
1017                          const MachineOperand &MO) const;
1018 
1019   /// Legalize operands in \p MI by either commuting it or inserting a
1020   /// copy of src1.
1021   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1022 
1023   /// Fix operands in \p MI to satisfy constant bus requirements.
1024   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1025 
1026   /// Copy a value from a VGPR (\p SrcReg) to SGPR.  This function can only
  /// be used when it is known that the value in SrcReg is the same across all
1028   /// threads in the wave.
1029   /// \returns The SGPR register that \p SrcReg was copied to.
1030   Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1031                               MachineRegisterInfo &MRI) const;
1032 
1033   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1034   void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1035 
1036   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1037                               MachineBasicBlock::iterator I,
1038                               const TargetRegisterClass *DstRC,
1039                               MachineOperand &Op, MachineRegisterInfo &MRI,
1040                               const DebugLoc &DL) const;
1041 
1042   /// Legalize all operands in this instruction.  This function may create new
1043   /// instructions and control-flow around \p MI.  If present, \p MDT is
1044   /// updated.
1045   /// \returns A new basic block that contains \p MI if new blocks were created.
1046   MachineBasicBlock *
1047   legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1048 
1049   /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1050   /// was moved to VGPR. \returns true if succeeded.
1051   bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1052 
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode.  This function will also move the users of the MachineInstrs
  /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
  /// updated.
1057   void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1058 
1059   void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1060                       MachineInstr &Inst) const;
1061 
1062   void insertNoop(MachineBasicBlock &MBB,
1063                   MachineBasicBlock::iterator MI) const override;
1064 
1065   void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1066                    unsigned Quantity) const override;
1067 
1068   void insertReturn(MachineBasicBlock &MBB) const;
1069   /// Return the number of wait states that result from executing this
1070   /// instruction.
1071   static unsigned getNumWaitStates(const MachineInstr &MI);
1072 
1073   /// Returns the operand named \p Op.  If \p MI does not have an
1074   /// operand named \c Op, this function returns nullptr.
1075   LLVM_READONLY
1076   MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
1077 
1078   LLVM_READONLY
1079   const MachineOperand *getNamedOperand(const MachineInstr &MI,
1080                                         unsigned OpName) const {
1081     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
1082   }
1083 
1084   /// Get required immediate operand
1085   int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
1086     int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
1087     return MI.getOperand(Idx).getImm();
1088   }
1089 
1090   uint64_t getDefaultRsrcDataFormat() const;
1091   uint64_t getScratchRsrcWords23() const;
1092 
1093   bool isLowLatencyInstruction(const MachineInstr &MI) const;
1094   bool isHighLatencyDef(int Opc) const override;
1095 
1096   /// Return the descriptor of the target-specific machine instruction
1097   /// that corresponds to the specified pseudo or native opcode.
1098   const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
1099     return get(pseudoToMCOpcode(Opcode));
1100   }
1101 
1102   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1103   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1104 
1105   unsigned isLoadFromStackSlot(const MachineInstr &MI,
1106                                int &FrameIndex) const override;
1107   unsigned isStoreToStackSlot(const MachineInstr &MI,
1108                               int &FrameIndex) const override;
1109 
1110   unsigned getInstBundleSize(const MachineInstr &MI) const;
1111   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1112 
1113   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1114 
1115   bool isNonUniformBranchInstr(MachineInstr &Instr) const;
1116 
1117   void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
1118                                  MachineBasicBlock *IfEnd) const;
1119 
1120   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
1121                                    MachineBasicBlock *LoopEnd) const;
1122 
1123   std::pair<unsigned, unsigned>
1124   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1125 
1126   ArrayRef<std::pair<int, const char *>>
1127   getSerializableTargetIndices() const override;
1128 
1129   ArrayRef<std::pair<unsigned, const char *>>
1130   getSerializableDirectMachineOperandTargetFlags() const override;
1131 
1132   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1133   getSerializableMachineMemOperandTargetFlags() const override;
1134 
1135   ScheduleHazardRecognizer *
1136   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1137                                  const ScheduleDAG *DAG) const override;
1138 
1139   ScheduleHazardRecognizer *
1140   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1141 
1142   ScheduleHazardRecognizer *
1143   CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1144                                  const ScheduleDAGMI *DAG) const override;
1145 
1146   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
1147 
1148   MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1149                                          MachineBasicBlock::iterator InsPt,
1150                                          const DebugLoc &DL, Register Src,
1151                                          Register Dst) const override;
1152 
1153   MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1154                                     MachineBasicBlock::iterator InsPt,
1155                                     const DebugLoc &DL, Register Src,
1156                                     unsigned SrcSubReg,
1157                                     Register Dst) const override;
1158 
1159   bool isWave32() const;
1160 
1161   /// Return a partially built integer add instruction without carry.
1162   /// Caller must add source operands.
1163   /// For pre-GFX9 it will generate unused carry destination operand.
1164   /// TODO: After GFX9 it should return a no-carry operation.
1165   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1166                                     MachineBasicBlock::iterator I,
1167                                     const DebugLoc &DL,
1168                                     Register DestReg) const;
1169 
1170   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1171                                     MachineBasicBlock::iterator I,
1172                                     const DebugLoc &DL,
1173                                     Register DestReg,
1174                                     RegScavenger &RS) const;
1175 
1176   static bool isKillTerminator(unsigned Opcode);
1177   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1178 
1179   static bool isLegalMUBUFImmOffset(unsigned Imm) {
1180     return isUInt<12>(Imm);
1181   }
1182 
1183   static unsigned getMaxMUBUFImmOffset();
1184 
1185   bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1186                         Align Alignment = Align(4)) const;
1187 
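  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction in \p AddrSpace with the given \p FlatVariant.
  /// NOTE(review): whether the offset is interpreted as signed is presumably
  /// implied by \p FlatVariant (there is no separate Signed parameter) --
  /// confirm against the implementation.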
1191   bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1192                          uint64_t FlatVariant) const;
1193 
1194   /// Split \p COffsetVal into {immediate offset field, remainder offset}
1195   /// values.
1196   std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1197                                               unsigned AddrSpace,
1198                                               uint64_t FlatVariant) const;
1199 
1200   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1201   /// Return -1 if the target-specific opcode for the pseudo instruction does
1202   /// not exist. If Opcode is not a pseudo instruction, this is identity.
1203   int pseudoToMCOpcode(int Opcode) const;
1204 
1205   /// \brief Check if this instruction should only be used by assembler.
1206   /// Return true if this opcode should not be used by codegen.
1207   bool isAsmOnlyOpcode(int MCOp) const;
1208 
1209   const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
1210                                          const TargetRegisterInfo *TRI,
1211                                          const MachineFunction &MF)
1212     const override;
1213 
1214   void fixImplicitOperands(MachineInstr &MI) const;
1215 
1216   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1217                                       ArrayRef<unsigned> Ops,
1218                                       MachineBasicBlock::iterator InsertPt,
1219                                       int FrameIndex,
1220                                       LiveIntervals *LIS = nullptr,
1221                                       VirtRegMap *VRM = nullptr) const override;
1222 
1223   unsigned getInstrLatency(const InstrItineraryData *ItinData,
1224                            const MachineInstr &MI,
1225                            unsigned *PredCost = nullptr) const override;
1226 
1227   InstructionUniformity
1228   getInstructionUniformity(const MachineInstr &MI) const override final;
1229 
1230   InstructionUniformity
1231   getGenericInstructionUniformity(const MachineInstr &MI) const;
1232 
1233   const MIRFormatter *getMIRFormatter() const override {
1234     if (!Formatter.get())
1235       Formatter = std::make_unique<AMDGPUMIRFormatter>();
1236     return Formatter.get();
1237   }
1238 
1239   static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1240 
1241   const TargetSchedModel &getSchedModel() const { return SchedModel; }
1242 
1243   // Enforce operand's \p OpName even alignment if required by target.
1244   // This is used if an operand is a 32 bit register but needs to be aligned
1245   // regardless.
1246   void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1247 };
1248 
1249 /// \brief Returns true if a reg:subreg pair P has a TRC class
1250 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1251                          const TargetRegisterClass &TRC,
1252                          MachineRegisterInfo &MRI) {
1253   auto *RC = MRI.getRegClass(P.Reg);
1254   if (!P.SubReg)
1255     return RC == &TRC;
1256   auto *TRI = MRI.getTargetRegisterInfo();
1257   return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1258 }
1259 
1260 /// \brief Create RegSubRegPair from a register MachineOperand
1261 inline
1262 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1263   assert(O.isReg());
1264   return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1265 }
1266 
1267 /// \brief Return the SubReg component from REG_SEQUENCE
1268 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1269                                                     unsigned SubReg);
1270 
1271 /// \brief Return the defining instruction for a given reg:subreg pair
1272 /// skipping copy like instructions and subreg-manipulation pseudos.
1273 /// Following another subreg of a reg:subreg isn't supported.
1274 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1275                                MachineRegisterInfo &MRI);
1276 
1277 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1278 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1279 /// attempt to track between blocks.
1280 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1281                                 Register VReg,
1282                                 const MachineInstr &DefMI,
1283                                 const MachineInstr &UseMI);
1284 
1285 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1286 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1287 /// track between blocks.
1288 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1289                                    Register VReg,
1290                                    const MachineInstr &DefMI);
1291 
1292 namespace AMDGPU {
1293 
1294   LLVM_READONLY
1295   int getVOPe64(uint16_t Opcode);
1296 
1297   LLVM_READONLY
1298   int getVOPe32(uint16_t Opcode);
1299 
1300   LLVM_READONLY
1301   int getSDWAOp(uint16_t Opcode);
1302 
1303   LLVM_READONLY
1304   int getDPPOp32(uint16_t Opcode);
1305 
1306   LLVM_READONLY
1307   int getDPPOp64(uint16_t Opcode);
1308 
1309   LLVM_READONLY
1310   int getBasicFromSDWAOp(uint16_t Opcode);
1311 
1312   LLVM_READONLY
1313   int getCommuteRev(uint16_t Opcode);
1314 
1315   LLVM_READONLY
1316   int getCommuteOrig(uint16_t Opcode);
1317 
1318   LLVM_READONLY
1319   int getAddr64Inst(uint16_t Opcode);
1320 
1321   /// Check if \p Opcode is an Addr64 opcode.
1322   ///
1323   /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
1324   LLVM_READONLY
1325   int getIfAddr64Inst(uint16_t Opcode);
1326 
1327   LLVM_READONLY
1328   int getAtomicNoRetOp(uint16_t Opcode);
1329 
1330   LLVM_READONLY
1331   int getSOPKOp(uint16_t Opcode);
1332 
1333   /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
1334   /// of a VADDR form.
1335   LLVM_READONLY
1336   int getGlobalSaddrOp(uint16_t Opcode);
1337 
1338   /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
1339   /// of a SADDR form.
1340   LLVM_READONLY
1341   int getGlobalVaddrOp(uint16_t Opcode);
1342 
1343   LLVM_READONLY
1344   int getVCMPXNoSDstOp(uint16_t Opcode);
1345 
1346   /// \returns ST form with only immediate offset of a FLAT Scratch instruction
1347   /// given an \p Opcode of an SS (SADDR) form.
1348   LLVM_READONLY
1349   int getFlatScratchInstSTfromSS(uint16_t Opcode);
1350 
1351   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1352   /// of an SVS (SADDR + VADDR) form.
1353   LLVM_READONLY
1354   int getFlatScratchInstSVfromSVS(uint16_t Opcode);
1355 
1356   /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
1357   /// of an SV (VADDR) form.
1358   LLVM_READONLY
1359   int getFlatScratchInstSSfromSV(uint16_t Opcode);
1360 
1361   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1362   /// of an SS (SADDR) form.
1363   LLVM_READONLY
1364   int getFlatScratchInstSVfromSS(uint16_t Opcode);
1365 
  /// \returns earlyclobber version of a MAC MFMA if it exists.
1367   LLVM_READONLY
1368   int getMFMAEarlyClobberOp(uint16_t Opcode);
1369 
1370   /// \returns v_cmpx version of a v_cmp instruction.
1371   LLVM_READONLY
1372   int getVCMPXOpFromVCMP(uint16_t Opcode);
1373 
  // 64-bit RSRC_* constants. Bit positions written as (32 + k) denote bit k
  // counted from bit 32 of the 64-bit value.
  // NOTE(review): presumably these describe fields of the upper 64 bits of a
  // buffer resource descriptor -- confirm against the users of these values.
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; // bits 44..47 set
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); // shift amount: 51
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); // shift amount: 53
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); // bit 55 set
1378 
1379 } // end namespace AMDGPU
1380 
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
///
/// The values advance in 4-byte steps: three NGROUPS entries (X, Y, Z),
/// followed by three GLOBAL_SIZE entries and three LOCAL_SIZE entries.
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1399 
1400 } // end namespace llvm
1401 
1402 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1403