//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUInstrInfo.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstdint>

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class GCNSubtarget;
class TargetRegisterClass;

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };
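
  // Illustrative note (an assumed sketch, not part of the original header):
  // because a predicate and its inverse are encoded as negatives of one
  // another, reversing a branch condition amounts to negating the value, e.g.
  //   BranchPredicate Inv = static_cast<BranchPredicate>(-SCC_TRUE); // == SCC_FALSE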

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;
private:
  void swapOperands(MachineInstr &Inst) const;

  bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                        MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SetVectorType &Worklist,
                      MachineInstr &Inst) const;

  void lowerScalarXnor(SetVectorType &Worklist,
                       MachineInstr &Inst) const;

  void splitScalarNotBinop(SetVectorType &Worklist,
                           MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SetVectorType &Worklist,
                          MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SetVectorType &Worklist,
                               MachineInstr &Inst, unsigned Opcode) const;

  void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SetVectorType &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SetVectorType &Worklist,
                           MachineInstr &Inst) const;
  void movePackToVALU(SetVectorType &Worklist,
                      MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
                                    SetVectorType &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SetVectorType &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_LONG_BRANCH_FORWARD = 6,
    MO_LONG_BRANCH_BACKWARD = 7,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };
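
  // Illustrative note (an assumed sketch, not part of the original header):
  // the direct flag carried on a MachineOperand can be recovered by masking
  // its target flags with MO_MASK, e.g.
  //   unsigned Flag = MO.getTargetFlags() & SIInstrInfo::MO_MASK;
  //   bool IsRel32Lo = Flag == SIInstrInfo::MO_REL32_LO;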

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
                                         AliasAnalysis *AA) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                               int64_t &Offset1,
                               int64_t &Offset2) const override;

  bool getMemOperandWithOffset(const MachineInstr &LdSt,
                               const MachineOperand *&BaseOp,
                               int64_t &Offset,
                               const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(const MachineOperand &BaseOp1,
                           const MachineOperand &BaseOp2,
                           unsigned NumLoads) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                   bool KillSrc) const override;

  unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
                                    RegScavenger *RS, unsigned TmpReg,
                                    unsigned Offset, unsigned Size) const;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL,
                            unsigned DestReg,
                            int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  unsigned insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    unsigned SrcReg, int Value) const;

  unsigned insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    unsigned SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, unsigned SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, unsigned DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register.  If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                             unsigned &SrcOpIdx2) const override;

  bool findCommutedOpIndices(MCInstrDesc Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
                                MachineBasicBlock &NewDestBB,
                                const DebugLoc &DL,
                                int64_t BrOffset,
                                RegScavenger *RS = nullptr) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond,
                       unsigned TrueReg, unsigned FalseReg,
                       int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    unsigned DstReg, ArrayRef<MachineOperand> Cond,
                    unsigned TrueReg, unsigned FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          unsigned DstReg, ArrayRef<MachineOperand> Cond,
                          unsigned TrueReg, unsigned FalseReg) const;

  unsigned getAddressSpaceForPseudoSourceKind(
             unsigned Kind) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb,
                                  AliasAnalysis *AA = nullptr) const override;

  bool isFoldableCopy(const MachineInstr &MI) const;

  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
                                      MachineInstr &MI,
                                      LiveVariables *LV) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
  }

  // FIXME: Make this more precise
  static bool isFLATScratch(const MachineInstr &MI) {
    return isSegmentSpecificFLAT(MI);
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool sopkIsZext(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  bool sopkIsZext(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(MI.isCopy());
    unsigned Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
      return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }
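
  /// Illustrative example (an assumed usage sketch, not part of the original
  /// header; \c TII, \c MI and the operand index are hypothetical): testing
  /// whether an immediate operand fits the instruction's inline encoding, so
  /// no extra literal dword would be needed:
  /// \code
  ///   const MCInstrDesc &Desc = MI.getDesc();
  ///   if (TII->isInlineConstant(MI.getOperand(1), Desc.OpInfo[1])) {
  ///     // Operand 1 can be encoded as an inline constant.
  ///   }
  /// \endcode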

  /// \returns true if, were \p UseMO replaced with \p DefMO in \p MI, the
  /// result would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = MI.getOperandNo(&UseMO);
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
      return false;
    }

    return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (MI.isCopy()) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    const MachineInstr *Parent = MO.getParent();
    return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
  }

  bool isLiteralConstant(const MachineOperand &MO,
                         const MCOperandInfo &OpInfo) const {
    return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
  }

  bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return MO.isImm() && !isInlineConstant(MI, OpIdx);
  }

  // Returns true if this operand could potentially require a 32-bit literal
  // operand, but not necessarily. A FrameIndex, for example, could resolve to
  // an inline immediate value that will not require an additional 4 bytes;
  // this conservatively assumes that it will.
  bool isLiteralConstantLike(const MachineOperand &MO,
                             const MCOperandInfo &OpInfo) const;

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  ///  e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  /// Return the correct register class for \p OpNo.  For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen.  For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands to
  /// infer the correct register class where needed.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand \p OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
                                   MI.getParent()->getParent()->getRegInfo().
                                     getRegClass(MO.getReg()), SubReg)) >= 32 &&
               "Sub-dword subregs are not supported");
        return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV.  For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to an SGPR.  This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction.  This function may create new
  /// instructions and control-flow around \p MI.  If present, \p MDT is
  /// updated.
  void legalizeOperands(MachineInstr &MI,
                        MachineDominatorTree *MDT = nullptr) const;

  /// Replace this instruction's opcode with the equivalent VALU
  /// opcode.  This function will also move the users of \p MI to the
  /// VALU if necessary. If present, \p MDT is updated.
  void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  void insertWaitStates(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                        int Count) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertReturn(MachineBasicBlock &MBB) const;
  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p OperandName.  If \p MI does not have an
  /// operand with that name, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }
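
  /// Illustrative example (an assumed usage sketch, not part of the original
  /// header; \c TII and \c MI are hypothetical): reading the immediate offset
  /// operand of a memory instruction, if the instruction has one:
  /// \code
  ///   if (const MachineOperand *Off =
  ///           TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
  ///     int64_t ByteOffset = Off->getImm();
  ///     // ... use ByteOffset ...
  ///   }
  /// \endcode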

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyInstruction(const MachineInstr &MI) const;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI) const override;

  /// Return a partially built integer add instruction without carry.
  /// The caller must add the source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    unsigned DestReg) const;
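
  /// Illustrative example (an assumed usage sketch, not part of the original
  /// header; \c TII, \c MBB, \c I, \c DL, \c DestReg, \c SrcReg and \c Offset
  /// are hypothetical): the returned builder already has the destination (and,
  /// pre-GFX9, an unused carry output) wired up; the caller appends the two
  /// sources:
  /// \code
  ///   TII->getAddNoCarry(MBB, I, DL, DestReg)
  ///       .addReg(SrcReg)
  ///       .addImm(Offset);
  /// \endcode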

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  static bool isLegalMUBUFImmOffset(unsigned Imm) {
    return isUInt<12>(Imm);
  }

  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction. If \p Signed, this is for an instruction that
  /// interprets the offset as signed.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         bool Signed) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override {
    if (OpNum >= TID.getNumOperands())
      return nullptr;
    return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
  }

  void fixImplicitOperands(MachineInstr &MI) const;
};

/// \brief Returns true if the reg:subreg pair \p P belongs to the register
/// class \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}
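
/// Illustrative example (an assumed usage sketch, not part of the original
/// header; \c MO and \c MRI are hypothetical): the two helpers above compose
/// naturally when testing whether a virtual register operand is a 32-bit VGPR:
/// \code
///   if (MO.isReg() &&
///       isOfRegClass(getRegSubRegPair(MO), AMDGPU::VGPR_32RegClass, MRI)) {
///     // MO refers to (a 32-bit subregister of) a VGPR_32 virtual register.
///   }
/// \endcode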

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

  LLVM_READONLY
  int getVOPe64(uint16_t Opcode);

  LLVM_READONLY
  int getVOPe32(uint16_t Opcode);

  LLVM_READONLY
  int getSDWAOp(uint16_t Opcode);

  LLVM_READONLY
  int getDPPOp32(uint16_t Opcode);

  LLVM_READONLY
  int getBasicFromSDWAOp(uint16_t Opcode);

  LLVM_READONLY
  int getCommuteRev(uint16_t Opcode);

  LLVM_READONLY
  int getCommuteOrig(uint16_t Opcode);

  LLVM_READONLY
  int getAddr64Inst(uint16_t Opcode);

  /// Check if \p Opcode is an Addr64 opcode.
  ///
  /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
  LLVM_READONLY
  int getIfAddr64Inst(uint16_t Opcode);

  LLVM_READONLY
  int getMUBUFNoLdsInst(uint16_t Opcode);

  LLVM_READONLY
  int getAtomicRetOp(uint16_t Opcode);

  LLVM_READONLY
  int getAtomicNoRetOp(uint16_t Opcode);

  LLVM_READONLY
  int getSOPKOp(uint16_t Opcode);

  LLVM_READONLY
  int getGlobalSaddrOp(uint16_t Opcode);

  LLVM_READONLY
  int getVCMPXNoSDstOp(uint16_t Opcode);

  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H