1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16
#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include <cassert>
24
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27
28 namespace llvm {
29
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38
39 constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;
40
41 /// Mark the MMO of a uniform load if there are no potentially clobbering stores
42 /// on any path from the start of an entry function to this load.
43 static const MachineMemOperand::Flags MONoClobber =
44 MachineMemOperand::MOTargetFlag1;
45
46 /// Mark the MMO of a load as the last use.
47 static const MachineMemOperand::Flags MOLastUse =
48 MachineMemOperand::MOTargetFlag2;
49
50 /// Utility to store machine instructions worklist.
51 struct SIInstrWorklist {
52 SIInstrWorklist() = default;
53
54 void insert(MachineInstr *MI);
55
topSIInstrWorklist56 MachineInstr *top() const {
57 const auto *iter = InstrList.begin();
58 return *iter;
59 }
60
erase_topSIInstrWorklist61 void erase_top() {
62 const auto *iter = InstrList.begin();
63 InstrList.erase(iter);
64 }
65
emptySIInstrWorklist66 bool empty() const { return InstrList.empty(); }
67
clearSIInstrWorklist68 void clear() {
69 InstrList.clear();
70 DeferredList.clear();
71 }
72
73 bool isDeferred(MachineInstr *MI);
74
getDeferredListSIInstrWorklist75 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
76
77 private:
78 /// InstrList contains the MachineInstrs.
79 SetVector<MachineInstr *> InstrList;
80 /// Deferred instructions are specific MachineInstr
81 /// that will be added by insert method.
82 SetVector<MachineInstr *> DeferredList;
83 };
84
85 class SIInstrInfo final : public AMDGPUGenInstrInfo {
86 private:
87 const SIRegisterInfo RI;
88 const GCNSubtarget &ST;
89 TargetSchedModel SchedModel;
90 mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
91
92 // The inverse predicate should have the negative value.
93 enum BranchPredicate {
94 INVALID_BR = 0,
95 SCC_TRUE = 1,
96 SCC_FALSE = -1,
97 VCCNZ = 2,
98 VCCZ = -2,
99 EXECNZ = -3,
100 EXECZ = 3
101 };
102
103 using SetVectorType = SmallSetVector<MachineInstr *, 32>;
104
105 static unsigned getBranchOpcode(BranchPredicate Cond);
106 static BranchPredicate getBranchPredicate(unsigned Opcode);
107
108 public:
109 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
110 MachineRegisterInfo &MRI,
111 const MachineOperand &SuperReg,
112 const TargetRegisterClass *SuperRC,
113 unsigned SubIdx,
114 const TargetRegisterClass *SubRC) const;
115 MachineOperand buildExtractSubRegOrImm(
116 MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
117 const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
118 unsigned SubIdx, const TargetRegisterClass *SubRC) const;
119
120 private:
121 void swapOperands(MachineInstr &Inst) const;
122
123 std::pair<bool, MachineBasicBlock *>
124 moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
125 MachineDominatorTree *MDT = nullptr) const;
126
127 void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
128 MachineDominatorTree *MDT = nullptr) const;
129
130 void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
131
132 void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
133
134 void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
135 unsigned Opcode) const;
136
137 void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
138 unsigned Opcode) const;
139
140 void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
141 unsigned Opcode, bool Swap = false) const;
142
143 void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
144 unsigned Opcode,
145 MachineDominatorTree *MDT = nullptr) const;
146
147 void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
148 MachineDominatorTree *MDT) const;
149
150 void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
151 MachineDominatorTree *MDT) const;
152
153 void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
154 MachineDominatorTree *MDT = nullptr) const;
155
156 void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
157 MachineInstr &Inst) const;
158 void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
159 void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
160 unsigned Opcode,
161 MachineDominatorTree *MDT = nullptr) const;
162 void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
163 MachineInstr &Inst) const;
164
165 void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
166 SIInstrWorklist &Worklist) const;
167
168 void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
169 MachineInstr &SCCDefInst,
170 SIInstrWorklist &Worklist,
171 Register NewCond = Register()) const;
172 void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
173 SIInstrWorklist &Worklist) const;
174
175 const TargetRegisterClass *
176 getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
177
178 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
179 const MachineInstr &MIb) const;
180
181 Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
182
183 bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
184 StringRef &ErrInfo) const;
185
186 bool resultDependsOnExec(const MachineInstr &MI) const;
187
188 protected:
189 /// If the specific machine instruction is a instruction that moves/copies
190 /// value from one register to another register return destination and source
191 /// registers as machine operands.
192 std::optional<DestSourcePair>
193 isCopyInstrImpl(const MachineInstr &MI) const override;
194
195 bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0,
196 AMDGPU::OpName Src0OpName, MachineOperand &Src1,
197 AMDGPU::OpName Src1OpName) const;
198 bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
199 const MachineOperand *fromMO, unsigned toIdx,
200 const MachineOperand *toMO) const;
201 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
202 unsigned OpIdx0,
203 unsigned OpIdx1) const override;
204
205 public:
206 enum TargetOperandFlags {
207 MO_MASK = 0xf,
208
209 MO_NONE = 0,
210 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
211 MO_GOTPCREL = 1,
212 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
213 MO_GOTPCREL32 = 2,
214 MO_GOTPCREL32_LO = 2,
215 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
216 MO_GOTPCREL32_HI = 3,
217 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
218 MO_REL32 = 4,
219 MO_REL32_LO = 4,
220 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
221 MO_REL32_HI = 5,
222
223 MO_FAR_BRANCH_OFFSET = 6,
224
225 MO_ABS32_LO = 8,
226 MO_ABS32_HI = 9,
227 };
228
229 explicit SIInstrInfo(const GCNSubtarget &ST);
230
getRegisterInfo()231 const SIRegisterInfo &getRegisterInfo() const {
232 return RI;
233 }
234
getSubtarget()235 const GCNSubtarget &getSubtarget() const {
236 return ST;
237 }
238
239 bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
240
241 bool isIgnorableUse(const MachineOperand &MO) const override;
242
243 bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
244 MachineCycleInfo *CI) const override;
245
246 bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
247 int64_t &Offset1) const override;
248
249 bool isGlobalMemoryObject(const MachineInstr *MI) const override;
250
251 bool getMemOperandsWithOffsetWidth(
252 const MachineInstr &LdSt,
253 SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
254 bool &OffsetIsScalable, LocationSize &Width,
255 const TargetRegisterInfo *TRI) const final;
256
257 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
258 int64_t Offset1, bool OffsetIsScalable1,
259 ArrayRef<const MachineOperand *> BaseOps2,
260 int64_t Offset2, bool OffsetIsScalable2,
261 unsigned ClusterSize,
262 unsigned NumBytes) const override;
263
264 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
265 int64_t Offset1, unsigned NumLoads) const override;
266
267 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
268 const DebugLoc &DL, Register DestReg, Register SrcReg,
269 bool KillSrc, bool RenamableDest = false,
270 bool RenamableSrc = false) const override;
271
272 const TargetRegisterClass *getPreferredSelectRegClass(
273 unsigned Size) const;
274
275 Register insertNE(MachineBasicBlock *MBB,
276 MachineBasicBlock::iterator I, const DebugLoc &DL,
277 Register SrcReg, int Value) const;
278
279 Register insertEQ(MachineBasicBlock *MBB,
280 MachineBasicBlock::iterator I, const DebugLoc &DL,
281 Register SrcReg, int Value) const;
282
283 bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
284 int64_t &ImmVal) const override;
285
286 void storeRegToStackSlot(
287 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
288 bool isKill, int FrameIndex, const TargetRegisterClass *RC,
289 const TargetRegisterInfo *TRI, Register VReg,
290 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
291
292 void loadRegFromStackSlot(
293 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
294 int FrameIndex, const TargetRegisterClass *RC,
295 const TargetRegisterInfo *TRI, Register VReg,
296 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
297
298 bool expandPostRAPseudo(MachineInstr &MI) const override;
299
300 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
301 Register DestReg, unsigned SubIdx,
302 const MachineInstr &Orig,
303 const TargetRegisterInfo &TRI) const override;
304
305 // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
306 // instructions. Returns a pair of generated instructions.
307 // Can split either post-RA with physical registers or pre-RA with
// virtual registers. In the latter case the IR needs to be in SSA form
// and a REG_SEQUENCE is produced to define the original register.
310 std::pair<MachineInstr*, MachineInstr*>
311 expandMovDPP64(MachineInstr &MI) const;
312
313 // Returns an opcode that can be used to move a value to a \p DstRC
314 // register. If there is no hardware instruction that can store to \p
315 // DstRC, then AMDGPU::COPY is returned.
316 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
317
318 const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
319 unsigned EltSize,
320 bool IsSGPR) const;
321
322 const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
323 bool IsIndirectSrc) const;
324 LLVM_READONLY
325 int commuteOpcode(unsigned Opc) const;
326
// Convenience overload: look up the commuted opcode for \p MI's own opcode
// by forwarding to commuteOpcode(unsigned).
LLVM_READONLY
inline int commuteOpcode(const MachineInstr &MI) const {
  return commuteOpcode(MI.getOpcode());
}
331
332 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
333 unsigned &SrcOpIdx1) const override;
334
335 bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
336 unsigned &SrcOpIdx1) const;
337
338 bool isBranchOffsetInRange(unsigned BranchOpc,
339 int64_t BrOffset) const override;
340
341 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
342
343 /// Return whether the block terminate with divergent branch.
344 /// Note this only work before lowering the pseudo control flow instructions.
345 bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
346
347 void insertIndirectBranch(MachineBasicBlock &MBB,
348 MachineBasicBlock &NewDestBB,
349 MachineBasicBlock &RestoreBB, const DebugLoc &DL,
350 int64_t BrOffset, RegScavenger *RS) const override;
351
352 bool analyzeBranchImpl(MachineBasicBlock &MBB,
353 MachineBasicBlock::iterator I,
354 MachineBasicBlock *&TBB,
355 MachineBasicBlock *&FBB,
356 SmallVectorImpl<MachineOperand> &Cond,
357 bool AllowModify) const;
358
359 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
360 MachineBasicBlock *&FBB,
361 SmallVectorImpl<MachineOperand> &Cond,
362 bool AllowModify = false) const override;
363
364 unsigned removeBranch(MachineBasicBlock &MBB,
365 int *BytesRemoved = nullptr) const override;
366
367 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
368 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
369 const DebugLoc &DL,
370 int *BytesAdded = nullptr) const override;
371
372 bool reverseBranchCondition(
373 SmallVectorImpl<MachineOperand> &Cond) const override;
374
375 bool canInsertSelect(const MachineBasicBlock &MBB,
376 ArrayRef<MachineOperand> Cond, Register DstReg,
377 Register TrueReg, Register FalseReg, int &CondCycles,
378 int &TrueCycles, int &FalseCycles) const override;
379
380 void insertSelect(MachineBasicBlock &MBB,
381 MachineBasicBlock::iterator I, const DebugLoc &DL,
382 Register DstReg, ArrayRef<MachineOperand> Cond,
383 Register TrueReg, Register FalseReg) const override;
384
385 void insertVectorSelect(MachineBasicBlock &MBB,
386 MachineBasicBlock::iterator I, const DebugLoc &DL,
387 Register DstReg, ArrayRef<MachineOperand> Cond,
388 Register TrueReg, Register FalseReg) const;
389
390 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
391 Register &SrcReg2, int64_t &CmpMask,
392 int64_t &CmpValue) const override;
393
394 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
395 Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
396 const MachineRegisterInfo *MRI) const override;
397
398 bool
399 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
400 const MachineInstr &MIb) const override;
401
402 static bool isFoldableCopy(const MachineInstr &MI);
403
404 void removeModOperands(MachineInstr &MI) const;
405
406 /// Return the extracted immediate value in a subregister use from a constant
407 /// materialized in a super register.
408 ///
409 /// e.g. %imm = S_MOV_B64 K[0:63]
410 /// USE %imm.sub1
411 /// This will return K[32:63]
412 static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
413 unsigned SubRegIndex);
414
415 bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
416 MachineRegisterInfo *MRI) const final;
417
// Raise MachineCSE's look-ahead limit well above the generic default.
// NOTE(review): 500 appears to be a tuning-derived constant — confirm the
// rationale before changing it.
unsigned getMachineCSELookAheadLimit() const override { return 500; }
419
420 MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
421 LiveIntervals *LIS) const override;
422
423 bool isSchedulingBoundary(const MachineInstr &MI,
424 const MachineBasicBlock *MBB,
425 const MachineFunction &MF) const override;
426
/// Return true if \p MI's descriptor carries the SIInstrFlags::SALU flag
/// (scalar-ALU instruction).
static bool isSALU(const MachineInstr &MI) {
  return MI.getDesc().TSFlags & SIInstrFlags::SALU;
}
430
isSALU(uint16_t Opcode)431 bool isSALU(uint16_t Opcode) const {
432 return get(Opcode).TSFlags & SIInstrFlags::SALU;
433 }
434
isVALU(const MachineInstr & MI)435 static bool isVALU(const MachineInstr &MI) {
436 return MI.getDesc().TSFlags & SIInstrFlags::VALU;
437 }
438
isVALU(uint16_t Opcode)439 bool isVALU(uint16_t Opcode) const {
440 return get(Opcode).TSFlags & SIInstrFlags::VALU;
441 }
442
isImage(const MachineInstr & MI)443 static bool isImage(const MachineInstr &MI) {
444 return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
445 }
446
isImage(uint16_t Opcode)447 bool isImage(uint16_t Opcode) const {
448 return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
449 }
450
/// Return true if \p MI is a VMEM access: buffer (MUBUF/MTBUF), image
/// (MIMG/VIMAGE/VSAMPLE) or any FLAT-encoded instruction.
static bool isVMEM(const MachineInstr &MI) {
  return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
}
454
isVMEM(uint16_t Opcode)455 bool isVMEM(uint16_t Opcode) const {
456 return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
457 }
458
isSOP1(const MachineInstr & MI)459 static bool isSOP1(const MachineInstr &MI) {
460 return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
461 }
462
isSOP1(uint16_t Opcode)463 bool isSOP1(uint16_t Opcode) const {
464 return get(Opcode).TSFlags & SIInstrFlags::SOP1;
465 }
466
isSOP2(const MachineInstr & MI)467 static bool isSOP2(const MachineInstr &MI) {
468 return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
469 }
470
isSOP2(uint16_t Opcode)471 bool isSOP2(uint16_t Opcode) const {
472 return get(Opcode).TSFlags & SIInstrFlags::SOP2;
473 }
474
isSOPC(const MachineInstr & MI)475 static bool isSOPC(const MachineInstr &MI) {
476 return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
477 }
478
isSOPC(uint16_t Opcode)479 bool isSOPC(uint16_t Opcode) const {
480 return get(Opcode).TSFlags & SIInstrFlags::SOPC;
481 }
482
isSOPK(const MachineInstr & MI)483 static bool isSOPK(const MachineInstr &MI) {
484 return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
485 }
486
isSOPK(uint16_t Opcode)487 bool isSOPK(uint16_t Opcode) const {
488 return get(Opcode).TSFlags & SIInstrFlags::SOPK;
489 }
490
isSOPP(const MachineInstr & MI)491 static bool isSOPP(const MachineInstr &MI) {
492 return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
493 }
494
isSOPP(uint16_t Opcode)495 bool isSOPP(uint16_t Opcode) const {
496 return get(Opcode).TSFlags & SIInstrFlags::SOPP;
497 }
498
isPacked(const MachineInstr & MI)499 static bool isPacked(const MachineInstr &MI) {
500 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
501 }
502
isPacked(uint16_t Opcode)503 bool isPacked(uint16_t Opcode) const {
504 return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
505 }
506
isVOP1(const MachineInstr & MI)507 static bool isVOP1(const MachineInstr &MI) {
508 return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
509 }
510
isVOP1(uint16_t Opcode)511 bool isVOP1(uint16_t Opcode) const {
512 return get(Opcode).TSFlags & SIInstrFlags::VOP1;
513 }
514
isVOP2(const MachineInstr & MI)515 static bool isVOP2(const MachineInstr &MI) {
516 return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
517 }
518
isVOP2(uint16_t Opcode)519 bool isVOP2(uint16_t Opcode) const {
520 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
521 }
522
isVOP3(const MachineInstr & MI)523 static bool isVOP3(const MachineInstr &MI) {
524 return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
525 }
526
isVOP3(uint16_t Opcode)527 bool isVOP3(uint16_t Opcode) const {
528 return get(Opcode).TSFlags & SIInstrFlags::VOP3;
529 }
530
isSDWA(const MachineInstr & MI)531 static bool isSDWA(const MachineInstr &MI) {
532 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
533 }
534
isSDWA(uint16_t Opcode)535 bool isSDWA(uint16_t Opcode) const {
536 return get(Opcode).TSFlags & SIInstrFlags::SDWA;
537 }
538
isVOPC(const MachineInstr & MI)539 static bool isVOPC(const MachineInstr &MI) {
540 return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
541 }
542
isVOPC(uint16_t Opcode)543 bool isVOPC(uint16_t Opcode) const {
544 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
545 }
546
isMUBUF(const MachineInstr & MI)547 static bool isMUBUF(const MachineInstr &MI) {
548 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
549 }
550
isMUBUF(uint16_t Opcode)551 bool isMUBUF(uint16_t Opcode) const {
552 return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
553 }
554
isMTBUF(const MachineInstr & MI)555 static bool isMTBUF(const MachineInstr &MI) {
556 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
557 }
558
isMTBUF(uint16_t Opcode)559 bool isMTBUF(uint16_t Opcode) const {
560 return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
561 }
562
isSMRD(const MachineInstr & MI)563 static bool isSMRD(const MachineInstr &MI) {
564 return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
565 }
566
isSMRD(uint16_t Opcode)567 bool isSMRD(uint16_t Opcode) const {
568 return get(Opcode).TSFlags & SIInstrFlags::SMRD;
569 }
570
571 bool isBufferSMRD(const MachineInstr &MI) const;
572
isDS(const MachineInstr & MI)573 static bool isDS(const MachineInstr &MI) {
574 return MI.getDesc().TSFlags & SIInstrFlags::DS;
575 }
576
isDS(uint16_t Opcode)577 bool isDS(uint16_t Opcode) const {
578 return get(Opcode).TSFlags & SIInstrFlags::DS;
579 }
580
/// Return true if \p MI carries the VALU flag together with a MUBUF or FLAT
/// flag — the combination used here to identify LDS DMA instructions.
static bool isLDSDMA(const MachineInstr &MI) {
  return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
}
584
isLDSDMA(uint16_t Opcode)585 bool isLDSDMA(uint16_t Opcode) {
586 return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
587 }
588
isGWS(const MachineInstr & MI)589 static bool isGWS(const MachineInstr &MI) {
590 return MI.getDesc().TSFlags & SIInstrFlags::GWS;
591 }
592
isGWS(uint16_t Opcode)593 bool isGWS(uint16_t Opcode) const {
594 return get(Opcode).TSFlags & SIInstrFlags::GWS;
595 }
596
597 bool isAlwaysGDS(uint16_t Opcode) const;
598
isMIMG(const MachineInstr & MI)599 static bool isMIMG(const MachineInstr &MI) {
600 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
601 }
602
isMIMG(uint16_t Opcode)603 bool isMIMG(uint16_t Opcode) const {
604 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
605 }
606
isVIMAGE(const MachineInstr & MI)607 static bool isVIMAGE(const MachineInstr &MI) {
608 return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
609 }
610
isVIMAGE(uint16_t Opcode)611 bool isVIMAGE(uint16_t Opcode) const {
612 return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
613 }
614
isVSAMPLE(const MachineInstr & MI)615 static bool isVSAMPLE(const MachineInstr &MI) {
616 return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
617 }
618
isVSAMPLE(uint16_t Opcode)619 bool isVSAMPLE(uint16_t Opcode) const {
620 return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
621 }
622
isGather4(const MachineInstr & MI)623 static bool isGather4(const MachineInstr &MI) {
624 return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
625 }
626
isGather4(uint16_t Opcode)627 bool isGather4(uint16_t Opcode) const {
628 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
629 }
630
isFLAT(const MachineInstr & MI)631 static bool isFLAT(const MachineInstr &MI) {
632 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
633 }
634
635 // Is a FLAT encoded instruction which accesses a specific segment,
636 // i.e. global_* or scratch_*.
isSegmentSpecificFLAT(const MachineInstr & MI)637 static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
638 auto Flags = MI.getDesc().TSFlags;
639 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
640 }
641
isSegmentSpecificFLAT(uint16_t Opcode)642 bool isSegmentSpecificFLAT(uint16_t Opcode) const {
643 auto Flags = get(Opcode).TSFlags;
644 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
645 }
646
isFLATGlobal(const MachineInstr & MI)647 static bool isFLATGlobal(const MachineInstr &MI) {
648 return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
649 }
650
isFLATGlobal(uint16_t Opcode)651 bool isFLATGlobal(uint16_t Opcode) const {
652 return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
653 }
654
isFLATScratch(const MachineInstr & MI)655 static bool isFLATScratch(const MachineInstr &MI) {
656 return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
657 }
658
isFLATScratch(uint16_t Opcode)659 bool isFLATScratch(uint16_t Opcode) const {
660 return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
661 }
662
663 // Any FLAT encoded instruction, including global_* and scratch_*.
isFLAT(uint16_t Opcode)664 bool isFLAT(uint16_t Opcode) const {
665 return get(Opcode).TSFlags & SIInstrFlags::FLAT;
666 }
667
isBlockLoadStore(uint16_t Opcode)668 static bool isBlockLoadStore(uint16_t Opcode) {
669 switch (Opcode) {
670 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
671 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
672 case AMDGPU::SCRATCH_STORE_BLOCK_SADDR:
673 case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR:
674 case AMDGPU::SCRATCH_STORE_BLOCK_SVS:
675 case AMDGPU::SCRATCH_LOAD_BLOCK_SVS:
676 return true;
677 default:
678 return false;
679 }
680 }
681
isEXP(const MachineInstr & MI)682 static bool isEXP(const MachineInstr &MI) {
683 return MI.getDesc().TSFlags & SIInstrFlags::EXP;
684 }
685
// Return true if \p MI is an EXP instruction targeting one of the two
// dual-source blend export targets. Operand 0 is read as the immediate
// export target, matching the AMDGPU::Exp target encoding.
static bool isDualSourceBlendEXP(const MachineInstr &MI) {
  if (!isEXP(MI))
    return false;
  unsigned Target = MI.getOperand(0).getImm();
  return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
         Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
}
693
isEXP(uint16_t Opcode)694 bool isEXP(uint16_t Opcode) const {
695 return get(Opcode).TSFlags & SIInstrFlags::EXP;
696 }
697
isAtomicNoRet(const MachineInstr & MI)698 static bool isAtomicNoRet(const MachineInstr &MI) {
699 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
700 }
701
isAtomicNoRet(uint16_t Opcode)702 bool isAtomicNoRet(uint16_t Opcode) const {
703 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
704 }
705
isAtomicRet(const MachineInstr & MI)706 static bool isAtomicRet(const MachineInstr &MI) {
707 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
708 }
709
isAtomicRet(uint16_t Opcode)710 bool isAtomicRet(uint16_t Opcode) const {
711 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
712 }
713
isAtomic(const MachineInstr & MI)714 static bool isAtomic(const MachineInstr &MI) {
715 return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
716 SIInstrFlags::IsAtomicNoRet);
717 }
718
isAtomic(uint16_t Opcode)719 bool isAtomic(uint16_t Opcode) const {
720 return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
721 SIInstrFlags::IsAtomicNoRet);
722 }
723
mayWriteLDSThroughDMA(const MachineInstr & MI)724 static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
725 return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
726 }
727
isWQM(const MachineInstr & MI)728 static bool isWQM(const MachineInstr &MI) {
729 return MI.getDesc().TSFlags & SIInstrFlags::WQM;
730 }
731
isWQM(uint16_t Opcode)732 bool isWQM(uint16_t Opcode) const {
733 return get(Opcode).TSFlags & SIInstrFlags::WQM;
734 }
735
isDisableWQM(const MachineInstr & MI)736 static bool isDisableWQM(const MachineInstr &MI) {
737 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
738 }
739
isDisableWQM(uint16_t Opcode)740 bool isDisableWQM(uint16_t Opcode) const {
741 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
742 }
743
744 // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
745 // SGPRs spilling to VGPRs which are SGPR spills but from VALU instructions
746 // therefore we need an explicit check for them since just checking if the
747 // Spill bit is set and what instruction type it came from misclassifies
748 // them.
isVGPRSpill(const MachineInstr & MI)749 static bool isVGPRSpill(const MachineInstr &MI) {
750 return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
751 MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
752 (isSpill(MI) && isVALU(MI));
753 }
754
isVGPRSpill(uint16_t Opcode)755 bool isVGPRSpill(uint16_t Opcode) const {
756 return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
757 Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
758 (isSpill(Opcode) && isVALU(Opcode));
759 }
760
isSGPRSpill(const MachineInstr & MI)761 static bool isSGPRSpill(const MachineInstr &MI) {
762 return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
763 MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
764 (isSpill(MI) && isSALU(MI));
765 }
766
isSGPRSpill(uint16_t Opcode)767 bool isSGPRSpill(uint16_t Opcode) const {
768 return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
769 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
770 (isSpill(Opcode) && isSALU(Opcode));
771 }
772
isSpill(uint16_t Opcode)773 bool isSpill(uint16_t Opcode) const {
774 return get(Opcode).TSFlags & SIInstrFlags::Spill;
775 }
776
isSpill(const MachineInstr & MI)777 static bool isSpill(const MachineInstr &MI) {
778 return MI.getDesc().TSFlags & SIInstrFlags::Spill;
779 }
780
isWWMRegSpillOpcode(uint16_t Opcode)781 static bool isWWMRegSpillOpcode(uint16_t Opcode) {
782 return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
783 Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
784 Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
785 Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
786 }
787
isChainCallOpcode(uint64_t Opcode)788 static bool isChainCallOpcode(uint64_t Opcode) {
789 return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
790 Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
791 }
792
isDPP(const MachineInstr & MI)793 static bool isDPP(const MachineInstr &MI) {
794 return MI.getDesc().TSFlags & SIInstrFlags::DPP;
795 }
796
isDPP(uint16_t Opcode)797 bool isDPP(uint16_t Opcode) const {
798 return get(Opcode).TSFlags & SIInstrFlags::DPP;
799 }
800
isTRANS(const MachineInstr & MI)801 static bool isTRANS(const MachineInstr &MI) {
802 return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
803 }
804
isTRANS(uint16_t Opcode)805 bool isTRANS(uint16_t Opcode) const {
806 return get(Opcode).TSFlags & SIInstrFlags::TRANS;
807 }
808
isVOP3P(const MachineInstr & MI)809 static bool isVOP3P(const MachineInstr &MI) {
810 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
811 }
812
isVOP3P(uint16_t Opcode)813 bool isVOP3P(uint16_t Opcode) const {
814 return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
815 }
816
isVINTRP(const MachineInstr & MI)817 static bool isVINTRP(const MachineInstr &MI) {
818 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
819 }
820
isVINTRP(uint16_t Opcode)821 bool isVINTRP(uint16_t Opcode) const {
822 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
823 }
824
isMAI(const MachineInstr & MI)825 static bool isMAI(const MachineInstr &MI) {
826 return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
827 }
828
isMAI(uint16_t Opcode)829 bool isMAI(uint16_t Opcode) const {
830 return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
831 }
832
isMFMA(const MachineInstr & MI)833 static bool isMFMA(const MachineInstr &MI) {
834 return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
835 MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
836 }
837
isDOT(const MachineInstr & MI)838 static bool isDOT(const MachineInstr &MI) {
839 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
840 }
841
isWMMA(const MachineInstr & MI)842 static bool isWMMA(const MachineInstr &MI) {
843 return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
844 }
845
isWMMA(uint16_t Opcode)846 bool isWMMA(uint16_t Opcode) const {
847 return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
848 }
849
isMFMAorWMMA(const MachineInstr & MI)850 static bool isMFMAorWMMA(const MachineInstr &MI) {
851 return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
852 }
853
isSWMMAC(const MachineInstr & MI)854 static bool isSWMMAC(const MachineInstr &MI) {
855 return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
856 }
857
isSWMMAC(uint16_t Opcode)858 bool isSWMMAC(uint16_t Opcode) const {
859 return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
860 }
861
isDOT(uint16_t Opcode)862 bool isDOT(uint16_t Opcode) const {
863 return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
864 }
865
  /// \returns true if \p MI is an XDL instruction. Defined out-of-line since
  /// it requires subtarget information.
  bool isXDL(const MachineInstr &MI) const;

  /// \returns true if \p Opcode is a DGEMM MFMA, per the generated
  /// getMAIIsDGEMM query.
  static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); }

  /// \returns true if \p MI carries the LDSDIR flag.
  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  /// Opcode-based variant of isLDSDIR.
  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  /// \returns true if \p MI carries the VINTERP flag.
  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  /// Opcode-based variant of isVINTERP.
  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  /// \returns true if \p MI executes on a scalar unit (SALU or SMRD).
  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  /// \returns true if \p MI increments the VM counter (vmcnt).
  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  /// \returns true if \p MI increments the LGKM counter (lgkmcnt).
  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }
897
898 // Most sopk treat the immediate as a signed 16-bit, however some
899 // use it as unsigned.
sopkIsZext(unsigned Opcode)900 static bool sopkIsZext(unsigned Opcode) {
901 return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
902 Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
903 Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
904 Opcode == AMDGPU::S_GETREG_B32;
905 }
906
  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  /// Opcode-based variant of isScalarStore.
  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  /// \returns true if \p MI has a fixed (non-variadic) encoded size.
  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  /// Opcode-based variant of isFixedSize.
  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  /// \returns true if \p MI has a floating-point clamp modifier.
  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  /// Opcode-based variant of hasFPClamp.
  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  /// \returns true if \p MI has an integer clamp modifier.
  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  /// \returns the subset of clamp-related TSFlags
  /// (FPClamp/IntClamp/ClampLo/ClampHi) set on \p MI.
  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  /// \returns true if \p MI uses the double-precision FP rounding mode.
  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  /// Opcode-based variant of usesFPDPRounding.
  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  /// \returns true if \p MI is a floating-point atomic instruction.
  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  /// Opcode-based variant of isFPAtomic.
  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  /// \returns true if \p MI can never produce a uniform result.
  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }
964
  // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
  // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
  // to check for the barrier start (S_BARRIER_SIGNAL*)
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  /// \returns true if \p Opcode is any barrier-related instruction: a barrier
  /// start, S_BARRIER_WAIT, or a GWS init/barrier.
  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
  }
980
  /// \returns true if \p Opcode is one of the f16 pseudo-scalar
  /// transcendental opcodes (V_S_EXP/LOG/RCP/RSQ/SQRT_F16_e64).
  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  /// \returns true if \p MI never reads its tied source operand.
  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  /// Opcode-based variant of doesNotReadTiedSource.
  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  /// \returns true if \p Opcode is a scheduling-directive pseudo
  /// (SCHED_BARRIER, SCHED_GROUP_BARRIER or IGLP_OPT).
  bool isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }

  /// MachineInstr-based variant of isIGLP.
  bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }

  // Return true if the instruction is mutually exclusive with all non-IGLP DAG
  // mutations, requiring all other mutations to be disabled.
  bool isIGLPMutationOnly(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }
1009
  /// Map a "_soft" wait-count pseudo opcode to its hard equivalent.
  /// \returns \p Opcode unchanged if it is not a soft waitcnt.
  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }
1032
  /// \returns true if \p Opcode is any wait-count instruction. Soft variants
  /// are recognized by first mapping through getNonSoftWaitcntOpcode.
  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }
1055
  /// \returns true if copy instruction \p MI has a non-SGPR (i.e. VGPR or
  /// AGPR) destination register.
  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }
1063
hasVGPRUses(const MachineInstr & MI)1064 bool hasVGPRUses(const MachineInstr &MI) const {
1065 const MachineFunction &MF = *MI.getParent()->getParent();
1066 const MachineRegisterInfo &MRI = MF.getRegInfo();
1067 return llvm::any_of(MI.explicit_uses(),
1068 [&MRI, this](const MachineOperand &MO) {
1069 return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
1070 }
1071
  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// This function is used to determine if an instruction can be safely
  /// executed under EXEC = 0 without hardware error, indeterminate results,
  /// and/or visible effects on future vector execution or outside the shader.
  /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
  /// used in removing branches over short EXEC = 0 sequences.
  /// As such it embeds certain assumptions which may not apply to every case
  /// of EXEC = 0 execution.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
1087
  /// \returns true if the integer value \p Imm can be encoded as an inline
  /// constant.
  bool isInlineConstant(const APInt &Imm) const;

  /// \returns true if the floating-point value \p Imm can be encoded as an
  /// inline constant.
  bool isInlineConstant(const APFloat &Imm) const;

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4-bytes; this function assumes that it
  // will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const {
    assert(!MO.isReg() && "isInlineConstant called on register operand!");
    if (!MO.isImm())
      return false;
    return isInlineConstant(MO.getImm(), OperandType);
  }
  bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const;

  /// Convenience overload taking the operand type from \p OpInfo.
  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if \p UseMO, were it substituted with \p DefMO in \p MI,
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    // Operands beyond the MCInstrDesc (e.g. implicit operands) carry no
    // operand-type information to check against.
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  /// \returns true if \p ImmVal placed at operand index \p OpIdx of \p MI
  /// would encode as an inline immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        int64_t ImmVal) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      // Copies carry no operand-type info; derive the type from the operand
      // size instead.
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(ImmVal, OpType);
    }

    return isInlineConstant(ImmVal, MI.getDesc().operands()[OpIdx].OperandType);
  }

  /// Convenience overload extracting the immediate from \p MO.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    return isInlineConstant(MI, OpIdx, MO.getImm());
  }

  /// Convenience overload taking the instruction and index from \p MO itself.
  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }
1157
  /// \returns true if immediate \p MO is a legal operand for slot \p OpNo of
  /// \p MI.
  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Convenience overload taking the operand and its info from \p MI.
  bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
                       int OpIdx) const {
    return usesConstantBus(MRI, MI.getOperand(OpIdx),
                           MI.getDesc().operands()[OpIdx]);
  }

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  /// \returns true if the modifier operand \p OpName of \p MI is set.
  bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
  /// \returns true if any modifier operand of \p MI is set.
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  /// \returns true if \p MI can be shrunk to a 32-bit encoding.
  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  /// Build and \returns the shrunk (32-bit encoded) form of \p MI using
  /// \p NewOpcode.
  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  /// \returns the VALU opcode equivalent to \p MI's opcode.
  unsigned getVALUOp(const MachineInstr &MI) const;

  /// Save EXEC into \p Reg and set EXEC to all ones, preserving SCC if
  /// \p IsSCCLive.
  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  /// Restore EXEC from \p Reg (counterpart of insertScratchExecCopy).
  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, inferred from the other
  /// operands.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;
1212 // instruction opcode.
  /// Return the size in bytes of the operand OpNo on the given
  // instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      // A subregister use only covers the subregister's width, not the full
      // register class size.
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }
1236
  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description or operand index.
  /// The operand index version provide more legality checks
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;
  bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
                         const MachineOperand &MO) const;
  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
  /// for the dst register (\p DstRC) can be optionally supplied. This function
  /// can only be used when it is known that the value in SrcReg is same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI,
                              const TargetRegisterClass *DstRC = nullptr) const;

  /// Legalize operands of an SMRD instruction.
  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  /// Legalize operands of a FLAT instruction.
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Legalize \p Op to register class \p DstRC by inserting a copy (or an
  /// equivalent) at \p I in \p InsertMBB.
  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Fix operands in Inst to fix 16bit SALU to VALU lowering.
  void legalizeOperandsVALUt16(MachineInstr &Inst,
                               MachineRegisterInfo &MRI) const;
  void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
                               MachineRegisterInfo &MRI) const;

  /// Replace the instructions opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstructions
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  /// Worker for moveToVALU handling a single instruction \p Inst.
  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  /// Insert a return (S_ENDPGM-style terminator sequence) into \p MBB.
  void insertReturn(MachineBasicBlock &MBB) const;

  /// Build instructions that simulate the behavior of a `s_trap 2` instruction
  /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
  /// interpreted as a nop.
  MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
                                         MachineBasicBlock &MBB,
                                         MachineInstr &MI,
                                         const DebugLoc &DL) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);
1338
  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \c Op, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI,
                                  AMDGPU::OpName OperandName) const;

  /// Const overload; forwards to the non-const version.
  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        AMDGPU::OpName OperandName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OperandName);
  }
1350
1351 /// Get required immediate operand
getNamedImmOperand(const MachineInstr & MI,AMDGPU::OpName OperandName)1352 int64_t getNamedImmOperand(const MachineInstr &MI,
1353 AMDGPU::OpName OperandName) const {
1354 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
1355 return MI.getOperand(Idx).getImm();
1356 }
1357
  /// \returns the default buffer resource data format word.
  uint64_t getDefaultRsrcDataFormat() const;
  /// \returns words 2-3 of the scratch buffer resource descriptor.
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  /// NOTE(review): pseudoToMCOpcode can return -1; callers are presumably
  /// expected to only pass opcodes with a valid MC mapping.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }
1369
  /// \returns the frame register operand's register number if \p MI accesses
  /// the stack, filling in \p FrameIndex; 0 otherwise.
  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  /// Like isStackAccess, but for SGPR spill instructions.
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  /// \returns the total size in bytes of the instructions in bundle \p MI.
  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  /// \returns true if \p MI may access the flat address space.
  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  /// \returns true if the subtarget uses wave32 execution masks.
  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  /// Overload for post-RA use; scavenges a carry register via \p RS if needed.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  /// \returns true if \p Opcode is a kill-style terminator pseudo.
  static bool isKillTerminator(unsigned Opcode);
  /// \returns the terminator descriptor for kill pseudo \p Opcode.
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  /// \returns true if \p Imm is a legal immediate offset for MUBUF.
  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  /// \returns the maximum encodable MUBUF immediate offset for \p ST.
  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  /// Split \p Imm into an encodable immediate \p ImmOffset and a register
  /// part \p SOffset. \returns true on success.
  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;

  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction with the given \p FlatVariant.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// Returns true if negative offsets are allowed for the given \p FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  /// Add the implicit operands (e.g. exec) required by \p MI's opcode.
  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  /// Uniformity classification for generic (G_*) opcodes.
  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;
1496
  /// Lazily construct and return the AMDGPU MIR formatter.
  /// NOTE(review): lazy init with no synchronization — presumably only called
  /// from a single thread; confirm if that assumption ever changes.
  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter)
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  /// \returns the shader-type value used by DS instructions for \p MF.
  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  /// \returns the cached scheduling model.
  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce operand's \p OpName even alignment if required by target.
  // This is used if an operand is a 32 bit register but needs to be aligned
  // regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const;
1511 };
1512
1513 /// \brief Returns true if a reg:subreg pair P has a TRC class
isOfRegClass(const TargetInstrInfo::RegSubRegPair & P,const TargetRegisterClass & TRC,MachineRegisterInfo & MRI)1514 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1515 const TargetRegisterClass &TRC,
1516 MachineRegisterInfo &MRI) {
1517 auto *RC = MRI.getRegClass(P.Reg);
1518 if (!P.SubReg)
1519 return RC == &TRC;
1520 auto *TRI = MRI.getTargetRegisterInfo();
1521 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1522 }
1523
1524 /// \brief Create RegSubRegPair from a register MachineOperand
1525 inline
getRegSubRegPair(const MachineOperand & O)1526 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1527 assert(O.isReg());
1528 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1529 }
1530
/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
1555
namespace AMDGPU {

// Generated opcode-mapping queries. Each takes an opcode and returns the
// related opcode, or -1 when no mapping exists.

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns earlyclobber version of a MAC MFMA if it exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns Version of an MFMA instruction which uses AGPRs for srcC and
/// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
LLVM_READONLY
int getMFMASrcCVDstAGPROp(uint16_t Opcode);

/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

// Buffer resource descriptor field constants.
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU
1646
namespace AMDGPU {
// Target-specific assembly-comment flag bits, starting at
// MachineInstr::TAsmComments.
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU
1653
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1672
1673 } // end namespace llvm
1674
1675 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1676