//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
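
// Worked example of the helpers above: getBitMask(4, 2) == 0x30,
// packBits(0x3, 0, 4, 2) == 0x30, and unpackBits(0x30, 4, 2) == 0x3.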

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
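
// Taken together, the helpers above describe the S_WAITCNT simm16 layout:
// pre-GFX11 it is vmcnt[3:0], expcnt[6:4], lgkmcnt[11:8] (GFX10 widens
// lgkmcnt to [13:8]; GFX9 and GFX10 add vmcnt high bits in [15:14]), while
// GFX11 uses expcnt[2:0], lgkmcnt[9:4] and vmcnt[15:10].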

/// \returns Loadcnt bit width
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}
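
// On GFX12+ these widths describe the combined S_WAIT encodings; e.g. the
// S_WAIT_LOADCNT_DSCNT simm16 packs dscnt in [5:0] and loadcnt at bit 8.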

/// \returns VaSdst bit width
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width
inline unsigned getHoldCntWidth() { return 1; }

/// \returns HoldCnt bit shift
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
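
// For example, assuming a 2D dim (2 coordinates, 4 gradients) and no extra
// arguments: image_sample_d needs 2 + 4 = 6 address words with 32-bit
// addresses, but only divideCeil(2, 2) + alignTo<2>(4 / 2) = 3 words when A16
// is enabled on a subtarget without G16 support.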

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
  bool CanBeVOPD3X;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
  bool VOPD3;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

struct DPMACCInstructionInfo {
  uint16_t Opcode;
  bool IsDPMACCInstruction;
};

struct FP4FP8DstByteSelInfo {
  uint16_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->IsBufferInv;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->tfe;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info && Info->IsBuffer;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_dgemm;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_gfx940_xdl;
}

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
  switch (EncodingVal) {
  case MFMAScaleFormats::FP6_E2M3:
  case MFMAScaleFormats::FP6_E3M2:
    return 6;
  case MFMAScaleFormats::FP4_E2M1:
    return 4;
  case MFMAScaleFormats::FP8_E4M3:
  case MFMAScaleFormats::FP8_E5M2:
  default:
    return 8;
  }

  llvm_unreachable("covered switch over mfma scale formats");
}
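
// Note that the register counts above simply scale with the element width:
// FP8 operands take 8 VGPRs, FP6 operands 6 and FP4 operands 4.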

const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
  uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
    return SIEncodingFamily::GFX1250;
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here, it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isAsyncStore(unsigned Opc) {
  return false; // placeholder before async store implementation.
}

bool isTensorStore(unsigned Opc) {
  return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
         Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
}

unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This will default to returning TH_TYPE_LOAD when neither MayStore nor
  // MayLoad flag is present which is the case with instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info && Info->IsTrue16;
}

FPType getFPDstSelType(unsigned Opc) {
  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
  if (!Info)
    return FPType::None;
  if (Info->HasFP8DstByteSel)
    return FPType::FP8;
  if (Info->HasFP4DstByteSel)
    return FPType::FP4;

  return FPType::None;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}
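
// A nonzero result is the BITOP3 table encoding used when the bitwise op is
// folded into V_BITOP3_B32 for VOPD3 (see getCanBeVOPD and getVOPDFull);
// zero means the opcode is not convertible.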

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
  const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;
  Opcode = OpDesc.getOpcode();

  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)   ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)  ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
                                                                           : 1;
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
    // CNDMASK is an awkward exception, it has FP modifiers, but not FP
    // operands.
    NumVOPD3Mods = 2;
    if (IsVOP3)
      SrcOperandsNum = 3;
  } else if (isSISrcFPOperand(OpDesc,
                              getNamedOperandIdx(Opcode, OpName::src0))) {
    // All FP VOPD instructions have Neg modifiers for all operands except
    // for tied src2.
    NumVOPD3Mods = SrcOperandsNum;
    if (HasSrc2Acc)
      --NumVOPD3Mods;
  }

  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
    return;

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

int ComponentProps::getBitOp3OperandIdx() const {
  return getNamedOperandIdx(Opcode, OpName::bitop3);
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx,
    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
    bool VOPD3) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
                               CompInfo[ComponentIndex::X].isVOP3());
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
                               CompInfo[ComponentIndex::Y].isVOP3());

  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
                                   unsigned BanksMask) -> bool {
    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
    if (!BaseX)
      BaseX = X;
    if (!BaseY)
      BaseY = Y;
    if ((BaseX & BanksMask) == (BaseY & BanksMask))
      return true;
    if (BaseX != X /* This is 64-bit register */ &&
        ((BaseX + 1) & BanksMask) == (BaseY & BanksMask))
      return true;
    if (BaseY != Y && (BaseX & BanksMask) == ((BaseY + 1) & BanksMask))
      return true;

    // If both registers are 64-bit, a bank conflict would already have been
    // detected while checking the first subregister.
    return false;
  };

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
      continue;

    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
      continue;

    if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands still shall not
      // overlap.
      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
        return CompOprIdx;
      if (VOPD3) // No need to check dst parity.
        continue;
    }

    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices
InstInfo::getRegIndices(unsigned CompIdx,
                        std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                        bool VOPD3) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx,
                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack emit a warning. Setting will remain set to
      // "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = getAddressableLocalMemorySize(STI);

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 65536;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 163840;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (!STI->getTargetTriple().isAMDGCN())
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             unsigned DynamicVGPRBlockSize,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  if (DynamicVGPRBlockSize != 0)
    return DynamicVGPRBlockSize;

  // Temporarily check the subtarget feature, until we fully switch to using
  // attributes.
  if (STI->getFeatureBits().test(FeatureDynamicVGPR))
    return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getArchVGPRAllocGranule() { return 4; }

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;

  // Temporarily check the subtarget feature, until we fully switch to using
  // attributes.
  if (DynamicVGPRBlockSize != 0 ||
      STI->getFeatureBits().test(FeatureDynamicVGPR))
    // On GFX12 we can allocate at most 8 blocks of VGPRs.
    return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize) {
  return getNumWavesPerEUWithNumVGPRs(
      NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
      getMaxWavesPerEU(STI), getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
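
// For example, with hypothetical wave64 gfx10.3-like numbers (512 total
// VGPRs, granule 8, at most 16 waves), 96 VGPRs round to 96 and yield
// min(512 / 96, 16) = 5 waves per EU.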

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   unsigned DynamicVGPRBlockSize,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs,
      getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
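    // wavefront_size holds log2 of the wave size: 2^5 = 32, 2^6 = 64.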
1474 KernelCode.wavefront_size = 5;
1475 KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1476 } else {
1477 KernelCode.wavefront_size = 6;
1478 }

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
    return {Attr->first, Attr->second.value_or(Default.second)};
  return Default;
}

std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return std::nullopt;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, std::optional<unsigned>> Ints;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return std::nullopt;
  }
  unsigned Second = 0;
  if (Strs.second.trim().getAsInteger(0, Second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return std::nullopt;
    }
  } else {
    Ints.second = Second;
  }

  return Ints;
}
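
// Illustrative sketch (not part of the build): for a function carrying an
// attribute such as "amdgpu-waves-per-eu"="2,4", the pair overload above
// yields {2, 4}; for a bare "2" with OnlyFirstRequired set, the missing
// second element falls back to Default.second instead of being an error.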

SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal) {
  std::optional<SmallVector<unsigned>> R =
      getIntegerVecAttribute(F, Name, Size);
  return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
}

std::optional<SmallVector<unsigned>>
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
  assert(Size > 2);
  LLVMContext &Ctx = F.getContext();

  Attribute A = F.getFnAttribute(Name);
  if (!A.isValid())
    return std::nullopt;
  if (!A.isStringAttribute()) {
    Ctx.emitError(Name + " is not a string attribute");
    return std::nullopt;
  }

  SmallVector<unsigned> Vals(Size);

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return std::nullopt;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return std::nullopt;
  }
  return Vals;
}
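
// Illustrative sketch (not part of the build): with Size == 3, a value such
// as "amdgpu-max-num-workgroups"="16,8,1" parses to {16, 8, 1}; too few or
// too many comma-separated integers is diagnosed as an error rather than
// being padded or truncated.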

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getXcntBitMask(const IsaVersion &Version) {
  return (1 << getXcntBitWidth(Version.Major, Version.Minor)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.LoadCnt, Decoded.ExpCnt, Decoded.DsCnt);
}
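
// Illustrative sketch (not part of the build): assuming the gfx9 layout from
// the shift/width helpers above (vmcnt in [3:0] and [15:14], expcnt in [6:4],
// lgkmcnt in [11:8]), the encode/decode pair round-trips:
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/3, /*Expcnt=*/2,
//                                /*Lgkmcnt=*/1); // 0x123
//   assert(decodeVmcnt(Version, Enc) == 3 && decodeExpcnt(Version, Enc) == 2 &&
//          decodeLgkmcnt(Version, Enc) == 1);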

static unsigned getCombinedCountBitMask(const IsaVersion &Version,
                                        bool IsStore) {
  unsigned Dscnt = getBitMask(getDscntBitShift(Version.Major),
                              getDscntBitWidth(Version.Major));
  if (IsStore) {
    unsigned Storecnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                   getStorecntBitWidth(Version.Major));
    return Dscnt | Storecnt;
  }
  unsigned Loadcnt = getBitMask(getLoadcntStorecntBitShift(Version.Major),
                                getLoadcntBitWidth(Version.Major));
  return Dscnt | Loadcnt;
}

Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
  Waitcnt Decoded;
  Decoded.LoadCnt =
      unpackBits(LoadcntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getLoadcntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(LoadcntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
  Waitcnt Decoded;
  Decoded.StoreCnt =
      unpackBits(StorecntDscnt, getLoadcntStorecntBitShift(Version.Major),
                 getStorecntBitWidth(Version.Major));
  Decoded.DsCnt = unpackBits(StorecntDscnt, getDscntBitShift(Version.Major),
                             getDscntBitWidth(Version.Major));
  return Decoded;
}

static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
                              unsigned Loadcnt) {
  return packBits(Loadcnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getLoadcntBitWidth(Version.Major));
}

static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
                               unsigned Storecnt) {
  return packBits(Storecnt, Waitcnt, getLoadcntStorecntBitShift(Version.Major),
                  getStorecntBitWidth(Version.Major));
}

static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
                            unsigned Dscnt) {
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
}

static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                                   unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, false);
  Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
}

static unsigned encodeStorecntDscnt(const IsaVersion &Version,
                                    unsigned Storecnt, unsigned Dscnt) {
  unsigned Waitcnt = getCombinedCountBitMask(Version, true);
  Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
  Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
  return Waitcnt;
}

unsigned encodeStorecntDscnt(const IsaVersion &Version,
                             const Waitcnt &Decoded) {
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
}
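
// Illustrative sketch (not part of the build): assuming the gfx12 layout of
// dscnt in [5:0] and loadcnt/storecnt in [13:8], the combined immediate used
// by s_wait_loadcnt_dscnt would encode as:
//   Waitcnt W;
//   W.LoadCnt = 1;
//   W.DsCnt = 2;
//   unsigned Enc = encodeLoadcntDscnt(Version, W); // 0x102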

//===----------------------------------------------------------------------===//
// Custom Operand Values
//===----------------------------------------------------------------------===//

static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
                                                int Size,
                                                const MCSubtargetInfo &STI) {
  unsigned Enc = 0;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.isSupported(STI))
      Enc |= Op.encode(Op.Default);
  }
  return Enc;
}

static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
                                            int Size, unsigned Code,
                                            bool &HasNonDefaultVal,
                                            const MCSubtargetInfo &STI) {
  unsigned UsedOprMask = 0;
  HasNonDefaultVal = false;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (!Op.isSupported(STI))
      continue;
    UsedOprMask |= Op.getMask();
    unsigned Val = Op.decode(Code);
    if (!Op.isValid(Val))
      return false;
    HasNonDefaultVal |= (Val != Op.Default);
  }
  return (Code & ~UsedOprMask) == 0;
}

static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
                                unsigned Code, int &Idx, StringRef &Name,
                                unsigned &Val, bool &IsDefault,
                                const MCSubtargetInfo &STI) {
  while (Idx < Size) {
    const auto &Op = Opr[Idx++];
    if (Op.isSupported(STI)) {
      Name = Op.Name;
      Val = Op.decode(Code);
      IsDefault = (Val == Op.Default);
      return true;
    }
  }

  return false;
}

static int encodeCustomOperandVal(const CustomOperandVal &Op,
                                  int64_t InputVal) {
  if (InputVal < 0 || InputVal > Op.Max)
    return OPR_VAL_INVALID;
  return Op.encode(InputVal);
}

static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
                               const StringRef Name, int64_t InputVal,
                               unsigned &UsedOprMask,
                               const MCSubtargetInfo &STI) {
  int InvalidId = OPR_ID_UNKNOWN;
  for (int Idx = 0; Idx < Size; ++Idx) {
    const auto &Op = Opr[Idx];
    if (Op.Name == Name) {
      if (!Op.isSupported(STI)) {
        InvalidId = OPR_ID_UNSUPPORTED;
        continue;
      }
      auto OprMask = Op.getMask();
      if (OprMask & UsedOprMask)
        return OPR_ID_DUPLICATE;
      UsedOprMask |= OprMask;
      return encodeCustomOperandVal(Op, InputVal);
    }
  }
  return InvalidId;
}

//===----------------------------------------------------------------------===//
// DepCtr
//===----------------------------------------------------------------------===//

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
  static int Default = -1;
  if (Default == -1)
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
  return Default;
}

bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI) {
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
}

bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI) {
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
}

int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI) {
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
}

unsigned decodeFieldVmVsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned decodeFieldVaVdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned decodeFieldSaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned decodeFieldVaSdst(unsigned Encoded) {
  return unpackBits(Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
}

unsigned decodeFieldVaVcc(unsigned Encoded) {
  return unpackBits(Encoded, getVaVccBitShift(), getVaVccBitWidth());
}

unsigned decodeFieldVaSsrc(unsigned Encoded) {
  return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
}

unsigned decodeFieldHoldCnt(unsigned Encoded) {
  return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth());
}

unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
}

unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}

unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
}

unsigned encodeFieldVaVdst(unsigned VaVdst) {
  return encodeFieldVaVdst(0xffff, VaVdst);
}

unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
}

unsigned encodeFieldSaSdst(unsigned SaSdst) {
  return encodeFieldSaSdst(0xffff, SaSdst);
}

unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
  return packBits(VaSdst, Encoded, getVaSdstBitShift(), getVaSdstBitWidth());
}

unsigned encodeFieldVaSdst(unsigned VaSdst) {
  return encodeFieldVaSdst(0xffff, VaSdst);
}

unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
  return packBits(VaVcc, Encoded, getVaVccBitShift(), getVaVccBitWidth());
}

unsigned encodeFieldVaVcc(unsigned VaVcc) {
  return encodeFieldVaVcc(0xffff, VaVcc);
}

unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
  return packBits(VaSsrc, Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth());
}

unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
  return encodeFieldVaSsrc(0xffff, VaSsrc);
}

unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
  return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth());
}

unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
  return encodeFieldHoldCnt(0xffff, HoldCnt);
}
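
// Illustrative sketch (not part of the build): the single-argument encoders
// start from 0xffff, i.e. every depctr field at its all-ones "no wait"
// default, and overwrite just one field. Assuming vm_vsrc occupies bits
// [4:2], for example:
//   unsigned Enc = encodeFieldVmVsrc(0); // 0xffe3: an s_waitcnt_depctr
//                                        // immediate that only waits on
//                                        // vm_vsrc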

} // namespace DepCtr

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

// clang-format off
static constexpr ExpTgt ExpTgtInfo[] = {
  {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
  {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
  {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
  {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
  {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
  {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
  {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
};
// clang-format on

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disallow leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}
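
// Illustrative sketch (not part of the build): indexed targets concatenate
// the base name and a decimal index, so getTgtId("pos3") returns
// ET_POS0 + 3, while getTgtId("pos03") is rejected as ET_INVALID because of
// the leading zero.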

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  switch (Id) {
  case ET_NULL:
    return !isGFX11Plus(STI);
  case ET_POS4:
  case ET_PRIM:
    return isGFX10Plus(STI);
  case ET_DUAL_SRC_BLEND0:
  case ET_DUAL_SRC_BLEND1:
    return isGFX11Plus(STI);
  default:
    if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
      return !isGFX11Plus(STI);
    return true;
  }
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  const auto *lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
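
// Illustrative sketch (not part of the build): the data format sits in the
// low bits and the numeric format above it, so the pair round-trips through
// one combined field:
//   unsigned Dfmt, Nfmt;
//   decodeDfmtNfmt(encodeDfmtNfmt(4, 7), Dfmt, Nfmt); // Dfmt == 4, Nfmt == 7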

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Name == UfmtSymbolicGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
}

bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
  return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  if (isGFX11Plus(STI)) {
    for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX11[Id])
        return Id;
    }
  } else {
    for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
      if (Fmt == DfmtNfmt2UFmtGFX10[Id])
        return Id;
    }
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
  return (MsgId & ~(getMsgIdMask(STI))) == 0;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  if (msgRequiresOp(MsgId, STI)) {
    if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
      return false;

    return !getMsgOpName(MsgId, OpId, STI).empty();
  }

  return OpId == OP_NONE_;
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  if (!isGFX11Plus(STI)) {
    switch (MsgId) {
    case ID_GS_PreGFX11:
      return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
    case ID_GS_DONE_PreGFX11:
      return (OpId == OP_GS_NOP)
                 ? (StreamId == STREAM_ID_NONE_)
                 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
    }
  }
  return StreamId == STREAM_ID_NONE_;
}

bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
  return MsgId == ID_SYSMSG ||
         (!isGFX11Plus(STI) &&
          (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId,
                       const MCSubtargetInfo &STI) {
  return !isGFX11Plus(STI) &&
         (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
         OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI) {
  MsgId = Val & getMsgIdMask(STI);
  if (isGFX11Plus(STI)) {
    OpId = 0;
    StreamId = 0;
  } else {
    OpId = (Val & OP_MASK_) >> OP_SHIFT_;
    StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
  }
}

uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
  return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
}
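
// Illustrative sketch (not part of the build): pre-GFX11 s_sendmsg immediates
// pack the message id in the low bits with the operation and stream id above
// it, so asm such as sendmsg(MSG_GS, GS_OP_EMIT, 1) would assemble via
//   encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, 1)
// and decodeMsg() splits the same immediate back into its three parts.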

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // As a safe default always respond as if PS has color exports.
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
}

bool getHasDepthExport(const Function &F) {
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
}

unsigned getDynamicVGPRBlockSize(const Function &F) {
  unsigned BlockSize =
      F.getFnAttributeAsParsedInteger("amdgpu-dynamic-vgpr-block-size", 0);

  if (BlockSize == 16 || BlockSize == 32)
    return BlockSize;

  return 0;
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureXNACK);
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSRAMECC);
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureMIMG_R128) &&
         !STI.hasFeature(AMDGPU::FeatureR128A16);
}

bool hasA16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureA16);
}

bool hasG16(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureG16);
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
         !isSI(STI);
}

bool hasGDS(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGDS);
}

unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
  auto Version = getIsaVersion(STI.getCPU());
  if (Version.Major == 10)
    return Version.Minor >= 3 ? 13 : 5;
  if (Version.Major == 11)
    return 5;
  if (Version.Major >= 12)
    return HasSampler ? 4 : 5;
  return 0;
}

unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }

bool isSI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureSeaIslands);
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX9);
}

bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI);
}

bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
}

bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9(STI) || isGFX10(STI);
}

bool isGFX8Plus(const MCSubtargetInfo &STI) {
  return isVI(STI) || isGFX9Plus(STI);
}

bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10);
}

bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
  return isGFX10(STI) || isGFX11(STI);
}

bool isGFX10Plus(const MCSubtargetInfo &STI) {
  return isGFX10(STI) || isGFX11Plus(STI);
}

bool isGFX11(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX11);
}

bool isGFX11Plus(const MCSubtargetInfo &STI) {
  return isGFX11(STI) || isGFX12Plus(STI);
}

bool isGFX12(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
}

bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }

bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }

bool isGFX1250(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
}

bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }

bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
  return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
}

bool isGFX10Before1030(const MCSubtargetInfo &STI) {
  return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
}

bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
}

bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
}

bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
}

bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
  return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
}

bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}

bool isGFX940(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
}

bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}

bool hasMAIInsts(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureMAIInsts);
}

bool hasVOPD(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureVOPD);
}

bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
}

unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
  return STI.hasFeature(AMDGPU::FeatureKernargPreload);
}

int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
                         int32_t ArgNumVGPR) {
  if (has90AInsts && ArgNumAGPR)
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
  return std::max(ArgNumVGPR, ArgNumAGPR);
}
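
// Illustrative sketch (not part of the build): on gfx90a-style targets the
// VGPRs and AGPRs share one register file, with the AGPR block starting at a
// 4-register boundary, so e.g.
//   getTotalNumVGPRs(true, /*ArgNumAGPR=*/3, /*ArgNumVGPR=*/6)
// returns alignTo(6, 4) + 3 == 11; when no AGPRs are used the two counts
// overlap and only the maximum is charged.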

bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const MCRegister FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
  return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
}

#define MAP_REG2REG                                                            \
  using namespace AMDGPU;                                                      \
  switch (Reg.id()) {                                                          \
  default:                                                                     \
    return Reg;                                                                \
    CASE_CI_VI(FLAT_SCR)                                                       \
    CASE_CI_VI(FLAT_SCR_LO)                                                    \
    CASE_CI_VI(FLAT_SCR_HI)                                                    \
    CASE_VI_GFX9PLUS(TTMP0)                                                    \
    CASE_VI_GFX9PLUS(TTMP1)                                                    \
    CASE_VI_GFX9PLUS(TTMP2)                                                    \
    CASE_VI_GFX9PLUS(TTMP3)                                                    \
    CASE_VI_GFX9PLUS(TTMP4)                                                    \
    CASE_VI_GFX9PLUS(TTMP5)                                                    \
    CASE_VI_GFX9PLUS(TTMP6)                                                    \
    CASE_VI_GFX9PLUS(TTMP7)                                                    \
    CASE_VI_GFX9PLUS(TTMP8)                                                    \
    CASE_VI_GFX9PLUS(TTMP9)                                                    \
    CASE_VI_GFX9PLUS(TTMP10)                                                   \
    CASE_VI_GFX9PLUS(TTMP11)                                                   \
    CASE_VI_GFX9PLUS(TTMP12)                                                   \
    CASE_VI_GFX9PLUS(TTMP13)                                                   \
    CASE_VI_GFX9PLUS(TTMP14)                                                   \
    CASE_VI_GFX9PLUS(TTMP15)                                                   \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1)                                              \
    CASE_VI_GFX9PLUS(TTMP2_TTMP3)                                              \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5)                                              \
    CASE_VI_GFX9PLUS(TTMP6_TTMP7)                                              \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9)                                              \
    CASE_VI_GFX9PLUS(TTMP10_TTMP11)                                            \
    CASE_VI_GFX9PLUS(TTMP12_TTMP13)                                            \
    CASE_VI_GFX9PLUS(TTMP14_TTMP15)                                            \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3)                                  \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7)                                  \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11)                                \
    CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15)                              \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7)          \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11)        \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15)    \
    CASE_VI_GFX9PLUS(                                                          \
        TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
    CASE_GFXPRE11_GFX11PLUS(M0)                                                \
    CASE_GFXPRE11_GFX11PLUS(SGPR_NULL)                                         \
    CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL)                         \
  }

#define CASE_CI_VI(node)                                                       \
  assert(!isSI(STI));                                                          \
  case node:                                                                   \
    return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node)                                                 \
  case node:                                                                   \
    return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

#define CASE_GFXPRE11_GFX11PLUS(node)                                          \
  case node:                                                                   \
    return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)                               \
  case node:                                                                   \
    return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;

MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO

#define CASE_CI_VI(node)                                                       \
  case node##_ci:                                                              \
  case node##_vi:                                                              \
    return node;
#define CASE_VI_GFX9PLUS(node)                                                 \
  case node##_vi:                                                              \
  case node##_gfx9plus:                                                        \
    return node;
#define CASE_GFXPRE11_GFX11PLUS(node)                                          \
  case node##_gfx11plus:                                                       \
  case node##_gfxpre11:                                                        \
    return node;
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)

MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }

bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef CASE_GFXPRE11_GFX11PLUS
#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
         OpType <= AMDGPU::OPERAND_KIMM_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.operands()[OpNo].OperandType;
  return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
         (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
          OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_16RegClassID:
  case AMDGPU::VGPR_16_Lo128RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
  case AMDGPU::SReg_128_XNULLRegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
  case AMDGPU::SReg_256_XNULLRegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.operands()[OpNo].RegClass;
  return getRegBitWidth(RCID) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
         (Val == llvm::bit_cast<uint64_t>(1.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
         (Val == llvm::bit_cast<uint64_t>(0.5)) ||
         (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
         (Val == llvm::bit_cast<uint64_t>(2.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
         (Val == llvm::bit_cast<uint64_t>(4.0)) ||
         (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
         (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
         (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
         (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;
  if (isInlinableIntLiteral(Literal))
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3F00 || // 0.5
         Val == 0xBF00 || // -0.5
         Val == 0x3F80 || // 1.0
         Val == 0xBF80 || // -1.0
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4080 || // 4.0
         Val == 0xC080 || // -4.0
         Val == 0x3E22;   // 1.0 / (2.0 * pi)
}

bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
  return isInlinableLiteral32(Literal, HasInv2Pi);
}

bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;
  if (isInlinableIntLiteral(Literal))
    return true;
  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}
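
// Illustrative sketch (not part of the build): the magic constants above are
// IEEE half-precision bit patterns, so for example
//   isInlinableLiteralFP16(0x3C00, /*HasInv2Pi=*/true); // true: 1.0 in f16
//   isInlinableLiteralFP16(0x3555, /*HasInv2Pi=*/true); // false: ~0.333 is
//                                                       // not an inline value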

std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  // Unfortunately, the Instruction Set Architecture Reference Guide is
  // misleading about how the inline operands work for (packed) 16-bit
  // instructions. In a nutshell, the actual HW behavior is:
  //
  //  - integer encodings (-16 .. 64) are always produced as sign-extended
  //    32-bit values
  //  - float encodings are produced as:
  //    - for F16 instructions: corresponding half-precision float values in
  //      the LSBs, 0 in the MSBs
  //    - for UI16 instructions: corresponding single-precision float value
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 + std::abs(Signed);

  if (IsFloat) {
    // clang-format off
    switch (Literal) {
    case 0x3800: return 240; // 0.5
    case 0xB800: return 241; // -0.5
    case 0x3C00: return 242; // 1.0
    case 0xBC00: return 243; // -1.0
    case 0x4000: return 244; // 2.0
    case 0xC000: return 245; // -2.0
    case 0x4400: return 246; // 4.0
    case 0xC400: return 247; // -4.0
    case 0x3118: return 248; // 1.0 / (2.0 * pi)
    default: break;
    }
    // clang-format on
  } else {
    // clang-format off
    switch (Literal) {
    case 0x3F000000: return 240; // 0.5
    case 0xBF000000: return 241; // -0.5
    case 0x3F800000: return 242; // 1.0
    case 0xBF800000: return 243; // -1.0
    case 0x40000000: return 244; // 2.0
    case 0xC0000000: return 245; // -2.0
    case 0x40800000: return 246; // 4.0
    case 0xC0800000: return 247; // -4.0
    case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
    default: break;
    }
    // clang-format on
  }

  return {};
}
2937
2938 // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2939 // or nullopt.
getInlineEncodingV2I16(uint32_t Literal)2940 std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2941 return getInlineEncodingV216(false, Literal);
2942 }
2943
2944 // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2945 // or nullopt.
getInlineEncodingV2BF16(uint32_t Literal)2946 std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2947 int32_t Signed = static_cast<int32_t>(Literal);
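  // The integer path is identical to getInlineEncodingV216; only the float
  // bit patterns below differ (bfloat16 rather than half precision).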
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 + std::abs(Signed);

  // clang-format off
  switch (Literal) {
  case 0x3F00: return 240; // 0.5
  case 0xBF00: return 241; // -0.5
  case 0x3F80: return 242; // 1.0
  case 0xBF80: return 243; // -1.0
  case 0x4000: return 244; // 2.0
  case 0xC000: return 245; // -2.0
  case 0x4080: return 246; // 4.0
  case 0xC080: return 247; // -4.0
  case 0x3E22: return 248; // 1.0 / (2.0 * pi)
  default: break;
  }
  // clang-format on

  return std::nullopt;
}

// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  return getInlineEncodingV216(true, Literal);
}

// Whether the given literal can be inlined for a V_PK_* instruction.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
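  // Dispatch on the packed operand type; each 16-bit element type has its own
  // inline constant table.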
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    return getInlineEncodingV216(false, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return getInlineEncodingV216(true, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    return isInlinableLiteralV2BF16(Literal);
  default:
    llvm_unreachable("bad packed operand type");
  }
}

// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
bool isInlinableLiteralV2I16(uint32_t Literal) {
  return getInlineEncodingV2I16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
bool isInlinableLiteralV2BF16(uint32_t Literal) {
  return getInlineEncodingV2BF16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
bool isInlinableLiteralV2F16(uint32_t Literal) {
  return getInlineEncodingV2F16(Literal).has_value();
}

bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
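  // A 64-bit FP literal fits in the 32-bit literal field only when its low 32
  // bits are zero: the encoded value supplies the high dword and the low
  // dword is implicitly zero.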
  if (IsFP64)
    return !Lo_32(Val);

  return isUInt<32>(Val) || isInt<32>(Val);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
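  // SMEM immediate offset widths: GFX12+ has a 23-bit unsigned (24-bit
  // signed) field; GCN3 and GFX10+ encode a 20-bit unsigned byte offset;
  // older targets encode an 8-bit unsigned dword offset.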
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST))
    return isInt<24>(EncodedOffset);

  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
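  // Targets without byte-addressed SMEM encode the offset in dword units;
  // e.g. a byte offset of 40 is encoded as 10.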
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
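  // For example, with signed immediate offsets and no SOffset, a ByteOffset
  // of -8 is rejected here even though it would fit in the encoding.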
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24 bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}

std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
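  // Only CI provides the 32-bit literal SMRD offset encoding, and the offset
  // is still expressed in dword units.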
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (AMDGPU::isGFX10(ST))
    return 12;

  if (AMDGPU::isGFX12(ST))
    return 24;
  return 13;
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
                                BitsPerComp, NumComponents, NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
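  // Scan vdst and src0..src2 for operands constrained to a 64-bit VGPR
  // register class (either alignment variant).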
  for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1,
                      OpName::src2}) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
        OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
  return hasAny64BitVGPROperands(OpDesc);
}

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  // Currently this is 128 for all subtargets.
  return 128;
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm