xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "AMDGPU.h"
13 #include "AMDKernelCodeT.h"
14 #include "SIDefines.h"
15 #include "llvm/IR/CallingConv.h"
16 #include "llvm/MC/MCInstrDesc.h"
17 #include "llvm/Support/AMDHSAKernelDescriptor.h"
18 #include "llvm/Support/Alignment.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/TargetParser.h"
22 #include <cstdint>
23 #include <string>
24 #include <utility>
25 
26 namespace llvm {
27 
28 class Argument;
29 class Function;
30 class GCNSubtarget;
31 class GlobalValue;
32 class MCRegisterClass;
33 class MCRegisterInfo;
34 class MCSubtargetInfo;
35 class StringRef;
36 class Triple;
37 
38 namespace AMDGPU {
39 
/// One record of GCN buffer-format information, as handed out by the
/// getGcnBufferFormatInfo() lookups declared later in this header (one lookup
/// takes BitsPerComp/NumComponents/NumFormat, the other takes Format).
/// NOTE(review): field order is presumably relied on by generated table
/// initializers -- confirm before reordering.
40 struct GcnBufferFormatInfo {
41   unsigned Format;        ///< Format value; the key of the by-Format lookup.
42   unsigned BitsPerComp;   ///< Bits per component (lookup key part).
43   unsigned NumComponents; ///< Number of components (lookup key part).
44   unsigned NumFormat;     ///< Numeric format (lookup key part).
45   unsigned DataFormat;    ///< Data format; NOTE(review): encoding not shown here.
46 };
47 
48 #define GET_MIMGBaseOpcode_DECL
49 #define GET_MIMGDim_DECL
50 #define GET_MIMGEncoding_DECL
51 #define GET_MIMGLZMapping_DECL
52 #define GET_MIMGMIPMapping_DECL
53 #include "AMDGPUGenSearchableTables.inc"
54 
55 namespace IsaInfo {
56 
/// Fixed SGPR counts shared by the IsaInfo queries.
57 enum {
58   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
59   // doesn't spill SGPRs as much as when 80 is set.
60   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // NOTE(review): presumably the number of SGPRs set aside for the trap
  // handler -- confirm against the users of this constant.
61   TRAP_NUM_SGPRS = 16
62 };
63 
64 /// Streams isa version string for given subtarget \p STI into \p Stream.
65 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
66 
67 /// \returns True if given subtarget \p STI supports code object version 3,
68 /// false otherwise.
69 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
70 
71 /// \returns Wavefront size for given subtarget \p STI.
72 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
73 
74 /// \returns Local memory size in bytes for given subtarget \p STI.
75 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
76 
77 /// \returns Number of execution units per compute unit for given subtarget \p
78 /// STI.
79 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
80 
81 /// \returns Maximum number of work groups per compute unit for given subtarget
82 /// \p STI and limited by given \p FlatWorkGroupSize.
83 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
84                                unsigned FlatWorkGroupSize);
85 
86 /// \returns Minimum number of waves per execution unit for given subtarget \p
87 /// STI.
88 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
89 
90 /// \returns Maximum number of waves per execution unit for given subtarget \p
91 /// STI without any kind of limitation.
92 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
93 
94 /// \returns Number of waves per execution unit required to support the given \p
95 /// FlatWorkGroupSize.
96 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
97                                    unsigned FlatWorkGroupSize);
98 
99 /// \returns Minimum flat work group size for given subtarget \p STI.
100 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
101 
102 /// \returns Maximum flat work group size for given subtarget \p STI.
103 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
104 
105 /// \returns Number of waves per work group for given subtarget \p STI and
106 /// \p FlatWorkGroupSize.
107 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
108                               unsigned FlatWorkGroupSize);
109 
110 /// \returns SGPR allocation granularity for given subtarget \p STI.
111 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
112 
113 /// \returns SGPR encoding granularity for given subtarget \p STI.
114 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
115 
116 /// \returns Total number of SGPRs for given subtarget \p STI.
117 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
118 
119 /// \returns Addressable number of SGPRs for given subtarget \p STI.
120 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
121 
122 /// \returns Minimum number of SGPRs that meets the given number of waves per
123 /// execution unit requirement for given subtarget \p STI.
124 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
125 
126 /// \returns Maximum number of SGPRs that meets the given number of waves per
127 /// execution unit requirement for given subtarget \p STI.
128 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
129                         bool Addressable);
130 
131 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
132 /// STI when the given special registers are used.
133 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
134                           bool FlatScrUsed, bool XNACKUsed);
135 
136 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
137 /// STI when the given special registers are used. XNACK is inferred from
138 /// \p STI.
139 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
140                           bool FlatScrUsed);
141 
142 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
143 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
144 /// register counts.
145 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
146 
147 /// \returns VGPR allocation granularity for given subtarget \p STI.
148 ///
149 /// For subtargets which support it, \p EnableWavefrontSize32 should match
150 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
151 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
152                              Optional<bool> EnableWavefrontSize32 = None);
153 
154 /// \returns VGPR encoding granularity for given subtarget \p STI.
155 ///
156 /// For subtargets which support it, \p EnableWavefrontSize32 should match
157 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
158 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
159                                 Optional<bool> EnableWavefrontSize32 = None);
160 
161 /// \returns Total number of VGPRs for given subtarget \p STI.
162 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
163 
164 /// \returns Addressable number of VGPRs for given subtarget \p STI.
165 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
166 
167 /// \returns Minimum number of VGPRs that meets given number of waves per
168 /// execution unit requirement for given subtarget \p STI.
169 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
170 
171 /// \returns Maximum number of VGPRs that meets given number of waves per
172 /// execution unit requirement for given subtarget \p STI.
173 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
174 
175 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
176 /// \p NumVGPRs are used.
177 ///
178 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
179 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
180 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
181                           Optional<bool> EnableWavefrontSize32 = None);
182 
183 } // end namespace IsaInfo
184 
185 LLVM_READONLY
186 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
187 
188 LLVM_READONLY
189 int getSOPPWithRelaxation(uint16_t Opcode);
190 
/// Per-base-opcode properties of a MIMG instruction, as returned by
/// getMIMGBaseOpcodeInfo().
/// NOTE(review): the meaning of the individual flags is inferred from their
/// names only; the defining tables are not visible in this header. Field order
/// is presumably fixed by generated table initializers -- confirm before
/// reordering.
191 struct MIMGBaseOpcodeInfo {
192   MIMGBaseOpcode BaseOpcode; ///< The base opcode this record describes.
193   bool Store;
194   bool Atomic;
195   bool AtomicX2;
196   bool Sampler;
197   bool Gather4;
198 
199   uint8_t NumExtraArgs;
200   bool Gradients;
201   bool G16;
202   bool Coordinates;
203   bool LodOrClampOrMip;
204   bool HasD16;
205 };
206 
207 LLVM_READONLY
208 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
209 
/// Properties of one MIMG dimension. Records are obtained through
/// getMIMGDimInfo() (by dimension enum), getMIMGDimInfoByEncoding() and
/// getMIMGDimInfoByAsmSuffix().
210 struct MIMGDimInfo {
211   MIMGDim Dim;           ///< The dimension this record describes.
212   uint8_t NumCoords;     ///< Number of coordinates.
213   uint8_t NumGradients;  ///< Number of gradients.
214   bool DA;               ///< NOTE(review): presumably "is array" -- confirm.
215   uint8_t Encoding;      ///< Value matched by getMIMGDimInfoByEncoding().
216   const char *AsmSuffix; ///< Text matched by getMIMGDimInfoByAsmSuffix().
217 };
218 
219 LLVM_READONLY
220 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
221 
222 LLVM_READONLY
223 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
224 
225 LLVM_READONLY
226 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
227 
/// Pairs a MIMG base opcode \c L with a base opcode \c LZ; records are looked
/// up by \c L through getMIMGLZMappingInfo().
/// NOTE(review): presumably LZ is the "level zero" variant of L -- confirm.
228 struct MIMGLZMappingInfo {
229   MIMGBaseOpcode L;
230   MIMGBaseOpcode LZ;
231 };
232 
/// Pairs a MIMG base opcode \c MIP with a base opcode \c NONMIP; records are
/// looked up by \c MIP through getMIMGMIPMappingInfo().
/// NOTE(review): presumably NONMIP is the variant without a mip operand --
/// confirm.
233 struct MIMGMIPMappingInfo {
234   MIMGBaseOpcode MIP;
235   MIMGBaseOpcode NONMIP;
236 };
237 
/// Pairs a MIMG base opcode \c G with a base opcode \c G16; records are looked
/// up by \c G through getMIMGG16MappingInfo().
/// NOTE(review): presumably G16 is the 16-bit-gradient variant of G -- confirm.
238 struct MIMGG16MappingInfo {
239   MIMGBaseOpcode G;
240   MIMGBaseOpcode G16;
241 };
242 
243 LLVM_READONLY
244 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
245 
246 LLVM_READONLY
247 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
248 
249 LLVM_READONLY
250 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
251 
252 LLVM_READONLY
253 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
254                   unsigned VDataDwords, unsigned VAddrDwords);
255 
256 LLVM_READONLY
257 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
258 
/// Description of one concrete MIMG opcode, as returned by getMIMGInfo().
/// The remaining fields correspond to the parameters of getMIMGOpcode()
/// (BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords).
259 struct MIMGInfo {
260   uint16_t Opcode;      ///< The concrete opcode this record describes.
261   uint16_t BaseOpcode;  ///< Base opcode of \c Opcode.
262   uint8_t MIMGEncoding; ///< MIMG encoding of \c Opcode.
263   uint8_t VDataDwords;  ///< Number of vdata dwords.
264   uint8_t VAddrDwords;  ///< Number of vaddr dwords.
265 };
266 
267 LLVM_READONLY
268 const MIMGInfo *getMIMGInfo(unsigned Opc);
269 
270 LLVM_READONLY
271 int getMTBUFBaseOpcode(unsigned Opc);
272 
273 LLVM_READONLY
274 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
275 
276 LLVM_READONLY
277 int getMTBUFElements(unsigned Opc);
278 
279 LLVM_READONLY
280 bool getMTBUFHasVAddr(unsigned Opc);
281 
282 LLVM_READONLY
283 bool getMTBUFHasSrsrc(unsigned Opc);
284 
285 LLVM_READONLY
286 bool getMTBUFHasSoffset(unsigned Opc);
287 
288 LLVM_READONLY
289 int getMUBUFBaseOpcode(unsigned Opc);
290 
291 LLVM_READONLY
292 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
293 
294 LLVM_READONLY
295 int getMUBUFElements(unsigned Opc);
296 
297 LLVM_READONLY
298 bool getMUBUFHasVAddr(unsigned Opc);
299 
300 LLVM_READONLY
301 bool getMUBUFHasSrsrc(unsigned Opc);
302 
303 LLVM_READONLY
304 bool getMUBUFHasSoffset(unsigned Opc);
305 
306 LLVM_READONLY
307 bool getSMEMIsBuffer(unsigned Opc);
308 
309 LLVM_READONLY
310 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
311                                                   uint8_t NumComponents,
312                                                   uint8_t NumFormat,
313                                                   const MCSubtargetInfo &STI);
314 LLVM_READONLY
315 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
316                                                   const MCSubtargetInfo &STI);
317 
318 LLVM_READONLY
319 int getMCOpcode(uint16_t Opcode, unsigned Gen);
320 
321 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
322                                const MCSubtargetInfo *STI);
323 
324 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
325     const MCSubtargetInfo *STI);
326 
327 bool isGroupSegment(const GlobalValue *GV);
328 bool isGlobalSegment(const GlobalValue *GV);
329 bool isReadOnlySegment(const GlobalValue *GV);
330 
331 /// \returns True if constants should be emitted to .text section for given
332 /// target triple \p TT, false otherwise.
333 bool shouldEmitConstantsToTextSection(const Triple &TT);
334 
335 /// \returns Integer value requested using \p F's \p Name attribute.
336 ///
337 /// \returns \p Default if attribute is not present.
338 ///
339 /// \returns \p Default and emits error if requested value cannot be converted
340 /// to integer.
341 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
342 
343 /// \returns A pair of integer values requested using \p F's \p Name attribute
344 /// in "first[,second]" format ("second" may be omitted only when
345 /// \p OnlyFirstRequired is true).
346 ///
347 /// \returns \p Default if attribute is not present.
348 ///
349 /// \returns \p Default and emits error if one of the requested values cannot be
350 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
351 /// not present.
352 std::pair<int, int> getIntegerPairAttribute(const Function &F,
353                                             StringRef Name,
354                                             std::pair<int, int> Default,
355                                             bool OnlyFirstRequired = false);
356 
357 /// Represents the counter values to wait for in an s_waitcnt instruction.
358 ///
359 /// Large values (including the maximum possible integer) can be used to
360 /// represent "don't care" waits.
361 struct Waitcnt {
362   unsigned VmCnt = ~0u;
363   unsigned ExpCnt = ~0u;
364   unsigned LgkmCnt = ~0u;
365   unsigned VsCnt = ~0u;
366 
367   Waitcnt() {}
368   Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
369       : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
370 
371   static Waitcnt allZero(const IsaVersion &Version) {
372     return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
373   }
374   static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
375 
376   bool hasWait() const {
377     return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
378   }
379 
380   bool dominates(const Waitcnt &Other) const {
381     return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
382            LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
383   }
384 
385   Waitcnt combined(const Waitcnt &Other) const {
386     return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
387                    std::min(LgkmCnt, Other.LgkmCnt),
388                    std::min(VsCnt, Other.VsCnt));
389   }
390 };
391 
392 /// \returns Vmcnt bit mask for given isa \p Version.
393 unsigned getVmcntBitMask(const IsaVersion &Version);
394 
395 /// \returns Expcnt bit mask for given isa \p Version.
396 unsigned getExpcntBitMask(const IsaVersion &Version);
397 
398 /// \returns Lgkmcnt bit mask for given isa \p Version.
399 unsigned getLgkmcntBitMask(const IsaVersion &Version);
400 
401 /// \returns Waitcnt bit mask for given isa \p Version.
402 unsigned getWaitcntBitMask(const IsaVersion &Version);
403 
404 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
405 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
406 
407 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
408 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
409 
410 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
411 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
412 
413 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
414 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
415 /// \p Lgkmcnt respectively.
416 ///
417 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
418 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
419 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
420 ///     \p Expcnt = \p Waitcnt[6:4]
421 ///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
422 ///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
423 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
424                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
425 
426 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
427 
428 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
429 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
430                      unsigned Vmcnt);
431 
432 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
433 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
434                       unsigned Expcnt);
435 
436 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
437 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
438                        unsigned Lgkmcnt);
439 
440 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
441 /// \p Version.
442 ///
443 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
444 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
445 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
446 ///     Waitcnt[6:4]   = \p Expcnt
447 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
448 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
449 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
450 ///
451 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
452 /// isa \p Version.
453 unsigned encodeWaitcnt(const IsaVersion &Version,
454                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
455 
456 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
457 
/// Helpers for hardware-register (hwreg) operands: lookup between names and
/// ids, validity checks, and packing/unpacking of an (Id, Offset, Width)
/// triple.
458 namespace Hwreg {
459 
/// Looks up the hwreg id for \p Name.
/// NOTE(review): the value returned for an unknown name is not visible in this
/// header -- check the definition.
460 LLVM_READONLY
461 int64_t getHwregId(const StringRef Name);
462 
/// \returns True if \p Id is a valid hwreg id for subtarget \p STI.
463 LLVM_READNONE
464 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
465 
/// Subtarget-independent form of the check above.
466 LLVM_READNONE
467 bool isValidHwreg(int64_t Id);
468 
469 LLVM_READNONE
470 bool isValidHwregOffset(int64_t Offset);
471 
472 LLVM_READNONE
473 bool isValidHwregWidth(int64_t Width);
474 
/// Packs \p Id, \p Offset and \p Width into a single value.
475 LLVM_READNONE
476 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
477 
/// \returns The name for hwreg \p Id on subtarget \p STI.
478 LLVM_READNONE
479 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
480 
/// Unpacks \p Val into the out-parameters \p Id, \p Offset and \p Width.
/// NOTE(review): presumably the exact inverse of encodeHwreg() -- confirm.
481 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
482 
483 } // namespace Hwreg
484 
/// Helpers for send-message operands: lookup between names and ids, validity
/// checks, and packing/unpacking of a (MsgId, OpId, StreamId) triple.
485 namespace SendMsg {
486 
/// Looks up the message id for \p Name.
/// NOTE(review): the value returned for an unknown name is not visible in this
/// header -- check the definition.
487 LLVM_READONLY
488 int64_t getMsgId(const StringRef Name);
489 
/// Looks up the operation id named \p Name within message \p MsgId.
490 LLVM_READONLY
491 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
492 
493 LLVM_READNONE
494 StringRef getMsgName(int64_t MsgId);
495 
496 LLVM_READNONE
497 StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
498 
/// Validity checks. \p Strict defaults to true on all three.
/// NOTE(review): what non-strict checking relaxes is not visible here.
499 LLVM_READNONE
500 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
501 
502 LLVM_READNONE
503 bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
504 
505 LLVM_READNONE
506 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
507 
508 LLVM_READNONE
509 bool msgRequiresOp(int64_t MsgId);
510 
511 LLVM_READNONE
512 bool msgSupportsStream(int64_t MsgId, int64_t OpId);
513 
/// Unpacks \p Val into the out-parameters \p MsgId, \p OpId and \p StreamId.
/// NOTE(review): presumably the exact inverse of encodeMsg() -- confirm.
514 void decodeMsg(unsigned Val,
515                uint16_t &MsgId,
516                uint16_t &OpId,
517                uint16_t &StreamId);
518 
/// Packs \p MsgId, \p OpId and \p StreamId into a single value.
519 LLVM_READNONE
520 uint64_t encodeMsg(uint64_t MsgId,
521                    uint64_t OpId,
522                    uint64_t StreamId);
523 
524 } // namespace SendMsg
525 
526 
527 unsigned getInitialPSInputAddr(const Function &F);
528 
529 LLVM_READNONE
530 bool isShader(CallingConv::ID CC);
531 
532 LLVM_READNONE
533 bool isCompute(CallingConv::ID CC);
534 
535 LLVM_READNONE
536 bool isEntryFunctionCC(CallingConv::ID CC);
537 
538 // FIXME: Remove this when calling conventions cleaned up
539 LLVM_READNONE
540 inline bool isKernel(CallingConv::ID CC) {
541   switch (CC) {
542   case CallingConv::AMDGPU_KERNEL:
543   case CallingConv::SPIR_KERNEL:
544     return true;
545   default:
546     return false;
547   }
548 }
549 
550 bool hasXNACK(const MCSubtargetInfo &STI);
551 bool hasSRAMECC(const MCSubtargetInfo &STI);
552 bool hasMIMG_R128(const MCSubtargetInfo &STI);
553 bool hasGFX10A16(const MCSubtargetInfo &STI);
554 bool hasG16(const MCSubtargetInfo &STI);
555 bool hasPackedD16(const MCSubtargetInfo &STI);
556 
557 bool isSI(const MCSubtargetInfo &STI);
558 bool isCI(const MCSubtargetInfo &STI);
559 bool isVI(const MCSubtargetInfo &STI);
560 bool isGFX9(const MCSubtargetInfo &STI);
561 bool isGFX10(const MCSubtargetInfo &STI);
562 bool isGCN3Encoding(const MCSubtargetInfo &STI);
563 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
564 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
565 
566 /// Is Reg - scalar register
567 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
568 
569 /// Is there any intersection between registers
570 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
571 
572 /// If \p Reg is a pseudo reg, return the correct hardware register given
573 /// \p STI otherwise return \p Reg.
574 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
575 
576 /// Convert hardware register \p Reg to a pseudo register
577 LLVM_READNONE
578 unsigned mc2PseudoReg(unsigned Reg);
579 
580 /// Can this operand also contain immediate values?
581 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
582 
583 /// Is this floating-point operand?
584 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
585 
586 /// Does this operand support only inlinable literals?
587 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
588 
589 /// Get the size in bits of a register from the register class with ID \p RCID.
590 unsigned getRegBitWidth(unsigned RCID);
591 
592 /// Get the size in bits of a register from the register class \p RC.
593 unsigned getRegBitWidth(const MCRegisterClass &RC);
594 
595 /// Get size of register operand
596 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
597                            unsigned OpNo);
598 
599 LLVM_READNONE
600 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
601   switch (OpInfo.OperandType) {
602   case AMDGPU::OPERAND_REG_IMM_INT32:
603   case AMDGPU::OPERAND_REG_IMM_FP32:
604   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
605   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
606   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
607   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
608     return 4;
609 
610   case AMDGPU::OPERAND_REG_IMM_INT64:
611   case AMDGPU::OPERAND_REG_IMM_FP64:
612   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
613   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
614     return 8;
615 
616   case AMDGPU::OPERAND_REG_IMM_INT16:
617   case AMDGPU::OPERAND_REG_IMM_FP16:
618   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
619   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
620   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
621   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
622   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
623   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
624   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
625   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
626   case AMDGPU::OPERAND_REG_IMM_V2INT16:
627   case AMDGPU::OPERAND_REG_IMM_V2FP16:
628     return 2;
629 
630   default:
631     llvm_unreachable("unhandled operand type");
632   }
633 }
634 
635 LLVM_READNONE
636 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
637   return getOperandSize(Desc.OpInfo[OpNo]);
638 }
639 
640 /// Is this literal inlinable, and not one of the values intended for floating
641 /// point values.
642 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Inlinable integer literals form the closed range [-16, 64].
  const int64_t Lowest = -16;
  const int64_t Highest = 64;
  return !(Literal < Lowest || Literal > Highest);
}
646 
647 /// Is this literal inlinable
648 LLVM_READNONE
649 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
650 
651 LLVM_READNONE
652 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
653 
654 LLVM_READNONE
655 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
656 
657 LLVM_READNONE
658 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
659 
660 LLVM_READNONE
661 bool isInlinableIntLiteralV216(int32_t Literal);
662 
663 bool isArgPassedInSGPR(const Argument *Arg);
664 
665 LLVM_READONLY
666 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
667                                       int64_t EncodedOffset);
668 
669 LLVM_READONLY
670 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
671                                     int64_t EncodedOffset,
672                                     bool IsBuffer);
673 
674 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
675 /// offsets.
676 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
677 
678 /// \returns The encoding that will be used for \p ByteOffset in the
679 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
680 /// S_LOAD instructions have a signed offset, on other subtargets it is
681 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
682 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
683                                        int64_t ByteOffset, bool IsBuffer);
684 
685 /// \returns The encoding that can be used for a 32-bit literal offset in an
686 /// SMRD instruction. This is only useful on CI.
687 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
688                                                 int64_t ByteOffset);
689 
690 /// \returns true if this offset is small enough to fit in the SMRD
691 /// offset field.  \p ByteOffset should be the offset in bytes and
692 /// not the encoded offset.
693 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
694 
695 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
696                       const GCNSubtarget *Subtarget,
697                       Align Alignment = Align(4));
698 
699 /// \returns true if the intrinsic is divergent
700 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
701 
702 // Track defaults for fields in the MODE registser.
703 struct SIModeRegisterDefaults {
704   /// Floating point opcodes that support exception flag gathering quiet and
705   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
706   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
707   /// quieting.
708   bool IEEE : 1;
709 
710   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
711   /// clamp NaN to zero; otherwise, pass NaN through.
712   bool DX10Clamp : 1;
713 
714   /// If this is set, neither input or output denormals are flushed for most f32
715   /// instructions.
716   bool FP32InputDenormals : 1;
717   bool FP32OutputDenormals : 1;
718 
719   /// If this is set, neither input or output denormals are flushed for both f64
720   /// and f16/v2f16 instructions.
721   bool FP64FP16InputDenormals : 1;
722   bool FP64FP16OutputDenormals : 1;
723 
724   SIModeRegisterDefaults() :
725     IEEE(true),
726     DX10Clamp(true),
727     FP32InputDenormals(true),
728     FP32OutputDenormals(true),
729     FP64FP16InputDenormals(true),
730     FP64FP16OutputDenormals(true) {}
731 
732   SIModeRegisterDefaults(const Function &F);
733 
734   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
735     const bool IsCompute = AMDGPU::isCompute(CC);
736 
737     SIModeRegisterDefaults Mode;
738     Mode.IEEE = IsCompute;
739     return Mode;
740   }
741 
742   bool operator ==(const SIModeRegisterDefaults Other) const {
743     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
744            FP32InputDenormals == Other.FP32InputDenormals &&
745            FP32OutputDenormals == Other.FP32OutputDenormals &&
746            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
747            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
748   }
749 
750   bool allFP32Denormals() const {
751     return FP32InputDenormals && FP32OutputDenormals;
752   }
753 
754   bool allFP64FP16Denormals() const {
755     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
756   }
757 
758   /// Get the encoding value for the FP_DENORM bits of the mode register for the
759   /// FP32 denormal mode.
760   uint32_t fpDenormModeSPValue() const {
761     if (FP32InputDenormals && FP32OutputDenormals)
762       return FP_DENORM_FLUSH_NONE;
763     if (FP32InputDenormals)
764       return FP_DENORM_FLUSH_OUT;
765     if (FP32OutputDenormals)
766       return FP_DENORM_FLUSH_IN;
767     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
768   }
769 
770   /// Get the encoding value for the FP_DENORM bits of the mode register for the
771   /// FP64/FP16 denormal mode.
772   uint32_t fpDenormModeDPValue() const {
773     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
774       return FP_DENORM_FLUSH_NONE;
775     if (FP64FP16InputDenormals)
776       return FP_DENORM_FLUSH_OUT;
777     if (FP64FP16OutputDenormals)
778       return FP_DENORM_FLUSH_IN;
779     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
780   }
781 
782   /// Returns true if a flag is compatible if it's enabled in the callee, but
783   /// disabled in the caller.
784   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
785     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
786   }
787 
788   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
789   // be able to override.
790   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
791     if (DX10Clamp != CalleeMode.DX10Clamp)
792       return false;
793     if (IEEE != CalleeMode.IEEE)
794       return false;
795 
796     // Allow inlining denormals enabled into denormals flushed functions.
797     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
798            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
799            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
800            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
801   }
802 };
803 
804 } // end namespace AMDGPU
805 } // end namespace llvm
806 
807 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
808