1 //===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // \file
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUInstPrinter.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "SIDefines.h"
13 #include "Utils/AMDGPUAsmUtils.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/MC/MCExpr.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MC/MCInstrDesc.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/TargetParser/TargetParser.h"
23 
24 using namespace llvm;
25 using namespace llvm::AMDGPU;
26 
27 void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
28   // FIXME: The current implementation of
29   // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
30   // as an integer or we provide a name which represents a physical register.
31   // For CFI instructions we really want to emit a name for the DWARF register
32   // instead, because there may be multiple DWARF registers corresponding to a
33   // single physical register. One case where this problem manifests is with
34   // wave32/wave64 where using the physical register name is ambiguous: if we
35   // write e.g. `.cfi_undefined v0` we lose information about the wavefront
36   // size which we need to encode the register in the final DWARF. Ideally we
37   // would extend MC to support parsing DWARF register names so we could do
38   // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
39   // non-pretty DWARF register names in assembly text.
40   OS << Reg.id();
41 }
42 
43 void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
44                                   StringRef Annot, const MCSubtargetInfo &STI,
45                                   raw_ostream &OS) {
46   printInstruction(MI, Address, STI, OS);
47   printAnnotation(OS, Annot);
48 }
49 
50 void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
51                                           const MCSubtargetInfo &STI,
52                                           raw_ostream &O) {
53   O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
54 }
55 
56 void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
57                                            const MCSubtargetInfo &STI,
58                                            raw_ostream &O) {
59   const MCOperand &Op = MI->getOperand(OpNo);
60   if (Op.isExpr()) {
61     Op.getExpr()->print(O, &MAI);
62     return;
63   }
64 
65   // It's possible to end up with a 32-bit literal used with a 16-bit operand
66   // with ignored high bits. Print as 32-bit anyway in that case.
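  // e.g. a raw operand value of 0x12345678 in a 16-bit field is printed as
  // 0x12345678 rather than 0x5678, so no encoded bits are silently dropped.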
67   int64_t Imm = Op.getImm();
68   if (isInt<16>(Imm) || isUInt<16>(Imm))
69     O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
70   else
71     printU32ImmOperand(MI, OpNo, STI, O);
72 }
73 
74 void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
75                                              raw_ostream &O) {
76   O << formatDec(MI->getOperand(OpNo).getImm() & 0xf);
77 }
78 
79 void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
80                                              raw_ostream &O) {
81   O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
82 }
83 
84 void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
85                                               raw_ostream &O) {
86   O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
87 }
88 
89 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
90                                            const MCSubtargetInfo &STI,
91                                            raw_ostream &O) {
92   O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
93 }
94 
95 void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
96                                       raw_ostream &O, StringRef BitName) {
97   if (MI->getOperand(OpNo).getImm()) {
98     O << ' ' << BitName;
99   }
100 }
101 
102 void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
103                                     const MCSubtargetInfo &STI,
104                                     raw_ostream &O) {
105   uint32_t Imm = MI->getOperand(OpNo).getImm();
106   if (Imm != 0) {
107     O << " offset:";
108 
109     // GFX12 uses a 24-bit signed offset for VBUFFER.
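    // e.g. an encoded offset field of 0xffffff is printed as offset:-1 there.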
110     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
111     bool IsVBuffer = Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF);
112     if (AMDGPU::isGFX12(STI) && IsVBuffer)
113       O << formatDec(SignExtend32<24>(Imm));
114     else
115       printU16ImmDecOperand(MI, OpNo, O);
116   }
117 }
118 
119 void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
120                                         const MCSubtargetInfo &STI,
121                                         raw_ostream &O) {
122   uint32_t Imm = MI->getOperand(OpNo).getImm();
123   if (Imm != 0) {
124     O << " offset:";
125 
126     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
127     bool AllowNegative = (Desc.TSFlags & (SIInstrFlags::FlatGlobal |
128                                           SIInstrFlags::FlatScratch)) ||
129                          AMDGPU::isGFX12(STI);
130 
131     if (AllowNegative) // Signed offset
132       O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI)));
133     else // Unsigned offset
134       printU16ImmDecOperand(MI, OpNo, O);
135   }
136 }
137 
138 void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
139                                      const MCSubtargetInfo &STI,
140                                      raw_ostream &O) {
141   if (MI->getOperand(OpNo).getImm()) {
142     O << " offset0:";
143     printU8ImmDecOperand(MI, OpNo, O);
144   }
145 }
146 
147 void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
148                                      const MCSubtargetInfo &STI,
149                                      raw_ostream &O) {
150   if (MI->getOperand(OpNo).getImm()) {
151     O << " offset1:";
152     printU8ImmDecOperand(MI, OpNo, O);
153   }
154 }
155 
156 void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
157                                         const MCSubtargetInfo &STI,
158                                         raw_ostream &O) {
159   printU32ImmOperand(MI, OpNo, STI, O);
160 }
161 
162 void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
163                                         const MCSubtargetInfo &STI,
164                                         raw_ostream &O) {
165   O << formatHex(MI->getOperand(OpNo).getImm());
166 }
167 
168 void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo,
169                                            const MCSubtargetInfo &STI,
170                                            raw_ostream &O) {
171   O << " offset:";
172   printSMEMOffset(MI, OpNo, STI, O);
173 }
174 
175 void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
176                                                const MCSubtargetInfo &STI,
177                                                raw_ostream &O) {
178   printU32ImmOperand(MI, OpNo, STI, O);
179 }
180 
181 void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
182                                   const MCSubtargetInfo &STI, raw_ostream &O) {
183   auto Imm = MI->getOperand(OpNo).getImm();
184 
185   if (AMDGPU::isGFX12Plus(STI)) {
186     const int64_t TH = Imm & CPol::TH;
187     const int64_t Scope = Imm & CPol::SCOPE;
188 
189     printTH(MI, TH, Scope, O);
190     printScope(Scope, O);
191 
192     return;
193   }
194 
195   if (Imm & CPol::GLC)
196     O << ((AMDGPU::isGFX940(STI) &&
197            !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
198                                                                      : " glc");
199   if (Imm & CPol::SLC)
200     O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
201   if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
202     O << " dlc";
203   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
204     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
205   if (Imm & ~CPol::ALL)
206     O << " /* unexpected cache policy bit */";
207 }
208 
209 void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
210                                 raw_ostream &O) {
211   // For th = 0, do not print this field.
212   if (TH == 0)
213     return;
214 
215   const unsigned Opcode = MI->getOpcode();
216   const MCInstrDesc &TID = MII.get(Opcode);
217   bool IsStore = TID.mayStore();
218   bool IsAtomic =
219       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
220 
221   O << " th:";
222 
223   if (IsAtomic) {
224     O << "TH_ATOMIC_";
225     if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) {
226       if (Scope >= AMDGPU::CPol::SCOPE_DEV)
227         O << "CASCADE" << (TH & AMDGPU::CPol::TH_ATOMIC_NT ? "_NT" : "_RT");
228       else
229         O << formatHex(TH);
230     } else if (TH & AMDGPU::CPol::TH_ATOMIC_NT)
231       O << "NT" << (TH & AMDGPU::CPol::TH_ATOMIC_RETURN ? "_RETURN" : "");
232     else if (TH & AMDGPU::CPol::TH_ATOMIC_RETURN)
233       O << "RETURN";
234     else
235       O << formatHex(TH);
236   } else {
237     if (!IsStore && TH == AMDGPU::CPol::TH_RESERVED)
238       O << formatHex(TH);
239     else {
240       // This will default to printing the load variants when neither the
241       // MayStore nor the MayLoad flag is present, which is the case for
242       // instructions like image_get_resinfo.
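      // e.g. image_get_resinfo with th == TH_NT is printed as "th:TH_LOAD_NT".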
243       O << (IsStore ? "TH_STORE_" : "TH_LOAD_");
244       switch (TH) {
245       case AMDGPU::CPol::TH_NT:
246         O << "NT";
247         break;
248       case AMDGPU::CPol::TH_HT:
249         O << "HT";
250         break;
251       case AMDGPU::CPol::TH_BYPASS: // or LU or RT_WB
252         O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
253                                                : (IsStore ? "RT_WB" : "LU"));
254         break;
255       case AMDGPU::CPol::TH_NT_RT:
256         O << "NT_RT";
257         break;
258       case AMDGPU::CPol::TH_RT_NT:
259         O << "RT_NT";
260         break;
261       case AMDGPU::CPol::TH_NT_HT:
262         O << "NT_HT";
263         break;
264       case AMDGPU::CPol::TH_NT_WB:
265         O << "NT_WB";
266         break;
267       default:
268         llvm_unreachable("unexpected th value");
269       }
270     }
271   }
272 }
273 
274 void AMDGPUInstPrinter::printScope(int64_t Scope, raw_ostream &O) {
275   if (Scope == CPol::SCOPE_CU)
276     return;
277 
278   O << " scope:";
279 
280   if (Scope == CPol::SCOPE_SE)
281     O << "SCOPE_SE";
282   else if (Scope == CPol::SCOPE_DEV)
283     O << "SCOPE_DEV";
284   else if (Scope == CPol::SCOPE_SYS)
285     O << "SCOPE_SYS";
286   else
287     llvm_unreachable("unexpected scope policy value");
288 
289   return;
290 }
291 
292 void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
293                                    const MCSubtargetInfo &STI, raw_ostream &O) {
294   if (MI->getOperand(OpNo).getImm()) {
295     O << " dmask:";
296     printU16ImmOperand(MI, OpNo, STI, O);
297   }
298 }
299 
300 void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
301                                  const MCSubtargetInfo &STI, raw_ostream &O) {
302   unsigned Dim = MI->getOperand(OpNo).getImm();
303   O << " dim:SQ_RSRC_IMG_";
304 
305   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
306   if (DimInfo)
307     O << DimInfo->AsmSuffix;
308   else
309     O << Dim;
310 }
311 
312 void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
313                                   const MCSubtargetInfo &STI, raw_ostream &O) {
314   if (STI.hasFeature(AMDGPU::FeatureR128A16))
315     printNamedBit(MI, OpNo, O, "a16");
316   else
317     printNamedBit(MI, OpNo, O, "r128");
318 }
319 
320 void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
321                                     const MCSubtargetInfo &STI,
322                                     raw_ostream &O) {
323 }
324 
325 void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
326                                             const MCSubtargetInfo &STI,
327                                             raw_ostream &O) {
328   using namespace llvm::AMDGPU::MTBUFFormat;
329 
330   int OpNo =
331     AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
332   assert(OpNo != -1);
333 
334   unsigned Val = MI->getOperand(OpNo).getImm();
335   if (AMDGPU::isGFX10Plus(STI)) {
336     if (Val == UFMT_DEFAULT)
337       return;
338     if (isValidUnifiedFormat(Val, STI)) {
339       O << " format:[" << getUnifiedFormatName(Val, STI) << ']';
340     } else {
341       O << " format:" << Val;
342     }
343   } else {
344     if (Val == DFMT_NFMT_DEFAULT)
345       return;
346     if (isValidDfmtNfmt(Val, STI)) {
347       unsigned Dfmt;
348       unsigned Nfmt;
349       decodeDfmtNfmt(Val, Dfmt, Nfmt);
350       O << " format:[";
351       if (Dfmt != DFMT_DEFAULT) {
352         O << getDfmtName(Dfmt);
353         if (Nfmt != NFMT_DEFAULT) {
354           O << ',';
355         }
356       }
357       if (Nfmt != NFMT_DEFAULT) {
358         O << getNfmtName(Nfmt, STI);
359       }
360       O << ']';
361     } else {
362       O << " format:" << Val;
363     }
364   }
365 }
366 
367 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
368                                         const MCRegisterInfo &MRI) {
369 #if !defined(NDEBUG)
370   switch (RegNo) {
371   case AMDGPU::FP_REG:
372   case AMDGPU::SP_REG:
373   case AMDGPU::PRIVATE_RSRC_REG:
374     llvm_unreachable("pseudo-register should not ever be emitted");
375   case AMDGPU::SCC:
376     llvm_unreachable("pseudo scc should not ever be emitted");
377   default:
378     break;
379   }
380 #endif
381 
382   O << getRegisterName(RegNo);
383 }
384 
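// For the first (destination) operand, first append the encoding suffix
// implied by the instruction flags (_e64_dpp, _e64, _dpp, _sdwa or _e32;
// omitted for opcodes with only a single encoding), then print the operand.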
385 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
386                                     const MCSubtargetInfo &STI, raw_ostream &O) {
387   auto Opcode = MI->getOpcode();
388   auto Flags = MII.get(Opcode).TSFlags;
389   if (OpNo == 0) {
390     if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
391       O << "_e64_dpp";
392     else if (Flags & SIInstrFlags::VOP3) {
393       if (!getVOP3IsSingle(Opcode))
394         O << "_e64";
395     } else if (Flags & SIInstrFlags::DPP)
396       O << "_dpp";
397     else if (Flags & SIInstrFlags::SDWA)
398       O << "_sdwa";
399     else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
400              ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
401       O << "_e32";
402     O << " ";
403   }
404 
405   printRegularOperand(MI, OpNo, STI, O);
406 
407   // Print default vcc/vcc_lo operand.
408   switch (Opcode) {
409   default: break;
410 
411   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
412   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
413   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
414   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
415   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
416   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
417   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
418   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
419   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
420   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
421   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
422   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
423   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
424   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
425   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
426   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
427   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
428   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
429   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
430   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
431   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
432   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
433   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
434   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
435   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
436   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
437   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
438   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
439   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
440   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
441     printDefaultVccOperand(false, STI, O);
442     break;
443   }
444 }
445 
446 void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
447                                        const MCSubtargetInfo &STI, raw_ostream &O) {
448   if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
449     O << " ";
450   else
451     O << "_e32 ";
452 
453   printRegularOperand(MI, OpNo, STI, O);
454 }
455 
456 void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
457                                             const MCSubtargetInfo &STI,
458                                             raw_ostream &O) {
459   int32_t SImm = static_cast<int32_t>(Imm);
460   if (isInlinableIntLiteral(SImm)) {
461     O << SImm;
462     return;
463   }
464 
465   if (printImmediateFloat32(Imm, STI, O))
466     return;
467 
468   O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
469 }
470 
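// The magic numbers below are the IEEE-754 binary16 bit patterns of the inline
// constants, e.g. 0x3C00 is 1.0 and 0xB800 is -0.5; 0x3118 is 1/(2*pi).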
471 static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
472                                raw_ostream &O) {
473   if (Imm == 0x3C00)
474     O << "1.0";
475   else if (Imm == 0xBC00)
476     O << "-1.0";
477   else if (Imm == 0x3800)
478     O << "0.5";
479   else if (Imm == 0xB800)
480     O << "-0.5";
481   else if (Imm == 0x4000)
482     O << "2.0";
483   else if (Imm == 0xC000)
484     O << "-2.0";
485   else if (Imm == 0x4400)
486     O << "4.0";
487   else if (Imm == 0xC400)
488     O << "-4.0";
489   else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
490     O << "0.15915494";
491   else
492     return false;
493 
494   return true;
495 }
496 
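// Same idea for bfloat16: these are the upper 16 bits of the corresponding
// binary32 patterns, e.g. 0x3F80 is 1.0 and 0x3E22 approximates 1/(2*pi).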
497 static bool printImmediateBFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
498                                    raw_ostream &O) {
499   if (Imm == 0x3F80)
500     O << "1.0";
501   else if (Imm == 0xBF80)
502     O << "-1.0";
503   else if (Imm == 0x3F00)
504     O << "0.5";
505   else if (Imm == 0xBF00)
506     O << "-0.5";
507   else if (Imm == 0x4000)
508     O << "2.0";
509   else if (Imm == 0xC000)
510     O << "-2.0";
511   else if (Imm == 0x4080)
512     O << "4.0";
513   else if (Imm == 0xC080)
514     O << "-4.0";
515   else if (Imm == 0x3E22 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
516     O << "0.15915494";
517   else
518     return false;
519 
520   return true;
521 }
522 
523 void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
524                                            const MCSubtargetInfo &STI,
525                                            raw_ostream &O) {
526   int16_t SImm = static_cast<int16_t>(Imm);
527   if (isInlinableIntLiteral(SImm)) {
528     O << SImm;
529     return;
530   }
531 
532   if (printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
533     return;
534 
535   O << formatHex(static_cast<uint64_t>(Imm));
536 }
537 
538 void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
539                                           const MCSubtargetInfo &STI,
540                                           raw_ostream &O) {
541   int16_t SImm = static_cast<int16_t>(Imm);
542   if (isInlinableIntLiteral(SImm)) {
543     O << SImm;
544     return;
545   }
546 
547   uint16_t HImm = static_cast<uint16_t>(Imm);
548   if (printImmediateFP16(HImm, STI, O))
549     return;
550 
551   uint64_t Imm16 = static_cast<uint16_t>(Imm);
552   O << formatHex(Imm16);
553 }
554 
555 void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
556                                            const MCSubtargetInfo &STI,
557                                            raw_ostream &O) {
558   int32_t SImm = static_cast<int32_t>(Imm);
559   if (isInlinableIntLiteral(SImm)) {
560     O << SImm;
561     return;
562   }
563 
564   switch (OpType) {
565   case AMDGPU::OPERAND_REG_IMM_V2INT16:
566   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
567   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
568     if (printImmediateFloat32(Imm, STI, O))
569       return;
570     break;
571   case AMDGPU::OPERAND_REG_IMM_V2FP16:
572   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
573   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
574     if (isUInt<16>(Imm) &&
575         printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
576       return;
577     break;
578   case AMDGPU::OPERAND_REG_IMM_V2BF16:
579   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
580   case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
581     if (isUInt<16>(Imm) &&
582         printImmediateBFloat16(static_cast<uint16_t>(Imm), STI, O))
583       return;
584     break;
585   default:
586     llvm_unreachable("bad operand type");
587   }
588 
589   O << formatHex(static_cast<uint64_t>(Imm));
590 }
591 
592 bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm,
593                                               const MCSubtargetInfo &STI,
594                                               raw_ostream &O) {
595   if (Imm == llvm::bit_cast<uint32_t>(0.0f))
596     O << "0.0";
597   else if (Imm == llvm::bit_cast<uint32_t>(1.0f))
598     O << "1.0";
599   else if (Imm == llvm::bit_cast<uint32_t>(-1.0f))
600     O << "-1.0";
601   else if (Imm == llvm::bit_cast<uint32_t>(0.5f))
602     O << "0.5";
603   else if (Imm == llvm::bit_cast<uint32_t>(-0.5f))
604     O << "-0.5";
605   else if (Imm == llvm::bit_cast<uint32_t>(2.0f))
606     O << "2.0";
607   else if (Imm == llvm::bit_cast<uint32_t>(-2.0f))
608     O << "-2.0";
609   else if (Imm == llvm::bit_cast<uint32_t>(4.0f))
610     O << "4.0";
611   else if (Imm == llvm::bit_cast<uint32_t>(-4.0f))
612     O << "-4.0";
613   else if (Imm == 0x3e22f983 &&
614            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
615     O << "0.15915494";
616   else
617     return false;
618 
619   return true;
620 }
621 
622 void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
623                                          const MCSubtargetInfo &STI,
624                                          raw_ostream &O) {
625   int32_t SImm = static_cast<int32_t>(Imm);
626   if (isInlinableIntLiteral(SImm)) {
627     O << SImm;
628     return;
629   }
630 
631   if (printImmediateFloat32(Imm, STI, O))
632     return;
633 
634   O << formatHex(static_cast<uint64_t>(Imm));
635 }
636 
637 void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
638                                          const MCSubtargetInfo &STI,
639                                          raw_ostream &O, bool IsFP) {
640   int64_t SImm = static_cast<int64_t>(Imm);
641   if (SImm >= -16 && SImm <= 64) {
642     O << SImm;
643     return;
644   }
645 
646   if (Imm == llvm::bit_cast<uint64_t>(0.0))
647     O << "0.0";
648   else if (Imm == llvm::bit_cast<uint64_t>(1.0))
649     O << "1.0";
650   else if (Imm == llvm::bit_cast<uint64_t>(-1.0))
651     O << "-1.0";
652   else if (Imm == llvm::bit_cast<uint64_t>(0.5))
653     O << "0.5";
654   else if (Imm == llvm::bit_cast<uint64_t>(-0.5))
655     O << "-0.5";
656   else if (Imm == llvm::bit_cast<uint64_t>(2.0))
657     O << "2.0";
658   else if (Imm == llvm::bit_cast<uint64_t>(-2.0))
659     O << "-2.0";
660   else if (Imm == llvm::bit_cast<uint64_t>(4.0))
661     O << "4.0";
662   else if (Imm == llvm::bit_cast<uint64_t>(-4.0))
663     O << "-4.0";
664   else if (Imm == 0x3fc45f306dc9c882 &&
665            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
666     O << "0.15915494309189532";
667   else if (IsFP) {
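    // A 64-bit floating-point literal is carried in the high 32 bits (the low
    // half is zero), so only the high half is printed.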
668     assert(AMDGPU::isValid32BitLiteral(Imm, true));
669     O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
670   } else {
671     assert(isUInt<32>(Imm) || isInt<32>(Imm));
672 
673     // In rare situations, we will have a 32-bit literal in a 64-bit
674     // operand. This is technically allowed for the encoding of s_mov_b64.
675     O << formatHex(static_cast<uint64_t>(Imm));
676   }
677 }
678 
679 void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
680                                   const MCSubtargetInfo &STI,
681                                   raw_ostream &O) {
682   unsigned Imm = MI->getOperand(OpNo).getImm();
683   if (!Imm)
684     return;
685 
686   if (AMDGPU::isGFX940(STI)) {
687     switch (MI->getOpcode()) {
688     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
689     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
690     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
691     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
692       O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
693         << ((Imm >> 2) & 1) << ']';
694       return;
695     }
696   }
697 
698   O << " blgp:" << Imm;
699 }
700 
701 void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
702                                   const MCSubtargetInfo &STI,
703                                   raw_ostream &O) {
704   unsigned Imm = MI->getOperand(OpNo).getImm();
705   if (!Imm)
706     return;
707 
708   O << " cbsz:" << Imm;
709 }
710 
711 void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
712                                   const MCSubtargetInfo &STI,
713                                   raw_ostream &O) {
714   unsigned Imm = MI->getOperand(OpNo).getImm();
715   if (!Imm)
716     return;
717 
718   O << " abid:" << Imm;
719 }
720 
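// Print the implicit vcc (wave64) or vcc_lo (wave32) operand that some e32,
// DPP and SDWA forms (carry ops, v_cndmask_b32, VOPC) use without encoding it.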
721 void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
722                                                const MCSubtargetInfo &STI,
723                                                raw_ostream &O) {
724   if (!FirstOperand)
725     O << ", ";
726   printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
727                       ? AMDGPU::VCC
728                       : AMDGPU::VCC_LO,
729                   O, MRI);
730   if (FirstOperand)
731     O << ", ";
732 }
733 
734 void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
735                                       const MCSubtargetInfo &STI,
736                                       raw_ostream &O) {
737   O << " wait_vdst:";
738   printU4ImmDecOperand(MI, OpNo, O);
739 }
740 
741 void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
742                                         const MCSubtargetInfo &STI,
743                                         raw_ostream &O) {
744   O << " wait_va_vdst:";
745   printU4ImmDecOperand(MI, OpNo, O);
746 }
747 
748 void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
749                                         const MCSubtargetInfo &STI,
750                                         raw_ostream &O) {
751   O << " wait_vm_vsrc:";
752   printU4ImmDecOperand(MI, OpNo, O);
753 }
754 
755 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
756                                     const MCSubtargetInfo &STI,
757                                     raw_ostream &O) {
758   O << " wait_exp:";
759   printU4ImmDecOperand(MI, OpNo, O);
760 }
761 
762 bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
763                                         unsigned OpNo) const {
764   return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) &&
765          (Desc.TSFlags & SIInstrFlags::VOPC) &&
766          !isVOPCAsmOnly(Desc.getOpcode()) &&
767          (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
768           Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
769 }
770 
771 // Print default vcc/vcc_lo operand of VOPC.
772 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
773                                      const MCSubtargetInfo &STI,
774                                      raw_ostream &O) {
775   unsigned Opc = MI->getOpcode();
776   const MCInstrDesc &Desc = MII.get(Opc);
777   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
778   // Operand 0, 1 or 2 is the first printed operand in the different cases.
779   // If there are printed modifiers, printOperandAndFPInputMods or
780   // printOperandAndIntInputMods will be called instead.
781   if ((OpNo == 0 ||
782        (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
783       (Desc.TSFlags & SIInstrFlags::VOPC) && !isVOPCAsmOnly(Desc.getOpcode()) &&
784       (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
785        Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
786     printDefaultVccOperand(true, STI, O);
787 
788   printRegularOperand(MI, OpNo, STI, O);
789 }
790 
791 // Print operands after vcc or modifier handling.
792 void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
793                                             const MCSubtargetInfo &STI,
794                                             raw_ostream &O) {
795   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
796 
797   if (OpNo >= MI->getNumOperands()) {
798     O << "/*Missing OP" << OpNo << "*/";
799     return;
800   }
801 
802   const MCOperand &Op = MI->getOperand(OpNo);
803   if (Op.isReg()) {
804     printRegOperand(Op.getReg(), O, MRI);
805 
806     // Check whether the operand's register class contains the register used.
807     // The intention is to print a disassembler message when invalid code is
808     // decoded, for example an SGPR used in a VReg or VISrc (VReg or imm) operand.
809     int RCID = Desc.operands()[OpNo].RegClass;
810     if (RCID != -1) {
811       const MCRegisterClass RC = MRI.getRegClass(RCID);
812       auto Reg = mc2PseudoReg(Op.getReg());
813       if (!RC.contains(Reg) && !isInlineValue(Reg)) {
814         O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
815           << "\' register class*/";
816       }
817     }
818   } else if (Op.isImm()) {
819     const uint8_t OpTy = Desc.operands()[OpNo].OperandType;
820     switch (OpTy) {
821     case AMDGPU::OPERAND_REG_IMM_INT32:
822     case AMDGPU::OPERAND_REG_IMM_FP32:
823     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
824     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
825     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
826     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
827     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
828     case AMDGPU::OPERAND_REG_IMM_V2INT32:
829     case AMDGPU::OPERAND_REG_IMM_V2FP32:
830     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
831     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
832     case MCOI::OPERAND_IMMEDIATE:
833     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
834       printImmediate32(Op.getImm(), STI, O);
835       break;
836     case AMDGPU::OPERAND_REG_IMM_INT64:
837     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
838       printImmediate64(Op.getImm(), STI, O, false);
839       break;
840     case AMDGPU::OPERAND_REG_IMM_FP64:
841     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
842     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
843       printImmediate64(Op.getImm(), STI, O, true);
844       break;
845     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
846     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
847     case AMDGPU::OPERAND_REG_IMM_INT16:
848       printImmediateInt16(Op.getImm(), STI, O);
849       break;
850     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
851     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
852     case AMDGPU::OPERAND_REG_IMM_FP16:
853     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
854       printImmediateF16(Op.getImm(), STI, O);
855       break;
856     case AMDGPU::OPERAND_REG_INLINE_C_BF16:
857     case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
858     case AMDGPU::OPERAND_REG_IMM_BF16:
859     case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
860       printImmediateBF16(Op.getImm(), STI, O);
861       break;
862     case AMDGPU::OPERAND_REG_IMM_V2INT16:
863     case AMDGPU::OPERAND_REG_IMM_V2BF16:
864     case AMDGPU::OPERAND_REG_IMM_V2FP16:
865     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
866     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
867     case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
868     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
869     case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
870     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
871       printImmediateV216(Op.getImm(), OpTy, STI, O);
872       break;
873     case MCOI::OPERAND_UNKNOWN:
874     case MCOI::OPERAND_PCREL:
875       O << formatDec(Op.getImm());
876       break;
877     case MCOI::OPERAND_REGISTER:
878       // The disassembler does not fail when an operand should not allow
879       // immediates; it decodes them into a 32-bit immediate operand anyway.
880       printImmediate32(Op.getImm(), STI, O);
881       O << "/*Invalid immediate*/";
882       break;
883     default:
884       // We hit this for the immediate instruction bits that don't yet have a
885       // custom printer.
886       llvm_unreachable("unexpected immediate operand type");
887     }
888   } else if (Op.isDFPImm()) {
889     double Value = bit_cast<double>(Op.getDFPImm());
890     // We special case 0.0 because otherwise it will be printed as an integer.
891     if (Value == 0.0)
892       O << "0.0";
893     else {
894       const MCInstrDesc &Desc = MII.get(MI->getOpcode());
895       int RCID = Desc.operands()[OpNo].RegClass;
896       unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
897       if (RCBits == 32)
898         printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
899       else if (RCBits == 64)
900         printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
901       else
902         llvm_unreachable("Invalid register class size");
903     }
904   } else if (Op.isExpr()) {
905     const MCExpr *Exp = Op.getExpr();
906     Exp->print(O, &MAI);
907   } else {
908     O << "/*INV_OP*/";
909   }
910 
911   // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
912   switch (MI->getOpcode()) {
913   default: break;
914 
915   case AMDGPU::V_CNDMASK_B32_e32_gfx10:
916   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
917   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
918   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
919   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
920   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
921   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
922   case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
923   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
924   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
925   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
926   case AMDGPU::V_CNDMASK_B32_e32_gfx11:
927   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
928   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
929   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
930   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
931   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
932   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
933   case AMDGPU::V_CNDMASK_B32_dpp8_gfx11:
934   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
935   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
936   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
937   case AMDGPU::V_CNDMASK_B32_e32_gfx12:
938   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
939   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
940   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
941   case AMDGPU::V_CNDMASK_B32_dpp_gfx12:
942   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
943   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
944   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
945   case AMDGPU::V_CNDMASK_B32_dpp8_gfx12:
946   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
947   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
948   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
949 
950   case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
951   case AMDGPU::V_CNDMASK_B32_e32_vi:
952     if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
953                                                 AMDGPU::OpName::src1))
954       printDefaultVccOperand(OpNo == 0, STI, O);
955     break;
956   }
957 
958   if (Desc.TSFlags & SIInstrFlags::MTBUF) {
959     int SOffsetIdx =
960       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
961     assert(SOffsetIdx != -1);
962     if ((int)OpNo == SOffsetIdx)
963       printSymbolicFormat(MI, STI, O);
964   }
965 }
966 
967 void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
968                                                    unsigned OpNo,
969                                                    const MCSubtargetInfo &STI,
970                                                    raw_ostream &O) {
971   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
972   if (needsImpliedVcc(Desc, OpNo))
973     printDefaultVccOperand(true, STI, O);
974 
975   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
976 
977   // Use 'neg(...)' instead of '-' to avoid ambiguity.
978   // This is important for integer literals because
979   // -1 is not the same value as neg(1).
980   bool NegMnemo = false;
981 
982   if (InputModifiers & SISrcMods::NEG) {
983     if (OpNo + 1 < MI->getNumOperands() &&
984         (InputModifiers & SISrcMods::ABS) == 0) {
985       const MCOperand &Op = MI->getOperand(OpNo + 1);
986       NegMnemo = Op.isImm() || Op.isDFPImm();
987     }
988     if (NegMnemo) {
989       O << "neg(";
990     } else {
991       O << '-';
992     }
993   }
994 
995   if (InputModifiers & SISrcMods::ABS)
996     O << '|';
997   printRegularOperand(MI, OpNo + 1, STI, O);
998   if (InputModifiers & SISrcMods::ABS)
999     O << '|';
1000 
1001   if (NegMnemo) {
1002     O << ')';
1003   }
1004 
1005   // Print default vcc/vcc_lo operand of VOP2b.
1006   switch (MI->getOpcode()) {
1007   default:
1008     break;
1009 
1010   case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
1011   case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
1012   case AMDGPU::V_CNDMASK_B32_dpp_gfx11:
1013     if ((int)OpNo + 1 ==
1014         AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1))
1015       printDefaultVccOperand(OpNo == 0, STI, O);
1016     break;
1017   }
1018 }
1019 
1020 void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
1021                                                     unsigned OpNo,
1022                                                     const MCSubtargetInfo &STI,
1023                                                     raw_ostream &O) {
1024   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
1025   if (needsImpliedVcc(Desc, OpNo))
1026     printDefaultVccOperand(true, STI, O);
1027 
1028   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
1029   if (InputModifiers & SISrcMods::SEXT)
1030     O << "sext(";
1031   printRegularOperand(MI, OpNo + 1, STI, O);
1032   if (InputModifiers & SISrcMods::SEXT)
1033     O << ')';
1034 
1035   // Print default vcc/vcc_lo operand of VOP2b.
1036   switch (MI->getOpcode()) {
1037   default: break;
1038 
1039   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
1040   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
1041   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
1042     if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1043                                                     AMDGPU::OpName::src1))
1044       printDefaultVccOperand(OpNo == 0, STI, O);
1045     break;
1046   }
1047 }
1048 
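// The DPP8 immediate packs eight 3-bit lane selectors; selector i is bits
// [3*i+2 : 3*i], so e.g. 0xFAC688 prints as dpp8:[0,1,2,3,4,5,6,7].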
1049 void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
1050                                   const MCSubtargetInfo &STI,
1051                                   raw_ostream &O) {
1052   if (!AMDGPU::isGFX10Plus(STI))
1053     llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
1054 
1055   unsigned Imm = MI->getOperand(OpNo).getImm();
1056   O << "dpp8:[" << formatDec(Imm & 0x7);
1057   for (size_t i = 1; i < 8; ++i) {
1058     O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
1059   }
1060   O << ']';
1061 }
1062 
1063 void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
1064                                      const MCSubtargetInfo &STI,
1065                                      raw_ostream &O) {
1066   using namespace AMDGPU::DPP;
1067 
1068   unsigned Imm = MI->getOperand(OpNo).getImm();
1069   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
1070 
1071   if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
1072     O << " /* DP ALU dpp only supports row_newbcast */";
1073     return;
1074   }
1075   if (Imm <= DppCtrl::QUAD_PERM_LAST) {
1076     O << "quad_perm:[";
1077     O << formatDec(Imm & 0x3)         << ',';
1078     O << formatDec((Imm & 0xc)  >> 2) << ',';
1079     O << formatDec((Imm & 0x30) >> 4) << ',';
1080     O << formatDec((Imm & 0xc0) >> 6) << ']';
1081   } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
1082              (Imm <= DppCtrl::ROW_SHL_LAST)) {
1083     O << "row_shl:";
1084     printU4ImmDecOperand(MI, OpNo, O);
1085   } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
1086              (Imm <= DppCtrl::ROW_SHR_LAST)) {
1087     O << "row_shr:";
1088     printU4ImmDecOperand(MI, OpNo, O);
1089   } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
1090              (Imm <= DppCtrl::ROW_ROR_LAST)) {
1091     O << "row_ror:";
1092     printU4ImmDecOperand(MI, OpNo, O);
1093   } else if (Imm == DppCtrl::WAVE_SHL1) {
1094     if (AMDGPU::isGFX10Plus(STI)) {
1095       O << "/* wave_shl is not supported starting from GFX10 */";
1096       return;
1097     }
1098     O << "wave_shl:1";
1099   } else if (Imm == DppCtrl::WAVE_ROL1) {
1100     if (AMDGPU::isGFX10Plus(STI)) {
1101       O << "/* wave_rol is not supported starting from GFX10 */";
1102       return;
1103     }
1104     O << "wave_rol:1";
1105   } else if (Imm == DppCtrl::WAVE_SHR1) {
1106     if (AMDGPU::isGFX10Plus(STI)) {
1107       O << "/* wave_shr is not supported starting from GFX10 */";
1108       return;
1109     }
1110     O << "wave_shr:1";
1111   } else if (Imm == DppCtrl::WAVE_ROR1) {
1112     if (AMDGPU::isGFX10Plus(STI)) {
1113       O << "/* wave_ror is not supported starting from GFX10 */";
1114       return;
1115     }
1116     O << "wave_ror:1";
1117   } else if (Imm == DppCtrl::ROW_MIRROR) {
1118     O << "row_mirror";
1119   } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
1120     O << "row_half_mirror";
1121   } else if (Imm == DppCtrl::BCAST15) {
1122     if (AMDGPU::isGFX10Plus(STI)) {
1123       O << "/* row_bcast is not supported starting from GFX10 */";
1124       return;
1125     }
1126     O << "row_bcast:15";
1127   } else if (Imm == DppCtrl::BCAST31) {
1128     if (AMDGPU::isGFX10Plus(STI)) {
1129       O << "/* row_bcast is not supported starting from GFX10 */";
1130       return;
1131     }
1132     O << "row_bcast:31";
1133   } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
1134              (Imm <= DppCtrl::ROW_SHARE_LAST)) {
1135     if (AMDGPU::isGFX90A(STI)) {
1136       O << "row_newbcast:";
1137     } else if (AMDGPU::isGFX10Plus(STI)) {
1138       O << "row_share:";
1139     } else {
1140       O << " /* row_newbcast/row_share is not supported on ASICs earlier "
1141            "than GFX90A/GFX10 */";
1142       return;
1143     }
1144     printU4ImmDecOperand(MI, OpNo, O);
1145   } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
1146              (Imm <= DppCtrl::ROW_XMASK_LAST)) {
1147     if (!AMDGPU::isGFX10Plus(STI)) {
1148       O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
1149       return;
1150     }
1151     O << "row_xmask:";
1152     printU4ImmDecOperand(MI, OpNo, O);
1153   } else {
1154     O << "/* Invalid dpp_ctrl value */";
1155   }
1156 }
1157 
1158 void AMDGPUInstPrinter::printDppRowMask(const MCInst *MI, unsigned OpNo,
1159                                         const MCSubtargetInfo &STI,
1160                                         raw_ostream &O) {
1161   O << " row_mask:";
1162   printU4ImmOperand(MI, OpNo, STI, O);
1163 }
1164 
1165 void AMDGPUInstPrinter::printDppBankMask(const MCInst *MI, unsigned OpNo,
1166                                          const MCSubtargetInfo &STI,
1167                                          raw_ostream &O) {
1168   O << " bank_mask:";
1169   printU4ImmOperand(MI, OpNo, STI, O);
1170 }
1171 
1172 void AMDGPUInstPrinter::printDppBoundCtrl(const MCInst *MI, unsigned OpNo,
1173                                           const MCSubtargetInfo &STI,
1174                                           raw_ostream &O) {
1175   unsigned Imm = MI->getOperand(OpNo).getImm();
1176   if (Imm) {
1177     O << " bound_ctrl:1";
1178   }
1179 }
1180 
1181 void AMDGPUInstPrinter::printDppFI(const MCInst *MI, unsigned OpNo,
1182                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1183   using namespace llvm::AMDGPU::DPP;
1184   unsigned Imm = MI->getOperand(OpNo).getImm();
1185   if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
1186     O << " fi:1";
1187   }
1188 }
1189 
1190 void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
1191                                      raw_ostream &O) {
1192   using namespace llvm::AMDGPU::SDWA;
1193 
1194   unsigned Imm = MI->getOperand(OpNo).getImm();
1195   switch (Imm) {
1196   case SdwaSel::BYTE_0: O << "BYTE_0"; break;
1197   case SdwaSel::BYTE_1: O << "BYTE_1"; break;
1198   case SdwaSel::BYTE_2: O << "BYTE_2"; break;
1199   case SdwaSel::BYTE_3: O << "BYTE_3"; break;
1200   case SdwaSel::WORD_0: O << "WORD_0"; break;
1201   case SdwaSel::WORD_1: O << "WORD_1"; break;
1202   case SdwaSel::DWORD: O << "DWORD"; break;
1203   default: llvm_unreachable("Invalid SDWA data select operand");
1204   }
1205 }
1206 
1207 void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
1208                                         const MCSubtargetInfo &STI,
1209                                         raw_ostream &O) {
1210   O << "dst_sel:";
1211   printSDWASel(MI, OpNo, O);
1212 }
1213 
1214 void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
1215                                          const MCSubtargetInfo &STI,
1216                                          raw_ostream &O) {
1217   O << "src0_sel:";
1218   printSDWASel(MI, OpNo, O);
1219 }
1220 
1221 void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
1222                                          const MCSubtargetInfo &STI,
1223                                          raw_ostream &O) {
1224   O << "src1_sel:";
1225   printSDWASel(MI, OpNo, O);
1226 }
1227 
1228 void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
1229                                            const MCSubtargetInfo &STI,
1230                                            raw_ostream &O) {
1231   using namespace llvm::AMDGPU::SDWA;
1232 
1233   O << "dst_unused:";
1234   unsigned Imm = MI->getOperand(OpNo).getImm();
1235   switch (Imm) {
1236   case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
1237   case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
1238   case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
1239   default: llvm_unreachable("Invalid SDWA dest_unused operand");
1240   }
1241 }
1242 
1243 void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
1244                                      const MCSubtargetInfo &STI, raw_ostream &O,
1245                                      unsigned N) {
1246   unsigned Opc = MI->getOpcode();
1247   int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
1248   unsigned En = MI->getOperand(EnIdx).getImm();
1249 
1250   int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
1251 
1252   // If compr is set, print as src0, src0, src1, src1
1253   if (MI->getOperand(ComprIdx).getImm())
1254     OpNo = OpNo - N + N / 2;
1255 
1256   if (En & (1 << N))
1257     printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
1258   else
1259     O << "off";
1260 }
1261 
1262 void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
1263                                      const MCSubtargetInfo &STI,
1264                                      raw_ostream &O) {
1265   printExpSrcN(MI, OpNo, STI, O, 0);
1266 }
1267 
1268 void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
1269                                      const MCSubtargetInfo &STI,
1270                                      raw_ostream &O) {
1271   printExpSrcN(MI, OpNo, STI, O, 1);
1272 }
1273 
1274 void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
1275                                      const MCSubtargetInfo &STI,
1276                                      raw_ostream &O) {
1277   printExpSrcN(MI, OpNo, STI, O, 2);
1278 }
1279 
1280 void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
1281                                      const MCSubtargetInfo &STI,
1282                                      raw_ostream &O) {
1283   printExpSrcN(MI, OpNo, STI, O, 3);
1284 }
1285 
1286 void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
1287                                     const MCSubtargetInfo &STI,
1288                                     raw_ostream &O) {
1289   using namespace llvm::AMDGPU::Exp;
1290 
1291   // This is really a 6-bit field.
1292   unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
1293 
1294   int Index;
1295   StringRef TgtName;
1296   if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
1297     O << ' ' << TgtName;
1298     if (Index >= 0)
1299       O << Index;
1300   } else {
1301     O << " invalid_target_" << Id;
1302   }
1303 }
1304 
1305 static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
1306                                bool IsPacked, bool HasDstSel) {
1307   int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
1308 
1309   for (int I = 0; I < NumOps; ++I) {
1310     if (!!(Ops[I] & Mod) != DefaultValue)
1311       return false;
1312   }
1313 
1314   if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
1315     return false;
1316 
1317   return true;
1318 }
1319 
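// Shared printer for op_sel / op_sel_hi / neg_lo / neg_hi: gather the Mod bit
// from each source's src*_modifiers operand and print e.g. " op_sel:[0,1,0]",
// unless every bit already has its default value.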
1320 void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
1321                                             StringRef Name,
1322                                             unsigned Mod,
1323                                             raw_ostream &O) {
1324   unsigned Opc = MI->getOpcode();
1325   int NumOps = 0;
1326   int Ops[3];
1327 
1328   std::pair<int, int> MOps[] = {
1329       {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src0},
1330       {AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src1},
1331       {AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::src2}};
1332   int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
1333 
1334   for (auto [SrcMod, Src] : MOps) {
1335     if (!AMDGPU::hasNamedOperand(Opc, Src))
1336       break;
1337 
1338     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, SrcMod);
1339     Ops[NumOps++] =
1340         (ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue;
1341   }
1342 
1343   // For WMMA/SWMMAC instructions, print all three neg/opsel values (print 0
1344   // when a src_modifiers operand is missing instead of omitting the entry).
1345   if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsSWMMAC ||
1346       MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsWMMA) {
1347     NumOps = 0;
1348     int DefaultValue = Mod == SISrcMods::OP_SEL_1;
1349     for (int OpName :
1350          {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
1351           AMDGPU::OpName::src2_modifiers}) {
1352       int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1353       if (Idx != -1)
1354         Ops[NumOps++] = MI->getOperand(Idx).getImm();
1355       else
1356         Ops[NumOps++] = DefaultValue;
1357     }
1358   }
1359 
1360   const bool HasDstSel =
1361     NumOps > 0 &&
1362     Mod == SISrcMods::OP_SEL_0 &&
1363     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
1364 
1365   const bool IsPacked =
1366     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
1367 
1368   if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
1369     return;
1370 
1371   O << Name;
1372   for (int I = 0; I < NumOps; ++I) {
1373     if (I != 0)
1374       O << ',';
1375 
1376     O << !!(Ops[I] & Mod);
1377   }
1378 
1379   if (HasDstSel) {
1380     O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
1381   }
1382 
1383   O << ']';
1384 }
1385 
1386 void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
1387                                    const MCSubtargetInfo &STI,
1388                                    raw_ostream &O) {
1389   unsigned Opc = MI->getOpcode();
1390   if (isCvt_F32_Fp8_Bf8_e64(Opc)) {
1391     auto SrcMod =
1392         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1393     unsigned Mod = MI->getOperand(SrcMod).getImm();
1394     unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0);
1395     unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1);
1396     if (Index0 || Index1)
1397       O << " op_sel:[" << Index0 << ',' << Index1 << ']';
1398     return;
1399   }
1400   if (isPermlane16(Opc)) {
1401     auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1402     auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1403     unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
1404     unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
1405     if (FI || BC)
1406       O << " op_sel:[" << FI << ',' << BC << ']';
1407     return;
1408   }
1409 
1410   printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
1411 }
1412 
1413 void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
1414                                      const MCSubtargetInfo &STI,
1415                                      raw_ostream &O) {
1416   printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
1417 }
1418 
1419 void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
1420                                    const MCSubtargetInfo &STI,
1421                                    raw_ostream &O) {
1422   printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
1423 }
1424 
1425 void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
1426                                    const MCSubtargetInfo &STI,
1427                                    raw_ostream &O) {
1428   printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
1429 }
1430 
1431 void AMDGPUInstPrinter::printIndexKey8bit(const MCInst *MI, unsigned OpNo,
1432                                           const MCSubtargetInfo &STI,
1433                                           raw_ostream &O) {
1434   auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
1435   if (Imm == 0)
1436     return;
1437 
1438   O << " index_key:" << Imm;
1439 }
1440 
1441 void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo,
1442                                            const MCSubtargetInfo &STI,
1443                                            raw_ostream &O) {
1444   auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
1445   if (Imm == 0)
1446     return;
1447 
1448   O << " index_key:" << Imm;
1449 }
1450 
1451 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
1452                                         const MCSubtargetInfo &STI,
1453                                         raw_ostream &O) {
1454   unsigned Imm = MI->getOperand(OpNum).getImm();
1455   switch (Imm) {
1456   case 0:
1457     O << "p10";
1458     break;
1459   case 1:
1460     O << "p20";
1461     break;
1462   case 2:
1463     O << "p0";
1464     break;
1465   default:
1466     O << "invalid_param_" << Imm;
1467   }
1468 }
1469 
1470 void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
1471                                         const MCSubtargetInfo &STI,
1472                                         raw_ostream &O) {
1473   unsigned Attr = MI->getOperand(OpNum).getImm();
1474   O << "attr" << Attr;
1475 }
1476 
1477 void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
1478                                         const MCSubtargetInfo &STI,
1479                                         raw_ostream &O) {
1480   unsigned Chan = MI->getOperand(OpNum).getImm();
1481   O << '.' << "xyzw"[Chan & 0x3];
1482 }
1483 
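// Prints the mode symbolically when only valid enable bits are set, using the
// names from IdSymbolic (e.g. something like "gpr_idx(SRC0,DST)"); otherwise
// falls back to raw hex.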
1484 void AMDGPUInstPrinter::printGPRIdxMode(const MCInst *MI, unsigned OpNo,
1485                                         const MCSubtargetInfo &STI,
1486                                         raw_ostream &O) {
1487   using namespace llvm::AMDGPU::VGPRIndexMode;
1488   unsigned Val = MI->getOperand(OpNo).getImm();
1489 
1490   if ((Val & ~ENABLE_MASK) != 0) {
1491     O << formatHex(static_cast<uint64_t>(Val));
1492   } else {
1493     O << "gpr_idx(";
1494     bool NeedComma = false;
1495     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
1496       if (Val & (1 << ModeId)) {
1497         if (NeedComma)
1498           O << ',';
1499         O << IdSymbolic[ModeId];
1500         NeedComma = true;
1501       }
1502     }
1503     O << ')';
1504   }
1505 }
1506 
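     // Prints a memory operand as two consecutive regular operands separated
     // by ", ".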
1507 void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
1508                                         const MCSubtargetInfo &STI,
1509                                         raw_ostream &O) {
1510   printRegularOperand(MI, OpNo, STI, O);
1511   O << ", ";
1512   printRegularOperand(MI, OpNo + 1, STI, O);
1513 }
1514 
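     // Prints Asm if the immediate operand equals 1, otherwise Default (or
     // nothing, for the single-character overload below).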
1515 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1516                                    raw_ostream &O, StringRef Asm,
1517                                    StringRef Default) {
1518   const MCOperand &Op = MI->getOperand(OpNo);
1519   assert(Op.isImm());
1520   if (Op.getImm() == 1) {
1521     O << Asm;
1522   } else {
1523     O << Default;
1524   }
1525 }
1526 
1527 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1528                                    raw_ostream &O, char Asm) {
1529   const MCOperand &Op = MI->getOperand(OpNo);
1530   assert(Op.isImm());
1531   if (Op.getImm() == 1)
1532     O << Asm;
1533 }
1534 
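     // Prints the VOP3 output modifier: mul:2, mul:4 or div:2. The default
     // (no modifier) prints nothing.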
1535 void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
1536                                     const MCSubtargetInfo &STI,
1537                                     raw_ostream &O) {
1538   int Imm = MI->getOperand(OpNo).getImm();
1539   if (Imm == SIOutMods::MUL2)
1540     O << " mul:2";
1541   else if (Imm == SIOutMods::MUL4)
1542     O << " mul:4";
1543   else if (Imm == SIOutMods::DIV2)
1544     O << " div:2";
1545 }
1546 
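     // Prints the symbolic sendmsg(...) form when the message, operation and
     // stream id all decode to valid values. If they do not, but the decoded
     // fields re-encode to the original immediate, a numeric triple is printed;
     // otherwise the raw imm16 is emitted.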
1547 void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
1548                                      const MCSubtargetInfo &STI,
1549                                      raw_ostream &O) {
1550   using namespace llvm::AMDGPU::SendMsg;
1551 
1552   const unsigned Imm16 = MI->getOperand(OpNo).getImm();
1553 
1554   uint16_t MsgId;
1555   uint16_t OpId;
1556   uint16_t StreamId;
1557   decodeMsg(Imm16, MsgId, OpId, StreamId, STI);
1558 
1559   StringRef MsgName = getMsgName(MsgId, STI);
1560 
1561   if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) &&
1562       isValidMsgStream(MsgId, OpId, StreamId, STI)) {
1563     O << "sendmsg(" << MsgName;
1564     if (msgRequiresOp(MsgId, STI)) {
1565       O << ", " << getMsgOpName(MsgId, OpId, STI);
1566       if (msgSupportsStream(MsgId, OpId, STI)) {
1567         O << ", " << StreamId;
1568       }
1569     }
1570     O << ')';
1571   } else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
1572     O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
1573   } else {
1574     O << Imm16; // Unknown imm16 code.
1575   }
1576 }
1577 
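     // Renders a bitmask_perm pattern as a string with one character per lane-id
     // bit, computed by probing the and/or/xor masks with all-zero and all-one
     // inputs: '0' and '1' mark bits forced to a constant, 'p' marks bits passed
     // through unchanged, and 'i' marks bits that are inverted.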
1578 static void printSwizzleBitmask(const uint16_t AndMask,
1579                                 const uint16_t OrMask,
1580                                 const uint16_t XorMask,
1581                                 raw_ostream &O) {
1582   using namespace llvm::AMDGPU::Swizzle;
1583 
1584   uint16_t Probe0 = ((0            & AndMask) | OrMask) ^ XorMask;
1585   uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
1586 
1587   O << "\"";
1588 
1589   for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
1590     uint16_t p0 = Probe0 & Mask;
1591     uint16_t p1 = Probe1 & Mask;
1592 
1593     if (p0 == p1) {
1594       if (p0 == 0) {
1595         O << "0";
1596       } else {
1597         O << "1";
1598       }
1599     } else {
1600       if (p0 == 0) {
1601         O << "p";
1602       } else {
1603         O << "i";
1604       }
1605     }
1606   }
1607 
1608   O << "\"";
1609 }
1610 
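     // Decodes a ds_swizzle offset into its symbolic form. QUAD_PERM encodings
     // print the four lane selectors; bitmask encodings are further matched
     // against the SWAP, REVERSE and BROADCAST patterns before falling back to
     // a generic BITMASK_PERM string. Anything else is printed as a decimal
     // immediate.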
1611 void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
1612                                      const MCSubtargetInfo &STI,
1613                                      raw_ostream &O) {
1614   using namespace llvm::AMDGPU::Swizzle;
1615 
1616   uint16_t Imm = MI->getOperand(OpNo).getImm();
1617   if (Imm == 0) {
1618     return;
1619   }
1620 
1621   O << " offset:";
1622 
1623   if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
1624 
1625     O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
1626     for (unsigned I = 0; I < LANE_NUM; ++I) {
1627       O << ",";
1628       O << formatDec(Imm & LANE_MASK);
1629       Imm >>= LANE_SHIFT;
1630     }
1631     O << ")";
1632 
1633   } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
1634 
1635     uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
1636     uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
1637     uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
1638 
1639     if (AndMask == BITMASK_MAX && OrMask == 0 && llvm::popcount(XorMask) == 1) {
1640 
1641       O << "swizzle(" << IdSymbolic[ID_SWAP];
1642       O << ",";
1643       O << formatDec(XorMask);
1644       O << ")";
1645 
1646     } else if (AndMask == BITMASK_MAX && OrMask == 0 && XorMask > 0 &&
1647                isPowerOf2_64(XorMask + 1)) {
1648 
1649       O << "swizzle(" << IdSymbolic[ID_REVERSE];
1650       O << ",";
1651       O << formatDec(XorMask + 1);
1652       O << ")";
1653 
1654     } else {
1655 
1656       uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
1657       if (GroupSize > 1 &&
1658           isPowerOf2_64(GroupSize) &&
1659           OrMask < GroupSize &&
1660           XorMask == 0) {
1661 
1662         O << "swizzle(" << IdSymbolic[ID_BROADCAST];
1663         O << ",";
1664         O << formatDec(GroupSize);
1665         O << ",";
1666         O << formatDec(OrMask);
1667         O << ")";
1668 
1669       } else {
1670         O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
1671         O << ",";
1672         printSwizzleBitmask(AndMask, OrMask, XorMask, O);
1673         O << ")";
1674       }
1675     }
1676   } else {
1677     printU16ImmDecOperand(MI, OpNo, O);
1678   }
1679 }
1680 
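     // Decodes an s_waitcnt immediate into its vmcnt/expcnt/lgkmcnt fields.
     // Counters left at their default (all-ones) value are omitted, unless every
     // counter is at its default, in which case all three are printed.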
1681 void AMDGPUInstPrinter::printSWaitCnt(const MCInst *MI, unsigned OpNo,
1682                                       const MCSubtargetInfo &STI,
1683                                       raw_ostream &O) {
1684   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
1685 
1686   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1687   unsigned Vmcnt, Expcnt, Lgkmcnt;
1688   decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
1689 
1690   bool IsDefaultVmcnt = Vmcnt == getVmcntBitMask(ISA);
1691   bool IsDefaultExpcnt = Expcnt == getExpcntBitMask(ISA);
1692   bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
1693   bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;
1694 
1695   bool NeedSpace = false;
1696 
1697   if (!IsDefaultVmcnt || PrintAll) {
1698     O << "vmcnt(" << Vmcnt << ')';
1699     NeedSpace = true;
1700   }
1701 
1702   if (!IsDefaultExpcnt || PrintAll) {
1703     if (NeedSpace)
1704       O << ' ';
1705     O << "expcnt(" << Expcnt << ')';
1706     NeedSpace = true;
1707   }
1708 
1709   if (!IsDefaultLgkmcnt || PrintAll) {
1710     if (NeedSpace)
1711       O << ' ';
1712     O << "lgkmcnt(" << Lgkmcnt << ')';
1713   }
1714 }
1715 
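     // Prints the s_waitcnt_depctr fields symbolically as name(value) pairs,
     // skipping fields that hold their default values (unless none differ from
     // the default). Non-symbolic encodings are printed as a hex immediate.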
1716 void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
1717                                     const MCSubtargetInfo &STI,
1718                                     raw_ostream &O) {
1719   using namespace llvm::AMDGPU::DepCtr;
1720 
1721   uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff;
1722 
1723   bool HasNonDefaultVal = false;
1724   if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) {
1725     int Id = 0;
1726     StringRef Name;
1727     unsigned Val;
1728     bool IsDefault;
1729     bool NeedSpace = false;
1730     while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
1731       if (!IsDefault || !HasNonDefaultVal) {
1732         if (NeedSpace)
1733           O << ' ';
1734         O << Name << '(' << Val << ')';
1735         NeedSpace = true;
1736       }
1737     }
1738   } else {
1739     O << formatHex(Imm16);
1740   }
1741 }
1742 
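     // Decodes the s_delay_alu immediate: bits 3:0 select instid0, bits 6:4
     // instskip and bits 10:7 instid1. Non-zero fields are printed joined by
     // " | "; "0" is printed when all fields are zero.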
1743 void AMDGPUInstPrinter::printSDelayALU(const MCInst *MI, unsigned OpNo,
1744                                        const MCSubtargetInfo &STI,
1745                                        raw_ostream &O) {
1746   const char *BadInstId = "/* invalid instid value */";
1747   static const std::array<const char *, 12> InstIds = {
1748       "NO_DEP",        "VALU_DEP_1",    "VALU_DEP_2",
1749       "VALU_DEP_3",    "VALU_DEP_4",    "TRANS32_DEP_1",
1750       "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1",
1751       "SALU_CYCLE_1",  "SALU_CYCLE_2",  "SALU_CYCLE_3"};
1752 
1753   const char *BadInstSkip = "/* invalid instskip value */";
1754   static const std::array<const char *, 6> InstSkips = {
1755       "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"};
1756 
1757   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1758   const char *Prefix = "";
1759 
1760   unsigned Value = SImm16 & 0xF;
1761   if (Value) {
1762     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1763     O << Prefix << "instid0(" << Name << ')';
1764     Prefix = " | ";
1765   }
1766 
1767   Value = (SImm16 >> 4) & 7;
1768   if (Value) {
1769     const char *Name =
1770         Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip;
1771     O << Prefix << "instskip(" << Name << ')';
1772     Prefix = " | ";
1773   }
1774 
1775   Value = (SImm16 >> 7) & 0xF;
1776   if (Value) {
1777     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1778     O << Prefix << "instid1(" << Name << ')';
1779     Prefix = " | ";
1780   }
1781 
1782   if (!*Prefix)
1783     O << "0";
1784 }
1785 
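     // Prints hwreg(NAME) when the register id has a symbolic name and hwreg(ID)
     // otherwise; the offset and width are appended whenever either differs from
     // its default.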
1786 void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
1787                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1788   using namespace llvm::AMDGPU::Hwreg;
1789   unsigned Val = MI->getOperand(OpNo).getImm();
1790   auto [Id, Offset, Width] = HwregEncoding::decode(Val);
1791   StringRef HwRegName = getHwreg(Id, STI);
1792 
1793   O << "hwreg(";
1794   if (!HwRegName.empty()) {
1795     O << HwRegName;
1796   } else {
1797     O << Id;
1798   }
1799   if (Width != HwregSize::Default || Offset != HwregOffset::Default)
1800     O << ", " << Offset << ", " << Width;
1801   O << ')';
1802 }
1803 
1804 void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
1805                                     const MCSubtargetInfo &STI,
1806                                     raw_ostream &O) {
1807   uint16_t Imm = MI->getOperand(OpNo).getImm();
1808   if (Imm == 0) {
1809     return;
1810   }
1811 
1812   O << ' ' << formatDec(Imm);
1813 }
1814 
1815 void AMDGPUInstPrinter::printByteSel(const MCInst *MI, unsigned OpNo,
1816                                      const MCSubtargetInfo &STI,
1817                                      raw_ostream &O) {
1818   uint8_t Imm = MI->getOperand(OpNo).getImm();
1819   if (!Imm)
1820     return;
1821 
1822   O << " byte_sel:" << formatDec(Imm);
1823 }
1824 
1825 #include "AMDGPUGenAsmWriter.inc"
1826