xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (revision 8ddb146abcdf061be9f2c0db7e391697dafad85c)
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
15 #include "MCTargetDesc/NVPTXBaseInfo.h"
16 #include "MCTargetDesc/NVPTXInstPrinter.h"
17 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
18 #include "MCTargetDesc/NVPTXTargetStreamer.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
21 #include "NVPTXMachineFunctionInfo.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
26 #include "TargetInfo/NVPTXTargetInfo.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
38 #include "llvm/Analysis/ConstantFolding.h"
39 #include "llvm/CodeGen/Analysis.h"
40 #include "llvm/CodeGen/MachineBasicBlock.h"
41 #include "llvm/CodeGen/MachineFrameInfo.h"
42 #include "llvm/CodeGen/MachineFunction.h"
43 #include "llvm/CodeGen/MachineInstr.h"
44 #include "llvm/CodeGen/MachineLoopInfo.h"
45 #include "llvm/CodeGen/MachineModuleInfo.h"
46 #include "llvm/CodeGen/MachineOperand.h"
47 #include "llvm/CodeGen/MachineRegisterInfo.h"
48 #include "llvm/CodeGen/TargetLowering.h"
49 #include "llvm/CodeGen/TargetRegisterInfo.h"
50 #include "llvm/CodeGen/ValueTypes.h"
51 #include "llvm/IR/Attributes.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/Constant.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DebugInfo.h"
57 #include "llvm/IR/DebugInfoMetadata.h"
58 #include "llvm/IR/DebugLoc.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/GlobalValue.h"
62 #include "llvm/IR/GlobalVariable.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/LLVMContext.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/MC/MCExpr.h"
70 #include "llvm/MC/MCInst.h"
71 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCStreamer.h"
73 #include "llvm/MC/MCSymbol.h"
74 #include "llvm/MC/TargetRegistry.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/ErrorHandling.h"
78 #include "llvm/Support/MachineValueType.h"
79 #include "llvm/Support/Path.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include "llvm/Target/TargetLoweringObjectFile.h"
82 #include "llvm/Target/TargetMachine.h"
83 #include "llvm/Transforms/Utils/UnrollLoop.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <cstring>
87 #include <new>
88 #include <string>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEPOTNAME "__local_depot"
95 
96 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
97 /// depends.
98 static void
99 DiscoverDependentGlobals(const Value *V,
100                          DenseSet<const GlobalVariable *> &Globals) {
101   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
102     Globals.insert(GV);
103   else {
104     if (const User *U = dyn_cast<User>(V)) {
105       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
106         DiscoverDependentGlobals(U->getOperand(i), Globals);
107       }
108     }
109   }
110 }
111 
112 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113 /// instances to be emitted, but only after any dependents have been added
114 /// first.s
115 static void
116 VisitGlobalVariableForEmission(const GlobalVariable *GV,
117                                SmallVectorImpl<const GlobalVariable *> &Order,
118                                DenseSet<const GlobalVariable *> &Visited,
119                                DenseSet<const GlobalVariable *> &Visiting) {
120   // Have we already visited this one?
121   if (Visited.count(GV))
122     return;
123 
124   // Do we have a circular dependency?
125   if (!Visiting.insert(GV).second)
126     report_fatal_error("Circular dependency found in global variable set");
127 
128   // Make sure we visit all dependents first
129   DenseSet<const GlobalVariable *> Others;
130   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
131     DiscoverDependentGlobals(GV->getOperand(i), Others);
132 
133   for (const GlobalVariable *GV : Others)
134     VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
135 
136   // Now we can visit ourself
137   Order.push_back(GV);
138   Visited.insert(GV);
139   Visiting.erase(GV);
140 }
141 
142 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
143   MCInst Inst;
144   lowerToMCInst(MI, Inst);
145   EmitToStreamer(*OutStreamer, Inst);
146 }
147 
148 // Handle symbol backtracking for targets that do not support image handles
149 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
150                                            unsigned OpNo, MCOperand &MCOp) {
151   const MachineOperand &MO = MI->getOperand(OpNo);
152   const MCInstrDesc &MCID = MI->getDesc();
153 
154   if (MCID.TSFlags & NVPTXII::IsTexFlag) {
155     // This is a texture fetch, so operand 4 is a texref and operand 5 is
156     // a samplerref
157     if (OpNo == 4 && MO.isImm()) {
158       lowerImageHandleSymbol(MO.getImm(), MCOp);
159       return true;
160     }
161     if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
162       lowerImageHandleSymbol(MO.getImm(), MCOp);
163       return true;
164     }
165 
166     return false;
167   } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
168     unsigned VecSize =
169       1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
170 
171     // For a surface load of vector size N, the Nth operand will be the surfref
172     if (OpNo == VecSize && MO.isImm()) {
173       lowerImageHandleSymbol(MO.getImm(), MCOp);
174       return true;
175     }
176 
177     return false;
178   } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
179     // This is a surface store, so operand 0 is a surfref
180     if (OpNo == 0 && MO.isImm()) {
181       lowerImageHandleSymbol(MO.getImm(), MCOp);
182       return true;
183     }
184 
185     return false;
186   } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
187     // This is a query, so operand 1 is a surfref/texref
188     if (OpNo == 1 && MO.isImm()) {
189       lowerImageHandleSymbol(MO.getImm(), MCOp);
190       return true;
191     }
192 
193     return false;
194   }
195 
196   return false;
197 }
198 
199 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
200   // Ewwww
201   LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
202   NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
203   const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
204   const char *Sym = MFI->getImageHandleSymbol(Index);
205   std::string *SymNamePtr =
206     nvTM.getManagedStrPool()->getManagedString(Sym);
207   MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
208 }
209 
210 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
211   OutMI.setOpcode(MI->getOpcode());
212   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
213   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
214     const MachineOperand &MO = MI->getOperand(0);
215     OutMI.addOperand(GetSymbolRef(
216       OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
217     return;
218   }
219 
220   const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
221   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
222     const MachineOperand &MO = MI->getOperand(i);
223 
224     MCOperand MCOp;
225     if (!STI.hasImageHandles()) {
226       if (lowerImageHandleOperand(MI, i, MCOp)) {
227         OutMI.addOperand(MCOp);
228         continue;
229       }
230     }
231 
232     if (lowerOperand(MO, MCOp))
233       OutMI.addOperand(MCOp);
234   }
235 }
236 
237 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
238                                    MCOperand &MCOp) {
239   switch (MO.getType()) {
240   default: llvm_unreachable("unknown operand type");
241   case MachineOperand::MO_Register:
242     MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
243     break;
244   case MachineOperand::MO_Immediate:
245     MCOp = MCOperand::createImm(MO.getImm());
246     break;
247   case MachineOperand::MO_MachineBasicBlock:
248     MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
249         MO.getMBB()->getSymbol(), OutContext));
250     break;
251   case MachineOperand::MO_ExternalSymbol:
252     MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
253     break;
254   case MachineOperand::MO_GlobalAddress:
255     MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
256     break;
257   case MachineOperand::MO_FPImmediate: {
258     const ConstantFP *Cnt = MO.getFPImm();
259     const APFloat &Val = Cnt->getValueAPF();
260 
261     switch (Cnt->getType()->getTypeID()) {
262     default: report_fatal_error("Unsupported FP type"); break;
263     case Type::HalfTyID:
264       MCOp = MCOperand::createExpr(
265         NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
266       break;
267     case Type::FloatTyID:
268       MCOp = MCOperand::createExpr(
269         NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
270       break;
271     case Type::DoubleTyID:
272       MCOp = MCOperand::createExpr(
273         NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
274       break;
275     }
276     break;
277   }
278   }
279   return true;
280 }
281 
282 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
283   if (Register::isVirtualRegister(Reg)) {
284     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
285 
286     DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
287     unsigned RegNum = RegMap[Reg];
288 
289     // Encode the register class in the upper 4 bits
290     // Must be kept in sync with NVPTXInstPrinter::printRegName
291     unsigned Ret = 0;
292     if (RC == &NVPTX::Int1RegsRegClass) {
293       Ret = (1 << 28);
294     } else if (RC == &NVPTX::Int16RegsRegClass) {
295       Ret = (2 << 28);
296     } else if (RC == &NVPTX::Int32RegsRegClass) {
297       Ret = (3 << 28);
298     } else if (RC == &NVPTX::Int64RegsRegClass) {
299       Ret = (4 << 28);
300     } else if (RC == &NVPTX::Float32RegsRegClass) {
301       Ret = (5 << 28);
302     } else if (RC == &NVPTX::Float64RegsRegClass) {
303       Ret = (6 << 28);
304     } else if (RC == &NVPTX::Float16RegsRegClass) {
305       Ret = (7 << 28);
306     } else if (RC == &NVPTX::Float16x2RegsRegClass) {
307       Ret = (8 << 28);
308     } else {
309       report_fatal_error("Bad register class");
310     }
311 
312     // Insert the vreg number
313     Ret |= (RegNum & 0x0FFFFFFF);
314     return Ret;
315   } else {
316     // Some special-use registers are actually physical registers.
317     // Encode this as the register class ID of 0 and the real register ID.
318     return Reg & 0x0FFFFFFF;
319   }
320 }
321 
322 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
323   const MCExpr *Expr;
324   Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
325                                  OutContext);
326   return MCOperand::createExpr(Expr);
327 }
328 
329 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
330   const DataLayout &DL = getDataLayout();
331   const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
332   const TargetLowering *TLI = STI.getTargetLowering();
333 
334   Type *Ty = F->getReturnType();
335 
336   bool isABI = (STI.getSmVersion() >= 20);
337 
338   if (Ty->getTypeID() == Type::VoidTyID)
339     return;
340 
341   O << " (";
342 
343   if (isABI) {
344     if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
345       unsigned size = 0;
346       if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
347         size = ITy->getBitWidth();
348       } else {
349         assert(Ty->isFloatingPointTy() && "Floating point type expected here");
350         size = Ty->getPrimitiveSizeInBits();
351       }
352       // PTX ABI requires all scalar return values to be at least 32
353       // bits in size.  fp16 normally uses .b16 as its storage type in
354       // PTX, so its size must be adjusted here, too.
355       if (size < 32)
356         size = 32;
357 
358       O << ".param .b" << size << " func_retval0";
359     } else if (isa<PointerType>(Ty)) {
360       O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
361         << " func_retval0";
362     } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
363       unsigned totalsz = DL.getTypeAllocSize(Ty);
364       unsigned retAlignment = 0;
365       if (!getAlign(*F, 0, retAlignment))
366         retAlignment = DL.getABITypeAlignment(Ty);
367       O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
368         << "]";
369     } else
370       llvm_unreachable("Unknown return type");
371   } else {
372     SmallVector<EVT, 16> vtparts;
373     ComputeValueVTs(*TLI, DL, Ty, vtparts);
374     unsigned idx = 0;
375     for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
376       unsigned elems = 1;
377       EVT elemtype = vtparts[i];
378       if (vtparts[i].isVector()) {
379         elems = vtparts[i].getVectorNumElements();
380         elemtype = vtparts[i].getVectorElementType();
381       }
382 
383       for (unsigned j = 0, je = elems; j != je; ++j) {
384         unsigned sz = elemtype.getSizeInBits();
385         if (elemtype.isInteger() && (sz < 32))
386           sz = 32;
387         O << ".reg .b" << sz << " func_retval" << idx;
388         if (j < je - 1)
389           O << ", ";
390         ++idx;
391       }
392       if (i < e - 1)
393         O << ", ";
394     }
395   }
396   O << ") ";
397 }
398 
399 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
400                                         raw_ostream &O) {
401   const Function &F = MF.getFunction();
402   printReturnValStr(&F, O);
403 }
404 
405 // Return true if MBB is the header of a loop marked with
406 // llvm.loop.unroll.disable.
407 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
408 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
409     const MachineBasicBlock &MBB) const {
410   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
411   // We insert .pragma "nounroll" only to the loop header.
412   if (!LI.isLoopHeader(&MBB))
413     return false;
414 
415   // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
416   // we iterate through each back edge of the loop with header MBB, and check
417   // whether its metadata contains llvm.loop.unroll.disable.
418   for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
419     if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
420       // Edges from other loops to MBB are not back edges.
421       continue;
422     }
423     if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
424       if (MDNode *LoopID =
425               PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
426         if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
427           return true;
428       }
429     }
430   }
431   return false;
432 }
433 
434 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
435   AsmPrinter::emitBasicBlockStart(MBB);
436   if (isLoopHeaderOfNoUnroll(MBB))
437     OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
438 }
439 
440 void NVPTXAsmPrinter::emitFunctionEntryLabel() {
441   SmallString<128> Str;
442   raw_svector_ostream O(Str);
443 
444   if (!GlobalsEmitted) {
445     emitGlobals(*MF->getFunction().getParent());
446     GlobalsEmitted = true;
447   }
448 
449   // Set up
450   MRI = &MF->getRegInfo();
451   F = &MF->getFunction();
452   emitLinkageDirective(F, O);
453   if (isKernelFunction(*F))
454     O << ".entry ";
455   else {
456     O << ".func ";
457     printReturnValStr(*MF, O);
458   }
459 
460   CurrentFnSym->print(O, MAI);
461 
462   emitFunctionParamList(*MF, O);
463 
464   if (isKernelFunction(*F))
465     emitKernelFunctionDirectives(*F, O);
466 
467   OutStreamer->emitRawText(O.str());
468 
469   VRegMapping.clear();
470   // Emit open brace for function body.
471   OutStreamer->emitRawText(StringRef("{\n"));
472   setAndEmitFunctionVirtualRegisters(*MF);
473   // Emit initial .loc debug directive for correct relocation symbol data.
474   if (MMI && MMI->hasDebugInfo())
475     emitInitialRawDwarfLocDirective(*MF);
476 }
477 
478 bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
479   bool Result = AsmPrinter::runOnMachineFunction(F);
480   // Emit closing brace for the body of function F.
481   // The closing brace must be emitted here because we need to emit additional
482   // debug labels/data after the last basic block.
483   // We need to emit the closing brace here because we don't have function that
484   // finished emission of the function body.
485   OutStreamer->emitRawText(StringRef("}\n"));
486   return Result;
487 }
488 
489 void NVPTXAsmPrinter::emitFunctionBodyStart() {
490   SmallString<128> Str;
491   raw_svector_ostream O(Str);
492   emitDemotedVars(&MF->getFunction(), O);
493   OutStreamer->emitRawText(O.str());
494 }
495 
496 void NVPTXAsmPrinter::emitFunctionBodyEnd() {
497   VRegMapping.clear();
498 }
499 
500 const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const {
501     SmallString<128> Str;
502     raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber();
503     return OutContext.getOrCreateSymbol(Str);
504 }
505 
506 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
507   Register RegNo = MI->getOperand(0).getReg();
508   if (Register::isVirtualRegister(RegNo)) {
509     OutStreamer->AddComment(Twine("implicit-def: ") +
510                             getVirtualRegisterName(RegNo));
511   } else {
512     const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
513     OutStreamer->AddComment(Twine("implicit-def: ") +
514                             STI.getRegisterInfo()->getName(RegNo));
515   }
516   OutStreamer->AddBlankLine();
517 }
518 
519 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
520                                                    raw_ostream &O) const {
521   // If the NVVM IR has some of reqntid* specified, then output
522   // the reqntid directive, and set the unspecified ones to 1.
523   // If none of reqntid* is specified, don't output reqntid directive.
524   unsigned reqntidx, reqntidy, reqntidz;
525   bool specified = false;
526   if (!getReqNTIDx(F, reqntidx))
527     reqntidx = 1;
528   else
529     specified = true;
530   if (!getReqNTIDy(F, reqntidy))
531     reqntidy = 1;
532   else
533     specified = true;
534   if (!getReqNTIDz(F, reqntidz))
535     reqntidz = 1;
536   else
537     specified = true;
538 
539   if (specified)
540     O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
541       << "\n";
542 
543   // If the NVVM IR has some of maxntid* specified, then output
544   // the maxntid directive, and set the unspecified ones to 1.
545   // If none of maxntid* is specified, don't output maxntid directive.
546   unsigned maxntidx, maxntidy, maxntidz;
547   specified = false;
548   if (!getMaxNTIDx(F, maxntidx))
549     maxntidx = 1;
550   else
551     specified = true;
552   if (!getMaxNTIDy(F, maxntidy))
553     maxntidy = 1;
554   else
555     specified = true;
556   if (!getMaxNTIDz(F, maxntidz))
557     maxntidz = 1;
558   else
559     specified = true;
560 
561   if (specified)
562     O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
563       << "\n";
564 
565   unsigned mincta;
566   if (getMinCTASm(F, mincta))
567     O << ".minnctapersm " << mincta << "\n";
568 
569   unsigned maxnreg;
570   if (getMaxNReg(F, maxnreg))
571     O << ".maxnreg " << maxnreg << "\n";
572 }
573 
574 std::string
575 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
576   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
577 
578   std::string Name;
579   raw_string_ostream NameStr(Name);
580 
581   VRegRCMap::const_iterator I = VRegMapping.find(RC);
582   assert(I != VRegMapping.end() && "Bad register class");
583   const DenseMap<unsigned, unsigned> &RegMap = I->second;
584 
585   VRegMap::const_iterator VI = RegMap.find(Reg);
586   assert(VI != RegMap.end() && "Bad virtual register");
587   unsigned MappedVR = VI->second;
588 
589   NameStr << getNVPTXRegClassStr(RC) << MappedVR;
590 
591   NameStr.flush();
592   return Name;
593 }
594 
595 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
596                                           raw_ostream &O) {
597   O << getVirtualRegisterName(vr);
598 }
599 
600 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
601   emitLinkageDirective(F, O);
602   if (isKernelFunction(*F))
603     O << ".entry ";
604   else
605     O << ".func ";
606   printReturnValStr(F, O);
607   getSymbol(F)->print(O, MAI);
608   O << "\n";
609   emitFunctionParamList(F, O);
610   O << ";\n";
611 }
612 
613 static bool usedInGlobalVarDef(const Constant *C) {
614   if (!C)
615     return false;
616 
617   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
618     return GV->getName() != "llvm.used";
619   }
620 
621   for (const User *U : C->users())
622     if (const Constant *C = dyn_cast<Constant>(U))
623       if (usedInGlobalVarDef(C))
624         return true;
625 
626   return false;
627 }
628 
629 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
630   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
631     if (othergv->getName() == "llvm.used")
632       return true;
633   }
634 
635   if (const Instruction *instr = dyn_cast<Instruction>(U)) {
636     if (instr->getParent() && instr->getParent()->getParent()) {
637       const Function *curFunc = instr->getParent()->getParent();
638       if (oneFunc && (curFunc != oneFunc))
639         return false;
640       oneFunc = curFunc;
641       return true;
642     } else
643       return false;
644   }
645 
646   for (const User *UU : U->users())
647     if (!usedInOneFunc(UU, oneFunc))
648       return false;
649 
650   return true;
651 }
652 
653 /* Find out if a global variable can be demoted to local scope.
654  * Currently, this is valid for CUDA shared variables, which have local
655  * scope and global lifetime. So the conditions to check are :
656  * 1. Is the global variable in shared address space?
657  * 2. Does it have internal linkage?
658  * 3. Is the global variable referenced only in one function?
659  */
660 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
661   if (!gv->hasInternalLinkage())
662     return false;
663   PointerType *Pty = gv->getType();
664   if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED)
665     return false;
666 
667   const Function *oneFunc = nullptr;
668 
669   bool flag = usedInOneFunc(gv, oneFunc);
670   if (!flag)
671     return false;
672   if (!oneFunc)
673     return false;
674   f = oneFunc;
675   return true;
676 }
677 
678 static bool useFuncSeen(const Constant *C,
679                         DenseMap<const Function *, bool> &seenMap) {
680   for (const User *U : C->users()) {
681     if (const Constant *cu = dyn_cast<Constant>(U)) {
682       if (useFuncSeen(cu, seenMap))
683         return true;
684     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
685       const BasicBlock *bb = I->getParent();
686       if (!bb)
687         continue;
688       const Function *caller = bb->getParent();
689       if (!caller)
690         continue;
691       if (seenMap.find(caller) != seenMap.end())
692         return true;
693     }
694   }
695   return false;
696 }
697 
698 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
699   DenseMap<const Function *, bool> seenMap;
700   for (const Function &F : M) {
701     if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
702       emitDeclaration(&F, O);
703       continue;
704     }
705 
706     if (F.isDeclaration()) {
707       if (F.use_empty())
708         continue;
709       if (F.getIntrinsicID())
710         continue;
711       emitDeclaration(&F, O);
712       continue;
713     }
714     for (const User *U : F.users()) {
715       if (const Constant *C = dyn_cast<Constant>(U)) {
716         if (usedInGlobalVarDef(C)) {
717           // The use is in the initialization of a global variable
718           // that is a function pointer, so print a declaration
719           // for the original function
720           emitDeclaration(&F, O);
721           break;
722         }
723         // Emit a declaration of this function if the function that
724         // uses this constant expr has already been seen.
725         if (useFuncSeen(C, seenMap)) {
726           emitDeclaration(&F, O);
727           break;
728         }
729       }
730 
731       if (!isa<Instruction>(U))
732         continue;
733       const Instruction *instr = cast<Instruction>(U);
734       const BasicBlock *bb = instr->getParent();
735       if (!bb)
736         continue;
737       const Function *caller = bb->getParent();
738       if (!caller)
739         continue;
740 
741       // If a caller has already been seen, then the caller is
742       // appearing in the module before the callee. so print out
743       // a declaration for the callee.
744       if (seenMap.find(caller) != seenMap.end()) {
745         emitDeclaration(&F, O);
746         break;
747       }
748     }
749     seenMap[&F] = true;
750   }
751 }
752 
753 static bool isEmptyXXStructor(GlobalVariable *GV) {
754   if (!GV) return true;
755   const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
756   if (!InitList) return true;  // Not an array; we don't know how to parse.
757   return InitList->getNumOperands() == 0;
758 }
759 
760 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
761   // Construct a default subtarget off of the TargetMachine defaults. The
762   // rest of NVPTX isn't friendly to change subtargets per function and
763   // so the default TargetMachine will have all of the options.
764   const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
765   const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
766   SmallString<128> Str1;
767   raw_svector_ostream OS1(Str1);
768 
769   // Emit header before any dwarf directives are emitted below.
770   emitHeader(M, OS1, *STI);
771   OutStreamer->emitRawText(OS1.str());
772 }
773 
774 bool NVPTXAsmPrinter::doInitialization(Module &M) {
775   if (M.alias_size()) {
776     report_fatal_error("Module has aliases, which NVPTX does not support.");
777     return true; // error
778   }
779   if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
780     report_fatal_error(
781         "Module has a nontrivial global ctor, which NVPTX does not support.");
782     return true;  // error
783   }
784   if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
785     report_fatal_error(
786         "Module has a nontrivial global dtor, which NVPTX does not support.");
787     return true;  // error
788   }
789 
790   // We need to call the parent's one explicitly.
791   bool Result = AsmPrinter::doInitialization(M);
792 
793   GlobalsEmitted = false;
794 
795   return Result;
796 }
797 
798 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
799   SmallString<128> Str2;
800   raw_svector_ostream OS2(Str2);
801 
802   emitDeclarations(M, OS2);
803 
804   // As ptxas does not support forward references of globals, we need to first
805   // sort the list of module-level globals in def-use order. We visit each
806   // global variable in order, and ensure that we emit it *after* its dependent
807   // globals. We use a little extra memory maintaining both a set and a list to
808   // have fast searches while maintaining a strict ordering.
809   SmallVector<const GlobalVariable *, 8> Globals;
810   DenseSet<const GlobalVariable *> GVVisited;
811   DenseSet<const GlobalVariable *> GVVisiting;
812 
813   // Visit each global variable, in order
814   for (const GlobalVariable &I : M.globals())
815     VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
816 
817   assert(GVVisited.size() == M.getGlobalList().size() &&
818          "Missed a global variable");
819   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
820 
821   // Print out module-level global variables in proper order
822   for (unsigned i = 0, e = Globals.size(); i != e; ++i)
823     printModuleLevelGV(Globals[i], OS2);
824 
825   OS2 << '\n';
826 
827   OutStreamer->emitRawText(OS2.str());
828 }
829 
830 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
831                                  const NVPTXSubtarget &STI) {
832   O << "//\n";
833   O << "// Generated by LLVM NVPTX Back-End\n";
834   O << "//\n";
835   O << "\n";
836 
837   unsigned PTXVersion = STI.getPTXVersion();
838   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
839 
840   O << ".target ";
841   O << STI.getTargetName();
842 
843   const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
844   if (NTM.getDrvInterface() == NVPTX::NVCL)
845     O << ", texmode_independent";
846 
847   bool HasFullDebugInfo = false;
848   for (DICompileUnit *CU : M.debug_compile_units()) {
849     switch(CU->getEmissionKind()) {
850     case DICompileUnit::NoDebug:
851     case DICompileUnit::DebugDirectivesOnly:
852       break;
853     case DICompileUnit::LineTablesOnly:
854     case DICompileUnit::FullDebug:
855       HasFullDebugInfo = true;
856       break;
857     }
858     if (HasFullDebugInfo)
859       break;
860   }
861   if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
862     O << ", debug";
863 
864   O << "\n";
865 
866   O << ".address_size ";
867   if (NTM.is64Bit())
868     O << "64";
869   else
870     O << "32";
871   O << "\n";
872 
873   O << "\n";
874 }
875 
876 bool NVPTXAsmPrinter::doFinalization(Module &M) {
877   bool HasDebugInfo = MMI && MMI->hasDebugInfo();
878 
879   // If we did not emit any functions, then the global declarations have not
880   // yet been emitted.
881   if (!GlobalsEmitted) {
882     emitGlobals(M);
883     GlobalsEmitted = true;
884   }
885 
886   // call doFinalization
887   bool ret = AsmPrinter::doFinalization(M);
888 
889   clearAnnotationCache(&M);
890 
891   // Close the last emitted section
892   if (HasDebugInfo) {
893     static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
894         ->closeLastSection();
895     // Emit empty .debug_loc section for better support of the empty files.
896     OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
897   }
898 
899   // Output last DWARF .file directives, if any.
900   static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
901       ->outputDwarfFileDirectives();
902 
903   return ret;
904 
905   //bool Result = AsmPrinter::doFinalization(M);
906   // Instead of calling the parents doFinalization, we may
907   // clone parents doFinalization and customize here.
908   // Currently, we if NVISA out the EmitGlobals() in
909   // parent's doFinalization, which is too intrusive.
910   //
911   // Same for the doInitialization.
912   //return Result;
913 }
914 
915 // This function emits appropriate linkage directives for
916 // functions and global variables.
917 //
918 // extern function declaration            -> .extern
919 // extern function definition             -> .visible
920 // external global variable with init     -> .visible
921 // external without init                  -> .extern
922 // appending                              -> not allowed, assert.
923 // for any linkage other than
924 // internal, private, linker_private,
925 // linker_private_weak, linker_private_weak_def_auto,
926 // we emit                                -> .weak.
927 
928 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
929                                            raw_ostream &O) {
930   if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
931     if (V->hasExternalLinkage()) {
932       if (isa<GlobalVariable>(V)) {
933         const GlobalVariable *GVar = cast<GlobalVariable>(V);
934         if (GVar) {
935           if (GVar->hasInitializer())
936             O << ".visible ";
937           else
938             O << ".extern ";
939         }
940       } else if (V->isDeclaration())
941         O << ".extern ";
942       else
943         O << ".visible ";
944     } else if (V->hasAppendingLinkage()) {
945       std::string msg;
946       msg.append("Error: ");
947       msg.append("Symbol ");
948       if (V->hasName())
949         msg.append(std::string(V->getName()));
950       msg.append("has unsupported appending linkage type");
951       llvm_unreachable(msg.c_str());
952     } else if (!V->hasInternalLinkage() &&
953                !V->hasPrivateLinkage()) {
954       O << ".weak ";
955     }
956   }
957 }
958 
959 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
960                                          raw_ostream &O,
961                                          bool processDemoted) {
962   // Skip meta data
963   if (GVar->hasSection()) {
964     if (GVar->getSection() == "llvm.metadata")
965       return;
966   }
967 
968   // Skip LLVM intrinsic global variables
969   if (GVar->getName().startswith("llvm.") ||
970       GVar->getName().startswith("nvvm."))
971     return;
972 
973   const DataLayout &DL = getDataLayout();
974 
975   // GlobalVariables are always constant pointers themselves.
976   PointerType *PTy = GVar->getType();
977   Type *ETy = GVar->getValueType();
978 
979   if (GVar->hasExternalLinkage()) {
980     if (GVar->hasInitializer())
981       O << ".visible ";
982     else
983       O << ".extern ";
984   } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
985              GVar->hasAvailableExternallyLinkage() ||
986              GVar->hasCommonLinkage()) {
987     O << ".weak ";
988   }
989 
990   if (isTexture(*GVar)) {
991     O << ".global .texref " << getTextureName(*GVar) << ";\n";
992     return;
993   }
994 
995   if (isSurface(*GVar)) {
996     O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
997     return;
998   }
999 
1000   if (GVar->isDeclaration()) {
1001     // (extern) declarations, no definition or initializer
1002     // Currently the only known declaration is for an automatic __local
1003     // (.shared) promoted to global.
1004     emitPTXGlobalVariable(GVar, O);
1005     O << ";\n";
1006     return;
1007   }
1008 
1009   if (isSampler(*GVar)) {
1010     O << ".global .samplerref " << getSamplerName(*GVar);
1011 
1012     const Constant *Initializer = nullptr;
1013     if (GVar->hasInitializer())
1014       Initializer = GVar->getInitializer();
1015     const ConstantInt *CI = nullptr;
1016     if (Initializer)
1017       CI = dyn_cast<ConstantInt>(Initializer);
1018     if (CI) {
1019       unsigned sample = CI->getZExtValue();
1020 
1021       O << " = { ";
1022 
1023       for (int i = 0,
1024                addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1025            i < 3; i++) {
1026         O << "addr_mode_" << i << " = ";
1027         switch (addr) {
1028         case 0:
1029           O << "wrap";
1030           break;
1031         case 1:
1032           O << "clamp_to_border";
1033           break;
1034         case 2:
1035           O << "clamp_to_edge";
1036           break;
1037         case 3:
1038           O << "wrap";
1039           break;
1040         case 4:
1041           O << "mirror";
1042           break;
1043         }
1044         O << ", ";
1045       }
1046       O << "filter_mode = ";
1047       switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1048       case 0:
1049         O << "nearest";
1050         break;
1051       case 1:
1052         O << "linear";
1053         break;
1054       case 2:
1055         llvm_unreachable("Anisotropic filtering is not supported");
1056       default:
1057         O << "nearest";
1058         break;
1059       }
1060       if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1061         O << ", force_unnormalized_coords = 1";
1062       }
1063       O << " }";
1064     }
1065 
1066     O << ";\n";
1067     return;
1068   }
1069 
1070   if (GVar->hasPrivateLinkage()) {
1071     if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1072       return;
1073 
1074     // FIXME - need better way (e.g. Metadata) to avoid generating this global
1075     if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1076       return;
1077     if (GVar->use_empty())
1078       return;
1079   }
1080 
1081   const Function *demotedFunc = nullptr;
1082   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1083     O << "// " << GVar->getName() << " has been demoted\n";
1084     if (localDecls.find(demotedFunc) != localDecls.end())
1085       localDecls[demotedFunc].push_back(GVar);
1086     else {
1087       std::vector<const GlobalVariable *> temp;
1088       temp.push_back(GVar);
1089       localDecls[demotedFunc] = temp;
1090     }
1091     return;
1092   }
1093 
1094   O << ".";
1095   emitPTXAddressSpace(PTy->getAddressSpace(), O);
1096 
1097   if (isManaged(*GVar)) {
1098     O << " .attribute(.managed)";
1099   }
1100 
1101   if (MaybeAlign A = GVar->getAlign())
1102     O << " .align " << A->value();
1103   else
1104     O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1105 
1106   if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1107       (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1108     O << " .";
1109     // Special case: ABI requires that we use .u8 for predicates
1110     if (ETy->isIntegerTy(1))
1111       O << "u8";
1112     else
1113       O << getPTXFundamentalTypeStr(ETy, false);
1114     O << " ";
1115     getSymbol(GVar)->print(O, MAI);
1116 
1117     // Ptx allows variable initilization only for constant and global state
1118     // spaces.
1119     if (GVar->hasInitializer()) {
1120       if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1121           (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1122         const Constant *Initializer = GVar->getInitializer();
1123         // 'undef' is treated as there is no value specified.
1124         if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1125           O << " = ";
1126           printScalarConstant(Initializer, O);
1127         }
1128       } else {
1129         // The frontend adds zero-initializer to device and constant variables
1130         // that don't have an initial value, and UndefValue to shared
1131         // variables, so skip warning for this case.
1132         if (!GVar->getInitializer()->isNullValue() &&
1133             !isa<UndefValue>(GVar->getInitializer())) {
1134           report_fatal_error("initial value of '" + GVar->getName() +
1135                              "' is not allowed in addrspace(" +
1136                              Twine(PTy->getAddressSpace()) + ")");
1137         }
1138       }
1139     }
1140   } else {
1141     unsigned int ElementSize = 0;
1142 
1143     // Although PTX has direct support for struct type and array type and
1144     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1145     // targets that support these high level field accesses. Structs, arrays
1146     // and vectors are lowered into arrays of bytes.
1147     switch (ETy->getTypeID()) {
1148     case Type::IntegerTyID: // Integers larger than 64 bits
1149     case Type::StructTyID:
1150     case Type::ArrayTyID:
1151     case Type::FixedVectorTyID:
1152       ElementSize = DL.getTypeStoreSize(ETy);
1153       // Ptx allows variable initilization only for constant and
1154       // global state spaces.
1155       if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1156            (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1157           GVar->hasInitializer()) {
1158         const Constant *Initializer = GVar->getInitializer();
1159         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1160           AggBuffer aggBuffer(ElementSize, O, *this);
1161           bufferAggregateConstant(Initializer, &aggBuffer);
1162           if (aggBuffer.numSymbols) {
1163             if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1164               O << " .u64 ";
1165               getSymbol(GVar)->print(O, MAI);
1166               O << "[";
1167               O << ElementSize / 8;
1168             } else {
1169               O << " .u32 ";
1170               getSymbol(GVar)->print(O, MAI);
1171               O << "[";
1172               O << ElementSize / 4;
1173             }
1174             O << "]";
1175           } else {
1176             O << " .b8 ";
1177             getSymbol(GVar)->print(O, MAI);
1178             O << "[";
1179             O << ElementSize;
1180             O << "]";
1181           }
1182           O << " = {";
1183           aggBuffer.print();
1184           O << "}";
1185         } else {
1186           O << " .b8 ";
1187           getSymbol(GVar)->print(O, MAI);
1188           if (ElementSize) {
1189             O << "[";
1190             O << ElementSize;
1191             O << "]";
1192           }
1193         }
1194       } else {
1195         O << " .b8 ";
1196         getSymbol(GVar)->print(O, MAI);
1197         if (ElementSize) {
1198           O << "[";
1199           O << ElementSize;
1200           O << "]";
1201         }
1202       }
1203       break;
1204     default:
1205       llvm_unreachable("type not supported yet");
1206     }
1207   }
1208   O << ";\n";
1209 }
1210 
1211 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1212   if (localDecls.find(f) == localDecls.end())
1213     return;
1214 
1215   std::vector<const GlobalVariable *> &gvars = localDecls[f];
1216 
1217   for (const GlobalVariable *GV : gvars) {
1218     O << "\t// demoted variable\n\t";
1219     printModuleLevelGV(GV, O, true);
1220   }
1221 }
1222 
1223 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1224                                           raw_ostream &O) const {
1225   switch (AddressSpace) {
1226   case ADDRESS_SPACE_LOCAL:
1227     O << "local";
1228     break;
1229   case ADDRESS_SPACE_GLOBAL:
1230     O << "global";
1231     break;
1232   case ADDRESS_SPACE_CONST:
1233     O << "const";
1234     break;
1235   case ADDRESS_SPACE_SHARED:
1236     O << "shared";
1237     break;
1238   default:
1239     report_fatal_error("Bad address space found while emitting PTX: " +
1240                        llvm::Twine(AddressSpace));
1241     break;
1242   }
1243 }
1244 
1245 std::string
1246 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1247   switch (Ty->getTypeID()) {
1248   case Type::IntegerTyID: {
1249     unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1250     if (NumBits == 1)
1251       return "pred";
1252     else if (NumBits <= 64) {
1253       std::string name = "u";
1254       return name + utostr(NumBits);
1255     } else {
1256       llvm_unreachable("Integer too large");
1257       break;
1258     }
1259     break;
1260   }
1261   case Type::HalfTyID:
1262     // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1263     return "b16";
1264   case Type::FloatTyID:
1265     return "f32";
1266   case Type::DoubleTyID:
1267     return "f64";
1268   case Type::PointerTyID:
1269     if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1270       if (useB4PTR)
1271         return "b64";
1272       else
1273         return "u64";
1274     else if (useB4PTR)
1275       return "b32";
1276     else
1277       return "u32";
1278   default:
1279     break;
1280   }
1281   llvm_unreachable("unexpected type");
1282 }
1283 
1284 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1285                                             raw_ostream &O) {
1286   const DataLayout &DL = getDataLayout();
1287 
1288   // GlobalVariables are always constant pointers themselves.
1289   Type *ETy = GVar->getValueType();
1290 
1291   O << ".";
1292   emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1293   if (MaybeAlign A = GVar->getAlign())
1294     O << " .align " << A->value();
1295   else
1296     O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1297 
1298   // Special case for i128
1299   if (ETy->isIntegerTy(128)) {
1300     O << " .b8 ";
1301     getSymbol(GVar)->print(O, MAI);
1302     O << "[16]";
1303     return;
1304   }
1305 
1306   if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1307     O << " .";
1308     O << getPTXFundamentalTypeStr(ETy);
1309     O << " ";
1310     getSymbol(GVar)->print(O, MAI);
1311     return;
1312   }
1313 
1314   int64_t ElementSize = 0;
1315 
1316   // Although PTX has direct support for struct type and array type and LLVM IR
1317   // is very similar to PTX, the LLVM CodeGen does not support for targets that
1318   // support these high level field accesses. Structs and arrays are lowered
1319   // into arrays of bytes.
1320   switch (ETy->getTypeID()) {
1321   case Type::StructTyID:
1322   case Type::ArrayTyID:
1323   case Type::FixedVectorTyID:
1324     ElementSize = DL.getTypeStoreSize(ETy);
1325     O << " .b8 ";
1326     getSymbol(GVar)->print(O, MAI);
1327     O << "[";
1328     if (ElementSize) {
1329       O << ElementSize;
1330     }
1331     O << "]";
1332     break;
1333   default:
1334     llvm_unreachable("type not supported yet");
1335   }
1336 }
1337 
1338 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1339   if (Ty->isSingleValueType())
1340     return DL.getPrefTypeAlignment(Ty);
1341 
1342   auto *ATy = dyn_cast<ArrayType>(Ty);
1343   if (ATy)
1344     return getOpenCLAlignment(DL, ATy->getElementType());
1345 
1346   auto *STy = dyn_cast<StructType>(Ty);
1347   if (STy) {
1348     unsigned int alignStruct = 1;
1349     // Go through each element of the struct and find the
1350     // largest alignment.
1351     for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1352       Type *ETy = STy->getElementType(i);
1353       unsigned int align = getOpenCLAlignment(DL, ETy);
1354       if (align > alignStruct)
1355         alignStruct = align;
1356     }
1357     return alignStruct;
1358   }
1359 
1360   auto *FTy = dyn_cast<FunctionType>(Ty);
1361   if (FTy)
1362     return DL.getPointerPrefAlignment().value();
1363   return DL.getPrefTypeAlignment(Ty);
1364 }
1365 
1366 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1367                                      int paramIndex, raw_ostream &O) {
1368   getSymbol(I->getParent())->print(O, MAI);
1369   O << "_param_" << paramIndex;
1370 }
1371 
1372 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1373   const DataLayout &DL = getDataLayout();
1374   const AttributeList &PAL = F->getAttributes();
1375   const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1376   const TargetLowering *TLI = STI.getTargetLowering();
1377   Function::const_arg_iterator I, E;
1378   unsigned paramIndex = 0;
1379   bool first = true;
1380   bool isKernelFunc = isKernelFunction(*F);
1381   bool isABI = (STI.getSmVersion() >= 20);
1382   bool hasImageHandles = STI.hasImageHandles();
1383   MVT thePointerTy = TLI->getPointerTy(DL);
1384 
1385   if (F->arg_empty()) {
1386     O << "()\n";
1387     return;
1388   }
1389 
1390   O << "(\n";
1391 
1392   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1393     Type *Ty = I->getType();
1394 
1395     if (!first)
1396       O << ",\n";
1397 
1398     first = false;
1399 
1400     // Handle image/sampler parameters
1401     if (isKernelFunction(*F)) {
1402       if (isSampler(*I) || isImage(*I)) {
1403         if (isImage(*I)) {
1404           std::string sname = std::string(I->getName());
1405           if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1406             if (hasImageHandles)
1407               O << "\t.param .u64 .ptr .surfref ";
1408             else
1409               O << "\t.param .surfref ";
1410             CurrentFnSym->print(O, MAI);
1411             O << "_param_" << paramIndex;
1412           }
1413           else { // Default image is read_only
1414             if (hasImageHandles)
1415               O << "\t.param .u64 .ptr .texref ";
1416             else
1417               O << "\t.param .texref ";
1418             CurrentFnSym->print(O, MAI);
1419             O << "_param_" << paramIndex;
1420           }
1421         } else {
1422           if (hasImageHandles)
1423             O << "\t.param .u64 .ptr .samplerref ";
1424           else
1425             O << "\t.param .samplerref ";
1426           CurrentFnSym->print(O, MAI);
1427           O << "_param_" << paramIndex;
1428         }
1429         continue;
1430       }
1431     }
1432 
1433     if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1434       if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1435         // Just print .param .align <a> .b8 .param[size];
1436         // <a> = PAL.getparamalignment
1437         // size = typeallocsize of element type
1438         const Align align = DL.getValueOrABITypeAlignment(
1439             PAL.getParamAlignment(paramIndex), Ty);
1440 
1441         unsigned sz = DL.getTypeAllocSize(Ty);
1442         O << "\t.param .align " << align.value() << " .b8 ";
1443         printParamName(I, paramIndex, O);
1444         O << "[" << sz << "]";
1445 
1446         continue;
1447       }
1448       // Just a scalar
1449       auto *PTy = dyn_cast<PointerType>(Ty);
1450       if (isKernelFunc) {
1451         if (PTy) {
1452           // Special handling for pointer arguments to kernel
1453           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1454 
1455           if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1456               NVPTX::CUDA) {
1457             Type *ETy = PTy->getPointerElementType();
1458             int addrSpace = PTy->getAddressSpace();
1459             switch (addrSpace) {
1460             default:
1461               O << ".ptr ";
1462               break;
1463             case ADDRESS_SPACE_CONST:
1464               O << ".ptr .const ";
1465               break;
1466             case ADDRESS_SPACE_SHARED:
1467               O << ".ptr .shared ";
1468               break;
1469             case ADDRESS_SPACE_GLOBAL:
1470               O << ".ptr .global ";
1471               break;
1472             }
1473             O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1474           }
1475           printParamName(I, paramIndex, O);
1476           continue;
1477         }
1478 
1479         // non-pointer scalar to kernel func
1480         O << "\t.param .";
1481         // Special case: predicate operands become .u8 types
1482         if (Ty->isIntegerTy(1))
1483           O << "u8";
1484         else
1485           O << getPTXFundamentalTypeStr(Ty);
1486         O << " ";
1487         printParamName(I, paramIndex, O);
1488         continue;
1489       }
1490       // Non-kernel function, just print .param .b<size> for ABI
1491       // and .reg .b<size> for non-ABI
1492       unsigned sz = 0;
1493       if (isa<IntegerType>(Ty)) {
1494         sz = cast<IntegerType>(Ty)->getBitWidth();
1495         if (sz < 32)
1496           sz = 32;
1497       } else if (isa<PointerType>(Ty))
1498         sz = thePointerTy.getSizeInBits();
1499       else if (Ty->isHalfTy())
1500         // PTX ABI requires all scalar parameters to be at least 32
1501         // bits in size.  fp16 normally uses .b16 as its storage type
1502         // in PTX, so its size must be adjusted here, too.
1503         sz = 32;
1504       else
1505         sz = Ty->getPrimitiveSizeInBits();
1506       if (isABI)
1507         O << "\t.param .b" << sz << " ";
1508       else
1509         O << "\t.reg .b" << sz << " ";
1510       printParamName(I, paramIndex, O);
1511       continue;
1512     }
1513 
1514     // param has byVal attribute. So should be a pointer
1515     auto *PTy = dyn_cast<PointerType>(Ty);
1516     assert(PTy && "Param with byval attribute should be a pointer type");
1517     Type *ETy = PTy->getPointerElementType();
1518 
1519     if (isABI || isKernelFunc) {
1520       // Just print .param .align <a> .b8 .param[size];
1521       // <a> = PAL.getparamalignment
1522       // size = typeallocsize of element type
1523       Align align =
1524           DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
1525       // Work around a bug in ptxas. When PTX code takes address of
1526       // byval parameter with alignment < 4, ptxas generates code to
1527       // spill argument into memory. Alas on sm_50+ ptxas generates
1528       // SASS code that fails with misaligned access. To work around
1529       // the problem, make sure that we align byval parameters by at
1530       // least 4. Matching change must be made in LowerCall() where we
1531       // prepare parameters for the call.
1532       //
1533       // TODO: this will need to be undone when we get to support multi-TU
1534       // device-side compilation as it breaks ABI compatibility with nvcc.
1535       // Hopefully ptxas bug is fixed by then.
1536       if (!isKernelFunc && align < Align(4))
1537         align = Align(4);
1538       unsigned sz = DL.getTypeAllocSize(ETy);
1539       O << "\t.param .align " << align.value() << " .b8 ";
1540       printParamName(I, paramIndex, O);
1541       O << "[" << sz << "]";
1542       continue;
1543     } else {
1544       // Split the ETy into constituent parts and
1545       // print .param .b<size> <name> for each part.
1546       // Further, if a part is vector, print the above for
1547       // each vector element.
1548       SmallVector<EVT, 16> vtparts;
1549       ComputeValueVTs(*TLI, DL, ETy, vtparts);
1550       for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1551         unsigned elems = 1;
1552         EVT elemtype = vtparts[i];
1553         if (vtparts[i].isVector()) {
1554           elems = vtparts[i].getVectorNumElements();
1555           elemtype = vtparts[i].getVectorElementType();
1556         }
1557 
1558         for (unsigned j = 0, je = elems; j != je; ++j) {
1559           unsigned sz = elemtype.getSizeInBits();
1560           if (elemtype.isInteger() && (sz < 32))
1561             sz = 32;
1562           O << "\t.reg .b" << sz << " ";
1563           printParamName(I, paramIndex, O);
1564           if (j < je - 1)
1565             O << ",\n";
1566           ++paramIndex;
1567         }
1568         if (i < e - 1)
1569           O << ",\n";
1570       }
1571       --paramIndex;
1572       continue;
1573     }
1574   }
1575 
1576   O << "\n)\n";
1577 }
1578 
1579 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1580                                             raw_ostream &O) {
1581   const Function &F = MF.getFunction();
1582   emitFunctionParamList(&F, O);
1583 }
1584 
1585 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1586     const MachineFunction &MF) {
1587   SmallString<128> Str;
1588   raw_svector_ostream O(Str);
1589 
1590   // Map the global virtual register number to a register class specific
1591   // virtual register number starting from 1 with that class.
1592   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1593   //unsigned numRegClasses = TRI->getNumRegClasses();
1594 
1595   // Emit the Fake Stack Object
1596   const MachineFrameInfo &MFI = MF.getFrameInfo();
1597   int NumBytes = (int) MFI.getStackSize();
1598   if (NumBytes) {
1599     O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1600       << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1601     if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1602       O << "\t.reg .b64 \t%SP;\n";
1603       O << "\t.reg .b64 \t%SPL;\n";
1604     } else {
1605       O << "\t.reg .b32 \t%SP;\n";
1606       O << "\t.reg .b32 \t%SPL;\n";
1607     }
1608   }
1609 
1610   // Go through all virtual registers to establish the mapping between the
1611   // global virtual
1612   // register number and the per class virtual register number.
1613   // We use the per class virtual register number in the ptx output.
1614   unsigned int numVRs = MRI->getNumVirtRegs();
1615   for (unsigned i = 0; i < numVRs; i++) {
1616     Register vr = Register::index2VirtReg(i);
1617     const TargetRegisterClass *RC = MRI->getRegClass(vr);
1618     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1619     int n = regmap.size();
1620     regmap.insert(std::make_pair(vr, n + 1));
1621   }
1622 
1623   // Emit register declarations
1624   // @TODO: Extract out the real register usage
1625   // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1626   // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1627   // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1628   // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1629   // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1630   // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1631   // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1632 
1633   // Emit declaration of the virtual registers or 'physical' registers for
1634   // each register class
1635   for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1636     const TargetRegisterClass *RC = TRI->getRegClass(i);
1637     DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1638     std::string rcname = getNVPTXRegClassName(RC);
1639     std::string rcStr = getNVPTXRegClassStr(RC);
1640     int n = regmap.size();
1641 
1642     // Only declare those registers that may be used.
1643     if (n) {
1644        O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1645          << ">;\n";
1646     }
1647   }
1648 
1649   OutStreamer->emitRawText(O.str());
1650 }
1651 
1652 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1653   APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1654   bool ignored;
1655   unsigned int numHex;
1656   const char *lead;
1657 
1658   if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1659     numHex = 8;
1660     lead = "0f";
1661     APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
1662   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1663     numHex = 16;
1664     lead = "0d";
1665     APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
1666   } else
1667     llvm_unreachable("unsupported fp type");
1668 
1669   APInt API = APF.bitcastToAPInt();
1670   O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1671 }
1672 
1673 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1674   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1675     O << CI->getValue();
1676     return;
1677   }
1678   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1679     printFPConstant(CFP, O);
1680     return;
1681   }
1682   if (isa<ConstantPointerNull>(CPV)) {
1683     O << "0";
1684     return;
1685   }
1686   if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1687     bool IsNonGenericPointer = false;
1688     if (GVar->getType()->getAddressSpace() != 0) {
1689       IsNonGenericPointer = true;
1690     }
1691     if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1692       O << "generic(";
1693       getSymbol(GVar)->print(O, MAI);
1694       O << ")";
1695     } else {
1696       getSymbol(GVar)->print(O, MAI);
1697     }
1698     return;
1699   }
1700   if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1701     const Value *v = Cexpr->stripPointerCasts();
1702     PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1703     bool IsNonGenericPointer = false;
1704     if (PTy && PTy->getAddressSpace() != 0) {
1705       IsNonGenericPointer = true;
1706     }
1707     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1708       if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1709         O << "generic(";
1710         getSymbol(GVar)->print(O, MAI);
1711         O << ")";
1712       } else {
1713         getSymbol(GVar)->print(O, MAI);
1714       }
1715       return;
1716     } else {
1717       lowerConstant(CPV)->print(O, MAI);
1718       return;
1719     }
1720   }
1721   llvm_unreachable("Not scalar type found in printScalarConstant()");
1722 }
1723 
1724 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1725                                    AggBuffer *AggBuffer) {
1726   const DataLayout &DL = getDataLayout();
1727   int AllocSize = DL.getTypeAllocSize(CPV->getType());
1728   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1729     // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1730     // only the space allocated by CPV.
1731     AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1732     return;
1733   }
1734 
1735   // Helper for filling AggBuffer with APInts.
1736   auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1737     size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1738     SmallVector<unsigned char, 16> Buf(NumBytes);
1739     for (unsigned I = 0; I < NumBytes; ++I) {
1740       Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1741     }
1742     AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1743   };
1744 
1745   switch (CPV->getType()->getTypeID()) {
1746   case Type::IntegerTyID:
1747     if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1748       AddIntToBuffer(CI->getValue());
1749       break;
1750     }
1751     if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1752       if (const auto *CI =
1753               dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1754         AddIntToBuffer(CI->getValue());
1755         break;
1756       }
1757       if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1758         Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1759         AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1760         AggBuffer->addZeros(AllocSize);
1761         break;
1762       }
1763     }
1764     llvm_unreachable("unsupported integer const type");
1765     break;
1766 
1767   case Type::HalfTyID:
1768   case Type::FloatTyID:
1769   case Type::DoubleTyID:
1770     AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1771     break;
1772 
1773   case Type::PointerTyID: {
1774     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1775       AggBuffer->addSymbol(GVar, GVar);
1776     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1777       const Value *v = Cexpr->stripPointerCasts();
1778       AggBuffer->addSymbol(v, Cexpr);
1779     }
1780     AggBuffer->addZeros(AllocSize);
1781     break;
1782   }
1783 
1784   case Type::ArrayTyID:
1785   case Type::FixedVectorTyID:
1786   case Type::StructTyID: {
1787     if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1788       bufferAggregateConstant(CPV, AggBuffer);
1789       if (Bytes > AllocSize)
1790         AggBuffer->addZeros(Bytes - AllocSize);
1791     } else if (isa<ConstantAggregateZero>(CPV))
1792       AggBuffer->addZeros(Bytes);
1793     else
1794       llvm_unreachable("Unexpected Constant type");
1795     break;
1796   }
1797 
1798   default:
1799     llvm_unreachable("unsupported type");
1800   }
1801 }
1802 
1803 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1804                                               AggBuffer *aggBuffer) {
1805   const DataLayout &DL = getDataLayout();
1806   int Bytes;
1807 
1808   // Integers of arbitrary width
1809   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1810     APInt Val = CI->getValue();
1811     for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1812       uint8_t Byte = Val.getLoBits(8).getZExtValue();
1813       aggBuffer->addBytes(&Byte, 1, 1);
1814       Val.lshrInPlace(8);
1815     }
1816     return;
1817   }
1818 
1819   // Old constants
1820   if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1821     if (CPV->getNumOperands())
1822       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1823         bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1824     return;
1825   }
1826 
1827   if (const ConstantDataSequential *CDS =
1828           dyn_cast<ConstantDataSequential>(CPV)) {
1829     if (CDS->getNumElements())
1830       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1831         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1832                      aggBuffer);
1833     return;
1834   }
1835 
1836   if (isa<ConstantStruct>(CPV)) {
1837     if (CPV->getNumOperands()) {
1838       StructType *ST = cast<StructType>(CPV->getType());
1839       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1840         if (i == (e - 1))
1841           Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1842                   DL.getTypeAllocSize(ST) -
1843                   DL.getStructLayout(ST)->getElementOffset(i);
1844         else
1845           Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1846                   DL.getStructLayout(ST)->getElementOffset(i);
1847         bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1848       }
1849     }
1850     return;
1851   }
1852   llvm_unreachable("unsupported constant type in printAggregateConstant()");
1853 }
1854 
1855 /// lowerConstantForGV - Return an MCExpr for the given Constant.  This is mostly
1856 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1857 /// expressions that are representable in PTX and create
1858 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1859 const MCExpr *
1860 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1861   MCContext &Ctx = OutContext;
1862 
1863   if (CV->isNullValue() || isa<UndefValue>(CV))
1864     return MCConstantExpr::create(0, Ctx);
1865 
1866   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1867     return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1868 
1869   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1870     const MCSymbolRefExpr *Expr =
1871       MCSymbolRefExpr::create(getSymbol(GV), Ctx);
1872     if (ProcessingGeneric) {
1873       return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1874     } else {
1875       return Expr;
1876     }
1877   }
1878 
1879   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1880   if (!CE) {
1881     llvm_unreachable("Unknown constant value to lower!");
1882   }
1883 
1884   switch (CE->getOpcode()) {
1885   default: {
1886     // If the code isn't optimized, there may be outstanding folding
1887     // opportunities. Attempt to fold the expression using DataLayout as a
1888     // last resort before giving up.
1889     Constant *C = ConstantFoldConstant(CE, getDataLayout());
1890     if (C != CE)
1891       return lowerConstantForGV(C, ProcessingGeneric);
1892 
1893     // Otherwise report the problem to the user.
1894     std::string S;
1895     raw_string_ostream OS(S);
1896     OS << "Unsupported expression in static initializer: ";
1897     CE->printAsOperand(OS, /*PrintType=*/false,
1898                    !MF ? nullptr : MF->getFunction().getParent());
1899     report_fatal_error(Twine(OS.str()));
1900   }
1901 
1902   case Instruction::AddrSpaceCast: {
1903     // Strip the addrspacecast and pass along the operand
1904     PointerType *DstTy = cast<PointerType>(CE->getType());
1905     if (DstTy->getAddressSpace() == 0) {
1906       return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1907     }
1908     std::string S;
1909     raw_string_ostream OS(S);
1910     OS << "Unsupported expression in static initializer: ";
1911     CE->printAsOperand(OS, /*PrintType=*/ false,
1912                        !MF ? nullptr : MF->getFunction().getParent());
1913     report_fatal_error(Twine(OS.str()));
1914   }
1915 
1916   case Instruction::GetElementPtr: {
1917     const DataLayout &DL = getDataLayout();
1918 
1919     // Generate a symbolic expression for the byte address
1920     APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1921     cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1922 
1923     const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1924                                             ProcessingGeneric);
1925     if (!OffsetAI)
1926       return Base;
1927 
1928     int64_t Offset = OffsetAI.getSExtValue();
1929     return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
1930                                    Ctx);
1931   }
1932 
1933   case Instruction::Trunc:
1934     // We emit the value and depend on the assembler to truncate the generated
1935     // expression properly.  This is important for differences between
1936     // blockaddress labels.  Since the two labels are in the same function, it
1937     // is reasonable to treat their delta as a 32-bit value.
1938     LLVM_FALLTHROUGH;
1939   case Instruction::BitCast:
1940     return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1941 
1942   case Instruction::IntToPtr: {
1943     const DataLayout &DL = getDataLayout();
1944 
1945     // Handle casts to pointers by changing them into casts to the appropriate
1946     // integer type.  This promotes constant folding and simplifies this code.
1947     Constant *Op = CE->getOperand(0);
1948     Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1949                                       false/*ZExt*/);
1950     return lowerConstantForGV(Op, ProcessingGeneric);
1951   }
1952 
1953   case Instruction::PtrToInt: {
1954     const DataLayout &DL = getDataLayout();
1955 
1956     // Support only foldable casts to/from pointers that can be eliminated by
1957     // changing the pointer to the appropriately sized integer type.
1958     Constant *Op = CE->getOperand(0);
1959     Type *Ty = CE->getType();
1960 
1961     const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1962 
1963     // We can emit the pointer value into this slot if the slot is an
1964     // integer slot equal to the size of the pointer.
1965     if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1966       return OpExpr;
1967 
1968     // Otherwise the pointer is smaller than the resultant integer, mask off
1969     // the high bits so we are sure to get a proper truncation if the input is
1970     // a constant expr.
1971     unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1972     const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1973     return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1974   }
1975 
1976   // The MC library also has a right-shift operator, but it isn't consistently
1977   // signed or unsigned between different targets.
1978   case Instruction::Add: {
1979     const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1980     const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1981     switch (CE->getOpcode()) {
1982     default: llvm_unreachable("Unknown binary operator constant cast expr");
1983     case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1984     }
1985   }
1986   }
1987 }
1988 
1989 // Copy of MCExpr::print customized for NVPTX
1990 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
1991   switch (Expr.getKind()) {
1992   case MCExpr::Target:
1993     return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
1994   case MCExpr::Constant:
1995     OS << cast<MCConstantExpr>(Expr).getValue();
1996     return;
1997 
1998   case MCExpr::SymbolRef: {
1999     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2000     const MCSymbol &Sym = SRE.getSymbol();
2001     Sym.print(OS, MAI);
2002     return;
2003   }
2004 
2005   case MCExpr::Unary: {
2006     const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2007     switch (UE.getOpcode()) {
2008     case MCUnaryExpr::LNot:  OS << '!'; break;
2009     case MCUnaryExpr::Minus: OS << '-'; break;
2010     case MCUnaryExpr::Not:   OS << '~'; break;
2011     case MCUnaryExpr::Plus:  OS << '+'; break;
2012     }
2013     printMCExpr(*UE.getSubExpr(), OS);
2014     return;
2015   }
2016 
2017   case MCExpr::Binary: {
2018     const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2019 
2020     // Only print parens around the LHS if it is non-trivial.
2021     if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2022         isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2023       printMCExpr(*BE.getLHS(), OS);
2024     } else {
2025       OS << '(';
2026       printMCExpr(*BE.getLHS(), OS);
2027       OS<< ')';
2028     }
2029 
2030     switch (BE.getOpcode()) {
2031     case MCBinaryExpr::Add:
2032       // Print "X-42" instead of "X+-42".
2033       if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2034         if (RHSC->getValue() < 0) {
2035           OS << RHSC->getValue();
2036           return;
2037         }
2038       }
2039 
2040       OS <<  '+';
2041       break;
2042     default: llvm_unreachable("Unhandled binary operator");
2043     }
2044 
2045     // Only print parens around the LHS if it is non-trivial.
2046     if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2047       printMCExpr(*BE.getRHS(), OS);
2048     } else {
2049       OS << '(';
2050       printMCExpr(*BE.getRHS(), OS);
2051       OS << ')';
2052     }
2053     return;
2054   }
2055   }
2056 
2057   llvm_unreachable("Invalid expression kind!");
2058 }
2059 
2060 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2061 ///
2062 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2063                                       const char *ExtraCode, raw_ostream &O) {
2064   if (ExtraCode && ExtraCode[0]) {
2065     if (ExtraCode[1] != 0)
2066       return true; // Unknown modifier.
2067 
2068     switch (ExtraCode[0]) {
2069     default:
2070       // See if this is a generic print operand
2071       return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2072     case 'r':
2073       break;
2074     }
2075   }
2076 
2077   printOperand(MI, OpNo, O);
2078 
2079   return false;
2080 }
2081 
2082 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2083                                             unsigned OpNo,
2084                                             const char *ExtraCode,
2085                                             raw_ostream &O) {
2086   if (ExtraCode && ExtraCode[0])
2087     return true; // Unknown modifier
2088 
2089   O << '[';
2090   printMemOperand(MI, OpNo, O);
2091   O << ']';
2092 
2093   return false;
2094 }
2095 
2096 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2097                                    raw_ostream &O) {
2098   const MachineOperand &MO = MI->getOperand(opNum);
2099   switch (MO.getType()) {
2100   case MachineOperand::MO_Register:
2101     if (Register::isPhysicalRegister(MO.getReg())) {
2102       if (MO.getReg() == NVPTX::VRDepot)
2103         O << DEPOTNAME << getFunctionNumber();
2104       else
2105         O << NVPTXInstPrinter::getRegisterName(MO.getReg());
2106     } else {
2107       emitVirtualRegister(MO.getReg(), O);
2108     }
2109     break;
2110 
2111   case MachineOperand::MO_Immediate:
2112     O << MO.getImm();
2113     break;
2114 
2115   case MachineOperand::MO_FPImmediate:
2116     printFPConstant(MO.getFPImm(), O);
2117     break;
2118 
2119   case MachineOperand::MO_GlobalAddress:
2120     PrintSymbolOperand(MO, O);
2121     break;
2122 
2123   case MachineOperand::MO_MachineBasicBlock:
2124     MO.getMBB()->getSymbol()->print(O, MAI);
2125     break;
2126 
2127   default:
2128     llvm_unreachable("Operand type not supported.");
2129   }
2130 }
2131 
2132 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2133                                       raw_ostream &O, const char *Modifier) {
2134   printOperand(MI, opNum, O);
2135 
2136   if (Modifier && strcmp(Modifier, "add") == 0) {
2137     O << ", ";
2138     printOperand(MI, opNum + 1, O);
2139   } else {
2140     if (MI->getOperand(opNum + 1).isImm() &&
2141         MI->getOperand(opNum + 1).getImm() == 0)
2142       return; // don't print ',0' or '+0'
2143     O << "+";
2144     printOperand(MI, opNum + 1, O);
2145   }
2146 }
2147 
2148 // Force static initialization.
2149 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() {
2150   RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
2151   RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
2152 }
2153