1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a printer that converts from our internal representation 10 // of machine-dependent LLVM code to NVPTX assembly language. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXAsmPrinter.h" 15 #include "MCTargetDesc/NVPTXBaseInfo.h" 16 #include "MCTargetDesc/NVPTXInstPrinter.h" 17 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 18 #include "MCTargetDesc/NVPTXTargetStreamer.h" 19 #include "NVPTX.h" 20 #include "NVPTXMCExpr.h" 21 #include "NVPTXMachineFunctionInfo.h" 22 #include "NVPTXRegisterInfo.h" 23 #include "NVPTXSubtarget.h" 24 #include "NVPTXTargetMachine.h" 25 #include "NVPTXUtilities.h" 26 #include "TargetInfo/NVPTXTargetInfo.h" 27 #include "cl_common_defines.h" 28 #include "llvm/ADT/APFloat.h" 29 #include "llvm/ADT/APInt.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/DenseSet.h" 32 #include "llvm/ADT/SmallString.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/ADT/StringExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/ADT/Triple.h" 37 #include "llvm/ADT/Twine.h" 38 #include "llvm/Analysis/ConstantFolding.h" 39 #include "llvm/CodeGen/Analysis.h" 40 #include "llvm/CodeGen/MachineBasicBlock.h" 41 #include "llvm/CodeGen/MachineFrameInfo.h" 42 #include "llvm/CodeGen/MachineFunction.h" 43 #include "llvm/CodeGen/MachineInstr.h" 44 #include "llvm/CodeGen/MachineLoopInfo.h" 45 #include "llvm/CodeGen/MachineModuleInfo.h" 46 #include "llvm/CodeGen/MachineOperand.h" 47 #include "llvm/CodeGen/MachineRegisterInfo.h" 48 #include "llvm/CodeGen/TargetLowering.h" 49 #include 
"llvm/CodeGen/TargetRegisterInfo.h" 50 #include "llvm/CodeGen/ValueTypes.h" 51 #include "llvm/IR/Attributes.h" 52 #include "llvm/IR/BasicBlock.h" 53 #include "llvm/IR/Constant.h" 54 #include "llvm/IR/Constants.h" 55 #include "llvm/IR/DataLayout.h" 56 #include "llvm/IR/DebugInfo.h" 57 #include "llvm/IR/DebugInfoMetadata.h" 58 #include "llvm/IR/DebugLoc.h" 59 #include "llvm/IR/DerivedTypes.h" 60 #include "llvm/IR/Function.h" 61 #include "llvm/IR/GlobalValue.h" 62 #include "llvm/IR/GlobalVariable.h" 63 #include "llvm/IR/Instruction.h" 64 #include "llvm/IR/LLVMContext.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/IR/Operator.h" 67 #include "llvm/IR/Type.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/MC/MCExpr.h" 70 #include "llvm/MC/MCInst.h" 71 #include "llvm/MC/MCInstrDesc.h" 72 #include "llvm/MC/MCStreamer.h" 73 #include "llvm/MC/MCSymbol.h" 74 #include "llvm/Support/Casting.h" 75 #include "llvm/Support/CommandLine.h" 76 #include "llvm/Support/ErrorHandling.h" 77 #include "llvm/Support/MachineValueType.h" 78 #include "llvm/Support/Path.h" 79 #include "llvm/Support/TargetRegistry.h" 80 #include "llvm/Support/raw_ostream.h" 81 #include "llvm/Target/TargetLoweringObjectFile.h" 82 #include "llvm/Target/TargetMachine.h" 83 #include "llvm/Transforms/Utils/UnrollLoop.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <cstring> 87 #include <new> 88 #include <string> 89 #include <utility> 90 #include <vector> 91 92 using namespace llvm; 93 94 #define DEPOTNAME "__local_depot" 95 96 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 97 /// depends. 
98 static void 99 DiscoverDependentGlobals(const Value *V, 100 DenseSet<const GlobalVariable *> &Globals) { 101 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 102 Globals.insert(GV); 103 else { 104 if (const User *U = dyn_cast<User>(V)) { 105 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 106 DiscoverDependentGlobals(U->getOperand(i), Globals); 107 } 108 } 109 } 110 } 111 112 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 113 /// instances to be emitted, but only after any dependents have been added 114 /// first.s 115 static void 116 VisitGlobalVariableForEmission(const GlobalVariable *GV, 117 SmallVectorImpl<const GlobalVariable *> &Order, 118 DenseSet<const GlobalVariable *> &Visited, 119 DenseSet<const GlobalVariable *> &Visiting) { 120 // Have we already visited this one? 121 if (Visited.count(GV)) 122 return; 123 124 // Do we have a circular dependency? 125 if (!Visiting.insert(GV).second) 126 report_fatal_error("Circular dependency found in global variable set"); 127 128 // Make sure we visit all dependents first 129 DenseSet<const GlobalVariable *> Others; 130 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 131 DiscoverDependentGlobals(GV->getOperand(i), Others); 132 133 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 134 E = Others.end(); 135 I != E; ++I) 136 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 137 138 // Now we can visit ourself 139 Order.push_back(GV); 140 Visited.insert(GV); 141 Visiting.erase(GV); 142 } 143 144 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) { 145 MCInst Inst; 146 lowerToMCInst(MI, Inst); 147 EmitToStreamer(*OutStreamer, Inst); 148 } 149 150 // Handle symbol backtracking for targets that do not support image handles 151 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 152 unsigned OpNo, MCOperand &MCOp) { 153 const MachineOperand &MO = MI->getOperand(OpNo); 154 const MCInstrDesc &MCID = 
MI->getDesc(); 155 156 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 157 // This is a texture fetch, so operand 4 is a texref and operand 5 is 158 // a samplerref 159 if (OpNo == 4 && MO.isImm()) { 160 lowerImageHandleSymbol(MO.getImm(), MCOp); 161 return true; 162 } 163 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 164 lowerImageHandleSymbol(MO.getImm(), MCOp); 165 return true; 166 } 167 168 return false; 169 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 170 unsigned VecSize = 171 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 172 173 // For a surface load of vector size N, the Nth operand will be the surfref 174 if (OpNo == VecSize && MO.isImm()) { 175 lowerImageHandleSymbol(MO.getImm(), MCOp); 176 return true; 177 } 178 179 return false; 180 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 181 // This is a surface store, so operand 0 is a surfref 182 if (OpNo == 0 && MO.isImm()) { 183 lowerImageHandleSymbol(MO.getImm(), MCOp); 184 return true; 185 } 186 187 return false; 188 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 189 // This is a query, so operand 1 is a surfref/texref 190 if (OpNo == 1 && MO.isImm()) { 191 lowerImageHandleSymbol(MO.getImm(), MCOp); 192 return true; 193 } 194 195 return false; 196 } 197 198 return false; 199 } 200 201 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 202 // Ewwww 203 LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget()); 204 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 205 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 206 const char *Sym = MFI->getImageHandleSymbol(Index); 207 std::string *SymNamePtr = 208 nvTM.getManagedStrPool()->getManagedString(Sym); 209 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr))); 210 } 211 212 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 213 
OutMI.setOpcode(MI->getOpcode()); 214 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 215 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 216 const MachineOperand &MO = MI->getOperand(0); 217 OutMI.addOperand(GetSymbolRef( 218 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName())))); 219 return; 220 } 221 222 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>(); 223 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 224 const MachineOperand &MO = MI->getOperand(i); 225 226 MCOperand MCOp; 227 if (!STI.hasImageHandles()) { 228 if (lowerImageHandleOperand(MI, i, MCOp)) { 229 OutMI.addOperand(MCOp); 230 continue; 231 } 232 } 233 234 if (lowerOperand(MO, MCOp)) 235 OutMI.addOperand(MCOp); 236 } 237 } 238 239 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 240 MCOperand &MCOp) { 241 switch (MO.getType()) { 242 default: llvm_unreachable("unknown operand type"); 243 case MachineOperand::MO_Register: 244 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); 245 break; 246 case MachineOperand::MO_Immediate: 247 MCOp = MCOperand::createImm(MO.getImm()); 248 break; 249 case MachineOperand::MO_MachineBasicBlock: 250 MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( 251 MO.getMBB()->getSymbol(), OutContext)); 252 break; 253 case MachineOperand::MO_ExternalSymbol: 254 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 255 break; 256 case MachineOperand::MO_GlobalAddress: 257 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 258 break; 259 case MachineOperand::MO_FPImmediate: { 260 const ConstantFP *Cnt = MO.getFPImm(); 261 const APFloat &Val = Cnt->getValueAPF(); 262 263 switch (Cnt->getType()->getTypeID()) { 264 default: report_fatal_error("Unsupported FP type"); break; 265 case Type::HalfTyID: 266 MCOp = MCOperand::createExpr( 267 NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext)); 268 break; 269 case Type::FloatTyID: 270 MCOp = MCOperand::createExpr( 271 
NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); 272 break; 273 case Type::DoubleTyID: 274 MCOp = MCOperand::createExpr( 275 NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); 276 break; 277 } 278 break; 279 } 280 } 281 return true; 282 } 283 284 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 285 if (Register::isVirtualRegister(Reg)) { 286 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 287 288 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 289 unsigned RegNum = RegMap[Reg]; 290 291 // Encode the register class in the upper 4 bits 292 // Must be kept in sync with NVPTXInstPrinter::printRegName 293 unsigned Ret = 0; 294 if (RC == &NVPTX::Int1RegsRegClass) { 295 Ret = (1 << 28); 296 } else if (RC == &NVPTX::Int16RegsRegClass) { 297 Ret = (2 << 28); 298 } else if (RC == &NVPTX::Int32RegsRegClass) { 299 Ret = (3 << 28); 300 } else if (RC == &NVPTX::Int64RegsRegClass) { 301 Ret = (4 << 28); 302 } else if (RC == &NVPTX::Float32RegsRegClass) { 303 Ret = (5 << 28); 304 } else if (RC == &NVPTX::Float64RegsRegClass) { 305 Ret = (6 << 28); 306 } else if (RC == &NVPTX::Float16RegsRegClass) { 307 Ret = (7 << 28); 308 } else if (RC == &NVPTX::Float16x2RegsRegClass) { 309 Ret = (8 << 28); 310 } else { 311 report_fatal_error("Bad register class"); 312 } 313 314 // Insert the vreg number 315 Ret |= (RegNum & 0x0FFFFFFF); 316 return Ret; 317 } else { 318 // Some special-use registers are actually physical registers. 319 // Encode this as the register class ID of 0 and the real register ID. 
320 return Reg & 0x0FFFFFFF; 321 } 322 } 323 324 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 325 const MCExpr *Expr; 326 Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, 327 OutContext); 328 return MCOperand::createExpr(Expr); 329 } 330 331 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 332 const DataLayout &DL = getDataLayout(); 333 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F); 334 const TargetLowering *TLI = STI.getTargetLowering(); 335 336 Type *Ty = F->getReturnType(); 337 338 bool isABI = (STI.getSmVersion() >= 20); 339 340 if (Ty->getTypeID() == Type::VoidTyID) 341 return; 342 343 O << " ("; 344 345 if (isABI) { 346 if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) { 347 unsigned size = 0; 348 if (auto *ITy = dyn_cast<IntegerType>(Ty)) { 349 size = ITy->getBitWidth(); 350 } else { 351 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 352 size = Ty->getPrimitiveSizeInBits(); 353 } 354 // PTX ABI requires all scalar return values to be at least 32 355 // bits in size. fp16 normally uses .b16 as its storage type in 356 // PTX, so its size must be adjusted here, too. 
357 if (size < 32) 358 size = 32; 359 360 O << ".param .b" << size << " func_retval0"; 361 } else if (isa<PointerType>(Ty)) { 362 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() 363 << " func_retval0"; 364 } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { 365 unsigned totalsz = DL.getTypeAllocSize(Ty); 366 unsigned retAlignment = 0; 367 if (!getAlign(*F, 0, retAlignment)) 368 retAlignment = DL.getABITypeAlignment(Ty); 369 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 370 << "]"; 371 } else 372 llvm_unreachable("Unknown return type"); 373 } else { 374 SmallVector<EVT, 16> vtparts; 375 ComputeValueVTs(*TLI, DL, Ty, vtparts); 376 unsigned idx = 0; 377 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 378 unsigned elems = 1; 379 EVT elemtype = vtparts[i]; 380 if (vtparts[i].isVector()) { 381 elems = vtparts[i].getVectorNumElements(); 382 elemtype = vtparts[i].getVectorElementType(); 383 } 384 385 for (unsigned j = 0, je = elems; j != je; ++j) { 386 unsigned sz = elemtype.getSizeInBits(); 387 if (elemtype.isInteger() && (sz < 32)) 388 sz = 32; 389 O << ".reg .b" << sz << " func_retval" << idx; 390 if (j < je - 1) 391 O << ", "; 392 ++idx; 393 } 394 if (i < e - 1) 395 O << ", "; 396 } 397 } 398 O << ") "; 399 } 400 401 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 402 raw_ostream &O) { 403 const Function &F = MF.getFunction(); 404 printReturnValStr(&F, O); 405 } 406 407 // Return true if MBB is the header of a loop marked with 408 // llvm.loop.unroll.disable. 409 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll". 410 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 411 const MachineBasicBlock &MBB) const { 412 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 413 // We insert .pragma "nounroll" only to the loop header. 414 if (!LI.isLoopHeader(&MBB)) 415 return false; 416 417 // llvm.loop.unroll.disable is marked on the back edges of a loop. 
Therefore, 418 // we iterate through each back edge of the loop with header MBB, and check 419 // whether its metadata contains llvm.loop.unroll.disable. 420 for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) { 421 const MachineBasicBlock *PMBB = *I; 422 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 423 // Edges from other loops to MBB are not back edges. 424 continue; 425 } 426 if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 427 if (MDNode *LoopID = 428 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) { 429 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 430 return true; 431 } 432 } 433 } 434 return false; 435 } 436 437 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { 438 AsmPrinter::emitBasicBlockStart(MBB); 439 if (isLoopHeaderOfNoUnroll(MBB)) 440 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n")); 441 } 442 443 void NVPTXAsmPrinter::emitFunctionEntryLabel() { 444 SmallString<128> Str; 445 raw_svector_ostream O(Str); 446 447 if (!GlobalsEmitted) { 448 emitGlobals(*MF->getFunction().getParent()); 449 GlobalsEmitted = true; 450 } 451 452 // Set up 453 MRI = &MF->getRegInfo(); 454 F = &MF->getFunction(); 455 emitLinkageDirective(F, O); 456 if (isKernelFunction(*F)) 457 O << ".entry "; 458 else { 459 O << ".func "; 460 printReturnValStr(*MF, O); 461 } 462 463 CurrentFnSym->print(O, MAI); 464 465 emitFunctionParamList(*MF, O); 466 467 if (isKernelFunction(*F)) 468 emitKernelFunctionDirectives(*F, O); 469 470 OutStreamer->emitRawText(O.str()); 471 472 VRegMapping.clear(); 473 // Emit open brace for function body. 474 OutStreamer->emitRawText(StringRef("{\n")); 475 setAndEmitFunctionVirtualRegisters(*MF); 476 // Emit initial .loc debug directive for correct relocation symbol data. 
477 if (MMI && MMI->hasDebugInfo()) 478 emitInitialRawDwarfLocDirective(*MF); 479 } 480 481 bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) { 482 bool Result = AsmPrinter::runOnMachineFunction(F); 483 // Emit closing brace for the body of function F. 484 // The closing brace must be emitted here because we need to emit additional 485 // debug labels/data after the last basic block. 486 // We need to emit the closing brace here because we don't have function that 487 // finished emission of the function body. 488 OutStreamer->emitRawText(StringRef("}\n")); 489 return Result; 490 } 491 492 void NVPTXAsmPrinter::emitFunctionBodyStart() { 493 SmallString<128> Str; 494 raw_svector_ostream O(Str); 495 emitDemotedVars(&MF->getFunction(), O); 496 OutStreamer->emitRawText(O.str()); 497 } 498 499 void NVPTXAsmPrinter::emitFunctionBodyEnd() { 500 VRegMapping.clear(); 501 } 502 503 const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const { 504 SmallString<128> Str; 505 raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber(); 506 return OutContext.getOrCreateSymbol(Str); 507 } 508 509 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 510 Register RegNo = MI->getOperand(0).getReg(); 511 if (Register::isVirtualRegister(RegNo)) { 512 OutStreamer->AddComment(Twine("implicit-def: ") + 513 getVirtualRegisterName(RegNo)); 514 } else { 515 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>(); 516 OutStreamer->AddComment(Twine("implicit-def: ") + 517 STI.getRegisterInfo()->getName(RegNo)); 518 } 519 OutStreamer->AddBlankLine(); 520 } 521 522 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 523 raw_ostream &O) const { 524 // If the NVVM IR has some of reqntid* specified, then output 525 // the reqntid directive, and set the unspecified ones to 1. 526 // If none of reqntid* is specified, don't output reqntid directive. 
527 unsigned reqntidx, reqntidy, reqntidz; 528 bool specified = false; 529 if (!getReqNTIDx(F, reqntidx)) 530 reqntidx = 1; 531 else 532 specified = true; 533 if (!getReqNTIDy(F, reqntidy)) 534 reqntidy = 1; 535 else 536 specified = true; 537 if (!getReqNTIDz(F, reqntidz)) 538 reqntidz = 1; 539 else 540 specified = true; 541 542 if (specified) 543 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 544 << "\n"; 545 546 // If the NVVM IR has some of maxntid* specified, then output 547 // the maxntid directive, and set the unspecified ones to 1. 548 // If none of maxntid* is specified, don't output maxntid directive. 549 unsigned maxntidx, maxntidy, maxntidz; 550 specified = false; 551 if (!getMaxNTIDx(F, maxntidx)) 552 maxntidx = 1; 553 else 554 specified = true; 555 if (!getMaxNTIDy(F, maxntidy)) 556 maxntidy = 1; 557 else 558 specified = true; 559 if (!getMaxNTIDz(F, maxntidz)) 560 maxntidz = 1; 561 else 562 specified = true; 563 564 if (specified) 565 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 566 << "\n"; 567 568 unsigned mincta; 569 if (getMinCTASm(F, mincta)) 570 O << ".minnctapersm " << mincta << "\n"; 571 572 unsigned maxnreg; 573 if (getMaxNReg(F, maxnreg)) 574 O << ".maxnreg " << maxnreg << "\n"; 575 } 576 577 std::string 578 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 579 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 580 581 std::string Name; 582 raw_string_ostream NameStr(Name); 583 584 VRegRCMap::const_iterator I = VRegMapping.find(RC); 585 assert(I != VRegMapping.end() && "Bad register class"); 586 const DenseMap<unsigned, unsigned> &RegMap = I->second; 587 588 VRegMap::const_iterator VI = RegMap.find(Reg); 589 assert(VI != RegMap.end() && "Bad virtual register"); 590 unsigned MappedVR = VI->second; 591 592 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 593 594 NameStr.flush(); 595 return Name; 596 } 597 598 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 599 raw_ostream 
&O) { 600 O << getVirtualRegisterName(vr); 601 } 602 603 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 604 emitLinkageDirective(F, O); 605 if (isKernelFunction(*F)) 606 O << ".entry "; 607 else 608 O << ".func "; 609 printReturnValStr(F, O); 610 getSymbol(F)->print(O, MAI); 611 O << "\n"; 612 emitFunctionParamList(F, O); 613 O << ";\n"; 614 } 615 616 static bool usedInGlobalVarDef(const Constant *C) { 617 if (!C) 618 return false; 619 620 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 621 return GV->getName() != "llvm.used"; 622 } 623 624 for (const User *U : C->users()) 625 if (const Constant *C = dyn_cast<Constant>(U)) 626 if (usedInGlobalVarDef(C)) 627 return true; 628 629 return false; 630 } 631 632 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 633 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 634 if (othergv->getName() == "llvm.used") 635 return true; 636 } 637 638 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 639 if (instr->getParent() && instr->getParent()->getParent()) { 640 const Function *curFunc = instr->getParent()->getParent(); 641 if (oneFunc && (curFunc != oneFunc)) 642 return false; 643 oneFunc = curFunc; 644 return true; 645 } else 646 return false; 647 } 648 649 for (const User *UU : U->users()) 650 if (!usedInOneFunc(UU, oneFunc)) 651 return false; 652 653 return true; 654 } 655 656 /* Find out if a global variable can be demoted to local scope. 657 * Currently, this is valid for CUDA shared variables, which have local 658 * scope and global lifetime. So the conditions to check are : 659 * 1. Is the global variable in shared address space? 660 * 2. Does it have internal linkage? 661 * 3. Is the global variable referenced only in one function? 
662 */ 663 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 664 if (!gv->hasInternalLinkage()) 665 return false; 666 PointerType *Pty = gv->getType(); 667 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED) 668 return false; 669 670 const Function *oneFunc = nullptr; 671 672 bool flag = usedInOneFunc(gv, oneFunc); 673 if (!flag) 674 return false; 675 if (!oneFunc) 676 return false; 677 f = oneFunc; 678 return true; 679 } 680 681 static bool useFuncSeen(const Constant *C, 682 DenseMap<const Function *, bool> &seenMap) { 683 for (const User *U : C->users()) { 684 if (const Constant *cu = dyn_cast<Constant>(U)) { 685 if (useFuncSeen(cu, seenMap)) 686 return true; 687 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 688 const BasicBlock *bb = I->getParent(); 689 if (!bb) 690 continue; 691 const Function *caller = bb->getParent(); 692 if (!caller) 693 continue; 694 if (seenMap.find(caller) != seenMap.end()) 695 return true; 696 } 697 } 698 return false; 699 } 700 701 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 702 DenseMap<const Function *, bool> seenMap; 703 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 704 const Function *F = &*FI; 705 706 if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) { 707 emitDeclaration(F, O); 708 continue; 709 } 710 711 if (F->isDeclaration()) { 712 if (F->use_empty()) 713 continue; 714 if (F->getIntrinsicID()) 715 continue; 716 emitDeclaration(F, O); 717 continue; 718 } 719 for (const User *U : F->users()) { 720 if (const Constant *C = dyn_cast<Constant>(U)) { 721 if (usedInGlobalVarDef(C)) { 722 // The use is in the initialization of a global variable 723 // that is a function pointer, so print a declaration 724 // for the original function 725 emitDeclaration(F, O); 726 break; 727 } 728 // Emit a declaration of this function if the function that 729 // uses this constant expr has already been seen. 
730 if (useFuncSeen(C, seenMap)) { 731 emitDeclaration(F, O); 732 break; 733 } 734 } 735 736 if (!isa<Instruction>(U)) 737 continue; 738 const Instruction *instr = cast<Instruction>(U); 739 const BasicBlock *bb = instr->getParent(); 740 if (!bb) 741 continue; 742 const Function *caller = bb->getParent(); 743 if (!caller) 744 continue; 745 746 // If a caller has already been seen, then the caller is 747 // appearing in the module before the callee. so print out 748 // a declaration for the callee. 749 if (seenMap.find(caller) != seenMap.end()) { 750 emitDeclaration(F, O); 751 break; 752 } 753 } 754 seenMap[F] = true; 755 } 756 } 757 758 static bool isEmptyXXStructor(GlobalVariable *GV) { 759 if (!GV) return true; 760 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); 761 if (!InitList) return true; // Not an array; we don't know how to parse. 762 return InitList->getNumOperands() == 0; 763 } 764 765 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) { 766 // Construct a default subtarget off of the TargetMachine defaults. The 767 // rest of NVPTX isn't friendly to change subtargets per function and 768 // so the default TargetMachine will have all of the options. 769 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 770 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl()); 771 SmallString<128> Str1; 772 raw_svector_ostream OS1(Str1); 773 774 // Emit header before any dwarf directives are emitted below. 
775 emitHeader(M, OS1, *STI); 776 OutStreamer->emitRawText(OS1.str()); 777 } 778 779 bool NVPTXAsmPrinter::doInitialization(Module &M) { 780 if (M.alias_size()) { 781 report_fatal_error("Module has aliases, which NVPTX does not support."); 782 return true; // error 783 } 784 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) { 785 report_fatal_error( 786 "Module has a nontrivial global ctor, which NVPTX does not support."); 787 return true; // error 788 } 789 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) { 790 report_fatal_error( 791 "Module has a nontrivial global dtor, which NVPTX does not support."); 792 return true; // error 793 } 794 795 // We need to call the parent's one explicitly. 796 bool Result = AsmPrinter::doInitialization(M); 797 798 GlobalsEmitted = false; 799 800 return Result; 801 } 802 803 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 804 SmallString<128> Str2; 805 raw_svector_ostream OS2(Str2); 806 807 emitDeclarations(M, OS2); 808 809 // As ptxas does not support forward references of globals, we need to first 810 // sort the list of module-level globals in def-use order. We visit each 811 // global variable in order, and ensure that we emit it *after* its dependent 812 // globals. We use a little extra memory maintaining both a set and a list to 813 // have fast searches while maintaining a strict ordering. 
814 SmallVector<const GlobalVariable *, 8> Globals; 815 DenseSet<const GlobalVariable *> GVVisited; 816 DenseSet<const GlobalVariable *> GVVisiting; 817 818 // Visit each global variable, in order 819 for (const GlobalVariable &I : M.globals()) 820 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); 821 822 assert(GVVisited.size() == M.getGlobalList().size() && 823 "Missed a global variable"); 824 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 825 826 // Print out module-level global variables in proper order 827 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 828 printModuleLevelGV(Globals[i], OS2); 829 830 OS2 << '\n'; 831 832 OutStreamer->emitRawText(OS2.str()); 833 } 834 835 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 836 const NVPTXSubtarget &STI) { 837 O << "//\n"; 838 O << "// Generated by LLVM NVPTX Back-End\n"; 839 O << "//\n"; 840 O << "\n"; 841 842 unsigned PTXVersion = STI.getPTXVersion(); 843 O << ".version " << (PTXVersion / 10) << "." 
<< (PTXVersion % 10) << "\n"; 844 845 O << ".target "; 846 O << STI.getTargetName(); 847 848 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 849 if (NTM.getDrvInterface() == NVPTX::NVCL) 850 O << ", texmode_independent"; 851 852 bool HasFullDebugInfo = false; 853 for (DICompileUnit *CU : M.debug_compile_units()) { 854 switch(CU->getEmissionKind()) { 855 case DICompileUnit::NoDebug: 856 case DICompileUnit::DebugDirectivesOnly: 857 break; 858 case DICompileUnit::LineTablesOnly: 859 case DICompileUnit::FullDebug: 860 HasFullDebugInfo = true; 861 break; 862 } 863 if (HasFullDebugInfo) 864 break; 865 } 866 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) 867 O << ", debug"; 868 869 O << "\n"; 870 871 O << ".address_size "; 872 if (NTM.is64Bit()) 873 O << "64"; 874 else 875 O << "32"; 876 O << "\n"; 877 878 O << "\n"; 879 } 880 881 bool NVPTXAsmPrinter::doFinalization(Module &M) { 882 bool HasDebugInfo = MMI && MMI->hasDebugInfo(); 883 884 // If we did not emit any functions, then the global declarations have not 885 // yet been emitted. 886 if (!GlobalsEmitted) { 887 emitGlobals(M); 888 GlobalsEmitted = true; 889 } 890 891 // XXX Temproarily remove global variables so that doFinalization() will not 892 // emit them again (global variables are emitted at beginning). 
893 894 Module::GlobalListType &global_list = M.getGlobalList(); 895 int i, n = global_list.size(); 896 GlobalVariable **gv_array = new GlobalVariable *[n]; 897 898 // first, back-up GlobalVariable in gv_array 899 i = 0; 900 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 901 I != E; ++I) 902 gv_array[i++] = &*I; 903 904 // second, empty global_list 905 while (!global_list.empty()) 906 global_list.remove(global_list.begin()); 907 908 // call doFinalization 909 bool ret = AsmPrinter::doFinalization(M); 910 911 // now we restore global variables 912 for (i = 0; i < n; i++) 913 global_list.insert(global_list.end(), gv_array[i]); 914 915 clearAnnotationCache(&M); 916 917 delete[] gv_array; 918 // Close the last emitted section 919 if (HasDebugInfo) { 920 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()) 921 ->closeLastSection(); 922 // Emit empty .debug_loc section for better support of the empty files. 923 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}"); 924 } 925 926 // Output last DWARF .file directives, if any. 927 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()) 928 ->outputDwarfFileDirectives(); 929 930 return ret; 931 932 //bool Result = AsmPrinter::doFinalization(M); 933 // Instead of calling the parents doFinalization, we may 934 // clone parents doFinalization and customize here. 935 // Currently, we if NVISA out the EmitGlobals() in 936 // parent's doFinalization, which is too intrusive. 937 // 938 // Same for the doInitialization. 939 //return Result; 940 } 941 942 // This function emits appropriate linkage directives for 943 // functions and global variables. 944 // 945 // extern function declaration -> .extern 946 // extern function definition -> .visible 947 // external global variable with init -> .visible 948 // external without init -> .extern 949 // appending -> not allowed, assert. 
950 // for any linkage other than 951 // internal, private, linker_private, 952 // linker_private_weak, linker_private_weak_def_auto, 953 // we emit -> .weak. 954 955 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 956 raw_ostream &O) { 957 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 958 if (V->hasExternalLinkage()) { 959 if (isa<GlobalVariable>(V)) { 960 const GlobalVariable *GVar = cast<GlobalVariable>(V); 961 if (GVar) { 962 if (GVar->hasInitializer()) 963 O << ".visible "; 964 else 965 O << ".extern "; 966 } 967 } else if (V->isDeclaration()) 968 O << ".extern "; 969 else 970 O << ".visible "; 971 } else if (V->hasAppendingLinkage()) { 972 std::string msg; 973 msg.append("Error: "); 974 msg.append("Symbol "); 975 if (V->hasName()) 976 msg.append(std::string(V->getName())); 977 msg.append("has unsupported appending linkage type"); 978 llvm_unreachable(msg.c_str()); 979 } else if (!V->hasInternalLinkage() && 980 !V->hasPrivateLinkage()) { 981 O << ".weak "; 982 } 983 } 984 } 985 986 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 987 raw_ostream &O, 988 bool processDemoted) { 989 // Skip meta data 990 if (GVar->hasSection()) { 991 if (GVar->getSection() == "llvm.metadata") 992 return; 993 } 994 995 // Skip LLVM intrinsic global variables 996 if (GVar->getName().startswith("llvm.") || 997 GVar->getName().startswith("nvvm.")) 998 return; 999 1000 const DataLayout &DL = getDataLayout(); 1001 1002 // GlobalVariables are always constant pointers themselves. 
  PointerType *PTy = GVar->getType();
  Type *ETy = GVar->getValueType();

  // Emit the linkage directive for the variable itself.
  if (GVar->hasExternalLinkage()) {
    if (GVar->hasInitializer())
      O << ".visible ";
    else
      O << ".extern ";
  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
             GVar->hasAvailableExternallyLinkage() ||
             GVar->hasCommonLinkage()) {
    O << ".weak ";
  }

  // Textures and surfaces are emitted as opaque references and carry no
  // initializer or size.
  if (isTexture(*GVar)) {
    O << ".global .texref " << getTextureName(*GVar) << ";\n";
    return;
  }

  if (isSurface(*GVar)) {
    O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
    return;
  }

  if (GVar->isDeclaration()) {
    // (extern) declarations, no definition or initializer
    // Currently the only known declaration is for an automatic __local
    // (.shared) promoted to global.
    emitPTXGlobalVariable(GVar, O);
    O << ";\n";
    return;
  }

  if (isSampler(*GVar)) {
    O << ".global .samplerref " << getSamplerName(*GVar);

    // The sampler's state is packed into a single integer initializer whose
    // fields are decoded with the __CLK_* masks from cl_common_defines.h.
    const Constant *Initializer = nullptr;
    if (GVar->hasInitializer())
      Initializer = GVar->getInitializer();
    const ConstantInt *CI = nullptr;
    if (Initializer)
      CI = dyn_cast<ConstantInt>(Initializer);
    if (CI) {
      unsigned sample = CI->getZExtValue();

      O << " = { ";

      // NOTE(review): `addr` is decoded once and the same mode is printed
      // for all three dimensions; per-dimension bits are not decoded here.
      for (int i = 0,
               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
           i < 3; i++) {
        O << "addr_mode_" << i << " = ";
        switch (addr) {
        case 0:
          O << "wrap";
          break;
        case 1:
          O << "clamp_to_border";
          break;
        case 2:
          O << "clamp_to_edge";
          break;
        case 3:
          O << "wrap";
          break;
        case 4:
          O << "mirror";
          break;
        }
        O << ", ";
      }
      O << "filter_mode = ";
      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
      case 0:
        O << "nearest";
        break;
      case 1:
        O << "linear";
        break;
      case 2:
        llvm_unreachable("Anisotropic filtering is not supported");
      default:
        O << "nearest";
        break;
      }
      // Cleared normalized-coords bit => force unnormalized coordinates.
      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
        O << ", force_unnormalized_coords = 1";
      }
      O << " }";
    }

    O << ";\n";
    return;
  }

  if (GVar->hasPrivateLinkage()) {
    // Compiler-generated helper globals that must not reach the PTX output.
    if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
      return;

    // FIXME - need better way (e.g. Metadata) to avoid generating this global
    if (strncmp(GVar->getName().data(), "filename", 8) == 0)
      return;
    if (GVar->use_empty())
      return;
  }

  // A global used by only one function can be demoted to a function-local
  // declaration; record it for emission by emitDemotedVars instead.
  const Function *demotedFunc = nullptr;
  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
    O << "// " << GVar->getName() << " has been demoted\n";
    if (localDecls.find(demotedFunc) != localDecls.end())
      localDecls[demotedFunc].push_back(GVar);
    else {
      std::vector<const GlobalVariable *> temp;
      temp.push_back(GVar);
      localDecls[demotedFunc] = temp;
    }
    return;
  }

  O << ".";
  emitPTXAddressSpace(PTy->getAddressSpace(), O);

  if (isManaged(*GVar)) {
    O << " .attribute(.managed)";
  }

  // Fall back to the preferred alignment when none is specified on the IR.
  if (GVar->getAlignment() == 0)
    O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
  else
    O << " .align " << GVar->getAlignment();

  // Scalar (fp / pointer / <=64-bit integer) variables get a fundamental
  // PTX type; everything else is lowered to a byte array below.
  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
      (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
    O << " .";
    // Special case: ABI requires that we use .u8 for predicates
    if (ETy->isIntegerTy(1))
      O << "u8";
    else
      O << getPTXFundamentalTypeStr(ETy, false);
    O << " ";
    getSymbol(GVar)->print(O, MAI);

    // PTX allows variable initialization only for constant and global state
    // spaces.
    if (GVar->hasInitializer()) {
      if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
          (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
        const Constant *Initializer = GVar->getInitializer();
        // 'undef' is treated as there is no value specified.
        if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
          O << " = ";
          printScalarConstant(Initializer, O);
        }
      } else {
        // The frontend adds zero-initializer to device and constant variables
        // that don't have an initial value, and UndefValue to shared
        // variables, so skip warning for this case.
        if (!GVar->getInitializer()->isNullValue() &&
            !isa<UndefValue>(GVar->getInitializer())) {
          report_fatal_error("initial value of '" + GVar->getName() +
                             "' is not allowed in addrspace(" +
                             Twine(PTy->getAddressSpace()) + ")");
        }
      }
    }
  } else {
    unsigned int ElementSize = 0;

    // Although PTX has direct support for struct type and array type and
    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
    // targets that support these high level field accesses. Structs, arrays
    // and vectors are lowered into arrays of bytes.
    switch (ETy->getTypeID()) {
    case Type::IntegerTyID: // Integers larger than 64 bits
    case Type::StructTyID:
    case Type::ArrayTyID:
    case Type::FixedVectorTyID:
      // Store size in bytes; used below as the emitted array extent.
      ElementSize = DL.getTypeStoreSize(ETy);
      // PTX allows variable initialization only for constant and
      // global state spaces.
1182 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1183 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) && 1184 GVar->hasInitializer()) { 1185 const Constant *Initializer = GVar->getInitializer(); 1186 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1187 AggBuffer aggBuffer(ElementSize, O, *this); 1188 bufferAggregateConstant(Initializer, &aggBuffer); 1189 if (aggBuffer.numSymbols) { 1190 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { 1191 O << " .u64 "; 1192 getSymbol(GVar)->print(O, MAI); 1193 O << "["; 1194 O << ElementSize / 8; 1195 } else { 1196 O << " .u32 "; 1197 getSymbol(GVar)->print(O, MAI); 1198 O << "["; 1199 O << ElementSize / 4; 1200 } 1201 O << "]"; 1202 } else { 1203 O << " .b8 "; 1204 getSymbol(GVar)->print(O, MAI); 1205 O << "["; 1206 O << ElementSize; 1207 O << "]"; 1208 } 1209 O << " = {"; 1210 aggBuffer.print(); 1211 O << "}"; 1212 } else { 1213 O << " .b8 "; 1214 getSymbol(GVar)->print(O, MAI); 1215 if (ElementSize) { 1216 O << "["; 1217 O << ElementSize; 1218 O << "]"; 1219 } 1220 } 1221 } else { 1222 O << " .b8 "; 1223 getSymbol(GVar)->print(O, MAI); 1224 if (ElementSize) { 1225 O << "["; 1226 O << ElementSize; 1227 O << "]"; 1228 } 1229 } 1230 break; 1231 default: 1232 llvm_unreachable("type not supported yet"); 1233 } 1234 } 1235 O << ";\n"; 1236 } 1237 1238 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1239 if (localDecls.find(f) == localDecls.end()) 1240 return; 1241 1242 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1243 1244 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1245 O << "\t// demoted variable\n\t"; 1246 printModuleLevelGV(gvars[i], O, true); 1247 } 1248 } 1249 1250 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1251 raw_ostream &O) const { 1252 switch (AddressSpace) { 1253 case ADDRESS_SPACE_LOCAL: 1254 O << "local"; 1255 break; 1256 case ADDRESS_SPACE_GLOBAL: 1257 O << "global"; 1258 break; 1259 case 
ADDRESS_SPACE_CONST: 1260 O << "const"; 1261 break; 1262 case ADDRESS_SPACE_SHARED: 1263 O << "shared"; 1264 break; 1265 default: 1266 report_fatal_error("Bad address space found while emitting PTX: " + 1267 llvm::Twine(AddressSpace)); 1268 break; 1269 } 1270 } 1271 1272 std::string 1273 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { 1274 switch (Ty->getTypeID()) { 1275 default: 1276 llvm_unreachable("unexpected type"); 1277 break; 1278 case Type::IntegerTyID: { 1279 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1280 if (NumBits == 1) 1281 return "pred"; 1282 else if (NumBits <= 64) { 1283 std::string name = "u"; 1284 return name + utostr(NumBits); 1285 } else { 1286 llvm_unreachable("Integer too large"); 1287 break; 1288 } 1289 break; 1290 } 1291 case Type::HalfTyID: 1292 // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly. 1293 return "b16"; 1294 case Type::FloatTyID: 1295 return "f32"; 1296 case Type::DoubleTyID: 1297 return "f64"; 1298 case Type::PointerTyID: 1299 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) 1300 if (useB4PTR) 1301 return "b64"; 1302 else 1303 return "u64"; 1304 else if (useB4PTR) 1305 return "b32"; 1306 else 1307 return "u32"; 1308 } 1309 llvm_unreachable("unexpected type"); 1310 return nullptr; 1311 } 1312 1313 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1314 raw_ostream &O) { 1315 const DataLayout &DL = getDataLayout(); 1316 1317 // GlobalVariables are always constant pointers themselves. 
  Type *ETy = GVar->getValueType();

  O << ".";
  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
  // Fall back to the preferred alignment when none is specified on the IR.
  if (GVar->getAlignment() == 0)
    O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
  else
    O << " .align " << GVar->getAlignment();

  // Special case for i128: emitted as a 16-byte array rather than a
  // fundamental type (PTX integer types stop at 64 bits).
  if (ETy->isIntegerTy(128)) {
    O << " .b8 ";
    getSymbol(GVar)->print(O, MAI);
    O << "[16]";
    return;
  }

  // Scalars map to a single fundamental PTX type.
  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
    O << " .";
    O << getPTXFundamentalTypeStr(ETy);
    O << " ";
    getSymbol(GVar)->print(O, MAI);
    return;
  }

  int64_t ElementSize = 0;

  // Although PTX has direct support for struct type and array type and LLVM IR
  // is very similar to PTX, the LLVM CodeGen does not support for targets that
  // support these high level field accesses. Structs and arrays are lowered
  // into arrays of bytes.
  switch (ETy->getTypeID()) {
  case Type::StructTyID:
  case Type::ArrayTyID:
  case Type::FixedVectorTyID:
    ElementSize = DL.getTypeStoreSize(ETy);
    O << " .b8 ";
    getSymbol(GVar)->print(O, MAI);
    O << "[";
    // A zero-sized aggregate is emitted as "[]" (no extent).
    if (ElementSize) {
      O << ElementSize;
    }
    O << "]";
    break;
  default:
    llvm_unreachable("type not supported yet");
  }
}

// Compute the alignment used for OpenCL kernel pointer parameters: scalars
// use the preferred alignment, arrays recurse into the element type, and
// structs take the largest member alignment (computed below).
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
  if (Ty->isSingleValueType())
    return DL.getPrefTypeAlignment(Ty);

  auto *ATy = dyn_cast<ArrayType>(Ty);
  if (ATy)
    return getOpenCLAlignment(DL, ATy->getElementType());

  auto *STy = dyn_cast<StructType>(Ty);
  if (STy) {
    unsigned int alignStruct = 1;
    // Go through each element of the struct and find the
    // largest alignment.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
      Type *ETy = STy->getElementType(i);
      unsigned int align = getOpenCLAlignment(DL, ETy);
      if (align > alignStruct)
        alignStruct = align;
    }
    return alignStruct;
  }

  // Function pointers use the pointer's preferred alignment.
  auto *FTy = dyn_cast<FunctionType>(Ty);
  if (FTy)
    return DL.getPointerPrefAlignment().value();
  return DL.getPrefTypeAlignment(Ty);
}

// Print the canonical name of parameter \p paramIndex of the function that
// owns argument \p I: "<mangled function symbol>_param_<index>".
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
                                     int paramIndex, raw_ostream &O) {
  getSymbol(I->getParent())->print(O, MAI);
  O << "_param_" << paramIndex;
}

// Emit the PTX parameter list "( ... )" for function \p F, handling kernel
// pointer/image/sampler parameters, byval aggregates, and the ABI (sm_20+)
// vs pre-ABI calling conventions.
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
  const DataLayout &DL = getDataLayout();
  const AttributeList &PAL = F->getAttributes();
  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
  const TargetLowering *TLI = STI.getTargetLowering();
  Function::const_arg_iterator I, E;
  unsigned paramIndex = 0;
  bool first = true;
  bool isKernelFunc = isKernelFunction(*F);
  // sm_20 and newer use the parameter-passing ABI (.param space).
  bool isABI = (STI.getSmVersion() >= 20);
  bool hasImageHandles = STI.hasImageHandles();
  MVT thePointerTy = TLI->getPointerTy(DL);

  if (F->arg_empty()) {
    O << "()\n";
    return;
  }

  O << "(\n";

  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
    Type *Ty = I->getType();

    if (!first)
      O << ",\n";

    first = false;

    // Handle image/sampler parameters
    if (isKernelFunction(*F)) {
      if (isSampler(*I) || isImage(*I)) {
        if (isImage(*I)) {
          std::string sname = std::string(I->getName());
          if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
            // Writable images become surface references; with handle
            // support they are passed as 64-bit pointers.
            if (hasImageHandles)
              O << "\t.param .u64 .ptr .surfref ";
            else
              O << "\t.param .surfref ";
            CurrentFnSym->print(O, MAI);
            O << "_param_" << paramIndex;
          }
          else { // Default image is read_only
            if (hasImageHandles)
              O << "\t.param .u64 .ptr .texref ";
            else
              O << "\t.param .texref ";
            CurrentFnSym->print(O, MAI);
            O << "_param_" << paramIndex;
          }
        } else {
          if (hasImageHandles)
            O << "\t.param .u64 .ptr .samplerref ";
          else
            O << "\t.param .samplerref ";
          CurrentFnSym->print(O, MAI);
          O << "_param_" << paramIndex;
        }
        continue;
      }
    }

    if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
      if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
        // Just print .param .align <a> .b8 .param[size];
        // <a> = PAL.getparamalignment
        // size = typeallocsize of element type
        const Align align = DL.getValueOrABITypeAlignment(
            PAL.getParamAlignment(paramIndex), Ty);

        unsigned sz = DL.getTypeAllocSize(Ty);
        O << "\t.param .align " << align.value() << " .b8 ";
        printParamName(I, paramIndex, O);
        O << "[" << sz << "]";

        continue;
      }
      // Just a scalar
      auto *PTy = dyn_cast<PointerType>(Ty);
      if (isKernelFunc) {
        if (PTy) {
          // Special handling for pointer arguments to kernel
          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";

          // Non-CUDA (OpenCL) drivers annotate the pointer with its state
          // space and alignment.
          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
              NVPTX::CUDA) {
            Type *ETy = PTy->getElementType();
            int addrSpace = PTy->getAddressSpace();
            switch (addrSpace) {
            default:
              O << ".ptr ";
              break;
            case ADDRESS_SPACE_CONST:
              O << ".ptr .const ";
              break;
            case ADDRESS_SPACE_SHARED:
              O << ".ptr .shared ";
              break;
            case ADDRESS_SPACE_GLOBAL:
              O << ".ptr .global ";
              break;
            }
            O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
          }
          printParamName(I, paramIndex, O);
          continue;
        }

        // non-pointer scalar to kernel func
        O << "\t.param .";
        // Special case: predicate operands become .u8 types
        if (Ty->isIntegerTy(1))
          O << "u8";
      // Hopefully ptxas bug is fixed by then.
      if (!isKernelFunc && align < Align(4))
        align = Align(4);
      unsigned sz = DL.getTypeAllocSize(ETy);
      O << "\t.param .align " << align.value() << " .b8 ";
      printParamName(I, paramIndex, O);
      O << "[" << sz << "]";
      continue;
    } else {
      // Split the ETy into constituent parts and
      // print .param .b<size> <name> for each part.
      // Further, if a part is vector, print the above for
      // each vector element.
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*TLI, DL, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          // Pre-ABI: each constituent gets its own register, each at least
          // 32 bits wide for integers.
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32))
            sz = 32;
          O << "\t.reg .b" << sz << " ";
          printParamName(I, paramIndex, O);
          if (j < je - 1)
            O << ",\n";
          // Each emitted part consumes its own parameter index.
          ++paramIndex;
        }
        if (i < e - 1)
          O << ",\n";
      }
      // Compensate for the extra ++paramIndex of the last part; the outer
      // loop's paramIndex++ restores the correct count.
      --paramIndex;
      continue;
    }
  }

  O << "\n)\n";
}

// Convenience overload: emit the parameter list of the function underlying
// machine function \p MF.
void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
                                            raw_ostream &O) {
  const Function &F = MF.getFunction();
  emitFunctionParamList(&F, O);
}

// Build the per-register-class virtual register numbering for \p MF and
// emit the corresponding .local depot and .reg declarations.
void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
    const MachineFunction &MF) {
  SmallString<128> Str;
  raw_svector_ostream O(Str);

  // Map the global virtual register number to a register class specific
  // virtual register number starting from 1 with that class.
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  //unsigned numRegClasses = TRI->getNumRegClasses();

  // Emit the Fake Stack Object
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int NumBytes = (int) MFI.getStackSize();
  if (NumBytes) {
    // The "stack" is a .local byte array (__local_depot<N>) plus virtual
    // stack-pointer registers %SP/%SPL sized to the target pointer width.
    O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
      << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
      O << "\t.reg .b64 \t%SP;\n";
      O << "\t.reg .b64 \t%SPL;\n";
    } else {
      O << "\t.reg .b32 \t%SP;\n";
      O << "\t.reg .b32 \t%SPL;\n";
    }
  }

  // Go through all virtual registers to establish the mapping between the
  // global virtual
  // register number and the per class virtual register number.
  // We use the per class virtual register number in the ptx output.
  unsigned int numVRs = MRI->getNumVirtRegs();
  for (unsigned i = 0; i < numVRs; i++) {
    unsigned int vr = Register::index2VirtReg(i);
    const TargetRegisterClass *RC = MRI->getRegClass(vr);
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    // Per-class numbering starts at 1 (n is the count before insertion).
    int n = regmap.size();
    regmap.insert(std::make_pair(vr, n + 1));
  }

  // Emit register declarations
  // @TODO: Extract out the real register usage
  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";

  // Emit declaration of the virtual registers or 'physical' registers for
  // each register class
  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
    const TargetRegisterClass *RC = TRI->getRegClass(i);
    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
    std::string rcname = getNVPTXRegClassName(RC);
    std::string rcStr = getNVPTXRegClassStr(RC);
    int n = regmap.size();

    // Only declare those registers that may be used.
    if (n) {
      O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
        << ">;\n";
    }
  }

  OutStreamer->emitRawText(O.str());
}

// Print \p Fp in PTX's hexadecimal float syntax: "0f" + 8 hex digits for
// single precision, "0d" + 16 hex digits for double precision.
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
  bool ignored;
  unsigned int numHex;
  const char *lead;

  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
    numHex = 8;
    lead = "0f";
    APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
    numHex = 16;
    lead = "0d";
    APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
  } else
    llvm_unreachable("unsupported fp type");

  APInt API = APF.bitcastToAPInt();
  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
}

// Print a scalar constant (int, fp, null pointer, global address, or a
// foldable constant expression) in PTX initializer syntax.
void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
    O << CI->getValue();
    return;
  }
  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
    printFPConstant(CFP, O);
    return;
  }
  if (isa<ConstantPointerNull>(CPV)) {
    O << "0";
    return;
  }
  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
    bool IsNonGenericPointer = false;
    if (GVar->getType()->getAddressSpace() != 0) {
      IsNonGenericPointer = true;
    }
    // Generic-space data addresses are wrapped in generic(); functions and
    // addresses already in a specific space are emitted directly.
    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
      O << "generic(";
      getSymbol(GVar)->print(O, MAI);
      O << ")";
    } else {
getSymbol(GVar)->print(O, MAI); 1726 } 1727 return; 1728 } 1729 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1730 const Value *v = Cexpr->stripPointerCasts(); 1731 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 1732 bool IsNonGenericPointer = false; 1733 if (PTy && PTy->getAddressSpace() != 0) { 1734 IsNonGenericPointer = true; 1735 } 1736 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1737 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 1738 O << "generic("; 1739 getSymbol(GVar)->print(O, MAI); 1740 O << ")"; 1741 } else { 1742 getSymbol(GVar)->print(O, MAI); 1743 } 1744 return; 1745 } else { 1746 lowerConstant(CPV)->print(O, MAI); 1747 return; 1748 } 1749 } 1750 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1751 } 1752 1753 // These utility functions assure we get the right sequence of bytes for a given 1754 // type even for big-endian machines 1755 template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) { 1756 int64_t vp = (int64_t)val; 1757 for (unsigned i = 0; i < sizeof(T); ++i) { 1758 p[i] = (unsigned char)vp; 1759 vp >>= 8; 1760 } 1761 } 1762 static void ConvertFloatToBytes(unsigned char *p, float val) { 1763 int32_t *vp = (int32_t *)&val; 1764 for (unsigned i = 0; i < sizeof(int32_t); ++i) { 1765 p[i] = (unsigned char)*vp; 1766 *vp >>= 8; 1767 } 1768 } 1769 static void ConvertDoubleToBytes(unsigned char *p, double val) { 1770 int64_t *vp = (int64_t *)&val; 1771 for (unsigned i = 0; i < sizeof(int64_t); ++i) { 1772 p[i] = (unsigned char)*vp; 1773 *vp >>= 8; 1774 } 1775 } 1776 1777 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1778 AggBuffer *aggBuffer) { 1779 const DataLayout &DL = getDataLayout(); 1780 1781 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1782 int s = DL.getTypeAllocSize(CPV->getType()); 1783 if (s < Bytes) 1784 s = Bytes; 1785 aggBuffer->addZeros(s); 1786 return; 1787 } 1788 1789 unsigned char ptr[8]; 1790 switch 
(CPV->getType()->getTypeID()) {

  case Type::IntegerTyID: {
    Type *ETy = CPV->getType();
    if (ETy == Type::getInt8Ty(CPV->getContext())) {
      unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
      ConvertIntToBytes<>(ptr, c);
      aggBuffer->addBytes(ptr, 1, Bytes);
    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
      short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
      ConvertIntToBytes<>(ptr, int16);
      aggBuffer->addBytes(ptr, 2, Bytes);
    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
        int int32 = (int)(constInt->getZExtValue());
        ConvertIntToBytes<>(ptr, int32);
        aggBuffer->addBytes(ptr, 4, Bytes);
        break;
      } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
        // Try to fold the expression down to a plain integer first.
        if (const auto *constInt = dyn_cast<ConstantInt>(
                ConstantFoldConstant(Cexpr, DL))) {
          int int32 = (int)(constInt->getZExtValue());
          ConvertIntToBytes<>(ptr, int32);
          aggBuffer->addBytes(ptr, 4, Bytes);
          break;
        }
        // ptrtoint of a symbol: record the symbol and reserve 4 zero bytes
        // for the relocation.
        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
          aggBuffer->addZeros(4);
          break;
        }
      }
      llvm_unreachable("unsupported integer const type");
    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
        long long int64 = (long long)(constInt->getZExtValue());
        ConvertIntToBytes<>(ptr, int64);
        aggBuffer->addBytes(ptr, 8, Bytes);
        break;
      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
        if (const auto *constInt = dyn_cast<ConstantInt>(
                ConstantFoldConstant(Cexpr, DL))) {
          long long int64 = (long long)(constInt->getZExtValue());
          ConvertIntToBytes<>(ptr, int64);
          aggBuffer->addBytes(ptr, 8, Bytes);
          break;
        }
        // 64-bit ptrtoint of a symbol: 8 zero bytes plus a symbol record.
        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
          aggBuffer->addZeros(8);
          break;
        }
      }
      llvm_unreachable("unsupported integer const type");
    } else
      llvm_unreachable("unsupported integer const type");
    break;
  }
  case Type::HalfTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID: {
    const auto *CFP = cast<ConstantFP>(CPV);
    Type *Ty = CFP->getType();
    if (Ty == Type::getHalfTy(CPV->getContext())) {
      // fp16 is serialized via its 16-bit integer representation.
      APInt API = CFP->getValueAPF().bitcastToAPInt();
      uint16_t float16 = API.getLoBits(16).getZExtValue();
      ConvertIntToBytes<>(ptr, float16);
      aggBuffer->addBytes(ptr, 2, Bytes);
    } else if (Ty == Type::getFloatTy(CPV->getContext())) {
      float float32 = (float) CFP->getValueAPF().convertToFloat();
      ConvertFloatToBytes(ptr, float32);
      aggBuffer->addBytes(ptr, 4, Bytes);
    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
      double float64 = CFP->getValueAPF().convertToDouble();
      ConvertDoubleToBytes(ptr, float64);
      aggBuffer->addBytes(ptr, 8, Bytes);
    } else {
      llvm_unreachable("unsupported fp const type");
    }
    break;
  }
  case Type::PointerTyID: {
    // Pointer constants become zero bytes plus (when the target is known)
    // a symbol record for later relocation by AggBuffer.
    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
      aggBuffer->addSymbol(GVar, GVar);
    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
      const Value *v = Cexpr->stripPointerCasts();
      aggBuffer->addSymbol(v, Cexpr);
    }
    unsigned int s = DL.getTypeAllocSize(CPV->getType());
    aggBuffer->addZeros(s);
    break;
  }

  case Type::ArrayTyID:
  case Type::FixedVectorTyID:
  case Type::StructTyID: {
    // Aggregates recurse through bufferAggregateConstant, then pad up to
    // the requested size.
    if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
      int ElementSize = DL.getTypeAllocSize(CPV->getType());
      bufferAggregateConstant(CPV, aggBuffer);
      if (Bytes > ElementSize)
        aggBuffer->addZeros(Bytes - ElementSize);
    } else if (isa<ConstantAggregateZero>(CPV))
      aggBuffer->addZeros(Bytes);
    else
      llvm_unreachable("Unexpected Constant type");
    break;
  }

  default:
    llvm_unreachable("unsupported type");
  }
}

// Serialize an aggregate (or arbitrary-width integer) constant into
// \p aggBuffer element by element, little-endian.
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
                                              AggBuffer *aggBuffer) {
  const DataLayout &DL = getDataLayout();
  int Bytes;

  // Integers of arbitrary width
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
    APInt Val = CI->getValue();
    // Emit one byte at a time, least-significant byte first.
    for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
      uint8_t Byte = Val.getLoBits(8).getZExtValue();
      aggBuffer->addBytes(&Byte, 1, 1);
      Val.lshrInPlace(8);
    }
    return;
  }

  // Old constants
  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
    if (CPV->getNumOperands())
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
    return;
  }

  if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(CPV)) {
    if (CDS->getNumElements())
      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
                     aggBuffer);
    return;
  }

  if (isa<ConstantStruct>(CPV)) {
    if (CPV->getNumOperands()) {
      StructType *ST = cast<StructType>(CPV->getType());
      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
        // Each field is padded out to the offset of the next field; the
        // last field is padded to the end of the (possibly padded) struct.
        if (i == (e - 1))
          Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
                  DL.getTypeAllocSize(ST) -
                  DL.getStructLayout(ST)->getElementOffset(i);
        else
          Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
                  DL.getStructLayout(ST)->getElementOffset(i);
        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
      }
    }
    return;
  }
llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1955 } 1956 1957 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly 1958 /// a copy from AsmPrinter::lowerConstant, except customized to only handle 1959 /// expressions that are representable in PTX and create 1960 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions. 1961 const MCExpr * 1962 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) { 1963 MCContext &Ctx = OutContext; 1964 1965 if (CV->isNullValue() || isa<UndefValue>(CV)) 1966 return MCConstantExpr::create(0, Ctx); 1967 1968 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 1969 return MCConstantExpr::create(CI->getZExtValue(), Ctx); 1970 1971 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { 1972 const MCSymbolRefExpr *Expr = 1973 MCSymbolRefExpr::create(getSymbol(GV), Ctx); 1974 if (ProcessingGeneric) { 1975 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); 1976 } else { 1977 return Expr; 1978 } 1979 } 1980 1981 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 1982 if (!CE) { 1983 llvm_unreachable("Unknown constant value to lower!"); 1984 } 1985 1986 switch (CE->getOpcode()) { 1987 default: { 1988 // If the code isn't optimized, there may be outstanding folding 1989 // opportunities. Attempt to fold the expression using DataLayout as a 1990 // last resort before giving up. 1991 Constant *C = ConstantFoldConstant(CE, getDataLayout()); 1992 if (C != CE) 1993 return lowerConstantForGV(C, ProcessingGeneric); 1994 1995 // Otherwise report the problem to the user. 1996 std::string S; 1997 raw_string_ostream OS(S); 1998 OS << "Unsupported expression in static initializer: "; 1999 CE->printAsOperand(OS, /*PrintType=*/false, 2000 !MF ? 
nullptr : MF->getFunction().getParent()); 2001 report_fatal_error(OS.str()); 2002 } 2003 2004 case Instruction::AddrSpaceCast: { 2005 // Strip the addrspacecast and pass along the operand 2006 PointerType *DstTy = cast<PointerType>(CE->getType()); 2007 if (DstTy->getAddressSpace() == 0) { 2008 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true); 2009 } 2010 std::string S; 2011 raw_string_ostream OS(S); 2012 OS << "Unsupported expression in static initializer: "; 2013 CE->printAsOperand(OS, /*PrintType=*/ false, 2014 !MF ? nullptr : MF->getFunction().getParent()); 2015 report_fatal_error(OS.str()); 2016 } 2017 2018 case Instruction::GetElementPtr: { 2019 const DataLayout &DL = getDataLayout(); 2020 2021 // Generate a symbolic expression for the byte address 2022 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); 2023 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); 2024 2025 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0), 2026 ProcessingGeneric); 2027 if (!OffsetAI) 2028 return Base; 2029 2030 int64_t Offset = OffsetAI.getSExtValue(); 2031 return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), 2032 Ctx); 2033 } 2034 2035 case Instruction::Trunc: 2036 // We emit the value and depend on the assembler to truncate the generated 2037 // expression properly. This is important for differences between 2038 // blockaddress labels. Since the two labels are in the same function, it 2039 // is reasonable to treat their delta as a 32-bit value. 2040 LLVM_FALLTHROUGH; 2041 case Instruction::BitCast: 2042 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2043 2044 case Instruction::IntToPtr: { 2045 const DataLayout &DL = getDataLayout(); 2046 2047 // Handle casts to pointers by changing them into casts to the appropriate 2048 // integer type. This promotes constant folding and simplifies this code. 
2049 Constant *Op = CE->getOperand(0); 2050 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), 2051 false/*ZExt*/); 2052 return lowerConstantForGV(Op, ProcessingGeneric); 2053 } 2054 2055 case Instruction::PtrToInt: { 2056 const DataLayout &DL = getDataLayout(); 2057 2058 // Support only foldable casts to/from pointers that can be eliminated by 2059 // changing the pointer to the appropriately sized integer type. 2060 Constant *Op = CE->getOperand(0); 2061 Type *Ty = CE->getType(); 2062 2063 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric); 2064 2065 // We can emit the pointer value into this slot if the slot is an 2066 // integer slot equal to the size of the pointer. 2067 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) 2068 return OpExpr; 2069 2070 // Otherwise the pointer is smaller than the resultant integer, mask off 2071 // the high bits so we are sure to get a proper truncation if the input is 2072 // a constant expr. 2073 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); 2074 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); 2075 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); 2076 } 2077 2078 // The MC library also has a right-shift operator, but it isn't consistently 2079 // signed or unsigned between different targets. 
2080 case Instruction::Add: { 2081 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2082 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); 2083 switch (CE->getOpcode()) { 2084 default: llvm_unreachable("Unknown binary operator constant cast expr"); 2085 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); 2086 } 2087 } 2088 } 2089 } 2090 2091 // Copy of MCExpr::print customized for NVPTX 2092 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { 2093 switch (Expr.getKind()) { 2094 case MCExpr::Target: 2095 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); 2096 case MCExpr::Constant: 2097 OS << cast<MCConstantExpr>(Expr).getValue(); 2098 return; 2099 2100 case MCExpr::SymbolRef: { 2101 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); 2102 const MCSymbol &Sym = SRE.getSymbol(); 2103 Sym.print(OS, MAI); 2104 return; 2105 } 2106 2107 case MCExpr::Unary: { 2108 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr); 2109 switch (UE.getOpcode()) { 2110 case MCUnaryExpr::LNot: OS << '!'; break; 2111 case MCUnaryExpr::Minus: OS << '-'; break; 2112 case MCUnaryExpr::Not: OS << '~'; break; 2113 case MCUnaryExpr::Plus: OS << '+'; break; 2114 } 2115 printMCExpr(*UE.getSubExpr(), OS); 2116 return; 2117 } 2118 2119 case MCExpr::Binary: { 2120 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); 2121 2122 // Only print parens around the LHS if it is non-trivial. 2123 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) || 2124 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) { 2125 printMCExpr(*BE.getLHS(), OS); 2126 } else { 2127 OS << '('; 2128 printMCExpr(*BE.getLHS(), OS); 2129 OS<< ')'; 2130 } 2131 2132 switch (BE.getOpcode()) { 2133 case MCBinaryExpr::Add: 2134 // Print "X-42" instead of "X+-42". 
2135 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { 2136 if (RHSC->getValue() < 0) { 2137 OS << RHSC->getValue(); 2138 return; 2139 } 2140 } 2141 2142 OS << '+'; 2143 break; 2144 default: llvm_unreachable("Unhandled binary operator"); 2145 } 2146 2147 // Only print parens around the LHS if it is non-trivial. 2148 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { 2149 printMCExpr(*BE.getRHS(), OS); 2150 } else { 2151 OS << '('; 2152 printMCExpr(*BE.getRHS(), OS); 2153 OS << ')'; 2154 } 2155 return; 2156 } 2157 } 2158 2159 llvm_unreachable("Invalid expression kind!"); 2160 } 2161 2162 /// PrintAsmOperand - Print out an operand for an inline asm expression. 2163 /// 2164 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2165 const char *ExtraCode, raw_ostream &O) { 2166 if (ExtraCode && ExtraCode[0]) { 2167 if (ExtraCode[1] != 0) 2168 return true; // Unknown modifier. 2169 2170 switch (ExtraCode[0]) { 2171 default: 2172 // See if this is a generic print operand 2173 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); 2174 case 'r': 2175 break; 2176 } 2177 } 2178 2179 printOperand(MI, OpNo, O); 2180 2181 return false; 2182 } 2183 2184 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, 2185 unsigned OpNo, 2186 const char *ExtraCode, 2187 raw_ostream &O) { 2188 if (ExtraCode && ExtraCode[0]) 2189 return true; // Unknown modifier 2190 2191 O << '['; 2192 printMemOperand(MI, OpNo, O); 2193 O << ']'; 2194 2195 return false; 2196 } 2197 2198 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2199 raw_ostream &O) { 2200 const MachineOperand &MO = MI->getOperand(opNum); 2201 switch (MO.getType()) { 2202 case MachineOperand::MO_Register: 2203 if (Register::isPhysicalRegister(MO.getReg())) { 2204 if (MO.getReg() == NVPTX::VRDepot) 2205 O << DEPOTNAME << getFunctionNumber(); 2206 else 2207 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2208 } else { 
2209 emitVirtualRegister(MO.getReg(), O); 2210 } 2211 break; 2212 2213 case MachineOperand::MO_Immediate: 2214 O << MO.getImm(); 2215 break; 2216 2217 case MachineOperand::MO_FPImmediate: 2218 printFPConstant(MO.getFPImm(), O); 2219 break; 2220 2221 case MachineOperand::MO_GlobalAddress: 2222 PrintSymbolOperand(MO, O); 2223 break; 2224 2225 case MachineOperand::MO_MachineBasicBlock: 2226 MO.getMBB()->getSymbol()->print(O, MAI); 2227 break; 2228 2229 default: 2230 llvm_unreachable("Operand type not supported."); 2231 } 2232 } 2233 2234 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2235 raw_ostream &O, const char *Modifier) { 2236 printOperand(MI, opNum, O); 2237 2238 if (Modifier && strcmp(Modifier, "add") == 0) { 2239 O << ", "; 2240 printOperand(MI, opNum + 1, O); 2241 } else { 2242 if (MI->getOperand(opNum + 1).isImm() && 2243 MI->getOperand(opNum + 1).getImm() == 0) 2244 return; // don't print ',0' or '+0' 2245 O << "+"; 2246 printOperand(MI, opNum + 1, O); 2247 } 2248 } 2249 2250 // Force static initialization. 2251 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() { 2252 RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32()); 2253 RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64()); 2254 } 2255