1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // \file 9 // 10 // The pass bind printfs to a kernel arg pointer that will be bound to a buffer 11 // later by the runtime. 12 // 13 // This pass traverses the functions in the module and converts 14 // each call to printf to a sequence of operations that 15 // store the following into the printf buffer: 16 // - format string (passed as a module's metadata unique ID) 17 // - bitwise copies of printf arguments 18 // The backend passes will need to store metadata in the kernel 19 //===----------------------------------------------------------------------===// 20 21 #include "AMDGPU.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/Analysis/ValueTracking.h" 24 #include "llvm/IR/DiagnosticInfo.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/InitializePasses.h" 29 #include "llvm/Support/DataExtractor.h" 30 #include "llvm/TargetParser/Triple.h" 31 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "printfToRuntime" 36 #define DWORD_ALIGN 4 37 38 namespace { 39 class AMDGPUPrintfRuntimeBinding final : public ModulePass { 40 41 public: 42 static char ID; 43 44 explicit AMDGPUPrintfRuntimeBinding(); 45 46 private: 47 bool runOnModule(Module &M) override; 48 }; 49 50 class AMDGPUPrintfRuntimeBindingImpl { 51 public: 52 AMDGPUPrintfRuntimeBindingImpl() {} 53 bool run(Module &M); 54 55 private: 56 void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers, 57 StringRef fmt, size_t num_ops) const; 58 59 bool lowerPrintfForGpu(Module &M); 60 61 const DataLayout *TD; 62 SmallVector<CallInst *, 32> Printfs; 63 }; 64 } // namespace 65 66 char AMDGPUPrintfRuntimeBinding::ID = 0; 67 68 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, 69 "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", 70 false, false) 71 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 72 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 73 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", 74 "AMDGPU Printf lowering", false, false) 75 76 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID; 77 78 namespace llvm { 79 ModulePass *createAMDGPUPrintfRuntimeBinding() { 80 return new AMDGPUPrintfRuntimeBinding(); 81 } 82 } // namespace llvm 83 84 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) { 85 initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); 86 } 87 88 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers( 89 SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt, 90 size_t NumOps) const { 91 // not all format characters are collected. 92 // At this time the format characters of interest 93 // are %p and %s, which use to know if we 94 // are either storing a literal string or a 95 // pointer to the printf buffer. 96 static const char ConvSpecifiers[] = "cdieEfgGaosuxXp"; 97 size_t CurFmtSpecifierIdx = 0; 98 size_t PrevFmtSpecifierIdx = 0; 99 100 while ((CurFmtSpecifierIdx = Fmt.find_first_of( 101 ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) { 102 bool ArgDump = false; 103 StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx, 104 CurFmtSpecifierIdx - PrevFmtSpecifierIdx); 105 size_t pTag = CurFmt.find_last_of("%"); 106 if (pTag != StringRef::npos) { 107 ArgDump = true; 108 while (pTag && CurFmt[--pTag] == '%') { 109 ArgDump = !ArgDump; 110 } 111 } 112 113 if (ArgDump) 114 OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]); 115 116 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx; 117 } 118 } 119 120 static bool shouldPrintAsStr(char Specifier, Type *OpType) { 121 return Specifier == 's' && isa<PointerType>(OpType); 122 } 123 124 constexpr StringLiteral NonLiteralStr("???"); 125 static_assert(NonLiteralStr.size() == 3); 126 127 static StringRef getAsConstantStr(Value *V) { 128 StringRef S; 129 if (!getConstantStringInfo(V, S)) 130 S = NonLiteralStr; 131 132 return S; 133 } 134 135 static void diagnoseInvalidFormatString(const CallBase *CI) { 136 DiagnosticInfoUnsupported UnsupportedFormatStr( 137 *CI->getParent()->getParent(), 138 "printf format string must be a trivially resolved constant string " 139 "global variable", 140 CI->getDebugLoc()); 141 CI->getContext().diagnose(UnsupportedFormatStr); 142 } 143 144 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) { 145 LLVMContext &Ctx = M.getContext(); 146 IRBuilder<> Builder(Ctx); 147 Type *I32Ty = Type::getInt32Ty(Ctx); 148 149 // Instead of creating global variables, the printf format strings are 150 // extracted and passed as metadata. This avoids polluting llvm's symbol 151 // tables in this module. Metadata is going to be extracted by the backend 152 // passes and inserted into the OpenCL binary as appropriate. 153 NamedMDNode *metaD = M.getOrInsertNamedMetadata("llvm.printf.fmts"); 154 unsigned UniqID = metaD->getNumOperands(); 155 156 for (auto *CI : Printfs) { 157 unsigned NumOps = CI->arg_size(); 158 159 SmallString<16> OpConvSpecifiers; 160 Value *Op = CI->getArgOperand(0); 161 162 StringRef FormatStr; 163 if (!getConstantStringInfo(Op, FormatStr)) { 164 Value *Stripped = Op->stripPointerCasts(); 165 if (!isa<UndefValue>(Stripped) && !isa<ConstantPointerNull>(Stripped)) 166 diagnoseInvalidFormatString(CI); 167 continue; 168 } 169 170 // We need this call to ascertain that we are printing a string or a 171 // pointer. It takes out the specifiers and fills up the first arg. 172 getConversionSpecifiers(OpConvSpecifiers, FormatStr, NumOps - 1); 173 174 // Add metadata for the string 175 std::string AStreamHolder; 176 raw_string_ostream Sizes(AStreamHolder); 177 int Sum = DWORD_ALIGN; 178 Sizes << CI->arg_size() - 1; 179 Sizes << ':'; 180 for (unsigned ArgCount = 1; 181 ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size(); 182 ArgCount++) { 183 Value *Arg = CI->getArgOperand(ArgCount); 184 Type *ArgType = Arg->getType(); 185 unsigned ArgSize = TD->getTypeAllocSize(ArgType); 186 // 187 // ArgSize by design should be a multiple of DWORD_ALIGN, 188 // expand the arguments that do not follow this rule. 189 // 190 if (ArgSize % DWORD_ALIGN != 0) { 191 Type *ResType = Type::getInt32Ty(Ctx); 192 if (auto *VecType = dyn_cast<VectorType>(ArgType)) 193 ResType = VectorType::get(ResType, VecType->getElementCount()); 194 Builder.SetInsertPoint(CI); 195 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 196 197 if (ArgType->isFloatingPointTy()) { 198 Arg = Builder.CreateBitCast( 199 Arg, 200 IntegerType::getIntNTy(Ctx, ArgType->getPrimitiveSizeInBits())); 201 } 202 203 if (OpConvSpecifiers[ArgCount - 1] == 'x' || 204 OpConvSpecifiers[ArgCount - 1] == 'X' || 205 OpConvSpecifiers[ArgCount - 1] == 'u' || 206 OpConvSpecifiers[ArgCount - 1] == 'o') 207 Arg = Builder.CreateZExt(Arg, ResType); 208 else 209 Arg = Builder.CreateSExt(Arg, ResType); 210 ArgType = Arg->getType(); 211 ArgSize = TD->getTypeAllocSize(ArgType); 212 CI->setOperand(ArgCount, Arg); 213 } 214 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 215 ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg); 216 if (FpCons) 217 ArgSize = 4; 218 else { 219 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg); 220 if (FpExt && FpExt->getType()->isDoubleTy() && 221 FpExt->getOperand(0)->getType()->isFloatTy()) 222 ArgSize = 4; 223 } 224 } 225 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) 226 ArgSize = alignTo(getAsConstantStr(Arg).size() + 1, 4); 227 228 LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize 229 << " for type: " << *ArgType << '\n'); 230 Sizes << ArgSize << ':'; 231 Sum += ArgSize; 232 } 233 LLVM_DEBUG(dbgs() << "Printf format string in source = " << FormatStr 234 << '\n'); 235 for (char C : FormatStr) { 236 // Rest of the C escape sequences (e.g. \') are handled correctly 237 // by the MDParser 238 switch (C) { 239 case '\a': 240 Sizes << "\\a"; 241 break; 242 case '\b': 243 Sizes << "\\b"; 244 break; 245 case '\f': 246 Sizes << "\\f"; 247 break; 248 case '\n': 249 Sizes << "\\n"; 250 break; 251 case '\r': 252 Sizes << "\\r"; 253 break; 254 case '\v': 255 Sizes << "\\v"; 256 break; 257 case ':': 258 // ':' cannot be scanned by Flex, as it is defined as a delimiter 259 // Replace it with it's octal representation \72 260 Sizes << "\\72"; 261 break; 262 default: 263 Sizes << C; 264 break; 265 } 266 } 267 268 // Insert the printf_alloc call 269 Builder.SetInsertPoint(CI); 270 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); 271 272 AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex, 273 Attribute::NoUnwind); 274 275 Type *SizetTy = Type::getInt32Ty(Ctx); 276 277 Type *Tys_alloc[1] = {SizetTy}; 278 Type *I8Ty = Type::getInt8Ty(Ctx); 279 Type *I8Ptr = PointerType::get(I8Ty, 1); 280 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); 281 FunctionCallee PrintfAllocFn = 282 M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); 283 284 LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n'); 285 std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str(); 286 MDString *fmtStrArray = MDString::get(Ctx, fmtstr); 287 288 MDNode *myMD = MDNode::get(Ctx, fmtStrArray); 289 metaD->addOperand(myMD); 290 Value *sumC = ConstantInt::get(SizetTy, Sum, false); 291 SmallVector<Value *, 1> alloc_args; 292 alloc_args.push_back(sumC); 293 CallInst *pcall = 294 CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI); 295 296 // 297 // Insert code to split basicblock with a 298 // piece of hammock code. 299 // basicblock splits after buffer overflow check 300 // 301 ConstantPointerNull *zeroIntPtr = 302 ConstantPointerNull::get(PointerType::get(I8Ty, 1)); 303 auto *cmp = cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, "")); 304 if (!CI->use_empty()) { 305 Value *result = 306 Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res"); 307 CI->replaceAllUsesWith(result); 308 } 309 SplitBlock(CI->getParent(), cmp); 310 Instruction *Brnch = 311 SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false); 312 313 Builder.SetInsertPoint(Brnch); 314 315 // store unique printf id in the buffer 316 // 317 GetElementPtrInst *BufferIdx = GetElementPtrInst::Create( 318 I8Ty, pcall, ConstantInt::get(Ctx, APInt(32, 0)), "PrintBuffID", Brnch); 319 320 Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS); 321 Value *id_gep_cast = 322 new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch); 323 324 new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch); 325 326 // 1st 4 bytes hold the printf_id 327 // the following GEP is the buffer pointer 328 BufferIdx = GetElementPtrInst::Create(I8Ty, pcall, 329 ConstantInt::get(Ctx, APInt(32, 4)), 330 "PrintBuffGep", Brnch); 331 332 Type *Int32Ty = Type::getInt32Ty(Ctx); 333 for (unsigned ArgCount = 1; 334 ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size(); 335 ArgCount++) { 336 Value *Arg = CI->getArgOperand(ArgCount); 337 Type *ArgType = Arg->getType(); 338 SmallVector<Value *, 32> WhatToStore; 339 if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) { 340 if (OpConvSpecifiers[ArgCount - 1] == 'f') { 341 if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) { 342 APFloat Val(FpCons->getValueAPF()); 343 bool Lost = false; 344 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, 345 &Lost); 346 Arg = ConstantFP::get(Ctx, Val); 347 } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) { 348 if (FpExt->getType()->isDoubleTy() && 349 FpExt->getOperand(0)->getType()->isFloatTy()) { 350 Arg = FpExt->getOperand(0); 351 } 352 } 353 } 354 WhatToStore.push_back(Arg); 355 } else if (isa<PointerType>(ArgType)) { 356 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) { 357 StringRef S = getAsConstantStr(Arg); 358 if (!S.empty()) { 359 const uint64_t ReadSize = 4; 360 361 DataExtractor Extractor(S, /*IsLittleEndian=*/true, 8); 362 DataExtractor::Cursor Offset(0); 363 while (Offset && Offset.tell() < S.size()) { 364 uint64_t ReadNow = std::min(ReadSize, S.size() - Offset.tell()); 365 uint64_t ReadBytes = 0; 366 switch (ReadNow) { 367 default: llvm_unreachable("min(4, X) > 4?"); 368 case 1: 369 ReadBytes = Extractor.getU8(Offset); 370 break; 371 case 2: 372 ReadBytes = Extractor.getU16(Offset); 373 break; 374 case 3: 375 ReadBytes = Extractor.getU24(Offset); 376 break; 377 case 4: 378 ReadBytes = Extractor.getU32(Offset); 379 break; 380 } 381 382 cantFail(Offset.takeError(), 383 "failed to read bytes from constant array"); 384 385 APInt IntVal(8 * ReadSize, ReadBytes); 386 387 // TODO: Should not bothering aligning up. 388 if (ReadNow < ReadSize) 389 IntVal = IntVal.zext(8 * ReadSize); 390 391 Type *IntTy = Type::getIntNTy(Ctx, IntVal.getBitWidth()); 392 WhatToStore.push_back(ConstantInt::get(IntTy, IntVal)); 393 } 394 } else { 395 // Empty string, give a hint to RT it is no NULL 396 Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false); 397 WhatToStore.push_back(ANumV); 398 } 399 } else { 400 WhatToStore.push_back(Arg); 401 } 402 } else { 403 WhatToStore.push_back(Arg); 404 } 405 for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { 406 Value *TheBtCast = WhatToStore[I]; 407 unsigned ArgSize = TD->getTypeAllocSize(TheBtCast->getType()); 408 StoreInst *StBuff = new StoreInst(TheBtCast, BufferIdx, Brnch); 409 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n" 410 << *StBuff << '\n'); 411 (void)StBuff; 412 if (I + 1 == E && ArgCount + 1 == CI->arg_size()) 413 break; 414 BufferIdx = GetElementPtrInst::Create( 415 I8Ty, BufferIdx, {ConstantInt::get(I32Ty, ArgSize)}, 416 "PrintBuffNextPtr", Brnch); 417 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n" 418 << *BufferIdx << '\n'); 419 } 420 } 421 } 422 423 // erase the printf calls 424 for (auto *CI : Printfs) 425 CI->eraseFromParent(); 426 427 Printfs.clear(); 428 return true; 429 } 430 431 bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) { 432 Triple TT(M.getTargetTriple()); 433 if (TT.getArch() == Triple::r600) 434 return false; 435 436 auto PrintfFunction = M.getFunction("printf"); 437 if (!PrintfFunction || !PrintfFunction->isDeclaration()) 438 return false; 439 440 for (auto &U : PrintfFunction->uses()) { 441 if (auto *CI = dyn_cast<CallInst>(U.getUser())) { 442 if (CI->isCallee(&U)) 443 Printfs.push_back(CI); 444 } 445 } 446 447 if (Printfs.empty()) 448 return false; 449 450 TD = &M.getDataLayout(); 451 452 return lowerPrintfForGpu(M); 453 } 454 455 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) { 456 return AMDGPUPrintfRuntimeBindingImpl().run(M); 457 } 458 459 PreservedAnalyses 460 AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) { 461 bool Changed = AMDGPUPrintfRuntimeBindingImpl().run(M); 462 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 463 } 464