1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Utility function to lower a printf call into a series of device 10 // library calls on the AMDGPU target. 11 // 12 // WARNING: This file knows about certain library functions. It recognizes them 13 // by name, and hardwires knowledge of their semantics. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" 18 #include "llvm/ADT/SparseBitVector.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/Analysis/ValueTracking.h" 21 #include "llvm/Support/DataExtractor.h" 22 #include "llvm/Support/MD5.h" 23 #include "llvm/Support/MathExtras.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "amdgpu-emit-printf" 28 29 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { 30 auto Int64Ty = Builder.getInt64Ty(); 31 auto Ty = Arg->getType(); 32 33 if (auto IntTy = dyn_cast<IntegerType>(Ty)) { 34 switch (IntTy->getBitWidth()) { 35 case 32: 36 return Builder.CreateZExt(Arg, Int64Ty); 37 case 64: 38 return Arg; 39 } 40 } 41 42 if (Ty->getTypeID() == Type::DoubleTyID) { 43 return Builder.CreateBitCast(Arg, Int64Ty); 44 } 45 46 if (isa<PointerType>(Ty)) { 47 return Builder.CreatePtrToInt(Arg, Int64Ty); 48 } 49 50 llvm_unreachable("unexpected type"); 51 } 52 53 static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) { 54 auto Int64Ty = Builder.getInt64Ty(); 55 auto M = Builder.GetInsertBlock()->getModule(); 56 auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); 57 return Builder.CreateCall(Fn, Version); 58 } 59 60 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, 61 Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, 62 Value *Arg4, Value *Arg5, Value *Arg6, 63 bool IsLast) { 64 auto Int64Ty = Builder.getInt64Ty(); 65 auto Int32Ty = Builder.getInt32Ty(); 66 auto M = Builder.GetInsertBlock()->getModule(); 67 auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty, 68 Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty, 69 Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty); 70 auto IsLastValue = Builder.getInt32(IsLast); 71 auto NumArgsValue = Builder.getInt32(NumArgs); 72 return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3, 73 Arg4, Arg5, Arg6, IsLastValue}); 74 } 75 76 static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, 77 bool IsLast) { 78 auto Arg0 = fitArgInto64Bits(Builder, Arg); 79 auto Zero = Builder.getInt64(0); 80 return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero, 81 Zero, IsLast); 82 } 83 84 // The device library does not provide strlen, so we build our own loop 85 // here. While we are at it, we also include the terminating null in the length. 86 static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) { 87 auto *Prev = Builder.GetInsertBlock(); 88 Module *M = Prev->getModule(); 89 90 auto CharZero = Builder.getInt8(0); 91 auto One = Builder.getInt64(1); 92 auto Zero = Builder.getInt64(0); 93 auto Int64Ty = Builder.getInt64Ty(); 94 95 // The length is either zero for a null pointer, or the computed value for an 96 // actual string. We need a join block for a phi that represents the final 97 // value. 98 // 99 // Strictly speaking, the zero does not matter since 100 // __ockl_printf_append_string_n ignores the length if the pointer is null. 101 BasicBlock *Join = nullptr; 102 if (Prev->getTerminator()) { 103 Join = Prev->splitBasicBlock(Builder.GetInsertPoint(), 104 "strlen.join"); 105 Prev->getTerminator()->eraseFromParent(); 106 } else { 107 Join = BasicBlock::Create(M->getContext(), "strlen.join", 108 Prev->getParent()); 109 } 110 BasicBlock *While = 111 BasicBlock::Create(M->getContext(), "strlen.while", 112 Prev->getParent(), Join); 113 BasicBlock *WhileDone = BasicBlock::Create( 114 M->getContext(), "strlen.while.done", 115 Prev->getParent(), Join); 116 117 // Emit an early return for when the pointer is null. 118 Builder.SetInsertPoint(Prev); 119 auto CmpNull = 120 Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType())); 121 BranchInst::Create(Join, While, CmpNull, Prev); 122 123 // Entry to the while loop. 124 Builder.SetInsertPoint(While); 125 126 auto PtrPhi = Builder.CreatePHI(Str->getType(), 2); 127 PtrPhi->addIncoming(Str, Prev); 128 auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One); 129 PtrPhi->addIncoming(PtrNext, While); 130 131 // Condition for the while loop. 132 auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi); 133 auto Cmp = Builder.CreateICmpEQ(Data, CharZero); 134 Builder.CreateCondBr(Cmp, WhileDone, While); 135 136 // Add one to the computed length. 137 Builder.SetInsertPoint(WhileDone, WhileDone->begin()); 138 auto Begin = Builder.CreatePtrToInt(Str, Int64Ty); 139 auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty); 140 auto Len = Builder.CreateSub(End, Begin); 141 Len = Builder.CreateAdd(Len, One); 142 143 // Final join. 144 BranchInst::Create(Join, WhileDone); 145 Builder.SetInsertPoint(Join, Join->begin()); 146 auto LenPhi = Builder.CreatePHI(Len->getType(), 2); 147 LenPhi->addIncoming(Len, WhileDone); 148 LenPhi->addIncoming(Zero, Prev); 149 150 return LenPhi; 151 } 152 153 static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, 154 Value *Length, bool isLast) { 155 auto Int64Ty = Builder.getInt64Ty(); 156 auto CharPtrTy = Builder.getInt8PtrTy(); 157 auto Int32Ty = Builder.getInt32Ty(); 158 auto M = Builder.GetInsertBlock()->getModule(); 159 auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty, 160 Int64Ty, CharPtrTy, Int64Ty, Int32Ty); 161 auto IsLastInt32 = Builder.getInt32(isLast); 162 return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32}); 163 } 164 165 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, 166 bool IsLast) { 167 Arg = Builder.CreateBitCast( 168 Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace())); 169 auto Length = getStrlenWithNull(Builder, Arg); 170 return callAppendStringN(Builder, Desc, Arg, Length, IsLast); 171 } 172 173 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, 174 bool SpecIsCString, bool IsLast) { 175 if (SpecIsCString && isa<PointerType>(Arg->getType())) { 176 return appendString(Builder, Desc, Arg, IsLast); 177 } 178 // If the format specifies a string but the argument is not, the frontend will 179 // have printed a warning. We just rely on undefined behaviour and send the 180 // argument anyway. 181 return appendArg(Builder, Desc, Arg, IsLast); 182 } 183 184 // Scan the format string to locate all specifiers, and mark the ones that 185 // specify a string, i.e, the "%s" specifier with optional '*' characters. 186 static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) { 187 static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn"; 188 size_t SpecPos = 0; 189 // Skip the first argument, the format string. 190 unsigned ArgIdx = 1; 191 192 while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) { 193 if (Str[SpecPos + 1] == '%') { 194 SpecPos += 2; 195 continue; 196 } 197 auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos); 198 if (SpecEnd == StringRef::npos) 199 return; 200 auto Spec = Str.slice(SpecPos, SpecEnd + 1); 201 ArgIdx += Spec.count('*'); 202 if (Str[SpecEnd] == 's') { 203 BV.set(ArgIdx); 204 } 205 SpecPos = SpecEnd + 1; 206 ++ArgIdx; 207 } 208 } 209 210 // helper struct to package the string related data 211 struct StringData { 212 StringRef Str; 213 Value *RealSize = nullptr; 214 Value *AlignedSize = nullptr; 215 bool IsConst = true; 216 217 StringData(StringRef ST, Value *RS, Value *AS, bool IC) 218 : Str(ST), RealSize(RS), AlignedSize(AS), IsConst(IC) {} 219 }; 220 221 // Calculates frame size required for current printf expansion and allocates 222 // space on printf buffer. Printf frame includes following contents 223 // [ ControlDWord , format string/Hash , Arguments (each aligned to 8 byte) ] 224 static Value *callBufferedPrintfStart( 225 IRBuilder<> &Builder, ArrayRef<Value *> Args, Value *Fmt, 226 bool isConstFmtStr, SparseBitVector<8> &SpecIsCString, 227 SmallVectorImpl<StringData> &StringContents, Value *&ArgSize) { 228 Module *M = Builder.GetInsertBlock()->getModule(); 229 Value *NonConstStrLen = nullptr; 230 Value *LenWithNull = nullptr; 231 Value *LenWithNullAligned = nullptr; 232 Value *TempAdd = nullptr; 233 234 // First 4 bytes to be reserved for control dword 235 size_t BufSize = 4; 236 if (isConstFmtStr) 237 // First 8 bytes of MD5 hash 238 BufSize += 8; 239 else { 240 LenWithNull = getStrlenWithNull(Builder, Fmt); 241 242 // Align the computed length to next 8 byte boundary 243 TempAdd = Builder.CreateAdd(LenWithNull, 244 ConstantInt::get(LenWithNull->getType(), 7U)); 245 NonConstStrLen = Builder.CreateAnd( 246 TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U)); 247 248 StringContents.push_back( 249 StringData(StringRef(), LenWithNull, NonConstStrLen, false)); 250 } 251 252 for (size_t i = 1; i < Args.size(); i++) { 253 if (SpecIsCString.test(i)) { 254 StringRef ArgStr; 255 if (getConstantStringInfo(Args[i], ArgStr)) { 256 auto alignedLen = alignTo(ArgStr.size() + 1, 8); 257 StringContents.push_back(StringData( 258 ArgStr, 259 /*RealSize*/ nullptr, /*AlignedSize*/ nullptr, /*IsConst*/ true)); 260 BufSize += alignedLen; 261 } else { 262 LenWithNull = getStrlenWithNull(Builder, Args[i]); 263 264 // Align the computed length to next 8 byte boundary 265 TempAdd = Builder.CreateAdd( 266 LenWithNull, ConstantInt::get(LenWithNull->getType(), 7U)); 267 LenWithNullAligned = Builder.CreateAnd( 268 TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U)); 269 270 if (NonConstStrLen) { 271 auto Val = Builder.CreateAdd(LenWithNullAligned, NonConstStrLen, 272 "cumulativeAdd"); 273 NonConstStrLen = Val; 274 } else 275 NonConstStrLen = LenWithNullAligned; 276 277 StringContents.push_back( 278 StringData(StringRef(), LenWithNull, LenWithNullAligned, false)); 279 } 280 } else { 281 int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType()); 282 // We end up expanding non string arguments to 8 bytes 283 // (args smaller than 8 bytes) 284 BufSize += std::max(AllocSize, 8); 285 } 286 } 287 288 // calculate final size value to be passed to printf_alloc 289 Value *SizeToReserve = ConstantInt::get(Builder.getInt64Ty(), BufSize, false); 290 SmallVector<Value *, 1> Alloc_args; 291 if (NonConstStrLen) 292 SizeToReserve = Builder.CreateAdd(NonConstStrLen, SizeToReserve); 293 294 ArgSize = Builder.CreateTrunc(SizeToReserve, Builder.getInt32Ty()); 295 Alloc_args.push_back(ArgSize); 296 297 // call the printf_alloc function 298 AttributeList Attr = AttributeList::get( 299 Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind); 300 301 Type *Tys_alloc[1] = {Builder.getInt32Ty()}; 302 Type *I8Ptr = 303 Builder.getInt8PtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace()); 304 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false); 305 auto PrintfAllocFn = 306 M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr); 307 308 return Builder.CreateCall(PrintfAllocFn, Alloc_args, "printf_alloc_fn"); 309 } 310 311 // Prepare constant string argument to push onto the buffer 312 static void processConstantStringArg(StringData *SD, IRBuilder<> &Builder, 313 SmallVectorImpl<Value *> &WhatToStore) { 314 std::string Str(SD->Str.str() + '\0'); 315 316 DataExtractor Extractor(Str, /*IsLittleEndian=*/true, 8); 317 DataExtractor::Cursor Offset(0); 318 while (Offset && Offset.tell() < Str.size()) { 319 const uint64_t ReadSize = 4; 320 uint64_t ReadNow = std::min(ReadSize, Str.size() - Offset.tell()); 321 uint64_t ReadBytes = 0; 322 switch (ReadNow) { 323 default: 324 llvm_unreachable("min(4, X) > 4?"); 325 case 1: 326 ReadBytes = Extractor.getU8(Offset); 327 break; 328 case 2: 329 ReadBytes = Extractor.getU16(Offset); 330 break; 331 case 3: 332 ReadBytes = Extractor.getU24(Offset); 333 break; 334 case 4: 335 ReadBytes = Extractor.getU32(Offset); 336 break; 337 } 338 cantFail(Offset.takeError(), "failed to read bytes from constant array"); 339 340 APInt IntVal(8 * ReadSize, ReadBytes); 341 342 // TODO: Should not bother aligning up. 343 if (ReadNow < ReadSize) 344 IntVal = IntVal.zext(8 * ReadSize); 345 346 Type *IntTy = Type::getIntNTy(Builder.getContext(), IntVal.getBitWidth()); 347 WhatToStore.push_back(ConstantInt::get(IntTy, IntVal)); 348 } 349 // Additional padding for 8 byte alignment 350 int Rem = (Str.size() % 8); 351 if (Rem > 0 && Rem <= 4) 352 WhatToStore.push_back(ConstantInt::get(Builder.getInt32Ty(), 0)); 353 } 354 355 static Value *processNonStringArg(Value *Arg, IRBuilder<> &Builder) { 356 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 357 auto Ty = Arg->getType(); 358 359 if (auto IntTy = dyn_cast<IntegerType>(Ty)) { 360 if (IntTy->getBitWidth() < 64) { 361 return Builder.CreateZExt(Arg, Builder.getInt64Ty()); 362 } 363 } 364 365 if (Ty->isFloatingPointTy()) { 366 if (DL.getTypeAllocSize(Ty) < 8) { 367 return Builder.CreateFPExt(Arg, Builder.getDoubleTy()); 368 } 369 } 370 371 return Arg; 372 } 373 374 static void 375 callBufferedPrintfArgPush(IRBuilder<> &Builder, ArrayRef<Value *> Args, 376 Value *PtrToStore, SparseBitVector<8> &SpecIsCString, 377 SmallVectorImpl<StringData> &StringContents, 378 bool IsConstFmtStr) { 379 Module *M = Builder.GetInsertBlock()->getModule(); 380 const DataLayout &DL = M->getDataLayout(); 381 auto StrIt = StringContents.begin(); 382 size_t i = IsConstFmtStr ? 1 : 0; 383 for (; i < Args.size(); i++) { 384 SmallVector<Value *, 32> WhatToStore; 385 if ((i == 0) || SpecIsCString.test(i)) { 386 if (StrIt->IsConst) { 387 processConstantStringArg(StrIt, Builder, WhatToStore); 388 StrIt++; 389 } else { 390 // This copies the contents of the string, however the next offset 391 // is at aligned length, the extra space that might be created due 392 // to alignment padding is not populated with any specific value 393 // here. This would be safe as long as runtime is sync with 394 // the offsets. 395 Builder.CreateMemCpy(PtrToStore, /*DstAlign*/ Align(1), Args[i], 396 /*SrcAlign*/ Args[i]->getPointerAlignment(DL), 397 StrIt->RealSize); 398 399 PtrToStore = 400 Builder.CreateInBoundsGEP(Builder.getInt8Ty(), PtrToStore, 401 {StrIt->AlignedSize}, "PrintBuffNextPtr"); 402 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:" 403 << *PtrToStore << '\n'); 404 405 // done with current argument, move to next 406 StrIt++; 407 continue; 408 } 409 } else { 410 WhatToStore.push_back(processNonStringArg(Args[i], Builder)); 411 } 412 413 for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) { 414 Value *toStore = WhatToStore[I]; 415 416 StoreInst *StBuff = Builder.CreateStore(toStore, PtrToStore); 417 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:" << *StBuff 418 << '\n'); 419 (void)StBuff; 420 PtrToStore = Builder.CreateConstInBoundsGEP1_32( 421 Builder.getInt8Ty(), PtrToStore, 422 M->getDataLayout().getTypeAllocSize(toStore->getType()), 423 "PrintBuffNextPtr"); 424 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:" << *PtrToStore 425 << '\n'); 426 } 427 } 428 } 429 430 Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args, 431 bool IsBuffered) { 432 auto NumOps = Args.size(); 433 assert(NumOps >= 1); 434 435 auto Fmt = Args[0]; 436 SparseBitVector<8> SpecIsCString; 437 StringRef FmtStr; 438 439 if (getConstantStringInfo(Fmt, FmtStr)) 440 locateCStrings(SpecIsCString, FmtStr); 441 442 if (IsBuffered) { 443 SmallVector<StringData, 8> StringContents; 444 Module *M = Builder.GetInsertBlock()->getModule(); 445 LLVMContext &Ctx = Builder.getContext(); 446 auto Int8Ty = Builder.getInt8Ty(); 447 auto Int32Ty = Builder.getInt32Ty(); 448 bool IsConstFmtStr = !FmtStr.empty(); 449 450 Value *ArgSize = nullptr; 451 Value *Ptr = 452 callBufferedPrintfStart(Builder, Args, Fmt, IsConstFmtStr, 453 SpecIsCString, StringContents, ArgSize); 454 455 // The buffered version still follows OpenCL printf standards for 456 // printf return value, i.e 0 on success, -1 on failure. 457 ConstantPointerNull *zeroIntPtr = 458 ConstantPointerNull::get(cast<PointerType>(Ptr->getType())); 459 460 auto *Cmp = cast<ICmpInst>(Builder.CreateICmpNE(Ptr, zeroIntPtr, "")); 461 462 BasicBlock *End = BasicBlock::Create(Ctx, "end.block", 463 Builder.GetInsertBlock()->getParent()); 464 BasicBlock *ArgPush = BasicBlock::Create( 465 Ctx, "argpush.block", Builder.GetInsertBlock()->getParent()); 466 467 BranchInst::Create(ArgPush, End, Cmp, Builder.GetInsertBlock()); 468 Builder.SetInsertPoint(ArgPush); 469 470 // Create controlDWord and store as the first entry, format as follows 471 // Bit 0 (LSB) -> stream (1 if stderr, 0 if stdout, printf always outputs to 472 // stdout) Bit 1 -> constant format string (1 if constant) Bits 2-31 -> size 473 // of printf data frame 474 auto ConstantTwo = Builder.getInt32(2); 475 auto ControlDWord = Builder.CreateShl(ArgSize, ConstantTwo); 476 if (IsConstFmtStr) 477 ControlDWord = Builder.CreateOr(ControlDWord, ConstantTwo); 478 479 Builder.CreateStore(ControlDWord, Ptr); 480 481 Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 4); 482 483 // Create MD5 hash for costant format string, push low 64 bits of the 484 // same onto buffer and metadata. 485 NamedMDNode *metaD = M->getOrInsertNamedMetadata("llvm.printf.fmts"); 486 if (IsConstFmtStr) { 487 MD5 Hasher; 488 MD5::MD5Result Hash; 489 Hasher.update(FmtStr); 490 Hasher.final(Hash); 491 492 // Try sticking to llvm.printf.fmts format, although we are not going to 493 // use the ID and argument size fields while printing, 494 std::string MetadataStr = 495 "0:0:" + llvm::utohexstr(Hash.low(), /*LowerCase=*/true) + "," + 496 FmtStr.str(); 497 MDString *fmtStrArray = MDString::get(Ctx, MetadataStr); 498 MDNode *myMD = MDNode::get(Ctx, fmtStrArray); 499 metaD->addOperand(myMD); 500 501 Builder.CreateStore(Builder.getInt64(Hash.low()), Ptr); 502 Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 8); 503 } else { 504 // Include a dummy metadata instance in case of only non constant 505 // format string usage, This might be an absurd usecase but needs to 506 // be done for completeness 507 if (metaD->getNumOperands() == 0) { 508 MDString *fmtStrArray = 509 MDString::get(Ctx, "0:0:ffffffff,\"Non const format string\""); 510 MDNode *myMD = MDNode::get(Ctx, fmtStrArray); 511 metaD->addOperand(myMD); 512 } 513 } 514 515 // Push The printf arguments onto buffer 516 callBufferedPrintfArgPush(Builder, Args, Ptr, SpecIsCString, StringContents, 517 IsConstFmtStr); 518 519 // End block, returns -1 on failure 520 BranchInst::Create(End, ArgPush); 521 Builder.SetInsertPoint(End); 522 return Builder.CreateSExt(Builder.CreateNot(Cmp), Int32Ty, "printf_result"); 523 } 524 525 auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0)); 526 Desc = appendString(Builder, Desc, Fmt, NumOps == 1); 527 528 // FIXME: This invokes hostcall once for each argument. We can pack up to 529 // seven scalar printf arguments in a single hostcall. See the signature of 530 // callAppendArgs(). 531 for (unsigned int i = 1; i != NumOps; ++i) { 532 bool IsLast = i == NumOps - 1; 533 bool IsCString = SpecIsCString.test(i); 534 Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast); 535 } 536 537 return Builder.CreateTrunc(Desc, Builder.getInt32Ty()); 538 } 539