xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Utility function to lower a printf call into a series of device
10 // library calls on the AMDGPU target.
11 //
12 // WARNING: This file knows about certain library functions. It recognizes them
13 // by name, and hardwires knowledge of their semantics.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
18 #include "llvm/ADT/SparseBitVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/Analysis/ValueTracking.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/DataExtractor.h"
23 #include "llvm/Support/MD5.h"
24 #include "llvm/Support/MathExtras.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "amdgpu-emit-printf"
29 
30 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
31   auto Int64Ty = Builder.getInt64Ty();
32   auto Ty = Arg->getType();
33 
34   if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
35     switch (IntTy->getBitWidth()) {
36     case 32:
37       return Builder.CreateZExt(Arg, Int64Ty);
38     case 64:
39       return Arg;
40     }
41   }
42 
43   if (Ty->getTypeID() == Type::DoubleTyID) {
44     return Builder.CreateBitCast(Arg, Int64Ty);
45   }
46 
47   if (isa<PointerType>(Ty)) {
48     return Builder.CreatePtrToInt(Arg, Int64Ty);
49   }
50 
51   llvm_unreachable("unexpected type");
52 }
53 
54 static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
55   auto Int64Ty = Builder.getInt64Ty();
56   auto M = Builder.GetInsertBlock()->getModule();
57   auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
58   return Builder.CreateCall(Fn, Version);
59 }
60 
61 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
62                              Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
63                              Value *Arg4, Value *Arg5, Value *Arg6,
64                              bool IsLast) {
65   auto Int64Ty = Builder.getInt64Ty();
66   auto Int32Ty = Builder.getInt32Ty();
67   auto M = Builder.GetInsertBlock()->getModule();
68   auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
69                                    Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
70                                    Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
71   auto IsLastValue = Builder.getInt32(IsLast);
72   auto NumArgsValue = Builder.getInt32(NumArgs);
73   return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
74                                  Arg4, Arg5, Arg6, IsLastValue});
75 }
76 
77 static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
78                         bool IsLast) {
79   auto Arg0 = fitArgInto64Bits(Builder, Arg);
80   auto Zero = Builder.getInt64(0);
81   return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
82                         Zero, IsLast);
83 }
84 
85 // The device library does not provide strlen, so we build our own loop
86 // here. While we are at it, we also include the terminating null in the length.
87 static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
88   auto *Prev = Builder.GetInsertBlock();
89   Module *M = Prev->getModule();
90 
91   auto CharZero = Builder.getInt8(0);
92   auto One = Builder.getInt64(1);
93   auto Zero = Builder.getInt64(0);
94   auto Int64Ty = Builder.getInt64Ty();
95 
96   // The length is either zero for a null pointer, or the computed value for an
97   // actual string. We need a join block for a phi that represents the final
98   // value.
99   //
100   //  Strictly speaking, the zero does not matter since
101   // __ockl_printf_append_string_n ignores the length if the pointer is null.
102   BasicBlock *Join = nullptr;
103   if (Prev->getTerminator()) {
104     Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
105                                  "strlen.join");
106     Prev->getTerminator()->eraseFromParent();
107   } else {
108     Join = BasicBlock::Create(M->getContext(), "strlen.join",
109                               Prev->getParent());
110   }
111   BasicBlock *While =
112       BasicBlock::Create(M->getContext(), "strlen.while",
113                          Prev->getParent(), Join);
114   BasicBlock *WhileDone = BasicBlock::Create(
115       M->getContext(), "strlen.while.done",
116       Prev->getParent(), Join);
117 
118   // Emit an early return for when the pointer is null.
119   Builder.SetInsertPoint(Prev);
120   auto CmpNull =
121       Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
122   BranchInst::Create(Join, While, CmpNull, Prev);
123 
124   // Entry to the while loop.
125   Builder.SetInsertPoint(While);
126 
127   auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
128   PtrPhi->addIncoming(Str, Prev);
129   auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
130   PtrPhi->addIncoming(PtrNext, While);
131 
132   // Condition for the while loop.
133   auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
134   auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
135   Builder.CreateCondBr(Cmp, WhileDone, While);
136 
137   // Add one to the computed length.
138   Builder.SetInsertPoint(WhileDone, WhileDone->begin());
139   auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
140   auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
141   auto Len = Builder.CreateSub(End, Begin);
142   Len = Builder.CreateAdd(Len, One);
143 
144   // Final join.
145   BranchInst::Create(Join, WhileDone);
146   Builder.SetInsertPoint(Join, Join->begin());
147   auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
148   LenPhi->addIncoming(Len, WhileDone);
149   LenPhi->addIncoming(Zero, Prev);
150 
151   return LenPhi;
152 }
153 
154 static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
155                                 Value *Length, bool isLast) {
156   auto Int64Ty = Builder.getInt64Ty();
157   auto IsLastInt32 = Builder.getInt32(isLast);
158   auto M = Builder.GetInsertBlock()->getModule();
159   auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
160                                    Desc->getType(), Str->getType(),
161                                    Length->getType(), IsLastInt32->getType());
162   return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
163 }
164 
165 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
166                            bool IsLast) {
167   auto Length = getStrlenWithNull(Builder, Arg);
168   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
169 }
170 
171 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
172                          bool SpecIsCString, bool IsLast) {
173   if (SpecIsCString && isa<PointerType>(Arg->getType())) {
174     return appendString(Builder, Desc, Arg, IsLast);
175   }
176   // If the format specifies a string but the argument is not, the frontend will
177   // have printed a warning. We just rely on undefined behaviour and send the
178   // argument anyway.
179   return appendArg(Builder, Desc, Arg, IsLast);
180 }
181 
182 // Scan the format string to locate all specifiers, and mark the ones that
183 // specify a string, i.e, the "%s" specifier with optional '*' characters.
184 static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
185   static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
186   size_t SpecPos = 0;
187   // Skip the first argument, the format string.
188   unsigned ArgIdx = 1;
189 
190   while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
191     if (Str[SpecPos + 1] == '%') {
192       SpecPos += 2;
193       continue;
194     }
195     auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
196     if (SpecEnd == StringRef::npos)
197       return;
198     auto Spec = Str.slice(SpecPos, SpecEnd + 1);
199     ArgIdx += Spec.count('*');
200     if (Str[SpecEnd] == 's') {
201       BV.set(ArgIdx);
202     }
203     SpecPos = SpecEnd + 1;
204     ++ArgIdx;
205   }
206 }
207 
208 // helper struct to package the string related data
209 struct StringData {
210   StringRef Str;
211   Value *RealSize = nullptr;
212   Value *AlignedSize = nullptr;
213   bool IsConst = true;
214 
215   StringData(StringRef ST, Value *RS, Value *AS, bool IC)
216       : Str(ST), RealSize(RS), AlignedSize(AS), IsConst(IC) {}
217 };
218 
219 // Calculates frame size required for current printf expansion and allocates
220 // space on printf buffer. Printf frame includes following contents
221 // [ ControlDWord , format string/Hash , Arguments (each aligned to 8 byte) ]
222 static Value *callBufferedPrintfStart(
223     IRBuilder<> &Builder, ArrayRef<Value *> Args, Value *Fmt,
224     bool isConstFmtStr, SparseBitVector<8> &SpecIsCString,
225     SmallVectorImpl<StringData> &StringContents, Value *&ArgSize) {
226   Module *M = Builder.GetInsertBlock()->getModule();
227   Value *NonConstStrLen = nullptr;
228   Value *LenWithNull = nullptr;
229   Value *LenWithNullAligned = nullptr;
230   Value *TempAdd = nullptr;
231 
232   // First 4 bytes to be reserved for control dword
233   size_t BufSize = 4;
234   if (isConstFmtStr)
235     // First 8 bytes of MD5 hash
236     BufSize += 8;
237   else {
238     LenWithNull = getStrlenWithNull(Builder, Fmt);
239 
240     // Align the computed length to next 8 byte boundary
241     TempAdd = Builder.CreateAdd(LenWithNull,
242                                 ConstantInt::get(LenWithNull->getType(), 7U));
243     NonConstStrLen = Builder.CreateAnd(
244         TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
245 
246     StringContents.push_back(
247         StringData(StringRef(), LenWithNull, NonConstStrLen, false));
248   }
249 
250   for (size_t i = 1; i < Args.size(); i++) {
251     if (SpecIsCString.test(i)) {
252       StringRef ArgStr;
253       if (getConstantStringInfo(Args[i], ArgStr)) {
254         auto alignedLen = alignTo(ArgStr.size() + 1, 8);
255         StringContents.push_back(StringData(
256             ArgStr,
257             /*RealSize*/ nullptr, /*AlignedSize*/ nullptr, /*IsConst*/ true));
258         BufSize += alignedLen;
259       } else {
260         LenWithNull = getStrlenWithNull(Builder, Args[i]);
261 
262         // Align the computed length to next 8 byte boundary
263         TempAdd = Builder.CreateAdd(
264             LenWithNull, ConstantInt::get(LenWithNull->getType(), 7U));
265         LenWithNullAligned = Builder.CreateAnd(
266             TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
267 
268         if (NonConstStrLen) {
269           auto Val = Builder.CreateAdd(LenWithNullAligned, NonConstStrLen,
270                                        "cumulativeAdd");
271           NonConstStrLen = Val;
272         } else
273           NonConstStrLen = LenWithNullAligned;
274 
275         StringContents.push_back(
276             StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
277       }
278     } else {
279       int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
280       // We end up expanding non string arguments to 8 bytes
281       // (args smaller than 8 bytes)
282       BufSize += std::max(AllocSize, 8);
283     }
284   }
285 
286   // calculate final size value to be passed to printf_alloc
287   Value *SizeToReserve = ConstantInt::get(Builder.getInt64Ty(), BufSize, false);
288   SmallVector<Value *, 1> Alloc_args;
289   if (NonConstStrLen)
290     SizeToReserve = Builder.CreateAdd(NonConstStrLen, SizeToReserve);
291 
292   ArgSize = Builder.CreateTrunc(SizeToReserve, Builder.getInt32Ty());
293   Alloc_args.push_back(ArgSize);
294 
295   // call the printf_alloc function
296   AttributeList Attr = AttributeList::get(
297       Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind);
298 
299   Type *Tys_alloc[1] = {Builder.getInt32Ty()};
300   Type *PtrTy =
301       Builder.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
302   FunctionType *FTy_alloc = FunctionType::get(PtrTy, Tys_alloc, false);
303   auto PrintfAllocFn =
304       M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
305 
306   return Builder.CreateCall(PrintfAllocFn, Alloc_args, "printf_alloc_fn");
307 }
308 
309 // Prepare constant string argument to push onto the buffer
310 static void processConstantStringArg(StringData *SD, IRBuilder<> &Builder,
311                                      SmallVectorImpl<Value *> &WhatToStore) {
312   std::string Str(SD->Str.str() + '\0');
313 
314   DataExtractor Extractor(Str, /*IsLittleEndian=*/true, 8);
315   DataExtractor::Cursor Offset(0);
316   while (Offset && Offset.tell() < Str.size()) {
317     const uint64_t ReadSize = 4;
318     uint64_t ReadNow = std::min(ReadSize, Str.size() - Offset.tell());
319     uint64_t ReadBytes = 0;
320     switch (ReadNow) {
321     default:
322       llvm_unreachable("min(4, X) > 4?");
323     case 1:
324       ReadBytes = Extractor.getU8(Offset);
325       break;
326     case 2:
327       ReadBytes = Extractor.getU16(Offset);
328       break;
329     case 3:
330       ReadBytes = Extractor.getU24(Offset);
331       break;
332     case 4:
333       ReadBytes = Extractor.getU32(Offset);
334       break;
335     }
336     cantFail(Offset.takeError(), "failed to read bytes from constant array");
337 
338     APInt IntVal(8 * ReadSize, ReadBytes);
339 
340     // TODO: Should not bother aligning up.
341     if (ReadNow < ReadSize)
342       IntVal = IntVal.zext(8 * ReadSize);
343 
344     Type *IntTy = Type::getIntNTy(Builder.getContext(), IntVal.getBitWidth());
345     WhatToStore.push_back(ConstantInt::get(IntTy, IntVal));
346   }
347   // Additional padding for 8 byte alignment
348   int Rem = (Str.size() % 8);
349   if (Rem > 0 && Rem <= 4)
350     WhatToStore.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
351 }
352 
353 static Value *processNonStringArg(Value *Arg, IRBuilder<> &Builder) {
354   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
355   auto Ty = Arg->getType();
356 
357   if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
358     if (IntTy->getBitWidth() < 64) {
359       return Builder.CreateZExt(Arg, Builder.getInt64Ty());
360     }
361   }
362 
363   if (Ty->isFloatingPointTy()) {
364     if (DL.getTypeAllocSize(Ty) < 8) {
365       return Builder.CreateFPExt(Arg, Builder.getDoubleTy());
366     }
367   }
368 
369   return Arg;
370 }
371 
372 static void
373 callBufferedPrintfArgPush(IRBuilder<> &Builder, ArrayRef<Value *> Args,
374                           Value *PtrToStore, SparseBitVector<8> &SpecIsCString,
375                           SmallVectorImpl<StringData> &StringContents,
376                           bool IsConstFmtStr) {
377   Module *M = Builder.GetInsertBlock()->getModule();
378   const DataLayout &DL = M->getDataLayout();
379   auto StrIt = StringContents.begin();
380   size_t i = IsConstFmtStr ? 1 : 0;
381   for (; i < Args.size(); i++) {
382     SmallVector<Value *, 32> WhatToStore;
383     if ((i == 0) || SpecIsCString.test(i)) {
384       if (StrIt->IsConst) {
385         processConstantStringArg(StrIt, Builder, WhatToStore);
386         StrIt++;
387       } else {
388         // This copies the contents of the string, however the next offset
389         // is at aligned length, the extra space that might be created due
390         // to alignment padding is not populated with any specific value
391         // here. This would be safe as long as runtime is sync with
392         // the offsets.
393         Builder.CreateMemCpy(PtrToStore, /*DstAlign*/ Align(1), Args[i],
394                              /*SrcAlign*/ Args[i]->getPointerAlignment(DL),
395                              StrIt->RealSize);
396 
397         PtrToStore =
398             Builder.CreateInBoundsGEP(Builder.getInt8Ty(), PtrToStore,
399                                       {StrIt->AlignedSize}, "PrintBuffNextPtr");
400         LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:"
401                           << *PtrToStore << '\n');
402 
403         // done with current argument, move to next
404         StrIt++;
405         continue;
406       }
407     } else {
408       WhatToStore.push_back(processNonStringArg(Args[i], Builder));
409     }
410 
411     for (Value *toStore : WhatToStore) {
412       StoreInst *StBuff = Builder.CreateStore(toStore, PtrToStore);
413       LLVM_DEBUG(dbgs() << "inserting store to printf buffer:" << *StBuff
414                         << '\n');
415       (void)StBuff;
416       PtrToStore = Builder.CreateConstInBoundsGEP1_32(
417           Builder.getInt8Ty(), PtrToStore,
418           M->getDataLayout().getTypeAllocSize(toStore->getType()),
419           "PrintBuffNextPtr");
420       LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:" << *PtrToStore
421                         << '\n');
422     }
423   }
424 }
425 
426 Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args,
427                                   bool IsBuffered) {
428   auto NumOps = Args.size();
429   assert(NumOps >= 1);
430 
431   auto Fmt = Args[0];
432   SparseBitVector<8> SpecIsCString;
433   StringRef FmtStr;
434 
435   if (getConstantStringInfo(Fmt, FmtStr))
436     locateCStrings(SpecIsCString, FmtStr);
437 
438   if (IsBuffered) {
439     SmallVector<StringData, 8> StringContents;
440     Module *M = Builder.GetInsertBlock()->getModule();
441     LLVMContext &Ctx = Builder.getContext();
442     auto Int8Ty = Builder.getInt8Ty();
443     auto Int32Ty = Builder.getInt32Ty();
444     bool IsConstFmtStr = !FmtStr.empty();
445 
446     Value *ArgSize = nullptr;
447     Value *Ptr =
448         callBufferedPrintfStart(Builder, Args, Fmt, IsConstFmtStr,
449                                 SpecIsCString, StringContents, ArgSize);
450 
451     // The buffered version still follows OpenCL printf standards for
452     // printf return value, i.e 0 on success, -1 on failure.
453     ConstantPointerNull *zeroIntPtr =
454         ConstantPointerNull::get(cast<PointerType>(Ptr->getType()));
455 
456     auto *Cmp = cast<ICmpInst>(Builder.CreateICmpNE(Ptr, zeroIntPtr, ""));
457 
458     BasicBlock *End = BasicBlock::Create(Ctx, "end.block",
459                                          Builder.GetInsertBlock()->getParent());
460     BasicBlock *ArgPush = BasicBlock::Create(
461         Ctx, "argpush.block", Builder.GetInsertBlock()->getParent());
462 
463     BranchInst::Create(ArgPush, End, Cmp, Builder.GetInsertBlock());
464     Builder.SetInsertPoint(ArgPush);
465 
466     // Create controlDWord and store as the first entry, format as follows
467     // Bit 0 (LSB) -> stream (1 if stderr, 0 if stdout, printf always outputs to
468     // stdout) Bit 1 -> constant format string (1 if constant) Bits 2-31 -> size
469     // of printf data frame
470     auto ConstantTwo = Builder.getInt32(2);
471     auto ControlDWord = Builder.CreateShl(ArgSize, ConstantTwo);
472     if (IsConstFmtStr)
473       ControlDWord = Builder.CreateOr(ControlDWord, ConstantTwo);
474 
475     Builder.CreateStore(ControlDWord, Ptr);
476 
477     Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 4);
478 
479     // Create MD5 hash for costant format string, push low 64 bits of the
480     // same onto buffer and metadata.
481     NamedMDNode *metaD = M->getOrInsertNamedMetadata("llvm.printf.fmts");
482     if (IsConstFmtStr) {
483       MD5 Hasher;
484       MD5::MD5Result Hash;
485       Hasher.update(FmtStr);
486       Hasher.final(Hash);
487 
488       // Try sticking to llvm.printf.fmts format, although we are not going to
489       // use the ID and argument size fields while printing,
490       std::string MetadataStr =
491           "0:0:" + llvm::utohexstr(Hash.low(), /*LowerCase=*/true) + "," +
492           FmtStr.str();
493       MDString *fmtStrArray = MDString::get(Ctx, MetadataStr);
494       MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
495       metaD->addOperand(myMD);
496 
497       Builder.CreateStore(Builder.getInt64(Hash.low()), Ptr);
498       Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 8);
499     } else {
500       // Include a dummy metadata instance in case of only non constant
501       // format string usage, This might be an absurd usecase but needs to
502       // be done for completeness
503       if (metaD->getNumOperands() == 0) {
504         MDString *fmtStrArray =
505             MDString::get(Ctx, "0:0:ffffffff,\"Non const format string\"");
506         MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
507         metaD->addOperand(myMD);
508       }
509     }
510 
511     // Push The printf arguments onto buffer
512     callBufferedPrintfArgPush(Builder, Args, Ptr, SpecIsCString, StringContents,
513                               IsConstFmtStr);
514 
515     // End block, returns -1 on failure
516     BranchInst::Create(End, ArgPush);
517     Builder.SetInsertPoint(End);
518     return Builder.CreateSExt(Builder.CreateNot(Cmp), Int32Ty, "printf_result");
519   }
520 
521   auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
522   Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
523 
524   // FIXME: This invokes hostcall once for each argument. We can pack up to
525   // seven scalar printf arguments in a single hostcall. See the signature of
526   // callAppendArgs().
527   for (unsigned int i = 1; i != NumOps; ++i) {
528     bool IsLast = i == NumOps - 1;
529     bool IsCString = SpecIsCString.test(i);
530     Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
531   }
532 
533   return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
534 }
535