1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Utility function to lower a printf call into a series of device 10 // library calls on the AMDGPU target. 11 // 12 // WARNING: This file knows about certain library functions. It recognizes them 13 // by name, and hardwires knowledge of their semantics. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" 18 #include "llvm/ADT/SparseBitVector.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 21 using namespace llvm; 22 23 #define DEBUG_TYPE "amdgpu-emit-printf" 24 25 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) { 26 auto Int64Ty = Builder.getInt64Ty(); 27 auto Ty = Arg->getType(); 28 29 if (auto IntTy = dyn_cast<IntegerType>(Ty)) { 30 switch (IntTy->getBitWidth()) { 31 case 32: 32 return Builder.CreateZExt(Arg, Int64Ty); 33 case 64: 34 return Arg; 35 } 36 } 37 38 if (Ty->getTypeID() == Type::DoubleTyID) { 39 return Builder.CreateBitCast(Arg, Int64Ty); 40 } 41 42 if (isa<PointerType>(Ty)) { 43 return Builder.CreatePtrToInt(Arg, Int64Ty); 44 } 45 46 llvm_unreachable("unexpected type"); 47 } 48 49 static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) { 50 auto Int64Ty = Builder.getInt64Ty(); 51 auto M = Builder.GetInsertBlock()->getModule(); 52 auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty); 53 return Builder.CreateCall(Fn, Version); 54 } 55 56 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, 57 Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, 58 Value *Arg4, Value *Arg5, Value *Arg6, 59 bool IsLast) { 60 auto Int64Ty = Builder.getInt64Ty(); 61 auto Int32Ty = Builder.getInt32Ty(); 62 auto M = Builder.GetInsertBlock()->getModule(); 63 auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty, 64 Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty, 65 Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty); 66 auto IsLastValue = Builder.getInt32(IsLast); 67 auto NumArgsValue = Builder.getInt32(NumArgs); 68 return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3, 69 Arg4, Arg5, Arg6, IsLastValue}); 70 } 71 72 static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, 73 bool IsLast) { 74 auto Arg0 = fitArgInto64Bits(Builder, Arg); 75 auto Zero = Builder.getInt64(0); 76 return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero, 77 Zero, IsLast); 78 } 79 80 // The device library does not provide strlen, so we build our own loop 81 // here. While we are at it, we also include the terminating null in the length. 82 static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) { 83 auto *Prev = Builder.GetInsertBlock(); 84 Module *M = Prev->getModule(); 85 86 auto CharZero = Builder.getInt8(0); 87 auto One = Builder.getInt64(1); 88 auto Zero = Builder.getInt64(0); 89 auto Int64Ty = Builder.getInt64Ty(); 90 91 // The length is either zero for a null pointer, or the computed value for an 92 // actual string. We need a join block for a phi that represents the final 93 // value. 94 // 95 // Strictly speaking, the zero does not matter since 96 // __ockl_printf_append_string_n ignores the length if the pointer is null. 97 BasicBlock *Join = nullptr; 98 if (Prev->getTerminator()) { 99 Join = Prev->splitBasicBlock(Builder.GetInsertPoint(), 100 "strlen.join"); 101 Prev->getTerminator()->eraseFromParent(); 102 } else { 103 Join = BasicBlock::Create(M->getContext(), "strlen.join", 104 Prev->getParent()); 105 } 106 BasicBlock *While = 107 BasicBlock::Create(M->getContext(), "strlen.while", 108 Prev->getParent(), Join); 109 BasicBlock *WhileDone = BasicBlock::Create( 110 M->getContext(), "strlen.while.done", 111 Prev->getParent(), Join); 112 113 // Emit an early return for when the pointer is null. 114 Builder.SetInsertPoint(Prev); 115 auto CmpNull = 116 Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType())); 117 BranchInst::Create(Join, While, CmpNull, Prev); 118 119 // Entry to the while loop. 120 Builder.SetInsertPoint(While); 121 122 auto PtrPhi = Builder.CreatePHI(Str->getType(), 2); 123 PtrPhi->addIncoming(Str, Prev); 124 auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One); 125 PtrPhi->addIncoming(PtrNext, While); 126 127 // Condition for the while loop. 128 auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi); 129 auto Cmp = Builder.CreateICmpEQ(Data, CharZero); 130 Builder.CreateCondBr(Cmp, WhileDone, While); 131 132 // Add one to the computed length. 133 Builder.SetInsertPoint(WhileDone, WhileDone->begin()); 134 auto Begin = Builder.CreatePtrToInt(Str, Int64Ty); 135 auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty); 136 auto Len = Builder.CreateSub(End, Begin); 137 Len = Builder.CreateAdd(Len, One); 138 139 // Final join. 140 BranchInst::Create(Join, WhileDone); 141 Builder.SetInsertPoint(Join, Join->begin()); 142 auto LenPhi = Builder.CreatePHI(Len->getType(), 2); 143 LenPhi->addIncoming(Len, WhileDone); 144 LenPhi->addIncoming(Zero, Prev); 145 146 return LenPhi; 147 } 148 149 static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, 150 Value *Length, bool isLast) { 151 auto Int64Ty = Builder.getInt64Ty(); 152 auto CharPtrTy = Builder.getInt8PtrTy(); 153 auto Int32Ty = Builder.getInt32Ty(); 154 auto M = Builder.GetInsertBlock()->getModule(); 155 auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty, 156 Int64Ty, CharPtrTy, Int64Ty, Int32Ty); 157 auto IsLastInt32 = Builder.getInt32(isLast); 158 return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32}); 159 } 160 161 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, 162 bool IsLast) { 163 Arg = Builder.CreateBitCast( 164 Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace())); 165 auto Length = getStrlenWithNull(Builder, Arg); 166 return callAppendStringN(Builder, Desc, Arg, Length, IsLast); 167 } 168 169 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, 170 bool SpecIsCString, bool IsLast) { 171 if (SpecIsCString && isa<PointerType>(Arg->getType())) { 172 return appendString(Builder, Desc, Arg, IsLast); 173 } 174 // If the format specifies a string but the argument is not, the frontend will 175 // have printed a warning. We just rely on undefined behaviour and send the 176 // argument anyway. 177 return appendArg(Builder, Desc, Arg, IsLast); 178 } 179 180 // Scan the format string to locate all specifiers, and mark the ones that 181 // specify a string, i.e, the "%s" specifier with optional '*' characters. 182 static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) { 183 StringRef Str; 184 if (!getConstantStringInfo(Fmt, Str) || Str.empty()) 185 return; 186 187 static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn"; 188 size_t SpecPos = 0; 189 // Skip the first argument, the format string. 190 unsigned ArgIdx = 1; 191 192 while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) { 193 if (Str[SpecPos + 1] == '%') { 194 SpecPos += 2; 195 continue; 196 } 197 auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos); 198 if (SpecEnd == StringRef::npos) 199 return; 200 auto Spec = Str.slice(SpecPos, SpecEnd + 1); 201 ArgIdx += Spec.count('*'); 202 if (Str[SpecEnd] == 's') { 203 BV.set(ArgIdx); 204 } 205 SpecPos = SpecEnd + 1; 206 ++ArgIdx; 207 } 208 } 209 210 Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, 211 ArrayRef<Value *> Args) { 212 auto NumOps = Args.size(); 213 assert(NumOps >= 1); 214 215 auto Fmt = Args[0]; 216 SparseBitVector<8> SpecIsCString; 217 locateCStrings(SpecIsCString, Fmt); 218 219 auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0)); 220 Desc = appendString(Builder, Desc, Fmt, NumOps == 1); 221 222 // FIXME: This invokes hostcall once for each argument. We can pack up to 223 // seven scalar printf arguments in a single hostcall. See the signature of 224 // callAppendArgs(). 225 for (unsigned int i = 1; i != NumOps; ++i) { 226 bool IsLast = i == NumOps - 1; 227 bool IsCString = SpecIsCString.test(i); 228 Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast); 229 } 230 231 return Builder.CreateTrunc(Desc, Builder.getInt32Ty()); 232 } 233