10b57cec5SDimitry Andric //===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the transformation that promotes indirect calls to
100b57cec5SDimitry Andric // conditional direct calls when the indirect-call value profile metadata is
110b57cec5SDimitry Andric // available.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric
150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
16*0fca6ea1SDimitry Andric #include "llvm/ADT/DenseMap.h"
170b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
180b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
190b57cec5SDimitry Andric #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/IndirectCallVisitor.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
23*0fca6ea1SDimitry Andric #include "llvm/Analysis/TypeMetadataUtils.h"
240b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
25*0fca6ea1SDimitry Andric #include "llvm/IR/Dominators.h"
260b57cec5SDimitry Andric #include "llvm/IR/Function.h"
270b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
280b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
290b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
300b57cec5SDimitry Andric #include "llvm/IR/MDBuilder.h"
310b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
325f757f3fSDimitry Andric #include "llvm/IR/ProfDataUtils.h"
330b57cec5SDimitry Andric #include "llvm/IR/Value.h"
340b57cec5SDimitry Andric #include "llvm/ProfileData/InstrProf.h"
350b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
370b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
380b57cec5SDimitry Andric #include "llvm/Support/Error.h"
390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
400b57cec5SDimitry Andric #include "llvm/Transforms/Instrumentation.h"
410b57cec5SDimitry Andric #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
420b57cec5SDimitry Andric #include "llvm/Transforms/Utils/CallPromotionUtils.h"
430b57cec5SDimitry Andric #include <cassert>
440b57cec5SDimitry Andric #include <cstdint>
450b57cec5SDimitry Andric #include <memory>
46*0fca6ea1SDimitry Andric #include <set>
470b57cec5SDimitry Andric #include <string>
48*0fca6ea1SDimitry Andric #include <unordered_map>
490b57cec5SDimitry Andric #include <utility>
500b57cec5SDimitry Andric #include <vector>
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric using namespace llvm;
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric #define DEBUG_TYPE "pgo-icall-prom"
550b57cec5SDimitry Andric
560b57cec5SDimitry Andric STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
570b57cec5SDimitry Andric STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
580b57cec5SDimitry Andric
59*0fca6ea1SDimitry Andric extern cl::opt<unsigned> MaxNumVTableAnnotations;
60*0fca6ea1SDimitry Andric
61*0fca6ea1SDimitry Andric namespace llvm {
62*0fca6ea1SDimitry Andric extern cl::opt<bool> EnableVTableProfileUse;
63*0fca6ea1SDimitry Andric }
64*0fca6ea1SDimitry Andric
650b57cec5SDimitry Andric // Command line option to disable indirect-call promotion with the default as
660b57cec5SDimitry Andric // false. This is for debug purpose.
670b57cec5SDimitry Andric static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
680b57cec5SDimitry Andric cl::desc("Disable indirect call promotion"));
690b57cec5SDimitry Andric
700b57cec5SDimitry Andric // Set the cutoff value for the promotion. If the value is other than 0, we
710b57cec5SDimitry Andric // stop the transformation once the total number of promotions equals the cutoff
720b57cec5SDimitry Andric // value.
730b57cec5SDimitry Andric // For debug use only.
740b57cec5SDimitry Andric static cl::opt<unsigned>
7581ad6265SDimitry Andric ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden,
760b57cec5SDimitry Andric cl::desc("Max number of promotions for this compilation"));
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
790b57cec5SDimitry Andric // For debug use only.
800b57cec5SDimitry Andric static cl::opt<unsigned>
8181ad6265SDimitry Andric ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
820b57cec5SDimitry Andric cl::desc("Skip Callsite up to this number for this compilation"));
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric // Set if the pass is called in LTO optimization. The difference for LTO mode
850b57cec5SDimitry Andric // is the pass won't prefix the source module name to the internal linkage
860b57cec5SDimitry Andric // symbols.
870b57cec5SDimitry Andric static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
880b57cec5SDimitry Andric cl::desc("Run indirect-call promotion in LTO "
890b57cec5SDimitry Andric "mode"));
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric // Set if the pass is called in SamplePGO mode. The difference for SamplePGO
920b57cec5SDimitry Andric // mode is it will add prof metadatato the created direct call.
930b57cec5SDimitry Andric static cl::opt<bool>
940b57cec5SDimitry Andric ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
950b57cec5SDimitry Andric cl::desc("Run indirect-call promotion in SamplePGO mode"));
960b57cec5SDimitry Andric
970b57cec5SDimitry Andric // If the option is set to true, only call instructions will be considered for
980b57cec5SDimitry Andric // transformation -- invoke instructions will be ignored.
990b57cec5SDimitry Andric static cl::opt<bool>
1000b57cec5SDimitry Andric ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
1010b57cec5SDimitry Andric cl::desc("Run indirect-call promotion for call instructions "
1020b57cec5SDimitry Andric "only"));
1030b57cec5SDimitry Andric
1040b57cec5SDimitry Andric // If the option is set to true, only invoke instructions will be considered for
1050b57cec5SDimitry Andric // transformation -- call instructions will be ignored.
1060b57cec5SDimitry Andric static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
1070b57cec5SDimitry Andric cl::Hidden,
1080b57cec5SDimitry Andric cl::desc("Run indirect-call promotion for "
1090b57cec5SDimitry Andric "invoke instruction only"));
1100b57cec5SDimitry Andric
1110b57cec5SDimitry Andric // Dump the function level IR if the transformation happened in this
1120b57cec5SDimitry Andric // function. For debug use only.
1130b57cec5SDimitry Andric static cl::opt<bool>
1140b57cec5SDimitry Andric ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
1150b57cec5SDimitry Andric cl::desc("Dump IR after transformation happens"));
1160b57cec5SDimitry Andric
117*0fca6ea1SDimitry Andric // Indirect call promotion pass will fall back to function-based comparison if
118*0fca6ea1SDimitry Andric // vtable-count / function-count is smaller than this threshold.
119*0fca6ea1SDimitry Andric static cl::opt<float> ICPVTablePercentageThreshold(
120*0fca6ea1SDimitry Andric "icp-vtable-percentage-threshold", cl::init(0.99), cl::Hidden,
121*0fca6ea1SDimitry Andric cl::desc("The percentage threshold of vtable-count / function-count for "
122*0fca6ea1SDimitry Andric "cost-benefit analysis."));
123*0fca6ea1SDimitry Andric
124*0fca6ea1SDimitry Andric // Although comparing vtables can save a vtable load, we may need to compare
125*0fca6ea1SDimitry Andric // vtable pointer with multiple vtable address points due to class inheritance.
126*0fca6ea1SDimitry Andric // Comparing with multiple vtables inserts additional instructions on hot code
127*0fca6ea1SDimitry Andric // path, and doing so for an earlier candidate delays the comparisons for later
128*0fca6ea1SDimitry Andric // candidates. For the last candidate, only the fallback path is affected.
129*0fca6ea1SDimitry Andric // We allow multiple vtable comparison for the last function candidate and use
130*0fca6ea1SDimitry Andric // the option below to cap the number of vtables.
131*0fca6ea1SDimitry Andric static cl::opt<int> ICPMaxNumVTableLastCandidate(
132*0fca6ea1SDimitry Andric "icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
133*0fca6ea1SDimitry Andric cl::desc("The maximum number of vtable for the last candidate."));
134*0fca6ea1SDimitry Andric
1350b57cec5SDimitry Andric namespace {
1360b57cec5SDimitry Andric
137*0fca6ea1SDimitry Andric // The key is a vtable global variable, and the value is a map.
138*0fca6ea1SDimitry Andric // In the inner map, the key represents address point offsets and the value is a
139*0fca6ea1SDimitry Andric // constant for this address point.
140*0fca6ea1SDimitry Andric using VTableAddressPointOffsetValMap =
141*0fca6ea1SDimitry Andric SmallDenseMap<const GlobalVariable *, std::unordered_map<int, Constant *>>;
142*0fca6ea1SDimitry Andric
143*0fca6ea1SDimitry Andric // A struct to collect type information for a virtual call site.
144*0fca6ea1SDimitry Andric struct VirtualCallSiteInfo {
145*0fca6ea1SDimitry Andric // The offset from the address point to virtual function in the vtable.
146*0fca6ea1SDimitry Andric uint64_t FunctionOffset;
147*0fca6ea1SDimitry Andric // The instruction that computes the address point of vtable.
148*0fca6ea1SDimitry Andric Instruction *VPtr;
149*0fca6ea1SDimitry Andric // The compatible type used in LLVM type intrinsics.
150*0fca6ea1SDimitry Andric StringRef CompatibleTypeStr;
151*0fca6ea1SDimitry Andric };
152*0fca6ea1SDimitry Andric
153*0fca6ea1SDimitry Andric // The key is a virtual call, and value is its type information.
154*0fca6ea1SDimitry Andric using VirtualCallSiteTypeInfoMap =
155*0fca6ea1SDimitry Andric SmallDenseMap<const CallBase *, VirtualCallSiteInfo>;
156*0fca6ea1SDimitry Andric
157*0fca6ea1SDimitry Andric // The key is vtable GUID, and value is its value profile count.
158*0fca6ea1SDimitry Andric using VTableGUIDCountsMap = SmallDenseMap<uint64_t, uint64_t, 16>;
159*0fca6ea1SDimitry Andric
160*0fca6ea1SDimitry Andric // Return the address point offset of the given compatible type.
161*0fca6ea1SDimitry Andric //
162*0fca6ea1SDimitry Andric // Type metadata of a vtable specifies the types that can contain a pointer to
163*0fca6ea1SDimitry Andric // this vtable, for example, `Base*` can be a pointer to an derived type
164*0fca6ea1SDimitry Andric // but not vice versa. See also https://llvm.org/docs/TypeMetadata.html
165*0fca6ea1SDimitry Andric static std::optional<uint64_t>
getAddressPointOffset(const GlobalVariable & VTableVar,StringRef CompatibleType)166*0fca6ea1SDimitry Andric getAddressPointOffset(const GlobalVariable &VTableVar,
167*0fca6ea1SDimitry Andric StringRef CompatibleType) {
168*0fca6ea1SDimitry Andric SmallVector<MDNode *> Types;
169*0fca6ea1SDimitry Andric VTableVar.getMetadata(LLVMContext::MD_type, Types);
170*0fca6ea1SDimitry Andric
171*0fca6ea1SDimitry Andric for (MDNode *Type : Types)
172*0fca6ea1SDimitry Andric if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
173*0fca6ea1SDimitry Andric TypeId && TypeId->getString() == CompatibleType)
174*0fca6ea1SDimitry Andric return cast<ConstantInt>(
175*0fca6ea1SDimitry Andric cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
176*0fca6ea1SDimitry Andric ->getZExtValue();
177*0fca6ea1SDimitry Andric
178*0fca6ea1SDimitry Andric return std::nullopt;
179*0fca6ea1SDimitry Andric }
180*0fca6ea1SDimitry Andric
181*0fca6ea1SDimitry Andric // Return a constant representing the vtable's address point specified by the
182*0fca6ea1SDimitry Andric // offset.
getVTableAddressPointOffset(GlobalVariable * VTable,uint32_t AddressPointOffset)183*0fca6ea1SDimitry Andric static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
184*0fca6ea1SDimitry Andric uint32_t AddressPointOffset) {
185*0fca6ea1SDimitry Andric Module &M = *VTable->getParent();
186*0fca6ea1SDimitry Andric LLVMContext &Context = M.getContext();
187*0fca6ea1SDimitry Andric assert(AddressPointOffset <
188*0fca6ea1SDimitry Andric M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
189*0fca6ea1SDimitry Andric "Out-of-bound access");
190*0fca6ea1SDimitry Andric
191*0fca6ea1SDimitry Andric return ConstantExpr::getInBoundsGetElementPtr(
192*0fca6ea1SDimitry Andric Type::getInt8Ty(Context), VTable,
193*0fca6ea1SDimitry Andric llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
194*0fca6ea1SDimitry Andric }
195*0fca6ea1SDimitry Andric
196*0fca6ea1SDimitry Andric // Return the basic block in which Use `U` is used via its `UserInst`.
getUserBasicBlock(Use & U,Instruction * UserInst)197*0fca6ea1SDimitry Andric static BasicBlock *getUserBasicBlock(Use &U, Instruction *UserInst) {
198*0fca6ea1SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(UserInst))
199*0fca6ea1SDimitry Andric return PN->getIncomingBlock(U);
200*0fca6ea1SDimitry Andric
201*0fca6ea1SDimitry Andric return UserInst->getParent();
202*0fca6ea1SDimitry Andric }
203*0fca6ea1SDimitry Andric
204*0fca6ea1SDimitry Andric // `DestBB` is a suitable basic block to sink `Inst` into when `Inst` have users
205*0fca6ea1SDimitry Andric // and all users are in `DestBB`. The caller guarantees that `Inst->getParent()`
206*0fca6ea1SDimitry Andric // is the sole predecessor of `DestBB` and `DestBB` is dominated by
207*0fca6ea1SDimitry Andric // `Inst->getParent()`.
isDestBBSuitableForSink(Instruction * Inst,BasicBlock * DestBB)208*0fca6ea1SDimitry Andric static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
209*0fca6ea1SDimitry Andric // 'BB' is used only by assert.
210*0fca6ea1SDimitry Andric [[maybe_unused]] BasicBlock *BB = Inst->getParent();
211*0fca6ea1SDimitry Andric
212*0fca6ea1SDimitry Andric assert(BB != DestBB && BB->getTerminator()->getNumSuccessors() == 2 &&
213*0fca6ea1SDimitry Andric DestBB->getUniquePredecessor() == BB &&
214*0fca6ea1SDimitry Andric "Guaranteed by ICP transformation");
215*0fca6ea1SDimitry Andric
216*0fca6ea1SDimitry Andric BasicBlock *UserBB = nullptr;
217*0fca6ea1SDimitry Andric for (Use &Use : Inst->uses()) {
218*0fca6ea1SDimitry Andric User *User = Use.getUser();
219*0fca6ea1SDimitry Andric // Do checked cast since IR verifier guarantees that the user of an
220*0fca6ea1SDimitry Andric // instruction must be an instruction. See `Verifier::visitInstruction`.
221*0fca6ea1SDimitry Andric Instruction *UserInst = cast<Instruction>(User);
222*0fca6ea1SDimitry Andric // We can sink debug or pseudo instructions together with Inst.
223*0fca6ea1SDimitry Andric if (UserInst->isDebugOrPseudoInst())
224*0fca6ea1SDimitry Andric continue;
225*0fca6ea1SDimitry Andric UserBB = getUserBasicBlock(Use, UserInst);
226*0fca6ea1SDimitry Andric // Do not sink if Inst is used in a basic block that is not DestBB.
227*0fca6ea1SDimitry Andric // TODO: Sink to the common dominator of all user blocks.
228*0fca6ea1SDimitry Andric if (UserBB != DestBB)
229*0fca6ea1SDimitry Andric return false;
230*0fca6ea1SDimitry Andric }
231*0fca6ea1SDimitry Andric return UserBB != nullptr;
232*0fca6ea1SDimitry Andric }
233*0fca6ea1SDimitry Andric
234*0fca6ea1SDimitry Andric // For the virtual call dispatch sequence, try to sink vtable load instructions
235*0fca6ea1SDimitry Andric // to the cold indirect call fallback.
236*0fca6ea1SDimitry Andric // FIXME: Move the sink eligibility check below to a utility function in
237*0fca6ea1SDimitry Andric // Transforms/Utils/ directory.
tryToSinkInstruction(Instruction * I,BasicBlock * DestBlock)238*0fca6ea1SDimitry Andric static bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
239*0fca6ea1SDimitry Andric if (!isDestBBSuitableForSink(I, DestBlock))
240*0fca6ea1SDimitry Andric return false;
241*0fca6ea1SDimitry Andric
242*0fca6ea1SDimitry Andric // Do not move control-flow-involving, volatile loads, vaarg, alloca
243*0fca6ea1SDimitry Andric // instructions, etc.
244*0fca6ea1SDimitry Andric if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
245*0fca6ea1SDimitry Andric isa<AllocaInst>(I))
246*0fca6ea1SDimitry Andric return false;
247*0fca6ea1SDimitry Andric
248*0fca6ea1SDimitry Andric // Do not sink convergent call instructions.
249*0fca6ea1SDimitry Andric if (const auto *C = dyn_cast<CallBase>(I))
250*0fca6ea1SDimitry Andric if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
251*0fca6ea1SDimitry Andric return false;
252*0fca6ea1SDimitry Andric
253*0fca6ea1SDimitry Andric // Do not move an instruction that may write to memory.
254*0fca6ea1SDimitry Andric if (I->mayWriteToMemory())
255*0fca6ea1SDimitry Andric return false;
256*0fca6ea1SDimitry Andric
257*0fca6ea1SDimitry Andric // We can only sink load instructions if there is nothing between the load and
258*0fca6ea1SDimitry Andric // the end of block that could change the value.
259*0fca6ea1SDimitry Andric if (I->mayReadFromMemory()) {
260*0fca6ea1SDimitry Andric // We already know that SrcBlock is the unique predecessor of DestBlock.
261*0fca6ea1SDimitry Andric for (BasicBlock::iterator Scan = std::next(I->getIterator()),
262*0fca6ea1SDimitry Andric E = I->getParent()->end();
263*0fca6ea1SDimitry Andric Scan != E; ++Scan) {
264*0fca6ea1SDimitry Andric // Note analysis analysis can tell whether two pointers can point to the
265*0fca6ea1SDimitry Andric // same object in memory or not thereby find further opportunities to
266*0fca6ea1SDimitry Andric // sink.
267*0fca6ea1SDimitry Andric if (Scan->mayWriteToMemory())
268*0fca6ea1SDimitry Andric return false;
269*0fca6ea1SDimitry Andric }
270*0fca6ea1SDimitry Andric }
271*0fca6ea1SDimitry Andric
272*0fca6ea1SDimitry Andric BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
273*0fca6ea1SDimitry Andric I->moveBefore(*DestBlock, InsertPos);
274*0fca6ea1SDimitry Andric
275*0fca6ea1SDimitry Andric // TODO: Sink debug intrinsic users of I to 'DestBlock'.
276*0fca6ea1SDimitry Andric // 'InstCombinerImpl::tryToSinkInstructionDbgValues' and
277*0fca6ea1SDimitry Andric // 'InstCombinerImpl::tryToSinkInstructionDbgVariableRecords' already have
278*0fca6ea1SDimitry Andric // the core logic to do this.
279*0fca6ea1SDimitry Andric return true;
280*0fca6ea1SDimitry Andric }
281*0fca6ea1SDimitry Andric
282*0fca6ea1SDimitry Andric // Try to sink instructions after VPtr to the indirect call fallback.
283*0fca6ea1SDimitry Andric // Return the number of sunk IR instructions.
tryToSinkInstructions(BasicBlock * OriginalBB,BasicBlock * IndirectCallBB)284*0fca6ea1SDimitry Andric static int tryToSinkInstructions(BasicBlock *OriginalBB,
285*0fca6ea1SDimitry Andric BasicBlock *IndirectCallBB) {
286*0fca6ea1SDimitry Andric int SinkCount = 0;
287*0fca6ea1SDimitry Andric // Do not sink across a critical edge for simplicity.
288*0fca6ea1SDimitry Andric if (IndirectCallBB->getUniquePredecessor() != OriginalBB)
289*0fca6ea1SDimitry Andric return SinkCount;
290*0fca6ea1SDimitry Andric // Sink all eligible instructions in OriginalBB in reverse order.
291*0fca6ea1SDimitry Andric for (Instruction &I :
292*0fca6ea1SDimitry Andric llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(*OriginalBB))))
293*0fca6ea1SDimitry Andric if (tryToSinkInstruction(&I, IndirectCallBB))
294*0fca6ea1SDimitry Andric SinkCount++;
295*0fca6ea1SDimitry Andric
296*0fca6ea1SDimitry Andric return SinkCount;
297*0fca6ea1SDimitry Andric }
298*0fca6ea1SDimitry Andric
29906c3fb27SDimitry Andric // Promote indirect calls to conditional direct calls, keeping track of
30006c3fb27SDimitry Andric // thresholds.
30106c3fb27SDimitry Andric class IndirectCallPromoter {
3020b57cec5SDimitry Andric private:
3030b57cec5SDimitry Andric Function &F;
304*0fca6ea1SDimitry Andric Module &M;
305*0fca6ea1SDimitry Andric
306*0fca6ea1SDimitry Andric ProfileSummaryInfo *PSI = nullptr;
3070b57cec5SDimitry Andric
3080b57cec5SDimitry Andric // Symtab that maps indirect call profile values to function names and
3090b57cec5SDimitry Andric // defines.
31006c3fb27SDimitry Andric InstrProfSymtab *const Symtab;
3110b57cec5SDimitry Andric
31206c3fb27SDimitry Andric const bool SamplePGO;
3130b57cec5SDimitry Andric
314*0fca6ea1SDimitry Andric // A map from a virtual call to its type information.
315*0fca6ea1SDimitry Andric const VirtualCallSiteTypeInfoMap &VirtualCSInfo;
316*0fca6ea1SDimitry Andric
317*0fca6ea1SDimitry Andric VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal;
318*0fca6ea1SDimitry Andric
3190b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE;
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric // A struct that records the direct target and it's call count.
3220b57cec5SDimitry Andric struct PromotionCandidate {
32306c3fb27SDimitry Andric Function *const TargetFunction;
32406c3fb27SDimitry Andric const uint64_t Count;
3250b57cec5SDimitry Andric
326*0fca6ea1SDimitry Andric // The following fields only exists for promotion candidates with vtable
327*0fca6ea1SDimitry Andric // information.
328*0fca6ea1SDimitry Andric //
329*0fca6ea1SDimitry Andric // Due to class inheritance, one virtual call candidate can come from
330*0fca6ea1SDimitry Andric // multiple vtables. `VTableGUIDAndCounts` tracks the vtable GUIDs and
331*0fca6ea1SDimitry Andric // counts for 'TargetFunction'. `AddressPoints` stores the vtable address
332*0fca6ea1SDimitry Andric // points for comparison.
333*0fca6ea1SDimitry Andric VTableGUIDCountsMap VTableGUIDAndCounts;
334*0fca6ea1SDimitry Andric SmallVector<Constant *> AddressPoints;
335*0fca6ea1SDimitry Andric
PromotionCandidate__anon5ae95e860111::IndirectCallPromoter::PromotionCandidate3360b57cec5SDimitry Andric PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
3370b57cec5SDimitry Andric };
3380b57cec5SDimitry Andric
3390b57cec5SDimitry Andric // Check if the indirect-call call site should be promoted. Return the number
3400b57cec5SDimitry Andric // of promotions. Inst is the candidate indirect call, ValueDataRef
3410b57cec5SDimitry Andric // contains the array of value profile data for profiled targets,
3420b57cec5SDimitry Andric // TotalCount is the total profiled count of call executions, and
3430b57cec5SDimitry Andric // NumCandidates is the number of candidate entries in ValueDataRef.
3440b57cec5SDimitry Andric std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
345*0fca6ea1SDimitry Andric const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
3460b57cec5SDimitry Andric uint64_t TotalCount, uint32_t NumCandidates);
3470b57cec5SDimitry Andric
348*0fca6ea1SDimitry Andric // Promote a list of targets for one indirect-call callsite by comparing
349*0fca6ea1SDimitry Andric // indirect callee with functions. Return true if there are IR
350*0fca6ea1SDimitry Andric // transformations and false otherwise.
351*0fca6ea1SDimitry Andric bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
352*0fca6ea1SDimitry Andric ArrayRef<PromotionCandidate> Candidates,
353*0fca6ea1SDimitry Andric uint64_t TotalCount,
354*0fca6ea1SDimitry Andric ArrayRef<InstrProfValueData> ICallProfDataRef,
355*0fca6ea1SDimitry Andric uint32_t NumCandidates,
356*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts);
357*0fca6ea1SDimitry Andric
358*0fca6ea1SDimitry Andric // Promote a list of targets for one indirect call by comparing vtables with
359*0fca6ea1SDimitry Andric // functions. Return true if there are IR transformations and false
360*0fca6ea1SDimitry Andric // otherwise.
361*0fca6ea1SDimitry Andric bool tryToPromoteWithVTableCmp(
362*0fca6ea1SDimitry Andric CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
363*0fca6ea1SDimitry Andric uint64_t TotalFuncCount, uint32_t NumCandidates,
364*0fca6ea1SDimitry Andric MutableArrayRef<InstrProfValueData> ICallProfDataRef,
365*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts);
366*0fca6ea1SDimitry Andric
367*0fca6ea1SDimitry Andric // Return true if it's profitable to compare vtables for the callsite.
368*0fca6ea1SDimitry Andric bool isProfitableToCompareVTables(const CallBase &CB,
369*0fca6ea1SDimitry Andric ArrayRef<PromotionCandidate> Candidates,
370*0fca6ea1SDimitry Andric uint64_t TotalCount);
371*0fca6ea1SDimitry Andric
372*0fca6ea1SDimitry Andric // Given an indirect callsite and the list of function candidates, compute
373*0fca6ea1SDimitry Andric // the following vtable information in output parameters and return vtable
374*0fca6ea1SDimitry Andric // pointer if type profiles exist.
375*0fca6ea1SDimitry Andric // - Populate `VTableGUIDCounts` with <vtable-guid, count> using !prof
376*0fca6ea1SDimitry Andric // metadata attached on the vtable pointer.
377*0fca6ea1SDimitry Andric // - For each function candidate, finds out the vtables from which it gets
378*0fca6ea1SDimitry Andric // called and stores the <vtable-guid, count> in promotion candidate.
379*0fca6ea1SDimitry Andric Instruction *computeVTableInfos(const CallBase *CB,
380*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts,
381*0fca6ea1SDimitry Andric std::vector<PromotionCandidate> &Candidates);
382*0fca6ea1SDimitry Andric
383*0fca6ea1SDimitry Andric Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
384*0fca6ea1SDimitry Andric uint64_t AddressPointOffset);
385*0fca6ea1SDimitry Andric
386*0fca6ea1SDimitry Andric void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
387*0fca6ea1SDimitry Andric uint64_t Sum, uint32_t MaxMDCount);
388*0fca6ea1SDimitry Andric
389*0fca6ea1SDimitry Andric void updateVPtrValueProfiles(Instruction *VPtr,
390*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts);
3910b57cec5SDimitry Andric
3920b57cec5SDimitry Andric public:
IndirectCallPromoter(Function & Func,Module & M,ProfileSummaryInfo * PSI,InstrProfSymtab * Symtab,bool SamplePGO,const VirtualCallSiteTypeInfoMap & VirtualCSInfo,VTableAddressPointOffsetValMap & VTableAddressPointOffsetVal,OptimizationRemarkEmitter & ORE)393*0fca6ea1SDimitry Andric IndirectCallPromoter(
394*0fca6ea1SDimitry Andric Function &Func, Module &M, ProfileSummaryInfo *PSI,
395*0fca6ea1SDimitry Andric InstrProfSymtab *Symtab, bool SamplePGO,
396*0fca6ea1SDimitry Andric const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
397*0fca6ea1SDimitry Andric VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
39806c3fb27SDimitry Andric OptimizationRemarkEmitter &ORE)
399*0fca6ea1SDimitry Andric : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
400*0fca6ea1SDimitry Andric VirtualCSInfo(VirtualCSInfo),
401*0fca6ea1SDimitry Andric VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
40206c3fb27SDimitry Andric IndirectCallPromoter(const IndirectCallPromoter &) = delete;
40306c3fb27SDimitry Andric IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
4040b57cec5SDimitry Andric
4050b57cec5SDimitry Andric bool processFunction(ProfileSummaryInfo *PSI);
4060b57cec5SDimitry Andric };
4070b57cec5SDimitry Andric
4080b57cec5SDimitry Andric } // end anonymous namespace
4090b57cec5SDimitry Andric
4100b57cec5SDimitry Andric // Indirect-call promotion heuristic. The direct targets are sorted based on
4110b57cec5SDimitry Andric // the count. Stop at the first target that is not promoted.
41206c3fb27SDimitry Andric std::vector<IndirectCallPromoter::PromotionCandidate>
getPromotionCandidatesForCallSite(const CallBase & CB,ArrayRef<InstrProfValueData> ValueDataRef,uint64_t TotalCount,uint32_t NumCandidates)41306c3fb27SDimitry Andric IndirectCallPromoter::getPromotionCandidatesForCallSite(
414*0fca6ea1SDimitry Andric const CallBase &CB, ArrayRef<InstrProfValueData> ValueDataRef,
4150b57cec5SDimitry Andric uint64_t TotalCount, uint32_t NumCandidates) {
4160b57cec5SDimitry Andric std::vector<PromotionCandidate> Ret;
4170b57cec5SDimitry Andric
4185ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << CB
4190b57cec5SDimitry Andric << " Num_targets: " << ValueDataRef.size()
4200b57cec5SDimitry Andric << " Num_candidates: " << NumCandidates << "\n");
4210b57cec5SDimitry Andric NumOfPGOICallsites++;
4220b57cec5SDimitry Andric if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
4230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Skip: User options.\n");
4240b57cec5SDimitry Andric return Ret;
4250b57cec5SDimitry Andric }
4260b57cec5SDimitry Andric
4270b57cec5SDimitry Andric for (uint32_t I = 0; I < NumCandidates; I++) {
4280b57cec5SDimitry Andric uint64_t Count = ValueDataRef[I].Count;
4290b57cec5SDimitry Andric assert(Count <= TotalCount);
430fe6060f1SDimitry Andric (void)TotalCount;
4310b57cec5SDimitry Andric uint64_t Target = ValueDataRef[I].Value;
4320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
4330b57cec5SDimitry Andric << " Target_func: " << Target << "\n");
4340b57cec5SDimitry Andric
4355ffd83dbSDimitry Andric if (ICPInvokeOnly && isa<CallInst>(CB)) {
4360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
4370b57cec5SDimitry Andric ORE.emit([&]() {
4385ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
4390b57cec5SDimitry Andric << " Not promote: User options";
4400b57cec5SDimitry Andric });
4410b57cec5SDimitry Andric break;
4420b57cec5SDimitry Andric }
4435ffd83dbSDimitry Andric if (ICPCallOnly && isa<InvokeInst>(CB)) {
4440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
4450b57cec5SDimitry Andric ORE.emit([&]() {
4465ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", &CB)
4470b57cec5SDimitry Andric << " Not promote: User options";
4480b57cec5SDimitry Andric });
4490b57cec5SDimitry Andric break;
4500b57cec5SDimitry Andric }
4510b57cec5SDimitry Andric if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
4520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
4530b57cec5SDimitry Andric ORE.emit([&]() {
4545ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", &CB)
4550b57cec5SDimitry Andric << " Not promote: Cutoff reached";
4560b57cec5SDimitry Andric });
4570b57cec5SDimitry Andric break;
4580b57cec5SDimitry Andric }
4590b57cec5SDimitry Andric
460e8d8bef9SDimitry Andric // Don't promote if the symbol is not defined in the module. This avoids
461e8d8bef9SDimitry Andric // creating a reference to a symbol that doesn't exist in the module
462e8d8bef9SDimitry Andric // This can happen when we compile with a sample profile collected from
463e8d8bef9SDimitry Andric // one binary but used for another, which may have profiled targets that
464e8d8bef9SDimitry Andric // aren't used in the new binary. We might have a declaration initially in
465e8d8bef9SDimitry Andric // the case where the symbol is globally dead in the binary and removed by
466e8d8bef9SDimitry Andric // ThinLTO.
4670b57cec5SDimitry Andric Function *TargetFunction = Symtab->getFunction(Target);
468e8d8bef9SDimitry Andric if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
4690b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
4700b57cec5SDimitry Andric ORE.emit([&]() {
4715ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
4720b57cec5SDimitry Andric << "Cannot promote indirect call: target with md5sum "
4730b57cec5SDimitry Andric << ore::NV("target md5sum", Target) << " not found";
4740b57cec5SDimitry Andric });
4750b57cec5SDimitry Andric break;
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric
4780b57cec5SDimitry Andric const char *Reason = nullptr;
4795ffd83dbSDimitry Andric if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
4800b57cec5SDimitry Andric using namespace ore;
4810b57cec5SDimitry Andric
4820b57cec5SDimitry Andric ORE.emit([&]() {
4835ffd83dbSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
4840b57cec5SDimitry Andric << "Cannot promote indirect call to "
4850b57cec5SDimitry Andric << NV("TargetFunction", TargetFunction) << " with count of "
4860b57cec5SDimitry Andric << NV("Count", Count) << ": " << Reason;
4870b57cec5SDimitry Andric });
4880b57cec5SDimitry Andric break;
4890b57cec5SDimitry Andric }
4900b57cec5SDimitry Andric
4910b57cec5SDimitry Andric Ret.push_back(PromotionCandidate(TargetFunction, Count));
4920b57cec5SDimitry Andric TotalCount -= Count;
4930b57cec5SDimitry Andric }
4940b57cec5SDimitry Andric return Ret;
4950b57cec5SDimitry Andric }
4960b57cec5SDimitry Andric
getOrCreateVTableAddressPointVar(GlobalVariable * GV,uint64_t AddressPointOffset)497*0fca6ea1SDimitry Andric Constant *IndirectCallPromoter::getOrCreateVTableAddressPointVar(
498*0fca6ea1SDimitry Andric GlobalVariable *GV, uint64_t AddressPointOffset) {
499*0fca6ea1SDimitry Andric auto [Iter, Inserted] =
500*0fca6ea1SDimitry Andric VTableAddressPointOffsetVal[GV].try_emplace(AddressPointOffset, nullptr);
501*0fca6ea1SDimitry Andric if (Inserted)
502*0fca6ea1SDimitry Andric Iter->second = getVTableAddressPointOffset(GV, AddressPointOffset);
503*0fca6ea1SDimitry Andric return Iter->second;
504*0fca6ea1SDimitry Andric }
505*0fca6ea1SDimitry Andric
computeVTableInfos(const CallBase * CB,VTableGUIDCountsMap & GUIDCountsMap,std::vector<PromotionCandidate> & Candidates)506*0fca6ea1SDimitry Andric Instruction *IndirectCallPromoter::computeVTableInfos(
507*0fca6ea1SDimitry Andric const CallBase *CB, VTableGUIDCountsMap &GUIDCountsMap,
508*0fca6ea1SDimitry Andric std::vector<PromotionCandidate> &Candidates) {
509*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse)
510*0fca6ea1SDimitry Andric return nullptr;
511*0fca6ea1SDimitry Andric
512*0fca6ea1SDimitry Andric // Take the following code sequence as an example, here is how the code works
513*0fca6ea1SDimitry Andric // @vtable1 = {[n x ptr] [... ptr @func1]}
514*0fca6ea1SDimitry Andric // @vtable2 = {[m x ptr] [... ptr @func2]}
515*0fca6ea1SDimitry Andric //
516*0fca6ea1SDimitry Andric // %vptr = load ptr, ptr %d, !prof !0
517*0fca6ea1SDimitry Andric // %0 = tail call i1 @llvm.type.test(ptr %vptr, metadata !"vtable1")
518*0fca6ea1SDimitry Andric // tail call void @llvm.assume(i1 %0)
519*0fca6ea1SDimitry Andric // %vfn = getelementptr inbounds ptr, ptr %vptr, i64 1
520*0fca6ea1SDimitry Andric // %1 = load ptr, ptr %vfn
521*0fca6ea1SDimitry Andric // call void %1(ptr %d), !prof !1
522*0fca6ea1SDimitry Andric //
523*0fca6ea1SDimitry Andric // !0 = !{!"VP", i32 2, i64 100, i64 123, i64 50, i64 456, i64 50}
524*0fca6ea1SDimitry Andric // !1 = !{!"VP", i32 0, i64 100, i64 789, i64 50, i64 579, i64 50}
525*0fca6ea1SDimitry Andric //
526*0fca6ea1SDimitry Andric // Step 1. Find out the %vptr instruction for indirect call and use its !prof
527*0fca6ea1SDimitry Andric // to populate `GUIDCountsMap`.
528*0fca6ea1SDimitry Andric // Step 2. For each vtable-guid, look up its definition from symtab. LTO can
529*0fca6ea1SDimitry Andric // make vtable definitions visible across modules.
530*0fca6ea1SDimitry Andric // Step 3. Compute the byte offset of the virtual call, by adding vtable
531*0fca6ea1SDimitry Andric // address point offset and function's offset relative to vtable address
532*0fca6ea1SDimitry Andric // point. For each function candidate, this step tells us the vtable from
533*0fca6ea1SDimitry Andric // which it comes from, and the vtable address point to compare %vptr with.
534*0fca6ea1SDimitry Andric
535*0fca6ea1SDimitry Andric // Only virtual calls have virtual call site info.
536*0fca6ea1SDimitry Andric auto Iter = VirtualCSInfo.find(CB);
537*0fca6ea1SDimitry Andric if (Iter == VirtualCSInfo.end())
538*0fca6ea1SDimitry Andric return nullptr;
539*0fca6ea1SDimitry Andric
540*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\nComputing vtable infos for callsite #"
541*0fca6ea1SDimitry Andric << NumOfPGOICallsites << "\n");
542*0fca6ea1SDimitry Andric
543*0fca6ea1SDimitry Andric const auto &VirtualCallInfo = Iter->second;
544*0fca6ea1SDimitry Andric Instruction *VPtr = VirtualCallInfo.VPtr;
545*0fca6ea1SDimitry Andric
546*0fca6ea1SDimitry Andric SmallDenseMap<Function *, int, 4> CalleeIndexMap;
547*0fca6ea1SDimitry Andric for (size_t I = 0; I < Candidates.size(); I++)
548*0fca6ea1SDimitry Andric CalleeIndexMap[Candidates[I].TargetFunction] = I;
549*0fca6ea1SDimitry Andric
550*0fca6ea1SDimitry Andric uint64_t TotalVTableCount = 0;
551*0fca6ea1SDimitry Andric auto VTableValueDataArray =
552*0fca6ea1SDimitry Andric getValueProfDataFromInst(*VirtualCallInfo.VPtr, IPVK_VTableTarget,
553*0fca6ea1SDimitry Andric MaxNumVTableAnnotations, TotalVTableCount);
554*0fca6ea1SDimitry Andric if (VTableValueDataArray.empty())
555*0fca6ea1SDimitry Andric return VPtr;
556*0fca6ea1SDimitry Andric
557*0fca6ea1SDimitry Andric // Compute the functions and counts from by each vtable.
558*0fca6ea1SDimitry Andric for (const auto &V : VTableValueDataArray) {
559*0fca6ea1SDimitry Andric uint64_t VTableVal = V.Value;
560*0fca6ea1SDimitry Andric GUIDCountsMap[VTableVal] = V.Count;
561*0fca6ea1SDimitry Andric GlobalVariable *VTableVar = Symtab->getGlobalVariable(VTableVal);
562*0fca6ea1SDimitry Andric if (!VTableVar) {
563*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Cannot find vtable definition for " << VTableVal
564*0fca6ea1SDimitry Andric << "; maybe the vtable isn't imported\n");
565*0fca6ea1SDimitry Andric continue;
566*0fca6ea1SDimitry Andric }
567*0fca6ea1SDimitry Andric
568*0fca6ea1SDimitry Andric std::optional<uint64_t> MaybeAddressPointOffset =
569*0fca6ea1SDimitry Andric getAddressPointOffset(*VTableVar, VirtualCallInfo.CompatibleTypeStr);
570*0fca6ea1SDimitry Andric if (!MaybeAddressPointOffset)
571*0fca6ea1SDimitry Andric continue;
572*0fca6ea1SDimitry Andric
573*0fca6ea1SDimitry Andric const uint64_t AddressPointOffset = *MaybeAddressPointOffset;
574*0fca6ea1SDimitry Andric
575*0fca6ea1SDimitry Andric Function *Callee = nullptr;
576*0fca6ea1SDimitry Andric std::tie(Callee, std::ignore) = getFunctionAtVTableOffset(
577*0fca6ea1SDimitry Andric VTableVar, AddressPointOffset + VirtualCallInfo.FunctionOffset, M);
578*0fca6ea1SDimitry Andric if (!Callee)
579*0fca6ea1SDimitry Andric continue;
580*0fca6ea1SDimitry Andric auto CalleeIndexIter = CalleeIndexMap.find(Callee);
581*0fca6ea1SDimitry Andric if (CalleeIndexIter == CalleeIndexMap.end())
582*0fca6ea1SDimitry Andric continue;
583*0fca6ea1SDimitry Andric
584*0fca6ea1SDimitry Andric auto &Candidate = Candidates[CalleeIndexIter->second];
585*0fca6ea1SDimitry Andric // There shouldn't be duplicate GUIDs in one !prof metadata (except
586*0fca6ea1SDimitry Andric // duplicated zeros), so assign counters directly won't cause overwrite or
587*0fca6ea1SDimitry Andric // counter loss.
588*0fca6ea1SDimitry Andric Candidate.VTableGUIDAndCounts[VTableVal] = V.Count;
589*0fca6ea1SDimitry Andric Candidate.AddressPoints.push_back(
590*0fca6ea1SDimitry Andric getOrCreateVTableAddressPointVar(VTableVar, AddressPointOffset));
591*0fca6ea1SDimitry Andric }
592*0fca6ea1SDimitry Andric
593*0fca6ea1SDimitry Andric return VPtr;
594*0fca6ea1SDimitry Andric }
595*0fca6ea1SDimitry Andric
596*0fca6ea1SDimitry Andric // Creates 'branch_weights' prof metadata using TrueWeight and FalseWeight.
597*0fca6ea1SDimitry Andric // Scales uint64_t counters down to uint32_t if necessary to prevent overflow.
createBranchWeights(LLVMContext & Context,uint64_t TrueWeight,uint64_t FalseWeight)598*0fca6ea1SDimitry Andric static MDNode *createBranchWeights(LLVMContext &Context, uint64_t TrueWeight,
599*0fca6ea1SDimitry Andric uint64_t FalseWeight) {
600*0fca6ea1SDimitry Andric MDBuilder MDB(Context);
601*0fca6ea1SDimitry Andric uint64_t Scale = calculateCountScale(std::max(TrueWeight, FalseWeight));
602*0fca6ea1SDimitry Andric return MDB.createBranchWeights(scaleBranchCount(TrueWeight, Scale),
603*0fca6ea1SDimitry Andric scaleBranchCount(FalseWeight, Scale));
604*0fca6ea1SDimitry Andric }
605*0fca6ea1SDimitry Andric
promoteIndirectCall(CallBase & CB,Function * DirectCallee,uint64_t Count,uint64_t TotalCount,bool AttachProfToDirectCall,OptimizationRemarkEmitter * ORE)6065ffd83dbSDimitry Andric CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
6070b57cec5SDimitry Andric uint64_t Count, uint64_t TotalCount,
6080b57cec5SDimitry Andric bool AttachProfToDirectCall,
6090b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE) {
610*0fca6ea1SDimitry Andric CallBase &NewInst = promoteCallWithIfThenElse(
611*0fca6ea1SDimitry Andric CB, DirectCallee,
612*0fca6ea1SDimitry Andric createBranchWeights(CB.getContext(), Count, TotalCount - Count));
6130b57cec5SDimitry Andric
614*0fca6ea1SDimitry Andric if (AttachProfToDirectCall)
615*0fca6ea1SDimitry Andric setBranchWeights(NewInst, {static_cast<uint32_t>(Count)},
616*0fca6ea1SDimitry Andric /*IsExpected=*/false);
6170b57cec5SDimitry Andric
6180b57cec5SDimitry Andric using namespace ore;
6190b57cec5SDimitry Andric
6200b57cec5SDimitry Andric if (ORE)
6210b57cec5SDimitry Andric ORE->emit([&]() {
6225ffd83dbSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Promoted", &CB)
6230b57cec5SDimitry Andric << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
6240b57cec5SDimitry Andric << " with count " << NV("Count", Count) << " out of "
6250b57cec5SDimitry Andric << NV("TotalCount", TotalCount);
6260b57cec5SDimitry Andric });
6270b57cec5SDimitry Andric return NewInst;
6280b57cec5SDimitry Andric }
6290b57cec5SDimitry Andric
6300b57cec5SDimitry Andric // Promote indirect-call to conditional direct-call for one callsite.
tryToPromoteWithFuncCmp(CallBase & CB,Instruction * VPtr,ArrayRef<PromotionCandidate> Candidates,uint64_t TotalCount,ArrayRef<InstrProfValueData> ICallProfDataRef,uint32_t NumCandidates,VTableGUIDCountsMap & VTableGUIDCounts)631*0fca6ea1SDimitry Andric bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
632*0fca6ea1SDimitry Andric CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
633*0fca6ea1SDimitry Andric uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
634*0fca6ea1SDimitry Andric uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
6350b57cec5SDimitry Andric uint32_t NumPromoted = 0;
6360b57cec5SDimitry Andric
637bdd1243dSDimitry Andric for (const auto &C : Candidates) {
638*0fca6ea1SDimitry Andric uint64_t FuncCount = C.Count;
639*0fca6ea1SDimitry Andric pgo::promoteIndirectCall(CB, C.TargetFunction, FuncCount, TotalCount,
640*0fca6ea1SDimitry Andric SamplePGO, &ORE);
641*0fca6ea1SDimitry Andric assert(TotalCount >= FuncCount);
642*0fca6ea1SDimitry Andric TotalCount -= FuncCount;
6430b57cec5SDimitry Andric NumOfPGOICallPromotion++;
6440b57cec5SDimitry Andric NumPromoted++;
645*0fca6ea1SDimitry Andric
646*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
647*0fca6ea1SDimitry Andric continue;
648*0fca6ea1SDimitry Andric
649*0fca6ea1SDimitry Andric // After a virtual call candidate gets promoted, update the vtable's counts
650*0fca6ea1SDimitry Andric // proportionally. Each vtable-guid in `C.VTableGUIDAndCounts` represents
651*0fca6ea1SDimitry Andric // a vtable from which the virtual call is loaded. Compute the sum and use
652*0fca6ea1SDimitry Andric // 128-bit APInt to improve accuracy.
653*0fca6ea1SDimitry Andric uint64_t SumVTableCount = 0;
654*0fca6ea1SDimitry Andric for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts)
655*0fca6ea1SDimitry Andric SumVTableCount += VTableCount;
656*0fca6ea1SDimitry Andric
657*0fca6ea1SDimitry Andric for (const auto &[GUID, VTableCount] : C.VTableGUIDAndCounts) {
658*0fca6ea1SDimitry Andric APInt APFuncCount((unsigned)128, FuncCount, false /*signed*/);
659*0fca6ea1SDimitry Andric APFuncCount *= VTableCount;
660*0fca6ea1SDimitry Andric VTableGUIDCounts[GUID] -= APFuncCount.udiv(SumVTableCount).getZExtValue();
6610b57cec5SDimitry Andric }
662*0fca6ea1SDimitry Andric }
663*0fca6ea1SDimitry Andric if (NumPromoted == 0)
664*0fca6ea1SDimitry Andric return false;
665*0fca6ea1SDimitry Andric
666*0fca6ea1SDimitry Andric assert(NumPromoted <= ICallProfDataRef.size() &&
667*0fca6ea1SDimitry Andric "Number of promoted functions should not be greater than the number "
668*0fca6ea1SDimitry Andric "of values in profile metadata");
669*0fca6ea1SDimitry Andric
670*0fca6ea1SDimitry Andric // Update value profiles on the indirect call.
671*0fca6ea1SDimitry Andric updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
672*0fca6ea1SDimitry Andric NumCandidates);
673*0fca6ea1SDimitry Andric updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
674*0fca6ea1SDimitry Andric return true;
675*0fca6ea1SDimitry Andric }
676*0fca6ea1SDimitry Andric
updateFuncValueProfiles(CallBase & CB,ArrayRef<InstrProfValueData> CallVDs,uint64_t TotalCount,uint32_t MaxMDCount)677*0fca6ea1SDimitry Andric void IndirectCallPromoter::updateFuncValueProfiles(
678*0fca6ea1SDimitry Andric CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
679*0fca6ea1SDimitry Andric uint32_t MaxMDCount) {
680*0fca6ea1SDimitry Andric // First clear the existing !prof.
681*0fca6ea1SDimitry Andric CB.setMetadata(LLVMContext::MD_prof, nullptr);
682*0fca6ea1SDimitry Andric // Annotate the remaining value profiles if counter is not zero.
683*0fca6ea1SDimitry Andric if (TotalCount != 0)
684*0fca6ea1SDimitry Andric annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
685*0fca6ea1SDimitry Andric MaxMDCount);
686*0fca6ea1SDimitry Andric }
687*0fca6ea1SDimitry Andric
updateVPtrValueProfiles(Instruction * VPtr,VTableGUIDCountsMap & VTableGUIDCounts)688*0fca6ea1SDimitry Andric void IndirectCallPromoter::updateVPtrValueProfiles(
689*0fca6ea1SDimitry Andric Instruction *VPtr, VTableGUIDCountsMap &VTableGUIDCounts) {
690*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse || VPtr == nullptr ||
691*0fca6ea1SDimitry Andric !VPtr->getMetadata(LLVMContext::MD_prof))
692*0fca6ea1SDimitry Andric return;
693*0fca6ea1SDimitry Andric VPtr->setMetadata(LLVMContext::MD_prof, nullptr);
694*0fca6ea1SDimitry Andric std::vector<InstrProfValueData> VTableValueProfiles;
695*0fca6ea1SDimitry Andric uint64_t TotalVTableCount = 0;
696*0fca6ea1SDimitry Andric for (auto [GUID, Count] : VTableGUIDCounts) {
697*0fca6ea1SDimitry Andric if (Count == 0)
698*0fca6ea1SDimitry Andric continue;
699*0fca6ea1SDimitry Andric
700*0fca6ea1SDimitry Andric VTableValueProfiles.push_back({GUID, Count});
701*0fca6ea1SDimitry Andric TotalVTableCount += Count;
702*0fca6ea1SDimitry Andric }
703*0fca6ea1SDimitry Andric llvm::sort(VTableValueProfiles,
704*0fca6ea1SDimitry Andric [](const InstrProfValueData &LHS, const InstrProfValueData &RHS) {
705*0fca6ea1SDimitry Andric return LHS.Count > RHS.Count;
706*0fca6ea1SDimitry Andric });
707*0fca6ea1SDimitry Andric
708*0fca6ea1SDimitry Andric annotateValueSite(M, *VPtr, VTableValueProfiles, TotalVTableCount,
709*0fca6ea1SDimitry Andric IPVK_VTableTarget, VTableValueProfiles.size());
710*0fca6ea1SDimitry Andric }
711*0fca6ea1SDimitry Andric
tryToPromoteWithVTableCmp(CallBase & CB,Instruction * VPtr,ArrayRef<PromotionCandidate> Candidates,uint64_t TotalFuncCount,uint32_t NumCandidates,MutableArrayRef<InstrProfValueData> ICallProfDataRef,VTableGUIDCountsMap & VTableGUIDCounts)712*0fca6ea1SDimitry Andric bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
713*0fca6ea1SDimitry Andric CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
714*0fca6ea1SDimitry Andric uint64_t TotalFuncCount, uint32_t NumCandidates,
715*0fca6ea1SDimitry Andric MutableArrayRef<InstrProfValueData> ICallProfDataRef,
716*0fca6ea1SDimitry Andric VTableGUIDCountsMap &VTableGUIDCounts) {
717*0fca6ea1SDimitry Andric SmallVector<uint64_t, 4> PromotedFuncCount;
718*0fca6ea1SDimitry Andric
719*0fca6ea1SDimitry Andric for (const auto &Candidate : Candidates) {
720*0fca6ea1SDimitry Andric for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
721*0fca6ea1SDimitry Andric VTableGUIDCounts[GUID] -= Count;
722*0fca6ea1SDimitry Andric
723*0fca6ea1SDimitry Andric // 'OriginalBB' is the basic block of indirect call. After each candidate
724*0fca6ea1SDimitry Andric // is promoted, a new basic block is created for the indirect fallback basic
725*0fca6ea1SDimitry Andric // block and indirect call `CB` is moved into this new BB.
726*0fca6ea1SDimitry Andric BasicBlock *OriginalBB = CB.getParent();
727*0fca6ea1SDimitry Andric promoteCallWithVTableCmp(
728*0fca6ea1SDimitry Andric CB, VPtr, Candidate.TargetFunction, Candidate.AddressPoints,
729*0fca6ea1SDimitry Andric createBranchWeights(CB.getContext(), Candidate.Count,
730*0fca6ea1SDimitry Andric TotalFuncCount - Candidate.Count));
731*0fca6ea1SDimitry Andric
732*0fca6ea1SDimitry Andric int SinkCount = tryToSinkInstructions(OriginalBB, CB.getParent());
733*0fca6ea1SDimitry Andric
734*0fca6ea1SDimitry Andric ORE.emit([&]() {
735*0fca6ea1SDimitry Andric OptimizationRemark Remark(DEBUG_TYPE, "Promoted", &CB);
736*0fca6ea1SDimitry Andric
737*0fca6ea1SDimitry Andric const auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
738*0fca6ea1SDimitry Andric Remark << "Promote indirect call to "
739*0fca6ea1SDimitry Andric << ore::NV("DirectCallee", Candidate.TargetFunction)
740*0fca6ea1SDimitry Andric << " with count " << ore::NV("Count", Candidate.Count)
741*0fca6ea1SDimitry Andric << " out of " << ore::NV("TotalCount", TotalFuncCount) << ", sink "
742*0fca6ea1SDimitry Andric << ore::NV("SinkCount", SinkCount)
743*0fca6ea1SDimitry Andric << " instruction(s) and compare "
744*0fca6ea1SDimitry Andric << ore::NV("VTable", VTableGUIDAndCounts.size())
745*0fca6ea1SDimitry Andric << " vtable(s): {";
746*0fca6ea1SDimitry Andric
747*0fca6ea1SDimitry Andric // Sort GUIDs so remark message is deterministic.
748*0fca6ea1SDimitry Andric std::set<uint64_t> GUIDSet;
749*0fca6ea1SDimitry Andric for (auto [GUID, Count] : VTableGUIDAndCounts)
750*0fca6ea1SDimitry Andric GUIDSet.insert(GUID);
751*0fca6ea1SDimitry Andric for (auto Iter = GUIDSet.begin(); Iter != GUIDSet.end(); Iter++) {
752*0fca6ea1SDimitry Andric if (Iter != GUIDSet.begin())
753*0fca6ea1SDimitry Andric Remark << ", ";
754*0fca6ea1SDimitry Andric Remark << ore::NV("VTable", Symtab->getGlobalVariable(*Iter));
755*0fca6ea1SDimitry Andric }
756*0fca6ea1SDimitry Andric
757*0fca6ea1SDimitry Andric Remark << "}";
758*0fca6ea1SDimitry Andric
759*0fca6ea1SDimitry Andric return Remark;
760*0fca6ea1SDimitry Andric });
761*0fca6ea1SDimitry Andric
762*0fca6ea1SDimitry Andric PromotedFuncCount.push_back(Candidate.Count);
763*0fca6ea1SDimitry Andric
764*0fca6ea1SDimitry Andric assert(TotalFuncCount >= Candidate.Count &&
765*0fca6ea1SDimitry Andric "Within one prof metadata, total count is the sum of counts from "
766*0fca6ea1SDimitry Andric "individual <target, count> pairs");
767*0fca6ea1SDimitry Andric // Use std::min since 'TotalFuncCount' is the saturated sum of individual
768*0fca6ea1SDimitry Andric // counts, see
769*0fca6ea1SDimitry Andric // https://github.com/llvm/llvm-project/blob/abedb3b8356d5d56f1c575c4f7682fba2cb19787/llvm/lib/ProfileData/InstrProf.cpp#L1281-L1288
770*0fca6ea1SDimitry Andric TotalFuncCount -= std::min(TotalFuncCount, Candidate.Count);
771*0fca6ea1SDimitry Andric NumOfPGOICallPromotion++;
772*0fca6ea1SDimitry Andric }
773*0fca6ea1SDimitry Andric
774*0fca6ea1SDimitry Andric if (PromotedFuncCount.empty())
775*0fca6ea1SDimitry Andric return false;
776*0fca6ea1SDimitry Andric
777*0fca6ea1SDimitry Andric // Update value profiles for 'CB' and 'VPtr', assuming that each 'CB' has a
778*0fca6ea1SDimitry Andric // a distinct 'VPtr'.
779*0fca6ea1SDimitry Andric // FIXME: When Clang `-fstrict-vtable-pointers` is enabled, a vtable might be
780*0fca6ea1SDimitry Andric // used to load multiple virtual functions. The vtable profiles needs to be
781*0fca6ea1SDimitry Andric // updated properly in that case (e.g, for each indirect call annotate both
782*0fca6ea1SDimitry Andric // type profiles and function profiles in one !prof).
783*0fca6ea1SDimitry Andric for (size_t I = 0; I < PromotedFuncCount.size(); I++)
784*0fca6ea1SDimitry Andric ICallProfDataRef[I].Count -=
785*0fca6ea1SDimitry Andric std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
786*0fca6ea1SDimitry Andric // Sort value profiles by count in descending order.
787*0fca6ea1SDimitry Andric llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
788*0fca6ea1SDimitry Andric const InstrProfValueData &RHS) {
789*0fca6ea1SDimitry Andric return LHS.Count > RHS.Count;
790*0fca6ea1SDimitry Andric });
791*0fca6ea1SDimitry Andric // Drop the <target-value, count> pair if count is zero.
792*0fca6ea1SDimitry Andric ArrayRef<InstrProfValueData> VDs(
793*0fca6ea1SDimitry Andric ICallProfDataRef.begin(),
794*0fca6ea1SDimitry Andric llvm::upper_bound(ICallProfDataRef, 0U,
795*0fca6ea1SDimitry Andric [](uint64_t Count, const InstrProfValueData &ProfData) {
796*0fca6ea1SDimitry Andric return ProfData.Count <= Count;
797*0fca6ea1SDimitry Andric }));
798*0fca6ea1SDimitry Andric updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
799*0fca6ea1SDimitry Andric updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
800*0fca6ea1SDimitry Andric return true;
8010b57cec5SDimitry Andric }
8020b57cec5SDimitry Andric
8030b57cec5SDimitry Andric // Traverse all the indirect-call callsite and get the value profile
8040b57cec5SDimitry Andric // annotation to perform indirect-call promotion.
processFunction(ProfileSummaryInfo * PSI)80506c3fb27SDimitry Andric bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
8060b57cec5SDimitry Andric bool Changed = false;
8070b57cec5SDimitry Andric ICallPromotionAnalysis ICallAnalysis;
8085ffd83dbSDimitry Andric for (auto *CB : findIndirectCalls(F)) {
809*0fca6ea1SDimitry Andric uint32_t NumCandidates;
8100b57cec5SDimitry Andric uint64_t TotalCount;
8110b57cec5SDimitry Andric auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
812*0fca6ea1SDimitry Andric CB, TotalCount, NumCandidates);
8130b57cec5SDimitry Andric if (!NumCandidates ||
8140b57cec5SDimitry Andric (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
8150b57cec5SDimitry Andric continue;
816*0fca6ea1SDimitry Andric
8170b57cec5SDimitry Andric auto PromotionCandidates = getPromotionCandidatesForCallSite(
8185ffd83dbSDimitry Andric *CB, ICallProfDataRef, TotalCount, NumCandidates);
8190b57cec5SDimitry Andric
820*0fca6ea1SDimitry Andric VTableGUIDCountsMap VTableGUIDCounts;
821*0fca6ea1SDimitry Andric Instruction *VPtr =
822*0fca6ea1SDimitry Andric computeVTableInfos(CB, VTableGUIDCounts, PromotionCandidates);
823*0fca6ea1SDimitry Andric
824*0fca6ea1SDimitry Andric if (isProfitableToCompareVTables(*CB, PromotionCandidates, TotalCount))
825*0fca6ea1SDimitry Andric Changed |= tryToPromoteWithVTableCmp(*CB, VPtr, PromotionCandidates,
826*0fca6ea1SDimitry Andric TotalCount, NumCandidates,
827*0fca6ea1SDimitry Andric ICallProfDataRef, VTableGUIDCounts);
828*0fca6ea1SDimitry Andric else
829*0fca6ea1SDimitry Andric Changed |= tryToPromoteWithFuncCmp(*CB, VPtr, PromotionCandidates,
830*0fca6ea1SDimitry Andric TotalCount, ICallProfDataRef,
831*0fca6ea1SDimitry Andric NumCandidates, VTableGUIDCounts);
8320b57cec5SDimitry Andric }
8330b57cec5SDimitry Andric return Changed;
8340b57cec5SDimitry Andric }
8350b57cec5SDimitry Andric
836*0fca6ea1SDimitry Andric // TODO: Return false if the function addressing and vtable load instructions
837*0fca6ea1SDimitry Andric // cannot sink to indirect fallback.
isProfitableToCompareVTables(const CallBase & CB,ArrayRef<PromotionCandidate> Candidates,uint64_t TotalCount)838*0fca6ea1SDimitry Andric bool IndirectCallPromoter::isProfitableToCompareVTables(
839*0fca6ea1SDimitry Andric const CallBase &CB, ArrayRef<PromotionCandidate> Candidates,
840*0fca6ea1SDimitry Andric uint64_t TotalCount) {
841*0fca6ea1SDimitry Andric if (!EnableVTableProfileUse || Candidates.empty())
842*0fca6ea1SDimitry Andric return false;
843*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\nEvaluating vtable profitability for callsite #"
844*0fca6ea1SDimitry Andric << NumOfPGOICallsites << CB << "\n");
845*0fca6ea1SDimitry Andric uint64_t RemainingVTableCount = TotalCount;
846*0fca6ea1SDimitry Andric const size_t CandidateSize = Candidates.size();
847*0fca6ea1SDimitry Andric for (size_t I = 0; I < CandidateSize; I++) {
848*0fca6ea1SDimitry Andric auto &Candidate = Candidates[I];
849*0fca6ea1SDimitry Andric auto &VTableGUIDAndCounts = Candidate.VTableGUIDAndCounts;
850*0fca6ea1SDimitry Andric
851*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Candidate " << I << " FunctionCount: "
852*0fca6ea1SDimitry Andric << Candidate.Count << ", VTableCounts:");
853*0fca6ea1SDimitry Andric // Add [[maybe_unused]] since <GUID, Count> are only used by LLVM_DEBUG.
854*0fca6ea1SDimitry Andric for ([[maybe_unused]] auto &[GUID, Count] : VTableGUIDAndCounts)
855*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " {" << Symtab->getGlobalVariable(GUID)->getName()
856*0fca6ea1SDimitry Andric << ", " << Count << "}");
857*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
858*0fca6ea1SDimitry Andric
859*0fca6ea1SDimitry Andric uint64_t CandidateVTableCount = 0;
860*0fca6ea1SDimitry Andric for (auto &[GUID, Count] : VTableGUIDAndCounts)
861*0fca6ea1SDimitry Andric CandidateVTableCount += Count;
862*0fca6ea1SDimitry Andric
863*0fca6ea1SDimitry Andric if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
864*0fca6ea1SDimitry Andric LLVM_DEBUG(
865*0fca6ea1SDimitry Andric dbgs() << " function count " << Candidate.Count
866*0fca6ea1SDimitry Andric << " and its vtable sum count " << CandidateVTableCount
867*0fca6ea1SDimitry Andric << " have discrepancies. Bail out vtable comparison.\n");
868*0fca6ea1SDimitry Andric return false;
869*0fca6ea1SDimitry Andric }
870*0fca6ea1SDimitry Andric
871*0fca6ea1SDimitry Andric RemainingVTableCount -= Candidate.Count;
872*0fca6ea1SDimitry Andric
873*0fca6ea1SDimitry Andric // 'MaxNumVTable' limits the number of vtables to make vtable comparison
874*0fca6ea1SDimitry Andric // profitable. Comparing multiple vtables for one function candidate will
875*0fca6ea1SDimitry Andric // insert additional instructions on the hot path, and allowing more than
876*0fca6ea1SDimitry Andric // one vtable for non last candidates may or may not elongate the dependency
877*0fca6ea1SDimitry Andric // chain for the subsequent candidates. Set its value to 1 for non-last
878*0fca6ea1SDimitry Andric // candidate and allow option to override it for the last candidate.
879*0fca6ea1SDimitry Andric int MaxNumVTable = 1;
880*0fca6ea1SDimitry Andric if (I == CandidateSize - 1)
881*0fca6ea1SDimitry Andric MaxNumVTable = ICPMaxNumVTableLastCandidate;
882*0fca6ea1SDimitry Andric
883*0fca6ea1SDimitry Andric if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
884*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
885*0fca6ea1SDimitry Andric << Candidate.AddressPoints.size()
886*0fca6ea1SDimitry Andric << " vtables. Bail out for vtable comparison.\n");
887*0fca6ea1SDimitry Andric return false;
888*0fca6ea1SDimitry Andric }
889*0fca6ea1SDimitry Andric }
890*0fca6ea1SDimitry Andric
891*0fca6ea1SDimitry Andric // If the indirect fallback is not cold, don't compare vtables.
892*0fca6ea1SDimitry Andric if (PSI && PSI->hasProfileSummary() &&
893*0fca6ea1SDimitry Andric !PSI->isColdCount(RemainingVTableCount)) {
894*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << " Indirect fallback basic block is not cold. Bail "
895*0fca6ea1SDimitry Andric "out for vtable comparison.\n");
896*0fca6ea1SDimitry Andric return false;
897*0fca6ea1SDimitry Andric }
898*0fca6ea1SDimitry Andric
899*0fca6ea1SDimitry Andric return true;
900*0fca6ea1SDimitry Andric }
901*0fca6ea1SDimitry Andric
902*0fca6ea1SDimitry Andric // For virtual calls in the module, collect per-callsite information which will
903*0fca6ea1SDimitry Andric // be used to associate an ICP candidate with a vtable and a specific function
904*0fca6ea1SDimitry Andric // in the vtable. With type intrinsics (llvm.type.test), we can find virtual
905*0fca6ea1SDimitry Andric // calls in a compile-time efficient manner (by iterating its users) and more
906*0fca6ea1SDimitry Andric // importantly use the compatible type later to figure out the function byte
907*0fca6ea1SDimitry Andric // offset relative to the start of vtables.
908*0fca6ea1SDimitry Andric static void
computeVirtualCallSiteTypeInfoMap(Module & M,ModuleAnalysisManager & MAM,VirtualCallSiteTypeInfoMap & VirtualCSInfo)909*0fca6ea1SDimitry Andric computeVirtualCallSiteTypeInfoMap(Module &M, ModuleAnalysisManager &MAM,
910*0fca6ea1SDimitry Andric VirtualCallSiteTypeInfoMap &VirtualCSInfo) {
911*0fca6ea1SDimitry Andric // Right now only llvm.type.test is used to find out virtual call sites.
912*0fca6ea1SDimitry Andric // With ThinLTO and whole-program-devirtualization, llvm.type.test and
913*0fca6ea1SDimitry Andric // llvm.public.type.test are emitted, and llvm.public.type.test is either
914*0fca6ea1SDimitry Andric // refined to llvm.type.test or dropped before indirect-call-promotion pass.
915*0fca6ea1SDimitry Andric //
916*0fca6ea1SDimitry Andric // FIXME: For fullLTO with VFE, `llvm.type.checked.load intrinsic` is emitted.
917*0fca6ea1SDimitry Andric // Find out virtual calls by looking at users of llvm.type.checked.load in
918*0fca6ea1SDimitry Andric // that case.
919*0fca6ea1SDimitry Andric Function *TypeTestFunc =
920*0fca6ea1SDimitry Andric M.getFunction(Intrinsic::getName(Intrinsic::type_test));
921*0fca6ea1SDimitry Andric if (!TypeTestFunc || TypeTestFunc->use_empty())
922*0fca6ea1SDimitry Andric return;
923*0fca6ea1SDimitry Andric
924*0fca6ea1SDimitry Andric auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
925*0fca6ea1SDimitry Andric auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
926*0fca6ea1SDimitry Andric return FAM.getResult<DominatorTreeAnalysis>(F);
927*0fca6ea1SDimitry Andric };
928*0fca6ea1SDimitry Andric // Iterate all type.test calls to find all indirect calls.
929*0fca6ea1SDimitry Andric for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
930*0fca6ea1SDimitry Andric auto *CI = dyn_cast<CallInst>(U.getUser());
931*0fca6ea1SDimitry Andric if (!CI)
932*0fca6ea1SDimitry Andric continue;
933*0fca6ea1SDimitry Andric auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
934*0fca6ea1SDimitry Andric if (!TypeMDVal)
935*0fca6ea1SDimitry Andric continue;
936*0fca6ea1SDimitry Andric auto *CompatibleTypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
937*0fca6ea1SDimitry Andric if (!CompatibleTypeId)
938*0fca6ea1SDimitry Andric continue;
939*0fca6ea1SDimitry Andric
940*0fca6ea1SDimitry Andric // Find out all devirtualizable call sites given a llvm.type.test
941*0fca6ea1SDimitry Andric // intrinsic call.
942*0fca6ea1SDimitry Andric SmallVector<DevirtCallSite, 1> DevirtCalls;
943*0fca6ea1SDimitry Andric SmallVector<CallInst *, 1> Assumes;
944*0fca6ea1SDimitry Andric auto &DT = LookupDomTree(*CI->getFunction());
945*0fca6ea1SDimitry Andric findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
946*0fca6ea1SDimitry Andric
947*0fca6ea1SDimitry Andric for (auto &DevirtCall : DevirtCalls) {
948*0fca6ea1SDimitry Andric CallBase &CB = DevirtCall.CB;
949*0fca6ea1SDimitry Andric // Given an indirect call, try find the instruction which loads a
950*0fca6ea1SDimitry Andric // pointer to virtual table.
951*0fca6ea1SDimitry Andric Instruction *VTablePtr =
952*0fca6ea1SDimitry Andric PGOIndirectCallVisitor::tryGetVTableInstruction(&CB);
953*0fca6ea1SDimitry Andric if (!VTablePtr)
954*0fca6ea1SDimitry Andric continue;
955*0fca6ea1SDimitry Andric VirtualCSInfo[&CB] = {DevirtCall.Offset, VTablePtr,
956*0fca6ea1SDimitry Andric CompatibleTypeId->getString()};
957*0fca6ea1SDimitry Andric }
958*0fca6ea1SDimitry Andric }
959*0fca6ea1SDimitry Andric }
960*0fca6ea1SDimitry Andric
9610b57cec5SDimitry Andric // A wrapper function that does the actual work.
promoteIndirectCalls(Module & M,ProfileSummaryInfo * PSI,bool InLTO,bool SamplePGO,ModuleAnalysisManager & MAM)96206c3fb27SDimitry Andric static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
96306c3fb27SDimitry Andric bool SamplePGO, ModuleAnalysisManager &MAM) {
9640b57cec5SDimitry Andric if (DisableICP)
9650b57cec5SDimitry Andric return false;
9660b57cec5SDimitry Andric InstrProfSymtab Symtab;
9670b57cec5SDimitry Andric if (Error E = Symtab.create(M, InLTO)) {
9680b57cec5SDimitry Andric std::string SymtabFailure = toString(std::move(E));
969fe6060f1SDimitry Andric M.getContext().emitError("Failed to create symtab: " + SymtabFailure);
9700b57cec5SDimitry Andric return false;
9710b57cec5SDimitry Andric }
9720b57cec5SDimitry Andric bool Changed = false;
973*0fca6ea1SDimitry Andric VirtualCallSiteTypeInfoMap VirtualCSInfo;
974*0fca6ea1SDimitry Andric
975*0fca6ea1SDimitry Andric if (EnableVTableProfileUse)
976*0fca6ea1SDimitry Andric computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
977*0fca6ea1SDimitry Andric
978*0fca6ea1SDimitry Andric // VTableAddressPointOffsetVal stores the vtable address points. The vtable
979*0fca6ea1SDimitry Andric // address point of a given <vtable, address point offset> is static (doesn't
980*0fca6ea1SDimitry Andric // change after being computed once).
981*0fca6ea1SDimitry Andric // IndirectCallPromoter::getOrCreateVTableAddressPointVar creates the map
982*0fca6ea1SDimitry Andric // entry the first time a <vtable, offset> pair is seen, as
983*0fca6ea1SDimitry Andric // promoteIndirectCalls processes an IR module and calls IndirectCallPromoter
984*0fca6ea1SDimitry Andric // repeatedly on each function.
985*0fca6ea1SDimitry Andric VTableAddressPointOffsetValMap VTableAddressPointOffsetVal;
986*0fca6ea1SDimitry Andric
9870b57cec5SDimitry Andric for (auto &F : M) {
9880b57cec5SDimitry Andric if (F.isDeclaration() || F.hasOptNone())
9890b57cec5SDimitry Andric continue;
9900b57cec5SDimitry Andric
9910b57cec5SDimitry Andric auto &FAM =
99206c3fb27SDimitry Andric MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
99306c3fb27SDimitry Andric auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
9940b57cec5SDimitry Andric
995*0fca6ea1SDimitry Andric IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
996*0fca6ea1SDimitry Andric VirtualCSInfo,
997*0fca6ea1SDimitry Andric VTableAddressPointOffsetVal, ORE);
99806c3fb27SDimitry Andric bool FuncChanged = CallPromoter.processFunction(PSI);
9990b57cec5SDimitry Andric if (ICPDUMPAFTER && FuncChanged) {
10000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
10010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
10020b57cec5SDimitry Andric }
10030b57cec5SDimitry Andric Changed |= FuncChanged;
10040b57cec5SDimitry Andric if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
10050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
10060b57cec5SDimitry Andric break;
10070b57cec5SDimitry Andric }
10080b57cec5SDimitry Andric }
10090b57cec5SDimitry Andric return Changed;
10100b57cec5SDimitry Andric }
10110b57cec5SDimitry Andric
run(Module & M,ModuleAnalysisManager & MAM)10120b57cec5SDimitry Andric PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
101306c3fb27SDimitry Andric ModuleAnalysisManager &MAM) {
101406c3fb27SDimitry Andric ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
10150b57cec5SDimitry Andric
10160b57cec5SDimitry Andric if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
101706c3fb27SDimitry Andric SamplePGO | ICPSamplePGOMode, MAM))
10180b57cec5SDimitry Andric return PreservedAnalyses::all();
10190b57cec5SDimitry Andric
10200b57cec5SDimitry Andric return PreservedAnalyses::none();
10210b57cec5SDimitry Andric }
1022