xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10  // It also builds the data structures and initialization code needed for
11  // updating execution counts and emitting the profile at runtime.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16  #include "llvm/ADT/ArrayRef.h"
17  #include "llvm/ADT/STLExtras.h"
18  #include "llvm/ADT/SmallVector.h"
19  #include "llvm/ADT/StringRef.h"
20  #include "llvm/ADT/Twine.h"
21  #include "llvm/Analysis/BlockFrequencyInfo.h"
22  #include "llvm/Analysis/BranchProbabilityInfo.h"
23  #include "llvm/Analysis/LoopInfo.h"
24  #include "llvm/Analysis/TargetLibraryInfo.h"
25  #include "llvm/IR/Attributes.h"
26  #include "llvm/IR/BasicBlock.h"
27  #include "llvm/IR/CFG.h"
28  #include "llvm/IR/Constant.h"
29  #include "llvm/IR/Constants.h"
30  #include "llvm/IR/DIBuilder.h"
31  #include "llvm/IR/DerivedTypes.h"
32  #include "llvm/IR/DiagnosticInfo.h"
33  #include "llvm/IR/Dominators.h"
34  #include "llvm/IR/Function.h"
35  #include "llvm/IR/GlobalValue.h"
36  #include "llvm/IR/GlobalVariable.h"
37  #include "llvm/IR/IRBuilder.h"
38  #include "llvm/IR/Instruction.h"
39  #include "llvm/IR/Instructions.h"
40  #include "llvm/IR/IntrinsicInst.h"
41  #include "llvm/IR/MDBuilder.h"
42  #include "llvm/IR/Module.h"
43  #include "llvm/IR/Type.h"
44  #include "llvm/InitializePasses.h"
45  #include "llvm/Pass.h"
46  #include "llvm/ProfileData/InstrProf.h"
47  #include "llvm/ProfileData/InstrProfCorrelator.h"
48  #include "llvm/Support/Casting.h"
49  #include "llvm/Support/CommandLine.h"
50  #include "llvm/Support/Error.h"
51  #include "llvm/Support/ErrorHandling.h"
52  #include "llvm/TargetParser/Triple.h"
53  #include "llvm/Transforms/Instrumentation.h"
54  #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
55  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
56  #include "llvm/Transforms/Utils/ModuleUtils.h"
57  #include "llvm/Transforms/Utils/SSAUpdater.h"
58  #include <algorithm>
59  #include <cassert>
60  #include <cstdint>
61  #include <string>
62  
63  using namespace llvm;
64  
65  #define DEBUG_TYPE "instrprof"
66  
67  namespace llvm {
68  // Command line option to enable vtable value profiling. Defined in
69  // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
70  extern cl::opt<bool> EnableVTableValueProfiling;
71  // TODO: Remove -debug-info-correlate in next LLVM release, in favor of
72  // -profile-correlate=debug-info.
73  cl::opt<bool> DebugInfoCorrelate(
74      "debug-info-correlate",
75      cl::desc("Use debug info to correlate profiles. (Deprecated, use "
76               "-profile-correlate=debug-info)"),
77      cl::init(false));
78  
79  cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
80      "profile-correlate",
81      cl::desc("Use debug info or binary file to correlate profiles."),
82      cl::init(InstrProfCorrelator::NONE),
83      cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
84                            "No profile correlation"),
85                 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
86                            "Use debug info to correlate"),
87                 clEnumValN(InstrProfCorrelator::BINARY, "binary",
88                            "Use binary to correlate")));
89  } // namespace llvm
90  
91  namespace {
92  
93  cl::opt<bool> DoHashBasedCounterSplit(
94      "hash-based-counter-split",
95      cl::desc("Rename counter variable of a comdat function based on cfg hash"),
96      cl::init(true));
97  
98  cl::opt<bool>
99      RuntimeCounterRelocation("runtime-counter-relocation",
100                               cl::desc("Enable relocating counters at runtime."),
101                               cl::init(false));
102  
103  cl::opt<bool> ValueProfileStaticAlloc(
104      "vp-static-alloc",
105      cl::desc("Do static counter allocation for value profiler"),
106      cl::init(true));
107  
108  cl::opt<double> NumCountersPerValueSite(
109      "vp-counters-per-site",
110      cl::desc("The average number of profile counters allocated "
111               "per value profiling site."),
112      // This is set to a very small value because in real programs, only
113      // a very small percentage of value sites have non-zero targets, e.g, 1/30.
114      // For those sites with non-zero profile, the average number of targets
115      // is usually smaller than 2.
116      cl::init(1.0));
117  
118  cl::opt<bool> AtomicCounterUpdateAll(
119      "instrprof-atomic-counter-update-all",
120      cl::desc("Make all profile counter updates atomic (for testing only)"),
121      cl::init(false));
122  
123  cl::opt<bool> AtomicCounterUpdatePromoted(
124      "atomic-counter-update-promoted",
125      cl::desc("Do counter update using atomic fetch add "
126               " for promoted counters only"),
127      cl::init(false));
128  
129  cl::opt<bool> AtomicFirstCounter(
130      "atomic-first-counter",
131      cl::desc("Use atomic fetch add for first counter in a function (usually "
132               "the entry counter)"),
133      cl::init(false));
134  
135  // If the option is not specified, the default behavior about whether
136  // counter promotion is done depends on how instrumentaiton lowering
137  // pipeline is setup, i.e., the default value of true of this option
138  // does not mean the promotion will be done by default. Explicitly
139  // setting this option can override the default behavior.
140  cl::opt<bool> DoCounterPromotion("do-counter-promotion",
141                                   cl::desc("Do counter register promotion"),
142                                   cl::init(false));
143  cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
144      "max-counter-promotions-per-loop", cl::init(20),
145      cl::desc("Max number counter promotions per loop to avoid"
146               " increasing register pressure too much"));
147  
148  // A debug option
149  cl::opt<int>
150      MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
151                         cl::desc("Max number of allowed counter promotions"));
152  
153  cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
154      "speculative-counter-promotion-max-exiting", cl::init(3),
155      cl::desc("The max number of exiting blocks of a loop to allow "
156               " speculative counter promotion"));
157  
158  cl::opt<bool> SpeculativeCounterPromotionToLoop(
159      "speculative-counter-promotion-to-loop",
160      cl::desc("When the option is false, if the target block is in a loop, "
161               "the promotion will be disallowed unless the promoted counter "
162               " update can be further/iteratively promoted into an acyclic "
163               " region."));
164  
165  cl::opt<bool> IterativeCounterPromotion(
166      "iterative-counter-promotion", cl::init(true),
167      cl::desc("Allow counter promotion across the whole loop nest."));
168  
169  cl::opt<bool> SkipRetExitBlock(
170      "skip-ret-exit-block", cl::init(true),
171      cl::desc("Suppress counter promotion if exit blocks contain ret."));
172  
173  static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore,
174                                    cl::init(false),
175                                    cl::desc("Do PGO instrumentation sampling"));
176  
177  static cl::opt<unsigned> SampledInstrPeriod(
178      "sampled-instr-period",
179      cl::desc("Set the profile instrumentation sample period. For each sample "
180               "period, a fixed number of consecutive samples will be recorded. "
181               "The number is controlled by 'sampled-instr-burst-duration' flag. "
182               "The default sample period of 65535 is optimized for generating "
183               "efficient code that leverages unsigned integer wrapping in "
184               "overflow."),
185      cl::init(65535));
186  
187  static cl::opt<unsigned> SampledInstrBurstDuration(
188      "sampled-instr-burst-duration",
189      cl::desc("Set the profile instrumentation burst duration, which can range "
190               "from 0 to one less than the value of 'sampled-instr-period'. "
191               "This number of samples will be recorded for each "
192               "'sampled-instr-period' count update. Setting to 1 enables "
193               "simple sampling, in which case it is recommended to set "
194               "'sampled-instr-period' to a prime number."),
195      cl::init(200));
196  
197  using LoadStorePair = std::pair<Instruction *, Instruction *>;
198  
getIntModuleFlagOrZero(const Module & M,StringRef Flag)199  static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
200    auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
201    if (!MD)
202      return 0;
203  
204    // If the flag is a ConstantAsMetadata, it should be an integer representable
205    // in 64-bits.
206    return cast<ConstantInt>(MD->getValue())->getZExtValue();
207  }
208  
enablesValueProfiling(const Module & M)209  static bool enablesValueProfiling(const Module &M) {
210    return isIRPGOFlagSet(&M) ||
211           getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
212  }
213  
214  // Conservatively returns true if value profiling is enabled.
profDataReferencedByCode(const Module & M)215  static bool profDataReferencedByCode(const Module &M) {
216    return enablesValueProfiling(M);
217  }
218  
219  class InstrLowerer final {
220  public:
InstrLowerer(Module & M,const InstrProfOptions & Options,std::function<const TargetLibraryInfo & (Function & F)> GetTLI,bool IsCS)221    InstrLowerer(Module &M, const InstrProfOptions &Options,
222                 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
223                 bool IsCS)
224        : M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),
225          GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}
226  
227    bool lower();
228  
229  private:
230    Module &M;
231    const InstrProfOptions Options;
232    const Triple TT;
233    // Is this lowering for the context-sensitive instrumentation.
234    const bool IsCS;
235  
236    std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
237  
238    const bool DataReferencedByCode;
239  
240    struct PerFunctionProfileData {
241      uint32_t NumValueSites[IPVK_Last + 1] = {};
242      GlobalVariable *RegionCounters = nullptr;
243      GlobalVariable *DataVar = nullptr;
244      GlobalVariable *RegionBitmaps = nullptr;
245      uint32_t NumBitmapBytes = 0;
246  
247      PerFunctionProfileData() = default;
248    };
249    DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
250    // Key is virtual table variable, value is 'VTableProfData' in the form of
251    // GlobalVariable.
252    DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;
253    /// If runtime relocation is enabled, this maps functions to the load
254    /// instruction that produces the profile relocation bias.
255    DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
256    std::vector<GlobalValue *> CompilerUsedVars;
257    std::vector<GlobalValue *> UsedVars;
258    std::vector<GlobalVariable *> ReferencedNames;
259    // The list of virtual table variables of which the VTableProfData is
260    // collected.
261    std::vector<GlobalVariable *> ReferencedVTables;
262    GlobalVariable *NamesVar = nullptr;
263    size_t NamesSize = 0;
264  
265    /// The instance of [[alwaysinline]] rmw_or(ptr, i8).
266    /// This is name-insensitive.
267    Function *RMWOrFunc = nullptr;
268  
269    // vector of counter load/store pairs to be register promoted.
270    std::vector<LoadStorePair> PromotionCandidates;
271  
272    int64_t TotalCountersPromoted = 0;
273  
274    /// Lower instrumentation intrinsics in the function. Returns true if there
275    /// any lowering.
276    bool lowerIntrinsics(Function *F);
277  
278    /// Register-promote counter loads and stores in loops.
279    void promoteCounterLoadStores(Function *F);
280  
281    /// Returns true if relocating counters at runtime is enabled.
282    bool isRuntimeCounterRelocationEnabled() const;
283  
284    /// Returns true if profile counter update register promotion is enabled.
285    bool isCounterPromotionEnabled() const;
286  
287    /// Return true if profile sampling is enabled.
288    bool isSamplingEnabled() const;
289  
290    /// Count the number of instrumented value sites for the function.
291    void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
292  
293    /// Replace instrprof.value.profile with a call to runtime library.
294    void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
295  
296    /// Replace instrprof.cover with a store instruction to the coverage byte.
297    void lowerCover(InstrProfCoverInst *Inc);
298  
299    /// Replace instrprof.timestamp with a call to
300    /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
301    void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
302  
303    /// Replace instrprof.increment with an increment of the appropriate value.
304    void lowerIncrement(InstrProfIncrementInst *Inc);
305  
306    /// Force emitting of name vars for unused functions.
307    void lowerCoverageData(GlobalVariable *CoverageNamesVar);
308  
309    /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
310    /// using the index represented by the a temp value into a bitmap.
311    void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
312  
313    /// Get the Bias value for data to access mmap-ed area.
314    /// Create it if it hasn't been seen.
315    GlobalVariable *getOrCreateBiasVar(StringRef VarName);
316  
317    /// Compute the address of the counter value that this profiling instruction
318    /// acts on.
319    Value *getCounterAddress(InstrProfCntrInstBase *I);
320  
321    /// Lower the incremental instructions under profile sampling predicates.
322    void doSampling(Instruction *I);
323  
324    /// Get the region counters for an increment, creating them if necessary.
325    ///
326    /// If the counter array doesn't yet exist, the profile data variables
327    /// referring to them will also be created.
328    GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
329  
330    /// Create the region counters.
331    GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
332                                         StringRef Name,
333                                         GlobalValue::LinkageTypes Linkage);
334  
335    /// Create [[alwaysinline]] rmw_or(ptr, i8).
336    /// This doesn't update `RMWOrFunc`.
337    Function *createRMWOrFunc();
338  
339    /// Get the call to `rmw_or`.
340    /// Create the instance if it is unknown.
341    CallInst *getRMWOrCall(Value *Addr, Value *Val);
342  
343    /// Compute the address of the test vector bitmap that this profiling
344    /// instruction acts on.
345    Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
346  
347    /// Get the region bitmaps for an increment, creating them if necessary.
348    ///
349    /// If the bitmap array doesn't yet exist, the profile data variables
350    /// referring to them will also be created.
351    GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
352  
353    /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
354    /// an MC/DC Decision region. The number of bytes required is indicated by
355    /// the intrinsic used (type InstrProfMCDCBitmapInstBase).  This is called
356    /// as part of setupProfileSection() and is conceptually very similar to
357    /// what is done for profile data counters in createRegionCounters().
358    GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
359                                        StringRef Name,
360                                        GlobalValue::LinkageTypes Linkage);
361  
362    /// Set Comdat property of GV, if required.
363    void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);
364  
365    /// Setup the sections into which counters and bitmaps are allocated.
366    GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
367                                        InstrProfSectKind IPSK);
368  
369    /// Create INSTR_PROF_DATA variable for counters and bitmaps.
370    void createDataVariable(InstrProfCntrInstBase *Inc);
371  
372    /// Get the counters for virtual table values, creating them if necessary.
373    void getOrCreateVTableProfData(GlobalVariable *GV);
374  
375    /// Emit the section with compressed function names.
376    void emitNameData();
377  
378    /// Emit the section with compressed vtable names.
379    void emitVTableNames();
380  
381    /// Emit value nodes section for value profiling.
382    void emitVNodes();
383  
384    /// Emit runtime registration functions for each profile data variable.
385    void emitRegistration();
386  
387    /// Emit the necessary plumbing to pull in the runtime initialization.
388    /// Returns true if a change was made.
389    bool emitRuntimeHook();
390  
391    /// Add uses of our data variables and runtime hook.
392    void emitUses();
393  
394    /// Create a static initializer for our data, on platforms that need it,
395    /// and for any profile output file that was specified.
396    void emitInitialization();
397  };
398  
399  ///
400  /// A helper class to promote one counter RMW operation in the loop
401  /// into register update.
402  ///
403  /// RWM update for the counter will be sinked out of the loop after
404  /// the transformation.
405  ///
406  class PGOCounterPromoterHelper : public LoadAndStorePromoter {
407  public:
PGOCounterPromoterHelper(Instruction * L,Instruction * S,SSAUpdater & SSA,Value * Init,BasicBlock * PH,ArrayRef<BasicBlock * > ExitBlocks,ArrayRef<Instruction * > InsertPts,DenseMap<Loop *,SmallVector<LoadStorePair,8>> & LoopToCands,LoopInfo & LI)408    PGOCounterPromoterHelper(
409        Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
410        BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
411        ArrayRef<Instruction *> InsertPts,
412        DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
413        LoopInfo &LI)
414        : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
415          InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
416      assert(isa<LoadInst>(L));
417      assert(isa<StoreInst>(S));
418      SSA.AddAvailableValue(PH, Init);
419    }
420  
doExtraRewritesBeforeFinalDeletion()421    void doExtraRewritesBeforeFinalDeletion() override {
422      for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
423        BasicBlock *ExitBlock = ExitBlocks[i];
424        Instruction *InsertPos = InsertPts[i];
425        // Get LiveIn value into the ExitBlock. If there are multiple
426        // predecessors, the value is defined by a PHI node in this
427        // block.
428        Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
429        Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
430        Type *Ty = LiveInValue->getType();
431        IRBuilder<> Builder(InsertPos);
432        if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
433          // If isRuntimeCounterRelocationEnabled() is true then the address of
434          // the store instruction is computed with two instructions in
435          // InstrProfiling::getCounterAddress(). We need to copy those
436          // instructions to this block to compute Addr correctly.
437          // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
438          // %Addr = inttoptr i64 %BiasAdd to i64*
439          auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
440          assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
441          Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
442          Addr = Builder.CreateIntToPtr(BiasInst,
443                                        PointerType::getUnqual(Ty->getContext()));
444        }
445        if (AtomicCounterUpdatePromoted)
446          // automic update currently can only be promoted across the current
447          // loop, not the whole loop nest.
448          Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
449                                  MaybeAlign(),
450                                  AtomicOrdering::SequentiallyConsistent);
451        else {
452          LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
453          auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
454          auto *NewStore = Builder.CreateStore(NewVal, Addr);
455  
456          // Now update the parent loop's candidate list:
457          if (IterativeCounterPromotion) {
458            auto *TargetLoop = LI.getLoopFor(ExitBlock);
459            if (TargetLoop)
460              LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
461          }
462        }
463      }
464    }
465  
466  private:
467    Instruction *Store;
468    ArrayRef<BasicBlock *> ExitBlocks;
469    ArrayRef<Instruction *> InsertPts;
470    DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
471    LoopInfo &LI;
472  };
473  
474  /// A helper class to do register promotion for all profile counter
475  /// updates in a loop.
476  ///
477  class PGOCounterPromoter {
478  public:
PGOCounterPromoter(DenseMap<Loop *,SmallVector<LoadStorePair,8>> & LoopToCands,Loop & CurLoop,LoopInfo & LI,BlockFrequencyInfo * BFI)479    PGOCounterPromoter(
480        DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
481        Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
482        : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
483  
484      // Skip collection of ExitBlocks and InsertPts for loops that will not be
485      // able to have counters promoted.
486      SmallVector<BasicBlock *, 8> LoopExitBlocks;
487      SmallPtrSet<BasicBlock *, 8> BlockSet;
488  
489      L.getExitBlocks(LoopExitBlocks);
490      if (!isPromotionPossible(&L, LoopExitBlocks))
491        return;
492  
493      for (BasicBlock *ExitBlock : LoopExitBlocks) {
494        if (BlockSet.insert(ExitBlock).second &&
495            llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
496              return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
497            })) {
498          ExitBlocks.push_back(ExitBlock);
499          InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
500        }
501      }
502    }
503  
run(int64_t * NumPromoted)504    bool run(int64_t *NumPromoted) {
505      // Skip 'infinite' loops:
506      if (ExitBlocks.size() == 0)
507        return false;
508  
509      // Skip if any of the ExitBlocks contains a ret instruction.
510      // This is to prevent dumping of incomplete profile -- if the
511      // the loop is a long running loop and dump is called in the middle
512      // of the loop, the result profile is incomplete.
513      // FIXME: add other heuristics to detect long running loops.
514      if (SkipRetExitBlock) {
515        for (auto *BB : ExitBlocks)
516          if (isa<ReturnInst>(BB->getTerminator()))
517            return false;
518      }
519  
520      unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
521      if (MaxProm == 0)
522        return false;
523  
524      unsigned Promoted = 0;
525      for (auto &Cand : LoopToCandidates[&L]) {
526  
527        SmallVector<PHINode *, 4> NewPHIs;
528        SSAUpdater SSA(&NewPHIs);
529        Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
530  
531        // If BFI is set, we will use it to guide the promotions.
532        if (BFI) {
533          auto *BB = Cand.first->getParent();
534          auto InstrCount = BFI->getBlockProfileCount(BB);
535          if (!InstrCount)
536            continue;
537          auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
538          // If the average loop trip count is not greater than 1.5, we skip
539          // promotion.
540          if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
541            continue;
542        }
543  
544        PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
545                                          L.getLoopPreheader(), ExitBlocks,
546                                          InsertPts, LoopToCandidates, LI);
547        Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
548        Promoted++;
549        if (Promoted >= MaxProm)
550          break;
551  
552        (*NumPromoted)++;
553        if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
554          break;
555      }
556  
557      LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
558                        << L.getLoopDepth() << ")\n");
559      return Promoted != 0;
560    }
561  
562  private:
allowSpeculativeCounterPromotion(Loop * LP)563    bool allowSpeculativeCounterPromotion(Loop *LP) {
564      SmallVector<BasicBlock *, 8> ExitingBlocks;
565      L.getExitingBlocks(ExitingBlocks);
566      // Not considierered speculative.
567      if (ExitingBlocks.size() == 1)
568        return true;
569      if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
570        return false;
571      return true;
572    }
573  
574    // Check whether the loop satisfies the basic conditions needed to perform
575    // Counter Promotions.
576    bool
isPromotionPossible(Loop * LP,const SmallVectorImpl<BasicBlock * > & LoopExitBlocks)577    isPromotionPossible(Loop *LP,
578                        const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
579      // We can't insert into a catchswitch.
580      if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
581            return isa<CatchSwitchInst>(Exit->getTerminator());
582          }))
583        return false;
584  
585      if (!LP->hasDedicatedExits())
586        return false;
587  
588      BasicBlock *PH = LP->getLoopPreheader();
589      if (!PH)
590        return false;
591  
592      return true;
593    }
594  
595    // Returns the max number of Counter Promotions for LP.
getMaxNumOfPromotionsInLoop(Loop * LP)596    unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
597      SmallVector<BasicBlock *, 8> LoopExitBlocks;
598      LP->getExitBlocks(LoopExitBlocks);
599      if (!isPromotionPossible(LP, LoopExitBlocks))
600        return 0;
601  
602      SmallVector<BasicBlock *, 8> ExitingBlocks;
603      LP->getExitingBlocks(ExitingBlocks);
604  
605      // If BFI is set, we do more aggressive promotions based on BFI.
606      if (BFI)
607        return (unsigned)-1;
608  
609      // Not considierered speculative.
610      if (ExitingBlocks.size() == 1)
611        return MaxNumOfPromotionsPerLoop;
612  
613      if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
614        return 0;
615  
616      // Whether the target block is in a loop does not matter:
617      if (SpeculativeCounterPromotionToLoop)
618        return MaxNumOfPromotionsPerLoop;
619  
620      // Now check the target block:
621      unsigned MaxProm = MaxNumOfPromotionsPerLoop;
622      for (auto *TargetBlock : LoopExitBlocks) {
623        auto *TargetLoop = LI.getLoopFor(TargetBlock);
624        if (!TargetLoop)
625          continue;
626        unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
627        unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
628        MaxProm =
629            std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
630                                  PendingCandsInTarget);
631      }
632      return MaxProm;
633    }
634  
635    DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
636    SmallVector<BasicBlock *, 8> ExitBlocks;
637    SmallVector<Instruction *, 8> InsertPts;
638    Loop &L;
639    LoopInfo &LI;
640    BlockFrequencyInfo *BFI;
641  };
642  
enum class ValueProfilingCallType {
  // Tracks individual values. Currently used for indirect call target
  // profiling.
  Default,

  // Value profiling of memop sizes.
  MemOp
};
651  
652  } // end anonymous namespace
653  
run(Module & M,ModuleAnalysisManager & AM)654  PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
655                                                    ModuleAnalysisManager &AM) {
656    FunctionAnalysisManager &FAM =
657        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
658    auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
659      return FAM.getResult<TargetLibraryAnalysis>(F);
660    };
661    InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
662    if (!Lowerer.lower())
663      return PreservedAnalyses::all();
664  
665    return PreservedAnalyses::none();
666  }
667  
668  //
669  // Perform instrumentation sampling.
670  //
// There are 3 flavors of sampling:
672  // (1) Full burst sampling: We transform:
673  //   Increment_Instruction;
674  // to:
675  //   if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
676  //     Increment_Instruction;
677  //   }
678  //   __llvm_profile_sampling__ += 1;
679  //   if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
680  //     __llvm_profile_sampling__ = 0;
681  //   }
682  //
683  // "__llvm_profile_sampling__" is a thread-local global shared by all PGO
684  // counters (value-instrumentation and edge instrumentation).
685  //
686  // (2) Fast burst sampling:
687  // "__llvm_profile_sampling__" variable is an unsigned type, meaning it will
688  // wrap around to zero when overflows. In this case, the second check is
689  // unnecessary, so we won't generate check2 when the SampledInstrPeriod is
690  // set to 65535 (64K - 1). The code after:
691  //   if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
692  //     Increment_Instruction;
693  //   }
694  //   __llvm_profile_sampling__ += 1;
695  //
696  // (3) Simple sampling:
697  // When SampledInstrBurstDuration sets to 1, we do a simple sampling:
698  //   __llvm_profile_sampling__ += 1;
699  //   if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
700  //     __llvm_profile_sampling__ = 0;
701  //     Increment_Instruction;
702  //   }
703  //
704  // Note that, the code snippet after the transformation can still be counter
705  // promoted. However, with sampling enabled, counter updates are expected to
706  // be infrequent, making the benefits of counter promotion negligible.
707  // Moreover, counter promotion can potentially cause issues in server
708  // applications, particularly when the counters are dumped without a clean
709  // exit. To mitigate this risk, counter promotion is disabled by default when
710  // sampling is enabled. This behavior can be overridden using the internal
711  // option.
doSampling(Instruction * I)712  void InstrLowerer::doSampling(Instruction *I) {
713    if (!isSamplingEnabled())
714      return;
715  
716    unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue();
717    unsigned SampledPeriod = SampledInstrPeriod.getValue();
718    if (SampledBurstDuration >= SampledPeriod) {
719      report_fatal_error(
720          "SampledPeriod needs to be greater than SampledBurstDuration");
721    }
722    bool UseShort = (SampledPeriod <= USHRT_MAX);
723    bool IsSimpleSampling = (SampledBurstDuration == 1);
724    // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate
725    // the simple sampling style code.
726    bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535);
727  
728    auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {
729      if (UseShort)
730        return Builder.getInt16(C);
731      else
732        return Builder.getInt32(C);
733    };
734  
735    IntegerType *SamplingVarTy;
736    if (UseShort)
737      SamplingVarTy = Type::getInt16Ty(M.getContext());
738    else
739      SamplingVarTy = Type::getInt32Ty(M.getContext());
740    auto *SamplingVar =
741        M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
742    assert(SamplingVar && "SamplingVar not set properly");
743  
744    // Create the condition for checking the burst duration.
745    Instruction *SamplingVarIncr;
746    Value *NewSamplingVarVal;
747    MDBuilder MDB(I->getContext());
748    MDNode *BranchWeight;
749    IRBuilder<> CondBuilder(I);
750    auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);
751    if (IsSimpleSampling) {
752      // For the simple sampling, just create the load and increments.
753      IRBuilder<> IncBuilder(I);
754      NewSamplingVarVal =
755          IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
756      SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
757    } else {
758      // For the bust-sampling, create the conditonal update.
759      auto *DurationCond = CondBuilder.CreateICmpULE(
760          LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration));
761      BranchWeight = MDB.createBranchWeights(
762          SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);
763      Instruction *ThenTerm = SplitBlockAndInsertIfThen(
764          DurationCond, I, /* Unreachable */ false, BranchWeight);
765      IRBuilder<> IncBuilder(I);
766      NewSamplingVarVal =
767          IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));
768      SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);
769      I->moveBefore(ThenTerm);
770    }
771  
772    if (IsFastSampling)
773      return;
774  
775    // Create the condtion for checking the period.
776    Instruction *ThenTerm, *ElseTerm;
777    IRBuilder<> PeriodCondBuilder(SamplingVarIncr);
778    auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(
779        NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod));
780    BranchWeight = MDB.createBranchWeights(1, SampledPeriod);
781    SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,
782                                  &ElseTerm, BranchWeight);
783  
784    // For the simple sampling, the counter update happens in sampling var reset.
785    if (IsSimpleSampling)
786      I->moveBefore(ThenTerm);
787  
788    IRBuilder<> ResetBuilder(ThenTerm);
789    ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);
790    SamplingVarIncr->moveBefore(ElseTerm);
791  }
792  
lowerIntrinsics(Function * F)793  bool InstrLowerer::lowerIntrinsics(Function *F) {
794    bool MadeChange = false;
795    PromotionCandidates.clear();
796    SmallVector<InstrProfInstBase *, 8> InstrProfInsts;
797  
798    // To ensure compatibility with sampling, we save the intrinsics into
799    // a buffer to prevent potential breakage of the iterator (as the
800    // intrinsics will be moved to a different BB).
801    for (BasicBlock &BB : *F) {
802      for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
803        if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))
804          InstrProfInsts.push_back(IP);
805      }
806    }
807  
808    for (auto *Instr : InstrProfInsts) {
809      doSampling(Instr);
810      if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {
811        lowerIncrement(IPIS);
812        MadeChange = true;
813      } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) {
814        lowerIncrement(IPI);
815        MadeChange = true;
816      } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {
817        lowerTimestamp(IPC);
818        MadeChange = true;
819      } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {
820        lowerCover(IPC);
821        MadeChange = true;
822      } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {
823        lowerValueProfileInst(IPVP);
824        MadeChange = true;
825      } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {
826        IPMP->eraseFromParent();
827        MadeChange = true;
828      } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {
829        lowerMCDCTestVectorBitmapUpdate(IPBU);
830        MadeChange = true;
831      }
832    }
833  
834    if (!MadeChange)
835      return false;
836  
837    promoteCounterLoadStores(F);
838    return true;
839  }
840  
isRuntimeCounterRelocationEnabled() const841  bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
842    // Mach-O don't support weak external references.
843    if (TT.isOSBinFormatMachO())
844      return false;
845  
846    if (RuntimeCounterRelocation.getNumOccurrences() > 0)
847      return RuntimeCounterRelocation;
848  
849    // Fuchsia uses runtime counter relocation by default.
850    return TT.isOSFuchsia();
851  }
852  
isSamplingEnabled() const853  bool InstrLowerer::isSamplingEnabled() const {
854    if (SampledInstr.getNumOccurrences() > 0)
855      return SampledInstr;
856    return Options.Sampling;
857  }
858  
isCounterPromotionEnabled() const859  bool InstrLowerer::isCounterPromotionEnabled() const {
860    if (DoCounterPromotion.getNumOccurrences() > 0)
861      return DoCounterPromotion;
862  
863    return Options.DoCounterPromotion;
864  }
865  
promoteCounterLoadStores(Function * F)866  void InstrLowerer::promoteCounterLoadStores(Function *F) {
867    if (!isCounterPromotionEnabled())
868      return;
869  
870    DominatorTree DT(*F);
871    LoopInfo LI(DT);
872    DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
873  
874    std::unique_ptr<BlockFrequencyInfo> BFI;
875    if (Options.UseBFIInPromotion) {
876      std::unique_ptr<BranchProbabilityInfo> BPI;
877      BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
878      BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
879    }
880  
881    for (const auto &LoadStore : PromotionCandidates) {
882      auto *CounterLoad = LoadStore.first;
883      auto *CounterStore = LoadStore.second;
884      BasicBlock *BB = CounterLoad->getParent();
885      Loop *ParentLoop = LI.getLoopFor(BB);
886      if (!ParentLoop)
887        continue;
888      LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
889    }
890  
891    SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
892  
893    // Do a post-order traversal of the loops so that counter updates can be
894    // iteratively hoisted outside the loop nest.
895    for (auto *Loop : llvm::reverse(Loops)) {
896      PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
897      Promoter.run(&TotalCountersPromoted);
898    }
899  }
900  
needsRuntimeHookUnconditionally(const Triple & TT)901  static bool needsRuntimeHookUnconditionally(const Triple &TT) {
902    // On Fuchsia, we only need runtime hook if any counters are present.
903    if (TT.isOSFuchsia())
904      return false;
905  
906    return true;
907  }
908  
909  /// Check if the module contains uses of any profiling intrinsics.
containsProfilingIntrinsics(Module & M)910  static bool containsProfilingIntrinsics(Module &M) {
911    auto containsIntrinsic = [&](int ID) {
912      if (auto *F = M.getFunction(Intrinsic::getName(ID)))
913        return !F->use_empty();
914      return false;
915    };
916    return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
917           containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
918           containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
919           containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) ||
920           containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
921  }
922  
lower()923  bool InstrLowerer::lower() {
924    bool MadeChange = false;
925    bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
926    if (NeedsRuntimeHook)
927      MadeChange = emitRuntimeHook();
928  
929    if (!IsCS && isSamplingEnabled())
930      createProfileSamplingVar(M);
931  
932    bool ContainsProfiling = containsProfilingIntrinsics(M);
933    GlobalVariable *CoverageNamesVar =
934        M.getNamedGlobal(getCoverageUnusedNamesVarName());
935    // Improve compile time by avoiding linear scans when there is no work.
936    if (!ContainsProfiling && !CoverageNamesVar)
937      return MadeChange;
938  
939    // We did not know how many value sites there would be inside
940    // the instrumented function. This is counting the number of instrumented
941    // target value sites to enter it as field in the profile data variable.
942    for (Function &F : M) {
943      InstrProfCntrInstBase *FirstProfInst = nullptr;
944      for (BasicBlock &BB : F) {
945        for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
946          if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
947            computeNumValueSiteCounts(Ind);
948          else {
949            if (FirstProfInst == nullptr &&
950                (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
951              FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
952            // If the MCDCBitmapParameters intrinsic seen, create the bitmaps.
953            if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
954              static_cast<void>(getOrCreateRegionBitmaps(Params));
955          }
956        }
957      }
958  
959      // Use a profile intrinsic to create the region counters and data variable.
960      // Also create the data variable based on the MCDCParams.
961      if (FirstProfInst != nullptr) {
962        static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
963      }
964    }
965  
966    if (EnableVTableValueProfiling)
967      for (GlobalVariable &GV : M.globals())
968        // Global variables with type metadata are virtual table variables.
969        if (GV.hasMetadata(LLVMContext::MD_type))
970          getOrCreateVTableProfData(&GV);
971  
972    for (Function &F : M)
973      MadeChange |= lowerIntrinsics(&F);
974  
975    if (CoverageNamesVar) {
976      lowerCoverageData(CoverageNamesVar);
977      MadeChange = true;
978    }
979  
980    if (!MadeChange)
981      return false;
982  
983    emitVNodes();
984    emitNameData();
985    emitVTableNames();
986  
987    // Emit runtime hook for the cases where the target does not unconditionally
988    // require pulling in profile runtime, and coverage is enabled on code that is
989    // not eliminated by the front-end, e.g. unused functions with internal
990    // linkage.
991    if (!NeedsRuntimeHook && ContainsProfiling)
992      emitRuntimeHook();
993  
994    emitRegistration();
995    emitUses();
996    emitInitialization();
997    return true;
998  }
999  
getOrInsertValueProfilingCall(Module & M,const TargetLibraryInfo & TLI,ValueProfilingCallType CallType=ValueProfilingCallType::Default)1000  static FunctionCallee getOrInsertValueProfilingCall(
1001      Module &M, const TargetLibraryInfo &TLI,
1002      ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
1003    LLVMContext &Ctx = M.getContext();
1004    auto *ReturnTy = Type::getVoidTy(M.getContext());
1005  
1006    AttributeList AL;
1007    if (auto AK = TLI.getExtAttrForI32Param(false))
1008      AL = AL.addParamAttribute(M.getContext(), 2, AK);
1009  
1010    assert((CallType == ValueProfilingCallType::Default ||
1011            CallType == ValueProfilingCallType::MemOp) &&
1012           "Must be Default or MemOp");
1013    Type *ParamTypes[] = {
1014  #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
1015  #include "llvm/ProfileData/InstrProfData.inc"
1016    };
1017    auto *ValueProfilingCallTy =
1018        FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);
1019    StringRef FuncName = CallType == ValueProfilingCallType::Default
1020                             ? getInstrProfValueProfFuncName()
1021                             : getInstrProfValueProfMemOpFuncName();
1022    return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
1023  }
1024  
computeNumValueSiteCounts(InstrProfValueProfileInst * Ind)1025  void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
1026    GlobalVariable *Name = Ind->getName();
1027    uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1028    uint64_t Index = Ind->getIndex()->getZExtValue();
1029    auto &PD = ProfileDataMap[Name];
1030    PD.NumValueSites[ValueKind] =
1031        std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
1032  }
1033  
lowerValueProfileInst(InstrProfValueProfileInst * Ind)1034  void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
1035    // TODO: Value profiling heavily depends on the data section which is omitted
1036    // in lightweight mode. We need to move the value profile pointer to the
1037    // Counter struct to get this working.
1038    assert(
1039        !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
1040        "Value profiling is not yet supported with lightweight instrumentation");
1041    GlobalVariable *Name = Ind->getName();
1042    auto It = ProfileDataMap.find(Name);
1043    assert(It != ProfileDataMap.end() && It->second.DataVar &&
1044           "value profiling detected in function with no counter incerement");
1045  
1046    GlobalVariable *DataVar = It->second.DataVar;
1047    uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
1048    uint64_t Index = Ind->getIndex()->getZExtValue();
1049    for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
1050      Index += It->second.NumValueSites[Kind];
1051  
1052    IRBuilder<> Builder(Ind);
1053    bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
1054                        llvm::InstrProfValueKind::IPVK_MemOPSize);
1055    CallInst *Call = nullptr;
1056    auto *TLI = &GetTLI(*Ind->getFunction());
1057  
1058    // To support value profiling calls within Windows exception handlers, funclet
1059    // information contained within operand bundles needs to be copied over to
1060    // the library call. This is required for the IR to be processed by the
1061    // WinEHPrepare pass.
1062    SmallVector<OperandBundleDef, 1> OpBundles;
1063    Ind->getOperandBundlesAsDefs(OpBundles);
1064    if (!IsMemOpSize) {
1065      Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
1066      Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
1067                                OpBundles);
1068    } else {
1069      Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
1070      Call = Builder.CreateCall(
1071          getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
1072          Args, OpBundles);
1073    }
1074    if (auto AK = TLI->getExtAttrForI32Param(false))
1075      Call->addParamAttr(2, AK);
1076    Ind->replaceAllUsesWith(Call);
1077    Ind->eraseFromParent();
1078  }
1079  
getOrCreateBiasVar(StringRef VarName)1080  GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {
1081    GlobalVariable *Bias = M.getGlobalVariable(VarName);
1082    if (Bias)
1083      return Bias;
1084  
1085    Type *Int64Ty = Type::getInt64Ty(M.getContext());
1086  
1087    // Compiler must define this variable when runtime counter relocation
1088    // is being used. Runtime has a weak external reference that is used
1089    // to check whether that's the case or not.
1090    Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
1091                              Constant::getNullValue(Int64Ty), VarName);
1092    Bias->setVisibility(GlobalVariable::HiddenVisibility);
1093    // A definition that's weak (linkonce_odr) without being in a COMDAT
1094    // section wouldn't lead to link errors, but it would lead to a dead
1095    // data word from every TU but one. Putting it in COMDAT ensures there
1096    // will be exactly one data slot in the link.
1097    if (TT.supportsCOMDAT())
1098      Bias->setComdat(M.getOrInsertComdat(VarName));
1099  
1100    return Bias;
1101  }
1102  
getCounterAddress(InstrProfCntrInstBase * I)1103  Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
1104    auto *Counters = getOrCreateRegionCounters(I);
1105    IRBuilder<> Builder(I);
1106  
1107    if (isa<InstrProfTimestampInst>(I))
1108      Counters->setAlignment(Align(8));
1109  
1110    auto *Addr = Builder.CreateConstInBoundsGEP2_32(
1111        Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
1112  
1113    if (!isRuntimeCounterRelocationEnabled())
1114      return Addr;
1115  
1116    Type *Int64Ty = Type::getInt64Ty(M.getContext());
1117    Function *Fn = I->getParent()->getParent();
1118    LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
1119    if (!BiasLI) {
1120      IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
1121      auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());
1122      BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");
1123      // Bias doesn't change after startup.
1124      BiasLI->setMetadata(LLVMContext::MD_invariant_load,
1125                          MDNode::get(M.getContext(), std::nullopt));
1126    }
1127    auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
1128    return Builder.CreateIntToPtr(Add, Addr->getType());
1129  }
1130  
1131  /// Create `void [[alwaysinline]] rmw_or(uint8_t *ArgAddr, uint8_t ArgVal)`
1132  /// "Basic" sequence is `*ArgAddr |= ArgVal`
createRMWOrFunc()1133  Function *InstrLowerer::createRMWOrFunc() {
1134    auto &Ctx = M.getContext();
1135    auto *Int8Ty = Type::getInt8Ty(Ctx);
1136    Function *Fn = Function::Create(
1137        FunctionType::get(Type::getVoidTy(Ctx),
1138                          {PointerType::getUnqual(Ctx), Int8Ty}, false),
1139        Function::LinkageTypes::PrivateLinkage, "rmw_or", M);
1140    Fn->addFnAttr(Attribute::AlwaysInline);
1141    auto *ArgAddr = Fn->getArg(0);
1142    auto *ArgVal = Fn->getArg(1);
1143    IRBuilder<> Builder(BasicBlock::Create(Ctx, "", Fn));
1144  
1145    // Load profile bitmap byte.
1146    //  %mcdc.bits = load i8, ptr %4, align 1
1147    auto *Bitmap = Builder.CreateLoad(Int8Ty, ArgAddr, "mcdc.bits");
1148  
1149    if (Options.Atomic || AtomicCounterUpdateAll) {
1150      // If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).
1151      // Note, just-loaded Bitmap might not be up-to-date. Use it just for
1152      // early testing.
1153      auto *Masked = Builder.CreateAnd(Bitmap, ArgVal);
1154      auto *ShouldStore = Builder.CreateICmpNE(Masked, ArgVal);
1155      auto *ThenTerm = BasicBlock::Create(Ctx, "", Fn);
1156      auto *ElseTerm = BasicBlock::Create(Ctx, "", Fn);
1157      // Assume updating will be rare.
1158      auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();
1159      Builder.CreateCondBr(ShouldStore, ThenTerm, ElseTerm, Unlikely);
1160  
1161      IRBuilder<> ThenBuilder(ThenTerm);
1162      ThenBuilder.CreateAtomicRMW(AtomicRMWInst::Or, ArgAddr, ArgVal,
1163                                  MaybeAlign(), AtomicOrdering::Monotonic);
1164      ThenBuilder.CreateRetVoid();
1165  
1166      IRBuilder<> ElseBuilder(ElseTerm);
1167      ElseBuilder.CreateRetVoid();
1168  
1169      return Fn;
1170    }
1171  
1172    // Perform logical OR of profile bitmap byte and shifted bit offset.
1173    //  %8 = or i8 %mcdc.bits, %7
1174    auto *Result = Builder.CreateOr(Bitmap, ArgVal);
1175  
1176    // Store the updated profile bitmap byte.
1177    //  store i8 %8, ptr %3, align 1
1178    Builder.CreateStore(Result, ArgAddr);
1179  
1180    // Terminator
1181    Builder.CreateRetVoid();
1182  
1183    return Fn;
1184  }
1185  
getRMWOrCall(Value * Addr,Value * Val)1186  CallInst *InstrLowerer::getRMWOrCall(Value *Addr, Value *Val) {
1187    if (!RMWOrFunc)
1188      RMWOrFunc = createRMWOrFunc();
1189  
1190    return CallInst::Create(RMWOrFunc, {Addr, Val});
1191  }
1192  
getBitmapAddress(InstrProfMCDCTVBitmapUpdate * I)1193  Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
1194    auto *Bitmaps = getOrCreateRegionBitmaps(I);
1195    IRBuilder<> Builder(I);
1196  
1197    if (isRuntimeCounterRelocationEnabled()) {
1198      LLVMContext &Ctx = M.getContext();
1199      Ctx.diagnose(DiagnosticInfoPGOProfile(
1200          M.getName().data(),
1201          Twine("Runtime counter relocation is presently not supported for MC/DC "
1202                "bitmaps."),
1203          DS_Warning));
1204    }
1205  
1206    return Bitmaps;
1207  }
1208  
lowerCover(InstrProfCoverInst * CoverInstruction)1209  void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
1210    auto *Addr = getCounterAddress(CoverInstruction);
1211    IRBuilder<> Builder(CoverInstruction);
1212    // We store zero to represent that this block is covered.
1213    Builder.CreateStore(Builder.getInt8(0), Addr);
1214    CoverInstruction->eraseFromParent();
1215  }
1216  
lowerTimestamp(InstrProfTimestampInst * TimestampInstruction)1217  void InstrLowerer::lowerTimestamp(
1218      InstrProfTimestampInst *TimestampInstruction) {
1219    assert(TimestampInstruction->getIndex()->isZeroValue() &&
1220           "timestamp probes are always the first probe for a function");
1221    auto &Ctx = M.getContext();
1222    auto *TimestampAddr = getCounterAddress(TimestampInstruction);
1223    IRBuilder<> Builder(TimestampInstruction);
1224    auto *CalleeTy =
1225        FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
1226    auto Callee = M.getOrInsertFunction(
1227        INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);
1228    Builder.CreateCall(Callee, {TimestampAddr});
1229    TimestampInstruction->eraseFromParent();
1230  }
1231  
lowerIncrement(InstrProfIncrementInst * Inc)1232  void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
1233    auto *Addr = getCounterAddress(Inc);
1234  
1235    IRBuilder<> Builder(Inc);
1236    if (Options.Atomic || AtomicCounterUpdateAll ||
1237        (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
1238      Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
1239                              MaybeAlign(), AtomicOrdering::Monotonic);
1240    } else {
1241      Value *IncStep = Inc->getStep();
1242      Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
1243      auto *Count = Builder.CreateAdd(Load, Inc->getStep());
1244      auto *Store = Builder.CreateStore(Count, Addr);
1245      if (isCounterPromotionEnabled())
1246        PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
1247    }
1248    Inc->eraseFromParent();
1249  }
1250  
lowerCoverageData(GlobalVariable * CoverageNamesVar)1251  void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
1252    ConstantArray *Names =
1253        cast<ConstantArray>(CoverageNamesVar->getInitializer());
1254    for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
1255      Constant *NC = Names->getOperand(I);
1256      Value *V = NC->stripPointerCasts();
1257      assert(isa<GlobalVariable>(V) && "Missing reference to function name");
1258      GlobalVariable *Name = cast<GlobalVariable>(V);
1259  
1260      Name->setLinkage(GlobalValue::PrivateLinkage);
1261      ReferencedNames.push_back(Name);
1262      if (isa<ConstantExpr>(NC))
1263        NC->dropAllReferences();
1264    }
1265    CoverageNamesVar->eraseFromParent();
1266  }
1267  
lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate * Update)1268  void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
1269      InstrProfMCDCTVBitmapUpdate *Update) {
1270    IRBuilder<> Builder(Update);
1271    auto *Int8Ty = Type::getInt8Ty(M.getContext());
1272    auto *Int32Ty = Type::getInt32Ty(M.getContext());
1273    auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1274    auto *BitmapAddr = getBitmapAddress(Update);
1275  
1276    // Load Temp Val + BitmapIdx.
1277    //  %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1278    auto *Temp = Builder.CreateAdd(
1279        Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"),
1280        Update->getBitmapIndex());
1281  
1282    // Calculate byte offset using div8.
1283    //  %1 = lshr i32 %mcdc.temp, 3
1284    auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
1285  
1286    // Add byte offset to section base byte address.
1287    // %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %1
1288    auto *BitmapByteAddr =
1289        Builder.CreateInBoundsPtrAdd(BitmapAddr, BitmapByteOffset);
1290  
1291    // Calculate bit offset into bitmap byte by using div8 remainder (AND ~8)
1292    //  %5 = and i32 %mcdc.temp, 7
1293    //  %6 = trunc i32 %5 to i8
1294    auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
1295  
1296    // Shift bit offset left to form a bitmap.
1297    //  %7 = shl i8 1, %6
1298    auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
1299  
1300    Builder.Insert(getRMWOrCall(BitmapByteAddr, ShiftedVal));
1301    Update->eraseFromParent();
1302  }
1303  
1304  /// Get the name of a profiling variable for a particular function.
getVarName(InstrProfInstBase * Inc,StringRef Prefix,bool & Renamed)1305  static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1306                                bool &Renamed) {
1307    StringRef NamePrefix = getInstrProfNameVarPrefix();
1308    StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
1309    Function *F = Inc->getParent()->getParent();
1310    Module *M = F->getParent();
1311    if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1312        !canRenameComdatFunc(*F)) {
1313      Renamed = false;
1314      return (Prefix + Name).str();
1315    }
1316    Renamed = true;
1317    uint64_t FuncHash = Inc->getHash()->getZExtValue();
1318    SmallVector<char, 24> HashPostfix;
1319    if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
1320      return (Prefix + Name).str();
1321    return (Prefix + Name + "." + Twine(FuncHash)).str();
1322  }
1323  
shouldRecordFunctionAddr(Function * F)1324  static inline bool shouldRecordFunctionAddr(Function *F) {
1325    // Only record function addresses if IR PGO is enabled or if clang value
1326    // profiling is enabled. Recording function addresses greatly increases object
1327    // file size, because it prevents the inliner from deleting functions that
1328    // have been inlined everywhere.
1329    if (!profDataReferencedByCode(*F->getParent()))
1330      return false;
1331  
1332    // Check the linkage
1333    bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1334    if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1335        !HasAvailableExternallyLinkage)
1336      return true;
1337  
1338    // A function marked 'alwaysinline' with available_externally linkage can't
1339    // have its address taken. Doing so would create an undefined external ref to
1340    // the function, which would fail to link.
1341    if (HasAvailableExternallyLinkage &&
1342        F->hasFnAttribute(Attribute::AlwaysInline))
1343      return false;
1344  
1345    // Prohibit function address recording if the function is both internal and
1346    // COMDAT. This avoids the profile data variable referencing internal symbols
1347    // in COMDAT.
1348    if (F->hasLocalLinkage() && F->hasComdat())
1349      return false;
1350  
1351    // Check uses of this function for other than direct calls or invokes to it.
1352    // Inline virtual functions have linkeOnceODR linkage. When a key method
1353    // exists, the vtable will only be emitted in the TU where the key method
1354    // is defined. In a TU where vtable is not available, the function won't
1355    // be 'addresstaken'. If its address is not recorded here, the profile data
1356    // with missing address may be picked by the linker leading  to missing
1357    // indirect call target info.
1358    return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1359  }
1360  
shouldUsePublicSymbol(Function * Fn)1361  static inline bool shouldUsePublicSymbol(Function *Fn) {
1362    // It isn't legal to make an alias of this function at all
1363    if (Fn->isDeclarationForLinker())
1364      return true;
1365  
1366    // Symbols with local linkage can just use the symbol directly without
1367    // introducing relocations
1368    if (Fn->hasLocalLinkage())
1369      return true;
1370  
1371    // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1372    // unfavorable interaction between the new alias and the alias renaming done
1373    // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1374    // be deduplicated, but the renaming scheme ends up preventing renaming, since
1375    // it creates unique names for each alias, resulting in duplicated symbols. In
1376    // the future, we should update the CFI related passes to migrate these
1377    // aliases to the same module as the jump-table they refer to will be defined.
1378    if (Fn->hasMetadata(LLVMContext::MD_type))
1379      return true;
1380  
1381    // For comdat functions, an alias would need the same linkage as the original
1382    // function and hidden visibility. There is no point in adding an alias with
1383    // identical linkage an visibility to avoid introducing symbolic relocations.
1384    if (Fn->hasComdat() &&
1385        (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
1386      return true;
1387  
1388    // its OK to use an alias
1389    return false;
1390  }
1391  
getFuncAddrForProfData(Function * Fn)1392  static inline Constant *getFuncAddrForProfData(Function *Fn) {
1393    auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
1394    // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1395    if (!shouldRecordFunctionAddr(Fn))
1396      return ConstantPointerNull::get(Int8PtrTy);
1397  
1398    // If we can't use an alias, we must use the public symbol, even though this
1399    // may require a symbolic relocation.
1400    if (shouldUsePublicSymbol(Fn))
1401      return Fn;
1402  
1403    // When possible use a private alias to avoid symbolic relocations.
1404    auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage,
1405                                   Fn->getName() + ".local", Fn);
1406  
1407    // When the instrumented function is a COMDAT function, we cannot use a
1408    // private alias. If we did, we would create reference to a local label in
1409    // this function's section. If this version of the function isn't selected by
1410    // the linker, then the metadata would introduce a reference to a discarded
1411    // section. So, for COMDAT functions, we need to adjust the linkage of the
1412    // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1413    // the dynamic symbol table.
1414    //
1415    // Note that this handles COMDAT functions with visibility other than Hidden,
1416    // since that case is covered in shouldUsePublicSymbol()
1417    if (Fn->hasComdat()) {
1418      GA->setLinkage(Fn->getLinkage());
1419      GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
1420    }
1421  
1422    // appendToCompilerUsed(*Fn->getParent(), {GA});
1423  
1424    return GA;
1425  }
1426  
needsRuntimeRegistrationOfSectionRange(const Triple & TT)1427  static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1428    // compiler-rt uses linker support to get data/counters/name start/end for
1429    // ELF, COFF, Mach-O and XCOFF.
1430    if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1431        TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF())
1432      return false;
1433  
1434    return true;
1435  }
1436  
maybeSetComdat(GlobalVariable * GV,GlobalObject * GO,StringRef CounterGroupName)1437  void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,
1438                                    StringRef CounterGroupName) {
1439    // Place lowered global variables in a comdat group if the associated function
1440    // or global variable is a COMDAT. This will make sure that only one copy of
1441    // global variable (e.g. function counters) of the COMDAT function will be
1442    // emitted after linking.
1443    bool NeedComdat = needsComdatForCounter(*GO, M);
1444    bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1445  
1446    if (!UseComdat)
1447      return;
1448  
1449    // Keep in mind that this pass may run before the inliner, so we need to
1450    // create a new comdat group (for counters, profiling data, etc). If we use
1451    // the comdat of the parent function, that will result in relocations against
1452    // discarded sections.
1453    //
1454    // If the data variable is referenced by code, non-counter variables (notably
1455    // profiling data) and counters have to be in different comdats for COFF
1456    // because the Visual C++ linker will report duplicate symbol errors if there
1457    // are multiple external symbols with the same name marked
1458    // IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1459    StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
1460                              ? GV->getName()
1461                              : CounterGroupName;
1462    Comdat *C = M.getOrInsertComdat(GroupName);
1463  
1464    if (!NeedComdat) {
1465      // Object file format must be ELF since `UseComdat && !NeedComdat` is true.
1466      //
1467      // For ELF, when not using COMDAT, put counters, data and values into a
1468      // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1469      // allows -z start-stop-gc to discard the entire group when the function is
1470      // discarded.
1471      C->setSelectionKind(Comdat::NoDeduplicate);
1472    }
1473    GV->setComdat(C);
1474    // COFF doesn't allow the comdat group leader to have private linkage, so
1475    // upgrade private linkage to internal linkage to produce a symbol table
1476    // entry.
1477    if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1478      GV->setLinkage(GlobalValue::InternalLinkage);
1479  }
1480  
shouldRecordVTableAddr(GlobalVariable * GV)1481  static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {
1482    if (!profDataReferencedByCode(*GV->getParent()))
1483      return false;
1484  
1485    if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&
1486        !GV->hasAvailableExternallyLinkage())
1487      return true;
1488  
1489    // This avoids the profile data from referencing internal symbols in
1490    // COMDAT.
1491    if (GV->hasLocalLinkage() && GV->hasComdat())
1492      return false;
1493  
1494    return true;
1495  }
1496  
1497  // FIXME: Introduce an internal alias like what's done for functions to reduce
1498  // the number of relocation entries.
getVTableAddrForProfData(GlobalVariable * GV)1499  static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {
1500    auto *Int8PtrTy = PointerType::getUnqual(GV->getContext());
1501  
1502    // Store a nullptr in __profvt_ if a real address shouldn't be used.
1503    if (!shouldRecordVTableAddr(GV))
1504      return ConstantPointerNull::get(Int8PtrTy);
1505  
1506    return ConstantExpr::getBitCast(GV, Int8PtrTy);
1507  }
1508  
getOrCreateVTableProfData(GlobalVariable * GV)1509  void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {
1510    assert(!DebugInfoCorrelate &&
1511           "Value profiling is not supported with lightweight instrumentation");
1512    if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
1513      return;
1514  
1515    // Skip llvm internal global variable or __prof variables.
1516    if (GV->getName().starts_with("llvm.") ||
1517        GV->getName().starts_with("__llvm") ||
1518        GV->getName().starts_with("__prof"))
1519      return;
1520  
1521    // VTableProfData already created
1522    auto It = VTableDataMap.find(GV);
1523    if (It != VTableDataMap.end() && It->second)
1524      return;
1525  
1526    GlobalValue::LinkageTypes Linkage = GV->getLinkage();
1527    GlobalValue::VisibilityTypes Visibility = GV->getVisibility();
1528  
1529    // This is to keep consistent with per-function profile data
1530    // for correctness.
1531    if (TT.isOSBinFormatXCOFF()) {
1532      Linkage = GlobalValue::InternalLinkage;
1533      Visibility = GlobalValue::DefaultVisibility;
1534    }
1535  
1536    LLVMContext &Ctx = M.getContext();
1537    Type *DataTypes[] = {
1538  #define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,
1539  #include "llvm/ProfileData/InstrProfData.inc"
1540  #undef INSTR_PROF_VTABLE_DATA
1541    };
1542  
1543    auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
1544  
1545    // Used by INSTR_PROF_VTABLE_DATA MACRO
1546    Constant *VTableAddr = getVTableAddrForProfData(GV);
1547    const std::string PGOVTableName = getPGOName(*GV);
1548    // Record the length of the vtable. This is needed since vtable pointers
1549    // loaded from C++ objects might be from the middle of a vtable definition.
1550    uint32_t VTableSizeVal =
1551        M.getDataLayout().getTypeAllocSize(GV->getValueType());
1552  
1553    Constant *DataVals[] = {
1554  #define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,
1555  #include "llvm/ProfileData/InstrProfData.inc"
1556  #undef INSTR_PROF_VTABLE_DATA
1557    };
1558  
1559    auto *Data =
1560        new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,
1561                           ConstantStruct::get(DataTy, DataVals),
1562                           getInstrProfVTableVarPrefix() + PGOVTableName);
1563  
1564    Data->setVisibility(Visibility);
1565    Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));
1566    Data->setAlignment(Align(8));
1567  
1568    maybeSetComdat(Data, GV, Data->getName());
1569  
1570    VTableDataMap[GV] = Data;
1571  
1572    ReferencedVTables.push_back(GV);
1573  
1574    // VTable <Hash, Addr> is used by runtime but not referenced by other
1575    // sections. Conservatively mark it linker retained.
1576    UsedVars.push_back(Data);
1577  }
1578  
setupProfileSection(InstrProfInstBase * Inc,InstrProfSectKind IPSK)1579  GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1580                                                    InstrProfSectKind IPSK) {
1581    GlobalVariable *NamePtr = Inc->getName();
1582  
1583    // Match the linkage and visibility of the name global.
1584    Function *Fn = Inc->getParent()->getParent();
1585    GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
1586    GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1587  
1588    // Use internal rather than private linkage so the counter variable shows up
1589    // in the symbol table when using debug info for correlation.
1590    if ((DebugInfoCorrelate ||
1591         ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
1592        TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
1593      Linkage = GlobalValue::InternalLinkage;
1594  
1595    // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1596    // symbols in the same csect won't be discarded. When there are duplicate weak
1597    // symbols, we can NOT guarantee that the relocations get resolved to the
1598    // intended weak symbol, so we can not ensure the correctness of the relative
1599    // CounterPtr, so we have to use private linkage for counter and data symbols.
1600    if (TT.isOSBinFormatXCOFF()) {
1601      Linkage = GlobalValue::PrivateLinkage;
1602      Visibility = GlobalValue::DefaultVisibility;
1603    }
1604    // Move the name variable to the right section.
1605    bool Renamed;
1606    GlobalVariable *Ptr;
1607    StringRef VarPrefix;
1608    std::string VarName;
1609    if (IPSK == IPSK_cnts) {
1610      VarPrefix = getInstrProfCountersVarPrefix();
1611      VarName = getVarName(Inc, VarPrefix, Renamed);
1612      InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc);
1613      Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
1614    } else if (IPSK == IPSK_bitmap) {
1615      VarPrefix = getInstrProfBitmapVarPrefix();
1616      VarName = getVarName(Inc, VarPrefix, Renamed);
1617      InstrProfMCDCBitmapInstBase *BitmapUpdate =
1618          dyn_cast<InstrProfMCDCBitmapInstBase>(Inc);
1619      Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
1620    } else {
1621      llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1622    }
1623  
1624    Ptr->setVisibility(Visibility);
1625    // Put the counters and bitmaps in their own sections so linkers can
1626    // remove unneeded sections.
1627    Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
1628    Ptr->setLinkage(Linkage);
1629    maybeSetComdat(Ptr, Fn, VarName);
1630    return Ptr;
1631  }
1632  
1633  GlobalVariable *
createRegionBitmaps(InstrProfMCDCBitmapInstBase * Inc,StringRef Name,GlobalValue::LinkageTypes Linkage)1634  InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1635                                    StringRef Name,
1636                                    GlobalValue::LinkageTypes Linkage) {
1637    uint64_t NumBytes = Inc->getNumBitmapBytes();
1638    auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);
1639    auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1640                                 Constant::getNullValue(BitmapTy), Name);
1641    GV->setAlignment(Align(1));
1642    return GV;
1643  }
1644  
1645  GlobalVariable *
getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase * Inc)1646  InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1647    GlobalVariable *NamePtr = Inc->getName();
1648    auto &PD = ProfileDataMap[NamePtr];
1649    if (PD.RegionBitmaps)
1650      return PD.RegionBitmaps;
1651  
1652    // If RegionBitmaps doesn't already exist, create it by first setting up
1653    // the corresponding profile section.
1654    auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);
1655    PD.RegionBitmaps = BitmapPtr;
1656    PD.NumBitmapBytes = Inc->getNumBitmapBytes();
1657    return PD.RegionBitmaps;
1658  }
1659  
1660  GlobalVariable *
createRegionCounters(InstrProfCntrInstBase * Inc,StringRef Name,GlobalValue::LinkageTypes Linkage)1661  InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1662                                     GlobalValue::LinkageTypes Linkage) {
1663    uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1664    auto &Ctx = M.getContext();
1665    GlobalVariable *GV;
1666    if (isa<InstrProfCoverInst>(Inc)) {
1667      auto *CounterTy = Type::getInt8Ty(Ctx);
1668      auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
1669      // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1670      std::vector<Constant *> InitialValues(NumCounters,
1671                                            Constant::getAllOnesValue(CounterTy));
1672      GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1673                              ConstantArray::get(CounterArrTy, InitialValues),
1674                              Name);
1675      GV->setAlignment(Align(1));
1676    } else {
1677      auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
1678      GV = new GlobalVariable(M, CounterTy, false, Linkage,
1679                              Constant::getNullValue(CounterTy), Name);
1680      GV->setAlignment(Align(8));
1681    }
1682    return GV;
1683  }
1684  
1685  GlobalVariable *
getOrCreateRegionCounters(InstrProfCntrInstBase * Inc)1686  InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1687    GlobalVariable *NamePtr = Inc->getName();
1688    auto &PD = ProfileDataMap[NamePtr];
1689    if (PD.RegionCounters)
1690      return PD.RegionCounters;
1691  
1692    // If RegionCounters doesn't already exist, create it by first setting up
1693    // the corresponding profile section.
1694    auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
1695    PD.RegionCounters = CounterPtr;
1696  
1697    if (DebugInfoCorrelate ||
1698        ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
1699      LLVMContext &Ctx = M.getContext();
1700      Function *Fn = Inc->getParent()->getParent();
1701      if (auto *SP = Fn->getSubprogram()) {
1702        DIBuilder DB(M, true, SP->getUnit());
1703        Metadata *FunctionNameAnnotation[] = {
1704            MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),
1705            MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),
1706        };
1707        Metadata *CFGHashAnnotation[] = {
1708            MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName),
1709            ConstantAsMetadata::get(Inc->getHash()),
1710        };
1711        Metadata *NumCountersAnnotation[] = {
1712            MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName),
1713            ConstantAsMetadata::get(Inc->getNumCounters()),
1714        };
1715        auto Annotations = DB.getOrCreateArray({
1716            MDNode::get(Ctx, FunctionNameAnnotation),
1717            MDNode::get(Ctx, CFGHashAnnotation),
1718            MDNode::get(Ctx, NumCountersAnnotation),
1719        });
1720        auto *DICounter = DB.createGlobalVariableExpression(
1721            SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
1722            /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
1723            CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
1724            /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1725            Annotations);
1726        CounterPtr->addDebugInfo(DICounter);
1727        DB.finalize();
1728      }
1729  
1730      // Mark the counter variable as used so that it isn't optimized out.
1731      CompilerUsedVars.push_back(PD.RegionCounters);
1732    }
1733  
1734    // Create the data variable (if it doesn't already exist).
1735    createDataVariable(Inc);
1736  
1737    return PD.RegionCounters;
1738  }
1739  
createDataVariable(InstrProfCntrInstBase * Inc)1740  void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
1741    // When debug information is correlated to profile data, a data variable
1742    // is not needed.
1743    if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
1744      return;
1745  
1746    GlobalVariable *NamePtr = Inc->getName();
1747    auto &PD = ProfileDataMap[NamePtr];
1748  
1749    // Return if data variable was already created.
1750    if (PD.DataVar)
1751      return;
1752  
1753    LLVMContext &Ctx = M.getContext();
1754  
1755    Function *Fn = Inc->getParent()->getParent();
1756    GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
1757    GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1758  
1759    // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1760    // symbols in the same csect won't be discarded. When there are duplicate weak
1761    // symbols, we can NOT guarantee that the relocations get resolved to the
1762    // intended weak symbol, so we can not ensure the correctness of the relative
1763    // CounterPtr, so we have to use private linkage for counter and data symbols.
1764    if (TT.isOSBinFormatXCOFF()) {
1765      Linkage = GlobalValue::PrivateLinkage;
1766      Visibility = GlobalValue::DefaultVisibility;
1767    }
1768  
1769    bool NeedComdat = needsComdatForCounter(*Fn, M);
1770    bool Renamed;
1771  
1772    // The Data Variable section is anchored to profile counters.
1773    std::string CntsVarName =
1774        getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
1775    std::string DataVarName =
1776        getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
1777  
1778    auto *Int8PtrTy = PointerType::getUnqual(Ctx);
1779    // Allocate statically the array of pointers to value profile nodes for
1780    // the current function.
1781    Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
1782    uint64_t NS = 0;
1783    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1784      NS += PD.NumValueSites[Kind];
1785    if (NS > 0 && ValueProfileStaticAlloc &&
1786        !needsRuntimeRegistrationOfSectionRange(TT)) {
1787      ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
1788      auto *ValuesVar = new GlobalVariable(
1789          M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
1790          getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
1791      ValuesVar->setVisibility(Visibility);
1792      setGlobalVariableLargeSection(TT, *ValuesVar);
1793      ValuesVar->setSection(
1794          getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
1795      ValuesVar->setAlignment(Align(8));
1796      maybeSetComdat(ValuesVar, Fn, CntsVarName);
1797      ValuesPtrExpr = ValuesVar;
1798    }
1799  
1800    uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1801    auto *CounterPtr = PD.RegionCounters;
1802  
1803    uint64_t NumBitmapBytes = PD.NumBitmapBytes;
1804  
1805    // Create data variable.
1806    auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
1807    auto *Int16Ty = Type::getInt16Ty(Ctx);
1808    auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
1809    Type *DataTypes[] = {
1810  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
1811  #include "llvm/ProfileData/InstrProfData.inc"
1812    };
1813    auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
1814  
1815    Constant *FunctionAddr = getFuncAddrForProfData(Fn);
1816  
1817    Constant *Int16ArrayVals[IPVK_Last + 1];
1818    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1819      Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
1820  
1821    // If the data variable is not referenced by code (if we don't emit
1822    // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
1823    // data variable live under linker GC, the data variable can be private. This
1824    // optimization applies to ELF.
1825    //
1826    // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
1827    // to be false.
1828    //
1829    // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
1830    // that other copies must have the same CFG and cannot have value profiling.
1831    // If no hash suffix, other profd copies may be referenced by code.
1832    if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1833        (TT.isOSBinFormatELF() ||
1834         (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
1835      Linkage = GlobalValue::PrivateLinkage;
1836      Visibility = GlobalValue::DefaultVisibility;
1837    }
1838    auto *Data =
1839        new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
1840    Constant *RelativeCounterPtr;
1841    GlobalVariable *BitmapPtr = PD.RegionBitmaps;
1842    Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
1843    InstrProfSectKind DataSectionKind;
1844    // With binary profile correlation, profile data is not loaded into memory.
1845    // profile data must reference profile counter with an absolute relocation.
1846    if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
1847      DataSectionKind = IPSK_covdata;
1848      RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
1849      if (BitmapPtr != nullptr)
1850        RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
1851    } else {
1852      // Reference the counter variable with a label difference (link-time
1853      // constant).
1854      DataSectionKind = IPSK_data;
1855      RelativeCounterPtr =
1856          ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
1857                               ConstantExpr::getPtrToInt(Data, IntPtrTy));
1858      if (BitmapPtr != nullptr)
1859        RelativeBitmapPtr =
1860            ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy),
1861                                 ConstantExpr::getPtrToInt(Data, IntPtrTy));
1862    }
1863  
1864    Constant *DataVals[] = {
1865  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
1866  #include "llvm/ProfileData/InstrProfData.inc"
1867    };
1868    Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
1869  
1870    Data->setVisibility(Visibility);
1871    Data->setSection(
1872        getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
1873    Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
1874    maybeSetComdat(Data, Fn, CntsVarName);
1875  
1876    PD.DataVar = Data;
1877  
1878    // Mark the data variable as used so that it isn't stripped out.
1879    CompilerUsedVars.push_back(Data);
1880    // Now that the linkage set by the FE has been passed to the data and counter
1881    // variables, reset Name variable's linkage and visibility to private so that
1882    // it can be removed later by the compiler.
1883    NamePtr->setLinkage(GlobalValue::PrivateLinkage);
1884    // Collect the referenced names to be used by emitNameData.
1885    ReferencedNames.push_back(NamePtr);
1886  }
1887  
emitVNodes()1888  void InstrLowerer::emitVNodes() {
1889    if (!ValueProfileStaticAlloc)
1890      return;
1891  
1892    // For now only support this on platforms that do
1893    // not require runtime registration to discover
1894    // named section start/end.
1895    if (needsRuntimeRegistrationOfSectionRange(TT))
1896      return;
1897  
1898    size_t TotalNS = 0;
1899    for (auto &PD : ProfileDataMap) {
1900      for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1901        TotalNS += PD.second.NumValueSites[Kind];
1902    }
1903  
1904    if (!TotalNS)
1905      return;
1906  
1907    uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
1908  // Heuristic for small programs with very few total value sites.
1909  // The default value of vp-counters-per-site is chosen based on
1910  // the observation that large apps usually have a low percentage
1911  // of value sites that actually have any profile data, and thus
1912  // the average number of counters per site is low. For small
1913  // apps with very few sites, this may not be true. Bump up the
1914  // number of counters in this case.
1915  #define INSTR_PROF_MIN_VAL_COUNTS 10
1916    if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
1917      NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
1918  
1919    auto &Ctx = M.getContext();
1920    Type *VNodeTypes[] = {
1921  #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
1922  #include "llvm/ProfileData/InstrProfData.inc"
1923    };
1924    auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
1925  
1926    ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
1927    auto *VNodesVar = new GlobalVariable(
1928        M, VNodesTy, false, GlobalValue::PrivateLinkage,
1929        Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
1930    setGlobalVariableLargeSection(TT, *VNodesVar);
1931    VNodesVar->setSection(
1932        getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
1933    VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));
1934    // VNodesVar is used by runtime but not referenced via relocation by other
1935    // sections. Conservatively make it linker retained.
1936    UsedVars.push_back(VNodesVar);
1937  }
1938  
emitNameData()1939  void InstrLowerer::emitNameData() {
1940    std::string UncompressedData;
1941  
1942    if (ReferencedNames.empty())
1943      return;
1944  
1945    std::string CompressedNameStr;
1946    if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
1947                                            DoInstrProfNameCompression)) {
1948      report_fatal_error(Twine(toString(std::move(E))), false);
1949    }
1950  
1951    auto &Ctx = M.getContext();
1952    auto *NamesVal =
1953        ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
1954    NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
1955                                  GlobalValue::PrivateLinkage, NamesVal,
1956                                  getInstrProfNamesVarName());
1957    NamesSize = CompressedNameStr.size();
1958    setGlobalVariableLargeSection(TT, *NamesVar);
1959    NamesVar->setSection(
1960        ProfileCorrelate == InstrProfCorrelator::BINARY
1961            ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
1962            : getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
1963    // On COFF, it's important to reduce the alignment down to 1 to prevent the
1964    // linker from inserting padding before the start of the names section or
1965    // between names entries.
1966    NamesVar->setAlignment(Align(1));
1967    // NamesVar is used by runtime but not referenced via relocation by other
1968    // sections. Conservatively make it linker retained.
1969    UsedVars.push_back(NamesVar);
1970  
1971    for (auto *NamePtr : ReferencedNames)
1972      NamePtr->eraseFromParent();
1973  }
1974  
emitVTableNames()1975  void InstrLowerer::emitVTableNames() {
1976    if (!EnableVTableValueProfiling || ReferencedVTables.empty())
1977      return;
1978  
1979    // Collect the PGO names of referenced vtables and compress them.
1980    std::string CompressedVTableNames;
1981    if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,
1982                                       DoInstrProfNameCompression)) {
1983      report_fatal_error(Twine(toString(std::move(E))), false);
1984    }
1985  
1986    auto &Ctx = M.getContext();
1987    auto *VTableNamesVal = ConstantDataArray::getString(
1988        Ctx, StringRef(CompressedVTableNames), false /* AddNull */);
1989    GlobalVariable *VTableNamesVar =
1990        new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,
1991                           GlobalValue::PrivateLinkage, VTableNamesVal,
1992                           getInstrProfVTableNamesVarName());
1993    VTableNamesVar->setSection(
1994        getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));
1995    VTableNamesVar->setAlignment(Align(1));
1996    // Make VTableNames linker retained.
1997    UsedVars.push_back(VTableNamesVar);
1998  }
1999  
emitRegistration()2000  void InstrLowerer::emitRegistration() {
2001    if (!needsRuntimeRegistrationOfSectionRange(TT))
2002      return;
2003  
2004    // Construct the function.
2005    auto *VoidTy = Type::getVoidTy(M.getContext());
2006    auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
2007    auto *Int64Ty = Type::getInt64Ty(M.getContext());
2008    auto *RegisterFTy = FunctionType::get(VoidTy, false);
2009    auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
2010                                       getInstrProfRegFuncsName(), M);
2011    RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2012    if (Options.NoRedZone)
2013      RegisterF->addFnAttr(Attribute::NoRedZone);
2014  
2015    auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
2016    auto *RuntimeRegisterF =
2017        Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
2018                         getInstrProfRegFuncName(), M);
2019  
2020    IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
2021    for (Value *Data : CompilerUsedVars)
2022      if (!isa<Function>(Data))
2023        IRB.CreateCall(RuntimeRegisterF, Data);
2024    for (Value *Data : UsedVars)
2025      if (Data != NamesVar && !isa<Function>(Data))
2026        IRB.CreateCall(RuntimeRegisterF, Data);
2027  
2028    if (NamesVar) {
2029      Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
2030      auto *NamesRegisterTy =
2031          FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
2032      auto *NamesRegisterF =
2033          Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
2034                           getInstrProfNamesRegFuncName(), M);
2035      IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
2036    }
2037  
2038    IRB.CreateRetVoid();
2039  }
2040  
emitRuntimeHook()2041  bool InstrLowerer::emitRuntimeHook() {
2042    // We expect the linker to be invoked with -u<hook_var> flag for Linux
2043    // in which case there is no need to emit the external variable.
2044    if (TT.isOSLinux() || TT.isOSAIX())
2045      return false;
2046  
2047    // If the module's provided its own runtime, we don't need to do anything.
2048    if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
2049      return false;
2050  
2051    // Declare an external variable that will pull in the runtime initialization.
2052    auto *Int32Ty = Type::getInt32Ty(M.getContext());
2053    auto *Var =
2054        new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
2055                           nullptr, getInstrProfRuntimeHookVarName());
2056    Var->setVisibility(GlobalValue::HiddenVisibility);
2057  
2058    if (TT.isOSBinFormatELF() && !TT.isPS()) {
2059      // Mark the user variable as used so that it isn't stripped out.
2060      CompilerUsedVars.push_back(Var);
2061    } else {
2062      // Make a function that uses it.
2063      auto *User = Function::Create(FunctionType::get(Int32Ty, false),
2064                                    GlobalValue::LinkOnceODRLinkage,
2065                                    getInstrProfRuntimeHookVarUseFuncName(), M);
2066      User->addFnAttr(Attribute::NoInline);
2067      if (Options.NoRedZone)
2068        User->addFnAttr(Attribute::NoRedZone);
2069      User->setVisibility(GlobalValue::HiddenVisibility);
2070      if (TT.supportsCOMDAT())
2071        User->setComdat(M.getOrInsertComdat(User->getName()));
2072  
2073      IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
2074      auto *Load = IRB.CreateLoad(Int32Ty, Var);
2075      IRB.CreateRet(Load);
2076  
2077      // Mark the function as used so that it isn't stripped out.
2078      CompilerUsedVars.push_back(User);
2079    }
2080    return true;
2081  }
2082  
emitUses()2083  void InstrLowerer::emitUses() {
2084    // The metadata sections are parallel arrays. Optimizers (e.g.
2085    // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
2086    // we conservatively retain all unconditionally in the compiler.
2087    //
2088    // On ELF and Mach-O, the linker can guarantee the associated sections will be
2089    // retained or discarded as a unit, so llvm.compiler.used is sufficient.
2090    // Similarly on COFF, if prof data is not referenced by code we use one comdat
2091    // and ensure this GC property as well. Otherwise, we have to conservatively
2092    // make all of the sections retained by the linker.
2093    if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
2094        (TT.isOSBinFormatCOFF() && !DataReferencedByCode))
2095      appendToCompilerUsed(M, CompilerUsedVars);
2096    else
2097      appendToUsed(M, CompilerUsedVars);
2098  
2099    // We do not add proper references from used metadata sections to NamesVar and
2100    // VNodesVar, so we have to be conservative and place them in llvm.used
2101    // regardless of the target,
2102    appendToUsed(M, UsedVars);
2103  }
2104  
emitInitialization()2105  void InstrLowerer::emitInitialization() {
2106    // Create ProfileFileName variable. Don't don't this for the
2107    // context-sensitive instrumentation lowering: This lowering is after
2108    // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
2109    // have already create the variable before LTO/ThinLTO linking.
2110    if (!IsCS)
2111      createProfileFileNameVar(M, Options.InstrProfileOutput);
2112    Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
2113    if (!RegisterF)
2114      return;
2115  
2116    // Create the initialization function.
2117    auto *VoidTy = Type::getVoidTy(M.getContext());
2118    auto *F = Function::Create(FunctionType::get(VoidTy, false),
2119                               GlobalValue::InternalLinkage,
2120                               getInstrProfInitFuncName(), M);
2121    F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
2122    F->addFnAttr(Attribute::NoInline);
2123    if (Options.NoRedZone)
2124      F->addFnAttr(Attribute::NoRedZone);
2125  
2126    // Add the basic block and the necessary calls.
2127    IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
2128    IRB.CreateCall(RegisterF, {});
2129    IRB.CreateRetVoid();
2130  
2131    appendToGlobalCtors(M, F, 0);
2132  }
2133  
2134  namespace llvm {
2135  // Create the variable for profile sampling.
createProfileSamplingVar(Module & M)2136  void createProfileSamplingVar(Module &M) {
2137    const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));
2138    IntegerType *SamplingVarTy;
2139    Constant *ValueZero;
2140    if (SampledInstrPeriod.getValue() <= USHRT_MAX) {
2141      SamplingVarTy = Type::getInt16Ty(M.getContext());
2142      ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));
2143    } else {
2144      SamplingVarTy = Type::getInt32Ty(M.getContext());
2145      ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0));
2146    }
2147    auto SamplingVar = new GlobalVariable(
2148        M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);
2149    SamplingVar->setVisibility(GlobalValue::DefaultVisibility);
2150    SamplingVar->setThreadLocal(true);
2151    Triple TT(M.getTargetTriple());
2152    if (TT.supportsCOMDAT()) {
2153      SamplingVar->setLinkage(GlobalValue::ExternalLinkage);
2154      SamplingVar->setComdat(M.getOrInsertComdat(VarName));
2155    }
2156    appendToCompilerUsed(M, SamplingVar);
2157  }
2158  } // namespace llvm
2159