//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
// It also builds the data structures and initialization code needed for
// updating execution counts and emitting the profile at runtime.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;

#define DEBUG_TYPE "instrprof"

// The two correlation options below live in namespace llvm rather than in the
// anonymous namespace, so they have external linkage (presumably so that other
// translation units can read them -- confirm against their users).
namespace llvm {
// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
// -profile-correlate=debug-info.
cl::opt<bool> DebugInfoCorrelate(
    "debug-info-correlate",
    cl::desc("Use debug info to correlate profiles. (Deprecated, use "
             "-profile-correlate=debug-info)"),
    cl::init(false));

// Selects how profiles are correlated: not at all (the default), through
// debug info, or through the binary.
cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
    "profile-correlate",
    cl::desc("Use debug info or binary file to correlate profiles."),
    cl::init(InstrProfCorrelator::NONE),
    cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
                          "No profile correlation"),
               clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
                          "Use debug info to correlate"),
               clEnumValN(InstrProfCorrelator::BINARY, "binary",
                          "Use binary to correlate")));
} // namespace llvm

namespace {

// File-local tuning knobs for the lowering.

cl::opt<bool> DoHashBasedCounterSplit(
    "hash-based-counter-split",
    cl::desc("Rename counter variable of a comdat function based on cfg hash"),
    cl::init(true));

cl::opt<bool>
    RuntimeCounterRelocation("runtime-counter-relocation",
                             cl::desc("Enable relocating counters at runtime."),
                             cl::init(false));

cl::opt<bool> ValueProfileStaticAlloc(
    "vp-static-alloc",
    cl::desc("Do static counter allocation for value profiler"),
    cl::init(true));

cl::opt<double> NumCountersPerValueSite(
    "vp-counters-per-site",
    cl::desc("The average number of profile counters allocated "
             "per value profiling site."),
    // This is set to a very small value because in real programs, only
    // a very small percentage of value sites have non-zero targets, e.g, 1/30.
    // For those sites with non-zero profile, the average number of targets
    // is usually smaller than 2.
    cl::init(1.0));

cl::opt<bool> AtomicCounterUpdateAll(
    "instrprof-atomic-counter-update-all",
    cl::desc("Make all profile counter updates atomic (for testing only)"),
    cl::init(false));

cl::opt<bool> AtomicCounterUpdatePromoted(
    "atomic-counter-update-promoted",
    cl::desc("Do counter update using atomic fetch add "
             " for promoted counters only"),
    cl::init(false));

cl::opt<bool> AtomicFirstCounter(
    "atomic-first-counter",
    cl::desc("Use atomic fetch add for first counter in a function (usually "
             "the entry counter)"),
    cl::init(false));

// If the option is not specified, whether counter promotion is done depends
// on how the instrumentation lowering pipeline is set up, i.e. the option's
// own default value does not by itself decide whether promotion happens.
// Explicitly setting this option overrides the pipeline's choice.
cl::opt<bool> DoCounterPromotion("do-counter-promotion",
                                 cl::desc("Do counter register promotion"),
                                 cl::init(false));
// Per-loop cap on the number of promoted counters.
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
    "max-counter-promotions-per-loop", cl::init(20),
    cl::desc("Max number counter promotions per loop to avoid"
             " increasing register pressure too much"));

// A debug option
// (-1, the default, means "no limit"; see PGOCounterPromoter::run()).
cl::opt<int>
    MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
                       cl::desc("Max number of allowed counter promotions"));

// Loops with more exiting blocks than this are not promoted when promotion is
// not guided by block frequency info.
cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
    "speculative-counter-promotion-max-exiting", cl::init(3),
    cl::desc("The max number of exiting blocks of a loop to allow "
             " speculative counter promotion"));

// No explicit cl::init is given for this option.
cl::opt<bool> SpeculativeCounterPromotionToLoop(
    "speculative-counter-promotion-to-loop",
    cl::desc("When the option is false, if the target block is in a loop, "
             "the promotion will be disallowed unless the promoted counter "
             " update can be further/iteratively promoted into an acyclic "
             " region."));

cl::opt<bool> IterativeCounterPromotion(
    "iterative-counter-promotion", cl::init(true),
    cl::desc("Allow counter promotion across the whole loop nest."));

cl::opt<bool> SkipRetExitBlock(
    "skip-ret-exit-block", cl::init(true),
    cl::desc("Suppress counter promotion if exit blocks contain ret."));

/// A counter update expressed as its (load, store) instruction pair.
using LoadStorePair = std::pair<Instruction *, Instruction *>;

/// Performs the lowering of instrprof_* intrinsics for one module.
///
/// The single public entry point is lower(); everything else is an
/// implementation detail of that lowering.
class InstrLowerer final {
public:
  /// \param M       Module whose profiling intrinsics are lowered.
  /// \param Options Lowering options; a copy is stored.
  /// \param GetTLI  Callback that yields the TargetLibraryInfo of a function.
  /// \param IsCS    True when lowering context-sensitive instrumentation.
  InstrLowerer(Module &M, const InstrProfOptions &Options,
               std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
               bool IsCS)
      : M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),
        GetTLI(GetTLI) {}

  /// Lower all profiling intrinsics in the module. Returns true if the module
  /// was changed.
  bool lower();

private:
  /// The module being lowered.
  Module &M;
  /// Copy of the options handed to the constructor.
  const InstrProfOptions Options;
  /// Target triple parsed from the module; used for platform-specific
  /// decisions such as runtime counter relocation.
  const Triple TT;
  // Is this lowering for the context-sensitive instrumentation.
  const bool IsCS;

  /// Callback that yields the TargetLibraryInfo of a function.
  std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
  /// Profile bookkeeping kept per instrumented function.
  struct PerFunctionProfileData {
    /// Number of value sites per value kind (highest seen site index + 1, as
    /// computed by computeNumValueSiteCounts()).
    uint32_t NumValueSites[IPVK_Last + 1] = {};
    /// Counter array of the function, or null if not created yet.
    GlobalVariable *RegionCounters = nullptr;
    /// Profile data variable of the function, or null if not created yet.
    GlobalVariable *DataVar = nullptr;
    /// MC/DC bitmap array of the function, or null if not created yet.
    GlobalVariable *RegionBitmaps = nullptr;
    uint32_t NumBitmapBytes = 0;

    PerFunctionProfileData() = default;
  };
  /// Per-function data, keyed by the name variable that the profiling
  /// intrinsics of the function refer to.
  DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
  /// If runtime relocation is enabled, this maps functions to the load
  /// instruction that produces the profile relocation bias.
  DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
  // NOTE(review): the users of the following five members are outside the
  // code visible here; presumably they feed emitUses()/emitNameData().
  std::vector<GlobalValue *> CompilerUsedVars;
  std::vector<GlobalValue *> UsedVars;
  std::vector<GlobalVariable *> ReferencedNames;
  GlobalVariable *NamesVar = nullptr;
  size_t NamesSize = 0;

  // vector of counter load/store pairs to be register promoted.
  // Cleared at the start of lowerIntrinsics(), filled by lowerIncrement() and
  // consumed by promoteCounterLoadStores().
  std::vector<LoadStorePair> PromotionCandidates;

  /// Running total of promoted counters, updated by PGOCounterPromoter::run().
  int64_t TotalCountersPromoted = 0;

  /// Lower instrumentation intrinsics in the function. Returns true if there
  /// was any lowering.
  bool lowerIntrinsics(Function *F);

  /// Register-promote counter loads and stores in loops.
  void promoteCounterLoadStores(Function *F);

  /// Returns true if relocating counters at runtime is enabled.
  bool isRuntimeCounterRelocationEnabled() const;

  /// Returns true if profile counter update register promotion is enabled.
  bool isCounterPromotionEnabled() const;

  /// Count the number of instrumented value sites for the function.
  void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);

  /// Replace instrprof.value.profile with a call to runtime library.
  void lowerValueProfileInst(InstrProfValueProfileInst *Ins);

  /// Replace instrprof.cover with a store instruction to the coverage byte.
  void lowerCover(InstrProfCoverInst *Inc);

  /// Replace instrprof.timestamp with a call to
  /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
  void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);

  /// Replace instrprof.increment with an increment of the appropriate value.
  void lowerIncrement(InstrProfIncrementInst *Inc);

  /// Force emitting of name vars for unused functions.
  void lowerCoverageData(GlobalVariable *CoverageNamesVar);

  /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
  /// using the index represented by a temp value into a bitmap.
  void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);

  /// Replace instrprof.mcdc.temp.update with a shift and or instruction using
  /// the corresponding condition ID.
  void lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate *Ins);

  /// Compute the address of the counter value that this profiling instruction
  /// acts on.
  Value *getCounterAddress(InstrProfCntrInstBase *I);

  /// Get the region counters for an increment, creating them if necessary.
  ///
  /// If the counter array doesn't yet exist, the profile data variables
  /// referring to them will also be created.
  GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);

  /// Create the region counters.
  GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
                                       StringRef Name,
                                       GlobalValue::LinkageTypes Linkage);

  /// Compute the address of the test vector bitmap that this profiling
  /// instruction acts on.
  Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);

  /// Get the region bitmaps for an increment, creating them if necessary.
  ///
  /// If the bitmap array doesn't yet exist, the profile data variables
  /// referring to them will also be created.
  GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);

  /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
  /// an MC/DC Decision region. The number of bytes required is indicated by
  /// the intrinsic used (type InstrProfMCDCBitmapInstBase).  This is called
  /// as part of setupProfileSection() and is conceptually very similar to
  /// what is done for profile data counters in createRegionCounters().
  GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
                                      StringRef Name,
                                      GlobalValue::LinkageTypes Linkage);

  /// Set Comdat property of GV, if required.
  void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName);

  /// Setup the sections into which counters and bitmaps are allocated.
  GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
                                      InstrProfSectKind IPSK);

  /// Create INSTR_PROF_DATA variable for counters and bitmaps.
  void createDataVariable(InstrProfCntrInstBase *Inc);

  /// Emit the section with compressed function names.
  void emitNameData();

  /// Emit value nodes section for value profiling.
  void emitVNodes();

  /// Emit runtime registration functions for each profile data variable.
  void emitRegistration();

  /// Emit the necessary plumbing to pull in the runtime initialization.
  /// Returns true if a change was made.
  bool emitRuntimeHook();

  /// Add uses of our data variables and runtime hook.
  void emitUses();

  /// Create a static initializer for our data, on platforms that need it,
  /// and for any profile output file that was specified.
  void emitInitialization();
};

///
/// A helper class to promote one counter RMW operation in the loop
/// into register update.
///
/// The RMW update for the counter will be sunk out of the loop after
/// the transformation.
324 /// 325 class PGOCounterPromoterHelper : public LoadAndStorePromoter { 326 public: 327 PGOCounterPromoterHelper( 328 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, 329 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks, 330 ArrayRef<Instruction *> InsertPts, 331 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, 332 LoopInfo &LI) 333 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), 334 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { 335 assert(isa<LoadInst>(L)); 336 assert(isa<StoreInst>(S)); 337 SSA.AddAvailableValue(PH, Init); 338 } 339 340 void doExtraRewritesBeforeFinalDeletion() override { 341 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 342 BasicBlock *ExitBlock = ExitBlocks[i]; 343 Instruction *InsertPos = InsertPts[i]; 344 // Get LiveIn value into the ExitBlock. If there are multiple 345 // predecessors, the value is defined by a PHI node in this 346 // block. 347 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 348 Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); 349 Type *Ty = LiveInValue->getType(); 350 IRBuilder<> Builder(InsertPos); 351 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) { 352 // If isRuntimeCounterRelocationEnabled() is true then the address of 353 // the store instruction is computed with two instructions in 354 // InstrProfiling::getCounterAddress(). We need to copy those 355 // instructions to this block to compute Addr correctly. 
356 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias> 357 // %Addr = inttoptr i64 %BiasAdd to i64* 358 auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0)); 359 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add); 360 Value *BiasInst = Builder.Insert(OrigBiasInst->clone()); 361 Addr = Builder.CreateIntToPtr(BiasInst, 362 PointerType::getUnqual(Ty->getContext())); 363 } 364 if (AtomicCounterUpdatePromoted) 365 // automic update currently can only be promoted across the current 366 // loop, not the whole loop nest. 367 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, 368 MaybeAlign(), 369 AtomicOrdering::SequentiallyConsistent); 370 else { 371 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted"); 372 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); 373 auto *NewStore = Builder.CreateStore(NewVal, Addr); 374 375 // Now update the parent loop's candidate list: 376 if (IterativeCounterPromotion) { 377 auto *TargetLoop = LI.getLoopFor(ExitBlock); 378 if (TargetLoop) 379 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); 380 } 381 } 382 } 383 } 384 385 private: 386 Instruction *Store; 387 ArrayRef<BasicBlock *> ExitBlocks; 388 ArrayRef<Instruction *> InsertPts; 389 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; 390 LoopInfo &LI; 391 }; 392 393 /// A helper class to do register promotion for all profile counter 394 /// updates in a loop. 395 /// 396 class PGOCounterPromoter { 397 public: 398 PGOCounterPromoter( 399 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, 400 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) 401 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) { 402 403 // Skip collection of ExitBlocks and InsertPts for loops that will not be 404 // able to have counters promoted. 
405 SmallVector<BasicBlock *, 8> LoopExitBlocks; 406 SmallPtrSet<BasicBlock *, 8> BlockSet; 407 408 L.getExitBlocks(LoopExitBlocks); 409 if (!isPromotionPossible(&L, LoopExitBlocks)) 410 return; 411 412 for (BasicBlock *ExitBlock : LoopExitBlocks) { 413 if (BlockSet.insert(ExitBlock).second && 414 llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) { 415 return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock); 416 })) { 417 ExitBlocks.push_back(ExitBlock); 418 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 419 } 420 } 421 } 422 423 bool run(int64_t *NumPromoted) { 424 // Skip 'infinite' loops: 425 if (ExitBlocks.size() == 0) 426 return false; 427 428 // Skip if any of the ExitBlocks contains a ret instruction. 429 // This is to prevent dumping of incomplete profile -- if the 430 // the loop is a long running loop and dump is called in the middle 431 // of the loop, the result profile is incomplete. 432 // FIXME: add other heuristics to detect long running loops. 433 if (SkipRetExitBlock) { 434 for (auto *BB : ExitBlocks) 435 if (isa<ReturnInst>(BB->getTerminator())) 436 return false; 437 } 438 439 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); 440 if (MaxProm == 0) 441 return false; 442 443 unsigned Promoted = 0; 444 for (auto &Cand : LoopToCandidates[&L]) { 445 446 SmallVector<PHINode *, 4> NewPHIs; 447 SSAUpdater SSA(&NewPHIs); 448 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); 449 450 // If BFI is set, we will use it to guide the promotions. 451 if (BFI) { 452 auto *BB = Cand.first->getParent(); 453 auto InstrCount = BFI->getBlockProfileCount(BB); 454 if (!InstrCount) 455 continue; 456 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); 457 // If the average loop trip count is not greater than 1.5, we skip 458 // promotion. 
459 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2)) 460 continue; 461 } 462 463 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, 464 L.getLoopPreheader(), ExitBlocks, 465 InsertPts, LoopToCandidates, LI); 466 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); 467 Promoted++; 468 if (Promoted >= MaxProm) 469 break; 470 471 (*NumPromoted)++; 472 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) 473 break; 474 } 475 476 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" 477 << L.getLoopDepth() << ")\n"); 478 return Promoted != 0; 479 } 480 481 private: 482 bool allowSpeculativeCounterPromotion(Loop *LP) { 483 SmallVector<BasicBlock *, 8> ExitingBlocks; 484 L.getExitingBlocks(ExitingBlocks); 485 // Not considierered speculative. 486 if (ExitingBlocks.size() == 1) 487 return true; 488 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) 489 return false; 490 return true; 491 } 492 493 // Check whether the loop satisfies the basic conditions needed to perform 494 // Counter Promotions. 495 bool 496 isPromotionPossible(Loop *LP, 497 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) { 498 // We can't insert into a catchswitch. 499 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { 500 return isa<CatchSwitchInst>(Exit->getTerminator()); 501 })) 502 return false; 503 504 if (!LP->hasDedicatedExits()) 505 return false; 506 507 BasicBlock *PH = LP->getLoopPreheader(); 508 if (!PH) 509 return false; 510 511 return true; 512 } 513 514 // Returns the max number of Counter Promotions for LP. 515 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { 516 SmallVector<BasicBlock *, 8> LoopExitBlocks; 517 LP->getExitBlocks(LoopExitBlocks); 518 if (!isPromotionPossible(LP, LoopExitBlocks)) 519 return 0; 520 521 SmallVector<BasicBlock *, 8> ExitingBlocks; 522 LP->getExitingBlocks(ExitingBlocks); 523 524 // If BFI is set, we do more aggressive promotions based on BFI. 
525 if (BFI) 526 return (unsigned)-1; 527 528 // Not considierered speculative. 529 if (ExitingBlocks.size() == 1) 530 return MaxNumOfPromotionsPerLoop; 531 532 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) 533 return 0; 534 535 // Whether the target block is in a loop does not matter: 536 if (SpeculativeCounterPromotionToLoop) 537 return MaxNumOfPromotionsPerLoop; 538 539 // Now check the target block: 540 unsigned MaxProm = MaxNumOfPromotionsPerLoop; 541 for (auto *TargetBlock : LoopExitBlocks) { 542 auto *TargetLoop = LI.getLoopFor(TargetBlock); 543 if (!TargetLoop) 544 continue; 545 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); 546 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); 547 MaxProm = 548 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - 549 PendingCandsInTarget); 550 } 551 return MaxProm; 552 } 553 554 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; 555 SmallVector<BasicBlock *, 8> ExitBlocks; 556 SmallVector<Instruction *, 8> InsertPts; 557 Loop &L; 558 LoopInfo &LI; 559 BlockFrequencyInfo *BFI; 560 }; 561 562 enum class ValueProfilingCallType { 563 // Individual values are tracked. Currently used for indiret call target 564 // profiling. 565 Default, 566 567 // MemOp: the memop size value profiling. 
568 MemOp 569 }; 570 571 } // end anonymous namespace 572 573 PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, 574 ModuleAnalysisManager &AM) { 575 FunctionAnalysisManager &FAM = 576 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 577 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 578 return FAM.getResult<TargetLibraryAnalysis>(F); 579 }; 580 InstrLowerer Lowerer(M, Options, GetTLI, IsCS); 581 if (!Lowerer.lower()) 582 return PreservedAnalyses::all(); 583 584 return PreservedAnalyses::none(); 585 } 586 587 bool InstrLowerer::lowerIntrinsics(Function *F) { 588 bool MadeChange = false; 589 PromotionCandidates.clear(); 590 for (BasicBlock &BB : *F) { 591 for (Instruction &Instr : llvm::make_early_inc_range(BB)) { 592 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) { 593 lowerIncrement(IPIS); 594 MadeChange = true; 595 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { 596 lowerIncrement(IPI); 597 MadeChange = true; 598 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) { 599 lowerTimestamp(IPC); 600 MadeChange = true; 601 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { 602 lowerCover(IPC); 603 MadeChange = true; 604 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { 605 lowerValueProfileInst(IPVP); 606 MadeChange = true; 607 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) { 608 IPMP->eraseFromParent(); 609 MadeChange = true; 610 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) { 611 lowerMCDCTestVectorBitmapUpdate(IPBU); 612 MadeChange = true; 613 } else if (auto *IPTU = dyn_cast<InstrProfMCDCCondBitmapUpdate>(&Instr)) { 614 lowerMCDCCondBitmapUpdate(IPTU); 615 MadeChange = true; 616 } 617 } 618 } 619 620 if (!MadeChange) 621 return false; 622 623 promoteCounterLoadStores(F); 624 return true; 625 } 626 627 bool InstrLowerer::isRuntimeCounterRelocationEnabled() const { 628 // Mach-O don't support weak 
external references. 629 if (TT.isOSBinFormatMachO()) 630 return false; 631 632 if (RuntimeCounterRelocation.getNumOccurrences() > 0) 633 return RuntimeCounterRelocation; 634 635 // Fuchsia uses runtime counter relocation by default. 636 return TT.isOSFuchsia(); 637 } 638 639 bool InstrLowerer::isCounterPromotionEnabled() const { 640 if (DoCounterPromotion.getNumOccurrences() > 0) 641 return DoCounterPromotion; 642 643 return Options.DoCounterPromotion; 644 } 645 646 void InstrLowerer::promoteCounterLoadStores(Function *F) { 647 if (!isCounterPromotionEnabled()) 648 return; 649 650 DominatorTree DT(*F); 651 LoopInfo LI(DT); 652 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates; 653 654 std::unique_ptr<BlockFrequencyInfo> BFI; 655 if (Options.UseBFIInPromotion) { 656 std::unique_ptr<BranchProbabilityInfo> BPI; 657 BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F))); 658 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); 659 } 660 661 for (const auto &LoadStore : PromotionCandidates) { 662 auto *CounterLoad = LoadStore.first; 663 auto *CounterStore = LoadStore.second; 664 BasicBlock *BB = CounterLoad->getParent(); 665 Loop *ParentLoop = LI.getLoopFor(BB); 666 if (!ParentLoop) 667 continue; 668 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore); 669 } 670 671 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); 672 673 // Do a post-order traversal of the loops so that counter updates can be 674 // iteratively hoisted outside the loop nest. 675 for (auto *Loop : llvm::reverse(Loops)) { 676 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); 677 Promoter.run(&TotalCountersPromoted); 678 } 679 } 680 681 static bool needsRuntimeHookUnconditionally(const Triple &TT) { 682 // On Fuchsia, we only need runtime hook if any counters are present. 683 if (TT.isOSFuchsia()) 684 return false; 685 686 return true; 687 } 688 689 /// Check if the module contains uses of any profiling intrinsics. 
690 static bool containsProfilingIntrinsics(Module &M) { 691 auto containsIntrinsic = [&](int ID) { 692 if (auto *F = M.getFunction(Intrinsic::getName(ID))) 693 return !F->use_empty(); 694 return false; 695 }; 696 return containsIntrinsic(llvm::Intrinsic::instrprof_cover) || 697 containsIntrinsic(llvm::Intrinsic::instrprof_increment) || 698 containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) || 699 containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) || 700 containsIntrinsic(llvm::Intrinsic::instrprof_value_profile); 701 } 702 703 bool InstrLowerer::lower() { 704 bool MadeChange = false; 705 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT); 706 if (NeedsRuntimeHook) 707 MadeChange = emitRuntimeHook(); 708 709 bool ContainsProfiling = containsProfilingIntrinsics(M); 710 GlobalVariable *CoverageNamesVar = 711 M.getNamedGlobal(getCoverageUnusedNamesVarName()); 712 // Improve compile time by avoiding linear scans when there is no work. 713 if (!ContainsProfiling && !CoverageNamesVar) 714 return MadeChange; 715 716 // We did not know how many value sites there would be inside 717 // the instrumented function. This is counting the number of instrumented 718 // target value sites to enter it as field in the profile data variable. 719 for (Function &F : M) { 720 InstrProfCntrInstBase *FirstProfInst = nullptr; 721 for (BasicBlock &BB : F) { 722 for (auto I = BB.begin(), E = BB.end(); I != E; I++) { 723 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I)) 724 computeNumValueSiteCounts(Ind); 725 else { 726 if (FirstProfInst == nullptr && 727 (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I))) 728 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I); 729 // If the MCDCBitmapParameters intrinsic seen, create the bitmaps. 
730 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I)) 731 static_cast<void>(getOrCreateRegionBitmaps(Params)); 732 } 733 } 734 } 735 736 // Use a profile intrinsic to create the region counters and data variable. 737 // Also create the data variable based on the MCDCParams. 738 if (FirstProfInst != nullptr) { 739 static_cast<void>(getOrCreateRegionCounters(FirstProfInst)); 740 } 741 } 742 743 for (Function &F : M) 744 MadeChange |= lowerIntrinsics(&F); 745 746 if (CoverageNamesVar) { 747 lowerCoverageData(CoverageNamesVar); 748 MadeChange = true; 749 } 750 751 if (!MadeChange) 752 return false; 753 754 emitVNodes(); 755 emitNameData(); 756 757 // Emit runtime hook for the cases where the target does not unconditionally 758 // require pulling in profile runtime, and coverage is enabled on code that is 759 // not eliminated by the front-end, e.g. unused functions with internal 760 // linkage. 761 if (!NeedsRuntimeHook && ContainsProfiling) 762 emitRuntimeHook(); 763 764 emitRegistration(); 765 emitUses(); 766 emitInitialization(); 767 return true; 768 } 769 770 static FunctionCallee getOrInsertValueProfilingCall( 771 Module &M, const TargetLibraryInfo &TLI, 772 ValueProfilingCallType CallType = ValueProfilingCallType::Default) { 773 LLVMContext &Ctx = M.getContext(); 774 auto *ReturnTy = Type::getVoidTy(M.getContext()); 775 776 AttributeList AL; 777 if (auto AK = TLI.getExtAttrForI32Param(false)) 778 AL = AL.addParamAttribute(M.getContext(), 2, AK); 779 780 assert((CallType == ValueProfilingCallType::Default || 781 CallType == ValueProfilingCallType::MemOp) && 782 "Must be Default or MemOp"); 783 Type *ParamTypes[] = { 784 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType 785 #include "llvm/ProfileData/InstrProfData.inc" 786 }; 787 auto *ValueProfilingCallTy = 788 FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false); 789 StringRef FuncName = CallType == ValueProfilingCallType::Default 790 ? 
getInstrProfValueProfFuncName() 791 : getInstrProfValueProfMemOpFuncName(); 792 return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL); 793 } 794 795 void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { 796 GlobalVariable *Name = Ind->getName(); 797 uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); 798 uint64_t Index = Ind->getIndex()->getZExtValue(); 799 auto &PD = ProfileDataMap[Name]; 800 PD.NumValueSites[ValueKind] = 801 std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1)); 802 } 803 804 void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { 805 // TODO: Value profiling heavily depends on the data section which is omitted 806 // in lightweight mode. We need to move the value profile pointer to the 807 // Counter struct to get this working. 808 assert( 809 !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE && 810 "Value profiling is not yet supported with lightweight instrumentation"); 811 GlobalVariable *Name = Ind->getName(); 812 auto It = ProfileDataMap.find(Name); 813 assert(It != ProfileDataMap.end() && It->second.DataVar && 814 "value profiling detected in function with no counter incerement"); 815 816 GlobalVariable *DataVar = It->second.DataVar; 817 uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); 818 uint64_t Index = Ind->getIndex()->getZExtValue(); 819 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) 820 Index += It->second.NumValueSites[Kind]; 821 822 IRBuilder<> Builder(Ind); 823 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() == 824 llvm::InstrProfValueKind::IPVK_MemOPSize); 825 CallInst *Call = nullptr; 826 auto *TLI = &GetTLI(*Ind->getFunction()); 827 828 // To support value profiling calls within Windows exception handlers, funclet 829 // information contained within operand bundles needs to be copied over to 830 // the library call. This is required for the IR to be processed by the 831 // WinEHPrepare pass. 
832 SmallVector<OperandBundleDef, 1> OpBundles; 833 Ind->getOperandBundlesAsDefs(OpBundles); 834 if (!IsMemOpSize) { 835 Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)}; 836 Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args, 837 OpBundles); 838 } else { 839 Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)}; 840 Call = Builder.CreateCall( 841 getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp), 842 Args, OpBundles); 843 } 844 if (auto AK = TLI->getExtAttrForI32Param(false)) 845 Call->addParamAttr(2, AK); 846 Ind->replaceAllUsesWith(Call); 847 Ind->eraseFromParent(); 848 } 849 850 Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { 851 auto *Counters = getOrCreateRegionCounters(I); 852 IRBuilder<> Builder(I); 853 854 if (isa<InstrProfTimestampInst>(I)) 855 Counters->setAlignment(Align(8)); 856 857 auto *Addr = Builder.CreateConstInBoundsGEP2_32( 858 Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue()); 859 860 if (!isRuntimeCounterRelocationEnabled()) 861 return Addr; 862 863 Type *Int64Ty = Type::getInt64Ty(M.getContext()); 864 Function *Fn = I->getParent()->getParent(); 865 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn]; 866 if (!BiasLI) { 867 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front()); 868 auto *Bias = M.getGlobalVariable(getInstrProfCounterBiasVarName()); 869 if (!Bias) { 870 // Compiler must define this variable when runtime counter relocation 871 // is being used. Runtime has a weak external reference that is used 872 // to check whether that's the case or not. 
873 Bias = new GlobalVariable( 874 M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, 875 Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); 876 Bias->setVisibility(GlobalVariable::HiddenVisibility); 877 // A definition that's weak (linkonce_odr) without being in a COMDAT 878 // section wouldn't lead to link errors, but it would lead to a dead 879 // data word from every TU but one. Putting it in COMDAT ensures there 880 // will be exactly one data slot in the link. 881 if (TT.supportsCOMDAT()) 882 Bias->setComdat(M.getOrInsertComdat(Bias->getName())); 883 } 884 BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias); 885 } 886 auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI); 887 return Builder.CreateIntToPtr(Add, Addr->getType()); 888 } 889 890 Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { 891 auto *Bitmaps = getOrCreateRegionBitmaps(I); 892 IRBuilder<> Builder(I); 893 894 auto *Addr = Builder.CreateConstInBoundsGEP2_32( 895 Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); 896 897 if (isRuntimeCounterRelocationEnabled()) { 898 LLVMContext &Ctx = M.getContext(); 899 Ctx.diagnose(DiagnosticInfoPGOProfile( 900 M.getName().data(), 901 Twine("Runtime counter relocation is presently not supported for MC/DC " 902 "bitmaps."), 903 DS_Warning)); 904 } 905 906 return Addr; 907 } 908 909 void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) { 910 auto *Addr = getCounterAddress(CoverInstruction); 911 IRBuilder<> Builder(CoverInstruction); 912 // We store zero to represent that this block is covered. 
913 Builder.CreateStore(Builder.getInt8(0), Addr); 914 CoverInstruction->eraseFromParent(); 915 } 916 917 void InstrLowerer::lowerTimestamp( 918 InstrProfTimestampInst *TimestampInstruction) { 919 assert(TimestampInstruction->getIndex()->isZeroValue() && 920 "timestamp probes are always the first probe for a function"); 921 auto &Ctx = M.getContext(); 922 auto *TimestampAddr = getCounterAddress(TimestampInstruction); 923 IRBuilder<> Builder(TimestampInstruction); 924 auto *CalleeTy = 925 FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false); 926 auto Callee = M.getOrInsertFunction( 927 INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy); 928 Builder.CreateCall(Callee, {TimestampAddr}); 929 TimestampInstruction->eraseFromParent(); 930 } 931 932 void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) { 933 auto *Addr = getCounterAddress(Inc); 934 935 IRBuilder<> Builder(Inc); 936 if (Options.Atomic || AtomicCounterUpdateAll || 937 (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) { 938 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), 939 MaybeAlign(), AtomicOrdering::Monotonic); 940 } else { 941 Value *IncStep = Inc->getStep(); 942 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount"); 943 auto *Count = Builder.CreateAdd(Load, Inc->getStep()); 944 auto *Store = Builder.CreateStore(Count, Addr); 945 if (isCounterPromotionEnabled()) 946 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store); 947 } 948 Inc->eraseFromParent(); 949 } 950 951 void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) { 952 ConstantArray *Names = 953 cast<ConstantArray>(CoverageNamesVar->getInitializer()); 954 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { 955 Constant *NC = Names->getOperand(I); 956 Value *V = NC->stripPointerCasts(); 957 assert(isa<GlobalVariable>(V) && "Missing reference to function name"); 958 GlobalVariable *Name = cast<GlobalVariable>(V); 959 960 
Name->setLinkage(GlobalValue::PrivateLinkage); 961 ReferencedNames.push_back(Name); 962 if (isa<ConstantExpr>(NC)) 963 NC->dropAllReferences(); 964 } 965 CoverageNamesVar->eraseFromParent(); 966 } 967 968 void InstrLowerer::lowerMCDCTestVectorBitmapUpdate( 969 InstrProfMCDCTVBitmapUpdate *Update) { 970 IRBuilder<> Builder(Update); 971 auto *Int8Ty = Type::getInt8Ty(M.getContext()); 972 auto *Int8PtrTy = PointerType::getUnqual(M.getContext()); 973 auto *Int32Ty = Type::getInt32Ty(M.getContext()); 974 auto *Int64Ty = Type::getInt64Ty(M.getContext()); 975 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); 976 auto *BitmapAddr = getBitmapAddress(Update); 977 978 // Load Temp Val. 979 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 980 auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); 981 982 // Calculate byte offset using div8. 983 // %1 = lshr i32 %mcdc.temp, 3 984 auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3); 985 986 // Add byte offset to section base byte address. 987 // %2 = zext i32 %1 to i64 988 // %3 = add i64 ptrtoint (ptr @__profbm_test to i64), %2 989 auto *BitmapByteAddr = 990 Builder.CreateAdd(Builder.CreatePtrToInt(BitmapAddr, Int64Ty), 991 Builder.CreateZExtOrBitCast(BitmapByteOffset, Int64Ty)); 992 993 // Convert to a pointer. 994 // %4 = inttoptr i32 %3 to ptr 995 BitmapByteAddr = Builder.CreateIntToPtr(BitmapByteAddr, Int8PtrTy); 996 997 // Calculate bit offset into bitmap byte by using div8 remainder (AND ~8) 998 // %5 = and i32 %mcdc.temp, 7 999 // %6 = trunc i32 %5 to i8 1000 auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty); 1001 1002 // Shift bit offset left to form a bitmap. 1003 // %7 = shl i8 1, %6 1004 auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet); 1005 1006 // Load profile bitmap byte. 
1007 // %mcdc.bits = load i8, ptr %4, align 1 1008 auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits"); 1009 1010 // Perform logical OR of profile bitmap byte and shifted bit offset. 1011 // %8 = or i8 %mcdc.bits, %7 1012 auto *Result = Builder.CreateOr(Bitmap, ShiftedVal); 1013 1014 // Store the updated profile bitmap byte. 1015 // store i8 %8, ptr %3, align 1 1016 Builder.CreateStore(Result, BitmapByteAddr); 1017 Update->eraseFromParent(); 1018 } 1019 1020 void InstrLowerer::lowerMCDCCondBitmapUpdate( 1021 InstrProfMCDCCondBitmapUpdate *Update) { 1022 IRBuilder<> Builder(Update); 1023 auto *Int32Ty = Type::getInt32Ty(M.getContext()); 1024 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); 1025 1026 // Load the MCDC temporary value from the stack. 1027 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 1028 auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); 1029 1030 // Zero-extend the evaluated condition boolean value (0 or 1) by 32bits. 1031 // %1 = zext i1 %tobool to i32 1032 auto *CondV_32 = Builder.CreateZExt(Update->getCondBool(), Int32Ty); 1033 1034 // Shift the boolean value left (by the condition's ID) to form a bitmap. 1035 // %2 = shl i32 %1, <Update->getCondID()> 1036 auto *ShiftedVal = Builder.CreateShl(CondV_32, Update->getCondID()); 1037 1038 // Perform logical OR of the bitmap against the loaded MCDC temporary value. 1039 // %3 = or i32 %mcdc.temp, %2 1040 auto *Result = Builder.CreateOr(Temp, ShiftedVal); 1041 1042 // Store the updated temporary value back to the stack. 1043 // store i32 %3, ptr %mcdc.addr, align 4 1044 Builder.CreateStore(Result, MCDCCondBitmapAddr); 1045 Update->eraseFromParent(); 1046 } 1047 1048 /// Get the name of a profiling variable for a particular function. 
1049 static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix, 1050 bool &Renamed) { 1051 StringRef NamePrefix = getInstrProfNameVarPrefix(); 1052 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); 1053 Function *F = Inc->getParent()->getParent(); 1054 Module *M = F->getParent(); 1055 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) || 1056 !canRenameComdatFunc(*F)) { 1057 Renamed = false; 1058 return (Prefix + Name).str(); 1059 } 1060 Renamed = true; 1061 uint64_t FuncHash = Inc->getHash()->getZExtValue(); 1062 SmallVector<char, 24> HashPostfix; 1063 if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix))) 1064 return (Prefix + Name).str(); 1065 return (Prefix + Name + "." + Twine(FuncHash)).str(); 1066 } 1067 1068 static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { 1069 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag)); 1070 if (!MD) 1071 return 0; 1072 1073 // If the flag is a ConstantAsMetadata, it should be an integer representable 1074 // in 64-bits. 1075 return cast<ConstantInt>(MD->getValue())->getZExtValue(); 1076 } 1077 1078 static bool enablesValueProfiling(const Module &M) { 1079 return isIRPGOFlagSet(&M) || 1080 getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0; 1081 } 1082 1083 // Conservatively returns true if data variables may be referenced by code. 1084 static bool profDataReferencedByCode(const Module &M) { 1085 return enablesValueProfiling(M); 1086 } 1087 1088 static inline bool shouldRecordFunctionAddr(Function *F) { 1089 // Only record function addresses if IR PGO is enabled or if clang value 1090 // profiling is enabled. Recording function addresses greatly increases object 1091 // file size, because it prevents the inliner from deleting functions that 1092 // have been inlined everywhere. 
1093 if (!profDataReferencedByCode(*F->getParent())) 1094 return false; 1095 1096 // Check the linkage 1097 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); 1098 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && 1099 !HasAvailableExternallyLinkage) 1100 return true; 1101 1102 // A function marked 'alwaysinline' with available_externally linkage can't 1103 // have its address taken. Doing so would create an undefined external ref to 1104 // the function, which would fail to link. 1105 if (HasAvailableExternallyLinkage && 1106 F->hasFnAttribute(Attribute::AlwaysInline)) 1107 return false; 1108 1109 // Prohibit function address recording if the function is both internal and 1110 // COMDAT. This avoids the profile data variable referencing internal symbols 1111 // in COMDAT. 1112 if (F->hasLocalLinkage() && F->hasComdat()) 1113 return false; 1114 1115 // Check uses of this function for other than direct calls or invokes to it. 1116 // Inline virtual functions have linkeOnceODR linkage. When a key method 1117 // exists, the vtable will only be emitted in the TU where the key method 1118 // is defined. In a TU where vtable is not available, the function won't 1119 // be 'addresstaken'. If its address is not recorded here, the profile data 1120 // with missing address may be picked by the linker leading to missing 1121 // indirect call target info. 
1122 return F->hasAddressTaken() || F->hasLinkOnceLinkage(); 1123 } 1124 1125 static inline bool shouldUsePublicSymbol(Function *Fn) { 1126 // It isn't legal to make an alias of this function at all 1127 if (Fn->isDeclarationForLinker()) 1128 return true; 1129 1130 // Symbols with local linkage can just use the symbol directly without 1131 // introducing relocations 1132 if (Fn->hasLocalLinkage()) 1133 return true; 1134 1135 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some 1136 // unfavorable interaction between the new alias and the alias renaming done 1137 // in LowerTypeTests under ThinLTO. For comdat functions that would normally 1138 // be deduplicated, but the renaming scheme ends up preventing renaming, since 1139 // it creates unique names for each alias, resulting in duplicated symbols. In 1140 // the future, we should update the CFI related passes to migrate these 1141 // aliases to the same module as the jump-table they refer to will be defined. 1142 if (Fn->hasMetadata(LLVMContext::MD_type)) 1143 return true; 1144 1145 // For comdat functions, an alias would need the same linkage as the original 1146 // function and hidden visibility. There is no point in adding an alias with 1147 // identical linkage an visibility to avoid introducing symbolic relocations. 1148 if (Fn->hasComdat() && 1149 (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility)) 1150 return true; 1151 1152 // its OK to use an alias 1153 return false; 1154 } 1155 1156 static inline Constant *getFuncAddrForProfData(Function *Fn) { 1157 auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext()); 1158 // Store a nullptr in __llvm_profd, if we shouldn't use a real address 1159 if (!shouldRecordFunctionAddr(Fn)) 1160 return ConstantPointerNull::get(Int8PtrTy); 1161 1162 // If we can't use an alias, we must use the public symbol, even though this 1163 // may require a symbolic relocation. 
1164 if (shouldUsePublicSymbol(Fn)) 1165 return Fn; 1166 1167 // When possible use a private alias to avoid symbolic relocations. 1168 auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage, 1169 Fn->getName() + ".local", Fn); 1170 1171 // When the instrumented function is a COMDAT function, we cannot use a 1172 // private alias. If we did, we would create reference to a local label in 1173 // this function's section. If this version of the function isn't selected by 1174 // the linker, then the metadata would introduce a reference to a discarded 1175 // section. So, for COMDAT functions, we need to adjust the linkage of the 1176 // alias. Using hidden visibility avoids a dynamic relocation and an entry in 1177 // the dynamic symbol table. 1178 // 1179 // Note that this handles COMDAT functions with visibility other than Hidden, 1180 // since that case is covered in shouldUsePublicSymbol() 1181 if (Fn->hasComdat()) { 1182 GA->setLinkage(Fn->getLinkage()); 1183 GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility); 1184 } 1185 1186 // appendToCompilerUsed(*Fn->getParent(), {GA}); 1187 1188 return GA; 1189 } 1190 1191 static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { 1192 // compiler-rt uses linker support to get data/counters/name start/end for 1193 // ELF, COFF, Mach-O and XCOFF. 1194 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() || 1195 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF()) 1196 return false; 1197 1198 return true; 1199 } 1200 1201 void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn, 1202 StringRef VarName) { 1203 bool DataReferencedByCode = profDataReferencedByCode(M); 1204 bool NeedComdat = needsComdatForCounter(*Fn, M); 1205 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); 1206 1207 if (!UseComdat) 1208 return; 1209 1210 StringRef GroupName = 1211 TT.isOSBinFormatCOFF() && DataReferencedByCode ? 
GV->getName() : VarName; 1212 Comdat *C = M.getOrInsertComdat(GroupName); 1213 if (!NeedComdat) 1214 C->setSelectionKind(Comdat::NoDeduplicate); 1215 GV->setComdat(C); 1216 // COFF doesn't allow the comdat group leader to have private linkage, so 1217 // upgrade private linkage to internal linkage to produce a symbol table 1218 // entry. 1219 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) 1220 GV->setLinkage(GlobalValue::InternalLinkage); 1221 } 1222 1223 GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, 1224 InstrProfSectKind IPSK) { 1225 GlobalVariable *NamePtr = Inc->getName(); 1226 1227 // Match the linkage and visibility of the name global. 1228 Function *Fn = Inc->getParent()->getParent(); 1229 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); 1230 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); 1231 1232 // Use internal rather than private linkage so the counter variable shows up 1233 // in the symbol table when using debug info for correlation. 1234 if ((DebugInfoCorrelate || 1235 ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) && 1236 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage) 1237 Linkage = GlobalValue::InternalLinkage; 1238 1239 // Due to the limitation of binder as of 2021/09/28, the duplicate weak 1240 // symbols in the same csect won't be discarded. When there are duplicate weak 1241 // symbols, we can NOT guarantee that the relocations get resolved to the 1242 // intended weak symbol, so we can not ensure the correctness of the relative 1243 // CounterPtr, so we have to use private linkage for counter and data symbols. 1244 if (TT.isOSBinFormatXCOFF()) { 1245 Linkage = GlobalValue::PrivateLinkage; 1246 Visibility = GlobalValue::DefaultVisibility; 1247 } 1248 // Move the name variable to the right section. Place them in a COMDAT group 1249 // if the associated function is a COMDAT. 
This will make sure that only one 1250 // copy of counters of the COMDAT function will be emitted after linking. Keep 1251 // in mind that this pass may run before the inliner, so we need to create a 1252 // new comdat group for the counters and profiling data. If we use the comdat 1253 // of the parent function, that will result in relocations against discarded 1254 // sections. 1255 // 1256 // If the data variable is referenced by code, counters and data have to be 1257 // in different comdats for COFF because the Visual C++ linker will report 1258 // duplicate symbol errors if there are multiple external symbols with the 1259 // same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE. 1260 // 1261 // For ELF, when not using COMDAT, put counters, data and values into a 1262 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This 1263 // allows -z start-stop-gc to discard the entire group when the function is 1264 // discarded. 1265 bool Renamed; 1266 GlobalVariable *Ptr; 1267 StringRef VarPrefix; 1268 std::string VarName; 1269 if (IPSK == IPSK_cnts) { 1270 VarPrefix = getInstrProfCountersVarPrefix(); 1271 VarName = getVarName(Inc, VarPrefix, Renamed); 1272 InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc); 1273 Ptr = createRegionCounters(CntrIncrement, VarName, Linkage); 1274 } else if (IPSK == IPSK_bitmap) { 1275 VarPrefix = getInstrProfBitmapVarPrefix(); 1276 VarName = getVarName(Inc, VarPrefix, Renamed); 1277 InstrProfMCDCBitmapInstBase *BitmapUpdate = 1278 dyn_cast<InstrProfMCDCBitmapInstBase>(Inc); 1279 Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage); 1280 } else { 1281 llvm_unreachable("Profile Section must be for Counters or Bitmaps"); 1282 } 1283 1284 Ptr->setVisibility(Visibility); 1285 // Put the counters and bitmaps in their own sections so linkers can 1286 // remove unneeded sections. 
1287 Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat())); 1288 Ptr->setLinkage(Linkage); 1289 maybeSetComdat(Ptr, Fn, VarName); 1290 return Ptr; 1291 } 1292 1293 GlobalVariable * 1294 InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, 1295 StringRef Name, 1296 GlobalValue::LinkageTypes Linkage) { 1297 uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue(); 1298 auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes); 1299 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage, 1300 Constant::getNullValue(BitmapTy), Name); 1301 GV->setAlignment(Align(1)); 1302 return GV; 1303 } 1304 1305 GlobalVariable * 1306 InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { 1307 GlobalVariable *NamePtr = Inc->getName(); 1308 auto &PD = ProfileDataMap[NamePtr]; 1309 if (PD.RegionBitmaps) 1310 return PD.RegionBitmaps; 1311 1312 // If RegionBitmaps doesn't already exist, create it by first setting up 1313 // the corresponding profile section. 1314 auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap); 1315 PD.RegionBitmaps = BitmapPtr; 1316 PD.NumBitmapBytes = Inc->getNumBitmapBytes()->getZExtValue(); 1317 return PD.RegionBitmaps; 1318 } 1319 1320 GlobalVariable * 1321 InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, 1322 GlobalValue::LinkageTypes Linkage) { 1323 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); 1324 auto &Ctx = M.getContext(); 1325 GlobalVariable *GV; 1326 if (isa<InstrProfCoverInst>(Inc)) { 1327 auto *CounterTy = Type::getInt8Ty(Ctx); 1328 auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); 1329 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. 
1330 std::vector<Constant *> InitialValues(NumCounters, 1331 Constant::getAllOnesValue(CounterTy)); 1332 GV = new GlobalVariable(M, CounterArrTy, false, Linkage, 1333 ConstantArray::get(CounterArrTy, InitialValues), 1334 Name); 1335 GV->setAlignment(Align(1)); 1336 } else { 1337 auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); 1338 GV = new GlobalVariable(M, CounterTy, false, Linkage, 1339 Constant::getNullValue(CounterTy), Name); 1340 GV->setAlignment(Align(8)); 1341 } 1342 return GV; 1343 } 1344 1345 GlobalVariable * 1346 InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { 1347 GlobalVariable *NamePtr = Inc->getName(); 1348 auto &PD = ProfileDataMap[NamePtr]; 1349 if (PD.RegionCounters) 1350 return PD.RegionCounters; 1351 1352 // If RegionCounters doesn't already exist, create it by first setting up 1353 // the corresponding profile section. 1354 auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); 1355 PD.RegionCounters = CounterPtr; 1356 1357 if (DebugInfoCorrelate || 1358 ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { 1359 LLVMContext &Ctx = M.getContext(); 1360 Function *Fn = Inc->getParent()->getParent(); 1361 if (auto *SP = Fn->getSubprogram()) { 1362 DIBuilder DB(M, true, SP->getUnit()); 1363 Metadata *FunctionNameAnnotation[] = { 1364 MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName), 1365 MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)), 1366 }; 1367 Metadata *CFGHashAnnotation[] = { 1368 MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName), 1369 ConstantAsMetadata::get(Inc->getHash()), 1370 }; 1371 Metadata *NumCountersAnnotation[] = { 1372 MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName), 1373 ConstantAsMetadata::get(Inc->getNumCounters()), 1374 }; 1375 auto Annotations = DB.getOrCreateArray({ 1376 MDNode::get(Ctx, FunctionNameAnnotation), 1377 MDNode::get(Ctx, CFGHashAnnotation), 1378 MDNode::get(Ctx, NumCountersAnnotation), 1379 }); 1380 auto 
*DICounter = DB.createGlobalVariableExpression( 1381 SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(), 1382 /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"), 1383 CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr, 1384 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0, 1385 Annotations); 1386 CounterPtr->addDebugInfo(DICounter); 1387 DB.finalize(); 1388 } 1389 1390 // Mark the counter variable as used so that it isn't optimized out. 1391 CompilerUsedVars.push_back(PD.RegionCounters); 1392 } 1393 1394 // Create the data variable (if it doesn't already exist). 1395 createDataVariable(Inc); 1396 1397 return PD.RegionCounters; 1398 } 1399 1400 void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { 1401 // When debug information is correlated to profile data, a data variable 1402 // is not needed. 1403 if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) 1404 return; 1405 1406 GlobalVariable *NamePtr = Inc->getName(); 1407 auto &PD = ProfileDataMap[NamePtr]; 1408 1409 // Return if data variable was already created. 1410 if (PD.DataVar) 1411 return; 1412 1413 LLVMContext &Ctx = M.getContext(); 1414 1415 Function *Fn = Inc->getParent()->getParent(); 1416 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); 1417 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); 1418 1419 // Due to the limitation of binder as of 2021/09/28, the duplicate weak 1420 // symbols in the same csect won't be discarded. When there are duplicate weak 1421 // symbols, we can NOT guarantee that the relocations get resolved to the 1422 // intended weak symbol, so we can not ensure the correctness of the relative 1423 // CounterPtr, so we have to use private linkage for counter and data symbols. 
1424 if (TT.isOSBinFormatXCOFF()) { 1425 Linkage = GlobalValue::PrivateLinkage; 1426 Visibility = GlobalValue::DefaultVisibility; 1427 } 1428 1429 bool DataReferencedByCode = profDataReferencedByCode(M); 1430 bool NeedComdat = needsComdatForCounter(*Fn, M); 1431 bool Renamed; 1432 1433 // The Data Variable section is anchored to profile counters. 1434 std::string CntsVarName = 1435 getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); 1436 std::string DataVarName = 1437 getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); 1438 1439 auto *Int8PtrTy = PointerType::getUnqual(Ctx); 1440 // Allocate statically the array of pointers to value profile nodes for 1441 // the current function. 1442 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy); 1443 uint64_t NS = 0; 1444 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1445 NS += PD.NumValueSites[Kind]; 1446 if (NS > 0 && ValueProfileStaticAlloc && 1447 !needsRuntimeRegistrationOfSectionRange(TT)) { 1448 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS); 1449 auto *ValuesVar = new GlobalVariable( 1450 M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy), 1451 getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed)); 1452 ValuesVar->setVisibility(Visibility); 1453 setGlobalVariableLargeSection(TT, *ValuesVar); 1454 ValuesVar->setSection( 1455 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); 1456 ValuesVar->setAlignment(Align(8)); 1457 maybeSetComdat(ValuesVar, Fn, CntsVarName); 1458 ValuesPtrExpr = ValuesVar; 1459 } 1460 1461 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); 1462 auto *CounterPtr = PD.RegionCounters; 1463 1464 uint64_t NumBitmapBytes = PD.NumBitmapBytes; 1465 1466 // Create data variable. 
1467 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); 1468 auto *Int16Ty = Type::getInt16Ty(Ctx); 1469 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1); 1470 Type *DataTypes[] = { 1471 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, 1472 #include "llvm/ProfileData/InstrProfData.inc" 1473 }; 1474 auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes)); 1475 1476 Constant *FunctionAddr = getFuncAddrForProfData(Fn); 1477 1478 Constant *Int16ArrayVals[IPVK_Last + 1]; 1479 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1480 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); 1481 1482 // If the data variable is not referenced by code (if we don't emit 1483 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the 1484 // data variable live under linker GC, the data variable can be private. This 1485 // optimization applies to ELF. 1486 // 1487 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode 1488 // to be false. 1489 // 1490 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees 1491 // that other copies must have the same CFG and cannot have value profiling. 1492 // If no hash suffix, other profd copies may be referenced by code. 1493 if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) && 1494 (TT.isOSBinFormatELF() || 1495 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) { 1496 Linkage = GlobalValue::PrivateLinkage; 1497 Visibility = GlobalValue::DefaultVisibility; 1498 } 1499 auto *Data = 1500 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName); 1501 Constant *RelativeCounterPtr; 1502 GlobalVariable *BitmapPtr = PD.RegionBitmaps; 1503 Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0); 1504 InstrProfSectKind DataSectionKind; 1505 // With binary profile correlation, profile data is not loaded into memory. 1506 // profile data must reference profile counter with an absolute relocation. 
1507 if (ProfileCorrelate == InstrProfCorrelator::BINARY) { 1508 DataSectionKind = IPSK_covdata; 1509 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy); 1510 if (BitmapPtr != nullptr) 1511 RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy); 1512 } else { 1513 // Reference the counter variable with a label difference (link-time 1514 // constant). 1515 DataSectionKind = IPSK_data; 1516 RelativeCounterPtr = 1517 ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy), 1518 ConstantExpr::getPtrToInt(Data, IntPtrTy)); 1519 if (BitmapPtr != nullptr) 1520 RelativeBitmapPtr = 1521 ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy), 1522 ConstantExpr::getPtrToInt(Data, IntPtrTy)); 1523 } 1524 1525 Constant *DataVals[] = { 1526 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, 1527 #include "llvm/ProfileData/InstrProfData.inc" 1528 }; 1529 Data->setInitializer(ConstantStruct::get(DataTy, DataVals)); 1530 1531 Data->setVisibility(Visibility); 1532 Data->setSection( 1533 getInstrProfSectionName(DataSectionKind, TT.getObjectFormat())); 1534 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); 1535 maybeSetComdat(Data, Fn, CntsVarName); 1536 1537 PD.DataVar = Data; 1538 1539 // Mark the data variable as used so that it isn't stripped out. 1540 CompilerUsedVars.push_back(Data); 1541 // Now that the linkage set by the FE has been passed to the data and counter 1542 // variables, reset Name variable's linkage and visibility to private so that 1543 // it can be removed later by the compiler. 1544 NamePtr->setLinkage(GlobalValue::PrivateLinkage); 1545 // Collect the referenced names to be used by emitNameData. 1546 ReferencedNames.push_back(NamePtr); 1547 } 1548 1549 void InstrLowerer::emitVNodes() { 1550 if (!ValueProfileStaticAlloc) 1551 return; 1552 1553 // For now only support this on platforms that do 1554 // not require runtime registration to discover 1555 // named section start/end. 
1556 if (needsRuntimeRegistrationOfSectionRange(TT)) 1557 return; 1558 1559 size_t TotalNS = 0; 1560 for (auto &PD : ProfileDataMap) { 1561 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1562 TotalNS += PD.second.NumValueSites[Kind]; 1563 } 1564 1565 if (!TotalNS) 1566 return; 1567 1568 uint64_t NumCounters = TotalNS * NumCountersPerValueSite; 1569 // Heuristic for small programs with very few total value sites. 1570 // The default value of vp-counters-per-site is chosen based on 1571 // the observation that large apps usually have a low percentage 1572 // of value sites that actually have any profile data, and thus 1573 // the average number of counters per site is low. For small 1574 // apps with very few sites, this may not be true. Bump up the 1575 // number of counters in this case. 1576 #define INSTR_PROF_MIN_VAL_COUNTS 10 1577 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS) 1578 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2); 1579 1580 auto &Ctx = M.getContext(); 1581 Type *VNodeTypes[] = { 1582 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType, 1583 #include "llvm/ProfileData/InstrProfData.inc" 1584 }; 1585 auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes)); 1586 1587 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters); 1588 auto *VNodesVar = new GlobalVariable( 1589 M, VNodesTy, false, GlobalValue::PrivateLinkage, 1590 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); 1591 setGlobalVariableLargeSection(TT, *VNodesVar); 1592 VNodesVar->setSection( 1593 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); 1594 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy)); 1595 // VNodesVar is used by runtime but not referenced via relocation by other 1596 // sections. Conservatively make it linker retained. 
1597 UsedVars.push_back(VNodesVar); 1598 } 1599 1600 void InstrLowerer::emitNameData() { 1601 std::string UncompressedData; 1602 1603 if (ReferencedNames.empty()) 1604 return; 1605 1606 std::string CompressedNameStr; 1607 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, 1608 DoInstrProfNameCompression)) { 1609 report_fatal_error(Twine(toString(std::move(E))), false); 1610 } 1611 1612 auto &Ctx = M.getContext(); 1613 auto *NamesVal = 1614 ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false); 1615 NamesVar = new GlobalVariable(M, NamesVal->getType(), true, 1616 GlobalValue::PrivateLinkage, NamesVal, 1617 getInstrProfNamesVarName()); 1618 NamesSize = CompressedNameStr.size(); 1619 setGlobalVariableLargeSection(TT, *NamesVar); 1620 NamesVar->setSection( 1621 ProfileCorrelate == InstrProfCorrelator::BINARY 1622 ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat()) 1623 : getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); 1624 // On COFF, it's important to reduce the alignment down to 1 to prevent the 1625 // linker from inserting padding before the start of the names section or 1626 // between names entries. 1627 NamesVar->setAlignment(Align(1)); 1628 // NamesVar is used by runtime but not referenced via relocation by other 1629 // sections. Conservatively make it linker retained. 1630 UsedVars.push_back(NamesVar); 1631 1632 for (auto *NamePtr : ReferencedNames) 1633 NamePtr->eraseFromParent(); 1634 } 1635 1636 void InstrLowerer::emitRegistration() { 1637 if (!needsRuntimeRegistrationOfSectionRange(TT)) 1638 return; 1639 1640 // Construct the function. 
1641 auto *VoidTy = Type::getVoidTy(M.getContext()); 1642 auto *VoidPtrTy = PointerType::getUnqual(M.getContext()); 1643 auto *Int64Ty = Type::getInt64Ty(M.getContext()); 1644 auto *RegisterFTy = FunctionType::get(VoidTy, false); 1645 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage, 1646 getInstrProfRegFuncsName(), M); 1647 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1648 if (Options.NoRedZone) 1649 RegisterF->addFnAttr(Attribute::NoRedZone); 1650 1651 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); 1652 auto *RuntimeRegisterF = 1653 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage, 1654 getInstrProfRegFuncName(), M); 1655 1656 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF)); 1657 for (Value *Data : CompilerUsedVars) 1658 if (!isa<Function>(Data)) 1659 IRB.CreateCall(RuntimeRegisterF, Data); 1660 for (Value *Data : UsedVars) 1661 if (Data != NamesVar && !isa<Function>(Data)) 1662 IRB.CreateCall(RuntimeRegisterF, Data); 1663 1664 if (NamesVar) { 1665 Type *ParamTypes[] = {VoidPtrTy, Int64Ty}; 1666 auto *NamesRegisterTy = 1667 FunctionType::get(VoidTy, ArrayRef(ParamTypes), false); 1668 auto *NamesRegisterF = 1669 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage, 1670 getInstrProfNamesRegFuncName(), M); 1671 IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)}); 1672 } 1673 1674 IRB.CreateRetVoid(); 1675 } 1676 1677 bool InstrLowerer::emitRuntimeHook() { 1678 // We expect the linker to be invoked with -u<hook_var> flag for Linux 1679 // in which case there is no need to emit the external variable. 1680 if (TT.isOSLinux() || TT.isOSAIX()) 1681 return false; 1682 1683 // If the module's provided its own runtime, we don't need to do anything. 1684 if (M.getGlobalVariable(getInstrProfRuntimeHookVarName())) 1685 return false; 1686 1687 // Declare an external variable that will pull in the runtime initialization. 
1688 auto *Int32Ty = Type::getInt32Ty(M.getContext()); 1689 auto *Var = 1690 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, 1691 nullptr, getInstrProfRuntimeHookVarName()); 1692 Var->setVisibility(GlobalValue::HiddenVisibility); 1693 1694 if (TT.isOSBinFormatELF() && !TT.isPS()) { 1695 // Mark the user variable as used so that it isn't stripped out. 1696 CompilerUsedVars.push_back(Var); 1697 } else { 1698 // Make a function that uses it. 1699 auto *User = Function::Create(FunctionType::get(Int32Ty, false), 1700 GlobalValue::LinkOnceODRLinkage, 1701 getInstrProfRuntimeHookVarUseFuncName(), M); 1702 User->addFnAttr(Attribute::NoInline); 1703 if (Options.NoRedZone) 1704 User->addFnAttr(Attribute::NoRedZone); 1705 User->setVisibility(GlobalValue::HiddenVisibility); 1706 if (TT.supportsCOMDAT()) 1707 User->setComdat(M.getOrInsertComdat(User->getName())); 1708 1709 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User)); 1710 auto *Load = IRB.CreateLoad(Int32Ty, Var); 1711 IRB.CreateRet(Load); 1712 1713 // Mark the function as used so that it isn't stripped out. 1714 CompilerUsedVars.push_back(User); 1715 } 1716 return true; 1717 } 1718 1719 void InstrLowerer::emitUses() { 1720 // The metadata sections are parallel arrays. Optimizers (e.g. 1721 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so 1722 // we conservatively retain all unconditionally in the compiler. 1723 // 1724 // On ELF and Mach-O, the linker can guarantee the associated sections will be 1725 // retained or discarded as a unit, so llvm.compiler.used is sufficient. 1726 // Similarly on COFF, if prof data is not referenced by code we use one comdat 1727 // and ensure this GC property as well. Otherwise, we have to conservatively 1728 // make all of the sections retained by the linker. 
1729 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() || 1730 (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(M))) 1731 appendToCompilerUsed(M, CompilerUsedVars); 1732 else 1733 appendToUsed(M, CompilerUsedVars); 1734 1735 // We do not add proper references from used metadata sections to NamesVar and 1736 // VNodesVar, so we have to be conservative and place them in llvm.used 1737 // regardless of the target, 1738 appendToUsed(M, UsedVars); 1739 } 1740 1741 void InstrLowerer::emitInitialization() { 1742 // Create ProfileFileName variable. Don't don't this for the 1743 // context-sensitive instrumentation lowering: This lowering is after 1744 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should 1745 // have already create the variable before LTO/ThinLTO linking. 1746 if (!IsCS) 1747 createProfileFileNameVar(M, Options.InstrProfileOutput); 1748 Function *RegisterF = M.getFunction(getInstrProfRegFuncsName()); 1749 if (!RegisterF) 1750 return; 1751 1752 // Create the initialization function. 1753 auto *VoidTy = Type::getVoidTy(M.getContext()); 1754 auto *F = Function::Create(FunctionType::get(VoidTy, false), 1755 GlobalValue::InternalLinkage, 1756 getInstrProfInitFuncName(), M); 1757 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1758 F->addFnAttr(Attribute::NoInline); 1759 if (Options.NoRedZone) 1760 F->addFnAttr(Attribute::NoRedZone); 1761 1762 // Add the basic block and the necessary calls. 1763 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F)); 1764 IRB.CreateCall(RegisterF, {}); 1765 IRB.CreateRetVoid(); 1766 1767 appendToGlobalCtors(M, F, 0); 1768 } 1769