1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Instrumentation-based profile-guided optimization 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CodeGenPGO.h" 14 #include "CodeGenFunction.h" 15 #include "CoverageMappingGen.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/StmtVisitor.h" 18 #include "llvm/IR/Intrinsics.h" 19 #include "llvm/IR/MDBuilder.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/MD5.h" 24 #include <optional> 25 26 static llvm::cl::opt<bool> 27 EnableValueProfiling("enable-value-profiling", 28 llvm::cl::desc("Enable value profiling"), 29 llvm::cl::Hidden, llvm::cl::init(false)); 30 31 extern llvm::cl::opt<bool> SystemHeadersCoverage; 32 33 using namespace clang; 34 using namespace CodeGen; 35 36 void CodeGenPGO::setFuncName(StringRef Name, 37 llvm::GlobalValue::LinkageTypes Linkage) { 38 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); 39 FuncName = llvm::getPGOFuncName( 40 Name, Linkage, CGM.getCodeGenOpts().MainFileName, 41 PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version); 42 43 // If we're generating a profile, create a variable for the name. 44 if (CGM.getCodeGenOpts().hasProfileClangInstr()) 45 FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName); 46 } 47 48 void CodeGenPGO::setFuncName(llvm::Function *Fn) { 49 setFuncName(Fn->getName(), Fn->getLinkage()); 50 // Create PGOFuncName meta data. 51 llvm::createPGOFuncNameMetadata(*Fn, FuncName); 52 } 53 54 /// The version of the PGO hash algorithm. 55 enum PGOHashVersion : unsigned { 56 PGO_HASH_V1, 57 PGO_HASH_V2, 58 PGO_HASH_V3, 59 60 // Keep this set to the latest hash version. 61 PGO_HASH_LATEST = PGO_HASH_V3 62 }; 63 64 namespace { 65 /// Stable hasher for PGO region counters. 66 /// 67 /// PGOHash produces a stable hash of a given function's control flow. 68 /// 69 /// Changing the output of this hash will invalidate all previously generated 70 /// profiles -- i.e., don't do it. 71 /// 72 /// \note When this hash does eventually change (years?), we still need to 73 /// support old hashes. We'll need to pull in the version number from the 74 /// profile data format and use the matching hash function. 75 class PGOHash { 76 uint64_t Working; 77 unsigned Count; 78 PGOHashVersion HashVersion; 79 llvm::MD5 MD5; 80 81 static const int NumBitsPerType = 6; 82 static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType; 83 static const unsigned TooBig = 1u << NumBitsPerType; 84 85 public: 86 /// Hash values for AST nodes. 87 /// 88 /// Distinct values for AST nodes that have region counters attached. 89 /// 90 /// These values must be stable. All new members must be added at the end, 91 /// and no members should be removed. Changing the enumeration value for an 92 /// AST node will affect the hash of every function that contains that node. 93 enum HashType : unsigned char { 94 None = 0, 95 LabelStmt = 1, 96 WhileStmt, 97 DoStmt, 98 ForStmt, 99 CXXForRangeStmt, 100 ObjCForCollectionStmt, 101 SwitchStmt, 102 CaseStmt, 103 DefaultStmt, 104 IfStmt, 105 CXXTryStmt, 106 CXXCatchStmt, 107 ConditionalOperator, 108 BinaryOperatorLAnd, 109 BinaryOperatorLOr, 110 BinaryConditionalOperator, 111 // The preceding values are available with PGO_HASH_V1. 112 113 EndOfScope, 114 IfThenBranch, 115 IfElseBranch, 116 GotoStmt, 117 IndirectGotoStmt, 118 BreakStmt, 119 ContinueStmt, 120 ReturnStmt, 121 ThrowExpr, 122 UnaryOperatorLNot, 123 BinaryOperatorLT, 124 BinaryOperatorGT, 125 BinaryOperatorLE, 126 BinaryOperatorGE, 127 BinaryOperatorEQ, 128 BinaryOperatorNE, 129 // The preceding values are available since PGO_HASH_V2. 130 131 // Keep this last. It's for the static assert that follows. 132 LastHashType 133 }; 134 static_assert(LastHashType <= TooBig, "Too many types in HashType"); 135 136 PGOHash(PGOHashVersion HashVersion) 137 : Working(0), Count(0), HashVersion(HashVersion) {} 138 void combine(HashType Type); 139 uint64_t finalize(); 140 PGOHashVersion getHashVersion() const { return HashVersion; } 141 }; 142 const int PGOHash::NumBitsPerType; 143 const unsigned PGOHash::NumTypesPerWord; 144 const unsigned PGOHash::TooBig; 145 146 /// Get the PGO hash version used in the given indexed profile. 147 static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader, 148 CodeGenModule &CGM) { 149 if (PGOReader->getVersion() <= 4) 150 return PGO_HASH_V1; 151 if (PGOReader->getVersion() <= 5) 152 return PGO_HASH_V2; 153 return PGO_HASH_V3; 154 } 155 156 /// A RecursiveASTVisitor that fills a map of statements to PGO counters. 157 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { 158 using Base = RecursiveASTVisitor<MapRegionCounters>; 159 160 /// The next counter value to assign. 161 unsigned NextCounter; 162 /// The function hash. 163 PGOHash Hash; 164 /// The map of statements to counters. 165 llvm::DenseMap<const Stmt *, unsigned> &CounterMap; 166 /// The next bitmap byte index to assign. 167 unsigned NextMCDCBitmapIdx; 168 /// The map of statements to MC/DC bitmap coverage objects. 169 llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap; 170 /// Maximum number of supported MC/DC conditions in a boolean expression. 171 unsigned MCDCMaxCond; 172 /// The profile version. 173 uint64_t ProfileVersion; 174 /// Diagnostics Engine used to report warnings. 175 DiagnosticsEngine &Diag; 176 177 MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion, 178 llvm::DenseMap<const Stmt *, unsigned> &CounterMap, 179 llvm::DenseMap<const Stmt *, unsigned> &MCDCBitmapMap, 180 unsigned MCDCMaxCond, DiagnosticsEngine &Diag) 181 : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap), 182 NextMCDCBitmapIdx(0), MCDCBitmapMap(MCDCBitmapMap), 183 MCDCMaxCond(MCDCMaxCond), ProfileVersion(ProfileVersion), Diag(Diag) {} 184 185 // Blocks and lambdas are handled as separate functions, so we need not 186 // traverse them in the parent context. 187 bool TraverseBlockExpr(BlockExpr *BE) { return true; } 188 bool TraverseLambdaExpr(LambdaExpr *LE) { 189 // Traverse the captures, but not the body. 190 for (auto C : zip(LE->captures(), LE->capture_inits())) 191 TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C)); 192 return true; 193 } 194 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; } 195 196 bool VisitDecl(const Decl *D) { 197 switch (D->getKind()) { 198 default: 199 break; 200 case Decl::Function: 201 case Decl::CXXMethod: 202 case Decl::CXXConstructor: 203 case Decl::CXXDestructor: 204 case Decl::CXXConversion: 205 case Decl::ObjCMethod: 206 case Decl::Block: 207 case Decl::Captured: 208 CounterMap[D->getBody()] = NextCounter++; 209 break; 210 } 211 return true; 212 } 213 214 /// If \p S gets a fresh counter, update the counter mappings. Return the 215 /// V1 hash of \p S. 216 PGOHash::HashType updateCounterMappings(Stmt *S) { 217 auto Type = getHashType(PGO_HASH_V1, S); 218 if (Type != PGOHash::None) 219 CounterMap[S] = NextCounter++; 220 return Type; 221 } 222 223 /// The following stacks are used with dataTraverseStmtPre() and 224 /// dataTraverseStmtPost() to track the depth of nested logical operators in a 225 /// boolean expression in a function. The ultimate purpose is to keep track 226 /// of the number of leaf-level conditions in the boolean expression so that a 227 /// profile bitmap can be allocated based on that number. 228 /// 229 /// The stacks are also used to find error cases and notify the user. A 230 /// standard logical operator nest for a boolean expression could be in a form 231 /// similar to this: "x = a && b && c && (d || f)" 232 unsigned NumCond = 0; 233 bool SplitNestedLogicalOp = false; 234 SmallVector<const Stmt *, 16> NonLogOpStack; 235 SmallVector<const BinaryOperator *, 16> LogOpStack; 236 237 // Hook: dataTraverseStmtPre() is invoked prior to visiting an AST Stmt node. 238 bool dataTraverseStmtPre(Stmt *S) { 239 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. 240 if (MCDCMaxCond == 0) 241 return true; 242 243 /// At the top of the logical operator nest, reset the number of conditions, 244 /// also forget previously seen split nesting cases. 245 if (LogOpStack.empty()) { 246 NumCond = 0; 247 SplitNestedLogicalOp = false; 248 } 249 250 if (const Expr *E = dyn_cast<Expr>(S)) { 251 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); 252 if (BinOp && BinOp->isLogicalOp()) { 253 /// Check for "split-nested" logical operators. This happens when a new 254 /// boolean expression logical-op nest is encountered within an existing 255 /// boolean expression, separated by a non-logical operator. For 256 /// example, in "x = (a && b && c && foo(d && f))", the "d && f" case 257 /// starts a new boolean expression that is separated from the other 258 /// conditions by the operator foo(). Split-nested cases are not 259 /// supported by MC/DC. 260 SplitNestedLogicalOp = SplitNestedLogicalOp || !NonLogOpStack.empty(); 261 262 LogOpStack.push_back(BinOp); 263 return true; 264 } 265 } 266 267 /// Keep track of non-logical operators. These are OK as long as we don't 268 /// encounter a new logical operator after seeing one. 269 if (!LogOpStack.empty()) 270 NonLogOpStack.push_back(S); 271 272 return true; 273 } 274 275 // Hook: dataTraverseStmtPost() is invoked by the AST visitor after visiting 276 // an AST Stmt node. MC/DC will use it to to signal when the top of a 277 // logical operation (boolean expression) nest is encountered. 278 bool dataTraverseStmtPost(Stmt *S) { 279 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing. 280 if (MCDCMaxCond == 0) 281 return true; 282 283 if (const Expr *E = dyn_cast<Expr>(S)) { 284 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); 285 if (BinOp && BinOp->isLogicalOp()) { 286 assert(LogOpStack.back() == BinOp); 287 LogOpStack.pop_back(); 288 289 /// At the top of logical operator nest: 290 if (LogOpStack.empty()) { 291 /// Was the "split-nested" logical operator case encountered? 292 if (SplitNestedLogicalOp) { 293 unsigned DiagID = Diag.getCustomDiagID( 294 DiagnosticsEngine::Warning, 295 "unsupported MC/DC boolean expression; " 296 "contains an operation with a nested boolean expression. " 297 "Expression will not be covered"); 298 Diag.Report(S->getBeginLoc(), DiagID); 299 return true; 300 } 301 302 /// Was the maximum number of conditions encountered? 303 if (NumCond > MCDCMaxCond) { 304 unsigned DiagID = Diag.getCustomDiagID( 305 DiagnosticsEngine::Warning, 306 "unsupported MC/DC boolean expression; " 307 "number of conditions (%0) exceeds max (%1). " 308 "Expression will not be covered"); 309 Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond; 310 return true; 311 } 312 313 // Otherwise, allocate the number of bytes required for the bitmap 314 // based on the number of conditions. Must be at least 1-byte long. 315 MCDCBitmapMap[BinOp] = NextMCDCBitmapIdx; 316 unsigned SizeInBits = std::max<unsigned>(1L << NumCond, CHAR_BIT); 317 NextMCDCBitmapIdx += SizeInBits / CHAR_BIT; 318 } 319 return true; 320 } 321 } 322 323 if (!LogOpStack.empty()) 324 NonLogOpStack.pop_back(); 325 326 return true; 327 } 328 329 /// The RHS of all logical operators gets a fresh counter in order to count 330 /// how many times the RHS evaluates to true or false, depending on the 331 /// semantics of the operator. This is only valid for ">= v7" of the profile 332 /// version so that we facilitate backward compatibility. In addition, in 333 /// order to use MC/DC, count the number of total LHS and RHS conditions. 334 bool VisitBinaryOperator(BinaryOperator *S) { 335 if (S->isLogicalOp()) { 336 if (CodeGenFunction::isInstrumentedCondition(S->getLHS())) 337 NumCond++; 338 339 if (CodeGenFunction::isInstrumentedCondition(S->getRHS())) { 340 if (ProfileVersion >= llvm::IndexedInstrProf::Version7) 341 CounterMap[S->getRHS()] = NextCounter++; 342 343 NumCond++; 344 } 345 } 346 return Base::VisitBinaryOperator(S); 347 } 348 349 /// Include \p S in the function hash. 350 bool VisitStmt(Stmt *S) { 351 auto Type = updateCounterMappings(S); 352 if (Hash.getHashVersion() != PGO_HASH_V1) 353 Type = getHashType(Hash.getHashVersion(), S); 354 if (Type != PGOHash::None) 355 Hash.combine(Type); 356 return true; 357 } 358 359 bool TraverseIfStmt(IfStmt *If) { 360 // If we used the V1 hash, use the default traversal. 361 if (Hash.getHashVersion() == PGO_HASH_V1) 362 return Base::TraverseIfStmt(If); 363 364 // Otherwise, keep track of which branch we're in while traversing. 365 VisitStmt(If); 366 for (Stmt *CS : If->children()) { 367 if (!CS) 368 continue; 369 if (CS == If->getThen()) 370 Hash.combine(PGOHash::IfThenBranch); 371 else if (CS == If->getElse()) 372 Hash.combine(PGOHash::IfElseBranch); 373 TraverseStmt(CS); 374 } 375 Hash.combine(PGOHash::EndOfScope); 376 return true; 377 } 378 379 // If the statement type \p N is nestable, and its nesting impacts profile 380 // stability, define a custom traversal which tracks the end of the statement 381 // in the hash (provided we're not using the V1 hash). 382 #define DEFINE_NESTABLE_TRAVERSAL(N) \ 383 bool Traverse##N(N *S) { \ 384 Base::Traverse##N(S); \ 385 if (Hash.getHashVersion() != PGO_HASH_V1) \ 386 Hash.combine(PGOHash::EndOfScope); \ 387 return true; \ 388 } 389 390 DEFINE_NESTABLE_TRAVERSAL(WhileStmt) 391 DEFINE_NESTABLE_TRAVERSAL(DoStmt) 392 DEFINE_NESTABLE_TRAVERSAL(ForStmt) 393 DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt) 394 DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt) 395 DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt) 396 DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt) 397 398 /// Get version \p HashVersion of the PGO hash for \p S. 399 PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) { 400 switch (S->getStmtClass()) { 401 default: 402 break; 403 case Stmt::LabelStmtClass: 404 return PGOHash::LabelStmt; 405 case Stmt::WhileStmtClass: 406 return PGOHash::WhileStmt; 407 case Stmt::DoStmtClass: 408 return PGOHash::DoStmt; 409 case Stmt::ForStmtClass: 410 return PGOHash::ForStmt; 411 case Stmt::CXXForRangeStmtClass: 412 return PGOHash::CXXForRangeStmt; 413 case Stmt::ObjCForCollectionStmtClass: 414 return PGOHash::ObjCForCollectionStmt; 415 case Stmt::SwitchStmtClass: 416 return PGOHash::SwitchStmt; 417 case Stmt::CaseStmtClass: 418 return PGOHash::CaseStmt; 419 case Stmt::DefaultStmtClass: 420 return PGOHash::DefaultStmt; 421 case Stmt::IfStmtClass: 422 return PGOHash::IfStmt; 423 case Stmt::CXXTryStmtClass: 424 return PGOHash::CXXTryStmt; 425 case Stmt::CXXCatchStmtClass: 426 return PGOHash::CXXCatchStmt; 427 case Stmt::ConditionalOperatorClass: 428 return PGOHash::ConditionalOperator; 429 case Stmt::BinaryConditionalOperatorClass: 430 return PGOHash::BinaryConditionalOperator; 431 case Stmt::BinaryOperatorClass: { 432 const BinaryOperator *BO = cast<BinaryOperator>(S); 433 if (BO->getOpcode() == BO_LAnd) 434 return PGOHash::BinaryOperatorLAnd; 435 if (BO->getOpcode() == BO_LOr) 436 return PGOHash::BinaryOperatorLOr; 437 if (HashVersion >= PGO_HASH_V2) { 438 switch (BO->getOpcode()) { 439 default: 440 break; 441 case BO_LT: 442 return PGOHash::BinaryOperatorLT; 443 case BO_GT: 444 return PGOHash::BinaryOperatorGT; 445 case BO_LE: 446 return PGOHash::BinaryOperatorLE; 447 case BO_GE: 448 return PGOHash::BinaryOperatorGE; 449 case BO_EQ: 450 return PGOHash::BinaryOperatorEQ; 451 case BO_NE: 452 return PGOHash::BinaryOperatorNE; 453 } 454 } 455 break; 456 } 457 } 458 459 if (HashVersion >= PGO_HASH_V2) { 460 switch (S->getStmtClass()) { 461 default: 462 break; 463 case Stmt::GotoStmtClass: 464 return PGOHash::GotoStmt; 465 case Stmt::IndirectGotoStmtClass: 466 return PGOHash::IndirectGotoStmt; 467 case Stmt::BreakStmtClass: 468 return PGOHash::BreakStmt; 469 case Stmt::ContinueStmtClass: 470 return PGOHash::ContinueStmt; 471 case Stmt::ReturnStmtClass: 472 return PGOHash::ReturnStmt; 473 case Stmt::CXXThrowExprClass: 474 return PGOHash::ThrowExpr; 475 case Stmt::UnaryOperatorClass: { 476 const UnaryOperator *UO = cast<UnaryOperator>(S); 477 if (UO->getOpcode() == UO_LNot) 478 return PGOHash::UnaryOperatorLNot; 479 break; 480 } 481 } 482 } 483 484 return PGOHash::None; 485 } 486 }; 487 488 /// A StmtVisitor that propagates the raw counts through the AST and 489 /// records the count at statements where the value may change. 490 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { 491 /// PGO state. 492 CodeGenPGO &PGO; 493 494 /// A flag that is set when the current count should be recorded on the 495 /// next statement, such as at the exit of a loop. 496 bool RecordNextStmtCount; 497 498 /// The count at the current location in the traversal. 499 uint64_t CurrentCount; 500 501 /// The map of statements to count values. 502 llvm::DenseMap<const Stmt *, uint64_t> &CountMap; 503 504 /// BreakContinueStack - Keep counts of breaks and continues inside loops. 505 struct BreakContinue { 506 uint64_t BreakCount = 0; 507 uint64_t ContinueCount = 0; 508 BreakContinue() = default; 509 }; 510 SmallVector<BreakContinue, 8> BreakContinueStack; 511 512 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap, 513 CodeGenPGO &PGO) 514 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {} 515 516 void RecordStmtCount(const Stmt *S) { 517 if (RecordNextStmtCount) { 518 CountMap[S] = CurrentCount; 519 RecordNextStmtCount = false; 520 } 521 } 522 523 /// Set and return the current count. 524 uint64_t setCount(uint64_t Count) { 525 CurrentCount = Count; 526 return Count; 527 } 528 529 void VisitStmt(const Stmt *S) { 530 RecordStmtCount(S); 531 for (const Stmt *Child : S->children()) 532 if (Child) 533 this->Visit(Child); 534 } 535 536 void VisitFunctionDecl(const FunctionDecl *D) { 537 // Counter tracks entry to the function body. 538 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 539 CountMap[D->getBody()] = BodyCount; 540 Visit(D->getBody()); 541 } 542 543 // Skip lambda expressions. We visit these as FunctionDecls when we're 544 // generating them and aren't interested in the body when generating a 545 // parent context. 546 void VisitLambdaExpr(const LambdaExpr *LE) {} 547 548 void VisitCapturedDecl(const CapturedDecl *D) { 549 // Counter tracks entry to the capture body. 550 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 551 CountMap[D->getBody()] = BodyCount; 552 Visit(D->getBody()); 553 } 554 555 void VisitObjCMethodDecl(const ObjCMethodDecl *D) { 556 // Counter tracks entry to the method body. 557 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 558 CountMap[D->getBody()] = BodyCount; 559 Visit(D->getBody()); 560 } 561 562 void VisitBlockDecl(const BlockDecl *D) { 563 // Counter tracks entry to the block body. 564 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 565 CountMap[D->getBody()] = BodyCount; 566 Visit(D->getBody()); 567 } 568 569 void VisitReturnStmt(const ReturnStmt *S) { 570 RecordStmtCount(S); 571 if (S->getRetValue()) 572 Visit(S->getRetValue()); 573 CurrentCount = 0; 574 RecordNextStmtCount = true; 575 } 576 577 void VisitCXXThrowExpr(const CXXThrowExpr *E) { 578 RecordStmtCount(E); 579 if (E->getSubExpr()) 580 Visit(E->getSubExpr()); 581 CurrentCount = 0; 582 RecordNextStmtCount = true; 583 } 584 585 void VisitGotoStmt(const GotoStmt *S) { 586 RecordStmtCount(S); 587 CurrentCount = 0; 588 RecordNextStmtCount = true; 589 } 590 591 void VisitLabelStmt(const LabelStmt *S) { 592 RecordNextStmtCount = false; 593 // Counter tracks the block following the label. 594 uint64_t BlockCount = setCount(PGO.getRegionCount(S)); 595 CountMap[S] = BlockCount; 596 Visit(S->getSubStmt()); 597 } 598 599 void VisitBreakStmt(const BreakStmt *S) { 600 RecordStmtCount(S); 601 assert(!BreakContinueStack.empty() && "break not in a loop or switch!"); 602 BreakContinueStack.back().BreakCount += CurrentCount; 603 CurrentCount = 0; 604 RecordNextStmtCount = true; 605 } 606 607 void VisitContinueStmt(const ContinueStmt *S) { 608 RecordStmtCount(S); 609 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!"); 610 BreakContinueStack.back().ContinueCount += CurrentCount; 611 CurrentCount = 0; 612 RecordNextStmtCount = true; 613 } 614 615 void VisitWhileStmt(const WhileStmt *S) { 616 RecordStmtCount(S); 617 uint64_t ParentCount = CurrentCount; 618 619 BreakContinueStack.push_back(BreakContinue()); 620 // Visit the body region first so the break/continue adjustments can be 621 // included when visiting the condition. 622 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 623 CountMap[S->getBody()] = CurrentCount; 624 Visit(S->getBody()); 625 uint64_t BackedgeCount = CurrentCount; 626 627 // ...then go back and propagate counts through the condition. The count 628 // at the start of the condition is the sum of the incoming edges, 629 // the backedge from the end of the loop body, and the edges from 630 // continue statements. 631 BreakContinue BC = BreakContinueStack.pop_back_val(); 632 uint64_t CondCount = 633 setCount(ParentCount + BackedgeCount + BC.ContinueCount); 634 CountMap[S->getCond()] = CondCount; 635 Visit(S->getCond()); 636 setCount(BC.BreakCount + CondCount - BodyCount); 637 RecordNextStmtCount = true; 638 } 639 640 void VisitDoStmt(const DoStmt *S) { 641 RecordStmtCount(S); 642 uint64_t LoopCount = PGO.getRegionCount(S); 643 644 BreakContinueStack.push_back(BreakContinue()); 645 // The count doesn't include the fallthrough from the parent scope. Add it. 646 uint64_t BodyCount = setCount(LoopCount + CurrentCount); 647 CountMap[S->getBody()] = BodyCount; 648 Visit(S->getBody()); 649 uint64_t BackedgeCount = CurrentCount; 650 651 BreakContinue BC = BreakContinueStack.pop_back_val(); 652 // The count at the start of the condition is equal to the count at the 653 // end of the body, plus any continues. 654 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount); 655 CountMap[S->getCond()] = CondCount; 656 Visit(S->getCond()); 657 setCount(BC.BreakCount + CondCount - LoopCount); 658 RecordNextStmtCount = true; 659 } 660 661 void VisitForStmt(const ForStmt *S) { 662 RecordStmtCount(S); 663 if (S->getInit()) 664 Visit(S->getInit()); 665 666 uint64_t ParentCount = CurrentCount; 667 668 BreakContinueStack.push_back(BreakContinue()); 669 // Visit the body region first. (This is basically the same as a while 670 // loop; see further comments in VisitWhileStmt.) 671 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 672 CountMap[S->getBody()] = BodyCount; 673 Visit(S->getBody()); 674 uint64_t BackedgeCount = CurrentCount; 675 BreakContinue BC = BreakContinueStack.pop_back_val(); 676 677 // The increment is essentially part of the body but it needs to include 678 // the count for all the continue statements. 679 if (S->getInc()) { 680 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount); 681 CountMap[S->getInc()] = IncCount; 682 Visit(S->getInc()); 683 } 684 685 // ...then go back and propagate counts through the condition. 686 uint64_t CondCount = 687 setCount(ParentCount + BackedgeCount + BC.ContinueCount); 688 if (S->getCond()) { 689 CountMap[S->getCond()] = CondCount; 690 Visit(S->getCond()); 691 } 692 setCount(BC.BreakCount + CondCount - BodyCount); 693 RecordNextStmtCount = true; 694 } 695 696 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) { 697 RecordStmtCount(S); 698 if (S->getInit()) 699 Visit(S->getInit()); 700 Visit(S->getLoopVarStmt()); 701 Visit(S->getRangeStmt()); 702 Visit(S->getBeginStmt()); 703 Visit(S->getEndStmt()); 704 705 uint64_t ParentCount = CurrentCount; 706 BreakContinueStack.push_back(BreakContinue()); 707 // Visit the body region first. (This is basically the same as a while 708 // loop; see further comments in VisitWhileStmt.) 709 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 710 CountMap[S->getBody()] = BodyCount; 711 Visit(S->getBody()); 712 uint64_t BackedgeCount = CurrentCount; 713 BreakContinue BC = BreakContinueStack.pop_back_val(); 714 715 // The increment is essentially part of the body but it needs to include 716 // the count for all the continue statements. 717 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount); 718 CountMap[S->getInc()] = IncCount; 719 Visit(S->getInc()); 720 721 // ...then go back and propagate counts through the condition. 722 uint64_t CondCount = 723 setCount(ParentCount + BackedgeCount + BC.ContinueCount); 724 CountMap[S->getCond()] = CondCount; 725 Visit(S->getCond()); 726 setCount(BC.BreakCount + CondCount - BodyCount); 727 RecordNextStmtCount = true; 728 } 729 730 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) { 731 RecordStmtCount(S); 732 Visit(S->getElement()); 733 uint64_t ParentCount = CurrentCount; 734 BreakContinueStack.push_back(BreakContinue()); 735 // Counter tracks the body of the loop. 736 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 737 CountMap[S->getBody()] = BodyCount; 738 Visit(S->getBody()); 739 uint64_t BackedgeCount = CurrentCount; 740 BreakContinue BC = BreakContinueStack.pop_back_val(); 741 742 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount - 743 BodyCount); 744 RecordNextStmtCount = true; 745 } 746 747 void VisitSwitchStmt(const SwitchStmt *S) { 748 RecordStmtCount(S); 749 if (S->getInit()) 750 Visit(S->getInit()); 751 Visit(S->getCond()); 752 CurrentCount = 0; 753 BreakContinueStack.push_back(BreakContinue()); 754 Visit(S->getBody()); 755 // If the switch is inside a loop, add the continue counts. 756 BreakContinue BC = BreakContinueStack.pop_back_val(); 757 if (!BreakContinueStack.empty()) 758 BreakContinueStack.back().ContinueCount += BC.ContinueCount; 759 // Counter tracks the exit block of the switch. 760 setCount(PGO.getRegionCount(S)); 761 RecordNextStmtCount = true; 762 } 763 764 void VisitSwitchCase(const SwitchCase *S) { 765 RecordNextStmtCount = false; 766 // Counter for this particular case. This counts only jumps from the 767 // switch header and does not include fallthrough from the case before 768 // this one. 769 uint64_t CaseCount = PGO.getRegionCount(S); 770 setCount(CurrentCount + CaseCount); 771 // We need the count without fallthrough in the mapping, so it's more useful 772 // for branch probabilities. 773 CountMap[S] = CaseCount; 774 RecordNextStmtCount = true; 775 Visit(S->getSubStmt()); 776 } 777 778 void VisitIfStmt(const IfStmt *S) { 779 RecordStmtCount(S); 780 781 if (S->isConsteval()) { 782 const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse(); 783 if (Stm) 784 Visit(Stm); 785 return; 786 } 787 788 uint64_t ParentCount = CurrentCount; 789 if (S->getInit()) 790 Visit(S->getInit()); 791 Visit(S->getCond()); 792 793 // Counter tracks the "then" part of an if statement. The count for 794 // the "else" part, if it exists, will be calculated from this counter. 795 uint64_t ThenCount = setCount(PGO.getRegionCount(S)); 796 CountMap[S->getThen()] = ThenCount; 797 Visit(S->getThen()); 798 uint64_t OutCount = CurrentCount; 799 800 uint64_t ElseCount = ParentCount - ThenCount; 801 if (S->getElse()) { 802 setCount(ElseCount); 803 CountMap[S->getElse()] = ElseCount; 804 Visit(S->getElse()); 805 OutCount += CurrentCount; 806 } else 807 OutCount += ElseCount; 808 setCount(OutCount); 809 RecordNextStmtCount = true; 810 } 811 812 void VisitCXXTryStmt(const CXXTryStmt *S) { 813 RecordStmtCount(S); 814 Visit(S->getTryBlock()); 815 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I) 816 Visit(S->getHandler(I)); 817 // Counter tracks the continuation block of the try statement. 818 setCount(PGO.getRegionCount(S)); 819 RecordNextStmtCount = true; 820 } 821 822 void VisitCXXCatchStmt(const CXXCatchStmt *S) { 823 RecordNextStmtCount = false; 824 // Counter tracks the catch statement's handler block. 825 uint64_t CatchCount = setCount(PGO.getRegionCount(S)); 826 CountMap[S] = CatchCount; 827 Visit(S->getHandlerBlock()); 828 } 829 830 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { 831 RecordStmtCount(E); 832 uint64_t ParentCount = CurrentCount; 833 Visit(E->getCond()); 834 835 // Counter tracks the "true" part of a conditional operator. The 836 // count in the "false" part will be calculated from this counter. 837 uint64_t TrueCount = setCount(PGO.getRegionCount(E)); 838 CountMap[E->getTrueExpr()] = TrueCount; 839 Visit(E->getTrueExpr()); 840 uint64_t OutCount = CurrentCount; 841 842 uint64_t FalseCount = setCount(ParentCount - TrueCount); 843 CountMap[E->getFalseExpr()] = FalseCount; 844 Visit(E->getFalseExpr()); 845 OutCount += CurrentCount; 846 847 setCount(OutCount); 848 RecordNextStmtCount = true; 849 } 850 851 void VisitBinLAnd(const BinaryOperator *E) { 852 RecordStmtCount(E); 853 uint64_t ParentCount = CurrentCount; 854 Visit(E->getLHS()); 855 // Counter tracks the right hand side of a logical and operator. 856 uint64_t RHSCount = setCount(PGO.getRegionCount(E)); 857 CountMap[E->getRHS()] = RHSCount; 858 Visit(E->getRHS()); 859 setCount(ParentCount + RHSCount - CurrentCount); 860 RecordNextStmtCount = true; 861 } 862 863 void VisitBinLOr(const BinaryOperator *E) { 864 RecordStmtCount(E); 865 uint64_t ParentCount = CurrentCount; 866 Visit(E->getLHS()); 867 // Counter tracks the right hand side of a logical or operator. 868 uint64_t RHSCount = setCount(PGO.getRegionCount(E)); 869 CountMap[E->getRHS()] = RHSCount; 870 Visit(E->getRHS()); 871 setCount(ParentCount + RHSCount - CurrentCount); 872 RecordNextStmtCount = true; 873 } 874 }; 875 } // end anonymous namespace 876 877 void PGOHash::combine(HashType Type) { 878 // Check that we never combine 0 and only have six bits. 879 assert(Type && "Hash is invalid: unexpected type 0"); 880 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types"); 881 882 // Pass through MD5 if enough work has built up. 883 if (Count && Count % NumTypesPerWord == 0) { 884 using namespace llvm::support; 885 uint64_t Swapped = 886 endian::byte_swap<uint64_t, llvm::endianness::little>(Working); 887 MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); 888 Working = 0; 889 } 890 891 // Accumulate the current type. 892 ++Count; 893 Working = Working << NumBitsPerType | Type; 894 } 895 896 uint64_t PGOHash::finalize() { 897 // Use Working as the hash directly if we never used MD5. 898 if (Count <= NumTypesPerWord) 899 // No need to byte swap here, since none of the math was endian-dependent. 900 // This number will be byte-swapped as required on endianness transitions, 901 // so we will see the same value on the other side. 902 return Working; 903 904 // Check for remaining work in Working. 905 if (Working) { 906 // Keep the buggy behavior from v1 and v2 for backward-compatibility. This 907 // is buggy because it converts a uint64_t into an array of uint8_t. 908 if (HashVersion < PGO_HASH_V3) { 909 MD5.update({(uint8_t)Working}); 910 } else { 911 using namespace llvm::support; 912 uint64_t Swapped = 913 endian::byte_swap<uint64_t, llvm::endianness::little>(Working); 914 MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); 915 } 916 } 917 918 // Finalize the MD5 and return the hash. 919 llvm::MD5::MD5Result Result; 920 MD5.final(Result); 921 return Result.low(); 922 } 923 924 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { 925 const Decl *D = GD.getDecl(); 926 if (!D->hasBody()) 927 return; 928 929 // Skip CUDA/HIP kernel launch stub functions. 930 if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice && 931 D->hasAttr<CUDAGlobalAttr>()) 932 return; 933 934 bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); 935 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); 936 if (!InstrumentRegions && !PGOReader) 937 return; 938 if (D->isImplicit()) 939 return; 940 // Constructors and destructors may be represented by several functions in IR. 941 // If so, instrument only base variant, others are implemented by delegation 942 // to the base one, it would be counted twice otherwise. 943 if (CGM.getTarget().getCXXABI().hasConstructorVariants()) { 944 if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D)) 945 if (GD.getCtorType() != Ctor_Base && 946 CodeGenFunction::IsConstructorDelegationValid(CCD)) 947 return; 948 } 949 if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base) 950 return; 951 952 CGM.ClearUnusedCoverageMapping(D); 953 if (Fn->hasFnAttribute(llvm::Attribute::NoProfile)) 954 return; 955 if (Fn->hasFnAttribute(llvm::Attribute::SkipProfile)) 956 return; 957 958 setFuncName(Fn); 959 960 mapRegionCounters(D); 961 if (CGM.getCodeGenOpts().CoverageMapping) 962 emitCounterRegionMapping(D); 963 if (PGOReader) { 964 SourceManager &SM = CGM.getContext().getSourceManager(); 965 loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation())); 966 computeRegionCounts(D); 967 applyFunctionAttributes(PGOReader, Fn); 968 } 969 } 970 971 void CodeGenPGO::mapRegionCounters(const Decl *D) { 972 // Use the latest hash version when inserting instrumentation, but use the 973 // version in the indexed profile if we're reading PGO data. 974 PGOHashVersion HashVersion = PGO_HASH_LATEST; 975 uint64_t ProfileVersion = llvm::IndexedInstrProf::Version; 976 if (auto *PGOReader = CGM.getPGOReader()) { 977 HashVersion = getPGOHashVersion(PGOReader, CGM); 978 ProfileVersion = PGOReader->getVersion(); 979 } 980 981 // If MC/DC is enabled, set the MaxConditions to a preset value. Otherwise, 982 // set it to zero. This value impacts the number of conditions accepted in a 983 // given boolean expression, which impacts the size of the bitmap used to 984 // track test vector execution for that boolean expression. Because the 985 // bitmap scales exponentially (2^n) based on the number of conditions seen, 986 // the maximum value is hard-coded at 6 conditions, which is more than enough 987 // for most embedded applications. Setting a maximum value prevents the 988 // bitmap footprint from growing too large without the user's knowledge. In 989 // the future, this value could be adjusted with a command-line option. 990 unsigned MCDCMaxConditions = (CGM.getCodeGenOpts().MCDCCoverage) ? 6 : 0; 991 992 RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); 993 RegionMCDCBitmapMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); 994 MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap, 995 *RegionMCDCBitmapMap, MCDCMaxConditions, 996 CGM.getDiags()); 997 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) 998 Walker.TraverseDecl(const_cast<FunctionDecl *>(FD)); 999 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) 1000 Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD)); 1001 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D)) 1002 Walker.TraverseDecl(const_cast<BlockDecl *>(BD)); 1003 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D)) 1004 Walker.TraverseDecl(const_cast<CapturedDecl *>(CD)); 1005 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); 1006 NumRegionCounters = Walker.NextCounter; 1007 MCDCBitmapBytes = Walker.NextMCDCBitmapIdx; 1008 FunctionHash = Walker.Hash.finalize(); 1009 } 1010 1011 bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { 1012 if (!D->getBody()) 1013 return true; 1014 1015 // Skip host-only functions in the CUDA device compilation and device-only 1016 // functions in the host compilation. Just roughly filter them out based on 1017 // the function attributes. If there are effectively host-only or device-only 1018 // ones, their coverage mapping may still be generated. 1019 if (CGM.getLangOpts().CUDA && 1020 ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() && 1021 !D->hasAttr<CUDAGlobalAttr>()) || 1022 (!CGM.getLangOpts().CUDAIsDevice && 1023 (D->hasAttr<CUDAGlobalAttr>() || 1024 (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>()))))) 1025 return true; 1026 1027 // Don't map the functions in system headers. 1028 const auto &SM = CGM.getContext().getSourceManager(); 1029 auto Loc = D->getBody()->getBeginLoc(); 1030 return !SystemHeadersCoverage && SM.isInSystemHeader(Loc); 1031 } 1032 1033 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { 1034 if (skipRegionMappingForDecl(D)) 1035 return; 1036 1037 std::string CoverageMapping; 1038 llvm::raw_string_ostream OS(CoverageMapping); 1039 RegionCondIDMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); 1040 CoverageMappingGen MappingGen( 1041 *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(), 1042 CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCBitmapMap.get(), 1043 RegionCondIDMap.get()); 1044 MappingGen.emitCounterMapping(D, OS); 1045 OS.flush(); 1046 1047 if (CoverageMapping.empty()) 1048 return; 1049 1050 CGM.getCoverageMapping()->addFunctionMappingRecord( 1051 FuncNameVar, FuncName, FunctionHash, CoverageMapping); 1052 } 1053 1054 void 1055 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name, 1056 llvm::GlobalValue::LinkageTypes Linkage) { 1057 if (skipRegionMappingForDecl(D)) 1058 return; 1059 1060 std::string CoverageMapping; 1061 llvm::raw_string_ostream OS(CoverageMapping); 1062 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(), 1063 CGM.getContext().getSourceManager(), 1064 CGM.getLangOpts()); 1065 MappingGen.emitEmptyMapping(D, OS); 1066 OS.flush(); 1067 1068 if (CoverageMapping.empty()) 1069 return; 1070 1071 setFuncName(Name, Linkage); 1072 CGM.getCoverageMapping()->addFunctionMappingRecord( 1073 FuncNameVar, FuncName, FunctionHash, CoverageMapping, false); 1074 } 1075 1076 void CodeGenPGO::computeRegionCounts(const Decl *D) { 1077 StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>); 1078 ComputeRegionCounts Walker(*StmtCountMap, *this); 1079 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) 1080 Walker.VisitFunctionDecl(FD); 1081 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) 1082 Walker.VisitObjCMethodDecl(MD); 1083 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D)) 1084 Walker.VisitBlockDecl(BD); 1085 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D)) 1086 Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD)); 1087 } 1088 1089 void 1090 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, 1091 llvm::Function *Fn) { 1092 if (!haveRegionCounts()) 1093 return; 1094 1095 uint64_t FunctionCount = getRegionCount(nullptr); 1096 Fn->setEntryCount(FunctionCount); 1097 } 1098 1099 void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S, 1100 llvm::Value *StepV) { 1101 if (!RegionCounterMap || !Builder.GetInsertBlock()) 1102 return; 1103 1104 unsigned Counter = (*RegionCounterMap)[S]; 1105 1106 llvm::Value *Args[] = {FuncNameVar, 1107 Builder.getInt64(FunctionHash), 1108 Builder.getInt32(NumRegionCounters), 1109 Builder.getInt32(Counter), StepV}; 1110 if (!StepV) 1111 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), 1112 ArrayRef(Args, 4)); 1113 else 1114 Builder.CreateCall( 1115 CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step), 1116 ArrayRef(Args)); 1117 } 1118 1119 bool CodeGenPGO::canEmitMCDCCoverage(const CGBuilderTy &Builder) { 1120 return (CGM.getCodeGenOpts().hasProfileClangInstr() && 1121 CGM.getCodeGenOpts().MCDCCoverage && Builder.GetInsertBlock()); 1122 } 1123 1124 void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { 1125 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) 1126 return; 1127 1128 auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); 1129 1130 // Emit intrinsic representing MCDC bitmap parameters at function entry. 1131 // This is used by the instrumentation pass, but it isn't actually lowered to 1132 // anything. 1133 llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1134 Builder.getInt64(FunctionHash), 1135 Builder.getInt32(MCDCBitmapBytes)}; 1136 Builder.CreateCall( 1137 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args); 1138 } 1139 1140 void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, 1141 const Expr *S, 1142 Address MCDCCondBitmapAddr) { 1143 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) 1144 return; 1145 1146 S = S->IgnoreParens(); 1147 1148 auto ExprMCDCBitmapMapIterator = RegionMCDCBitmapMap->find(S); 1149 if (ExprMCDCBitmapMapIterator == RegionMCDCBitmapMap->end()) 1150 return; 1151 1152 // Extract the ID of the global bitmap associated with this expression. 1153 unsigned MCDCTestVectorBitmapID = ExprMCDCBitmapMapIterator->second; 1154 auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); 1155 1156 // Emit intrinsic responsible for updating the global bitmap corresponding to 1157 // a boolean expression. The index being set is based on the value loaded 1158 // from a pointer to a dedicated temporary value on the stack that is itself 1159 // updated via emitMCDCCondBitmapReset() and emitMCDCCondBitmapUpdate(). The 1160 // index represents an executed test vector. 1161 llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1162 Builder.getInt64(FunctionHash), 1163 Builder.getInt32(MCDCBitmapBytes), 1164 Builder.getInt32(MCDCTestVectorBitmapID), 1165 MCDCCondBitmapAddr.getPointer()}; 1166 Builder.CreateCall( 1167 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_tvbitmap_update), Args); 1168 } 1169 1170 void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S, 1171 Address MCDCCondBitmapAddr) { 1172 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCBitmapMap) 1173 return; 1174 1175 S = S->IgnoreParens(); 1176 1177 if (RegionMCDCBitmapMap->find(S) == RegionMCDCBitmapMap->end()) 1178 return; 1179 1180 // Emit intrinsic that resets a dedicated temporary value on the stack to 0. 1181 Builder.CreateStore(Builder.getInt32(0), MCDCCondBitmapAddr); 1182 } 1183 1184 void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, 1185 Address MCDCCondBitmapAddr, 1186 llvm::Value *Val) { 1187 if (!canEmitMCDCCoverage(Builder) || !RegionCondIDMap) 1188 return; 1189 1190 // Even though, for simplicity, parentheses and unary logical-NOT operators 1191 // are considered part of their underlying condition for both MC/DC and 1192 // branch coverage, the condition IDs themselves are assigned and tracked 1193 // using the underlying condition itself. This is done solely for 1194 // consistency since parentheses and logical-NOTs are ignored when checking 1195 // whether the condition is actually an instrumentable condition. This can 1196 // also make debugging a bit easier. 1197 S = CodeGenFunction::stripCond(S); 1198 1199 auto ExprMCDCConditionIDMapIterator = RegionCondIDMap->find(S); 1200 if (ExprMCDCConditionIDMapIterator == RegionCondIDMap->end()) 1201 return; 1202 1203 // Extract the ID of the condition we are setting in the bitmap. 1204 unsigned CondID = ExprMCDCConditionIDMapIterator->second; 1205 assert(CondID > 0 && "Condition has no ID!"); 1206 1207 auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); 1208 1209 // Emit intrinsic that updates a dedicated temporary value on the stack after 1210 // a condition is evaluated. After the set of conditions has been updated, 1211 // the resulting value is used to update the boolean expression's bitmap. 1212 llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 1213 Builder.getInt64(FunctionHash), 1214 Builder.getInt32(CondID - 1), 1215 MCDCCondBitmapAddr.getPointer(), Val}; 1216 Builder.CreateCall( 1217 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_condbitmap_update), 1218 Args); 1219 } 1220 1221 void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) { 1222 if (CGM.getCodeGenOpts().hasProfileClangInstr()) 1223 M.addModuleFlag(llvm::Module::Warning, "EnableValueProfiling", 1224 uint32_t(EnableValueProfiling)); 1225 } 1226 1227 // This method either inserts a call to the profile run-time during 1228 // instrumentation or puts profile data into metadata for PGO use. 1229 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, 1230 llvm::Instruction *ValueSite, llvm::Value *ValuePtr) { 1231 1232 if (!EnableValueProfiling) 1233 return; 1234 1235 if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock()) 1236 return; 1237 1238 if (isa<llvm::Constant>(ValuePtr)) 1239 return; 1240 1241 bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr(); 1242 if (InstrumentValueSites && RegionCounterMap) { 1243 auto BuilderInsertPoint = Builder.saveIP(); 1244 Builder.SetInsertPoint(ValueSite); 1245 llvm::Value *Args[5] = { 1246 FuncNameVar, 1247 Builder.getInt64(FunctionHash), 1248 Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()), 1249 Builder.getInt32(ValueKind), 1250 Builder.getInt32(NumValueSites[ValueKind]++) 1251 }; 1252 Builder.CreateCall( 1253 CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args); 1254 Builder.restoreIP(BuilderInsertPoint); 1255 return; 1256 } 1257 1258 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); 1259 if (PGOReader && haveRegionCounts()) { 1260 // We record the top most called three functions at each call site. 1261 // Profile metadata contains "VP" string identifying this metadata 1262 // as value profiling data, then a uint32_t value for the value profiling 1263 // kind, a uint64_t value for the total number of times the call is 1264 // executed, followed by the function hash and execution count (uint64_t) 1265 // pairs for each function. 1266 if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind)) 1267 return; 1268 1269 llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord, 1270 (llvm::InstrProfValueKind)ValueKind, 1271 NumValueSites[ValueKind]); 1272 1273 NumValueSites[ValueKind]++; 1274 } 1275 } 1276 1277 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, 1278 bool IsInMainFile) { 1279 CGM.getPGOStats().addVisited(IsInMainFile); 1280 RegionCounts.clear(); 1281 llvm::Expected<llvm::InstrProfRecord> RecordExpected = 1282 PGOReader->getInstrProfRecord(FuncName, FunctionHash); 1283 if (auto E = RecordExpected.takeError()) { 1284 auto IPE = std::get<0>(llvm::InstrProfError::take(std::move(E))); 1285 if (IPE == llvm::instrprof_error::unknown_function) 1286 CGM.getPGOStats().addMissing(IsInMainFile); 1287 else if (IPE == llvm::instrprof_error::hash_mismatch) 1288 CGM.getPGOStats().addMismatched(IsInMainFile); 1289 else if (IPE == llvm::instrprof_error::malformed) 1290 // TODO: Consider a more specific warning for this case. 1291 CGM.getPGOStats().addMismatched(IsInMainFile); 1292 return; 1293 } 1294 ProfRecord = 1295 std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); 1296 RegionCounts = ProfRecord->Counts; 1297 } 1298 1299 /// Calculate what to divide by to scale weights. 1300 /// 1301 /// Given the maximum weight, calculate a divisor that will scale all the 1302 /// weights to strictly less than UINT32_MAX. 1303 static uint64_t calculateWeightScale(uint64_t MaxWeight) { 1304 return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1; 1305 } 1306 1307 /// Scale an individual branch weight (and add 1). 1308 /// 1309 /// Scale a 64-bit weight down to 32-bits using \c Scale. 1310 /// 1311 /// According to Laplace's Rule of Succession, it is better to compute the 1312 /// weight based on the count plus 1, so universally add 1 to the value. 1313 /// 1314 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no 1315 /// greater than \c Weight. 1316 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) { 1317 assert(Scale && "scale by 0?"); 1318 uint64_t Scaled = Weight / Scale + 1; 1319 assert(Scaled <= UINT32_MAX && "overflow 32-bits"); 1320 return Scaled; 1321 } 1322 1323 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount, 1324 uint64_t FalseCount) const { 1325 // Check for empty weights. 1326 if (!TrueCount && !FalseCount) 1327 return nullptr; 1328 1329 // Calculate how to scale down to 32-bits. 1330 uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount)); 1331 1332 llvm::MDBuilder MDHelper(CGM.getLLVMContext()); 1333 return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale), 1334 scaleBranchWeight(FalseCount, Scale)); 1335 } 1336 1337 llvm::MDNode * 1338 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const { 1339 // We need at least two elements to create meaningful weights. 1340 if (Weights.size() < 2) 1341 return nullptr; 1342 1343 // Check for empty weights. 1344 uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end()); 1345 if (MaxWeight == 0) 1346 return nullptr; 1347 1348 // Calculate how to scale down to 32-bits. 1349 uint64_t Scale = calculateWeightScale(MaxWeight); 1350 1351 SmallVector<uint32_t, 16> ScaledWeights; 1352 ScaledWeights.reserve(Weights.size()); 1353 for (uint64_t W : Weights) 1354 ScaledWeights.push_back(scaleBranchWeight(W, Scale)); 1355 1356 llvm::MDBuilder MDHelper(CGM.getLLVMContext()); 1357 return MDHelper.createBranchWeights(ScaledWeights); 1358 } 1359 1360 llvm::MDNode * 1361 CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond, 1362 uint64_t LoopCount) const { 1363 if (!PGO.haveRegionCounts()) 1364 return nullptr; 1365 std::optional<uint64_t> CondCount = PGO.getStmtCount(Cond); 1366 if (!CondCount || *CondCount == 0) 1367 return nullptr; 1368 return createProfileWeights(LoopCount, 1369 std::max(*CondCount, LoopCount) - LoopCount); 1370 } 1371