xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/MisExpect.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
181ad6265SDimitry Andric //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric //
981ad6265SDimitry Andric // This contains code to emit warnings for potentially incorrect usage of the
1081ad6265SDimitry Andric // llvm.expect intrinsic. This utility extracts the threshold values from
1181ad6265SDimitry Andric // metadata associated with the instrumented Branch or Switch instruction. The
1281ad6265SDimitry Andric // threshold values are then used to determine if a warning should be emmited.
1381ad6265SDimitry Andric //
1481ad6265SDimitry Andric // MisExpect's implementation relies on two assumptions about how branch weights
1581ad6265SDimitry Andric // are managed in LLVM.
1681ad6265SDimitry Andric //
1781ad6265SDimitry Andric // 1) Frontend profiling weights are always in place before llvm.expect is
1881ad6265SDimitry Andric // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
1981ad6265SDimitry Andric // needs to extract the branch weights and then compare them to the weights
2081ad6265SDimitry Andric // being added by the llvm.expect intrinsic lowering.
2181ad6265SDimitry Andric //
2281ad6265SDimitry Andric // 2) Sampling and IR based profiles will *only* have branch weight metadata
2381ad6265SDimitry Andric // before profiling data is consulted if they are from a lowered llvm.expect
2481ad6265SDimitry Andric // intrinsic. These profiles thus always extract the expected weights and then
2581ad6265SDimitry Andric // compare them to the weights collected during profiling to determine if a
2681ad6265SDimitry Andric // diagnostic message is warranted.
2781ad6265SDimitry Andric //
2881ad6265SDimitry Andric //===----------------------------------------------------------------------===//
2981ad6265SDimitry Andric 
3081ad6265SDimitry Andric #include "llvm/Transforms/Utils/MisExpect.h"
3181ad6265SDimitry Andric #include "llvm/ADT/Twine.h"
3281ad6265SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
3381ad6265SDimitry Andric #include "llvm/IR/Constants.h"
3481ad6265SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
3581ad6265SDimitry Andric #include "llvm/IR/Instruction.h"
3681ad6265SDimitry Andric #include "llvm/IR/Instructions.h"
3781ad6265SDimitry Andric #include "llvm/IR/LLVMContext.h"
38bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h"
3981ad6265SDimitry Andric #include "llvm/Support/BranchProbability.h"
4081ad6265SDimitry Andric #include "llvm/Support/CommandLine.h"
4181ad6265SDimitry Andric #include "llvm/Support/Debug.h"
4281ad6265SDimitry Andric #include "llvm/Support/FormatVariadic.h"
43bdd1243dSDimitry Andric #include <algorithm>
4481ad6265SDimitry Andric #include <cstdint>
4581ad6265SDimitry Andric #include <functional>
4681ad6265SDimitry Andric #include <numeric>
4781ad6265SDimitry Andric 
4881ad6265SDimitry Andric #define DEBUG_TYPE "misexpect"
4981ad6265SDimitry Andric 
5081ad6265SDimitry Andric using namespace llvm;
5181ad6265SDimitry Andric using namespace misexpect;
5281ad6265SDimitry Andric 
5381ad6265SDimitry Andric namespace llvm {
5481ad6265SDimitry Andric 
5581ad6265SDimitry Andric // Command line option to enable/disable the warning when profile data suggests
5681ad6265SDimitry Andric // a mismatch with the use of the llvm.expect intrinsic
5781ad6265SDimitry Andric static cl::opt<bool> PGOWarnMisExpect(
5881ad6265SDimitry Andric     "pgo-warn-misexpect", cl::init(false), cl::Hidden,
5981ad6265SDimitry Andric     cl::desc("Use this option to turn on/off "
6081ad6265SDimitry Andric              "warnings about incorrect usage of llvm.expect intrinsics."));
6181ad6265SDimitry Andric 
62*0fca6ea1SDimitry Andric // Command line option for setting the diagnostic tolerance threshold
63bdd1243dSDimitry Andric static cl::opt<uint32_t> MisExpectTolerance(
6481ad6265SDimitry Andric     "misexpect-tolerance", cl::init(0),
65*0fca6ea1SDimitry Andric     cl::desc("Prevents emitting diagnostics when profile counts are "
6681ad6265SDimitry Andric              "within N% of the threshold.."));
6781ad6265SDimitry Andric 
6881ad6265SDimitry Andric } // namespace llvm
6981ad6265SDimitry Andric 
7081ad6265SDimitry Andric namespace {
7181ad6265SDimitry Andric 
isMisExpectDiagEnabled(LLVMContext & Ctx)7281ad6265SDimitry Andric bool isMisExpectDiagEnabled(LLVMContext &Ctx) {
7381ad6265SDimitry Andric   return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
7481ad6265SDimitry Andric }
7581ad6265SDimitry Andric 
getMisExpectTolerance(LLVMContext & Ctx)76bdd1243dSDimitry Andric uint32_t getMisExpectTolerance(LLVMContext &Ctx) {
77bdd1243dSDimitry Andric   return std::max(static_cast<uint32_t>(MisExpectTolerance),
7881ad6265SDimitry Andric                   Ctx.getDiagnosticsMisExpectTolerance());
7981ad6265SDimitry Andric }
8081ad6265SDimitry Andric 
getInstCondition(Instruction * I)8181ad6265SDimitry Andric Instruction *getInstCondition(Instruction *I) {
8281ad6265SDimitry Andric   assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
8381ad6265SDimitry Andric   Instruction *Ret = nullptr;
8481ad6265SDimitry Andric   if (auto *B = dyn_cast<BranchInst>(I)) {
8581ad6265SDimitry Andric     Ret = dyn_cast<Instruction>(B->getCondition());
8681ad6265SDimitry Andric   }
8781ad6265SDimitry Andric   // TODO: Find a way to resolve condition location for switches
8881ad6265SDimitry Andric   // Using the condition of the switch seems to often resolve to an earlier
8981ad6265SDimitry Andric   // point in the program, i.e. the calculation of the switch condition, rather
9081ad6265SDimitry Andric   // than the switch's location in the source code. Thus, we should use the
9181ad6265SDimitry Andric   // instruction to get source code locations rather than the condition to
9281ad6265SDimitry Andric   // improve diagnostic output, such as the caret. If the same problem exists
9381ad6265SDimitry Andric   // for branch instructions, then we should remove this function and directly
9481ad6265SDimitry Andric   // use the instruction
9581ad6265SDimitry Andric   //
9681ad6265SDimitry Andric   else if (auto *S = dyn_cast<SwitchInst>(I)) {
9781ad6265SDimitry Andric     Ret = dyn_cast<Instruction>(S->getCondition());
9881ad6265SDimitry Andric   }
9981ad6265SDimitry Andric   return Ret ? Ret : I;
10081ad6265SDimitry Andric }
10181ad6265SDimitry Andric 
emitMisexpectDiagnostic(Instruction * I,LLVMContext & Ctx,uint64_t ProfCount,uint64_t TotalCount)10281ad6265SDimitry Andric void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx,
10381ad6265SDimitry Andric                              uint64_t ProfCount, uint64_t TotalCount) {
10481ad6265SDimitry Andric   double PercentageCorrect = (double)ProfCount / TotalCount;
10581ad6265SDimitry Andric   auto PerString =
10681ad6265SDimitry Andric       formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
10781ad6265SDimitry Andric   auto RemStr = formatv(
10881ad6265SDimitry Andric       "Potential performance regression from use of the llvm.expect intrinsic: "
10981ad6265SDimitry Andric       "Annotation was correct on {0} of profiled executions.",
11081ad6265SDimitry Andric       PerString);
11181ad6265SDimitry Andric   Twine Msg(PerString);
11281ad6265SDimitry Andric   Instruction *Cond = getInstCondition(I);
11381ad6265SDimitry Andric   if (isMisExpectDiagEnabled(Ctx))
11481ad6265SDimitry Andric     Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg));
11581ad6265SDimitry Andric   OptimizationRemarkEmitter ORE(I->getParent()->getParent());
11681ad6265SDimitry Andric   ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
11781ad6265SDimitry Andric }
11881ad6265SDimitry Andric 
11981ad6265SDimitry Andric } // namespace
12081ad6265SDimitry Andric 
12181ad6265SDimitry Andric namespace llvm {
12281ad6265SDimitry Andric namespace misexpect {
12381ad6265SDimitry Andric 
verifyMisExpect(Instruction & I,ArrayRef<uint32_t> RealWeights,ArrayRef<uint32_t> ExpectedWeights)12481ad6265SDimitry Andric void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights,
12581ad6265SDimitry Andric                      ArrayRef<uint32_t> ExpectedWeights) {
12681ad6265SDimitry Andric   // To determine if we emit a diagnostic, we need to compare the branch weights
12781ad6265SDimitry Andric   // from the profile to those added by the llvm.expect intrinsic.
12881ad6265SDimitry Andric   // So first, we extract the "likely" and "unlikely" weights from
12981ad6265SDimitry Andric   // ExpectedWeights And determine the correct weight in the profile to compare
13081ad6265SDimitry Andric   // against.
13181ad6265SDimitry Andric   uint64_t LikelyBranchWeight = 0,
13281ad6265SDimitry Andric            UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
13381ad6265SDimitry Andric   size_t MaxIndex = 0;
13481ad6265SDimitry Andric   for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) {
13581ad6265SDimitry Andric     uint32_t V = ExpectedWeights[Idx];
13681ad6265SDimitry Andric     if (LikelyBranchWeight < V) {
13781ad6265SDimitry Andric       LikelyBranchWeight = V;
13881ad6265SDimitry Andric       MaxIndex = Idx;
13981ad6265SDimitry Andric     }
14081ad6265SDimitry Andric     if (UnlikelyBranchWeight > V) {
14181ad6265SDimitry Andric       UnlikelyBranchWeight = V;
14281ad6265SDimitry Andric     }
14381ad6265SDimitry Andric   }
14481ad6265SDimitry Andric 
14581ad6265SDimitry Andric   const uint64_t ProfiledWeight = RealWeights[MaxIndex];
14681ad6265SDimitry Andric   const uint64_t RealWeightsTotal =
14781ad6265SDimitry Andric       std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
14881ad6265SDimitry Andric                       std::plus<uint64_t>());
14981ad6265SDimitry Andric   const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;
15081ad6265SDimitry Andric 
15181ad6265SDimitry Andric   uint64_t TotalBranchWeight =
15281ad6265SDimitry Andric       LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
15381ad6265SDimitry Andric 
154*0fca6ea1SDimitry Andric   // Failing this assert means that we have corrupted metadata.
155*0fca6ea1SDimitry Andric   assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) &&
156*0fca6ea1SDimitry Andric          "TotalBranchWeight is less than the Likely branch weight");
15781ad6265SDimitry Andric 
15881ad6265SDimitry Andric   // To determine our threshold value we need to obtain the branch probability
15981ad6265SDimitry Andric   // for the weights added by llvm.expect and use that proportion to calculate
16081ad6265SDimitry Andric   // our threshold based on the collected profile data.
16181ad6265SDimitry Andric   auto LikelyProbablilty = BranchProbability::getBranchProbability(
16281ad6265SDimitry Andric       LikelyBranchWeight, TotalBranchWeight);
16381ad6265SDimitry Andric 
16481ad6265SDimitry Andric   uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);
16581ad6265SDimitry Andric 
16681ad6265SDimitry Andric   // clamp tolerance range to [0, 100)
16781ad6265SDimitry Andric   auto Tolerance = getMisExpectTolerance(I.getContext());
168bdd1243dSDimitry Andric   Tolerance = std::clamp(Tolerance, 0u, 99u);
16981ad6265SDimitry Andric 
17081ad6265SDimitry Andric   // Allow users to relax checking by N%  i.e., if they use a 5% tolerance,
17181ad6265SDimitry Andric   // then we check against 0.95*ScaledThreshold
17281ad6265SDimitry Andric   if (Tolerance > 0)
17381ad6265SDimitry Andric     ScaledThreshold *= (1.0 - Tolerance / 100.0);
17481ad6265SDimitry Andric 
17581ad6265SDimitry Andric   // When the profile weight is below the threshold, we emit the diagnostic
17681ad6265SDimitry Andric   if (ProfiledWeight < ScaledThreshold)
17781ad6265SDimitry Andric     emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
17881ad6265SDimitry Andric                             RealWeightsTotal);
17981ad6265SDimitry Andric }
18081ad6265SDimitry Andric 
checkBackendInstrumentation(Instruction & I,const ArrayRef<uint32_t> RealWeights)18181ad6265SDimitry Andric void checkBackendInstrumentation(Instruction &I,
18281ad6265SDimitry Andric                                  const ArrayRef<uint32_t> RealWeights) {
183*0fca6ea1SDimitry Andric   // Backend checking assumes any existing weight comes from an `llvm.expect`
184*0fca6ea1SDimitry Andric   // intrinsic. However, SampleProfiling + ThinLTO add branch weights  multiple
185*0fca6ea1SDimitry Andric   // times, leading to an invalid assumption in our checking. Backend checks
186*0fca6ea1SDimitry Andric   // should only operate on branch weights that carry the "!expected" field,
187*0fca6ea1SDimitry Andric   // since they are guaranteed to be added by the LowerExpectIntrinsic pass.
188*0fca6ea1SDimitry Andric   if (!hasBranchWeightOrigin(I))
189*0fca6ea1SDimitry Andric     return;
190bdd1243dSDimitry Andric   SmallVector<uint32_t> ExpectedWeights;
191bdd1243dSDimitry Andric   if (!extractBranchWeights(I, ExpectedWeights))
19281ad6265SDimitry Andric     return;
19381ad6265SDimitry Andric   verifyMisExpect(I, RealWeights, ExpectedWeights);
19481ad6265SDimitry Andric }
19581ad6265SDimitry Andric 
checkFrontendInstrumentation(Instruction & I,const ArrayRef<uint32_t> ExpectedWeights)19681ad6265SDimitry Andric void checkFrontendInstrumentation(Instruction &I,
19781ad6265SDimitry Andric                                   const ArrayRef<uint32_t> ExpectedWeights) {
198bdd1243dSDimitry Andric   SmallVector<uint32_t> RealWeights;
199bdd1243dSDimitry Andric   if (!extractBranchWeights(I, RealWeights))
20081ad6265SDimitry Andric     return;
20181ad6265SDimitry Andric   verifyMisExpect(I, RealWeights, ExpectedWeights);
20281ad6265SDimitry Andric }
20381ad6265SDimitry Andric 
checkExpectAnnotations(Instruction & I,const ArrayRef<uint32_t> ExistingWeights,bool IsFrontend)20481ad6265SDimitry Andric void checkExpectAnnotations(Instruction &I,
20581ad6265SDimitry Andric                             const ArrayRef<uint32_t> ExistingWeights,
206bdd1243dSDimitry Andric                             bool IsFrontend) {
207bdd1243dSDimitry Andric   if (IsFrontend) {
20881ad6265SDimitry Andric     checkFrontendInstrumentation(I, ExistingWeights);
20981ad6265SDimitry Andric   } else {
21081ad6265SDimitry Andric     checkBackendInstrumentation(I, ExistingWeights);
21181ad6265SDimitry Andric   }
21281ad6265SDimitry Andric }
21381ad6265SDimitry Andric 
21481ad6265SDimitry Andric } // namespace misexpect
21581ad6265SDimitry Andric } // namespace llvm
21681ad6265SDimitry Andric #undef DEBUG_TYPE
217