1 //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This contains code to emit warnings for potentially incorrect usage of the 10 // llvm.expect intrinsic. This utility extracts the threshold values from 11 // metadata associated with the instrumented Branch or Switch instruction. The 12 // threshold values are then used to determine if a warning should be emmited. 13 // 14 // MisExpect's implementation relies on two assumptions about how branch weights 15 // are managed in LLVM. 16 // 17 // 1) Frontend profiling weights are always in place before llvm.expect is 18 // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore 19 // needs to extract the branch weights and then compare them to the weights 20 // being added by the llvm.expect intrinsic lowering. 21 // 22 // 2) Sampling and IR based profiles will *only* have branch weight metadata 23 // before profiling data is consulted if they are from a lowered llvm.expect 24 // intrinsic. These profiles thus always extract the expected weights and then 25 // compare them to the weights collected during profiling to determine if a 26 // diagnostic message is warranted. 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "llvm/Transforms/Utils/MisExpect.h" 31 #include "llvm/ADT/Twine.h" 32 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DiagnosticInfo.h" 35 #include "llvm/IR/Instruction.h" 36 #include "llvm/IR/Instructions.h" 37 #include "llvm/IR/LLVMContext.h" 38 #include "llvm/IR/ProfDataUtils.h" 39 #include "llvm/Support/BranchProbability.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/FormatVariadic.h" 43 #include <algorithm> 44 #include <cstdint> 45 #include <functional> 46 #include <numeric> 47 48 #define DEBUG_TYPE "misexpect" 49 50 using namespace llvm; 51 using namespace misexpect; 52 53 namespace llvm { 54 55 // Command line option to enable/disable the warning when profile data suggests 56 // a mismatch with the use of the llvm.expect intrinsic 57 static cl::opt<bool> PGOWarnMisExpect( 58 "pgo-warn-misexpect", cl::init(false), cl::Hidden, 59 cl::desc("Use this option to turn on/off " 60 "warnings about incorrect usage of llvm.expect intrinsics.")); 61 62 // Command line option for setting the diagnostic tolerance threshold 63 static cl::opt<uint32_t> MisExpectTolerance( 64 "misexpect-tolerance", cl::init(0), 65 cl::desc("Prevents emitting diagnostics when profile counts are " 66 "within N% of the threshold..")); 67 68 } // namespace llvm 69 70 namespace { 71 72 bool isMisExpectDiagEnabled(LLVMContext &Ctx) { 73 return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); 74 } 75 76 uint32_t getMisExpectTolerance(LLVMContext &Ctx) { 77 return std::max(static_cast<uint32_t>(MisExpectTolerance), 78 Ctx.getDiagnosticsMisExpectTolerance()); 79 } 80 81 Instruction *getInstCondition(Instruction *I) { 82 assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); 83 Instruction *Ret = nullptr; 84 if (auto *B = dyn_cast<BranchInst>(I)) { 85 Ret = dyn_cast<Instruction>(B->getCondition()); 86 } 87 // TODO: Find a way to resolve condition location for switches 88 // Using the condition of the switch seems to often resolve to an earlier 89 // point in the program, i.e. the calculation of the switch condition, rather 90 // than the switch's location in the source code. Thus, we should use the 91 // instruction to get source code locations rather than the condition to 92 // improve diagnostic output, such as the caret. If the same problem exists 93 // for branch instructions, then we should remove this function and directly 94 // use the instruction 95 // 96 else if (auto *S = dyn_cast<SwitchInst>(I)) { 97 Ret = dyn_cast<Instruction>(S->getCondition()); 98 } 99 return Ret ? Ret : I; 100 } 101 102 void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, 103 uint64_t ProfCount, uint64_t TotalCount) { 104 double PercentageCorrect = (double)ProfCount / TotalCount; 105 auto PerString = 106 formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); 107 auto RemStr = formatv( 108 "Potential performance regression from use of the llvm.expect intrinsic: " 109 "Annotation was correct on {0} of profiled executions.", 110 PerString); 111 Twine Msg(PerString); 112 Instruction *Cond = getInstCondition(I); 113 if (isMisExpectDiagEnabled(Ctx)) 114 Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); 115 OptimizationRemarkEmitter ORE(I->getParent()->getParent()); 116 ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); 117 } 118 119 } // namespace 120 121 namespace llvm { 122 namespace misexpect { 123 124 void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, 125 ArrayRef<uint32_t> ExpectedWeights) { 126 // To determine if we emit a diagnostic, we need to compare the branch weights 127 // from the profile to those added by the llvm.expect intrinsic. 128 // So first, we extract the "likely" and "unlikely" weights from 129 // ExpectedWeights And determine the correct weight in the profile to compare 130 // against. 131 uint64_t LikelyBranchWeight = 0, 132 UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max(); 133 size_t MaxIndex = 0; 134 for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { 135 uint32_t V = ExpectedWeights[Idx]; 136 if (LikelyBranchWeight < V) { 137 LikelyBranchWeight = V; 138 MaxIndex = Idx; 139 } 140 if (UnlikelyBranchWeight > V) { 141 UnlikelyBranchWeight = V; 142 } 143 } 144 145 const uint64_t ProfiledWeight = RealWeights[MaxIndex]; 146 const uint64_t RealWeightsTotal = 147 std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0, 148 std::plus<uint64_t>()); 149 const uint64_t NumUnlikelyTargets = RealWeights.size() - 1; 150 151 uint64_t TotalBranchWeight = 152 LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); 153 154 // Failing this assert means that we have corrupted metadata. 155 assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) && 156 "TotalBranchWeight is less than the Likely branch weight"); 157 158 // To determine our threshold value we need to obtain the branch probability 159 // for the weights added by llvm.expect and use that proportion to calculate 160 // our threshold based on the collected profile data. 161 auto LikelyProbablilty = BranchProbability::getBranchProbability( 162 LikelyBranchWeight, TotalBranchWeight); 163 164 uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal); 165 166 // clamp tolerance range to [0, 100) 167 auto Tolerance = getMisExpectTolerance(I.getContext()); 168 Tolerance = std::clamp(Tolerance, 0u, 99u); 169 170 // Allow users to relax checking by N% i.e., if they use a 5% tolerance, 171 // then we check against 0.95*ScaledThreshold 172 if (Tolerance > 0) 173 ScaledThreshold *= (1.0 - Tolerance / 100.0); 174 175 // When the profile weight is below the threshold, we emit the diagnostic 176 if (ProfiledWeight < ScaledThreshold) 177 emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight, 178 RealWeightsTotal); 179 } 180 181 void checkBackendInstrumentation(Instruction &I, 182 const ArrayRef<uint32_t> RealWeights) { 183 // Backend checking assumes any existing weight comes from an `llvm.expect` 184 // intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple 185 // times, leading to an invalid assumption in our checking. Backend checks 186 // should only operate on branch weights that carry the "!expected" field, 187 // since they are guaranteed to be added by the LowerExpectIntrinsic pass. 188 if (!hasBranchWeightOrigin(I)) 189 return; 190 SmallVector<uint32_t> ExpectedWeights; 191 if (!extractBranchWeights(I, ExpectedWeights)) 192 return; 193 verifyMisExpect(I, RealWeights, ExpectedWeights); 194 } 195 196 void checkFrontendInstrumentation(Instruction &I, 197 const ArrayRef<uint32_t> ExpectedWeights) { 198 SmallVector<uint32_t> RealWeights; 199 if (!extractBranchWeights(I, RealWeights)) 200 return; 201 verifyMisExpect(I, RealWeights, ExpectedWeights); 202 } 203 204 void checkExpectAnnotations(Instruction &I, 205 const ArrayRef<uint32_t> ExistingWeights, 206 bool IsFrontend) { 207 if (IsFrontend) { 208 checkFrontendInstrumentation(I, ExistingWeights); 209 } else { 210 checkBackendInstrumentation(I, ExistingWeights); 211 } 212 } 213 214 } // namespace misexpect 215 } // namespace llvm 216 #undef DEBUG_TYPE 217