1 //===- Transforms/IPO/SampleProfileMatcher.h ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file provides the interface for SampleProfileMatcher. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H 15 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H 16 17 #include "llvm/ADT/StringSet.h" 18 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" 19 20 namespace llvm { 21 22 using AnchorList = std::vector<std::pair<LineLocation, FunctionId>>; 23 using AnchorMap = std::map<LineLocation, FunctionId>; 24 25 // Sample profile matching - fuzzy match. 26 class SampleProfileMatcher { 27 Module &M; 28 SampleProfileReader &Reader; 29 LazyCallGraph &CG; 30 const PseudoProbeManager *ProbeManager; 31 const ThinOrFullLTOPhase LTOPhase; 32 SampleProfileMap FlattenedProfiles; 33 // For each function, the matcher generates a map, of which each entry is a 34 // mapping from the source location of current build to the source location 35 // in the profile. 36 StringMap<LocToLocMap> FuncMappings; 37 38 // Match state for an anchor/callsite. 39 enum class MatchState { 40 Unknown = 0, 41 // Initial match between input profile and current IR. 42 InitialMatch = 1, 43 // Initial mismatch between input profile and current IR. 44 InitialMismatch = 2, 45 // InitialMatch stays matched after fuzzy profile matching. 46 UnchangedMatch = 3, 47 // InitialMismatch stays mismatched after fuzzy profile matching. 48 UnchangedMismatch = 4, 49 // InitialMismatch is recovered after fuzzy profile matching. 50 RecoveredMismatch = 5, 51 // InitialMatch is removed and becomes mismatched after fuzzy profile 52 // matching. 53 RemovedMatch = 6, 54 }; 55 56 // For each function, store every callsite and its matching state into this 57 // map, of which each entry is a pair of callsite location and MatchState. 58 // This is used for profile staleness computation and report. 59 StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>> 60 FuncCallsiteMatchStates; 61 62 struct FuncToProfileNameMapHash { 63 uint64_t operatorFuncToProfileNameMapHash64 operator()(const std::pair<const Function *, FunctionId> &P) const { 65 return hash_combine(P.first, P.second); 66 } 67 }; 68 // A map from a pair of function and profile name to a boolean value 69 // indicating whether they are matched. This is used as a cache for the 70 // matching result. 71 std::unordered_map<std::pair<const Function *, FunctionId>, bool, 72 FuncToProfileNameMapHash> 73 FuncProfileMatchCache; 74 // The new functions found by the call graph matching. The map's key is the 75 // the new(renamed) function pointer and the value is old(unused) profile 76 // name. 77 std::unordered_map<Function *, FunctionId> FuncToProfileNameMap; 78 79 // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader, 80 // which maps the function name to the matched profile name. This is used 81 // for sample loader to look up profile using the new name. 82 HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FuncNameToProfNameMap; 83 84 // A map pointer to the SymbolMap in SampleProfileLoader, which stores all 85 // the original matched symbols before the matching. this is to determine if 86 // the profile is unused(to be matched) or not. 87 HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap; 88 89 // The new functions from IR. 90 HashKeyMap<std::unordered_map, FunctionId, Function *> 91 FunctionsWithoutProfile; 92 93 // Pointer to the Profile Symbol List in the reader. 94 std::shared_ptr<ProfileSymbolList> PSL; 95 96 // Profile mismatch statstics: 97 uint64_t TotalProfiledFunc = 0; 98 // Num of checksum-mismatched function. 99 uint64_t NumStaleProfileFunc = 0; 100 uint64_t TotalProfiledCallsites = 0; 101 uint64_t NumMismatchedCallsites = 0; 102 uint64_t NumRecoveredCallsites = 0; 103 // Total samples for all profiled functions. 104 uint64_t TotalFunctionSamples = 0; 105 // Total samples for all checksum-mismatched functions. 106 uint64_t MismatchedFunctionSamples = 0; 107 uint64_t MismatchedCallsiteSamples = 0; 108 uint64_t RecoveredCallsiteSamples = 0; 109 110 // Profile call-graph matching statstics: 111 uint64_t NumCallGraphRecoveredProfiledFunc = 0; 112 uint64_t NumCallGraphRecoveredFuncSamples = 0; 113 114 // A dummy name for unknown indirect callee, used to differentiate from a 115 // non-call instruction that also has an empty callee name. 116 static constexpr const char *UnknownIndirectCallee = 117 "unknown.indirect.callee"; 118 119 public: SampleProfileMatcher(Module & M,SampleProfileReader & Reader,LazyCallGraph & CG,const PseudoProbeManager * ProbeManager,ThinOrFullLTOPhase LTOPhase,HashKeyMap<std::unordered_map,FunctionId,Function * > & SymMap,std::shared_ptr<ProfileSymbolList> PSL,HashKeyMap<std::unordered_map,FunctionId,FunctionId> & FuncNameToProfNameMap)120 SampleProfileMatcher( 121 Module &M, SampleProfileReader &Reader, LazyCallGraph &CG, 122 const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase, 123 HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap, 124 std::shared_ptr<ProfileSymbolList> PSL, 125 HashKeyMap<std::unordered_map, FunctionId, FunctionId> 126 &FuncNameToProfNameMap) 127 : M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager), 128 LTOPhase(LTOPhase), FuncNameToProfNameMap(&FuncNameToProfNameMap), 129 SymbolMap(&SymMap), PSL(PSL) {}; 130 void runOnModule(); clearMatchingData()131 void clearMatchingData() { 132 // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which 133 // will be used for sample loader. 134 // Do not clear FlattenedProfiles as it contains function names referenced 135 // by FuncNameToProfNameMap. Clearing this memory could lead to a 136 // use-after-free error. 137 freeContainer(FuncCallsiteMatchStates); 138 freeContainer(FunctionsWithoutProfile); 139 freeContainer(FuncToProfileNameMap); 140 } 141 142 private: getFlattenedSamplesFor(const FunctionId & Fname)143 FunctionSamples *getFlattenedSamplesFor(const FunctionId &Fname) { 144 auto It = FlattenedProfiles.find(Fname); 145 if (It != FlattenedProfiles.end()) 146 return &It->second; 147 return nullptr; 148 } getFlattenedSamplesFor(const Function & F)149 FunctionSamples *getFlattenedSamplesFor(const Function &F) { 150 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); 151 return getFlattenedSamplesFor(FunctionId(CanonFName)); 152 } freeContainer(T & C)153 template <typename T> inline void freeContainer(T &C) { 154 T Empty; 155 std::swap(C, Empty); 156 } 157 void getFilteredAnchorList(const AnchorMap &IRAnchors, 158 const AnchorMap &ProfileAnchors, 159 AnchorList &FilteredIRAnchorsList, 160 AnchorList &FilteredProfileAnchorList); 161 void runOnFunction(Function &F); 162 void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const; 163 void findProfileAnchors(const FunctionSamples &FS, 164 AnchorMap &ProfileAnchors) const; 165 // Record the callsite match states for profile staleness report, the result 166 // is saved in FuncCallsiteMatchStates. 167 void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors, 168 const AnchorMap &ProfileAnchors, 169 const LocToLocMap *IRToProfileLocationMap); 170 isMismatchState(const enum MatchState & State)171 bool isMismatchState(const enum MatchState &State) { 172 return State == MatchState::InitialMismatch || 173 State == MatchState::UnchangedMismatch || 174 State == MatchState::RemovedMatch; 175 }; 176 isInitialState(const enum MatchState & State)177 bool isInitialState(const enum MatchState &State) { 178 return State == MatchState::InitialMatch || 179 State == MatchState::InitialMismatch; 180 }; 181 isFinalState(const enum MatchState & State)182 bool isFinalState(const enum MatchState &State) { 183 return State == MatchState::UnchangedMatch || 184 State == MatchState::UnchangedMismatch || 185 State == MatchState::RecoveredMismatch || 186 State == MatchState::RemovedMatch; 187 }; 188 189 void countCallGraphRecoveredSamples( 190 const FunctionSamples &FS, 191 std::unordered_set<FunctionId> &MatchedUnusedProfile); 192 // Count the samples of checksum mismatched function for the top-level 193 // function and all inlinees. 194 void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel); 195 // Count the number of mismatched or recovered callsites. 196 void countMismatchCallsites(const FunctionSamples &FS); 197 // Count the samples of mismatched or recovered callsites for top-level 198 // function and all inlinees. 199 void countMismatchedCallsiteSamples(const FunctionSamples &FS); 200 void computeAndReportProfileStaleness(); 201 getIRToProfileLocationMap(const Function & F)202 LocToLocMap &getIRToProfileLocationMap(const Function &F) { 203 auto Ret = FuncMappings.try_emplace( 204 FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap()); 205 return Ret.first->second; 206 } 207 void distributeIRToProfileLocationMap(); 208 void distributeIRToProfileLocationMap(FunctionSamples &FS); 209 // This function implements the Myers diff algorithm used for stale profile 210 // matching. The algorithm provides a simple and efficient way to find the 211 // Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two 212 // sequences. For more details, refer to the paper 'An O(ND) Difference 213 // Algorithm and Its Variations' by Eugene W. Myers. 214 // In the scenario of profile fuzzy matching, the two sequences are the IR 215 // callsite anchors and profile callsite anchors. The subsequence equivalent 216 // parts from the resulting SES are used to remap the IR locations to the 217 // profile locations. As the number of function callsite is usually not big, 218 // we currently just implements the basic greedy version(page 6 of the paper). 219 LocToLocMap longestCommonSequence(const AnchorList &IRCallsiteAnchors, 220 const AnchorList &ProfileCallsiteAnchors, 221 bool MatchUnusedFunction); 222 void matchNonCallsiteLocs(const LocToLocMap &AnchorMatchings, 223 const AnchorMap &IRAnchors, 224 LocToLocMap &IRToProfileLocationMap); 225 void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors, 226 const AnchorMap &ProfileAnchors, 227 LocToLocMap &IRToProfileLocationMap, 228 bool RunCFGMatching, bool RunCGMatching); 229 // If the function doesn't have profile, return the pointer to the function. 230 bool functionHasProfile(const FunctionId &IRFuncName, 231 Function *&FuncWithoutProfile); 232 bool isProfileUnused(const FunctionId &ProfileFuncName); 233 bool functionMatchesProfileHelper(const Function &IRFunc, 234 const FunctionId &ProfFunc); 235 // Determine if the function matches profile. If FindMatchedProfileOnly is 236 // set, only search the existing matched function. Otherwise, try matching the 237 // two functions. 238 bool functionMatchesProfile(const FunctionId &IRFuncName, 239 const FunctionId &ProfileFuncName, 240 bool FindMatchedProfileOnly); 241 // Determine if the function matches profile by computing a similarity ratio 242 // between two sequences of callsite anchors extracted from function and 243 // profile. If it's above the threshold, the function matches the profile. 244 bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc, 245 bool FindMatchedProfileOnly); 246 // Find functions that don't show in the profile or profile symbol list, 247 // which are supposed to be new functions. We use them as the targets for 248 // call graph matching. 249 void findFunctionsWithoutProfile(); 250 void reportOrPersistProfileStats(); 251 }; 252 } // end namespace llvm 253 #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H 254