1 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the declaration of the MCPseudoProbe to support the pseudo 10 // probe encoding for AutoFDO. Pseudo probes together with their inline context 11 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each 12 // .pseudoprobe section, the encoded binary data consist of a single or mutiple 13 // function records each for one outlined function. A function record has the 14 // following format : 15 // 16 // FUNCTION BODY (one for each outlined function present in the text section) 17 // GUID (uint64) 18 // GUID of the function's source name which may be different from the 19 // actual binary linkage name. This GUID will be used to decode and 20 // generate a profile against the source function name. 21 // NPROBES (ULEB128) 22 // Number of probes originating from this function. 23 // NUM_INLINED_FUNCTIONS (ULEB128) 24 // Number of callees inlined into this function, aka number of 25 // first-level inlinees 26 // PROBE RECORDS 27 // A list of NPROBES entries. Each entry contains: 28 // INDEX (ULEB128) 29 // TYPE (uint4) 30 // 0 - block probe, 1 - indirect call, 2 - direct call 31 // ATTRIBUTE (uint3) 32 // 1 - reserved 33 // 2 - Sentinel 34 // 4 - HasDiscriminator 35 // ADDRESS_TYPE (uint1) 36 // 0 - code address for regular probes (for downwards compatibility) 37 // - GUID of linkage name for sentinel probes 38 // 1 - address delta 39 // CODE_ADDRESS (uint64 or ULEB128) 40 // code address or address delta, depending on ADDRESS_TYPE 41 // DISCRIMINATOR (ULEB128) if HasDiscriminator 42 // INLINED FUNCTION RECORDS 43 // A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined 44 // callees. Each record contains: 45 // INLINE SITE 46 // ID of the callsite probe (ULEB128) 47 // FUNCTION BODY 48 // A FUNCTION BODY entry describing the inlined function. 49 // 50 // TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility 51 // is no longer an issue. 52 //===----------------------------------------------------------------------===// 53 54 #ifndef LLVM_MC_MCPSEUDOPROBE_H 55 #define LLVM_MC_MCPSEUDOPROBE_H 56 57 #include "llvm/ADT/DenseMap.h" 58 #include "llvm/ADT/DenseSet.h" 59 #include "llvm/ADT/SmallVector.h" 60 #include "llvm/ADT/StringRef.h" 61 #include "llvm/IR/PseudoProbe.h" 62 #include "llvm/Support/ErrorOr.h" 63 #include <list> 64 #include <map> 65 #include <memory> 66 #include <string> 67 #include <tuple> 68 #include <type_traits> 69 #include <unordered_map> 70 #include <unordered_set> 71 #include <vector> 72 73 namespace llvm { 74 75 class MCSymbol; 76 class MCObjectStreamer; 77 class raw_ostream; 78 79 enum class MCPseudoProbeFlag { 80 // If set, indicates that the probe is encoded as an address delta 81 // instead of a real code address. 82 AddressDelta = 0x1, 83 }; 84 85 // Function descriptor decoded from .pseudo_probe_desc section 86 struct MCPseudoProbeFuncDesc { 87 uint64_t FuncGUID = 0; 88 uint64_t FuncHash = 0; 89 std::string FuncName; 90 MCPseudoProbeFuncDescMCPseudoProbeFuncDesc91 MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) 92 : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; 93 94 void print(raw_ostream &OS); 95 }; 96 97 class MCDecodedPseudoProbe; 98 99 // An inline frame has the form <CalleeGuid, ProbeID> 100 using InlineSite = std::tuple<uint64_t, uint32_t>; 101 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; 102 // GUID to PseudoProbeFuncDesc map 103 using GUIDProbeFunctionMap = 104 std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; 105 // Address to pseudo probes map. 106 using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>; 107 108 class MCDecodedPseudoProbeInlineTree; 109 110 class MCPseudoProbeBase { 111 protected: 112 uint64_t Guid; 113 uint64_t Index; 114 uint32_t Discriminator; 115 uint8_t Attributes; 116 uint8_t Type; 117 // The value should be equal to PseudoProbeReservedId::Last + 1 which is 118 // defined in SampleProfileProbe.h. The header file is not included here to 119 // reduce the dependency from MC to IPO. 120 const static uint32_t PseudoProbeFirstId = 1; 121 122 public: MCPseudoProbeBase(uint64_t G,uint64_t I,uint64_t At,uint8_t T,uint32_t D)123 MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D) 124 : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {} 125 isEntry()126 bool isEntry() const { return Index == PseudoProbeFirstId; } 127 getGuid()128 uint64_t getGuid() const { return Guid; } 129 getIndex()130 uint64_t getIndex() const { return Index; } 131 getDiscriminator()132 uint32_t getDiscriminator() const { return Discriminator; } 133 getAttributes()134 uint8_t getAttributes() const { return Attributes; } 135 getType()136 uint8_t getType() const { return Type; } 137 isBlock()138 bool isBlock() const { 139 return Type == static_cast<uint8_t>(PseudoProbeType::Block); 140 } 141 isIndirectCall()142 bool isIndirectCall() const { 143 return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall); 144 } 145 isDirectCall()146 bool isDirectCall() const { 147 return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall); 148 } 149 isCall()150 bool isCall() const { return isIndirectCall() || isDirectCall(); } 151 setAttributes(uint8_t Attr)152 void setAttributes(uint8_t Attr) { Attributes = Attr; } 153 }; 154 155 /// Instances of this class represent a pseudo probe instance for a pseudo probe 156 /// table entry, which is created during a machine instruction is assembled and 157 /// uses an address from a temporary label created at the current address in the 158 /// current section. 159 class MCPseudoProbe : public MCPseudoProbeBase { 160 MCSymbol *Label; 161 162 public: MCPseudoProbe(MCSymbol * Label,uint64_t Guid,uint64_t Index,uint64_t Type,uint64_t Attributes,uint32_t Discriminator)163 MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, 164 uint64_t Attributes, uint32_t Discriminator) 165 : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator), 166 Label(Label) { 167 assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); 168 assert(Attributes <= 0xFF && 169 "Probe attributes too big to encode, exceeding 2^16"); 170 } 171 getLabel()172 MCSymbol *getLabel() const { return Label; } 173 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; 174 }; 175 176 // Represents a callsite with caller function name and probe id 177 using MCPseudoProbeFrameLocation = std::pair<StringRef, uint32_t>; 178 179 class MCDecodedPseudoProbe : public MCPseudoProbeBase { 180 uint64_t Address; 181 MCDecodedPseudoProbeInlineTree *InlineTree; 182 183 public: MCDecodedPseudoProbe(uint64_t Ad,uint64_t G,uint32_t I,PseudoProbeType K,uint8_t At,uint32_t D,MCDecodedPseudoProbeInlineTree * Tree)184 MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, 185 uint8_t At, uint32_t D, 186 MCDecodedPseudoProbeInlineTree *Tree) 187 : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad), 188 InlineTree(Tree){}; 189 getAddress()190 uint64_t getAddress() const { return Address; } 191 setAddress(uint64_t Addr)192 void setAddress(uint64_t Addr) { Address = Addr; } 193 getInlineTreeNode()194 MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const { 195 return InlineTree; 196 } 197 198 // Get the inlined context by traversing current inline tree backwards, 199 // each tree node has its InlineSite which is taken as the context. 200 // \p ContextStack is populated in root to leaf order 201 void 202 getInlineContext(SmallVectorImpl<MCPseudoProbeFrameLocation> &ContextStack, 203 const GUIDProbeFunctionMap &GUID2FuncMAP) const; 204 205 // Helper function to get the string from context stack 206 std::string 207 getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; 208 209 // Print pseudo probe while disassembling 210 void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, 211 bool ShowName) const; 212 }; 213 214 template <typename ProbeType, typename DerivedProbeInlineTreeType> 215 class MCPseudoProbeInlineTreeBase { 216 struct InlineSiteHash { operatorInlineSiteHash217 uint64_t operator()(const InlineSite &Site) const { 218 return std::get<0>(Site) ^ std::get<1>(Site); 219 } 220 }; 221 222 protected: 223 // Track children (e.g. inlinees) of current context 224 using InlinedProbeTreeMap = std::unordered_map< 225 InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>; 226 InlinedProbeTreeMap Children; 227 // Set of probes that come with the function. 228 std::vector<ProbeType> Probes; MCPseudoProbeInlineTreeBase()229 MCPseudoProbeInlineTreeBase() { 230 static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase, 231 DerivedProbeInlineTreeType>::value, 232 "DerivedProbeInlineTreeType must be subclass of " 233 "MCPseudoProbeInlineTreeBase"); 234 } 235 236 public: 237 uint64_t Guid = 0; 238 239 // Root node has a GUID 0. isRoot()240 bool isRoot() const { return Guid == 0; } getChildren()241 InlinedProbeTreeMap &getChildren() { return Children; } getChildren()242 const InlinedProbeTreeMap &getChildren() const { return Children; } getProbes()243 std::vector<ProbeType> &getProbes() { return Probes; } addProbes(ProbeType Probe)244 void addProbes(ProbeType Probe) { Probes.push_back(Probe); } 245 // Caller node of the inline site 246 MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent = 247 nullptr; getOrAddNode(const InlineSite & Site)248 DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { 249 auto Ret = Children.emplace( 250 Site, std::make_unique<DerivedProbeInlineTreeType>(Site)); 251 Ret.first->second->Parent = this; 252 return Ret.first->second.get(); 253 }; 254 }; 255 256 // A Tri-tree based data structure to group probes by inline stack. 257 // A tree is allocated for a standalone .text section. A fake 258 // instance is created as the root of a tree. 259 // A real instance of this class is created for each function, either a 260 // not inlined function that has code in .text section or an inlined function. 261 class MCPseudoProbeInlineTree 262 : public MCPseudoProbeInlineTreeBase<MCPseudoProbe, 263 MCPseudoProbeInlineTree> { 264 public: 265 MCPseudoProbeInlineTree() = default; MCPseudoProbeInlineTree(uint64_t Guid)266 MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } MCPseudoProbeInlineTree(const InlineSite & Site)267 MCPseudoProbeInlineTree(const InlineSite &Site) { 268 this->Guid = std::get<0>(Site); 269 } 270 271 // MCPseudoProbeInlineTree method based on Inlinees 272 void addPseudoProbe(const MCPseudoProbe &Probe, 273 const MCPseudoProbeInlineStack &InlineStack); 274 void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe); 275 }; 276 277 // inline tree node for the decoded pseudo probe 278 class MCDecodedPseudoProbeInlineTree 279 : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, 280 MCDecodedPseudoProbeInlineTree> { 281 public: 282 InlineSite ISite; 283 // Used for decoding 284 uint32_t ChildrenToProcess = 0; 285 286 MCDecodedPseudoProbeInlineTree() = default; MCDecodedPseudoProbeInlineTree(const InlineSite & Site)287 MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; 288 289 // Return false if it's a dummy inline site hasInlineSite()290 bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } 291 }; 292 293 /// Instances of this class represent the pseudo probes inserted into a compile 294 /// unit. 295 class MCPseudoProbeSections { 296 public: addPseudoProbe(MCSymbol * FuncSym,const MCPseudoProbe & Probe,const MCPseudoProbeInlineStack & InlineStack)297 void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe, 298 const MCPseudoProbeInlineStack &InlineStack) { 299 MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack); 300 } 301 302 // The addresses of MCPseudoProbeInlineTree are used by the tree structure and 303 // need to be stable. 304 using MCProbeDivisionMap = std::unordered_map<MCSymbol *, MCPseudoProbeInlineTree>; 305 306 private: 307 // A collection of MCPseudoProbe for each function. The MCPseudoProbes are 308 // grouped by GUIDs due to inlining that can bring probes from different 309 // functions into one function. 310 MCProbeDivisionMap MCProbeDivisions; 311 312 public: getMCProbes()313 const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; } 314 empty()315 bool empty() const { return MCProbeDivisions.empty(); } 316 317 void emit(MCObjectStreamer *MCOS); 318 }; 319 320 class MCPseudoProbeTable { 321 // A collection of MCPseudoProbe in the current module grouped by 322 // functions. MCPseudoProbes will be encoded into a corresponding 323 // .pseudoprobe section. With functions emitted as separate comdats, 324 // a text section really only contains the code of a function solely, and the 325 // probes associated with the text section will be emitted into a standalone 326 // .pseudoprobe section that shares the same comdat group with the function. 327 MCPseudoProbeSections MCProbeSections; 328 329 public: 330 static void emit(MCObjectStreamer *MCOS); 331 getProbeSections()332 MCPseudoProbeSections &getProbeSections() { return MCProbeSections; } 333 334 #ifndef NDEBUG 335 static int DdgPrintIndent; 336 #endif 337 }; 338 339 class MCPseudoProbeDecoder { 340 // GUID to PseudoProbeFuncDesc map. 341 GUIDProbeFunctionMap GUID2FuncDescMap; 342 343 // Address to probes map. 344 AddressProbesMap Address2ProbesMap; 345 346 // The dummy root of the inline trie, all the outlined function will directly 347 // be the children of the dummy root, all the inlined function will be the 348 // children of its inlineer. So the relation would be like: 349 // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2 350 MCDecodedPseudoProbeInlineTree DummyInlineRoot; 351 352 /// Points to the current location in the buffer. 353 const uint8_t *Data = nullptr; 354 355 /// Points to the end of the buffer. 356 const uint8_t *End = nullptr; 357 358 /// Whether encoding is based on a starting probe with absolute code address. 359 bool EncodingIsAddrBased = false; 360 361 // Decoding helper function 362 template <typename T> ErrorOr<T> readUnencodedNumber(); 363 template <typename T> ErrorOr<T> readUnsignedNumber(); 364 template <typename T> ErrorOr<T> readSignedNumber(); 365 ErrorOr<StringRef> readString(uint32_t Size); 366 367 public: 368 using Uint64Set = DenseSet<uint64_t>; 369 using Uint64Map = DenseMap<uint64_t, uint64_t>; 370 371 // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. 372 bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); 373 374 // Decode pseudo_probe section to build address to probes map for specifed 375 // functions only. 376 bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, 377 const Uint64Set &GuildFilter, 378 const Uint64Map &FuncStartAddrs); 379 380 bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, 381 uint64_t &LastAddr, const Uint64Set &GuildFilter, 382 const Uint64Map &FuncStartAddrs); 383 384 // Print pseudo_probe_desc section info 385 void printGUID2FuncDescMap(raw_ostream &OS); 386 387 // Print pseudo_probe section info, used along with show-disassembly 388 void printProbeForAddress(raw_ostream &OS, uint64_t Address); 389 390 // do printProbeForAddress for all addresses 391 void printProbesForAllAddresses(raw_ostream &OS); 392 393 // Look up the probe of a call for the input address 394 const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const; 395 396 const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const; 397 398 // Helper function to populate one probe's inline stack into 399 // \p InlineContextStack. 400 // Current leaf location info will be added if IncludeLeaf is true 401 // Example: 402 // Current probe(bar:3) inlined at foo:2 then inlined at main:1 403 // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] 404 // IncludeLeaf = false, Output: [main:1, foo:2] 405 void getInlineContextForProbe( 406 const MCDecodedPseudoProbe *Probe, 407 SmallVectorImpl<MCPseudoProbeFrameLocation> &InlineContextStack, 408 bool IncludeLeaf) const; 409 getAddress2ProbesMap()410 const AddressProbesMap &getAddress2ProbesMap() const { 411 return Address2ProbesMap; 412 } 413 getAddress2ProbesMap()414 AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; } 415 getGUID2FuncDescMap()416 const GUIDProbeFunctionMap &getGUID2FuncDescMap() const { 417 return GUID2FuncDescMap; 418 } 419 420 const MCPseudoProbeFuncDesc * 421 getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const; 422 getDummyInlineRoot()423 const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { 424 return DummyInlineRoot; 425 } 426 }; 427 428 } // end namespace llvm 429 430 #endif // LLVM_MC_MCPSEUDOPROBE_H 431