1 //===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file provides interfaces used to build and manipulate a call graph, 11 /// which is a very useful tool for interprocedural optimization. 12 /// 13 /// Every function in a module is represented as a node in the call graph. The 14 /// callgraph node keeps track of which functions are called by the function 15 /// corresponding to the node. 16 /// 17 /// A call graph may contain nodes where the function that they correspond to 18 /// is null. These 'external' nodes are used to represent control flow that is 19 /// not represented (or analyzable) in the module. In particular, this 20 /// analysis builds one external node such that: 21 /// 1. All functions in the module without internal linkage will have edges 22 /// from this external node, indicating that they could be called by 23 /// functions outside of the module. 24 /// 2. All functions whose address is used for something more than a direct 25 /// call, for example being stored into a memory location will also have 26 /// an edge from this external node. Since they may be called by an 27 /// unknown caller later, they must be tracked as such. 28 /// 29 /// There is a second external node added for calls that leave this module. 30 /// Functions have a call edge to the external node iff: 31 /// 1. The function is external, reflecting the fact that they could call 32 /// anything without internal linkage or that has its address taken. 33 /// 2. The function contains an indirect function call. 34 /// 35 /// As an extension in the future, there may be multiple nodes with a null 36 /// function. These will be used when we can prove (through pointer analysis) 37 /// that an indirect call site can call only a specific set of functions. 38 /// 39 /// Because of these properties, the CallGraph captures a conservative superset 40 /// of all of the caller-callee relationships, which is useful for 41 /// transformations. 42 /// 43 //===----------------------------------------------------------------------===// 44 45 #ifndef LLVM_ANALYSIS_CALLGRAPH_H 46 #define LLVM_ANALYSIS_CALLGRAPH_H 47 48 #include "llvm/IR/InstrTypes.h" 49 #include "llvm/IR/Intrinsics.h" 50 #include "llvm/IR/PassManager.h" 51 #include "llvm/IR/ValueHandle.h" 52 #include "llvm/Pass.h" 53 #include <cassert> 54 #include <map> 55 #include <memory> 56 #include <utility> 57 #include <vector> 58 59 namespace llvm { 60 61 template <class GraphType> struct GraphTraits; 62 class CallGraphNode; 63 class Function; 64 class Module; 65 class raw_ostream; 66 67 /// The basic data container for the call graph of a \c Module of IR. 68 /// 69 /// This class exposes both the interface to the call graph for a module of IR. 70 /// 71 /// The core call graph itself can also be updated to reflect changes to the IR. 72 class CallGraph { 73 Module &M; 74 75 using FunctionMapTy = 76 std::map<const Function *, std::unique_ptr<CallGraphNode>>; 77 78 /// A map from \c Function* to \c CallGraphNode*. 79 FunctionMapTy FunctionMap; 80 81 /// This node has edges to all external functions and those internal 82 /// functions that have their address taken. 83 CallGraphNode *ExternalCallingNode; 84 85 /// This node has edges to it from all functions making indirect calls 86 /// or calling an external function. 87 std::unique_ptr<CallGraphNode> CallsExternalNode; 88 89 public: 90 explicit CallGraph(Module &M); 91 CallGraph(CallGraph &&Arg); 92 ~CallGraph(); 93 94 void print(raw_ostream &OS) const; 95 void dump() const; 96 97 using iterator = FunctionMapTy::iterator; 98 using const_iterator = FunctionMapTy::const_iterator; 99 100 /// Returns the module the call graph corresponds to. 101 Module &getModule() const { return M; } 102 103 bool invalidate(Module &, const PreservedAnalyses &PA, 104 ModuleAnalysisManager::Invalidator &); 105 106 inline iterator begin() { return FunctionMap.begin(); } 107 inline iterator end() { return FunctionMap.end(); } 108 inline const_iterator begin() const { return FunctionMap.begin(); } 109 inline const_iterator end() const { return FunctionMap.end(); } 110 111 /// Returns the call graph node for the provided function. 112 inline const CallGraphNode *operator[](const Function *F) const { 113 const_iterator I = FunctionMap.find(F); 114 assert(I != FunctionMap.end() && "Function not in callgraph!"); 115 return I->second.get(); 116 } 117 118 /// Returns the call graph node for the provided function. 119 inline CallGraphNode *operator[](const Function *F) { 120 const_iterator I = FunctionMap.find(F); 121 assert(I != FunctionMap.end() && "Function not in callgraph!"); 122 return I->second.get(); 123 } 124 125 /// Returns the \c CallGraphNode which is used to represent 126 /// undetermined calls into the callgraph. 127 CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } 128 129 CallGraphNode *getCallsExternalNode() const { 130 return CallsExternalNode.get(); 131 } 132 133 /// Old node has been deleted, and New is to be used in its place, update the 134 /// ExternalCallingNode. 135 void ReplaceExternalCallEdge(CallGraphNode *Old, CallGraphNode *New); 136 137 //===--------------------------------------------------------------------- 138 // Functions to keep a call graph up to date with a function that has been 139 // modified. 140 // 141 142 /// Unlink the function from this module, returning it. 143 /// 144 /// Because this removes the function from the module, the call graph node is 145 /// destroyed. This is only valid if the function does not call any other 146 /// functions (ie, there are no edges in it's CGN). The easiest way to do 147 /// this is to dropAllReferences before calling this. 148 Function *removeFunctionFromModule(CallGraphNode *CGN); 149 150 /// Similar to operator[], but this will insert a new CallGraphNode for 151 /// \c F if one does not already exist. 152 CallGraphNode *getOrInsertFunction(const Function *F); 153 154 /// Populate \p CGN based on the calls inside the associated function. 155 void populateCallGraphNode(CallGraphNode *CGN); 156 157 /// Add a function to the call graph, and link the node to all of the 158 /// functions that it calls. 159 void addToCallGraph(Function *F); 160 }; 161 162 /// A node in the call graph for a module. 163 /// 164 /// Typically represents a function in the call graph. There are also special 165 /// "null" nodes used to represent theoretical entries in the call graph. 166 class CallGraphNode { 167 public: 168 /// A pair of the calling instruction (a call or invoke) 169 /// and the call graph node being called. 170 /// Call graph node may have two types of call records which represent an edge 171 /// in the call graph - reference or a call edge. Reference edges are not 172 /// associated with any call instruction and are created with the first field 173 /// set to `None`, while real call edges have instruction address in this 174 /// field. Therefore, all real call edges are expected to have a value in the 175 /// first field and it is not supposed to be `nullptr`. 176 /// Reference edges, for example, are used for connecting broker function 177 /// caller to the callback function for callback call sites. 178 using CallRecord = std::pair<std::optional<WeakTrackingVH>, CallGraphNode *>; 179 180 public: 181 using CalledFunctionsVector = std::vector<CallRecord>; 182 183 /// Creates a node for the specified function. 184 inline CallGraphNode(CallGraph *CG, Function *F) : CG(CG), F(F) {} 185 186 CallGraphNode(const CallGraphNode &) = delete; 187 CallGraphNode &operator=(const CallGraphNode &) = delete; 188 189 ~CallGraphNode() { 190 assert(NumReferences == 0 && "Node deleted while references remain"); 191 } 192 193 using iterator = std::vector<CallRecord>::iterator; 194 using const_iterator = std::vector<CallRecord>::const_iterator; 195 196 /// Returns the function that this call graph node represents. 197 Function *getFunction() const { return F; } 198 199 inline iterator begin() { return CalledFunctions.begin(); } 200 inline iterator end() { return CalledFunctions.end(); } 201 inline const_iterator begin() const { return CalledFunctions.begin(); } 202 inline const_iterator end() const { return CalledFunctions.end(); } 203 inline bool empty() const { return CalledFunctions.empty(); } 204 inline unsigned size() const { return (unsigned)CalledFunctions.size(); } 205 206 /// Returns the number of other CallGraphNodes in this CallGraph that 207 /// reference this node in their callee list. 208 unsigned getNumReferences() const { return NumReferences; } 209 210 /// Returns the i'th called function. 211 CallGraphNode *operator[](unsigned i) const { 212 assert(i < CalledFunctions.size() && "Invalid index"); 213 return CalledFunctions[i].second; 214 } 215 216 /// Print out this call graph node. 217 void dump() const; 218 void print(raw_ostream &OS) const; 219 220 //===--------------------------------------------------------------------- 221 // Methods to keep a call graph up to date with a function that has been 222 // modified 223 // 224 225 /// Removes all edges from this CallGraphNode to any functions it 226 /// calls. 227 void removeAllCalledFunctions() { 228 while (!CalledFunctions.empty()) { 229 CalledFunctions.back().second->DropRef(); 230 CalledFunctions.pop_back(); 231 } 232 } 233 234 /// Moves all the callee information from N to this node. 235 void stealCalledFunctionsFrom(CallGraphNode *N) { 236 assert(CalledFunctions.empty() && 237 "Cannot steal callsite information if I already have some"); 238 std::swap(CalledFunctions, N->CalledFunctions); 239 } 240 241 /// Adds a function to the list of functions called by this one. 242 void addCalledFunction(CallBase *Call, CallGraphNode *M) { 243 CalledFunctions.emplace_back(Call ? std::optional<WeakTrackingVH>(Call) 244 : std::optional<WeakTrackingVH>(), 245 M); 246 M->AddRef(); 247 } 248 249 void removeCallEdge(iterator I) { 250 I->second->DropRef(); 251 *I = CalledFunctions.back(); 252 CalledFunctions.pop_back(); 253 } 254 255 /// Removes the edge in the node for the specified call site. 256 /// 257 /// Note that this method takes linear time, so it should be used sparingly. 258 void removeCallEdgeFor(CallBase &Call); 259 260 /// Removes all call edges from this node to the specified callee 261 /// function. 262 /// 263 /// This takes more time to execute than removeCallEdgeTo, so it should not 264 /// be used unless necessary. 265 void removeAnyCallEdgeTo(CallGraphNode *Callee); 266 267 /// Removes one edge associated with a null callsite from this node to 268 /// the specified callee function. 269 void removeOneAbstractEdgeTo(CallGraphNode *Callee); 270 271 /// Replaces the edge in the node for the specified call site with a 272 /// new one. 273 /// 274 /// Note that this method takes linear time, so it should be used sparingly. 275 void replaceCallEdge(CallBase &Call, CallBase &NewCall, 276 CallGraphNode *NewNode); 277 278 private: 279 friend class CallGraph; 280 281 CallGraph *CG; 282 Function *F; 283 284 std::vector<CallRecord> CalledFunctions; 285 286 /// The number of times that this CallGraphNode occurs in the 287 /// CalledFunctions array of this or other CallGraphNodes. 288 unsigned NumReferences = 0; 289 290 void DropRef() { --NumReferences; } 291 void AddRef() { ++NumReferences; } 292 293 /// A special function that should only be used by the CallGraph class. 294 void allReferencesDropped() { NumReferences = 0; } 295 }; 296 297 /// An analysis pass to compute the \c CallGraph for a \c Module. 298 /// 299 /// This class implements the concept of an analysis pass used by the \c 300 /// ModuleAnalysisManager to run an analysis over a module and cache the 301 /// resulting data. 302 class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { 303 friend AnalysisInfoMixin<CallGraphAnalysis>; 304 305 static AnalysisKey Key; 306 307 public: 308 /// A formulaic type to inform clients of the result type. 309 using Result = CallGraph; 310 311 /// Compute the \c CallGraph for the module \c M. 312 /// 313 /// The real work here is done in the \c CallGraph constructor. 314 CallGraph run(Module &M, ModuleAnalysisManager &) { return CallGraph(M); } 315 }; 316 317 /// Printer pass for the \c CallGraphAnalysis results. 318 class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { 319 raw_ostream &OS; 320 321 public: 322 explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} 323 324 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 325 326 static bool isRequired() { return true; } 327 }; 328 329 /// Printer pass for the summarized \c CallGraphAnalysis results. 330 class CallGraphSCCsPrinterPass 331 : public PassInfoMixin<CallGraphSCCsPrinterPass> { 332 raw_ostream &OS; 333 334 public: 335 explicit CallGraphSCCsPrinterPass(raw_ostream &OS) : OS(OS) {} 336 337 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 338 339 static bool isRequired() { return true; } 340 }; 341 342 /// The \c ModulePass which wraps up a \c CallGraph and the logic to 343 /// build it. 344 /// 345 /// This class exposes both the interface to the call graph container and the 346 /// module pass which runs over a module of IR and produces the call graph. The 347 /// call graph interface is entirelly a wrapper around a \c CallGraph object 348 /// which is stored internally for each module. 349 class CallGraphWrapperPass : public ModulePass { 350 std::unique_ptr<CallGraph> G; 351 352 public: 353 static char ID; // Class identification, replacement for typeinfo 354 355 CallGraphWrapperPass(); 356 ~CallGraphWrapperPass() override; 357 358 /// The internal \c CallGraph around which the rest of this interface 359 /// is wrapped. 360 const CallGraph &getCallGraph() const { return *G; } 361 CallGraph &getCallGraph() { return *G; } 362 363 using iterator = CallGraph::iterator; 364 using const_iterator = CallGraph::const_iterator; 365 366 /// Returns the module the call graph corresponds to. 367 Module &getModule() const { return G->getModule(); } 368 369 inline iterator begin() { return G->begin(); } 370 inline iterator end() { return G->end(); } 371 inline const_iterator begin() const { return G->begin(); } 372 inline const_iterator end() const { return G->end(); } 373 374 /// Returns the call graph node for the provided function. 375 inline const CallGraphNode *operator[](const Function *F) const { 376 return (*G)[F]; 377 } 378 379 /// Returns the call graph node for the provided function. 380 inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } 381 382 /// Returns the \c CallGraphNode which is used to represent 383 /// undetermined calls into the callgraph. 384 CallGraphNode *getExternalCallingNode() const { 385 return G->getExternalCallingNode(); 386 } 387 388 CallGraphNode *getCallsExternalNode() const { 389 return G->getCallsExternalNode(); 390 } 391 392 //===--------------------------------------------------------------------- 393 // Functions to keep a call graph up to date with a function that has been 394 // modified. 395 // 396 397 /// Unlink the function from this module, returning it. 398 /// 399 /// Because this removes the function from the module, the call graph node is 400 /// destroyed. This is only valid if the function does not call any other 401 /// functions (ie, there are no edges in it's CGN). The easiest way to do 402 /// this is to dropAllReferences before calling this. 403 Function *removeFunctionFromModule(CallGraphNode *CGN) { 404 return G->removeFunctionFromModule(CGN); 405 } 406 407 /// Similar to operator[], but this will insert a new CallGraphNode for 408 /// \c F if one does not already exist. 409 CallGraphNode *getOrInsertFunction(const Function *F) { 410 return G->getOrInsertFunction(F); 411 } 412 413 //===--------------------------------------------------------------------- 414 // Implementation of the ModulePass interface needed here. 415 // 416 417 void getAnalysisUsage(AnalysisUsage &AU) const override; 418 bool runOnModule(Module &M) override; 419 void releaseMemory() override; 420 421 void print(raw_ostream &o, const Module *) const override; 422 void dump() const; 423 }; 424 425 //===----------------------------------------------------------------------===// 426 // GraphTraits specializations for call graphs so that they can be treated as 427 // graphs by the generic graph algorithms. 428 // 429 430 // Provide graph traits for traversing call graphs using standard graph 431 // traversals. 432 template <> struct GraphTraits<CallGraphNode *> { 433 using NodeRef = CallGraphNode *; 434 using CGNPairTy = CallGraphNode::CallRecord; 435 436 static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; } 437 static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } 438 439 using ChildIteratorType = 440 mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>; 441 442 static ChildIteratorType child_begin(NodeRef N) { 443 return ChildIteratorType(N->begin(), &CGNGetValue); 444 } 445 446 static ChildIteratorType child_end(NodeRef N) { 447 return ChildIteratorType(N->end(), &CGNGetValue); 448 } 449 }; 450 451 template <> struct GraphTraits<const CallGraphNode *> { 452 using NodeRef = const CallGraphNode *; 453 using CGNPairTy = CallGraphNode::CallRecord; 454 using EdgeRef = const CallGraphNode::CallRecord &; 455 456 static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; } 457 static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } 458 459 using ChildIteratorType = 460 mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>; 461 using ChildEdgeIteratorType = CallGraphNode::const_iterator; 462 463 static ChildIteratorType child_begin(NodeRef N) { 464 return ChildIteratorType(N->begin(), &CGNGetValue); 465 } 466 467 static ChildIteratorType child_end(NodeRef N) { 468 return ChildIteratorType(N->end(), &CGNGetValue); 469 } 470 471 static ChildEdgeIteratorType child_edge_begin(NodeRef N) { 472 return N->begin(); 473 } 474 static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } 475 476 static NodeRef edge_dest(EdgeRef E) { return E.second; } 477 }; 478 479 template <> 480 struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { 481 using PairTy = 482 std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; 483 484 static NodeRef getEntryNode(CallGraph *CGN) { 485 return CGN->getExternalCallingNode(); // Start at the external node! 486 } 487 488 static CallGraphNode *CGGetValuePtr(const PairTy &P) { 489 return P.second.get(); 490 } 491 492 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 493 using nodes_iterator = 494 mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>; 495 496 static nodes_iterator nodes_begin(CallGraph *CG) { 497 return nodes_iterator(CG->begin(), &CGGetValuePtr); 498 } 499 500 static nodes_iterator nodes_end(CallGraph *CG) { 501 return nodes_iterator(CG->end(), &CGGetValuePtr); 502 } 503 }; 504 505 template <> 506 struct GraphTraits<const CallGraph *> : public GraphTraits< 507 const CallGraphNode *> { 508 using PairTy = 509 std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; 510 511 static NodeRef getEntryNode(const CallGraph *CGN) { 512 return CGN->getExternalCallingNode(); // Start at the external node! 513 } 514 515 static const CallGraphNode *CGGetValuePtr(const PairTy &P) { 516 return P.second.get(); 517 } 518 519 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 520 using nodes_iterator = 521 mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>; 522 523 static nodes_iterator nodes_begin(const CallGraph *CG) { 524 return nodes_iterator(CG->begin(), &CGGetValuePtr); 525 } 526 527 static nodes_iterator nodes_end(const CallGraph *CG) { 528 return nodes_iterator(CG->end(), &CGGetValuePtr); 529 } 530 }; 531 532 } // end namespace llvm 533 534 #endif // LLVM_ANALYSIS_CALLGRAPH_H 535