1 //===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Our algorithm works by first identifying a subset of nodes that must always 10 // be instrumented. We call these nodes ambiguous because knowing the coverage 11 // of all remaining nodes is not enough to infer their coverage status. 12 // 13 // In general a node v is ambiguous if there exists two entry-to-terminal paths 14 // P_1 and P_2 such that: 15 // 1. v not in P_1 but P_1 visits a predecessor of v, and 16 // 2. v not in P_2 but P_2 visits a successor of v. 17 // 18 // If a node v is not ambiguous, then if condition 1 fails, we can infer v’s 19 // coverage from the coverage of its predecessors, or if condition 2 fails, we 20 // can infer v’s coverage from the coverage of its successors. 21 // 22 // Sadly, there are example CFGs where it is not possible to infer all nodes 23 // from the ambiguous nodes alone. Our algorithm selects a minimum number of 24 // extra nodes to add to the ambiguous nodes to form a valid instrumentation S. 25 // 26 // Details on this algorithm can be found in https://arxiv.org/abs/2208.13907 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" 31 #include "llvm/ADT/DepthFirstIterator.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Support/CRC.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/GraphWriter.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "pgo-block-coverage" 42 43 STATISTIC(NumFunctions, "Number of total functions that BCI has processed"); 44 STATISTIC(NumIneligibleFunctions, 45 "Number of functions for which BCI cannot run on"); 46 STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed"); 47 STATISTIC(NumInstrumentedBlocks, 48 "Number of basic blocks instrumented for coverage"); 49 50 BlockCoverageInference::BlockCoverageInference(const Function &F, 51 bool ForceInstrumentEntry) 52 : F(F), ForceInstrumentEntry(ForceInstrumentEntry) { 53 findDependencies(); 54 assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock())); 55 56 ++NumFunctions; 57 for (auto &BB : F) { 58 ++NumBlocks; 59 if (shouldInstrumentBlock(BB)) 60 ++NumInstrumentedBlocks; 61 } 62 } 63 64 BlockCoverageInference::BlockSet 65 BlockCoverageInference::getDependencies(const BasicBlock &BB) const { 66 assert(BB.getParent() == &F); 67 BlockSet Dependencies; 68 auto It = PredecessorDependencies.find(&BB); 69 if (It != PredecessorDependencies.end()) 70 Dependencies.set_union(It->second); 71 It = SuccessorDependencies.find(&BB); 72 if (It != SuccessorDependencies.end()) 73 Dependencies.set_union(It->second); 74 return Dependencies; 75 } 76 77 uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const { 78 JamCRC JC; 79 uint64_t Index = 0; 80 for (auto &BB : F) { 81 if (shouldInstrumentBlock(BB)) { 82 uint8_t Data[8]; 83 support::endian::write64le(Data, Index); 84 JC.update(Data); 85 } 86 Index++; 87 } 88 return JC.getCRC(); 89 } 90 91 bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const { 92 assert(BB.getParent() == &F); 93 auto It = PredecessorDependencies.find(&BB); 94 if (It != PredecessorDependencies.end() && It->second.size()) 95 return false; 96 It = SuccessorDependencies.find(&BB); 97 if (It != SuccessorDependencies.end() && It->second.size()) 98 return false; 99 return true; 100 } 101 102 void BlockCoverageInference::findDependencies() { 103 assert(PredecessorDependencies.empty() && SuccessorDependencies.empty()); 104 // Empirical analysis shows that this algorithm finishes within 5 seconds for 105 // functions with fewer than 1.5K blocks. 106 if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) { 107 ++NumIneligibleFunctions; 108 return; 109 } 110 111 SmallVector<const BasicBlock *, 4> TerminalBlocks; 112 for (auto &BB : F) 113 if (succ_empty(&BB)) 114 TerminalBlocks.push_back(&BB); 115 116 // Traverse the CFG backwards from the terminal blocks to make sure every 117 // block can reach some terminal block. Otherwise this algorithm will not work 118 // and we must fall back to instrumenting every block. 119 df_iterator_default_set<const BasicBlock *> Visited; 120 for (auto *BB : TerminalBlocks) 121 for (auto *N : inverse_depth_first_ext(BB, Visited)) 122 (void)N; 123 if (F.size() != Visited.size()) { 124 ++NumIneligibleFunctions; 125 return; 126 } 127 128 // The current implementation for computing `PredecessorDependencies` and 129 // `SuccessorDependencies` runs in quadratic time with respect to the number 130 // of basic blocks. While we do have a more complicated linear time algorithm 131 // in https://arxiv.org/abs/2208.13907 we do not know if it will give a 132 // significant speedup in practice given that most functions tend to be 133 // relatively small in size for intended use cases. 134 auto &EntryBlock = F.getEntryBlock(); 135 for (auto &BB : F) { 136 // The set of blocks that are reachable while avoiding BB. 137 BlockSet ReachableFromEntry, ReachableFromTerminal; 138 getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true, 139 ReachableFromEntry); 140 for (auto *TerminalBlock : TerminalBlocks) 141 getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false, 142 ReachableFromTerminal); 143 144 auto Preds = predecessors(&BB); 145 bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) { 146 return ReachableFromEntry.count(Pred) && 147 ReachableFromTerminal.count(Pred); 148 }); 149 if (!HasSuperReachablePred) 150 for (auto *Pred : Preds) 151 if (ReachableFromEntry.count(Pred)) 152 PredecessorDependencies[&BB].insert(Pred); 153 154 auto Succs = successors(&BB); 155 bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) { 156 return ReachableFromEntry.count(Succ) && 157 ReachableFromTerminal.count(Succ); 158 }); 159 if (!HasSuperReachableSucc) 160 for (auto *Succ : Succs) 161 if (ReachableFromTerminal.count(Succ)) 162 SuccessorDependencies[&BB].insert(Succ); 163 } 164 165 if (ForceInstrumentEntry) { 166 // Force the entry block to be instrumented by clearing the blocks it can 167 // infer coverage from. 168 PredecessorDependencies[&EntryBlock].clear(); 169 SuccessorDependencies[&EntryBlock].clear(); 170 } 171 172 // Construct a graph where blocks are connected if there is a mutual 173 // dependency between them. This graph has a special property that it contains 174 // only paths. 175 DenseMap<const BasicBlock *, BlockSet> AdjacencyList; 176 for (auto &BB : F) { 177 for (auto *Succ : successors(&BB)) { 178 if (SuccessorDependencies[&BB].count(Succ) && 179 PredecessorDependencies[Succ].count(&BB)) { 180 AdjacencyList[&BB].insert(Succ); 181 AdjacencyList[Succ].insert(&BB); 182 } 183 } 184 } 185 186 // Given a path with at least one node, return the next node on the path. 187 auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * { 188 assert(Path.size()); 189 auto &Neighbors = AdjacencyList[Path.back()]; 190 if (Path.size() == 1) { 191 // This is the first node on the path, return its neighbor. 192 assert(Neighbors.size() == 1); 193 return Neighbors.front(); 194 } else if (Neighbors.size() == 2) { 195 // This is the middle of the path, find the neighbor that is not on the 196 // path already. 197 assert(Path.size() >= 2); 198 return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0]; 199 } 200 // This is the end of the path. 201 assert(Neighbors.size() == 1); 202 return nullptr; 203 }; 204 205 // Remove all cycles in the inferencing graph. 206 for (auto &BB : F) { 207 if (AdjacencyList[&BB].size() == 1) { 208 // We found the head of some path. 209 BlockSet Path; 210 Path.insert(&BB); 211 while (const BasicBlock *Next = getNextOnPath(Path)) 212 Path.insert(Next); 213 LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n"); 214 215 // Remove these nodes from the graph so we don't discover this path again. 216 for (auto *BB : Path) 217 AdjacencyList[BB].clear(); 218 219 // Finally, remove the cycles. 220 if (PredecessorDependencies[Path.front()].size()) { 221 for (auto *BB : Path) 222 if (BB != Path.back()) 223 SuccessorDependencies[BB].clear(); 224 } else { 225 for (auto *BB : Path) 226 if (BB != Path.front()) 227 PredecessorDependencies[BB].clear(); 228 } 229 } 230 } 231 LLVM_DEBUG(dump(dbgs())); 232 } 233 234 void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start, 235 const BasicBlock &Avoid, 236 bool IsForward, 237 BlockSet &Reachable) const { 238 df_iterator_default_set<const BasicBlock *> Visited; 239 Visited.insert(&Avoid); 240 if (IsForward) { 241 auto Range = depth_first_ext(&Start, Visited); 242 Reachable.insert(Range.begin(), Range.end()); 243 } else { 244 auto Range = inverse_depth_first_ext(&Start, Visited); 245 Reachable.insert(Range.begin(), Range.end()); 246 } 247 } 248 249 namespace llvm { 250 class DotFuncBCIInfo { 251 private: 252 const BlockCoverageInference *BCI; 253 const DenseMap<const BasicBlock *, bool> *Coverage; 254 255 public: 256 DotFuncBCIInfo(const BlockCoverageInference *BCI, 257 const DenseMap<const BasicBlock *, bool> *Coverage) 258 : BCI(BCI), Coverage(Coverage) {} 259 260 const Function &getFunction() { return BCI->F; } 261 262 bool isInstrumented(const BasicBlock *BB) const { 263 return BCI->shouldInstrumentBlock(*BB); 264 } 265 266 bool isCovered(const BasicBlock *BB) const { 267 return Coverage && Coverage->lookup(BB); 268 } 269 270 bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const { 271 return BCI->getDependencies(*Src).count(Dest); 272 } 273 }; 274 275 template <> 276 struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> { 277 static NodeRef getEntryNode(DotFuncBCIInfo *Info) { 278 return &(Info->getFunction().getEntryBlock()); 279 } 280 281 // nodes_iterator/begin/end - Allow iteration over all nodes in the graph 282 using nodes_iterator = pointer_iterator<Function::const_iterator>; 283 284 static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) { 285 return nodes_iterator(Info->getFunction().begin()); 286 } 287 288 static nodes_iterator nodes_end(DotFuncBCIInfo *Info) { 289 return nodes_iterator(Info->getFunction().end()); 290 } 291 292 static size_t size(DotFuncBCIInfo *Info) { 293 return Info->getFunction().size(); 294 } 295 }; 296 297 template <> 298 struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits { 299 300 DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} 301 302 static std::string getGraphName(DotFuncBCIInfo *Info) { 303 return "BCI CFG for " + Info->getFunction().getName().str(); 304 } 305 306 std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) { 307 return Node->getName().str(); 308 } 309 310 std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I, 311 DotFuncBCIInfo *Info) { 312 const BasicBlock *Dest = *I; 313 if (Info->isDependent(Src, Dest)) 314 return "color=red"; 315 if (Info->isDependent(Dest, Src)) 316 return "color=blue"; 317 return ""; 318 } 319 320 std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) { 321 std::string Result; 322 if (Info->isInstrumented(Node)) 323 Result += "style=filled,fillcolor=gray"; 324 if (Info->isCovered(Node)) 325 Result += std::string(Result.empty() ? "" : ",") + "color=red"; 326 return Result; 327 } 328 }; 329 330 } // namespace llvm 331 332 void BlockCoverageInference::viewBlockCoverageGraph( 333 const DenseMap<const BasicBlock *, bool> *Coverage) const { 334 DotFuncBCIInfo Info(this, Coverage); 335 WriteGraph(&Info, "BCI", false, 336 "Block Coverage Inference for " + F.getName()); 337 } 338 339 void BlockCoverageInference::dump(raw_ostream &OS) const { 340 OS << "Minimal block coverage for function \'" << F.getName() 341 << "\' (Instrumented=*)\n"; 342 for (auto &BB : F) { 343 OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n"; 344 auto It = PredecessorDependencies.find(&BB); 345 if (It != PredecessorDependencies.end() && It->second.size()) 346 OS << " PredDeps = " << getBlockNames(It->second) << "\n"; 347 It = SuccessorDependencies.find(&BB); 348 if (It != SuccessorDependencies.end() && It->second.size()) 349 OS << " SuccDeps = " << getBlockNames(It->second) << "\n"; 350 } 351 OS << " Instrumented Blocks Hash = 0x" 352 << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n"; 353 } 354 355 std::string 356 BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) { 357 std::string Result; 358 raw_string_ostream OS(Result); 359 OS << "["; 360 if (!BBs.empty()) { 361 OS << BBs.front()->getName(); 362 BBs = BBs.drop_front(); 363 } 364 for (auto *BB : BBs) 365 OS << ", " << BB->getName(); 366 OS << "]"; 367 return OS.str(); 368 } 369