xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1  //===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // Our algorithm works by first identifying a subset of nodes that must always
10  // be instrumented. We call these nodes ambiguous because knowing the coverage
11  // of all remaining nodes is not enough to infer their coverage status.
12  //
13  // In general a node v is ambiguous if there exists two entry-to-terminal paths
14  // P_1 and P_2 such that:
15  //   1. v not in P_1 but P_1 visits a predecessor of v, and
16  //   2. v not in P_2 but P_2 visits a successor of v.
17  //
18  // If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
19  // coverage from the coverage of its predecessors, or if condition 2 fails, we
20  // can infer v’s coverage from the coverage of its successors.
21  //
22  // Sadly, there are example CFGs where it is not possible to infer all nodes
23  // from the ambiguous nodes alone. Our algorithm selects a minimum number of
24  // extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
25  //
26  // Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
27  //
28  //===----------------------------------------------------------------------===//
29  
30  #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
31  #include "llvm/ADT/DepthFirstIterator.h"
32  #include "llvm/ADT/Statistic.h"
33  #include "llvm/Support/CRC.h"
34  #include "llvm/Support/Debug.h"
35  #include "llvm/Support/GraphWriter.h"
36  #include "llvm/Support/raw_ostream.h"
37  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
38  
39  using namespace llvm;
40  
41  #define DEBUG_TYPE "pgo-block-coverage"
42  
43  STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
44  STATISTIC(NumIneligibleFunctions,
45            "Number of functions for which BCI cannot run on");
46  STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
47  STATISTIC(NumInstrumentedBlocks,
48            "Number of basic blocks instrumented for coverage");
49  
BlockCoverageInference(const Function & F,bool ForceInstrumentEntry)50  BlockCoverageInference::BlockCoverageInference(const Function &F,
51                                                 bool ForceInstrumentEntry)
52      : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
53    findDependencies();
54    assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
55  
56    ++NumFunctions;
57    for (auto &BB : F) {
58      ++NumBlocks;
59      if (shouldInstrumentBlock(BB))
60        ++NumInstrumentedBlocks;
61    }
62  }
63  
64  BlockCoverageInference::BlockSet
getDependencies(const BasicBlock & BB) const65  BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
66    assert(BB.getParent() == &F);
67    BlockSet Dependencies;
68    auto It = PredecessorDependencies.find(&BB);
69    if (It != PredecessorDependencies.end())
70      Dependencies.set_union(It->second);
71    It = SuccessorDependencies.find(&BB);
72    if (It != SuccessorDependencies.end())
73      Dependencies.set_union(It->second);
74    return Dependencies;
75  }
76  
getInstrumentedBlocksHash() const77  uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
78    JamCRC JC;
79    uint64_t Index = 0;
80    for (auto &BB : F) {
81      if (shouldInstrumentBlock(BB)) {
82        uint8_t Data[8];
83        support::endian::write64le(Data, Index);
84        JC.update(Data);
85      }
86      Index++;
87    }
88    return JC.getCRC();
89  }
90  
shouldInstrumentBlock(const BasicBlock & BB) const91  bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
92    assert(BB.getParent() == &F);
93    auto It = PredecessorDependencies.find(&BB);
94    if (It != PredecessorDependencies.end() && It->second.size())
95      return false;
96    It = SuccessorDependencies.find(&BB);
97    if (It != SuccessorDependencies.end() && It->second.size())
98      return false;
99    return true;
100  }
101  
findDependencies()102  void BlockCoverageInference::findDependencies() {
103    assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
104    // Empirical analysis shows that this algorithm finishes within 5 seconds for
105    // functions with fewer than 1.5K blocks.
106    if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
107      ++NumIneligibleFunctions;
108      return;
109    }
110  
111    SmallVector<const BasicBlock *, 4> TerminalBlocks;
112    for (auto &BB : F)
113      if (succ_empty(&BB))
114        TerminalBlocks.push_back(&BB);
115  
116    // Traverse the CFG backwards from the terminal blocks to make sure every
117    // block can reach some terminal block. Otherwise this algorithm will not work
118    // and we must fall back to instrumenting every block.
119    df_iterator_default_set<const BasicBlock *> Visited;
120    for (auto *BB : TerminalBlocks)
121      for (auto *N : inverse_depth_first_ext(BB, Visited))
122        (void)N;
123    if (F.size() != Visited.size()) {
124      ++NumIneligibleFunctions;
125      return;
126    }
127  
128    // The current implementation for computing `PredecessorDependencies` and
129    // `SuccessorDependencies` runs in quadratic time with respect to the number
130    // of basic blocks. While we do have a more complicated linear time algorithm
131    // in https://arxiv.org/abs/2208.13907 we do not know if it will give a
132    // significant speedup in practice given that most functions tend to be
133    // relatively small in size for intended use cases.
134    auto &EntryBlock = F.getEntryBlock();
135    for (auto &BB : F) {
136      // The set of blocks that are reachable while avoiding BB.
137      BlockSet ReachableFromEntry, ReachableFromTerminal;
138      getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
139                           ReachableFromEntry);
140      for (auto *TerminalBlock : TerminalBlocks)
141        getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
142                             ReachableFromTerminal);
143  
144      auto Preds = predecessors(&BB);
145      bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
146        return ReachableFromEntry.count(Pred) &&
147               ReachableFromTerminal.count(Pred);
148      });
149      if (!HasSuperReachablePred)
150        for (auto *Pred : Preds)
151          if (ReachableFromEntry.count(Pred))
152            PredecessorDependencies[&BB].insert(Pred);
153  
154      auto Succs = successors(&BB);
155      bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
156        return ReachableFromEntry.count(Succ) &&
157               ReachableFromTerminal.count(Succ);
158      });
159      if (!HasSuperReachableSucc)
160        for (auto *Succ : Succs)
161          if (ReachableFromTerminal.count(Succ))
162            SuccessorDependencies[&BB].insert(Succ);
163    }
164  
165    if (ForceInstrumentEntry) {
166      // Force the entry block to be instrumented by clearing the blocks it can
167      // infer coverage from.
168      PredecessorDependencies[&EntryBlock].clear();
169      SuccessorDependencies[&EntryBlock].clear();
170    }
171  
172    // Construct a graph where blocks are connected if there is a mutual
173    // dependency between them. This graph has a special property that it contains
174    // only paths.
175    DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
176    for (auto &BB : F) {
177      for (auto *Succ : successors(&BB)) {
178        if (SuccessorDependencies[&BB].count(Succ) &&
179            PredecessorDependencies[Succ].count(&BB)) {
180          AdjacencyList[&BB].insert(Succ);
181          AdjacencyList[Succ].insert(&BB);
182        }
183      }
184    }
185  
186    // Given a path with at least one node, return the next node on the path.
187    auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
188      assert(Path.size());
189      auto &Neighbors = AdjacencyList[Path.back()];
190      if (Path.size() == 1) {
191        // This is the first node on the path, return its neighbor.
192        assert(Neighbors.size() == 1);
193        return Neighbors.front();
194      } else if (Neighbors.size() == 2) {
195        // This is the middle of the path, find the neighbor that is not on the
196        // path already.
197        assert(Path.size() >= 2);
198        return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
199      }
200      // This is the end of the path.
201      assert(Neighbors.size() == 1);
202      return nullptr;
203    };
204  
205    // Remove all cycles in the inferencing graph.
206    for (auto &BB : F) {
207      if (AdjacencyList[&BB].size() == 1) {
208        // We found the head of some path.
209        BlockSet Path;
210        Path.insert(&BB);
211        while (const BasicBlock *Next = getNextOnPath(Path))
212          Path.insert(Next);
213        LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
214  
215        // Remove these nodes from the graph so we don't discover this path again.
216        for (auto *BB : Path)
217          AdjacencyList[BB].clear();
218  
219        // Finally, remove the cycles.
220        if (PredecessorDependencies[Path.front()].size()) {
221          for (auto *BB : Path)
222            if (BB != Path.back())
223              SuccessorDependencies[BB].clear();
224        } else {
225          for (auto *BB : Path)
226            if (BB != Path.front())
227              PredecessorDependencies[BB].clear();
228        }
229      }
230    }
231    LLVM_DEBUG(dump(dbgs()));
232  }
233  
getReachableAvoiding(const BasicBlock & Start,const BasicBlock & Avoid,bool IsForward,BlockSet & Reachable) const234  void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
235                                                    const BasicBlock &Avoid,
236                                                    bool IsForward,
237                                                    BlockSet &Reachable) const {
238    df_iterator_default_set<const BasicBlock *> Visited;
239    Visited.insert(&Avoid);
240    if (IsForward) {
241      auto Range = depth_first_ext(&Start, Visited);
242      Reachable.insert(Range.begin(), Range.end());
243    } else {
244      auto Range = inverse_depth_first_ext(&Start, Visited);
245      Reachable.insert(Range.begin(), Range.end());
246    }
247  }
248  
249  namespace llvm {
250  class DotFuncBCIInfo {
251  private:
252    const BlockCoverageInference *BCI;
253    const DenseMap<const BasicBlock *, bool> *Coverage;
254  
255  public:
DotFuncBCIInfo(const BlockCoverageInference * BCI,const DenseMap<const BasicBlock *,bool> * Coverage)256    DotFuncBCIInfo(const BlockCoverageInference *BCI,
257                   const DenseMap<const BasicBlock *, bool> *Coverage)
258        : BCI(BCI), Coverage(Coverage) {}
259  
getFunction()260    const Function &getFunction() { return BCI->F; }
261  
isInstrumented(const BasicBlock * BB) const262    bool isInstrumented(const BasicBlock *BB) const {
263      return BCI->shouldInstrumentBlock(*BB);
264    }
265  
isCovered(const BasicBlock * BB) const266    bool isCovered(const BasicBlock *BB) const {
267      return Coverage && Coverage->lookup(BB);
268    }
269  
isDependent(const BasicBlock * Src,const BasicBlock * Dest) const270    bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
271      return BCI->getDependencies(*Src).count(Dest);
272    }
273  };
274  
275  template <>
276  struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
getEntryNodellvm::GraphTraits277    static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
278      return &(Info->getFunction().getEntryBlock());
279    }
280  
281    // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
282    using nodes_iterator = pointer_iterator<Function::const_iterator>;
283  
nodes_beginllvm::GraphTraits284    static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
285      return nodes_iterator(Info->getFunction().begin());
286    }
287  
nodes_endllvm::GraphTraits288    static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
289      return nodes_iterator(Info->getFunction().end());
290    }
291  
sizellvm::GraphTraits292    static size_t size(DotFuncBCIInfo *Info) {
293      return Info->getFunction().size();
294    }
295  };
296  
297  template <>
298  struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
299  
DOTGraphTraitsllvm::DOTGraphTraits300    DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
301  
getGraphNamellvm::DOTGraphTraits302    static std::string getGraphName(DotFuncBCIInfo *Info) {
303      return "BCI CFG for " + Info->getFunction().getName().str();
304    }
305  
getNodeLabelllvm::DOTGraphTraits306    std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
307      return Node->getName().str();
308    }
309  
getEdgeAttributesllvm::DOTGraphTraits310    std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
311                                  DotFuncBCIInfo *Info) {
312      const BasicBlock *Dest = *I;
313      if (Info->isDependent(Src, Dest))
314        return "color=red";
315      if (Info->isDependent(Dest, Src))
316        return "color=blue";
317      return "";
318    }
319  
getNodeAttributesllvm::DOTGraphTraits320    std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
321      std::string Result;
322      if (Info->isInstrumented(Node))
323        Result += "style=filled,fillcolor=gray";
324      if (Info->isCovered(Node))
325        Result += std::string(Result.empty() ? "" : ",") + "color=red";
326      return Result;
327    }
328  };
329  
330  } // namespace llvm
331  
viewBlockCoverageGraph(const DenseMap<const BasicBlock *,bool> * Coverage) const332  void BlockCoverageInference::viewBlockCoverageGraph(
333      const DenseMap<const BasicBlock *, bool> *Coverage) const {
334    DotFuncBCIInfo Info(this, Coverage);
335    WriteGraph(&Info, "BCI", false,
336               "Block Coverage Inference for " + F.getName());
337  }
338  
dump(raw_ostream & OS) const339  void BlockCoverageInference::dump(raw_ostream &OS) const {
340    OS << "Minimal block coverage for function \'" << F.getName()
341       << "\' (Instrumented=*)\n";
342    for (auto &BB : F) {
343      OS << (shouldInstrumentBlock(BB) ? "* " : "  ") << BB.getName() << "\n";
344      auto It = PredecessorDependencies.find(&BB);
345      if (It != PredecessorDependencies.end() && It->second.size())
346        OS << "    PredDeps = " << getBlockNames(It->second) << "\n";
347      It = SuccessorDependencies.find(&BB);
348      if (It != SuccessorDependencies.end() && It->second.size())
349        OS << "    SuccDeps = " << getBlockNames(It->second) << "\n";
350    }
351    OS << "  Instrumented Blocks Hash = 0x"
352       << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
353  }
354  
355  std::string
getBlockNames(ArrayRef<const BasicBlock * > BBs)356  BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
357    std::string Result;
358    raw_string_ostream OS(Result);
359    OS << "[";
360    if (!BBs.empty()) {
361      OS << BBs.front()->getName();
362      BBs = BBs.drop_front();
363    }
364    for (auto *BB : BBs)
365      OS << ", " << BB->getName();
366    OS << "]";
367    return OS.str();
368  }
369