xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/DependenceGraphBuilder.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- DependenceGraphBuilder.cpp ------------------------------------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file implements common steps of the build algorithm for construction
9 // of dependence graphs such as DDG and PDG.
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm/Analysis/DependenceGraphBuilder.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/EnumeratedArray.h"
15 #include "llvm/ADT/PostOrderIterator.h"
16 #include "llvm/ADT/SCCIterator.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/DDG.h"
19 
20 using namespace llvm;
21 
22 #define DEBUG_TYPE "dgb"
23 
24 STATISTIC(TotalGraphs, "Number of dependence graphs created.");
25 STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
26 STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
27 STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
28 STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
29 STATISTIC(TotalConfusedEdges,
30           "Number of confused memory dependencies between two nodes.");
31 STATISTIC(TotalEdgeReversals,
32           "Number of times the source and sink of dependence was reversed to "
33           "expose cycles in the graph.");
34 
35 using InstructionListType = SmallVector<Instruction *, 2>;
36 
37 //===--------------------------------------------------------------------===//
38 // AbstractDependenceGraphBuilder implementation
39 //===--------------------------------------------------------------------===//
40 
41 template <class G>
42 void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
43   // The BBList is expected to be in program order.
44   size_t NextOrdinal = 1;
45   for (auto *BB : BBList)
46     for (auto &I : *BB)
47       InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
48 }
49 
50 template <class G>
51 void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
52   ++TotalGraphs;
53   assert(IMap.empty() && "Expected empty instruction map at start");
54   for (BasicBlock *BB : BBList)
55     for (Instruction &I : *BB) {
56       auto &NewNode = createFineGrainedNode(I);
57       IMap.insert(std::make_pair(&I, &NewNode));
58       NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
59       ++TotalFineGrainedNodes;
60     }
61 }
62 
63 template <class G>
64 void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
65   // Create a root node that connects to every connected component of the graph.
66   // This is done to allow graph iterators to visit all the disjoint components
67   // of the graph, in a single walk.
68   //
69   // This algorithm works by going through each node of the graph and for each
70   // node N, do a DFS starting from N. A rooted edge is established between the
71   // root node and N (if N is not yet visited). All the nodes reachable from N
72   // are marked as visited and are skipped in the DFS of subsequent nodes.
73   //
74   // Note: This algorithm tries to limit the number of edges out of the root
75   // node to some extent, but there may be redundant edges created depending on
76   // the iteration order. For example for a graph {A -> B}, an edge from the
77   // root node is added to both nodes if B is visited before A. While it does
78   // not result in minimal number of edges, this approach saves compile-time
79   // while keeping the number of edges in check.
80   auto &RootNode = createRootNode();
81   df_iterator_default_set<const NodeType *, 4> Visited;
82   for (auto *N : Graph) {
83     if (*N == RootNode)
84       continue;
85     for (auto I : depth_first_ext(N, Visited))
86       if (I == N)
87         createRootedEdge(RootNode, *N);
88   }
89 }
90 
91 template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
92   if (!shouldCreatePiBlocks())
93     return;
94 
95   LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
96 
97   // The overall algorithm is as follows:
98   // 1. Identify SCCs and for each SCC create a pi-block node containing all
99   //    the nodes in that SCC.
100   // 2. Identify incoming edges incident to the nodes inside of the SCC and
101   //    reconnect them to the pi-block node.
102   // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
103   //    outside of it and reconnect them so that the edges are coming out of the
104   //    SCC node instead.
105 
106   // Adding nodes as we iterate through the SCCs cause the SCC
107   // iterators to get invalidated. To prevent this invalidation, we first
108   // collect a list of nodes that are part of an SCC, and then iterate over
109   // those lists to create the pi-block nodes. Each element of the list is a
110   // list of nodes in an SCC. Note: trivial SCCs containing a single node are
111   // ignored.
112   SmallVector<NodeListType, 4> ListOfSCCs;
113   for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
114     if (SCC.size() > 1)
115       ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
116   }
117 
118   for (NodeListType &NL : ListOfSCCs) {
119     LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
120                       << " nodes in it.\n");
121 
122     // SCC iterator may put the nodes in an order that's different from the
123     // program order. To preserve original program order, we sort the list of
124     // nodes based on ordinal numbers computed earlier.
125     llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
126       return getOrdinal(*LHS) < getOrdinal(*RHS);
127     });
128 
129     NodeType &PiNode = createPiBlock(NL);
130     ++TotalPiBlockNodes;
131 
132     // Build a set to speed up the lookup for edges whose targets
133     // are inside the SCC.
134     SmallPtrSet<NodeType *, 4> NodesInSCC(llvm::from_range, NL);
135 
136     // We have the set of nodes in the SCC. We go through the set of nodes
137     // that are outside of the SCC and look for edges that cross the two sets.
138     for (NodeType *N : Graph) {
139 
140       // Skip the SCC node and all the nodes inside of it.
141       if (*N == PiNode || NodesInSCC.count(N))
142         continue;
143 
144       enum Direction {
145         Incoming,      // Incoming edges to the SCC
146         Outgoing,      // Edges going ot of the SCC
147         DirectionCount // To make the enum usable as an array index.
148       };
149 
150       // Use these flags to help us avoid creating redundant edges. If there
151       // are more than one edges from an outside node to inside nodes, we only
152       // keep one edge from that node to the pi-block node. Similarly, if
153       // there are more than one edges from inside nodes to an outside node,
154       // we only keep one edge from the pi-block node to the outside node.
155       // There is a flag defined for each direction (incoming vs outgoing) and
156       // for each type of edge supported, using a two-dimensional boolean
157       // array.
158       using EdgeKind = typename EdgeType::EdgeKind;
159       EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{false,
160                                                                          false};
161 
162       auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
163                                      const EdgeKind K) {
164         switch (K) {
165         case EdgeKind::RegisterDefUse:
166           createDefUseEdge(Src, Dst);
167           break;
168         case EdgeKind::MemoryDependence:
169           createMemoryEdge(Src, Dst);
170           break;
171         case EdgeKind::Rooted:
172           createRootedEdge(Src, Dst);
173           break;
174         default:
175           llvm_unreachable("Unsupported type of edge.");
176         }
177       };
178 
179       auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
180                                 const Direction Dir) {
181         if (!Src->hasEdgeTo(*Dst))
182           return;
183         LLVM_DEBUG(
184             dbgs() << "reconnecting("
185                    << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
186                    << ":\nSrc:" << *Src << "\nDst:" << *Dst << "\nNew:" << *New
187                    << "\n");
188         assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
189                "Invalid direction.");
190 
191         SmallVector<EdgeType *, 10> EL;
192         Src->findEdgesTo(*Dst, EL);
193         for (EdgeType *OldEdge : EL) {
194           EdgeKind Kind = OldEdge->getKind();
195           if (!EdgeAlreadyCreated[Dir][Kind]) {
196             if (Dir == Direction::Incoming) {
197               createEdgeOfKind(*Src, *New, Kind);
198               LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
199             } else if (Dir == Direction::Outgoing) {
200               createEdgeOfKind(*New, *Dst, Kind);
201               LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
202             }
203             EdgeAlreadyCreated[Dir][Kind] = true;
204           }
205           Src->removeEdge(*OldEdge);
206           destroyEdge(*OldEdge);
207           LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
208         }
209       };
210 
211       for (NodeType *SCCNode : NL) {
212         // Process incoming edges incident to the pi-block node.
213         reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
214 
215         // Process edges that are coming out of the pi-block node.
216         reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
217       }
218     }
219   }
220 
221   // Ordinal maps are no longer needed.
222   InstOrdinalMap.clear();
223   NodeOrdinalMap.clear();
224 
225   LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
226 }
227 
228 template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
229   for (NodeType *N : Graph) {
230     InstructionListType SrcIList;
231     N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
232 
233     // Use a set to mark the targets that we link to N, so we don't add
234     // duplicate def-use edges when more than one instruction in a target node
235     // use results of instructions that are contained in N.
236     SmallPtrSet<NodeType *, 4> VisitedTargets;
237 
238     for (Instruction *II : SrcIList) {
239       for (User *U : II->users()) {
240         Instruction *UI = dyn_cast<Instruction>(U);
241         if (!UI)
242           continue;
243         NodeType *DstNode = IMap.lookup(UI);
244 
245         // In the case of loops, the scope of the subgraph is all the
246         // basic blocks (and instructions within them) belonging to the loop. We
247         // simply ignore all the edges coming from (or going into) instructions
248         // or basic blocks outside of this range.
249         if (!DstNode) {
250           LLVM_DEBUG(
251               dbgs()
252               << "skipped def-use edge since the sink" << *UI
253               << " is outside the range of instructions being considered.\n");
254           continue;
255         }
256 
257         // Self dependencies are ignored because they are redundant and
258         // uninteresting.
259         if (DstNode == N) {
260           LLVM_DEBUG(dbgs()
261                      << "skipped def-use edge since the sink and the source ("
262                      << N << ") are the same.\n");
263           continue;
264         }
265 
266         if (VisitedTargets.insert(DstNode).second) {
267           createDefUseEdge(*N, *DstNode);
268           ++TotalDefUseEdges;
269         }
270       }
271     }
272   }
273 }
274 
275 template <class G>
276 void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
277   using DGIterator = typename G::iterator;
278   auto isMemoryAccess = [](const Instruction *I) {
279     return I->mayReadOrWriteMemory();
280   };
281   for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
282     InstructionListType SrcIList;
283     (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
284     if (SrcIList.empty())
285       continue;
286 
287     for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
288       if (**SrcIt == **DstIt)
289         continue;
290       InstructionListType DstIList;
291       (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
292       if (DstIList.empty())
293         continue;
294       bool ForwardEdgeCreated = false;
295       bool BackwardEdgeCreated = false;
296       for (Instruction *ISrc : SrcIList) {
297         for (Instruction *IDst : DstIList) {
298           auto D = DI.depends(ISrc, IDst);
299           if (!D)
300             continue;
301 
302           // If we have a dependence with its left-most non-'=' direction
303           // being '>' we need to reverse the direction of the edge, because
304           // the source of the dependence cannot occur after the sink. For
305           // confused dependencies, we will create edges in both directions to
306           // represent the possibility of a cycle.
307 
308           auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
309             if (!ForwardEdgeCreated) {
310               createMemoryEdge(Src, Dst);
311               ++TotalMemoryEdges;
312             }
313             if (!BackwardEdgeCreated) {
314               createMemoryEdge(Dst, Src);
315               ++TotalMemoryEdges;
316             }
317             ForwardEdgeCreated = BackwardEdgeCreated = true;
318             ++TotalConfusedEdges;
319           };
320 
321           auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
322             if (!ForwardEdgeCreated) {
323               createMemoryEdge(Src, Dst);
324               ++TotalMemoryEdges;
325             }
326             ForwardEdgeCreated = true;
327           };
328 
329           auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
330             if (!BackwardEdgeCreated) {
331               createMemoryEdge(Dst, Src);
332               ++TotalMemoryEdges;
333             }
334             BackwardEdgeCreated = true;
335           };
336 
337           if (D->isConfused())
338             createConfusedEdges(**SrcIt, **DstIt);
339           else if (D->isOrdered() && !D->isLoopIndependent()) {
340             bool ReversedEdge = false;
341             for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
342               if (D->getDirection(Level) == Dependence::DVEntry::EQ)
343                 continue;
344               else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
345                 createBackwardEdge(**SrcIt, **DstIt);
346                 ReversedEdge = true;
347                 ++TotalEdgeReversals;
348                 break;
349               } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
350                 break;
351               else {
352                 createConfusedEdges(**SrcIt, **DstIt);
353                 break;
354               }
355             }
356             if (!ReversedEdge)
357               createForwardEdge(**SrcIt, **DstIt);
358           } else
359             createForwardEdge(**SrcIt, **DstIt);
360 
361           // Avoid creating duplicate edges.
362           if (ForwardEdgeCreated && BackwardEdgeCreated)
363             break;
364         }
365 
366         // If we've created edges in both directions, there is no more
367         // unique edge that we can create between these two nodes, so we
368         // can exit early.
369         if (ForwardEdgeCreated && BackwardEdgeCreated)
370           break;
371       }
372     }
373   }
374 }
375 
376 template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
377   if (!shouldSimplify())
378     return;
379   LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
380 
381   // This algorithm works by first collecting a set of candidate nodes that have
382   // an out-degree of one (in terms of def-use edges), and then ignoring those
383   // whose targets have an in-degree more than one. Each node in the resulting
384   // set can then be merged with its corresponding target and put back into the
385   // worklist until no further merge candidates are available.
386   SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
387 
388   // A mapping between nodes and their in-degree. To save space, this map
389   // only contains nodes that are targets of nodes in the CandidateSourceNodes.
390   DenseMap<NodeType *, unsigned> TargetInDegreeMap;
391 
392   for (NodeType *N : Graph) {
393     if (N->getEdges().size() != 1)
394       continue;
395     EdgeType &Edge = N->back();
396     if (!Edge.isDefUse())
397       continue;
398     CandidateSourceNodes.insert(N);
399 
400     // Insert an element into the in-degree map and initialize to zero. The
401     // count will get updated in the next step.
402     TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
403   }
404 
405   LLVM_DEBUG({
406     dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
407            << "\nNode with single outgoing def-use edge:\n";
408     for (NodeType *N : CandidateSourceNodes) {
409       dbgs() << N << "\n";
410     }
411   });
412 
413   for (NodeType *N : Graph) {
414     for (EdgeType *E : *N) {
415       NodeType *Tgt = &E->getTargetNode();
416       auto TgtIT = TargetInDegreeMap.find(Tgt);
417       if (TgtIT != TargetInDegreeMap.end())
418         ++(TgtIT->second);
419     }
420   }
421 
422   LLVM_DEBUG({
423     dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
424            << "\nContent of in-degree map:\n";
425     for (auto &I : TargetInDegreeMap) {
426       dbgs() << I.first << " --> " << I.second << "\n";
427     }
428   });
429 
430   SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
431                                        CandidateSourceNodes.end());
432   while (!Worklist.empty()) {
433     NodeType &Src = *Worklist.pop_back_val();
434     // As nodes get merged, we need to skip any node that has been removed from
435     // the candidate set (see below).
436     if (!CandidateSourceNodes.erase(&Src))
437       continue;
438 
439     assert(Src.getEdges().size() == 1 &&
440            "Expected a single edge from the candidate src node.");
441     NodeType &Tgt = Src.back().getTargetNode();
442     assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
443            "Expected target to be in the in-degree map.");
444 
445     if (TargetInDegreeMap[&Tgt] != 1)
446       continue;
447 
448     if (!areNodesMergeable(Src, Tgt))
449       continue;
450 
451     // Do not merge if there is also an edge from target to src (immediate
452     // cycle).
453     if (Tgt.hasEdgeTo(Src))
454       continue;
455 
456     LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
457 
458     mergeNodes(Src, Tgt);
459 
460     // If the target node is in the candidate set itself, we need to put the
461     // src node back into the worklist again so it gives the target a chance
462     // to get merged into it. For example if we have:
463     // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a},
464     // then after merging (a) and (b) together, we need to put (a,b) back in
465     // the worklist so that (c) can get merged in as well resulting in
466     // {(a,b,c) -> d}
467     // We also need to remove the old target (b), from the worklist. We first
468     // remove it from the candidate set here, and skip any item from the
469     // worklist that is not in the set.
470     if (CandidateSourceNodes.erase(&Tgt)) {
471       Worklist.push_back(&Src);
472       CandidateSourceNodes.insert(&Src);
473       LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
474     }
475   }
476   LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
477 }
478 
479 template <class G>
480 void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
481 
482   // If we don't create pi-blocks, then we may not have a DAG.
483   if (!shouldCreatePiBlocks())
484     return;
485 
486   SmallVector<NodeType *, 64> NodesInPO;
487   using NodeKind = typename NodeType::NodeKind;
488   for (NodeType *N : post_order(&Graph)) {
489     if (N->getKind() == NodeKind::PiBlock) {
490       // Put members of the pi-block right after the pi-block itself, for
491       // convenience.
492       const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
493       llvm::append_range(NodesInPO, PiBlockMembers);
494     }
495     NodesInPO.push_back(N);
496   }
497 
498   size_t OldSize = Graph.Nodes.size();
499   Graph.Nodes.clear();
500   append_range(Graph.Nodes, reverse(NodesInPO));
501   if (Graph.Nodes.size() != OldSize)
502     assert(false &&
503            "Expected the number of nodes to stay the same after the sort");
504 }
505 
506 template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
507 template class llvm::DependenceGraphInfo<DDGNode>;
508