xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges conditional blocks of code and reduces the number of
10 // conditional branches in the hot paths based on profiles.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/RegionInfo.h"
24 #include "llvm/Analysis/RegionIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/PassManager.h"
33 #include "llvm/IR/ProfDataUtils.h"
34 #include "llvm/Support/BranchProbability.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Transforms/Utils.h"
38 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39 #include "llvm/Transforms/Utils/Cloning.h"
40 #include "llvm/Transforms/Utils/ValueMapper.h"
41 
42 #include <optional>
43 #include <set>
44 #include <sstream>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "chr"
49 
50 #define CHR_DEBUG(X) LLVM_DEBUG(X)
51 
52 static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
53                                 cl::desc("Disable CHR for all functions"));
54 
55 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
56                               cl::desc("Apply CHR for all functions"));
57 
58 static cl::opt<double> CHRBiasThreshold(
59     "chr-bias-threshold", cl::init(0.99), cl::Hidden,
60     cl::desc("CHR considers a branch bias greater than this ratio as biased"));
61 
62 static cl::opt<unsigned> CHRMergeThreshold(
63     "chr-merge-threshold", cl::init(2), cl::Hidden,
64     cl::desc("CHR merges a group of N branches/selects where N >= this value"));
65 
66 static cl::opt<std::string> CHRModuleList(
67     "chr-module-list", cl::init(""), cl::Hidden,
68     cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
69 
70 static cl::opt<std::string> CHRFunctionList(
71     "chr-function-list", cl::init(""), cl::Hidden,
72     cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
73 
74 static cl::opt<unsigned> CHRDupThreshsold(
75     "chr-dup-threshold", cl::init(3), cl::Hidden,
76     cl::desc("Max number of duplications by CHR for a region"));
77 
78 static StringSet<> CHRModules;
79 static StringSet<> CHRFunctions;
80 
parseCHRFilterFiles()81 static void parseCHRFilterFiles() {
82   if (!CHRModuleList.empty()) {
83     auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
84     if (!FileOrErr) {
85       errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
86       std::exit(1);
87     }
88     StringRef Buf = FileOrErr->get()->getBuffer();
89     SmallVector<StringRef, 0> Lines;
90     Buf.split(Lines, '\n');
91     for (StringRef Line : Lines) {
92       Line = Line.trim();
93       if (!Line.empty())
94         CHRModules.insert(Line);
95     }
96   }
97   if (!CHRFunctionList.empty()) {
98     auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
99     if (!FileOrErr) {
100       errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
101       std::exit(1);
102     }
103     StringRef Buf = FileOrErr->get()->getBuffer();
104     SmallVector<StringRef, 0> Lines;
105     Buf.split(Lines, '\n');
106     for (StringRef Line : Lines) {
107       Line = Line.trim();
108       if (!Line.empty())
109         CHRFunctions.insert(Line);
110     }
111   }
112 }
113 
114 namespace {
115 
116 struct CHRStats {
117   CHRStats() = default;
print__anone1ce48180111::CHRStats118   void print(raw_ostream &OS) const {
119     OS << "CHRStats: NumBranches " << NumBranches
120        << " NumBranchesDelta " << NumBranchesDelta
121        << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
122   }
123   // The original number of conditional branches / selects
124   uint64_t NumBranches = 0;
125   // The decrease of the number of conditional branches / selects in the hot
126   // paths due to CHR.
127   uint64_t NumBranchesDelta = 0;
128   // NumBranchesDelta weighted by the profile count at the scope entry.
129   uint64_t WeightedNumBranchesDelta = 0;
130 };
131 
132 // RegInfo - some properties of a Region.
133 struct RegInfo {
134   RegInfo() = default;
RegInfo__anone1ce48180111::RegInfo135   RegInfo(Region *RegionIn) : R(RegionIn) {}
136   Region *R = nullptr;
137   bool HasBranch = false;
138   SmallVector<SelectInst *, 8> Selects;
139 };
140 
141 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
142 
143 // CHRScope - a sequence of regions to CHR together. It corresponds to a
144 // sequence of conditional blocks. It can have subscopes which correspond to
145 // nested conditional blocks. Nested CHRScopes form a tree.
146 class CHRScope {
147  public:
CHRScope(RegInfo RI)148   CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
149     assert(RI.R && "Null RegionIn");
150     RegInfos.push_back(RI);
151   }
152 
getParentRegion()153   Region *getParentRegion() {
154     assert(RegInfos.size() > 0 && "Empty CHRScope");
155     Region *Parent = RegInfos[0].R->getParent();
156     assert(Parent && "Unexpected to call this on the top-level region");
157     return Parent;
158   }
159 
getEntryBlock()160   BasicBlock *getEntryBlock() {
161     assert(RegInfos.size() > 0 && "Empty CHRScope");
162     return RegInfos.front().R->getEntry();
163   }
164 
getExitBlock()165   BasicBlock *getExitBlock() {
166     assert(RegInfos.size() > 0 && "Empty CHRScope");
167     return RegInfos.back().R->getExit();
168   }
169 
appendable(CHRScope * Next)170   bool appendable(CHRScope *Next) {
171     // The next scope is appendable only if this scope is directly connected to
172     // it (which implies it post-dominates this scope) and this scope dominates
173     // it (no edge to the next scope outside this scope).
174     BasicBlock *NextEntry = Next->getEntryBlock();
175     if (getExitBlock() != NextEntry)
176       // Not directly connected.
177       return false;
178     Region *LastRegion = RegInfos.back().R;
179     for (BasicBlock *Pred : predecessors(NextEntry))
180       if (!LastRegion->contains(Pred))
181         // There's an edge going into the entry of the next scope from outside
182         // of this scope.
183         return false;
184     return true;
185   }
186 
append(CHRScope * Next)187   void append(CHRScope *Next) {
188     assert(RegInfos.size() > 0 && "Empty CHRScope");
189     assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
190     assert(getParentRegion() == Next->getParentRegion() &&
191            "Must be siblings");
192     assert(getExitBlock() == Next->getEntryBlock() &&
193            "Must be adjacent");
194     RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
195     Subs.append(Next->Subs.begin(), Next->Subs.end());
196   }
197 
addSub(CHRScope * SubIn)198   void addSub(CHRScope *SubIn) {
199 #ifndef NDEBUG
200     bool IsChild = false;
201     for (RegInfo &RI : RegInfos)
202       if (RI.R == SubIn->getParentRegion()) {
203         IsChild = true;
204         break;
205       }
206     assert(IsChild && "Must be a child");
207 #endif
208     Subs.push_back(SubIn);
209   }
210 
211   // Split this scope at the boundary region into two, which will belong to the
212   // tail and returns the tail.
split(Region * Boundary)213   CHRScope *split(Region *Boundary) {
214     assert(Boundary && "Boundary null");
215     assert(RegInfos.begin()->R != Boundary &&
216            "Can't be split at beginning");
217     auto BoundaryIt = llvm::find_if(
218         RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
219     if (BoundaryIt == RegInfos.end())
220       return nullptr;
221     ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
222     DenseSet<Region *> TailRegionSet;
223     for (const RegInfo &RI : TailRegInfos)
224       TailRegionSet.insert(RI.R);
225 
226     auto TailIt =
227         std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
228           assert(Sub && "null Sub");
229           Region *Parent = Sub->getParentRegion();
230           if (TailRegionSet.count(Parent))
231             return false;
232 
233           assert(llvm::any_of(
234                      RegInfos,
235                      [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
236                  "Must be in head");
237           return true;
238         });
239     ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
240 
241     assert(HoistStopMap.empty() && "MapHoistStops must be empty");
242     auto *Scope = new CHRScope(TailRegInfos, TailSubs);
243     RegInfos.erase(BoundaryIt, RegInfos.end());
244     Subs.erase(TailIt, Subs.end());
245     return Scope;
246   }
247 
contains(Instruction * I) const248   bool contains(Instruction *I) const {
249     BasicBlock *Parent = I->getParent();
250     for (const RegInfo &RI : RegInfos)
251       if (RI.R->contains(Parent))
252         return true;
253     return false;
254   }
255 
256   void print(raw_ostream &OS) const;
257 
258   SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
259   SmallVector<CHRScope *, 8> Subs;  // Subscopes.
260 
261   // The instruction at which to insert the CHR conditional branch (and hoist
262   // the dependent condition values).
263   Instruction *BranchInsertPoint;
264 
265   // True-biased and false-biased regions (conditional blocks),
266   // respectively. Used only for the outermost scope and includes regions in
267   // subscopes. The rest are unbiased.
268   DenseSet<Region *> TrueBiasedRegions;
269   DenseSet<Region *> FalseBiasedRegions;
270   // Among the biased regions, the regions that get CHRed.
271   SmallVector<RegInfo, 8> CHRRegions;
272 
273   // True-biased and false-biased selects, respectively. Used only for the
274   // outermost scope and includes ones in subscopes.
275   DenseSet<SelectInst *> TrueBiasedSelects;
276   DenseSet<SelectInst *> FalseBiasedSelects;
277 
278   // Map from one of the above regions to the instructions to stop
279   // hoisting instructions at through use-def chains.
280   HoistStopMapTy HoistStopMap;
281 
282  private:
CHRScope(ArrayRef<RegInfo> RegInfosIn,ArrayRef<CHRScope * > SubsIn)283    CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
284        : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
285          Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
286 };
287 
288 class CHR {
289  public:
CHR(Function & Fin,BlockFrequencyInfo & BFIin,DominatorTree & DTin,ProfileSummaryInfo & PSIin,RegionInfo & RIin,OptimizationRemarkEmitter & OREin)290   CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
291       ProfileSummaryInfo &PSIin, RegionInfo &RIin,
292       OptimizationRemarkEmitter &OREin)
293       : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
294 
~CHR()295   ~CHR() {
296     for (CHRScope *Scope : Scopes) {
297       delete Scope;
298     }
299   }
300 
301   bool run();
302 
303  private:
304   // See the comments in CHR::run() for the high level flow of the algorithm and
305   // what the following functions do.
306 
findScopes(SmallVectorImpl<CHRScope * > & Output)307   void findScopes(SmallVectorImpl<CHRScope *> &Output) {
308     Region *R = RI.getTopLevelRegion();
309     if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
310       Output.push_back(Scope);
311     }
312   }
313   CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
314                         SmallVectorImpl<CHRScope *> &Scopes);
315   CHRScope *findScope(Region *R);
316   void checkScopeHoistable(CHRScope *Scope);
317 
318   void splitScopes(SmallVectorImpl<CHRScope *> &Input,
319                    SmallVectorImpl<CHRScope *> &Output);
320   SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
321                                         CHRScope *Outer,
322                                         DenseSet<Value *> *OuterConditionValues,
323                                         Instruction *OuterInsertPoint,
324                                         SmallVectorImpl<CHRScope *> &Output,
325                                         DenseSet<Instruction *> &Unhoistables);
326 
327   void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
328   void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
329 
330   void filterScopes(SmallVectorImpl<CHRScope *> &Input,
331                     SmallVectorImpl<CHRScope *> &Output);
332 
333   void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
334                      SmallVectorImpl<CHRScope *> &Output);
335   void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
336 
337   void sortScopes(SmallVectorImpl<CHRScope *> &Input,
338                   SmallVectorImpl<CHRScope *> &Output);
339 
340   void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
341   void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
342   void cloneScopeBlocks(CHRScope *Scope,
343                         BasicBlock *PreEntryBlock,
344                         BasicBlock *ExitBlock,
345                         Region *LastRegion,
346                         ValueToValueMapTy &VMap);
347   BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
348                                  BasicBlock *EntryBlock,
349                                  BasicBlock *NewEntryBlock,
350                                  ValueToValueMapTy &VMap);
351   void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
352                                BranchInst *MergedBR, uint64_t ProfileCount);
353   void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
354                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
355   void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
356                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
357   void addToMergedCondition(bool IsTrueBiased, Value *Cond,
358                             Instruction *BranchOrSelect, CHRScope *Scope,
359                             IRBuilder<> &IRB, Value *&MergedCondition);
getRegionDuplicationCount(const Region * R)360   unsigned getRegionDuplicationCount(const Region *R) {
361     unsigned Count = 0;
362     // Find out how many times region R is cloned. Note that if the parent
363     // of R is cloned, R is also cloned, but R's clone count is not updated
364     // from the clone of the parent. We need to accumlate all the counts
365     // from the ancestors to get the clone count.
366     while (R) {
367       Count += DuplicationCount[R];
368       R = R->getParent();
369     }
370     return Count;
371   }
372 
373   Function &F;
374   BlockFrequencyInfo &BFI;
375   DominatorTree &DT;
376   ProfileSummaryInfo &PSI;
377   RegionInfo &RI;
378   OptimizationRemarkEmitter &ORE;
379   CHRStats Stats;
380 
381   // All the true-biased regions in the function
382   DenseSet<Region *> TrueBiasedRegionsGlobal;
383   // All the false-biased regions in the function
384   DenseSet<Region *> FalseBiasedRegionsGlobal;
385   // All the true-biased selects in the function
386   DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
387   // All the false-biased selects in the function
388   DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
389   // A map from biased regions to their branch bias
390   DenseMap<Region *, BranchProbability> BranchBiasMap;
391   // A map from biased selects to their branch bias
392   DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
393   // All the scopes.
394   DenseSet<CHRScope *> Scopes;
395   // This maps records how many times this region is cloned.
396   DenseMap<const Region *, unsigned> DuplicationCount;
397 };
398 
399 } // end anonymous namespace
400 
401 static inline
operator <<(raw_ostream & OS,const CHRStats & Stats)402 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
403                                               const CHRStats &Stats) {
404   Stats.print(OS);
405   return OS;
406 }
407 
408 static inline
operator <<(raw_ostream & OS,const CHRScope & Scope)409 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
410   Scope.print(OS);
411   return OS;
412 }
413 
shouldApply(Function & F,ProfileSummaryInfo & PSI)414 static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
415   if (DisableCHR)
416     return false;
417 
418   if (ForceCHR)
419     return true;
420 
421   if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
422     if (CHRModules.count(F.getParent()->getName()))
423       return true;
424     return CHRFunctions.count(F.getName());
425   }
426 
427   return PSI.isFunctionEntryHot(&F);
428 }
429 
dumpIR(Function & F,const char * Label,CHRStats * Stats)430 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
431                                          CHRStats *Stats) {
432   StringRef FuncName = F.getName();
433   StringRef ModuleName = F.getParent()->getName();
434   (void)(FuncName); // Unused in release build.
435   (void)(ModuleName); // Unused in release build.
436   CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
437             << FuncName);
438   if (Stats)
439     CHR_DEBUG(dbgs() << " " << *Stats);
440   CHR_DEBUG(dbgs() << "\n");
441   CHR_DEBUG(F.dump());
442 }
443 
print(raw_ostream & OS) const444 void CHRScope::print(raw_ostream &OS) const {
445   assert(RegInfos.size() > 0 && "Empty CHRScope");
446   OS << "CHRScope[";
447   OS << RegInfos.size() << ", Regions[";
448   for (const RegInfo &RI : RegInfos) {
449     OS << RI.R->getNameStr();
450     if (RI.HasBranch)
451       OS << " B";
452     if (RI.Selects.size() > 0)
453       OS << " S" << RI.Selects.size();
454     OS << ", ";
455   }
456   if (RegInfos[0].R->getParent()) {
457     OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
458   } else {
459     // top level region
460     OS << "]";
461   }
462   OS << ", Subs[";
463   for (CHRScope *Sub : Subs) {
464     OS << *Sub << ", ";
465   }
466   OS << "]]";
467 }
468 
469 // Return true if the given instruction type can be hoisted by CHR.
isHoistableInstructionType(Instruction * I)470 static bool isHoistableInstructionType(Instruction *I) {
471   return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
472       isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
473       isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
474       isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
475       isa<InsertValueInst>(I);
476 }
477 
478 // Return true if the given instruction can be hoisted by CHR.
isHoistable(Instruction * I,DominatorTree & DT)479 static bool isHoistable(Instruction *I, DominatorTree &DT) {
480   if (!isHoistableInstructionType(I))
481     return false;
482   return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
483 }
484 
485 // Recursively traverse the use-def chains of the given value and return a set
486 // of the unhoistable base values defined within the scope (excluding the
487 // first-region entry block) or the (hoistable or unhoistable) base values that
488 // are defined outside (including the first-region entry block) of the
489 // scope. The returned set doesn't include constants.
490 static const std::set<Value *> &
getBaseValues(Value * V,DominatorTree & DT,DenseMap<Value *,std::set<Value * >> & Visited)491 getBaseValues(Value *V, DominatorTree &DT,
492               DenseMap<Value *, std::set<Value *>> &Visited) {
493   auto It = Visited.find(V);
494   if (It != Visited.end()) {
495     return It->second;
496   }
497   std::set<Value *> Result;
498   if (auto *I = dyn_cast<Instruction>(V)) {
499     // We don't stop at a block that's not in the Scope because we would miss
500     // some instructions that are based on the same base values if we stop
501     // there.
502     if (!isHoistable(I, DT)) {
503       Result.insert(I);
504       return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
505     }
506     // I is hoistable above the Scope.
507     for (Value *Op : I->operands()) {
508       const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
509       Result.insert(OpResult.begin(), OpResult.end());
510     }
511     return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
512   }
513   if (isa<Argument>(V)) {
514     Result.insert(V);
515   }
516   // We don't include others like constants because those won't lead to any
517   // chance of folding of conditions (eg two bit checks merged into one check)
518   // after CHR.
519   return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
520 }
521 
522 // Return true if V is already hoisted or can be hoisted (along with its
523 // operands) above the insert point. When it returns true and HoistStops is
524 // non-null, the instructions to stop hoisting at through the use-def chains are
525 // inserted into HoistStops.
526 static bool
checkHoistValue(Value * V,Instruction * InsertPoint,DominatorTree & DT,DenseSet<Instruction * > & Unhoistables,DenseSet<Instruction * > * HoistStops,DenseMap<Instruction *,bool> & Visited)527 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
528                 DenseSet<Instruction *> &Unhoistables,
529                 DenseSet<Instruction *> *HoistStops,
530                 DenseMap<Instruction *, bool> &Visited) {
531   assert(InsertPoint && "Null InsertPoint");
532   if (auto *I = dyn_cast<Instruction>(V)) {
533     auto It = Visited.find(I);
534     if (It != Visited.end()) {
535       return It->second;
536     }
537     assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
538     assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
539     if (Unhoistables.count(I)) {
540       // Don't hoist if they are not to be hoisted.
541       Visited[I] = false;
542       return false;
543     }
544     if (DT.dominates(I, InsertPoint)) {
545       // We are already above the insert point. Stop here.
546       if (HoistStops)
547         HoistStops->insert(I);
548       Visited[I] = true;
549       return true;
550     }
551     // We aren't not above the insert point, check if we can hoist it above the
552     // insert point.
553     if (isHoistable(I, DT)) {
554       // Check operands first.
555       DenseSet<Instruction *> OpsHoistStops;
556       bool AllOpsHoisted = true;
557       for (Value *Op : I->operands()) {
558         if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
559                              Visited)) {
560           AllOpsHoisted = false;
561           break;
562         }
563       }
564       if (AllOpsHoisted) {
565         CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
566         if (HoistStops)
567           HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
568         Visited[I] = true;
569         return true;
570       }
571     }
572     Visited[I] = false;
573     return false;
574   }
575   // Non-instructions are considered hoistable.
576   return true;
577 }
578 
579 // Constructs the true and false branch probabilities if the the instruction has
580 // valid branch weights. Returns true when this was successful, false otherwise.
extractBranchProbabilities(Instruction * I,BranchProbability & TrueProb,BranchProbability & FalseProb)581 static bool extractBranchProbabilities(Instruction *I,
582                                        BranchProbability &TrueProb,
583                                        BranchProbability &FalseProb) {
584   uint64_t TrueWeight;
585   uint64_t FalseWeight;
586   if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
587     return false;
588   uint64_t SumWeight = TrueWeight + FalseWeight;
589 
590   assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
591          "Overflow calculating branch probabilities.");
592 
593   // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
594   if (SumWeight == 0)
595     return false;
596 
597   TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
598   FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
599   return true;
600 }
601 
getCHRBiasThreshold()602 static BranchProbability getCHRBiasThreshold() {
603   return BranchProbability::getBranchProbability(
604       static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
605 }
606 
607 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
608 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
609 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
610 // false.
611 template <typename K, typename S, typename M>
checkBias(K * Key,BranchProbability TrueProb,BranchProbability FalseProb,S & TrueSet,S & FalseSet,M & BiasMap)612 static bool checkBias(K *Key, BranchProbability TrueProb,
613                       BranchProbability FalseProb, S &TrueSet, S &FalseSet,
614                       M &BiasMap) {
615   BranchProbability Threshold = getCHRBiasThreshold();
616   if (TrueProb >= Threshold) {
617     TrueSet.insert(Key);
618     BiasMap[Key] = TrueProb;
619     return true;
620   } else if (FalseProb >= Threshold) {
621     FalseSet.insert(Key);
622     BiasMap[Key] = FalseProb;
623     return true;
624   }
625   return false;
626 }
627 
628 // Returns true and insert a region into the right biased set and the map if the
629 // branch of the region is biased.
checkBiasedBranch(BranchInst * BI,Region * R,DenseSet<Region * > & TrueBiasedRegionsGlobal,DenseSet<Region * > & FalseBiasedRegionsGlobal,DenseMap<Region *,BranchProbability> & BranchBiasMap)630 static bool checkBiasedBranch(BranchInst *BI, Region *R,
631                               DenseSet<Region *> &TrueBiasedRegionsGlobal,
632                               DenseSet<Region *> &FalseBiasedRegionsGlobal,
633                               DenseMap<Region *, BranchProbability> &BranchBiasMap) {
634   if (!BI->isConditional())
635     return false;
636   BranchProbability ThenProb, ElseProb;
637   if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
638     return false;
639   BasicBlock *IfThen = BI->getSuccessor(0);
640   BasicBlock *IfElse = BI->getSuccessor(1);
641   assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
642          IfThen != IfElse &&
643          "Invariant from findScopes");
644   if (IfThen == R->getExit()) {
645     // Swap them so that IfThen/ThenProb means going into the conditional code
646     // and IfElse/ElseProb means skipping it.
647     std::swap(IfThen, IfElse);
648     std::swap(ThenProb, ElseProb);
649   }
650   CHR_DEBUG(dbgs() << "BI " << *BI << " ");
651   CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
652   CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
653   return checkBias(R, ThenProb, ElseProb,
654                    TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
655                    BranchBiasMap);
656 }
657 
658 // Returns true and insert a select into the right biased set and the map if the
659 // select is biased.
checkBiasedSelect(SelectInst * SI,Region * R,DenseSet<SelectInst * > & TrueBiasedSelectsGlobal,DenseSet<SelectInst * > & FalseBiasedSelectsGlobal,DenseMap<SelectInst *,BranchProbability> & SelectBiasMap)660 static bool checkBiasedSelect(
661     SelectInst *SI, Region *R,
662     DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
663     DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
664     DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
665   BranchProbability TrueProb, FalseProb;
666   if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
667     return false;
668   CHR_DEBUG(dbgs() << "SI " << *SI << " ");
669   CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
670   CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
671   return checkBias(SI, TrueProb, FalseProb,
672                    TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
673                    SelectBiasMap);
674 }
675 
676 // Returns the instruction at which to hoist the dependent condition values and
677 // insert the CHR branch for a region. This is the terminator branch in the
678 // entry block or the first select in the entry block, if any.
getBranchInsertPoint(RegInfo & RI)679 static Instruction* getBranchInsertPoint(RegInfo &RI) {
680   Region *R = RI.R;
681   BasicBlock *EntryBB = R->getEntry();
682   // The hoist point is by default the terminator of the entry block, which is
683   // the same as the branch instruction if RI.HasBranch is true.
684   Instruction *HoistPoint = EntryBB->getTerminator();
685   for (SelectInst *SI : RI.Selects) {
686     if (SI->getParent() == EntryBB) {
687       // Pick the first select in Selects in the entry block.  Note Selects is
688       // sorted in the instruction order within a block (asserted below).
689       HoistPoint = SI;
690       break;
691     }
692   }
693   assert(HoistPoint && "Null HoistPoint");
694 #ifndef NDEBUG
695   // Check that HoistPoint is the first one in Selects in the entry block,
696   // if any.
697   DenseSet<Instruction *> EntryBlockSelectSet;
698   for (SelectInst *SI : RI.Selects) {
699     if (SI->getParent() == EntryBB) {
700       EntryBlockSelectSet.insert(SI);
701     }
702   }
703   for (Instruction &I : *EntryBB) {
704     if (EntryBlockSelectSet.contains(&I)) {
705       assert(&I == HoistPoint &&
706              "HoistPoint must be the first one in Selects");
707       break;
708     }
709   }
710 #endif
711   return HoistPoint;
712 }
713 
714 // Find a CHR scope in the given region.
findScope(Region * R)715 CHRScope * CHR::findScope(Region *R) {
716   CHRScope *Result = nullptr;
717   BasicBlock *Entry = R->getEntry();
718   BasicBlock *Exit = R->getExit();  // null if top level.
719   assert(Entry && "Entry must not be null");
720   assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
721          "Only top level region has a null exit");
722   if (Entry)
723     CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
724   else
725     CHR_DEBUG(dbgs() << "Entry null\n");
726   if (Exit)
727     CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
728   else
729     CHR_DEBUG(dbgs() << "Exit null\n");
730   // Exclude cases where Entry is part of a subregion (hence it doesn't belong
731   // to this region).
732   bool EntryInSubregion = RI.getRegionFor(Entry) != R;
733   if (EntryInSubregion)
734     return nullptr;
735   // Exclude loops
736   for (BasicBlock *Pred : predecessors(Entry))
737     if (R->contains(Pred))
738       return nullptr;
739   // If any of the basic blocks have address taken, we must skip this region
740   // because we cannot clone basic blocks that have address taken.
741   for (BasicBlock *BB : R->blocks()) {
742     if (BB->hasAddressTaken())
743       return nullptr;
744     // If we encounter llvm.coro.id, skip this region because if the basic block
745     // is cloned, we end up inserting a token type PHI node to the block with
746     // llvm.coro.begin.
747     // FIXME: This could lead to less optimal codegen, because the region is
748     // excluded, it can prevent CHR from merging adjacent regions into bigger
749     // scope and hoisting more branches.
750     for (Instruction &I : *BB)
751       if (auto *II = dyn_cast<IntrinsicInst>(&I))
752         if (II->getIntrinsicID() == Intrinsic::coro_id)
753           return nullptr;
754   }
755 
756   if (Exit) {
757     // Try to find an if-then block (check if R is an if-then).
758     // if (cond) {
759     //  ...
760     // }
761     auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
762     if (BI)
763       CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
764     else
765       CHR_DEBUG(dbgs() << "BI null\n");
766     if (BI && BI->isConditional()) {
767       BasicBlock *S0 = BI->getSuccessor(0);
768       BasicBlock *S1 = BI->getSuccessor(1);
769       CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
770       CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
771       if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
772         RegInfo RI(R);
773         RI.HasBranch = checkBiasedBranch(
774             BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
775             BranchBiasMap);
776         Result = new CHRScope(RI);
777         Scopes.insert(Result);
778         CHR_DEBUG(dbgs() << "Found a region with a branch\n");
779         ++Stats.NumBranches;
780         if (!RI.HasBranch) {
781           ORE.emit([&]() {
782             return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
783                 << "Branch not biased";
784           });
785         }
786       }
787     }
788   }
789   {
790     // Try to look for selects in the direct child blocks (as opposed to in
791     // subregions) of R.
792     // ...
793     // if (..) { // Some subregion
794     //   ...
795     // }
796     // if (..) { // Some subregion
797     //   ...
798     // }
799     // ...
800     // a = cond ? b : c;
801     // ...
802     SmallVector<SelectInst *, 8> Selects;
803     for (RegionNode *E : R->elements()) {
804       if (E->isSubRegion())
805         continue;
806       // This returns the basic block of E if E is a direct child of R (not a
807       // subregion.)
808       BasicBlock *BB = E->getEntry();
809       // Need to push in the order to make it easier to find the first Select
810       // later.
811       for (Instruction &I : *BB) {
812         if (auto *SI = dyn_cast<SelectInst>(&I)) {
813           Selects.push_back(SI);
814           ++Stats.NumBranches;
815         }
816       }
817     }
818     if (Selects.size() > 0) {
819       auto AddSelects = [&](RegInfo &RI) {
820         for (auto *SI : Selects)
821           if (checkBiasedSelect(SI, RI.R,
822                                 TrueBiasedSelectsGlobal,
823                                 FalseBiasedSelectsGlobal,
824                                 SelectBiasMap))
825             RI.Selects.push_back(SI);
826           else
827             ORE.emit([&]() {
828               return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
829                   << "Select not biased";
830             });
831       };
832       if (!Result) {
833         CHR_DEBUG(dbgs() << "Found a select-only region\n");
834         RegInfo RI(R);
835         AddSelects(RI);
836         Result = new CHRScope(RI);
837         Scopes.insert(Result);
838       } else {
839         CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
840         AddSelects(Result->RegInfos[0]);
841       }
842     }
843   }
844 
845   if (Result) {
846     checkScopeHoistable(Result);
847   }
848   return Result;
849 }
850 
851 // Check that any of the branch and the selects in the region could be
852 // hoisted above the the CHR branch insert point (the most dominating of
853 // them, either the branch (at the end of the first block) or the first
854 // select in the first block). If the branch can't be hoisted, drop the
855 // selects in the first blocks.
856 //
857 // For example, for the following scope/region with selects, we want to insert
858 // the merged branch right before the first select in the first/entry block by
859 // hoisting c1, c2, c3, and c4.
860 //
861 // // Branch insert point here.
862 // a = c1 ? b : c; // Select 1
863 // d = c2 ? e : f; // Select 2
864 // if (c3) { // Branch
865 //   ...
866 //   c4 = foo() // A call.
867 //   g = c4 ? h : i; // Select 3
868 // }
869 //
870 // But suppose we can't hoist c4 because it's dependent on the preceding
871 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
872 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
checkScopeHoistable(CHRScope * Scope)873 void CHR::checkScopeHoistable(CHRScope *Scope) {
874   RegInfo &RI = Scope->RegInfos[0];
875   Region *R = RI.R;
876   BasicBlock *EntryBB = R->getEntry();
877   auto *Branch = RI.HasBranch ?
878                  cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
879   SmallVector<SelectInst *, 8> &Selects = RI.Selects;
880   if (RI.HasBranch || !Selects.empty()) {
881     Instruction *InsertPoint = getBranchInsertPoint(RI);
882     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
883     // Avoid a data dependence from a select or a branch to a(nother)
884     // select. Note no instruction can't data-depend on a branch (a branch
885     // instruction doesn't produce a value).
886     DenseSet<Instruction *> Unhoistables;
887     // Initialize Unhoistables with the selects.
888     for (SelectInst *SI : Selects) {
889       Unhoistables.insert(SI);
890     }
891     // Remove Selects that can't be hoisted.
892     for (auto it = Selects.begin(); it != Selects.end(); ) {
893       SelectInst *SI = *it;
894       if (SI == InsertPoint) {
895         ++it;
896         continue;
897       }
898       DenseMap<Instruction *, bool> Visited;
899       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
900                                          DT, Unhoistables, nullptr, Visited);
901       if (!IsHoistable) {
902         CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
903         ORE.emit([&]() {
904           return OptimizationRemarkMissed(DEBUG_TYPE,
905                                           "DropUnhoistableSelect", SI)
906               << "Dropped unhoistable select";
907         });
908         it = Selects.erase(it);
909         // Since we are dropping the select here, we also drop it from
910         // Unhoistables.
911         Unhoistables.erase(SI);
912       } else
913         ++it;
914     }
915     // Update InsertPoint after potentially removing selects.
916     InsertPoint = getBranchInsertPoint(RI);
917     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
918     if (RI.HasBranch && InsertPoint != Branch) {
919       DenseMap<Instruction *, bool> Visited;
920       bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
921                                          DT, Unhoistables, nullptr, Visited);
922       if (!IsHoistable) {
923         // If the branch isn't hoistable, drop the selects in the entry
924         // block, preferring the branch, which makes the branch the hoist
925         // point.
926         assert(InsertPoint != Branch && "Branch must not be the hoist point");
927         CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
928         CHR_DEBUG(
929             for (SelectInst *SI : Selects) {
930               dbgs() << "SI " << *SI << "\n";
931             });
932         for (SelectInst *SI : Selects) {
933           ORE.emit([&]() {
934             return OptimizationRemarkMissed(DEBUG_TYPE,
935                                             "DropSelectUnhoistableBranch", SI)
936                 << "Dropped select due to unhoistable branch";
937           });
938         }
939         llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
940           return SI->getParent() == EntryBB;
941         });
942         Unhoistables.clear();
943         InsertPoint = Branch;
944       }
945     }
946     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
947 #ifndef NDEBUG
948     if (RI.HasBranch) {
949       assert(!DT.dominates(Branch, InsertPoint) &&
950              "Branch can't be already above the hoist point");
951       DenseMap<Instruction *, bool> Visited;
952       assert(checkHoistValue(Branch->getCondition(), InsertPoint,
953                              DT, Unhoistables, nullptr, Visited) &&
954              "checkHoistValue for branch");
955     }
956     for (auto *SI : Selects) {
957       assert(!DT.dominates(SI, InsertPoint) &&
958              "SI can't be already above the hoist point");
959       DenseMap<Instruction *, bool> Visited;
960       assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
961                              Unhoistables, nullptr, Visited) &&
962              "checkHoistValue for selects");
963     }
964     CHR_DEBUG(dbgs() << "Result\n");
965     if (RI.HasBranch) {
966       CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
967     }
968     for (auto *SI : Selects) {
969       CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
970     }
971 #endif
972   }
973 }
974 
975 // Traverse the region tree, find all nested scopes and merge them if possible.
findScopes(Region * R,Region * NextRegion,Region * ParentRegion,SmallVectorImpl<CHRScope * > & Scopes)976 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
977                            SmallVectorImpl<CHRScope *> &Scopes) {
978   CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
979   CHRScope *Result = findScope(R);
980   // Visit subscopes.
981   CHRScope *ConsecutiveSubscope = nullptr;
982   SmallVector<CHRScope *, 8> Subscopes;
983   for (auto It = R->begin(); It != R->end(); ++It) {
984     const std::unique_ptr<Region> &SubR = *It;
985     auto NextIt = std::next(It);
986     Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
987     CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
988               << "\n");
989     CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
990     if (SubCHRScope) {
991       CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
992     } else {
993       CHR_DEBUG(dbgs() << "Subregion Scope null\n");
994     }
995     if (SubCHRScope) {
996       if (!ConsecutiveSubscope)
997         ConsecutiveSubscope = SubCHRScope;
998       else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
999         Subscopes.push_back(ConsecutiveSubscope);
1000         ConsecutiveSubscope = SubCHRScope;
1001       } else
1002         ConsecutiveSubscope->append(SubCHRScope);
1003     } else {
1004       if (ConsecutiveSubscope) {
1005         Subscopes.push_back(ConsecutiveSubscope);
1006       }
1007       ConsecutiveSubscope = nullptr;
1008     }
1009   }
1010   if (ConsecutiveSubscope) {
1011     Subscopes.push_back(ConsecutiveSubscope);
1012   }
1013   for (CHRScope *Sub : Subscopes) {
1014     if (Result) {
1015       // Combine it with the parent.
1016       Result->addSub(Sub);
1017     } else {
1018       // Push Subscopes as they won't be combined with the parent.
1019       Scopes.push_back(Sub);
1020     }
1021   }
1022   return Result;
1023 }
1024 
getCHRConditionValuesForRegion(RegInfo & RI)1025 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1026   DenseSet<Value *> ConditionValues;
1027   if (RI.HasBranch) {
1028     auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1029     ConditionValues.insert(BI->getCondition());
1030   }
1031   for (SelectInst *SI : RI.Selects) {
1032     ConditionValues.insert(SI->getCondition());
1033   }
1034   return ConditionValues;
1035 }
1036 
1037 
1038 // Determine whether to split a scope depending on the sets of the branch
1039 // condition values of the previous region and the current region. We split
1040 // (return true) it if 1) the condition values of the inner/lower scope can't be
1041 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1042 // values have an empty intersection (because the combined branch conditions
1043 // won't probably lead to a simpler combined condition).
shouldSplit(Instruction * InsertPoint,DenseSet<Value * > & PrevConditionValues,DenseSet<Value * > & ConditionValues,DominatorTree & DT,DenseSet<Instruction * > & Unhoistables)1044 static bool shouldSplit(Instruction *InsertPoint,
1045                         DenseSet<Value *> &PrevConditionValues,
1046                         DenseSet<Value *> &ConditionValues,
1047                         DominatorTree &DT,
1048                         DenseSet<Instruction *> &Unhoistables) {
1049   assert(InsertPoint && "Null InsertPoint");
1050   CHR_DEBUG(
1051       dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1052       for (Value *V : PrevConditionValues) {
1053         dbgs() << *V << ", ";
1054       }
1055       dbgs() << " ConditionValues ";
1056       for (Value *V : ConditionValues) {
1057         dbgs() << *V << ", ";
1058       }
1059       dbgs() << "\n");
1060   // If any of Bases isn't hoistable to the hoist point, split.
1061   for (Value *V : ConditionValues) {
1062     DenseMap<Instruction *, bool> Visited;
1063     if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1064       CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1065       return true; // Not hoistable, split.
1066     }
1067   }
1068   // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1069   // unnecessary splits at scopes with no branch/selects.  If
1070   // PrevConditionValues and ConditionValues don't intersect at all, split.
1071   if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1072     // Use std::set as DenseSet doesn't work with set_intersection.
1073     std::set<Value *> PrevBases, Bases;
1074     DenseMap<Value *, std::set<Value *>> Visited;
1075     for (Value *V : PrevConditionValues) {
1076       const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1077       PrevBases.insert(BaseValues.begin(), BaseValues.end());
1078     }
1079     for (Value *V : ConditionValues) {
1080       const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1081       Bases.insert(BaseValues.begin(), BaseValues.end());
1082     }
1083     CHR_DEBUG(
1084         dbgs() << "PrevBases ";
1085         for (Value *V : PrevBases) {
1086           dbgs() << *V << ", ";
1087         }
1088         dbgs() << " Bases ";
1089         for (Value *V : Bases) {
1090           dbgs() << *V << ", ";
1091         }
1092         dbgs() << "\n");
1093     std::vector<Value *> Intersection;
1094     std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1095                           Bases.end(), std::back_inserter(Intersection));
1096     if (Intersection.empty()) {
1097       // Empty intersection, split.
1098       CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1099       return true;
1100     }
1101   }
1102   CHR_DEBUG(dbgs() << "No split\n");
1103   return false;  // Don't split.
1104 }
1105 
getSelectsInScope(CHRScope * Scope,DenseSet<Instruction * > & Output)1106 static void getSelectsInScope(CHRScope *Scope,
1107                               DenseSet<Instruction *> &Output) {
1108   for (RegInfo &RI : Scope->RegInfos)
1109     for (SelectInst *SI : RI.Selects)
1110       Output.insert(SI);
1111   for (CHRScope *Sub : Scope->Subs)
1112     getSelectsInScope(Sub, Output);
1113 }
1114 
splitScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1115 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1116                       SmallVectorImpl<CHRScope *> &Output) {
1117   for (CHRScope *Scope : Input) {
1118     assert(!Scope->BranchInsertPoint &&
1119            "BranchInsertPoint must not be set");
1120     DenseSet<Instruction *> Unhoistables;
1121     getSelectsInScope(Scope, Unhoistables);
1122     splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1123   }
1124 #ifndef NDEBUG
1125   for (CHRScope *Scope : Output) {
1126     assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1127   }
1128 #endif
1129 }
1130 
splitScope(CHRScope * Scope,CHRScope * Outer,DenseSet<Value * > * OuterConditionValues,Instruction * OuterInsertPoint,SmallVectorImpl<CHRScope * > & Output,DenseSet<Instruction * > & Unhoistables)1131 SmallVector<CHRScope *, 8> CHR::splitScope(
1132     CHRScope *Scope,
1133     CHRScope *Outer,
1134     DenseSet<Value *> *OuterConditionValues,
1135     Instruction *OuterInsertPoint,
1136     SmallVectorImpl<CHRScope *> &Output,
1137     DenseSet<Instruction *> &Unhoistables) {
1138   if (Outer) {
1139     assert(OuterConditionValues && "Null OuterConditionValues");
1140     assert(OuterInsertPoint && "Null OuterInsertPoint");
1141   }
1142   bool PrevSplitFromOuter = true;
1143   DenseSet<Value *> PrevConditionValues;
1144   Instruction *PrevInsertPoint = nullptr;
1145   SmallVector<CHRScope *, 8> Splits;
1146   SmallVector<bool, 8> SplitsSplitFromOuter;
1147   SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1148   SmallVector<Instruction *, 8> SplitsInsertPoints;
1149   SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
1150   for (RegInfo &RI : RegInfos) {
1151     Instruction *InsertPoint = getBranchInsertPoint(RI);
1152     DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1153     CHR_DEBUG(
1154         dbgs() << "ConditionValues ";
1155         for (Value *V : ConditionValues) {
1156           dbgs() << *V << ", ";
1157         }
1158         dbgs() << "\n");
1159     if (RI.R == RegInfos[0].R) {
1160       // First iteration. Check to see if we should split from the outer.
1161       if (Outer) {
1162         CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1163         CHR_DEBUG(dbgs() << "Should split from outer at "
1164                   << RI.R->getNameStr() << "\n");
1165         if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1166                         ConditionValues, DT, Unhoistables)) {
1167           PrevConditionValues = ConditionValues;
1168           PrevInsertPoint = InsertPoint;
1169           ORE.emit([&]() {
1170             return OptimizationRemarkMissed(DEBUG_TYPE,
1171                                             "SplitScopeFromOuter",
1172                                             RI.R->getEntry()->getTerminator())
1173                 << "Split scope from outer due to unhoistable branch/select "
1174                 << "and/or lack of common condition values";
1175           });
1176         } else {
1177           // Not splitting from the outer. Use the outer bases and insert
1178           // point. Union the bases.
1179           PrevSplitFromOuter = false;
1180           PrevConditionValues = *OuterConditionValues;
1181           PrevConditionValues.insert(ConditionValues.begin(),
1182                                      ConditionValues.end());
1183           PrevInsertPoint = OuterInsertPoint;
1184         }
1185       } else {
1186         CHR_DEBUG(dbgs() << "Outer null\n");
1187         PrevConditionValues = ConditionValues;
1188         PrevInsertPoint = InsertPoint;
1189       }
1190     } else {
1191       CHR_DEBUG(dbgs() << "Should split from prev at "
1192                 << RI.R->getNameStr() << "\n");
1193       if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1194                       DT, Unhoistables)) {
1195         CHRScope *Tail = Scope->split(RI.R);
1196         Scopes.insert(Tail);
1197         Splits.push_back(Scope);
1198         SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1199         SplitsConditionValues.push_back(PrevConditionValues);
1200         SplitsInsertPoints.push_back(PrevInsertPoint);
1201         Scope = Tail;
1202         PrevConditionValues = ConditionValues;
1203         PrevInsertPoint = InsertPoint;
1204         PrevSplitFromOuter = true;
1205         ORE.emit([&]() {
1206           return OptimizationRemarkMissed(DEBUG_TYPE,
1207                                           "SplitScopeFromPrev",
1208                                           RI.R->getEntry()->getTerminator())
1209               << "Split scope from previous due to unhoistable branch/select "
1210               << "and/or lack of common condition values";
1211         });
1212       } else {
1213         // Not splitting. Union the bases. Keep the hoist point.
1214         PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1215       }
1216     }
1217   }
1218   Splits.push_back(Scope);
1219   SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1220   SplitsConditionValues.push_back(PrevConditionValues);
1221   assert(PrevInsertPoint && "Null PrevInsertPoint");
1222   SplitsInsertPoints.push_back(PrevInsertPoint);
1223   assert(Splits.size() == SplitsConditionValues.size() &&
1224          Splits.size() == SplitsSplitFromOuter.size() &&
1225          Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1226   for (size_t I = 0; I < Splits.size(); ++I) {
1227     CHRScope *Split = Splits[I];
1228     DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1229     Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1230     SmallVector<CHRScope *, 8> NewSubs;
1231     DenseSet<Instruction *> SplitUnhoistables;
1232     getSelectsInScope(Split, SplitUnhoistables);
1233     for (CHRScope *Sub : Split->Subs) {
1234       SmallVector<CHRScope *, 8> SubSplits = splitScope(
1235           Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1236           SplitUnhoistables);
1237       llvm::append_range(NewSubs, SubSplits);
1238     }
1239     Split->Subs = NewSubs;
1240   }
1241   SmallVector<CHRScope *, 8> Result;
1242   for (size_t I = 0; I < Splits.size(); ++I) {
1243     CHRScope *Split = Splits[I];
1244     if (SplitsSplitFromOuter[I]) {
1245       // Split from the outer.
1246       Output.push_back(Split);
1247       Split->BranchInsertPoint = SplitsInsertPoints[I];
1248       CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1249                 << "\n");
1250     } else {
1251       // Connected to the outer.
1252       Result.push_back(Split);
1253     }
1254   }
1255   if (!Outer)
1256     assert(Result.empty() &&
1257            "If no outer (top-level), must return no nested ones");
1258   return Result;
1259 }
1260 
classifyBiasedScopes(SmallVectorImpl<CHRScope * > & Scopes)1261 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1262   for (CHRScope *Scope : Scopes) {
1263     assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1264     classifyBiasedScopes(Scope, Scope);
1265     CHR_DEBUG(
1266         dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1267         dbgs() << "TrueBiasedRegions ";
1268         for (Region *R : Scope->TrueBiasedRegions) {
1269           dbgs() << R->getNameStr() << ", ";
1270         }
1271         dbgs() << "\n";
1272         dbgs() << "FalseBiasedRegions ";
1273         for (Region *R : Scope->FalseBiasedRegions) {
1274           dbgs() << R->getNameStr() << ", ";
1275         }
1276         dbgs() << "\n";
1277         dbgs() << "TrueBiasedSelects ";
1278         for (SelectInst *SI : Scope->TrueBiasedSelects) {
1279           dbgs() << *SI << ", ";
1280         }
1281         dbgs() << "\n";
1282         dbgs() << "FalseBiasedSelects ";
1283         for (SelectInst *SI : Scope->FalseBiasedSelects) {
1284           dbgs() << *SI << ", ";
1285         }
1286         dbgs() << "\n";);
1287   }
1288 }
1289 
classifyBiasedScopes(CHRScope * Scope,CHRScope * OutermostScope)1290 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1291   for (RegInfo &RI : Scope->RegInfos) {
1292     if (RI.HasBranch) {
1293       Region *R = RI.R;
1294       if (TrueBiasedRegionsGlobal.contains(R))
1295         OutermostScope->TrueBiasedRegions.insert(R);
1296       else if (FalseBiasedRegionsGlobal.contains(R))
1297         OutermostScope->FalseBiasedRegions.insert(R);
1298       else
1299         llvm_unreachable("Must be biased");
1300     }
1301     for (SelectInst *SI : RI.Selects) {
1302       if (TrueBiasedSelectsGlobal.contains(SI))
1303         OutermostScope->TrueBiasedSelects.insert(SI);
1304       else if (FalseBiasedSelectsGlobal.contains(SI))
1305         OutermostScope->FalseBiasedSelects.insert(SI);
1306       else
1307         llvm_unreachable("Must be biased");
1308     }
1309   }
1310   for (CHRScope *Sub : Scope->Subs) {
1311     classifyBiasedScopes(Sub, OutermostScope);
1312   }
1313 }
1314 
hasAtLeastTwoBiasedBranches(CHRScope * Scope)1315 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1316   unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1317                        Scope->FalseBiasedRegions.size() +
1318                        Scope->TrueBiasedSelects.size() +
1319                        Scope->FalseBiasedSelects.size();
1320   return NumBiased >= CHRMergeThreshold;
1321 }
1322 
filterScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1323 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1324                        SmallVectorImpl<CHRScope *> &Output) {
1325   for (CHRScope *Scope : Input) {
1326     // Filter out the ones with only one region and no subs.
1327     if (!hasAtLeastTwoBiasedBranches(Scope)) {
1328       CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1329                 << Scope->TrueBiasedRegions.size()
1330                 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1331                 << " true-selects " << Scope->TrueBiasedSelects.size()
1332                 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1333       ORE.emit([&]() {
1334         return OptimizationRemarkMissed(
1335             DEBUG_TYPE,
1336             "DropScopeWithOneBranchOrSelect",
1337             Scope->RegInfos[0].R->getEntry()->getTerminator())
1338             << "Drop scope with < "
1339             << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1340             << " biased branch(es) or select(s)";
1341       });
1342       continue;
1343     }
1344     Output.push_back(Scope);
1345   }
1346 }
1347 
setCHRRegions(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1348 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1349                         SmallVectorImpl<CHRScope *> &Output) {
1350   for (CHRScope *Scope : Input) {
1351     assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1352            "Empty");
1353     setCHRRegions(Scope, Scope);
1354     Output.push_back(Scope);
1355     CHR_DEBUG(
1356         dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1357         for (auto pair : Scope->HoistStopMap) {
1358           Region *R = pair.first;
1359           dbgs() << "Region " << R->getNameStr() << "\n";
1360           for (Instruction *I : pair.second) {
1361             dbgs() << "HoistStop " << *I << "\n";
1362           }
1363         }
1364         dbgs() << "CHRRegions" << "\n";
1365         for (RegInfo &RI : Scope->CHRRegions) {
1366           dbgs() << RI.R->getNameStr() << "\n";
1367         });
1368   }
1369 }
1370 
setCHRRegions(CHRScope * Scope,CHRScope * OutermostScope)1371 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1372   DenseSet<Instruction *> Unhoistables;
1373   // Put the biased selects in Unhoistables because they should stay where they
1374   // are and constant-folded after CHR (in case one biased select or a branch
1375   // can depend on another biased select.)
1376   for (RegInfo &RI : Scope->RegInfos) {
1377     for (SelectInst *SI : RI.Selects) {
1378       Unhoistables.insert(SI);
1379     }
1380   }
1381   Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1382   for (RegInfo &RI : Scope->RegInfos) {
1383     Region *R = RI.R;
1384     DenseSet<Instruction *> HoistStops;
1385     bool IsHoisted = false;
1386     if (RI.HasBranch) {
1387       assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1388               OutermostScope->FalseBiasedRegions.contains(R)) &&
1389              "Must be truthy or falsy");
1390       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1391       // Note checkHoistValue fills in HoistStops.
1392       DenseMap<Instruction *, bool> Visited;
1393       bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1394                                          Unhoistables, &HoistStops, Visited);
1395       assert(IsHoistable && "Must be hoistable");
1396       (void)(IsHoistable);  // Unused in release build
1397       IsHoisted = true;
1398     }
1399     for (SelectInst *SI : RI.Selects) {
1400       assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1401               OutermostScope->FalseBiasedSelects.contains(SI)) &&
1402              "Must be true or false biased");
1403       // Note checkHoistValue fills in HoistStops.
1404       DenseMap<Instruction *, bool> Visited;
1405       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1406                                          Unhoistables, &HoistStops, Visited);
1407       assert(IsHoistable && "Must be hoistable");
1408       (void)(IsHoistable);  // Unused in release build
1409       IsHoisted = true;
1410     }
1411     if (IsHoisted) {
1412       OutermostScope->CHRRegions.push_back(RI);
1413       OutermostScope->HoistStopMap[R] = HoistStops;
1414     }
1415   }
1416   for (CHRScope *Sub : Scope->Subs)
1417     setCHRRegions(Sub, OutermostScope);
1418 }
1419 
CHRScopeSorter(CHRScope * Scope1,CHRScope * Scope2)1420 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1421   return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1422 }
1423 
sortScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1424 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1425                      SmallVectorImpl<CHRScope *> &Output) {
1426   Output.resize(Input.size());
1427   llvm::copy(Input, Output.begin());
1428   llvm::stable_sort(Output, CHRScopeSorter);
1429 }
1430 
1431 // Return true if V is already hoisted or was hoisted (along with its operands)
1432 // to the insert point.
hoistValue(Value * V,Instruction * HoistPoint,Region * R,HoistStopMapTy & HoistStopMap,DenseSet<Instruction * > & HoistedSet,DenseSet<PHINode * > & TrivialPHIs,DominatorTree & DT)1433 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1434                        HoistStopMapTy &HoistStopMap,
1435                        DenseSet<Instruction *> &HoistedSet,
1436                        DenseSet<PHINode *> &TrivialPHIs,
1437                        DominatorTree &DT) {
1438   auto IT = HoistStopMap.find(R);
1439   assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1440   DenseSet<Instruction *> &HoistStops = IT->second;
1441   if (auto *I = dyn_cast<Instruction>(V)) {
1442     if (I == HoistPoint)
1443       return;
1444     if (HoistStops.count(I))
1445       return;
1446     if (auto *PN = dyn_cast<PHINode>(I))
1447       if (TrivialPHIs.count(PN))
1448         // The trivial phi inserted by the previous CHR scope could replace a
1449         // non-phi in HoistStops. Note that since this phi is at the exit of a
1450         // previous CHR scope, which dominates this scope, it's safe to stop
1451         // hoisting there.
1452         return;
1453     if (HoistedSet.count(I))
1454       // Already hoisted, return.
1455       return;
1456     assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1457     assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1458     assert(DT.getNode(HoistPoint->getParent()) &&
1459            "DT must contain HoistPoint block");
1460     if (DT.dominates(I, HoistPoint))
1461       // We are already above the hoist point. Stop here. This may be necessary
1462       // when multiple scopes would independently hoist the same
1463       // instruction. Since an outer (dominating) scope would hoist it to its
1464       // entry before an inner (dominated) scope would to its entry, the inner
1465       // scope may see the instruction already hoisted, in which case it
1466       // potentially wrong for the inner scope to hoist it and could cause bad
1467       // IR (non-dominating def), but safe to skip hoisting it instead because
1468       // it's already in a block that dominates the inner scope.
1469       return;
1470     for (Value *Op : I->operands()) {
1471       hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1472     }
1473     I->moveBefore(HoistPoint);
1474     HoistedSet.insert(I);
1475     CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1476   }
1477 }
1478 
1479 // Hoist the dependent condition values of the branches and the selects in the
1480 // scope to the insert point.
hoistScopeConditions(CHRScope * Scope,Instruction * HoistPoint,DenseSet<PHINode * > & TrivialPHIs,DominatorTree & DT)1481 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1482                                  DenseSet<PHINode *> &TrivialPHIs,
1483                                  DominatorTree &DT) {
1484   DenseSet<Instruction *> HoistedSet;
1485   for (const RegInfo &RI : Scope->CHRRegions) {
1486     Region *R = RI.R;
1487     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1488     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1489     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1490       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1491       hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1492                  HoistedSet, TrivialPHIs, DT);
1493     }
1494     for (SelectInst *SI : RI.Selects) {
1495       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1496       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1497       if (!(IsTrueBiased || IsFalseBiased))
1498         continue;
1499       hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1500                  HoistedSet, TrivialPHIs, DT);
1501     }
1502   }
1503 }
1504 
1505 // Negate the predicate if an ICmp if it's used only by branches or selects by
1506 // swapping the operands of the branches or the selects. Returns true if success.
negateICmpIfUsedByBranchOrSelectOnly(ICmpInst * ICmp,Instruction * ExcludedUser,CHRScope * Scope)1507 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1508                                                  Instruction *ExcludedUser,
1509                                                  CHRScope *Scope) {
1510   for (User *U : ICmp->users()) {
1511     if (U == ExcludedUser)
1512       continue;
1513     if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1514       continue;
1515     if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1516       continue;
1517     return false;
1518   }
1519   for (User *U : ICmp->users()) {
1520     if (U == ExcludedUser)
1521       continue;
1522     if (auto *BI = dyn_cast<BranchInst>(U)) {
1523       assert(BI->isConditional() && "Must be conditional");
1524       BI->swapSuccessors();
1525       // Don't need to swap this in terms of
1526       // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1527       // mean whehter the branch is likely go into the if-then rather than
1528       // successor0/successor1 and because we can tell which edge is the then or
1529       // the else one by comparing the destination to the region exit block.
1530       continue;
1531     }
1532     if (auto *SI = dyn_cast<SelectInst>(U)) {
1533       // Swap operands
1534       SI->swapValues();
1535       SI->swapProfMetadata();
1536       if (Scope->TrueBiasedSelects.count(SI)) {
1537         assert(!Scope->FalseBiasedSelects.contains(SI) &&
1538                "Must not be already in");
1539         Scope->FalseBiasedSelects.insert(SI);
1540       } else if (Scope->FalseBiasedSelects.count(SI)) {
1541         assert(!Scope->TrueBiasedSelects.contains(SI) &&
1542                "Must not be already in");
1543         Scope->TrueBiasedSelects.insert(SI);
1544       }
1545       continue;
1546     }
1547     llvm_unreachable("Must be a branch or a select");
1548   }
1549   ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1550   return true;
1551 }
1552 
1553 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1554 // for a value that's defined in the scope but used outside it (meaning it's
1555 // alive at the exit block).
insertTrivialPHIs(CHRScope * Scope,BasicBlock * EntryBlock,BasicBlock * ExitBlock,DenseSet<PHINode * > & TrivialPHIs)1556 static void insertTrivialPHIs(CHRScope *Scope,
1557                               BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1558                               DenseSet<PHINode *> &TrivialPHIs) {
1559   SmallSetVector<BasicBlock *, 8> BlocksInScope;
1560   for (RegInfo &RI : Scope->RegInfos) {
1561     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1562                                             // sub-Scopes.
1563       BlocksInScope.insert(BB);
1564     }
1565   }
1566   CHR_DEBUG({
1567     dbgs() << "Inserting redundant phis\n";
1568     for (BasicBlock *BB : BlocksInScope)
1569       dbgs() << "BlockInScope " << BB->getName() << "\n";
1570   });
1571   for (BasicBlock *BB : BlocksInScope) {
1572     for (Instruction &I : *BB) {
1573       SmallVector<Instruction *, 8> Users;
1574       for (User *U : I.users()) {
1575         if (auto *UI = dyn_cast<Instruction>(U)) {
1576           if (!BlocksInScope.contains(UI->getParent()) &&
1577               // Unless there's already a phi for I at the exit block.
1578               !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1579             CHR_DEBUG(dbgs() << "V " << I << "\n");
1580             CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1581             Users.push_back(UI);
1582           } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1583             // There's a loop backedge from a block that's dominated by this
1584             // scope to the entry block.
1585             CHR_DEBUG(dbgs() << "V " << I << "\n");
1586             CHR_DEBUG(dbgs()
1587                       << "Used at entry block (for a back edge) by a phi user "
1588                       << *UI << "\n");
1589             Users.push_back(UI);
1590           }
1591         }
1592       }
1593       if (Users.size() > 0) {
1594         // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1595         // ExitBlock. Replace I with the new phi in UI unless UI is another
1596         // phi at ExitBlock.
1597         PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1598         PN->insertBefore(ExitBlock->begin());
1599         for (BasicBlock *Pred : predecessors(ExitBlock)) {
1600           PN->addIncoming(&I, Pred);
1601         }
1602         TrivialPHIs.insert(PN);
1603         CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1604         for (Instruction *UI : Users) {
1605           for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1606             if (UI->getOperand(J) == &I) {
1607               UI->setOperand(J, PN);
1608             }
1609           }
1610           CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1611         }
1612       }
1613     }
1614   }
1615 }
1616 
1617 // Assert that all the CHR regions of the scope have a biased branch or select.
1618 static void LLVM_ATTRIBUTE_UNUSED
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope * Scope)1619 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1620 #ifndef NDEBUG
1621   auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1622     if (Scope->TrueBiasedRegions.count(RI.R) ||
1623         Scope->FalseBiasedRegions.count(RI.R))
1624       return true;
1625     for (SelectInst *SI : RI.Selects)
1626       if (Scope->TrueBiasedSelects.count(SI) ||
1627           Scope->FalseBiasedSelects.count(SI))
1628         return true;
1629     return false;
1630   };
1631   for (RegInfo &RI : Scope->CHRRegions) {
1632     assert(HasBiasedBranchOrSelect(RI, Scope) &&
1633            "Must have biased branch or select");
1634   }
1635 #endif
1636 }
1637 
1638 // Assert that all the condition values of the biased branches and selects have
1639 // been hoisted to the pre-entry block or outside of the scope.
assertBranchOrSelectConditionHoisted(CHRScope * Scope,BasicBlock * PreEntryBlock)1640 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1641     CHRScope *Scope, BasicBlock *PreEntryBlock) {
1642   CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1643   for (RegInfo &RI : Scope->CHRRegions) {
1644     Region *R = RI.R;
1645     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1646     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1647     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1648       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1649       Value *V = BI->getCondition();
1650       CHR_DEBUG(dbgs() << *V << "\n");
1651       if (auto *I = dyn_cast<Instruction>(V)) {
1652         (void)(I); // Unused in release build.
1653         assert((I->getParent() == PreEntryBlock ||
1654                 !Scope->contains(I)) &&
1655                "Must have been hoisted to PreEntryBlock or outside the scope");
1656       }
1657     }
1658     for (SelectInst *SI : RI.Selects) {
1659       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1660       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1661       if (!(IsTrueBiased || IsFalseBiased))
1662         continue;
1663       Value *V = SI->getCondition();
1664       CHR_DEBUG(dbgs() << *V << "\n");
1665       if (auto *I = dyn_cast<Instruction>(V)) {
1666         (void)(I); // Unused in release build.
1667         assert((I->getParent() == PreEntryBlock ||
1668                 !Scope->contains(I)) &&
1669                "Must have been hoisted to PreEntryBlock or outside the scope");
1670       }
1671     }
1672   }
1673 }
1674 
transformScopes(CHRScope * Scope,DenseSet<PHINode * > & TrivialPHIs)1675 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1676   CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1677 
1678   assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1679 
1680   for (RegInfo &RI : Scope->RegInfos) {
1681     const Region *R = RI.R;
1682     unsigned Duplication = getRegionDuplicationCount(R);
1683     CHR_DEBUG(dbgs() << "Dup count for R=" << R << "  is " << Duplication
1684                      << "\n");
1685     if (Duplication >= CHRDupThreshsold) {
1686       CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1687                        << " for this region");
1688       ORE.emit([&]() {
1689         return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1690                                         R->getEntry()->getTerminator())
1691                << "Reached the duplication threshold for the region";
1692       });
1693       return;
1694     }
1695   }
1696   for (RegInfo &RI : Scope->RegInfos) {
1697     DuplicationCount[RI.R]++;
1698   }
1699 
1700   Region *FirstRegion = Scope->RegInfos[0].R;
1701   BasicBlock *EntryBlock = FirstRegion->getEntry();
1702   Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1703   BasicBlock *ExitBlock = LastRegion->getExit();
1704   std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1705 
1706   if (ExitBlock) {
1707     // Insert a trivial phi at the exit block (where the CHR hot path and the
1708     // cold path merges) for a value that's defined in the scope but used
1709     // outside it (meaning it's alive at the exit block). We will add the
1710     // incoming values for the CHR cold paths to it below. Without this, we'd
1711     // miss updating phi's for such values unless there happens to already be a
1712     // phi for that value there.
1713     insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1714   }
1715 
1716   // Split the entry block of the first region. The new block becomes the new
1717   // entry block of the first region. The old entry block becomes the block to
1718   // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1719   // through the split, we update the entry of the first region after the split,
1720   // and Region only points to the entry and the exit blocks, rather than
1721   // keeping everything in a list or set, the blocks membership and the
1722   // entry/exit blocks of the region are still valid after the split.
1723   CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1724             << " at " << *Scope->BranchInsertPoint << "\n");
1725   BasicBlock *NewEntryBlock =
1726       SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1727   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1728          "NewEntryBlock's only pred must be EntryBlock");
1729   FirstRegion->replaceEntryRecursive(NewEntryBlock);
1730   BasicBlock *PreEntryBlock = EntryBlock;
1731 
1732   ValueToValueMapTy VMap;
1733   // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1734   // hot path (originals) and a cold path (clones) and update the PHIs at the
1735   // exit block.
1736   cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1737 
1738   // Replace the old (placeholder) branch with the new (merged) conditional
1739   // branch.
1740   BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1741                                             NewEntryBlock, VMap);
1742 
1743 #ifndef NDEBUG
1744   assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1745 #endif
1746 
1747   // Hoist the conditional values of the branches/selects.
1748   hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1749 
1750 #ifndef NDEBUG
1751   assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1752 #endif
1753 
1754   // Create the combined branch condition and constant-fold the branches/selects
1755   // in the hot path.
1756   fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1757                           ProfileCount.value_or(0));
1758 }
1759 
1760 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1761 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1762 // at the exit block.
cloneScopeBlocks(CHRScope * Scope,BasicBlock * PreEntryBlock,BasicBlock * ExitBlock,Region * LastRegion,ValueToValueMapTy & VMap)1763 void CHR::cloneScopeBlocks(CHRScope *Scope,
1764                            BasicBlock *PreEntryBlock,
1765                            BasicBlock *ExitBlock,
1766                            Region *LastRegion,
1767                            ValueToValueMapTy &VMap) {
1768   // Clone all the blocks. The original blocks will be the hot-path
1769   // CHR-optimized code and the cloned blocks will be the original unoptimized
1770   // code. This is so that the block pointers from the
1771   // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1772   // which CHR should apply to.
1773   SmallVector<BasicBlock*, 8> NewBlocks;
1774   for (RegInfo &RI : Scope->RegInfos)
1775     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1776                                             // sub-Scopes.
1777       assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1778       BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1779       NewBlocks.push_back(NewBB);
1780       VMap[BB] = NewBB;
1781 
1782       // Unreachable predecessors will not be cloned and will not have an edge
1783       // to the cloned block. As such, also remove them from any phi nodes.
1784       for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1785         PN.removeIncomingValueIf([&](unsigned Idx) {
1786           return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1787         });
1788     }
1789 
1790   // Place the cloned blocks right after the original blocks (right before the
1791   // exit block of.)
1792   if (ExitBlock)
1793     F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1794              F.end());
1795 
1796   // Update the cloned blocks/instructions to refer to themselves.
1797   for (BasicBlock *NewBB : NewBlocks)
1798     for (Instruction &I : *NewBB)
1799       RemapInstruction(&I, VMap,
1800                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1801 
1802   // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1803   // the top-level region but we don't need to add PHIs. The trivial PHIs
1804   // inserted above will be updated here.
1805   if (ExitBlock)
1806     for (PHINode &PN : ExitBlock->phis())
1807       for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1808            ++I) {
1809         BasicBlock *Pred = PN.getIncomingBlock(I);
1810         if (LastRegion->contains(Pred)) {
1811           Value *V = PN.getIncomingValue(I);
1812           auto It = VMap.find(V);
1813           if (It != VMap.end()) V = It->second;
1814           assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1815           PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1816         }
1817       }
1818 }
1819 
1820 // A helper for transformScope. Replace the old (placeholder) branch with the
1821 // new (merged) conditional branch.
createMergedBranch(BasicBlock * PreEntryBlock,BasicBlock * EntryBlock,BasicBlock * NewEntryBlock,ValueToValueMapTy & VMap)1822 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1823                                     BasicBlock *EntryBlock,
1824                                     BasicBlock *NewEntryBlock,
1825                                     ValueToValueMapTy &VMap) {
1826   BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1827   assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1828          "SplitBlock did not work correctly!");
1829   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1830          "NewEntryBlock's only pred must be EntryBlock");
1831   assert(VMap.find(NewEntryBlock) != VMap.end() &&
1832          "NewEntryBlock must have been copied");
1833   OldBR->dropAllReferences();
1834   OldBR->eraseFromParent();
1835   // The true predicate is a placeholder. It will be replaced later in
1836   // fixupBranchesAndSelects().
1837   BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1838                                          cast<BasicBlock>(VMap[NewEntryBlock]),
1839                                          ConstantInt::getTrue(F.getContext()));
1840   NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1841   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1842          "NewEntryBlock's only pred must be EntryBlock");
1843   return NewBR;
1844 }
1845 
1846 // A helper for transformScopes. Create the combined branch condition and
1847 // constant-fold the branches/selects in the hot path.
fixupBranchesAndSelects(CHRScope * Scope,BasicBlock * PreEntryBlock,BranchInst * MergedBR,uint64_t ProfileCount)1848 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1849                                   BasicBlock *PreEntryBlock,
1850                                   BranchInst *MergedBR,
1851                                   uint64_t ProfileCount) {
1852   Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1853   BranchProbability CHRBranchBias(1, 1);
1854   uint64_t NumCHRedBranches = 0;
1855   IRBuilder<> IRB(PreEntryBlock->getTerminator());
1856   for (RegInfo &RI : Scope->CHRRegions) {
1857     Region *R = RI.R;
1858     if (RI.HasBranch) {
1859       fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1860       ++NumCHRedBranches;
1861     }
1862     for (SelectInst *SI : RI.Selects) {
1863       fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1864       ++NumCHRedBranches;
1865     }
1866   }
1867   Stats.NumBranchesDelta += NumCHRedBranches - 1;
1868   Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1869   ORE.emit([&]() {
1870     return OptimizationRemark(DEBUG_TYPE,
1871                               "CHR",
1872                               // Refer to the hot (original) path
1873                               MergedBR->getSuccessor(0)->getTerminator())
1874         << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1875         << " branches or selects";
1876   });
1877   MergedBR->setCondition(MergedCondition);
1878   uint32_t Weights[] = {
1879       static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1880       static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1881   };
1882   setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1883   CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1884             << "\n");
1885 }
1886 
1887 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1888 // and constant-fold a branch in the hot path.
fixupBranch(Region * R,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition,BranchProbability & CHRBranchBias)1889 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1890                       IRBuilder<> &IRB,
1891                       Value *&MergedCondition,
1892                       BranchProbability &CHRBranchBias) {
1893   bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1894   assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1895          "Must be truthy or falsy");
1896   auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1897   assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1898   BranchProbability Bias = BranchBiasMap[R];
1899   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1900   // Take the min.
1901   if (CHRBranchBias > Bias)
1902     CHRBranchBias = Bias;
1903   BasicBlock *IfThen = BI->getSuccessor(1);
1904   BasicBlock *IfElse = BI->getSuccessor(0);
1905   BasicBlock *RegionExitBlock = R->getExit();
1906   assert(RegionExitBlock && "Null ExitBlock");
1907   assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1908          IfThen != IfElse && "Invariant from findScopes");
1909   if (IfThen == RegionExitBlock) {
1910     // Swap them so that IfThen means going into it and IfElse means skipping
1911     // it.
1912     std::swap(IfThen, IfElse);
1913   }
1914   CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1915             << " IfElse " << IfElse->getName() << "\n");
1916   Value *Cond = BI->getCondition();
1917   BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1918   bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1919   addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1920                        MergedCondition);
1921   // Constant-fold the branch at ClonedEntryBlock.
1922   assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1923          "The successor shouldn't change");
1924   Value *NewCondition = ConditionTrue ?
1925                         ConstantInt::getTrue(F.getContext()) :
1926                         ConstantInt::getFalse(F.getContext());
1927   BI->setCondition(NewCondition);
1928 }
1929 
1930 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1931 // and constant-fold a select in the hot path.
fixupSelect(SelectInst * SI,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition,BranchProbability & CHRBranchBias)1932 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1933                       IRBuilder<> &IRB,
1934                       Value *&MergedCondition,
1935                       BranchProbability &CHRBranchBias) {
1936   bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1937   assert((IsTrueBiased ||
1938           Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1939   assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1940   BranchProbability Bias = SelectBiasMap[SI];
1941   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1942   // Take the min.
1943   if (CHRBranchBias > Bias)
1944     CHRBranchBias = Bias;
1945   Value *Cond = SI->getCondition();
1946   addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1947                        MergedCondition);
1948   Value *NewCondition = IsTrueBiased ?
1949                         ConstantInt::getTrue(F.getContext()) :
1950                         ConstantInt::getFalse(F.getContext());
1951   SI->setCondition(NewCondition);
1952 }
1953 
1954 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1955 // condition.
addToMergedCondition(bool IsTrueBiased,Value * Cond,Instruction * BranchOrSelect,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition)1956 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1957                                Instruction *BranchOrSelect, CHRScope *Scope,
1958                                IRBuilder<> &IRB, Value *&MergedCondition) {
1959   if (!IsTrueBiased) {
1960     // If Cond is an icmp and all users of V except for BranchOrSelect is a
1961     // branch, negate the icmp predicate and swap the branch targets and avoid
1962     // inserting an Xor to negate Cond.
1963     auto *ICmp = dyn_cast<ICmpInst>(Cond);
1964     if (!ICmp ||
1965         !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1966       Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1967   }
1968 
1969   // Freeze potentially poisonous conditions.
1970   if (!isGuaranteedNotToBeUndefOrPoison(Cond))
1971     Cond = IRB.CreateFreeze(Cond);
1972 
1973   // Use logical and to avoid propagating poison from later conditions.
1974   MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
1975 }
1976 
transformScopes(SmallVectorImpl<CHRScope * > & CHRScopes)1977 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1978   unsigned I = 0;
1979   DenseSet<PHINode *> TrivialPHIs;
1980   for (CHRScope *Scope : CHRScopes) {
1981     transformScopes(Scope, TrivialPHIs);
1982     CHR_DEBUG(
1983         std::ostringstream oss;
1984         oss << " after transformScopes " << I++;
1985         dumpIR(F, oss.str().c_str(), nullptr));
1986     (void)I;
1987   }
1988 }
1989 
1990 static void LLVM_ATTRIBUTE_UNUSED
dumpScopes(SmallVectorImpl<CHRScope * > & Scopes,const char * Label)1991 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1992   dbgs() << Label << " " << Scopes.size() << "\n";
1993   for (CHRScope *Scope : Scopes) {
1994     dbgs() << *Scope << "\n";
1995   }
1996 }
1997 
run()1998 bool CHR::run() {
1999   if (!shouldApply(F, PSI))
2000     return false;
2001 
2002   CHR_DEBUG(dumpIR(F, "before", nullptr));
2003 
2004   bool Changed = false;
2005   {
2006     CHR_DEBUG(
2007         dbgs() << "RegionInfo:\n";
2008         RI.print(dbgs()));
2009 
2010     // Recursively traverse the region tree and find regions that have biased
2011     // branches and/or selects and create scopes.
2012     SmallVector<CHRScope *, 8> AllScopes;
2013     findScopes(AllScopes);
2014     CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2015 
2016     // Split the scopes if 1) the conditional values of the biased
2017     // branches/selects of the inner/lower scope can't be hoisted up to the
2018     // outermost/uppermost scope entry, or 2) the condition values of the biased
2019     // branches/selects in a scope (including subscopes) don't share at least
2020     // one common value.
2021     SmallVector<CHRScope *, 8> SplitScopes;
2022     splitScopes(AllScopes, SplitScopes);
2023     CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2024 
2025     // After splitting, set the biased regions and selects of a scope (a tree
2026     // root) that include those of the subscopes.
2027     classifyBiasedScopes(SplitScopes);
2028     CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2029 
2030     // Filter out the scopes that has only one biased region or select (CHR
2031     // isn't useful in such a case).
2032     SmallVector<CHRScope *, 8> FilteredScopes;
2033     filterScopes(SplitScopes, FilteredScopes);
2034     CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2035 
2036     // Set the regions to be CHR'ed and their hoist stops for each scope.
2037     SmallVector<CHRScope *, 8> SetScopes;
2038     setCHRRegions(FilteredScopes, SetScopes);
2039     CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2040 
2041     // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2042     // ones. We need to apply CHR from outer to inner so that we apply CHR only
2043     // to the hot path, rather than both hot and cold paths.
2044     SmallVector<CHRScope *, 8> SortedScopes;
2045     sortScopes(SetScopes, SortedScopes);
2046     CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2047 
2048     CHR_DEBUG(
2049         dbgs() << "RegionInfo:\n";
2050         RI.print(dbgs()));
2051 
2052     // Apply the CHR transformation.
2053     if (!SortedScopes.empty()) {
2054       transformScopes(SortedScopes);
2055       Changed = true;
2056     }
2057   }
2058 
2059   if (Changed) {
2060     CHR_DEBUG(dumpIR(F, "after", &Stats));
2061     ORE.emit([&]() {
2062       return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2063           << ore::NV("Function", &F) << " "
2064           << "Reduced the number of branches in hot paths by "
2065           << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2066           << " (static) and "
2067           << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2068           << " (weighted by PGO count)";
2069     });
2070   }
2071 
2072   return Changed;
2073 }
2074 
2075 namespace llvm {
2076 
ControlHeightReductionPass()2077 ControlHeightReductionPass::ControlHeightReductionPass() {
2078   parseCHRFilterFiles();
2079 }
2080 
run(Function & F,FunctionAnalysisManager & FAM)2081 PreservedAnalyses ControlHeightReductionPass::run(
2082     Function &F,
2083     FunctionAnalysisManager &FAM) {
2084   auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2085   auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2086   // If there is no profile summary, we should not do CHR.
2087   if (!PPSI || !PPSI->hasProfileSummary())
2088     return PreservedAnalyses::all();
2089   auto &PSI = *PPSI;
2090   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2091   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2092   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2093   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2094   bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2095   if (!Changed)
2096     return PreservedAnalyses::all();
2097   return PreservedAnalyses::none();
2098 }
2099 
2100 } // namespace llvm
2101