xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (revision 77013d11e6483b970af25e13c9b892075742f7e5)
1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges conditional blocks of code and reduces the number of
10 // conditional branches in the hot paths based on profiles.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/RegionInfo.h"
24 #include "llvm/Analysis/RegionIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/BranchProbability.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Transforms/Utils.h"
35 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
36 #include "llvm/Transforms/Utils/Cloning.h"
37 #include "llvm/Transforms/Utils/ValueMapper.h"
38 
39 #include <set>
40 #include <sstream>
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "chr"
45 
46 #define CHR_DEBUG(X) LLVM_DEBUG(X)
47 
48 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
49                               cl::desc("Apply CHR for all functions"));
50 
51 static cl::opt<double> CHRBiasThreshold(
52     "chr-bias-threshold", cl::init(0.99), cl::Hidden,
53     cl::desc("CHR considers a branch bias greater than this ratio as biased"));
54 
55 static cl::opt<unsigned> CHRMergeThreshold(
56     "chr-merge-threshold", cl::init(2), cl::Hidden,
57     cl::desc("CHR merges a group of N branches/selects where N >= this value"));
58 
59 static cl::opt<std::string> CHRModuleList(
60     "chr-module-list", cl::init(""), cl::Hidden,
61     cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
62 
63 static cl::opt<std::string> CHRFunctionList(
64     "chr-function-list", cl::init(""), cl::Hidden,
65     cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
66 
67 static StringSet<> CHRModules;
68 static StringSet<> CHRFunctions;
69 
70 static void parseCHRFilterFiles() {
71   if (!CHRModuleList.empty()) {
72     auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
73     if (!FileOrErr) {
74       errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
75       std::exit(1);
76     }
77     StringRef Buf = FileOrErr->get()->getBuffer();
78     SmallVector<StringRef, 0> Lines;
79     Buf.split(Lines, '\n');
80     for (StringRef Line : Lines) {
81       Line = Line.trim();
82       if (!Line.empty())
83         CHRModules.insert(Line);
84     }
85   }
86   if (!CHRFunctionList.empty()) {
87     auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
88     if (!FileOrErr) {
89       errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
90       std::exit(1);
91     }
92     StringRef Buf = FileOrErr->get()->getBuffer();
93     SmallVector<StringRef, 0> Lines;
94     Buf.split(Lines, '\n');
95     for (StringRef Line : Lines) {
96       Line = Line.trim();
97       if (!Line.empty())
98         CHRFunctions.insert(Line);
99     }
100   }
101 }
102 
103 namespace {
104 class ControlHeightReductionLegacyPass : public FunctionPass {
105 public:
106   static char ID;
107 
108   ControlHeightReductionLegacyPass() : FunctionPass(ID) {
109     initializeControlHeightReductionLegacyPassPass(
110         *PassRegistry::getPassRegistry());
111     parseCHRFilterFiles();
112   }
113 
114   bool runOnFunction(Function &F) override;
115   void getAnalysisUsage(AnalysisUsage &AU) const override {
116     AU.addRequired<BlockFrequencyInfoWrapperPass>();
117     AU.addRequired<DominatorTreeWrapperPass>();
118     AU.addRequired<ProfileSummaryInfoWrapperPass>();
119     AU.addRequired<RegionInfoPass>();
120     AU.addPreserved<GlobalsAAWrapperPass>();
121   }
122 };
123 } // end anonymous namespace
124 
125 char ControlHeightReductionLegacyPass::ID = 0;
126 
127 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
128                       "chr",
129                       "Reduce control height in the hot paths",
130                       false, false)
131 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
132 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
133 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
134 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
135 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
136                     "chr",
137                     "Reduce control height in the hot paths",
138                     false, false)
139 
140 FunctionPass *llvm::createControlHeightReductionLegacyPass() {
141   return new ControlHeightReductionLegacyPass();
142 }
143 
144 namespace {
145 
146 struct CHRStats {
147   CHRStats() : NumBranches(0), NumBranchesDelta(0),
148                WeightedNumBranchesDelta(0) {}
149   void print(raw_ostream &OS) const {
150     OS << "CHRStats: NumBranches " << NumBranches
151        << " NumBranchesDelta " << NumBranchesDelta
152        << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
153   }
154   uint64_t NumBranches;       // The original number of conditional branches /
155                               // selects
156   uint64_t NumBranchesDelta;  // The decrease of the number of conditional
157                               // branches / selects in the hot paths due to CHR.
158   uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
159                                      // count at the scope entry.
160 };
161 
162 // RegInfo - some properties of a Region.
163 struct RegInfo {
164   RegInfo() : R(nullptr), HasBranch(false) {}
165   RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
166   Region *R;
167   bool HasBranch;
168   SmallVector<SelectInst *, 8> Selects;
169 };
170 
171 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
172 
173 // CHRScope - a sequence of regions to CHR together. It corresponds to a
174 // sequence of conditional blocks. It can have subscopes which correspond to
175 // nested conditional blocks. Nested CHRScopes form a tree.
176 class CHRScope {
177  public:
178   CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
179     assert(RI.R && "Null RegionIn");
180     RegInfos.push_back(RI);
181   }
182 
183   Region *getParentRegion() {
184     assert(RegInfos.size() > 0 && "Empty CHRScope");
185     Region *Parent = RegInfos[0].R->getParent();
186     assert(Parent && "Unexpected to call this on the top-level region");
187     return Parent;
188   }
189 
190   BasicBlock *getEntryBlock() {
191     assert(RegInfos.size() > 0 && "Empty CHRScope");
192     return RegInfos.front().R->getEntry();
193   }
194 
195   BasicBlock *getExitBlock() {
196     assert(RegInfos.size() > 0 && "Empty CHRScope");
197     return RegInfos.back().R->getExit();
198   }
199 
200   bool appendable(CHRScope *Next) {
201     // The next scope is appendable only if this scope is directly connected to
202     // it (which implies it post-dominates this scope) and this scope dominates
203     // it (no edge to the next scope outside this scope).
204     BasicBlock *NextEntry = Next->getEntryBlock();
205     if (getExitBlock() != NextEntry)
206       // Not directly connected.
207       return false;
208     Region *LastRegion = RegInfos.back().R;
209     for (BasicBlock *Pred : predecessors(NextEntry))
210       if (!LastRegion->contains(Pred))
211         // There's an edge going into the entry of the next scope from outside
212         // of this scope.
213         return false;
214     return true;
215   }
216 
217   void append(CHRScope *Next) {
218     assert(RegInfos.size() > 0 && "Empty CHRScope");
219     assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
220     assert(getParentRegion() == Next->getParentRegion() &&
221            "Must be siblings");
222     assert(getExitBlock() == Next->getEntryBlock() &&
223            "Must be adjacent");
224     RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
225     Subs.append(Next->Subs.begin(), Next->Subs.end());
226   }
227 
228   void addSub(CHRScope *SubIn) {
229 #ifndef NDEBUG
230     bool IsChild = false;
231     for (RegInfo &RI : RegInfos)
232       if (RI.R == SubIn->getParentRegion()) {
233         IsChild = true;
234         break;
235       }
236     assert(IsChild && "Must be a child");
237 #endif
238     Subs.push_back(SubIn);
239   }
240 
241   // Split this scope at the boundary region into two, which will belong to the
242   // tail and returns the tail.
243   CHRScope *split(Region *Boundary) {
244     assert(Boundary && "Boundary null");
245     assert(RegInfos.begin()->R != Boundary &&
246            "Can't be split at beginning");
247     auto BoundaryIt = llvm::find_if(
248         RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
249     if (BoundaryIt == RegInfos.end())
250       return nullptr;
251     ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
252     DenseSet<Region *> TailRegionSet;
253     for (const RegInfo &RI : TailRegInfos)
254       TailRegionSet.insert(RI.R);
255 
256     auto TailIt =
257         std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
258           assert(Sub && "null Sub");
259           Region *Parent = Sub->getParentRegion();
260           if (TailRegionSet.count(Parent))
261             return false;
262 
263           assert(llvm::any_of(
264                      RegInfos,
265                      [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
266                  "Must be in head");
267           return true;
268         });
269     ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
270 
271     assert(HoistStopMap.empty() && "MapHoistStops must be empty");
272     auto *Scope = new CHRScope(TailRegInfos, TailSubs);
273     RegInfos.erase(BoundaryIt, RegInfos.end());
274     Subs.erase(TailIt, Subs.end());
275     return Scope;
276   }
277 
278   bool contains(Instruction *I) const {
279     BasicBlock *Parent = I->getParent();
280     for (const RegInfo &RI : RegInfos)
281       if (RI.R->contains(Parent))
282         return true;
283     return false;
284   }
285 
286   void print(raw_ostream &OS) const;
287 
288   SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
289   SmallVector<CHRScope *, 8> Subs;  // Subscopes.
290 
291   // The instruction at which to insert the CHR conditional branch (and hoist
292   // the dependent condition values).
293   Instruction *BranchInsertPoint;
294 
295   // True-biased and false-biased regions (conditional blocks),
296   // respectively. Used only for the outermost scope and includes regions in
297   // subscopes. The rest are unbiased.
298   DenseSet<Region *> TrueBiasedRegions;
299   DenseSet<Region *> FalseBiasedRegions;
300   // Among the biased regions, the regions that get CHRed.
301   SmallVector<RegInfo, 8> CHRRegions;
302 
303   // True-biased and false-biased selects, respectively. Used only for the
304   // outermost scope and includes ones in subscopes.
305   DenseSet<SelectInst *> TrueBiasedSelects;
306   DenseSet<SelectInst *> FalseBiasedSelects;
307 
308   // Map from one of the above regions to the instructions to stop
309   // hoisting instructions at through use-def chains.
310   HoistStopMapTy HoistStopMap;
311 
312  private:
313    CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
314        : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
315          Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
316 };
317 
318 class CHR {
319  public:
320   CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
321       ProfileSummaryInfo &PSIin, RegionInfo &RIin,
322       OptimizationRemarkEmitter &OREin)
323       : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
324 
325   ~CHR() {
326     for (CHRScope *Scope : Scopes) {
327       delete Scope;
328     }
329   }
330 
331   bool run();
332 
333  private:
334   // See the comments in CHR::run() for the high level flow of the algorithm and
335   // what the following functions do.
336 
337   void findScopes(SmallVectorImpl<CHRScope *> &Output) {
338     Region *R = RI.getTopLevelRegion();
339     if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
340       Output.push_back(Scope);
341     }
342   }
343   CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
344                         SmallVectorImpl<CHRScope *> &Scopes);
345   CHRScope *findScope(Region *R);
346   void checkScopeHoistable(CHRScope *Scope);
347 
348   void splitScopes(SmallVectorImpl<CHRScope *> &Input,
349                    SmallVectorImpl<CHRScope *> &Output);
350   SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
351                                         CHRScope *Outer,
352                                         DenseSet<Value *> *OuterConditionValues,
353                                         Instruction *OuterInsertPoint,
354                                         SmallVectorImpl<CHRScope *> &Output,
355                                         DenseSet<Instruction *> &Unhoistables);
356 
357   void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
358   void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
359 
360   void filterScopes(SmallVectorImpl<CHRScope *> &Input,
361                     SmallVectorImpl<CHRScope *> &Output);
362 
363   void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
364                      SmallVectorImpl<CHRScope *> &Output);
365   void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
366 
367   void sortScopes(SmallVectorImpl<CHRScope *> &Input,
368                   SmallVectorImpl<CHRScope *> &Output);
369 
370   void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
371   void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
372   void cloneScopeBlocks(CHRScope *Scope,
373                         BasicBlock *PreEntryBlock,
374                         BasicBlock *ExitBlock,
375                         Region *LastRegion,
376                         ValueToValueMapTy &VMap);
377   BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
378                                  BasicBlock *EntryBlock,
379                                  BasicBlock *NewEntryBlock,
380                                  ValueToValueMapTy &VMap);
381   void fixupBranchesAndSelects(CHRScope *Scope,
382                                BasicBlock *PreEntryBlock,
383                                BranchInst *MergedBR,
384                                uint64_t ProfileCount);
385   void fixupBranch(Region *R,
386                    CHRScope *Scope,
387                    IRBuilder<> &IRB,
388                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
389   void fixupSelect(SelectInst* SI,
390                    CHRScope *Scope,
391                    IRBuilder<> &IRB,
392                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
393   void addToMergedCondition(bool IsTrueBiased, Value *Cond,
394                             Instruction *BranchOrSelect,
395                             CHRScope *Scope,
396                             IRBuilder<> &IRB,
397                             Value *&MergedCondition);
398 
399   Function &F;
400   BlockFrequencyInfo &BFI;
401   DominatorTree &DT;
402   ProfileSummaryInfo &PSI;
403   RegionInfo &RI;
404   OptimizationRemarkEmitter &ORE;
405   CHRStats Stats;
406 
407   // All the true-biased regions in the function
408   DenseSet<Region *> TrueBiasedRegionsGlobal;
409   // All the false-biased regions in the function
410   DenseSet<Region *> FalseBiasedRegionsGlobal;
411   // All the true-biased selects in the function
412   DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
413   // All the false-biased selects in the function
414   DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
415   // A map from biased regions to their branch bias
416   DenseMap<Region *, BranchProbability> BranchBiasMap;
417   // A map from biased selects to their branch bias
418   DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
419   // All the scopes.
420   DenseSet<CHRScope *> Scopes;
421 };
422 
423 } // end anonymous namespace
424 
425 static inline
426 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
427                                               const CHRStats &Stats) {
428   Stats.print(OS);
429   return OS;
430 }
431 
432 static inline
433 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
434   Scope.print(OS);
435   return OS;
436 }
437 
438 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
439   if (ForceCHR)
440     return true;
441 
442   if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
443     if (CHRModules.count(F.getParent()->getName()))
444       return true;
445     return CHRFunctions.count(F.getName());
446   }
447 
448   assert(PSI.hasProfileSummary() && "Empty PSI?");
449   return PSI.isFunctionEntryHot(&F);
450 }
451 
452 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
453                                          CHRStats *Stats) {
454   StringRef FuncName = F.getName();
455   StringRef ModuleName = F.getParent()->getName();
456   (void)(FuncName); // Unused in release build.
457   (void)(ModuleName); // Unused in release build.
458   CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
459             << FuncName);
460   if (Stats)
461     CHR_DEBUG(dbgs() << " " << *Stats);
462   CHR_DEBUG(dbgs() << "\n");
463   CHR_DEBUG(F.dump());
464 }
465 
466 void CHRScope::print(raw_ostream &OS) const {
467   assert(RegInfos.size() > 0 && "Empty CHRScope");
468   OS << "CHRScope[";
469   OS << RegInfos.size() << ", Regions[";
470   for (const RegInfo &RI : RegInfos) {
471     OS << RI.R->getNameStr();
472     if (RI.HasBranch)
473       OS << " B";
474     if (RI.Selects.size() > 0)
475       OS << " S" << RI.Selects.size();
476     OS << ", ";
477   }
478   if (RegInfos[0].R->getParent()) {
479     OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
480   } else {
481     // top level region
482     OS << "]";
483   }
484   OS << ", Subs[";
485   for (CHRScope *Sub : Subs) {
486     OS << *Sub << ", ";
487   }
488   OS << "]]";
489 }
490 
491 // Return true if the given instruction type can be hoisted by CHR.
492 static bool isHoistableInstructionType(Instruction *I) {
493   return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
494       isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
495       isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
496       isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
497       isa<InsertValueInst>(I);
498 }
499 
500 // Return true if the given instruction can be hoisted by CHR.
501 static bool isHoistable(Instruction *I, DominatorTree &DT) {
502   if (!isHoistableInstructionType(I))
503     return false;
504   return isSafeToSpeculativelyExecute(I, nullptr, &DT);
505 }
506 
507 // Recursively traverse the use-def chains of the given value and return a set
508 // of the unhoistable base values defined within the scope (excluding the
509 // first-region entry block) or the (hoistable or unhoistable) base values that
510 // are defined outside (including the first-region entry block) of the
511 // scope. The returned set doesn't include constants.
512 static const std::set<Value *> &
513 getBaseValues(Value *V, DominatorTree &DT,
514               DenseMap<Value *, std::set<Value *>> &Visited) {
515   auto It = Visited.find(V);
516   if (It != Visited.end()) {
517     return It->second;
518   }
519   std::set<Value *> Result;
520   if (auto *I = dyn_cast<Instruction>(V)) {
521     // We don't stop at a block that's not in the Scope because we would miss
522     // some instructions that are based on the same base values if we stop
523     // there.
524     if (!isHoistable(I, DT)) {
525       Result.insert(I);
526       return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
527     }
528     // I is hoistable above the Scope.
529     for (Value *Op : I->operands()) {
530       const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
531       Result.insert(OpResult.begin(), OpResult.end());
532     }
533     return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
534   }
535   if (isa<Argument>(V)) {
536     Result.insert(V);
537   }
538   // We don't include others like constants because those won't lead to any
539   // chance of folding of conditions (eg two bit checks merged into one check)
540   // after CHR.
541   return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
542 }
543 
544 // Return true if V is already hoisted or can be hoisted (along with its
545 // operands) above the insert point. When it returns true and HoistStops is
546 // non-null, the instructions to stop hoisting at through the use-def chains are
547 // inserted into HoistStops.
548 static bool
549 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
550                 DenseSet<Instruction *> &Unhoistables,
551                 DenseSet<Instruction *> *HoistStops,
552                 DenseMap<Instruction *, bool> &Visited) {
553   assert(InsertPoint && "Null InsertPoint");
554   if (auto *I = dyn_cast<Instruction>(V)) {
555     auto It = Visited.find(I);
556     if (It != Visited.end()) {
557       return It->second;
558     }
559     assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
560     assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
561     if (Unhoistables.count(I)) {
562       // Don't hoist if they are not to be hoisted.
563       Visited[I] = false;
564       return false;
565     }
566     if (DT.dominates(I, InsertPoint)) {
567       // We are already above the insert point. Stop here.
568       if (HoistStops)
569         HoistStops->insert(I);
570       Visited[I] = true;
571       return true;
572     }
573     // We aren't not above the insert point, check if we can hoist it above the
574     // insert point.
575     if (isHoistable(I, DT)) {
576       // Check operands first.
577       DenseSet<Instruction *> OpsHoistStops;
578       bool AllOpsHoisted = true;
579       for (Value *Op : I->operands()) {
580         if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
581                              Visited)) {
582           AllOpsHoisted = false;
583           break;
584         }
585       }
586       if (AllOpsHoisted) {
587         CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
588         if (HoistStops)
589           HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
590         Visited[I] = true;
591         return true;
592       }
593     }
594     Visited[I] = false;
595     return false;
596   }
597   // Non-instructions are considered hoistable.
598   return true;
599 }
600 
601 // Returns true and sets the true probability and false probability of an
602 // MD_prof metadata if it's well-formed.
603 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
604                         BranchProbability &FalseProb) {
605   if (!MD) return false;
606   MDString *MDName = cast<MDString>(MD->getOperand(0));
607   if (MDName->getString() != "branch_weights" ||
608       MD->getNumOperands() != 3)
609     return false;
610   ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
611   ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
612   if (!TrueWeight || !FalseWeight)
613     return false;
614   uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
615   uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
616   uint64_t SumWt = TrueWt + FalseWt;
617 
618   assert(SumWt >= TrueWt && SumWt >= FalseWt &&
619          "Overflow calculating branch probabilities.");
620 
621   // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
622   if (SumWt == 0)
623     return false;
624 
625   TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
626   FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
627   return true;
628 }
629 
630 static BranchProbability getCHRBiasThreshold() {
631   return BranchProbability::getBranchProbability(
632       static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
633 }
634 
635 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
636 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
637 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
638 // false.
639 template <typename K, typename S, typename M>
640 static bool checkBias(K *Key, BranchProbability TrueProb,
641                       BranchProbability FalseProb, S &TrueSet, S &FalseSet,
642                       M &BiasMap) {
643   BranchProbability Threshold = getCHRBiasThreshold();
644   if (TrueProb >= Threshold) {
645     TrueSet.insert(Key);
646     BiasMap[Key] = TrueProb;
647     return true;
648   } else if (FalseProb >= Threshold) {
649     FalseSet.insert(Key);
650     BiasMap[Key] = FalseProb;
651     return true;
652   }
653   return false;
654 }
655 
656 // Returns true and insert a region into the right biased set and the map if the
657 // branch of the region is biased.
658 static bool checkBiasedBranch(BranchInst *BI, Region *R,
659                               DenseSet<Region *> &TrueBiasedRegionsGlobal,
660                               DenseSet<Region *> &FalseBiasedRegionsGlobal,
661                               DenseMap<Region *, BranchProbability> &BranchBiasMap) {
662   if (!BI->isConditional())
663     return false;
664   BranchProbability ThenProb, ElseProb;
665   if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
666                    ThenProb, ElseProb))
667     return false;
668   BasicBlock *IfThen = BI->getSuccessor(0);
669   BasicBlock *IfElse = BI->getSuccessor(1);
670   assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
671          IfThen != IfElse &&
672          "Invariant from findScopes");
673   if (IfThen == R->getExit()) {
674     // Swap them so that IfThen/ThenProb means going into the conditional code
675     // and IfElse/ElseProb means skipping it.
676     std::swap(IfThen, IfElse);
677     std::swap(ThenProb, ElseProb);
678   }
679   CHR_DEBUG(dbgs() << "BI " << *BI << " ");
680   CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
681   CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
682   return checkBias(R, ThenProb, ElseProb,
683                    TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
684                    BranchBiasMap);
685 }
686 
687 // Returns true and insert a select into the right biased set and the map if the
688 // select is biased.
689 static bool checkBiasedSelect(
690     SelectInst *SI, Region *R,
691     DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
692     DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
693     DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
694   BranchProbability TrueProb, FalseProb;
695   if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
696                    TrueProb, FalseProb))
697     return false;
698   CHR_DEBUG(dbgs() << "SI " << *SI << " ");
699   CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
700   CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
701   return checkBias(SI, TrueProb, FalseProb,
702                    TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
703                    SelectBiasMap);
704 }
705 
706 // Returns the instruction at which to hoist the dependent condition values and
707 // insert the CHR branch for a region. This is the terminator branch in the
708 // entry block or the first select in the entry block, if any.
709 static Instruction* getBranchInsertPoint(RegInfo &RI) {
710   Region *R = RI.R;
711   BasicBlock *EntryBB = R->getEntry();
712   // The hoist point is by default the terminator of the entry block, which is
713   // the same as the branch instruction if RI.HasBranch is true.
714   Instruction *HoistPoint = EntryBB->getTerminator();
715   for (SelectInst *SI : RI.Selects) {
716     if (SI->getParent() == EntryBB) {
717       // Pick the first select in Selects in the entry block.  Note Selects is
718       // sorted in the instruction order within a block (asserted below).
719       HoistPoint = SI;
720       break;
721     }
722   }
723   assert(HoistPoint && "Null HoistPoint");
724 #ifndef NDEBUG
725   // Check that HoistPoint is the first one in Selects in the entry block,
726   // if any.
727   DenseSet<Instruction *> EntryBlockSelectSet;
728   for (SelectInst *SI : RI.Selects) {
729     if (SI->getParent() == EntryBB) {
730       EntryBlockSelectSet.insert(SI);
731     }
732   }
733   for (Instruction &I : *EntryBB) {
734     if (EntryBlockSelectSet.contains(&I)) {
735       assert(&I == HoistPoint &&
736              "HoistPoint must be the first one in Selects");
737       break;
738     }
739   }
740 #endif
741   return HoistPoint;
742 }
743 
744 // Find a CHR scope in the given region.
745 CHRScope * CHR::findScope(Region *R) {
746   CHRScope *Result = nullptr;
747   BasicBlock *Entry = R->getEntry();
748   BasicBlock *Exit = R->getExit();  // null if top level.
749   assert(Entry && "Entry must not be null");
750   assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
751          "Only top level region has a null exit");
752   if (Entry)
753     CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
754   else
755     CHR_DEBUG(dbgs() << "Entry null\n");
756   if (Exit)
757     CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
758   else
759     CHR_DEBUG(dbgs() << "Exit null\n");
760   // Exclude cases where Entry is part of a subregion (hence it doesn't belong
761   // to this region).
762   bool EntryInSubregion = RI.getRegionFor(Entry) != R;
763   if (EntryInSubregion)
764     return nullptr;
765   // Exclude loops
766   for (BasicBlock *Pred : predecessors(Entry))
767     if (R->contains(Pred))
768       return nullptr;
769   if (Exit) {
770     // Try to find an if-then block (check if R is an if-then).
771     // if (cond) {
772     //  ...
773     // }
774     auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
775     if (BI)
776       CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
777     else
778       CHR_DEBUG(dbgs() << "BI null\n");
779     if (BI && BI->isConditional()) {
780       BasicBlock *S0 = BI->getSuccessor(0);
781       BasicBlock *S1 = BI->getSuccessor(1);
782       CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
783       CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
784       if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
785         RegInfo RI(R);
786         RI.HasBranch = checkBiasedBranch(
787             BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
788             BranchBiasMap);
789         Result = new CHRScope(RI);
790         Scopes.insert(Result);
791         CHR_DEBUG(dbgs() << "Found a region with a branch\n");
792         ++Stats.NumBranches;
793         if (!RI.HasBranch) {
794           ORE.emit([&]() {
795             return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
796                 << "Branch not biased";
797           });
798         }
799       }
800     }
801   }
802   {
803     // Try to look for selects in the direct child blocks (as opposed to in
804     // subregions) of R.
805     // ...
806     // if (..) { // Some subregion
807     //   ...
808     // }
809     // if (..) { // Some subregion
810     //   ...
811     // }
812     // ...
813     // a = cond ? b : c;
814     // ...
815     SmallVector<SelectInst *, 8> Selects;
816     for (RegionNode *E : R->elements()) {
817       if (E->isSubRegion())
818         continue;
819       // This returns the basic block of E if E is a direct child of R (not a
820       // subregion.)
821       BasicBlock *BB = E->getEntry();
822       // Need to push in the order to make it easier to find the first Select
823       // later.
824       for (Instruction &I : *BB) {
825         if (auto *SI = dyn_cast<SelectInst>(&I)) {
826           Selects.push_back(SI);
827           ++Stats.NumBranches;
828         }
829       }
830     }
831     if (Selects.size() > 0) {
832       auto AddSelects = [&](RegInfo &RI) {
833         for (auto *SI : Selects)
834           if (checkBiasedSelect(SI, RI.R,
835                                 TrueBiasedSelectsGlobal,
836                                 FalseBiasedSelectsGlobal,
837                                 SelectBiasMap))
838             RI.Selects.push_back(SI);
839           else
840             ORE.emit([&]() {
841               return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
842                   << "Select not biased";
843             });
844       };
845       if (!Result) {
846         CHR_DEBUG(dbgs() << "Found a select-only region\n");
847         RegInfo RI(R);
848         AddSelects(RI);
849         Result = new CHRScope(RI);
850         Scopes.insert(Result);
851       } else {
852         CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
853         AddSelects(Result->RegInfos[0]);
854       }
855     }
856   }
857 
858   if (Result) {
859     checkScopeHoistable(Result);
860   }
861   return Result;
862 }
863 
864 // Check that any of the branch and the selects in the region could be
865 // hoisted above the the CHR branch insert point (the most dominating of
866 // them, either the branch (at the end of the first block) or the first
867 // select in the first block). If the branch can't be hoisted, drop the
868 // selects in the first blocks.
869 //
870 // For example, for the following scope/region with selects, we want to insert
871 // the merged branch right before the first select in the first/entry block by
872 // hoisting c1, c2, c3, and c4.
873 //
874 // // Branch insert point here.
875 // a = c1 ? b : c; // Select 1
876 // d = c2 ? e : f; // Select 2
877 // if (c3) { // Branch
878 //   ...
879 //   c4 = foo() // A call.
880 //   g = c4 ? h : i; // Select 3
881 // }
882 //
883 // But suppose we can't hoist c4 because it's dependent on the preceding
884 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
885 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
886 void CHR::checkScopeHoistable(CHRScope *Scope) {
887   RegInfo &RI = Scope->RegInfos[0];
888   Region *R = RI.R;
889   BasicBlock *EntryBB = R->getEntry();
890   auto *Branch = RI.HasBranch ?
891                  cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
892   SmallVector<SelectInst *, 8> &Selects = RI.Selects;
893   if (RI.HasBranch || !Selects.empty()) {
894     Instruction *InsertPoint = getBranchInsertPoint(RI);
895     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
896     // Avoid a data dependence from a select or a branch to a(nother)
897     // select. Note no instruction can't data-depend on a branch (a branch
898     // instruction doesn't produce a value).
899     DenseSet<Instruction *> Unhoistables;
900     // Initialize Unhoistables with the selects.
901     for (SelectInst *SI : Selects) {
902       Unhoistables.insert(SI);
903     }
904     // Remove Selects that can't be hoisted.
905     for (auto it = Selects.begin(); it != Selects.end(); ) {
906       SelectInst *SI = *it;
907       if (SI == InsertPoint) {
908         ++it;
909         continue;
910       }
911       DenseMap<Instruction *, bool> Visited;
912       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
913                                          DT, Unhoistables, nullptr, Visited);
914       if (!IsHoistable) {
915         CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
916         ORE.emit([&]() {
917           return OptimizationRemarkMissed(DEBUG_TYPE,
918                                           "DropUnhoistableSelect", SI)
919               << "Dropped unhoistable select";
920         });
921         it = Selects.erase(it);
922         // Since we are dropping the select here, we also drop it from
923         // Unhoistables.
924         Unhoistables.erase(SI);
925       } else
926         ++it;
927     }
928     // Update InsertPoint after potentially removing selects.
929     InsertPoint = getBranchInsertPoint(RI);
930     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
931     if (RI.HasBranch && InsertPoint != Branch) {
932       DenseMap<Instruction *, bool> Visited;
933       bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
934                                          DT, Unhoistables, nullptr, Visited);
935       if (!IsHoistable) {
936         // If the branch isn't hoistable, drop the selects in the entry
937         // block, preferring the branch, which makes the branch the hoist
938         // point.
939         assert(InsertPoint != Branch && "Branch must not be the hoist point");
940         CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
941         CHR_DEBUG(
942             for (SelectInst *SI : Selects) {
943               dbgs() << "SI " << *SI << "\n";
944             });
945         for (SelectInst *SI : Selects) {
946           ORE.emit([&]() {
947             return OptimizationRemarkMissed(DEBUG_TYPE,
948                                             "DropSelectUnhoistableBranch", SI)
949                 << "Dropped select due to unhoistable branch";
950           });
951         }
952         llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
953           return SI->getParent() == EntryBB;
954         });
955         Unhoistables.clear();
956         InsertPoint = Branch;
957       }
958     }
959     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
960 #ifndef NDEBUG
961     if (RI.HasBranch) {
962       assert(!DT.dominates(Branch, InsertPoint) &&
963              "Branch can't be already above the hoist point");
964       DenseMap<Instruction *, bool> Visited;
965       assert(checkHoistValue(Branch->getCondition(), InsertPoint,
966                              DT, Unhoistables, nullptr, Visited) &&
967              "checkHoistValue for branch");
968     }
969     for (auto *SI : Selects) {
970       assert(!DT.dominates(SI, InsertPoint) &&
971              "SI can't be already above the hoist point");
972       DenseMap<Instruction *, bool> Visited;
973       assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
974                              Unhoistables, nullptr, Visited) &&
975              "checkHoistValue for selects");
976     }
977     CHR_DEBUG(dbgs() << "Result\n");
978     if (RI.HasBranch) {
979       CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
980     }
981     for (auto *SI : Selects) {
982       CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
983     }
984 #endif
985   }
986 }
987 
988 // Traverse the region tree, find all nested scopes and merge them if possible.
989 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
990                            SmallVectorImpl<CHRScope *> &Scopes) {
991   CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
992   CHRScope *Result = findScope(R);
993   // Visit subscopes.
994   CHRScope *ConsecutiveSubscope = nullptr;
995   SmallVector<CHRScope *, 8> Subscopes;
996   for (auto It = R->begin(); It != R->end(); ++It) {
997     const std::unique_ptr<Region> &SubR = *It;
998     auto NextIt = std::next(It);
999     Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
1000     CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
1001               << "\n");
1002     CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
1003     if (SubCHRScope) {
1004       CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
1005     } else {
1006       CHR_DEBUG(dbgs() << "Subregion Scope null\n");
1007     }
1008     if (SubCHRScope) {
1009       if (!ConsecutiveSubscope)
1010         ConsecutiveSubscope = SubCHRScope;
1011       else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1012         Subscopes.push_back(ConsecutiveSubscope);
1013         ConsecutiveSubscope = SubCHRScope;
1014       } else
1015         ConsecutiveSubscope->append(SubCHRScope);
1016     } else {
1017       if (ConsecutiveSubscope) {
1018         Subscopes.push_back(ConsecutiveSubscope);
1019       }
1020       ConsecutiveSubscope = nullptr;
1021     }
1022   }
1023   if (ConsecutiveSubscope) {
1024     Subscopes.push_back(ConsecutiveSubscope);
1025   }
1026   for (CHRScope *Sub : Subscopes) {
1027     if (Result) {
1028       // Combine it with the parent.
1029       Result->addSub(Sub);
1030     } else {
1031       // Push Subscopes as they won't be combined with the parent.
1032       Scopes.push_back(Sub);
1033     }
1034   }
1035   return Result;
1036 }
1037 
1038 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1039   DenseSet<Value *> ConditionValues;
1040   if (RI.HasBranch) {
1041     auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1042     ConditionValues.insert(BI->getCondition());
1043   }
1044   for (SelectInst *SI : RI.Selects) {
1045     ConditionValues.insert(SI->getCondition());
1046   }
1047   return ConditionValues;
1048 }
1049 
1050 
1051 // Determine whether to split a scope depending on the sets of the branch
1052 // condition values of the previous region and the current region. We split
1053 // (return true) it if 1) the condition values of the inner/lower scope can't be
1054 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1055 // values have an empty intersection (because the combined branch conditions
1056 // won't probably lead to a simpler combined condition).
1057 static bool shouldSplit(Instruction *InsertPoint,
1058                         DenseSet<Value *> &PrevConditionValues,
1059                         DenseSet<Value *> &ConditionValues,
1060                         DominatorTree &DT,
1061                         DenseSet<Instruction *> &Unhoistables) {
1062   assert(InsertPoint && "Null InsertPoint");
1063   CHR_DEBUG(
1064       dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1065       for (Value *V : PrevConditionValues) {
1066         dbgs() << *V << ", ";
1067       }
1068       dbgs() << " ConditionValues ";
1069       for (Value *V : ConditionValues) {
1070         dbgs() << *V << ", ";
1071       }
1072       dbgs() << "\n");
1073   // If any of Bases isn't hoistable to the hoist point, split.
1074   for (Value *V : ConditionValues) {
1075     DenseMap<Instruction *, bool> Visited;
1076     if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1077       CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1078       return true; // Not hoistable, split.
1079     }
1080   }
1081   // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1082   // unnecessary splits at scopes with no branch/selects.  If
1083   // PrevConditionValues and ConditionValues don't intersect at all, split.
1084   if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1085     // Use std::set as DenseSet doesn't work with set_intersection.
1086     std::set<Value *> PrevBases, Bases;
1087     DenseMap<Value *, std::set<Value *>> Visited;
1088     for (Value *V : PrevConditionValues) {
1089       const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1090       PrevBases.insert(BaseValues.begin(), BaseValues.end());
1091     }
1092     for (Value *V : ConditionValues) {
1093       const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1094       Bases.insert(BaseValues.begin(), BaseValues.end());
1095     }
1096     CHR_DEBUG(
1097         dbgs() << "PrevBases ";
1098         for (Value *V : PrevBases) {
1099           dbgs() << *V << ", ";
1100         }
1101         dbgs() << " Bases ";
1102         for (Value *V : Bases) {
1103           dbgs() << *V << ", ";
1104         }
1105         dbgs() << "\n");
1106     std::vector<Value *> Intersection;
1107     std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1108                           Bases.end(), std::back_inserter(Intersection));
1109     if (Intersection.empty()) {
1110       // Empty intersection, split.
1111       CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1112       return true;
1113     }
1114   }
1115   CHR_DEBUG(dbgs() << "No split\n");
1116   return false;  // Don't split.
1117 }
1118 
1119 static void getSelectsInScope(CHRScope *Scope,
1120                               DenseSet<Instruction *> &Output) {
1121   for (RegInfo &RI : Scope->RegInfos)
1122     for (SelectInst *SI : RI.Selects)
1123       Output.insert(SI);
1124   for (CHRScope *Sub : Scope->Subs)
1125     getSelectsInScope(Sub, Output);
1126 }
1127 
1128 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1129                       SmallVectorImpl<CHRScope *> &Output) {
1130   for (CHRScope *Scope : Input) {
1131     assert(!Scope->BranchInsertPoint &&
1132            "BranchInsertPoint must not be set");
1133     DenseSet<Instruction *> Unhoistables;
1134     getSelectsInScope(Scope, Unhoistables);
1135     splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1136   }
1137 #ifndef NDEBUG
1138   for (CHRScope *Scope : Output) {
1139     assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1140   }
1141 #endif
1142 }
1143 
1144 SmallVector<CHRScope *, 8> CHR::splitScope(
1145     CHRScope *Scope,
1146     CHRScope *Outer,
1147     DenseSet<Value *> *OuterConditionValues,
1148     Instruction *OuterInsertPoint,
1149     SmallVectorImpl<CHRScope *> &Output,
1150     DenseSet<Instruction *> &Unhoistables) {
1151   if (Outer) {
1152     assert(OuterConditionValues && "Null OuterConditionValues");
1153     assert(OuterInsertPoint && "Null OuterInsertPoint");
1154   }
1155   bool PrevSplitFromOuter = true;
1156   DenseSet<Value *> PrevConditionValues;
1157   Instruction *PrevInsertPoint = nullptr;
1158   SmallVector<CHRScope *, 8> Splits;
1159   SmallVector<bool, 8> SplitsSplitFromOuter;
1160   SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1161   SmallVector<Instruction *, 8> SplitsInsertPoints;
1162   SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
1163   for (RegInfo &RI : RegInfos) {
1164     Instruction *InsertPoint = getBranchInsertPoint(RI);
1165     DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1166     CHR_DEBUG(
1167         dbgs() << "ConditionValues ";
1168         for (Value *V : ConditionValues) {
1169           dbgs() << *V << ", ";
1170         }
1171         dbgs() << "\n");
1172     if (RI.R == RegInfos[0].R) {
1173       // First iteration. Check to see if we should split from the outer.
1174       if (Outer) {
1175         CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1176         CHR_DEBUG(dbgs() << "Should split from outer at "
1177                   << RI.R->getNameStr() << "\n");
1178         if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1179                         ConditionValues, DT, Unhoistables)) {
1180           PrevConditionValues = ConditionValues;
1181           PrevInsertPoint = InsertPoint;
1182           ORE.emit([&]() {
1183             return OptimizationRemarkMissed(DEBUG_TYPE,
1184                                             "SplitScopeFromOuter",
1185                                             RI.R->getEntry()->getTerminator())
1186                 << "Split scope from outer due to unhoistable branch/select "
1187                 << "and/or lack of common condition values";
1188           });
1189         } else {
1190           // Not splitting from the outer. Use the outer bases and insert
1191           // point. Union the bases.
1192           PrevSplitFromOuter = false;
1193           PrevConditionValues = *OuterConditionValues;
1194           PrevConditionValues.insert(ConditionValues.begin(),
1195                                      ConditionValues.end());
1196           PrevInsertPoint = OuterInsertPoint;
1197         }
1198       } else {
1199         CHR_DEBUG(dbgs() << "Outer null\n");
1200         PrevConditionValues = ConditionValues;
1201         PrevInsertPoint = InsertPoint;
1202       }
1203     } else {
1204       CHR_DEBUG(dbgs() << "Should split from prev at "
1205                 << RI.R->getNameStr() << "\n");
1206       if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1207                       DT, Unhoistables)) {
1208         CHRScope *Tail = Scope->split(RI.R);
1209         Scopes.insert(Tail);
1210         Splits.push_back(Scope);
1211         SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1212         SplitsConditionValues.push_back(PrevConditionValues);
1213         SplitsInsertPoints.push_back(PrevInsertPoint);
1214         Scope = Tail;
1215         PrevConditionValues = ConditionValues;
1216         PrevInsertPoint = InsertPoint;
1217         PrevSplitFromOuter = true;
1218         ORE.emit([&]() {
1219           return OptimizationRemarkMissed(DEBUG_TYPE,
1220                                           "SplitScopeFromPrev",
1221                                           RI.R->getEntry()->getTerminator())
1222               << "Split scope from previous due to unhoistable branch/select "
1223               << "and/or lack of common condition values";
1224         });
1225       } else {
1226         // Not splitting. Union the bases. Keep the hoist point.
1227         PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1228       }
1229     }
1230   }
1231   Splits.push_back(Scope);
1232   SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1233   SplitsConditionValues.push_back(PrevConditionValues);
1234   assert(PrevInsertPoint && "Null PrevInsertPoint");
1235   SplitsInsertPoints.push_back(PrevInsertPoint);
1236   assert(Splits.size() == SplitsConditionValues.size() &&
1237          Splits.size() == SplitsSplitFromOuter.size() &&
1238          Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1239   for (size_t I = 0; I < Splits.size(); ++I) {
1240     CHRScope *Split = Splits[I];
1241     DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1242     Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1243     SmallVector<CHRScope *, 8> NewSubs;
1244     DenseSet<Instruction *> SplitUnhoistables;
1245     getSelectsInScope(Split, SplitUnhoistables);
1246     for (CHRScope *Sub : Split->Subs) {
1247       SmallVector<CHRScope *, 8> SubSplits = splitScope(
1248           Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1249           SplitUnhoistables);
1250       llvm::append_range(NewSubs, SubSplits);
1251     }
1252     Split->Subs = NewSubs;
1253   }
1254   SmallVector<CHRScope *, 8> Result;
1255   for (size_t I = 0; I < Splits.size(); ++I) {
1256     CHRScope *Split = Splits[I];
1257     if (SplitsSplitFromOuter[I]) {
1258       // Split from the outer.
1259       Output.push_back(Split);
1260       Split->BranchInsertPoint = SplitsInsertPoints[I];
1261       CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1262                 << "\n");
1263     } else {
1264       // Connected to the outer.
1265       Result.push_back(Split);
1266     }
1267   }
1268   if (!Outer)
1269     assert(Result.empty() &&
1270            "If no outer (top-level), must return no nested ones");
1271   return Result;
1272 }
1273 
1274 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1275   for (CHRScope *Scope : Scopes) {
1276     assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1277     classifyBiasedScopes(Scope, Scope);
1278     CHR_DEBUG(
1279         dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1280         dbgs() << "TrueBiasedRegions ";
1281         for (Region *R : Scope->TrueBiasedRegions) {
1282           dbgs() << R->getNameStr() << ", ";
1283         }
1284         dbgs() << "\n";
1285         dbgs() << "FalseBiasedRegions ";
1286         for (Region *R : Scope->FalseBiasedRegions) {
1287           dbgs() << R->getNameStr() << ", ";
1288         }
1289         dbgs() << "\n";
1290         dbgs() << "TrueBiasedSelects ";
1291         for (SelectInst *SI : Scope->TrueBiasedSelects) {
1292           dbgs() << *SI << ", ";
1293         }
1294         dbgs() << "\n";
1295         dbgs() << "FalseBiasedSelects ";
1296         for (SelectInst *SI : Scope->FalseBiasedSelects) {
1297           dbgs() << *SI << ", ";
1298         }
1299         dbgs() << "\n";);
1300   }
1301 }
1302 
1303 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1304   for (RegInfo &RI : Scope->RegInfos) {
1305     if (RI.HasBranch) {
1306       Region *R = RI.R;
1307       if (TrueBiasedRegionsGlobal.contains(R))
1308         OutermostScope->TrueBiasedRegions.insert(R);
1309       else if (FalseBiasedRegionsGlobal.contains(R))
1310         OutermostScope->FalseBiasedRegions.insert(R);
1311       else
1312         llvm_unreachable("Must be biased");
1313     }
1314     for (SelectInst *SI : RI.Selects) {
1315       if (TrueBiasedSelectsGlobal.contains(SI))
1316         OutermostScope->TrueBiasedSelects.insert(SI);
1317       else if (FalseBiasedSelectsGlobal.contains(SI))
1318         OutermostScope->FalseBiasedSelects.insert(SI);
1319       else
1320         llvm_unreachable("Must be biased");
1321     }
1322   }
1323   for (CHRScope *Sub : Scope->Subs) {
1324     classifyBiasedScopes(Sub, OutermostScope);
1325   }
1326 }
1327 
1328 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1329   unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1330                        Scope->FalseBiasedRegions.size() +
1331                        Scope->TrueBiasedSelects.size() +
1332                        Scope->FalseBiasedSelects.size();
1333   return NumBiased >= CHRMergeThreshold;
1334 }
1335 
1336 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1337                        SmallVectorImpl<CHRScope *> &Output) {
1338   for (CHRScope *Scope : Input) {
1339     // Filter out the ones with only one region and no subs.
1340     if (!hasAtLeastTwoBiasedBranches(Scope)) {
1341       CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1342                 << Scope->TrueBiasedRegions.size()
1343                 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1344                 << " true-selects " << Scope->TrueBiasedSelects.size()
1345                 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1346       ORE.emit([&]() {
1347         return OptimizationRemarkMissed(
1348             DEBUG_TYPE,
1349             "DropScopeWithOneBranchOrSelect",
1350             Scope->RegInfos[0].R->getEntry()->getTerminator())
1351             << "Drop scope with < "
1352             << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1353             << " biased branch(es) or select(s)";
1354       });
1355       continue;
1356     }
1357     Output.push_back(Scope);
1358   }
1359 }
1360 
1361 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1362                         SmallVectorImpl<CHRScope *> &Output) {
1363   for (CHRScope *Scope : Input) {
1364     assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1365            "Empty");
1366     setCHRRegions(Scope, Scope);
1367     Output.push_back(Scope);
1368     CHR_DEBUG(
1369         dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1370         for (auto pair : Scope->HoistStopMap) {
1371           Region *R = pair.first;
1372           dbgs() << "Region " << R->getNameStr() << "\n";
1373           for (Instruction *I : pair.second) {
1374             dbgs() << "HoistStop " << *I << "\n";
1375           }
1376         }
1377         dbgs() << "CHRRegions" << "\n";
1378         for (RegInfo &RI : Scope->CHRRegions) {
1379           dbgs() << RI.R->getNameStr() << "\n";
1380         });
1381   }
1382 }
1383 
1384 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1385   DenseSet<Instruction *> Unhoistables;
1386   // Put the biased selects in Unhoistables because they should stay where they
1387   // are and constant-folded after CHR (in case one biased select or a branch
1388   // can depend on another biased select.)
1389   for (RegInfo &RI : Scope->RegInfos) {
1390     for (SelectInst *SI : RI.Selects) {
1391       Unhoistables.insert(SI);
1392     }
1393   }
1394   Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1395   for (RegInfo &RI : Scope->RegInfos) {
1396     Region *R = RI.R;
1397     DenseSet<Instruction *> HoistStops;
1398     bool IsHoisted = false;
1399     if (RI.HasBranch) {
1400       assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1401               OutermostScope->FalseBiasedRegions.contains(R)) &&
1402              "Must be truthy or falsy");
1403       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1404       // Note checkHoistValue fills in HoistStops.
1405       DenseMap<Instruction *, bool> Visited;
1406       bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1407                                          Unhoistables, &HoistStops, Visited);
1408       assert(IsHoistable && "Must be hoistable");
1409       (void)(IsHoistable);  // Unused in release build
1410       IsHoisted = true;
1411     }
1412     for (SelectInst *SI : RI.Selects) {
1413       assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1414               OutermostScope->FalseBiasedSelects.contains(SI)) &&
1415              "Must be true or false biased");
1416       // Note checkHoistValue fills in HoistStops.
1417       DenseMap<Instruction *, bool> Visited;
1418       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1419                                          Unhoistables, &HoistStops, Visited);
1420       assert(IsHoistable && "Must be hoistable");
1421       (void)(IsHoistable);  // Unused in release build
1422       IsHoisted = true;
1423     }
1424     if (IsHoisted) {
1425       OutermostScope->CHRRegions.push_back(RI);
1426       OutermostScope->HoistStopMap[R] = HoistStops;
1427     }
1428   }
1429   for (CHRScope *Sub : Scope->Subs)
1430     setCHRRegions(Sub, OutermostScope);
1431 }
1432 
1433 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1434   return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1435 }
1436 
1437 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1438                      SmallVectorImpl<CHRScope *> &Output) {
1439   Output.resize(Input.size());
1440   llvm::copy(Input, Output.begin());
1441   llvm::stable_sort(Output, CHRScopeSorter);
1442 }
1443 
1444 // Return true if V is already hoisted or was hoisted (along with its operands)
1445 // to the insert point.
1446 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1447                        HoistStopMapTy &HoistStopMap,
1448                        DenseSet<Instruction *> &HoistedSet,
1449                        DenseSet<PHINode *> &TrivialPHIs,
1450                        DominatorTree &DT) {
1451   auto IT = HoistStopMap.find(R);
1452   assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1453   DenseSet<Instruction *> &HoistStops = IT->second;
1454   if (auto *I = dyn_cast<Instruction>(V)) {
1455     if (I == HoistPoint)
1456       return;
1457     if (HoistStops.count(I))
1458       return;
1459     if (auto *PN = dyn_cast<PHINode>(I))
1460       if (TrivialPHIs.count(PN))
1461         // The trivial phi inserted by the previous CHR scope could replace a
1462         // non-phi in HoistStops. Note that since this phi is at the exit of a
1463         // previous CHR scope, which dominates this scope, it's safe to stop
1464         // hoisting there.
1465         return;
1466     if (HoistedSet.count(I))
1467       // Already hoisted, return.
1468       return;
1469     assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1470     assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1471     assert(DT.getNode(HoistPoint->getParent()) &&
1472            "DT must contain HoistPoint block");
1473     if (DT.dominates(I, HoistPoint))
1474       // We are already above the hoist point. Stop here. This may be necessary
1475       // when multiple scopes would independently hoist the same
1476       // instruction. Since an outer (dominating) scope would hoist it to its
1477       // entry before an inner (dominated) scope would to its entry, the inner
1478       // scope may see the instruction already hoisted, in which case it
1479       // potentially wrong for the inner scope to hoist it and could cause bad
1480       // IR (non-dominating def), but safe to skip hoisting it instead because
1481       // it's already in a block that dominates the inner scope.
1482       return;
1483     for (Value *Op : I->operands()) {
1484       hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1485     }
1486     I->moveBefore(HoistPoint);
1487     HoistedSet.insert(I);
1488     CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1489   }
1490 }
1491 
1492 // Hoist the dependent condition values of the branches and the selects in the
1493 // scope to the insert point.
1494 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1495                                  DenseSet<PHINode *> &TrivialPHIs,
1496                                  DominatorTree &DT) {
1497   DenseSet<Instruction *> HoistedSet;
1498   for (const RegInfo &RI : Scope->CHRRegions) {
1499     Region *R = RI.R;
1500     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1501     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1502     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1503       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1504       hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1505                  HoistedSet, TrivialPHIs, DT);
1506     }
1507     for (SelectInst *SI : RI.Selects) {
1508       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1509       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1510       if (!(IsTrueBiased || IsFalseBiased))
1511         continue;
1512       hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1513                  HoistedSet, TrivialPHIs, DT);
1514     }
1515   }
1516 }
1517 
1518 // Negate the predicate if an ICmp if it's used only by branches or selects by
1519 // swapping the operands of the branches or the selects. Returns true if success.
1520 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1521                                                  Instruction *ExcludedUser,
1522                                                  CHRScope *Scope) {
1523   for (User *U : ICmp->users()) {
1524     if (U == ExcludedUser)
1525       continue;
1526     if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1527       continue;
1528     if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1529       continue;
1530     return false;
1531   }
1532   for (User *U : ICmp->users()) {
1533     if (U == ExcludedUser)
1534       continue;
1535     if (auto *BI = dyn_cast<BranchInst>(U)) {
1536       assert(BI->isConditional() && "Must be conditional");
1537       BI->swapSuccessors();
1538       // Don't need to swap this in terms of
1539       // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1540       // mean whehter the branch is likely go into the if-then rather than
1541       // successor0/successor1 and because we can tell which edge is the then or
1542       // the else one by comparing the destination to the region exit block.
1543       continue;
1544     }
1545     if (auto *SI = dyn_cast<SelectInst>(U)) {
1546       // Swap operands
1547       SI->swapValues();
1548       SI->swapProfMetadata();
1549       if (Scope->TrueBiasedSelects.count(SI)) {
1550         assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1551                "Must not be already in");
1552         Scope->FalseBiasedSelects.insert(SI);
1553       } else if (Scope->FalseBiasedSelects.count(SI)) {
1554         assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1555                "Must not be already in");
1556         Scope->TrueBiasedSelects.insert(SI);
1557       }
1558       continue;
1559     }
1560     llvm_unreachable("Must be a branch or a select");
1561   }
1562   ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1563   return true;
1564 }
1565 
1566 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1567 // for a value that's defined in the scope but used outside it (meaning it's
1568 // alive at the exit block).
1569 static void insertTrivialPHIs(CHRScope *Scope,
1570                               BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1571                               DenseSet<PHINode *> &TrivialPHIs) {
1572   SmallSetVector<BasicBlock *, 8> BlocksInScope;
1573   for (RegInfo &RI : Scope->RegInfos) {
1574     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1575                                             // sub-Scopes.
1576       BlocksInScope.insert(BB);
1577     }
1578   }
1579   CHR_DEBUG({
1580     dbgs() << "Inserting redundant phis\n";
1581     for (BasicBlock *BB : BlocksInScope)
1582       dbgs() << "BlockInScope " << BB->getName() << "\n";
1583   });
1584   for (BasicBlock *BB : BlocksInScope) {
1585     for (Instruction &I : *BB) {
1586       SmallVector<Instruction *, 8> Users;
1587       for (User *U : I.users()) {
1588         if (auto *UI = dyn_cast<Instruction>(U)) {
1589           if (BlocksInScope.count(UI->getParent()) == 0 &&
1590               // Unless there's already a phi for I at the exit block.
1591               !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1592             CHR_DEBUG(dbgs() << "V " << I << "\n");
1593             CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1594             Users.push_back(UI);
1595           } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1596             // There's a loop backedge from a block that's dominated by this
1597             // scope to the entry block.
1598             CHR_DEBUG(dbgs() << "V " << I << "\n");
1599             CHR_DEBUG(dbgs()
1600                       << "Used at entry block (for a back edge) by a phi user "
1601                       << *UI << "\n");
1602             Users.push_back(UI);
1603           }
1604         }
1605       }
1606       if (Users.size() > 0) {
1607         // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1608         // ExitBlock. Replace I with the new phi in UI unless UI is another
1609         // phi at ExitBlock.
1610         PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
1611                                       &ExitBlock->front());
1612         for (BasicBlock *Pred : predecessors(ExitBlock)) {
1613           PN->addIncoming(&I, Pred);
1614         }
1615         TrivialPHIs.insert(PN);
1616         CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1617         for (Instruction *UI : Users) {
1618           for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1619             if (UI->getOperand(J) == &I) {
1620               UI->setOperand(J, PN);
1621             }
1622           }
1623           CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1624         }
1625       }
1626     }
1627   }
1628 }
1629 
1630 // Assert that all the CHR regions of the scope have a biased branch or select.
1631 static void LLVM_ATTRIBUTE_UNUSED
1632 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1633 #ifndef NDEBUG
1634   auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1635     if (Scope->TrueBiasedRegions.count(RI.R) ||
1636         Scope->FalseBiasedRegions.count(RI.R))
1637       return true;
1638     for (SelectInst *SI : RI.Selects)
1639       if (Scope->TrueBiasedSelects.count(SI) ||
1640           Scope->FalseBiasedSelects.count(SI))
1641         return true;
1642     return false;
1643   };
1644   for (RegInfo &RI : Scope->CHRRegions) {
1645     assert(HasBiasedBranchOrSelect(RI, Scope) &&
1646            "Must have biased branch or select");
1647   }
1648 #endif
1649 }
1650 
1651 // Assert that all the condition values of the biased branches and selects have
1652 // been hoisted to the pre-entry block or outside of the scope.
1653 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1654     CHRScope *Scope, BasicBlock *PreEntryBlock) {
1655   CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1656   for (RegInfo &RI : Scope->CHRRegions) {
1657     Region *R = RI.R;
1658     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1659     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1660     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1661       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1662       Value *V = BI->getCondition();
1663       CHR_DEBUG(dbgs() << *V << "\n");
1664       if (auto *I = dyn_cast<Instruction>(V)) {
1665         (void)(I); // Unused in release build.
1666         assert((I->getParent() == PreEntryBlock ||
1667                 !Scope->contains(I)) &&
1668                "Must have been hoisted to PreEntryBlock or outside the scope");
1669       }
1670     }
1671     for (SelectInst *SI : RI.Selects) {
1672       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1673       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1674       if (!(IsTrueBiased || IsFalseBiased))
1675         continue;
1676       Value *V = SI->getCondition();
1677       CHR_DEBUG(dbgs() << *V << "\n");
1678       if (auto *I = dyn_cast<Instruction>(V)) {
1679         (void)(I); // Unused in release build.
1680         assert((I->getParent() == PreEntryBlock ||
1681                 !Scope->contains(I)) &&
1682                "Must have been hoisted to PreEntryBlock or outside the scope");
1683       }
1684     }
1685   }
1686 }
1687 
1688 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1689   CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1690 
1691   assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1692   Region *FirstRegion = Scope->RegInfos[0].R;
1693   BasicBlock *EntryBlock = FirstRegion->getEntry();
1694   Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1695   BasicBlock *ExitBlock = LastRegion->getExit();
1696   Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1697 
1698   if (ExitBlock) {
1699     // Insert a trivial phi at the exit block (where the CHR hot path and the
1700     // cold path merges) for a value that's defined in the scope but used
1701     // outside it (meaning it's alive at the exit block). We will add the
1702     // incoming values for the CHR cold paths to it below. Without this, we'd
1703     // miss updating phi's for such values unless there happens to already be a
1704     // phi for that value there.
1705     insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1706   }
1707 
1708   // Split the entry block of the first region. The new block becomes the new
1709   // entry block of the first region. The old entry block becomes the block to
1710   // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1711   // through the split, we update the entry of the first region after the split,
1712   // and Region only points to the entry and the exit blocks, rather than
1713   // keeping everything in a list or set, the blocks membership and the
1714   // entry/exit blocks of the region are still valid after the split.
1715   CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1716             << " at " << *Scope->BranchInsertPoint << "\n");
1717   BasicBlock *NewEntryBlock =
1718       SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1719   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1720          "NewEntryBlock's only pred must be EntryBlock");
1721   FirstRegion->replaceEntryRecursive(NewEntryBlock);
1722   BasicBlock *PreEntryBlock = EntryBlock;
1723 
1724   ValueToValueMapTy VMap;
1725   // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1726   // hot path (originals) and a cold path (clones) and update the PHIs at the
1727   // exit block.
1728   cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1729 
1730   // Replace the old (placeholder) branch with the new (merged) conditional
1731   // branch.
1732   BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1733                                             NewEntryBlock, VMap);
1734 
1735 #ifndef NDEBUG
1736   assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1737 #endif
1738 
1739   // Hoist the conditional values of the branches/selects.
1740   hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1741 
1742 #ifndef NDEBUG
1743   assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1744 #endif
1745 
1746   // Create the combined branch condition and constant-fold the branches/selects
1747   // in the hot path.
1748   fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1749                           ProfileCount ? ProfileCount.getValue() : 0);
1750 }
1751 
1752 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1753 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1754 // at the exit block.
1755 void CHR::cloneScopeBlocks(CHRScope *Scope,
1756                            BasicBlock *PreEntryBlock,
1757                            BasicBlock *ExitBlock,
1758                            Region *LastRegion,
1759                            ValueToValueMapTy &VMap) {
1760   // Clone all the blocks. The original blocks will be the hot-path
1761   // CHR-optimized code and the cloned blocks will be the original unoptimized
1762   // code. This is so that the block pointers from the
1763   // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1764   // which CHR should apply to.
1765   SmallVector<BasicBlock*, 8> NewBlocks;
1766   for (RegInfo &RI : Scope->RegInfos)
1767     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1768                                             // sub-Scopes.
1769       assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1770       BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1771       NewBlocks.push_back(NewBB);
1772       VMap[BB] = NewBB;
1773     }
1774 
1775   // Place the cloned blocks right after the original blocks (right before the
1776   // exit block of.)
1777   if (ExitBlock)
1778     F.getBasicBlockList().splice(ExitBlock->getIterator(),
1779                                  F.getBasicBlockList(),
1780                                  NewBlocks[0]->getIterator(), F.end());
1781 
1782   // Update the cloned blocks/instructions to refer to themselves.
1783   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1784     for (Instruction &I : *NewBlocks[i])
1785       RemapInstruction(&I, VMap,
1786                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1787 
1788   // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1789   // the top-level region but we don't need to add PHIs. The trivial PHIs
1790   // inserted above will be updated here.
1791   if (ExitBlock)
1792     for (PHINode &PN : ExitBlock->phis())
1793       for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1794            ++I) {
1795         BasicBlock *Pred = PN.getIncomingBlock(I);
1796         if (LastRegion->contains(Pred)) {
1797           Value *V = PN.getIncomingValue(I);
1798           auto It = VMap.find(V);
1799           if (It != VMap.end()) V = It->second;
1800           assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1801           PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1802         }
1803       }
1804 }
1805 
1806 // A helper for transformScope. Replace the old (placeholder) branch with the
1807 // new (merged) conditional branch.
1808 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1809                                     BasicBlock *EntryBlock,
1810                                     BasicBlock *NewEntryBlock,
1811                                     ValueToValueMapTy &VMap) {
1812   BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1813   assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1814          "SplitBlock did not work correctly!");
1815   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1816          "NewEntryBlock's only pred must be EntryBlock");
1817   assert(VMap.find(NewEntryBlock) != VMap.end() &&
1818          "NewEntryBlock must have been copied");
1819   OldBR->dropAllReferences();
1820   OldBR->eraseFromParent();
1821   // The true predicate is a placeholder. It will be replaced later in
1822   // fixupBranchesAndSelects().
1823   BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1824                                          cast<BasicBlock>(VMap[NewEntryBlock]),
1825                                          ConstantInt::getTrue(F.getContext()));
1826   PreEntryBlock->getInstList().push_back(NewBR);
1827   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1828          "NewEntryBlock's only pred must be EntryBlock");
1829   return NewBR;
1830 }
1831 
1832 // A helper for transformScopes. Create the combined branch condition and
1833 // constant-fold the branches/selects in the hot path.
1834 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1835                                   BasicBlock *PreEntryBlock,
1836                                   BranchInst *MergedBR,
1837                                   uint64_t ProfileCount) {
1838   Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1839   BranchProbability CHRBranchBias(1, 1);
1840   uint64_t NumCHRedBranches = 0;
1841   IRBuilder<> IRB(PreEntryBlock->getTerminator());
1842   for (RegInfo &RI : Scope->CHRRegions) {
1843     Region *R = RI.R;
1844     if (RI.HasBranch) {
1845       fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1846       ++NumCHRedBranches;
1847     }
1848     for (SelectInst *SI : RI.Selects) {
1849       fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1850       ++NumCHRedBranches;
1851     }
1852   }
1853   Stats.NumBranchesDelta += NumCHRedBranches - 1;
1854   Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1855   ORE.emit([&]() {
1856     return OptimizationRemark(DEBUG_TYPE,
1857                               "CHR",
1858                               // Refer to the hot (original) path
1859                               MergedBR->getSuccessor(0)->getTerminator())
1860         << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1861         << " branches or selects";
1862   });
1863   MergedBR->setCondition(MergedCondition);
1864   uint32_t Weights[] = {
1865       static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1866       static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1867   };
1868   MDBuilder MDB(F.getContext());
1869   MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1870   CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1871             << "\n");
1872 }
1873 
1874 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1875 // and constant-fold a branch in the hot path.
1876 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1877                       IRBuilder<> &IRB,
1878                       Value *&MergedCondition,
1879                       BranchProbability &CHRBranchBias) {
1880   bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1881   assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1882          "Must be truthy or falsy");
1883   auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1884   assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1885          "Must be in the bias map");
1886   BranchProbability Bias = BranchBiasMap[R];
1887   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1888   // Take the min.
1889   if (CHRBranchBias > Bias)
1890     CHRBranchBias = Bias;
1891   BasicBlock *IfThen = BI->getSuccessor(1);
1892   BasicBlock *IfElse = BI->getSuccessor(0);
1893   BasicBlock *RegionExitBlock = R->getExit();
1894   assert(RegionExitBlock && "Null ExitBlock");
1895   assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1896          IfThen != IfElse && "Invariant from findScopes");
1897   if (IfThen == RegionExitBlock) {
1898     // Swap them so that IfThen means going into it and IfElse means skipping
1899     // it.
1900     std::swap(IfThen, IfElse);
1901   }
1902   CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1903             << " IfElse " << IfElse->getName() << "\n");
1904   Value *Cond = BI->getCondition();
1905   BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1906   bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1907   addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1908                        MergedCondition);
1909   // Constant-fold the branch at ClonedEntryBlock.
1910   assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1911          "The successor shouldn't change");
1912   Value *NewCondition = ConditionTrue ?
1913                         ConstantInt::getTrue(F.getContext()) :
1914                         ConstantInt::getFalse(F.getContext());
1915   BI->setCondition(NewCondition);
1916 }
1917 
1918 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1919 // and constant-fold a select in the hot path.
1920 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1921                       IRBuilder<> &IRB,
1922                       Value *&MergedCondition,
1923                       BranchProbability &CHRBranchBias) {
1924   bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1925   assert((IsTrueBiased ||
1926           Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1927   assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1928          "Must be in the bias map");
1929   BranchProbability Bias = SelectBiasMap[SI];
1930   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1931   // Take the min.
1932   if (CHRBranchBias > Bias)
1933     CHRBranchBias = Bias;
1934   Value *Cond = SI->getCondition();
1935   addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1936                        MergedCondition);
1937   Value *NewCondition = IsTrueBiased ?
1938                         ConstantInt::getTrue(F.getContext()) :
1939                         ConstantInt::getFalse(F.getContext());
1940   SI->setCondition(NewCondition);
1941 }
1942 
1943 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1944 // condition.
1945 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1946                                Instruction *BranchOrSelect,
1947                                CHRScope *Scope,
1948                                IRBuilder<> &IRB,
1949                                Value *&MergedCondition) {
1950   if (IsTrueBiased) {
1951     MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1952   } else {
1953     // If Cond is an icmp and all users of V except for BranchOrSelect is a
1954     // branch, negate the icmp predicate and swap the branch targets and avoid
1955     // inserting an Xor to negate Cond.
1956     bool Done = false;
1957     if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1958       if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1959         MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1960         Done = true;
1961       }
1962     if (!Done) {
1963       Value *Negate = IRB.CreateXor(
1964           ConstantInt::getTrue(F.getContext()), Cond);
1965       MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1966     }
1967   }
1968 }
1969 
1970 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1971   unsigned I = 0;
1972   DenseSet<PHINode *> TrivialPHIs;
1973   for (CHRScope *Scope : CHRScopes) {
1974     transformScopes(Scope, TrivialPHIs);
1975     CHR_DEBUG(
1976         std::ostringstream oss;
1977         oss << " after transformScopes " << I++;
1978         dumpIR(F, oss.str().c_str(), nullptr));
1979     (void)I;
1980   }
1981 }
1982 
1983 static void LLVM_ATTRIBUTE_UNUSED
1984 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1985   dbgs() << Label << " " << Scopes.size() << "\n";
1986   for (CHRScope *Scope : Scopes) {
1987     dbgs() << *Scope << "\n";
1988   }
1989 }
1990 
1991 bool CHR::run() {
1992   if (!shouldApply(F, PSI))
1993     return false;
1994 
1995   CHR_DEBUG(dumpIR(F, "before", nullptr));
1996 
1997   bool Changed = false;
1998   {
1999     CHR_DEBUG(
2000         dbgs() << "RegionInfo:\n";
2001         RI.print(dbgs()));
2002 
2003     // Recursively traverse the region tree and find regions that have biased
2004     // branches and/or selects and create scopes.
2005     SmallVector<CHRScope *, 8> AllScopes;
2006     findScopes(AllScopes);
2007     CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2008 
2009     // Split the scopes if 1) the conditiona values of the biased
2010     // branches/selects of the inner/lower scope can't be hoisted up to the
2011     // outermost/uppermost scope entry, or 2) the condition values of the biased
2012     // branches/selects in a scope (including subscopes) don't share at least
2013     // one common value.
2014     SmallVector<CHRScope *, 8> SplitScopes;
2015     splitScopes(AllScopes, SplitScopes);
2016     CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2017 
2018     // After splitting, set the biased regions and selects of a scope (a tree
2019     // root) that include those of the subscopes.
2020     classifyBiasedScopes(SplitScopes);
2021     CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2022 
2023     // Filter out the scopes that has only one biased region or select (CHR
2024     // isn't useful in such a case).
2025     SmallVector<CHRScope *, 8> FilteredScopes;
2026     filterScopes(SplitScopes, FilteredScopes);
2027     CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2028 
2029     // Set the regions to be CHR'ed and their hoist stops for each scope.
2030     SmallVector<CHRScope *, 8> SetScopes;
2031     setCHRRegions(FilteredScopes, SetScopes);
2032     CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2033 
2034     // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2035     // ones. We need to apply CHR from outer to inner so that we apply CHR only
2036     // to the hot path, rather than both hot and cold paths.
2037     SmallVector<CHRScope *, 8> SortedScopes;
2038     sortScopes(SetScopes, SortedScopes);
2039     CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2040 
2041     CHR_DEBUG(
2042         dbgs() << "RegionInfo:\n";
2043         RI.print(dbgs()));
2044 
2045     // Apply the CHR transformation.
2046     if (!SortedScopes.empty()) {
2047       transformScopes(SortedScopes);
2048       Changed = true;
2049     }
2050   }
2051 
2052   if (Changed) {
2053     CHR_DEBUG(dumpIR(F, "after", &Stats));
2054     ORE.emit([&]() {
2055       return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2056           << ore::NV("Function", &F) << " "
2057           << "Reduced the number of branches in hot paths by "
2058           << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2059           << " (static) and "
2060           << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2061           << " (weighted by PGO count)";
2062     });
2063   }
2064 
2065   return Changed;
2066 }
2067 
2068 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
2069   BlockFrequencyInfo &BFI =
2070       getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
2071   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2072   ProfileSummaryInfo &PSI =
2073       getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2074   RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
2075   std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
2076       std::make_unique<OptimizationRemarkEmitter>(&F);
2077   return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
2078 }
2079 
2080 namespace llvm {
2081 
2082 ControlHeightReductionPass::ControlHeightReductionPass() {
2083   parseCHRFilterFiles();
2084 }
2085 
2086 PreservedAnalyses ControlHeightReductionPass::run(
2087     Function &F,
2088     FunctionAnalysisManager &FAM) {
2089   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2090   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2091   auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2092   auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2093   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2094   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2095   bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2096   if (!Changed)
2097     return PreservedAnalyses::all();
2098   auto PA = PreservedAnalyses();
2099   PA.preserve<GlobalsAA>();
2100   return PA;
2101 }
2102 
2103 } // namespace llvm
2104