1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/IR/ProfDataUtils.h" 33 #include "llvm/Support/BranchProbability.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/MemoryBuffer.h" 36 #include "llvm/Transforms/Utils.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include "llvm/Transforms/Utils/ValueMapper.h" 40 41 #include <optional> 42 #include <set> 43 #include <sstream> 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "chr" 48 49 #define CHR_DEBUG(X) LLVM_DEBUG(X) 50 51 static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden, 52 cl::desc("Disable CHR for all functions")); 53 54 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 55 cl::desc("Apply CHR for all functions")); 56 57 static cl::opt<double> CHRBiasThreshold( 58 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 59 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 60 61 static cl::opt<unsigned> CHRMergeThreshold( 62 "chr-merge-threshold", cl::init(2), cl::Hidden, 63 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 64 65 static cl::opt<std::string> CHRModuleList( 66 "chr-module-list", cl::init(""), cl::Hidden, 67 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 68 69 static cl::opt<std::string> CHRFunctionList( 70 "chr-function-list", cl::init(""), cl::Hidden, 71 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 72 73 static cl::opt<unsigned> CHRDupThreshsold( 74 "chr-dup-threshold", cl::init(3), cl::Hidden, 75 cl::desc("Max number of duplications by CHR for a region")); 76 77 static StringSet<> CHRModules; 78 static StringSet<> CHRFunctions; 79 80 static void parseCHRFilterFiles() { 81 if (!CHRModuleList.empty()) { 82 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 83 if (!FileOrErr) { 84 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 85 std::exit(1); 86 } 87 StringRef Buf = FileOrErr->get()->getBuffer(); 88 SmallVector<StringRef, 0> Lines; 89 Buf.split(Lines, '\n'); 90 for (StringRef Line : Lines) { 91 Line = Line.trim(); 92 if (!Line.empty()) 93 CHRModules.insert(Line); 94 } 95 } 96 if (!CHRFunctionList.empty()) { 97 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 98 if (!FileOrErr) { 99 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 100 std::exit(1); 101 } 102 StringRef Buf = FileOrErr->get()->getBuffer(); 103 SmallVector<StringRef, 0> Lines; 104 Buf.split(Lines, '\n'); 105 for (StringRef Line : Lines) { 106 Line = Line.trim(); 107 if (!Line.empty()) 108 CHRFunctions.insert(Line); 109 } 110 } 111 } 112 113 namespace { 114 115 struct CHRStats { 116 CHRStats() = default; 117 void print(raw_ostream &OS) const { 118 OS << "CHRStats: NumBranches " << NumBranches 119 << " NumBranchesDelta " << NumBranchesDelta 120 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 121 } 122 // The original number of conditional branches / selects 123 uint64_t NumBranches = 0; 124 // The decrease of the number of conditional branches / selects in the hot 125 // paths due to CHR. 126 uint64_t NumBranchesDelta = 0; 127 // NumBranchesDelta weighted by the profile count at the scope entry. 128 uint64_t WeightedNumBranchesDelta = 0; 129 }; 130 131 // RegInfo - some properties of a Region. 132 struct RegInfo { 133 RegInfo() = default; 134 RegInfo(Region *RegionIn) : R(RegionIn) {} 135 Region *R = nullptr; 136 bool HasBranch = false; 137 SmallVector<SelectInst *, 8> Selects; 138 }; 139 140 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 141 142 // CHRScope - a sequence of regions to CHR together. It corresponds to a 143 // sequence of conditional blocks. It can have subscopes which correspond to 144 // nested conditional blocks. Nested CHRScopes form a tree. 145 class CHRScope { 146 public: 147 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 148 assert(RI.R && "Null RegionIn"); 149 RegInfos.push_back(RI); 150 } 151 152 Region *getParentRegion() { 153 assert(RegInfos.size() > 0 && "Empty CHRScope"); 154 Region *Parent = RegInfos[0].R->getParent(); 155 assert(Parent && "Unexpected to call this on the top-level region"); 156 return Parent; 157 } 158 159 BasicBlock *getEntryBlock() { 160 assert(RegInfos.size() > 0 && "Empty CHRScope"); 161 return RegInfos.front().R->getEntry(); 162 } 163 164 BasicBlock *getExitBlock() { 165 assert(RegInfos.size() > 0 && "Empty CHRScope"); 166 return RegInfos.back().R->getExit(); 167 } 168 169 bool appendable(CHRScope *Next) { 170 // The next scope is appendable only if this scope is directly connected to 171 // it (which implies it post-dominates this scope) and this scope dominates 172 // it (no edge to the next scope outside this scope). 173 BasicBlock *NextEntry = Next->getEntryBlock(); 174 if (getExitBlock() != NextEntry) 175 // Not directly connected. 176 return false; 177 Region *LastRegion = RegInfos.back().R; 178 for (BasicBlock *Pred : predecessors(NextEntry)) 179 if (!LastRegion->contains(Pred)) 180 // There's an edge going into the entry of the next scope from outside 181 // of this scope. 182 return false; 183 return true; 184 } 185 186 void append(CHRScope *Next) { 187 assert(RegInfos.size() > 0 && "Empty CHRScope"); 188 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 189 assert(getParentRegion() == Next->getParentRegion() && 190 "Must be siblings"); 191 assert(getExitBlock() == Next->getEntryBlock() && 192 "Must be adjacent"); 193 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 194 Subs.append(Next->Subs.begin(), Next->Subs.end()); 195 } 196 197 void addSub(CHRScope *SubIn) { 198 #ifndef NDEBUG 199 bool IsChild = false; 200 for (RegInfo &RI : RegInfos) 201 if (RI.R == SubIn->getParentRegion()) { 202 IsChild = true; 203 break; 204 } 205 assert(IsChild && "Must be a child"); 206 #endif 207 Subs.push_back(SubIn); 208 } 209 210 // Split this scope at the boundary region into two, which will belong to the 211 // tail and returns the tail. 212 CHRScope *split(Region *Boundary) { 213 assert(Boundary && "Boundary null"); 214 assert(RegInfos.begin()->R != Boundary && 215 "Can't be split at beginning"); 216 auto BoundaryIt = llvm::find_if( 217 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 218 if (BoundaryIt == RegInfos.end()) 219 return nullptr; 220 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 221 DenseSet<Region *> TailRegionSet; 222 for (const RegInfo &RI : TailRegInfos) 223 TailRegionSet.insert(RI.R); 224 225 auto TailIt = 226 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 227 assert(Sub && "null Sub"); 228 Region *Parent = Sub->getParentRegion(); 229 if (TailRegionSet.count(Parent)) 230 return false; 231 232 assert(llvm::any_of( 233 RegInfos, 234 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 235 "Must be in head"); 236 return true; 237 }); 238 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 239 240 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 241 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 242 RegInfos.erase(BoundaryIt, RegInfos.end()); 243 Subs.erase(TailIt, Subs.end()); 244 return Scope; 245 } 246 247 bool contains(Instruction *I) const { 248 BasicBlock *Parent = I->getParent(); 249 for (const RegInfo &RI : RegInfos) 250 if (RI.R->contains(Parent)) 251 return true; 252 return false; 253 } 254 255 void print(raw_ostream &OS) const; 256 257 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 258 SmallVector<CHRScope *, 8> Subs; // Subscopes. 259 260 // The instruction at which to insert the CHR conditional branch (and hoist 261 // the dependent condition values). 262 Instruction *BranchInsertPoint; 263 264 // True-biased and false-biased regions (conditional blocks), 265 // respectively. Used only for the outermost scope and includes regions in 266 // subscopes. The rest are unbiased. 267 DenseSet<Region *> TrueBiasedRegions; 268 DenseSet<Region *> FalseBiasedRegions; 269 // Among the biased regions, the regions that get CHRed. 270 SmallVector<RegInfo, 8> CHRRegions; 271 272 // True-biased and false-biased selects, respectively. Used only for the 273 // outermost scope and includes ones in subscopes. 274 DenseSet<SelectInst *> TrueBiasedSelects; 275 DenseSet<SelectInst *> FalseBiasedSelects; 276 277 // Map from one of the above regions to the instructions to stop 278 // hoisting instructions at through use-def chains. 279 HoistStopMapTy HoistStopMap; 280 281 private: 282 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 283 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 284 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 285 }; 286 287 class CHR { 288 public: 289 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 290 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 291 OptimizationRemarkEmitter &OREin) 292 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 293 294 ~CHR() { 295 for (CHRScope *Scope : Scopes) { 296 delete Scope; 297 } 298 } 299 300 bool run(); 301 302 private: 303 // See the comments in CHR::run() for the high level flow of the algorithm and 304 // what the following functions do. 305 306 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 307 Region *R = RI.getTopLevelRegion(); 308 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 309 Output.push_back(Scope); 310 } 311 } 312 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 313 SmallVectorImpl<CHRScope *> &Scopes); 314 CHRScope *findScope(Region *R); 315 void checkScopeHoistable(CHRScope *Scope); 316 317 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 318 SmallVectorImpl<CHRScope *> &Output); 319 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 320 CHRScope *Outer, 321 DenseSet<Value *> *OuterConditionValues, 322 Instruction *OuterInsertPoint, 323 SmallVectorImpl<CHRScope *> &Output, 324 DenseSet<Instruction *> &Unhoistables); 325 326 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 327 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 328 329 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 330 SmallVectorImpl<CHRScope *> &Output); 331 332 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 333 SmallVectorImpl<CHRScope *> &Output); 334 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 335 336 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 337 SmallVectorImpl<CHRScope *> &Output); 338 339 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 340 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 341 void cloneScopeBlocks(CHRScope *Scope, 342 BasicBlock *PreEntryBlock, 343 BasicBlock *ExitBlock, 344 Region *LastRegion, 345 ValueToValueMapTy &VMap); 346 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 347 BasicBlock *EntryBlock, 348 BasicBlock *NewEntryBlock, 349 ValueToValueMapTy &VMap); 350 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock, 351 BranchInst *MergedBR, uint64_t ProfileCount); 352 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB, 353 Value *&MergedCondition, BranchProbability &CHRBranchBias); 354 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB, 355 Value *&MergedCondition, BranchProbability &CHRBranchBias); 356 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 357 Instruction *BranchOrSelect, CHRScope *Scope, 358 IRBuilder<> &IRB, Value *&MergedCondition); 359 unsigned getRegionDuplicationCount(const Region *R) { 360 unsigned Count = 0; 361 // Find out how many times region R is cloned. Note that if the parent 362 // of R is cloned, R is also cloned, but R's clone count is not updated 363 // from the clone of the parent. We need to accumlate all the counts 364 // from the ancestors to get the clone count. 365 while (R) { 366 Count += DuplicationCount[R]; 367 R = R->getParent(); 368 } 369 return Count; 370 } 371 372 Function &F; 373 BlockFrequencyInfo &BFI; 374 DominatorTree &DT; 375 ProfileSummaryInfo &PSI; 376 RegionInfo &RI; 377 OptimizationRemarkEmitter &ORE; 378 CHRStats Stats; 379 380 // All the true-biased regions in the function 381 DenseSet<Region *> TrueBiasedRegionsGlobal; 382 // All the false-biased regions in the function 383 DenseSet<Region *> FalseBiasedRegionsGlobal; 384 // All the true-biased selects in the function 385 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 386 // All the false-biased selects in the function 387 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 388 // A map from biased regions to their branch bias 389 DenseMap<Region *, BranchProbability> BranchBiasMap; 390 // A map from biased selects to their branch bias 391 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 392 // All the scopes. 393 DenseSet<CHRScope *> Scopes; 394 // This maps records how many times this region is cloned. 395 DenseMap<const Region *, unsigned> DuplicationCount; 396 }; 397 398 } // end anonymous namespace 399 400 static inline 401 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 402 const CHRStats &Stats) { 403 Stats.print(OS); 404 return OS; 405 } 406 407 static inline 408 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 409 Scope.print(OS); 410 return OS; 411 } 412 413 static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { 414 if (DisableCHR) 415 return false; 416 417 if (ForceCHR) 418 return true; 419 420 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 421 if (CHRModules.count(F.getParent()->getName())) 422 return true; 423 return CHRFunctions.count(F.getName()); 424 } 425 426 return PSI.isFunctionEntryHot(&F); 427 } 428 429 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 430 CHRStats *Stats) { 431 StringRef FuncName = F.getName(); 432 StringRef ModuleName = F.getParent()->getName(); 433 (void)(FuncName); // Unused in release build. 434 (void)(ModuleName); // Unused in release build. 435 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 436 << FuncName); 437 if (Stats) 438 CHR_DEBUG(dbgs() << " " << *Stats); 439 CHR_DEBUG(dbgs() << "\n"); 440 CHR_DEBUG(F.dump()); 441 } 442 443 void CHRScope::print(raw_ostream &OS) const { 444 assert(RegInfos.size() > 0 && "Empty CHRScope"); 445 OS << "CHRScope["; 446 OS << RegInfos.size() << ", Regions["; 447 for (const RegInfo &RI : RegInfos) { 448 OS << RI.R->getNameStr(); 449 if (RI.HasBranch) 450 OS << " B"; 451 if (RI.Selects.size() > 0) 452 OS << " S" << RI.Selects.size(); 453 OS << ", "; 454 } 455 if (RegInfos[0].R->getParent()) { 456 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 457 } else { 458 // top level region 459 OS << "]"; 460 } 461 OS << ", Subs["; 462 for (CHRScope *Sub : Subs) { 463 OS << *Sub << ", "; 464 } 465 OS << "]]"; 466 } 467 468 // Return true if the given instruction type can be hoisted by CHR. 469 static bool isHoistableInstructionType(Instruction *I) { 470 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 471 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 472 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 473 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 474 isa<InsertValueInst>(I); 475 } 476 477 // Return true if the given instruction can be hoisted by CHR. 478 static bool isHoistable(Instruction *I, DominatorTree &DT) { 479 if (!isHoistableInstructionType(I)) 480 return false; 481 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT); 482 } 483 484 // Recursively traverse the use-def chains of the given value and return a set 485 // of the unhoistable base values defined within the scope (excluding the 486 // first-region entry block) or the (hoistable or unhoistable) base values that 487 // are defined outside (including the first-region entry block) of the 488 // scope. The returned set doesn't include constants. 489 static const std::set<Value *> & 490 getBaseValues(Value *V, DominatorTree &DT, 491 DenseMap<Value *, std::set<Value *>> &Visited) { 492 auto It = Visited.find(V); 493 if (It != Visited.end()) { 494 return It->second; 495 } 496 std::set<Value *> Result; 497 if (auto *I = dyn_cast<Instruction>(V)) { 498 // We don't stop at a block that's not in the Scope because we would miss 499 // some instructions that are based on the same base values if we stop 500 // there. 501 if (!isHoistable(I, DT)) { 502 Result.insert(I); 503 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 504 } 505 // I is hoistable above the Scope. 506 for (Value *Op : I->operands()) { 507 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 508 Result.insert(OpResult.begin(), OpResult.end()); 509 } 510 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 511 } 512 if (isa<Argument>(V)) { 513 Result.insert(V); 514 } 515 // We don't include others like constants because those won't lead to any 516 // chance of folding of conditions (eg two bit checks merged into one check) 517 // after CHR. 518 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 519 } 520 521 // Return true if V is already hoisted or can be hoisted (along with its 522 // operands) above the insert point. When it returns true and HoistStops is 523 // non-null, the instructions to stop hoisting at through the use-def chains are 524 // inserted into HoistStops. 525 static bool 526 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 527 DenseSet<Instruction *> &Unhoistables, 528 DenseSet<Instruction *> *HoistStops, 529 DenseMap<Instruction *, bool> &Visited) { 530 assert(InsertPoint && "Null InsertPoint"); 531 if (auto *I = dyn_cast<Instruction>(V)) { 532 auto It = Visited.find(I); 533 if (It != Visited.end()) { 534 return It->second; 535 } 536 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 537 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 538 if (Unhoistables.count(I)) { 539 // Don't hoist if they are not to be hoisted. 540 Visited[I] = false; 541 return false; 542 } 543 if (DT.dominates(I, InsertPoint)) { 544 // We are already above the insert point. Stop here. 545 if (HoistStops) 546 HoistStops->insert(I); 547 Visited[I] = true; 548 return true; 549 } 550 // We aren't not above the insert point, check if we can hoist it above the 551 // insert point. 552 if (isHoistable(I, DT)) { 553 // Check operands first. 554 DenseSet<Instruction *> OpsHoistStops; 555 bool AllOpsHoisted = true; 556 for (Value *Op : I->operands()) { 557 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 558 Visited)) { 559 AllOpsHoisted = false; 560 break; 561 } 562 } 563 if (AllOpsHoisted) { 564 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 565 if (HoistStops) 566 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 567 Visited[I] = true; 568 return true; 569 } 570 } 571 Visited[I] = false; 572 return false; 573 } 574 // Non-instructions are considered hoistable. 575 return true; 576 } 577 578 // Constructs the true and false branch probabilities if the the instruction has 579 // valid branch weights. Returns true when this was successful, false otherwise. 580 static bool extractBranchProbabilities(Instruction *I, 581 BranchProbability &TrueProb, 582 BranchProbability &FalseProb) { 583 uint64_t TrueWeight; 584 uint64_t FalseWeight; 585 if (!extractBranchWeights(*I, TrueWeight, FalseWeight)) 586 return false; 587 uint64_t SumWeight = TrueWeight + FalseWeight; 588 589 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight && 590 "Overflow calculating branch probabilities."); 591 592 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 593 if (SumWeight == 0) 594 return false; 595 596 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight); 597 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight); 598 return true; 599 } 600 601 static BranchProbability getCHRBiasThreshold() { 602 return BranchProbability::getBranchProbability( 603 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 604 } 605 606 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 607 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 608 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 609 // false. 610 template <typename K, typename S, typename M> 611 static bool checkBias(K *Key, BranchProbability TrueProb, 612 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 613 M &BiasMap) { 614 BranchProbability Threshold = getCHRBiasThreshold(); 615 if (TrueProb >= Threshold) { 616 TrueSet.insert(Key); 617 BiasMap[Key] = TrueProb; 618 return true; 619 } else if (FalseProb >= Threshold) { 620 FalseSet.insert(Key); 621 BiasMap[Key] = FalseProb; 622 return true; 623 } 624 return false; 625 } 626 627 // Returns true and insert a region into the right biased set and the map if the 628 // branch of the region is biased. 629 static bool checkBiasedBranch(BranchInst *BI, Region *R, 630 DenseSet<Region *> &TrueBiasedRegionsGlobal, 631 DenseSet<Region *> &FalseBiasedRegionsGlobal, 632 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 633 if (!BI->isConditional()) 634 return false; 635 BranchProbability ThenProb, ElseProb; 636 if (!extractBranchProbabilities(BI, ThenProb, ElseProb)) 637 return false; 638 BasicBlock *IfThen = BI->getSuccessor(0); 639 BasicBlock *IfElse = BI->getSuccessor(1); 640 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 641 IfThen != IfElse && 642 "Invariant from findScopes"); 643 if (IfThen == R->getExit()) { 644 // Swap them so that IfThen/ThenProb means going into the conditional code 645 // and IfElse/ElseProb means skipping it. 646 std::swap(IfThen, IfElse); 647 std::swap(ThenProb, ElseProb); 648 } 649 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 650 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 651 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 652 return checkBias(R, ThenProb, ElseProb, 653 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 654 BranchBiasMap); 655 } 656 657 // Returns true and insert a select into the right biased set and the map if the 658 // select is biased. 659 static bool checkBiasedSelect( 660 SelectInst *SI, Region *R, 661 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 662 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 663 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 664 BranchProbability TrueProb, FalseProb; 665 if (!extractBranchProbabilities(SI, TrueProb, FalseProb)) 666 return false; 667 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 668 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 669 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 670 return checkBias(SI, TrueProb, FalseProb, 671 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 672 SelectBiasMap); 673 } 674 675 // Returns the instruction at which to hoist the dependent condition values and 676 // insert the CHR branch for a region. This is the terminator branch in the 677 // entry block or the first select in the entry block, if any. 678 static Instruction* getBranchInsertPoint(RegInfo &RI) { 679 Region *R = RI.R; 680 BasicBlock *EntryBB = R->getEntry(); 681 // The hoist point is by default the terminator of the entry block, which is 682 // the same as the branch instruction if RI.HasBranch is true. 683 Instruction *HoistPoint = EntryBB->getTerminator(); 684 for (SelectInst *SI : RI.Selects) { 685 if (SI->getParent() == EntryBB) { 686 // Pick the first select in Selects in the entry block. Note Selects is 687 // sorted in the instruction order within a block (asserted below). 688 HoistPoint = SI; 689 break; 690 } 691 } 692 assert(HoistPoint && "Null HoistPoint"); 693 #ifndef NDEBUG 694 // Check that HoistPoint is the first one in Selects in the entry block, 695 // if any. 696 DenseSet<Instruction *> EntryBlockSelectSet; 697 for (SelectInst *SI : RI.Selects) { 698 if (SI->getParent() == EntryBB) { 699 EntryBlockSelectSet.insert(SI); 700 } 701 } 702 for (Instruction &I : *EntryBB) { 703 if (EntryBlockSelectSet.contains(&I)) { 704 assert(&I == HoistPoint && 705 "HoistPoint must be the first one in Selects"); 706 break; 707 } 708 } 709 #endif 710 return HoistPoint; 711 } 712 713 // Find a CHR scope in the given region. 714 CHRScope * CHR::findScope(Region *R) { 715 CHRScope *Result = nullptr; 716 BasicBlock *Entry = R->getEntry(); 717 BasicBlock *Exit = R->getExit(); // null if top level. 718 assert(Entry && "Entry must not be null"); 719 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 720 "Only top level region has a null exit"); 721 if (Entry) 722 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 723 else 724 CHR_DEBUG(dbgs() << "Entry null\n"); 725 if (Exit) 726 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 727 else 728 CHR_DEBUG(dbgs() << "Exit null\n"); 729 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 730 // to this region). 731 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 732 if (EntryInSubregion) 733 return nullptr; 734 // Exclude loops 735 for (BasicBlock *Pred : predecessors(Entry)) 736 if (R->contains(Pred)) 737 return nullptr; 738 // If any of the basic blocks have address taken, we must skip this region 739 // because we cannot clone basic blocks that have address taken. 740 for (BasicBlock *BB : R->blocks()) { 741 if (BB->hasAddressTaken()) 742 return nullptr; 743 // If we encounter llvm.coro.id, skip this region because if the basic block 744 // is cloned, we end up inserting a token type PHI node to the block with 745 // llvm.coro.begin. 746 // FIXME: This could lead to less optimal codegen, because the region is 747 // excluded, it can prevent CHR from merging adjacent regions into bigger 748 // scope and hoisting more branches. 749 for (Instruction &I : *BB) 750 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 751 if (II->getIntrinsicID() == Intrinsic::coro_id) 752 return nullptr; 753 } 754 755 if (Exit) { 756 // Try to find an if-then block (check if R is an if-then). 757 // if (cond) { 758 // ... 759 // } 760 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 761 if (BI) 762 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 763 else 764 CHR_DEBUG(dbgs() << "BI null\n"); 765 if (BI && BI->isConditional()) { 766 BasicBlock *S0 = BI->getSuccessor(0); 767 BasicBlock *S1 = BI->getSuccessor(1); 768 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 769 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 770 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 771 RegInfo RI(R); 772 RI.HasBranch = checkBiasedBranch( 773 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 774 BranchBiasMap); 775 Result = new CHRScope(RI); 776 Scopes.insert(Result); 777 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 778 ++Stats.NumBranches; 779 if (!RI.HasBranch) { 780 ORE.emit([&]() { 781 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 782 << "Branch not biased"; 783 }); 784 } 785 } 786 } 787 } 788 { 789 // Try to look for selects in the direct child blocks (as opposed to in 790 // subregions) of R. 791 // ... 792 // if (..) { // Some subregion 793 // ... 794 // } 795 // if (..) { // Some subregion 796 // ... 797 // } 798 // ... 799 // a = cond ? b : c; 800 // ... 801 SmallVector<SelectInst *, 8> Selects; 802 for (RegionNode *E : R->elements()) { 803 if (E->isSubRegion()) 804 continue; 805 // This returns the basic block of E if E is a direct child of R (not a 806 // subregion.) 807 BasicBlock *BB = E->getEntry(); 808 // Need to push in the order to make it easier to find the first Select 809 // later. 810 for (Instruction &I : *BB) { 811 if (auto *SI = dyn_cast<SelectInst>(&I)) { 812 Selects.push_back(SI); 813 ++Stats.NumBranches; 814 } 815 } 816 } 817 if (Selects.size() > 0) { 818 auto AddSelects = [&](RegInfo &RI) { 819 for (auto *SI : Selects) 820 if (checkBiasedSelect(SI, RI.R, 821 TrueBiasedSelectsGlobal, 822 FalseBiasedSelectsGlobal, 823 SelectBiasMap)) 824 RI.Selects.push_back(SI); 825 else 826 ORE.emit([&]() { 827 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 828 << "Select not biased"; 829 }); 830 }; 831 if (!Result) { 832 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 833 RegInfo RI(R); 834 AddSelects(RI); 835 Result = new CHRScope(RI); 836 Scopes.insert(Result); 837 } else { 838 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 839 AddSelects(Result->RegInfos[0]); 840 } 841 } 842 } 843 844 if (Result) { 845 checkScopeHoistable(Result); 846 } 847 return Result; 848 } 849 850 // Check that any of the branch and the selects in the region could be 851 // hoisted above the the CHR branch insert point (the most dominating of 852 // them, either the branch (at the end of the first block) or the first 853 // select in the first block). If the branch can't be hoisted, drop the 854 // selects in the first blocks. 855 // 856 // For example, for the following scope/region with selects, we want to insert 857 // the merged branch right before the first select in the first/entry block by 858 // hoisting c1, c2, c3, and c4. 859 // 860 // // Branch insert point here. 861 // a = c1 ? b : c; // Select 1 862 // d = c2 ? e : f; // Select 2 863 // if (c3) { // Branch 864 // ... 865 // c4 = foo() // A call. 866 // g = c4 ? h : i; // Select 3 867 // } 868 // 869 // But suppose we can't hoist c4 because it's dependent on the preceding 870 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 871 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 872 void CHR::checkScopeHoistable(CHRScope *Scope) { 873 RegInfo &RI = Scope->RegInfos[0]; 874 Region *R = RI.R; 875 BasicBlock *EntryBB = R->getEntry(); 876 auto *Branch = RI.HasBranch ? 877 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 878 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 879 if (RI.HasBranch || !Selects.empty()) { 880 Instruction *InsertPoint = getBranchInsertPoint(RI); 881 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 882 // Avoid a data dependence from a select or a branch to a(nother) 883 // select. Note no instruction can't data-depend on a branch (a branch 884 // instruction doesn't produce a value). 885 DenseSet<Instruction *> Unhoistables; 886 // Initialize Unhoistables with the selects. 887 for (SelectInst *SI : Selects) { 888 Unhoistables.insert(SI); 889 } 890 // Remove Selects that can't be hoisted. 891 for (auto it = Selects.begin(); it != Selects.end(); ) { 892 SelectInst *SI = *it; 893 if (SI == InsertPoint) { 894 ++it; 895 continue; 896 } 897 DenseMap<Instruction *, bool> Visited; 898 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 899 DT, Unhoistables, nullptr, Visited); 900 if (!IsHoistable) { 901 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 902 ORE.emit([&]() { 903 return OptimizationRemarkMissed(DEBUG_TYPE, 904 "DropUnhoistableSelect", SI) 905 << "Dropped unhoistable select"; 906 }); 907 it = Selects.erase(it); 908 // Since we are dropping the select here, we also drop it from 909 // Unhoistables. 910 Unhoistables.erase(SI); 911 } else 912 ++it; 913 } 914 // Update InsertPoint after potentially removing selects. 915 InsertPoint = getBranchInsertPoint(RI); 916 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 917 if (RI.HasBranch && InsertPoint != Branch) { 918 DenseMap<Instruction *, bool> Visited; 919 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 920 DT, Unhoistables, nullptr, Visited); 921 if (!IsHoistable) { 922 // If the branch isn't hoistable, drop the selects in the entry 923 // block, preferring the branch, which makes the branch the hoist 924 // point. 925 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 926 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 927 CHR_DEBUG( 928 for (SelectInst *SI : Selects) { 929 dbgs() << "SI " << *SI << "\n"; 930 }); 931 for (SelectInst *SI : Selects) { 932 ORE.emit([&]() { 933 return OptimizationRemarkMissed(DEBUG_TYPE, 934 "DropSelectUnhoistableBranch", SI) 935 << "Dropped select due to unhoistable branch"; 936 }); 937 } 938 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 939 return SI->getParent() == EntryBB; 940 }); 941 Unhoistables.clear(); 942 InsertPoint = Branch; 943 } 944 } 945 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 946 #ifndef NDEBUG 947 if (RI.HasBranch) { 948 assert(!DT.dominates(Branch, InsertPoint) && 949 "Branch can't be already above the hoist point"); 950 DenseMap<Instruction *, bool> Visited; 951 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 952 DT, Unhoistables, nullptr, Visited) && 953 "checkHoistValue for branch"); 954 } 955 for (auto *SI : Selects) { 956 assert(!DT.dominates(SI, InsertPoint) && 957 "SI can't be already above the hoist point"); 958 DenseMap<Instruction *, bool> Visited; 959 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 960 Unhoistables, nullptr, Visited) && 961 "checkHoistValue for selects"); 962 } 963 CHR_DEBUG(dbgs() << "Result\n"); 964 if (RI.HasBranch) { 965 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 966 } 967 for (auto *SI : Selects) { 968 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 969 } 970 #endif 971 } 972 } 973 974 // Traverse the region tree, find all nested scopes and merge them if possible. 975 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 976 SmallVectorImpl<CHRScope *> &Scopes) { 977 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 978 CHRScope *Result = findScope(R); 979 // Visit subscopes. 980 CHRScope *ConsecutiveSubscope = nullptr; 981 SmallVector<CHRScope *, 8> Subscopes; 982 for (auto It = R->begin(); It != R->end(); ++It) { 983 const std::unique_ptr<Region> &SubR = *It; 984 auto NextIt = std::next(It); 985 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 986 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 987 << "\n"); 988 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 989 if (SubCHRScope) { 990 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 991 } else { 992 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 993 } 994 if (SubCHRScope) { 995 if (!ConsecutiveSubscope) 996 ConsecutiveSubscope = SubCHRScope; 997 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 998 Subscopes.push_back(ConsecutiveSubscope); 999 ConsecutiveSubscope = SubCHRScope; 1000 } else 1001 ConsecutiveSubscope->append(SubCHRScope); 1002 } else { 1003 if (ConsecutiveSubscope) { 1004 Subscopes.push_back(ConsecutiveSubscope); 1005 } 1006 ConsecutiveSubscope = nullptr; 1007 } 1008 } 1009 if (ConsecutiveSubscope) { 1010 Subscopes.push_back(ConsecutiveSubscope); 1011 } 1012 for (CHRScope *Sub : Subscopes) { 1013 if (Result) { 1014 // Combine it with the parent. 1015 Result->addSub(Sub); 1016 } else { 1017 // Push Subscopes as they won't be combined with the parent. 1018 Scopes.push_back(Sub); 1019 } 1020 } 1021 return Result; 1022 } 1023 1024 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1025 DenseSet<Value *> ConditionValues; 1026 if (RI.HasBranch) { 1027 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1028 ConditionValues.insert(BI->getCondition()); 1029 } 1030 for (SelectInst *SI : RI.Selects) { 1031 ConditionValues.insert(SI->getCondition()); 1032 } 1033 return ConditionValues; 1034 } 1035 1036 1037 // Determine whether to split a scope depending on the sets of the branch 1038 // condition values of the previous region and the current region. We split 1039 // (return true) it if 1) the condition values of the inner/lower scope can't be 1040 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1041 // values have an empty intersection (because the combined branch conditions 1042 // won't probably lead to a simpler combined condition). 1043 static bool shouldSplit(Instruction *InsertPoint, 1044 DenseSet<Value *> &PrevConditionValues, 1045 DenseSet<Value *> &ConditionValues, 1046 DominatorTree &DT, 1047 DenseSet<Instruction *> &Unhoistables) { 1048 assert(InsertPoint && "Null InsertPoint"); 1049 CHR_DEBUG( 1050 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1051 for (Value *V : PrevConditionValues) { 1052 dbgs() << *V << ", "; 1053 } 1054 dbgs() << " ConditionValues "; 1055 for (Value *V : ConditionValues) { 1056 dbgs() << *V << ", "; 1057 } 1058 dbgs() << "\n"); 1059 // If any of Bases isn't hoistable to the hoist point, split. 1060 for (Value *V : ConditionValues) { 1061 DenseMap<Instruction *, bool> Visited; 1062 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1063 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1064 return true; // Not hoistable, split. 1065 } 1066 } 1067 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1068 // unnecessary splits at scopes with no branch/selects. If 1069 // PrevConditionValues and ConditionValues don't intersect at all, split. 1070 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1071 // Use std::set as DenseSet doesn't work with set_intersection. 1072 std::set<Value *> PrevBases, Bases; 1073 DenseMap<Value *, std::set<Value *>> Visited; 1074 for (Value *V : PrevConditionValues) { 1075 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1076 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1077 } 1078 for (Value *V : ConditionValues) { 1079 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1080 Bases.insert(BaseValues.begin(), BaseValues.end()); 1081 } 1082 CHR_DEBUG( 1083 dbgs() << "PrevBases "; 1084 for (Value *V : PrevBases) { 1085 dbgs() << *V << ", "; 1086 } 1087 dbgs() << " Bases "; 1088 for (Value *V : Bases) { 1089 dbgs() << *V << ", "; 1090 } 1091 dbgs() << "\n"); 1092 std::vector<Value *> Intersection; 1093 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1094 Bases.end(), std::back_inserter(Intersection)); 1095 if (Intersection.empty()) { 1096 // Empty intersection, split. 1097 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1098 return true; 1099 } 1100 } 1101 CHR_DEBUG(dbgs() << "No split\n"); 1102 return false; // Don't split. 1103 } 1104 1105 static void getSelectsInScope(CHRScope *Scope, 1106 DenseSet<Instruction *> &Output) { 1107 for (RegInfo &RI : Scope->RegInfos) 1108 for (SelectInst *SI : RI.Selects) 1109 Output.insert(SI); 1110 for (CHRScope *Sub : Scope->Subs) 1111 getSelectsInScope(Sub, Output); 1112 } 1113 1114 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1115 SmallVectorImpl<CHRScope *> &Output) { 1116 for (CHRScope *Scope : Input) { 1117 assert(!Scope->BranchInsertPoint && 1118 "BranchInsertPoint must not be set"); 1119 DenseSet<Instruction *> Unhoistables; 1120 getSelectsInScope(Scope, Unhoistables); 1121 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1122 } 1123 #ifndef NDEBUG 1124 for (CHRScope *Scope : Output) { 1125 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1126 } 1127 #endif 1128 } 1129 1130 SmallVector<CHRScope *, 8> CHR::splitScope( 1131 CHRScope *Scope, 1132 CHRScope *Outer, 1133 DenseSet<Value *> *OuterConditionValues, 1134 Instruction *OuterInsertPoint, 1135 SmallVectorImpl<CHRScope *> &Output, 1136 DenseSet<Instruction *> &Unhoistables) { 1137 if (Outer) { 1138 assert(OuterConditionValues && "Null OuterConditionValues"); 1139 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1140 } 1141 bool PrevSplitFromOuter = true; 1142 DenseSet<Value *> PrevConditionValues; 1143 Instruction *PrevInsertPoint = nullptr; 1144 SmallVector<CHRScope *, 8> Splits; 1145 SmallVector<bool, 8> SplitsSplitFromOuter; 1146 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1147 SmallVector<Instruction *, 8> SplitsInsertPoints; 1148 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1149 for (RegInfo &RI : RegInfos) { 1150 Instruction *InsertPoint = getBranchInsertPoint(RI); 1151 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1152 CHR_DEBUG( 1153 dbgs() << "ConditionValues "; 1154 for (Value *V : ConditionValues) { 1155 dbgs() << *V << ", "; 1156 } 1157 dbgs() << "\n"); 1158 if (RI.R == RegInfos[0].R) { 1159 // First iteration. Check to see if we should split from the outer. 1160 if (Outer) { 1161 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1162 CHR_DEBUG(dbgs() << "Should split from outer at " 1163 << RI.R->getNameStr() << "\n"); 1164 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1165 ConditionValues, DT, Unhoistables)) { 1166 PrevConditionValues = ConditionValues; 1167 PrevInsertPoint = InsertPoint; 1168 ORE.emit([&]() { 1169 return OptimizationRemarkMissed(DEBUG_TYPE, 1170 "SplitScopeFromOuter", 1171 RI.R->getEntry()->getTerminator()) 1172 << "Split scope from outer due to unhoistable branch/select " 1173 << "and/or lack of common condition values"; 1174 }); 1175 } else { 1176 // Not splitting from the outer. Use the outer bases and insert 1177 // point. Union the bases. 1178 PrevSplitFromOuter = false; 1179 PrevConditionValues = *OuterConditionValues; 1180 PrevConditionValues.insert(ConditionValues.begin(), 1181 ConditionValues.end()); 1182 PrevInsertPoint = OuterInsertPoint; 1183 } 1184 } else { 1185 CHR_DEBUG(dbgs() << "Outer null\n"); 1186 PrevConditionValues = ConditionValues; 1187 PrevInsertPoint = InsertPoint; 1188 } 1189 } else { 1190 CHR_DEBUG(dbgs() << "Should split from prev at " 1191 << RI.R->getNameStr() << "\n"); 1192 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1193 DT, Unhoistables)) { 1194 CHRScope *Tail = Scope->split(RI.R); 1195 Scopes.insert(Tail); 1196 Splits.push_back(Scope); 1197 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1198 SplitsConditionValues.push_back(PrevConditionValues); 1199 SplitsInsertPoints.push_back(PrevInsertPoint); 1200 Scope = Tail; 1201 PrevConditionValues = ConditionValues; 1202 PrevInsertPoint = InsertPoint; 1203 PrevSplitFromOuter = true; 1204 ORE.emit([&]() { 1205 return OptimizationRemarkMissed(DEBUG_TYPE, 1206 "SplitScopeFromPrev", 1207 RI.R->getEntry()->getTerminator()) 1208 << "Split scope from previous due to unhoistable branch/select " 1209 << "and/or lack of common condition values"; 1210 }); 1211 } else { 1212 // Not splitting. Union the bases. Keep the hoist point. 1213 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1214 } 1215 } 1216 } 1217 Splits.push_back(Scope); 1218 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1219 SplitsConditionValues.push_back(PrevConditionValues); 1220 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1221 SplitsInsertPoints.push_back(PrevInsertPoint); 1222 assert(Splits.size() == SplitsConditionValues.size() && 1223 Splits.size() == SplitsSplitFromOuter.size() && 1224 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1225 for (size_t I = 0; I < Splits.size(); ++I) { 1226 CHRScope *Split = Splits[I]; 1227 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1228 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1229 SmallVector<CHRScope *, 8> NewSubs; 1230 DenseSet<Instruction *> SplitUnhoistables; 1231 getSelectsInScope(Split, SplitUnhoistables); 1232 for (CHRScope *Sub : Split->Subs) { 1233 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1234 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1235 SplitUnhoistables); 1236 llvm::append_range(NewSubs, SubSplits); 1237 } 1238 Split->Subs = NewSubs; 1239 } 1240 SmallVector<CHRScope *, 8> Result; 1241 for (size_t I = 0; I < Splits.size(); ++I) { 1242 CHRScope *Split = Splits[I]; 1243 if (SplitsSplitFromOuter[I]) { 1244 // Split from the outer. 1245 Output.push_back(Split); 1246 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1247 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1248 << "\n"); 1249 } else { 1250 // Connected to the outer. 1251 Result.push_back(Split); 1252 } 1253 } 1254 if (!Outer) 1255 assert(Result.empty() && 1256 "If no outer (top-level), must return no nested ones"); 1257 return Result; 1258 } 1259 1260 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1261 for (CHRScope *Scope : Scopes) { 1262 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1263 classifyBiasedScopes(Scope, Scope); 1264 CHR_DEBUG( 1265 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1266 dbgs() << "TrueBiasedRegions "; 1267 for (Region *R : Scope->TrueBiasedRegions) { 1268 dbgs() << R->getNameStr() << ", "; 1269 } 1270 dbgs() << "\n"; 1271 dbgs() << "FalseBiasedRegions "; 1272 for (Region *R : Scope->FalseBiasedRegions) { 1273 dbgs() << R->getNameStr() << ", "; 1274 } 1275 dbgs() << "\n"; 1276 dbgs() << "TrueBiasedSelects "; 1277 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1278 dbgs() << *SI << ", "; 1279 } 1280 dbgs() << "\n"; 1281 dbgs() << "FalseBiasedSelects "; 1282 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1283 dbgs() << *SI << ", "; 1284 } 1285 dbgs() << "\n";); 1286 } 1287 } 1288 1289 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1290 for (RegInfo &RI : Scope->RegInfos) { 1291 if (RI.HasBranch) { 1292 Region *R = RI.R; 1293 if (TrueBiasedRegionsGlobal.contains(R)) 1294 OutermostScope->TrueBiasedRegions.insert(R); 1295 else if (FalseBiasedRegionsGlobal.contains(R)) 1296 OutermostScope->FalseBiasedRegions.insert(R); 1297 else 1298 llvm_unreachable("Must be biased"); 1299 } 1300 for (SelectInst *SI : RI.Selects) { 1301 if (TrueBiasedSelectsGlobal.contains(SI)) 1302 OutermostScope->TrueBiasedSelects.insert(SI); 1303 else if (FalseBiasedSelectsGlobal.contains(SI)) 1304 OutermostScope->FalseBiasedSelects.insert(SI); 1305 else 1306 llvm_unreachable("Must be biased"); 1307 } 1308 } 1309 for (CHRScope *Sub : Scope->Subs) { 1310 classifyBiasedScopes(Sub, OutermostScope); 1311 } 1312 } 1313 1314 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1315 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1316 Scope->FalseBiasedRegions.size() + 1317 Scope->TrueBiasedSelects.size() + 1318 Scope->FalseBiasedSelects.size(); 1319 return NumBiased >= CHRMergeThreshold; 1320 } 1321 1322 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1323 SmallVectorImpl<CHRScope *> &Output) { 1324 for (CHRScope *Scope : Input) { 1325 // Filter out the ones with only one region and no subs. 1326 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1327 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1328 << Scope->TrueBiasedRegions.size() 1329 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1330 << " true-selects " << Scope->TrueBiasedSelects.size() 1331 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1332 ORE.emit([&]() { 1333 return OptimizationRemarkMissed( 1334 DEBUG_TYPE, 1335 "DropScopeWithOneBranchOrSelect", 1336 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1337 << "Drop scope with < " 1338 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1339 << " biased branch(es) or select(s)"; 1340 }); 1341 continue; 1342 } 1343 Output.push_back(Scope); 1344 } 1345 } 1346 1347 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1348 SmallVectorImpl<CHRScope *> &Output) { 1349 for (CHRScope *Scope : Input) { 1350 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1351 "Empty"); 1352 setCHRRegions(Scope, Scope); 1353 Output.push_back(Scope); 1354 CHR_DEBUG( 1355 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1356 for (auto pair : Scope->HoistStopMap) { 1357 Region *R = pair.first; 1358 dbgs() << "Region " << R->getNameStr() << "\n"; 1359 for (Instruction *I : pair.second) { 1360 dbgs() << "HoistStop " << *I << "\n"; 1361 } 1362 } 1363 dbgs() << "CHRRegions" << "\n"; 1364 for (RegInfo &RI : Scope->CHRRegions) { 1365 dbgs() << RI.R->getNameStr() << "\n"; 1366 }); 1367 } 1368 } 1369 1370 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1371 DenseSet<Instruction *> Unhoistables; 1372 // Put the biased selects in Unhoistables because they should stay where they 1373 // are and constant-folded after CHR (in case one biased select or a branch 1374 // can depend on another biased select.) 1375 for (RegInfo &RI : Scope->RegInfos) { 1376 for (SelectInst *SI : RI.Selects) { 1377 Unhoistables.insert(SI); 1378 } 1379 } 1380 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1381 for (RegInfo &RI : Scope->RegInfos) { 1382 Region *R = RI.R; 1383 DenseSet<Instruction *> HoistStops; 1384 bool IsHoisted = false; 1385 if (RI.HasBranch) { 1386 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1387 OutermostScope->FalseBiasedRegions.contains(R)) && 1388 "Must be truthy or falsy"); 1389 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1390 // Note checkHoistValue fills in HoistStops. 1391 DenseMap<Instruction *, bool> Visited; 1392 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1393 Unhoistables, &HoistStops, Visited); 1394 assert(IsHoistable && "Must be hoistable"); 1395 (void)(IsHoistable); // Unused in release build 1396 IsHoisted = true; 1397 } 1398 for (SelectInst *SI : RI.Selects) { 1399 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1400 OutermostScope->FalseBiasedSelects.contains(SI)) && 1401 "Must be true or false biased"); 1402 // Note checkHoistValue fills in HoistStops. 1403 DenseMap<Instruction *, bool> Visited; 1404 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1405 Unhoistables, &HoistStops, Visited); 1406 assert(IsHoistable && "Must be hoistable"); 1407 (void)(IsHoistable); // Unused in release build 1408 IsHoisted = true; 1409 } 1410 if (IsHoisted) { 1411 OutermostScope->CHRRegions.push_back(RI); 1412 OutermostScope->HoistStopMap[R] = HoistStops; 1413 } 1414 } 1415 for (CHRScope *Sub : Scope->Subs) 1416 setCHRRegions(Sub, OutermostScope); 1417 } 1418 1419 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1420 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1421 } 1422 1423 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1424 SmallVectorImpl<CHRScope *> &Output) { 1425 Output.resize(Input.size()); 1426 llvm::copy(Input, Output.begin()); 1427 llvm::stable_sort(Output, CHRScopeSorter); 1428 } 1429 1430 // Return true if V is already hoisted or was hoisted (along with its operands) 1431 // to the insert point. 1432 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1433 HoistStopMapTy &HoistStopMap, 1434 DenseSet<Instruction *> &HoistedSet, 1435 DenseSet<PHINode *> &TrivialPHIs, 1436 DominatorTree &DT) { 1437 auto IT = HoistStopMap.find(R); 1438 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1439 DenseSet<Instruction *> &HoistStops = IT->second; 1440 if (auto *I = dyn_cast<Instruction>(V)) { 1441 if (I == HoistPoint) 1442 return; 1443 if (HoistStops.count(I)) 1444 return; 1445 if (auto *PN = dyn_cast<PHINode>(I)) 1446 if (TrivialPHIs.count(PN)) 1447 // The trivial phi inserted by the previous CHR scope could replace a 1448 // non-phi in HoistStops. Note that since this phi is at the exit of a 1449 // previous CHR scope, which dominates this scope, it's safe to stop 1450 // hoisting there. 1451 return; 1452 if (HoistedSet.count(I)) 1453 // Already hoisted, return. 1454 return; 1455 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1456 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1457 assert(DT.getNode(HoistPoint->getParent()) && 1458 "DT must contain HoistPoint block"); 1459 if (DT.dominates(I, HoistPoint)) 1460 // We are already above the hoist point. Stop here. This may be necessary 1461 // when multiple scopes would independently hoist the same 1462 // instruction. Since an outer (dominating) scope would hoist it to its 1463 // entry before an inner (dominated) scope would to its entry, the inner 1464 // scope may see the instruction already hoisted, in which case it 1465 // potentially wrong for the inner scope to hoist it and could cause bad 1466 // IR (non-dominating def), but safe to skip hoisting it instead because 1467 // it's already in a block that dominates the inner scope. 1468 return; 1469 for (Value *Op : I->operands()) { 1470 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1471 } 1472 I->moveBefore(HoistPoint); 1473 HoistedSet.insert(I); 1474 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1475 } 1476 } 1477 1478 // Hoist the dependent condition values of the branches and the selects in the 1479 // scope to the insert point. 1480 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1481 DenseSet<PHINode *> &TrivialPHIs, 1482 DominatorTree &DT) { 1483 DenseSet<Instruction *> HoistedSet; 1484 for (const RegInfo &RI : Scope->CHRRegions) { 1485 Region *R = RI.R; 1486 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1487 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1488 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1489 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1490 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1491 HoistedSet, TrivialPHIs, DT); 1492 } 1493 for (SelectInst *SI : RI.Selects) { 1494 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1495 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1496 if (!(IsTrueBiased || IsFalseBiased)) 1497 continue; 1498 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1499 HoistedSet, TrivialPHIs, DT); 1500 } 1501 } 1502 } 1503 1504 // Negate the predicate if an ICmp if it's used only by branches or selects by 1505 // swapping the operands of the branches or the selects. Returns true if success. 1506 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1507 Instruction *ExcludedUser, 1508 CHRScope *Scope) { 1509 for (User *U : ICmp->users()) { 1510 if (U == ExcludedUser) 1511 continue; 1512 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1513 continue; 1514 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1515 continue; 1516 return false; 1517 } 1518 for (User *U : ICmp->users()) { 1519 if (U == ExcludedUser) 1520 continue; 1521 if (auto *BI = dyn_cast<BranchInst>(U)) { 1522 assert(BI->isConditional() && "Must be conditional"); 1523 BI->swapSuccessors(); 1524 // Don't need to swap this in terms of 1525 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1526 // mean whehter the branch is likely go into the if-then rather than 1527 // successor0/successor1 and because we can tell which edge is the then or 1528 // the else one by comparing the destination to the region exit block. 1529 continue; 1530 } 1531 if (auto *SI = dyn_cast<SelectInst>(U)) { 1532 // Swap operands 1533 SI->swapValues(); 1534 SI->swapProfMetadata(); 1535 if (Scope->TrueBiasedSelects.count(SI)) { 1536 assert(!Scope->FalseBiasedSelects.contains(SI) && 1537 "Must not be already in"); 1538 Scope->FalseBiasedSelects.insert(SI); 1539 } else if (Scope->FalseBiasedSelects.count(SI)) { 1540 assert(!Scope->TrueBiasedSelects.contains(SI) && 1541 "Must not be already in"); 1542 Scope->TrueBiasedSelects.insert(SI); 1543 } 1544 continue; 1545 } 1546 llvm_unreachable("Must be a branch or a select"); 1547 } 1548 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1549 return true; 1550 } 1551 1552 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1553 // for a value that's defined in the scope but used outside it (meaning it's 1554 // alive at the exit block). 1555 static void insertTrivialPHIs(CHRScope *Scope, 1556 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1557 DenseSet<PHINode *> &TrivialPHIs) { 1558 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1559 for (RegInfo &RI : Scope->RegInfos) { 1560 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1561 // sub-Scopes. 1562 BlocksInScope.insert(BB); 1563 } 1564 } 1565 CHR_DEBUG({ 1566 dbgs() << "Inserting redundant phis\n"; 1567 for (BasicBlock *BB : BlocksInScope) 1568 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1569 }); 1570 for (BasicBlock *BB : BlocksInScope) { 1571 for (Instruction &I : *BB) { 1572 SmallVector<Instruction *, 8> Users; 1573 for (User *U : I.users()) { 1574 if (auto *UI = dyn_cast<Instruction>(U)) { 1575 if (!BlocksInScope.contains(UI->getParent()) && 1576 // Unless there's already a phi for I at the exit block. 1577 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1578 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1579 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1580 Users.push_back(UI); 1581 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1582 // There's a loop backedge from a block that's dominated by this 1583 // scope to the entry block. 1584 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1585 CHR_DEBUG(dbgs() 1586 << "Used at entry block (for a back edge) by a phi user " 1587 << *UI << "\n"); 1588 Users.push_back(UI); 1589 } 1590 } 1591 } 1592 if (Users.size() > 0) { 1593 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1594 // ExitBlock. Replace I with the new phi in UI unless UI is another 1595 // phi at ExitBlock. 1596 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 1597 &ExitBlock->front()); 1598 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1599 PN->addIncoming(&I, Pred); 1600 } 1601 TrivialPHIs.insert(PN); 1602 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1603 for (Instruction *UI : Users) { 1604 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1605 if (UI->getOperand(J) == &I) { 1606 UI->setOperand(J, PN); 1607 } 1608 } 1609 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1610 } 1611 } 1612 } 1613 } 1614 } 1615 1616 // Assert that all the CHR regions of the scope have a biased branch or select. 1617 static void LLVM_ATTRIBUTE_UNUSED 1618 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1619 #ifndef NDEBUG 1620 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1621 if (Scope->TrueBiasedRegions.count(RI.R) || 1622 Scope->FalseBiasedRegions.count(RI.R)) 1623 return true; 1624 for (SelectInst *SI : RI.Selects) 1625 if (Scope->TrueBiasedSelects.count(SI) || 1626 Scope->FalseBiasedSelects.count(SI)) 1627 return true; 1628 return false; 1629 }; 1630 for (RegInfo &RI : Scope->CHRRegions) { 1631 assert(HasBiasedBranchOrSelect(RI, Scope) && 1632 "Must have biased branch or select"); 1633 } 1634 #endif 1635 } 1636 1637 // Assert that all the condition values of the biased branches and selects have 1638 // been hoisted to the pre-entry block or outside of the scope. 1639 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1640 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1641 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1642 for (RegInfo &RI : Scope->CHRRegions) { 1643 Region *R = RI.R; 1644 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1645 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1646 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1647 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1648 Value *V = BI->getCondition(); 1649 CHR_DEBUG(dbgs() << *V << "\n"); 1650 if (auto *I = dyn_cast<Instruction>(V)) { 1651 (void)(I); // Unused in release build. 1652 assert((I->getParent() == PreEntryBlock || 1653 !Scope->contains(I)) && 1654 "Must have been hoisted to PreEntryBlock or outside the scope"); 1655 } 1656 } 1657 for (SelectInst *SI : RI.Selects) { 1658 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1659 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1660 if (!(IsTrueBiased || IsFalseBiased)) 1661 continue; 1662 Value *V = SI->getCondition(); 1663 CHR_DEBUG(dbgs() << *V << "\n"); 1664 if (auto *I = dyn_cast<Instruction>(V)) { 1665 (void)(I); // Unused in release build. 1666 assert((I->getParent() == PreEntryBlock || 1667 !Scope->contains(I)) && 1668 "Must have been hoisted to PreEntryBlock or outside the scope"); 1669 } 1670 } 1671 } 1672 } 1673 1674 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1675 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1676 1677 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1678 1679 for (RegInfo &RI : Scope->RegInfos) { 1680 const Region *R = RI.R; 1681 unsigned Duplication = getRegionDuplicationCount(R); 1682 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication 1683 << "\n"); 1684 if (Duplication >= CHRDupThreshsold) { 1685 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication 1686 << " for this region"); 1687 ORE.emit([&]() { 1688 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached", 1689 R->getEntry()->getTerminator()) 1690 << "Reached the duplication threshold for the region"; 1691 }); 1692 return; 1693 } 1694 } 1695 for (RegInfo &RI : Scope->RegInfos) { 1696 DuplicationCount[RI.R]++; 1697 } 1698 1699 Region *FirstRegion = Scope->RegInfos[0].R; 1700 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1701 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1702 BasicBlock *ExitBlock = LastRegion->getExit(); 1703 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1704 1705 if (ExitBlock) { 1706 // Insert a trivial phi at the exit block (where the CHR hot path and the 1707 // cold path merges) for a value that's defined in the scope but used 1708 // outside it (meaning it's alive at the exit block). We will add the 1709 // incoming values for the CHR cold paths to it below. Without this, we'd 1710 // miss updating phi's for such values unless there happens to already be a 1711 // phi for that value there. 1712 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1713 } 1714 1715 // Split the entry block of the first region. The new block becomes the new 1716 // entry block of the first region. The old entry block becomes the block to 1717 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1718 // through the split, we update the entry of the first region after the split, 1719 // and Region only points to the entry and the exit blocks, rather than 1720 // keeping everything in a list or set, the blocks membership and the 1721 // entry/exit blocks of the region are still valid after the split. 1722 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1723 << " at " << *Scope->BranchInsertPoint << "\n"); 1724 BasicBlock *NewEntryBlock = 1725 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1726 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1727 "NewEntryBlock's only pred must be EntryBlock"); 1728 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1729 BasicBlock *PreEntryBlock = EntryBlock; 1730 1731 ValueToValueMapTy VMap; 1732 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1733 // hot path (originals) and a cold path (clones) and update the PHIs at the 1734 // exit block. 1735 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1736 1737 // Replace the old (placeholder) branch with the new (merged) conditional 1738 // branch. 1739 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1740 NewEntryBlock, VMap); 1741 1742 #ifndef NDEBUG 1743 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1744 #endif 1745 1746 // Hoist the conditional values of the branches/selects. 1747 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1748 1749 #ifndef NDEBUG 1750 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1751 #endif 1752 1753 // Create the combined branch condition and constant-fold the branches/selects 1754 // in the hot path. 1755 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1756 ProfileCount.value_or(0)); 1757 } 1758 1759 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1760 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1761 // at the exit block. 1762 void CHR::cloneScopeBlocks(CHRScope *Scope, 1763 BasicBlock *PreEntryBlock, 1764 BasicBlock *ExitBlock, 1765 Region *LastRegion, 1766 ValueToValueMapTy &VMap) { 1767 // Clone all the blocks. The original blocks will be the hot-path 1768 // CHR-optimized code and the cloned blocks will be the original unoptimized 1769 // code. This is so that the block pointers from the 1770 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1771 // which CHR should apply to. 1772 SmallVector<BasicBlock*, 8> NewBlocks; 1773 for (RegInfo &RI : Scope->RegInfos) 1774 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1775 // sub-Scopes. 1776 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1777 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1778 NewBlocks.push_back(NewBB); 1779 VMap[BB] = NewBB; 1780 1781 // Unreachable predecessors will not be cloned and will not have an edge 1782 // to the cloned block. As such, also remove them from any phi nodes. 1783 // To avoid iterator invalidation, first collect the dead predecessors 1784 // from the first phi node, and then perform the actual removal. 1785 if (auto *FirstPN = dyn_cast<PHINode>(NewBB->begin())) { 1786 SmallVector<BasicBlock *> DeadPreds; 1787 for (BasicBlock *Pred : FirstPN->blocks()) 1788 if (!DT.isReachableFromEntry(Pred)) 1789 DeadPreds.push_back(Pred); 1790 for (PHINode &PN : make_early_inc_range(NewBB->phis())) 1791 for (BasicBlock *Pred : DeadPreds) 1792 PN.removeIncomingValue(Pred); 1793 } 1794 } 1795 1796 // Place the cloned blocks right after the original blocks (right before the 1797 // exit block of.) 1798 if (ExitBlock) 1799 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(), 1800 F.end()); 1801 1802 // Update the cloned blocks/instructions to refer to themselves. 1803 for (BasicBlock *NewBB : NewBlocks) 1804 for (Instruction &I : *NewBB) 1805 RemapInstruction(&I, VMap, 1806 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1807 1808 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1809 // the top-level region but we don't need to add PHIs. The trivial PHIs 1810 // inserted above will be updated here. 1811 if (ExitBlock) 1812 for (PHINode &PN : ExitBlock->phis()) 1813 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1814 ++I) { 1815 BasicBlock *Pred = PN.getIncomingBlock(I); 1816 if (LastRegion->contains(Pred)) { 1817 Value *V = PN.getIncomingValue(I); 1818 auto It = VMap.find(V); 1819 if (It != VMap.end()) V = It->second; 1820 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1821 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1822 } 1823 } 1824 } 1825 1826 // A helper for transformScope. Replace the old (placeholder) branch with the 1827 // new (merged) conditional branch. 1828 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1829 BasicBlock *EntryBlock, 1830 BasicBlock *NewEntryBlock, 1831 ValueToValueMapTy &VMap) { 1832 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1833 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1834 "SplitBlock did not work correctly!"); 1835 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1836 "NewEntryBlock's only pred must be EntryBlock"); 1837 assert(VMap.find(NewEntryBlock) != VMap.end() && 1838 "NewEntryBlock must have been copied"); 1839 OldBR->dropAllReferences(); 1840 OldBR->eraseFromParent(); 1841 // The true predicate is a placeholder. It will be replaced later in 1842 // fixupBranchesAndSelects(). 1843 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1844 cast<BasicBlock>(VMap[NewEntryBlock]), 1845 ConstantInt::getTrue(F.getContext())); 1846 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end()); 1847 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1848 "NewEntryBlock's only pred must be EntryBlock"); 1849 return NewBR; 1850 } 1851 1852 // A helper for transformScopes. Create the combined branch condition and 1853 // constant-fold the branches/selects in the hot path. 1854 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1855 BasicBlock *PreEntryBlock, 1856 BranchInst *MergedBR, 1857 uint64_t ProfileCount) { 1858 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1859 BranchProbability CHRBranchBias(1, 1); 1860 uint64_t NumCHRedBranches = 0; 1861 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1862 for (RegInfo &RI : Scope->CHRRegions) { 1863 Region *R = RI.R; 1864 if (RI.HasBranch) { 1865 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1866 ++NumCHRedBranches; 1867 } 1868 for (SelectInst *SI : RI.Selects) { 1869 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1870 ++NumCHRedBranches; 1871 } 1872 } 1873 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1874 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1875 ORE.emit([&]() { 1876 return OptimizationRemark(DEBUG_TYPE, 1877 "CHR", 1878 // Refer to the hot (original) path 1879 MergedBR->getSuccessor(0)->getTerminator()) 1880 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1881 << " branches or selects"; 1882 }); 1883 MergedBR->setCondition(MergedCondition); 1884 uint32_t Weights[] = { 1885 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1886 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1887 }; 1888 MDBuilder MDB(F.getContext()); 1889 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1890 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1891 << "\n"); 1892 } 1893 1894 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1895 // and constant-fold a branch in the hot path. 1896 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1897 IRBuilder<> &IRB, 1898 Value *&MergedCondition, 1899 BranchProbability &CHRBranchBias) { 1900 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1901 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1902 "Must be truthy or falsy"); 1903 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1904 assert(BranchBiasMap.contains(R) && "Must be in the bias map"); 1905 BranchProbability Bias = BranchBiasMap[R]; 1906 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1907 // Take the min. 1908 if (CHRBranchBias > Bias) 1909 CHRBranchBias = Bias; 1910 BasicBlock *IfThen = BI->getSuccessor(1); 1911 BasicBlock *IfElse = BI->getSuccessor(0); 1912 BasicBlock *RegionExitBlock = R->getExit(); 1913 assert(RegionExitBlock && "Null ExitBlock"); 1914 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1915 IfThen != IfElse && "Invariant from findScopes"); 1916 if (IfThen == RegionExitBlock) { 1917 // Swap them so that IfThen means going into it and IfElse means skipping 1918 // it. 1919 std::swap(IfThen, IfElse); 1920 } 1921 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1922 << " IfElse " << IfElse->getName() << "\n"); 1923 Value *Cond = BI->getCondition(); 1924 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1925 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1926 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1927 MergedCondition); 1928 // Constant-fold the branch at ClonedEntryBlock. 1929 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1930 "The successor shouldn't change"); 1931 Value *NewCondition = ConditionTrue ? 1932 ConstantInt::getTrue(F.getContext()) : 1933 ConstantInt::getFalse(F.getContext()); 1934 BI->setCondition(NewCondition); 1935 } 1936 1937 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1938 // and constant-fold a select in the hot path. 1939 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1940 IRBuilder<> &IRB, 1941 Value *&MergedCondition, 1942 BranchProbability &CHRBranchBias) { 1943 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1944 assert((IsTrueBiased || 1945 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1946 assert(SelectBiasMap.contains(SI) && "Must be in the bias map"); 1947 BranchProbability Bias = SelectBiasMap[SI]; 1948 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1949 // Take the min. 1950 if (CHRBranchBias > Bias) 1951 CHRBranchBias = Bias; 1952 Value *Cond = SI->getCondition(); 1953 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1954 MergedCondition); 1955 Value *NewCondition = IsTrueBiased ? 1956 ConstantInt::getTrue(F.getContext()) : 1957 ConstantInt::getFalse(F.getContext()); 1958 SI->setCondition(NewCondition); 1959 } 1960 1961 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1962 // condition. 1963 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1964 Instruction *BranchOrSelect, CHRScope *Scope, 1965 IRBuilder<> &IRB, Value *&MergedCondition) { 1966 if (!IsTrueBiased) { 1967 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1968 // branch, negate the icmp predicate and swap the branch targets and avoid 1969 // inserting an Xor to negate Cond. 1970 auto *ICmp = dyn_cast<ICmpInst>(Cond); 1971 if (!ICmp || 1972 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1973 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1974 } 1975 1976 // Freeze potentially poisonous conditions. 1977 if (!isGuaranteedNotToBeUndefOrPoison(Cond)) 1978 Cond = IRB.CreateFreeze(Cond); 1979 1980 // Use logical and to avoid propagating poison from later conditions. 1981 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 1982 } 1983 1984 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1985 unsigned I = 0; 1986 DenseSet<PHINode *> TrivialPHIs; 1987 for (CHRScope *Scope : CHRScopes) { 1988 transformScopes(Scope, TrivialPHIs); 1989 CHR_DEBUG( 1990 std::ostringstream oss; 1991 oss << " after transformScopes " << I++; 1992 dumpIR(F, oss.str().c_str(), nullptr)); 1993 (void)I; 1994 } 1995 } 1996 1997 static void LLVM_ATTRIBUTE_UNUSED 1998 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1999 dbgs() << Label << " " << Scopes.size() << "\n"; 2000 for (CHRScope *Scope : Scopes) { 2001 dbgs() << *Scope << "\n"; 2002 } 2003 } 2004 2005 bool CHR::run() { 2006 if (!shouldApply(F, PSI)) 2007 return false; 2008 2009 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2010 2011 bool Changed = false; 2012 { 2013 CHR_DEBUG( 2014 dbgs() << "RegionInfo:\n"; 2015 RI.print(dbgs())); 2016 2017 // Recursively traverse the region tree and find regions that have biased 2018 // branches and/or selects and create scopes. 2019 SmallVector<CHRScope *, 8> AllScopes; 2020 findScopes(AllScopes); 2021 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2022 2023 // Split the scopes if 1) the conditional values of the biased 2024 // branches/selects of the inner/lower scope can't be hoisted up to the 2025 // outermost/uppermost scope entry, or 2) the condition values of the biased 2026 // branches/selects in a scope (including subscopes) don't share at least 2027 // one common value. 2028 SmallVector<CHRScope *, 8> SplitScopes; 2029 splitScopes(AllScopes, SplitScopes); 2030 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2031 2032 // After splitting, set the biased regions and selects of a scope (a tree 2033 // root) that include those of the subscopes. 2034 classifyBiasedScopes(SplitScopes); 2035 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2036 2037 // Filter out the scopes that has only one biased region or select (CHR 2038 // isn't useful in such a case). 2039 SmallVector<CHRScope *, 8> FilteredScopes; 2040 filterScopes(SplitScopes, FilteredScopes); 2041 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2042 2043 // Set the regions to be CHR'ed and their hoist stops for each scope. 2044 SmallVector<CHRScope *, 8> SetScopes; 2045 setCHRRegions(FilteredScopes, SetScopes); 2046 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2047 2048 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2049 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2050 // to the hot path, rather than both hot and cold paths. 2051 SmallVector<CHRScope *, 8> SortedScopes; 2052 sortScopes(SetScopes, SortedScopes); 2053 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2054 2055 CHR_DEBUG( 2056 dbgs() << "RegionInfo:\n"; 2057 RI.print(dbgs())); 2058 2059 // Apply the CHR transformation. 2060 if (!SortedScopes.empty()) { 2061 transformScopes(SortedScopes); 2062 Changed = true; 2063 } 2064 } 2065 2066 if (Changed) { 2067 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2068 ORE.emit([&]() { 2069 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2070 << ore::NV("Function", &F) << " " 2071 << "Reduced the number of branches in hot paths by " 2072 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2073 << " (static) and " 2074 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2075 << " (weighted by PGO count)"; 2076 }); 2077 } 2078 2079 return Changed; 2080 } 2081 2082 namespace llvm { 2083 2084 ControlHeightReductionPass::ControlHeightReductionPass() { 2085 parseCHRFilterFiles(); 2086 } 2087 2088 PreservedAnalyses ControlHeightReductionPass::run( 2089 Function &F, 2090 FunctionAnalysisManager &FAM) { 2091 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2092 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2093 // If there is no profile summary, we should not do CHR. 2094 if (!PPSI || !PPSI->hasProfileSummary()) 2095 return PreservedAnalyses::all(); 2096 auto &PSI = *PPSI; 2097 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2098 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2099 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2100 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2101 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2102 if (!Changed) 2103 return PreservedAnalyses::all(); 2104 return PreservedAnalyses::none(); 2105 } 2106 2107 } // namespace llvm 2108