1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/Module.h" 32 #include "llvm/IR/PassManager.h" 33 #include "llvm/IR/ProfDataUtils.h" 34 #include "llvm/Support/BranchProbability.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/MemoryBuffer.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include "llvm/Transforms/Utils/ValueMapper.h" 40 41 #include <optional> 42 #include <set> 43 #include <sstream> 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "chr" 48 49 #define CHR_DEBUG(X) LLVM_DEBUG(X) 50 51 static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden, 52 cl::desc("Disable CHR for all functions")); 53 54 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 55 cl::desc("Apply CHR for all functions")); 56 57 static cl::opt<double> CHRBiasThreshold( 58 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 59 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 60 61 static cl::opt<unsigned> CHRMergeThreshold( 62 "chr-merge-threshold", cl::init(2), cl::Hidden, 63 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 64 65 static cl::opt<std::string> CHRModuleList( 66 "chr-module-list", cl::init(""), cl::Hidden, 67 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 68 69 static cl::opt<std::string> CHRFunctionList( 70 "chr-function-list", cl::init(""), cl::Hidden, 71 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 72 73 static cl::opt<unsigned> CHRDupThreshsold( 74 "chr-dup-threshold", cl::init(3), cl::Hidden, 75 cl::desc("Max number of duplications by CHR for a region")); 76 77 static StringSet<> CHRModules; 78 static StringSet<> CHRFunctions; 79 80 static void parseCHRFilterFiles() { 81 if (!CHRModuleList.empty()) { 82 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 83 if (!FileOrErr) { 84 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 85 std::exit(1); 86 } 87 StringRef Buf = FileOrErr->get()->getBuffer(); 88 SmallVector<StringRef, 0> Lines; 89 Buf.split(Lines, '\n'); 90 for (StringRef Line : Lines) { 91 Line = Line.trim(); 92 if (!Line.empty()) 93 CHRModules.insert(Line); 94 } 95 } 96 if (!CHRFunctionList.empty()) { 97 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 98 if (!FileOrErr) { 99 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 100 std::exit(1); 101 } 102 StringRef Buf = FileOrErr->get()->getBuffer(); 103 SmallVector<StringRef, 0> Lines; 104 Buf.split(Lines, '\n'); 105 for (StringRef Line : Lines) { 106 Line = Line.trim(); 107 if (!Line.empty()) 108 CHRFunctions.insert(Line); 109 } 110 } 111 } 112 113 namespace { 114 115 struct CHRStats { 116 CHRStats() = default; 117 void print(raw_ostream &OS) const { 118 OS << "CHRStats: NumBranches " << NumBranches 119 << " NumBranchesDelta " << NumBranchesDelta 120 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 121 } 122 // The original number of conditional branches / selects 123 uint64_t NumBranches = 0; 124 // The decrease of the number of conditional branches / selects in the hot 125 // paths due to CHR. 126 uint64_t NumBranchesDelta = 0; 127 // NumBranchesDelta weighted by the profile count at the scope entry. 128 uint64_t WeightedNumBranchesDelta = 0; 129 }; 130 131 // RegInfo - some properties of a Region. 132 struct RegInfo { 133 RegInfo() = default; 134 RegInfo(Region *RegionIn) : R(RegionIn) {} 135 Region *R = nullptr; 136 bool HasBranch = false; 137 SmallVector<SelectInst *, 8> Selects; 138 }; 139 140 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 141 142 // CHRScope - a sequence of regions to CHR together. It corresponds to a 143 // sequence of conditional blocks. It can have subscopes which correspond to 144 // nested conditional blocks. Nested CHRScopes form a tree. 145 class CHRScope { 146 public: 147 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 148 assert(RI.R && "Null RegionIn"); 149 RegInfos.push_back(RI); 150 } 151 152 Region *getParentRegion() { 153 assert(RegInfos.size() > 0 && "Empty CHRScope"); 154 Region *Parent = RegInfos[0].R->getParent(); 155 assert(Parent && "Unexpected to call this on the top-level region"); 156 return Parent; 157 } 158 159 BasicBlock *getEntryBlock() { 160 assert(RegInfos.size() > 0 && "Empty CHRScope"); 161 return RegInfos.front().R->getEntry(); 162 } 163 164 BasicBlock *getExitBlock() { 165 assert(RegInfos.size() > 0 && "Empty CHRScope"); 166 return RegInfos.back().R->getExit(); 167 } 168 169 bool appendable(CHRScope *Next) { 170 // The next scope is appendable only if this scope is directly connected to 171 // it (which implies it post-dominates this scope) and this scope dominates 172 // it (no edge to the next scope outside this scope). 173 BasicBlock *NextEntry = Next->getEntryBlock(); 174 if (getExitBlock() != NextEntry) 175 // Not directly connected. 176 return false; 177 Region *LastRegion = RegInfos.back().R; 178 for (BasicBlock *Pred : predecessors(NextEntry)) 179 if (!LastRegion->contains(Pred)) 180 // There's an edge going into the entry of the next scope from outside 181 // of this scope. 182 return false; 183 return true; 184 } 185 186 void append(CHRScope *Next) { 187 assert(RegInfos.size() > 0 && "Empty CHRScope"); 188 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 189 assert(getParentRegion() == Next->getParentRegion() && 190 "Must be siblings"); 191 assert(getExitBlock() == Next->getEntryBlock() && 192 "Must be adjacent"); 193 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 194 Subs.append(Next->Subs.begin(), Next->Subs.end()); 195 } 196 197 void addSub(CHRScope *SubIn) { 198 #ifndef NDEBUG 199 bool IsChild = false; 200 for (RegInfo &RI : RegInfos) 201 if (RI.R == SubIn->getParentRegion()) { 202 IsChild = true; 203 break; 204 } 205 assert(IsChild && "Must be a child"); 206 #endif 207 Subs.push_back(SubIn); 208 } 209 210 // Split this scope at the boundary region into two, which will belong to the 211 // tail and returns the tail. 212 CHRScope *split(Region *Boundary) { 213 assert(Boundary && "Boundary null"); 214 assert(RegInfos.begin()->R != Boundary && 215 "Can't be split at beginning"); 216 auto BoundaryIt = llvm::find_if( 217 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 218 if (BoundaryIt == RegInfos.end()) 219 return nullptr; 220 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 221 DenseSet<Region *> TailRegionSet; 222 for (const RegInfo &RI : TailRegInfos) 223 TailRegionSet.insert(RI.R); 224 225 auto TailIt = 226 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 227 assert(Sub && "null Sub"); 228 Region *Parent = Sub->getParentRegion(); 229 if (TailRegionSet.count(Parent)) 230 return false; 231 232 assert(llvm::any_of( 233 RegInfos, 234 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 235 "Must be in head"); 236 return true; 237 }); 238 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 239 240 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 241 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 242 RegInfos.erase(BoundaryIt, RegInfos.end()); 243 Subs.erase(TailIt, Subs.end()); 244 return Scope; 245 } 246 247 bool contains(Instruction *I) const { 248 BasicBlock *Parent = I->getParent(); 249 for (const RegInfo &RI : RegInfos) 250 if (RI.R->contains(Parent)) 251 return true; 252 return false; 253 } 254 255 void print(raw_ostream &OS) const; 256 257 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 258 SmallVector<CHRScope *, 8> Subs; // Subscopes. 259 260 // The instruction at which to insert the CHR conditional branch (and hoist 261 // the dependent condition values). 262 Instruction *BranchInsertPoint; 263 264 // True-biased and false-biased regions (conditional blocks), 265 // respectively. Used only for the outermost scope and includes regions in 266 // subscopes. The rest are unbiased. 267 DenseSet<Region *> TrueBiasedRegions; 268 DenseSet<Region *> FalseBiasedRegions; 269 // Among the biased regions, the regions that get CHRed. 270 SmallVector<RegInfo, 8> CHRRegions; 271 272 // True-biased and false-biased selects, respectively. Used only for the 273 // outermost scope and includes ones in subscopes. 274 DenseSet<SelectInst *> TrueBiasedSelects; 275 DenseSet<SelectInst *> FalseBiasedSelects; 276 277 // Map from one of the above regions to the instructions to stop 278 // hoisting instructions at through use-def chains. 279 HoistStopMapTy HoistStopMap; 280 281 private: 282 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 283 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 284 }; 285 286 class CHR { 287 public: 288 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 289 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 290 OptimizationRemarkEmitter &OREin) 291 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 292 293 ~CHR() { 294 for (CHRScope *Scope : Scopes) { 295 delete Scope; 296 } 297 } 298 299 bool run(); 300 301 private: 302 // See the comments in CHR::run() for the high level flow of the algorithm and 303 // what the following functions do. 304 305 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 306 Region *R = RI.getTopLevelRegion(); 307 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 308 Output.push_back(Scope); 309 } 310 } 311 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 312 SmallVectorImpl<CHRScope *> &Scopes); 313 CHRScope *findScope(Region *R); 314 void checkScopeHoistable(CHRScope *Scope); 315 316 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 317 SmallVectorImpl<CHRScope *> &Output); 318 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 319 CHRScope *Outer, 320 DenseSet<Value *> *OuterConditionValues, 321 Instruction *OuterInsertPoint, 322 SmallVectorImpl<CHRScope *> &Output, 323 DenseSet<Instruction *> &Unhoistables); 324 325 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 326 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 327 328 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 329 SmallVectorImpl<CHRScope *> &Output); 330 331 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 332 SmallVectorImpl<CHRScope *> &Output); 333 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 334 335 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 336 SmallVectorImpl<CHRScope *> &Output); 337 338 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 339 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 340 void cloneScopeBlocks(CHRScope *Scope, 341 BasicBlock *PreEntryBlock, 342 BasicBlock *ExitBlock, 343 Region *LastRegion, 344 ValueToValueMapTy &VMap); 345 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 346 BasicBlock *EntryBlock, 347 BasicBlock *NewEntryBlock, 348 ValueToValueMapTy &VMap); 349 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock, 350 BranchInst *MergedBR, uint64_t ProfileCount); 351 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB, 352 Value *&MergedCondition, BranchProbability &CHRBranchBias); 353 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB, 354 Value *&MergedCondition, BranchProbability &CHRBranchBias); 355 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 356 Instruction *BranchOrSelect, CHRScope *Scope, 357 IRBuilder<> &IRB, Value *&MergedCondition); 358 unsigned getRegionDuplicationCount(const Region *R) { 359 unsigned Count = 0; 360 // Find out how many times region R is cloned. Note that if the parent 361 // of R is cloned, R is also cloned, but R's clone count is not updated 362 // from the clone of the parent. We need to accumlate all the counts 363 // from the ancestors to get the clone count. 364 while (R) { 365 Count += DuplicationCount[R]; 366 R = R->getParent(); 367 } 368 return Count; 369 } 370 371 Function &F; 372 BlockFrequencyInfo &BFI; 373 DominatorTree &DT; 374 ProfileSummaryInfo &PSI; 375 RegionInfo &RI; 376 OptimizationRemarkEmitter &ORE; 377 CHRStats Stats; 378 379 // All the true-biased regions in the function 380 DenseSet<Region *> TrueBiasedRegionsGlobal; 381 // All the false-biased regions in the function 382 DenseSet<Region *> FalseBiasedRegionsGlobal; 383 // All the true-biased selects in the function 384 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 385 // All the false-biased selects in the function 386 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 387 // A map from biased regions to their branch bias 388 DenseMap<Region *, BranchProbability> BranchBiasMap; 389 // A map from biased selects to their branch bias 390 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 391 // All the scopes. 392 DenseSet<CHRScope *> Scopes; 393 // This maps records how many times this region is cloned. 394 DenseMap<const Region *, unsigned> DuplicationCount; 395 }; 396 397 } // end anonymous namespace 398 399 static inline 400 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 401 const CHRStats &Stats) { 402 Stats.print(OS); 403 return OS; 404 } 405 406 static inline 407 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 408 Scope.print(OS); 409 return OS; 410 } 411 412 static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { 413 if (DisableCHR) 414 return false; 415 416 if (ForceCHR) 417 return true; 418 419 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 420 if (CHRModules.count(F.getParent()->getName())) 421 return true; 422 return CHRFunctions.count(F.getName()); 423 } 424 425 return PSI.isFunctionEntryHot(&F); 426 } 427 428 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 429 CHRStats *Stats) { 430 StringRef FuncName = F.getName(); 431 StringRef ModuleName = F.getParent()->getName(); 432 (void)(FuncName); // Unused in release build. 433 (void)(ModuleName); // Unused in release build. 434 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 435 << FuncName); 436 if (Stats) 437 CHR_DEBUG(dbgs() << " " << *Stats); 438 CHR_DEBUG(dbgs() << "\n"); 439 CHR_DEBUG(F.dump()); 440 } 441 442 void CHRScope::print(raw_ostream &OS) const { 443 assert(RegInfos.size() > 0 && "Empty CHRScope"); 444 OS << "CHRScope["; 445 OS << RegInfos.size() << ", Regions["; 446 for (const RegInfo &RI : RegInfos) { 447 OS << RI.R->getNameStr(); 448 if (RI.HasBranch) 449 OS << " B"; 450 if (RI.Selects.size() > 0) 451 OS << " S" << RI.Selects.size(); 452 OS << ", "; 453 } 454 if (RegInfos[0].R->getParent()) { 455 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 456 } else { 457 // top level region 458 OS << "]"; 459 } 460 OS << ", Subs["; 461 for (CHRScope *Sub : Subs) { 462 OS << *Sub << ", "; 463 } 464 OS << "]]"; 465 } 466 467 // Return true if the given instruction type can be hoisted by CHR. 468 static bool isHoistableInstructionType(Instruction *I) { 469 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 470 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 471 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 472 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 473 isa<InsertValueInst>(I); 474 } 475 476 // Return true if the given instruction can be hoisted by CHR. 477 static bool isHoistable(Instruction *I, DominatorTree &DT) { 478 if (!isHoistableInstructionType(I)) 479 return false; 480 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT); 481 } 482 483 // Recursively traverse the use-def chains of the given value and return a set 484 // of the unhoistable base values defined within the scope (excluding the 485 // first-region entry block) or the (hoistable or unhoistable) base values that 486 // are defined outside (including the first-region entry block) of the 487 // scope. The returned set doesn't include constants. 488 static const std::set<Value *> & 489 getBaseValues(Value *V, DominatorTree &DT, 490 DenseMap<Value *, std::set<Value *>> &Visited) { 491 auto It = Visited.find(V); 492 if (It != Visited.end()) { 493 return It->second; 494 } 495 std::set<Value *> Result; 496 if (auto *I = dyn_cast<Instruction>(V)) { 497 // We don't stop at a block that's not in the Scope because we would miss 498 // some instructions that are based on the same base values if we stop 499 // there. 500 if (!isHoistable(I, DT)) { 501 Result.insert(I); 502 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 503 } 504 // I is hoistable above the Scope. 505 for (Value *Op : I->operands()) { 506 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 507 Result.insert(OpResult.begin(), OpResult.end()); 508 } 509 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 510 } 511 if (isa<Argument>(V)) { 512 Result.insert(V); 513 } 514 // We don't include others like constants because those won't lead to any 515 // chance of folding of conditions (eg two bit checks merged into one check) 516 // after CHR. 517 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 518 } 519 520 // Return true if V is already hoisted or can be hoisted (along with its 521 // operands) above the insert point. When it returns true and HoistStops is 522 // non-null, the instructions to stop hoisting at through the use-def chains are 523 // inserted into HoistStops. 524 static bool 525 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 526 DenseSet<Instruction *> &Unhoistables, 527 DenseSet<Instruction *> *HoistStops, 528 DenseMap<Instruction *, bool> &Visited) { 529 assert(InsertPoint && "Null InsertPoint"); 530 if (auto *I = dyn_cast<Instruction>(V)) { 531 auto It = Visited.find(I); 532 if (It != Visited.end()) { 533 return It->second; 534 } 535 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 536 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 537 if (Unhoistables.count(I)) { 538 // Don't hoist if they are not to be hoisted. 539 Visited[I] = false; 540 return false; 541 } 542 if (DT.dominates(I, InsertPoint)) { 543 // We are already above the insert point. Stop here. 544 if (HoistStops) 545 HoistStops->insert(I); 546 Visited[I] = true; 547 return true; 548 } 549 // We aren't not above the insert point, check if we can hoist it above the 550 // insert point. 551 if (isHoistable(I, DT)) { 552 // Check operands first. 553 DenseSet<Instruction *> OpsHoistStops; 554 bool AllOpsHoisted = true; 555 for (Value *Op : I->operands()) { 556 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 557 Visited)) { 558 AllOpsHoisted = false; 559 break; 560 } 561 } 562 if (AllOpsHoisted) { 563 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 564 if (HoistStops) 565 HoistStops->insert_range(OpsHoistStops); 566 Visited[I] = true; 567 return true; 568 } 569 } 570 Visited[I] = false; 571 return false; 572 } 573 // Non-instructions are considered hoistable. 574 return true; 575 } 576 577 // Constructs the true and false branch probabilities if the the instruction has 578 // valid branch weights. Returns true when this was successful, false otherwise. 579 static bool extractBranchProbabilities(Instruction *I, 580 BranchProbability &TrueProb, 581 BranchProbability &FalseProb) { 582 uint64_t TrueWeight; 583 uint64_t FalseWeight; 584 if (!extractBranchWeights(*I, TrueWeight, FalseWeight)) 585 return false; 586 uint64_t SumWeight = TrueWeight + FalseWeight; 587 588 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight && 589 "Overflow calculating branch probabilities."); 590 591 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 592 if (SumWeight == 0) 593 return false; 594 595 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight); 596 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight); 597 return true; 598 } 599 600 static BranchProbability getCHRBiasThreshold() { 601 return BranchProbability::getBranchProbability( 602 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 603 } 604 605 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 606 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 607 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 608 // false. 609 template <typename K, typename S, typename M> 610 static bool checkBias(K *Key, BranchProbability TrueProb, 611 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 612 M &BiasMap) { 613 BranchProbability Threshold = getCHRBiasThreshold(); 614 if (TrueProb >= Threshold) { 615 TrueSet.insert(Key); 616 BiasMap[Key] = TrueProb; 617 return true; 618 } else if (FalseProb >= Threshold) { 619 FalseSet.insert(Key); 620 BiasMap[Key] = FalseProb; 621 return true; 622 } 623 return false; 624 } 625 626 // Returns true and insert a region into the right biased set and the map if the 627 // branch of the region is biased. 628 static bool checkBiasedBranch(BranchInst *BI, Region *R, 629 DenseSet<Region *> &TrueBiasedRegionsGlobal, 630 DenseSet<Region *> &FalseBiasedRegionsGlobal, 631 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 632 if (!BI->isConditional()) 633 return false; 634 BranchProbability ThenProb, ElseProb; 635 if (!extractBranchProbabilities(BI, ThenProb, ElseProb)) 636 return false; 637 BasicBlock *IfThen = BI->getSuccessor(0); 638 BasicBlock *IfElse = BI->getSuccessor(1); 639 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 640 IfThen != IfElse && 641 "Invariant from findScopes"); 642 if (IfThen == R->getExit()) { 643 // Swap them so that IfThen/ThenProb means going into the conditional code 644 // and IfElse/ElseProb means skipping it. 645 std::swap(IfThen, IfElse); 646 std::swap(ThenProb, ElseProb); 647 } 648 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 649 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 650 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 651 return checkBias(R, ThenProb, ElseProb, 652 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 653 BranchBiasMap); 654 } 655 656 // Returns true and insert a select into the right biased set and the map if the 657 // select is biased. 658 static bool checkBiasedSelect( 659 SelectInst *SI, Region *R, 660 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 661 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 662 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 663 BranchProbability TrueProb, FalseProb; 664 if (!extractBranchProbabilities(SI, TrueProb, FalseProb)) 665 return false; 666 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 667 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 668 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 669 return checkBias(SI, TrueProb, FalseProb, 670 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 671 SelectBiasMap); 672 } 673 674 // Returns the instruction at which to hoist the dependent condition values and 675 // insert the CHR branch for a region. This is the terminator branch in the 676 // entry block or the first select in the entry block, if any. 677 static Instruction* getBranchInsertPoint(RegInfo &RI) { 678 Region *R = RI.R; 679 BasicBlock *EntryBB = R->getEntry(); 680 // The hoist point is by default the terminator of the entry block, which is 681 // the same as the branch instruction if RI.HasBranch is true. 682 Instruction *HoistPoint = EntryBB->getTerminator(); 683 for (SelectInst *SI : RI.Selects) { 684 if (SI->getParent() == EntryBB) { 685 // Pick the first select in Selects in the entry block. Note Selects is 686 // sorted in the instruction order within a block (asserted below). 687 HoistPoint = SI; 688 break; 689 } 690 } 691 assert(HoistPoint && "Null HoistPoint"); 692 #ifndef NDEBUG 693 // Check that HoistPoint is the first one in Selects in the entry block, 694 // if any. 695 DenseSet<Instruction *> EntryBlockSelectSet; 696 for (SelectInst *SI : RI.Selects) { 697 if (SI->getParent() == EntryBB) { 698 EntryBlockSelectSet.insert(SI); 699 } 700 } 701 for (Instruction &I : *EntryBB) { 702 if (EntryBlockSelectSet.contains(&I)) { 703 assert(&I == HoistPoint && 704 "HoistPoint must be the first one in Selects"); 705 break; 706 } 707 } 708 #endif 709 return HoistPoint; 710 } 711 712 // Find a CHR scope in the given region. 713 CHRScope * CHR::findScope(Region *R) { 714 CHRScope *Result = nullptr; 715 BasicBlock *Entry = R->getEntry(); 716 BasicBlock *Exit = R->getExit(); // null if top level. 717 assert(Entry && "Entry must not be null"); 718 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 719 "Only top level region has a null exit"); 720 if (Entry) 721 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 722 else 723 CHR_DEBUG(dbgs() << "Entry null\n"); 724 if (Exit) 725 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 726 else 727 CHR_DEBUG(dbgs() << "Exit null\n"); 728 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 729 // to this region). 730 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 731 if (EntryInSubregion) 732 return nullptr; 733 // Exclude loops 734 for (BasicBlock *Pred : predecessors(Entry)) 735 if (R->contains(Pred)) 736 return nullptr; 737 // If any of the basic blocks have address taken, we must skip this region 738 // because we cannot clone basic blocks that have address taken. 739 for (BasicBlock *BB : R->blocks()) { 740 if (BB->hasAddressTaken()) 741 return nullptr; 742 // If we encounter llvm.coro.id, skip this region because if the basic block 743 // is cloned, we end up inserting a token type PHI node to the block with 744 // llvm.coro.begin. 745 // FIXME: This could lead to less optimal codegen, because the region is 746 // excluded, it can prevent CHR from merging adjacent regions into bigger 747 // scope and hoisting more branches. 748 for (Instruction &I : *BB) 749 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 750 if (II->getIntrinsicID() == Intrinsic::coro_id) 751 return nullptr; 752 } 753 754 if (Exit) { 755 // Try to find an if-then block (check if R is an if-then). 756 // if (cond) { 757 // ... 758 // } 759 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 760 if (BI) 761 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 762 else 763 CHR_DEBUG(dbgs() << "BI null\n"); 764 if (BI && BI->isConditional()) { 765 BasicBlock *S0 = BI->getSuccessor(0); 766 BasicBlock *S1 = BI->getSuccessor(1); 767 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 768 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 769 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 770 RegInfo RI(R); 771 RI.HasBranch = checkBiasedBranch( 772 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 773 BranchBiasMap); 774 Result = new CHRScope(RI); 775 Scopes.insert(Result); 776 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 777 ++Stats.NumBranches; 778 if (!RI.HasBranch) { 779 ORE.emit([&]() { 780 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 781 << "Branch not biased"; 782 }); 783 } 784 } 785 } 786 } 787 { 788 // Try to look for selects in the direct child blocks (as opposed to in 789 // subregions) of R. 790 // ... 791 // if (..) { // Some subregion 792 // ... 793 // } 794 // if (..) { // Some subregion 795 // ... 796 // } 797 // ... 798 // a = cond ? b : c; 799 // ... 800 SmallVector<SelectInst *, 8> Selects; 801 for (RegionNode *E : R->elements()) { 802 if (E->isSubRegion()) 803 continue; 804 // This returns the basic block of E if E is a direct child of R (not a 805 // subregion.) 806 BasicBlock *BB = E->getEntry(); 807 // Need to push in the order to make it easier to find the first Select 808 // later. 809 for (Instruction &I : *BB) { 810 if (auto *SI = dyn_cast<SelectInst>(&I)) { 811 Selects.push_back(SI); 812 ++Stats.NumBranches; 813 } 814 } 815 } 816 if (Selects.size() > 0) { 817 auto AddSelects = [&](RegInfo &RI) { 818 for (auto *SI : Selects) 819 if (checkBiasedSelect(SI, RI.R, 820 TrueBiasedSelectsGlobal, 821 FalseBiasedSelectsGlobal, 822 SelectBiasMap)) 823 RI.Selects.push_back(SI); 824 else 825 ORE.emit([&]() { 826 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 827 << "Select not biased"; 828 }); 829 }; 830 if (!Result) { 831 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 832 RegInfo RI(R); 833 AddSelects(RI); 834 Result = new CHRScope(RI); 835 Scopes.insert(Result); 836 } else { 837 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 838 AddSelects(Result->RegInfos[0]); 839 } 840 } 841 } 842 843 if (Result) { 844 checkScopeHoistable(Result); 845 } 846 return Result; 847 } 848 849 // Check that any of the branch and the selects in the region could be 850 // hoisted above the the CHR branch insert point (the most dominating of 851 // them, either the branch (at the end of the first block) or the first 852 // select in the first block). If the branch can't be hoisted, drop the 853 // selects in the first blocks. 854 // 855 // For example, for the following scope/region with selects, we want to insert 856 // the merged branch right before the first select in the first/entry block by 857 // hoisting c1, c2, c3, and c4. 858 // 859 // // Branch insert point here. 860 // a = c1 ? b : c; // Select 1 861 // d = c2 ? e : f; // Select 2 862 // if (c3) { // Branch 863 // ... 864 // c4 = foo() // A call. 865 // g = c4 ? h : i; // Select 3 866 // } 867 // 868 // But suppose we can't hoist c4 because it's dependent on the preceding 869 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 870 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 871 void CHR::checkScopeHoistable(CHRScope *Scope) { 872 RegInfo &RI = Scope->RegInfos[0]; 873 Region *R = RI.R; 874 BasicBlock *EntryBB = R->getEntry(); 875 auto *Branch = RI.HasBranch ? 876 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 877 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 878 if (RI.HasBranch || !Selects.empty()) { 879 Instruction *InsertPoint = getBranchInsertPoint(RI); 880 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 881 // Avoid a data dependence from a select or a branch to a(nother) 882 // select. Note no instruction can't data-depend on a branch (a branch 883 // instruction doesn't produce a value). 884 // Initialize Unhoistables with the selects. 885 DenseSet<Instruction *> Unhoistables(llvm::from_range, Selects); 886 // Remove Selects that can't be hoisted. 887 for (auto it = Selects.begin(); it != Selects.end(); ) { 888 SelectInst *SI = *it; 889 if (SI == InsertPoint) { 890 ++it; 891 continue; 892 } 893 DenseMap<Instruction *, bool> Visited; 894 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 895 DT, Unhoistables, nullptr, Visited); 896 if (!IsHoistable) { 897 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 898 ORE.emit([&]() { 899 return OptimizationRemarkMissed(DEBUG_TYPE, 900 "DropUnhoistableSelect", SI) 901 << "Dropped unhoistable select"; 902 }); 903 it = Selects.erase(it); 904 // Since we are dropping the select here, we also drop it from 905 // Unhoistables. 906 Unhoistables.erase(SI); 907 } else 908 ++it; 909 } 910 // Update InsertPoint after potentially removing selects. 911 InsertPoint = getBranchInsertPoint(RI); 912 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 913 if (RI.HasBranch && InsertPoint != Branch) { 914 DenseMap<Instruction *, bool> Visited; 915 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 916 DT, Unhoistables, nullptr, Visited); 917 if (!IsHoistable) { 918 // If the branch isn't hoistable, drop the selects in the entry 919 // block, preferring the branch, which makes the branch the hoist 920 // point. 921 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 922 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 923 CHR_DEBUG( 924 for (SelectInst *SI : Selects) { 925 dbgs() << "SI " << *SI << "\n"; 926 }); 927 for (SelectInst *SI : Selects) { 928 ORE.emit([&]() { 929 return OptimizationRemarkMissed(DEBUG_TYPE, 930 "DropSelectUnhoistableBranch", SI) 931 << "Dropped select due to unhoistable branch"; 932 }); 933 } 934 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 935 return SI->getParent() == EntryBB; 936 }); 937 Unhoistables.clear(); 938 InsertPoint = Branch; 939 } 940 } 941 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 942 #ifndef NDEBUG 943 if (RI.HasBranch) { 944 assert(!DT.dominates(Branch, InsertPoint) && 945 "Branch can't be already above the hoist point"); 946 DenseMap<Instruction *, bool> Visited; 947 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 948 DT, Unhoistables, nullptr, Visited) && 949 "checkHoistValue for branch"); 950 } 951 for (auto *SI : Selects) { 952 assert(!DT.dominates(SI, InsertPoint) && 953 "SI can't be already above the hoist point"); 954 DenseMap<Instruction *, bool> Visited; 955 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 956 Unhoistables, nullptr, Visited) && 957 "checkHoistValue for selects"); 958 } 959 CHR_DEBUG(dbgs() << "Result\n"); 960 if (RI.HasBranch) { 961 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 962 } 963 for (auto *SI : Selects) { 964 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 965 } 966 #endif 967 } 968 } 969 970 // Traverse the region tree, find all nested scopes and merge them if possible. 971 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 972 SmallVectorImpl<CHRScope *> &Scopes) { 973 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 974 CHRScope *Result = findScope(R); 975 // Visit subscopes. 976 CHRScope *ConsecutiveSubscope = nullptr; 977 SmallVector<CHRScope *, 8> Subscopes; 978 for (auto It = R->begin(); It != R->end(); ++It) { 979 const std::unique_ptr<Region> &SubR = *It; 980 auto NextIt = std::next(It); 981 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 982 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 983 << "\n"); 984 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 985 if (SubCHRScope) { 986 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 987 } else { 988 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 989 } 990 if (SubCHRScope) { 991 if (!ConsecutiveSubscope) 992 ConsecutiveSubscope = SubCHRScope; 993 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 994 Subscopes.push_back(ConsecutiveSubscope); 995 ConsecutiveSubscope = SubCHRScope; 996 } else 997 ConsecutiveSubscope->append(SubCHRScope); 998 } else { 999 if (ConsecutiveSubscope) { 1000 Subscopes.push_back(ConsecutiveSubscope); 1001 } 1002 ConsecutiveSubscope = nullptr; 1003 } 1004 } 1005 if (ConsecutiveSubscope) { 1006 Subscopes.push_back(ConsecutiveSubscope); 1007 } 1008 for (CHRScope *Sub : Subscopes) { 1009 if (Result) { 1010 // Combine it with the parent. 1011 Result->addSub(Sub); 1012 } else { 1013 // Push Subscopes as they won't be combined with the parent. 1014 Scopes.push_back(Sub); 1015 } 1016 } 1017 return Result; 1018 } 1019 1020 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1021 DenseSet<Value *> ConditionValues; 1022 if (RI.HasBranch) { 1023 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1024 ConditionValues.insert(BI->getCondition()); 1025 } 1026 for (SelectInst *SI : RI.Selects) { 1027 ConditionValues.insert(SI->getCondition()); 1028 } 1029 return ConditionValues; 1030 } 1031 1032 1033 // Determine whether to split a scope depending on the sets of the branch 1034 // condition values of the previous region and the current region. We split 1035 // (return true) it if 1) the condition values of the inner/lower scope can't be 1036 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1037 // values have an empty intersection (because the combined branch conditions 1038 // won't probably lead to a simpler combined condition). 1039 static bool shouldSplit(Instruction *InsertPoint, 1040 DenseSet<Value *> &PrevConditionValues, 1041 DenseSet<Value *> &ConditionValues, 1042 DominatorTree &DT, 1043 DenseSet<Instruction *> &Unhoistables) { 1044 assert(InsertPoint && "Null InsertPoint"); 1045 CHR_DEBUG( 1046 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1047 for (Value *V : PrevConditionValues) { 1048 dbgs() << *V << ", "; 1049 } 1050 dbgs() << " ConditionValues "; 1051 for (Value *V : ConditionValues) { 1052 dbgs() << *V << ", "; 1053 } 1054 dbgs() << "\n"); 1055 // If any of Bases isn't hoistable to the hoist point, split. 1056 for (Value *V : ConditionValues) { 1057 DenseMap<Instruction *, bool> Visited; 1058 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1059 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1060 return true; // Not hoistable, split. 1061 } 1062 } 1063 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1064 // unnecessary splits at scopes with no branch/selects. If 1065 // PrevConditionValues and ConditionValues don't intersect at all, split. 1066 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1067 // Use std::set as DenseSet doesn't work with set_intersection. 1068 std::set<Value *> PrevBases, Bases; 1069 DenseMap<Value *, std::set<Value *>> Visited; 1070 for (Value *V : PrevConditionValues) { 1071 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1072 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1073 } 1074 for (Value *V : ConditionValues) { 1075 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1076 Bases.insert(BaseValues.begin(), BaseValues.end()); 1077 } 1078 CHR_DEBUG( 1079 dbgs() << "PrevBases "; 1080 for (Value *V : PrevBases) { 1081 dbgs() << *V << ", "; 1082 } 1083 dbgs() << " Bases "; 1084 for (Value *V : Bases) { 1085 dbgs() << *V << ", "; 1086 } 1087 dbgs() << "\n"); 1088 std::vector<Value *> Intersection; 1089 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1090 Bases.end(), std::back_inserter(Intersection)); 1091 if (Intersection.empty()) { 1092 // Empty intersection, split. 1093 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1094 return true; 1095 } 1096 } 1097 CHR_DEBUG(dbgs() << "No split\n"); 1098 return false; // Don't split. 1099 } 1100 1101 static void getSelectsInScope(CHRScope *Scope, 1102 DenseSet<Instruction *> &Output) { 1103 for (RegInfo &RI : Scope->RegInfos) 1104 Output.insert_range(RI.Selects); 1105 for (CHRScope *Sub : Scope->Subs) 1106 getSelectsInScope(Sub, Output); 1107 } 1108 1109 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1110 SmallVectorImpl<CHRScope *> &Output) { 1111 for (CHRScope *Scope : Input) { 1112 assert(!Scope->BranchInsertPoint && 1113 "BranchInsertPoint must not be set"); 1114 DenseSet<Instruction *> Unhoistables; 1115 getSelectsInScope(Scope, Unhoistables); 1116 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1117 } 1118 #ifndef NDEBUG 1119 for (CHRScope *Scope : Output) { 1120 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1121 } 1122 #endif 1123 } 1124 1125 SmallVector<CHRScope *, 8> CHR::splitScope( 1126 CHRScope *Scope, 1127 CHRScope *Outer, 1128 DenseSet<Value *> *OuterConditionValues, 1129 Instruction *OuterInsertPoint, 1130 SmallVectorImpl<CHRScope *> &Output, 1131 DenseSet<Instruction *> &Unhoistables) { 1132 if (Outer) { 1133 assert(OuterConditionValues && "Null OuterConditionValues"); 1134 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1135 } 1136 bool PrevSplitFromOuter = true; 1137 DenseSet<Value *> PrevConditionValues; 1138 Instruction *PrevInsertPoint = nullptr; 1139 SmallVector<CHRScope *, 8> Splits; 1140 SmallVector<bool, 8> SplitsSplitFromOuter; 1141 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1142 SmallVector<Instruction *, 8> SplitsInsertPoints; 1143 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1144 for (RegInfo &RI : RegInfos) { 1145 Instruction *InsertPoint = getBranchInsertPoint(RI); 1146 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1147 CHR_DEBUG( 1148 dbgs() << "ConditionValues "; 1149 for (Value *V : ConditionValues) { 1150 dbgs() << *V << ", "; 1151 } 1152 dbgs() << "\n"); 1153 if (RI.R == RegInfos[0].R) { 1154 // First iteration. Check to see if we should split from the outer. 1155 if (Outer) { 1156 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1157 CHR_DEBUG(dbgs() << "Should split from outer at " 1158 << RI.R->getNameStr() << "\n"); 1159 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1160 ConditionValues, DT, Unhoistables)) { 1161 PrevConditionValues = ConditionValues; 1162 PrevInsertPoint = InsertPoint; 1163 ORE.emit([&]() { 1164 return OptimizationRemarkMissed(DEBUG_TYPE, 1165 "SplitScopeFromOuter", 1166 RI.R->getEntry()->getTerminator()) 1167 << "Split scope from outer due to unhoistable branch/select " 1168 << "and/or lack of common condition values"; 1169 }); 1170 } else { 1171 // Not splitting from the outer. Use the outer bases and insert 1172 // point. Union the bases. 1173 PrevSplitFromOuter = false; 1174 PrevConditionValues = *OuterConditionValues; 1175 PrevConditionValues.insert_range(ConditionValues); 1176 PrevInsertPoint = OuterInsertPoint; 1177 } 1178 } else { 1179 CHR_DEBUG(dbgs() << "Outer null\n"); 1180 PrevConditionValues = ConditionValues; 1181 PrevInsertPoint = InsertPoint; 1182 } 1183 } else { 1184 CHR_DEBUG(dbgs() << "Should split from prev at " 1185 << RI.R->getNameStr() << "\n"); 1186 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1187 DT, Unhoistables)) { 1188 CHRScope *Tail = Scope->split(RI.R); 1189 Scopes.insert(Tail); 1190 Splits.push_back(Scope); 1191 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1192 SplitsConditionValues.push_back(PrevConditionValues); 1193 SplitsInsertPoints.push_back(PrevInsertPoint); 1194 Scope = Tail; 1195 PrevConditionValues = ConditionValues; 1196 PrevInsertPoint = InsertPoint; 1197 PrevSplitFromOuter = true; 1198 ORE.emit([&]() { 1199 return OptimizationRemarkMissed(DEBUG_TYPE, 1200 "SplitScopeFromPrev", 1201 RI.R->getEntry()->getTerminator()) 1202 << "Split scope from previous due to unhoistable branch/select " 1203 << "and/or lack of common condition values"; 1204 }); 1205 } else { 1206 // Not splitting. Union the bases. Keep the hoist point. 1207 PrevConditionValues.insert_range(ConditionValues); 1208 } 1209 } 1210 } 1211 Splits.push_back(Scope); 1212 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1213 SplitsConditionValues.push_back(PrevConditionValues); 1214 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1215 SplitsInsertPoints.push_back(PrevInsertPoint); 1216 assert(Splits.size() == SplitsConditionValues.size() && 1217 Splits.size() == SplitsSplitFromOuter.size() && 1218 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1219 for (size_t I = 0; I < Splits.size(); ++I) { 1220 CHRScope *Split = Splits[I]; 1221 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1222 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1223 SmallVector<CHRScope *, 8> NewSubs; 1224 DenseSet<Instruction *> SplitUnhoistables; 1225 getSelectsInScope(Split, SplitUnhoistables); 1226 for (CHRScope *Sub : Split->Subs) { 1227 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1228 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1229 SplitUnhoistables); 1230 llvm::append_range(NewSubs, SubSplits); 1231 } 1232 Split->Subs = NewSubs; 1233 } 1234 SmallVector<CHRScope *, 8> Result; 1235 for (size_t I = 0; I < Splits.size(); ++I) { 1236 CHRScope *Split = Splits[I]; 1237 if (SplitsSplitFromOuter[I]) { 1238 // Split from the outer. 1239 Output.push_back(Split); 1240 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1241 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1242 << "\n"); 1243 } else { 1244 // Connected to the outer. 1245 Result.push_back(Split); 1246 } 1247 } 1248 if (!Outer) 1249 assert(Result.empty() && 1250 "If no outer (top-level), must return no nested ones"); 1251 return Result; 1252 } 1253 1254 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1255 for (CHRScope *Scope : Scopes) { 1256 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1257 classifyBiasedScopes(Scope, Scope); 1258 CHR_DEBUG( 1259 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1260 dbgs() << "TrueBiasedRegions "; 1261 for (Region *R : Scope->TrueBiasedRegions) { 1262 dbgs() << R->getNameStr() << ", "; 1263 } 1264 dbgs() << "\n"; 1265 dbgs() << "FalseBiasedRegions "; 1266 for (Region *R : Scope->FalseBiasedRegions) { 1267 dbgs() << R->getNameStr() << ", "; 1268 } 1269 dbgs() << "\n"; 1270 dbgs() << "TrueBiasedSelects "; 1271 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1272 dbgs() << *SI << ", "; 1273 } 1274 dbgs() << "\n"; 1275 dbgs() << "FalseBiasedSelects "; 1276 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1277 dbgs() << *SI << ", "; 1278 } 1279 dbgs() << "\n";); 1280 } 1281 } 1282 1283 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1284 for (RegInfo &RI : Scope->RegInfos) { 1285 if (RI.HasBranch) { 1286 Region *R = RI.R; 1287 if (TrueBiasedRegionsGlobal.contains(R)) 1288 OutermostScope->TrueBiasedRegions.insert(R); 1289 else if (FalseBiasedRegionsGlobal.contains(R)) 1290 OutermostScope->FalseBiasedRegions.insert(R); 1291 else 1292 llvm_unreachable("Must be biased"); 1293 } 1294 for (SelectInst *SI : RI.Selects) { 1295 if (TrueBiasedSelectsGlobal.contains(SI)) 1296 OutermostScope->TrueBiasedSelects.insert(SI); 1297 else if (FalseBiasedSelectsGlobal.contains(SI)) 1298 OutermostScope->FalseBiasedSelects.insert(SI); 1299 else 1300 llvm_unreachable("Must be biased"); 1301 } 1302 } 1303 for (CHRScope *Sub : Scope->Subs) { 1304 classifyBiasedScopes(Sub, OutermostScope); 1305 } 1306 } 1307 1308 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1309 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1310 Scope->FalseBiasedRegions.size() + 1311 Scope->TrueBiasedSelects.size() + 1312 Scope->FalseBiasedSelects.size(); 1313 return NumBiased >= CHRMergeThreshold; 1314 } 1315 1316 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1317 SmallVectorImpl<CHRScope *> &Output) { 1318 for (CHRScope *Scope : Input) { 1319 // Filter out the ones with only one region and no subs. 1320 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1321 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1322 << Scope->TrueBiasedRegions.size() 1323 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1324 << " true-selects " << Scope->TrueBiasedSelects.size() 1325 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1326 ORE.emit([&]() { 1327 return OptimizationRemarkMissed( 1328 DEBUG_TYPE, 1329 "DropScopeWithOneBranchOrSelect", 1330 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1331 << "Drop scope with < " 1332 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1333 << " biased branch(es) or select(s)"; 1334 }); 1335 continue; 1336 } 1337 Output.push_back(Scope); 1338 } 1339 } 1340 1341 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1342 SmallVectorImpl<CHRScope *> &Output) { 1343 for (CHRScope *Scope : Input) { 1344 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1345 "Empty"); 1346 setCHRRegions(Scope, Scope); 1347 Output.push_back(Scope); 1348 CHR_DEBUG( 1349 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1350 for (auto pair : Scope->HoistStopMap) { 1351 Region *R = pair.first; 1352 dbgs() << "Region " << R->getNameStr() << "\n"; 1353 for (Instruction *I : pair.second) { 1354 dbgs() << "HoistStop " << *I << "\n"; 1355 } 1356 } 1357 dbgs() << "CHRRegions" << "\n"; 1358 for (RegInfo &RI : Scope->CHRRegions) { 1359 dbgs() << RI.R->getNameStr() << "\n"; 1360 }); 1361 } 1362 } 1363 1364 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1365 DenseSet<Instruction *> Unhoistables; 1366 // Put the biased selects in Unhoistables because they should stay where they 1367 // are and constant-folded after CHR (in case one biased select or a branch 1368 // can depend on another biased select.) 1369 for (RegInfo &RI : Scope->RegInfos) 1370 Unhoistables.insert_range(RI.Selects); 1371 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1372 for (RegInfo &RI : Scope->RegInfos) { 1373 Region *R = RI.R; 1374 DenseSet<Instruction *> HoistStops; 1375 bool IsHoisted = false; 1376 if (RI.HasBranch) { 1377 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1378 OutermostScope->FalseBiasedRegions.contains(R)) && 1379 "Must be truthy or falsy"); 1380 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1381 // Note checkHoistValue fills in HoistStops. 1382 DenseMap<Instruction *, bool> Visited; 1383 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1384 Unhoistables, &HoistStops, Visited); 1385 assert(IsHoistable && "Must be hoistable"); 1386 (void)(IsHoistable); // Unused in release build 1387 IsHoisted = true; 1388 } 1389 for (SelectInst *SI : RI.Selects) { 1390 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1391 OutermostScope->FalseBiasedSelects.contains(SI)) && 1392 "Must be true or false biased"); 1393 // Note checkHoistValue fills in HoistStops. 1394 DenseMap<Instruction *, bool> Visited; 1395 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1396 Unhoistables, &HoistStops, Visited); 1397 assert(IsHoistable && "Must be hoistable"); 1398 (void)(IsHoistable); // Unused in release build 1399 IsHoisted = true; 1400 } 1401 if (IsHoisted) { 1402 OutermostScope->CHRRegions.push_back(RI); 1403 OutermostScope->HoistStopMap[R] = HoistStops; 1404 } 1405 } 1406 for (CHRScope *Sub : Scope->Subs) 1407 setCHRRegions(Sub, OutermostScope); 1408 } 1409 1410 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1411 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1412 } 1413 1414 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1415 SmallVectorImpl<CHRScope *> &Output) { 1416 Output.resize(Input.size()); 1417 llvm::copy(Input, Output.begin()); 1418 llvm::stable_sort(Output, CHRScopeSorter); 1419 } 1420 1421 // Return true if V is already hoisted or was hoisted (along with its operands) 1422 // to the insert point. 1423 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1424 HoistStopMapTy &HoistStopMap, 1425 DenseSet<Instruction *> &HoistedSet, 1426 DenseSet<PHINode *> &TrivialPHIs, 1427 DominatorTree &DT) { 1428 auto IT = HoistStopMap.find(R); 1429 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1430 DenseSet<Instruction *> &HoistStops = IT->second; 1431 if (auto *I = dyn_cast<Instruction>(V)) { 1432 if (I == HoistPoint) 1433 return; 1434 if (HoistStops.count(I)) 1435 return; 1436 if (auto *PN = dyn_cast<PHINode>(I)) 1437 if (TrivialPHIs.count(PN)) 1438 // The trivial phi inserted by the previous CHR scope could replace a 1439 // non-phi in HoistStops. Note that since this phi is at the exit of a 1440 // previous CHR scope, which dominates this scope, it's safe to stop 1441 // hoisting there. 1442 return; 1443 if (HoistedSet.count(I)) 1444 // Already hoisted, return. 1445 return; 1446 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1447 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1448 assert(DT.getNode(HoistPoint->getParent()) && 1449 "DT must contain HoistPoint block"); 1450 if (DT.dominates(I, HoistPoint)) 1451 // We are already above the hoist point. Stop here. This may be necessary 1452 // when multiple scopes would independently hoist the same 1453 // instruction. Since an outer (dominating) scope would hoist it to its 1454 // entry before an inner (dominated) scope would to its entry, the inner 1455 // scope may see the instruction already hoisted, in which case it 1456 // potentially wrong for the inner scope to hoist it and could cause bad 1457 // IR (non-dominating def), but safe to skip hoisting it instead because 1458 // it's already in a block that dominates the inner scope. 1459 return; 1460 for (Value *Op : I->operands()) { 1461 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1462 } 1463 I->moveBefore(HoistPoint->getIterator()); 1464 HoistedSet.insert(I); 1465 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1466 } 1467 } 1468 1469 // Hoist the dependent condition values of the branches and the selects in the 1470 // scope to the insert point. 1471 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1472 DenseSet<PHINode *> &TrivialPHIs, 1473 DominatorTree &DT) { 1474 DenseSet<Instruction *> HoistedSet; 1475 for (const RegInfo &RI : Scope->CHRRegions) { 1476 Region *R = RI.R; 1477 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1478 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1479 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1480 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1481 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1482 HoistedSet, TrivialPHIs, DT); 1483 } 1484 for (SelectInst *SI : RI.Selects) { 1485 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1486 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1487 if (!(IsTrueBiased || IsFalseBiased)) 1488 continue; 1489 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1490 HoistedSet, TrivialPHIs, DT); 1491 } 1492 } 1493 } 1494 1495 // Negate the predicate if an ICmp if it's used only by branches or selects by 1496 // swapping the operands of the branches or the selects. Returns true if success. 1497 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1498 Instruction *ExcludedUser, 1499 CHRScope *Scope) { 1500 for (User *U : ICmp->users()) { 1501 if (U == ExcludedUser) 1502 continue; 1503 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1504 continue; 1505 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1506 continue; 1507 return false; 1508 } 1509 for (User *U : ICmp->users()) { 1510 if (U == ExcludedUser) 1511 continue; 1512 if (auto *BI = dyn_cast<BranchInst>(U)) { 1513 assert(BI->isConditional() && "Must be conditional"); 1514 BI->swapSuccessors(); 1515 // Don't need to swap this in terms of 1516 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1517 // mean whehter the branch is likely go into the if-then rather than 1518 // successor0/successor1 and because we can tell which edge is the then or 1519 // the else one by comparing the destination to the region exit block. 1520 continue; 1521 } 1522 if (auto *SI = dyn_cast<SelectInst>(U)) { 1523 // Swap operands 1524 SI->swapValues(); 1525 SI->swapProfMetadata(); 1526 if (Scope->TrueBiasedSelects.count(SI)) { 1527 assert(!Scope->FalseBiasedSelects.contains(SI) && 1528 "Must not be already in"); 1529 Scope->FalseBiasedSelects.insert(SI); 1530 } else if (Scope->FalseBiasedSelects.count(SI)) { 1531 assert(!Scope->TrueBiasedSelects.contains(SI) && 1532 "Must not be already in"); 1533 Scope->TrueBiasedSelects.insert(SI); 1534 } 1535 continue; 1536 } 1537 llvm_unreachable("Must be a branch or a select"); 1538 } 1539 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1540 return true; 1541 } 1542 1543 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1544 // for a value that's defined in the scope but used outside it (meaning it's 1545 // alive at the exit block). 1546 static void insertTrivialPHIs(CHRScope *Scope, 1547 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1548 DenseSet<PHINode *> &TrivialPHIs) { 1549 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1550 for (RegInfo &RI : Scope->RegInfos) { 1551 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1552 // sub-Scopes. 1553 BlocksInScope.insert(BB); 1554 } 1555 } 1556 CHR_DEBUG({ 1557 dbgs() << "Inserting redundant phis\n"; 1558 for (BasicBlock *BB : BlocksInScope) 1559 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1560 }); 1561 for (BasicBlock *BB : BlocksInScope) { 1562 for (Instruction &I : *BB) { 1563 SmallVector<Instruction *, 8> Users; 1564 for (User *U : I.users()) { 1565 if (auto *UI = dyn_cast<Instruction>(U)) { 1566 if (!BlocksInScope.contains(UI->getParent()) && 1567 // Unless there's already a phi for I at the exit block. 1568 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1569 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1570 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1571 Users.push_back(UI); 1572 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1573 // There's a loop backedge from a block that's dominated by this 1574 // scope to the entry block. 1575 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1576 CHR_DEBUG(dbgs() 1577 << "Used at entry block (for a back edge) by a phi user " 1578 << *UI << "\n"); 1579 Users.push_back(UI); 1580 } 1581 } 1582 } 1583 if (Users.size() > 0) { 1584 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1585 // ExitBlock. Replace I with the new phi in UI unless UI is another 1586 // phi at ExitBlock. 1587 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), ""); 1588 PN->insertBefore(ExitBlock->begin()); 1589 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1590 PN->addIncoming(&I, Pred); 1591 } 1592 TrivialPHIs.insert(PN); 1593 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1594 for (Instruction *UI : Users) { 1595 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1596 if (UI->getOperand(J) == &I) { 1597 UI->setOperand(J, PN); 1598 } 1599 } 1600 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1601 } 1602 } 1603 } 1604 } 1605 } 1606 1607 // Assert that all the CHR regions of the scope have a biased branch or select. 1608 static void LLVM_ATTRIBUTE_UNUSED 1609 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1610 #ifndef NDEBUG 1611 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1612 if (Scope->TrueBiasedRegions.count(RI.R) || 1613 Scope->FalseBiasedRegions.count(RI.R)) 1614 return true; 1615 for (SelectInst *SI : RI.Selects) 1616 if (Scope->TrueBiasedSelects.count(SI) || 1617 Scope->FalseBiasedSelects.count(SI)) 1618 return true; 1619 return false; 1620 }; 1621 for (RegInfo &RI : Scope->CHRRegions) { 1622 assert(HasBiasedBranchOrSelect(RI, Scope) && 1623 "Must have biased branch or select"); 1624 } 1625 #endif 1626 } 1627 1628 // Assert that all the condition values of the biased branches and selects have 1629 // been hoisted to the pre-entry block or outside of the scope. 1630 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1631 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1632 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1633 for (RegInfo &RI : Scope->CHRRegions) { 1634 Region *R = RI.R; 1635 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1636 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1637 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1638 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1639 Value *V = BI->getCondition(); 1640 CHR_DEBUG(dbgs() << *V << "\n"); 1641 if (auto *I = dyn_cast<Instruction>(V)) { 1642 (void)(I); // Unused in release build. 1643 assert((I->getParent() == PreEntryBlock || 1644 !Scope->contains(I)) && 1645 "Must have been hoisted to PreEntryBlock or outside the scope"); 1646 } 1647 } 1648 for (SelectInst *SI : RI.Selects) { 1649 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1650 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1651 if (!(IsTrueBiased || IsFalseBiased)) 1652 continue; 1653 Value *V = SI->getCondition(); 1654 CHR_DEBUG(dbgs() << *V << "\n"); 1655 if (auto *I = dyn_cast<Instruction>(V)) { 1656 (void)(I); // Unused in release build. 1657 assert((I->getParent() == PreEntryBlock || 1658 !Scope->contains(I)) && 1659 "Must have been hoisted to PreEntryBlock or outside the scope"); 1660 } 1661 } 1662 } 1663 } 1664 1665 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1666 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1667 1668 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1669 1670 for (RegInfo &RI : Scope->RegInfos) { 1671 const Region *R = RI.R; 1672 unsigned Duplication = getRegionDuplicationCount(R); 1673 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication 1674 << "\n"); 1675 if (Duplication >= CHRDupThreshsold) { 1676 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication 1677 << " for this region"); 1678 ORE.emit([&]() { 1679 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached", 1680 R->getEntry()->getTerminator()) 1681 << "Reached the duplication threshold for the region"; 1682 }); 1683 return; 1684 } 1685 } 1686 for (RegInfo &RI : Scope->RegInfos) { 1687 DuplicationCount[RI.R]++; 1688 } 1689 1690 Region *FirstRegion = Scope->RegInfos[0].R; 1691 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1692 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1693 BasicBlock *ExitBlock = LastRegion->getExit(); 1694 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1695 1696 if (ExitBlock) { 1697 // Insert a trivial phi at the exit block (where the CHR hot path and the 1698 // cold path merges) for a value that's defined in the scope but used 1699 // outside it (meaning it's alive at the exit block). We will add the 1700 // incoming values for the CHR cold paths to it below. Without this, we'd 1701 // miss updating phi's for such values unless there happens to already be a 1702 // phi for that value there. 1703 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1704 } 1705 1706 // Split the entry block of the first region. The new block becomes the new 1707 // entry block of the first region. The old entry block becomes the block to 1708 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1709 // through the split, we update the entry of the first region after the split, 1710 // and Region only points to the entry and the exit blocks, rather than 1711 // keeping everything in a list or set, the blocks membership and the 1712 // entry/exit blocks of the region are still valid after the split. 1713 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1714 << " at " << *Scope->BranchInsertPoint << "\n"); 1715 BasicBlock *NewEntryBlock = 1716 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1717 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1718 "NewEntryBlock's only pred must be EntryBlock"); 1719 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1720 BasicBlock *PreEntryBlock = EntryBlock; 1721 1722 ValueToValueMapTy VMap; 1723 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1724 // hot path (originals) and a cold path (clones) and update the PHIs at the 1725 // exit block. 1726 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1727 1728 // Replace the old (placeholder) branch with the new (merged) conditional 1729 // branch. 1730 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1731 NewEntryBlock, VMap); 1732 1733 #ifndef NDEBUG 1734 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1735 #endif 1736 1737 // Hoist the conditional values of the branches/selects. 1738 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1739 1740 #ifndef NDEBUG 1741 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1742 #endif 1743 1744 // Create the combined branch condition and constant-fold the branches/selects 1745 // in the hot path. 1746 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1747 ProfileCount.value_or(0)); 1748 } 1749 1750 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1751 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1752 // at the exit block. 1753 void CHR::cloneScopeBlocks(CHRScope *Scope, 1754 BasicBlock *PreEntryBlock, 1755 BasicBlock *ExitBlock, 1756 Region *LastRegion, 1757 ValueToValueMapTy &VMap) { 1758 // Clone all the blocks. The original blocks will be the hot-path 1759 // CHR-optimized code and the cloned blocks will be the original unoptimized 1760 // code. This is so that the block pointers from the 1761 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1762 // which CHR should apply to. 1763 SmallVector<BasicBlock*, 8> NewBlocks; 1764 for (RegInfo &RI : Scope->RegInfos) 1765 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1766 // sub-Scopes. 1767 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1768 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1769 NewBlocks.push_back(NewBB); 1770 VMap[BB] = NewBB; 1771 1772 // Unreachable predecessors will not be cloned and will not have an edge 1773 // to the cloned block. As such, also remove them from any phi nodes. 1774 for (PHINode &PN : make_early_inc_range(NewBB->phis())) 1775 PN.removeIncomingValueIf([&](unsigned Idx) { 1776 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx)); 1777 }); 1778 } 1779 1780 // Place the cloned blocks right after the original blocks (right before the 1781 // exit block of.) 1782 if (ExitBlock) 1783 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(), 1784 F.end()); 1785 1786 // Update the cloned blocks/instructions to refer to themselves. 1787 for (BasicBlock *NewBB : NewBlocks) 1788 for (Instruction &I : *NewBB) 1789 RemapInstruction(&I, VMap, 1790 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1791 1792 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1793 // the top-level region but we don't need to add PHIs. The trivial PHIs 1794 // inserted above will be updated here. 1795 if (ExitBlock) 1796 for (PHINode &PN : ExitBlock->phis()) 1797 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1798 ++I) { 1799 BasicBlock *Pred = PN.getIncomingBlock(I); 1800 if (LastRegion->contains(Pred)) { 1801 Value *V = PN.getIncomingValue(I); 1802 auto It = VMap.find(V); 1803 if (It != VMap.end()) V = It->second; 1804 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1805 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1806 } 1807 } 1808 } 1809 1810 // A helper for transformScope. Replace the old (placeholder) branch with the 1811 // new (merged) conditional branch. 1812 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1813 BasicBlock *EntryBlock, 1814 BasicBlock *NewEntryBlock, 1815 ValueToValueMapTy &VMap) { 1816 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1817 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1818 "SplitBlock did not work correctly!"); 1819 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1820 "NewEntryBlock's only pred must be EntryBlock"); 1821 assert(VMap.find(NewEntryBlock) != VMap.end() && 1822 "NewEntryBlock must have been copied"); 1823 OldBR->dropAllReferences(); 1824 OldBR->eraseFromParent(); 1825 // The true predicate is a placeholder. It will be replaced later in 1826 // fixupBranchesAndSelects(). 1827 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1828 cast<BasicBlock>(VMap[NewEntryBlock]), 1829 ConstantInt::getTrue(F.getContext())); 1830 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end()); 1831 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1832 "NewEntryBlock's only pred must be EntryBlock"); 1833 return NewBR; 1834 } 1835 1836 // A helper for transformScopes. Create the combined branch condition and 1837 // constant-fold the branches/selects in the hot path. 1838 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1839 BasicBlock *PreEntryBlock, 1840 BranchInst *MergedBR, 1841 uint64_t ProfileCount) { 1842 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1843 BranchProbability CHRBranchBias(1, 1); 1844 uint64_t NumCHRedBranches = 0; 1845 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1846 for (RegInfo &RI : Scope->CHRRegions) { 1847 Region *R = RI.R; 1848 if (RI.HasBranch) { 1849 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1850 ++NumCHRedBranches; 1851 } 1852 for (SelectInst *SI : RI.Selects) { 1853 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1854 ++NumCHRedBranches; 1855 } 1856 } 1857 assert(NumCHRedBranches > 0); 1858 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1859 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1860 ORE.emit([&]() { 1861 return OptimizationRemark(DEBUG_TYPE, 1862 "CHR", 1863 // Refer to the hot (original) path 1864 MergedBR->getSuccessor(0)->getTerminator()) 1865 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1866 << " branches or selects"; 1867 }); 1868 MergedBR->setCondition(MergedCondition); 1869 uint32_t Weights[] = { 1870 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1871 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1872 }; 1873 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false); 1874 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1875 << "\n"); 1876 } 1877 1878 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1879 // and constant-fold a branch in the hot path. 1880 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1881 IRBuilder<> &IRB, 1882 Value *&MergedCondition, 1883 BranchProbability &CHRBranchBias) { 1884 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1885 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1886 "Must be truthy or falsy"); 1887 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1888 assert(BranchBiasMap.contains(R) && "Must be in the bias map"); 1889 BranchProbability Bias = BranchBiasMap[R]; 1890 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1891 // Take the min. 1892 if (CHRBranchBias > Bias) 1893 CHRBranchBias = Bias; 1894 BasicBlock *IfThen = BI->getSuccessor(1); 1895 BasicBlock *IfElse = BI->getSuccessor(0); 1896 BasicBlock *RegionExitBlock = R->getExit(); 1897 assert(RegionExitBlock && "Null ExitBlock"); 1898 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1899 IfThen != IfElse && "Invariant from findScopes"); 1900 if (IfThen == RegionExitBlock) { 1901 // Swap them so that IfThen means going into it and IfElse means skipping 1902 // it. 1903 std::swap(IfThen, IfElse); 1904 } 1905 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1906 << " IfElse " << IfElse->getName() << "\n"); 1907 Value *Cond = BI->getCondition(); 1908 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1909 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1910 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1911 MergedCondition); 1912 // Constant-fold the branch at ClonedEntryBlock. 1913 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1914 "The successor shouldn't change"); 1915 Value *NewCondition = ConditionTrue ? 1916 ConstantInt::getTrue(F.getContext()) : 1917 ConstantInt::getFalse(F.getContext()); 1918 BI->setCondition(NewCondition); 1919 } 1920 1921 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1922 // and constant-fold a select in the hot path. 1923 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1924 IRBuilder<> &IRB, 1925 Value *&MergedCondition, 1926 BranchProbability &CHRBranchBias) { 1927 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1928 assert((IsTrueBiased || 1929 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1930 assert(SelectBiasMap.contains(SI) && "Must be in the bias map"); 1931 BranchProbability Bias = SelectBiasMap[SI]; 1932 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1933 // Take the min. 1934 if (CHRBranchBias > Bias) 1935 CHRBranchBias = Bias; 1936 Value *Cond = SI->getCondition(); 1937 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1938 MergedCondition); 1939 Value *NewCondition = IsTrueBiased ? 1940 ConstantInt::getTrue(F.getContext()) : 1941 ConstantInt::getFalse(F.getContext()); 1942 SI->setCondition(NewCondition); 1943 } 1944 1945 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1946 // condition. 1947 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1948 Instruction *BranchOrSelect, CHRScope *Scope, 1949 IRBuilder<> &IRB, Value *&MergedCondition) { 1950 if (!IsTrueBiased) { 1951 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1952 // branch, negate the icmp predicate and swap the branch targets and avoid 1953 // inserting an Xor to negate Cond. 1954 auto *ICmp = dyn_cast<ICmpInst>(Cond); 1955 if (!ICmp || 1956 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1957 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1958 } 1959 1960 // Freeze potentially poisonous conditions. 1961 if (!isGuaranteedNotToBeUndefOrPoison(Cond)) 1962 Cond = IRB.CreateFreeze(Cond); 1963 1964 // Use logical and to avoid propagating poison from later conditions. 1965 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 1966 } 1967 1968 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1969 unsigned I = 0; 1970 DenseSet<PHINode *> TrivialPHIs; 1971 for (CHRScope *Scope : CHRScopes) { 1972 transformScopes(Scope, TrivialPHIs); 1973 CHR_DEBUG( 1974 std::ostringstream oss; 1975 oss << " after transformScopes " << I++; 1976 dumpIR(F, oss.str().c_str(), nullptr)); 1977 (void)I; 1978 } 1979 } 1980 1981 static void LLVM_ATTRIBUTE_UNUSED 1982 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1983 dbgs() << Label << " " << Scopes.size() << "\n"; 1984 for (CHRScope *Scope : Scopes) { 1985 dbgs() << *Scope << "\n"; 1986 } 1987 } 1988 1989 bool CHR::run() { 1990 if (!shouldApply(F, PSI)) 1991 return false; 1992 1993 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1994 1995 bool Changed = false; 1996 { 1997 CHR_DEBUG( 1998 dbgs() << "RegionInfo:\n"; 1999 RI.print(dbgs())); 2000 2001 // Recursively traverse the region tree and find regions that have biased 2002 // branches and/or selects and create scopes. 2003 SmallVector<CHRScope *, 8> AllScopes; 2004 findScopes(AllScopes); 2005 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2006 2007 // Split the scopes if 1) the conditional values of the biased 2008 // branches/selects of the inner/lower scope can't be hoisted up to the 2009 // outermost/uppermost scope entry, or 2) the condition values of the biased 2010 // branches/selects in a scope (including subscopes) don't share at least 2011 // one common value. 2012 SmallVector<CHRScope *, 8> SplitScopes; 2013 splitScopes(AllScopes, SplitScopes); 2014 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2015 2016 // After splitting, set the biased regions and selects of a scope (a tree 2017 // root) that include those of the subscopes. 2018 classifyBiasedScopes(SplitScopes); 2019 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2020 2021 // Filter out the scopes that has only one biased region or select (CHR 2022 // isn't useful in such a case). 2023 SmallVector<CHRScope *, 8> FilteredScopes; 2024 filterScopes(SplitScopes, FilteredScopes); 2025 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2026 2027 // Set the regions to be CHR'ed and their hoist stops for each scope. 2028 SmallVector<CHRScope *, 8> SetScopes; 2029 setCHRRegions(FilteredScopes, SetScopes); 2030 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2031 2032 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2033 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2034 // to the hot path, rather than both hot and cold paths. 2035 SmallVector<CHRScope *, 8> SortedScopes; 2036 sortScopes(SetScopes, SortedScopes); 2037 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2038 2039 CHR_DEBUG( 2040 dbgs() << "RegionInfo:\n"; 2041 RI.print(dbgs())); 2042 2043 // Apply the CHR transformation. 2044 if (!SortedScopes.empty()) { 2045 transformScopes(SortedScopes); 2046 Changed = true; 2047 } 2048 } 2049 2050 if (Changed) { 2051 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2052 ORE.emit([&]() { 2053 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2054 << ore::NV("Function", &F) << " " 2055 << "Reduced the number of branches in hot paths by " 2056 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2057 << " (static) and " 2058 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2059 << " (weighted by PGO count)"; 2060 }); 2061 } 2062 2063 return Changed; 2064 } 2065 2066 namespace llvm { 2067 2068 ControlHeightReductionPass::ControlHeightReductionPass() { 2069 parseCHRFilterFiles(); 2070 } 2071 2072 PreservedAnalyses ControlHeightReductionPass::run( 2073 Function &F, 2074 FunctionAnalysisManager &FAM) { 2075 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2076 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2077 // If there is no profile summary, we should not do CHR. 2078 if (!PPSI || !PPSI->hasProfileSummary()) 2079 return PreservedAnalyses::all(); 2080 auto &PSI = *PPSI; 2081 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2082 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2083 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2084 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2085 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2086 if (!Changed) 2087 return PreservedAnalyses::all(); 2088 return PreservedAnalyses::none(); 2089 } 2090 2091 } // namespace llvm 2092