1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/IR/ProfDataUtils.h" 33 #include "llvm/InitializePasses.h" 34 #include "llvm/Support/BranchProbability.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/MemoryBuffer.h" 37 #include "llvm/Transforms/Utils.h" 38 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 39 #include "llvm/Transforms/Utils/Cloning.h" 40 #include "llvm/Transforms/Utils/ValueMapper.h" 41 42 #include <optional> 43 #include <set> 44 #include <sstream> 45 46 using namespace llvm; 47 48 #define DEBUG_TYPE "chr" 49 50 #define CHR_DEBUG(X) LLVM_DEBUG(X) 51 52 static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden, 53 cl::desc("Disable CHR for all functions")); 54 55 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 56 cl::desc("Apply CHR for all functions")); 57 58 static cl::opt<double> CHRBiasThreshold( 59 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 60 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 61 62 static cl::opt<unsigned> CHRMergeThreshold( 63 "chr-merge-threshold", cl::init(2), cl::Hidden, 64 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 65 66 static cl::opt<std::string> CHRModuleList( 67 "chr-module-list", cl::init(""), cl::Hidden, 68 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 69 70 static cl::opt<std::string> CHRFunctionList( 71 "chr-function-list", cl::init(""), cl::Hidden, 72 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 73 74 static cl::opt<unsigned> CHRDupThreshsold( 75 "chr-dup-threshold", cl::init(3), cl::Hidden, 76 cl::desc("Max number of duplications by CHR for a region")); 77 78 static StringSet<> CHRModules; 79 static StringSet<> CHRFunctions; 80 81 static void parseCHRFilterFiles() { 82 if (!CHRModuleList.empty()) { 83 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 84 if (!FileOrErr) { 85 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 86 std::exit(1); 87 } 88 StringRef Buf = FileOrErr->get()->getBuffer(); 89 SmallVector<StringRef, 0> Lines; 90 Buf.split(Lines, '\n'); 91 for (StringRef Line : Lines) { 92 Line = Line.trim(); 93 if (!Line.empty()) 94 CHRModules.insert(Line); 95 } 96 } 97 if (!CHRFunctionList.empty()) { 98 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 99 if (!FileOrErr) { 100 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 101 std::exit(1); 102 } 103 StringRef Buf = FileOrErr->get()->getBuffer(); 104 SmallVector<StringRef, 0> Lines; 105 Buf.split(Lines, '\n'); 106 for (StringRef Line : Lines) { 107 Line = Line.trim(); 108 if (!Line.empty()) 109 CHRFunctions.insert(Line); 110 } 111 } 112 } 113 114 namespace { 115 116 struct CHRStats { 117 CHRStats() = default; 118 void print(raw_ostream &OS) const { 119 OS << "CHRStats: NumBranches " << NumBranches 120 << " NumBranchesDelta " << NumBranchesDelta 121 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 122 } 123 // The original number of conditional branches / selects 124 uint64_t NumBranches = 0; 125 // The decrease of the number of conditional branches / selects in the hot 126 // paths due to CHR. 127 uint64_t NumBranchesDelta = 0; 128 // NumBranchesDelta weighted by the profile count at the scope entry. 129 uint64_t WeightedNumBranchesDelta = 0; 130 }; 131 132 // RegInfo - some properties of a Region. 133 struct RegInfo { 134 RegInfo() = default; 135 RegInfo(Region *RegionIn) : R(RegionIn) {} 136 Region *R = nullptr; 137 bool HasBranch = false; 138 SmallVector<SelectInst *, 8> Selects; 139 }; 140 141 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 142 143 // CHRScope - a sequence of regions to CHR together. It corresponds to a 144 // sequence of conditional blocks. It can have subscopes which correspond to 145 // nested conditional blocks. Nested CHRScopes form a tree. 146 class CHRScope { 147 public: 148 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 149 assert(RI.R && "Null RegionIn"); 150 RegInfos.push_back(RI); 151 } 152 153 Region *getParentRegion() { 154 assert(RegInfos.size() > 0 && "Empty CHRScope"); 155 Region *Parent = RegInfos[0].R->getParent(); 156 assert(Parent && "Unexpected to call this on the top-level region"); 157 return Parent; 158 } 159 160 BasicBlock *getEntryBlock() { 161 assert(RegInfos.size() > 0 && "Empty CHRScope"); 162 return RegInfos.front().R->getEntry(); 163 } 164 165 BasicBlock *getExitBlock() { 166 assert(RegInfos.size() > 0 && "Empty CHRScope"); 167 return RegInfos.back().R->getExit(); 168 } 169 170 bool appendable(CHRScope *Next) { 171 // The next scope is appendable only if this scope is directly connected to 172 // it (which implies it post-dominates this scope) and this scope dominates 173 // it (no edge to the next scope outside this scope). 174 BasicBlock *NextEntry = Next->getEntryBlock(); 175 if (getExitBlock() != NextEntry) 176 // Not directly connected. 177 return false; 178 Region *LastRegion = RegInfos.back().R; 179 for (BasicBlock *Pred : predecessors(NextEntry)) 180 if (!LastRegion->contains(Pred)) 181 // There's an edge going into the entry of the next scope from outside 182 // of this scope. 183 return false; 184 return true; 185 } 186 187 void append(CHRScope *Next) { 188 assert(RegInfos.size() > 0 && "Empty CHRScope"); 189 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 190 assert(getParentRegion() == Next->getParentRegion() && 191 "Must be siblings"); 192 assert(getExitBlock() == Next->getEntryBlock() && 193 "Must be adjacent"); 194 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 195 Subs.append(Next->Subs.begin(), Next->Subs.end()); 196 } 197 198 void addSub(CHRScope *SubIn) { 199 #ifndef NDEBUG 200 bool IsChild = false; 201 for (RegInfo &RI : RegInfos) 202 if (RI.R == SubIn->getParentRegion()) { 203 IsChild = true; 204 break; 205 } 206 assert(IsChild && "Must be a child"); 207 #endif 208 Subs.push_back(SubIn); 209 } 210 211 // Split this scope at the boundary region into two, which will belong to the 212 // tail and returns the tail. 213 CHRScope *split(Region *Boundary) { 214 assert(Boundary && "Boundary null"); 215 assert(RegInfos.begin()->R != Boundary && 216 "Can't be split at beginning"); 217 auto BoundaryIt = llvm::find_if( 218 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 219 if (BoundaryIt == RegInfos.end()) 220 return nullptr; 221 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 222 DenseSet<Region *> TailRegionSet; 223 for (const RegInfo &RI : TailRegInfos) 224 TailRegionSet.insert(RI.R); 225 226 auto TailIt = 227 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 228 assert(Sub && "null Sub"); 229 Region *Parent = Sub->getParentRegion(); 230 if (TailRegionSet.count(Parent)) 231 return false; 232 233 assert(llvm::any_of( 234 RegInfos, 235 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 236 "Must be in head"); 237 return true; 238 }); 239 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 240 241 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 242 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 243 RegInfos.erase(BoundaryIt, RegInfos.end()); 244 Subs.erase(TailIt, Subs.end()); 245 return Scope; 246 } 247 248 bool contains(Instruction *I) const { 249 BasicBlock *Parent = I->getParent(); 250 for (const RegInfo &RI : RegInfos) 251 if (RI.R->contains(Parent)) 252 return true; 253 return false; 254 } 255 256 void print(raw_ostream &OS) const; 257 258 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 259 SmallVector<CHRScope *, 8> Subs; // Subscopes. 260 261 // The instruction at which to insert the CHR conditional branch (and hoist 262 // the dependent condition values). 263 Instruction *BranchInsertPoint; 264 265 // True-biased and false-biased regions (conditional blocks), 266 // respectively. Used only for the outermost scope and includes regions in 267 // subscopes. The rest are unbiased. 268 DenseSet<Region *> TrueBiasedRegions; 269 DenseSet<Region *> FalseBiasedRegions; 270 // Among the biased regions, the regions that get CHRed. 271 SmallVector<RegInfo, 8> CHRRegions; 272 273 // True-biased and false-biased selects, respectively. Used only for the 274 // outermost scope and includes ones in subscopes. 275 DenseSet<SelectInst *> TrueBiasedSelects; 276 DenseSet<SelectInst *> FalseBiasedSelects; 277 278 // Map from one of the above regions to the instructions to stop 279 // hoisting instructions at through use-def chains. 280 HoistStopMapTy HoistStopMap; 281 282 private: 283 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 284 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 285 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 286 }; 287 288 class CHR { 289 public: 290 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 291 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 292 OptimizationRemarkEmitter &OREin) 293 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 294 295 ~CHR() { 296 for (CHRScope *Scope : Scopes) { 297 delete Scope; 298 } 299 } 300 301 bool run(); 302 303 private: 304 // See the comments in CHR::run() for the high level flow of the algorithm and 305 // what the following functions do. 306 307 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 308 Region *R = RI.getTopLevelRegion(); 309 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 310 Output.push_back(Scope); 311 } 312 } 313 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 314 SmallVectorImpl<CHRScope *> &Scopes); 315 CHRScope *findScope(Region *R); 316 void checkScopeHoistable(CHRScope *Scope); 317 318 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 319 SmallVectorImpl<CHRScope *> &Output); 320 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 321 CHRScope *Outer, 322 DenseSet<Value *> *OuterConditionValues, 323 Instruction *OuterInsertPoint, 324 SmallVectorImpl<CHRScope *> &Output, 325 DenseSet<Instruction *> &Unhoistables); 326 327 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 328 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 329 330 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 331 SmallVectorImpl<CHRScope *> &Output); 332 333 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 334 SmallVectorImpl<CHRScope *> &Output); 335 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 336 337 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 338 SmallVectorImpl<CHRScope *> &Output); 339 340 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 341 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 342 void cloneScopeBlocks(CHRScope *Scope, 343 BasicBlock *PreEntryBlock, 344 BasicBlock *ExitBlock, 345 Region *LastRegion, 346 ValueToValueMapTy &VMap); 347 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 348 BasicBlock *EntryBlock, 349 BasicBlock *NewEntryBlock, 350 ValueToValueMapTy &VMap); 351 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock, 352 BranchInst *MergedBR, uint64_t ProfileCount); 353 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB, 354 Value *&MergedCondition, BranchProbability &CHRBranchBias); 355 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB, 356 Value *&MergedCondition, BranchProbability &CHRBranchBias); 357 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 358 Instruction *BranchOrSelect, CHRScope *Scope, 359 IRBuilder<> &IRB, Value *&MergedCondition); 360 unsigned getRegionDuplicationCount(const Region *R) { 361 unsigned Count = 0; 362 // Find out how many times region R is cloned. Note that if the parent 363 // of R is cloned, R is also cloned, but R's clone count is not updated 364 // from the clone of the parent. We need to accumlate all the counts 365 // from the ancestors to get the clone count. 366 while (R) { 367 Count += DuplicationCount[R]; 368 R = R->getParent(); 369 } 370 return Count; 371 } 372 373 Function &F; 374 BlockFrequencyInfo &BFI; 375 DominatorTree &DT; 376 ProfileSummaryInfo &PSI; 377 RegionInfo &RI; 378 OptimizationRemarkEmitter &ORE; 379 CHRStats Stats; 380 381 // All the true-biased regions in the function 382 DenseSet<Region *> TrueBiasedRegionsGlobal; 383 // All the false-biased regions in the function 384 DenseSet<Region *> FalseBiasedRegionsGlobal; 385 // All the true-biased selects in the function 386 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 387 // All the false-biased selects in the function 388 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 389 // A map from biased regions to their branch bias 390 DenseMap<Region *, BranchProbability> BranchBiasMap; 391 // A map from biased selects to their branch bias 392 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 393 // All the scopes. 394 DenseSet<CHRScope *> Scopes; 395 // This maps records how many times this region is cloned. 396 DenseMap<const Region *, unsigned> DuplicationCount; 397 }; 398 399 } // end anonymous namespace 400 401 static inline 402 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 403 const CHRStats &Stats) { 404 Stats.print(OS); 405 return OS; 406 } 407 408 static inline 409 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 410 Scope.print(OS); 411 return OS; 412 } 413 414 static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { 415 if (DisableCHR) 416 return false; 417 418 if (ForceCHR) 419 return true; 420 421 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 422 if (CHRModules.count(F.getParent()->getName())) 423 return true; 424 return CHRFunctions.count(F.getName()); 425 } 426 427 return PSI.isFunctionEntryHot(&F); 428 } 429 430 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 431 CHRStats *Stats) { 432 StringRef FuncName = F.getName(); 433 StringRef ModuleName = F.getParent()->getName(); 434 (void)(FuncName); // Unused in release build. 435 (void)(ModuleName); // Unused in release build. 436 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 437 << FuncName); 438 if (Stats) 439 CHR_DEBUG(dbgs() << " " << *Stats); 440 CHR_DEBUG(dbgs() << "\n"); 441 CHR_DEBUG(F.dump()); 442 } 443 444 void CHRScope::print(raw_ostream &OS) const { 445 assert(RegInfos.size() > 0 && "Empty CHRScope"); 446 OS << "CHRScope["; 447 OS << RegInfos.size() << ", Regions["; 448 for (const RegInfo &RI : RegInfos) { 449 OS << RI.R->getNameStr(); 450 if (RI.HasBranch) 451 OS << " B"; 452 if (RI.Selects.size() > 0) 453 OS << " S" << RI.Selects.size(); 454 OS << ", "; 455 } 456 if (RegInfos[0].R->getParent()) { 457 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 458 } else { 459 // top level region 460 OS << "]"; 461 } 462 OS << ", Subs["; 463 for (CHRScope *Sub : Subs) { 464 OS << *Sub << ", "; 465 } 466 OS << "]]"; 467 } 468 469 // Return true if the given instruction type can be hoisted by CHR. 470 static bool isHoistableInstructionType(Instruction *I) { 471 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 472 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 473 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 474 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 475 isa<InsertValueInst>(I); 476 } 477 478 // Return true if the given instruction can be hoisted by CHR. 479 static bool isHoistable(Instruction *I, DominatorTree &DT) { 480 if (!isHoistableInstructionType(I)) 481 return false; 482 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT); 483 } 484 485 // Recursively traverse the use-def chains of the given value and return a set 486 // of the unhoistable base values defined within the scope (excluding the 487 // first-region entry block) or the (hoistable or unhoistable) base values that 488 // are defined outside (including the first-region entry block) of the 489 // scope. The returned set doesn't include constants. 490 static const std::set<Value *> & 491 getBaseValues(Value *V, DominatorTree &DT, 492 DenseMap<Value *, std::set<Value *>> &Visited) { 493 auto It = Visited.find(V); 494 if (It != Visited.end()) { 495 return It->second; 496 } 497 std::set<Value *> Result; 498 if (auto *I = dyn_cast<Instruction>(V)) { 499 // We don't stop at a block that's not in the Scope because we would miss 500 // some instructions that are based on the same base values if we stop 501 // there. 502 if (!isHoistable(I, DT)) { 503 Result.insert(I); 504 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 505 } 506 // I is hoistable above the Scope. 507 for (Value *Op : I->operands()) { 508 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 509 Result.insert(OpResult.begin(), OpResult.end()); 510 } 511 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 512 } 513 if (isa<Argument>(V)) { 514 Result.insert(V); 515 } 516 // We don't include others like constants because those won't lead to any 517 // chance of folding of conditions (eg two bit checks merged into one check) 518 // after CHR. 519 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 520 } 521 522 // Return true if V is already hoisted or can be hoisted (along with its 523 // operands) above the insert point. When it returns true and HoistStops is 524 // non-null, the instructions to stop hoisting at through the use-def chains are 525 // inserted into HoistStops. 526 static bool 527 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 528 DenseSet<Instruction *> &Unhoistables, 529 DenseSet<Instruction *> *HoistStops, 530 DenseMap<Instruction *, bool> &Visited) { 531 assert(InsertPoint && "Null InsertPoint"); 532 if (auto *I = dyn_cast<Instruction>(V)) { 533 auto It = Visited.find(I); 534 if (It != Visited.end()) { 535 return It->second; 536 } 537 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 538 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 539 if (Unhoistables.count(I)) { 540 // Don't hoist if they are not to be hoisted. 541 Visited[I] = false; 542 return false; 543 } 544 if (DT.dominates(I, InsertPoint)) { 545 // We are already above the insert point. Stop here. 546 if (HoistStops) 547 HoistStops->insert(I); 548 Visited[I] = true; 549 return true; 550 } 551 // We aren't not above the insert point, check if we can hoist it above the 552 // insert point. 553 if (isHoistable(I, DT)) { 554 // Check operands first. 555 DenseSet<Instruction *> OpsHoistStops; 556 bool AllOpsHoisted = true; 557 for (Value *Op : I->operands()) { 558 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 559 Visited)) { 560 AllOpsHoisted = false; 561 break; 562 } 563 } 564 if (AllOpsHoisted) { 565 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 566 if (HoistStops) 567 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 568 Visited[I] = true; 569 return true; 570 } 571 } 572 Visited[I] = false; 573 return false; 574 } 575 // Non-instructions are considered hoistable. 576 return true; 577 } 578 579 // Constructs the true and false branch probabilities if the the instruction has 580 // valid branch weights. Returns true when this was successful, false otherwise. 581 static bool extractBranchProbabilities(Instruction *I, 582 BranchProbability &TrueProb, 583 BranchProbability &FalseProb) { 584 uint64_t TrueWeight; 585 uint64_t FalseWeight; 586 if (!extractBranchWeights(*I, TrueWeight, FalseWeight)) 587 return false; 588 uint64_t SumWeight = TrueWeight + FalseWeight; 589 590 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight && 591 "Overflow calculating branch probabilities."); 592 593 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 594 if (SumWeight == 0) 595 return false; 596 597 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight); 598 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight); 599 return true; 600 } 601 602 static BranchProbability getCHRBiasThreshold() { 603 return BranchProbability::getBranchProbability( 604 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 605 } 606 607 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 608 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 609 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 610 // false. 611 template <typename K, typename S, typename M> 612 static bool checkBias(K *Key, BranchProbability TrueProb, 613 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 614 M &BiasMap) { 615 BranchProbability Threshold = getCHRBiasThreshold(); 616 if (TrueProb >= Threshold) { 617 TrueSet.insert(Key); 618 BiasMap[Key] = TrueProb; 619 return true; 620 } else if (FalseProb >= Threshold) { 621 FalseSet.insert(Key); 622 BiasMap[Key] = FalseProb; 623 return true; 624 } 625 return false; 626 } 627 628 // Returns true and insert a region into the right biased set and the map if the 629 // branch of the region is biased. 630 static bool checkBiasedBranch(BranchInst *BI, Region *R, 631 DenseSet<Region *> &TrueBiasedRegionsGlobal, 632 DenseSet<Region *> &FalseBiasedRegionsGlobal, 633 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 634 if (!BI->isConditional()) 635 return false; 636 BranchProbability ThenProb, ElseProb; 637 if (!extractBranchProbabilities(BI, ThenProb, ElseProb)) 638 return false; 639 BasicBlock *IfThen = BI->getSuccessor(0); 640 BasicBlock *IfElse = BI->getSuccessor(1); 641 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 642 IfThen != IfElse && 643 "Invariant from findScopes"); 644 if (IfThen == R->getExit()) { 645 // Swap them so that IfThen/ThenProb means going into the conditional code 646 // and IfElse/ElseProb means skipping it. 647 std::swap(IfThen, IfElse); 648 std::swap(ThenProb, ElseProb); 649 } 650 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 651 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 652 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 653 return checkBias(R, ThenProb, ElseProb, 654 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 655 BranchBiasMap); 656 } 657 658 // Returns true and insert a select into the right biased set and the map if the 659 // select is biased. 660 static bool checkBiasedSelect( 661 SelectInst *SI, Region *R, 662 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 663 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 664 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 665 BranchProbability TrueProb, FalseProb; 666 if (!extractBranchProbabilities(SI, TrueProb, FalseProb)) 667 return false; 668 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 669 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 670 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 671 return checkBias(SI, TrueProb, FalseProb, 672 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 673 SelectBiasMap); 674 } 675 676 // Returns the instruction at which to hoist the dependent condition values and 677 // insert the CHR branch for a region. This is the terminator branch in the 678 // entry block or the first select in the entry block, if any. 679 static Instruction* getBranchInsertPoint(RegInfo &RI) { 680 Region *R = RI.R; 681 BasicBlock *EntryBB = R->getEntry(); 682 // The hoist point is by default the terminator of the entry block, which is 683 // the same as the branch instruction if RI.HasBranch is true. 684 Instruction *HoistPoint = EntryBB->getTerminator(); 685 for (SelectInst *SI : RI.Selects) { 686 if (SI->getParent() == EntryBB) { 687 // Pick the first select in Selects in the entry block. Note Selects is 688 // sorted in the instruction order within a block (asserted below). 689 HoistPoint = SI; 690 break; 691 } 692 } 693 assert(HoistPoint && "Null HoistPoint"); 694 #ifndef NDEBUG 695 // Check that HoistPoint is the first one in Selects in the entry block, 696 // if any. 697 DenseSet<Instruction *> EntryBlockSelectSet; 698 for (SelectInst *SI : RI.Selects) { 699 if (SI->getParent() == EntryBB) { 700 EntryBlockSelectSet.insert(SI); 701 } 702 } 703 for (Instruction &I : *EntryBB) { 704 if (EntryBlockSelectSet.contains(&I)) { 705 assert(&I == HoistPoint && 706 "HoistPoint must be the first one in Selects"); 707 break; 708 } 709 } 710 #endif 711 return HoistPoint; 712 } 713 714 // Find a CHR scope in the given region. 715 CHRScope * CHR::findScope(Region *R) { 716 CHRScope *Result = nullptr; 717 BasicBlock *Entry = R->getEntry(); 718 BasicBlock *Exit = R->getExit(); // null if top level. 719 assert(Entry && "Entry must not be null"); 720 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 721 "Only top level region has a null exit"); 722 if (Entry) 723 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 724 else 725 CHR_DEBUG(dbgs() << "Entry null\n"); 726 if (Exit) 727 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 728 else 729 CHR_DEBUG(dbgs() << "Exit null\n"); 730 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 731 // to this region). 732 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 733 if (EntryInSubregion) 734 return nullptr; 735 // Exclude loops 736 for (BasicBlock *Pred : predecessors(Entry)) 737 if (R->contains(Pred)) 738 return nullptr; 739 // If any of the basic blocks have address taken, we must skip this region 740 // because we cannot clone basic blocks that have address taken. 741 for (BasicBlock *BB : R->blocks()) { 742 if (BB->hasAddressTaken()) 743 return nullptr; 744 // If we encounter llvm.coro.id, skip this region because if the basic block 745 // is cloned, we end up inserting a token type PHI node to the block with 746 // llvm.coro.begin. 747 // FIXME: This could lead to less optimal codegen, because the region is 748 // excluded, it can prevent CHR from merging adjacent regions into bigger 749 // scope and hoisting more branches. 750 for (Instruction &I : *BB) 751 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 752 if (II->getIntrinsicID() == Intrinsic::coro_id) 753 return nullptr; 754 } 755 756 if (Exit) { 757 // Try to find an if-then block (check if R is an if-then). 758 // if (cond) { 759 // ... 760 // } 761 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 762 if (BI) 763 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 764 else 765 CHR_DEBUG(dbgs() << "BI null\n"); 766 if (BI && BI->isConditional()) { 767 BasicBlock *S0 = BI->getSuccessor(0); 768 BasicBlock *S1 = BI->getSuccessor(1); 769 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 770 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 771 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 772 RegInfo RI(R); 773 RI.HasBranch = checkBiasedBranch( 774 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 775 BranchBiasMap); 776 Result = new CHRScope(RI); 777 Scopes.insert(Result); 778 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 779 ++Stats.NumBranches; 780 if (!RI.HasBranch) { 781 ORE.emit([&]() { 782 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 783 << "Branch not biased"; 784 }); 785 } 786 } 787 } 788 } 789 { 790 // Try to look for selects in the direct child blocks (as opposed to in 791 // subregions) of R. 792 // ... 793 // if (..) { // Some subregion 794 // ... 795 // } 796 // if (..) { // Some subregion 797 // ... 798 // } 799 // ... 800 // a = cond ? b : c; 801 // ... 802 SmallVector<SelectInst *, 8> Selects; 803 for (RegionNode *E : R->elements()) { 804 if (E->isSubRegion()) 805 continue; 806 // This returns the basic block of E if E is a direct child of R (not a 807 // subregion.) 808 BasicBlock *BB = E->getEntry(); 809 // Need to push in the order to make it easier to find the first Select 810 // later. 811 for (Instruction &I : *BB) { 812 if (auto *SI = dyn_cast<SelectInst>(&I)) { 813 Selects.push_back(SI); 814 ++Stats.NumBranches; 815 } 816 } 817 } 818 if (Selects.size() > 0) { 819 auto AddSelects = [&](RegInfo &RI) { 820 for (auto *SI : Selects) 821 if (checkBiasedSelect(SI, RI.R, 822 TrueBiasedSelectsGlobal, 823 FalseBiasedSelectsGlobal, 824 SelectBiasMap)) 825 RI.Selects.push_back(SI); 826 else 827 ORE.emit([&]() { 828 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 829 << "Select not biased"; 830 }); 831 }; 832 if (!Result) { 833 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 834 RegInfo RI(R); 835 AddSelects(RI); 836 Result = new CHRScope(RI); 837 Scopes.insert(Result); 838 } else { 839 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 840 AddSelects(Result->RegInfos[0]); 841 } 842 } 843 } 844 845 if (Result) { 846 checkScopeHoistable(Result); 847 } 848 return Result; 849 } 850 851 // Check that any of the branch and the selects in the region could be 852 // hoisted above the the CHR branch insert point (the most dominating of 853 // them, either the branch (at the end of the first block) or the first 854 // select in the first block). If the branch can't be hoisted, drop the 855 // selects in the first blocks. 856 // 857 // For example, for the following scope/region with selects, we want to insert 858 // the merged branch right before the first select in the first/entry block by 859 // hoisting c1, c2, c3, and c4. 860 // 861 // // Branch insert point here. 862 // a = c1 ? b : c; // Select 1 863 // d = c2 ? e : f; // Select 2 864 // if (c3) { // Branch 865 // ... 866 // c4 = foo() // A call. 867 // g = c4 ? h : i; // Select 3 868 // } 869 // 870 // But suppose we can't hoist c4 because it's dependent on the preceding 871 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 872 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 873 void CHR::checkScopeHoistable(CHRScope *Scope) { 874 RegInfo &RI = Scope->RegInfos[0]; 875 Region *R = RI.R; 876 BasicBlock *EntryBB = R->getEntry(); 877 auto *Branch = RI.HasBranch ? 878 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 879 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 880 if (RI.HasBranch || !Selects.empty()) { 881 Instruction *InsertPoint = getBranchInsertPoint(RI); 882 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 883 // Avoid a data dependence from a select or a branch to a(nother) 884 // select. Note no instruction can't data-depend on a branch (a branch 885 // instruction doesn't produce a value). 886 DenseSet<Instruction *> Unhoistables; 887 // Initialize Unhoistables with the selects. 888 for (SelectInst *SI : Selects) { 889 Unhoistables.insert(SI); 890 } 891 // Remove Selects that can't be hoisted. 892 for (auto it = Selects.begin(); it != Selects.end(); ) { 893 SelectInst *SI = *it; 894 if (SI == InsertPoint) { 895 ++it; 896 continue; 897 } 898 DenseMap<Instruction *, bool> Visited; 899 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 900 DT, Unhoistables, nullptr, Visited); 901 if (!IsHoistable) { 902 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 903 ORE.emit([&]() { 904 return OptimizationRemarkMissed(DEBUG_TYPE, 905 "DropUnhoistableSelect", SI) 906 << "Dropped unhoistable select"; 907 }); 908 it = Selects.erase(it); 909 // Since we are dropping the select here, we also drop it from 910 // Unhoistables. 911 Unhoistables.erase(SI); 912 } else 913 ++it; 914 } 915 // Update InsertPoint after potentially removing selects. 916 InsertPoint = getBranchInsertPoint(RI); 917 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 918 if (RI.HasBranch && InsertPoint != Branch) { 919 DenseMap<Instruction *, bool> Visited; 920 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 921 DT, Unhoistables, nullptr, Visited); 922 if (!IsHoistable) { 923 // If the branch isn't hoistable, drop the selects in the entry 924 // block, preferring the branch, which makes the branch the hoist 925 // point. 926 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 927 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 928 CHR_DEBUG( 929 for (SelectInst *SI : Selects) { 930 dbgs() << "SI " << *SI << "\n"; 931 }); 932 for (SelectInst *SI : Selects) { 933 ORE.emit([&]() { 934 return OptimizationRemarkMissed(DEBUG_TYPE, 935 "DropSelectUnhoistableBranch", SI) 936 << "Dropped select due to unhoistable branch"; 937 }); 938 } 939 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 940 return SI->getParent() == EntryBB; 941 }); 942 Unhoistables.clear(); 943 InsertPoint = Branch; 944 } 945 } 946 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 947 #ifndef NDEBUG 948 if (RI.HasBranch) { 949 assert(!DT.dominates(Branch, InsertPoint) && 950 "Branch can't be already above the hoist point"); 951 DenseMap<Instruction *, bool> Visited; 952 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 953 DT, Unhoistables, nullptr, Visited) && 954 "checkHoistValue for branch"); 955 } 956 for (auto *SI : Selects) { 957 assert(!DT.dominates(SI, InsertPoint) && 958 "SI can't be already above the hoist point"); 959 DenseMap<Instruction *, bool> Visited; 960 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 961 Unhoistables, nullptr, Visited) && 962 "checkHoistValue for selects"); 963 } 964 CHR_DEBUG(dbgs() << "Result\n"); 965 if (RI.HasBranch) { 966 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 967 } 968 for (auto *SI : Selects) { 969 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 970 } 971 #endif 972 } 973 } 974 975 // Traverse the region tree, find all nested scopes and merge them if possible. 976 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 977 SmallVectorImpl<CHRScope *> &Scopes) { 978 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 979 CHRScope *Result = findScope(R); 980 // Visit subscopes. 981 CHRScope *ConsecutiveSubscope = nullptr; 982 SmallVector<CHRScope *, 8> Subscopes; 983 for (auto It = R->begin(); It != R->end(); ++It) { 984 const std::unique_ptr<Region> &SubR = *It; 985 auto NextIt = std::next(It); 986 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 987 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 988 << "\n"); 989 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 990 if (SubCHRScope) { 991 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 992 } else { 993 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 994 } 995 if (SubCHRScope) { 996 if (!ConsecutiveSubscope) 997 ConsecutiveSubscope = SubCHRScope; 998 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 999 Subscopes.push_back(ConsecutiveSubscope); 1000 ConsecutiveSubscope = SubCHRScope; 1001 } else 1002 ConsecutiveSubscope->append(SubCHRScope); 1003 } else { 1004 if (ConsecutiveSubscope) { 1005 Subscopes.push_back(ConsecutiveSubscope); 1006 } 1007 ConsecutiveSubscope = nullptr; 1008 } 1009 } 1010 if (ConsecutiveSubscope) { 1011 Subscopes.push_back(ConsecutiveSubscope); 1012 } 1013 for (CHRScope *Sub : Subscopes) { 1014 if (Result) { 1015 // Combine it with the parent. 1016 Result->addSub(Sub); 1017 } else { 1018 // Push Subscopes as they won't be combined with the parent. 1019 Scopes.push_back(Sub); 1020 } 1021 } 1022 return Result; 1023 } 1024 1025 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1026 DenseSet<Value *> ConditionValues; 1027 if (RI.HasBranch) { 1028 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1029 ConditionValues.insert(BI->getCondition()); 1030 } 1031 for (SelectInst *SI : RI.Selects) { 1032 ConditionValues.insert(SI->getCondition()); 1033 } 1034 return ConditionValues; 1035 } 1036 1037 1038 // Determine whether to split a scope depending on the sets of the branch 1039 // condition values of the previous region and the current region. We split 1040 // (return true) it if 1) the condition values of the inner/lower scope can't be 1041 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1042 // values have an empty intersection (because the combined branch conditions 1043 // won't probably lead to a simpler combined condition). 1044 static bool shouldSplit(Instruction *InsertPoint, 1045 DenseSet<Value *> &PrevConditionValues, 1046 DenseSet<Value *> &ConditionValues, 1047 DominatorTree &DT, 1048 DenseSet<Instruction *> &Unhoistables) { 1049 assert(InsertPoint && "Null InsertPoint"); 1050 CHR_DEBUG( 1051 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1052 for (Value *V : PrevConditionValues) { 1053 dbgs() << *V << ", "; 1054 } 1055 dbgs() << " ConditionValues "; 1056 for (Value *V : ConditionValues) { 1057 dbgs() << *V << ", "; 1058 } 1059 dbgs() << "\n"); 1060 // If any of Bases isn't hoistable to the hoist point, split. 1061 for (Value *V : ConditionValues) { 1062 DenseMap<Instruction *, bool> Visited; 1063 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1064 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1065 return true; // Not hoistable, split. 1066 } 1067 } 1068 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1069 // unnecessary splits at scopes with no branch/selects. If 1070 // PrevConditionValues and ConditionValues don't intersect at all, split. 1071 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1072 // Use std::set as DenseSet doesn't work with set_intersection. 1073 std::set<Value *> PrevBases, Bases; 1074 DenseMap<Value *, std::set<Value *>> Visited; 1075 for (Value *V : PrevConditionValues) { 1076 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1077 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1078 } 1079 for (Value *V : ConditionValues) { 1080 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1081 Bases.insert(BaseValues.begin(), BaseValues.end()); 1082 } 1083 CHR_DEBUG( 1084 dbgs() << "PrevBases "; 1085 for (Value *V : PrevBases) { 1086 dbgs() << *V << ", "; 1087 } 1088 dbgs() << " Bases "; 1089 for (Value *V : Bases) { 1090 dbgs() << *V << ", "; 1091 } 1092 dbgs() << "\n"); 1093 std::vector<Value *> Intersection; 1094 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1095 Bases.end(), std::back_inserter(Intersection)); 1096 if (Intersection.empty()) { 1097 // Empty intersection, split. 1098 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1099 return true; 1100 } 1101 } 1102 CHR_DEBUG(dbgs() << "No split\n"); 1103 return false; // Don't split. 1104 } 1105 1106 static void getSelectsInScope(CHRScope *Scope, 1107 DenseSet<Instruction *> &Output) { 1108 for (RegInfo &RI : Scope->RegInfos) 1109 for (SelectInst *SI : RI.Selects) 1110 Output.insert(SI); 1111 for (CHRScope *Sub : Scope->Subs) 1112 getSelectsInScope(Sub, Output); 1113 } 1114 1115 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1116 SmallVectorImpl<CHRScope *> &Output) { 1117 for (CHRScope *Scope : Input) { 1118 assert(!Scope->BranchInsertPoint && 1119 "BranchInsertPoint must not be set"); 1120 DenseSet<Instruction *> Unhoistables; 1121 getSelectsInScope(Scope, Unhoistables); 1122 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1123 } 1124 #ifndef NDEBUG 1125 for (CHRScope *Scope : Output) { 1126 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1127 } 1128 #endif 1129 } 1130 1131 SmallVector<CHRScope *, 8> CHR::splitScope( 1132 CHRScope *Scope, 1133 CHRScope *Outer, 1134 DenseSet<Value *> *OuterConditionValues, 1135 Instruction *OuterInsertPoint, 1136 SmallVectorImpl<CHRScope *> &Output, 1137 DenseSet<Instruction *> &Unhoistables) { 1138 if (Outer) { 1139 assert(OuterConditionValues && "Null OuterConditionValues"); 1140 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1141 } 1142 bool PrevSplitFromOuter = true; 1143 DenseSet<Value *> PrevConditionValues; 1144 Instruction *PrevInsertPoint = nullptr; 1145 SmallVector<CHRScope *, 8> Splits; 1146 SmallVector<bool, 8> SplitsSplitFromOuter; 1147 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1148 SmallVector<Instruction *, 8> SplitsInsertPoints; 1149 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1150 for (RegInfo &RI : RegInfos) { 1151 Instruction *InsertPoint = getBranchInsertPoint(RI); 1152 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1153 CHR_DEBUG( 1154 dbgs() << "ConditionValues "; 1155 for (Value *V : ConditionValues) { 1156 dbgs() << *V << ", "; 1157 } 1158 dbgs() << "\n"); 1159 if (RI.R == RegInfos[0].R) { 1160 // First iteration. Check to see if we should split from the outer. 1161 if (Outer) { 1162 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1163 CHR_DEBUG(dbgs() << "Should split from outer at " 1164 << RI.R->getNameStr() << "\n"); 1165 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1166 ConditionValues, DT, Unhoistables)) { 1167 PrevConditionValues = ConditionValues; 1168 PrevInsertPoint = InsertPoint; 1169 ORE.emit([&]() { 1170 return OptimizationRemarkMissed(DEBUG_TYPE, 1171 "SplitScopeFromOuter", 1172 RI.R->getEntry()->getTerminator()) 1173 << "Split scope from outer due to unhoistable branch/select " 1174 << "and/or lack of common condition values"; 1175 }); 1176 } else { 1177 // Not splitting from the outer. Use the outer bases and insert 1178 // point. Union the bases. 1179 PrevSplitFromOuter = false; 1180 PrevConditionValues = *OuterConditionValues; 1181 PrevConditionValues.insert(ConditionValues.begin(), 1182 ConditionValues.end()); 1183 PrevInsertPoint = OuterInsertPoint; 1184 } 1185 } else { 1186 CHR_DEBUG(dbgs() << "Outer null\n"); 1187 PrevConditionValues = ConditionValues; 1188 PrevInsertPoint = InsertPoint; 1189 } 1190 } else { 1191 CHR_DEBUG(dbgs() << "Should split from prev at " 1192 << RI.R->getNameStr() << "\n"); 1193 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1194 DT, Unhoistables)) { 1195 CHRScope *Tail = Scope->split(RI.R); 1196 Scopes.insert(Tail); 1197 Splits.push_back(Scope); 1198 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1199 SplitsConditionValues.push_back(PrevConditionValues); 1200 SplitsInsertPoints.push_back(PrevInsertPoint); 1201 Scope = Tail; 1202 PrevConditionValues = ConditionValues; 1203 PrevInsertPoint = InsertPoint; 1204 PrevSplitFromOuter = true; 1205 ORE.emit([&]() { 1206 return OptimizationRemarkMissed(DEBUG_TYPE, 1207 "SplitScopeFromPrev", 1208 RI.R->getEntry()->getTerminator()) 1209 << "Split scope from previous due to unhoistable branch/select " 1210 << "and/or lack of common condition values"; 1211 }); 1212 } else { 1213 // Not splitting. Union the bases. Keep the hoist point. 1214 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1215 } 1216 } 1217 } 1218 Splits.push_back(Scope); 1219 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1220 SplitsConditionValues.push_back(PrevConditionValues); 1221 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1222 SplitsInsertPoints.push_back(PrevInsertPoint); 1223 assert(Splits.size() == SplitsConditionValues.size() && 1224 Splits.size() == SplitsSplitFromOuter.size() && 1225 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1226 for (size_t I = 0; I < Splits.size(); ++I) { 1227 CHRScope *Split = Splits[I]; 1228 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1229 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1230 SmallVector<CHRScope *, 8> NewSubs; 1231 DenseSet<Instruction *> SplitUnhoistables; 1232 getSelectsInScope(Split, SplitUnhoistables); 1233 for (CHRScope *Sub : Split->Subs) { 1234 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1235 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1236 SplitUnhoistables); 1237 llvm::append_range(NewSubs, SubSplits); 1238 } 1239 Split->Subs = NewSubs; 1240 } 1241 SmallVector<CHRScope *, 8> Result; 1242 for (size_t I = 0; I < Splits.size(); ++I) { 1243 CHRScope *Split = Splits[I]; 1244 if (SplitsSplitFromOuter[I]) { 1245 // Split from the outer. 1246 Output.push_back(Split); 1247 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1248 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1249 << "\n"); 1250 } else { 1251 // Connected to the outer. 1252 Result.push_back(Split); 1253 } 1254 } 1255 if (!Outer) 1256 assert(Result.empty() && 1257 "If no outer (top-level), must return no nested ones"); 1258 return Result; 1259 } 1260 1261 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1262 for (CHRScope *Scope : Scopes) { 1263 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1264 classifyBiasedScopes(Scope, Scope); 1265 CHR_DEBUG( 1266 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1267 dbgs() << "TrueBiasedRegions "; 1268 for (Region *R : Scope->TrueBiasedRegions) { 1269 dbgs() << R->getNameStr() << ", "; 1270 } 1271 dbgs() << "\n"; 1272 dbgs() << "FalseBiasedRegions "; 1273 for (Region *R : Scope->FalseBiasedRegions) { 1274 dbgs() << R->getNameStr() << ", "; 1275 } 1276 dbgs() << "\n"; 1277 dbgs() << "TrueBiasedSelects "; 1278 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1279 dbgs() << *SI << ", "; 1280 } 1281 dbgs() << "\n"; 1282 dbgs() << "FalseBiasedSelects "; 1283 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1284 dbgs() << *SI << ", "; 1285 } 1286 dbgs() << "\n";); 1287 } 1288 } 1289 1290 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1291 for (RegInfo &RI : Scope->RegInfos) { 1292 if (RI.HasBranch) { 1293 Region *R = RI.R; 1294 if (TrueBiasedRegionsGlobal.contains(R)) 1295 OutermostScope->TrueBiasedRegions.insert(R); 1296 else if (FalseBiasedRegionsGlobal.contains(R)) 1297 OutermostScope->FalseBiasedRegions.insert(R); 1298 else 1299 llvm_unreachable("Must be biased"); 1300 } 1301 for (SelectInst *SI : RI.Selects) { 1302 if (TrueBiasedSelectsGlobal.contains(SI)) 1303 OutermostScope->TrueBiasedSelects.insert(SI); 1304 else if (FalseBiasedSelectsGlobal.contains(SI)) 1305 OutermostScope->FalseBiasedSelects.insert(SI); 1306 else 1307 llvm_unreachable("Must be biased"); 1308 } 1309 } 1310 for (CHRScope *Sub : Scope->Subs) { 1311 classifyBiasedScopes(Sub, OutermostScope); 1312 } 1313 } 1314 1315 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1316 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1317 Scope->FalseBiasedRegions.size() + 1318 Scope->TrueBiasedSelects.size() + 1319 Scope->FalseBiasedSelects.size(); 1320 return NumBiased >= CHRMergeThreshold; 1321 } 1322 1323 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1324 SmallVectorImpl<CHRScope *> &Output) { 1325 for (CHRScope *Scope : Input) { 1326 // Filter out the ones with only one region and no subs. 1327 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1328 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1329 << Scope->TrueBiasedRegions.size() 1330 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1331 << " true-selects " << Scope->TrueBiasedSelects.size() 1332 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1333 ORE.emit([&]() { 1334 return OptimizationRemarkMissed( 1335 DEBUG_TYPE, 1336 "DropScopeWithOneBranchOrSelect", 1337 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1338 << "Drop scope with < " 1339 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1340 << " biased branch(es) or select(s)"; 1341 }); 1342 continue; 1343 } 1344 Output.push_back(Scope); 1345 } 1346 } 1347 1348 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1349 SmallVectorImpl<CHRScope *> &Output) { 1350 for (CHRScope *Scope : Input) { 1351 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1352 "Empty"); 1353 setCHRRegions(Scope, Scope); 1354 Output.push_back(Scope); 1355 CHR_DEBUG( 1356 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1357 for (auto pair : Scope->HoistStopMap) { 1358 Region *R = pair.first; 1359 dbgs() << "Region " << R->getNameStr() << "\n"; 1360 for (Instruction *I : pair.second) { 1361 dbgs() << "HoistStop " << *I << "\n"; 1362 } 1363 } 1364 dbgs() << "CHRRegions" << "\n"; 1365 for (RegInfo &RI : Scope->CHRRegions) { 1366 dbgs() << RI.R->getNameStr() << "\n"; 1367 }); 1368 } 1369 } 1370 1371 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1372 DenseSet<Instruction *> Unhoistables; 1373 // Put the biased selects in Unhoistables because they should stay where they 1374 // are and constant-folded after CHR (in case one biased select or a branch 1375 // can depend on another biased select.) 1376 for (RegInfo &RI : Scope->RegInfos) { 1377 for (SelectInst *SI : RI.Selects) { 1378 Unhoistables.insert(SI); 1379 } 1380 } 1381 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1382 for (RegInfo &RI : Scope->RegInfos) { 1383 Region *R = RI.R; 1384 DenseSet<Instruction *> HoistStops; 1385 bool IsHoisted = false; 1386 if (RI.HasBranch) { 1387 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1388 OutermostScope->FalseBiasedRegions.contains(R)) && 1389 "Must be truthy or falsy"); 1390 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1391 // Note checkHoistValue fills in HoistStops. 1392 DenseMap<Instruction *, bool> Visited; 1393 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1394 Unhoistables, &HoistStops, Visited); 1395 assert(IsHoistable && "Must be hoistable"); 1396 (void)(IsHoistable); // Unused in release build 1397 IsHoisted = true; 1398 } 1399 for (SelectInst *SI : RI.Selects) { 1400 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1401 OutermostScope->FalseBiasedSelects.contains(SI)) && 1402 "Must be true or false biased"); 1403 // Note checkHoistValue fills in HoistStops. 1404 DenseMap<Instruction *, bool> Visited; 1405 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1406 Unhoistables, &HoistStops, Visited); 1407 assert(IsHoistable && "Must be hoistable"); 1408 (void)(IsHoistable); // Unused in release build 1409 IsHoisted = true; 1410 } 1411 if (IsHoisted) { 1412 OutermostScope->CHRRegions.push_back(RI); 1413 OutermostScope->HoistStopMap[R] = HoistStops; 1414 } 1415 } 1416 for (CHRScope *Sub : Scope->Subs) 1417 setCHRRegions(Sub, OutermostScope); 1418 } 1419 1420 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1421 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1422 } 1423 1424 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1425 SmallVectorImpl<CHRScope *> &Output) { 1426 Output.resize(Input.size()); 1427 llvm::copy(Input, Output.begin()); 1428 llvm::stable_sort(Output, CHRScopeSorter); 1429 } 1430 1431 // Return true if V is already hoisted or was hoisted (along with its operands) 1432 // to the insert point. 1433 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1434 HoistStopMapTy &HoistStopMap, 1435 DenseSet<Instruction *> &HoistedSet, 1436 DenseSet<PHINode *> &TrivialPHIs, 1437 DominatorTree &DT) { 1438 auto IT = HoistStopMap.find(R); 1439 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1440 DenseSet<Instruction *> &HoistStops = IT->second; 1441 if (auto *I = dyn_cast<Instruction>(V)) { 1442 if (I == HoistPoint) 1443 return; 1444 if (HoistStops.count(I)) 1445 return; 1446 if (auto *PN = dyn_cast<PHINode>(I)) 1447 if (TrivialPHIs.count(PN)) 1448 // The trivial phi inserted by the previous CHR scope could replace a 1449 // non-phi in HoistStops. Note that since this phi is at the exit of a 1450 // previous CHR scope, which dominates this scope, it's safe to stop 1451 // hoisting there. 1452 return; 1453 if (HoistedSet.count(I)) 1454 // Already hoisted, return. 1455 return; 1456 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1457 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1458 assert(DT.getNode(HoistPoint->getParent()) && 1459 "DT must contain HoistPoint block"); 1460 if (DT.dominates(I, HoistPoint)) 1461 // We are already above the hoist point. Stop here. This may be necessary 1462 // when multiple scopes would independently hoist the same 1463 // instruction. Since an outer (dominating) scope would hoist it to its 1464 // entry before an inner (dominated) scope would to its entry, the inner 1465 // scope may see the instruction already hoisted, in which case it 1466 // potentially wrong for the inner scope to hoist it and could cause bad 1467 // IR (non-dominating def), but safe to skip hoisting it instead because 1468 // it's already in a block that dominates the inner scope. 1469 return; 1470 for (Value *Op : I->operands()) { 1471 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1472 } 1473 I->moveBefore(HoistPoint); 1474 HoistedSet.insert(I); 1475 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1476 } 1477 } 1478 1479 // Hoist the dependent condition values of the branches and the selects in the 1480 // scope to the insert point. 1481 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1482 DenseSet<PHINode *> &TrivialPHIs, 1483 DominatorTree &DT) { 1484 DenseSet<Instruction *> HoistedSet; 1485 for (const RegInfo &RI : Scope->CHRRegions) { 1486 Region *R = RI.R; 1487 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1488 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1489 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1490 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1491 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1492 HoistedSet, TrivialPHIs, DT); 1493 } 1494 for (SelectInst *SI : RI.Selects) { 1495 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1496 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1497 if (!(IsTrueBiased || IsFalseBiased)) 1498 continue; 1499 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1500 HoistedSet, TrivialPHIs, DT); 1501 } 1502 } 1503 } 1504 1505 // Negate the predicate if an ICmp if it's used only by branches or selects by 1506 // swapping the operands of the branches or the selects. Returns true if success. 1507 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1508 Instruction *ExcludedUser, 1509 CHRScope *Scope) { 1510 for (User *U : ICmp->users()) { 1511 if (U == ExcludedUser) 1512 continue; 1513 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1514 continue; 1515 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1516 continue; 1517 return false; 1518 } 1519 for (User *U : ICmp->users()) { 1520 if (U == ExcludedUser) 1521 continue; 1522 if (auto *BI = dyn_cast<BranchInst>(U)) { 1523 assert(BI->isConditional() && "Must be conditional"); 1524 BI->swapSuccessors(); 1525 // Don't need to swap this in terms of 1526 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1527 // mean whehter the branch is likely go into the if-then rather than 1528 // successor0/successor1 and because we can tell which edge is the then or 1529 // the else one by comparing the destination to the region exit block. 1530 continue; 1531 } 1532 if (auto *SI = dyn_cast<SelectInst>(U)) { 1533 // Swap operands 1534 SI->swapValues(); 1535 SI->swapProfMetadata(); 1536 if (Scope->TrueBiasedSelects.count(SI)) { 1537 assert(!Scope->FalseBiasedSelects.contains(SI) && 1538 "Must not be already in"); 1539 Scope->FalseBiasedSelects.insert(SI); 1540 } else if (Scope->FalseBiasedSelects.count(SI)) { 1541 assert(!Scope->TrueBiasedSelects.contains(SI) && 1542 "Must not be already in"); 1543 Scope->TrueBiasedSelects.insert(SI); 1544 } 1545 continue; 1546 } 1547 llvm_unreachable("Must be a branch or a select"); 1548 } 1549 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1550 return true; 1551 } 1552 1553 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1554 // for a value that's defined in the scope but used outside it (meaning it's 1555 // alive at the exit block). 1556 static void insertTrivialPHIs(CHRScope *Scope, 1557 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1558 DenseSet<PHINode *> &TrivialPHIs) { 1559 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1560 for (RegInfo &RI : Scope->RegInfos) { 1561 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1562 // sub-Scopes. 1563 BlocksInScope.insert(BB); 1564 } 1565 } 1566 CHR_DEBUG({ 1567 dbgs() << "Inserting redundant phis\n"; 1568 for (BasicBlock *BB : BlocksInScope) 1569 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1570 }); 1571 for (BasicBlock *BB : BlocksInScope) { 1572 for (Instruction &I : *BB) { 1573 SmallVector<Instruction *, 8> Users; 1574 for (User *U : I.users()) { 1575 if (auto *UI = dyn_cast<Instruction>(U)) { 1576 if (!BlocksInScope.contains(UI->getParent()) && 1577 // Unless there's already a phi for I at the exit block. 1578 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1579 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1580 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1581 Users.push_back(UI); 1582 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1583 // There's a loop backedge from a block that's dominated by this 1584 // scope to the entry block. 1585 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1586 CHR_DEBUG(dbgs() 1587 << "Used at entry block (for a back edge) by a phi user " 1588 << *UI << "\n"); 1589 Users.push_back(UI); 1590 } 1591 } 1592 } 1593 if (Users.size() > 0) { 1594 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1595 // ExitBlock. Replace I with the new phi in UI unless UI is another 1596 // phi at ExitBlock. 1597 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 1598 &ExitBlock->front()); 1599 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1600 PN->addIncoming(&I, Pred); 1601 } 1602 TrivialPHIs.insert(PN); 1603 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1604 for (Instruction *UI : Users) { 1605 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1606 if (UI->getOperand(J) == &I) { 1607 UI->setOperand(J, PN); 1608 } 1609 } 1610 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1611 } 1612 } 1613 } 1614 } 1615 } 1616 1617 // Assert that all the CHR regions of the scope have a biased branch or select. 1618 static void LLVM_ATTRIBUTE_UNUSED 1619 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1620 #ifndef NDEBUG 1621 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1622 if (Scope->TrueBiasedRegions.count(RI.R) || 1623 Scope->FalseBiasedRegions.count(RI.R)) 1624 return true; 1625 for (SelectInst *SI : RI.Selects) 1626 if (Scope->TrueBiasedSelects.count(SI) || 1627 Scope->FalseBiasedSelects.count(SI)) 1628 return true; 1629 return false; 1630 }; 1631 for (RegInfo &RI : Scope->CHRRegions) { 1632 assert(HasBiasedBranchOrSelect(RI, Scope) && 1633 "Must have biased branch or select"); 1634 } 1635 #endif 1636 } 1637 1638 // Assert that all the condition values of the biased branches and selects have 1639 // been hoisted to the pre-entry block or outside of the scope. 1640 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1641 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1642 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1643 for (RegInfo &RI : Scope->CHRRegions) { 1644 Region *R = RI.R; 1645 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1646 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1647 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1648 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1649 Value *V = BI->getCondition(); 1650 CHR_DEBUG(dbgs() << *V << "\n"); 1651 if (auto *I = dyn_cast<Instruction>(V)) { 1652 (void)(I); // Unused in release build. 1653 assert((I->getParent() == PreEntryBlock || 1654 !Scope->contains(I)) && 1655 "Must have been hoisted to PreEntryBlock or outside the scope"); 1656 } 1657 } 1658 for (SelectInst *SI : RI.Selects) { 1659 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1660 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1661 if (!(IsTrueBiased || IsFalseBiased)) 1662 continue; 1663 Value *V = SI->getCondition(); 1664 CHR_DEBUG(dbgs() << *V << "\n"); 1665 if (auto *I = dyn_cast<Instruction>(V)) { 1666 (void)(I); // Unused in release build. 1667 assert((I->getParent() == PreEntryBlock || 1668 !Scope->contains(I)) && 1669 "Must have been hoisted to PreEntryBlock or outside the scope"); 1670 } 1671 } 1672 } 1673 } 1674 1675 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1676 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1677 1678 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1679 1680 for (RegInfo &RI : Scope->RegInfos) { 1681 const Region *R = RI.R; 1682 unsigned Duplication = getRegionDuplicationCount(R); 1683 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication 1684 << "\n"); 1685 if (Duplication >= CHRDupThreshsold) { 1686 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication 1687 << " for this region"); 1688 ORE.emit([&]() { 1689 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached", 1690 R->getEntry()->getTerminator()) 1691 << "Reached the duplication threshold for the region"; 1692 }); 1693 return; 1694 } 1695 } 1696 for (RegInfo &RI : Scope->RegInfos) { 1697 DuplicationCount[RI.R]++; 1698 } 1699 1700 Region *FirstRegion = Scope->RegInfos[0].R; 1701 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1702 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1703 BasicBlock *ExitBlock = LastRegion->getExit(); 1704 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1705 1706 if (ExitBlock) { 1707 // Insert a trivial phi at the exit block (where the CHR hot path and the 1708 // cold path merges) for a value that's defined in the scope but used 1709 // outside it (meaning it's alive at the exit block). We will add the 1710 // incoming values for the CHR cold paths to it below. Without this, we'd 1711 // miss updating phi's for such values unless there happens to already be a 1712 // phi for that value there. 1713 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1714 } 1715 1716 // Split the entry block of the first region. The new block becomes the new 1717 // entry block of the first region. The old entry block becomes the block to 1718 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1719 // through the split, we update the entry of the first region after the split, 1720 // and Region only points to the entry and the exit blocks, rather than 1721 // keeping everything in a list or set, the blocks membership and the 1722 // entry/exit blocks of the region are still valid after the split. 1723 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1724 << " at " << *Scope->BranchInsertPoint << "\n"); 1725 BasicBlock *NewEntryBlock = 1726 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1727 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1728 "NewEntryBlock's only pred must be EntryBlock"); 1729 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1730 BasicBlock *PreEntryBlock = EntryBlock; 1731 1732 ValueToValueMapTy VMap; 1733 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1734 // hot path (originals) and a cold path (clones) and update the PHIs at the 1735 // exit block. 1736 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1737 1738 // Replace the old (placeholder) branch with the new (merged) conditional 1739 // branch. 1740 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1741 NewEntryBlock, VMap); 1742 1743 #ifndef NDEBUG 1744 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1745 #endif 1746 1747 // Hoist the conditional values of the branches/selects. 1748 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1749 1750 #ifndef NDEBUG 1751 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1752 #endif 1753 1754 // Create the combined branch condition and constant-fold the branches/selects 1755 // in the hot path. 1756 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1757 ProfileCount.value_or(0)); 1758 } 1759 1760 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1761 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1762 // at the exit block. 1763 void CHR::cloneScopeBlocks(CHRScope *Scope, 1764 BasicBlock *PreEntryBlock, 1765 BasicBlock *ExitBlock, 1766 Region *LastRegion, 1767 ValueToValueMapTy &VMap) { 1768 // Clone all the blocks. The original blocks will be the hot-path 1769 // CHR-optimized code and the cloned blocks will be the original unoptimized 1770 // code. This is so that the block pointers from the 1771 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1772 // which CHR should apply to. 1773 SmallVector<BasicBlock*, 8> NewBlocks; 1774 for (RegInfo &RI : Scope->RegInfos) 1775 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1776 // sub-Scopes. 1777 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1778 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1779 NewBlocks.push_back(NewBB); 1780 VMap[BB] = NewBB; 1781 } 1782 1783 // Place the cloned blocks right after the original blocks (right before the 1784 // exit block of.) 1785 if (ExitBlock) 1786 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(), 1787 F.end()); 1788 1789 // Update the cloned blocks/instructions to refer to themselves. 1790 for (BasicBlock *NewBB : NewBlocks) 1791 for (Instruction &I : *NewBB) 1792 RemapInstruction(&I, VMap, 1793 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1794 1795 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1796 // the top-level region but we don't need to add PHIs. The trivial PHIs 1797 // inserted above will be updated here. 1798 if (ExitBlock) 1799 for (PHINode &PN : ExitBlock->phis()) 1800 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1801 ++I) { 1802 BasicBlock *Pred = PN.getIncomingBlock(I); 1803 if (LastRegion->contains(Pred)) { 1804 Value *V = PN.getIncomingValue(I); 1805 auto It = VMap.find(V); 1806 if (It != VMap.end()) V = It->second; 1807 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1808 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1809 } 1810 } 1811 } 1812 1813 // A helper for transformScope. Replace the old (placeholder) branch with the 1814 // new (merged) conditional branch. 1815 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1816 BasicBlock *EntryBlock, 1817 BasicBlock *NewEntryBlock, 1818 ValueToValueMapTy &VMap) { 1819 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1820 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1821 "SplitBlock did not work correctly!"); 1822 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1823 "NewEntryBlock's only pred must be EntryBlock"); 1824 assert(VMap.find(NewEntryBlock) != VMap.end() && 1825 "NewEntryBlock must have been copied"); 1826 OldBR->dropAllReferences(); 1827 OldBR->eraseFromParent(); 1828 // The true predicate is a placeholder. It will be replaced later in 1829 // fixupBranchesAndSelects(). 1830 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1831 cast<BasicBlock>(VMap[NewEntryBlock]), 1832 ConstantInt::getTrue(F.getContext())); 1833 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end()); 1834 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1835 "NewEntryBlock's only pred must be EntryBlock"); 1836 return NewBR; 1837 } 1838 1839 // A helper for transformScopes. Create the combined branch condition and 1840 // constant-fold the branches/selects in the hot path. 1841 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1842 BasicBlock *PreEntryBlock, 1843 BranchInst *MergedBR, 1844 uint64_t ProfileCount) { 1845 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1846 BranchProbability CHRBranchBias(1, 1); 1847 uint64_t NumCHRedBranches = 0; 1848 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1849 for (RegInfo &RI : Scope->CHRRegions) { 1850 Region *R = RI.R; 1851 if (RI.HasBranch) { 1852 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1853 ++NumCHRedBranches; 1854 } 1855 for (SelectInst *SI : RI.Selects) { 1856 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1857 ++NumCHRedBranches; 1858 } 1859 } 1860 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1861 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1862 ORE.emit([&]() { 1863 return OptimizationRemark(DEBUG_TYPE, 1864 "CHR", 1865 // Refer to the hot (original) path 1866 MergedBR->getSuccessor(0)->getTerminator()) 1867 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1868 << " branches or selects"; 1869 }); 1870 MergedBR->setCondition(MergedCondition); 1871 uint32_t Weights[] = { 1872 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1873 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1874 }; 1875 MDBuilder MDB(F.getContext()); 1876 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1877 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1878 << "\n"); 1879 } 1880 1881 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1882 // and constant-fold a branch in the hot path. 1883 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1884 IRBuilder<> &IRB, 1885 Value *&MergedCondition, 1886 BranchProbability &CHRBranchBias) { 1887 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1888 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1889 "Must be truthy or falsy"); 1890 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1891 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1892 "Must be in the bias map"); 1893 BranchProbability Bias = BranchBiasMap[R]; 1894 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1895 // Take the min. 1896 if (CHRBranchBias > Bias) 1897 CHRBranchBias = Bias; 1898 BasicBlock *IfThen = BI->getSuccessor(1); 1899 BasicBlock *IfElse = BI->getSuccessor(0); 1900 BasicBlock *RegionExitBlock = R->getExit(); 1901 assert(RegionExitBlock && "Null ExitBlock"); 1902 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1903 IfThen != IfElse && "Invariant from findScopes"); 1904 if (IfThen == RegionExitBlock) { 1905 // Swap them so that IfThen means going into it and IfElse means skipping 1906 // it. 1907 std::swap(IfThen, IfElse); 1908 } 1909 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1910 << " IfElse " << IfElse->getName() << "\n"); 1911 Value *Cond = BI->getCondition(); 1912 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1913 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1914 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1915 MergedCondition); 1916 // Constant-fold the branch at ClonedEntryBlock. 1917 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1918 "The successor shouldn't change"); 1919 Value *NewCondition = ConditionTrue ? 1920 ConstantInt::getTrue(F.getContext()) : 1921 ConstantInt::getFalse(F.getContext()); 1922 BI->setCondition(NewCondition); 1923 } 1924 1925 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1926 // and constant-fold a select in the hot path. 1927 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1928 IRBuilder<> &IRB, 1929 Value *&MergedCondition, 1930 BranchProbability &CHRBranchBias) { 1931 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1932 assert((IsTrueBiased || 1933 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1934 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1935 "Must be in the bias map"); 1936 BranchProbability Bias = SelectBiasMap[SI]; 1937 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1938 // Take the min. 1939 if (CHRBranchBias > Bias) 1940 CHRBranchBias = Bias; 1941 Value *Cond = SI->getCondition(); 1942 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1943 MergedCondition); 1944 Value *NewCondition = IsTrueBiased ? 1945 ConstantInt::getTrue(F.getContext()) : 1946 ConstantInt::getFalse(F.getContext()); 1947 SI->setCondition(NewCondition); 1948 } 1949 1950 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1951 // condition. 1952 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1953 Instruction *BranchOrSelect, CHRScope *Scope, 1954 IRBuilder<> &IRB, Value *&MergedCondition) { 1955 if (!IsTrueBiased) { 1956 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1957 // branch, negate the icmp predicate and swap the branch targets and avoid 1958 // inserting an Xor to negate Cond. 1959 auto *ICmp = dyn_cast<ICmpInst>(Cond); 1960 if (!ICmp || 1961 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1962 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1963 } 1964 1965 // Select conditions can be poison, while branching on poison is immediate 1966 // undefined behavior. As such, we need to freeze potentially poisonous 1967 // conditions derived from selects. 1968 if (isa<SelectInst>(BranchOrSelect) && 1969 !isGuaranteedNotToBeUndefOrPoison(Cond)) 1970 Cond = IRB.CreateFreeze(Cond); 1971 1972 // Use logical and to avoid propagating poison from later conditions. 1973 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 1974 } 1975 1976 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1977 unsigned I = 0; 1978 DenseSet<PHINode *> TrivialPHIs; 1979 for (CHRScope *Scope : CHRScopes) { 1980 transformScopes(Scope, TrivialPHIs); 1981 CHR_DEBUG( 1982 std::ostringstream oss; 1983 oss << " after transformScopes " << I++; 1984 dumpIR(F, oss.str().c_str(), nullptr)); 1985 (void)I; 1986 } 1987 } 1988 1989 static void LLVM_ATTRIBUTE_UNUSED 1990 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1991 dbgs() << Label << " " << Scopes.size() << "\n"; 1992 for (CHRScope *Scope : Scopes) { 1993 dbgs() << *Scope << "\n"; 1994 } 1995 } 1996 1997 bool CHR::run() { 1998 if (!shouldApply(F, PSI)) 1999 return false; 2000 2001 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2002 2003 bool Changed = false; 2004 { 2005 CHR_DEBUG( 2006 dbgs() << "RegionInfo:\n"; 2007 RI.print(dbgs())); 2008 2009 // Recursively traverse the region tree and find regions that have biased 2010 // branches and/or selects and create scopes. 2011 SmallVector<CHRScope *, 8> AllScopes; 2012 findScopes(AllScopes); 2013 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2014 2015 // Split the scopes if 1) the conditional values of the biased 2016 // branches/selects of the inner/lower scope can't be hoisted up to the 2017 // outermost/uppermost scope entry, or 2) the condition values of the biased 2018 // branches/selects in a scope (including subscopes) don't share at least 2019 // one common value. 2020 SmallVector<CHRScope *, 8> SplitScopes; 2021 splitScopes(AllScopes, SplitScopes); 2022 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2023 2024 // After splitting, set the biased regions and selects of a scope (a tree 2025 // root) that include those of the subscopes. 2026 classifyBiasedScopes(SplitScopes); 2027 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2028 2029 // Filter out the scopes that has only one biased region or select (CHR 2030 // isn't useful in such a case). 2031 SmallVector<CHRScope *, 8> FilteredScopes; 2032 filterScopes(SplitScopes, FilteredScopes); 2033 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2034 2035 // Set the regions to be CHR'ed and their hoist stops for each scope. 2036 SmallVector<CHRScope *, 8> SetScopes; 2037 setCHRRegions(FilteredScopes, SetScopes); 2038 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2039 2040 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2041 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2042 // to the hot path, rather than both hot and cold paths. 2043 SmallVector<CHRScope *, 8> SortedScopes; 2044 sortScopes(SetScopes, SortedScopes); 2045 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2046 2047 CHR_DEBUG( 2048 dbgs() << "RegionInfo:\n"; 2049 RI.print(dbgs())); 2050 2051 // Apply the CHR transformation. 2052 if (!SortedScopes.empty()) { 2053 transformScopes(SortedScopes); 2054 Changed = true; 2055 } 2056 } 2057 2058 if (Changed) { 2059 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2060 ORE.emit([&]() { 2061 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2062 << ore::NV("Function", &F) << " " 2063 << "Reduced the number of branches in hot paths by " 2064 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2065 << " (static) and " 2066 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2067 << " (weighted by PGO count)"; 2068 }); 2069 } 2070 2071 return Changed; 2072 } 2073 2074 namespace llvm { 2075 2076 ControlHeightReductionPass::ControlHeightReductionPass() { 2077 parseCHRFilterFiles(); 2078 } 2079 2080 PreservedAnalyses ControlHeightReductionPass::run( 2081 Function &F, 2082 FunctionAnalysisManager &FAM) { 2083 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2084 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2085 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2086 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2087 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2088 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2089 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2090 if (!Changed) 2091 return PreservedAnalyses::all(); 2092 return PreservedAnalyses::none(); 2093 } 2094 2095 } // namespace llvm 2096