1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/InitializePasses.h" 33 #include "llvm/Support/BranchProbability.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/MemoryBuffer.h" 36 #include "llvm/Transforms/Utils.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include "llvm/Transforms/Utils/ValueMapper.h" 40 41 #include <set> 42 #include <sstream> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "chr" 47 48 #define CHR_DEBUG(X) LLVM_DEBUG(X) 49 50 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 51 cl::desc("Apply CHR for all functions")); 52 53 static cl::opt<double> CHRBiasThreshold( 54 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 55 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 56 57 static cl::opt<unsigned> CHRMergeThreshold( 58 "chr-merge-threshold", cl::init(2), cl::Hidden, 59 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 60 61 static cl::opt<std::string> CHRModuleList( 62 "chr-module-list", cl::init(""), cl::Hidden, 63 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 64 65 static cl::opt<std::string> CHRFunctionList( 66 "chr-function-list", cl::init(""), cl::Hidden, 67 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 68 69 static StringSet<> CHRModules; 70 static StringSet<> CHRFunctions; 71 72 static void parseCHRFilterFiles() { 73 if (!CHRModuleList.empty()) { 74 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 75 if (!FileOrErr) { 76 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 77 std::exit(1); 78 } 79 StringRef Buf = FileOrErr->get()->getBuffer(); 80 SmallVector<StringRef, 0> Lines; 81 Buf.split(Lines, '\n'); 82 for (StringRef Line : Lines) { 83 Line = Line.trim(); 84 if (!Line.empty()) 85 CHRModules.insert(Line); 86 } 87 } 88 if (!CHRFunctionList.empty()) { 89 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 90 if (!FileOrErr) { 91 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 92 std::exit(1); 93 } 94 StringRef Buf = FileOrErr->get()->getBuffer(); 95 SmallVector<StringRef, 0> Lines; 96 Buf.split(Lines, '\n'); 97 for (StringRef Line : Lines) { 98 Line = Line.trim(); 99 if (!Line.empty()) 100 CHRFunctions.insert(Line); 101 } 102 } 103 } 104 105 namespace { 106 class ControlHeightReductionLegacyPass : public FunctionPass { 107 public: 108 static char ID; 109 110 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 111 initializeControlHeightReductionLegacyPassPass( 112 *PassRegistry::getPassRegistry()); 113 parseCHRFilterFiles(); 114 } 115 116 bool runOnFunction(Function &F) override; 117 void getAnalysisUsage(AnalysisUsage &AU) const override { 118 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 119 AU.addRequired<DominatorTreeWrapperPass>(); 120 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 121 AU.addRequired<RegionInfoPass>(); 122 AU.addPreserved<GlobalsAAWrapperPass>(); 123 } 124 }; 125 } // end anonymous namespace 126 127 char ControlHeightReductionLegacyPass::ID = 0; 128 129 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 130 "chr", 131 "Reduce control height in the hot paths", 132 false, false) 133 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 134 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 135 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 136 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 137 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 138 "chr", 139 "Reduce control height in the hot paths", 140 false, false) 141 142 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 143 return new ControlHeightReductionLegacyPass(); 144 } 145 146 namespace { 147 148 struct CHRStats { 149 CHRStats() = default; 150 void print(raw_ostream &OS) const { 151 OS << "CHRStats: NumBranches " << NumBranches 152 << " NumBranchesDelta " << NumBranchesDelta 153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 154 } 155 // The original number of conditional branches / selects 156 uint64_t NumBranches = 0; 157 // The decrease of the number of conditional branches / selects in the hot 158 // paths due to CHR. 159 uint64_t NumBranchesDelta = 0; 160 // NumBranchesDelta weighted by the profile count at the scope entry. 161 uint64_t WeightedNumBranchesDelta = 0; 162 }; 163 164 // RegInfo - some properties of a Region. 165 struct RegInfo { 166 RegInfo() = default; 167 RegInfo(Region *RegionIn) : R(RegionIn) {} 168 Region *R = nullptr; 169 bool HasBranch = false; 170 SmallVector<SelectInst *, 8> Selects; 171 }; 172 173 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 174 175 // CHRScope - a sequence of regions to CHR together. It corresponds to a 176 // sequence of conditional blocks. It can have subscopes which correspond to 177 // nested conditional blocks. Nested CHRScopes form a tree. 178 class CHRScope { 179 public: 180 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 181 assert(RI.R && "Null RegionIn"); 182 RegInfos.push_back(RI); 183 } 184 185 Region *getParentRegion() { 186 assert(RegInfos.size() > 0 && "Empty CHRScope"); 187 Region *Parent = RegInfos[0].R->getParent(); 188 assert(Parent && "Unexpected to call this on the top-level region"); 189 return Parent; 190 } 191 192 BasicBlock *getEntryBlock() { 193 assert(RegInfos.size() > 0 && "Empty CHRScope"); 194 return RegInfos.front().R->getEntry(); 195 } 196 197 BasicBlock *getExitBlock() { 198 assert(RegInfos.size() > 0 && "Empty CHRScope"); 199 return RegInfos.back().R->getExit(); 200 } 201 202 bool appendable(CHRScope *Next) { 203 // The next scope is appendable only if this scope is directly connected to 204 // it (which implies it post-dominates this scope) and this scope dominates 205 // it (no edge to the next scope outside this scope). 206 BasicBlock *NextEntry = Next->getEntryBlock(); 207 if (getExitBlock() != NextEntry) 208 // Not directly connected. 209 return false; 210 Region *LastRegion = RegInfos.back().R; 211 for (BasicBlock *Pred : predecessors(NextEntry)) 212 if (!LastRegion->contains(Pred)) 213 // There's an edge going into the entry of the next scope from outside 214 // of this scope. 215 return false; 216 return true; 217 } 218 219 void append(CHRScope *Next) { 220 assert(RegInfos.size() > 0 && "Empty CHRScope"); 221 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 222 assert(getParentRegion() == Next->getParentRegion() && 223 "Must be siblings"); 224 assert(getExitBlock() == Next->getEntryBlock() && 225 "Must be adjacent"); 226 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 227 Subs.append(Next->Subs.begin(), Next->Subs.end()); 228 } 229 230 void addSub(CHRScope *SubIn) { 231 #ifndef NDEBUG 232 bool IsChild = false; 233 for (RegInfo &RI : RegInfos) 234 if (RI.R == SubIn->getParentRegion()) { 235 IsChild = true; 236 break; 237 } 238 assert(IsChild && "Must be a child"); 239 #endif 240 Subs.push_back(SubIn); 241 } 242 243 // Split this scope at the boundary region into two, which will belong to the 244 // tail and returns the tail. 245 CHRScope *split(Region *Boundary) { 246 assert(Boundary && "Boundary null"); 247 assert(RegInfos.begin()->R != Boundary && 248 "Can't be split at beginning"); 249 auto BoundaryIt = llvm::find_if( 250 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 251 if (BoundaryIt == RegInfos.end()) 252 return nullptr; 253 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 254 DenseSet<Region *> TailRegionSet; 255 for (const RegInfo &RI : TailRegInfos) 256 TailRegionSet.insert(RI.R); 257 258 auto TailIt = 259 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 260 assert(Sub && "null Sub"); 261 Region *Parent = Sub->getParentRegion(); 262 if (TailRegionSet.count(Parent)) 263 return false; 264 265 assert(llvm::any_of( 266 RegInfos, 267 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 268 "Must be in head"); 269 return true; 270 }); 271 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 272 273 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 274 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 275 RegInfos.erase(BoundaryIt, RegInfos.end()); 276 Subs.erase(TailIt, Subs.end()); 277 return Scope; 278 } 279 280 bool contains(Instruction *I) const { 281 BasicBlock *Parent = I->getParent(); 282 for (const RegInfo &RI : RegInfos) 283 if (RI.R->contains(Parent)) 284 return true; 285 return false; 286 } 287 288 void print(raw_ostream &OS) const; 289 290 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 291 SmallVector<CHRScope *, 8> Subs; // Subscopes. 292 293 // The instruction at which to insert the CHR conditional branch (and hoist 294 // the dependent condition values). 295 Instruction *BranchInsertPoint; 296 297 // True-biased and false-biased regions (conditional blocks), 298 // respectively. Used only for the outermost scope and includes regions in 299 // subscopes. The rest are unbiased. 300 DenseSet<Region *> TrueBiasedRegions; 301 DenseSet<Region *> FalseBiasedRegions; 302 // Among the biased regions, the regions that get CHRed. 303 SmallVector<RegInfo, 8> CHRRegions; 304 305 // True-biased and false-biased selects, respectively. Used only for the 306 // outermost scope and includes ones in subscopes. 307 DenseSet<SelectInst *> TrueBiasedSelects; 308 DenseSet<SelectInst *> FalseBiasedSelects; 309 310 // Map from one of the above regions to the instructions to stop 311 // hoisting instructions at through use-def chains. 312 HoistStopMapTy HoistStopMap; 313 314 private: 315 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 316 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 317 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 318 }; 319 320 class CHR { 321 public: 322 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 323 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 324 OptimizationRemarkEmitter &OREin) 325 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 326 327 ~CHR() { 328 for (CHRScope *Scope : Scopes) { 329 delete Scope; 330 } 331 } 332 333 bool run(); 334 335 private: 336 // See the comments in CHR::run() for the high level flow of the algorithm and 337 // what the following functions do. 338 339 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 340 Region *R = RI.getTopLevelRegion(); 341 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 342 Output.push_back(Scope); 343 } 344 } 345 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 346 SmallVectorImpl<CHRScope *> &Scopes); 347 CHRScope *findScope(Region *R); 348 void checkScopeHoistable(CHRScope *Scope); 349 350 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 351 SmallVectorImpl<CHRScope *> &Output); 352 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 353 CHRScope *Outer, 354 DenseSet<Value *> *OuterConditionValues, 355 Instruction *OuterInsertPoint, 356 SmallVectorImpl<CHRScope *> &Output, 357 DenseSet<Instruction *> &Unhoistables); 358 359 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 360 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 361 362 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 363 SmallVectorImpl<CHRScope *> &Output); 364 365 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 366 SmallVectorImpl<CHRScope *> &Output); 367 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 368 369 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 370 SmallVectorImpl<CHRScope *> &Output); 371 372 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 373 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 374 void cloneScopeBlocks(CHRScope *Scope, 375 BasicBlock *PreEntryBlock, 376 BasicBlock *ExitBlock, 377 Region *LastRegion, 378 ValueToValueMapTy &VMap); 379 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 380 BasicBlock *EntryBlock, 381 BasicBlock *NewEntryBlock, 382 ValueToValueMapTy &VMap); 383 void fixupBranchesAndSelects(CHRScope *Scope, 384 BasicBlock *PreEntryBlock, 385 BranchInst *MergedBR, 386 uint64_t ProfileCount); 387 void fixupBranch(Region *R, 388 CHRScope *Scope, 389 IRBuilder<> &IRB, 390 Value *&MergedCondition, BranchProbability &CHRBranchBias); 391 void fixupSelect(SelectInst* SI, 392 CHRScope *Scope, 393 IRBuilder<> &IRB, 394 Value *&MergedCondition, BranchProbability &CHRBranchBias); 395 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 396 Instruction *BranchOrSelect, 397 CHRScope *Scope, 398 IRBuilder<> &IRB, 399 Value *&MergedCondition); 400 401 Function &F; 402 BlockFrequencyInfo &BFI; 403 DominatorTree &DT; 404 ProfileSummaryInfo &PSI; 405 RegionInfo &RI; 406 OptimizationRemarkEmitter &ORE; 407 CHRStats Stats; 408 409 // All the true-biased regions in the function 410 DenseSet<Region *> TrueBiasedRegionsGlobal; 411 // All the false-biased regions in the function 412 DenseSet<Region *> FalseBiasedRegionsGlobal; 413 // All the true-biased selects in the function 414 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 415 // All the false-biased selects in the function 416 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 417 // A map from biased regions to their branch bias 418 DenseMap<Region *, BranchProbability> BranchBiasMap; 419 // A map from biased selects to their branch bias 420 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 421 // All the scopes. 422 DenseSet<CHRScope *> Scopes; 423 }; 424 425 } // end anonymous namespace 426 427 static inline 428 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 429 const CHRStats &Stats) { 430 Stats.print(OS); 431 return OS; 432 } 433 434 static inline 435 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 436 Scope.print(OS); 437 return OS; 438 } 439 440 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 441 if (ForceCHR) 442 return true; 443 444 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 445 if (CHRModules.count(F.getParent()->getName())) 446 return true; 447 return CHRFunctions.count(F.getName()); 448 } 449 450 assert(PSI.hasProfileSummary() && "Empty PSI?"); 451 return PSI.isFunctionEntryHot(&F); 452 } 453 454 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 455 CHRStats *Stats) { 456 StringRef FuncName = F.getName(); 457 StringRef ModuleName = F.getParent()->getName(); 458 (void)(FuncName); // Unused in release build. 459 (void)(ModuleName); // Unused in release build. 460 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 461 << FuncName); 462 if (Stats) 463 CHR_DEBUG(dbgs() << " " << *Stats); 464 CHR_DEBUG(dbgs() << "\n"); 465 CHR_DEBUG(F.dump()); 466 } 467 468 void CHRScope::print(raw_ostream &OS) const { 469 assert(RegInfos.size() > 0 && "Empty CHRScope"); 470 OS << "CHRScope["; 471 OS << RegInfos.size() << ", Regions["; 472 for (const RegInfo &RI : RegInfos) { 473 OS << RI.R->getNameStr(); 474 if (RI.HasBranch) 475 OS << " B"; 476 if (RI.Selects.size() > 0) 477 OS << " S" << RI.Selects.size(); 478 OS << ", "; 479 } 480 if (RegInfos[0].R->getParent()) { 481 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 482 } else { 483 // top level region 484 OS << "]"; 485 } 486 OS << ", Subs["; 487 for (CHRScope *Sub : Subs) { 488 OS << *Sub << ", "; 489 } 490 OS << "]]"; 491 } 492 493 // Return true if the given instruction type can be hoisted by CHR. 494 static bool isHoistableInstructionType(Instruction *I) { 495 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 496 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 497 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 498 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 499 isa<InsertValueInst>(I); 500 } 501 502 // Return true if the given instruction can be hoisted by CHR. 503 static bool isHoistable(Instruction *I, DominatorTree &DT) { 504 if (!isHoistableInstructionType(I)) 505 return false; 506 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 507 } 508 509 // Recursively traverse the use-def chains of the given value and return a set 510 // of the unhoistable base values defined within the scope (excluding the 511 // first-region entry block) or the (hoistable or unhoistable) base values that 512 // are defined outside (including the first-region entry block) of the 513 // scope. The returned set doesn't include constants. 514 static const std::set<Value *> & 515 getBaseValues(Value *V, DominatorTree &DT, 516 DenseMap<Value *, std::set<Value *>> &Visited) { 517 auto It = Visited.find(V); 518 if (It != Visited.end()) { 519 return It->second; 520 } 521 std::set<Value *> Result; 522 if (auto *I = dyn_cast<Instruction>(V)) { 523 // We don't stop at a block that's not in the Scope because we would miss 524 // some instructions that are based on the same base values if we stop 525 // there. 526 if (!isHoistable(I, DT)) { 527 Result.insert(I); 528 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 529 } 530 // I is hoistable above the Scope. 531 for (Value *Op : I->operands()) { 532 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 533 Result.insert(OpResult.begin(), OpResult.end()); 534 } 535 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 536 } 537 if (isa<Argument>(V)) { 538 Result.insert(V); 539 } 540 // We don't include others like constants because those won't lead to any 541 // chance of folding of conditions (eg two bit checks merged into one check) 542 // after CHR. 543 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 544 } 545 546 // Return true if V is already hoisted or can be hoisted (along with its 547 // operands) above the insert point. When it returns true and HoistStops is 548 // non-null, the instructions to stop hoisting at through the use-def chains are 549 // inserted into HoistStops. 550 static bool 551 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 552 DenseSet<Instruction *> &Unhoistables, 553 DenseSet<Instruction *> *HoistStops, 554 DenseMap<Instruction *, bool> &Visited) { 555 assert(InsertPoint && "Null InsertPoint"); 556 if (auto *I = dyn_cast<Instruction>(V)) { 557 auto It = Visited.find(I); 558 if (It != Visited.end()) { 559 return It->second; 560 } 561 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 562 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 563 if (Unhoistables.count(I)) { 564 // Don't hoist if they are not to be hoisted. 565 Visited[I] = false; 566 return false; 567 } 568 if (DT.dominates(I, InsertPoint)) { 569 // We are already above the insert point. Stop here. 570 if (HoistStops) 571 HoistStops->insert(I); 572 Visited[I] = true; 573 return true; 574 } 575 // We aren't not above the insert point, check if we can hoist it above the 576 // insert point. 577 if (isHoistable(I, DT)) { 578 // Check operands first. 579 DenseSet<Instruction *> OpsHoistStops; 580 bool AllOpsHoisted = true; 581 for (Value *Op : I->operands()) { 582 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 583 Visited)) { 584 AllOpsHoisted = false; 585 break; 586 } 587 } 588 if (AllOpsHoisted) { 589 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 590 if (HoistStops) 591 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 592 Visited[I] = true; 593 return true; 594 } 595 } 596 Visited[I] = false; 597 return false; 598 } 599 // Non-instructions are considered hoistable. 600 return true; 601 } 602 603 // Returns true and sets the true probability and false probability of an 604 // MD_prof metadata if it's well-formed. 605 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 606 BranchProbability &FalseProb) { 607 if (!MD) return false; 608 MDString *MDName = cast<MDString>(MD->getOperand(0)); 609 if (MDName->getString() != "branch_weights" || 610 MD->getNumOperands() != 3) 611 return false; 612 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 613 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 614 if (!TrueWeight || !FalseWeight) 615 return false; 616 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 617 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 618 uint64_t SumWt = TrueWt + FalseWt; 619 620 assert(SumWt >= TrueWt && SumWt >= FalseWt && 621 "Overflow calculating branch probabilities."); 622 623 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 624 if (SumWt == 0) 625 return false; 626 627 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 628 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 629 return true; 630 } 631 632 static BranchProbability getCHRBiasThreshold() { 633 return BranchProbability::getBranchProbability( 634 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 635 } 636 637 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 638 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 639 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 640 // false. 641 template <typename K, typename S, typename M> 642 static bool checkBias(K *Key, BranchProbability TrueProb, 643 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 644 M &BiasMap) { 645 BranchProbability Threshold = getCHRBiasThreshold(); 646 if (TrueProb >= Threshold) { 647 TrueSet.insert(Key); 648 BiasMap[Key] = TrueProb; 649 return true; 650 } else if (FalseProb >= Threshold) { 651 FalseSet.insert(Key); 652 BiasMap[Key] = FalseProb; 653 return true; 654 } 655 return false; 656 } 657 658 // Returns true and insert a region into the right biased set and the map if the 659 // branch of the region is biased. 660 static bool checkBiasedBranch(BranchInst *BI, Region *R, 661 DenseSet<Region *> &TrueBiasedRegionsGlobal, 662 DenseSet<Region *> &FalseBiasedRegionsGlobal, 663 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 664 if (!BI->isConditional()) 665 return false; 666 BranchProbability ThenProb, ElseProb; 667 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 668 ThenProb, ElseProb)) 669 return false; 670 BasicBlock *IfThen = BI->getSuccessor(0); 671 BasicBlock *IfElse = BI->getSuccessor(1); 672 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 673 IfThen != IfElse && 674 "Invariant from findScopes"); 675 if (IfThen == R->getExit()) { 676 // Swap them so that IfThen/ThenProb means going into the conditional code 677 // and IfElse/ElseProb means skipping it. 678 std::swap(IfThen, IfElse); 679 std::swap(ThenProb, ElseProb); 680 } 681 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 682 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 683 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 684 return checkBias(R, ThenProb, ElseProb, 685 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 686 BranchBiasMap); 687 } 688 689 // Returns true and insert a select into the right biased set and the map if the 690 // select is biased. 691 static bool checkBiasedSelect( 692 SelectInst *SI, Region *R, 693 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 694 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 695 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 696 BranchProbability TrueProb, FalseProb; 697 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 698 TrueProb, FalseProb)) 699 return false; 700 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 701 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 702 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 703 return checkBias(SI, TrueProb, FalseProb, 704 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 705 SelectBiasMap); 706 } 707 708 // Returns the instruction at which to hoist the dependent condition values and 709 // insert the CHR branch for a region. This is the terminator branch in the 710 // entry block or the first select in the entry block, if any. 711 static Instruction* getBranchInsertPoint(RegInfo &RI) { 712 Region *R = RI.R; 713 BasicBlock *EntryBB = R->getEntry(); 714 // The hoist point is by default the terminator of the entry block, which is 715 // the same as the branch instruction if RI.HasBranch is true. 716 Instruction *HoistPoint = EntryBB->getTerminator(); 717 for (SelectInst *SI : RI.Selects) { 718 if (SI->getParent() == EntryBB) { 719 // Pick the first select in Selects in the entry block. Note Selects is 720 // sorted in the instruction order within a block (asserted below). 721 HoistPoint = SI; 722 break; 723 } 724 } 725 assert(HoistPoint && "Null HoistPoint"); 726 #ifndef NDEBUG 727 // Check that HoistPoint is the first one in Selects in the entry block, 728 // if any. 729 DenseSet<Instruction *> EntryBlockSelectSet; 730 for (SelectInst *SI : RI.Selects) { 731 if (SI->getParent() == EntryBB) { 732 EntryBlockSelectSet.insert(SI); 733 } 734 } 735 for (Instruction &I : *EntryBB) { 736 if (EntryBlockSelectSet.contains(&I)) { 737 assert(&I == HoistPoint && 738 "HoistPoint must be the first one in Selects"); 739 break; 740 } 741 } 742 #endif 743 return HoistPoint; 744 } 745 746 // Find a CHR scope in the given region. 747 CHRScope * CHR::findScope(Region *R) { 748 CHRScope *Result = nullptr; 749 BasicBlock *Entry = R->getEntry(); 750 BasicBlock *Exit = R->getExit(); // null if top level. 751 assert(Entry && "Entry must not be null"); 752 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 753 "Only top level region has a null exit"); 754 if (Entry) 755 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 756 else 757 CHR_DEBUG(dbgs() << "Entry null\n"); 758 if (Exit) 759 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 760 else 761 CHR_DEBUG(dbgs() << "Exit null\n"); 762 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 763 // to this region). 764 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 765 if (EntryInSubregion) 766 return nullptr; 767 // Exclude loops 768 for (BasicBlock *Pred : predecessors(Entry)) 769 if (R->contains(Pred)) 770 return nullptr; 771 // If any of the basic blocks have address taken, we must skip this region 772 // because we cannot clone basic blocks that have address taken. 773 for (BasicBlock *BB : R->blocks()) { 774 if (BB->hasAddressTaken()) 775 return nullptr; 776 // If we encounter llvm.coro.id, skip this region because if the basic block 777 // is cloned, we end up inserting a token type PHI node to the block with 778 // llvm.coro.begin. 779 // FIXME: This could lead to less optimal codegen, because the region is 780 // excluded, it can prevent CHR from merging adjacent regions into bigger 781 // scope and hoisting more branches. 782 for (Instruction &I : *BB) 783 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 784 if (II->getIntrinsicID() == Intrinsic::coro_id) 785 return nullptr; 786 } 787 788 if (Exit) { 789 // Try to find an if-then block (check if R is an if-then). 790 // if (cond) { 791 // ... 792 // } 793 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 794 if (BI) 795 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 796 else 797 CHR_DEBUG(dbgs() << "BI null\n"); 798 if (BI && BI->isConditional()) { 799 BasicBlock *S0 = BI->getSuccessor(0); 800 BasicBlock *S1 = BI->getSuccessor(1); 801 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 802 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 803 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 804 RegInfo RI(R); 805 RI.HasBranch = checkBiasedBranch( 806 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 807 BranchBiasMap); 808 Result = new CHRScope(RI); 809 Scopes.insert(Result); 810 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 811 ++Stats.NumBranches; 812 if (!RI.HasBranch) { 813 ORE.emit([&]() { 814 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 815 << "Branch not biased"; 816 }); 817 } 818 } 819 } 820 } 821 { 822 // Try to look for selects in the direct child blocks (as opposed to in 823 // subregions) of R. 824 // ... 825 // if (..) { // Some subregion 826 // ... 827 // } 828 // if (..) { // Some subregion 829 // ... 830 // } 831 // ... 832 // a = cond ? b : c; 833 // ... 834 SmallVector<SelectInst *, 8> Selects; 835 for (RegionNode *E : R->elements()) { 836 if (E->isSubRegion()) 837 continue; 838 // This returns the basic block of E if E is a direct child of R (not a 839 // subregion.) 840 BasicBlock *BB = E->getEntry(); 841 // Need to push in the order to make it easier to find the first Select 842 // later. 843 for (Instruction &I : *BB) { 844 if (auto *SI = dyn_cast<SelectInst>(&I)) { 845 Selects.push_back(SI); 846 ++Stats.NumBranches; 847 } 848 } 849 } 850 if (Selects.size() > 0) { 851 auto AddSelects = [&](RegInfo &RI) { 852 for (auto *SI : Selects) 853 if (checkBiasedSelect(SI, RI.R, 854 TrueBiasedSelectsGlobal, 855 FalseBiasedSelectsGlobal, 856 SelectBiasMap)) 857 RI.Selects.push_back(SI); 858 else 859 ORE.emit([&]() { 860 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 861 << "Select not biased"; 862 }); 863 }; 864 if (!Result) { 865 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 866 RegInfo RI(R); 867 AddSelects(RI); 868 Result = new CHRScope(RI); 869 Scopes.insert(Result); 870 } else { 871 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 872 AddSelects(Result->RegInfos[0]); 873 } 874 } 875 } 876 877 if (Result) { 878 checkScopeHoistable(Result); 879 } 880 return Result; 881 } 882 883 // Check that any of the branch and the selects in the region could be 884 // hoisted above the the CHR branch insert point (the most dominating of 885 // them, either the branch (at the end of the first block) or the first 886 // select in the first block). If the branch can't be hoisted, drop the 887 // selects in the first blocks. 888 // 889 // For example, for the following scope/region with selects, we want to insert 890 // the merged branch right before the first select in the first/entry block by 891 // hoisting c1, c2, c3, and c4. 892 // 893 // // Branch insert point here. 894 // a = c1 ? b : c; // Select 1 895 // d = c2 ? e : f; // Select 2 896 // if (c3) { // Branch 897 // ... 898 // c4 = foo() // A call. 899 // g = c4 ? h : i; // Select 3 900 // } 901 // 902 // But suppose we can't hoist c4 because it's dependent on the preceding 903 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 904 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 905 void CHR::checkScopeHoistable(CHRScope *Scope) { 906 RegInfo &RI = Scope->RegInfos[0]; 907 Region *R = RI.R; 908 BasicBlock *EntryBB = R->getEntry(); 909 auto *Branch = RI.HasBranch ? 910 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 911 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 912 if (RI.HasBranch || !Selects.empty()) { 913 Instruction *InsertPoint = getBranchInsertPoint(RI); 914 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 915 // Avoid a data dependence from a select or a branch to a(nother) 916 // select. Note no instruction can't data-depend on a branch (a branch 917 // instruction doesn't produce a value). 918 DenseSet<Instruction *> Unhoistables; 919 // Initialize Unhoistables with the selects. 920 for (SelectInst *SI : Selects) { 921 Unhoistables.insert(SI); 922 } 923 // Remove Selects that can't be hoisted. 924 for (auto it = Selects.begin(); it != Selects.end(); ) { 925 SelectInst *SI = *it; 926 if (SI == InsertPoint) { 927 ++it; 928 continue; 929 } 930 DenseMap<Instruction *, bool> Visited; 931 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 932 DT, Unhoistables, nullptr, Visited); 933 if (!IsHoistable) { 934 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 935 ORE.emit([&]() { 936 return OptimizationRemarkMissed(DEBUG_TYPE, 937 "DropUnhoistableSelect", SI) 938 << "Dropped unhoistable select"; 939 }); 940 it = Selects.erase(it); 941 // Since we are dropping the select here, we also drop it from 942 // Unhoistables. 943 Unhoistables.erase(SI); 944 } else 945 ++it; 946 } 947 // Update InsertPoint after potentially removing selects. 948 InsertPoint = getBranchInsertPoint(RI); 949 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 950 if (RI.HasBranch && InsertPoint != Branch) { 951 DenseMap<Instruction *, bool> Visited; 952 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 953 DT, Unhoistables, nullptr, Visited); 954 if (!IsHoistable) { 955 // If the branch isn't hoistable, drop the selects in the entry 956 // block, preferring the branch, which makes the branch the hoist 957 // point. 958 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 959 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 960 CHR_DEBUG( 961 for (SelectInst *SI : Selects) { 962 dbgs() << "SI " << *SI << "\n"; 963 }); 964 for (SelectInst *SI : Selects) { 965 ORE.emit([&]() { 966 return OptimizationRemarkMissed(DEBUG_TYPE, 967 "DropSelectUnhoistableBranch", SI) 968 << "Dropped select due to unhoistable branch"; 969 }); 970 } 971 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 972 return SI->getParent() == EntryBB; 973 }); 974 Unhoistables.clear(); 975 InsertPoint = Branch; 976 } 977 } 978 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 979 #ifndef NDEBUG 980 if (RI.HasBranch) { 981 assert(!DT.dominates(Branch, InsertPoint) && 982 "Branch can't be already above the hoist point"); 983 DenseMap<Instruction *, bool> Visited; 984 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 985 DT, Unhoistables, nullptr, Visited) && 986 "checkHoistValue for branch"); 987 } 988 for (auto *SI : Selects) { 989 assert(!DT.dominates(SI, InsertPoint) && 990 "SI can't be already above the hoist point"); 991 DenseMap<Instruction *, bool> Visited; 992 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 993 Unhoistables, nullptr, Visited) && 994 "checkHoistValue for selects"); 995 } 996 CHR_DEBUG(dbgs() << "Result\n"); 997 if (RI.HasBranch) { 998 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 999 } 1000 for (auto *SI : Selects) { 1001 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 1002 } 1003 #endif 1004 } 1005 } 1006 1007 // Traverse the region tree, find all nested scopes and merge them if possible. 1008 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 1009 SmallVectorImpl<CHRScope *> &Scopes) { 1010 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 1011 CHRScope *Result = findScope(R); 1012 // Visit subscopes. 1013 CHRScope *ConsecutiveSubscope = nullptr; 1014 SmallVector<CHRScope *, 8> Subscopes; 1015 for (auto It = R->begin(); It != R->end(); ++It) { 1016 const std::unique_ptr<Region> &SubR = *It; 1017 auto NextIt = std::next(It); 1018 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 1019 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 1020 << "\n"); 1021 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 1022 if (SubCHRScope) { 1023 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 1024 } else { 1025 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 1026 } 1027 if (SubCHRScope) { 1028 if (!ConsecutiveSubscope) 1029 ConsecutiveSubscope = SubCHRScope; 1030 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 1031 Subscopes.push_back(ConsecutiveSubscope); 1032 ConsecutiveSubscope = SubCHRScope; 1033 } else 1034 ConsecutiveSubscope->append(SubCHRScope); 1035 } else { 1036 if (ConsecutiveSubscope) { 1037 Subscopes.push_back(ConsecutiveSubscope); 1038 } 1039 ConsecutiveSubscope = nullptr; 1040 } 1041 } 1042 if (ConsecutiveSubscope) { 1043 Subscopes.push_back(ConsecutiveSubscope); 1044 } 1045 for (CHRScope *Sub : Subscopes) { 1046 if (Result) { 1047 // Combine it with the parent. 1048 Result->addSub(Sub); 1049 } else { 1050 // Push Subscopes as they won't be combined with the parent. 1051 Scopes.push_back(Sub); 1052 } 1053 } 1054 return Result; 1055 } 1056 1057 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1058 DenseSet<Value *> ConditionValues; 1059 if (RI.HasBranch) { 1060 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1061 ConditionValues.insert(BI->getCondition()); 1062 } 1063 for (SelectInst *SI : RI.Selects) { 1064 ConditionValues.insert(SI->getCondition()); 1065 } 1066 return ConditionValues; 1067 } 1068 1069 1070 // Determine whether to split a scope depending on the sets of the branch 1071 // condition values of the previous region and the current region. We split 1072 // (return true) it if 1) the condition values of the inner/lower scope can't be 1073 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1074 // values have an empty intersection (because the combined branch conditions 1075 // won't probably lead to a simpler combined condition). 1076 static bool shouldSplit(Instruction *InsertPoint, 1077 DenseSet<Value *> &PrevConditionValues, 1078 DenseSet<Value *> &ConditionValues, 1079 DominatorTree &DT, 1080 DenseSet<Instruction *> &Unhoistables) { 1081 assert(InsertPoint && "Null InsertPoint"); 1082 CHR_DEBUG( 1083 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1084 for (Value *V : PrevConditionValues) { 1085 dbgs() << *V << ", "; 1086 } 1087 dbgs() << " ConditionValues "; 1088 for (Value *V : ConditionValues) { 1089 dbgs() << *V << ", "; 1090 } 1091 dbgs() << "\n"); 1092 // If any of Bases isn't hoistable to the hoist point, split. 1093 for (Value *V : ConditionValues) { 1094 DenseMap<Instruction *, bool> Visited; 1095 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1096 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1097 return true; // Not hoistable, split. 1098 } 1099 } 1100 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1101 // unnecessary splits at scopes with no branch/selects. If 1102 // PrevConditionValues and ConditionValues don't intersect at all, split. 1103 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1104 // Use std::set as DenseSet doesn't work with set_intersection. 1105 std::set<Value *> PrevBases, Bases; 1106 DenseMap<Value *, std::set<Value *>> Visited; 1107 for (Value *V : PrevConditionValues) { 1108 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1109 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1110 } 1111 for (Value *V : ConditionValues) { 1112 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1113 Bases.insert(BaseValues.begin(), BaseValues.end()); 1114 } 1115 CHR_DEBUG( 1116 dbgs() << "PrevBases "; 1117 for (Value *V : PrevBases) { 1118 dbgs() << *V << ", "; 1119 } 1120 dbgs() << " Bases "; 1121 for (Value *V : Bases) { 1122 dbgs() << *V << ", "; 1123 } 1124 dbgs() << "\n"); 1125 std::vector<Value *> Intersection; 1126 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1127 Bases.end(), std::back_inserter(Intersection)); 1128 if (Intersection.empty()) { 1129 // Empty intersection, split. 1130 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1131 return true; 1132 } 1133 } 1134 CHR_DEBUG(dbgs() << "No split\n"); 1135 return false; // Don't split. 1136 } 1137 1138 static void getSelectsInScope(CHRScope *Scope, 1139 DenseSet<Instruction *> &Output) { 1140 for (RegInfo &RI : Scope->RegInfos) 1141 for (SelectInst *SI : RI.Selects) 1142 Output.insert(SI); 1143 for (CHRScope *Sub : Scope->Subs) 1144 getSelectsInScope(Sub, Output); 1145 } 1146 1147 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1148 SmallVectorImpl<CHRScope *> &Output) { 1149 for (CHRScope *Scope : Input) { 1150 assert(!Scope->BranchInsertPoint && 1151 "BranchInsertPoint must not be set"); 1152 DenseSet<Instruction *> Unhoistables; 1153 getSelectsInScope(Scope, Unhoistables); 1154 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1155 } 1156 #ifndef NDEBUG 1157 for (CHRScope *Scope : Output) { 1158 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1159 } 1160 #endif 1161 } 1162 1163 SmallVector<CHRScope *, 8> CHR::splitScope( 1164 CHRScope *Scope, 1165 CHRScope *Outer, 1166 DenseSet<Value *> *OuterConditionValues, 1167 Instruction *OuterInsertPoint, 1168 SmallVectorImpl<CHRScope *> &Output, 1169 DenseSet<Instruction *> &Unhoistables) { 1170 if (Outer) { 1171 assert(OuterConditionValues && "Null OuterConditionValues"); 1172 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1173 } 1174 bool PrevSplitFromOuter = true; 1175 DenseSet<Value *> PrevConditionValues; 1176 Instruction *PrevInsertPoint = nullptr; 1177 SmallVector<CHRScope *, 8> Splits; 1178 SmallVector<bool, 8> SplitsSplitFromOuter; 1179 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1180 SmallVector<Instruction *, 8> SplitsInsertPoints; 1181 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1182 for (RegInfo &RI : RegInfos) { 1183 Instruction *InsertPoint = getBranchInsertPoint(RI); 1184 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1185 CHR_DEBUG( 1186 dbgs() << "ConditionValues "; 1187 for (Value *V : ConditionValues) { 1188 dbgs() << *V << ", "; 1189 } 1190 dbgs() << "\n"); 1191 if (RI.R == RegInfos[0].R) { 1192 // First iteration. Check to see if we should split from the outer. 1193 if (Outer) { 1194 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1195 CHR_DEBUG(dbgs() << "Should split from outer at " 1196 << RI.R->getNameStr() << "\n"); 1197 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1198 ConditionValues, DT, Unhoistables)) { 1199 PrevConditionValues = ConditionValues; 1200 PrevInsertPoint = InsertPoint; 1201 ORE.emit([&]() { 1202 return OptimizationRemarkMissed(DEBUG_TYPE, 1203 "SplitScopeFromOuter", 1204 RI.R->getEntry()->getTerminator()) 1205 << "Split scope from outer due to unhoistable branch/select " 1206 << "and/or lack of common condition values"; 1207 }); 1208 } else { 1209 // Not splitting from the outer. Use the outer bases and insert 1210 // point. Union the bases. 1211 PrevSplitFromOuter = false; 1212 PrevConditionValues = *OuterConditionValues; 1213 PrevConditionValues.insert(ConditionValues.begin(), 1214 ConditionValues.end()); 1215 PrevInsertPoint = OuterInsertPoint; 1216 } 1217 } else { 1218 CHR_DEBUG(dbgs() << "Outer null\n"); 1219 PrevConditionValues = ConditionValues; 1220 PrevInsertPoint = InsertPoint; 1221 } 1222 } else { 1223 CHR_DEBUG(dbgs() << "Should split from prev at " 1224 << RI.R->getNameStr() << "\n"); 1225 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1226 DT, Unhoistables)) { 1227 CHRScope *Tail = Scope->split(RI.R); 1228 Scopes.insert(Tail); 1229 Splits.push_back(Scope); 1230 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1231 SplitsConditionValues.push_back(PrevConditionValues); 1232 SplitsInsertPoints.push_back(PrevInsertPoint); 1233 Scope = Tail; 1234 PrevConditionValues = ConditionValues; 1235 PrevInsertPoint = InsertPoint; 1236 PrevSplitFromOuter = true; 1237 ORE.emit([&]() { 1238 return OptimizationRemarkMissed(DEBUG_TYPE, 1239 "SplitScopeFromPrev", 1240 RI.R->getEntry()->getTerminator()) 1241 << "Split scope from previous due to unhoistable branch/select " 1242 << "and/or lack of common condition values"; 1243 }); 1244 } else { 1245 // Not splitting. Union the bases. Keep the hoist point. 1246 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1247 } 1248 } 1249 } 1250 Splits.push_back(Scope); 1251 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1252 SplitsConditionValues.push_back(PrevConditionValues); 1253 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1254 SplitsInsertPoints.push_back(PrevInsertPoint); 1255 assert(Splits.size() == SplitsConditionValues.size() && 1256 Splits.size() == SplitsSplitFromOuter.size() && 1257 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1258 for (size_t I = 0; I < Splits.size(); ++I) { 1259 CHRScope *Split = Splits[I]; 1260 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1261 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1262 SmallVector<CHRScope *, 8> NewSubs; 1263 DenseSet<Instruction *> SplitUnhoistables; 1264 getSelectsInScope(Split, SplitUnhoistables); 1265 for (CHRScope *Sub : Split->Subs) { 1266 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1267 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1268 SplitUnhoistables); 1269 llvm::append_range(NewSubs, SubSplits); 1270 } 1271 Split->Subs = NewSubs; 1272 } 1273 SmallVector<CHRScope *, 8> Result; 1274 for (size_t I = 0; I < Splits.size(); ++I) { 1275 CHRScope *Split = Splits[I]; 1276 if (SplitsSplitFromOuter[I]) { 1277 // Split from the outer. 1278 Output.push_back(Split); 1279 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1280 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1281 << "\n"); 1282 } else { 1283 // Connected to the outer. 1284 Result.push_back(Split); 1285 } 1286 } 1287 if (!Outer) 1288 assert(Result.empty() && 1289 "If no outer (top-level), must return no nested ones"); 1290 return Result; 1291 } 1292 1293 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1294 for (CHRScope *Scope : Scopes) { 1295 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1296 classifyBiasedScopes(Scope, Scope); 1297 CHR_DEBUG( 1298 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1299 dbgs() << "TrueBiasedRegions "; 1300 for (Region *R : Scope->TrueBiasedRegions) { 1301 dbgs() << R->getNameStr() << ", "; 1302 } 1303 dbgs() << "\n"; 1304 dbgs() << "FalseBiasedRegions "; 1305 for (Region *R : Scope->FalseBiasedRegions) { 1306 dbgs() << R->getNameStr() << ", "; 1307 } 1308 dbgs() << "\n"; 1309 dbgs() << "TrueBiasedSelects "; 1310 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1311 dbgs() << *SI << ", "; 1312 } 1313 dbgs() << "\n"; 1314 dbgs() << "FalseBiasedSelects "; 1315 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1316 dbgs() << *SI << ", "; 1317 } 1318 dbgs() << "\n";); 1319 } 1320 } 1321 1322 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1323 for (RegInfo &RI : Scope->RegInfos) { 1324 if (RI.HasBranch) { 1325 Region *R = RI.R; 1326 if (TrueBiasedRegionsGlobal.contains(R)) 1327 OutermostScope->TrueBiasedRegions.insert(R); 1328 else if (FalseBiasedRegionsGlobal.contains(R)) 1329 OutermostScope->FalseBiasedRegions.insert(R); 1330 else 1331 llvm_unreachable("Must be biased"); 1332 } 1333 for (SelectInst *SI : RI.Selects) { 1334 if (TrueBiasedSelectsGlobal.contains(SI)) 1335 OutermostScope->TrueBiasedSelects.insert(SI); 1336 else if (FalseBiasedSelectsGlobal.contains(SI)) 1337 OutermostScope->FalseBiasedSelects.insert(SI); 1338 else 1339 llvm_unreachable("Must be biased"); 1340 } 1341 } 1342 for (CHRScope *Sub : Scope->Subs) { 1343 classifyBiasedScopes(Sub, OutermostScope); 1344 } 1345 } 1346 1347 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1348 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1349 Scope->FalseBiasedRegions.size() + 1350 Scope->TrueBiasedSelects.size() + 1351 Scope->FalseBiasedSelects.size(); 1352 return NumBiased >= CHRMergeThreshold; 1353 } 1354 1355 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1356 SmallVectorImpl<CHRScope *> &Output) { 1357 for (CHRScope *Scope : Input) { 1358 // Filter out the ones with only one region and no subs. 1359 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1360 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1361 << Scope->TrueBiasedRegions.size() 1362 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1363 << " true-selects " << Scope->TrueBiasedSelects.size() 1364 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1365 ORE.emit([&]() { 1366 return OptimizationRemarkMissed( 1367 DEBUG_TYPE, 1368 "DropScopeWithOneBranchOrSelect", 1369 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1370 << "Drop scope with < " 1371 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1372 << " biased branch(es) or select(s)"; 1373 }); 1374 continue; 1375 } 1376 Output.push_back(Scope); 1377 } 1378 } 1379 1380 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1381 SmallVectorImpl<CHRScope *> &Output) { 1382 for (CHRScope *Scope : Input) { 1383 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1384 "Empty"); 1385 setCHRRegions(Scope, Scope); 1386 Output.push_back(Scope); 1387 CHR_DEBUG( 1388 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1389 for (auto pair : Scope->HoistStopMap) { 1390 Region *R = pair.first; 1391 dbgs() << "Region " << R->getNameStr() << "\n"; 1392 for (Instruction *I : pair.second) { 1393 dbgs() << "HoistStop " << *I << "\n"; 1394 } 1395 } 1396 dbgs() << "CHRRegions" << "\n"; 1397 for (RegInfo &RI : Scope->CHRRegions) { 1398 dbgs() << RI.R->getNameStr() << "\n"; 1399 }); 1400 } 1401 } 1402 1403 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1404 DenseSet<Instruction *> Unhoistables; 1405 // Put the biased selects in Unhoistables because they should stay where they 1406 // are and constant-folded after CHR (in case one biased select or a branch 1407 // can depend on another biased select.) 1408 for (RegInfo &RI : Scope->RegInfos) { 1409 for (SelectInst *SI : RI.Selects) { 1410 Unhoistables.insert(SI); 1411 } 1412 } 1413 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1414 for (RegInfo &RI : Scope->RegInfos) { 1415 Region *R = RI.R; 1416 DenseSet<Instruction *> HoistStops; 1417 bool IsHoisted = false; 1418 if (RI.HasBranch) { 1419 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1420 OutermostScope->FalseBiasedRegions.contains(R)) && 1421 "Must be truthy or falsy"); 1422 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1423 // Note checkHoistValue fills in HoistStops. 1424 DenseMap<Instruction *, bool> Visited; 1425 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1426 Unhoistables, &HoistStops, Visited); 1427 assert(IsHoistable && "Must be hoistable"); 1428 (void)(IsHoistable); // Unused in release build 1429 IsHoisted = true; 1430 } 1431 for (SelectInst *SI : RI.Selects) { 1432 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1433 OutermostScope->FalseBiasedSelects.contains(SI)) && 1434 "Must be true or false biased"); 1435 // Note checkHoistValue fills in HoistStops. 1436 DenseMap<Instruction *, bool> Visited; 1437 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1438 Unhoistables, &HoistStops, Visited); 1439 assert(IsHoistable && "Must be hoistable"); 1440 (void)(IsHoistable); // Unused in release build 1441 IsHoisted = true; 1442 } 1443 if (IsHoisted) { 1444 OutermostScope->CHRRegions.push_back(RI); 1445 OutermostScope->HoistStopMap[R] = HoistStops; 1446 } 1447 } 1448 for (CHRScope *Sub : Scope->Subs) 1449 setCHRRegions(Sub, OutermostScope); 1450 } 1451 1452 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1453 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1454 } 1455 1456 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1457 SmallVectorImpl<CHRScope *> &Output) { 1458 Output.resize(Input.size()); 1459 llvm::copy(Input, Output.begin()); 1460 llvm::stable_sort(Output, CHRScopeSorter); 1461 } 1462 1463 // Return true if V is already hoisted or was hoisted (along with its operands) 1464 // to the insert point. 1465 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1466 HoistStopMapTy &HoistStopMap, 1467 DenseSet<Instruction *> &HoistedSet, 1468 DenseSet<PHINode *> &TrivialPHIs, 1469 DominatorTree &DT) { 1470 auto IT = HoistStopMap.find(R); 1471 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1472 DenseSet<Instruction *> &HoistStops = IT->second; 1473 if (auto *I = dyn_cast<Instruction>(V)) { 1474 if (I == HoistPoint) 1475 return; 1476 if (HoistStops.count(I)) 1477 return; 1478 if (auto *PN = dyn_cast<PHINode>(I)) 1479 if (TrivialPHIs.count(PN)) 1480 // The trivial phi inserted by the previous CHR scope could replace a 1481 // non-phi in HoistStops. Note that since this phi is at the exit of a 1482 // previous CHR scope, which dominates this scope, it's safe to stop 1483 // hoisting there. 1484 return; 1485 if (HoistedSet.count(I)) 1486 // Already hoisted, return. 1487 return; 1488 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1489 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1490 assert(DT.getNode(HoistPoint->getParent()) && 1491 "DT must contain HoistPoint block"); 1492 if (DT.dominates(I, HoistPoint)) 1493 // We are already above the hoist point. Stop here. This may be necessary 1494 // when multiple scopes would independently hoist the same 1495 // instruction. Since an outer (dominating) scope would hoist it to its 1496 // entry before an inner (dominated) scope would to its entry, the inner 1497 // scope may see the instruction already hoisted, in which case it 1498 // potentially wrong for the inner scope to hoist it and could cause bad 1499 // IR (non-dominating def), but safe to skip hoisting it instead because 1500 // it's already in a block that dominates the inner scope. 1501 return; 1502 for (Value *Op : I->operands()) { 1503 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1504 } 1505 I->moveBefore(HoistPoint); 1506 HoistedSet.insert(I); 1507 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1508 } 1509 } 1510 1511 // Hoist the dependent condition values of the branches and the selects in the 1512 // scope to the insert point. 1513 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1514 DenseSet<PHINode *> &TrivialPHIs, 1515 DominatorTree &DT) { 1516 DenseSet<Instruction *> HoistedSet; 1517 for (const RegInfo &RI : Scope->CHRRegions) { 1518 Region *R = RI.R; 1519 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1520 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1521 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1522 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1523 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1524 HoistedSet, TrivialPHIs, DT); 1525 } 1526 for (SelectInst *SI : RI.Selects) { 1527 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1528 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1529 if (!(IsTrueBiased || IsFalseBiased)) 1530 continue; 1531 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1532 HoistedSet, TrivialPHIs, DT); 1533 } 1534 } 1535 } 1536 1537 // Negate the predicate if an ICmp if it's used only by branches or selects by 1538 // swapping the operands of the branches or the selects. Returns true if success. 1539 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1540 Instruction *ExcludedUser, 1541 CHRScope *Scope) { 1542 for (User *U : ICmp->users()) { 1543 if (U == ExcludedUser) 1544 continue; 1545 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1546 continue; 1547 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1548 continue; 1549 return false; 1550 } 1551 for (User *U : ICmp->users()) { 1552 if (U == ExcludedUser) 1553 continue; 1554 if (auto *BI = dyn_cast<BranchInst>(U)) { 1555 assert(BI->isConditional() && "Must be conditional"); 1556 BI->swapSuccessors(); 1557 // Don't need to swap this in terms of 1558 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1559 // mean whehter the branch is likely go into the if-then rather than 1560 // successor0/successor1 and because we can tell which edge is the then or 1561 // the else one by comparing the destination to the region exit block. 1562 continue; 1563 } 1564 if (auto *SI = dyn_cast<SelectInst>(U)) { 1565 // Swap operands 1566 SI->swapValues(); 1567 SI->swapProfMetadata(); 1568 if (Scope->TrueBiasedSelects.count(SI)) { 1569 assert(!Scope->FalseBiasedSelects.contains(SI) && 1570 "Must not be already in"); 1571 Scope->FalseBiasedSelects.insert(SI); 1572 } else if (Scope->FalseBiasedSelects.count(SI)) { 1573 assert(!Scope->TrueBiasedSelects.contains(SI) && 1574 "Must not be already in"); 1575 Scope->TrueBiasedSelects.insert(SI); 1576 } 1577 continue; 1578 } 1579 llvm_unreachable("Must be a branch or a select"); 1580 } 1581 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1582 return true; 1583 } 1584 1585 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1586 // for a value that's defined in the scope but used outside it (meaning it's 1587 // alive at the exit block). 1588 static void insertTrivialPHIs(CHRScope *Scope, 1589 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1590 DenseSet<PHINode *> &TrivialPHIs) { 1591 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1592 for (RegInfo &RI : Scope->RegInfos) { 1593 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1594 // sub-Scopes. 1595 BlocksInScope.insert(BB); 1596 } 1597 } 1598 CHR_DEBUG({ 1599 dbgs() << "Inserting redundant phis\n"; 1600 for (BasicBlock *BB : BlocksInScope) 1601 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1602 }); 1603 for (BasicBlock *BB : BlocksInScope) { 1604 for (Instruction &I : *BB) { 1605 SmallVector<Instruction *, 8> Users; 1606 for (User *U : I.users()) { 1607 if (auto *UI = dyn_cast<Instruction>(U)) { 1608 if (!BlocksInScope.contains(UI->getParent()) && 1609 // Unless there's already a phi for I at the exit block. 1610 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1611 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1612 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1613 Users.push_back(UI); 1614 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1615 // There's a loop backedge from a block that's dominated by this 1616 // scope to the entry block. 1617 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1618 CHR_DEBUG(dbgs() 1619 << "Used at entry block (for a back edge) by a phi user " 1620 << *UI << "\n"); 1621 Users.push_back(UI); 1622 } 1623 } 1624 } 1625 if (Users.size() > 0) { 1626 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1627 // ExitBlock. Replace I with the new phi in UI unless UI is another 1628 // phi at ExitBlock. 1629 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 1630 &ExitBlock->front()); 1631 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1632 PN->addIncoming(&I, Pred); 1633 } 1634 TrivialPHIs.insert(PN); 1635 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1636 for (Instruction *UI : Users) { 1637 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1638 if (UI->getOperand(J) == &I) { 1639 UI->setOperand(J, PN); 1640 } 1641 } 1642 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1643 } 1644 } 1645 } 1646 } 1647 } 1648 1649 // Assert that all the CHR regions of the scope have a biased branch or select. 1650 static void LLVM_ATTRIBUTE_UNUSED 1651 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1652 #ifndef NDEBUG 1653 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1654 if (Scope->TrueBiasedRegions.count(RI.R) || 1655 Scope->FalseBiasedRegions.count(RI.R)) 1656 return true; 1657 for (SelectInst *SI : RI.Selects) 1658 if (Scope->TrueBiasedSelects.count(SI) || 1659 Scope->FalseBiasedSelects.count(SI)) 1660 return true; 1661 return false; 1662 }; 1663 for (RegInfo &RI : Scope->CHRRegions) { 1664 assert(HasBiasedBranchOrSelect(RI, Scope) && 1665 "Must have biased branch or select"); 1666 } 1667 #endif 1668 } 1669 1670 // Assert that all the condition values of the biased branches and selects have 1671 // been hoisted to the pre-entry block or outside of the scope. 1672 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1673 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1674 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1675 for (RegInfo &RI : Scope->CHRRegions) { 1676 Region *R = RI.R; 1677 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1678 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1679 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1680 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1681 Value *V = BI->getCondition(); 1682 CHR_DEBUG(dbgs() << *V << "\n"); 1683 if (auto *I = dyn_cast<Instruction>(V)) { 1684 (void)(I); // Unused in release build. 1685 assert((I->getParent() == PreEntryBlock || 1686 !Scope->contains(I)) && 1687 "Must have been hoisted to PreEntryBlock or outside the scope"); 1688 } 1689 } 1690 for (SelectInst *SI : RI.Selects) { 1691 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1692 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1693 if (!(IsTrueBiased || IsFalseBiased)) 1694 continue; 1695 Value *V = SI->getCondition(); 1696 CHR_DEBUG(dbgs() << *V << "\n"); 1697 if (auto *I = dyn_cast<Instruction>(V)) { 1698 (void)(I); // Unused in release build. 1699 assert((I->getParent() == PreEntryBlock || 1700 !Scope->contains(I)) && 1701 "Must have been hoisted to PreEntryBlock or outside the scope"); 1702 } 1703 } 1704 } 1705 } 1706 1707 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1708 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1709 1710 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1711 Region *FirstRegion = Scope->RegInfos[0].R; 1712 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1713 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1714 BasicBlock *ExitBlock = LastRegion->getExit(); 1715 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1716 1717 if (ExitBlock) { 1718 // Insert a trivial phi at the exit block (where the CHR hot path and the 1719 // cold path merges) for a value that's defined in the scope but used 1720 // outside it (meaning it's alive at the exit block). We will add the 1721 // incoming values for the CHR cold paths to it below. Without this, we'd 1722 // miss updating phi's for such values unless there happens to already be a 1723 // phi for that value there. 1724 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1725 } 1726 1727 // Split the entry block of the first region. The new block becomes the new 1728 // entry block of the first region. The old entry block becomes the block to 1729 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1730 // through the split, we update the entry of the first region after the split, 1731 // and Region only points to the entry and the exit blocks, rather than 1732 // keeping everything in a list or set, the blocks membership and the 1733 // entry/exit blocks of the region are still valid after the split. 1734 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1735 << " at " << *Scope->BranchInsertPoint << "\n"); 1736 BasicBlock *NewEntryBlock = 1737 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1738 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1739 "NewEntryBlock's only pred must be EntryBlock"); 1740 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1741 BasicBlock *PreEntryBlock = EntryBlock; 1742 1743 ValueToValueMapTy VMap; 1744 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1745 // hot path (originals) and a cold path (clones) and update the PHIs at the 1746 // exit block. 1747 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1748 1749 // Replace the old (placeholder) branch with the new (merged) conditional 1750 // branch. 1751 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1752 NewEntryBlock, VMap); 1753 1754 #ifndef NDEBUG 1755 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1756 #endif 1757 1758 // Hoist the conditional values of the branches/selects. 1759 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1760 1761 #ifndef NDEBUG 1762 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1763 #endif 1764 1765 // Create the combined branch condition and constant-fold the branches/selects 1766 // in the hot path. 1767 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1768 ProfileCount.value_or(0)); 1769 } 1770 1771 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1772 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1773 // at the exit block. 1774 void CHR::cloneScopeBlocks(CHRScope *Scope, 1775 BasicBlock *PreEntryBlock, 1776 BasicBlock *ExitBlock, 1777 Region *LastRegion, 1778 ValueToValueMapTy &VMap) { 1779 // Clone all the blocks. The original blocks will be the hot-path 1780 // CHR-optimized code and the cloned blocks will be the original unoptimized 1781 // code. This is so that the block pointers from the 1782 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1783 // which CHR should apply to. 1784 SmallVector<BasicBlock*, 8> NewBlocks; 1785 for (RegInfo &RI : Scope->RegInfos) 1786 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1787 // sub-Scopes. 1788 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1789 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1790 NewBlocks.push_back(NewBB); 1791 VMap[BB] = NewBB; 1792 } 1793 1794 // Place the cloned blocks right after the original blocks (right before the 1795 // exit block of.) 1796 if (ExitBlock) 1797 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1798 F.getBasicBlockList(), 1799 NewBlocks[0]->getIterator(), F.end()); 1800 1801 // Update the cloned blocks/instructions to refer to themselves. 1802 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1803 for (Instruction &I : *NewBlocks[i]) 1804 RemapInstruction(&I, VMap, 1805 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1806 1807 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1808 // the top-level region but we don't need to add PHIs. The trivial PHIs 1809 // inserted above will be updated here. 1810 if (ExitBlock) 1811 for (PHINode &PN : ExitBlock->phis()) 1812 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1813 ++I) { 1814 BasicBlock *Pred = PN.getIncomingBlock(I); 1815 if (LastRegion->contains(Pred)) { 1816 Value *V = PN.getIncomingValue(I); 1817 auto It = VMap.find(V); 1818 if (It != VMap.end()) V = It->second; 1819 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1820 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1821 } 1822 } 1823 } 1824 1825 // A helper for transformScope. Replace the old (placeholder) branch with the 1826 // new (merged) conditional branch. 1827 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1828 BasicBlock *EntryBlock, 1829 BasicBlock *NewEntryBlock, 1830 ValueToValueMapTy &VMap) { 1831 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1832 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1833 "SplitBlock did not work correctly!"); 1834 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1835 "NewEntryBlock's only pred must be EntryBlock"); 1836 assert(VMap.find(NewEntryBlock) != VMap.end() && 1837 "NewEntryBlock must have been copied"); 1838 OldBR->dropAllReferences(); 1839 OldBR->eraseFromParent(); 1840 // The true predicate is a placeholder. It will be replaced later in 1841 // fixupBranchesAndSelects(). 1842 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1843 cast<BasicBlock>(VMap[NewEntryBlock]), 1844 ConstantInt::getTrue(F.getContext())); 1845 PreEntryBlock->getInstList().push_back(NewBR); 1846 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1847 "NewEntryBlock's only pred must be EntryBlock"); 1848 return NewBR; 1849 } 1850 1851 // A helper for transformScopes. Create the combined branch condition and 1852 // constant-fold the branches/selects in the hot path. 1853 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1854 BasicBlock *PreEntryBlock, 1855 BranchInst *MergedBR, 1856 uint64_t ProfileCount) { 1857 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1858 BranchProbability CHRBranchBias(1, 1); 1859 uint64_t NumCHRedBranches = 0; 1860 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1861 for (RegInfo &RI : Scope->CHRRegions) { 1862 Region *R = RI.R; 1863 if (RI.HasBranch) { 1864 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1865 ++NumCHRedBranches; 1866 } 1867 for (SelectInst *SI : RI.Selects) { 1868 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1869 ++NumCHRedBranches; 1870 } 1871 } 1872 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1873 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1874 ORE.emit([&]() { 1875 return OptimizationRemark(DEBUG_TYPE, 1876 "CHR", 1877 // Refer to the hot (original) path 1878 MergedBR->getSuccessor(0)->getTerminator()) 1879 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1880 << " branches or selects"; 1881 }); 1882 MergedBR->setCondition(MergedCondition); 1883 uint32_t Weights[] = { 1884 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1885 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1886 }; 1887 MDBuilder MDB(F.getContext()); 1888 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1889 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1890 << "\n"); 1891 } 1892 1893 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1894 // and constant-fold a branch in the hot path. 1895 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1896 IRBuilder<> &IRB, 1897 Value *&MergedCondition, 1898 BranchProbability &CHRBranchBias) { 1899 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1900 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1901 "Must be truthy or falsy"); 1902 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1903 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1904 "Must be in the bias map"); 1905 BranchProbability Bias = BranchBiasMap[R]; 1906 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1907 // Take the min. 1908 if (CHRBranchBias > Bias) 1909 CHRBranchBias = Bias; 1910 BasicBlock *IfThen = BI->getSuccessor(1); 1911 BasicBlock *IfElse = BI->getSuccessor(0); 1912 BasicBlock *RegionExitBlock = R->getExit(); 1913 assert(RegionExitBlock && "Null ExitBlock"); 1914 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1915 IfThen != IfElse && "Invariant from findScopes"); 1916 if (IfThen == RegionExitBlock) { 1917 // Swap them so that IfThen means going into it and IfElse means skipping 1918 // it. 1919 std::swap(IfThen, IfElse); 1920 } 1921 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1922 << " IfElse " << IfElse->getName() << "\n"); 1923 Value *Cond = BI->getCondition(); 1924 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1925 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1926 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1927 MergedCondition); 1928 // Constant-fold the branch at ClonedEntryBlock. 1929 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1930 "The successor shouldn't change"); 1931 Value *NewCondition = ConditionTrue ? 1932 ConstantInt::getTrue(F.getContext()) : 1933 ConstantInt::getFalse(F.getContext()); 1934 BI->setCondition(NewCondition); 1935 } 1936 1937 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1938 // and constant-fold a select in the hot path. 1939 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1940 IRBuilder<> &IRB, 1941 Value *&MergedCondition, 1942 BranchProbability &CHRBranchBias) { 1943 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1944 assert((IsTrueBiased || 1945 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1946 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1947 "Must be in the bias map"); 1948 BranchProbability Bias = SelectBiasMap[SI]; 1949 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1950 // Take the min. 1951 if (CHRBranchBias > Bias) 1952 CHRBranchBias = Bias; 1953 Value *Cond = SI->getCondition(); 1954 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1955 MergedCondition); 1956 Value *NewCondition = IsTrueBiased ? 1957 ConstantInt::getTrue(F.getContext()) : 1958 ConstantInt::getFalse(F.getContext()); 1959 SI->setCondition(NewCondition); 1960 } 1961 1962 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1963 // condition. 1964 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1965 Instruction *BranchOrSelect, CHRScope *Scope, 1966 IRBuilder<> &IRB, Value *&MergedCondition) { 1967 if (!IsTrueBiased) { 1968 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1969 // branch, negate the icmp predicate and swap the branch targets and avoid 1970 // inserting an Xor to negate Cond. 1971 auto *ICmp = dyn_cast<ICmpInst>(Cond); 1972 if (!ICmp || 1973 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1974 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1975 } 1976 1977 // Select conditions can be poison, while branching on poison is immediate 1978 // undefined behavior. As such, we need to freeze potentially poisonous 1979 // conditions derived from selects. 1980 if (isa<SelectInst>(BranchOrSelect) && 1981 !isGuaranteedNotToBeUndefOrPoison(Cond)) 1982 Cond = IRB.CreateFreeze(Cond); 1983 1984 // Use logical and to avoid propagating poison from later conditions. 1985 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 1986 } 1987 1988 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1989 unsigned I = 0; 1990 DenseSet<PHINode *> TrivialPHIs; 1991 for (CHRScope *Scope : CHRScopes) { 1992 transformScopes(Scope, TrivialPHIs); 1993 CHR_DEBUG( 1994 std::ostringstream oss; 1995 oss << " after transformScopes " << I++; 1996 dumpIR(F, oss.str().c_str(), nullptr)); 1997 (void)I; 1998 } 1999 } 2000 2001 static void LLVM_ATTRIBUTE_UNUSED 2002 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 2003 dbgs() << Label << " " << Scopes.size() << "\n"; 2004 for (CHRScope *Scope : Scopes) { 2005 dbgs() << *Scope << "\n"; 2006 } 2007 } 2008 2009 bool CHR::run() { 2010 if (!shouldApply(F, PSI)) 2011 return false; 2012 2013 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2014 2015 bool Changed = false; 2016 { 2017 CHR_DEBUG( 2018 dbgs() << "RegionInfo:\n"; 2019 RI.print(dbgs())); 2020 2021 // Recursively traverse the region tree and find regions that have biased 2022 // branches and/or selects and create scopes. 2023 SmallVector<CHRScope *, 8> AllScopes; 2024 findScopes(AllScopes); 2025 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2026 2027 // Split the scopes if 1) the conditiona values of the biased 2028 // branches/selects of the inner/lower scope can't be hoisted up to the 2029 // outermost/uppermost scope entry, or 2) the condition values of the biased 2030 // branches/selects in a scope (including subscopes) don't share at least 2031 // one common value. 2032 SmallVector<CHRScope *, 8> SplitScopes; 2033 splitScopes(AllScopes, SplitScopes); 2034 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2035 2036 // After splitting, set the biased regions and selects of a scope (a tree 2037 // root) that include those of the subscopes. 2038 classifyBiasedScopes(SplitScopes); 2039 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2040 2041 // Filter out the scopes that has only one biased region or select (CHR 2042 // isn't useful in such a case). 2043 SmallVector<CHRScope *, 8> FilteredScopes; 2044 filterScopes(SplitScopes, FilteredScopes); 2045 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2046 2047 // Set the regions to be CHR'ed and their hoist stops for each scope. 2048 SmallVector<CHRScope *, 8> SetScopes; 2049 setCHRRegions(FilteredScopes, SetScopes); 2050 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2051 2052 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2053 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2054 // to the hot path, rather than both hot and cold paths. 2055 SmallVector<CHRScope *, 8> SortedScopes; 2056 sortScopes(SetScopes, SortedScopes); 2057 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2058 2059 CHR_DEBUG( 2060 dbgs() << "RegionInfo:\n"; 2061 RI.print(dbgs())); 2062 2063 // Apply the CHR transformation. 2064 if (!SortedScopes.empty()) { 2065 transformScopes(SortedScopes); 2066 Changed = true; 2067 } 2068 } 2069 2070 if (Changed) { 2071 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2072 ORE.emit([&]() { 2073 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2074 << ore::NV("Function", &F) << " " 2075 << "Reduced the number of branches in hot paths by " 2076 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2077 << " (static) and " 2078 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2079 << " (weighted by PGO count)"; 2080 }); 2081 } 2082 2083 return Changed; 2084 } 2085 2086 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 2087 BlockFrequencyInfo &BFI = 2088 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 2089 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2090 ProfileSummaryInfo &PSI = 2091 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2092 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2093 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 2094 std::make_unique<OptimizationRemarkEmitter>(&F); 2095 return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run(); 2096 } 2097 2098 namespace llvm { 2099 2100 ControlHeightReductionPass::ControlHeightReductionPass() { 2101 parseCHRFilterFiles(); 2102 } 2103 2104 PreservedAnalyses ControlHeightReductionPass::run( 2105 Function &F, 2106 FunctionAnalysisManager &FAM) { 2107 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2108 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2109 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2110 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2111 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2112 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2113 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2114 if (!Changed) 2115 return PreservedAnalyses::all(); 2116 return PreservedAnalyses::none(); 2117 } 2118 2119 } // namespace llvm 2120