1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Support/BranchProbability.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Transforms/Utils.h" 35 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36 #include "llvm/Transforms/Utils/Cloning.h" 37 #include "llvm/Transforms/Utils/ValueMapper.h" 38 39 #include <set> 40 #include <sstream> 41 42 using namespace llvm; 43 44 #define DEBUG_TYPE "chr" 45 46 #define CHR_DEBUG(X) LLVM_DEBUG(X) 47 48 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 49 cl::desc("Apply CHR for all functions")); 50 51 static cl::opt<double> CHRBiasThreshold( 52 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 53 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 54 55 static cl::opt<unsigned> CHRMergeThreshold( 56 "chr-merge-threshold", cl::init(2), cl::Hidden, 57 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 58 59 static cl::opt<std::string> CHRModuleList( 60 "chr-module-list", cl::init(""), cl::Hidden, 61 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 62 63 static cl::opt<std::string> CHRFunctionList( 64 "chr-function-list", cl::init(""), cl::Hidden, 65 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 66 67 static StringSet<> CHRModules; 68 static StringSet<> CHRFunctions; 69 70 static void parseCHRFilterFiles() { 71 if (!CHRModuleList.empty()) { 72 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 73 if (!FileOrErr) { 74 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 75 std::exit(1); 76 } 77 StringRef Buf = FileOrErr->get()->getBuffer(); 78 SmallVector<StringRef, 0> Lines; 79 Buf.split(Lines, '\n'); 80 for (StringRef Line : Lines) { 81 Line = Line.trim(); 82 if (!Line.empty()) 83 CHRModules.insert(Line); 84 } 85 } 86 if (!CHRFunctionList.empty()) { 87 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 88 if (!FileOrErr) { 89 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 90 std::exit(1); 91 } 92 StringRef Buf = FileOrErr->get()->getBuffer(); 93 SmallVector<StringRef, 0> Lines; 94 Buf.split(Lines, '\n'); 95 for (StringRef Line : Lines) { 96 Line = Line.trim(); 97 if (!Line.empty()) 98 CHRFunctions.insert(Line); 99 } 100 } 101 } 102 103 namespace { 104 class ControlHeightReductionLegacyPass : public FunctionPass { 105 public: 106 static char ID; 107 108 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 109 initializeControlHeightReductionLegacyPassPass( 110 *PassRegistry::getPassRegistry()); 111 parseCHRFilterFiles(); 112 } 113 114 bool runOnFunction(Function &F) override; 115 void getAnalysisUsage(AnalysisUsage &AU) const override { 116 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 117 AU.addRequired<DominatorTreeWrapperPass>(); 118 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 119 AU.addRequired<RegionInfoPass>(); 120 AU.addPreserved<GlobalsAAWrapperPass>(); 121 } 122 }; 123 } // end anonymous namespace 124 125 char ControlHeightReductionLegacyPass::ID = 0; 126 127 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 128 "chr", 129 "Reduce control height in the hot paths", 130 false, false) 131 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 134 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 135 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 136 "chr", 137 "Reduce control height in the hot paths", 138 false, false) 139 140 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 141 return new ControlHeightReductionLegacyPass(); 142 } 143 144 namespace { 145 146 struct CHRStats { 147 CHRStats() : NumBranches(0), NumBranchesDelta(0), 148 WeightedNumBranchesDelta(0) {} 149 void print(raw_ostream &OS) const { 150 OS << "CHRStats: NumBranches " << NumBranches 151 << " NumBranchesDelta " << NumBranchesDelta 152 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 153 } 154 uint64_t NumBranches; // The original number of conditional branches / 155 // selects 156 uint64_t NumBranchesDelta; // The decrease of the number of conditional 157 // branches / selects in the hot paths due to CHR. 158 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 159 // count at the scope entry. 160 }; 161 162 // RegInfo - some properties of a Region. 163 struct RegInfo { 164 RegInfo() : R(nullptr), HasBranch(false) {} 165 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 166 Region *R; 167 bool HasBranch; 168 SmallVector<SelectInst *, 8> Selects; 169 }; 170 171 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 172 173 // CHRScope - a sequence of regions to CHR together. It corresponds to a 174 // sequence of conditional blocks. It can have subscopes which correspond to 175 // nested conditional blocks. Nested CHRScopes form a tree. 176 class CHRScope { 177 public: 178 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 179 assert(RI.R && "Null RegionIn"); 180 RegInfos.push_back(RI); 181 } 182 183 Region *getParentRegion() { 184 assert(RegInfos.size() > 0 && "Empty CHRScope"); 185 Region *Parent = RegInfos[0].R->getParent(); 186 assert(Parent && "Unexpected to call this on the top-level region"); 187 return Parent; 188 } 189 190 BasicBlock *getEntryBlock() { 191 assert(RegInfos.size() > 0 && "Empty CHRScope"); 192 return RegInfos.front().R->getEntry(); 193 } 194 195 BasicBlock *getExitBlock() { 196 assert(RegInfos.size() > 0 && "Empty CHRScope"); 197 return RegInfos.back().R->getExit(); 198 } 199 200 bool appendable(CHRScope *Next) { 201 // The next scope is appendable only if this scope is directly connected to 202 // it (which implies it post-dominates this scope) and this scope dominates 203 // it (no edge to the next scope outside this scope). 204 BasicBlock *NextEntry = Next->getEntryBlock(); 205 if (getExitBlock() != NextEntry) 206 // Not directly connected. 207 return false; 208 Region *LastRegion = RegInfos.back().R; 209 for (BasicBlock *Pred : predecessors(NextEntry)) 210 if (!LastRegion->contains(Pred)) 211 // There's an edge going into the entry of the next scope from outside 212 // of this scope. 213 return false; 214 return true; 215 } 216 217 void append(CHRScope *Next) { 218 assert(RegInfos.size() > 0 && "Empty CHRScope"); 219 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 220 assert(getParentRegion() == Next->getParentRegion() && 221 "Must be siblings"); 222 assert(getExitBlock() == Next->getEntryBlock() && 223 "Must be adjacent"); 224 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 225 Subs.append(Next->Subs.begin(), Next->Subs.end()); 226 } 227 228 void addSub(CHRScope *SubIn) { 229 #ifndef NDEBUG 230 bool IsChild = false; 231 for (RegInfo &RI : RegInfos) 232 if (RI.R == SubIn->getParentRegion()) { 233 IsChild = true; 234 break; 235 } 236 assert(IsChild && "Must be a child"); 237 #endif 238 Subs.push_back(SubIn); 239 } 240 241 // Split this scope at the boundary region into two, which will belong to the 242 // tail and returns the tail. 243 CHRScope *split(Region *Boundary) { 244 assert(Boundary && "Boundary null"); 245 assert(RegInfos.begin()->R != Boundary && 246 "Can't be split at beginning"); 247 auto BoundaryIt = llvm::find_if( 248 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 249 if (BoundaryIt == RegInfos.end()) 250 return nullptr; 251 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 252 DenseSet<Region *> TailRegionSet; 253 for (const RegInfo &RI : TailRegInfos) 254 TailRegionSet.insert(RI.R); 255 256 auto TailIt = 257 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 258 assert(Sub && "null Sub"); 259 Region *Parent = Sub->getParentRegion(); 260 if (TailRegionSet.count(Parent)) 261 return false; 262 263 assert(llvm::any_of( 264 RegInfos, 265 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 266 "Must be in head"); 267 return true; 268 }); 269 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 270 271 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 272 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 273 RegInfos.erase(BoundaryIt, RegInfos.end()); 274 Subs.erase(TailIt, Subs.end()); 275 return Scope; 276 } 277 278 bool contains(Instruction *I) const { 279 BasicBlock *Parent = I->getParent(); 280 for (const RegInfo &RI : RegInfos) 281 if (RI.R->contains(Parent)) 282 return true; 283 return false; 284 } 285 286 void print(raw_ostream &OS) const; 287 288 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 289 SmallVector<CHRScope *, 8> Subs; // Subscopes. 290 291 // The instruction at which to insert the CHR conditional branch (and hoist 292 // the dependent condition values). 293 Instruction *BranchInsertPoint; 294 295 // True-biased and false-biased regions (conditional blocks), 296 // respectively. Used only for the outermost scope and includes regions in 297 // subscopes. The rest are unbiased. 298 DenseSet<Region *> TrueBiasedRegions; 299 DenseSet<Region *> FalseBiasedRegions; 300 // Among the biased regions, the regions that get CHRed. 301 SmallVector<RegInfo, 8> CHRRegions; 302 303 // True-biased and false-biased selects, respectively. Used only for the 304 // outermost scope and includes ones in subscopes. 305 DenseSet<SelectInst *> TrueBiasedSelects; 306 DenseSet<SelectInst *> FalseBiasedSelects; 307 308 // Map from one of the above regions to the instructions to stop 309 // hoisting instructions at through use-def chains. 310 HoistStopMapTy HoistStopMap; 311 312 private: 313 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 314 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 315 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 316 }; 317 318 class CHR { 319 public: 320 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 321 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 322 OptimizationRemarkEmitter &OREin) 323 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 324 325 ~CHR() { 326 for (CHRScope *Scope : Scopes) { 327 delete Scope; 328 } 329 } 330 331 bool run(); 332 333 private: 334 // See the comments in CHR::run() for the high level flow of the algorithm and 335 // what the following functions do. 336 337 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 338 Region *R = RI.getTopLevelRegion(); 339 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 340 Output.push_back(Scope); 341 } 342 } 343 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 344 SmallVectorImpl<CHRScope *> &Scopes); 345 CHRScope *findScope(Region *R); 346 void checkScopeHoistable(CHRScope *Scope); 347 348 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 349 SmallVectorImpl<CHRScope *> &Output); 350 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 351 CHRScope *Outer, 352 DenseSet<Value *> *OuterConditionValues, 353 Instruction *OuterInsertPoint, 354 SmallVectorImpl<CHRScope *> &Output, 355 DenseSet<Instruction *> &Unhoistables); 356 357 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 358 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 359 360 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 361 SmallVectorImpl<CHRScope *> &Output); 362 363 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 364 SmallVectorImpl<CHRScope *> &Output); 365 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 366 367 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 368 SmallVectorImpl<CHRScope *> &Output); 369 370 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 371 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 372 void cloneScopeBlocks(CHRScope *Scope, 373 BasicBlock *PreEntryBlock, 374 BasicBlock *ExitBlock, 375 Region *LastRegion, 376 ValueToValueMapTy &VMap); 377 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 378 BasicBlock *EntryBlock, 379 BasicBlock *NewEntryBlock, 380 ValueToValueMapTy &VMap); 381 void fixupBranchesAndSelects(CHRScope *Scope, 382 BasicBlock *PreEntryBlock, 383 BranchInst *MergedBR, 384 uint64_t ProfileCount); 385 void fixupBranch(Region *R, 386 CHRScope *Scope, 387 IRBuilder<> &IRB, 388 Value *&MergedCondition, BranchProbability &CHRBranchBias); 389 void fixupSelect(SelectInst* SI, 390 CHRScope *Scope, 391 IRBuilder<> &IRB, 392 Value *&MergedCondition, BranchProbability &CHRBranchBias); 393 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 394 Instruction *BranchOrSelect, 395 CHRScope *Scope, 396 IRBuilder<> &IRB, 397 Value *&MergedCondition); 398 399 Function &F; 400 BlockFrequencyInfo &BFI; 401 DominatorTree &DT; 402 ProfileSummaryInfo &PSI; 403 RegionInfo &RI; 404 OptimizationRemarkEmitter &ORE; 405 CHRStats Stats; 406 407 // All the true-biased regions in the function 408 DenseSet<Region *> TrueBiasedRegionsGlobal; 409 // All the false-biased regions in the function 410 DenseSet<Region *> FalseBiasedRegionsGlobal; 411 // All the true-biased selects in the function 412 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 413 // All the false-biased selects in the function 414 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 415 // A map from biased regions to their branch bias 416 DenseMap<Region *, BranchProbability> BranchBiasMap; 417 // A map from biased selects to their branch bias 418 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 419 // All the scopes. 420 DenseSet<CHRScope *> Scopes; 421 }; 422 423 } // end anonymous namespace 424 425 static inline 426 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 427 const CHRStats &Stats) { 428 Stats.print(OS); 429 return OS; 430 } 431 432 static inline 433 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 434 Scope.print(OS); 435 return OS; 436 } 437 438 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 439 if (ForceCHR) 440 return true; 441 442 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 443 if (CHRModules.count(F.getParent()->getName())) 444 return true; 445 return CHRFunctions.count(F.getName()); 446 } 447 448 assert(PSI.hasProfileSummary() && "Empty PSI?"); 449 return PSI.isFunctionEntryHot(&F); 450 } 451 452 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 453 CHRStats *Stats) { 454 StringRef FuncName = F.getName(); 455 StringRef ModuleName = F.getParent()->getName(); 456 (void)(FuncName); // Unused in release build. 457 (void)(ModuleName); // Unused in release build. 458 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 459 << FuncName); 460 if (Stats) 461 CHR_DEBUG(dbgs() << " " << *Stats); 462 CHR_DEBUG(dbgs() << "\n"); 463 CHR_DEBUG(F.dump()); 464 } 465 466 void CHRScope::print(raw_ostream &OS) const { 467 assert(RegInfos.size() > 0 && "Empty CHRScope"); 468 OS << "CHRScope["; 469 OS << RegInfos.size() << ", Regions["; 470 for (const RegInfo &RI : RegInfos) { 471 OS << RI.R->getNameStr(); 472 if (RI.HasBranch) 473 OS << " B"; 474 if (RI.Selects.size() > 0) 475 OS << " S" << RI.Selects.size(); 476 OS << ", "; 477 } 478 if (RegInfos[0].R->getParent()) { 479 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 480 } else { 481 // top level region 482 OS << "]"; 483 } 484 OS << ", Subs["; 485 for (CHRScope *Sub : Subs) { 486 OS << *Sub << ", "; 487 } 488 OS << "]]"; 489 } 490 491 // Return true if the given instruction type can be hoisted by CHR. 492 static bool isHoistableInstructionType(Instruction *I) { 493 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 494 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 495 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 496 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 497 isa<InsertValueInst>(I); 498 } 499 500 // Return true if the given instruction can be hoisted by CHR. 501 static bool isHoistable(Instruction *I, DominatorTree &DT) { 502 if (!isHoistableInstructionType(I)) 503 return false; 504 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 505 } 506 507 // Recursively traverse the use-def chains of the given value and return a set 508 // of the unhoistable base values defined within the scope (excluding the 509 // first-region entry block) or the (hoistable or unhoistable) base values that 510 // are defined outside (including the first-region entry block) of the 511 // scope. The returned set doesn't include constants. 512 static const std::set<Value *> & 513 getBaseValues(Value *V, DominatorTree &DT, 514 DenseMap<Value *, std::set<Value *>> &Visited) { 515 auto It = Visited.find(V); 516 if (It != Visited.end()) { 517 return It->second; 518 } 519 std::set<Value *> Result; 520 if (auto *I = dyn_cast<Instruction>(V)) { 521 // We don't stop at a block that's not in the Scope because we would miss 522 // some instructions that are based on the same base values if we stop 523 // there. 524 if (!isHoistable(I, DT)) { 525 Result.insert(I); 526 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 527 } 528 // I is hoistable above the Scope. 529 for (Value *Op : I->operands()) { 530 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 531 Result.insert(OpResult.begin(), OpResult.end()); 532 } 533 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 534 } 535 if (isa<Argument>(V)) { 536 Result.insert(V); 537 } 538 // We don't include others like constants because those won't lead to any 539 // chance of folding of conditions (eg two bit checks merged into one check) 540 // after CHR. 541 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 542 } 543 544 // Return true if V is already hoisted or can be hoisted (along with its 545 // operands) above the insert point. When it returns true and HoistStops is 546 // non-null, the instructions to stop hoisting at through the use-def chains are 547 // inserted into HoistStops. 548 static bool 549 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 550 DenseSet<Instruction *> &Unhoistables, 551 DenseSet<Instruction *> *HoistStops, 552 DenseMap<Instruction *, bool> &Visited) { 553 assert(InsertPoint && "Null InsertPoint"); 554 if (auto *I = dyn_cast<Instruction>(V)) { 555 auto It = Visited.find(I); 556 if (It != Visited.end()) { 557 return It->second; 558 } 559 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 560 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 561 if (Unhoistables.count(I)) { 562 // Don't hoist if they are not to be hoisted. 563 Visited[I] = false; 564 return false; 565 } 566 if (DT.dominates(I, InsertPoint)) { 567 // We are already above the insert point. Stop here. 568 if (HoistStops) 569 HoistStops->insert(I); 570 Visited[I] = true; 571 return true; 572 } 573 // We aren't not above the insert point, check if we can hoist it above the 574 // insert point. 575 if (isHoistable(I, DT)) { 576 // Check operands first. 577 DenseSet<Instruction *> OpsHoistStops; 578 bool AllOpsHoisted = true; 579 for (Value *Op : I->operands()) { 580 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 581 Visited)) { 582 AllOpsHoisted = false; 583 break; 584 } 585 } 586 if (AllOpsHoisted) { 587 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 588 if (HoistStops) 589 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 590 Visited[I] = true; 591 return true; 592 } 593 } 594 Visited[I] = false; 595 return false; 596 } 597 // Non-instructions are considered hoistable. 598 return true; 599 } 600 601 // Returns true and sets the true probability and false probability of an 602 // MD_prof metadata if it's well-formed. 603 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 604 BranchProbability &FalseProb) { 605 if (!MD) return false; 606 MDString *MDName = cast<MDString>(MD->getOperand(0)); 607 if (MDName->getString() != "branch_weights" || 608 MD->getNumOperands() != 3) 609 return false; 610 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 611 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 612 if (!TrueWeight || !FalseWeight) 613 return false; 614 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 615 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 616 uint64_t SumWt = TrueWt + FalseWt; 617 618 assert(SumWt >= TrueWt && SumWt >= FalseWt && 619 "Overflow calculating branch probabilities."); 620 621 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 622 if (SumWt == 0) 623 return false; 624 625 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 626 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 627 return true; 628 } 629 630 static BranchProbability getCHRBiasThreshold() { 631 return BranchProbability::getBranchProbability( 632 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 633 } 634 635 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 636 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 637 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 638 // false. 639 template <typename K, typename S, typename M> 640 static bool checkBias(K *Key, BranchProbability TrueProb, 641 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 642 M &BiasMap) { 643 BranchProbability Threshold = getCHRBiasThreshold(); 644 if (TrueProb >= Threshold) { 645 TrueSet.insert(Key); 646 BiasMap[Key] = TrueProb; 647 return true; 648 } else if (FalseProb >= Threshold) { 649 FalseSet.insert(Key); 650 BiasMap[Key] = FalseProb; 651 return true; 652 } 653 return false; 654 } 655 656 // Returns true and insert a region into the right biased set and the map if the 657 // branch of the region is biased. 658 static bool checkBiasedBranch(BranchInst *BI, Region *R, 659 DenseSet<Region *> &TrueBiasedRegionsGlobal, 660 DenseSet<Region *> &FalseBiasedRegionsGlobal, 661 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 662 if (!BI->isConditional()) 663 return false; 664 BranchProbability ThenProb, ElseProb; 665 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 666 ThenProb, ElseProb)) 667 return false; 668 BasicBlock *IfThen = BI->getSuccessor(0); 669 BasicBlock *IfElse = BI->getSuccessor(1); 670 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 671 IfThen != IfElse && 672 "Invariant from findScopes"); 673 if (IfThen == R->getExit()) { 674 // Swap them so that IfThen/ThenProb means going into the conditional code 675 // and IfElse/ElseProb means skipping it. 676 std::swap(IfThen, IfElse); 677 std::swap(ThenProb, ElseProb); 678 } 679 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 680 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 681 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 682 return checkBias(R, ThenProb, ElseProb, 683 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 684 BranchBiasMap); 685 } 686 687 // Returns true and insert a select into the right biased set and the map if the 688 // select is biased. 689 static bool checkBiasedSelect( 690 SelectInst *SI, Region *R, 691 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 692 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 693 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 694 BranchProbability TrueProb, FalseProb; 695 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 696 TrueProb, FalseProb)) 697 return false; 698 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 699 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 700 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 701 return checkBias(SI, TrueProb, FalseProb, 702 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 703 SelectBiasMap); 704 } 705 706 // Returns the instruction at which to hoist the dependent condition values and 707 // insert the CHR branch for a region. This is the terminator branch in the 708 // entry block or the first select in the entry block, if any. 709 static Instruction* getBranchInsertPoint(RegInfo &RI) { 710 Region *R = RI.R; 711 BasicBlock *EntryBB = R->getEntry(); 712 // The hoist point is by default the terminator of the entry block, which is 713 // the same as the branch instruction if RI.HasBranch is true. 714 Instruction *HoistPoint = EntryBB->getTerminator(); 715 for (SelectInst *SI : RI.Selects) { 716 if (SI->getParent() == EntryBB) { 717 // Pick the first select in Selects in the entry block. Note Selects is 718 // sorted in the instruction order within a block (asserted below). 719 HoistPoint = SI; 720 break; 721 } 722 } 723 assert(HoistPoint && "Null HoistPoint"); 724 #ifndef NDEBUG 725 // Check that HoistPoint is the first one in Selects in the entry block, 726 // if any. 727 DenseSet<Instruction *> EntryBlockSelectSet; 728 for (SelectInst *SI : RI.Selects) { 729 if (SI->getParent() == EntryBB) { 730 EntryBlockSelectSet.insert(SI); 731 } 732 } 733 for (Instruction &I : *EntryBB) { 734 if (EntryBlockSelectSet.contains(&I)) { 735 assert(&I == HoistPoint && 736 "HoistPoint must be the first one in Selects"); 737 break; 738 } 739 } 740 #endif 741 return HoistPoint; 742 } 743 744 // Find a CHR scope in the given region. 745 CHRScope * CHR::findScope(Region *R) { 746 CHRScope *Result = nullptr; 747 BasicBlock *Entry = R->getEntry(); 748 BasicBlock *Exit = R->getExit(); // null if top level. 749 assert(Entry && "Entry must not be null"); 750 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 751 "Only top level region has a null exit"); 752 if (Entry) 753 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 754 else 755 CHR_DEBUG(dbgs() << "Entry null\n"); 756 if (Exit) 757 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 758 else 759 CHR_DEBUG(dbgs() << "Exit null\n"); 760 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 761 // to this region). 762 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 763 if (EntryInSubregion) 764 return nullptr; 765 // Exclude loops 766 for (BasicBlock *Pred : predecessors(Entry)) 767 if (R->contains(Pred)) 768 return nullptr; 769 if (Exit) { 770 // Try to find an if-then block (check if R is an if-then). 771 // if (cond) { 772 // ... 773 // } 774 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 775 if (BI) 776 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 777 else 778 CHR_DEBUG(dbgs() << "BI null\n"); 779 if (BI && BI->isConditional()) { 780 BasicBlock *S0 = BI->getSuccessor(0); 781 BasicBlock *S1 = BI->getSuccessor(1); 782 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 783 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 784 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 785 RegInfo RI(R); 786 RI.HasBranch = checkBiasedBranch( 787 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 788 BranchBiasMap); 789 Result = new CHRScope(RI); 790 Scopes.insert(Result); 791 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 792 ++Stats.NumBranches; 793 if (!RI.HasBranch) { 794 ORE.emit([&]() { 795 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 796 << "Branch not biased"; 797 }); 798 } 799 } 800 } 801 } 802 { 803 // Try to look for selects in the direct child blocks (as opposed to in 804 // subregions) of R. 805 // ... 806 // if (..) { // Some subregion 807 // ... 808 // } 809 // if (..) { // Some subregion 810 // ... 811 // } 812 // ... 813 // a = cond ? b : c; 814 // ... 815 SmallVector<SelectInst *, 8> Selects; 816 for (RegionNode *E : R->elements()) { 817 if (E->isSubRegion()) 818 continue; 819 // This returns the basic block of E if E is a direct child of R (not a 820 // subregion.) 821 BasicBlock *BB = E->getEntry(); 822 // Need to push in the order to make it easier to find the first Select 823 // later. 824 for (Instruction &I : *BB) { 825 if (auto *SI = dyn_cast<SelectInst>(&I)) { 826 Selects.push_back(SI); 827 ++Stats.NumBranches; 828 } 829 } 830 } 831 if (Selects.size() > 0) { 832 auto AddSelects = [&](RegInfo &RI) { 833 for (auto *SI : Selects) 834 if (checkBiasedSelect(SI, RI.R, 835 TrueBiasedSelectsGlobal, 836 FalseBiasedSelectsGlobal, 837 SelectBiasMap)) 838 RI.Selects.push_back(SI); 839 else 840 ORE.emit([&]() { 841 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 842 << "Select not biased"; 843 }); 844 }; 845 if (!Result) { 846 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 847 RegInfo RI(R); 848 AddSelects(RI); 849 Result = new CHRScope(RI); 850 Scopes.insert(Result); 851 } else { 852 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 853 AddSelects(Result->RegInfos[0]); 854 } 855 } 856 } 857 858 if (Result) { 859 checkScopeHoistable(Result); 860 } 861 return Result; 862 } 863 864 // Check that any of the branch and the selects in the region could be 865 // hoisted above the the CHR branch insert point (the most dominating of 866 // them, either the branch (at the end of the first block) or the first 867 // select in the first block). If the branch can't be hoisted, drop the 868 // selects in the first blocks. 869 // 870 // For example, for the following scope/region with selects, we want to insert 871 // the merged branch right before the first select in the first/entry block by 872 // hoisting c1, c2, c3, and c4. 873 // 874 // // Branch insert point here. 875 // a = c1 ? b : c; // Select 1 876 // d = c2 ? e : f; // Select 2 877 // if (c3) { // Branch 878 // ... 879 // c4 = foo() // A call. 880 // g = c4 ? h : i; // Select 3 881 // } 882 // 883 // But suppose we can't hoist c4 because it's dependent on the preceding 884 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 885 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 886 void CHR::checkScopeHoistable(CHRScope *Scope) { 887 RegInfo &RI = Scope->RegInfos[0]; 888 Region *R = RI.R; 889 BasicBlock *EntryBB = R->getEntry(); 890 auto *Branch = RI.HasBranch ? 891 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 892 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 893 if (RI.HasBranch || !Selects.empty()) { 894 Instruction *InsertPoint = getBranchInsertPoint(RI); 895 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 896 // Avoid a data dependence from a select or a branch to a(nother) 897 // select. Note no instruction can't data-depend on a branch (a branch 898 // instruction doesn't produce a value). 899 DenseSet<Instruction *> Unhoistables; 900 // Initialize Unhoistables with the selects. 901 for (SelectInst *SI : Selects) { 902 Unhoistables.insert(SI); 903 } 904 // Remove Selects that can't be hoisted. 905 for (auto it = Selects.begin(); it != Selects.end(); ) { 906 SelectInst *SI = *it; 907 if (SI == InsertPoint) { 908 ++it; 909 continue; 910 } 911 DenseMap<Instruction *, bool> Visited; 912 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 913 DT, Unhoistables, nullptr, Visited); 914 if (!IsHoistable) { 915 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 916 ORE.emit([&]() { 917 return OptimizationRemarkMissed(DEBUG_TYPE, 918 "DropUnhoistableSelect", SI) 919 << "Dropped unhoistable select"; 920 }); 921 it = Selects.erase(it); 922 // Since we are dropping the select here, we also drop it from 923 // Unhoistables. 924 Unhoistables.erase(SI); 925 } else 926 ++it; 927 } 928 // Update InsertPoint after potentially removing selects. 929 InsertPoint = getBranchInsertPoint(RI); 930 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 931 if (RI.HasBranch && InsertPoint != Branch) { 932 DenseMap<Instruction *, bool> Visited; 933 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 934 DT, Unhoistables, nullptr, Visited); 935 if (!IsHoistable) { 936 // If the branch isn't hoistable, drop the selects in the entry 937 // block, preferring the branch, which makes the branch the hoist 938 // point. 939 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 940 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 941 CHR_DEBUG( 942 for (SelectInst *SI : Selects) { 943 dbgs() << "SI " << *SI << "\n"; 944 }); 945 for (SelectInst *SI : Selects) { 946 ORE.emit([&]() { 947 return OptimizationRemarkMissed(DEBUG_TYPE, 948 "DropSelectUnhoistableBranch", SI) 949 << "Dropped select due to unhoistable branch"; 950 }); 951 } 952 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 953 return SI->getParent() == EntryBB; 954 }); 955 Unhoistables.clear(); 956 InsertPoint = Branch; 957 } 958 } 959 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 960 #ifndef NDEBUG 961 if (RI.HasBranch) { 962 assert(!DT.dominates(Branch, InsertPoint) && 963 "Branch can't be already above the hoist point"); 964 DenseMap<Instruction *, bool> Visited; 965 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 966 DT, Unhoistables, nullptr, Visited) && 967 "checkHoistValue for branch"); 968 } 969 for (auto *SI : Selects) { 970 assert(!DT.dominates(SI, InsertPoint) && 971 "SI can't be already above the hoist point"); 972 DenseMap<Instruction *, bool> Visited; 973 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 974 Unhoistables, nullptr, Visited) && 975 "checkHoistValue for selects"); 976 } 977 CHR_DEBUG(dbgs() << "Result\n"); 978 if (RI.HasBranch) { 979 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 980 } 981 for (auto *SI : Selects) { 982 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 983 } 984 #endif 985 } 986 } 987 988 // Traverse the region tree, find all nested scopes and merge them if possible. 989 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 990 SmallVectorImpl<CHRScope *> &Scopes) { 991 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 992 CHRScope *Result = findScope(R); 993 // Visit subscopes. 994 CHRScope *ConsecutiveSubscope = nullptr; 995 SmallVector<CHRScope *, 8> Subscopes; 996 for (auto It = R->begin(); It != R->end(); ++It) { 997 const std::unique_ptr<Region> &SubR = *It; 998 auto NextIt = std::next(It); 999 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 1000 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 1001 << "\n"); 1002 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 1003 if (SubCHRScope) { 1004 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 1005 } else { 1006 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 1007 } 1008 if (SubCHRScope) { 1009 if (!ConsecutiveSubscope) 1010 ConsecutiveSubscope = SubCHRScope; 1011 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 1012 Subscopes.push_back(ConsecutiveSubscope); 1013 ConsecutiveSubscope = SubCHRScope; 1014 } else 1015 ConsecutiveSubscope->append(SubCHRScope); 1016 } else { 1017 if (ConsecutiveSubscope) { 1018 Subscopes.push_back(ConsecutiveSubscope); 1019 } 1020 ConsecutiveSubscope = nullptr; 1021 } 1022 } 1023 if (ConsecutiveSubscope) { 1024 Subscopes.push_back(ConsecutiveSubscope); 1025 } 1026 for (CHRScope *Sub : Subscopes) { 1027 if (Result) { 1028 // Combine it with the parent. 1029 Result->addSub(Sub); 1030 } else { 1031 // Push Subscopes as they won't be combined with the parent. 1032 Scopes.push_back(Sub); 1033 } 1034 } 1035 return Result; 1036 } 1037 1038 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1039 DenseSet<Value *> ConditionValues; 1040 if (RI.HasBranch) { 1041 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1042 ConditionValues.insert(BI->getCondition()); 1043 } 1044 for (SelectInst *SI : RI.Selects) { 1045 ConditionValues.insert(SI->getCondition()); 1046 } 1047 return ConditionValues; 1048 } 1049 1050 1051 // Determine whether to split a scope depending on the sets of the branch 1052 // condition values of the previous region and the current region. We split 1053 // (return true) it if 1) the condition values of the inner/lower scope can't be 1054 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1055 // values have an empty intersection (because the combined branch conditions 1056 // won't probably lead to a simpler combined condition). 1057 static bool shouldSplit(Instruction *InsertPoint, 1058 DenseSet<Value *> &PrevConditionValues, 1059 DenseSet<Value *> &ConditionValues, 1060 DominatorTree &DT, 1061 DenseSet<Instruction *> &Unhoistables) { 1062 assert(InsertPoint && "Null InsertPoint"); 1063 CHR_DEBUG( 1064 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1065 for (Value *V : PrevConditionValues) { 1066 dbgs() << *V << ", "; 1067 } 1068 dbgs() << " ConditionValues "; 1069 for (Value *V : ConditionValues) { 1070 dbgs() << *V << ", "; 1071 } 1072 dbgs() << "\n"); 1073 // If any of Bases isn't hoistable to the hoist point, split. 1074 for (Value *V : ConditionValues) { 1075 DenseMap<Instruction *, bool> Visited; 1076 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1077 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1078 return true; // Not hoistable, split. 1079 } 1080 } 1081 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1082 // unnecessary splits at scopes with no branch/selects. If 1083 // PrevConditionValues and ConditionValues don't intersect at all, split. 1084 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1085 // Use std::set as DenseSet doesn't work with set_intersection. 1086 std::set<Value *> PrevBases, Bases; 1087 DenseMap<Value *, std::set<Value *>> Visited; 1088 for (Value *V : PrevConditionValues) { 1089 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1090 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1091 } 1092 for (Value *V : ConditionValues) { 1093 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1094 Bases.insert(BaseValues.begin(), BaseValues.end()); 1095 } 1096 CHR_DEBUG( 1097 dbgs() << "PrevBases "; 1098 for (Value *V : PrevBases) { 1099 dbgs() << *V << ", "; 1100 } 1101 dbgs() << " Bases "; 1102 for (Value *V : Bases) { 1103 dbgs() << *V << ", "; 1104 } 1105 dbgs() << "\n"); 1106 std::vector<Value *> Intersection; 1107 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1108 Bases.end(), std::back_inserter(Intersection)); 1109 if (Intersection.empty()) { 1110 // Empty intersection, split. 1111 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1112 return true; 1113 } 1114 } 1115 CHR_DEBUG(dbgs() << "No split\n"); 1116 return false; // Don't split. 1117 } 1118 1119 static void getSelectsInScope(CHRScope *Scope, 1120 DenseSet<Instruction *> &Output) { 1121 for (RegInfo &RI : Scope->RegInfos) 1122 for (SelectInst *SI : RI.Selects) 1123 Output.insert(SI); 1124 for (CHRScope *Sub : Scope->Subs) 1125 getSelectsInScope(Sub, Output); 1126 } 1127 1128 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1129 SmallVectorImpl<CHRScope *> &Output) { 1130 for (CHRScope *Scope : Input) { 1131 assert(!Scope->BranchInsertPoint && 1132 "BranchInsertPoint must not be set"); 1133 DenseSet<Instruction *> Unhoistables; 1134 getSelectsInScope(Scope, Unhoistables); 1135 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1136 } 1137 #ifndef NDEBUG 1138 for (CHRScope *Scope : Output) { 1139 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1140 } 1141 #endif 1142 } 1143 1144 SmallVector<CHRScope *, 8> CHR::splitScope( 1145 CHRScope *Scope, 1146 CHRScope *Outer, 1147 DenseSet<Value *> *OuterConditionValues, 1148 Instruction *OuterInsertPoint, 1149 SmallVectorImpl<CHRScope *> &Output, 1150 DenseSet<Instruction *> &Unhoistables) { 1151 if (Outer) { 1152 assert(OuterConditionValues && "Null OuterConditionValues"); 1153 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1154 } 1155 bool PrevSplitFromOuter = true; 1156 DenseSet<Value *> PrevConditionValues; 1157 Instruction *PrevInsertPoint = nullptr; 1158 SmallVector<CHRScope *, 8> Splits; 1159 SmallVector<bool, 8> SplitsSplitFromOuter; 1160 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1161 SmallVector<Instruction *, 8> SplitsInsertPoints; 1162 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1163 for (RegInfo &RI : RegInfos) { 1164 Instruction *InsertPoint = getBranchInsertPoint(RI); 1165 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1166 CHR_DEBUG( 1167 dbgs() << "ConditionValues "; 1168 for (Value *V : ConditionValues) { 1169 dbgs() << *V << ", "; 1170 } 1171 dbgs() << "\n"); 1172 if (RI.R == RegInfos[0].R) { 1173 // First iteration. Check to see if we should split from the outer. 1174 if (Outer) { 1175 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1176 CHR_DEBUG(dbgs() << "Should split from outer at " 1177 << RI.R->getNameStr() << "\n"); 1178 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1179 ConditionValues, DT, Unhoistables)) { 1180 PrevConditionValues = ConditionValues; 1181 PrevInsertPoint = InsertPoint; 1182 ORE.emit([&]() { 1183 return OptimizationRemarkMissed(DEBUG_TYPE, 1184 "SplitScopeFromOuter", 1185 RI.R->getEntry()->getTerminator()) 1186 << "Split scope from outer due to unhoistable branch/select " 1187 << "and/or lack of common condition values"; 1188 }); 1189 } else { 1190 // Not splitting from the outer. Use the outer bases and insert 1191 // point. Union the bases. 1192 PrevSplitFromOuter = false; 1193 PrevConditionValues = *OuterConditionValues; 1194 PrevConditionValues.insert(ConditionValues.begin(), 1195 ConditionValues.end()); 1196 PrevInsertPoint = OuterInsertPoint; 1197 } 1198 } else { 1199 CHR_DEBUG(dbgs() << "Outer null\n"); 1200 PrevConditionValues = ConditionValues; 1201 PrevInsertPoint = InsertPoint; 1202 } 1203 } else { 1204 CHR_DEBUG(dbgs() << "Should split from prev at " 1205 << RI.R->getNameStr() << "\n"); 1206 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1207 DT, Unhoistables)) { 1208 CHRScope *Tail = Scope->split(RI.R); 1209 Scopes.insert(Tail); 1210 Splits.push_back(Scope); 1211 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1212 SplitsConditionValues.push_back(PrevConditionValues); 1213 SplitsInsertPoints.push_back(PrevInsertPoint); 1214 Scope = Tail; 1215 PrevConditionValues = ConditionValues; 1216 PrevInsertPoint = InsertPoint; 1217 PrevSplitFromOuter = true; 1218 ORE.emit([&]() { 1219 return OptimizationRemarkMissed(DEBUG_TYPE, 1220 "SplitScopeFromPrev", 1221 RI.R->getEntry()->getTerminator()) 1222 << "Split scope from previous due to unhoistable branch/select " 1223 << "and/or lack of common condition values"; 1224 }); 1225 } else { 1226 // Not splitting. Union the bases. Keep the hoist point. 1227 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1228 } 1229 } 1230 } 1231 Splits.push_back(Scope); 1232 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1233 SplitsConditionValues.push_back(PrevConditionValues); 1234 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1235 SplitsInsertPoints.push_back(PrevInsertPoint); 1236 assert(Splits.size() == SplitsConditionValues.size() && 1237 Splits.size() == SplitsSplitFromOuter.size() && 1238 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1239 for (size_t I = 0; I < Splits.size(); ++I) { 1240 CHRScope *Split = Splits[I]; 1241 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1242 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1243 SmallVector<CHRScope *, 8> NewSubs; 1244 DenseSet<Instruction *> SplitUnhoistables; 1245 getSelectsInScope(Split, SplitUnhoistables); 1246 for (CHRScope *Sub : Split->Subs) { 1247 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1248 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1249 SplitUnhoistables); 1250 llvm::append_range(NewSubs, SubSplits); 1251 } 1252 Split->Subs = NewSubs; 1253 } 1254 SmallVector<CHRScope *, 8> Result; 1255 for (size_t I = 0; I < Splits.size(); ++I) { 1256 CHRScope *Split = Splits[I]; 1257 if (SplitsSplitFromOuter[I]) { 1258 // Split from the outer. 1259 Output.push_back(Split); 1260 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1261 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1262 << "\n"); 1263 } else { 1264 // Connected to the outer. 1265 Result.push_back(Split); 1266 } 1267 } 1268 if (!Outer) 1269 assert(Result.empty() && 1270 "If no outer (top-level), must return no nested ones"); 1271 return Result; 1272 } 1273 1274 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1275 for (CHRScope *Scope : Scopes) { 1276 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1277 classifyBiasedScopes(Scope, Scope); 1278 CHR_DEBUG( 1279 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1280 dbgs() << "TrueBiasedRegions "; 1281 for (Region *R : Scope->TrueBiasedRegions) { 1282 dbgs() << R->getNameStr() << ", "; 1283 } 1284 dbgs() << "\n"; 1285 dbgs() << "FalseBiasedRegions "; 1286 for (Region *R : Scope->FalseBiasedRegions) { 1287 dbgs() << R->getNameStr() << ", "; 1288 } 1289 dbgs() << "\n"; 1290 dbgs() << "TrueBiasedSelects "; 1291 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1292 dbgs() << *SI << ", "; 1293 } 1294 dbgs() << "\n"; 1295 dbgs() << "FalseBiasedSelects "; 1296 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1297 dbgs() << *SI << ", "; 1298 } 1299 dbgs() << "\n";); 1300 } 1301 } 1302 1303 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1304 for (RegInfo &RI : Scope->RegInfos) { 1305 if (RI.HasBranch) { 1306 Region *R = RI.R; 1307 if (TrueBiasedRegionsGlobal.contains(R)) 1308 OutermostScope->TrueBiasedRegions.insert(R); 1309 else if (FalseBiasedRegionsGlobal.contains(R)) 1310 OutermostScope->FalseBiasedRegions.insert(R); 1311 else 1312 llvm_unreachable("Must be biased"); 1313 } 1314 for (SelectInst *SI : RI.Selects) { 1315 if (TrueBiasedSelectsGlobal.contains(SI)) 1316 OutermostScope->TrueBiasedSelects.insert(SI); 1317 else if (FalseBiasedSelectsGlobal.contains(SI)) 1318 OutermostScope->FalseBiasedSelects.insert(SI); 1319 else 1320 llvm_unreachable("Must be biased"); 1321 } 1322 } 1323 for (CHRScope *Sub : Scope->Subs) { 1324 classifyBiasedScopes(Sub, OutermostScope); 1325 } 1326 } 1327 1328 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1329 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1330 Scope->FalseBiasedRegions.size() + 1331 Scope->TrueBiasedSelects.size() + 1332 Scope->FalseBiasedSelects.size(); 1333 return NumBiased >= CHRMergeThreshold; 1334 } 1335 1336 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1337 SmallVectorImpl<CHRScope *> &Output) { 1338 for (CHRScope *Scope : Input) { 1339 // Filter out the ones with only one region and no subs. 1340 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1341 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1342 << Scope->TrueBiasedRegions.size() 1343 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1344 << " true-selects " << Scope->TrueBiasedSelects.size() 1345 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1346 ORE.emit([&]() { 1347 return OptimizationRemarkMissed( 1348 DEBUG_TYPE, 1349 "DropScopeWithOneBranchOrSelect", 1350 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1351 << "Drop scope with < " 1352 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1353 << " biased branch(es) or select(s)"; 1354 }); 1355 continue; 1356 } 1357 Output.push_back(Scope); 1358 } 1359 } 1360 1361 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1362 SmallVectorImpl<CHRScope *> &Output) { 1363 for (CHRScope *Scope : Input) { 1364 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1365 "Empty"); 1366 setCHRRegions(Scope, Scope); 1367 Output.push_back(Scope); 1368 CHR_DEBUG( 1369 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1370 for (auto pair : Scope->HoistStopMap) { 1371 Region *R = pair.first; 1372 dbgs() << "Region " << R->getNameStr() << "\n"; 1373 for (Instruction *I : pair.second) { 1374 dbgs() << "HoistStop " << *I << "\n"; 1375 } 1376 } 1377 dbgs() << "CHRRegions" << "\n"; 1378 for (RegInfo &RI : Scope->CHRRegions) { 1379 dbgs() << RI.R->getNameStr() << "\n"; 1380 }); 1381 } 1382 } 1383 1384 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1385 DenseSet<Instruction *> Unhoistables; 1386 // Put the biased selects in Unhoistables because they should stay where they 1387 // are and constant-folded after CHR (in case one biased select or a branch 1388 // can depend on another biased select.) 1389 for (RegInfo &RI : Scope->RegInfos) { 1390 for (SelectInst *SI : RI.Selects) { 1391 Unhoistables.insert(SI); 1392 } 1393 } 1394 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1395 for (RegInfo &RI : Scope->RegInfos) { 1396 Region *R = RI.R; 1397 DenseSet<Instruction *> HoistStops; 1398 bool IsHoisted = false; 1399 if (RI.HasBranch) { 1400 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1401 OutermostScope->FalseBiasedRegions.contains(R)) && 1402 "Must be truthy or falsy"); 1403 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1404 // Note checkHoistValue fills in HoistStops. 1405 DenseMap<Instruction *, bool> Visited; 1406 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1407 Unhoistables, &HoistStops, Visited); 1408 assert(IsHoistable && "Must be hoistable"); 1409 (void)(IsHoistable); // Unused in release build 1410 IsHoisted = true; 1411 } 1412 for (SelectInst *SI : RI.Selects) { 1413 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1414 OutermostScope->FalseBiasedSelects.contains(SI)) && 1415 "Must be true or false biased"); 1416 // Note checkHoistValue fills in HoistStops. 1417 DenseMap<Instruction *, bool> Visited; 1418 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1419 Unhoistables, &HoistStops, Visited); 1420 assert(IsHoistable && "Must be hoistable"); 1421 (void)(IsHoistable); // Unused in release build 1422 IsHoisted = true; 1423 } 1424 if (IsHoisted) { 1425 OutermostScope->CHRRegions.push_back(RI); 1426 OutermostScope->HoistStopMap[R] = HoistStops; 1427 } 1428 } 1429 for (CHRScope *Sub : Scope->Subs) 1430 setCHRRegions(Sub, OutermostScope); 1431 } 1432 1433 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1434 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1435 } 1436 1437 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1438 SmallVectorImpl<CHRScope *> &Output) { 1439 Output.resize(Input.size()); 1440 llvm::copy(Input, Output.begin()); 1441 llvm::stable_sort(Output, CHRScopeSorter); 1442 } 1443 1444 // Return true if V is already hoisted or was hoisted (along with its operands) 1445 // to the insert point. 1446 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1447 HoistStopMapTy &HoistStopMap, 1448 DenseSet<Instruction *> &HoistedSet, 1449 DenseSet<PHINode *> &TrivialPHIs, 1450 DominatorTree &DT) { 1451 auto IT = HoistStopMap.find(R); 1452 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1453 DenseSet<Instruction *> &HoistStops = IT->second; 1454 if (auto *I = dyn_cast<Instruction>(V)) { 1455 if (I == HoistPoint) 1456 return; 1457 if (HoistStops.count(I)) 1458 return; 1459 if (auto *PN = dyn_cast<PHINode>(I)) 1460 if (TrivialPHIs.count(PN)) 1461 // The trivial phi inserted by the previous CHR scope could replace a 1462 // non-phi in HoistStops. Note that since this phi is at the exit of a 1463 // previous CHR scope, which dominates this scope, it's safe to stop 1464 // hoisting there. 1465 return; 1466 if (HoistedSet.count(I)) 1467 // Already hoisted, return. 1468 return; 1469 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1470 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1471 assert(DT.getNode(HoistPoint->getParent()) && 1472 "DT must contain HoistPoint block"); 1473 if (DT.dominates(I, HoistPoint)) 1474 // We are already above the hoist point. Stop here. This may be necessary 1475 // when multiple scopes would independently hoist the same 1476 // instruction. Since an outer (dominating) scope would hoist it to its 1477 // entry before an inner (dominated) scope would to its entry, the inner 1478 // scope may see the instruction already hoisted, in which case it 1479 // potentially wrong for the inner scope to hoist it and could cause bad 1480 // IR (non-dominating def), but safe to skip hoisting it instead because 1481 // it's already in a block that dominates the inner scope. 1482 return; 1483 for (Value *Op : I->operands()) { 1484 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1485 } 1486 I->moveBefore(HoistPoint); 1487 HoistedSet.insert(I); 1488 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1489 } 1490 } 1491 1492 // Hoist the dependent condition values of the branches and the selects in the 1493 // scope to the insert point. 1494 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1495 DenseSet<PHINode *> &TrivialPHIs, 1496 DominatorTree &DT) { 1497 DenseSet<Instruction *> HoistedSet; 1498 for (const RegInfo &RI : Scope->CHRRegions) { 1499 Region *R = RI.R; 1500 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1501 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1502 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1503 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1504 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1505 HoistedSet, TrivialPHIs, DT); 1506 } 1507 for (SelectInst *SI : RI.Selects) { 1508 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1509 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1510 if (!(IsTrueBiased || IsFalseBiased)) 1511 continue; 1512 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1513 HoistedSet, TrivialPHIs, DT); 1514 } 1515 } 1516 } 1517 1518 // Negate the predicate if an ICmp if it's used only by branches or selects by 1519 // swapping the operands of the branches or the selects. Returns true if success. 1520 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1521 Instruction *ExcludedUser, 1522 CHRScope *Scope) { 1523 for (User *U : ICmp->users()) { 1524 if (U == ExcludedUser) 1525 continue; 1526 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1527 continue; 1528 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1529 continue; 1530 return false; 1531 } 1532 for (User *U : ICmp->users()) { 1533 if (U == ExcludedUser) 1534 continue; 1535 if (auto *BI = dyn_cast<BranchInst>(U)) { 1536 assert(BI->isConditional() && "Must be conditional"); 1537 BI->swapSuccessors(); 1538 // Don't need to swap this in terms of 1539 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1540 // mean whehter the branch is likely go into the if-then rather than 1541 // successor0/successor1 and because we can tell which edge is the then or 1542 // the else one by comparing the destination to the region exit block. 1543 continue; 1544 } 1545 if (auto *SI = dyn_cast<SelectInst>(U)) { 1546 // Swap operands 1547 SI->swapValues(); 1548 SI->swapProfMetadata(); 1549 if (Scope->TrueBiasedSelects.count(SI)) { 1550 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1551 "Must not be already in"); 1552 Scope->FalseBiasedSelects.insert(SI); 1553 } else if (Scope->FalseBiasedSelects.count(SI)) { 1554 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1555 "Must not be already in"); 1556 Scope->TrueBiasedSelects.insert(SI); 1557 } 1558 continue; 1559 } 1560 llvm_unreachable("Must be a branch or a select"); 1561 } 1562 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1563 return true; 1564 } 1565 1566 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1567 // for a value that's defined in the scope but used outside it (meaning it's 1568 // alive at the exit block). 1569 static void insertTrivialPHIs(CHRScope *Scope, 1570 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1571 DenseSet<PHINode *> &TrivialPHIs) { 1572 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1573 for (RegInfo &RI : Scope->RegInfos) { 1574 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1575 // sub-Scopes. 1576 BlocksInScope.insert(BB); 1577 } 1578 } 1579 CHR_DEBUG({ 1580 dbgs() << "Inserting redundant phis\n"; 1581 for (BasicBlock *BB : BlocksInScope) 1582 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1583 }); 1584 for (BasicBlock *BB : BlocksInScope) { 1585 for (Instruction &I : *BB) { 1586 SmallVector<Instruction *, 8> Users; 1587 for (User *U : I.users()) { 1588 if (auto *UI = dyn_cast<Instruction>(U)) { 1589 if (BlocksInScope.count(UI->getParent()) == 0 && 1590 // Unless there's already a phi for I at the exit block. 1591 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1592 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1593 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1594 Users.push_back(UI); 1595 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1596 // There's a loop backedge from a block that's dominated by this 1597 // scope to the entry block. 1598 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1599 CHR_DEBUG(dbgs() 1600 << "Used at entry block (for a back edge) by a phi user " 1601 << *UI << "\n"); 1602 Users.push_back(UI); 1603 } 1604 } 1605 } 1606 if (Users.size() > 0) { 1607 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1608 // ExitBlock. Replace I with the new phi in UI unless UI is another 1609 // phi at ExitBlock. 1610 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 1611 &ExitBlock->front()); 1612 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1613 PN->addIncoming(&I, Pred); 1614 } 1615 TrivialPHIs.insert(PN); 1616 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1617 for (Instruction *UI : Users) { 1618 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1619 if (UI->getOperand(J) == &I) { 1620 UI->setOperand(J, PN); 1621 } 1622 } 1623 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1624 } 1625 } 1626 } 1627 } 1628 } 1629 1630 // Assert that all the CHR regions of the scope have a biased branch or select. 1631 static void LLVM_ATTRIBUTE_UNUSED 1632 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1633 #ifndef NDEBUG 1634 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1635 if (Scope->TrueBiasedRegions.count(RI.R) || 1636 Scope->FalseBiasedRegions.count(RI.R)) 1637 return true; 1638 for (SelectInst *SI : RI.Selects) 1639 if (Scope->TrueBiasedSelects.count(SI) || 1640 Scope->FalseBiasedSelects.count(SI)) 1641 return true; 1642 return false; 1643 }; 1644 for (RegInfo &RI : Scope->CHRRegions) { 1645 assert(HasBiasedBranchOrSelect(RI, Scope) && 1646 "Must have biased branch or select"); 1647 } 1648 #endif 1649 } 1650 1651 // Assert that all the condition values of the biased branches and selects have 1652 // been hoisted to the pre-entry block or outside of the scope. 1653 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1654 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1655 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1656 for (RegInfo &RI : Scope->CHRRegions) { 1657 Region *R = RI.R; 1658 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1659 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1660 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1661 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1662 Value *V = BI->getCondition(); 1663 CHR_DEBUG(dbgs() << *V << "\n"); 1664 if (auto *I = dyn_cast<Instruction>(V)) { 1665 (void)(I); // Unused in release build. 1666 assert((I->getParent() == PreEntryBlock || 1667 !Scope->contains(I)) && 1668 "Must have been hoisted to PreEntryBlock or outside the scope"); 1669 } 1670 } 1671 for (SelectInst *SI : RI.Selects) { 1672 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1673 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1674 if (!(IsTrueBiased || IsFalseBiased)) 1675 continue; 1676 Value *V = SI->getCondition(); 1677 CHR_DEBUG(dbgs() << *V << "\n"); 1678 if (auto *I = dyn_cast<Instruction>(V)) { 1679 (void)(I); // Unused in release build. 1680 assert((I->getParent() == PreEntryBlock || 1681 !Scope->contains(I)) && 1682 "Must have been hoisted to PreEntryBlock or outside the scope"); 1683 } 1684 } 1685 } 1686 } 1687 1688 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1689 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1690 1691 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1692 Region *FirstRegion = Scope->RegInfos[0].R; 1693 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1694 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1695 BasicBlock *ExitBlock = LastRegion->getExit(); 1696 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1697 1698 if (ExitBlock) { 1699 // Insert a trivial phi at the exit block (where the CHR hot path and the 1700 // cold path merges) for a value that's defined in the scope but used 1701 // outside it (meaning it's alive at the exit block). We will add the 1702 // incoming values for the CHR cold paths to it below. Without this, we'd 1703 // miss updating phi's for such values unless there happens to already be a 1704 // phi for that value there. 1705 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1706 } 1707 1708 // Split the entry block of the first region. The new block becomes the new 1709 // entry block of the first region. The old entry block becomes the block to 1710 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1711 // through the split, we update the entry of the first region after the split, 1712 // and Region only points to the entry and the exit blocks, rather than 1713 // keeping everything in a list or set, the blocks membership and the 1714 // entry/exit blocks of the region are still valid after the split. 1715 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1716 << " at " << *Scope->BranchInsertPoint << "\n"); 1717 BasicBlock *NewEntryBlock = 1718 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1719 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1720 "NewEntryBlock's only pred must be EntryBlock"); 1721 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1722 BasicBlock *PreEntryBlock = EntryBlock; 1723 1724 ValueToValueMapTy VMap; 1725 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1726 // hot path (originals) and a cold path (clones) and update the PHIs at the 1727 // exit block. 1728 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1729 1730 // Replace the old (placeholder) branch with the new (merged) conditional 1731 // branch. 1732 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1733 NewEntryBlock, VMap); 1734 1735 #ifndef NDEBUG 1736 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1737 #endif 1738 1739 // Hoist the conditional values of the branches/selects. 1740 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1741 1742 #ifndef NDEBUG 1743 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1744 #endif 1745 1746 // Create the combined branch condition and constant-fold the branches/selects 1747 // in the hot path. 1748 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1749 ProfileCount ? ProfileCount.getValue() : 0); 1750 } 1751 1752 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1753 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1754 // at the exit block. 1755 void CHR::cloneScopeBlocks(CHRScope *Scope, 1756 BasicBlock *PreEntryBlock, 1757 BasicBlock *ExitBlock, 1758 Region *LastRegion, 1759 ValueToValueMapTy &VMap) { 1760 // Clone all the blocks. The original blocks will be the hot-path 1761 // CHR-optimized code and the cloned blocks will be the original unoptimized 1762 // code. This is so that the block pointers from the 1763 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1764 // which CHR should apply to. 1765 SmallVector<BasicBlock*, 8> NewBlocks; 1766 for (RegInfo &RI : Scope->RegInfos) 1767 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1768 // sub-Scopes. 1769 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1770 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1771 NewBlocks.push_back(NewBB); 1772 VMap[BB] = NewBB; 1773 } 1774 1775 // Place the cloned blocks right after the original blocks (right before the 1776 // exit block of.) 1777 if (ExitBlock) 1778 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1779 F.getBasicBlockList(), 1780 NewBlocks[0]->getIterator(), F.end()); 1781 1782 // Update the cloned blocks/instructions to refer to themselves. 1783 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1784 for (Instruction &I : *NewBlocks[i]) 1785 RemapInstruction(&I, VMap, 1786 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1787 1788 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1789 // the top-level region but we don't need to add PHIs. The trivial PHIs 1790 // inserted above will be updated here. 1791 if (ExitBlock) 1792 for (PHINode &PN : ExitBlock->phis()) 1793 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1794 ++I) { 1795 BasicBlock *Pred = PN.getIncomingBlock(I); 1796 if (LastRegion->contains(Pred)) { 1797 Value *V = PN.getIncomingValue(I); 1798 auto It = VMap.find(V); 1799 if (It != VMap.end()) V = It->second; 1800 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1801 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1802 } 1803 } 1804 } 1805 1806 // A helper for transformScope. Replace the old (placeholder) branch with the 1807 // new (merged) conditional branch. 1808 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1809 BasicBlock *EntryBlock, 1810 BasicBlock *NewEntryBlock, 1811 ValueToValueMapTy &VMap) { 1812 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1813 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1814 "SplitBlock did not work correctly!"); 1815 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1816 "NewEntryBlock's only pred must be EntryBlock"); 1817 assert(VMap.find(NewEntryBlock) != VMap.end() && 1818 "NewEntryBlock must have been copied"); 1819 OldBR->dropAllReferences(); 1820 OldBR->eraseFromParent(); 1821 // The true predicate is a placeholder. It will be replaced later in 1822 // fixupBranchesAndSelects(). 1823 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1824 cast<BasicBlock>(VMap[NewEntryBlock]), 1825 ConstantInt::getTrue(F.getContext())); 1826 PreEntryBlock->getInstList().push_back(NewBR); 1827 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1828 "NewEntryBlock's only pred must be EntryBlock"); 1829 return NewBR; 1830 } 1831 1832 // A helper for transformScopes. Create the combined branch condition and 1833 // constant-fold the branches/selects in the hot path. 1834 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1835 BasicBlock *PreEntryBlock, 1836 BranchInst *MergedBR, 1837 uint64_t ProfileCount) { 1838 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1839 BranchProbability CHRBranchBias(1, 1); 1840 uint64_t NumCHRedBranches = 0; 1841 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1842 for (RegInfo &RI : Scope->CHRRegions) { 1843 Region *R = RI.R; 1844 if (RI.HasBranch) { 1845 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1846 ++NumCHRedBranches; 1847 } 1848 for (SelectInst *SI : RI.Selects) { 1849 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1850 ++NumCHRedBranches; 1851 } 1852 } 1853 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1854 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1855 ORE.emit([&]() { 1856 return OptimizationRemark(DEBUG_TYPE, 1857 "CHR", 1858 // Refer to the hot (original) path 1859 MergedBR->getSuccessor(0)->getTerminator()) 1860 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1861 << " branches or selects"; 1862 }); 1863 MergedBR->setCondition(MergedCondition); 1864 uint32_t Weights[] = { 1865 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1866 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1867 }; 1868 MDBuilder MDB(F.getContext()); 1869 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1870 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1871 << "\n"); 1872 } 1873 1874 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1875 // and constant-fold a branch in the hot path. 1876 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1877 IRBuilder<> &IRB, 1878 Value *&MergedCondition, 1879 BranchProbability &CHRBranchBias) { 1880 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1881 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1882 "Must be truthy or falsy"); 1883 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1884 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1885 "Must be in the bias map"); 1886 BranchProbability Bias = BranchBiasMap[R]; 1887 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1888 // Take the min. 1889 if (CHRBranchBias > Bias) 1890 CHRBranchBias = Bias; 1891 BasicBlock *IfThen = BI->getSuccessor(1); 1892 BasicBlock *IfElse = BI->getSuccessor(0); 1893 BasicBlock *RegionExitBlock = R->getExit(); 1894 assert(RegionExitBlock && "Null ExitBlock"); 1895 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1896 IfThen != IfElse && "Invariant from findScopes"); 1897 if (IfThen == RegionExitBlock) { 1898 // Swap them so that IfThen means going into it and IfElse means skipping 1899 // it. 1900 std::swap(IfThen, IfElse); 1901 } 1902 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1903 << " IfElse " << IfElse->getName() << "\n"); 1904 Value *Cond = BI->getCondition(); 1905 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1906 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1907 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1908 MergedCondition); 1909 // Constant-fold the branch at ClonedEntryBlock. 1910 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1911 "The successor shouldn't change"); 1912 Value *NewCondition = ConditionTrue ? 1913 ConstantInt::getTrue(F.getContext()) : 1914 ConstantInt::getFalse(F.getContext()); 1915 BI->setCondition(NewCondition); 1916 } 1917 1918 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1919 // and constant-fold a select in the hot path. 1920 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1921 IRBuilder<> &IRB, 1922 Value *&MergedCondition, 1923 BranchProbability &CHRBranchBias) { 1924 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1925 assert((IsTrueBiased || 1926 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1927 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1928 "Must be in the bias map"); 1929 BranchProbability Bias = SelectBiasMap[SI]; 1930 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1931 // Take the min. 1932 if (CHRBranchBias > Bias) 1933 CHRBranchBias = Bias; 1934 Value *Cond = SI->getCondition(); 1935 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1936 MergedCondition); 1937 Value *NewCondition = IsTrueBiased ? 1938 ConstantInt::getTrue(F.getContext()) : 1939 ConstantInt::getFalse(F.getContext()); 1940 SI->setCondition(NewCondition); 1941 } 1942 1943 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1944 // condition. 1945 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1946 Instruction *BranchOrSelect, 1947 CHRScope *Scope, 1948 IRBuilder<> &IRB, 1949 Value *&MergedCondition) { 1950 if (IsTrueBiased) { 1951 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1952 } else { 1953 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1954 // branch, negate the icmp predicate and swap the branch targets and avoid 1955 // inserting an Xor to negate Cond. 1956 bool Done = false; 1957 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1958 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1959 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1960 Done = true; 1961 } 1962 if (!Done) { 1963 Value *Negate = IRB.CreateXor( 1964 ConstantInt::getTrue(F.getContext()), Cond); 1965 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1966 } 1967 } 1968 } 1969 1970 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1971 unsigned I = 0; 1972 DenseSet<PHINode *> TrivialPHIs; 1973 for (CHRScope *Scope : CHRScopes) { 1974 transformScopes(Scope, TrivialPHIs); 1975 CHR_DEBUG( 1976 std::ostringstream oss; 1977 oss << " after transformScopes " << I++; 1978 dumpIR(F, oss.str().c_str(), nullptr)); 1979 (void)I; 1980 } 1981 } 1982 1983 static void LLVM_ATTRIBUTE_UNUSED 1984 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1985 dbgs() << Label << " " << Scopes.size() << "\n"; 1986 for (CHRScope *Scope : Scopes) { 1987 dbgs() << *Scope << "\n"; 1988 } 1989 } 1990 1991 bool CHR::run() { 1992 if (!shouldApply(F, PSI)) 1993 return false; 1994 1995 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1996 1997 bool Changed = false; 1998 { 1999 CHR_DEBUG( 2000 dbgs() << "RegionInfo:\n"; 2001 RI.print(dbgs())); 2002 2003 // Recursively traverse the region tree and find regions that have biased 2004 // branches and/or selects and create scopes. 2005 SmallVector<CHRScope *, 8> AllScopes; 2006 findScopes(AllScopes); 2007 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2008 2009 // Split the scopes if 1) the conditiona values of the biased 2010 // branches/selects of the inner/lower scope can't be hoisted up to the 2011 // outermost/uppermost scope entry, or 2) the condition values of the biased 2012 // branches/selects in a scope (including subscopes) don't share at least 2013 // one common value. 2014 SmallVector<CHRScope *, 8> SplitScopes; 2015 splitScopes(AllScopes, SplitScopes); 2016 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2017 2018 // After splitting, set the biased regions and selects of a scope (a tree 2019 // root) that include those of the subscopes. 2020 classifyBiasedScopes(SplitScopes); 2021 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2022 2023 // Filter out the scopes that has only one biased region or select (CHR 2024 // isn't useful in such a case). 2025 SmallVector<CHRScope *, 8> FilteredScopes; 2026 filterScopes(SplitScopes, FilteredScopes); 2027 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2028 2029 // Set the regions to be CHR'ed and their hoist stops for each scope. 2030 SmallVector<CHRScope *, 8> SetScopes; 2031 setCHRRegions(FilteredScopes, SetScopes); 2032 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2033 2034 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2035 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2036 // to the hot path, rather than both hot and cold paths. 2037 SmallVector<CHRScope *, 8> SortedScopes; 2038 sortScopes(SetScopes, SortedScopes); 2039 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2040 2041 CHR_DEBUG( 2042 dbgs() << "RegionInfo:\n"; 2043 RI.print(dbgs())); 2044 2045 // Apply the CHR transformation. 2046 if (!SortedScopes.empty()) { 2047 transformScopes(SortedScopes); 2048 Changed = true; 2049 } 2050 } 2051 2052 if (Changed) { 2053 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2054 ORE.emit([&]() { 2055 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2056 << ore::NV("Function", &F) << " " 2057 << "Reduced the number of branches in hot paths by " 2058 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2059 << " (static) and " 2060 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2061 << " (weighted by PGO count)"; 2062 }); 2063 } 2064 2065 return Changed; 2066 } 2067 2068 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 2069 BlockFrequencyInfo &BFI = 2070 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 2071 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2072 ProfileSummaryInfo &PSI = 2073 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2074 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2075 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 2076 std::make_unique<OptimizationRemarkEmitter>(&F); 2077 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); 2078 } 2079 2080 namespace llvm { 2081 2082 ControlHeightReductionPass::ControlHeightReductionPass() { 2083 parseCHRFilterFiles(); 2084 } 2085 2086 PreservedAnalyses ControlHeightReductionPass::run( 2087 Function &F, 2088 FunctionAnalysisManager &FAM) { 2089 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2090 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2091 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2092 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2093 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2094 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2095 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2096 if (!Changed) 2097 return PreservedAnalyses::all(); 2098 auto PA = PreservedAnalyses(); 2099 PA.preserve<GlobalsAA>(); 2100 return PA; 2101 } 2102 2103 } // namespace llvm 2104