1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Support/BranchProbability.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Transforms/Utils.h" 35 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36 #include "llvm/Transforms/Utils/Cloning.h" 37 #include "llvm/Transforms/Utils/ValueMapper.h" 38 39 #include <set> 40 #include <sstream> 41 42 using namespace llvm; 43 44 #define DEBUG_TYPE "chr" 45 46 #define CHR_DEBUG(X) LLVM_DEBUG(X) 47 48 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 49 cl::desc("Apply CHR for all functions")); 50 51 static cl::opt<double> CHRBiasThreshold( 52 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 53 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 54 55 static cl::opt<unsigned> CHRMergeThreshold( 56 "chr-merge-threshold", cl::init(2), cl::Hidden, 57 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 58 59 static cl::opt<std::string> CHRModuleList( 60 "chr-module-list", cl::init(""), cl::Hidden, 61 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 62 63 static cl::opt<std::string> CHRFunctionList( 64 "chr-function-list", cl::init(""), cl::Hidden, 65 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 66 67 static StringSet<> CHRModules; 68 static StringSet<> CHRFunctions; 69 70 static void parseCHRFilterFiles() { 71 if (!CHRModuleList.empty()) { 72 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 73 if (!FileOrErr) { 74 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 75 std::exit(1); 76 } 77 StringRef Buf = FileOrErr->get()->getBuffer(); 78 SmallVector<StringRef, 0> Lines; 79 Buf.split(Lines, '\n'); 80 for (StringRef Line : Lines) { 81 Line = Line.trim(); 82 if (!Line.empty()) 83 CHRModules.insert(Line); 84 } 85 } 86 if (!CHRFunctionList.empty()) { 87 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 88 if (!FileOrErr) { 89 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 90 std::exit(1); 91 } 92 StringRef Buf = FileOrErr->get()->getBuffer(); 93 SmallVector<StringRef, 0> Lines; 94 Buf.split(Lines, '\n'); 95 for (StringRef Line : Lines) { 96 Line = Line.trim(); 97 if (!Line.empty()) 98 CHRFunctions.insert(Line); 99 } 100 } 101 } 102 103 namespace { 104 class ControlHeightReductionLegacyPass : public FunctionPass { 105 public: 106 static char ID; 107 108 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 109 initializeControlHeightReductionLegacyPassPass( 110 *PassRegistry::getPassRegistry()); 111 parseCHRFilterFiles(); 112 } 113 114 bool runOnFunction(Function &F) override; 115 void getAnalysisUsage(AnalysisUsage &AU) const override { 116 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 117 AU.addRequired<DominatorTreeWrapperPass>(); 118 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 119 AU.addRequired<RegionInfoPass>(); 120 AU.addPreserved<GlobalsAAWrapperPass>(); 121 } 122 }; 123 } // end anonymous namespace 124 125 char ControlHeightReductionLegacyPass::ID = 0; 126 127 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 128 "chr", 129 "Reduce control height in the hot paths", 130 false, false) 131 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 134 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 135 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 136 "chr", 137 "Reduce control height in the hot paths", 138 false, false) 139 140 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 141 return new ControlHeightReductionLegacyPass(); 142 } 143 144 namespace { 145 146 struct CHRStats { 147 CHRStats() : NumBranches(0), NumBranchesDelta(0), 148 WeightedNumBranchesDelta(0) {} 149 void print(raw_ostream &OS) const { 150 OS << "CHRStats: NumBranches " << NumBranches 151 << " NumBranchesDelta " << NumBranchesDelta 152 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 153 } 154 uint64_t NumBranches; // The original number of conditional branches / 155 // selects 156 uint64_t NumBranchesDelta; // The decrease of the number of conditional 157 // branches / selects in the hot paths due to CHR. 158 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 159 // count at the scope entry. 160 }; 161 162 // RegInfo - some properties of a Region. 163 struct RegInfo { 164 RegInfo() : R(nullptr), HasBranch(false) {} 165 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 166 Region *R; 167 bool HasBranch; 168 SmallVector<SelectInst *, 8> Selects; 169 }; 170 171 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 172 173 // CHRScope - a sequence of regions to CHR together. It corresponds to a 174 // sequence of conditional blocks. It can have subscopes which correspond to 175 // nested conditional blocks. Nested CHRScopes form a tree. 176 class CHRScope { 177 public: 178 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 179 assert(RI.R && "Null RegionIn"); 180 RegInfos.push_back(RI); 181 } 182 183 Region *getParentRegion() { 184 assert(RegInfos.size() > 0 && "Empty CHRScope"); 185 Region *Parent = RegInfos[0].R->getParent(); 186 assert(Parent && "Unexpected to call this on the top-level region"); 187 return Parent; 188 } 189 190 BasicBlock *getEntryBlock() { 191 assert(RegInfos.size() > 0 && "Empty CHRScope"); 192 return RegInfos.front().R->getEntry(); 193 } 194 195 BasicBlock *getExitBlock() { 196 assert(RegInfos.size() > 0 && "Empty CHRScope"); 197 return RegInfos.back().R->getExit(); 198 } 199 200 bool appendable(CHRScope *Next) { 201 // The next scope is appendable only if this scope is directly connected to 202 // it (which implies it post-dominates this scope) and this scope dominates 203 // it (no edge to the next scope outside this scope). 204 BasicBlock *NextEntry = Next->getEntryBlock(); 205 if (getExitBlock() != NextEntry) 206 // Not directly connected. 207 return false; 208 Region *LastRegion = RegInfos.back().R; 209 for (BasicBlock *Pred : predecessors(NextEntry)) 210 if (!LastRegion->contains(Pred)) 211 // There's an edge going into the entry of the next scope from outside 212 // of this scope. 213 return false; 214 return true; 215 } 216 217 void append(CHRScope *Next) { 218 assert(RegInfos.size() > 0 && "Empty CHRScope"); 219 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 220 assert(getParentRegion() == Next->getParentRegion() && 221 "Must be siblings"); 222 assert(getExitBlock() == Next->getEntryBlock() && 223 "Must be adjacent"); 224 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 225 Subs.append(Next->Subs.begin(), Next->Subs.end()); 226 } 227 228 void addSub(CHRScope *SubIn) { 229 #ifndef NDEBUG 230 bool IsChild = false; 231 for (RegInfo &RI : RegInfos) 232 if (RI.R == SubIn->getParentRegion()) { 233 IsChild = true; 234 break; 235 } 236 assert(IsChild && "Must be a child"); 237 #endif 238 Subs.push_back(SubIn); 239 } 240 241 // Split this scope at the boundary region into two, which will belong to the 242 // tail and returns the tail. 243 CHRScope *split(Region *Boundary) { 244 assert(Boundary && "Boundary null"); 245 assert(RegInfos.begin()->R != Boundary && 246 "Can't be split at beginning"); 247 auto BoundaryIt = llvm::find_if( 248 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 249 if (BoundaryIt == RegInfos.end()) 250 return nullptr; 251 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 252 DenseSet<Region *> TailRegionSet; 253 for (const RegInfo &RI : TailRegInfos) 254 TailRegionSet.insert(RI.R); 255 256 auto TailIt = 257 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 258 assert(Sub && "null Sub"); 259 Region *Parent = Sub->getParentRegion(); 260 if (TailRegionSet.count(Parent)) 261 return false; 262 263 assert(llvm::find_if(RegInfos, 264 [&Parent](const RegInfo &RI) { 265 return Parent == RI.R; 266 }) != RegInfos.end() && 267 "Must be in head"); 268 return true; 269 }); 270 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 271 272 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 273 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 274 RegInfos.erase(BoundaryIt, RegInfos.end()); 275 Subs.erase(TailIt, Subs.end()); 276 return Scope; 277 } 278 279 bool contains(Instruction *I) const { 280 BasicBlock *Parent = I->getParent(); 281 for (const RegInfo &RI : RegInfos) 282 if (RI.R->contains(Parent)) 283 return true; 284 return false; 285 } 286 287 void print(raw_ostream &OS) const; 288 289 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 290 SmallVector<CHRScope *, 8> Subs; // Subscopes. 291 292 // The instruction at which to insert the CHR conditional branch (and hoist 293 // the dependent condition values). 294 Instruction *BranchInsertPoint; 295 296 // True-biased and false-biased regions (conditional blocks), 297 // respectively. Used only for the outermost scope and includes regions in 298 // subscopes. The rest are unbiased. 299 DenseSet<Region *> TrueBiasedRegions; 300 DenseSet<Region *> FalseBiasedRegions; 301 // Among the biased regions, the regions that get CHRed. 302 SmallVector<RegInfo, 8> CHRRegions; 303 304 // True-biased and false-biased selects, respectively. Used only for the 305 // outermost scope and includes ones in subscopes. 306 DenseSet<SelectInst *> TrueBiasedSelects; 307 DenseSet<SelectInst *> FalseBiasedSelects; 308 309 // Map from one of the above regions to the instructions to stop 310 // hoisting instructions at through use-def chains. 311 HoistStopMapTy HoistStopMap; 312 313 private: 314 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 315 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 316 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 317 }; 318 319 class CHR { 320 public: 321 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 322 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 323 OptimizationRemarkEmitter &OREin) 324 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 325 326 ~CHR() { 327 for (CHRScope *Scope : Scopes) { 328 delete Scope; 329 } 330 } 331 332 bool run(); 333 334 private: 335 // See the comments in CHR::run() for the high level flow of the algorithm and 336 // what the following functions do. 337 338 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 339 Region *R = RI.getTopLevelRegion(); 340 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 341 Output.push_back(Scope); 342 } 343 } 344 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 345 SmallVectorImpl<CHRScope *> &Scopes); 346 CHRScope *findScope(Region *R); 347 void checkScopeHoistable(CHRScope *Scope); 348 349 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 350 SmallVectorImpl<CHRScope *> &Output); 351 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 352 CHRScope *Outer, 353 DenseSet<Value *> *OuterConditionValues, 354 Instruction *OuterInsertPoint, 355 SmallVectorImpl<CHRScope *> &Output, 356 DenseSet<Instruction *> &Unhoistables); 357 358 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 359 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 360 361 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 362 SmallVectorImpl<CHRScope *> &Output); 363 364 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 365 SmallVectorImpl<CHRScope *> &Output); 366 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 367 368 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 369 SmallVectorImpl<CHRScope *> &Output); 370 371 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 372 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 373 void cloneScopeBlocks(CHRScope *Scope, 374 BasicBlock *PreEntryBlock, 375 BasicBlock *ExitBlock, 376 Region *LastRegion, 377 ValueToValueMapTy &VMap); 378 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 379 BasicBlock *EntryBlock, 380 BasicBlock *NewEntryBlock, 381 ValueToValueMapTy &VMap); 382 void fixupBranchesAndSelects(CHRScope *Scope, 383 BasicBlock *PreEntryBlock, 384 BranchInst *MergedBR, 385 uint64_t ProfileCount); 386 void fixupBranch(Region *R, 387 CHRScope *Scope, 388 IRBuilder<> &IRB, 389 Value *&MergedCondition, BranchProbability &CHRBranchBias); 390 void fixupSelect(SelectInst* SI, 391 CHRScope *Scope, 392 IRBuilder<> &IRB, 393 Value *&MergedCondition, BranchProbability &CHRBranchBias); 394 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 395 Instruction *BranchOrSelect, 396 CHRScope *Scope, 397 IRBuilder<> &IRB, 398 Value *&MergedCondition); 399 400 Function &F; 401 BlockFrequencyInfo &BFI; 402 DominatorTree &DT; 403 ProfileSummaryInfo &PSI; 404 RegionInfo &RI; 405 OptimizationRemarkEmitter &ORE; 406 CHRStats Stats; 407 408 // All the true-biased regions in the function 409 DenseSet<Region *> TrueBiasedRegionsGlobal; 410 // All the false-biased regions in the function 411 DenseSet<Region *> FalseBiasedRegionsGlobal; 412 // All the true-biased selects in the function 413 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 414 // All the false-biased selects in the function 415 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 416 // A map from biased regions to their branch bias 417 DenseMap<Region *, BranchProbability> BranchBiasMap; 418 // A map from biased selects to their branch bias 419 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 420 // All the scopes. 421 DenseSet<CHRScope *> Scopes; 422 }; 423 424 } // end anonymous namespace 425 426 static inline 427 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 428 const CHRStats &Stats) { 429 Stats.print(OS); 430 return OS; 431 } 432 433 static inline 434 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 435 Scope.print(OS); 436 return OS; 437 } 438 439 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 440 if (ForceCHR) 441 return true; 442 443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 444 if (CHRModules.count(F.getParent()->getName())) 445 return true; 446 return CHRFunctions.count(F.getName()); 447 } 448 449 assert(PSI.hasProfileSummary() && "Empty PSI?"); 450 return PSI.isFunctionEntryHot(&F); 451 } 452 453 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 454 CHRStats *Stats) { 455 StringRef FuncName = F.getName(); 456 StringRef ModuleName = F.getParent()->getName(); 457 (void)(FuncName); // Unused in release build. 458 (void)(ModuleName); // Unused in release build. 459 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 460 << FuncName); 461 if (Stats) 462 CHR_DEBUG(dbgs() << " " << *Stats); 463 CHR_DEBUG(dbgs() << "\n"); 464 CHR_DEBUG(F.dump()); 465 } 466 467 void CHRScope::print(raw_ostream &OS) const { 468 assert(RegInfos.size() > 0 && "Empty CHRScope"); 469 OS << "CHRScope["; 470 OS << RegInfos.size() << ", Regions["; 471 for (const RegInfo &RI : RegInfos) { 472 OS << RI.R->getNameStr(); 473 if (RI.HasBranch) 474 OS << " B"; 475 if (RI.Selects.size() > 0) 476 OS << " S" << RI.Selects.size(); 477 OS << ", "; 478 } 479 if (RegInfos[0].R->getParent()) { 480 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 481 } else { 482 // top level region 483 OS << "]"; 484 } 485 OS << ", Subs["; 486 for (CHRScope *Sub : Subs) { 487 OS << *Sub << ", "; 488 } 489 OS << "]]"; 490 } 491 492 // Return true if the given instruction type can be hoisted by CHR. 493 static bool isHoistableInstructionType(Instruction *I) { 494 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 495 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 496 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 497 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 498 isa<InsertValueInst>(I); 499 } 500 501 // Return true if the given instruction can be hoisted by CHR. 502 static bool isHoistable(Instruction *I, DominatorTree &DT) { 503 if (!isHoistableInstructionType(I)) 504 return false; 505 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 506 } 507 508 // Recursively traverse the use-def chains of the given value and return a set 509 // of the unhoistable base values defined within the scope (excluding the 510 // first-region entry block) or the (hoistable or unhoistable) base values that 511 // are defined outside (including the first-region entry block) of the 512 // scope. The returned set doesn't include constants. 513 static const std::set<Value *> & 514 getBaseValues(Value *V, DominatorTree &DT, 515 DenseMap<Value *, std::set<Value *>> &Visited) { 516 auto It = Visited.find(V); 517 if (It != Visited.end()) { 518 return It->second; 519 } 520 std::set<Value *> Result; 521 if (auto *I = dyn_cast<Instruction>(V)) { 522 // We don't stop at a block that's not in the Scope because we would miss 523 // some instructions that are based on the same base values if we stop 524 // there. 525 if (!isHoistable(I, DT)) { 526 Result.insert(I); 527 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 528 } 529 // I is hoistable above the Scope. 530 for (Value *Op : I->operands()) { 531 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 532 Result.insert(OpResult.begin(), OpResult.end()); 533 } 534 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 535 } 536 if (isa<Argument>(V)) { 537 Result.insert(V); 538 } 539 // We don't include others like constants because those won't lead to any 540 // chance of folding of conditions (eg two bit checks merged into one check) 541 // after CHR. 542 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 543 } 544 545 // Return true if V is already hoisted or can be hoisted (along with its 546 // operands) above the insert point. When it returns true and HoistStops is 547 // non-null, the instructions to stop hoisting at through the use-def chains are 548 // inserted into HoistStops. 549 static bool 550 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 551 DenseSet<Instruction *> &Unhoistables, 552 DenseSet<Instruction *> *HoistStops, 553 DenseMap<Instruction *, bool> &Visited) { 554 assert(InsertPoint && "Null InsertPoint"); 555 if (auto *I = dyn_cast<Instruction>(V)) { 556 auto It = Visited.find(I); 557 if (It != Visited.end()) { 558 return It->second; 559 } 560 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 561 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 562 if (Unhoistables.count(I)) { 563 // Don't hoist if they are not to be hoisted. 564 Visited[I] = false; 565 return false; 566 } 567 if (DT.dominates(I, InsertPoint)) { 568 // We are already above the insert point. Stop here. 569 if (HoistStops) 570 HoistStops->insert(I); 571 Visited[I] = true; 572 return true; 573 } 574 // We aren't not above the insert point, check if we can hoist it above the 575 // insert point. 576 if (isHoistable(I, DT)) { 577 // Check operands first. 578 DenseSet<Instruction *> OpsHoistStops; 579 bool AllOpsHoisted = true; 580 for (Value *Op : I->operands()) { 581 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 582 Visited)) { 583 AllOpsHoisted = false; 584 break; 585 } 586 } 587 if (AllOpsHoisted) { 588 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 589 if (HoistStops) 590 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 591 Visited[I] = true; 592 return true; 593 } 594 } 595 Visited[I] = false; 596 return false; 597 } 598 // Non-instructions are considered hoistable. 599 return true; 600 } 601 602 // Returns true and sets the true probability and false probability of an 603 // MD_prof metadata if it's well-formed. 604 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 605 BranchProbability &FalseProb) { 606 if (!MD) return false; 607 MDString *MDName = cast<MDString>(MD->getOperand(0)); 608 if (MDName->getString() != "branch_weights" || 609 MD->getNumOperands() != 3) 610 return false; 611 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 612 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 613 if (!TrueWeight || !FalseWeight) 614 return false; 615 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 616 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 617 uint64_t SumWt = TrueWt + FalseWt; 618 619 assert(SumWt >= TrueWt && SumWt >= FalseWt && 620 "Overflow calculating branch probabilities."); 621 622 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 623 if (SumWt == 0) 624 return false; 625 626 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 627 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 628 return true; 629 } 630 631 static BranchProbability getCHRBiasThreshold() { 632 return BranchProbability::getBranchProbability( 633 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 634 } 635 636 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 637 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 638 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 639 // false. 640 template <typename K, typename S, typename M> 641 static bool checkBias(K *Key, BranchProbability TrueProb, 642 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 643 M &BiasMap) { 644 BranchProbability Threshold = getCHRBiasThreshold(); 645 if (TrueProb >= Threshold) { 646 TrueSet.insert(Key); 647 BiasMap[Key] = TrueProb; 648 return true; 649 } else if (FalseProb >= Threshold) { 650 FalseSet.insert(Key); 651 BiasMap[Key] = FalseProb; 652 return true; 653 } 654 return false; 655 } 656 657 // Returns true and insert a region into the right biased set and the map if the 658 // branch of the region is biased. 659 static bool checkBiasedBranch(BranchInst *BI, Region *R, 660 DenseSet<Region *> &TrueBiasedRegionsGlobal, 661 DenseSet<Region *> &FalseBiasedRegionsGlobal, 662 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 663 if (!BI->isConditional()) 664 return false; 665 BranchProbability ThenProb, ElseProb; 666 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 667 ThenProb, ElseProb)) 668 return false; 669 BasicBlock *IfThen = BI->getSuccessor(0); 670 BasicBlock *IfElse = BI->getSuccessor(1); 671 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 672 IfThen != IfElse && 673 "Invariant from findScopes"); 674 if (IfThen == R->getExit()) { 675 // Swap them so that IfThen/ThenProb means going into the conditional code 676 // and IfElse/ElseProb means skipping it. 677 std::swap(IfThen, IfElse); 678 std::swap(ThenProb, ElseProb); 679 } 680 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 681 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 682 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 683 return checkBias(R, ThenProb, ElseProb, 684 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 685 BranchBiasMap); 686 } 687 688 // Returns true and insert a select into the right biased set and the map if the 689 // select is biased. 690 static bool checkBiasedSelect( 691 SelectInst *SI, Region *R, 692 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 693 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 694 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 695 BranchProbability TrueProb, FalseProb; 696 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 697 TrueProb, FalseProb)) 698 return false; 699 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 700 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 701 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 702 return checkBias(SI, TrueProb, FalseProb, 703 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 704 SelectBiasMap); 705 } 706 707 // Returns the instruction at which to hoist the dependent condition values and 708 // insert the CHR branch for a region. This is the terminator branch in the 709 // entry block or the first select in the entry block, if any. 710 static Instruction* getBranchInsertPoint(RegInfo &RI) { 711 Region *R = RI.R; 712 BasicBlock *EntryBB = R->getEntry(); 713 // The hoist point is by default the terminator of the entry block, which is 714 // the same as the branch instruction if RI.HasBranch is true. 715 Instruction *HoistPoint = EntryBB->getTerminator(); 716 for (SelectInst *SI : RI.Selects) { 717 if (SI->getParent() == EntryBB) { 718 // Pick the first select in Selects in the entry block. Note Selects is 719 // sorted in the instruction order within a block (asserted below). 720 HoistPoint = SI; 721 break; 722 } 723 } 724 assert(HoistPoint && "Null HoistPoint"); 725 #ifndef NDEBUG 726 // Check that HoistPoint is the first one in Selects in the entry block, 727 // if any. 728 DenseSet<Instruction *> EntryBlockSelectSet; 729 for (SelectInst *SI : RI.Selects) { 730 if (SI->getParent() == EntryBB) { 731 EntryBlockSelectSet.insert(SI); 732 } 733 } 734 for (Instruction &I : *EntryBB) { 735 if (EntryBlockSelectSet.count(&I) > 0) { 736 assert(&I == HoistPoint && 737 "HoistPoint must be the first one in Selects"); 738 break; 739 } 740 } 741 #endif 742 return HoistPoint; 743 } 744 745 // Find a CHR scope in the given region. 746 CHRScope * CHR::findScope(Region *R) { 747 CHRScope *Result = nullptr; 748 BasicBlock *Entry = R->getEntry(); 749 BasicBlock *Exit = R->getExit(); // null if top level. 750 assert(Entry && "Entry must not be null"); 751 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 752 "Only top level region has a null exit"); 753 if (Entry) 754 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 755 else 756 CHR_DEBUG(dbgs() << "Entry null\n"); 757 if (Exit) 758 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 759 else 760 CHR_DEBUG(dbgs() << "Exit null\n"); 761 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 762 // to this region). 763 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 764 if (EntryInSubregion) 765 return nullptr; 766 // Exclude loops 767 for (BasicBlock *Pred : predecessors(Entry)) 768 if (R->contains(Pred)) 769 return nullptr; 770 if (Exit) { 771 // Try to find an if-then block (check if R is an if-then). 772 // if (cond) { 773 // ... 774 // } 775 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 776 if (BI) 777 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 778 else 779 CHR_DEBUG(dbgs() << "BI null\n"); 780 if (BI && BI->isConditional()) { 781 BasicBlock *S0 = BI->getSuccessor(0); 782 BasicBlock *S1 = BI->getSuccessor(1); 783 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 784 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 785 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 786 RegInfo RI(R); 787 RI.HasBranch = checkBiasedBranch( 788 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 789 BranchBiasMap); 790 Result = new CHRScope(RI); 791 Scopes.insert(Result); 792 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 793 ++Stats.NumBranches; 794 if (!RI.HasBranch) { 795 ORE.emit([&]() { 796 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 797 << "Branch not biased"; 798 }); 799 } 800 } 801 } 802 } 803 { 804 // Try to look for selects in the direct child blocks (as opposed to in 805 // subregions) of R. 806 // ... 807 // if (..) { // Some subregion 808 // ... 809 // } 810 // if (..) { // Some subregion 811 // ... 812 // } 813 // ... 814 // a = cond ? b : c; 815 // ... 816 SmallVector<SelectInst *, 8> Selects; 817 for (RegionNode *E : R->elements()) { 818 if (E->isSubRegion()) 819 continue; 820 // This returns the basic block of E if E is a direct child of R (not a 821 // subregion.) 822 BasicBlock *BB = E->getEntry(); 823 // Need to push in the order to make it easier to find the first Select 824 // later. 825 for (Instruction &I : *BB) { 826 if (auto *SI = dyn_cast<SelectInst>(&I)) { 827 Selects.push_back(SI); 828 ++Stats.NumBranches; 829 } 830 } 831 } 832 if (Selects.size() > 0) { 833 auto AddSelects = [&](RegInfo &RI) { 834 for (auto *SI : Selects) 835 if (checkBiasedSelect(SI, RI.R, 836 TrueBiasedSelectsGlobal, 837 FalseBiasedSelectsGlobal, 838 SelectBiasMap)) 839 RI.Selects.push_back(SI); 840 else 841 ORE.emit([&]() { 842 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 843 << "Select not biased"; 844 }); 845 }; 846 if (!Result) { 847 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 848 RegInfo RI(R); 849 AddSelects(RI); 850 Result = new CHRScope(RI); 851 Scopes.insert(Result); 852 } else { 853 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 854 AddSelects(Result->RegInfos[0]); 855 } 856 } 857 } 858 859 if (Result) { 860 checkScopeHoistable(Result); 861 } 862 return Result; 863 } 864 865 // Check that any of the branch and the selects in the region could be 866 // hoisted above the the CHR branch insert point (the most dominating of 867 // them, either the branch (at the end of the first block) or the first 868 // select in the first block). If the branch can't be hoisted, drop the 869 // selects in the first blocks. 870 // 871 // For example, for the following scope/region with selects, we want to insert 872 // the merged branch right before the first select in the first/entry block by 873 // hoisting c1, c2, c3, and c4. 874 // 875 // // Branch insert point here. 876 // a = c1 ? b : c; // Select 1 877 // d = c2 ? e : f; // Select 2 878 // if (c3) { // Branch 879 // ... 880 // c4 = foo() // A call. 881 // g = c4 ? h : i; // Select 3 882 // } 883 // 884 // But suppose we can't hoist c4 because it's dependent on the preceding 885 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 886 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 887 void CHR::checkScopeHoistable(CHRScope *Scope) { 888 RegInfo &RI = Scope->RegInfos[0]; 889 Region *R = RI.R; 890 BasicBlock *EntryBB = R->getEntry(); 891 auto *Branch = RI.HasBranch ? 892 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 893 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 894 if (RI.HasBranch || !Selects.empty()) { 895 Instruction *InsertPoint = getBranchInsertPoint(RI); 896 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 897 // Avoid a data dependence from a select or a branch to a(nother) 898 // select. Note no instruction can't data-depend on a branch (a branch 899 // instruction doesn't produce a value). 900 DenseSet<Instruction *> Unhoistables; 901 // Initialize Unhoistables with the selects. 902 for (SelectInst *SI : Selects) { 903 Unhoistables.insert(SI); 904 } 905 // Remove Selects that can't be hoisted. 906 for (auto it = Selects.begin(); it != Selects.end(); ) { 907 SelectInst *SI = *it; 908 if (SI == InsertPoint) { 909 ++it; 910 continue; 911 } 912 DenseMap<Instruction *, bool> Visited; 913 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 914 DT, Unhoistables, nullptr, Visited); 915 if (!IsHoistable) { 916 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 917 ORE.emit([&]() { 918 return OptimizationRemarkMissed(DEBUG_TYPE, 919 "DropUnhoistableSelect", SI) 920 << "Dropped unhoistable select"; 921 }); 922 it = Selects.erase(it); 923 // Since we are dropping the select here, we also drop it from 924 // Unhoistables. 925 Unhoistables.erase(SI); 926 } else 927 ++it; 928 } 929 // Update InsertPoint after potentially removing selects. 930 InsertPoint = getBranchInsertPoint(RI); 931 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 932 if (RI.HasBranch && InsertPoint != Branch) { 933 DenseMap<Instruction *, bool> Visited; 934 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 935 DT, Unhoistables, nullptr, Visited); 936 if (!IsHoistable) { 937 // If the branch isn't hoistable, drop the selects in the entry 938 // block, preferring the branch, which makes the branch the hoist 939 // point. 940 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 941 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 942 CHR_DEBUG( 943 for (SelectInst *SI : Selects) { 944 dbgs() << "SI " << *SI << "\n"; 945 }); 946 for (SelectInst *SI : Selects) { 947 ORE.emit([&]() { 948 return OptimizationRemarkMissed(DEBUG_TYPE, 949 "DropSelectUnhoistableBranch", SI) 950 << "Dropped select due to unhoistable branch"; 951 }); 952 } 953 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 954 [EntryBB](SelectInst *SI) { 955 return SI->getParent() == EntryBB; 956 }), Selects.end()); 957 Unhoistables.clear(); 958 InsertPoint = Branch; 959 } 960 } 961 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 962 #ifndef NDEBUG 963 if (RI.HasBranch) { 964 assert(!DT.dominates(Branch, InsertPoint) && 965 "Branch can't be already above the hoist point"); 966 DenseMap<Instruction *, bool> Visited; 967 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 968 DT, Unhoistables, nullptr, Visited) && 969 "checkHoistValue for branch"); 970 } 971 for (auto *SI : Selects) { 972 assert(!DT.dominates(SI, InsertPoint) && 973 "SI can't be already above the hoist point"); 974 DenseMap<Instruction *, bool> Visited; 975 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 976 Unhoistables, nullptr, Visited) && 977 "checkHoistValue for selects"); 978 } 979 CHR_DEBUG(dbgs() << "Result\n"); 980 if (RI.HasBranch) { 981 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 982 } 983 for (auto *SI : Selects) { 984 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 985 } 986 #endif 987 } 988 } 989 990 // Traverse the region tree, find all nested scopes and merge them if possible. 991 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 992 SmallVectorImpl<CHRScope *> &Scopes) { 993 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 994 CHRScope *Result = findScope(R); 995 // Visit subscopes. 996 CHRScope *ConsecutiveSubscope = nullptr; 997 SmallVector<CHRScope *, 8> Subscopes; 998 for (auto It = R->begin(); It != R->end(); ++It) { 999 const std::unique_ptr<Region> &SubR = *It; 1000 auto NextIt = std::next(It); 1001 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 1002 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 1003 << "\n"); 1004 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 1005 if (SubCHRScope) { 1006 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 1007 } else { 1008 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 1009 } 1010 if (SubCHRScope) { 1011 if (!ConsecutiveSubscope) 1012 ConsecutiveSubscope = SubCHRScope; 1013 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 1014 Subscopes.push_back(ConsecutiveSubscope); 1015 ConsecutiveSubscope = SubCHRScope; 1016 } else 1017 ConsecutiveSubscope->append(SubCHRScope); 1018 } else { 1019 if (ConsecutiveSubscope) { 1020 Subscopes.push_back(ConsecutiveSubscope); 1021 } 1022 ConsecutiveSubscope = nullptr; 1023 } 1024 } 1025 if (ConsecutiveSubscope) { 1026 Subscopes.push_back(ConsecutiveSubscope); 1027 } 1028 for (CHRScope *Sub : Subscopes) { 1029 if (Result) { 1030 // Combine it with the parent. 1031 Result->addSub(Sub); 1032 } else { 1033 // Push Subscopes as they won't be combined with the parent. 1034 Scopes.push_back(Sub); 1035 } 1036 } 1037 return Result; 1038 } 1039 1040 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1041 DenseSet<Value *> ConditionValues; 1042 if (RI.HasBranch) { 1043 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1044 ConditionValues.insert(BI->getCondition()); 1045 } 1046 for (SelectInst *SI : RI.Selects) { 1047 ConditionValues.insert(SI->getCondition()); 1048 } 1049 return ConditionValues; 1050 } 1051 1052 1053 // Determine whether to split a scope depending on the sets of the branch 1054 // condition values of the previous region and the current region. We split 1055 // (return true) it if 1) the condition values of the inner/lower scope can't be 1056 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1057 // values have an empty intersection (because the combined branch conditions 1058 // won't probably lead to a simpler combined condition). 1059 static bool shouldSplit(Instruction *InsertPoint, 1060 DenseSet<Value *> &PrevConditionValues, 1061 DenseSet<Value *> &ConditionValues, 1062 DominatorTree &DT, 1063 DenseSet<Instruction *> &Unhoistables) { 1064 assert(InsertPoint && "Null InsertPoint"); 1065 CHR_DEBUG( 1066 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1067 for (Value *V : PrevConditionValues) { 1068 dbgs() << *V << ", "; 1069 } 1070 dbgs() << " ConditionValues "; 1071 for (Value *V : ConditionValues) { 1072 dbgs() << *V << ", "; 1073 } 1074 dbgs() << "\n"); 1075 // If any of Bases isn't hoistable to the hoist point, split. 1076 for (Value *V : ConditionValues) { 1077 DenseMap<Instruction *, bool> Visited; 1078 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1079 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1080 return true; // Not hoistable, split. 1081 } 1082 } 1083 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1084 // unnecessary splits at scopes with no branch/selects. If 1085 // PrevConditionValues and ConditionValues don't intersect at all, split. 1086 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1087 // Use std::set as DenseSet doesn't work with set_intersection. 1088 std::set<Value *> PrevBases, Bases; 1089 DenseMap<Value *, std::set<Value *>> Visited; 1090 for (Value *V : PrevConditionValues) { 1091 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1092 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1093 } 1094 for (Value *V : ConditionValues) { 1095 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1096 Bases.insert(BaseValues.begin(), BaseValues.end()); 1097 } 1098 CHR_DEBUG( 1099 dbgs() << "PrevBases "; 1100 for (Value *V : PrevBases) { 1101 dbgs() << *V << ", "; 1102 } 1103 dbgs() << " Bases "; 1104 for (Value *V : Bases) { 1105 dbgs() << *V << ", "; 1106 } 1107 dbgs() << "\n"); 1108 std::vector<Value *> Intersection; 1109 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1110 Bases.end(), std::back_inserter(Intersection)); 1111 if (Intersection.empty()) { 1112 // Empty intersection, split. 1113 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1114 return true; 1115 } 1116 } 1117 CHR_DEBUG(dbgs() << "No split\n"); 1118 return false; // Don't split. 1119 } 1120 1121 static void getSelectsInScope(CHRScope *Scope, 1122 DenseSet<Instruction *> &Output) { 1123 for (RegInfo &RI : Scope->RegInfos) 1124 for (SelectInst *SI : RI.Selects) 1125 Output.insert(SI); 1126 for (CHRScope *Sub : Scope->Subs) 1127 getSelectsInScope(Sub, Output); 1128 } 1129 1130 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1131 SmallVectorImpl<CHRScope *> &Output) { 1132 for (CHRScope *Scope : Input) { 1133 assert(!Scope->BranchInsertPoint && 1134 "BranchInsertPoint must not be set"); 1135 DenseSet<Instruction *> Unhoistables; 1136 getSelectsInScope(Scope, Unhoistables); 1137 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1138 } 1139 #ifndef NDEBUG 1140 for (CHRScope *Scope : Output) { 1141 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1142 } 1143 #endif 1144 } 1145 1146 SmallVector<CHRScope *, 8> CHR::splitScope( 1147 CHRScope *Scope, 1148 CHRScope *Outer, 1149 DenseSet<Value *> *OuterConditionValues, 1150 Instruction *OuterInsertPoint, 1151 SmallVectorImpl<CHRScope *> &Output, 1152 DenseSet<Instruction *> &Unhoistables) { 1153 if (Outer) { 1154 assert(OuterConditionValues && "Null OuterConditionValues"); 1155 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1156 } 1157 bool PrevSplitFromOuter = true; 1158 DenseSet<Value *> PrevConditionValues; 1159 Instruction *PrevInsertPoint = nullptr; 1160 SmallVector<CHRScope *, 8> Splits; 1161 SmallVector<bool, 8> SplitsSplitFromOuter; 1162 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1163 SmallVector<Instruction *, 8> SplitsInsertPoints; 1164 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1165 for (RegInfo &RI : RegInfos) { 1166 Instruction *InsertPoint = getBranchInsertPoint(RI); 1167 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1168 CHR_DEBUG( 1169 dbgs() << "ConditionValues "; 1170 for (Value *V : ConditionValues) { 1171 dbgs() << *V << ", "; 1172 } 1173 dbgs() << "\n"); 1174 if (RI.R == RegInfos[0].R) { 1175 // First iteration. Check to see if we should split from the outer. 1176 if (Outer) { 1177 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1178 CHR_DEBUG(dbgs() << "Should split from outer at " 1179 << RI.R->getNameStr() << "\n"); 1180 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1181 ConditionValues, DT, Unhoistables)) { 1182 PrevConditionValues = ConditionValues; 1183 PrevInsertPoint = InsertPoint; 1184 ORE.emit([&]() { 1185 return OptimizationRemarkMissed(DEBUG_TYPE, 1186 "SplitScopeFromOuter", 1187 RI.R->getEntry()->getTerminator()) 1188 << "Split scope from outer due to unhoistable branch/select " 1189 << "and/or lack of common condition values"; 1190 }); 1191 } else { 1192 // Not splitting from the outer. Use the outer bases and insert 1193 // point. Union the bases. 1194 PrevSplitFromOuter = false; 1195 PrevConditionValues = *OuterConditionValues; 1196 PrevConditionValues.insert(ConditionValues.begin(), 1197 ConditionValues.end()); 1198 PrevInsertPoint = OuterInsertPoint; 1199 } 1200 } else { 1201 CHR_DEBUG(dbgs() << "Outer null\n"); 1202 PrevConditionValues = ConditionValues; 1203 PrevInsertPoint = InsertPoint; 1204 } 1205 } else { 1206 CHR_DEBUG(dbgs() << "Should split from prev at " 1207 << RI.R->getNameStr() << "\n"); 1208 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1209 DT, Unhoistables)) { 1210 CHRScope *Tail = Scope->split(RI.R); 1211 Scopes.insert(Tail); 1212 Splits.push_back(Scope); 1213 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1214 SplitsConditionValues.push_back(PrevConditionValues); 1215 SplitsInsertPoints.push_back(PrevInsertPoint); 1216 Scope = Tail; 1217 PrevConditionValues = ConditionValues; 1218 PrevInsertPoint = InsertPoint; 1219 PrevSplitFromOuter = true; 1220 ORE.emit([&]() { 1221 return OptimizationRemarkMissed(DEBUG_TYPE, 1222 "SplitScopeFromPrev", 1223 RI.R->getEntry()->getTerminator()) 1224 << "Split scope from previous due to unhoistable branch/select " 1225 << "and/or lack of common condition values"; 1226 }); 1227 } else { 1228 // Not splitting. Union the bases. Keep the hoist point. 1229 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1230 } 1231 } 1232 } 1233 Splits.push_back(Scope); 1234 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1235 SplitsConditionValues.push_back(PrevConditionValues); 1236 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1237 SplitsInsertPoints.push_back(PrevInsertPoint); 1238 assert(Splits.size() == SplitsConditionValues.size() && 1239 Splits.size() == SplitsSplitFromOuter.size() && 1240 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1241 for (size_t I = 0; I < Splits.size(); ++I) { 1242 CHRScope *Split = Splits[I]; 1243 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1244 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1245 SmallVector<CHRScope *, 8> NewSubs; 1246 DenseSet<Instruction *> SplitUnhoistables; 1247 getSelectsInScope(Split, SplitUnhoistables); 1248 for (CHRScope *Sub : Split->Subs) { 1249 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1250 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1251 SplitUnhoistables); 1252 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1253 } 1254 Split->Subs = NewSubs; 1255 } 1256 SmallVector<CHRScope *, 8> Result; 1257 for (size_t I = 0; I < Splits.size(); ++I) { 1258 CHRScope *Split = Splits[I]; 1259 if (SplitsSplitFromOuter[I]) { 1260 // Split from the outer. 1261 Output.push_back(Split); 1262 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1263 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1264 << "\n"); 1265 } else { 1266 // Connected to the outer. 1267 Result.push_back(Split); 1268 } 1269 } 1270 if (!Outer) 1271 assert(Result.empty() && 1272 "If no outer (top-level), must return no nested ones"); 1273 return Result; 1274 } 1275 1276 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1277 for (CHRScope *Scope : Scopes) { 1278 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1279 classifyBiasedScopes(Scope, Scope); 1280 CHR_DEBUG( 1281 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1282 dbgs() << "TrueBiasedRegions "; 1283 for (Region *R : Scope->TrueBiasedRegions) { 1284 dbgs() << R->getNameStr() << ", "; 1285 } 1286 dbgs() << "\n"; 1287 dbgs() << "FalseBiasedRegions "; 1288 for (Region *R : Scope->FalseBiasedRegions) { 1289 dbgs() << R->getNameStr() << ", "; 1290 } 1291 dbgs() << "\n"; 1292 dbgs() << "TrueBiasedSelects "; 1293 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1294 dbgs() << *SI << ", "; 1295 } 1296 dbgs() << "\n"; 1297 dbgs() << "FalseBiasedSelects "; 1298 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1299 dbgs() << *SI << ", "; 1300 } 1301 dbgs() << "\n";); 1302 } 1303 } 1304 1305 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1306 for (RegInfo &RI : Scope->RegInfos) { 1307 if (RI.HasBranch) { 1308 Region *R = RI.R; 1309 if (TrueBiasedRegionsGlobal.count(R) > 0) 1310 OutermostScope->TrueBiasedRegions.insert(R); 1311 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1312 OutermostScope->FalseBiasedRegions.insert(R); 1313 else 1314 llvm_unreachable("Must be biased"); 1315 } 1316 for (SelectInst *SI : RI.Selects) { 1317 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1318 OutermostScope->TrueBiasedSelects.insert(SI); 1319 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1320 OutermostScope->FalseBiasedSelects.insert(SI); 1321 else 1322 llvm_unreachable("Must be biased"); 1323 } 1324 } 1325 for (CHRScope *Sub : Scope->Subs) { 1326 classifyBiasedScopes(Sub, OutermostScope); 1327 } 1328 } 1329 1330 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1331 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1332 Scope->FalseBiasedRegions.size() + 1333 Scope->TrueBiasedSelects.size() + 1334 Scope->FalseBiasedSelects.size(); 1335 return NumBiased >= CHRMergeThreshold; 1336 } 1337 1338 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1339 SmallVectorImpl<CHRScope *> &Output) { 1340 for (CHRScope *Scope : Input) { 1341 // Filter out the ones with only one region and no subs. 1342 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1343 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1344 << Scope->TrueBiasedRegions.size() 1345 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1346 << " true-selects " << Scope->TrueBiasedSelects.size() 1347 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1348 ORE.emit([&]() { 1349 return OptimizationRemarkMissed( 1350 DEBUG_TYPE, 1351 "DropScopeWithOneBranchOrSelect", 1352 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1353 << "Drop scope with < " 1354 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1355 << " biased branch(es) or select(s)"; 1356 }); 1357 continue; 1358 } 1359 Output.push_back(Scope); 1360 } 1361 } 1362 1363 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1364 SmallVectorImpl<CHRScope *> &Output) { 1365 for (CHRScope *Scope : Input) { 1366 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1367 "Empty"); 1368 setCHRRegions(Scope, Scope); 1369 Output.push_back(Scope); 1370 CHR_DEBUG( 1371 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1372 for (auto pair : Scope->HoistStopMap) { 1373 Region *R = pair.first; 1374 dbgs() << "Region " << R->getNameStr() << "\n"; 1375 for (Instruction *I : pair.second) { 1376 dbgs() << "HoistStop " << *I << "\n"; 1377 } 1378 } 1379 dbgs() << "CHRRegions" << "\n"; 1380 for (RegInfo &RI : Scope->CHRRegions) { 1381 dbgs() << RI.R->getNameStr() << "\n"; 1382 }); 1383 } 1384 } 1385 1386 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1387 DenseSet<Instruction *> Unhoistables; 1388 // Put the biased selects in Unhoistables because they should stay where they 1389 // are and constant-folded after CHR (in case one biased select or a branch 1390 // can depend on another biased select.) 1391 for (RegInfo &RI : Scope->RegInfos) { 1392 for (SelectInst *SI : RI.Selects) { 1393 Unhoistables.insert(SI); 1394 } 1395 } 1396 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1397 for (RegInfo &RI : Scope->RegInfos) { 1398 Region *R = RI.R; 1399 DenseSet<Instruction *> HoistStops; 1400 bool IsHoisted = false; 1401 if (RI.HasBranch) { 1402 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1403 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1404 "Must be truthy or falsy"); 1405 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1406 // Note checkHoistValue fills in HoistStops. 1407 DenseMap<Instruction *, bool> Visited; 1408 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1409 Unhoistables, &HoistStops, Visited); 1410 assert(IsHoistable && "Must be hoistable"); 1411 (void)(IsHoistable); // Unused in release build 1412 IsHoisted = true; 1413 } 1414 for (SelectInst *SI : RI.Selects) { 1415 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1416 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1417 "Must be true or false biased"); 1418 // Note checkHoistValue fills in HoistStops. 1419 DenseMap<Instruction *, bool> Visited; 1420 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1421 Unhoistables, &HoistStops, Visited); 1422 assert(IsHoistable && "Must be hoistable"); 1423 (void)(IsHoistable); // Unused in release build 1424 IsHoisted = true; 1425 } 1426 if (IsHoisted) { 1427 OutermostScope->CHRRegions.push_back(RI); 1428 OutermostScope->HoistStopMap[R] = HoistStops; 1429 } 1430 } 1431 for (CHRScope *Sub : Scope->Subs) 1432 setCHRRegions(Sub, OutermostScope); 1433 } 1434 1435 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1436 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1437 } 1438 1439 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1440 SmallVectorImpl<CHRScope *> &Output) { 1441 Output.resize(Input.size()); 1442 llvm::copy(Input, Output.begin()); 1443 llvm::stable_sort(Output, CHRScopeSorter); 1444 } 1445 1446 // Return true if V is already hoisted or was hoisted (along with its operands) 1447 // to the insert point. 1448 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1449 HoistStopMapTy &HoistStopMap, 1450 DenseSet<Instruction *> &HoistedSet, 1451 DenseSet<PHINode *> &TrivialPHIs, 1452 DominatorTree &DT) { 1453 auto IT = HoistStopMap.find(R); 1454 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1455 DenseSet<Instruction *> &HoistStops = IT->second; 1456 if (auto *I = dyn_cast<Instruction>(V)) { 1457 if (I == HoistPoint) 1458 return; 1459 if (HoistStops.count(I)) 1460 return; 1461 if (auto *PN = dyn_cast<PHINode>(I)) 1462 if (TrivialPHIs.count(PN)) 1463 // The trivial phi inserted by the previous CHR scope could replace a 1464 // non-phi in HoistStops. Note that since this phi is at the exit of a 1465 // previous CHR scope, which dominates this scope, it's safe to stop 1466 // hoisting there. 1467 return; 1468 if (HoistedSet.count(I)) 1469 // Already hoisted, return. 1470 return; 1471 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1472 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1473 assert(DT.getNode(HoistPoint->getParent()) && 1474 "DT must contain HoistPoint block"); 1475 if (DT.dominates(I, HoistPoint)) 1476 // We are already above the hoist point. Stop here. This may be necessary 1477 // when multiple scopes would independently hoist the same 1478 // instruction. Since an outer (dominating) scope would hoist it to its 1479 // entry before an inner (dominated) scope would to its entry, the inner 1480 // scope may see the instruction already hoisted, in which case it 1481 // potentially wrong for the inner scope to hoist it and could cause bad 1482 // IR (non-dominating def), but safe to skip hoisting it instead because 1483 // it's already in a block that dominates the inner scope. 1484 return; 1485 for (Value *Op : I->operands()) { 1486 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1487 } 1488 I->moveBefore(HoistPoint); 1489 HoistedSet.insert(I); 1490 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1491 } 1492 } 1493 1494 // Hoist the dependent condition values of the branches and the selects in the 1495 // scope to the insert point. 1496 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1497 DenseSet<PHINode *> &TrivialPHIs, 1498 DominatorTree &DT) { 1499 DenseSet<Instruction *> HoistedSet; 1500 for (const RegInfo &RI : Scope->CHRRegions) { 1501 Region *R = RI.R; 1502 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1503 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1504 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1505 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1506 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1507 HoistedSet, TrivialPHIs, DT); 1508 } 1509 for (SelectInst *SI : RI.Selects) { 1510 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1511 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1512 if (!(IsTrueBiased || IsFalseBiased)) 1513 continue; 1514 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1515 HoistedSet, TrivialPHIs, DT); 1516 } 1517 } 1518 } 1519 1520 // Negate the predicate if an ICmp if it's used only by branches or selects by 1521 // swapping the operands of the branches or the selects. Returns true if success. 1522 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1523 Instruction *ExcludedUser, 1524 CHRScope *Scope) { 1525 for (User *U : ICmp->users()) { 1526 if (U == ExcludedUser) 1527 continue; 1528 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1529 continue; 1530 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1531 continue; 1532 return false; 1533 } 1534 for (User *U : ICmp->users()) { 1535 if (U == ExcludedUser) 1536 continue; 1537 if (auto *BI = dyn_cast<BranchInst>(U)) { 1538 assert(BI->isConditional() && "Must be conditional"); 1539 BI->swapSuccessors(); 1540 // Don't need to swap this in terms of 1541 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1542 // mean whehter the branch is likely go into the if-then rather than 1543 // successor0/successor1 and because we can tell which edge is the then or 1544 // the else one by comparing the destination to the region exit block. 1545 continue; 1546 } 1547 if (auto *SI = dyn_cast<SelectInst>(U)) { 1548 // Swap operands 1549 SI->swapValues(); 1550 SI->swapProfMetadata(); 1551 if (Scope->TrueBiasedSelects.count(SI)) { 1552 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1553 "Must not be already in"); 1554 Scope->FalseBiasedSelects.insert(SI); 1555 } else if (Scope->FalseBiasedSelects.count(SI)) { 1556 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1557 "Must not be already in"); 1558 Scope->TrueBiasedSelects.insert(SI); 1559 } 1560 continue; 1561 } 1562 llvm_unreachable("Must be a branch or a select"); 1563 } 1564 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1565 return true; 1566 } 1567 1568 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1569 // for a value that's defined in the scope but used outside it (meaning it's 1570 // alive at the exit block). 1571 static void insertTrivialPHIs(CHRScope *Scope, 1572 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1573 DenseSet<PHINode *> &TrivialPHIs) { 1574 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1575 for (RegInfo &RI : Scope->RegInfos) { 1576 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1577 // sub-Scopes. 1578 BlocksInScope.insert(BB); 1579 } 1580 } 1581 CHR_DEBUG({ 1582 dbgs() << "Inserting redundant phis\n"; 1583 for (BasicBlock *BB : BlocksInScope) 1584 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1585 }); 1586 for (BasicBlock *BB : BlocksInScope) { 1587 for (Instruction &I : *BB) { 1588 SmallVector<Instruction *, 8> Users; 1589 for (User *U : I.users()) { 1590 if (auto *UI = dyn_cast<Instruction>(U)) { 1591 if (BlocksInScope.count(UI->getParent()) == 0 && 1592 // Unless there's already a phi for I at the exit block. 1593 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1594 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1595 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1596 Users.push_back(UI); 1597 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1598 // There's a loop backedge from a block that's dominated by this 1599 // scope to the entry block. 1600 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1601 CHR_DEBUG(dbgs() 1602 << "Used at entry block (for a back edge) by a phi user " 1603 << *UI << "\n"); 1604 Users.push_back(UI); 1605 } 1606 } 1607 } 1608 if (Users.size() > 0) { 1609 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1610 // ExitBlock. Replace I with the new phi in UI unless UI is another 1611 // phi at ExitBlock. 1612 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1613 pred_end(ExitBlock)); 1614 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1615 &ExitBlock->front()); 1616 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1617 PN->addIncoming(&I, Pred); 1618 } 1619 TrivialPHIs.insert(PN); 1620 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1621 for (Instruction *UI : Users) { 1622 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1623 if (UI->getOperand(J) == &I) { 1624 UI->setOperand(J, PN); 1625 } 1626 } 1627 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1628 } 1629 } 1630 } 1631 } 1632 } 1633 1634 // Assert that all the CHR regions of the scope have a biased branch or select. 1635 static void LLVM_ATTRIBUTE_UNUSED 1636 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1637 #ifndef NDEBUG 1638 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1639 if (Scope->TrueBiasedRegions.count(RI.R) || 1640 Scope->FalseBiasedRegions.count(RI.R)) 1641 return true; 1642 for (SelectInst *SI : RI.Selects) 1643 if (Scope->TrueBiasedSelects.count(SI) || 1644 Scope->FalseBiasedSelects.count(SI)) 1645 return true; 1646 return false; 1647 }; 1648 for (RegInfo &RI : Scope->CHRRegions) { 1649 assert(HasBiasedBranchOrSelect(RI, Scope) && 1650 "Must have biased branch or select"); 1651 } 1652 #endif 1653 } 1654 1655 // Assert that all the condition values of the biased branches and selects have 1656 // been hoisted to the pre-entry block or outside of the scope. 1657 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1658 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1659 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1660 for (RegInfo &RI : Scope->CHRRegions) { 1661 Region *R = RI.R; 1662 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1663 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1664 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1665 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1666 Value *V = BI->getCondition(); 1667 CHR_DEBUG(dbgs() << *V << "\n"); 1668 if (auto *I = dyn_cast<Instruction>(V)) { 1669 (void)(I); // Unused in release build. 1670 assert((I->getParent() == PreEntryBlock || 1671 !Scope->contains(I)) && 1672 "Must have been hoisted to PreEntryBlock or outside the scope"); 1673 } 1674 } 1675 for (SelectInst *SI : RI.Selects) { 1676 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1677 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1678 if (!(IsTrueBiased || IsFalseBiased)) 1679 continue; 1680 Value *V = SI->getCondition(); 1681 CHR_DEBUG(dbgs() << *V << "\n"); 1682 if (auto *I = dyn_cast<Instruction>(V)) { 1683 (void)(I); // Unused in release build. 1684 assert((I->getParent() == PreEntryBlock || 1685 !Scope->contains(I)) && 1686 "Must have been hoisted to PreEntryBlock or outside the scope"); 1687 } 1688 } 1689 } 1690 } 1691 1692 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1693 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1694 1695 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1696 Region *FirstRegion = Scope->RegInfos[0].R; 1697 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1698 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1699 BasicBlock *ExitBlock = LastRegion->getExit(); 1700 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1701 1702 if (ExitBlock) { 1703 // Insert a trivial phi at the exit block (where the CHR hot path and the 1704 // cold path merges) for a value that's defined in the scope but used 1705 // outside it (meaning it's alive at the exit block). We will add the 1706 // incoming values for the CHR cold paths to it below. Without this, we'd 1707 // miss updating phi's for such values unless there happens to already be a 1708 // phi for that value there. 1709 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1710 } 1711 1712 // Split the entry block of the first region. The new block becomes the new 1713 // entry block of the first region. The old entry block becomes the block to 1714 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1715 // through the split, we update the entry of the first region after the split, 1716 // and Region only points to the entry and the exit blocks, rather than 1717 // keeping everything in a list or set, the blocks membership and the 1718 // entry/exit blocks of the region are still valid after the split. 1719 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1720 << " at " << *Scope->BranchInsertPoint << "\n"); 1721 BasicBlock *NewEntryBlock = 1722 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1723 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1724 "NewEntryBlock's only pred must be EntryBlock"); 1725 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1726 BasicBlock *PreEntryBlock = EntryBlock; 1727 1728 ValueToValueMapTy VMap; 1729 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1730 // hot path (originals) and a cold path (clones) and update the PHIs at the 1731 // exit block. 1732 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1733 1734 // Replace the old (placeholder) branch with the new (merged) conditional 1735 // branch. 1736 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1737 NewEntryBlock, VMap); 1738 1739 #ifndef NDEBUG 1740 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1741 #endif 1742 1743 // Hoist the conditional values of the branches/selects. 1744 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1745 1746 #ifndef NDEBUG 1747 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1748 #endif 1749 1750 // Create the combined branch condition and constant-fold the branches/selects 1751 // in the hot path. 1752 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1753 ProfileCount ? ProfileCount.getValue() : 0); 1754 } 1755 1756 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1757 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1758 // at the exit block. 1759 void CHR::cloneScopeBlocks(CHRScope *Scope, 1760 BasicBlock *PreEntryBlock, 1761 BasicBlock *ExitBlock, 1762 Region *LastRegion, 1763 ValueToValueMapTy &VMap) { 1764 // Clone all the blocks. The original blocks will be the hot-path 1765 // CHR-optimized code and the cloned blocks will be the original unoptimized 1766 // code. This is so that the block pointers from the 1767 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1768 // which CHR should apply to. 1769 SmallVector<BasicBlock*, 8> NewBlocks; 1770 for (RegInfo &RI : Scope->RegInfos) 1771 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1772 // sub-Scopes. 1773 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1774 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1775 NewBlocks.push_back(NewBB); 1776 VMap[BB] = NewBB; 1777 } 1778 1779 // Place the cloned blocks right after the original blocks (right before the 1780 // exit block of.) 1781 if (ExitBlock) 1782 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1783 F.getBasicBlockList(), 1784 NewBlocks[0]->getIterator(), F.end()); 1785 1786 // Update the cloned blocks/instructions to refer to themselves. 1787 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1788 for (Instruction &I : *NewBlocks[i]) 1789 RemapInstruction(&I, VMap, 1790 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1791 1792 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1793 // the top-level region but we don't need to add PHIs. The trivial PHIs 1794 // inserted above will be updated here. 1795 if (ExitBlock) 1796 for (PHINode &PN : ExitBlock->phis()) 1797 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1798 ++I) { 1799 BasicBlock *Pred = PN.getIncomingBlock(I); 1800 if (LastRegion->contains(Pred)) { 1801 Value *V = PN.getIncomingValue(I); 1802 auto It = VMap.find(V); 1803 if (It != VMap.end()) V = It->second; 1804 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1805 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1806 } 1807 } 1808 } 1809 1810 // A helper for transformScope. Replace the old (placeholder) branch with the 1811 // new (merged) conditional branch. 1812 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1813 BasicBlock *EntryBlock, 1814 BasicBlock *NewEntryBlock, 1815 ValueToValueMapTy &VMap) { 1816 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1817 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1818 "SplitBlock did not work correctly!"); 1819 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1820 "NewEntryBlock's only pred must be EntryBlock"); 1821 assert(VMap.find(NewEntryBlock) != VMap.end() && 1822 "NewEntryBlock must have been copied"); 1823 OldBR->dropAllReferences(); 1824 OldBR->eraseFromParent(); 1825 // The true predicate is a placeholder. It will be replaced later in 1826 // fixupBranchesAndSelects(). 1827 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1828 cast<BasicBlock>(VMap[NewEntryBlock]), 1829 ConstantInt::getTrue(F.getContext())); 1830 PreEntryBlock->getInstList().push_back(NewBR); 1831 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1832 "NewEntryBlock's only pred must be EntryBlock"); 1833 return NewBR; 1834 } 1835 1836 // A helper for transformScopes. Create the combined branch condition and 1837 // constant-fold the branches/selects in the hot path. 1838 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1839 BasicBlock *PreEntryBlock, 1840 BranchInst *MergedBR, 1841 uint64_t ProfileCount) { 1842 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1843 BranchProbability CHRBranchBias(1, 1); 1844 uint64_t NumCHRedBranches = 0; 1845 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1846 for (RegInfo &RI : Scope->CHRRegions) { 1847 Region *R = RI.R; 1848 if (RI.HasBranch) { 1849 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1850 ++NumCHRedBranches; 1851 } 1852 for (SelectInst *SI : RI.Selects) { 1853 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1854 ++NumCHRedBranches; 1855 } 1856 } 1857 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1858 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1859 ORE.emit([&]() { 1860 return OptimizationRemark(DEBUG_TYPE, 1861 "CHR", 1862 // Refer to the hot (original) path 1863 MergedBR->getSuccessor(0)->getTerminator()) 1864 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1865 << " branches or selects"; 1866 }); 1867 MergedBR->setCondition(MergedCondition); 1868 uint32_t Weights[] = { 1869 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1870 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1871 }; 1872 MDBuilder MDB(F.getContext()); 1873 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1874 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1875 << "\n"); 1876 } 1877 1878 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1879 // and constant-fold a branch in the hot path. 1880 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1881 IRBuilder<> &IRB, 1882 Value *&MergedCondition, 1883 BranchProbability &CHRBranchBias) { 1884 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1885 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1886 "Must be truthy or falsy"); 1887 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1888 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1889 "Must be in the bias map"); 1890 BranchProbability Bias = BranchBiasMap[R]; 1891 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1892 // Take the min. 1893 if (CHRBranchBias > Bias) 1894 CHRBranchBias = Bias; 1895 BasicBlock *IfThen = BI->getSuccessor(1); 1896 BasicBlock *IfElse = BI->getSuccessor(0); 1897 BasicBlock *RegionExitBlock = R->getExit(); 1898 assert(RegionExitBlock && "Null ExitBlock"); 1899 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1900 IfThen != IfElse && "Invariant from findScopes"); 1901 if (IfThen == RegionExitBlock) { 1902 // Swap them so that IfThen means going into it and IfElse means skipping 1903 // it. 1904 std::swap(IfThen, IfElse); 1905 } 1906 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1907 << " IfElse " << IfElse->getName() << "\n"); 1908 Value *Cond = BI->getCondition(); 1909 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1910 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1911 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1912 MergedCondition); 1913 // Constant-fold the branch at ClonedEntryBlock. 1914 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1915 "The successor shouldn't change"); 1916 Value *NewCondition = ConditionTrue ? 1917 ConstantInt::getTrue(F.getContext()) : 1918 ConstantInt::getFalse(F.getContext()); 1919 BI->setCondition(NewCondition); 1920 } 1921 1922 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1923 // and constant-fold a select in the hot path. 1924 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1925 IRBuilder<> &IRB, 1926 Value *&MergedCondition, 1927 BranchProbability &CHRBranchBias) { 1928 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1929 assert((IsTrueBiased || 1930 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1931 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1932 "Must be in the bias map"); 1933 BranchProbability Bias = SelectBiasMap[SI]; 1934 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1935 // Take the min. 1936 if (CHRBranchBias > Bias) 1937 CHRBranchBias = Bias; 1938 Value *Cond = SI->getCondition(); 1939 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1940 MergedCondition); 1941 Value *NewCondition = IsTrueBiased ? 1942 ConstantInt::getTrue(F.getContext()) : 1943 ConstantInt::getFalse(F.getContext()); 1944 SI->setCondition(NewCondition); 1945 } 1946 1947 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1948 // condition. 1949 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1950 Instruction *BranchOrSelect, 1951 CHRScope *Scope, 1952 IRBuilder<> &IRB, 1953 Value *&MergedCondition) { 1954 if (IsTrueBiased) { 1955 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1956 } else { 1957 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1958 // branch, negate the icmp predicate and swap the branch targets and avoid 1959 // inserting an Xor to negate Cond. 1960 bool Done = false; 1961 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1962 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1963 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1964 Done = true; 1965 } 1966 if (!Done) { 1967 Value *Negate = IRB.CreateXor( 1968 ConstantInt::getTrue(F.getContext()), Cond); 1969 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1970 } 1971 } 1972 } 1973 1974 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1975 unsigned I = 0; 1976 DenseSet<PHINode *> TrivialPHIs; 1977 for (CHRScope *Scope : CHRScopes) { 1978 transformScopes(Scope, TrivialPHIs); 1979 CHR_DEBUG( 1980 std::ostringstream oss; 1981 oss << " after transformScopes " << I++; 1982 dumpIR(F, oss.str().c_str(), nullptr)); 1983 (void)I; 1984 } 1985 } 1986 1987 static void LLVM_ATTRIBUTE_UNUSED 1988 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1989 dbgs() << Label << " " << Scopes.size() << "\n"; 1990 for (CHRScope *Scope : Scopes) { 1991 dbgs() << *Scope << "\n"; 1992 } 1993 } 1994 1995 bool CHR::run() { 1996 if (!shouldApply(F, PSI)) 1997 return false; 1998 1999 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2000 2001 bool Changed = false; 2002 { 2003 CHR_DEBUG( 2004 dbgs() << "RegionInfo:\n"; 2005 RI.print(dbgs())); 2006 2007 // Recursively traverse the region tree and find regions that have biased 2008 // branches and/or selects and create scopes. 2009 SmallVector<CHRScope *, 8> AllScopes; 2010 findScopes(AllScopes); 2011 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2012 2013 // Split the scopes if 1) the conditiona values of the biased 2014 // branches/selects of the inner/lower scope can't be hoisted up to the 2015 // outermost/uppermost scope entry, or 2) the condition values of the biased 2016 // branches/selects in a scope (including subscopes) don't share at least 2017 // one common value. 2018 SmallVector<CHRScope *, 8> SplitScopes; 2019 splitScopes(AllScopes, SplitScopes); 2020 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2021 2022 // After splitting, set the biased regions and selects of a scope (a tree 2023 // root) that include those of the subscopes. 2024 classifyBiasedScopes(SplitScopes); 2025 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2026 2027 // Filter out the scopes that has only one biased region or select (CHR 2028 // isn't useful in such a case). 2029 SmallVector<CHRScope *, 8> FilteredScopes; 2030 filterScopes(SplitScopes, FilteredScopes); 2031 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2032 2033 // Set the regions to be CHR'ed and their hoist stops for each scope. 2034 SmallVector<CHRScope *, 8> SetScopes; 2035 setCHRRegions(FilteredScopes, SetScopes); 2036 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2037 2038 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2039 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2040 // to the hot path, rather than both hot and cold paths. 2041 SmallVector<CHRScope *, 8> SortedScopes; 2042 sortScopes(SetScopes, SortedScopes); 2043 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2044 2045 CHR_DEBUG( 2046 dbgs() << "RegionInfo:\n"; 2047 RI.print(dbgs())); 2048 2049 // Apply the CHR transformation. 2050 if (!SortedScopes.empty()) { 2051 transformScopes(SortedScopes); 2052 Changed = true; 2053 } 2054 } 2055 2056 if (Changed) { 2057 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2058 ORE.emit([&]() { 2059 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2060 << ore::NV("Function", &F) << " " 2061 << "Reduced the number of branches in hot paths by " 2062 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2063 << " (static) and " 2064 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2065 << " (weighted by PGO count)"; 2066 }); 2067 } 2068 2069 return Changed; 2070 } 2071 2072 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 2073 BlockFrequencyInfo &BFI = 2074 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 2075 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2076 ProfileSummaryInfo &PSI = 2077 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2078 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2079 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 2080 std::make_unique<OptimizationRemarkEmitter>(&F); 2081 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); 2082 } 2083 2084 namespace llvm { 2085 2086 ControlHeightReductionPass::ControlHeightReductionPass() { 2087 parseCHRFilterFiles(); 2088 } 2089 2090 PreservedAnalyses ControlHeightReductionPass::run( 2091 Function &F, 2092 FunctionAnalysisManager &FAM) { 2093 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2094 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2095 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2096 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2097 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2098 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2099 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2100 if (!Changed) 2101 return PreservedAnalyses::all(); 2102 auto PA = PreservedAnalyses(); 2103 PA.preserve<GlobalsAA>(); 2104 return PA; 2105 } 2106 2107 } // namespace llvm 2108