1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/InitializePasses.h" 33 #include "llvm/Support/BranchProbability.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/MemoryBuffer.h" 36 #include "llvm/Transforms/Utils.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include "llvm/Transforms/Utils/ValueMapper.h" 40 41 #include <set> 42 #include <sstream> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "chr" 47 48 #define CHR_DEBUG(X) LLVM_DEBUG(X) 49 50 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 51 cl::desc("Apply CHR for all functions")); 52 53 static cl::opt<double> CHRBiasThreshold( 54 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 55 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 56 57 static cl::opt<unsigned> CHRMergeThreshold( 58 "chr-merge-threshold", cl::init(2), cl::Hidden, 59 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 60 61 static cl::opt<std::string> CHRModuleList( 62 "chr-module-list", cl::init(""), cl::Hidden, 63 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 64 65 static cl::opt<std::string> CHRFunctionList( 66 "chr-function-list", cl::init(""), cl::Hidden, 67 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 68 69 static StringSet<> CHRModules; 70 static StringSet<> CHRFunctions; 71 72 static void parseCHRFilterFiles() { 73 if (!CHRModuleList.empty()) { 74 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 75 if (!FileOrErr) { 76 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 77 std::exit(1); 78 } 79 StringRef Buf = FileOrErr->get()->getBuffer(); 80 SmallVector<StringRef, 0> Lines; 81 Buf.split(Lines, '\n'); 82 for (StringRef Line : Lines) { 83 Line = Line.trim(); 84 if (!Line.empty()) 85 CHRModules.insert(Line); 86 } 87 } 88 if (!CHRFunctionList.empty()) { 89 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 90 if (!FileOrErr) { 91 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 92 std::exit(1); 93 } 94 StringRef Buf = FileOrErr->get()->getBuffer(); 95 SmallVector<StringRef, 0> Lines; 96 Buf.split(Lines, '\n'); 97 for (StringRef Line : Lines) { 98 Line = Line.trim(); 99 if (!Line.empty()) 100 CHRFunctions.insert(Line); 101 } 102 } 103 } 104 105 namespace { 106 107 struct CHRStats { 108 CHRStats() = default; 109 void print(raw_ostream &OS) const { 110 OS << "CHRStats: NumBranches " << NumBranches 111 << " NumBranchesDelta " << NumBranchesDelta 112 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 113 } 114 // The original number of conditional branches / selects 115 uint64_t NumBranches = 0; 116 // The decrease of the number of conditional branches / selects in the hot 117 // paths due to CHR. 118 uint64_t NumBranchesDelta = 0; 119 // NumBranchesDelta weighted by the profile count at the scope entry. 120 uint64_t WeightedNumBranchesDelta = 0; 121 }; 122 123 // RegInfo - some properties of a Region. 124 struct RegInfo { 125 RegInfo() = default; 126 RegInfo(Region *RegionIn) : R(RegionIn) {} 127 Region *R = nullptr; 128 bool HasBranch = false; 129 SmallVector<SelectInst *, 8> Selects; 130 }; 131 132 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 133 134 // CHRScope - a sequence of regions to CHR together. It corresponds to a 135 // sequence of conditional blocks. It can have subscopes which correspond to 136 // nested conditional blocks. Nested CHRScopes form a tree. 137 class CHRScope { 138 public: 139 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 140 assert(RI.R && "Null RegionIn"); 141 RegInfos.push_back(RI); 142 } 143 144 Region *getParentRegion() { 145 assert(RegInfos.size() > 0 && "Empty CHRScope"); 146 Region *Parent = RegInfos[0].R->getParent(); 147 assert(Parent && "Unexpected to call this on the top-level region"); 148 return Parent; 149 } 150 151 BasicBlock *getEntryBlock() { 152 assert(RegInfos.size() > 0 && "Empty CHRScope"); 153 return RegInfos.front().R->getEntry(); 154 } 155 156 BasicBlock *getExitBlock() { 157 assert(RegInfos.size() > 0 && "Empty CHRScope"); 158 return RegInfos.back().R->getExit(); 159 } 160 161 bool appendable(CHRScope *Next) { 162 // The next scope is appendable only if this scope is directly connected to 163 // it (which implies it post-dominates this scope) and this scope dominates 164 // it (no edge to the next scope outside this scope). 165 BasicBlock *NextEntry = Next->getEntryBlock(); 166 if (getExitBlock() != NextEntry) 167 // Not directly connected. 168 return false; 169 Region *LastRegion = RegInfos.back().R; 170 for (BasicBlock *Pred : predecessors(NextEntry)) 171 if (!LastRegion->contains(Pred)) 172 // There's an edge going into the entry of the next scope from outside 173 // of this scope. 174 return false; 175 return true; 176 } 177 178 void append(CHRScope *Next) { 179 assert(RegInfos.size() > 0 && "Empty CHRScope"); 180 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 181 assert(getParentRegion() == Next->getParentRegion() && 182 "Must be siblings"); 183 assert(getExitBlock() == Next->getEntryBlock() && 184 "Must be adjacent"); 185 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 186 Subs.append(Next->Subs.begin(), Next->Subs.end()); 187 } 188 189 void addSub(CHRScope *SubIn) { 190 #ifndef NDEBUG 191 bool IsChild = false; 192 for (RegInfo &RI : RegInfos) 193 if (RI.R == SubIn->getParentRegion()) { 194 IsChild = true; 195 break; 196 } 197 assert(IsChild && "Must be a child"); 198 #endif 199 Subs.push_back(SubIn); 200 } 201 202 // Split this scope at the boundary region into two, which will belong to the 203 // tail and returns the tail. 204 CHRScope *split(Region *Boundary) { 205 assert(Boundary && "Boundary null"); 206 assert(RegInfos.begin()->R != Boundary && 207 "Can't be split at beginning"); 208 auto BoundaryIt = llvm::find_if( 209 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 210 if (BoundaryIt == RegInfos.end()) 211 return nullptr; 212 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 213 DenseSet<Region *> TailRegionSet; 214 for (const RegInfo &RI : TailRegInfos) 215 TailRegionSet.insert(RI.R); 216 217 auto TailIt = 218 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 219 assert(Sub && "null Sub"); 220 Region *Parent = Sub->getParentRegion(); 221 if (TailRegionSet.count(Parent)) 222 return false; 223 224 assert(llvm::any_of( 225 RegInfos, 226 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 227 "Must be in head"); 228 return true; 229 }); 230 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 231 232 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 233 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 234 RegInfos.erase(BoundaryIt, RegInfos.end()); 235 Subs.erase(TailIt, Subs.end()); 236 return Scope; 237 } 238 239 bool contains(Instruction *I) const { 240 BasicBlock *Parent = I->getParent(); 241 for (const RegInfo &RI : RegInfos) 242 if (RI.R->contains(Parent)) 243 return true; 244 return false; 245 } 246 247 void print(raw_ostream &OS) const; 248 249 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 250 SmallVector<CHRScope *, 8> Subs; // Subscopes. 251 252 // The instruction at which to insert the CHR conditional branch (and hoist 253 // the dependent condition values). 254 Instruction *BranchInsertPoint; 255 256 // True-biased and false-biased regions (conditional blocks), 257 // respectively. Used only for the outermost scope and includes regions in 258 // subscopes. The rest are unbiased. 259 DenseSet<Region *> TrueBiasedRegions; 260 DenseSet<Region *> FalseBiasedRegions; 261 // Among the biased regions, the regions that get CHRed. 262 SmallVector<RegInfo, 8> CHRRegions; 263 264 // True-biased and false-biased selects, respectively. Used only for the 265 // outermost scope and includes ones in subscopes. 266 DenseSet<SelectInst *> TrueBiasedSelects; 267 DenseSet<SelectInst *> FalseBiasedSelects; 268 269 // Map from one of the above regions to the instructions to stop 270 // hoisting instructions at through use-def chains. 271 HoistStopMapTy HoistStopMap; 272 273 private: 274 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 275 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 276 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 277 }; 278 279 class CHR { 280 public: 281 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 282 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 283 OptimizationRemarkEmitter &OREin) 284 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 285 286 ~CHR() { 287 for (CHRScope *Scope : Scopes) { 288 delete Scope; 289 } 290 } 291 292 bool run(); 293 294 private: 295 // See the comments in CHR::run() for the high level flow of the algorithm and 296 // what the following functions do. 297 298 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 299 Region *R = RI.getTopLevelRegion(); 300 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 301 Output.push_back(Scope); 302 } 303 } 304 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 305 SmallVectorImpl<CHRScope *> &Scopes); 306 CHRScope *findScope(Region *R); 307 void checkScopeHoistable(CHRScope *Scope); 308 309 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 310 SmallVectorImpl<CHRScope *> &Output); 311 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 312 CHRScope *Outer, 313 DenseSet<Value *> *OuterConditionValues, 314 Instruction *OuterInsertPoint, 315 SmallVectorImpl<CHRScope *> &Output, 316 DenseSet<Instruction *> &Unhoistables); 317 318 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 319 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 320 321 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 322 SmallVectorImpl<CHRScope *> &Output); 323 324 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 325 SmallVectorImpl<CHRScope *> &Output); 326 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 327 328 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 329 SmallVectorImpl<CHRScope *> &Output); 330 331 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 332 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 333 void cloneScopeBlocks(CHRScope *Scope, 334 BasicBlock *PreEntryBlock, 335 BasicBlock *ExitBlock, 336 Region *LastRegion, 337 ValueToValueMapTy &VMap); 338 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 339 BasicBlock *EntryBlock, 340 BasicBlock *NewEntryBlock, 341 ValueToValueMapTy &VMap); 342 void fixupBranchesAndSelects(CHRScope *Scope, 343 BasicBlock *PreEntryBlock, 344 BranchInst *MergedBR, 345 uint64_t ProfileCount); 346 void fixupBranch(Region *R, 347 CHRScope *Scope, 348 IRBuilder<> &IRB, 349 Value *&MergedCondition, BranchProbability &CHRBranchBias); 350 void fixupSelect(SelectInst* SI, 351 CHRScope *Scope, 352 IRBuilder<> &IRB, 353 Value *&MergedCondition, BranchProbability &CHRBranchBias); 354 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 355 Instruction *BranchOrSelect, 356 CHRScope *Scope, 357 IRBuilder<> &IRB, 358 Value *&MergedCondition); 359 360 Function &F; 361 BlockFrequencyInfo &BFI; 362 DominatorTree &DT; 363 ProfileSummaryInfo &PSI; 364 RegionInfo &RI; 365 OptimizationRemarkEmitter &ORE; 366 CHRStats Stats; 367 368 // All the true-biased regions in the function 369 DenseSet<Region *> TrueBiasedRegionsGlobal; 370 // All the false-biased regions in the function 371 DenseSet<Region *> FalseBiasedRegionsGlobal; 372 // All the true-biased selects in the function 373 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 374 // All the false-biased selects in the function 375 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 376 // A map from biased regions to their branch bias 377 DenseMap<Region *, BranchProbability> BranchBiasMap; 378 // A map from biased selects to their branch bias 379 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 380 // All the scopes. 381 DenseSet<CHRScope *> Scopes; 382 }; 383 384 } // end anonymous namespace 385 386 static inline 387 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 388 const CHRStats &Stats) { 389 Stats.print(OS); 390 return OS; 391 } 392 393 static inline 394 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 395 Scope.print(OS); 396 return OS; 397 } 398 399 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 400 if (ForceCHR) 401 return true; 402 403 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 404 if (CHRModules.count(F.getParent()->getName())) 405 return true; 406 return CHRFunctions.count(F.getName()); 407 } 408 409 assert(PSI.hasProfileSummary() && "Empty PSI?"); 410 return PSI.isFunctionEntryHot(&F); 411 } 412 413 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 414 CHRStats *Stats) { 415 StringRef FuncName = F.getName(); 416 StringRef ModuleName = F.getParent()->getName(); 417 (void)(FuncName); // Unused in release build. 418 (void)(ModuleName); // Unused in release build. 419 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 420 << FuncName); 421 if (Stats) 422 CHR_DEBUG(dbgs() << " " << *Stats); 423 CHR_DEBUG(dbgs() << "\n"); 424 CHR_DEBUG(F.dump()); 425 } 426 427 void CHRScope::print(raw_ostream &OS) const { 428 assert(RegInfos.size() > 0 && "Empty CHRScope"); 429 OS << "CHRScope["; 430 OS << RegInfos.size() << ", Regions["; 431 for (const RegInfo &RI : RegInfos) { 432 OS << RI.R->getNameStr(); 433 if (RI.HasBranch) 434 OS << " B"; 435 if (RI.Selects.size() > 0) 436 OS << " S" << RI.Selects.size(); 437 OS << ", "; 438 } 439 if (RegInfos[0].R->getParent()) { 440 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 441 } else { 442 // top level region 443 OS << "]"; 444 } 445 OS << ", Subs["; 446 for (CHRScope *Sub : Subs) { 447 OS << *Sub << ", "; 448 } 449 OS << "]]"; 450 } 451 452 // Return true if the given instruction type can be hoisted by CHR. 453 static bool isHoistableInstructionType(Instruction *I) { 454 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 455 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 456 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 457 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 458 isa<InsertValueInst>(I); 459 } 460 461 // Return true if the given instruction can be hoisted by CHR. 462 static bool isHoistable(Instruction *I, DominatorTree &DT) { 463 if (!isHoistableInstructionType(I)) 464 return false; 465 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 466 } 467 468 // Recursively traverse the use-def chains of the given value and return a set 469 // of the unhoistable base values defined within the scope (excluding the 470 // first-region entry block) or the (hoistable or unhoistable) base values that 471 // are defined outside (including the first-region entry block) of the 472 // scope. The returned set doesn't include constants. 473 static const std::set<Value *> & 474 getBaseValues(Value *V, DominatorTree &DT, 475 DenseMap<Value *, std::set<Value *>> &Visited) { 476 auto It = Visited.find(V); 477 if (It != Visited.end()) { 478 return It->second; 479 } 480 std::set<Value *> Result; 481 if (auto *I = dyn_cast<Instruction>(V)) { 482 // We don't stop at a block that's not in the Scope because we would miss 483 // some instructions that are based on the same base values if we stop 484 // there. 485 if (!isHoistable(I, DT)) { 486 Result.insert(I); 487 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 488 } 489 // I is hoistable above the Scope. 490 for (Value *Op : I->operands()) { 491 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 492 Result.insert(OpResult.begin(), OpResult.end()); 493 } 494 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 495 } 496 if (isa<Argument>(V)) { 497 Result.insert(V); 498 } 499 // We don't include others like constants because those won't lead to any 500 // chance of folding of conditions (eg two bit checks merged into one check) 501 // after CHR. 502 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 503 } 504 505 // Return true if V is already hoisted or can be hoisted (along with its 506 // operands) above the insert point. When it returns true and HoistStops is 507 // non-null, the instructions to stop hoisting at through the use-def chains are 508 // inserted into HoistStops. 509 static bool 510 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 511 DenseSet<Instruction *> &Unhoistables, 512 DenseSet<Instruction *> *HoistStops, 513 DenseMap<Instruction *, bool> &Visited) { 514 assert(InsertPoint && "Null InsertPoint"); 515 if (auto *I = dyn_cast<Instruction>(V)) { 516 auto It = Visited.find(I); 517 if (It != Visited.end()) { 518 return It->second; 519 } 520 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 521 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 522 if (Unhoistables.count(I)) { 523 // Don't hoist if they are not to be hoisted. 524 Visited[I] = false; 525 return false; 526 } 527 if (DT.dominates(I, InsertPoint)) { 528 // We are already above the insert point. Stop here. 529 if (HoistStops) 530 HoistStops->insert(I); 531 Visited[I] = true; 532 return true; 533 } 534 // We aren't not above the insert point, check if we can hoist it above the 535 // insert point. 536 if (isHoistable(I, DT)) { 537 // Check operands first. 538 DenseSet<Instruction *> OpsHoistStops; 539 bool AllOpsHoisted = true; 540 for (Value *Op : I->operands()) { 541 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 542 Visited)) { 543 AllOpsHoisted = false; 544 break; 545 } 546 } 547 if (AllOpsHoisted) { 548 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 549 if (HoistStops) 550 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 551 Visited[I] = true; 552 return true; 553 } 554 } 555 Visited[I] = false; 556 return false; 557 } 558 // Non-instructions are considered hoistable. 559 return true; 560 } 561 562 // Returns true and sets the true probability and false probability of an 563 // MD_prof metadata if it's well-formed. 564 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 565 BranchProbability &FalseProb) { 566 if (!MD) return false; 567 MDString *MDName = cast<MDString>(MD->getOperand(0)); 568 if (MDName->getString() != "branch_weights" || 569 MD->getNumOperands() != 3) 570 return false; 571 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 572 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 573 if (!TrueWeight || !FalseWeight) 574 return false; 575 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 576 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 577 uint64_t SumWt = TrueWt + FalseWt; 578 579 assert(SumWt >= TrueWt && SumWt >= FalseWt && 580 "Overflow calculating branch probabilities."); 581 582 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 583 if (SumWt == 0) 584 return false; 585 586 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 587 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 588 return true; 589 } 590 591 static BranchProbability getCHRBiasThreshold() { 592 return BranchProbability::getBranchProbability( 593 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 594 } 595 596 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 597 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 598 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 599 // false. 600 template <typename K, typename S, typename M> 601 static bool checkBias(K *Key, BranchProbability TrueProb, 602 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 603 M &BiasMap) { 604 BranchProbability Threshold = getCHRBiasThreshold(); 605 if (TrueProb >= Threshold) { 606 TrueSet.insert(Key); 607 BiasMap[Key] = TrueProb; 608 return true; 609 } else if (FalseProb >= Threshold) { 610 FalseSet.insert(Key); 611 BiasMap[Key] = FalseProb; 612 return true; 613 } 614 return false; 615 } 616 617 // Returns true and insert a region into the right biased set and the map if the 618 // branch of the region is biased. 619 static bool checkBiasedBranch(BranchInst *BI, Region *R, 620 DenseSet<Region *> &TrueBiasedRegionsGlobal, 621 DenseSet<Region *> &FalseBiasedRegionsGlobal, 622 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 623 if (!BI->isConditional()) 624 return false; 625 BranchProbability ThenProb, ElseProb; 626 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 627 ThenProb, ElseProb)) 628 return false; 629 BasicBlock *IfThen = BI->getSuccessor(0); 630 BasicBlock *IfElse = BI->getSuccessor(1); 631 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 632 IfThen != IfElse && 633 "Invariant from findScopes"); 634 if (IfThen == R->getExit()) { 635 // Swap them so that IfThen/ThenProb means going into the conditional code 636 // and IfElse/ElseProb means skipping it. 637 std::swap(IfThen, IfElse); 638 std::swap(ThenProb, ElseProb); 639 } 640 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 641 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 642 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 643 return checkBias(R, ThenProb, ElseProb, 644 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 645 BranchBiasMap); 646 } 647 648 // Returns true and insert a select into the right biased set and the map if the 649 // select is biased. 650 static bool checkBiasedSelect( 651 SelectInst *SI, Region *R, 652 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 653 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 654 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 655 BranchProbability TrueProb, FalseProb; 656 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 657 TrueProb, FalseProb)) 658 return false; 659 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 660 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 661 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 662 return checkBias(SI, TrueProb, FalseProb, 663 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 664 SelectBiasMap); 665 } 666 667 // Returns the instruction at which to hoist the dependent condition values and 668 // insert the CHR branch for a region. This is the terminator branch in the 669 // entry block or the first select in the entry block, if any. 670 static Instruction* getBranchInsertPoint(RegInfo &RI) { 671 Region *R = RI.R; 672 BasicBlock *EntryBB = R->getEntry(); 673 // The hoist point is by default the terminator of the entry block, which is 674 // the same as the branch instruction if RI.HasBranch is true. 675 Instruction *HoistPoint = EntryBB->getTerminator(); 676 for (SelectInst *SI : RI.Selects) { 677 if (SI->getParent() == EntryBB) { 678 // Pick the first select in Selects in the entry block. Note Selects is 679 // sorted in the instruction order within a block (asserted below). 680 HoistPoint = SI; 681 break; 682 } 683 } 684 assert(HoistPoint && "Null HoistPoint"); 685 #ifndef NDEBUG 686 // Check that HoistPoint is the first one in Selects in the entry block, 687 // if any. 688 DenseSet<Instruction *> EntryBlockSelectSet; 689 for (SelectInst *SI : RI.Selects) { 690 if (SI->getParent() == EntryBB) { 691 EntryBlockSelectSet.insert(SI); 692 } 693 } 694 for (Instruction &I : *EntryBB) { 695 if (EntryBlockSelectSet.contains(&I)) { 696 assert(&I == HoistPoint && 697 "HoistPoint must be the first one in Selects"); 698 break; 699 } 700 } 701 #endif 702 return HoistPoint; 703 } 704 705 // Find a CHR scope in the given region. 706 CHRScope * CHR::findScope(Region *R) { 707 CHRScope *Result = nullptr; 708 BasicBlock *Entry = R->getEntry(); 709 BasicBlock *Exit = R->getExit(); // null if top level. 710 assert(Entry && "Entry must not be null"); 711 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 712 "Only top level region has a null exit"); 713 if (Entry) 714 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 715 else 716 CHR_DEBUG(dbgs() << "Entry null\n"); 717 if (Exit) 718 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 719 else 720 CHR_DEBUG(dbgs() << "Exit null\n"); 721 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 722 // to this region). 723 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 724 if (EntryInSubregion) 725 return nullptr; 726 // Exclude loops 727 for (BasicBlock *Pred : predecessors(Entry)) 728 if (R->contains(Pred)) 729 return nullptr; 730 // If any of the basic blocks have address taken, we must skip this region 731 // because we cannot clone basic blocks that have address taken. 732 for (BasicBlock *BB : R->blocks()) { 733 if (BB->hasAddressTaken()) 734 return nullptr; 735 // If we encounter llvm.coro.id, skip this region because if the basic block 736 // is cloned, we end up inserting a token type PHI node to the block with 737 // llvm.coro.begin. 738 // FIXME: This could lead to less optimal codegen, because the region is 739 // excluded, it can prevent CHR from merging adjacent regions into bigger 740 // scope and hoisting more branches. 741 for (Instruction &I : *BB) 742 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 743 if (II->getIntrinsicID() == Intrinsic::coro_id) 744 return nullptr; 745 } 746 747 if (Exit) { 748 // Try to find an if-then block (check if R is an if-then). 749 // if (cond) { 750 // ... 751 // } 752 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 753 if (BI) 754 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 755 else 756 CHR_DEBUG(dbgs() << "BI null\n"); 757 if (BI && BI->isConditional()) { 758 BasicBlock *S0 = BI->getSuccessor(0); 759 BasicBlock *S1 = BI->getSuccessor(1); 760 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 761 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 762 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 763 RegInfo RI(R); 764 RI.HasBranch = checkBiasedBranch( 765 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 766 BranchBiasMap); 767 Result = new CHRScope(RI); 768 Scopes.insert(Result); 769 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 770 ++Stats.NumBranches; 771 if (!RI.HasBranch) { 772 ORE.emit([&]() { 773 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 774 << "Branch not biased"; 775 }); 776 } 777 } 778 } 779 } 780 { 781 // Try to look for selects in the direct child blocks (as opposed to in 782 // subregions) of R. 783 // ... 784 // if (..) { // Some subregion 785 // ... 786 // } 787 // if (..) { // Some subregion 788 // ... 789 // } 790 // ... 791 // a = cond ? b : c; 792 // ... 793 SmallVector<SelectInst *, 8> Selects; 794 for (RegionNode *E : R->elements()) { 795 if (E->isSubRegion()) 796 continue; 797 // This returns the basic block of E if E is a direct child of R (not a 798 // subregion.) 799 BasicBlock *BB = E->getEntry(); 800 // Need to push in the order to make it easier to find the first Select 801 // later. 802 for (Instruction &I : *BB) { 803 if (auto *SI = dyn_cast<SelectInst>(&I)) { 804 Selects.push_back(SI); 805 ++Stats.NumBranches; 806 } 807 } 808 } 809 if (Selects.size() > 0) { 810 auto AddSelects = [&](RegInfo &RI) { 811 for (auto *SI : Selects) 812 if (checkBiasedSelect(SI, RI.R, 813 TrueBiasedSelectsGlobal, 814 FalseBiasedSelectsGlobal, 815 SelectBiasMap)) 816 RI.Selects.push_back(SI); 817 else 818 ORE.emit([&]() { 819 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 820 << "Select not biased"; 821 }); 822 }; 823 if (!Result) { 824 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 825 RegInfo RI(R); 826 AddSelects(RI); 827 Result = new CHRScope(RI); 828 Scopes.insert(Result); 829 } else { 830 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 831 AddSelects(Result->RegInfos[0]); 832 } 833 } 834 } 835 836 if (Result) { 837 checkScopeHoistable(Result); 838 } 839 return Result; 840 } 841 842 // Check that any of the branch and the selects in the region could be 843 // hoisted above the the CHR branch insert point (the most dominating of 844 // them, either the branch (at the end of the first block) or the first 845 // select in the first block). If the branch can't be hoisted, drop the 846 // selects in the first blocks. 847 // 848 // For example, for the following scope/region with selects, we want to insert 849 // the merged branch right before the first select in the first/entry block by 850 // hoisting c1, c2, c3, and c4. 851 // 852 // // Branch insert point here. 853 // a = c1 ? b : c; // Select 1 854 // d = c2 ? e : f; // Select 2 855 // if (c3) { // Branch 856 // ... 857 // c4 = foo() // A call. 858 // g = c4 ? h : i; // Select 3 859 // } 860 // 861 // But suppose we can't hoist c4 because it's dependent on the preceding 862 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 863 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 864 void CHR::checkScopeHoistable(CHRScope *Scope) { 865 RegInfo &RI = Scope->RegInfos[0]; 866 Region *R = RI.R; 867 BasicBlock *EntryBB = R->getEntry(); 868 auto *Branch = RI.HasBranch ? 869 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 870 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 871 if (RI.HasBranch || !Selects.empty()) { 872 Instruction *InsertPoint = getBranchInsertPoint(RI); 873 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 874 // Avoid a data dependence from a select or a branch to a(nother) 875 // select. Note no instruction can't data-depend on a branch (a branch 876 // instruction doesn't produce a value). 877 DenseSet<Instruction *> Unhoistables; 878 // Initialize Unhoistables with the selects. 879 for (SelectInst *SI : Selects) { 880 Unhoistables.insert(SI); 881 } 882 // Remove Selects that can't be hoisted. 883 for (auto it = Selects.begin(); it != Selects.end(); ) { 884 SelectInst *SI = *it; 885 if (SI == InsertPoint) { 886 ++it; 887 continue; 888 } 889 DenseMap<Instruction *, bool> Visited; 890 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 891 DT, Unhoistables, nullptr, Visited); 892 if (!IsHoistable) { 893 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 894 ORE.emit([&]() { 895 return OptimizationRemarkMissed(DEBUG_TYPE, 896 "DropUnhoistableSelect", SI) 897 << "Dropped unhoistable select"; 898 }); 899 it = Selects.erase(it); 900 // Since we are dropping the select here, we also drop it from 901 // Unhoistables. 902 Unhoistables.erase(SI); 903 } else 904 ++it; 905 } 906 // Update InsertPoint after potentially removing selects. 907 InsertPoint = getBranchInsertPoint(RI); 908 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 909 if (RI.HasBranch && InsertPoint != Branch) { 910 DenseMap<Instruction *, bool> Visited; 911 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 912 DT, Unhoistables, nullptr, Visited); 913 if (!IsHoistable) { 914 // If the branch isn't hoistable, drop the selects in the entry 915 // block, preferring the branch, which makes the branch the hoist 916 // point. 917 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 918 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 919 CHR_DEBUG( 920 for (SelectInst *SI : Selects) { 921 dbgs() << "SI " << *SI << "\n"; 922 }); 923 for (SelectInst *SI : Selects) { 924 ORE.emit([&]() { 925 return OptimizationRemarkMissed(DEBUG_TYPE, 926 "DropSelectUnhoistableBranch", SI) 927 << "Dropped select due to unhoistable branch"; 928 }); 929 } 930 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 931 return SI->getParent() == EntryBB; 932 }); 933 Unhoistables.clear(); 934 InsertPoint = Branch; 935 } 936 } 937 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 938 #ifndef NDEBUG 939 if (RI.HasBranch) { 940 assert(!DT.dominates(Branch, InsertPoint) && 941 "Branch can't be already above the hoist point"); 942 DenseMap<Instruction *, bool> Visited; 943 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 944 DT, Unhoistables, nullptr, Visited) && 945 "checkHoistValue for branch"); 946 } 947 for (auto *SI : Selects) { 948 assert(!DT.dominates(SI, InsertPoint) && 949 "SI can't be already above the hoist point"); 950 DenseMap<Instruction *, bool> Visited; 951 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 952 Unhoistables, nullptr, Visited) && 953 "checkHoistValue for selects"); 954 } 955 CHR_DEBUG(dbgs() << "Result\n"); 956 if (RI.HasBranch) { 957 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 958 } 959 for (auto *SI : Selects) { 960 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 961 } 962 #endif 963 } 964 } 965 966 // Traverse the region tree, find all nested scopes and merge them if possible. 967 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 968 SmallVectorImpl<CHRScope *> &Scopes) { 969 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 970 CHRScope *Result = findScope(R); 971 // Visit subscopes. 972 CHRScope *ConsecutiveSubscope = nullptr; 973 SmallVector<CHRScope *, 8> Subscopes; 974 for (auto It = R->begin(); It != R->end(); ++It) { 975 const std::unique_ptr<Region> &SubR = *It; 976 auto NextIt = std::next(It); 977 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 978 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 979 << "\n"); 980 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 981 if (SubCHRScope) { 982 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 983 } else { 984 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 985 } 986 if (SubCHRScope) { 987 if (!ConsecutiveSubscope) 988 ConsecutiveSubscope = SubCHRScope; 989 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 990 Subscopes.push_back(ConsecutiveSubscope); 991 ConsecutiveSubscope = SubCHRScope; 992 } else 993 ConsecutiveSubscope->append(SubCHRScope); 994 } else { 995 if (ConsecutiveSubscope) { 996 Subscopes.push_back(ConsecutiveSubscope); 997 } 998 ConsecutiveSubscope = nullptr; 999 } 1000 } 1001 if (ConsecutiveSubscope) { 1002 Subscopes.push_back(ConsecutiveSubscope); 1003 } 1004 for (CHRScope *Sub : Subscopes) { 1005 if (Result) { 1006 // Combine it with the parent. 1007 Result->addSub(Sub); 1008 } else { 1009 // Push Subscopes as they won't be combined with the parent. 1010 Scopes.push_back(Sub); 1011 } 1012 } 1013 return Result; 1014 } 1015 1016 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1017 DenseSet<Value *> ConditionValues; 1018 if (RI.HasBranch) { 1019 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1020 ConditionValues.insert(BI->getCondition()); 1021 } 1022 for (SelectInst *SI : RI.Selects) { 1023 ConditionValues.insert(SI->getCondition()); 1024 } 1025 return ConditionValues; 1026 } 1027 1028 1029 // Determine whether to split a scope depending on the sets of the branch 1030 // condition values of the previous region and the current region. We split 1031 // (return true) it if 1) the condition values of the inner/lower scope can't be 1032 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1033 // values have an empty intersection (because the combined branch conditions 1034 // won't probably lead to a simpler combined condition). 1035 static bool shouldSplit(Instruction *InsertPoint, 1036 DenseSet<Value *> &PrevConditionValues, 1037 DenseSet<Value *> &ConditionValues, 1038 DominatorTree &DT, 1039 DenseSet<Instruction *> &Unhoistables) { 1040 assert(InsertPoint && "Null InsertPoint"); 1041 CHR_DEBUG( 1042 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1043 for (Value *V : PrevConditionValues) { 1044 dbgs() << *V << ", "; 1045 } 1046 dbgs() << " ConditionValues "; 1047 for (Value *V : ConditionValues) { 1048 dbgs() << *V << ", "; 1049 } 1050 dbgs() << "\n"); 1051 // If any of Bases isn't hoistable to the hoist point, split. 1052 for (Value *V : ConditionValues) { 1053 DenseMap<Instruction *, bool> Visited; 1054 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1055 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1056 return true; // Not hoistable, split. 1057 } 1058 } 1059 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1060 // unnecessary splits at scopes with no branch/selects. If 1061 // PrevConditionValues and ConditionValues don't intersect at all, split. 1062 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1063 // Use std::set as DenseSet doesn't work with set_intersection. 1064 std::set<Value *> PrevBases, Bases; 1065 DenseMap<Value *, std::set<Value *>> Visited; 1066 for (Value *V : PrevConditionValues) { 1067 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1068 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1069 } 1070 for (Value *V : ConditionValues) { 1071 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1072 Bases.insert(BaseValues.begin(), BaseValues.end()); 1073 } 1074 CHR_DEBUG( 1075 dbgs() << "PrevBases "; 1076 for (Value *V : PrevBases) { 1077 dbgs() << *V << ", "; 1078 } 1079 dbgs() << " Bases "; 1080 for (Value *V : Bases) { 1081 dbgs() << *V << ", "; 1082 } 1083 dbgs() << "\n"); 1084 std::vector<Value *> Intersection; 1085 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1086 Bases.end(), std::back_inserter(Intersection)); 1087 if (Intersection.empty()) { 1088 // Empty intersection, split. 1089 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1090 return true; 1091 } 1092 } 1093 CHR_DEBUG(dbgs() << "No split\n"); 1094 return false; // Don't split. 1095 } 1096 1097 static void getSelectsInScope(CHRScope *Scope, 1098 DenseSet<Instruction *> &Output) { 1099 for (RegInfo &RI : Scope->RegInfos) 1100 for (SelectInst *SI : RI.Selects) 1101 Output.insert(SI); 1102 for (CHRScope *Sub : Scope->Subs) 1103 getSelectsInScope(Sub, Output); 1104 } 1105 1106 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1107 SmallVectorImpl<CHRScope *> &Output) { 1108 for (CHRScope *Scope : Input) { 1109 assert(!Scope->BranchInsertPoint && 1110 "BranchInsertPoint must not be set"); 1111 DenseSet<Instruction *> Unhoistables; 1112 getSelectsInScope(Scope, Unhoistables); 1113 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1114 } 1115 #ifndef NDEBUG 1116 for (CHRScope *Scope : Output) { 1117 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1118 } 1119 #endif 1120 } 1121 1122 SmallVector<CHRScope *, 8> CHR::splitScope( 1123 CHRScope *Scope, 1124 CHRScope *Outer, 1125 DenseSet<Value *> *OuterConditionValues, 1126 Instruction *OuterInsertPoint, 1127 SmallVectorImpl<CHRScope *> &Output, 1128 DenseSet<Instruction *> &Unhoistables) { 1129 if (Outer) { 1130 assert(OuterConditionValues && "Null OuterConditionValues"); 1131 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1132 } 1133 bool PrevSplitFromOuter = true; 1134 DenseSet<Value *> PrevConditionValues; 1135 Instruction *PrevInsertPoint = nullptr; 1136 SmallVector<CHRScope *, 8> Splits; 1137 SmallVector<bool, 8> SplitsSplitFromOuter; 1138 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1139 SmallVector<Instruction *, 8> SplitsInsertPoints; 1140 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1141 for (RegInfo &RI : RegInfos) { 1142 Instruction *InsertPoint = getBranchInsertPoint(RI); 1143 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1144 CHR_DEBUG( 1145 dbgs() << "ConditionValues "; 1146 for (Value *V : ConditionValues) { 1147 dbgs() << *V << ", "; 1148 } 1149 dbgs() << "\n"); 1150 if (RI.R == RegInfos[0].R) { 1151 // First iteration. Check to see if we should split from the outer. 1152 if (Outer) { 1153 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1154 CHR_DEBUG(dbgs() << "Should split from outer at " 1155 << RI.R->getNameStr() << "\n"); 1156 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1157 ConditionValues, DT, Unhoistables)) { 1158 PrevConditionValues = ConditionValues; 1159 PrevInsertPoint = InsertPoint; 1160 ORE.emit([&]() { 1161 return OptimizationRemarkMissed(DEBUG_TYPE, 1162 "SplitScopeFromOuter", 1163 RI.R->getEntry()->getTerminator()) 1164 << "Split scope from outer due to unhoistable branch/select " 1165 << "and/or lack of common condition values"; 1166 }); 1167 } else { 1168 // Not splitting from the outer. Use the outer bases and insert 1169 // point. Union the bases. 1170 PrevSplitFromOuter = false; 1171 PrevConditionValues = *OuterConditionValues; 1172 PrevConditionValues.insert(ConditionValues.begin(), 1173 ConditionValues.end()); 1174 PrevInsertPoint = OuterInsertPoint; 1175 } 1176 } else { 1177 CHR_DEBUG(dbgs() << "Outer null\n"); 1178 PrevConditionValues = ConditionValues; 1179 PrevInsertPoint = InsertPoint; 1180 } 1181 } else { 1182 CHR_DEBUG(dbgs() << "Should split from prev at " 1183 << RI.R->getNameStr() << "\n"); 1184 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1185 DT, Unhoistables)) { 1186 CHRScope *Tail = Scope->split(RI.R); 1187 Scopes.insert(Tail); 1188 Splits.push_back(Scope); 1189 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1190 SplitsConditionValues.push_back(PrevConditionValues); 1191 SplitsInsertPoints.push_back(PrevInsertPoint); 1192 Scope = Tail; 1193 PrevConditionValues = ConditionValues; 1194 PrevInsertPoint = InsertPoint; 1195 PrevSplitFromOuter = true; 1196 ORE.emit([&]() { 1197 return OptimizationRemarkMissed(DEBUG_TYPE, 1198 "SplitScopeFromPrev", 1199 RI.R->getEntry()->getTerminator()) 1200 << "Split scope from previous due to unhoistable branch/select " 1201 << "and/or lack of common condition values"; 1202 }); 1203 } else { 1204 // Not splitting. Union the bases. Keep the hoist point. 1205 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1206 } 1207 } 1208 } 1209 Splits.push_back(Scope); 1210 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1211 SplitsConditionValues.push_back(PrevConditionValues); 1212 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1213 SplitsInsertPoints.push_back(PrevInsertPoint); 1214 assert(Splits.size() == SplitsConditionValues.size() && 1215 Splits.size() == SplitsSplitFromOuter.size() && 1216 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1217 for (size_t I = 0; I < Splits.size(); ++I) { 1218 CHRScope *Split = Splits[I]; 1219 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1220 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1221 SmallVector<CHRScope *, 8> NewSubs; 1222 DenseSet<Instruction *> SplitUnhoistables; 1223 getSelectsInScope(Split, SplitUnhoistables); 1224 for (CHRScope *Sub : Split->Subs) { 1225 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1226 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1227 SplitUnhoistables); 1228 llvm::append_range(NewSubs, SubSplits); 1229 } 1230 Split->Subs = NewSubs; 1231 } 1232 SmallVector<CHRScope *, 8> Result; 1233 for (size_t I = 0; I < Splits.size(); ++I) { 1234 CHRScope *Split = Splits[I]; 1235 if (SplitsSplitFromOuter[I]) { 1236 // Split from the outer. 1237 Output.push_back(Split); 1238 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1239 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1240 << "\n"); 1241 } else { 1242 // Connected to the outer. 1243 Result.push_back(Split); 1244 } 1245 } 1246 if (!Outer) 1247 assert(Result.empty() && 1248 "If no outer (top-level), must return no nested ones"); 1249 return Result; 1250 } 1251 1252 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1253 for (CHRScope *Scope : Scopes) { 1254 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1255 classifyBiasedScopes(Scope, Scope); 1256 CHR_DEBUG( 1257 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1258 dbgs() << "TrueBiasedRegions "; 1259 for (Region *R : Scope->TrueBiasedRegions) { 1260 dbgs() << R->getNameStr() << ", "; 1261 } 1262 dbgs() << "\n"; 1263 dbgs() << "FalseBiasedRegions "; 1264 for (Region *R : Scope->FalseBiasedRegions) { 1265 dbgs() << R->getNameStr() << ", "; 1266 } 1267 dbgs() << "\n"; 1268 dbgs() << "TrueBiasedSelects "; 1269 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1270 dbgs() << *SI << ", "; 1271 } 1272 dbgs() << "\n"; 1273 dbgs() << "FalseBiasedSelects "; 1274 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1275 dbgs() << *SI << ", "; 1276 } 1277 dbgs() << "\n";); 1278 } 1279 } 1280 1281 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1282 for (RegInfo &RI : Scope->RegInfos) { 1283 if (RI.HasBranch) { 1284 Region *R = RI.R; 1285 if (TrueBiasedRegionsGlobal.contains(R)) 1286 OutermostScope->TrueBiasedRegions.insert(R); 1287 else if (FalseBiasedRegionsGlobal.contains(R)) 1288 OutermostScope->FalseBiasedRegions.insert(R); 1289 else 1290 llvm_unreachable("Must be biased"); 1291 } 1292 for (SelectInst *SI : RI.Selects) { 1293 if (TrueBiasedSelectsGlobal.contains(SI)) 1294 OutermostScope->TrueBiasedSelects.insert(SI); 1295 else if (FalseBiasedSelectsGlobal.contains(SI)) 1296 OutermostScope->FalseBiasedSelects.insert(SI); 1297 else 1298 llvm_unreachable("Must be biased"); 1299 } 1300 } 1301 for (CHRScope *Sub : Scope->Subs) { 1302 classifyBiasedScopes(Sub, OutermostScope); 1303 } 1304 } 1305 1306 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1307 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1308 Scope->FalseBiasedRegions.size() + 1309 Scope->TrueBiasedSelects.size() + 1310 Scope->FalseBiasedSelects.size(); 1311 return NumBiased >= CHRMergeThreshold; 1312 } 1313 1314 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1315 SmallVectorImpl<CHRScope *> &Output) { 1316 for (CHRScope *Scope : Input) { 1317 // Filter out the ones with only one region and no subs. 1318 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1319 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1320 << Scope->TrueBiasedRegions.size() 1321 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1322 << " true-selects " << Scope->TrueBiasedSelects.size() 1323 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1324 ORE.emit([&]() { 1325 return OptimizationRemarkMissed( 1326 DEBUG_TYPE, 1327 "DropScopeWithOneBranchOrSelect", 1328 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1329 << "Drop scope with < " 1330 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1331 << " biased branch(es) or select(s)"; 1332 }); 1333 continue; 1334 } 1335 Output.push_back(Scope); 1336 } 1337 } 1338 1339 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1340 SmallVectorImpl<CHRScope *> &Output) { 1341 for (CHRScope *Scope : Input) { 1342 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1343 "Empty"); 1344 setCHRRegions(Scope, Scope); 1345 Output.push_back(Scope); 1346 CHR_DEBUG( 1347 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1348 for (auto pair : Scope->HoistStopMap) { 1349 Region *R = pair.first; 1350 dbgs() << "Region " << R->getNameStr() << "\n"; 1351 for (Instruction *I : pair.second) { 1352 dbgs() << "HoistStop " << *I << "\n"; 1353 } 1354 } 1355 dbgs() << "CHRRegions" << "\n"; 1356 for (RegInfo &RI : Scope->CHRRegions) { 1357 dbgs() << RI.R->getNameStr() << "\n"; 1358 }); 1359 } 1360 } 1361 1362 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1363 DenseSet<Instruction *> Unhoistables; 1364 // Put the biased selects in Unhoistables because they should stay where they 1365 // are and constant-folded after CHR (in case one biased select or a branch 1366 // can depend on another biased select.) 1367 for (RegInfo &RI : Scope->RegInfos) { 1368 for (SelectInst *SI : RI.Selects) { 1369 Unhoistables.insert(SI); 1370 } 1371 } 1372 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1373 for (RegInfo &RI : Scope->RegInfos) { 1374 Region *R = RI.R; 1375 DenseSet<Instruction *> HoistStops; 1376 bool IsHoisted = false; 1377 if (RI.HasBranch) { 1378 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1379 OutermostScope->FalseBiasedRegions.contains(R)) && 1380 "Must be truthy or falsy"); 1381 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1382 // Note checkHoistValue fills in HoistStops. 1383 DenseMap<Instruction *, bool> Visited; 1384 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1385 Unhoistables, &HoistStops, Visited); 1386 assert(IsHoistable && "Must be hoistable"); 1387 (void)(IsHoistable); // Unused in release build 1388 IsHoisted = true; 1389 } 1390 for (SelectInst *SI : RI.Selects) { 1391 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1392 OutermostScope->FalseBiasedSelects.contains(SI)) && 1393 "Must be true or false biased"); 1394 // Note checkHoistValue fills in HoistStops. 1395 DenseMap<Instruction *, bool> Visited; 1396 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1397 Unhoistables, &HoistStops, Visited); 1398 assert(IsHoistable && "Must be hoistable"); 1399 (void)(IsHoistable); // Unused in release build 1400 IsHoisted = true; 1401 } 1402 if (IsHoisted) { 1403 OutermostScope->CHRRegions.push_back(RI); 1404 OutermostScope->HoistStopMap[R] = HoistStops; 1405 } 1406 } 1407 for (CHRScope *Sub : Scope->Subs) 1408 setCHRRegions(Sub, OutermostScope); 1409 } 1410 1411 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1412 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1413 } 1414 1415 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1416 SmallVectorImpl<CHRScope *> &Output) { 1417 Output.resize(Input.size()); 1418 llvm::copy(Input, Output.begin()); 1419 llvm::stable_sort(Output, CHRScopeSorter); 1420 } 1421 1422 // Return true if V is already hoisted or was hoisted (along with its operands) 1423 // to the insert point. 1424 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1425 HoistStopMapTy &HoistStopMap, 1426 DenseSet<Instruction *> &HoistedSet, 1427 DenseSet<PHINode *> &TrivialPHIs, 1428 DominatorTree &DT) { 1429 auto IT = HoistStopMap.find(R); 1430 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1431 DenseSet<Instruction *> &HoistStops = IT->second; 1432 if (auto *I = dyn_cast<Instruction>(V)) { 1433 if (I == HoistPoint) 1434 return; 1435 if (HoistStops.count(I)) 1436 return; 1437 if (auto *PN = dyn_cast<PHINode>(I)) 1438 if (TrivialPHIs.count(PN)) 1439 // The trivial phi inserted by the previous CHR scope could replace a 1440 // non-phi in HoistStops. Note that since this phi is at the exit of a 1441 // previous CHR scope, which dominates this scope, it's safe to stop 1442 // hoisting there. 1443 return; 1444 if (HoistedSet.count(I)) 1445 // Already hoisted, return. 1446 return; 1447 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1448 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1449 assert(DT.getNode(HoistPoint->getParent()) && 1450 "DT must contain HoistPoint block"); 1451 if (DT.dominates(I, HoistPoint)) 1452 // We are already above the hoist point. Stop here. This may be necessary 1453 // when multiple scopes would independently hoist the same 1454 // instruction. Since an outer (dominating) scope would hoist it to its 1455 // entry before an inner (dominated) scope would to its entry, the inner 1456 // scope may see the instruction already hoisted, in which case it 1457 // potentially wrong for the inner scope to hoist it and could cause bad 1458 // IR (non-dominating def), but safe to skip hoisting it instead because 1459 // it's already in a block that dominates the inner scope. 1460 return; 1461 for (Value *Op : I->operands()) { 1462 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1463 } 1464 I->moveBefore(HoistPoint); 1465 HoistedSet.insert(I); 1466 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1467 } 1468 } 1469 1470 // Hoist the dependent condition values of the branches and the selects in the 1471 // scope to the insert point. 1472 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1473 DenseSet<PHINode *> &TrivialPHIs, 1474 DominatorTree &DT) { 1475 DenseSet<Instruction *> HoistedSet; 1476 for (const RegInfo &RI : Scope->CHRRegions) { 1477 Region *R = RI.R; 1478 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1479 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1480 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1481 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1482 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1483 HoistedSet, TrivialPHIs, DT); 1484 } 1485 for (SelectInst *SI : RI.Selects) { 1486 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1487 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1488 if (!(IsTrueBiased || IsFalseBiased)) 1489 continue; 1490 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1491 HoistedSet, TrivialPHIs, DT); 1492 } 1493 } 1494 } 1495 1496 // Negate the predicate if an ICmp if it's used only by branches or selects by 1497 // swapping the operands of the branches or the selects. Returns true if success. 1498 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1499 Instruction *ExcludedUser, 1500 CHRScope *Scope) { 1501 for (User *U : ICmp->users()) { 1502 if (U == ExcludedUser) 1503 continue; 1504 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1505 continue; 1506 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1507 continue; 1508 return false; 1509 } 1510 for (User *U : ICmp->users()) { 1511 if (U == ExcludedUser) 1512 continue; 1513 if (auto *BI = dyn_cast<BranchInst>(U)) { 1514 assert(BI->isConditional() && "Must be conditional"); 1515 BI->swapSuccessors(); 1516 // Don't need to swap this in terms of 1517 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1518 // mean whehter the branch is likely go into the if-then rather than 1519 // successor0/successor1 and because we can tell which edge is the then or 1520 // the else one by comparing the destination to the region exit block. 1521 continue; 1522 } 1523 if (auto *SI = dyn_cast<SelectInst>(U)) { 1524 // Swap operands 1525 SI->swapValues(); 1526 SI->swapProfMetadata(); 1527 if (Scope->TrueBiasedSelects.count(SI)) { 1528 assert(!Scope->FalseBiasedSelects.contains(SI) && 1529 "Must not be already in"); 1530 Scope->FalseBiasedSelects.insert(SI); 1531 } else if (Scope->FalseBiasedSelects.count(SI)) { 1532 assert(!Scope->TrueBiasedSelects.contains(SI) && 1533 "Must not be already in"); 1534 Scope->TrueBiasedSelects.insert(SI); 1535 } 1536 continue; 1537 } 1538 llvm_unreachable("Must be a branch or a select"); 1539 } 1540 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1541 return true; 1542 } 1543 1544 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1545 // for a value that's defined in the scope but used outside it (meaning it's 1546 // alive at the exit block). 1547 static void insertTrivialPHIs(CHRScope *Scope, 1548 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1549 DenseSet<PHINode *> &TrivialPHIs) { 1550 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1551 for (RegInfo &RI : Scope->RegInfos) { 1552 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1553 // sub-Scopes. 1554 BlocksInScope.insert(BB); 1555 } 1556 } 1557 CHR_DEBUG({ 1558 dbgs() << "Inserting redundant phis\n"; 1559 for (BasicBlock *BB : BlocksInScope) 1560 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1561 }); 1562 for (BasicBlock *BB : BlocksInScope) { 1563 for (Instruction &I : *BB) { 1564 SmallVector<Instruction *, 8> Users; 1565 for (User *U : I.users()) { 1566 if (auto *UI = dyn_cast<Instruction>(U)) { 1567 if (!BlocksInScope.contains(UI->getParent()) && 1568 // Unless there's already a phi for I at the exit block. 1569 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1570 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1571 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1572 Users.push_back(UI); 1573 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1574 // There's a loop backedge from a block that's dominated by this 1575 // scope to the entry block. 1576 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1577 CHR_DEBUG(dbgs() 1578 << "Used at entry block (for a back edge) by a phi user " 1579 << *UI << "\n"); 1580 Users.push_back(UI); 1581 } 1582 } 1583 } 1584 if (Users.size() > 0) { 1585 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1586 // ExitBlock. Replace I with the new phi in UI unless UI is another 1587 // phi at ExitBlock. 1588 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 1589 &ExitBlock->front()); 1590 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1591 PN->addIncoming(&I, Pred); 1592 } 1593 TrivialPHIs.insert(PN); 1594 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1595 for (Instruction *UI : Users) { 1596 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1597 if (UI->getOperand(J) == &I) { 1598 UI->setOperand(J, PN); 1599 } 1600 } 1601 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1602 } 1603 } 1604 } 1605 } 1606 } 1607 1608 // Assert that all the CHR regions of the scope have a biased branch or select. 1609 static void LLVM_ATTRIBUTE_UNUSED 1610 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1611 #ifndef NDEBUG 1612 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1613 if (Scope->TrueBiasedRegions.count(RI.R) || 1614 Scope->FalseBiasedRegions.count(RI.R)) 1615 return true; 1616 for (SelectInst *SI : RI.Selects) 1617 if (Scope->TrueBiasedSelects.count(SI) || 1618 Scope->FalseBiasedSelects.count(SI)) 1619 return true; 1620 return false; 1621 }; 1622 for (RegInfo &RI : Scope->CHRRegions) { 1623 assert(HasBiasedBranchOrSelect(RI, Scope) && 1624 "Must have biased branch or select"); 1625 } 1626 #endif 1627 } 1628 1629 // Assert that all the condition values of the biased branches and selects have 1630 // been hoisted to the pre-entry block or outside of the scope. 1631 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1632 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1633 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1634 for (RegInfo &RI : Scope->CHRRegions) { 1635 Region *R = RI.R; 1636 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1637 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1638 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1639 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1640 Value *V = BI->getCondition(); 1641 CHR_DEBUG(dbgs() << *V << "\n"); 1642 if (auto *I = dyn_cast<Instruction>(V)) { 1643 (void)(I); // Unused in release build. 1644 assert((I->getParent() == PreEntryBlock || 1645 !Scope->contains(I)) && 1646 "Must have been hoisted to PreEntryBlock or outside the scope"); 1647 } 1648 } 1649 for (SelectInst *SI : RI.Selects) { 1650 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1651 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1652 if (!(IsTrueBiased || IsFalseBiased)) 1653 continue; 1654 Value *V = SI->getCondition(); 1655 CHR_DEBUG(dbgs() << *V << "\n"); 1656 if (auto *I = dyn_cast<Instruction>(V)) { 1657 (void)(I); // Unused in release build. 1658 assert((I->getParent() == PreEntryBlock || 1659 !Scope->contains(I)) && 1660 "Must have been hoisted to PreEntryBlock or outside the scope"); 1661 } 1662 } 1663 } 1664 } 1665 1666 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1667 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1668 1669 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1670 Region *FirstRegion = Scope->RegInfos[0].R; 1671 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1672 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1673 BasicBlock *ExitBlock = LastRegion->getExit(); 1674 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1675 1676 if (ExitBlock) { 1677 // Insert a trivial phi at the exit block (where the CHR hot path and the 1678 // cold path merges) for a value that's defined in the scope but used 1679 // outside it (meaning it's alive at the exit block). We will add the 1680 // incoming values for the CHR cold paths to it below. Without this, we'd 1681 // miss updating phi's for such values unless there happens to already be a 1682 // phi for that value there. 1683 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1684 } 1685 1686 // Split the entry block of the first region. The new block becomes the new 1687 // entry block of the first region. The old entry block becomes the block to 1688 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1689 // through the split, we update the entry of the first region after the split, 1690 // and Region only points to the entry and the exit blocks, rather than 1691 // keeping everything in a list or set, the blocks membership and the 1692 // entry/exit blocks of the region are still valid after the split. 1693 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1694 << " at " << *Scope->BranchInsertPoint << "\n"); 1695 BasicBlock *NewEntryBlock = 1696 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1697 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1698 "NewEntryBlock's only pred must be EntryBlock"); 1699 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1700 BasicBlock *PreEntryBlock = EntryBlock; 1701 1702 ValueToValueMapTy VMap; 1703 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1704 // hot path (originals) and a cold path (clones) and update the PHIs at the 1705 // exit block. 1706 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1707 1708 // Replace the old (placeholder) branch with the new (merged) conditional 1709 // branch. 1710 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1711 NewEntryBlock, VMap); 1712 1713 #ifndef NDEBUG 1714 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1715 #endif 1716 1717 // Hoist the conditional values of the branches/selects. 1718 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1719 1720 #ifndef NDEBUG 1721 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1722 #endif 1723 1724 // Create the combined branch condition and constant-fold the branches/selects 1725 // in the hot path. 1726 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1727 ProfileCount.value_or(0)); 1728 } 1729 1730 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1731 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1732 // at the exit block. 1733 void CHR::cloneScopeBlocks(CHRScope *Scope, 1734 BasicBlock *PreEntryBlock, 1735 BasicBlock *ExitBlock, 1736 Region *LastRegion, 1737 ValueToValueMapTy &VMap) { 1738 // Clone all the blocks. The original blocks will be the hot-path 1739 // CHR-optimized code and the cloned blocks will be the original unoptimized 1740 // code. This is so that the block pointers from the 1741 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1742 // which CHR should apply to. 1743 SmallVector<BasicBlock*, 8> NewBlocks; 1744 for (RegInfo &RI : Scope->RegInfos) 1745 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1746 // sub-Scopes. 1747 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1748 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1749 NewBlocks.push_back(NewBB); 1750 VMap[BB] = NewBB; 1751 } 1752 1753 // Place the cloned blocks right after the original blocks (right before the 1754 // exit block of.) 1755 if (ExitBlock) 1756 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1757 F.getBasicBlockList(), 1758 NewBlocks[0]->getIterator(), F.end()); 1759 1760 // Update the cloned blocks/instructions to refer to themselves. 1761 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1762 for (Instruction &I : *NewBlocks[i]) 1763 RemapInstruction(&I, VMap, 1764 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1765 1766 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1767 // the top-level region but we don't need to add PHIs. The trivial PHIs 1768 // inserted above will be updated here. 1769 if (ExitBlock) 1770 for (PHINode &PN : ExitBlock->phis()) 1771 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1772 ++I) { 1773 BasicBlock *Pred = PN.getIncomingBlock(I); 1774 if (LastRegion->contains(Pred)) { 1775 Value *V = PN.getIncomingValue(I); 1776 auto It = VMap.find(V); 1777 if (It != VMap.end()) V = It->second; 1778 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1779 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1780 } 1781 } 1782 } 1783 1784 // A helper for transformScope. Replace the old (placeholder) branch with the 1785 // new (merged) conditional branch. 1786 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1787 BasicBlock *EntryBlock, 1788 BasicBlock *NewEntryBlock, 1789 ValueToValueMapTy &VMap) { 1790 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1791 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1792 "SplitBlock did not work correctly!"); 1793 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1794 "NewEntryBlock's only pred must be EntryBlock"); 1795 assert(VMap.find(NewEntryBlock) != VMap.end() && 1796 "NewEntryBlock must have been copied"); 1797 OldBR->dropAllReferences(); 1798 OldBR->eraseFromParent(); 1799 // The true predicate is a placeholder. It will be replaced later in 1800 // fixupBranchesAndSelects(). 1801 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1802 cast<BasicBlock>(VMap[NewEntryBlock]), 1803 ConstantInt::getTrue(F.getContext())); 1804 PreEntryBlock->getInstList().push_back(NewBR); 1805 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1806 "NewEntryBlock's only pred must be EntryBlock"); 1807 return NewBR; 1808 } 1809 1810 // A helper for transformScopes. Create the combined branch condition and 1811 // constant-fold the branches/selects in the hot path. 1812 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1813 BasicBlock *PreEntryBlock, 1814 BranchInst *MergedBR, 1815 uint64_t ProfileCount) { 1816 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1817 BranchProbability CHRBranchBias(1, 1); 1818 uint64_t NumCHRedBranches = 0; 1819 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1820 for (RegInfo &RI : Scope->CHRRegions) { 1821 Region *R = RI.R; 1822 if (RI.HasBranch) { 1823 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1824 ++NumCHRedBranches; 1825 } 1826 for (SelectInst *SI : RI.Selects) { 1827 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1828 ++NumCHRedBranches; 1829 } 1830 } 1831 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1832 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1833 ORE.emit([&]() { 1834 return OptimizationRemark(DEBUG_TYPE, 1835 "CHR", 1836 // Refer to the hot (original) path 1837 MergedBR->getSuccessor(0)->getTerminator()) 1838 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1839 << " branches or selects"; 1840 }); 1841 MergedBR->setCondition(MergedCondition); 1842 uint32_t Weights[] = { 1843 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1844 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1845 }; 1846 MDBuilder MDB(F.getContext()); 1847 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1848 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1849 << "\n"); 1850 } 1851 1852 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1853 // and constant-fold a branch in the hot path. 1854 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1855 IRBuilder<> &IRB, 1856 Value *&MergedCondition, 1857 BranchProbability &CHRBranchBias) { 1858 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1859 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1860 "Must be truthy or falsy"); 1861 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1862 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1863 "Must be in the bias map"); 1864 BranchProbability Bias = BranchBiasMap[R]; 1865 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1866 // Take the min. 1867 if (CHRBranchBias > Bias) 1868 CHRBranchBias = Bias; 1869 BasicBlock *IfThen = BI->getSuccessor(1); 1870 BasicBlock *IfElse = BI->getSuccessor(0); 1871 BasicBlock *RegionExitBlock = R->getExit(); 1872 assert(RegionExitBlock && "Null ExitBlock"); 1873 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1874 IfThen != IfElse && "Invariant from findScopes"); 1875 if (IfThen == RegionExitBlock) { 1876 // Swap them so that IfThen means going into it and IfElse means skipping 1877 // it. 1878 std::swap(IfThen, IfElse); 1879 } 1880 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1881 << " IfElse " << IfElse->getName() << "\n"); 1882 Value *Cond = BI->getCondition(); 1883 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1884 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1885 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1886 MergedCondition); 1887 // Constant-fold the branch at ClonedEntryBlock. 1888 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1889 "The successor shouldn't change"); 1890 Value *NewCondition = ConditionTrue ? 1891 ConstantInt::getTrue(F.getContext()) : 1892 ConstantInt::getFalse(F.getContext()); 1893 BI->setCondition(NewCondition); 1894 } 1895 1896 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1897 // and constant-fold a select in the hot path. 1898 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1899 IRBuilder<> &IRB, 1900 Value *&MergedCondition, 1901 BranchProbability &CHRBranchBias) { 1902 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1903 assert((IsTrueBiased || 1904 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1905 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1906 "Must be in the bias map"); 1907 BranchProbability Bias = SelectBiasMap[SI]; 1908 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1909 // Take the min. 1910 if (CHRBranchBias > Bias) 1911 CHRBranchBias = Bias; 1912 Value *Cond = SI->getCondition(); 1913 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1914 MergedCondition); 1915 Value *NewCondition = IsTrueBiased ? 1916 ConstantInt::getTrue(F.getContext()) : 1917 ConstantInt::getFalse(F.getContext()); 1918 SI->setCondition(NewCondition); 1919 } 1920 1921 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1922 // condition. 1923 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1924 Instruction *BranchOrSelect, CHRScope *Scope, 1925 IRBuilder<> &IRB, Value *&MergedCondition) { 1926 if (!IsTrueBiased) { 1927 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1928 // branch, negate the icmp predicate and swap the branch targets and avoid 1929 // inserting an Xor to negate Cond. 1930 auto *ICmp = dyn_cast<ICmpInst>(Cond); 1931 if (!ICmp || 1932 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1933 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1934 } 1935 1936 // Select conditions can be poison, while branching on poison is immediate 1937 // undefined behavior. As such, we need to freeze potentially poisonous 1938 // conditions derived from selects. 1939 if (isa<SelectInst>(BranchOrSelect) && 1940 !isGuaranteedNotToBeUndefOrPoison(Cond)) 1941 Cond = IRB.CreateFreeze(Cond); 1942 1943 // Use logical and to avoid propagating poison from later conditions. 1944 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 1945 } 1946 1947 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1948 unsigned I = 0; 1949 DenseSet<PHINode *> TrivialPHIs; 1950 for (CHRScope *Scope : CHRScopes) { 1951 transformScopes(Scope, TrivialPHIs); 1952 CHR_DEBUG( 1953 std::ostringstream oss; 1954 oss << " after transformScopes " << I++; 1955 dumpIR(F, oss.str().c_str(), nullptr)); 1956 (void)I; 1957 } 1958 } 1959 1960 static void LLVM_ATTRIBUTE_UNUSED 1961 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1962 dbgs() << Label << " " << Scopes.size() << "\n"; 1963 for (CHRScope *Scope : Scopes) { 1964 dbgs() << *Scope << "\n"; 1965 } 1966 } 1967 1968 bool CHR::run() { 1969 if (!shouldApply(F, PSI)) 1970 return false; 1971 1972 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1973 1974 bool Changed = false; 1975 { 1976 CHR_DEBUG( 1977 dbgs() << "RegionInfo:\n"; 1978 RI.print(dbgs())); 1979 1980 // Recursively traverse the region tree and find regions that have biased 1981 // branches and/or selects and create scopes. 1982 SmallVector<CHRScope *, 8> AllScopes; 1983 findScopes(AllScopes); 1984 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1985 1986 // Split the scopes if 1) the conditiona values of the biased 1987 // branches/selects of the inner/lower scope can't be hoisted up to the 1988 // outermost/uppermost scope entry, or 2) the condition values of the biased 1989 // branches/selects in a scope (including subscopes) don't share at least 1990 // one common value. 1991 SmallVector<CHRScope *, 8> SplitScopes; 1992 splitScopes(AllScopes, SplitScopes); 1993 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1994 1995 // After splitting, set the biased regions and selects of a scope (a tree 1996 // root) that include those of the subscopes. 1997 classifyBiasedScopes(SplitScopes); 1998 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1999 2000 // Filter out the scopes that has only one biased region or select (CHR 2001 // isn't useful in such a case). 2002 SmallVector<CHRScope *, 8> FilteredScopes; 2003 filterScopes(SplitScopes, FilteredScopes); 2004 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2005 2006 // Set the regions to be CHR'ed and their hoist stops for each scope. 2007 SmallVector<CHRScope *, 8> SetScopes; 2008 setCHRRegions(FilteredScopes, SetScopes); 2009 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2010 2011 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2012 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2013 // to the hot path, rather than both hot and cold paths. 2014 SmallVector<CHRScope *, 8> SortedScopes; 2015 sortScopes(SetScopes, SortedScopes); 2016 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2017 2018 CHR_DEBUG( 2019 dbgs() << "RegionInfo:\n"; 2020 RI.print(dbgs())); 2021 2022 // Apply the CHR transformation. 2023 if (!SortedScopes.empty()) { 2024 transformScopes(SortedScopes); 2025 Changed = true; 2026 } 2027 } 2028 2029 if (Changed) { 2030 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2031 ORE.emit([&]() { 2032 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2033 << ore::NV("Function", &F) << " " 2034 << "Reduced the number of branches in hot paths by " 2035 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2036 << " (static) and " 2037 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2038 << " (weighted by PGO count)"; 2039 }); 2040 } 2041 2042 return Changed; 2043 } 2044 2045 namespace llvm { 2046 2047 ControlHeightReductionPass::ControlHeightReductionPass() { 2048 parseCHRFilterFiles(); 2049 } 2050 2051 PreservedAnalyses ControlHeightReductionPass::run( 2052 Function &F, 2053 FunctionAnalysisManager &FAM) { 2054 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2055 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2056 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2057 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2058 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2059 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2060 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2061 if (!Changed) 2062 return PreservedAnalyses::all(); 2063 return PreservedAnalyses::none(); 2064 } 2065 2066 } // namespace llvm 2067