1 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // When alias analysis is uncertain about the aliasing between any two accesses, 10 // it will return MayAlias. This uncertainty from alias analysis restricts LICM 11 // from proceeding further. In cases where alias analysis is uncertain we might 12 // use loop versioning as an alternative. 13 // 14 // Loop Versioning will create a version of the loop with aggressive aliasing 15 // assumptions in addition to the original with conservative (default) aliasing 16 // assumptions. The version of the loop making aggressive aliasing assumptions 17 // will have all the memory accesses marked as no-alias. These two versions of 18 // loop will be preceded by a memory runtime check. This runtime check consists 19 // of bound checks for all unique memory accessed in loop, and it ensures the 20 // lack of memory aliasing. The result of the runtime check determines which of 21 // the loop versions is executed: If the runtime check detects any memory 22 // aliasing, then the original loop is executed. Otherwise, the version with 23 // aggressive aliasing assumptions is used. 24 // 25 // Following are the top level steps: 26 // 27 // a) Perform LoopVersioningLICM's feasibility check. 28 // b) If loop is a candidate for versioning then create a memory bound check, 29 // by considering all the memory accesses in loop body. 30 // c) Clone original loop and set all memory accesses as no-alias in new loop. 31 // d) Set original loop & versioned loop as a branch target of the runtime check 32 // result. 33 // 34 // It transforms loop as shown below: 35 // 36 // +----------------+ 37 // |Runtime Memcheck| 38 // +----------------+ 39 // | 40 // +----------+----------------+----------+ 41 // | | 42 // +---------+----------+ +-----------+----------+ 43 // |Orig Loop Preheader | |Cloned Loop Preheader | 44 // +--------------------+ +----------------------+ 45 // | | 46 // +--------------------+ +----------------------+ 47 // |Orig Loop Body | |Cloned Loop Body | 48 // +--------------------+ +----------------------+ 49 // | | 50 // +--------------------+ +----------------------+ 51 // |Orig Loop Exit Block| |Cloned Loop Exit Block| 52 // +--------------------+ +-----------+----------+ 53 // | | 54 // +----------+--------------+-----------+ 55 // | 56 // +-----+----+ 57 // |Join Block| 58 // +----------+ 59 // 60 //===----------------------------------------------------------------------===// 61 62 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" 63 #include "llvm/ADT/SmallVector.h" 64 #include "llvm/ADT/StringRef.h" 65 #include "llvm/Analysis/AliasAnalysis.h" 66 #include "llvm/Analysis/AliasSetTracker.h" 67 #include "llvm/Analysis/GlobalsModRef.h" 68 #include "llvm/Analysis/LoopAccessAnalysis.h" 69 #include "llvm/Analysis/LoopInfo.h" 70 #include "llvm/Analysis/LoopPass.h" 71 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 72 #include "llvm/Analysis/ScalarEvolution.h" 73 #include "llvm/IR/Dominators.h" 74 #include "llvm/IR/Instruction.h" 75 #include "llvm/IR/Instructions.h" 76 #include "llvm/IR/LLVMContext.h" 77 #include "llvm/IR/MDBuilder.h" 78 #include "llvm/IR/Metadata.h" 79 #include "llvm/IR/Value.h" 80 #include "llvm/Support/Casting.h" 81 #include "llvm/Support/CommandLine.h" 82 #include "llvm/Support/Debug.h" 83 #include "llvm/Support/raw_ostream.h" 84 #include "llvm/Transforms/Utils/LoopUtils.h" 85 #include "llvm/Transforms/Utils/LoopVersioning.h" 86 #include <cassert> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "loop-versioning-licm" 91 92 static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable"; 93 94 /// Threshold minimum allowed percentage for possible 95 /// invariant instructions in a loop. 96 static cl::opt<float> 97 LVInvarThreshold("licm-versioning-invariant-threshold", 98 cl::desc("LoopVersioningLICM's minimum allowed percentage " 99 "of possible invariant instructions per loop"), 100 cl::init(25), cl::Hidden); 101 102 /// Threshold for maximum allowed loop nest/depth 103 static cl::opt<unsigned> LVLoopDepthThreshold( 104 "licm-versioning-max-depth-threshold", 105 cl::desc( 106 "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"), 107 cl::init(2), cl::Hidden); 108 109 namespace { 110 111 struct LoopVersioningLICM { 112 // We don't explicitly pass in LoopAccessInfo to the constructor since the 113 // loop versioning might return early due to instructions that are not safe 114 // for versioning. By passing the proxy instead the construction of 115 // LoopAccessInfo will take place only when it's necessary. 116 LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE, 117 OptimizationRemarkEmitter *ORE, 118 LoopAccessInfoManager &LAIs, LoopInfo &LI, 119 Loop *CurLoop) 120 : AA(AA), SE(SE), LAIs(LAIs), LI(LI), CurLoop(CurLoop), 121 LoopDepthThreshold(LVLoopDepthThreshold), 122 InvariantThreshold(LVInvarThreshold), ORE(ORE) {} 123 124 bool run(DominatorTree *DT); 125 126 private: 127 // Current AliasAnalysis information 128 AliasAnalysis *AA; 129 130 // Current ScalarEvolution 131 ScalarEvolution *SE; 132 133 // Current Loop's LoopAccessInfo 134 const LoopAccessInfo *LAI = nullptr; 135 136 // Proxy for retrieving LoopAccessInfo. 137 LoopAccessInfoManager &LAIs; 138 139 LoopInfo &LI; 140 141 // The current loop we are working on. 142 Loop *CurLoop; 143 144 // Maximum loop nest threshold 145 unsigned LoopDepthThreshold; 146 147 // Minimum invariant threshold 148 float InvariantThreshold; 149 150 // Counter to track num of load & store 151 unsigned LoadAndStoreCounter = 0; 152 153 // Counter to track num of invariant 154 unsigned InvariantCounter = 0; 155 156 // Read only loop marker. 157 bool IsReadOnlyLoop = true; 158 159 // OptimizationRemarkEmitter 160 OptimizationRemarkEmitter *ORE; 161 162 bool isLegalForVersioning(); 163 bool legalLoopStructure(); 164 bool legalLoopInstructions(); 165 bool legalLoopMemoryAccesses(); 166 bool isLoopAlreadyVisited(); 167 bool instructionSafeForVersioning(Instruction *I); 168 }; 169 170 } // end anonymous namespace 171 172 /// Check loop structure and confirms it's good for LoopVersioningLICM. 173 bool LoopVersioningLICM::legalLoopStructure() { 174 // Loop must be in loop simplify form. 175 if (!CurLoop->isLoopSimplifyForm()) { 176 LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n"); 177 return false; 178 } 179 // Loop should be innermost loop, if not return false. 180 if (!CurLoop->getSubLoops().empty()) { 181 LLVM_DEBUG(dbgs() << " loop is not innermost\n"); 182 return false; 183 } 184 // Loop should have a single backedge, if not return false. 185 if (CurLoop->getNumBackEdges() != 1) { 186 LLVM_DEBUG(dbgs() << " loop has multiple backedges\n"); 187 return false; 188 } 189 // Loop must have a single exiting block, if not return false. 190 if (!CurLoop->getExitingBlock()) { 191 LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n"); 192 return false; 193 } 194 // We only handle bottom-tested loop, i.e. loop in which the condition is 195 // checked at the end of each iteration. With that we can assume that all 196 // instructions in the loop are executed the same number of times. 197 if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { 198 LLVM_DEBUG(dbgs() << " loop is not bottom tested\n"); 199 return false; 200 } 201 // Parallel loops must not have aliasing loop-invariant memory accesses. 202 // Hence we don't need to version anything in this case. 203 if (CurLoop->isAnnotatedParallel()) { 204 LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n"); 205 return false; 206 } 207 // Loop depth more then LoopDepthThreshold are not allowed 208 if (CurLoop->getLoopDepth() > LoopDepthThreshold) { 209 LLVM_DEBUG(dbgs() << " loop depth is more than threshold\n"); 210 return false; 211 } 212 // We need to be able to compute the loop trip count in order 213 // to generate the bound checks. 214 const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); 215 if (isa<SCEVCouldNotCompute>(ExitCount)) { 216 LLVM_DEBUG(dbgs() << " loop does not have trip count\n"); 217 return false; 218 } 219 return true; 220 } 221 222 /// Check memory accesses in loop and confirms it's good for 223 /// LoopVersioningLICM. 224 bool LoopVersioningLICM::legalLoopMemoryAccesses() { 225 // Loop over the body of this loop, construct AST. 226 BatchAAResults BAA(*AA); 227 AliasSetTracker AST(BAA); 228 for (auto *Block : CurLoop->getBlocks()) { 229 // Ignore blocks in subloops. 230 if (LI.getLoopFor(Block) == CurLoop) 231 AST.add(*Block); 232 } 233 234 // Memory check: 235 // Transform phase will generate a versioned loop and also a runtime check to 236 // ensure the pointers are independent and they don’t alias. 237 // In version variant of loop, alias meta data asserts that all access are 238 // mutually independent. 239 // 240 // Pointers aliasing in alias domain are avoided because with multiple 241 // aliasing domains we may not be able to hoist potential loop invariant 242 // access out of the loop. 243 // 244 // Iterate over alias tracker sets, and confirm AliasSets doesn't have any 245 // must alias set. 246 bool HasMayAlias = false; 247 bool TypeSafety = false; 248 bool HasMod = false; 249 for (const auto &I : AST) { 250 const AliasSet &AS = I; 251 // Skip Forward Alias Sets, as this should be ignored as part of 252 // the AliasSetTracker object. 253 if (AS.isForwardingAliasSet()) 254 continue; 255 // With MustAlias its not worth adding runtime bound check. 256 if (AS.isMustAlias()) 257 return false; 258 const Value *SomePtr = AS.begin()->Ptr; 259 bool TypeCheck = true; 260 // Check for Mod & MayAlias 261 HasMayAlias |= AS.isMayAlias(); 262 HasMod |= AS.isMod(); 263 for (const auto &MemLoc : AS) { 264 const Value *Ptr = MemLoc.Ptr; 265 // Alias tracker should have pointers of same data type. 266 // 267 // FIXME: check no longer effective since opaque pointers? 268 // If the intent is to check that the memory accesses use the 269 // same data type (such that LICM can promote them), then we 270 // can no longer see this from the pointer value types. 271 TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType())); 272 } 273 // At least one alias tracker should have pointers of same data type. 274 TypeSafety |= TypeCheck; 275 } 276 // Ensure types should be of same type. 277 if (!TypeSafety) { 278 LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n"); 279 return false; 280 } 281 // Ensure loop body shouldn't be read only. 282 if (!HasMod) { 283 LLVM_DEBUG(dbgs() << " No memory modified in loop body\n"); 284 return false; 285 } 286 // Make sure alias set has may alias case. 287 // If there no alias memory ambiguity, return false. 288 if (!HasMayAlias) { 289 LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n"); 290 return false; 291 } 292 return true; 293 } 294 295 /// Check loop instructions safe for Loop versioning. 296 /// It returns true if it's safe else returns false. 297 /// Consider following: 298 /// 1) Check all load store in loop body are non atomic & non volatile. 299 /// 2) Check function call safety, by ensuring its not accessing memory. 300 /// 3) Loop body shouldn't have any may throw instruction. 301 /// 4) Loop body shouldn't have any convergent or noduplicate instructions. 302 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { 303 assert(I != nullptr && "Null instruction found!"); 304 // Check function call safety 305 if (auto *Call = dyn_cast<CallBase>(I)) { 306 if (Call->isConvergent() || Call->cannotDuplicate()) { 307 LLVM_DEBUG(dbgs() << " Convergent call site found.\n"); 308 return false; 309 } 310 311 if (!AA->doesNotAccessMemory(Call)) { 312 LLVM_DEBUG(dbgs() << " Unsafe call site found.\n"); 313 return false; 314 } 315 } 316 317 // Avoid loops with possiblity of throw 318 if (I->mayThrow()) { 319 LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n"); 320 return false; 321 } 322 // If current instruction is load instructions 323 // make sure it's a simple load (non atomic & non volatile) 324 if (I->mayReadFromMemory()) { 325 LoadInst *Ld = dyn_cast<LoadInst>(I); 326 if (!Ld || !Ld->isSimple()) { 327 LLVM_DEBUG(dbgs() << " Found a non-simple load.\n"); 328 return false; 329 } 330 LoadAndStoreCounter++; 331 Value *Ptr = Ld->getPointerOperand(); 332 // Check loop invariant. 333 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) 334 InvariantCounter++; 335 } 336 // If current instruction is store instruction 337 // make sure it's a simple store (non atomic & non volatile) 338 else if (I->mayWriteToMemory()) { 339 StoreInst *St = dyn_cast<StoreInst>(I); 340 if (!St || !St->isSimple()) { 341 LLVM_DEBUG(dbgs() << " Found a non-simple store.\n"); 342 return false; 343 } 344 LoadAndStoreCounter++; 345 Value *Ptr = St->getPointerOperand(); 346 // Don't allow stores that we don't have runtime checks for, as we won't be 347 // able to mark them noalias meaning they would prevent any code motion. 348 auto &Pointers = LAI->getRuntimePointerChecking()->Pointers; 349 if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) { 350 LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n"); 351 return false; 352 } 353 // Check loop invariant. 354 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) 355 InvariantCounter++; 356 357 IsReadOnlyLoop = false; 358 } 359 return true; 360 } 361 362 /// Check loop instructions and confirms it's good for 363 /// LoopVersioningLICM. 364 bool LoopVersioningLICM::legalLoopInstructions() { 365 // Resetting counters. 366 LoadAndStoreCounter = 0; 367 InvariantCounter = 0; 368 IsReadOnlyLoop = true; 369 using namespace ore; 370 // Get LoopAccessInfo from current loop via the proxy. 371 LAI = &LAIs.getInfo(*CurLoop, /*AllowPartial=*/true); 372 // Check LoopAccessInfo for need of runtime check. 373 if (LAI->getRuntimePointerChecking()->getChecks().empty()) { 374 LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); 375 return false; 376 } 377 // Iterate over loop blocks and instructions of each block and check 378 // instruction safety. 379 for (auto *Block : CurLoop->getBlocks()) 380 for (auto &Inst : *Block) { 381 // If instruction is unsafe just return false. 382 if (!instructionSafeForVersioning(&Inst)) { 383 ORE->emit([&]() { 384 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst) 385 << " Unsafe Loop Instruction"; 386 }); 387 return false; 388 } 389 } 390 // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold 391 if (LAI->getNumRuntimePointerChecks() > 392 VectorizerParams::RuntimeMemoryCheckThreshold) { 393 LLVM_DEBUG( 394 dbgs() << " LAA: Runtime checks are more than threshold !!\n"); 395 ORE->emit([&]() { 396 return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck", 397 CurLoop->getStartLoc(), 398 CurLoop->getHeader()) 399 << "Number of runtime checks " 400 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks()) 401 << " exceeds threshold " 402 << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold); 403 }); 404 return false; 405 } 406 // Loop should have at least one invariant load or store instruction. 407 if (!InvariantCounter) { 408 LLVM_DEBUG(dbgs() << " Invariant not found !!\n"); 409 return false; 410 } 411 // Read only loop not allowed. 412 if (IsReadOnlyLoop) { 413 LLVM_DEBUG(dbgs() << " Found a read-only loop!\n"); 414 return false; 415 } 416 // Profitablity check: 417 // Check invariant threshold, should be in limit. 418 if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) { 419 LLVM_DEBUG( 420 dbgs() 421 << " Invariant load & store are less then defined threshold\n"); 422 LLVM_DEBUG(dbgs() << " Invariant loads & stores: " 423 << ((InvariantCounter * 100) / LoadAndStoreCounter) 424 << "%\n"); 425 LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: " 426 << InvariantThreshold << "%\n"); 427 ORE->emit([&]() { 428 return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold", 429 CurLoop->getStartLoc(), 430 CurLoop->getHeader()) 431 << "Invariant load & store " 432 << NV("LoadAndStoreCounter", 433 ((InvariantCounter * 100) / LoadAndStoreCounter)) 434 << " are less then defined threshold " 435 << NV("Threshold", InvariantThreshold); 436 }); 437 return false; 438 } 439 return true; 440 } 441 442 /// It checks loop is already visited or not. 443 /// check loop meta data, if loop revisited return true 444 /// else false. 445 bool LoopVersioningLICM::isLoopAlreadyVisited() { 446 // Check LoopVersioningLICM metadata into loop 447 if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) { 448 return true; 449 } 450 return false; 451 } 452 453 /// Checks legality for LoopVersioningLICM by considering following: 454 /// a) loop structure legality b) loop instruction legality 455 /// c) loop memory access legality. 456 /// Return true if legal else returns false. 457 bool LoopVersioningLICM::isLegalForVersioning() { 458 using namespace ore; 459 LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop); 460 // Make sure not re-visiting same loop again. 461 if (isLoopAlreadyVisited()) { 462 LLVM_DEBUG( 463 dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n"); 464 return false; 465 } 466 // Check loop structure leagality. 467 if (!legalLoopStructure()) { 468 LLVM_DEBUG( 469 dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n"); 470 ORE->emit([&]() { 471 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct", 472 CurLoop->getStartLoc(), 473 CurLoop->getHeader()) 474 << " Unsafe Loop structure"; 475 }); 476 return false; 477 } 478 // Check loop instruction leagality. 479 if (!legalLoopInstructions()) { 480 LLVM_DEBUG( 481 dbgs() 482 << " Loop instructions not suitable for LoopVersioningLICM\n\n"); 483 return false; 484 } 485 // Check loop memory access leagality. 486 if (!legalLoopMemoryAccesses()) { 487 LLVM_DEBUG( 488 dbgs() 489 << " Loop memory access not suitable for LoopVersioningLICM\n\n"); 490 ORE->emit([&]() { 491 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess", 492 CurLoop->getStartLoc(), 493 CurLoop->getHeader()) 494 << " Unsafe Loop memory access"; 495 }); 496 return false; 497 } 498 // Loop versioning is feasible, return true. 499 LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n"); 500 ORE->emit([&]() { 501 return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning", 502 CurLoop->getStartLoc(), CurLoop->getHeader()) 503 << " Versioned loop for LICM." 504 << " Number of runtime checks we had to insert " 505 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks()); 506 }); 507 return true; 508 } 509 510 bool LoopVersioningLICM::run(DominatorTree *DT) { 511 // Do not do the transformation if disabled by metadata. 512 if (hasLICMVersioningTransformation(CurLoop) & TM_Disable) 513 return false; 514 515 bool Changed = false; 516 517 // Check feasiblity of LoopVersioningLICM. 518 // If versioning found to be feasible and beneficial then proceed 519 // else simply return, by cleaning up memory. 520 if (isLegalForVersioning()) { 521 // Do loop versioning. 522 // Create memcheck for memory accessed inside loop. 523 // Clone original loop, and set blocks properly. 524 LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(), 525 CurLoop, &LI, DT, SE); 526 LVer.versionLoop(); 527 // Set Loop Versioning metaData for original loop. 528 addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData); 529 // Set Loop Versioning metaData for version loop. 530 addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData); 531 // Set "llvm.mem.parallel_loop_access" metaData to versioned loop. 532 // FIXME: "llvm.mem.parallel_loop_access" annotates memory access 533 // instructions, not loops. 534 addStringMetadataToLoop(LVer.getVersionedLoop(), 535 "llvm.mem.parallel_loop_access"); 536 // Update version loop with aggressive aliasing assumption. 537 LVer.annotateLoopWithNoAlias(); 538 Changed = true; 539 } 540 return Changed; 541 } 542 543 namespace llvm { 544 545 PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, 546 LoopStandardAnalysisResults &LAR, 547 LPMUpdater &U) { 548 AliasAnalysis *AA = &LAR.AA; 549 ScalarEvolution *SE = &LAR.SE; 550 DominatorTree *DT = &LAR.DT; 551 const Function *F = L.getHeader()->getParent(); 552 OptimizationRemarkEmitter ORE(F); 553 554 LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr); 555 if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT)) 556 return PreservedAnalyses::all(); 557 return getLoopPassPreservedAnalyses(); 558 } 559 } // namespace llvm 560