1 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // When alias analysis is uncertain about the aliasing between any two accesses,
10 // it will return MayAlias. This uncertainty from alias analysis restricts LICM
11 // from proceeding further. In cases where alias analysis is uncertain we might
12 // use loop versioning as an alternative.
13 //
14 // Loop Versioning will create a version of the loop with aggressive aliasing
15 // assumptions in addition to the original with conservative (default) aliasing
16 // assumptions. The version of the loop making aggressive aliasing assumptions
17 // will have all the memory accesses marked as no-alias. These two versions of
18 // loop will be preceded by a memory runtime check. This runtime check consists
19 // of bound checks for all unique memory accessed in loop, and it ensures the
20 // lack of memory aliasing. The result of the runtime check determines which of
21 // the loop versions is executed: If the runtime check detects any memory
22 // aliasing, then the original loop is executed. Otherwise, the version with
23 // aggressive aliasing assumptions is used.
24 //
25 // Following are the top level steps:
26 //
27 // a) Perform LoopVersioningLICM's feasibility check.
28 // b) If loop is a candidate for versioning then create a memory bound check,
29 // by considering all the memory accesses in loop body.
30 // c) Clone original loop and set all memory accesses as no-alias in new loop.
31 // d) Set original loop & versioned loop as a branch target of the runtime check
32 // result.
33 //
34 // It transforms loop as shown below:
35 //
36 // +----------------+
37 // |Runtime Memcheck|
38 // +----------------+
39 // |
40 // +----------+----------------+----------+
41 // | |
42 // +---------+----------+ +-----------+----------+
43 // |Orig Loop Preheader | |Cloned Loop Preheader |
44 // +--------------------+ +----------------------+
45 // | |
46 // +--------------------+ +----------------------+
47 // |Orig Loop Body | |Cloned Loop Body |
48 // +--------------------+ +----------------------+
49 // | |
50 // +--------------------+ +----------------------+
51 // |Orig Loop Exit Block| |Cloned Loop Exit Block|
52 // +--------------------+ +-----------+----------+
53 // | |
54 // +----------+--------------+-----------+
55 // |
56 // +-----+----+
57 // |Join Block|
58 // +----------+
59 //
60 //===----------------------------------------------------------------------===//
61
62 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
63 #include "llvm/ADT/SmallVector.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Analysis/AliasAnalysis.h"
66 #include "llvm/Analysis/AliasSetTracker.h"
67 #include "llvm/Analysis/GlobalsModRef.h"
68 #include "llvm/Analysis/LoopAccessAnalysis.h"
69 #include "llvm/Analysis/LoopInfo.h"
70 #include "llvm/Analysis/LoopPass.h"
71 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
72 #include "llvm/Analysis/ScalarEvolution.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Instruction.h"
75 #include "llvm/IR/Instructions.h"
76 #include "llvm/IR/LLVMContext.h"
77 #include "llvm/IR/MDBuilder.h"
78 #include "llvm/IR/Metadata.h"
79 #include "llvm/IR/Value.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/raw_ostream.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/LoopVersioning.h"
86 #include <cassert>
87
88 using namespace llvm;
89
90 #define DEBUG_TYPE "loop-versioning-licm"
91
92 static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
93
94 /// Threshold minimum allowed percentage for possible
95 /// invariant instructions in a loop.
96 static cl::opt<float>
97 LVInvarThreshold("licm-versioning-invariant-threshold",
98 cl::desc("LoopVersioningLICM's minimum allowed percentage "
99 "of possible invariant instructions per loop"),
100 cl::init(25), cl::Hidden);
101
102 /// Threshold for maximum allowed loop nest/depth
103 static cl::opt<unsigned> LVLoopDepthThreshold(
104 "licm-versioning-max-depth-threshold",
105 cl::desc(
106 "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
107 cl::init(2), cl::Hidden);
108
109 namespace {
110
111 struct LoopVersioningLICM {
112 // We don't explicitly pass in LoopAccessInfo to the constructor since the
113 // loop versioning might return early due to instructions that are not safe
114 // for versioning. By passing the proxy instead the construction of
115 // LoopAccessInfo will take place only when it's necessary.
LoopVersioningLICM__anon3e7448160111::LoopVersioningLICM116 LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
117 OptimizationRemarkEmitter *ORE,
118 LoopAccessInfoManager &LAIs, LoopInfo &LI,
119 Loop *CurLoop)
120 : AA(AA), SE(SE), LAIs(LAIs), LI(LI), CurLoop(CurLoop),
121 LoopDepthThreshold(LVLoopDepthThreshold),
122 InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
123
124 bool run(DominatorTree *DT);
125
126 private:
127 // Current AliasAnalysis information
128 AliasAnalysis *AA;
129
130 // Current ScalarEvolution
131 ScalarEvolution *SE;
132
133 // Current Loop's LoopAccessInfo
134 const LoopAccessInfo *LAI = nullptr;
135
136 // Proxy for retrieving LoopAccessInfo.
137 LoopAccessInfoManager &LAIs;
138
139 LoopInfo &LI;
140
141 // The current loop we are working on.
142 Loop *CurLoop;
143
144 // Maximum loop nest threshold
145 unsigned LoopDepthThreshold;
146
147 // Minimum invariant threshold
148 float InvariantThreshold;
149
150 // Counter to track num of load & store
151 unsigned LoadAndStoreCounter = 0;
152
153 // Counter to track num of invariant
154 unsigned InvariantCounter = 0;
155
156 // Read only loop marker.
157 bool IsReadOnlyLoop = true;
158
159 // OptimizationRemarkEmitter
160 OptimizationRemarkEmitter *ORE;
161
162 bool isLegalForVersioning();
163 bool legalLoopStructure();
164 bool legalLoopInstructions();
165 bool legalLoopMemoryAccesses();
166 bool isLoopAlreadyVisited();
167 bool instructionSafeForVersioning(Instruction *I);
168 };
169
170 } // end anonymous namespace
171
172 /// Check loop structure and confirms it's good for LoopVersioningLICM.
legalLoopStructure()173 bool LoopVersioningLICM::legalLoopStructure() {
174 // Loop must be in loop simplify form.
175 if (!CurLoop->isLoopSimplifyForm()) {
176 LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n");
177 return false;
178 }
179 // Loop should be innermost loop, if not return false.
180 if (!CurLoop->getSubLoops().empty()) {
181 LLVM_DEBUG(dbgs() << " loop is not innermost\n");
182 return false;
183 }
184 // Loop should have a single backedge, if not return false.
185 if (CurLoop->getNumBackEdges() != 1) {
186 LLVM_DEBUG(dbgs() << " loop has multiple backedges\n");
187 return false;
188 }
189 // Loop must have a single exiting block, if not return false.
190 if (!CurLoop->getExitingBlock()) {
191 LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n");
192 return false;
193 }
194 // We only handle bottom-tested loop, i.e. loop in which the condition is
195 // checked at the end of each iteration. With that we can assume that all
196 // instructions in the loop are executed the same number of times.
197 if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
198 LLVM_DEBUG(dbgs() << " loop is not bottom tested\n");
199 return false;
200 }
201 // Parallel loops must not have aliasing loop-invariant memory accesses.
202 // Hence we don't need to version anything in this case.
203 if (CurLoop->isAnnotatedParallel()) {
204 LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n");
205 return false;
206 }
207 // Loop depth more then LoopDepthThreshold are not allowed
208 if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
209 LLVM_DEBUG(dbgs() << " loop depth is more than threshold\n");
210 return false;
211 }
212 // We need to be able to compute the loop trip count in order
213 // to generate the bound checks.
214 const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
215 if (isa<SCEVCouldNotCompute>(ExitCount)) {
216 LLVM_DEBUG(dbgs() << " loop does not have trip count\n");
217 return false;
218 }
219 return true;
220 }
221
222 /// Check memory accesses in loop and confirms it's good for
223 /// LoopVersioningLICM.
legalLoopMemoryAccesses()224 bool LoopVersioningLICM::legalLoopMemoryAccesses() {
225 // Loop over the body of this loop, construct AST.
226 BatchAAResults BAA(*AA);
227 AliasSetTracker AST(BAA);
228 for (auto *Block : CurLoop->getBlocks()) {
229 // Ignore blocks in subloops.
230 if (LI.getLoopFor(Block) == CurLoop)
231 AST.add(*Block);
232 }
233
234 // Memory check:
235 // Transform phase will generate a versioned loop and also a runtime check to
236 // ensure the pointers are independent and they don’t alias.
237 // In version variant of loop, alias meta data asserts that all access are
238 // mutually independent.
239 //
240 // Pointers aliasing in alias domain are avoided because with multiple
241 // aliasing domains we may not be able to hoist potential loop invariant
242 // access out of the loop.
243 //
244 // Iterate over alias tracker sets, and confirm AliasSets doesn't have any
245 // must alias set.
246 bool HasMayAlias = false;
247 bool TypeSafety = false;
248 bool HasMod = false;
249 for (const auto &I : AST) {
250 const AliasSet &AS = I;
251 // Skip Forward Alias Sets, as this should be ignored as part of
252 // the AliasSetTracker object.
253 if (AS.isForwardingAliasSet())
254 continue;
255 // With MustAlias its not worth adding runtime bound check.
256 if (AS.isMustAlias())
257 return false;
258 const Value *SomePtr = AS.begin()->Ptr;
259 bool TypeCheck = true;
260 // Check for Mod & MayAlias
261 HasMayAlias |= AS.isMayAlias();
262 HasMod |= AS.isMod();
263 for (const auto &MemLoc : AS) {
264 const Value *Ptr = MemLoc.Ptr;
265 // Alias tracker should have pointers of same data type.
266 //
267 // FIXME: check no longer effective since opaque pointers?
268 // If the intent is to check that the memory accesses use the
269 // same data type (such that LICM can promote them), then we
270 // can no longer see this from the pointer value types.
271 TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
272 }
273 // At least one alias tracker should have pointers of same data type.
274 TypeSafety |= TypeCheck;
275 }
276 // Ensure types should be of same type.
277 if (!TypeSafety) {
278 LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n");
279 return false;
280 }
281 // Ensure loop body shouldn't be read only.
282 if (!HasMod) {
283 LLVM_DEBUG(dbgs() << " No memory modified in loop body\n");
284 return false;
285 }
286 // Make sure alias set has may alias case.
287 // If there no alias memory ambiguity, return false.
288 if (!HasMayAlias) {
289 LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n");
290 return false;
291 }
292 return true;
293 }
294
295 /// Check loop instructions safe for Loop versioning.
296 /// It returns true if it's safe else returns false.
297 /// Consider following:
298 /// 1) Check all load store in loop body are non atomic & non volatile.
299 /// 2) Check function call safety, by ensuring its not accessing memory.
300 /// 3) Loop body shouldn't have any may throw instruction.
301 /// 4) Loop body shouldn't have any convergent or noduplicate instructions.
instructionSafeForVersioning(Instruction * I)302 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
303 assert(I != nullptr && "Null instruction found!");
304 // Check function call safety
305 if (auto *Call = dyn_cast<CallBase>(I)) {
306 if (Call->isConvergent() || Call->cannotDuplicate()) {
307 LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
308 return false;
309 }
310
311 if (!AA->doesNotAccessMemory(Call)) {
312 LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
313 return false;
314 }
315 }
316
317 // Avoid loops with possiblity of throw
318 if (I->mayThrow()) {
319 LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
320 return false;
321 }
322 // If current instruction is load instructions
323 // make sure it's a simple load (non atomic & non volatile)
324 if (I->mayReadFromMemory()) {
325 LoadInst *Ld = dyn_cast<LoadInst>(I);
326 if (!Ld || !Ld->isSimple()) {
327 LLVM_DEBUG(dbgs() << " Found a non-simple load.\n");
328 return false;
329 }
330 LoadAndStoreCounter++;
331 Value *Ptr = Ld->getPointerOperand();
332 // Check loop invariant.
333 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
334 InvariantCounter++;
335 }
336 // If current instruction is store instruction
337 // make sure it's a simple store (non atomic & non volatile)
338 else if (I->mayWriteToMemory()) {
339 StoreInst *St = dyn_cast<StoreInst>(I);
340 if (!St || !St->isSimple()) {
341 LLVM_DEBUG(dbgs() << " Found a non-simple store.\n");
342 return false;
343 }
344 LoadAndStoreCounter++;
345 Value *Ptr = St->getPointerOperand();
346 // Don't allow stores that we don't have runtime checks for, as we won't be
347 // able to mark them noalias meaning they would prevent any code motion.
348 auto &Pointers = LAI->getRuntimePointerChecking()->Pointers;
349 if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) {
350 LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n");
351 return false;
352 }
353 // Check loop invariant.
354 if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
355 InvariantCounter++;
356
357 IsReadOnlyLoop = false;
358 }
359 return true;
360 }
361
362 /// Check loop instructions and confirms it's good for
363 /// LoopVersioningLICM.
legalLoopInstructions()364 bool LoopVersioningLICM::legalLoopInstructions() {
365 // Resetting counters.
366 LoadAndStoreCounter = 0;
367 InvariantCounter = 0;
368 IsReadOnlyLoop = true;
369 using namespace ore;
370 // Get LoopAccessInfo from current loop via the proxy.
371 LAI = &LAIs.getInfo(*CurLoop, /*AllowPartial=*/true);
372 // Check LoopAccessInfo for need of runtime check.
373 if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
374 LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n");
375 return false;
376 }
377 // Iterate over loop blocks and instructions of each block and check
378 // instruction safety.
379 for (auto *Block : CurLoop->getBlocks())
380 for (auto &Inst : *Block) {
381 // If instruction is unsafe just return false.
382 if (!instructionSafeForVersioning(&Inst)) {
383 ORE->emit([&]() {
384 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst)
385 << " Unsafe Loop Instruction";
386 });
387 return false;
388 }
389 }
390 // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
391 if (LAI->getNumRuntimePointerChecks() >
392 VectorizerParams::RuntimeMemoryCheckThreshold) {
393 LLVM_DEBUG(
394 dbgs() << " LAA: Runtime checks are more than threshold !!\n");
395 ORE->emit([&]() {
396 return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck",
397 CurLoop->getStartLoc(),
398 CurLoop->getHeader())
399 << "Number of runtime checks "
400 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks())
401 << " exceeds threshold "
402 << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold);
403 });
404 return false;
405 }
406 // Loop should have at least one invariant load or store instruction.
407 if (!InvariantCounter) {
408 LLVM_DEBUG(dbgs() << " Invariant not found !!\n");
409 return false;
410 }
411 // Read only loop not allowed.
412 if (IsReadOnlyLoop) {
413 LLVM_DEBUG(dbgs() << " Found a read-only loop!\n");
414 return false;
415 }
416 // Profitablity check:
417 // Check invariant threshold, should be in limit.
418 if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) {
419 LLVM_DEBUG(
420 dbgs()
421 << " Invariant load & store are less then defined threshold\n");
422 LLVM_DEBUG(dbgs() << " Invariant loads & stores: "
423 << ((InvariantCounter * 100) / LoadAndStoreCounter)
424 << "%\n");
425 LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: "
426 << InvariantThreshold << "%\n");
427 ORE->emit([&]() {
428 return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold",
429 CurLoop->getStartLoc(),
430 CurLoop->getHeader())
431 << "Invariant load & store "
432 << NV("LoadAndStoreCounter",
433 ((InvariantCounter * 100) / LoadAndStoreCounter))
434 << " are less then defined threshold "
435 << NV("Threshold", InvariantThreshold);
436 });
437 return false;
438 }
439 return true;
440 }
441
442 /// It checks loop is already visited or not.
443 /// check loop meta data, if loop revisited return true
444 /// else false.
isLoopAlreadyVisited()445 bool LoopVersioningLICM::isLoopAlreadyVisited() {
446 // Check LoopVersioningLICM metadata into loop
447 if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) {
448 return true;
449 }
450 return false;
451 }
452
453 /// Checks legality for LoopVersioningLICM by considering following:
454 /// a) loop structure legality b) loop instruction legality
455 /// c) loop memory access legality.
456 /// Return true if legal else returns false.
isLegalForVersioning()457 bool LoopVersioningLICM::isLegalForVersioning() {
458 using namespace ore;
459 LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop);
460 // Make sure not re-visiting same loop again.
461 if (isLoopAlreadyVisited()) {
462 LLVM_DEBUG(
463 dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n");
464 return false;
465 }
466 // Check loop structure leagality.
467 if (!legalLoopStructure()) {
468 LLVM_DEBUG(
469 dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n");
470 ORE->emit([&]() {
471 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct",
472 CurLoop->getStartLoc(),
473 CurLoop->getHeader())
474 << " Unsafe Loop structure";
475 });
476 return false;
477 }
478 // Check loop instruction leagality.
479 if (!legalLoopInstructions()) {
480 LLVM_DEBUG(
481 dbgs()
482 << " Loop instructions not suitable for LoopVersioningLICM\n\n");
483 return false;
484 }
485 // Check loop memory access leagality.
486 if (!legalLoopMemoryAccesses()) {
487 LLVM_DEBUG(
488 dbgs()
489 << " Loop memory access not suitable for LoopVersioningLICM\n\n");
490 ORE->emit([&]() {
491 return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess",
492 CurLoop->getStartLoc(),
493 CurLoop->getHeader())
494 << " Unsafe Loop memory access";
495 });
496 return false;
497 }
498 // Loop versioning is feasible, return true.
499 LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n");
500 ORE->emit([&]() {
501 return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning",
502 CurLoop->getStartLoc(), CurLoop->getHeader())
503 << " Versioned loop for LICM."
504 << " Number of runtime checks we had to insert "
505 << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks());
506 });
507 return true;
508 }
509
run(DominatorTree * DT)510 bool LoopVersioningLICM::run(DominatorTree *DT) {
511 // Do not do the transformation if disabled by metadata.
512 if (hasLICMVersioningTransformation(CurLoop) & TM_Disable)
513 return false;
514
515 bool Changed = false;
516
517 // Check feasiblity of LoopVersioningLICM.
518 // If versioning found to be feasible and beneficial then proceed
519 // else simply return, by cleaning up memory.
520 if (isLegalForVersioning()) {
521 // Do loop versioning.
522 // Create memcheck for memory accessed inside loop.
523 // Clone original loop, and set blocks properly.
524 LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
525 CurLoop, &LI, DT, SE);
526 LVer.versionLoop();
527 // Set Loop Versioning metaData for original loop.
528 addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
529 // Set Loop Versioning metaData for version loop.
530 addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
531 // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
532 // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
533 // instructions, not loops.
534 addStringMetadataToLoop(LVer.getVersionedLoop(),
535 "llvm.mem.parallel_loop_access");
536 // Update version loop with aggressive aliasing assumption.
537 LVer.annotateLoopWithNoAlias();
538 Changed = true;
539 }
540 return Changed;
541 }
542
543 namespace llvm {
544
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & LAR,LPMUpdater & U)545 PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
546 LoopStandardAnalysisResults &LAR,
547 LPMUpdater &U) {
548 AliasAnalysis *AA = &LAR.AA;
549 ScalarEvolution *SE = &LAR.SE;
550 DominatorTree *DT = &LAR.DT;
551 const Function *F = L.getHeader()->getParent();
552 OptimizationRemarkEmitter ORE(F);
553
554 LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
555 if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
556 return PreservedAnalyses::all();
557 return getLoopPassPreservedAnalyses();
558 }
559 } // namespace llvm
560