xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // When alias analysis is uncertain about the aliasing between any two accesses,
10 // it will return MayAlias. This uncertainty from alias analysis restricts LICM
11 // from proceeding further. In cases where alias analysis is uncertain we might
12 // use loop versioning as an alternative.
13 //
14 // Loop Versioning will create a version of the loop with aggressive aliasing
15 // assumptions in addition to the original with conservative (default) aliasing
16 // assumptions. The version of the loop making aggressive aliasing assumptions
17 // will have all the memory accesses marked as no-alias. These two versions of
18 // loop will be preceded by a memory runtime check. This runtime check consists
19 // of bound checks for all unique memory accessed in loop, and it ensures the
20 // lack of memory aliasing. The result of the runtime check determines which of
21 // the loop versions is executed: If the runtime check detects any memory
22 // aliasing, then the original loop is executed. Otherwise, the version with
23 // aggressive aliasing assumptions is used.
24 //
25 // Following are the top level steps:
26 //
27 // a) Perform LoopVersioningLICM's feasibility check.
28 // b) If loop is a candidate for versioning then create a memory bound check,
29 //    by considering all the memory accesses in loop body.
30 // c) Clone original loop and set all memory accesses as no-alias in new loop.
31 // d) Set original loop & versioned loop as a branch target of the runtime check
32 //    result.
33 //
34 // It transforms loop as shown below:
35 //
36 //                         +----------------+
37 //                         |Runtime Memcheck|
38 //                         +----------------+
39 //                                 |
40 //              +----------+----------------+----------+
41 //              |                                      |
42 //    +---------+----------+               +-----------+----------+
43 //    |Orig Loop Preheader |               |Cloned Loop Preheader |
44 //    +--------------------+               +----------------------+
45 //              |                                      |
46 //    +--------------------+               +----------------------+
47 //    |Orig Loop Body      |               |Cloned Loop Body      |
48 //    +--------------------+               +----------------------+
49 //              |                                      |
50 //    +--------------------+               +----------------------+
51 //    |Orig Loop Exit Block|               |Cloned Loop Exit Block|
52 //    +--------------------+               +-----------+----------+
53 //              |                                      |
54 //              +----------+--------------+-----------+
55 //                                 |
56 //                           +-----+----+
57 //                           |Join Block|
58 //                           +----------+
59 //
60 //===----------------------------------------------------------------------===//
61 
62 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
63 #include "llvm/ADT/SmallVector.h"
64 #include "llvm/ADT/StringRef.h"
65 #include "llvm/Analysis/AliasAnalysis.h"
66 #include "llvm/Analysis/AliasSetTracker.h"
67 #include "llvm/Analysis/GlobalsModRef.h"
68 #include "llvm/Analysis/LoopAccessAnalysis.h"
69 #include "llvm/Analysis/LoopInfo.h"
70 #include "llvm/Analysis/LoopPass.h"
71 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
72 #include "llvm/Analysis/ScalarEvolution.h"
73 #include "llvm/IR/Dominators.h"
74 #include "llvm/IR/Instruction.h"
75 #include "llvm/IR/Instructions.h"
76 #include "llvm/IR/LLVMContext.h"
77 #include "llvm/IR/MDBuilder.h"
78 #include "llvm/IR/Metadata.h"
79 #include "llvm/IR/Value.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Debug.h"
83 #include "llvm/Support/raw_ostream.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/LoopVersioning.h"
86 #include <cassert>
87 
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "loop-versioning-licm"
91 
92 static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
93 
94 /// Threshold minimum allowed percentage for possible
95 /// invariant instructions in a loop.
96 static cl::opt<float>
97     LVInvarThreshold("licm-versioning-invariant-threshold",
98                      cl::desc("LoopVersioningLICM's minimum allowed percentage "
99                               "of possible invariant instructions per loop"),
100                      cl::init(25), cl::Hidden);
101 
102 /// Threshold for maximum allowed loop nest/depth
103 static cl::opt<unsigned> LVLoopDepthThreshold(
104     "licm-versioning-max-depth-threshold",
105     cl::desc(
106         "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"),
107     cl::init(2), cl::Hidden);
108 
109 namespace {
110 
111 struct LoopVersioningLICM {
112   // We don't explicitly pass in LoopAccessInfo to the constructor since the
113   // loop versioning might return early due to instructions that are not safe
114   // for versioning. By passing the proxy instead the construction of
115   // LoopAccessInfo will take place only when it's necessary.
116   LoopVersioningLICM(AliasAnalysis *AA, ScalarEvolution *SE,
117                      OptimizationRemarkEmitter *ORE,
118                      LoopAccessInfoManager &LAIs, LoopInfo &LI,
119                      Loop *CurLoop)
120       : AA(AA), SE(SE), LAIs(LAIs), LI(LI), CurLoop(CurLoop),
121         LoopDepthThreshold(LVLoopDepthThreshold),
122         InvariantThreshold(LVInvarThreshold), ORE(ORE) {}
123 
124   bool run(DominatorTree *DT);
125 
126 private:
127   // Current AliasAnalysis information
128   AliasAnalysis *AA;
129 
130   // Current ScalarEvolution
131   ScalarEvolution *SE;
132 
133   // Current Loop's LoopAccessInfo
134   const LoopAccessInfo *LAI = nullptr;
135 
136   // Proxy for retrieving LoopAccessInfo.
137   LoopAccessInfoManager &LAIs;
138 
139   LoopInfo &LI;
140 
141   // The current loop we are working on.
142   Loop *CurLoop;
143 
144   // Maximum loop nest threshold
145   unsigned LoopDepthThreshold;
146 
147   // Minimum invariant threshold
148   float InvariantThreshold;
149 
150   // Counter to track num of load & store
151   unsigned LoadAndStoreCounter = 0;
152 
153   // Counter to track num of invariant
154   unsigned InvariantCounter = 0;
155 
156   // Read only loop marker.
157   bool IsReadOnlyLoop = true;
158 
159   // OptimizationRemarkEmitter
160   OptimizationRemarkEmitter *ORE;
161 
162   bool isLegalForVersioning();
163   bool legalLoopStructure();
164   bool legalLoopInstructions();
165   bool legalLoopMemoryAccesses();
166   bool isLoopAlreadyVisited();
167   bool instructionSafeForVersioning(Instruction *I);
168 };
169 
170 } // end anonymous namespace
171 
172 /// Check loop structure and confirms it's good for LoopVersioningLICM.
173 bool LoopVersioningLICM::legalLoopStructure() {
174   // Loop must be in loop simplify form.
175   if (!CurLoop->isLoopSimplifyForm()) {
176     LLVM_DEBUG(dbgs() << "    loop is not in loop-simplify form.\n");
177     return false;
178   }
179   // Loop should be innermost loop, if not return false.
180   if (!CurLoop->getSubLoops().empty()) {
181     LLVM_DEBUG(dbgs() << "    loop is not innermost\n");
182     return false;
183   }
184   // Loop should have a single backedge, if not return false.
185   if (CurLoop->getNumBackEdges() != 1) {
186     LLVM_DEBUG(dbgs() << "    loop has multiple backedges\n");
187     return false;
188   }
189   // Loop must have a single exiting block, if not return false.
190   if (!CurLoop->getExitingBlock()) {
191     LLVM_DEBUG(dbgs() << "    loop has multiple exiting block\n");
192     return false;
193   }
194   // We only handle bottom-tested loop, i.e. loop in which the condition is
195   // checked at the end of each iteration. With that we can assume that all
196   // instructions in the loop are executed the same number of times.
197   if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) {
198     LLVM_DEBUG(dbgs() << "    loop is not bottom tested\n");
199     return false;
200   }
201   // Parallel loops must not have aliasing loop-invariant memory accesses.
202   // Hence we don't need to version anything in this case.
203   if (CurLoop->isAnnotatedParallel()) {
204     LLVM_DEBUG(dbgs() << "    Parallel loop is not worth versioning\n");
205     return false;
206   }
207   // Loop depth more then LoopDepthThreshold are not allowed
208   if (CurLoop->getLoopDepth() > LoopDepthThreshold) {
209     LLVM_DEBUG(dbgs() << "    loop depth is more than threshold\n");
210     return false;
211   }
212   // We need to be able to compute the loop trip count in order
213   // to generate the bound checks.
214   const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
215   if (isa<SCEVCouldNotCompute>(ExitCount)) {
216     LLVM_DEBUG(dbgs() << "    loop does not have trip count\n");
217     return false;
218   }
219   return true;
220 }
221 
222 /// Check memory accesses in loop and confirms it's good for
223 /// LoopVersioningLICM.
224 bool LoopVersioningLICM::legalLoopMemoryAccesses() {
225   // Loop over the body of this loop, construct AST.
226   BatchAAResults BAA(*AA);
227   AliasSetTracker AST(BAA);
228   for (auto *Block : CurLoop->getBlocks()) {
229     // Ignore blocks in subloops.
230     if (LI.getLoopFor(Block) == CurLoop)
231       AST.add(*Block);
232   }
233 
234   // Memory check:
235   // Transform phase will generate a versioned loop and also a runtime check to
236   // ensure the pointers are independent and they don’t alias.
237   // In version variant of loop, alias meta data asserts that all access are
238   // mutually independent.
239   //
240   // Pointers aliasing in alias domain are avoided because with multiple
241   // aliasing domains we may not be able to hoist potential loop invariant
242   // access out of the loop.
243   //
244   // Iterate over alias tracker sets, and confirm AliasSets doesn't have any
245   // must alias set.
246   bool HasMayAlias = false;
247   bool TypeSafety = false;
248   bool HasMod = false;
249   for (const auto &I : AST) {
250     const AliasSet &AS = I;
251     // Skip Forward Alias Sets, as this should be ignored as part of
252     // the AliasSetTracker object.
253     if (AS.isForwardingAliasSet())
254       continue;
255     // With MustAlias its not worth adding runtime bound check.
256     if (AS.isMustAlias())
257       return false;
258     const Value *SomePtr = AS.begin()->Ptr;
259     bool TypeCheck = true;
260     // Check for Mod & MayAlias
261     HasMayAlias |= AS.isMayAlias();
262     HasMod |= AS.isMod();
263     for (const auto &MemLoc : AS) {
264       const Value *Ptr = MemLoc.Ptr;
265       // Alias tracker should have pointers of same data type.
266       //
267       // FIXME: check no longer effective since opaque pointers?
268       // If the intent is to check that the memory accesses use the
269       // same data type (such that LICM can promote them), then we
270       // can no longer see this from the pointer value types.
271       TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType()));
272     }
273     // At least one alias tracker should have pointers of same data type.
274     TypeSafety |= TypeCheck;
275   }
276   // Ensure types should be of same type.
277   if (!TypeSafety) {
278     LLVM_DEBUG(dbgs() << "    Alias tracker type safety failed!\n");
279     return false;
280   }
281   // Ensure loop body shouldn't be read only.
282   if (!HasMod) {
283     LLVM_DEBUG(dbgs() << "    No memory modified in loop body\n");
284     return false;
285   }
286   // Make sure alias set has may alias case.
287   // If there no alias memory ambiguity, return false.
288   if (!HasMayAlias) {
289     LLVM_DEBUG(dbgs() << "    No ambiguity in memory access.\n");
290     return false;
291   }
292   return true;
293 }
294 
295 /// Check loop instructions safe for Loop versioning.
296 /// It returns true if it's safe else returns false.
297 /// Consider following:
298 /// 1) Check all load store in loop body are non atomic & non volatile.
299 /// 2) Check function call safety, by ensuring its not accessing memory.
300 /// 3) Loop body shouldn't have any may throw instruction.
301 /// 4) Loop body shouldn't have any convergent or noduplicate instructions.
302 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
303   assert(I != nullptr && "Null instruction found!");
304   // Check function call safety
305   if (auto *Call = dyn_cast<CallBase>(I)) {
306     if (Call->isConvergent() || Call->cannotDuplicate()) {
307       LLVM_DEBUG(dbgs() << "    Convergent call site found.\n");
308       return false;
309     }
310 
311     if (!AA->doesNotAccessMemory(Call)) {
312       LLVM_DEBUG(dbgs() << "    Unsafe call site found.\n");
313       return false;
314     }
315   }
316 
317   // Avoid loops with possiblity of throw
318   if (I->mayThrow()) {
319     LLVM_DEBUG(dbgs() << "    May throw instruction found in loop body\n");
320     return false;
321   }
322   // If current instruction is load instructions
323   // make sure it's a simple load (non atomic & non volatile)
324   if (I->mayReadFromMemory()) {
325     LoadInst *Ld = dyn_cast<LoadInst>(I);
326     if (!Ld || !Ld->isSimple()) {
327       LLVM_DEBUG(dbgs() << "    Found a non-simple load.\n");
328       return false;
329     }
330     LoadAndStoreCounter++;
331     Value *Ptr = Ld->getPointerOperand();
332     // Check loop invariant.
333     if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
334       InvariantCounter++;
335   }
336   // If current instruction is store instruction
337   // make sure it's a simple store (non atomic & non volatile)
338   else if (I->mayWriteToMemory()) {
339     StoreInst *St = dyn_cast<StoreInst>(I);
340     if (!St || !St->isSimple()) {
341       LLVM_DEBUG(dbgs() << "    Found a non-simple store.\n");
342       return false;
343     }
344     LoadAndStoreCounter++;
345     Value *Ptr = St->getPointerOperand();
346     // Don't allow stores that we don't have runtime checks for, as we won't be
347     // able to mark them noalias meaning they would prevent any code motion.
348     auto &Pointers = LAI->getRuntimePointerChecking()->Pointers;
349     if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) {
350       LLVM_DEBUG(dbgs() << "    Found a store without a runtime check.\n");
351       return false;
352     }
353     // Check loop invariant.
354     if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop))
355       InvariantCounter++;
356 
357     IsReadOnlyLoop = false;
358   }
359   return true;
360 }
361 
362 /// Check loop instructions and confirms it's good for
363 /// LoopVersioningLICM.
364 bool LoopVersioningLICM::legalLoopInstructions() {
365   // Resetting counters.
366   LoadAndStoreCounter = 0;
367   InvariantCounter = 0;
368   IsReadOnlyLoop = true;
369   using namespace ore;
370   // Get LoopAccessInfo from current loop via the proxy.
371   LAI = &LAIs.getInfo(*CurLoop, /*AllowPartial=*/true);
372   // Check LoopAccessInfo for need of runtime check.
373   if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
374     LLVM_DEBUG(dbgs() << "    LAA: Runtime check not found !!\n");
375     return false;
376   }
377   // Iterate over loop blocks and instructions of each block and check
378   // instruction safety.
379   for (auto *Block : CurLoop->getBlocks())
380     for (auto &Inst : *Block) {
381       // If instruction is unsafe just return false.
382       if (!instructionSafeForVersioning(&Inst)) {
383         ORE->emit([&]() {
384           return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst)
385                  << " Unsafe Loop Instruction";
386         });
387         return false;
388       }
389     }
390   // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
391   if (LAI->getNumRuntimePointerChecks() >
392       VectorizerParams::RuntimeMemoryCheckThreshold) {
393     LLVM_DEBUG(
394         dbgs() << "    LAA: Runtime checks are more than threshold !!\n");
395     ORE->emit([&]() {
396       return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck",
397                                       CurLoop->getStartLoc(),
398                                       CurLoop->getHeader())
399              << "Number of runtime checks "
400              << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks())
401              << " exceeds threshold "
402              << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold);
403     });
404     return false;
405   }
406   // Loop should have at least one invariant load or store instruction.
407   if (!InvariantCounter) {
408     LLVM_DEBUG(dbgs() << "    Invariant not found !!\n");
409     return false;
410   }
411   // Read only loop not allowed.
412   if (IsReadOnlyLoop) {
413     LLVM_DEBUG(dbgs() << "    Found a read-only loop!\n");
414     return false;
415   }
416   // Profitablity check:
417   // Check invariant threshold, should be in limit.
418   if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) {
419     LLVM_DEBUG(
420         dbgs()
421         << "    Invariant load & store are less then defined threshold\n");
422     LLVM_DEBUG(dbgs() << "    Invariant loads & stores: "
423                       << ((InvariantCounter * 100) / LoadAndStoreCounter)
424                       << "%\n");
425     LLVM_DEBUG(dbgs() << "    Invariant loads & store threshold: "
426                       << InvariantThreshold << "%\n");
427     ORE->emit([&]() {
428       return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold",
429                                       CurLoop->getStartLoc(),
430                                       CurLoop->getHeader())
431              << "Invariant load & store "
432              << NV("LoadAndStoreCounter",
433                    ((InvariantCounter * 100) / LoadAndStoreCounter))
434              << " are less then defined threshold "
435              << NV("Threshold", InvariantThreshold);
436     });
437     return false;
438   }
439   return true;
440 }
441 
442 /// It checks loop is already visited or not.
443 /// check loop meta data, if loop revisited return true
444 /// else false.
445 bool LoopVersioningLICM::isLoopAlreadyVisited() {
446   // Check LoopVersioningLICM metadata into loop
447   if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) {
448     return true;
449   }
450   return false;
451 }
452 
453 /// Checks legality for LoopVersioningLICM by considering following:
454 /// a) loop structure legality   b) loop instruction legality
455 /// c) loop memory access legality.
456 /// Return true if legal else returns false.
457 bool LoopVersioningLICM::isLegalForVersioning() {
458   using namespace ore;
459   LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop);
460   // Make sure not re-visiting same loop again.
461   if (isLoopAlreadyVisited()) {
462     LLVM_DEBUG(
463         dbgs() << "    Revisiting loop in LoopVersioningLICM not allowed.\n\n");
464     return false;
465   }
466   // Check loop structure leagality.
467   if (!legalLoopStructure()) {
468     LLVM_DEBUG(
469         dbgs() << "    Loop structure not suitable for LoopVersioningLICM\n\n");
470     ORE->emit([&]() {
471       return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct",
472                                       CurLoop->getStartLoc(),
473                                       CurLoop->getHeader())
474              << " Unsafe Loop structure";
475     });
476     return false;
477   }
478   // Check loop instruction leagality.
479   if (!legalLoopInstructions()) {
480     LLVM_DEBUG(
481         dbgs()
482         << "    Loop instructions not suitable for LoopVersioningLICM\n\n");
483     return false;
484   }
485   // Check loop memory access leagality.
486   if (!legalLoopMemoryAccesses()) {
487     LLVM_DEBUG(
488         dbgs()
489         << "    Loop memory access not suitable for LoopVersioningLICM\n\n");
490     ORE->emit([&]() {
491       return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess",
492                                       CurLoop->getStartLoc(),
493                                       CurLoop->getHeader())
494              << " Unsafe Loop memory access";
495     });
496     return false;
497   }
498   // Loop versioning is feasible, return true.
499   LLVM_DEBUG(dbgs() << "    Loop Versioning found to be beneficial\n\n");
500   ORE->emit([&]() {
501     return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning",
502                               CurLoop->getStartLoc(), CurLoop->getHeader())
503            << " Versioned loop for LICM."
504            << " Number of runtime checks we had to insert "
505            << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks());
506   });
507   return true;
508 }
509 
510 bool LoopVersioningLICM::run(DominatorTree *DT) {
511   // Do not do the transformation if disabled by metadata.
512   if (hasLICMVersioningTransformation(CurLoop) & TM_Disable)
513     return false;
514 
515   bool Changed = false;
516 
517   // Check feasiblity of LoopVersioningLICM.
518   // If versioning found to be feasible and beneficial then proceed
519   // else simply return, by cleaning up memory.
520   if (isLegalForVersioning()) {
521     // Do loop versioning.
522     // Create memcheck for memory accessed inside loop.
523     // Clone original loop, and set blocks properly.
524     LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
525                         CurLoop, &LI, DT, SE);
526     LVer.versionLoop();
527     // Set Loop Versioning metaData for original loop.
528     addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData);
529     // Set Loop Versioning metaData for version loop.
530     addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
531     // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
532     // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
533     // instructions, not loops.
534     addStringMetadataToLoop(LVer.getVersionedLoop(),
535                             "llvm.mem.parallel_loop_access");
536     // Update version loop with aggressive aliasing assumption.
537     LVer.annotateLoopWithNoAlias();
538     Changed = true;
539   }
540   return Changed;
541 }
542 
543 namespace llvm {
544 
545 PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
546                                               LoopStandardAnalysisResults &LAR,
547                                               LPMUpdater &U) {
548   AliasAnalysis *AA = &LAR.AA;
549   ScalarEvolution *SE = &LAR.SE;
550   DominatorTree *DT = &LAR.DT;
551   const Function *F = L.getHeader()->getParent();
552   OptimizationRemarkEmitter ORE(F);
553 
554   LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
555   if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
556     return PreservedAnalyses::all();
557   return getLoopPassPreservedAnalyses();
558 }
559 } // namespace llvm
560