xref: /freebsd/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- Construction of pass pipelines -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file provides the implementation of the PassBuilder based on our
11 /// static pass registry as well as related functionality. It also provides
12 /// helpers to aid in analyzing, debugging, and testing passes and pass
13 /// pipelines.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/BasicAliasAnalysis.h"
20 #include "llvm/Analysis/CGSCCPassManager.h"
21 #include "llvm/Analysis/CtxProfAnalysis.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/InlineAdvisor.h"
24 #include "llvm/Analysis/ProfileSummaryInfo.h"
25 #include "llvm/Analysis/ScopedNoAliasAA.h"
26 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Passes/OptimizationLevel.h"
30 #include "llvm/Passes/PassBuilder.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/PGOOptions.h"
34 #include "llvm/Support/VirtualFileSystem.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
37 #include "llvm/Transforms/Coroutines/CoroAnnotationElide.h"
38 #include "llvm/Transforms/Coroutines/CoroCleanup.h"
39 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
40 #include "llvm/Transforms/Coroutines/CoroEarly.h"
41 #include "llvm/Transforms/Coroutines/CoroElide.h"
42 #include "llvm/Transforms/Coroutines/CoroSplit.h"
43 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
44 #include "llvm/Transforms/IPO/AlwaysInliner.h"
45 #include "llvm/Transforms/IPO/Annotation2Metadata.h"
46 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
47 #include "llvm/Transforms/IPO/Attributor.h"
48 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
49 #include "llvm/Transforms/IPO/ConstantMerge.h"
50 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
51 #include "llvm/Transforms/IPO/DeadArgumentElimination.h"
52 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
53 #include "llvm/Transforms/IPO/EmbedBitcodePass.h"
54 #include "llvm/Transforms/IPO/ExpandVariadics.h"
55 #include "llvm/Transforms/IPO/FatLTOCleanup.h"
56 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
57 #include "llvm/Transforms/IPO/FunctionAttrs.h"
58 #include "llvm/Transforms/IPO/GlobalDCE.h"
59 #include "llvm/Transforms/IPO/GlobalOpt.h"
60 #include "llvm/Transforms/IPO/GlobalSplit.h"
61 #include "llvm/Transforms/IPO/HotColdSplitting.h"
62 #include "llvm/Transforms/IPO/IROutliner.h"
63 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
64 #include "llvm/Transforms/IPO/Inliner.h"
65 #include "llvm/Transforms/IPO/LowerTypeTests.h"
66 #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
67 #include "llvm/Transforms/IPO/MergeFunctions.h"
68 #include "llvm/Transforms/IPO/ModuleInliner.h"
69 #include "llvm/Transforms/IPO/OpenMPOpt.h"
70 #include "llvm/Transforms/IPO/PartialInlining.h"
71 #include "llvm/Transforms/IPO/SCCP.h"
72 #include "llvm/Transforms/IPO/SampleProfile.h"
73 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
74 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
75 #include "llvm/Transforms/InstCombine/InstCombine.h"
76 #include "llvm/Transforms/Instrumentation/CGProfile.h"
77 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
78 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
79 #include "llvm/Transforms/Instrumentation/MemProfInstrumentation.h"
80 #include "llvm/Transforms/Instrumentation/MemProfUse.h"
81 #include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
82 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
83 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
84 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
85 #include "llvm/Transforms/Scalar/ADCE.h"
86 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
87 #include "llvm/Transforms/Scalar/AnnotationRemarks.h"
88 #include "llvm/Transforms/Scalar/BDCE.h"
89 #include "llvm/Transforms/Scalar/CallSiteSplitting.h"
90 #include "llvm/Transforms/Scalar/ConstraintElimination.h"
91 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
92 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
93 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
94 #include "llvm/Transforms/Scalar/DivRemPairs.h"
95 #include "llvm/Transforms/Scalar/EarlyCSE.h"
96 #include "llvm/Transforms/Scalar/Float2Int.h"
97 #include "llvm/Transforms/Scalar/GVN.h"
98 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
99 #include "llvm/Transforms/Scalar/InferAlignment.h"
100 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
101 #include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
102 #include "llvm/Transforms/Scalar/JumpThreading.h"
103 #include "llvm/Transforms/Scalar/LICM.h"
104 #include "llvm/Transforms/Scalar/LoopDeletion.h"
105 #include "llvm/Transforms/Scalar/LoopDistribute.h"
106 #include "llvm/Transforms/Scalar/LoopFlatten.h"
107 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
108 #include "llvm/Transforms/Scalar/LoopInstSimplify.h"
109 #include "llvm/Transforms/Scalar/LoopInterchange.h"
110 #include "llvm/Transforms/Scalar/LoopLoadElimination.h"
111 #include "llvm/Transforms/Scalar/LoopPassManager.h"
112 #include "llvm/Transforms/Scalar/LoopRotation.h"
113 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
114 #include "llvm/Transforms/Scalar/LoopSink.h"
115 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
116 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
117 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
118 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
119 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
120 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
121 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
122 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
123 #include "llvm/Transforms/Scalar/NewGVN.h"
124 #include "llvm/Transforms/Scalar/Reassociate.h"
125 #include "llvm/Transforms/Scalar/SCCP.h"
126 #include "llvm/Transforms/Scalar/SROA.h"
127 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
128 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
129 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
130 #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
131 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
132 #include "llvm/Transforms/Utils/AddDiscriminators.h"
133 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
134 #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
135 #include "llvm/Transforms/Utils/CountVisits.h"
136 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
137 #include "llvm/Transforms/Utils/ExtraPassManager.h"
138 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
139 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
140 #include "llvm/Transforms/Utils/Mem2Reg.h"
141 #include "llvm/Transforms/Utils/MoveAutoInit.h"
142 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
143 #include "llvm/Transforms/Utils/RelLookupTableConverter.h"
144 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
145 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
146 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
147 #include "llvm/Transforms/Vectorize/VectorCombine.h"
148 
149 using namespace llvm;
150 
// Selects the inlining advisor: the heuristics-based default, or an ML policy
// in "development" (runtime-loadable model) or "release" (AOT-compiled model)
// mode.
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
                          "Heuristics-based inliner version"),
               clEnumValN(InliningAdvisorMode::Development, "development",
                          "Use development mode (runtime-loadable model)"),
               clEnumValN(InliningAdvisorMode::Release, "release",
                          "Use release mode (AOT-compiled model)")));

/// Flag to enable inline deferral during PGO. On by default.
static cl::opt<bool>
    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
                            cl::Hidden,
                            cl::desc("Enable inline deferral during PGO"));

// Enables the module inliner. Off by default.
static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
                                         cl::init(false), cl::Hidden,
                                         cl::desc("Enable module inliner"));

// Requests that mandatory inlinings be performed module-wide before regular
// inlining. Off by default.
static cl::opt<bool> PerformMandatoryInliningsFirst(
    "mandatory-inlining-first", cl::init(false), cl::Hidden,
    cl::desc("Perform mandatory inlinings module-wide, before performing "
             "inlining"));

// Default for PipelineTuningOptions::EagerlyInvalidateAnalyses (read in the
// PipelineTuningOptions constructor). On by default.
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
    "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
    cl::desc("Eagerly invalidate more analyses in default pipelines"));

// Default for PipelineTuningOptions::MergeFunctions (read in the
// PipelineTuningOptions constructor). Off by default.
static cl::opt<bool> EnableMergeFunctions(
    "enable-merge-functions", cl::init(false), cl::Hidden,
    cl::desc("Enable function merging as part of the optimization pipeline"));

// Controls running loop rotation after PGO instrumentation. On by default.
static cl::opt<bool> EnablePostPGOLoopRotation(
    "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
    cl::desc("Run the loop rotation transformation after PGO instrumentation"));

// Controls the inter-procedural analyses. On by default.
static cl::opt<bool> EnableGlobalAnalyses(
    "enable-global-analyses", cl::init(true), cl::Hidden,
    cl::desc("Enable inter-procedural analyses"));

// Controls running the partial inlining pass. Off by default.
static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
                                        cl::init(false), cl::Hidden,
                                        cl::desc("Run Partial inlining pass"));

// Controls running extra cleanup passes after vectorization. Off by default.
static cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization"));

// Controls running the NewGVN pass. Off by default.
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                               cl::desc("Run the NewGVN pass"));

// Default for PipelineTuningOptions::LoopInterchange (read in the
// PipelineTuningOptions constructor). Off by default.
static cl::opt<bool>
    EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
                          cl::desc("Enable the LoopInterchange Pass"));

// Controls the Unroll And Jam pass. Off by default.
static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
                                        cl::init(false), cl::Hidden,
                                        cl::desc("Enable Unroll And Jam Pass"));

// Controls the LoopFlatten pass; when set, the function simplification
// pipelines in this file add LoopFlattenPass to their first loop pass manager.
// Off by default.
static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                       cl::Hidden,
                                       cl::desc("Enable the LoopFlatten Pass"));
214 
// Experimentally allow loop header duplication. This should allow for better
// optimization at Oz, since loop-idiom recognition can then recognize things
// like memcpy. If this ends up being useful for many targets, we should drop
// this flag and make a code generation option that can be controlled
// independent of the opt level and exposed through the frontend.
static cl::opt<bool> EnableLoopHeaderDuplication(
    "enable-loop-header-duplication", cl::init(false), cl::Hidden,
    cl::desc("Enable loop header duplication at any optimization level"));

// Controls DFA jump threading. Off by default.
static cl::opt<bool>
    EnableDFAJumpThreading("enable-dfa-jump-thread",
                           cl::desc("Enable DFA jump threading"),
                           cl::init(false), cl::Hidden);

// Controls the hot-cold splitting pass. Unlike most flags in this file, this
// one has no explicit cl::init and is not marked cl::Hidden.
static cl::opt<bool>
    EnableHotColdSplit("hot-cold-split",
                       cl::desc("Enable hot-cold splitting pass"));

// Controls the IR outliner pass. Off by default.
static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Enable ir outliner pass"));

// Disables the pre-instrumentation inliner when set. Not set by default.
static cl::opt<bool>
    DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
                      cl::desc("Disable pre-instrumentation inliner"));

// Threshold controlling how much the pre-instrumentation inliner inlines.
// Defaults to 75.
static cl::opt<int> PreInlineThreshold(
    "preinline-threshold", cl::Hidden, cl::init(75),
    cl::desc("Control the amount of inlining in pre-instrumentation inliner "
             "(default = 75)"));

// Controls the GVN hoisting pass. No explicit cl::init; the description
// documents the default as off.
static cl::opt<bool>
    EnableGVNHoist("enable-gvn-hoist",
                   cl::desc("Enable the GVN hoisting pass (default = off)"));

// Controls the GVN sinking pass. No explicit cl::init; the description
// documents the default as off.
static cl::opt<bool>
    EnableGVNSink("enable-gvn-sink",
                  cl::desc("Enable the GVN sinking pass (default = off)"));

// Controls the JumpTableToSwitch pass. No explicit cl::init; the description
// documents the default as off.
static cl::opt<bool> EnableJumpTableToSwitch(
    "enable-jump-table-to-switch",
    cl::desc("Enable JumpTableToSwitch pass (default = off)"));
257 
// This option is used in simplifying testing SampleFDO optimizations for
// profile loading.
// NOTE(review): the sentence above talks about SampleFDO profile loading, but
// the flag it sits on controls control height reduction (CHR). It may be a
// leftover from a different option -- confirm and reword or remove.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

// Declares that the sample profile in use is flattened, i.e. carries no inline
// hierarchy. Off by default.
static cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierarchy exists in the profile"));

// Controls lowering of the matrix intrinsics. Off by default.
static cl::opt<bool>
    EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
                 cl::desc("Enable lowering of the matrix intrinsics"));

// Controls the pass that eliminates conditions based on linear constraints.
// On by default.
static cl::opt<bool> EnableConstraintElimination(
    "enable-constraint-elimination", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable pass to eliminate conditions based on linear constraints"));

// Selects which attributor runs are enabled: all, module-wide only, call graph
// SCC only, or none. Defaults to none.
static cl::opt<AttributorRunOption> AttributorRun(
    "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
    cl::desc("Enable the attributor inter-procedural deduction pass"),
    cl::values(clEnumValN(AttributorRunOption::ALL, "all",
                          "enable all attributor runs"),
               clEnumValN(AttributorRunOption::MODULE, "module",
                          "enable module-wide attributor runs"),
               clEnumValN(AttributorRunOption::CGSCC, "cgscc",
                          "enable call graph SCC attributor runs"),
               clEnumValN(AttributorRunOption::NONE, "none",
                          "disable attributor runs")));

// Controls profile instrumentation sampling. Off by default.
static cl::opt<bool> EnableSampledInstr(
    "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable profile instrumentation sampling (default = off)"));

// Controls the experimental Loop Versioning LICM pass. Off by default.
static cl::opt<bool> UseLoopVersioningLICM(
    "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental Loop Versioning LICM pass"));

// File path used for cold-function-only instrumentation. Empty by default;
// per the description it is meant to be combined with
// --pgo-instrument-cold-function-only.
static cl::opt<std::string> InstrumentColdFuncOnlyPath(
    "instrument-cold-function-only-path", cl::init(""),
    cl::desc("File path for cold function only instrumentation(requires use "
             "with --pgo-instrument-cold-function-only)"),
    cl::Hidden);
302 
// Command-line options that are only declared here; their definitions live
// outside this file.
extern cl::opt<std::string> UseCtxProfile;
extern cl::opt<bool> PGOInstrumentColdFunctionOnly;

namespace llvm {
// Declared in namespace llvm, unlike the two options above; defined outside
// this file.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
} // namespace llvm
309 
PipelineTuningOptions()310 PipelineTuningOptions::PipelineTuningOptions() {
311   LoopInterleaving = true;
312   LoopVectorization = true;
313   SLPVectorization = false;
314   LoopUnrolling = true;
315   LoopInterchange = EnableLoopInterchange;
316   ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
317   LicmMssaOptCap = SetLicmMssaOptCap;
318   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
319   CallGraphProfile = true;
320   UnifiedLTO = false;
321   MergeFunctions = EnableMergeFunctions;
322   InlinerThreshold = -1;
323   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
324 }
325 
namespace llvm {
// Only declared here; the option is defined outside this file.
extern cl::opt<unsigned> MaxDevirtIterations;
} // namespace llvm
329 
invokePeepholeEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)330 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
331                                             OptimizationLevel Level) {
332   for (auto &C : PeepholeEPCallbacks)
333     C(FPM, Level);
334 }
invokeLateLoopOptimizationsEPCallbacks(LoopPassManager & LPM,OptimizationLevel Level)335 void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
336     LoopPassManager &LPM, OptimizationLevel Level) {
337   for (auto &C : LateLoopOptimizationsEPCallbacks)
338     C(LPM, Level);
339 }
invokeLoopOptimizerEndEPCallbacks(LoopPassManager & LPM,OptimizationLevel Level)340 void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
341                                                     OptimizationLevel Level) {
342   for (auto &C : LoopOptimizerEndEPCallbacks)
343     C(LPM, Level);
344 }
invokeScalarOptimizerLateEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)345 void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
346     FunctionPassManager &FPM, OptimizationLevel Level) {
347   for (auto &C : ScalarOptimizerLateEPCallbacks)
348     C(FPM, Level);
349 }
invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager & CGPM,OptimizationLevel Level)350 void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
351                                                       OptimizationLevel Level) {
352   for (auto &C : CGSCCOptimizerLateEPCallbacks)
353     C(CGPM, Level);
354 }
invokeVectorizerStartEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)355 void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
356                                                    OptimizationLevel Level) {
357   for (auto &C : VectorizerStartEPCallbacks)
358     C(FPM, Level);
359 }
invokeVectorizerEndEPCallbacks(FunctionPassManager & FPM,OptimizationLevel Level)360 void PassBuilder::invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM,
361                                                  OptimizationLevel Level) {
362   for (auto &C : VectorizerEndEPCallbacks)
363     C(FPM, Level);
364 }
invokeOptimizerEarlyEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level,ThinOrFullLTOPhase Phase)365 void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
366                                                   OptimizationLevel Level,
367                                                   ThinOrFullLTOPhase Phase) {
368   for (auto &C : OptimizerEarlyEPCallbacks)
369     C(MPM, Level, Phase);
370 }
invokeOptimizerLastEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level,ThinOrFullLTOPhase Phase)371 void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
372                                                  OptimizationLevel Level,
373                                                  ThinOrFullLTOPhase Phase) {
374   for (auto &C : OptimizerLastEPCallbacks)
375     C(MPM, Level, Phase);
376 }
invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)377 void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
378     ModulePassManager &MPM, OptimizationLevel Level) {
379   for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
380     C(MPM, Level);
381 }
invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)382 void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
383     ModulePassManager &MPM, OptimizationLevel Level) {
384   for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
385     C(MPM, Level);
386 }
invokePipelineStartEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level)387 void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
388                                                  OptimizationLevel Level) {
389   for (auto &C : PipelineStartEPCallbacks)
390     C(MPM, Level);
391 }
invokePipelineEarlySimplificationEPCallbacks(ModulePassManager & MPM,OptimizationLevel Level,ThinOrFullLTOPhase Phase)392 void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
393     ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase) {
394   for (auto &C : PipelineEarlySimplificationEPCallbacks)
395     C(MPM, Level, Phase);
396 }
397 
398 // Helper to add AnnotationRemarksPass.
addAnnotationRemarksPass(ModulePassManager & MPM)399 static void addAnnotationRemarksPass(ModulePassManager &MPM) {
400   MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
401 }
402 
403 // Helper to check if the current compilation phase is preparing for LTO
isLTOPreLink(ThinOrFullLTOPhase Phase)404 static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
405   return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
406          Phase == ThinOrFullLTOPhase::FullLTOPreLink;
407 }
408 
409 // Helper to check if the current compilation phase is LTO backend
isLTOPostLink(ThinOrFullLTOPhase Phase)410 static bool isLTOPostLink(ThinOrFullLTOPhase Phase) {
411   return Phase == ThinOrFullLTOPhase::ThinLTOPostLink ||
412          Phase == ThinOrFullLTOPhase::FullLTOPostLink;
413 }
414 
415 // Helper to wrap conditionally Coro passes.
buildCoroWrapper(ThinOrFullLTOPhase Phase)416 static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase) {
417   // TODO: Skip passes according to Phase.
418   ModulePassManager CoroPM;
419   CoroPM.addPass(CoroEarlyPass());
420   CGSCCPassManager CGPM;
421   CGPM.addPass(CoroSplitPass());
422   CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
423   CoroPM.addPass(CoroCleanupPass());
424   CoroPM.addPass(GlobalDCEPass());
425   return CoroConditionalWrapper(std::move(CoroPM));
426 }
427 
428 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
429 FunctionPassManager
buildO1FunctionSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)430 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
431                                                    ThinOrFullLTOPhase Phase) {
432 
433   FunctionPassManager FPM;
434 
435   if (AreStatisticsEnabled())
436     FPM.addPass(CountVisitsPass());
437 
438   // Form SSA out of local memory accesses after breaking apart aggregates into
439   // scalars.
440   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
441 
442   // Catch trivial redundancies
443   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
444 
445   // Hoisting of scalars and load expressions.
446   FPM.addPass(
447       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
448   FPM.addPass(InstCombinePass());
449 
450   FPM.addPass(LibCallsShrinkWrapPass());
451 
452   invokePeepholeEPCallbacks(FPM, Level);
453 
454   FPM.addPass(
455       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
456 
457   // Form canonically associated expression trees, and simplify the trees using
458   // basic mathematical properties. For example, this will form (nearly)
459   // minimal multiplication trees.
460   FPM.addPass(ReassociatePass());
461 
462   // Add the primary loop simplification pipeline.
463   // FIXME: Currently this is split into two loop pass pipelines because we run
464   // some function passes in between them. These can and should be removed
465   // and/or replaced by scheduling the loop pass equivalents in the correct
466   // positions. But those equivalent passes aren't powerful enough yet.
467   // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
468   // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
469   // fully replace `SimplifyCFGPass`, and the closest to the other we have is
470   // `LoopInstSimplify`.
471   LoopPassManager LPM1, LPM2;
472 
473   // Simplify the loop body. We do this initially to clean up after other loop
474   // passes run, either when iterating on a loop or on inner loops with
475   // implications on the outer loop.
476   LPM1.addPass(LoopInstSimplifyPass());
477   LPM1.addPass(LoopSimplifyCFGPass());
478 
479   // Try to remove as much code from the loop header as possible,
480   // to reduce amount of IR that will have to be duplicated. However,
481   // do not perform speculative hoisting the first time as LICM
482   // will destroy metadata that may not need to be destroyed if run
483   // after loop rotation.
484   // TODO: Investigate promotion cap for O1.
485   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
486                         /*AllowSpeculation=*/false));
487 
488   LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
489                               isLTOPreLink(Phase)));
490   // TODO: Investigate promotion cap for O1.
491   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
492                         /*AllowSpeculation=*/true));
493   LPM1.addPass(SimpleLoopUnswitchPass());
494   if (EnableLoopFlatten)
495     LPM1.addPass(LoopFlattenPass());
496 
497   LPM2.addPass(LoopIdiomRecognizePass());
498   LPM2.addPass(IndVarSimplifyPass());
499 
500   invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
501 
502   LPM2.addPass(LoopDeletionPass());
503 
504   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
505   // because it changes IR to makes profile annotation in back compile
506   // inaccurate. The normal unroller doesn't pay attention to forced full unroll
507   // attributes so we need to make sure and allow the full unroll pass to pay
508   // attention to it.
509   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
510       PGOOpt->Action != PGOOptions::SampleUse)
511     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
512                                     /* OnlyWhenForced= */ !PTO.LoopUnrolling,
513                                     PTO.ForgetAllSCEVInLoopUnroll));
514 
515   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
516 
517   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
518                                               /*UseMemorySSA=*/true,
519                                               /*UseBlockFrequencyInfo=*/true));
520   FPM.addPass(
521       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
522   FPM.addPass(InstCombinePass());
523   // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
524   // *All* loop passes must preserve it, in order to be able to use it.
525   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
526                                               /*UseMemorySSA=*/false,
527                                               /*UseBlockFrequencyInfo=*/false));
528 
529   // Delete small array after loop unroll.
530   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
531 
532   // Specially optimize memory movement as it doesn't look like dataflow in SSA.
533   FPM.addPass(MemCpyOptPass());
534 
535   // Sparse conditional constant propagation.
536   // FIXME: It isn't clear why we do this *after* loop passes rather than
537   // before...
538   FPM.addPass(SCCPPass());
539 
540   // Delete dead bit computations (instcombine runs after to fold away the dead
541   // computations, and then ADCE will run later to exploit any new DCE
542   // opportunities that creates).
543   FPM.addPass(BDCEPass());
544 
545   // Run instcombine after redundancy and dead bit elimination to exploit
546   // opportunities opened up by them.
547   FPM.addPass(InstCombinePass());
548   invokePeepholeEPCallbacks(FPM, Level);
549 
550   FPM.addPass(CoroElidePass());
551 
552   invokeScalarOptimizerLateEPCallbacks(FPM, Level);
553 
554   // Finally, do an expensive DCE pass to catch all the dead code exposed by
555   // the simplifications and basic cleanup after all the simplifications.
556   // TODO: Investigate if this is too expensive.
557   FPM.addPass(ADCEPass());
558   FPM.addPass(
559       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
560   FPM.addPass(InstCombinePass());
561   invokePeepholeEPCallbacks(FPM, Level);
562 
563   return FPM;
564 }
565 
566 FunctionPassManager
buildFunctionSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)567 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
568                                                  ThinOrFullLTOPhase Phase) {
569   assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
570 
571   // The O1 pipeline has a separate pipeline creation function to simplify
572   // construction readability.
573   if (Level.getSpeedupLevel() == 1)
574     return buildO1FunctionSimplificationPipeline(Level, Phase);
575 
576   FunctionPassManager FPM;
577 
578   if (AreStatisticsEnabled())
579     FPM.addPass(CountVisitsPass());
580 
581   // Form SSA out of local memory accesses after breaking apart aggregates into
582   // scalars.
583   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
584 
585   // Catch trivial redundancies
586   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
587   if (EnableKnowledgeRetention)
588     FPM.addPass(AssumeSimplifyPass());
589 
590   // Hoisting of scalars and load expressions.
591   if (EnableGVNHoist)
592     FPM.addPass(GVNHoistPass());
593 
594   // Global value numbering based sinking.
595   if (EnableGVNSink) {
596     FPM.addPass(GVNSinkPass());
597     FPM.addPass(
598         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
599   }
600 
601   // Speculative execution if the target has divergent branches; otherwise nop.
602   FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
603 
604   // Optimize based on known information about branches, and cleanup afterward.
605   FPM.addPass(JumpThreadingPass());
606   FPM.addPass(CorrelatedValuePropagationPass());
607 
608   // Jump table to switch conversion.
609   if (EnableJumpTableToSwitch)
610     FPM.addPass(JumpTableToSwitchPass());
611 
612   FPM.addPass(
613       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
614   FPM.addPass(InstCombinePass());
615   FPM.addPass(AggressiveInstCombinePass());
616 
617   if (!Level.isOptimizingForSize())
618     FPM.addPass(LibCallsShrinkWrapPass());
619 
620   invokePeepholeEPCallbacks(FPM, Level);
621 
622   // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
623   // using the size value profile. Don't perform this when optimizing for size.
624   if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
625       !Level.isOptimizingForSize())
626     FPM.addPass(PGOMemOPSizeOpt());
627 
628   FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
629                                isInstrumentedPGOUse()));
630   FPM.addPass(
631       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
632 
633   // Form canonically associated expression trees, and simplify the trees using
634   // basic mathematical properties. For example, this will form (nearly)
635   // minimal multiplication trees.
636   FPM.addPass(ReassociatePass());
637 
638   if (EnableConstraintElimination)
639     FPM.addPass(ConstraintEliminationPass());
640 
641   // Add the primary loop simplification pipeline.
642   // FIXME: Currently this is split into two loop pass pipelines because we run
643   // some function passes in between them. These can and should be removed
644   // and/or replaced by scheduling the loop pass equivalents in the correct
645   // positions. But those equivalent passes aren't powerful enough yet.
646   // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
647   // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
648   // fully replace `SimplifyCFGPass`, and the closest to the other we have is
649   // `LoopInstSimplify`.
650   LoopPassManager LPM1, LPM2;
651 
652   // Simplify the loop body. We do this initially to clean up after other loop
653   // passes run, either when iterating on a loop or on inner loops with
654   // implications on the outer loop.
655   LPM1.addPass(LoopInstSimplifyPass());
656   LPM1.addPass(LoopSimplifyCFGPass());
657 
658   // Try to remove as much code from the loop header as possible,
659   // to reduce amount of IR that will have to be duplicated. However,
660   // do not perform speculative hoisting the first time as LICM
661   // will destroy metadata that may not need to be destroyed if run
662   // after loop rotation.
663   // TODO: Investigate promotion cap for O1.
664   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
665                         /*AllowSpeculation=*/false));
666 
667   // Disable header duplication in loop rotation at -Oz.
668   LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
669                                   Level != OptimizationLevel::Oz,
670                               isLTOPreLink(Phase)));
671   // TODO: Investigate promotion cap for O1.
672   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
673                         /*AllowSpeculation=*/true));
674   LPM1.addPass(
675       SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
676   if (EnableLoopFlatten)
677     LPM1.addPass(LoopFlattenPass());
678 
679   LPM2.addPass(LoopIdiomRecognizePass());
680   LPM2.addPass(IndVarSimplifyPass());
681 
682   {
683     ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch> ExtraPasses;
684     ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
685                                                OptimizationLevel::O3));
686     LPM2.addPass(std::move(ExtraPasses));
687   }
688 
689   invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
690 
691   LPM2.addPass(LoopDeletionPass());
692 
693   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
694   // because it changes IR to makes profile annotation in back compile
695   // inaccurate. The normal unroller doesn't pay attention to forced full unroll
696   // attributes so we need to make sure and allow the full unroll pass to pay
697   // attention to it.
698   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
699       PGOOpt->Action != PGOOptions::SampleUse)
700     LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
701                                     /* OnlyWhenForced= */ !PTO.LoopUnrolling,
702                                     PTO.ForgetAllSCEVInLoopUnroll));
703 
704   invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
705 
706   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
707                                               /*UseMemorySSA=*/true,
708                                               /*UseBlockFrequencyInfo=*/true));
709   FPM.addPass(
710       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
711   FPM.addPass(InstCombinePass());
712   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
713   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
714   // *All* loop passes must preserve it, in order to be able to use it.
715   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
716                                               /*UseMemorySSA=*/false,
717                                               /*UseBlockFrequencyInfo=*/false));
718 
719   // Delete small array after loop unroll.
720   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
721 
722   // Try vectorization/scalarization transforms that are both improvements
723   // themselves and can allow further folds with GVN and InstCombine.
724   FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
725 
726   // Eliminate redundancies.
727   FPM.addPass(MergedLoadStoreMotionPass());
728   if (RunNewGVN)
729     FPM.addPass(NewGVNPass());
730   else
731     FPM.addPass(GVNPass());
732 
733   // Sparse conditional constant propagation.
734   // FIXME: It isn't clear why we do this *after* loop passes rather than
735   // before...
736   FPM.addPass(SCCPPass());
737 
738   // Delete dead bit computations (instcombine runs after to fold away the dead
739   // computations, and then ADCE will run later to exploit any new DCE
740   // opportunities that creates).
741   FPM.addPass(BDCEPass());
742 
743   // Run instcombine after redundancy and dead bit elimination to exploit
744   // opportunities opened up by them.
745   FPM.addPass(InstCombinePass());
746   invokePeepholeEPCallbacks(FPM, Level);
747 
748   // Re-consider control flow based optimizations after redundancy elimination,
749   // redo DCE, etc.
750   if (EnableDFAJumpThreading)
751     FPM.addPass(DFAJumpThreadingPass());
752 
753   FPM.addPass(JumpThreadingPass());
754   FPM.addPass(CorrelatedValuePropagationPass());
755 
756   // Finally, do an expensive DCE pass to catch all the dead code exposed by
757   // the simplifications and basic cleanup after all the simplifications.
758   // TODO: Investigate if this is too expensive.
759   FPM.addPass(ADCEPass());
760 
761   // Specially optimize memory movement as it doesn't look like dataflow in SSA.
762   FPM.addPass(MemCpyOptPass());
763 
764   FPM.addPass(DSEPass());
765   FPM.addPass(MoveAutoInitPass());
766 
767   FPM.addPass(createFunctionToLoopPassAdaptor(
768       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
769                /*AllowSpeculation=*/true),
770       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
771 
772   FPM.addPass(CoroElidePass());
773 
774   invokeScalarOptimizerLateEPCallbacks(FPM, Level);
775 
776   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
777                                   .convertSwitchRangeToICmp(true)
778                                   .hoistCommonInsts(true)
779                                   .sinkCommonInsts(true)));
780   FPM.addPass(InstCombinePass());
781   invokePeepholeEPCallbacks(FPM, Level);
782 
783   return FPM;
784 }
785 
addRequiredLTOPreLinkPasses(ModulePassManager & MPM)786 void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
787   MPM.addPass(CanonicalizeAliasesPass());
788   MPM.addPass(NameAnonGlobalPass());
789 }
790 
addPreInlinerPasses(ModulePassManager & MPM,OptimizationLevel Level,ThinOrFullLTOPhase LTOPhase)791 void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
792                                       OptimizationLevel Level,
793                                       ThinOrFullLTOPhase LTOPhase) {
794   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
795   if (DisablePreInliner)
796     return;
797   InlineParams IP;
798 
799   IP.DefaultThreshold = PreInlineThreshold;
800 
801   // FIXME: The hint threshold has the same value used by the regular inliner
802   // when not optimzing for size. This should probably be lowered after
803   // performance testing.
804   // FIXME: this comment is cargo culted from the old pass manager, revisit).
805   IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
806   ModuleInlinerWrapperPass MIWP(
807       IP, /* MandatoryFirst */ true,
808       InlineContext{LTOPhase, InlinePass::EarlyInliner});
809   CGSCCPassManager &CGPipeline = MIWP.getPM();
810 
811   FunctionPassManager FPM;
812   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
813   FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
814   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
815       true)));                    // Merge & remove basic blocks.
816   FPM.addPass(InstCombinePass()); // Combine silly sequences.
817   invokePeepholeEPCallbacks(FPM, Level);
818 
819   CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
820       std::move(FPM), PTO.EagerlyInvalidateAnalyses));
821 
822   MPM.addPass(std::move(MIWP));
823 
824   // Delete anything that is now dead to make sure that we don't instrument
825   // dead code. Instrumentation can end up keeping dead code around and
826   // dramatically increase code size.
827   MPM.addPass(GlobalDCEPass());
828 }
829 
addPostPGOLoopRotation(ModulePassManager & MPM,OptimizationLevel Level)830 void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
831                                          OptimizationLevel Level) {
832   if (EnablePostPGOLoopRotation) {
833     // Disable header duplication in loop rotation at -Oz.
834     MPM.addPass(createModuleToFunctionPassAdaptor(
835         createFunctionToLoopPassAdaptor(
836             LoopRotatePass(EnableLoopHeaderDuplication ||
837                            Level != OptimizationLevel::Oz),
838             /*UseMemorySSA=*/false,
839             /*UseBlockFrequencyInfo=*/false),
840         PTO.EagerlyInvalidateAnalyses));
841   }
842 }
843 
addPGOInstrPasses(ModulePassManager & MPM,OptimizationLevel Level,bool RunProfileGen,bool IsCS,bool AtomicCounterUpdate,std::string ProfileFile,std::string ProfileRemappingFile,IntrusiveRefCntPtr<vfs::FileSystem> FS)844 void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
845                                     OptimizationLevel Level, bool RunProfileGen,
846                                     bool IsCS, bool AtomicCounterUpdate,
847                                     std::string ProfileFile,
848                                     std::string ProfileRemappingFile,
849                                     IntrusiveRefCntPtr<vfs::FileSystem> FS) {
850   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
851 
852   if (!RunProfileGen) {
853     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
854     MPM.addPass(
855         PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
856     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
857     // RequireAnalysisPass for PSI before subsequent non-module passes.
858     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
859     return;
860   }
861 
862   // Perform PGO instrumentation.
863   MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
864                                          : PGOInstrumentationType::FDO));
865 
866   addPostPGOLoopRotation(MPM, Level);
867   // Add the profile lowering pass.
868   InstrProfOptions Options;
869   if (!ProfileFile.empty())
870     Options.InstrProfileOutput = ProfileFile;
871   // Do counter promotion at Level greater than O0.
872   Options.DoCounterPromotion = true;
873   Options.UseBFIInPromotion = IsCS;
874   if (EnableSampledInstr) {
875     Options.Sampling = true;
876     // With sampling, there is little beneifit to enable counter promotion.
877     // But note that sampling does work with counter promotion.
878     Options.DoCounterPromotion = false;
879   }
880   Options.Atomic = AtomicCounterUpdate;
881   MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
882 }
883 
addPGOInstrPassesForO0(ModulePassManager & MPM,bool RunProfileGen,bool IsCS,bool AtomicCounterUpdate,std::string ProfileFile,std::string ProfileRemappingFile,IntrusiveRefCntPtr<vfs::FileSystem> FS)884 void PassBuilder::addPGOInstrPassesForO0(
885     ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
886     bool AtomicCounterUpdate, std::string ProfileFile,
887     std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
888   if (!RunProfileGen) {
889     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
890     MPM.addPass(
891         PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
892     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
893     // RequireAnalysisPass for PSI before subsequent non-module passes.
894     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
895     return;
896   }
897 
898   // Perform PGO instrumentation.
899   MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
900                                          : PGOInstrumentationType::FDO));
901   // Add the profile lowering pass.
902   InstrProfOptions Options;
903   if (!ProfileFile.empty())
904     Options.InstrProfileOutput = ProfileFile;
905   // Do not do counter promotion at O0.
906   Options.DoCounterPromotion = false;
907   Options.UseBFIInPromotion = IsCS;
908   Options.Atomic = AtomicCounterUpdate;
909   MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
910 }
911 
getInlineParamsFromOptLevel(OptimizationLevel Level)912 static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
913   return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
914 }
915 
916 ModuleInlinerWrapperPass
buildInlinerPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)917 PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
918                                   ThinOrFullLTOPhase Phase) {
919   InlineParams IP;
920   if (PTO.InlinerThreshold == -1)
921     IP = getInlineParamsFromOptLevel(Level);
922   else
923     IP = getInlineParams(PTO.InlinerThreshold);
924   // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
925   // set hot-caller threshold to 0 to disable hot
926   // callsite inline (as much as possible [1]) because it makes
927   // profile annotation in the backend inaccurate.
928   //
929   // [1] Note the cost of a function could be below zero due to erased
930   // prologue / epilogue.
931   if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
932     IP.HotCallSiteThreshold = 0;
933 
934   if (PGOOpt)
935     IP.EnableDeferral = EnablePGOInlineDeferral;
936 
937   ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
938                                 InlineContext{Phase, InlinePass::CGSCCInliner},
939                                 UseInlineAdvisor, MaxDevirtIterations);
940 
941   // Require the GlobalsAA analysis for the module so we can query it within
942   // the CGSCC pipeline.
943   if (EnableGlobalAnalyses) {
944     MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
945     // Invalidate AAManager so it can be recreated and pick up the newly
946     // available GlobalsAA.
947     MIWP.addModulePass(
948         createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
949   }
950 
951   // Require the ProfileSummaryAnalysis for the module so we can query it within
952   // the inliner pass.
953   MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
954 
955   // Now begin the main postorder CGSCC pipeline.
956   // FIXME: The current CGSCC pipeline has its origins in the legacy pass
957   // manager and trying to emulate its precise behavior. Much of this doesn't
958   // make a lot of sense and we should revisit the core CGSCC structure.
959   CGSCCPassManager &MainCGPipeline = MIWP.getPM();
960 
961   // Note: historically, the PruneEH pass was run first to deduce nounwind and
962   // generally clean up exception handling overhead. It isn't clear this is
963   // valuable as the inliner doesn't currently care whether it is inlining an
964   // invoke or a call.
965 
966   if (AttributorRun & AttributorRunOption::CGSCC)
967     MainCGPipeline.addPass(AttributorCGSCCPass());
968 
969   // Deduce function attributes. We do another run of this after the function
970   // simplification pipeline, so this only needs to run when it could affect the
971   // function simplification pipeline, which is only the case with recursive
972   // functions.
973   MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
974 
975   // When at O3 add argument promotion to the pass pipeline.
976   // FIXME: It isn't at all clear why this should be limited to O3.
977   if (Level == OptimizationLevel::O3)
978     MainCGPipeline.addPass(ArgumentPromotionPass());
979 
980   // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
981   // there are no OpenMP runtime calls present in the module.
982   if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
983     MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
984 
985   invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
986 
987   // Add the core function simplification pipeline nested inside the
988   // CGSCC walk.
989   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
990       buildFunctionSimplificationPipeline(Level, Phase),
991       PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
992 
993   // Finally, deduce any function attributes based on the fully simplified
994   // function.
995   MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
996 
997   // Mark that the function is fully simplified and that it shouldn't be
998   // simplified again if we somehow revisit it due to CGSCC mutations unless
999   // it's been modified since.
1000   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
1001       RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
1002 
1003   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1004     MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1005     MainCGPipeline.addPass(CoroAnnotationElidePass());
1006   }
1007 
1008   // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1009   MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1010       InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
1011 
1012   return MIWP;
1013 }
1014 
1015 ModulePassManager
buildModuleInlinerPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)1016 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
1017                                         ThinOrFullLTOPhase Phase) {
1018   ModulePassManager MPM;
1019 
1020   InlineParams IP = getInlineParamsFromOptLevel(Level);
1021   // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1022   // set hot-caller threshold to 0 to disable hot
1023   // callsite inline (as much as possible [1]) because it makes
1024   // profile annotation in the backend inaccurate.
1025   //
1026   // [1] Note the cost of a function could be below zero due to erased
1027   // prologue / epilogue.
1028   if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1029     IP.HotCallSiteThreshold = 0;
1030 
1031   if (PGOOpt)
1032     IP.EnableDeferral = EnablePGOInlineDeferral;
1033 
1034   // The inline deferral logic is used to avoid losing some
1035   // inlining chance in future. It is helpful in SCC inliner, in which
1036   // inlining is processed in bottom-up order.
1037   // While in module inliner, the inlining order is a priority-based order
1038   // by default. The inline deferral is unnecessary there. So we disable the
1039   // inline deferral logic in module inliner.
1040   IP.EnableDeferral = false;
1041 
1042   MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1043   if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
1044     MPM.addPass(GlobalOptPass());
1045     MPM.addPass(GlobalDCEPass());
1046     MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1047   }
1048 
1049   MPM.addPass(createModuleToFunctionPassAdaptor(
1050       buildFunctionSimplificationPipeline(Level, Phase),
1051       PTO.EagerlyInvalidateAnalyses));
1052 
1053   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
1054     MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1055         CoroSplitPass(Level != OptimizationLevel::O0)));
1056     MPM.addPass(
1057         createModuleToPostOrderCGSCCPassAdaptor(CoroAnnotationElidePass()));
1058   }
1059 
1060   return MPM;
1061 }
1062 
1063 ModulePassManager
buildModuleSimplificationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)1064 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1065                                                ThinOrFullLTOPhase Phase) {
1066   assert(Level != OptimizationLevel::O0 &&
1067          "Should not be used for O0 pipeline");
1068 
1069   assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1070          "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1071 
1072   ModulePassManager MPM;
1073 
1074   // Place pseudo probe instrumentation as the first pass of the pipeline to
1075   // minimize the impact of optimization changes.
1076   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1077       Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1078     MPM.addPass(SampleProfileProbePass(TM));
1079 
1080   bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1081 
1082   // In ThinLTO mode, when flattened profile is used, all the available
1083   // profile information will be annotated in PreLink phase so there is
1084   // no need to load the profile again in PostLink.
1085   bool LoadSampleProfile =
1086       HasSampleProfile &&
1087       !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1088 
1089   // During the ThinLTO backend phase we perform early indirect call promotion
1090   // here, before globalopt. Otherwise imported available_externally functions
1091   // look unreferenced and are removed. If we are going to load the sample
1092   // profile then defer until later.
1093   // TODO: See if we can move later and consolidate with the location where
1094   // we perform ICP when we are loading a sample profile.
1095   // TODO: We pass HasSampleProfile (whether there was a sample profile file
1096   // passed to the compile) to the SamplePGO flag of ICP. This is used to
1097   // determine whether the new direct calls are annotated with prof metadata.
1098   // Ideally this should be determined from whether the IR is annotated with
1099   // sample profile, and not whether the a sample profile was provided on the
1100   // command line. E.g. for flattened profiles where we will not be reloading
1101   // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1102   // provide the sample profile file.
1103   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1104     MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1105 
1106   // Create an early function pass manager to cleanup the output of the
1107   // frontend. Not necessary with LTO post link pipelines since the pre link
1108   // pipeline already cleaned up the frontend output.
1109   if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1110     // Do basic inference of function attributes from known properties of system
1111     // libraries and other oracles.
1112     MPM.addPass(InferFunctionAttrsPass());
1113     MPM.addPass(CoroEarlyPass());
1114 
1115     FunctionPassManager EarlyFPM;
1116     EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1117     // Lower llvm.expect to metadata before attempting transforms.
1118     // Compare/branch metadata may alter the behavior of passes like
1119     // SimplifyCFG.
1120     EarlyFPM.addPass(LowerExpectIntrinsicPass());
1121     EarlyFPM.addPass(SimplifyCFGPass());
1122     EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
1123     EarlyFPM.addPass(EarlyCSEPass());
1124     if (Level == OptimizationLevel::O3)
1125       EarlyFPM.addPass(CallSiteSplittingPass());
1126     MPM.addPass(createModuleToFunctionPassAdaptor(
1127         std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1128   }
1129 
1130   if (LoadSampleProfile) {
1131     // Annotate sample profile right after early FPM to ensure freshness of
1132     // the debug info.
1133     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1134                                         PGOOpt->ProfileRemappingFile, Phase));
1135     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1136     // RequireAnalysisPass for PSI before subsequent non-module passes.
1137     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1138     // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1139     // for the profile annotation to be accurate in the LTO backend.
1140     if (!isLTOPreLink(Phase))
1141       // We perform early indirect call promotion here, before globalopt.
1142       // This is important for the ThinLTO backend phase because otherwise
1143       // imported available_externally functions look unreferenced and are
1144       // removed.
1145       MPM.addPass(
1146           PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1147   }
1148 
1149   // Try to perform OpenMP specific optimizations on the module. This is a
1150   // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1151   MPM.addPass(OpenMPOptPass(Phase));
1152 
1153   if (AttributorRun & AttributorRunOption::MODULE)
1154     MPM.addPass(AttributorPass());
1155 
1156   // Lower type metadata and the type.test intrinsic in the ThinLTO
1157   // post link pipeline after ICP. This is to enable usage of the type
1158   // tests in ICP sequences.
1159   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1160     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1161                                    lowertypetests::DropTestKind::Assume));
1162 
1163   invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
1164 
1165   // Interprocedural constant propagation now that basic cleanup has occurred
1166   // and prior to optimizing globals.
1167   // FIXME: This position in the pipeline hasn't been carefully considered in
1168   // years, it should be re-analyzed.
1169   MPM.addPass(IPSCCPPass(
1170               IPSCCPOptions(/*AllowFuncSpec=*/
1171                             Level != OptimizationLevel::Os &&
1172                             Level != OptimizationLevel::Oz &&
1173                             !isLTOPreLink(Phase))));
1174 
1175   // Attach metadata to indirect call sites indicating the set of functions
1176   // they may target at run-time. This should follow IPSCCP.
1177   MPM.addPass(CalledValuePropagationPass());
1178 
1179   // Optimize globals to try and fold them into constants.
1180   MPM.addPass(GlobalOptPass());
1181 
1182   // Create a small function pass pipeline to cleanup after all the global
1183   // optimizations.
1184   FunctionPassManager GlobalCleanupPM;
1185   // FIXME: Should this instead by a run of SROA?
1186   GlobalCleanupPM.addPass(PromotePass());
1187   GlobalCleanupPM.addPass(InstCombinePass());
1188   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1189   GlobalCleanupPM.addPass(
1190       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1191   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1192                                                 PTO.EagerlyInvalidateAnalyses));
1193 
1194   // We already asserted this happens in non-FullLTOPostLink earlier.
1195   const bool IsPreLink = Phase != ThinOrFullLTOPhase::ThinLTOPostLink;
1196   // Enable contextual profiling instrumentation.
1197   const bool IsCtxProfGen =
1198       IsPreLink && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
1199   const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1200   const bool IsPGOInstrGen =
1201       IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1202   const bool IsPGOInstrUse =
1203       IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1204   const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1205   // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1206   // enable ctx profiling from the frontend.
1207   assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
1208          "Enabling both instrumented PGO and contextual instrumentation is not "
1209          "supported.");
1210   const bool IsCtxProfUse =
1211       !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
1212 
1213   assert(
1214       (InstrumentColdFuncOnlyPath.empty() || PGOInstrumentColdFunctionOnly) &&
1215       "--instrument-cold-function-only-path is provided but "
1216       "--pgo-instrument-cold-function-only is not enabled");
1217   const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1218                                       IsPGOPreLink &&
1219                                       !InstrumentColdFuncOnlyPath.empty();
1220 
1221   if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1222       IsCtxProfUse || IsColdFuncOnlyInstrGen)
1223     addPreInlinerPasses(MPM, Level, Phase);
1224 
1225   // Add all the requested passes for instrumentation PGO, if requested.
1226   if (IsPGOInstrGen || IsPGOInstrUse) {
1227     addPGOInstrPasses(MPM, Level,
1228                       /*RunProfileGen=*/IsPGOInstrGen,
1229                       /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1230                       PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1231                       PGOOpt->FS);
1232   } else if (IsCtxProfGen || IsCtxProfUse) {
1233     MPM.addPass(PGOInstrumentationGen(PGOInstrumentationType::CTXPROF));
1234     // In pre-link, we just want the instrumented IR. We use the contextual
1235     // profile in the post-thinlink phase.
1236     // The instrumentation will be removed in post-thinlink after IPO.
1237     // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1238     // mechanism for GUIDs.
1239     MPM.addPass(AssignGUIDPass());
1240     if (IsCtxProfUse) {
1241       MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1242       return MPM;
1243     }
1244     // Block further inlining in the instrumented ctxprof case. This avoids
1245     // confusingly collecting profiles for the same GUID corresponding to
1246     // different variants of the function. We could do like PGO and identify
1247     // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1248     // thinlto to happen before performing any further optimizations, it's
1249     // unnecessary to collect profiles for non-prevailing copies.
1250     MPM.addPass(NoinlineNonPrevailing());
1251     addPostPGOLoopRotation(MPM, Level);
1252     MPM.addPass(PGOCtxProfLoweringPass());
1253   } else if (IsColdFuncOnlyInstrGen) {
1254     addPGOInstrPasses(
1255         MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1256         /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
1257         /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
1258   }
1259 
1260   if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1261     MPM.addPass(PGOIndirectCallPromotion(false, false));
1262 
1263   if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1264     MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1265                                                EnableSampledInstr));
1266 
1267   if (IsMemprofUse)
1268     MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1269 
1270   if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1271                  PGOOpt->Action == PGOOptions::SampleUse))
1272     MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1273 
1274   MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1275 
1276   if (EnableModuleInliner)
1277     MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
1278   else
1279     MPM.addPass(buildInlinerPipeline(Level, Phase));
1280 
1281   // Remove any dead arguments exposed by cleanups, constant folding globals,
1282   // and argument promotion.
1283   MPM.addPass(DeadArgumentEliminationPass());
1284 
1285   if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1286     MPM.addPass(SimplifyTypeTestsPass());
1287 
1288   if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
1289     MPM.addPass(CoroCleanupPass());
1290 
1291   // Optimize globals now that functions are fully simplified.
1292   MPM.addPass(GlobalOptPass());
1293   MPM.addPass(GlobalDCEPass());
1294 
1295   return MPM;
1296 }
1297 
1298 /// TODO: Should LTO cause any differences to this set of passes?
addVectorPasses(OptimizationLevel Level,FunctionPassManager & FPM,bool IsFullLTO)1299 void PassBuilder::addVectorPasses(OptimizationLevel Level,
1300                                   FunctionPassManager &FPM, bool IsFullLTO) {
1301   FPM.addPass(LoopVectorizePass(
1302       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1303 
1304   FPM.addPass(InferAlignmentPass());
1305   if (IsFullLTO) {
1306     // The vectorizer may have significantly shortened a loop body; unroll
1307     // again. Unroll small loops to hide loop backedge latency and saturate any
1308     // parallel execution resources of an out-of-order processor. We also then
1309     // need to clean up redundancies and loop invariant code.
1310     // FIXME: It would be really good to use a loop-integrated instruction
1311     // combiner for cleanup here so that the unrolling and LICM can be pipelined
1312     // across the loop nests.
1313     // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1314     if (EnableUnrollAndJam && PTO.LoopUnrolling)
1315       FPM.addPass(createFunctionToLoopPassAdaptor(
1316           LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1317     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1318         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1319         PTO.ForgetAllSCEVInLoopUnroll)));
1320     FPM.addPass(WarnMissedTransformationsPass());
1321     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1322     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1323     // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1324     // NOTE: we are very late in the pipeline, and we don't have any LICM
1325     // or SimplifyCFG passes scheduled after us, that would cleanup
1326     // the CFG mess this may created if allowed to modify CFG, so forbid that.
1327     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1328   }
1329 
1330   if (!IsFullLTO) {
1331     // Eliminate loads by forwarding stores from the previous iteration to loads
1332     // of the current iteration.
1333     FPM.addPass(LoopLoadEliminationPass());
1334   }
1335   // Cleanup after the loop optimization passes.
1336   FPM.addPass(InstCombinePass());
1337 
1338   if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1339     ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1340     // At higher optimization levels, try to clean up any runtime overlap and
1341     // alignment checks inserted by the vectorizer. We want to track correlated
1342     // runtime checks for two inner loops in the same outer loop, fold any
1343     // common computations, hoist loop-invariant aspects out of any outer loop,
1344     // and unswitch the runtime checks if possible. Once hoisted, we may have
1345     // dead (or speculatable) control flows or more combining opportunities.
1346     ExtraPasses.addPass(EarlyCSEPass());
1347     ExtraPasses.addPass(CorrelatedValuePropagationPass());
1348     ExtraPasses.addPass(InstCombinePass());
1349     LoopPassManager LPM;
1350     LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1351                          /*AllowSpeculation=*/true));
1352     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1353                                        OptimizationLevel::O3));
1354     ExtraPasses.addPass(
1355         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
1356                                         /*UseBlockFrequencyInfo=*/true));
1357     ExtraPasses.addPass(
1358         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1359     ExtraPasses.addPass(InstCombinePass());
1360     FPM.addPass(std::move(ExtraPasses));
1361   }
1362 
1363   // Now that we've formed fast to execute loop structures, we do further
1364   // optimizations. These are run afterward as they might block doing complex
1365   // analyses and transforms such as what are needed for loop vectorization.
1366 
1367   // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1368   // GVN, loop transforms, and others have already run, so it's now better to
1369   // convert to more optimized IR using more aggressive simplify CFG options.
1370   // The extra sinking transform can create larger basic blocks, so do this
1371   // before SLP vectorization.
1372   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1373                                   .forwardSwitchCondToPhi(true)
1374                                   .convertSwitchRangeToICmp(true)
1375                                   .convertSwitchToLookupTable(true)
1376                                   .needCanonicalLoops(false)
1377                                   .hoistCommonInsts(true)
1378                                   .sinkCommonInsts(true)));
1379 
1380   if (IsFullLTO) {
1381     FPM.addPass(SCCPPass());
1382     FPM.addPass(InstCombinePass());
1383     FPM.addPass(BDCEPass());
1384   }
1385 
1386   // Optimize parallel scalar instruction chains into SIMD instructions.
1387   if (PTO.SLPVectorization) {
1388     FPM.addPass(SLPVectorizerPass());
1389     if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1390       FPM.addPass(EarlyCSEPass());
1391     }
1392   }
1393   // Enhance/cleanup vector code.
1394   FPM.addPass(VectorCombinePass());
1395 
1396   if (!IsFullLTO) {
1397     FPM.addPass(InstCombinePass());
1398     // Unroll small loops to hide loop backedge latency and saturate any
1399     // parallel execution resources of an out-of-order processor. We also then
1400     // need to clean up redundancies and loop invariant code.
1401     // FIXME: It would be really good to use a loop-integrated instruction
1402     // combiner for cleanup here so that the unrolling and LICM can be pipelined
1403     // across the loop nests.
1404     // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1405     if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1406       FPM.addPass(createFunctionToLoopPassAdaptor(
1407           LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1408     }
1409     FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1410         Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1411         PTO.ForgetAllSCEVInLoopUnroll)));
1412     FPM.addPass(WarnMissedTransformationsPass());
1413     // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1414     // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1415     // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1416     // NOTE: we are very late in the pipeline, and we don't have any LICM
1417     // or SimplifyCFG passes scheduled after us, that would cleanup
1418     // the CFG mess this may created if allowed to modify CFG, so forbid that.
1419     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
1420   }
1421 
1422   FPM.addPass(InferAlignmentPass());
1423   FPM.addPass(InstCombinePass());
1424 
1425   // This is needed for two reasons:
1426   //   1. It works around problems that instcombine introduces, such as sinking
1427   //      expensive FP divides into loops containing multiplications using the
1428   //      divide result.
1429   //   2. It helps to clean up some loop-invariant code created by the loop
1430   //      unroll pass when IsFullLTO=false.
1431   FPM.addPass(createFunctionToLoopPassAdaptor(
1432       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1433                /*AllowSpeculation=*/true),
1434       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1435 
1436   // Now that we've vectorized and unrolled loops, we may have more refined
1437   // alignment information, try to re-derive it here.
1438   FPM.addPass(AlignmentFromAssumptionsPass());
1439 }
1440 
1441 ModulePassManager
buildModuleOptimizationPipeline(OptimizationLevel Level,ThinOrFullLTOPhase LTOPhase)1442 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1443                                              ThinOrFullLTOPhase LTOPhase) {
1444   const bool LTOPreLink = isLTOPreLink(LTOPhase);
1445   ModulePassManager MPM;
1446 
1447   // Run partial inlining pass to partially inline functions that have
1448   // large bodies.
1449   if (RunPartialInlining)
1450     MPM.addPass(PartialInlinerPass());
1451 
1452   // Remove avail extern fns and globals definitions since we aren't compiling
1453   // an object file for later LTO. For LTO we want to preserve these so they
1454   // are eligible for inlining at link-time. Note if they are unreferenced they
1455   // will be removed by GlobalDCE later, so this only impacts referenced
1456   // available externally globals. Eventually they will be suppressed during
1457   // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1458   // may make globals referenced by available external functions dead and saves
1459   // running remaining passes on the eliminated functions. These should be
1460   // preserved during prelinking for link-time inlining decisions.
1461   if (!LTOPreLink)
1462     MPM.addPass(EliminateAvailableExternallyPass());
1463 
1464   // Do RPO function attribute inference across the module to forward-propagate
1465   // attributes where applicable.
1466   // FIXME: Is this really an optimization rather than a canonicalization?
1467   MPM.addPass(ReversePostOrderFunctionAttrsPass());
1468 
1469   // Do a post inline PGO instrumentation and use pass. This is a context
1470   // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as
1471   // cross-module inline has not been done yet. The context sensitive
1472   // instrumentation is after all the inlines are done.
1473   if (!LTOPreLink && PGOOpt) {
1474     if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1475       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1476                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1477                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
1478                         PGOOpt->FS);
1479     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1480       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1481                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1482                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
1483                         PGOOpt->FS);
1484   }
1485 
1486   // Re-compute GlobalsAA here prior to function passes. This is particularly
1487   // useful as the above will have inlined, DCE'ed, and function-attr
1488   // propagated everything. We should at this point have a reasonably minimal
1489   // and richly annotated call graph. By computing aliasing and mod/ref
1490   // information for all local globals here, the late loop passes and notably
1491   // the vectorizer will be able to use them to help recognize vectorizable
1492   // memory operations.
1493   if (EnableGlobalAnalyses)
1494     MPM.addPass(RecomputeGlobalsAAPass());
1495 
1496   invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1497 
1498   FunctionPassManager OptimizePM;
1499   // Scheduling LoopVersioningLICM when inlining is over, because after that
1500   // we may see more accurate aliasing. Reason to run this late is that too
1501   // early versioning may prevent further inlining due to increase of code
1502   // size. Other optimizations which runs later might get benefit of no-alias
1503   // assumption in clone loop.
1504   if (UseLoopVersioningLICM) {
1505     OptimizePM.addPass(
1506         createFunctionToLoopPassAdaptor(LoopVersioningLICMPass()));
1507     // LoopVersioningLICM pass might increase new LICM opportunities.
1508     OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1509         LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1510                  /*AllowSpeculation=*/true),
1511         /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1512   }
1513 
1514   OptimizePM.addPass(Float2IntPass());
1515   OptimizePM.addPass(LowerConstantIntrinsicsPass());
1516 
1517   if (EnableMatrix) {
1518     OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1519     OptimizePM.addPass(EarlyCSEPass());
1520   }
1521 
1522   // CHR pass should only be applied with the profile information.
1523   // The check is to check the profile summary information in CHR.
1524   if (EnableCHR && Level == OptimizationLevel::O3)
1525     OptimizePM.addPass(ControlHeightReductionPass());
1526 
1527   // FIXME: We need to run some loop optimizations to re-rotate loops after
1528   // simplifycfg and others undo their rotation.
1529 
1530   // Optimize the loop execution. These passes operate on entire loop nests
1531   // rather than on each loop in an inside-out manner, and so they are actually
1532   // function passes.
1533 
1534   invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1535 
1536   LoopPassManager LPM;
1537   // First rotate loops that may have been un-rotated by prior passes.
1538   // Disable header duplication at -Oz.
1539   LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
1540                                  Level != OptimizationLevel::Oz,
1541                              LTOPreLink));
1542   // Some loops may have become dead by now. Try to delete them.
1543   // FIXME: see discussion in https://reviews.llvm.org/D112851,
1544   //        this may need to be revisited once we run GVN before loop deletion
1545   //        in the simplification pipeline.
1546   LPM.addPass(LoopDeletionPass());
1547 
1548   if (PTO.LoopInterchange)
1549     LPM.addPass(LoopInterchangePass());
1550 
1551   OptimizePM.addPass(createFunctionToLoopPassAdaptor(
1552       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1553 
1554   // Distribute loops to allow partial vectorization.  I.e. isolate dependences
1555   // into separate loop that would otherwise inhibit vectorization.  This is
1556   // currently only performed for loops marked with the metadata
1557   // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1558   OptimizePM.addPass(LoopDistributePass());
1559 
1560   // Populates the VFABI attribute with the scalar-to-vector mappings
1561   // from the TargetLibraryInfo.
1562   OptimizePM.addPass(InjectTLIMappings());
1563 
1564   addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
1565 
1566   invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1567 
1568   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1569   // canonicalization pass that enables other optimizations. As a result,
1570   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1571   // result too early.
1572   OptimizePM.addPass(LoopSinkPass());
1573 
1574   // And finally clean up LCSSA form before generating code.
1575   OptimizePM.addPass(InstSimplifyPass());
1576 
1577   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1578   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1579   // flattening of blocks.
1580   OptimizePM.addPass(DivRemPairsPass());
1581 
1582   // Try to annotate calls that were created during optimization.
1583   OptimizePM.addPass(
1584       TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1585 
1586   // LoopSink (and other loop passes since the last simplifyCFG) might have
1587   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1588   OptimizePM.addPass(
1589       SimplifyCFGPass(SimplifyCFGOptions()
1590                           .convertSwitchRangeToICmp(true)
1591                           .speculateUnpredictables(true)
1592                           .hoistLoadsStoresWithCondFaulting(true)));
1593 
1594   // Add the core optimizing pipeline.
1595   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1596                                                 PTO.EagerlyInvalidateAnalyses));
1597 
1598   invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1599 
1600   // Split out cold code. Splitting is done late to avoid hiding context from
1601   // other optimizations and inadvertently regressing performance. The tradeoff
1602   // is that this has a higher code size cost than splitting early.
1603   if (EnableHotColdSplit && !LTOPreLink)
1604     MPM.addPass(HotColdSplittingPass());
1605 
1606   // Search the code for similar regions of code. If enough similar regions can
1607   // be found where extracting the regions into their own function will decrease
1608   // the size of the program, we extract the regions, a deduplicate the
1609   // structurally similar regions.
1610   if (EnableIROutliner)
1611     MPM.addPass(IROutlinerPass());
1612 
1613   // Now we need to do some global optimization transforms.
1614   // FIXME: It would seem like these should come first in the optimization
1615   // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1616   // ordering here.
1617   MPM.addPass(GlobalDCEPass());
1618   MPM.addPass(ConstantMergePass());
1619 
1620   // Merge functions if requested. It has a better chance to merge functions
1621   // after ConstantMerge folded jump tables.
1622   if (PTO.MergeFunctions)
1623     MPM.addPass(MergeFunctionsPass());
1624 
1625   if (PTO.CallGraphProfile && !LTOPreLink)
1626     MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1627 
1628   // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1629   if (!LTOPreLink)
1630     MPM.addPass(RelLookupTableConverterPass());
1631 
1632   return MPM;
1633 }
1634 
1635 ModulePassManager
buildPerModuleDefaultPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)1636 PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1637                                            ThinOrFullLTOPhase Phase) {
1638   if (Level == OptimizationLevel::O0)
1639     return buildO0DefaultPipeline(Level, Phase);
1640 
1641   ModulePassManager MPM;
1642 
1643   // Convert @llvm.global.annotations to !annotation metadata.
1644   MPM.addPass(Annotation2MetadataPass());
1645 
1646   // Force any function attributes we want the rest of the pipeline to observe.
1647   MPM.addPass(ForceFunctionAttrsPass());
1648 
1649   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1650     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1651 
1652   // Apply module pipeline start EP callback.
1653   invokePipelineStartEPCallbacks(MPM, Level);
1654 
1655   // Add the core simplification pipeline.
1656   MPM.addPass(buildModuleSimplificationPipeline(Level, Phase));
1657 
1658   // Now add the optimization pipeline.
1659   MPM.addPass(buildModuleOptimizationPipeline(Level, Phase));
1660 
1661   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1662       PGOOpt->Action == PGOOptions::SampleUse)
1663     MPM.addPass(PseudoProbeUpdatePass());
1664 
1665   // Emit annotation remarks.
1666   addAnnotationRemarksPass(MPM);
1667 
1668   if (isLTOPreLink(Phase))
1669     addRequiredLTOPreLinkPasses(MPM);
1670   return MPM;
1671 }
1672 
1673 ModulePassManager
buildFatLTODefaultPipeline(OptimizationLevel Level,bool ThinLTO,bool EmitSummary)1674 PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1675                                         bool EmitSummary) {
1676   ModulePassManager MPM;
1677   if (ThinLTO)
1678     MPM.addPass(buildThinLTOPreLinkDefaultPipeline(Level));
1679   else
1680     MPM.addPass(buildLTOPreLinkDefaultPipeline(Level));
1681   MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1682 
1683   // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1684   // like removing CFI/WPD related instructions. Note, we reuse
1685   // LowerTypeTestsPass to clean up type tests rather than duplicate that logic
1686   // in FatLtoCleanup.
1687   MPM.addPass(FatLtoCleanup());
1688 
1689   // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1690   // object code, only in the bitcode section, so drop it before we run
1691   // module optimization and generate machine code. If llvm.type.test() isn't in
1692   // the IR, this won't do anything.
1693   MPM.addPass(
1694       LowerTypeTestsPass(nullptr, nullptr, lowertypetests::DropTestKind::All));
1695 
1696   // Use the ThinLTO post-link pipeline with sample profiling
1697   if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1698     MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1699   else {
1700     // ModuleSimplification does not run the coroutine passes for
1701     // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1702     // builds, otherwise they will miscompile.
1703     if (ThinLTO) {
1704       // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1705       // consideration.
1706       CGSCCPassManager CGPM;
1707       CGPM.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1708       CGPM.addPass(CoroAnnotationElidePass());
1709       MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1710       MPM.addPass(CoroCleanupPass());
1711     }
1712 
1713     // otherwise, just use module optimization
1714     MPM.addPass(
1715         buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None));
1716     // Emit annotation remarks.
1717     addAnnotationRemarksPass(MPM);
1718   }
1719   return MPM;
1720 }
1721 
1722 ModulePassManager
buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)1723 PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1724   if (Level == OptimizationLevel::O0)
1725     return buildO0DefaultPipeline(Level, ThinOrFullLTOPhase::ThinLTOPreLink);
1726 
1727   ModulePassManager MPM;
1728 
1729   // Convert @llvm.global.annotations to !annotation metadata.
1730   MPM.addPass(Annotation2MetadataPass());
1731 
1732   // Force any function attributes we want the rest of the pipeline to observe.
1733   MPM.addPass(ForceFunctionAttrsPass());
1734 
1735   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1736     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
1737 
1738   // Apply module pipeline start EP callback.
1739   invokePipelineStartEPCallbacks(MPM, Level);
1740 
1741   // If we are planning to perform ThinLTO later, we don't bloat the code with
1742   // unrolling/vectorization/... now. Just simplify the module as much as we
1743   // can.
1744   MPM.addPass(buildModuleSimplificationPipeline(
1745       Level, ThinOrFullLTOPhase::ThinLTOPreLink));
1746   // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1747   // thinlto use the contextual info to perform imports; then use the contextual
1748   // profile in the post-thinlink phase.
1749   if (!UseCtxProfile.empty()) {
1750     addRequiredLTOPreLinkPasses(MPM);
1751     return MPM;
1752   }
1753 
1754   // Run partial inlining pass to partially inline functions that have
1755   // large bodies.
1756   // FIXME: It isn't clear whether this is really the right place to run this
1757   // in ThinLTO. Because there is another canonicalization and simplification
1758   // phase that will run after the thin link, running this here ends up with
1759   // less information than will be available later and it may grow functions in
1760   // ways that aren't beneficial.
1761   if (RunPartialInlining)
1762     MPM.addPass(PartialInlinerPass());
1763 
1764   if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1765       PGOOpt->Action == PGOOptions::SampleUse)
1766     MPM.addPass(PseudoProbeUpdatePass());
1767 
1768   // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1769   // optimization is going to be done in PostLink stage, but clang can't add
1770   // callbacks there in case of in-process ThinLTO called by linker.
1771   invokeOptimizerEarlyEPCallbacks(MPM, Level,
1772                                   /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1773   invokeOptimizerLastEPCallbacks(MPM, Level,
1774                                  /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
1775 
1776   // Emit annotation remarks.
1777   addAnnotationRemarksPass(MPM);
1778 
1779   addRequiredLTOPreLinkPasses(MPM);
1780 
1781   return MPM;
1782 }
1783 
buildThinLTODefaultPipeline(OptimizationLevel Level,const ModuleSummaryIndex * ImportSummary)1784 ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1785     OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1786   ModulePassManager MPM;
1787 
1788   if (ImportSummary) {
1789     // For ThinLTO we must apply the context disambiguation decisions early, to
1790     // ensure we can correctly match the callsites to summary data.
1791     if (EnableMemProfContextDisambiguation)
1792       MPM.addPass(MemProfContextDisambiguation(
1793           ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1794 
1795     // These passes import type identifier resolutions for whole-program
1796     // devirtualization and CFI. They must run early because other passes may
1797     // disturb the specific instruction patterns that these passes look for,
1798     // creating dependencies on resolutions that may not appear in the summary.
1799     //
1800     // For example, GVN may transform the pattern assume(type.test) appearing in
1801     // two basic blocks into assume(phi(type.test, type.test)), which would
1802     // transform a dependency on a WPD resolution into a dependency on a type
1803     // identifier resolution for CFI.
1804     //
1805     // Also, WPD has access to more precise information than ICP and can
1806     // devirtualize more effectively, so it should operate on the IR first.
1807     //
1808     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1809     // metadata and intrinsics.
1810     MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1811     MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1812   }
1813 
1814   if (Level == OptimizationLevel::O0) {
1815     // Run a second time to clean up any type tests left behind by WPD for use
1816     // in ICP.
1817     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1818                                    lowertypetests::DropTestKind::Assume));
1819     // Drop available_externally and unreferenced globals. This is necessary
1820     // with ThinLTO in order to avoid leaving undefined references to dead
1821     // globals in the object file.
1822     MPM.addPass(EliminateAvailableExternallyPass());
1823     MPM.addPass(GlobalDCEPass());
1824     return MPM;
1825   }
1826   if (!UseCtxProfile.empty()) {
1827     MPM.addPass(
1828         buildModuleInlinerPipeline(Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1829   } else {
1830     // Add the core simplification pipeline.
1831     MPM.addPass(buildModuleSimplificationPipeline(
1832         Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1833   }
1834   // Now add the optimization pipeline.
1835   MPM.addPass(buildModuleOptimizationPipeline(
1836       Level, ThinOrFullLTOPhase::ThinLTOPostLink));
1837 
1838   // Emit annotation remarks.
1839   addAnnotationRemarksPass(MPM);
1840 
1841   return MPM;
1842 }
1843 
1844 ModulePassManager
buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)1845 PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1846   // FIXME: We should use a customized pre-link pipeline!
1847   return buildPerModuleDefaultPipeline(Level,
1848                                        ThinOrFullLTOPhase::FullLTOPreLink);
1849 }
1850 
1851 ModulePassManager
buildLTODefaultPipeline(OptimizationLevel Level,ModuleSummaryIndex * ExportSummary)1852 PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1853                                      ModuleSummaryIndex *ExportSummary) {
1854   ModulePassManager MPM;
1855 
1856   invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1857 
1858   // Create a function that performs CFI checks for cross-DSO calls with targets
1859   // in the current module.
1860   MPM.addPass(CrossDSOCFIPass());
1861 
1862   if (Level == OptimizationLevel::O0) {
1863     // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1864     // metadata and intrinsics.
1865     MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1866     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1867     // Run a second time to clean up any type tests left behind by WPD for use
1868     // in ICP.
1869     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1870                                    lowertypetests::DropTestKind::Assume));
1871 
1872     MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink));
1873 
1874     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1875 
1876     // Emit annotation remarks.
1877     addAnnotationRemarksPass(MPM);
1878 
1879     return MPM;
1880   }
1881 
1882   if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1883     // Load sample profile before running the LTO optimization pipeline.
1884     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
1885                                         PGOOpt->ProfileRemappingFile,
1886                                         ThinOrFullLTOPhase::FullLTOPostLink));
1887     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1888     // RequireAnalysisPass for PSI before subsequent non-module passes.
1889     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1890   }
1891 
1892   // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1893   MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1894 
1895   // Remove unused virtual tables to improve the quality of code generated by
1896   // whole-program devirtualization and bitset lowering.
1897   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
1898 
1899   // Do basic inference of function attributes from known properties of system
1900   // libraries and other oracles.
1901   MPM.addPass(InferFunctionAttrsPass());
1902 
1903   if (Level.getSpeedupLevel() > 1) {
1904     MPM.addPass(createModuleToFunctionPassAdaptor(
1905         CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
1906 
1907     // Indirect call promotion. This should promote all the targets that are
1908     // left by the earlier promotion pass that promotes intra-module targets.
1909     // This two-step promotion is to save the compile time. For LTO, it should
1910     // produce the same result as if we only do promotion here.
1911     MPM.addPass(PGOIndirectCallPromotion(
1912         true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1913 
1914     // Promoting by-reference arguments to by-value exposes more constants to
1915     // IPSCCP.
1916     CGSCCPassManager CGPM;
1917     CGPM.addPass(PostOrderFunctionAttrsPass());
1918     CGPM.addPass(ArgumentPromotionPass());
1919     CGPM.addPass(
1920         createCGSCCToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG)));
1921     MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1922 
1923     // Propagate constants at call sites into the functions they call.  This
1924     // opens opportunities for globalopt (and inlining) by substituting function
1925     // pointers passed as arguments to direct uses of functions.
1926     MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1927                                          Level != OptimizationLevel::Os &&
1928                                          Level != OptimizationLevel::Oz)));
1929 
1930     // Attach metadata to indirect call sites indicating the set of functions
1931     // they may target at run-time. This should follow IPSCCP.
1932     MPM.addPass(CalledValuePropagationPass());
1933   }
1934 
1935   // Do RPO function attribute inference across the module to forward-propagate
1936   // attributes where applicable.
1937   // FIXME: Is this really an optimization rather than a canonicalization?
1938   MPM.addPass(ReversePostOrderFunctionAttrsPass());
1939 
1940   // Use in-range annotations on GEP indices to split globals where beneficial.
1941   MPM.addPass(GlobalSplitPass());
1942 
1943   // Run whole program optimization of virtual call when the list of callees
1944   // is fixed.
1945   MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
1946 
1947   // Stop here at -O1.
1948   if (Level == OptimizationLevel::O1) {
1949     // The LowerTypeTestsPass needs to run to lower type metadata and the
1950     // type.test intrinsics. The pass does nothing if CFI is disabled.
1951     MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
1952     // Run a second time to clean up any type tests left behind by WPD for use
1953     // in ICP (which is performed earlier than this in the regular LTO
1954     // pipeline).
1955     MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
1956                                    lowertypetests::DropTestKind::Assume));
1957 
1958     MPM.addPass(buildCoroWrapper(ThinOrFullLTOPhase::FullLTOPostLink));
1959 
1960     invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1961 
1962     // Emit annotation remarks.
1963     addAnnotationRemarksPass(MPM);
1964 
1965     return MPM;
1966   }
1967 
1968   // TODO: Skip to match buildCoroWrapper.
1969   MPM.addPass(CoroEarlyPass());
1970 
1971   // Optimize globals to try and fold them into constants.
1972   MPM.addPass(GlobalOptPass());
1973 
1974   // Promote any localized globals to SSA registers.
1975   MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
1976 
1977   // Linking modules together can lead to duplicate global constant, only
1978   // keep one copy of each constant.
1979   MPM.addPass(ConstantMergePass());
1980 
1981   // Remove unused arguments from functions.
1982   MPM.addPass(DeadArgumentEliminationPass());
1983 
1984   // Reduce the code after globalopt and ipsccp.  Both can open up significant
1985   // simplification opportunities, and both can propagate functions through
1986   // function pointers.  When this happens, we often have to resolve varargs
1987   // calls, etc, so let instcombine do this.
1988   FunctionPassManager PeepholeFPM;
1989   PeepholeFPM.addPass(InstCombinePass());
1990   if (Level.getSpeedupLevel() > 1)
1991     PeepholeFPM.addPass(AggressiveInstCombinePass());
1992   invokePeepholeEPCallbacks(PeepholeFPM, Level);
1993 
1994   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
1995                                                 PTO.EagerlyInvalidateAnalyses));
1996 
1997   // Lower variadic functions for supported targets prior to inlining.
1998   MPM.addPass(ExpandVariadicsPass(ExpandVariadicsMode::Optimize));
1999 
2000   // Note: historically, the PruneEH pass was run first to deduce nounwind and
2001   // generally clean up exception handling overhead. It isn't clear this is
2002   // valuable as the inliner doesn't currently care whether it is inlining an
2003   // invoke or a call.
2004   // Run the inliner now.
2005   if (EnableModuleInliner) {
2006     MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
2007                                   UseInlineAdvisor,
2008                                   ThinOrFullLTOPhase::FullLTOPostLink));
2009   } else {
2010     MPM.addPass(ModuleInlinerWrapperPass(
2011         getInlineParamsFromOptLevel(Level),
2012         /* MandatoryFirst */ true,
2013         InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
2014                       InlinePass::CGSCCInliner}));
2015   }
2016 
2017   // Perform context disambiguation after inlining, since that would reduce the
2018   // amount of additional cloning required to distinguish the allocation
2019   // contexts.
2020   if (EnableMemProfContextDisambiguation)
2021     MPM.addPass(MemProfContextDisambiguation(
2022         /*Summary=*/nullptr,
2023         PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2024 
2025   // Optimize globals again after we ran the inliner.
2026   MPM.addPass(GlobalOptPass());
2027 
2028   // Run the OpenMPOpt pass again after global optimizations.
2029   MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
2030 
2031   // Garbage collect dead functions.
2032   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2033 
2034   // If we didn't decide to inline a function, check to see if we can
2035   // transform it to pass arguments by value instead of by reference.
2036   CGSCCPassManager CGPM;
2037   CGPM.addPass(ArgumentPromotionPass());
2038   CGPM.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
2039   CGPM.addPass(CoroAnnotationElidePass());
2040   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2041 
2042   FunctionPassManager FPM;
2043   // The IPO Passes may leave cruft around. Clean up after them.
2044   FPM.addPass(InstCombinePass());
2045   invokePeepholeEPCallbacks(FPM, Level);
2046 
2047   if (EnableConstraintElimination)
2048     FPM.addPass(ConstraintEliminationPass());
2049 
2050   FPM.addPass(JumpThreadingPass());
2051 
2052   // Do a post inline PGO instrumentation and use pass. This is a context
2053   // sensitive PGO pass.
2054   if (PGOOpt) {
2055     if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2056       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2057                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2058                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
2059                         PGOOpt->FS);
2060     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2061       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2062                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2063                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
2064                         PGOOpt->FS);
2065   }
2066 
2067   // Break up allocas
2068   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
2069 
2070   // LTO provides additional opportunities for tailcall elimination due to
2071   // link-time inlining, and visibility of nocapture attribute.
2072   FPM.addPass(
2073       TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2074 
2075   // Run a few AA driver optimizations here and now to cleanup the code.
2076   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2077                                                 PTO.EagerlyInvalidateAnalyses));
2078 
2079   MPM.addPass(
2080       createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
2081 
2082   // Require the GlobalsAA analysis for the module so we can query it within
2083   // MainFPM.
2084   if (EnableGlobalAnalyses) {
2085     MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
2086     // Invalidate AAManager so it can be recreated and pick up the newly
2087     // available GlobalsAA.
2088     MPM.addPass(
2089         createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
2090   }
2091 
2092   FunctionPassManager MainFPM;
2093   MainFPM.addPass(createFunctionToLoopPassAdaptor(
2094       LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2095                /*AllowSpeculation=*/true),
2096       /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
2097 
2098   if (RunNewGVN)
2099     MainFPM.addPass(NewGVNPass());
2100   else
2101     MainFPM.addPass(GVNPass());
2102 
2103   // Remove dead memcpy()'s.
2104   MainFPM.addPass(MemCpyOptPass());
2105 
2106   // Nuke dead stores.
2107   MainFPM.addPass(DSEPass());
2108   MainFPM.addPass(MoveAutoInitPass());
2109   MainFPM.addPass(MergedLoadStoreMotionPass());
2110 
2111   invokeVectorizerStartEPCallbacks(MainFPM, Level);
2112 
2113   LoopPassManager LPM;
2114   if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2115     LPM.addPass(LoopFlattenPass());
2116   LPM.addPass(IndVarSimplifyPass());
2117   LPM.addPass(LoopDeletionPass());
2118   // FIXME: Add loop interchange.
2119 
2120   // Unroll small loops and perform peeling.
2121   LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2122                                  /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2123                                  PTO.ForgetAllSCEVInLoopUnroll));
2124   // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2125   // *All* loop passes must preserve it, in order to be able to use it.
2126   MainFPM.addPass(createFunctionToLoopPassAdaptor(
2127       std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
2128 
2129   MainFPM.addPass(LoopDistributePass());
2130 
2131   addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
2132 
2133   invokeVectorizerEndEPCallbacks(MainFPM, Level);
2134 
2135   // Run the OpenMPOpt CGSCC pass again late.
2136   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
2137       OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
2138 
2139   invokePeepholeEPCallbacks(MainFPM, Level);
2140   MainFPM.addPass(JumpThreadingPass());
2141   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2142                                                 PTO.EagerlyInvalidateAnalyses));
2143 
2144   // Lower type metadata and the type.test intrinsic. This pass supports
2145   // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2146   // to be run at link time if CFI is enabled. This pass does nothing if
2147   // CFI is disabled.
2148   MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2149   // Run a second time to clean up any type tests left behind by WPD for use
2150   // in ICP (which is performed earlier than this in the regular LTO pipeline).
2151   MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
2152                                  lowertypetests::DropTestKind::Assume));
2153 
2154   // Enable splitting late in the FullLTO post-link pipeline.
2155   if (EnableHotColdSplit)
2156     MPM.addPass(HotColdSplittingPass());
2157 
2158   // Add late LTO optimization passes.
2159   FunctionPassManager LateFPM;
2160 
2161   // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2162   // canonicalization pass that enables other optimizations. As a result,
2163   // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2164   // result too early.
2165   LateFPM.addPass(LoopSinkPass());
2166 
2167   // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2168   // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2169   // flattening of blocks.
2170   LateFPM.addPass(DivRemPairsPass());
2171 
2172   // Delete basic blocks, which optimization passes may have killed.
2173   LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
2174                                       .convertSwitchRangeToICmp(true)
2175                                       .hoistCommonInsts(true)
2176                                       .speculateUnpredictables(true)));
2177   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2178 
2179   // Drop bodies of available externally objects to improve GlobalDCE.
2180   MPM.addPass(EliminateAvailableExternallyPass());
2181 
2182   // Now that we have optimized the program, discard unreachable functions.
2183   MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2184 
2185   if (PTO.MergeFunctions)
2186     MPM.addPass(MergeFunctionsPass());
2187 
2188   MPM.addPass(RelLookupTableConverterPass());
2189 
2190   if (PTO.CallGraphProfile)
2191     MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2192 
2193   MPM.addPass(CoroCleanupPass());
2194 
2195   invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2196 
2197   // Emit annotation remarks.
2198   addAnnotationRemarksPass(MPM);
2199 
2200   return MPM;
2201 }
2202 
2203 ModulePassManager
buildO0DefaultPipeline(OptimizationLevel Level,ThinOrFullLTOPhase Phase)2204 PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2205                                     ThinOrFullLTOPhase Phase) {
2206   assert(Level == OptimizationLevel::O0 &&
2207          "buildO0DefaultPipeline should only be used with O0");
2208 
2209   ModulePassManager MPM;
2210 
2211   // Perform pseudo probe instrumentation in O0 mode. This is for the
2212   // consistency between different build modes. For example, a LTO build can be
2213   // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2214   // the postlink will require pseudo probe instrumentation in the prelink.
2215   if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2216     MPM.addPass(SampleProfileProbePass(TM));
2217 
2218   if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2219                  PGOOpt->Action == PGOOptions::IRUse))
2220     addPGOInstrPassesForO0(
2221         MPM,
2222         /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2223         /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2224         PGOOpt->ProfileRemappingFile, PGOOpt->FS);
2225 
2226   // Instrument function entry and exit before all inlining.
2227   MPM.addPass(createModuleToFunctionPassAdaptor(
2228       EntryExitInstrumenterPass(/*PostInlining=*/false)));
2229 
2230   invokePipelineStartEPCallbacks(MPM, Level);
2231 
2232   if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2233     MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
2234 
2235   if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2236     // Explicitly disable sample loader inlining and use flattened profile in O0
2237     // pipeline.
2238     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2239                                         PGOOpt->ProfileRemappingFile,
2240                                         ThinOrFullLTOPhase::None, nullptr,
2241                                         /*DisableSampleProfileInlining=*/true,
2242                                         /*UseFlattenedProfile=*/true));
2243     // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2244     // RequireAnalysisPass for PSI before subsequent non-module passes.
2245     MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
2246   }
2247 
2248   invokePipelineEarlySimplificationEPCallbacks(MPM, Level, Phase);
2249 
2250   // Build a minimal pipeline based on the semantics required by LLVM,
2251   // which is just that always inlining occurs. Further, disable generating
2252   // lifetime intrinsics to avoid enabling further optimizations during
2253   // code generation.
2254   MPM.addPass(AlwaysInlinerPass(
2255       /*InsertLifetimeIntrinsics=*/false));
2256 
2257   if (PTO.MergeFunctions)
2258     MPM.addPass(MergeFunctionsPass());
2259 
2260   if (EnableMatrix)
2261     MPM.addPass(
2262         createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
2263 
2264   if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2265     CGSCCPassManager CGPM;
2266     invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2267     if (!CGPM.isEmpty())
2268       MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
2269   }
2270   if (!LateLoopOptimizationsEPCallbacks.empty()) {
2271     LoopPassManager LPM;
2272     invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2273     if (!LPM.isEmpty()) {
2274       MPM.addPass(createModuleToFunctionPassAdaptor(
2275           createFunctionToLoopPassAdaptor(std::move(LPM))));
2276     }
2277   }
2278   if (!LoopOptimizerEndEPCallbacks.empty()) {
2279     LoopPassManager LPM;
2280     invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2281     if (!LPM.isEmpty()) {
2282       MPM.addPass(createModuleToFunctionPassAdaptor(
2283           createFunctionToLoopPassAdaptor(std::move(LPM))));
2284     }
2285   }
2286   if (!ScalarOptimizerLateEPCallbacks.empty()) {
2287     FunctionPassManager FPM;
2288     invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2289     if (!FPM.isEmpty())
2290       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2291   }
2292 
2293   invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
2294 
2295   if (!VectorizerStartEPCallbacks.empty()) {
2296     FunctionPassManager FPM;
2297     invokeVectorizerStartEPCallbacks(FPM, Level);
2298     if (!FPM.isEmpty())
2299       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2300   }
2301 
2302   if (!VectorizerEndEPCallbacks.empty()) {
2303     FunctionPassManager FPM;
2304     invokeVectorizerEndEPCallbacks(FPM, Level);
2305     if (!FPM.isEmpty())
2306       MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2307   }
2308 
2309   MPM.addPass(buildCoroWrapper(Phase));
2310 
2311   invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
2312 
2313   if (isLTOPreLink(Phase))
2314     addRequiredLTOPreLinkPasses(MPM);
2315 
2316   MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
2317 
2318   return MPM;
2319 }
2320 
buildDefaultAAPipeline()2321 AAManager PassBuilder::buildDefaultAAPipeline() {
2322   AAManager AA;
2323 
2324   // The order in which these are registered determines their priority when
2325   // being queried.
2326 
2327   // Add any target-specific alias analyses that should be run early.
2328   if (TM)
2329     TM->registerEarlyDefaultAliasAnalyses(AA);
2330 
2331   // First we register the basic alias analysis that provides the majority of
2332   // per-function local AA logic. This is a stateless, on-demand local set of
2333   // AA techniques.
2334   AA.registerFunctionAnalysis<BasicAA>();
2335 
2336   // Next we query fast, specialized alias analyses that wrap IR-embedded
2337   // information about aliasing.
2338   AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2339   AA.registerFunctionAnalysis<TypeBasedAA>();
2340 
2341   // Add support for querying global aliasing information when available.
2342   // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2343   // analysis, all that the `AAManager` can do is query for any *cached*
2344   // results from `GlobalsAA` through a readonly proxy.
2345   if (EnableGlobalAnalyses)
2346     AA.registerModuleAnalysis<GlobalsAA>();
2347 
2348   // Add target-specific alias analyses.
2349   if (TM)
2350     TM->registerDefaultAliasAnalyses(AA);
2351 
2352   return AA;
2353 }
2354 
isInstrumentedPGOUse() const2355 bool PassBuilder::isInstrumentedPGOUse() const {
2356   return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2357          !UseCtxProfile.empty();
2358 }