1 //===- Construction of pass pipelines -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file provides the implementation of the PassBuilder based on our 11 /// static pass registry as well as related functionality. It also provides 12 /// helpers to aid in analyzing, debugging, and testing passes and pass 13 /// pipelines. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/AliasAnalysis.h" 19 #include "llvm/Analysis/BasicAliasAnalysis.h" 20 #include "llvm/Analysis/CGSCCPassManager.h" 21 #include "llvm/Analysis/GlobalsModRef.h" 22 #include "llvm/Analysis/InlineAdvisor.h" 23 #include "llvm/Analysis/ProfileSummaryInfo.h" 24 #include "llvm/Analysis/ScopedNoAliasAA.h" 25 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 26 #include "llvm/IR/PassManager.h" 27 #include "llvm/Passes/OptimizationLevel.h" 28 #include "llvm/Passes/PassBuilder.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/PGOOptions.h" 32 #include "llvm/Support/VirtualFileSystem.h" 33 #include "llvm/Target/TargetMachine.h" 34 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 35 #include "llvm/Transforms/Coroutines/CoroCleanup.h" 36 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" 37 #include "llvm/Transforms/Coroutines/CoroEarly.h" 38 #include "llvm/Transforms/Coroutines/CoroElide.h" 39 #include "llvm/Transforms/Coroutines/CoroSplit.h" 40 #include "llvm/Transforms/IPO/AlwaysInliner.h" 41 #include "llvm/Transforms/IPO/Annotation2Metadata.h" 42 #include 
"llvm/Transforms/IPO/ArgumentPromotion.h" 43 #include "llvm/Transforms/IPO/Attributor.h" 44 #include "llvm/Transforms/IPO/CalledValuePropagation.h" 45 #include "llvm/Transforms/IPO/ConstantMerge.h" 46 #include "llvm/Transforms/IPO/CrossDSOCFI.h" 47 #include "llvm/Transforms/IPO/DeadArgumentElimination.h" 48 #include "llvm/Transforms/IPO/ElimAvailExtern.h" 49 #include "llvm/Transforms/IPO/EmbedBitcodePass.h" 50 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" 51 #include "llvm/Transforms/IPO/FunctionAttrs.h" 52 #include "llvm/Transforms/IPO/GlobalDCE.h" 53 #include "llvm/Transforms/IPO/GlobalOpt.h" 54 #include "llvm/Transforms/IPO/GlobalSplit.h" 55 #include "llvm/Transforms/IPO/HotColdSplitting.h" 56 #include "llvm/Transforms/IPO/IROutliner.h" 57 #include "llvm/Transforms/IPO/InferFunctionAttrs.h" 58 #include "llvm/Transforms/IPO/Inliner.h" 59 #include "llvm/Transforms/IPO/LowerTypeTests.h" 60 #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" 61 #include "llvm/Transforms/IPO/MergeFunctions.h" 62 #include "llvm/Transforms/IPO/ModuleInliner.h" 63 #include "llvm/Transforms/IPO/OpenMPOpt.h" 64 #include "llvm/Transforms/IPO/PartialInlining.h" 65 #include "llvm/Transforms/IPO/SCCP.h" 66 #include "llvm/Transforms/IPO/SampleProfile.h" 67 #include "llvm/Transforms/IPO/SampleProfileProbe.h" 68 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" 69 #include "llvm/Transforms/IPO/WholeProgramDevirt.h" 70 #include "llvm/Transforms/InstCombine/InstCombine.h" 71 #include "llvm/Transforms/Instrumentation/CGProfile.h" 72 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 73 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" 74 #include "llvm/Transforms/Instrumentation/InstrProfiling.h" 75 #include "llvm/Transforms/Instrumentation/MemProfiler.h" 76 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 77 #include "llvm/Transforms/Scalar/ADCE.h" 78 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" 79 #include 
"llvm/Transforms/Scalar/AnnotationRemarks.h" 80 #include "llvm/Transforms/Scalar/BDCE.h" 81 #include "llvm/Transforms/Scalar/CallSiteSplitting.h" 82 #include "llvm/Transforms/Scalar/ConstraintElimination.h" 83 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" 84 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 85 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 86 #include "llvm/Transforms/Scalar/DivRemPairs.h" 87 #include "llvm/Transforms/Scalar/EarlyCSE.h" 88 #include "llvm/Transforms/Scalar/Float2Int.h" 89 #include "llvm/Transforms/Scalar/GVN.h" 90 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 91 #include "llvm/Transforms/Scalar/InstSimplifyPass.h" 92 #include "llvm/Transforms/Scalar/JumpThreading.h" 93 #include "llvm/Transforms/Scalar/LICM.h" 94 #include "llvm/Transforms/Scalar/LoopDeletion.h" 95 #include "llvm/Transforms/Scalar/LoopDistribute.h" 96 #include "llvm/Transforms/Scalar/LoopFlatten.h" 97 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" 98 #include "llvm/Transforms/Scalar/LoopInstSimplify.h" 99 #include "llvm/Transforms/Scalar/LoopInterchange.h" 100 #include "llvm/Transforms/Scalar/LoopLoadElimination.h" 101 #include "llvm/Transforms/Scalar/LoopPassManager.h" 102 #include "llvm/Transforms/Scalar/LoopRotation.h" 103 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" 104 #include "llvm/Transforms/Scalar/LoopSink.h" 105 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" 106 #include "llvm/Transforms/Scalar/LoopUnrollPass.h" 107 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" 108 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" 109 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" 110 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" 111 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" 112 #include "llvm/Transforms/Scalar/NewGVN.h" 113 #include "llvm/Transforms/Scalar/Reassociate.h" 114 #include "llvm/Transforms/Scalar/SCCP.h" 115 #include "llvm/Transforms/Scalar/SROA.h" 
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CountVisits.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/Transforms/Vectorize/VectorCombine.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Command-line flags consulted while constructing the default pipelines in
// this file. Most are cl::Hidden; the flag's cl::desc string is the
// authoritative description of what it does.
//===----------------------------------------------------------------------===//

/// Selects the inline advisor mode (default / development / release). Passed
/// to the ModuleInlinerWrapperPass constructed in buildInlinerPipeline.
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
    "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
    cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
    cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
                          "Heuristics-based inliner version"),
               clEnumValN(InliningAdvisorMode::Development, "development",
                          "Use development mode (runtime-loadable model)"),
               clEnumValN(InliningAdvisorMode::Release, "release",
                          "Use release mode (AOT-compiled model)")));

static cl::opt<bool> EnableSyntheticCounts(
    "enable-npm-synthetic-counts", cl::Hidden,
    cl::desc("Run synthetic function entry count generation "
             "pass"));

/// Flag to enable inline deferral during PGO.
static cl::opt<bool>
    EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
                            cl::Hidden,
                            cl::desc("Enable inline deferral during PGO"));

static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
                                         cl::init(false), cl::Hidden,
                                         cl::desc("Enable module inliner"));

/// Passed as the "mandatory first" argument of the ModuleInlinerWrapperPass
/// constructed in buildInlinerPipeline.
static cl::opt<bool> PerformMandatoryInliningsFirst(
    "mandatory-inlining-first", cl::init(true), cl::Hidden,
    cl::desc("Perform mandatory inlinings module-wide, before performing "
             "inlining"));

/// Supplies the default for PipelineTuningOptions::EagerlyInvalidateAnalyses.
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
    "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
    cl::desc("Eagerly invalidate more analyses in default pipelines"));

/// Supplies the default for PipelineTuningOptions::MergeFunctions.
static cl::opt<bool> EnableMergeFunctions(
    "enable-merge-functions", cl::init(false), cl::Hidden,
    cl::desc("Enable function merging as part of the optimization pipeline"));

/// Gates the LoopRotatePass that addPGOInstrPasses schedules right after
/// PGOInstrumentationGen.
static cl::opt<bool> EnablePostPGOLoopRotation(
    "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
    cl::desc("Run the loop rotation transformation after PGO instrumentation"));

static cl::opt<bool> EnableGlobalAnalyses(
    "enable-global-analyses", cl::init(true), cl::Hidden,
    cl::desc("Enable inter-procedural analyses"));

static cl::opt<bool>
    RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
                       cl::desc("Run Partial inlinining pass"));

static cl::opt<bool> ExtraVectorizerPasses(
    "extra-vectorizer-passes", cl::init(false), cl::Hidden,
    cl::desc("Run cleanup optimization passes after vectorization"));

/// When set, buildFunctionSimplificationPipeline schedules NewGVNPass instead
/// of GVNPass.
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                               cl::desc("Run the NewGVN pass"));

/// Gates LoopInterchangePass in both function simplification pipelines.
static cl::opt<bool> EnableLoopInterchange(
    "enable-loopinterchange", cl::init(false), cl::Hidden,
    cl::desc("Enable the experimental LoopInterchange Pass"));

static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
                                        cl::init(false), cl::Hidden,
                                        cl::desc("Enable Unroll And Jam Pass"));

/// Gates LoopFlattenPass in both function simplification pipelines.
static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                       cl::Hidden,
                                       cl::desc("Enable the LoopFlatten Pass"));

/// Gates DFAJumpThreadingPass in buildFunctionSimplificationPipeline; that
/// pipeline additionally requires Level.getSizeLevel() == 0.
static cl::opt<bool>
    EnableDFAJumpThreading("enable-dfa-jump-thread",
                           cl::desc("Enable DFA jump threading"),
                           cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableHotColdSplit("hot-cold-split",
                       cl::desc("Enable hot-cold splitting pass"));

static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Enable ir outliner pass"));

/// When set, addPGOInstrPasses skips its pre-instrumentation inliner.
static cl::opt<bool>
    DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
                      cl::desc("Disable pre-instrumentation inliner"));

/// Used by addPGOInstrPasses as the pre-instrumentation inliner's
/// DefaultThreshold (and as its HintThreshold when optimizing for size).
static cl::opt<int> PreInlineThreshold(
    "preinline-threshold", cl::Hidden, cl::init(75),
    cl::desc("Control the amount of inlining in pre-instrumentation inliner "
             "(default = 75)"));

/// Gates GVNHoistPass in buildFunctionSimplificationPipeline.
static cl::opt<bool>
    EnableGVNHoist("enable-gvn-hoist",
                   cl::desc("Enable the GVN hoisting pass (default = off)"));

/// Gates GVNSinkPass (and the SimplifyCFGPass scheduled right after it) in
/// buildFunctionSimplificationPipeline.
static cl::opt<bool>
    EnableGVNSink("enable-gvn-sink",
                  cl::desc("Enable the GVN sinking pass (default = off)"));

// This option is used in simplifying testing SampleFDO optimizations for
// profile loading.
// NOTE(review): the declaration that follows this comment is "enable-chr",
// whose description does not mention SampleFDO or profile loading; this
// comment may be left over from a removed option -- confirm.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

static cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierachy exists in the profile"));

static cl::opt<bool> EnableOrderFileInstrumentation(
    "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
    cl::desc("Enable order file instrumentation (default = off)"));

static cl::opt<bool>
    EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
                 cl::desc("Enable lowering of the matrix intrinsics"));

/// Gates ConstraintEliminationPass in buildFunctionSimplificationPipeline.
static cl::opt<bool> EnableConstraintElimination(
    "enable-constraint-elimination", cl::init(true), cl::Hidden,
    cl::desc(
        "Enable pass to eliminate conditions based on linear constraints"));

/// Selects which attributor runs are enabled (all / module / cgscc / none).
static cl::opt<AttributorRunOption> AttributorRun(
    "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
    cl::desc("Enable the attributor inter-procedural deduction pass"),
    cl::values(clEnumValN(AttributorRunOption::ALL, "all",
                          "enable all attributor runs"),
               clEnumValN(AttributorRunOption::MODULE, "module",
                          "enable module-wide attributor runs"),
               clEnumValN(AttributorRunOption::CGSCC, "cgscc",
                          "enable call graph SCC attributor runs"),
               clEnumValN(AttributorRunOption::NONE, "none",
                          "disable attributor runs")));

// Unlike the options above this one is not `static`.
// NOTE(review): presumably it is referenced from another translation unit --
// confirm before changing its linkage.
cl::opt<bool> EnableMemProfContextDisambiguation(
    "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
    cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));

/// Initializes every tuning knob to its default. Several defaults are read
/// from command-line options: EnableMergeFunctions and
/// EnableEagerlyInvalidateAnalyses are defined in this file, while
/// ForgetSCEVInLoopUnroll, SetLicmMssaOptCap and
/// SetLicmMssaNoAccForPromotionCap are declared outside this file.
PipelineTuningOptions::PipelineTuningOptions() {
  LoopInterleaving = true;
  LoopVectorization = true;
  SLPVectorization = false;
  LoopUnrolling = true;
  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
  LicmMssaOptCap = SetLicmMssaOptCap;
  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
  CallGraphProfile = true;
  UnifiedLTO = false;
  MergeFunctions = EnableMergeFunctions;
  // -1 is the "not set" sentinel: buildInlinerPipeline derives the inline
  // parameters from the optimization level when it sees this value.
  InlinerThreshold = -1;
  EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
}

// Options declared here but defined elsewhere. MaxDevirtIterations is used by
// buildInlinerPipeline and EnableKnowledgeRetention by
// buildFunctionSimplificationPipeline.
namespace llvm {
extern cl::opt<unsigned> MaxDevirtIterations;
extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm

//===----------------------------------------------------------------------===//
// Extension-point dispatch helpers.
//
// Each invoke*EPCallbacks function below calls every callback stored in the
// correspondingly named *EPCallbacks container, passing it the supplied pass
// manager (by reference) and the optimization level.
//===----------------------------------------------------------------------===//

void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
                                            OptimizationLevel Level) {
  for (auto &C : PeepholeEPCallbacks)
    C(FPM, Level);
}
void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
    LoopPassManager &LPM, OptimizationLevel Level) {
  for (auto &C : LateLoopOptimizationsEPCallbacks)
    C(LPM, Level);
}
void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
                                                    OptimizationLevel Level) {
  for (auto &C : LoopOptimizerEndEPCallbacks)
    C(LPM, Level);
}
void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
    FunctionPassManager &FPM, OptimizationLevel Level) {
  for (auto &C : ScalarOptimizerLateEPCallbacks)
    C(FPM, Level);
}
void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
                                                      OptimizationLevel Level) {
  for (auto &C : CGSCCOptimizerLateEPCallbacks)
    C(CGPM, Level);
}
void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
                                                   OptimizationLevel Level) {
  for (auto &C : VectorizerStartEPCallbacks)
    C(FPM, Level);
}
void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
                                                  OptimizationLevel Level) {
  for (auto &C : OptimizerEarlyEPCallbacks)
    C(MPM, Level);
}
void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
                                                 OptimizationLevel Level) {
  for (auto &C : OptimizerLastEPCallbacks)
    C(MPM, Level);
}
void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
    ModulePassManager &MPM, OptimizationLevel Level) {
  for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
    C(MPM, Level);
}
void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
    ModulePassManager &MPM, OptimizationLevel Level) {
  for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
    C(MPM, Level);
}
void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
                                                 OptimizationLevel Level) {
  for (auto &C : PipelineStartEPCallbacks)
    C(MPM, Level);
}
void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
    ModulePassManager &MPM, OptimizationLevel Level) {
  for (auto &C : PipelineEarlySimplificationEPCallbacks)
    C(MPM, Level);
}

// Helper to add AnnotationRemarksPass. The function pass is wrapped in a
// module-to-function adaptor so it can be scheduled on a ModulePassManager.
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
}

// Helper to check if the current compilation phase is preparing for LTO,
// i.e. it is either the ThinLTO or the full LTO pre-link phase.
static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
  return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
         Phase == ThinOrFullLTOPhase::FullLTOPreLink;
}

/// Builds the function simplification pipeline used for speedup level 1;
/// buildFunctionSimplificationPipeline dispatches here when
/// Level.getSpeedupLevel() == 1.
///
/// \param Level Forwarded to the extension-point callbacks and used for
///              LoopFullUnrollPass's first argument.
/// \param Phase Decides LoopRotatePass's second argument (via isLTOPreLink)
///              and, together with PGOOpt, whether full unrolling is added.
/// \returns The populated FunctionPassManager.
// TODO: Investigate the cost/benefit of tail call elimination on debugging.
FunctionPassManager
PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
                                                   ThinOrFullLTOPhase Phase) {

  FunctionPassManager FPM;

  if (AreStatisticsEnabled())
    FPM.addPass(CountVisitsPass());

  // Form SSA out of local memory accesses after breaking apart aggregates into
  // scalars.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Catch trivial redundancies
  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));

  // Hoisting of scalars and load expressions.
  // NOTE(review): in buildFunctionSimplificationPipeline this same comment
  // precedes GVNHoistPass; here the passes that follow are SimplifyCFG and
  // InstCombine, so the comment looks stale -- confirm.
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());

  FPM.addPass(LibCallsShrinkWrapPass());

  invokePeepholeEPCallbacks(FPM, Level);

  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

  // Form canonically associated expression trees, and simplify the trees using
  // basic mathematical properties. For example, this will form (nearly)
  // minimal multiplication trees.
  FPM.addPass(ReassociatePass());

  // Add the primary loop simplification pipeline.
  // FIXME: Currently this is split into two loop pass pipelines because we run
  // some function passes in between them. These can and should be removed
  // and/or replaced by scheduling the loop pass equivalents in the correct
  // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
  // `LoopInstSimplify`.
  LoopPassManager LPM1, LPM2;

  // Simplify the loop body. We do this initially to clean up after other loop
  // passes run, either when iterating on a loop or on inner loops with
  // implications on the outer loop.
  LPM1.addPass(LoopInstSimplifyPass());
  LPM1.addPass(LoopSimplifyCFGPass());

  // Try to remove as much code from the loop header as possible,
  // to reduce amount of IR that will have to be duplicated. However,
  // do not perform speculative hoisting the first time as LICM
  // will destroy metadata that may not need to be destroyed if run
  // after loop rotation.
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/false));

  // NOTE(review): the inline comment below says "Disable header duplication"
  // but the argument is `true`. The non-O1 pipeline passes
  // `Level != OptimizationLevel::Oz` for the same parameter under the comment
  // "Disable header duplication in loop rotation at -Oz", which suggests that
  // `true` *enables* header duplication -- confirm against LoopRotatePass.
  LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
                              isLTOPreLink(Phase)));
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/true));
  LPM1.addPass(SimpleLoopUnswitchPass());
  if (EnableLoopFlatten)
    LPM1.addPass(LoopFlattenPass());

  LPM2.addPass(LoopIdiomRecognizePass());
  LPM2.addPass(IndVarSimplifyPass());

  invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);

  LPM2.addPass(LoopDeletionPass());

  if (EnableLoopInterchange)
    LPM2.addPass(LoopInterchangePass());

  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
  // because it changes the IR in a way that makes profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full unroll attributes so we need to make sure and allow the full
  // unroll pass to pay attention to it.
  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
      PGOOpt->Action != PGOOptions::SampleUse)
    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                    PTO.ForgetAllSCEVInLoopUnroll));

  invokeLoopOptimizerEndEPCallbacks(LPM2, Level);

  // LPM1 and LPM2 are scheduled only now, with SimplifyCFG and InstCombine
  // running between the two loop pipelines.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                              /*UseMemorySSA=*/true,
                                              /*UseBlockFrequencyInfo=*/true));
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());
  // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
                                              /*UseMemorySSA=*/false,
                                              /*UseBlockFrequencyInfo=*/false));

  // Delete small array after loop unroll.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  FPM.addPass(MemCpyOptPass());

  // Sparse conditional constant propagation.
  // FIXME: It isn't clear why we do this *after* loop passes rather than
  // before...
  FPM.addPass(SCCPPass());

  // Delete dead bit computations (instcombine runs after to fold away the dead
  // computations, and then ADCE will run later to exploit any new DCE
  // opportunities that creates).
  FPM.addPass(BDCEPass());

  // Run instcombine after redundancy and dead bit elimination to exploit
  // opportunities opened up by them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  FPM.addPass(CoroElidePass());

  invokeScalarOptimizerLateEPCallbacks(FPM, Level);

  // Finally, do an expensive DCE pass to catch all the dead code exposed by
  // the simplifications and basic cleanup after all the simplifications.
  // TODO: Investigate if this is too expensive.
  FPM.addPass(ADCEPass());
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  return FPM;
}

/// Builds the function simplification pipeline for any optimization level
/// other than O0 (asserted). Speedup level 1 is delegated to
/// buildO1FunctionSimplificationPipeline; everything else is built here.
///
/// \param Level Selects size/speed dependent passes (e.g. LibCallsShrinkWrap
///              is skipped when optimizing for size, non-trivial unswitching
///              is requested only at O3) and is forwarded to the
///              extension-point callbacks.
/// \param Phase Decides LoopRotatePass's second argument (via isLTOPreLink)
///              and, together with PGOOpt, whether full unrolling is added.
/// \returns The populated FunctionPassManager.
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
                                                 ThinOrFullLTOPhase Phase) {
  assert(Level != OptimizationLevel::O0 && "Must request optimizations!");

  // The O1 pipeline has a separate pipeline creation function to simplify
  // construction readability.
  if (Level.getSpeedupLevel() == 1)
    return buildO1FunctionSimplificationPipeline(Level, Phase);

  FunctionPassManager FPM;

  if (AreStatisticsEnabled())
    FPM.addPass(CountVisitsPass());

  // Form SSA out of local memory accesses after breaking apart aggregates into
  // scalars.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Catch trivial redundancies
  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
  if (EnableKnowledgeRetention)
    FPM.addPass(AssumeSimplifyPass());

  // Hoisting of scalars and load expressions.
  if (EnableGVNHoist)
    FPM.addPass(GVNHoistPass());

  // Global value numbering based sinking.
  if (EnableGVNSink) {
    FPM.addPass(GVNSinkPass());
    FPM.addPass(
        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  }

  // Speculative execution if the target has divergent branches; otherwise nop.
  FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));

  // Optimize based on known information about branches, and cleanup afterward.
  FPM.addPass(JumpThreadingPass());
  FPM.addPass(CorrelatedValuePropagationPass());

  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());
  FPM.addPass(AggressiveInstCombinePass());

  if (EnableConstraintElimination)
    FPM.addPass(ConstraintEliminationPass());

  if (!Level.isOptimizingForSize())
    FPM.addPass(LibCallsShrinkWrapPass());

  invokePeepholeEPCallbacks(FPM, Level);

  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
  // using the size value profile. Don't perform this when optimizing for size.
  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
      !Level.isOptimizingForSize())
    FPM.addPass(PGOMemOPSizeOpt());

  FPM.addPass(TailCallElimPass());
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

  // Form canonically associated expression trees, and simplify the trees using
  // basic mathematical properties. For example, this will form (nearly)
  // minimal multiplication trees.
  FPM.addPass(ReassociatePass());

  // Add the primary loop simplification pipeline.
  // FIXME: Currently this is split into two loop pass pipelines because we run
  // some function passes in between them. These can and should be removed
  // and/or replaced by scheduling the loop pass equivalents in the correct
  // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
  // `LoopInstSimplify`.
  LoopPassManager LPM1, LPM2;

  // Simplify the loop body. We do this initially to clean up after other loop
  // passes run, either when iterating on a loop or on inner loops with
  // implications on the outer loop.
  LPM1.addPass(LoopInstSimplifyPass());
  LPM1.addPass(LoopSimplifyCFGPass());

  // Try to remove as much code from the loop header as possible,
  // to reduce amount of IR that will have to be duplicated. However,
  // do not perform speculative hoisting the first time as LICM
  // will destroy metadata that may not need to be destroyed if run
  // after loop rotation.
  // TODO: Investigate promotion cap for O1.
  // NOTE(review): speedup level 1 never reaches this code (see the dispatch at
  // the top of this function), so the "for O1" TODOs here look copied from
  // the O1 pipeline -- confirm.
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/false));

  // Disable header duplication in loop rotation at -Oz.
  LPM1.addPass(
      LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
  // TODO: Investigate promotion cap for O1.
  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/true));
  LPM1.addPass(
      SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
  if (EnableLoopFlatten)
    LPM1.addPass(LoopFlattenPass());

  LPM2.addPass(LoopIdiomRecognizePass());
  LPM2.addPass(IndVarSimplifyPass());

  invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);

  LPM2.addPass(LoopDeletionPass());

  if (EnableLoopInterchange)
    LPM2.addPass(LoopInterchangePass());

  // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
  // because it changes the IR in a way that makes profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full unroll attributes so we need to make sure and allow the full
  // unroll pass to pay attention to it.
  if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
      PGOOpt->Action != PGOOptions::SampleUse)
    LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                    /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                    PTO.ForgetAllSCEVInLoopUnroll));

  invokeLoopOptimizerEndEPCallbacks(LPM2, Level);

  // LPM1 and LPM2 are scheduled only now, with SimplifyCFG and InstCombine
  // running between the two loop pipelines.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                              /*UseMemorySSA=*/true,
                                              /*UseBlockFrequencyInfo=*/true));
  FPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  FPM.addPass(InstCombinePass());
  // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
  // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
                                              /*UseMemorySSA=*/false,
                                              /*UseBlockFrequencyInfo=*/false));

  // Delete small array after loop unroll.
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // Try vectorization/scalarization transforms that are both improvements
  // themselves and can allow further folds with GVN and InstCombine.
  FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));

  // Eliminate redundancies.
  FPM.addPass(MergedLoadStoreMotionPass());
  if (RunNewGVN)
    FPM.addPass(NewGVNPass());
  else
    FPM.addPass(GVNPass());

  // Sparse conditional constant propagation.
  // FIXME: It isn't clear why we do this *after* loop passes rather than
  // before...
  FPM.addPass(SCCPPass());

  // Delete dead bit computations (instcombine runs after to fold away the dead
  // computations, and then ADCE will run later to exploit any new DCE
  // opportunities that creates).
  FPM.addPass(BDCEPass());

  // Run instcombine after redundancy and dead bit elimination to exploit
  // opportunities opened up by them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  // Re-consider control flow based optimizations after redundancy elimination,
  // redo DCE, etc.
  if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
    FPM.addPass(DFAJumpThreadingPass());

  FPM.addPass(JumpThreadingPass());
  FPM.addPass(CorrelatedValuePropagationPass());

  // Finally, do an expensive DCE pass to catch all the dead code exposed by
  // the simplifications and basic cleanup after all the simplifications.
  // TODO: Investigate if this is too expensive.
  FPM.addPass(ADCEPass());

  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
  FPM.addPass(MemCpyOptPass());

  FPM.addPass(DSEPass());
  FPM.addPass(MoveAutoInitPass());

  // One more LICM run (with speculation allowed) after the scalar cleanups.
  FPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  FPM.addPass(CoroElidePass());

  invokeScalarOptimizerLateEPCallbacks(FPM, Level);

  // Final CFG cleanup; unlike the earlier SimplifyCFG runs in this function
  // this one also enables hoisting and sinking of common instructions.
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .convertSwitchRangeToICmp(true)
                                  .hoistCommonInsts(true)
                                  .sinkCommonInsts(true)));
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  return FPM;
}

/// Adds CanonicalizeAliasesPass followed by NameAnonGlobalPass to \p MPM.
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}

/// Adds the PGO instrumentation (generation or use) passes to \p MPM for
/// optimization levels other than O0 (asserted).
///
/// \param MPM        Module pass manager that receives the passes.
/// \param Level      Optimization level; selects the pre-inliner hint
///                   threshold and LoopRotatePass's argument.
/// \param RunProfileGen When false, only PGOInstrumentationUse (plus a
///                   RequireAnalysisPass for ProfileSummaryAnalysis) is added
///                   after the optional pre-inliner; when true, the
///                   instrumentation and profile-lowering passes are added.
/// \param IsCS       Forwarded to the PGO passes; when true the
///                   pre-instrumentation inliner is skipped, and it is also
///                   used as InstrProfOptions::UseBFIInPromotion.
/// \param ProfileFile Profile to read in the use case (must be non-empty
///                   there); in the generation case, if non-empty, it is used
///                   as InstrProfOptions::InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
/// \param LTOPhase   Recorded in the pre-inliner's InlineContext.
/// \param FS         Forwarded to PGOInstrumentationUse.
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
                                    OptimizationLevel Level, bool RunProfileGen,
                                    bool IsCS, std::string ProfileFile,
                                    std::string ProfileRemappingFile,
                                    ThinOrFullLTOPhase LTOPhase,
                                    IntrusiveRefCntPtr<vfs::FileSystem> FS) {
  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
  // Pre-instrumentation inliner plus a small per-function cleanup pipeline.
  if (!IsCS && !DisablePreInliner) {
    InlineParams IP;

    IP.DefaultThreshold = PreInlineThreshold;

    // FIXME: The hint threshold has the same value used by the regular inliner
    // when not optimizing for size. This should probably be lowered after
    // performance testing.
    // FIXME: this comment is cargo culted from the old pass manager, revisit.
    IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
    ModuleInlinerWrapperPass MIWP(
        IP, /* MandatoryFirst */ true,
        InlineContext{LTOPhase, InlinePass::EarlyInliner});
    CGSCCPassManager &CGPipeline = MIWP.getPM();

    FunctionPassManager FPM;
    FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
    FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
        true)));                    // Merge & remove basic blocks.
    FPM.addPass(InstCombinePass()); // Combine silly sequences.
    invokePeepholeEPCallbacks(FPM, Level);

    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
        std::move(FPM), PTO.EagerlyInvalidateAnalyses));

    MPM.addPass(std::move(MIWP));

    // Delete anything that is now dead to make sure that we don't instrument
    // dead code. Instrumentation can end up keeping dead code around and
    // dramatically increase code size.
    MPM.addPass(GlobalDCEPass());
  }

  // Profile-use path: annotate from the profile and stop; nothing below runs.
  if (!RunProfileGen) {
    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
    MPM.addPass(
        PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    return;
  }

  // Perform PGO instrumentation.
  MPM.addPass(PGOInstrumentationGen(IsCS));

  if (EnablePostPGOLoopRotation) {
    // Disable header duplication in loop rotation at -Oz.
    MPM.addPass(createModuleToFunctionPassAdaptor(
        createFunctionToLoopPassAdaptor(
            LoopRotatePass(Level != OptimizationLevel::Oz),
            /*UseMemorySSA=*/false,
            /*UseBlockFrequencyInfo=*/false),
        PTO.EagerlyInvalidateAnalyses));
  }

  // Add the profile lowering pass.
  InstrProfOptions Options;
  if (!ProfileFile.empty())
    Options.InstrProfileOutput = ProfileFile;
  // Do counter promotion at Level greater than O0.
  Options.DoCounterPromotion = true;
  Options.UseBFIInPromotion = IsCS;
  MPM.addPass(InstrProfiling(Options, IsCS));
}

/// O0 counterpart of addPGOInstrPasses: adds the PGO use or generation passes
/// to \p MPM without the pre-instrumentation inliner or the post-PGO loop
/// rotation, and with counter promotion disabled.
///
/// \param MPM        Module pass manager that receives the passes.
/// \param RunProfileGen When false, only PGOInstrumentationUse (plus a
///                   RequireAnalysisPass for ProfileSummaryAnalysis) is added;
///                   when true, the instrumentation and profile-lowering
///                   passes are added.
/// \param IsCS       Forwarded to the PGO passes and used as
///                   InstrProfOptions::UseBFIInPromotion.
/// \param ProfileFile Profile to read in the use case (must be non-empty
///                   there); in the generation case, if non-empty, it is used
///                   as InstrProfOptions::InstrProfileOutput.
/// \param ProfileRemappingFile Forwarded to PGOInstrumentationUse.
/// \param FS         Forwarded to PGOInstrumentationUse.
void PassBuilder::addPGOInstrPassesForO0(
    ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
    std::string ProfileFile, std::string ProfileRemappingFile,
    IntrusiveRefCntPtr<vfs::FileSystem> FS) {
  if (!RunProfileGen) {
    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
    MPM.addPass(
        PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    return;
  }

  // Perform PGO instrumentation.
  MPM.addPass(PGOInstrumentationGen(IsCS));
  // Add the profile lowering pass.
  InstrProfOptions Options;
  if (!ProfileFile.empty())
    Options.InstrProfileOutput = ProfileFile;
  // Do not do counter promotion at O0.
  Options.DoCounterPromotion = false;
  Options.UseBFIInPromotion = IsCS;
  MPM.addPass(InstrProfiling(Options, IsCS));
}

/// Returns the inline parameters for \p Level by forwarding its speedup and
/// size levels to getInlineParams.
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}

ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
                                  ThinOrFullLTOPhase Phase) {
  InlineParams IP;
  if (PTO.InlinerThreshold == -1)
    IP = getInlineParamsFromOptLevel(Level);
  else
    IP = getInlineParams(PTO.InlinerThreshold);
  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  // disable hot callsite inline (as much as possible [1]) because it makes
  // profile annotation in the backend inaccurate.
  //
  // [1] Note the cost of a function could be below zero due to erased
  // prologue / epilogue.
843 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 844 PGOOpt->Action == PGOOptions::SampleUse) 845 IP.HotCallSiteThreshold = 0; 846 847 if (PGOOpt) 848 IP.EnableDeferral = EnablePGOInlineDeferral; 849 850 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, 851 InlineContext{Phase, InlinePass::CGSCCInliner}, 852 UseInlineAdvisor, MaxDevirtIterations); 853 854 // Require the GlobalsAA analysis for the module so we can query it within 855 // the CGSCC pipeline. 856 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>()); 857 // Invalidate AAManager so it can be recreated and pick up the newly available 858 // GlobalsAA. 859 MIWP.addModulePass( 860 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 861 862 // Require the ProfileSummaryAnalysis for the module so we can query it within 863 // the inliner pass. 864 MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 865 866 // Now begin the main postorder CGSCC pipeline. 867 // FIXME: The current CGSCC pipeline has its origins in the legacy pass 868 // manager and trying to emulate its precise behavior. Much of this doesn't 869 // make a lot of sense and we should revisit the core CGSCC structure. 870 CGSCCPassManager &MainCGPipeline = MIWP.getPM(); 871 872 // Note: historically, the PruneEH pass was run first to deduce nounwind and 873 // generally clean up exception handling overhead. It isn't clear this is 874 // valuable as the inliner doesn't currently care whether it is inlining an 875 // invoke or a call. 876 877 if (AttributorRun & AttributorRunOption::CGSCC) 878 MainCGPipeline.addPass(AttributorCGSCCPass()); 879 880 // Deduce function attributes. We do another run of this after the function 881 // simplification pipeline, so this only needs to run when it could affect the 882 // function simplification pipeline, which is only the case with recursive 883 // functions. 
884 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)); 885 886 // When at O3 add argument promotion to the pass pipeline. 887 // FIXME: It isn't at all clear why this should be limited to O3. 888 if (Level == OptimizationLevel::O3) 889 MainCGPipeline.addPass(ArgumentPromotionPass()); 890 891 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if 892 // there are no OpenMP runtime calls present in the module. 893 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) 894 MainCGPipeline.addPass(OpenMPOptCGSCCPass()); 895 896 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level); 897 898 // Add the core function simplification pipeline nested inside the 899 // CGSCC walk. 900 MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 901 buildFunctionSimplificationPipeline(Level, Phase), 902 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true)); 903 904 // Finally, deduce any function attributes based on the fully simplified 905 // function. 906 MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); 907 908 // Mark that the function is fully simplified and that it shouldn't be 909 // simplified again if we somehow revisit it due to CGSCC mutations unless 910 // it's been modified since. 911 MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 912 RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>())); 913 914 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); 915 916 // Make sure we don't affect potential future NoRerun CGSCC adaptors. 
917 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor( 918 InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); 919 920 return MIWP; 921 } 922 923 ModulePassManager 924 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, 925 ThinOrFullLTOPhase Phase) { 926 ModulePassManager MPM; 927 928 InlineParams IP = getInlineParamsFromOptLevel(Level); 929 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to 930 // disable hot callsite inline (as much as possible [1]) because it makes 931 // profile annotation in the backend inaccurate. 932 // 933 // [1] Note the cost of a function could be below zero due to erased 934 // prologue / epilogue. 935 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 936 PGOOpt->Action == PGOOptions::SampleUse) 937 IP.HotCallSiteThreshold = 0; 938 939 if (PGOOpt) 940 IP.EnableDeferral = EnablePGOInlineDeferral; 941 942 // The inline deferral logic is used to avoid losing some 943 // inlining chance in future. It is helpful in SCC inliner, in which 944 // inlining is processed in bottom-up order. 945 // While in module inliner, the inlining order is a priority-based order 946 // by default. The inline deferral is unnecessary there. So we disable the 947 // inline deferral logic in module inliner. 
948 IP.EnableDeferral = false; 949 950 MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); 951 952 MPM.addPass(createModuleToFunctionPassAdaptor( 953 buildFunctionSimplificationPipeline(Level, Phase), 954 PTO.EagerlyInvalidateAnalyses)); 955 956 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( 957 CoroSplitPass(Level != OptimizationLevel::O0))); 958 959 return MPM; 960 } 961 962 ModulePassManager 963 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, 964 ThinOrFullLTOPhase Phase) { 965 assert(Level != OptimizationLevel::O0 && 966 "Should not be used for O0 pipeline"); 967 968 assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink && 969 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!"); 970 971 ModulePassManager MPM; 972 973 // Place pseudo probe instrumentation as the first pass of the pipeline to 974 // minimize the impact of optimization changes. 975 if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 976 Phase != ThinOrFullLTOPhase::ThinLTOPostLink) 977 MPM.addPass(SampleProfileProbePass(TM)); 978 979 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); 980 981 // In ThinLTO mode, when flattened profile is used, all the available 982 // profile information will be annotated in PreLink phase so there is 983 // no need to load the profile again in PostLink. 984 bool LoadSampleProfile = 985 HasSampleProfile && 986 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); 987 988 // During the ThinLTO backend phase we perform early indirect call promotion 989 // here, before globalopt. Otherwise imported available_externally functions 990 // look unreferenced and are removed. If we are going to load the sample 991 // profile then defer until later. 992 // TODO: See if we can move later and consolidate with the location where 993 // we perform ICP when we are loading a sample profile. 
994 // TODO: We pass HasSampleProfile (whether there was a sample profile file 995 // passed to the compile) to the SamplePGO flag of ICP. This is used to 996 // determine whether the new direct calls are annotated with prof metadata. 997 // Ideally this should be determined from whether the IR is annotated with 998 // sample profile, and not whether the a sample profile was provided on the 999 // command line. E.g. for flattened profiles where we will not be reloading 1000 // the sample profile in the ThinLTO backend, we ideally shouldn't have to 1001 // provide the sample profile file. 1002 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) 1003 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); 1004 1005 // Create an early function pass manager to cleanup the output of the 1006 // frontend. Not necessary with LTO post link pipelines since the pre link 1007 // pipeline already cleaned up the frontend output. 1008 if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) { 1009 // Do basic inference of function attributes from known properties of system 1010 // libraries and other oracles. 1011 MPM.addPass(InferFunctionAttrsPass()); 1012 MPM.addPass(CoroEarlyPass()); 1013 1014 FunctionPassManager EarlyFPM; 1015 // Lower llvm.expect to metadata before attempting transforms. 1016 // Compare/branch metadata may alter the behavior of passes like 1017 // SimplifyCFG. 1018 EarlyFPM.addPass(LowerExpectIntrinsicPass()); 1019 EarlyFPM.addPass(SimplifyCFGPass()); 1020 EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 1021 EarlyFPM.addPass(EarlyCSEPass()); 1022 if (Level == OptimizationLevel::O3) 1023 EarlyFPM.addPass(CallSiteSplittingPass()); 1024 MPM.addPass(createModuleToFunctionPassAdaptor( 1025 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); 1026 } 1027 1028 if (LoadSampleProfile) { 1029 // Annotate sample profile right after early FPM to ensure freshness of 1030 // the debug info. 
1031 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, 1032 PGOOpt->ProfileRemappingFile, Phase)); 1033 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 1034 // RequireAnalysisPass for PSI before subsequent non-module passes. 1035 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 1036 // Do not invoke ICP in the LTOPrelink phase as it makes it hard 1037 // for the profile annotation to be accurate in the LTO backend. 1038 if (!isLTOPreLink(Phase)) 1039 // We perform early indirect call promotion here, before globalopt. 1040 // This is important for the ThinLTO backend phase because otherwise 1041 // imported available_externally functions look unreferenced and are 1042 // removed. 1043 MPM.addPass( 1044 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); 1045 } 1046 1047 // Try to perform OpenMP specific optimizations on the module. This is a 1048 // (quick!) no-op if there are no OpenMP runtime calls present in the module. 1049 MPM.addPass(OpenMPOptPass()); 1050 1051 if (AttributorRun & AttributorRunOption::MODULE) 1052 MPM.addPass(AttributorPass()); 1053 1054 // Lower type metadata and the type.test intrinsic in the ThinLTO 1055 // post link pipeline after ICP. This is to enable usage of the type 1056 // tests in ICP sequences. 1057 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) 1058 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1059 1060 invokePipelineEarlySimplificationEPCallbacks(MPM, Level); 1061 1062 // Interprocedural constant propagation now that basic cleanup has occurred 1063 // and prior to optimizing globals. 1064 // FIXME: This position in the pipeline hasn't been carefully considered in 1065 // years, it should be re-analyzed. 
1066 MPM.addPass(IPSCCPPass( 1067 IPSCCPOptions(/*AllowFuncSpec=*/ 1068 Level != OptimizationLevel::Os && 1069 Level != OptimizationLevel::Oz && 1070 !isLTOPreLink(Phase)))); 1071 1072 // Attach metadata to indirect call sites indicating the set of functions 1073 // they may target at run-time. This should follow IPSCCP. 1074 MPM.addPass(CalledValuePropagationPass()); 1075 1076 // Optimize globals to try and fold them into constants. 1077 MPM.addPass(GlobalOptPass()); 1078 1079 // Create a small function pass pipeline to cleanup after all the global 1080 // optimizations. 1081 FunctionPassManager GlobalCleanupPM; 1082 // FIXME: Should this instead by a run of SROA? 1083 GlobalCleanupPM.addPass(PromotePass()); 1084 GlobalCleanupPM.addPass(InstCombinePass()); 1085 invokePeepholeEPCallbacks(GlobalCleanupPM, Level); 1086 GlobalCleanupPM.addPass( 1087 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 1088 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), 1089 PTO.EagerlyInvalidateAnalyses)); 1090 1091 // Add all the requested passes for instrumentation PGO, if requested. 1092 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && 1093 (PGOOpt->Action == PGOOptions::IRInstr || 1094 PGOOpt->Action == PGOOptions::IRUse)) { 1095 addPGOInstrPasses(MPM, Level, 1096 /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, 1097 /* IsCS */ false, PGOOpt->ProfileFile, 1098 PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS); 1099 MPM.addPass(PGOIndirectCallPromotion(false, false)); 1100 } 1101 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && 1102 PGOOpt->CSAction == PGOOptions::CSIRInstr) 1103 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); 1104 1105 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && 1106 !PGOOpt->MemoryProfile.empty()) 1107 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); 1108 1109 // Synthesize function entry counts for non-PGO compilation. 
1110 if (EnableSyntheticCounts && !PGOOpt) 1111 MPM.addPass(SyntheticCountsPropagation()); 1112 1113 if (EnableModuleInliner) 1114 MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); 1115 else 1116 MPM.addPass(buildInlinerPipeline(Level, Phase)); 1117 1118 // Remove any dead arguments exposed by cleanups, constant folding globals, 1119 // and argument promotion. 1120 MPM.addPass(DeadArgumentEliminationPass()); 1121 1122 MPM.addPass(CoroCleanupPass()); 1123 1124 // Optimize globals now that functions are fully simplified. 1125 MPM.addPass(GlobalOptPass()); 1126 MPM.addPass(GlobalDCEPass()); 1127 1128 return MPM; 1129 } 1130 1131 /// TODO: Should LTO cause any differences to this set of passes? 1132 void PassBuilder::addVectorPasses(OptimizationLevel Level, 1133 FunctionPassManager &FPM, bool IsFullLTO) { 1134 FPM.addPass(LoopVectorizePass( 1135 LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); 1136 1137 if (IsFullLTO) { 1138 // The vectorizer may have significantly shortened a loop body; unroll 1139 // again. Unroll small loops to hide loop backedge latency and saturate any 1140 // parallel execution resources of an out-of-order processor. We also then 1141 // need to clean up redundancies and loop invariant code. 1142 // FIXME: It would be really good to use a loop-integrated instruction 1143 // combiner for cleanup here so that the unrolling and LICM can be pipelined 1144 // across the loop nests. 
1145 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll 1146 if (EnableUnrollAndJam && PTO.LoopUnrolling) 1147 FPM.addPass(createFunctionToLoopPassAdaptor( 1148 LoopUnrollAndJamPass(Level.getSpeedupLevel()))); 1149 FPM.addPass(LoopUnrollPass(LoopUnrollOptions( 1150 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, 1151 PTO.ForgetAllSCEVInLoopUnroll))); 1152 FPM.addPass(WarnMissedTransformationsPass()); 1153 // Now that we are done with loop unrolling, be it either by LoopVectorizer, 1154 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have 1155 // become constant-offset, thus enabling SROA and alloca promotion. Do so. 1156 // NOTE: we are very late in the pipeline, and we don't have any LICM 1157 // or SimplifyCFG passes scheduled after us, that would cleanup 1158 // the CFG mess this may created if allowed to modify CFG, so forbid that. 1159 FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); 1160 } 1161 1162 if (!IsFullLTO) { 1163 // Eliminate loads by forwarding stores from the previous iteration to loads 1164 // of the current iteration. 1165 FPM.addPass(LoopLoadEliminationPass()); 1166 } 1167 // Cleanup after the loop optimization passes. 1168 FPM.addPass(InstCombinePass()); 1169 1170 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { 1171 ExtraVectorPassManager ExtraPasses; 1172 // At higher optimization levels, try to clean up any runtime overlap and 1173 // alignment checks inserted by the vectorizer. We want to track correlated 1174 // runtime checks for two inner loops in the same outer loop, fold any 1175 // common computations, hoist loop-invariant aspects out of any outer loop, 1176 // and unswitch the runtime checks if possible. Once hoisted, we may have 1177 // dead (or speculatable) control flows or more combining opportunities. 
1178 ExtraPasses.addPass(EarlyCSEPass()); 1179 ExtraPasses.addPass(CorrelatedValuePropagationPass()); 1180 ExtraPasses.addPass(InstCombinePass()); 1181 LoopPassManager LPM; 1182 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 1183 /*AllowSpeculation=*/true)); 1184 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == 1185 OptimizationLevel::O3)); 1186 ExtraPasses.addPass( 1187 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, 1188 /*UseBlockFrequencyInfo=*/true)); 1189 ExtraPasses.addPass( 1190 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 1191 ExtraPasses.addPass(InstCombinePass()); 1192 FPM.addPass(std::move(ExtraPasses)); 1193 } 1194 1195 // Now that we've formed fast to execute loop structures, we do further 1196 // optimizations. These are run afterward as they might block doing complex 1197 // analyses and transforms such as what are needed for loop vectorization. 1198 1199 // Cleanup after loop vectorization, etc. Simplification passes like CVP and 1200 // GVN, loop transforms, and others have already run, so it's now better to 1201 // convert to more optimized IR using more aggressive simplify CFG options. 1202 // The extra sinking transform can create larger basic blocks, so do this 1203 // before SLP vectorization. 1204 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() 1205 .forwardSwitchCondToPhi(true) 1206 .convertSwitchRangeToICmp(true) 1207 .convertSwitchToLookupTable(true) 1208 .needCanonicalLoops(false) 1209 .hoistCommonInsts(true) 1210 .sinkCommonInsts(true))); 1211 1212 if (IsFullLTO) { 1213 FPM.addPass(SCCPPass()); 1214 FPM.addPass(InstCombinePass()); 1215 FPM.addPass(BDCEPass()); 1216 } 1217 1218 // Optimize parallel scalar instruction chains into SIMD instructions. 
1219 if (PTO.SLPVectorization) { 1220 FPM.addPass(SLPVectorizerPass()); 1221 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { 1222 FPM.addPass(EarlyCSEPass()); 1223 } 1224 } 1225 // Enhance/cleanup vector code. 1226 FPM.addPass(VectorCombinePass()); 1227 1228 if (!IsFullLTO) { 1229 FPM.addPass(InstCombinePass()); 1230 // Unroll small loops to hide loop backedge latency and saturate any 1231 // parallel execution resources of an out-of-order processor. We also then 1232 // need to clean up redundancies and loop invariant code. 1233 // FIXME: It would be really good to use a loop-integrated instruction 1234 // combiner for cleanup here so that the unrolling and LICM can be pipelined 1235 // across the loop nests. 1236 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll 1237 if (EnableUnrollAndJam && PTO.LoopUnrolling) { 1238 FPM.addPass(createFunctionToLoopPassAdaptor( 1239 LoopUnrollAndJamPass(Level.getSpeedupLevel()))); 1240 } 1241 FPM.addPass(LoopUnrollPass(LoopUnrollOptions( 1242 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, 1243 PTO.ForgetAllSCEVInLoopUnroll))); 1244 FPM.addPass(WarnMissedTransformationsPass()); 1245 // Now that we are done with loop unrolling, be it either by LoopVectorizer, 1246 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have 1247 // become constant-offset, thus enabling SROA and alloca promotion. Do so. 1248 // NOTE: we are very late in the pipeline, and we don't have any LICM 1249 // or SimplifyCFG passes scheduled after us, that would cleanup 1250 // the CFG mess this may created if allowed to modify CFG, so forbid that. 1251 FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); 1252 } 1253 1254 FPM.addPass(InstCombinePass()); 1255 1256 // This is needed for two reasons: 1257 // 1. It works around problems that instcombine introduces, such as sinking 1258 // expensive FP divides into loops containing multiplications using the 1259 // divide result. 1260 // 2. 
It helps to clean up some loop-invariant code created by the loop 1261 // unroll pass when IsFullLTO=false. 1262 FPM.addPass(createFunctionToLoopPassAdaptor( 1263 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 1264 /*AllowSpeculation=*/true), 1265 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false)); 1266 1267 // Now that we've vectorized and unrolled loops, we may have more refined 1268 // alignment information, try to re-derive it here. 1269 FPM.addPass(AlignmentFromAssumptionsPass()); 1270 } 1271 1272 ModulePassManager 1273 PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, 1274 ThinOrFullLTOPhase LTOPhase) { 1275 const bool LTOPreLink = isLTOPreLink(LTOPhase); 1276 ModulePassManager MPM; 1277 1278 // Run partial inlining pass to partially inline functions that have 1279 // large bodies. 1280 if (RunPartialInlining) 1281 MPM.addPass(PartialInlinerPass()); 1282 1283 // Remove avail extern fns and globals definitions since we aren't compiling 1284 // an object file for later LTO. For LTO we want to preserve these so they 1285 // are eligible for inlining at link-time. Note if they are unreferenced they 1286 // will be removed by GlobalDCE later, so this only impacts referenced 1287 // available externally globals. Eventually they will be suppressed during 1288 // codegen, but eliminating here enables more opportunity for GlobalDCE as it 1289 // may make globals referenced by available external functions dead and saves 1290 // running remaining passes on the eliminated functions. These should be 1291 // preserved during prelinking for link-time inlining decisions. 1292 if (!LTOPreLink) 1293 MPM.addPass(EliminateAvailableExternallyPass()); 1294 1295 if (EnableOrderFileInstrumentation) 1296 MPM.addPass(InstrOrderFilePass()); 1297 1298 // Do RPO function attribute inference across the module to forward-propagate 1299 // attributes where applicable. 1300 // FIXME: Is this really an optimization rather than a canonicalization? 
1301 MPM.addPass(ReversePostOrderFunctionAttrsPass()); 1302 1303 // Do a post inline PGO instrumentation and use pass. This is a context 1304 // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as 1305 // cross-module inline has not been done yet. The context sensitive 1306 // instrumentation is after all the inlines are done. 1307 if (!LTOPreLink && PGOOpt) { 1308 if (PGOOpt->CSAction == PGOOptions::CSIRInstr) 1309 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, 1310 /* IsCS */ true, PGOOpt->CSProfileGenFile, 1311 PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS); 1312 else if (PGOOpt->CSAction == PGOOptions::CSIRUse) 1313 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, 1314 /* IsCS */ true, PGOOpt->ProfileFile, 1315 PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS); 1316 } 1317 1318 // Re-compute GlobalsAA here prior to function passes. This is particularly 1319 // useful as the above will have inlined, DCE'ed, and function-attr 1320 // propagated everything. We should at this point have a reasonably minimal 1321 // and richly annotated call graph. By computing aliasing and mod/ref 1322 // information for all local globals here, the late loop passes and notably 1323 // the vectorizer will be able to use them to help recognize vectorizable 1324 // memory operations. 1325 MPM.addPass(RecomputeGlobalsAAPass()); 1326 1327 invokeOptimizerEarlyEPCallbacks(MPM, Level); 1328 1329 FunctionPassManager OptimizePM; 1330 OptimizePM.addPass(Float2IntPass()); 1331 OptimizePM.addPass(LowerConstantIntrinsicsPass()); 1332 1333 if (EnableMatrix) { 1334 OptimizePM.addPass(LowerMatrixIntrinsicsPass()); 1335 OptimizePM.addPass(EarlyCSEPass()); 1336 } 1337 1338 // CHR pass should only be applied with the profile information. 1339 // The check is to check the profile summary information in CHR. 
1340 if (EnableCHR && Level == OptimizationLevel::O3) 1341 OptimizePM.addPass(ControlHeightReductionPass()); 1342 1343 // FIXME: We need to run some loop optimizations to re-rotate loops after 1344 // simplifycfg and others undo their rotation. 1345 1346 // Optimize the loop execution. These passes operate on entire loop nests 1347 // rather than on each loop in an inside-out manner, and so they are actually 1348 // function passes. 1349 1350 invokeVectorizerStartEPCallbacks(OptimizePM, Level); 1351 1352 LoopPassManager LPM; 1353 // First rotate loops that may have been un-rotated by prior passes. 1354 // Disable header duplication at -Oz. 1355 LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink)); 1356 // Some loops may have become dead by now. Try to delete them. 1357 // FIXME: see discussion in https://reviews.llvm.org/D112851, 1358 // this may need to be revisited once we run GVN before loop deletion 1359 // in the simplification pipeline. 1360 LPM.addPass(LoopDeletionPass()); 1361 OptimizePM.addPass(createFunctionToLoopPassAdaptor( 1362 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); 1363 1364 // Distribute loops to allow partial vectorization. I.e. isolate dependences 1365 // into separate loop that would otherwise inhibit vectorization. This is 1366 // currently only performed for loops marked with the metadata 1367 // llvm.loop.distribute=true or when -enable-loop-distribute is specified. 1368 OptimizePM.addPass(LoopDistributePass()); 1369 1370 // Populates the VFABI attribute with the scalar-to-vector mappings 1371 // from the TargetLibraryInfo. 1372 OptimizePM.addPass(InjectTLIMappings()); 1373 1374 addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); 1375 1376 // LoopSink pass sinks instructions hoisted by LICM, which serves as a 1377 // canonicalization pass that enables other optimizations. 
As a result, 1378 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM 1379 // result too early. 1380 OptimizePM.addPass(LoopSinkPass()); 1381 1382 // And finally clean up LCSSA form before generating code. 1383 OptimizePM.addPass(InstSimplifyPass()); 1384 1385 // This hoists/decomposes div/rem ops. It should run after other sink/hoist 1386 // passes to avoid re-sinking, but before SimplifyCFG because it can allow 1387 // flattening of blocks. 1388 OptimizePM.addPass(DivRemPairsPass()); 1389 1390 // Try to annotate calls that were created during optimization. 1391 OptimizePM.addPass(TailCallElimPass()); 1392 1393 // LoopSink (and other loop passes since the last simplifyCFG) might have 1394 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 1395 OptimizePM.addPass( 1396 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 1397 1398 // Add the core optimizing pipeline. 1399 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), 1400 PTO.EagerlyInvalidateAnalyses)); 1401 1402 invokeOptimizerLastEPCallbacks(MPM, Level); 1403 1404 // Split out cold code. Splitting is done late to avoid hiding context from 1405 // other optimizations and inadvertently regressing performance. The tradeoff 1406 // is that this has a higher code size cost than splitting early. 1407 if (EnableHotColdSplit && !LTOPreLink) 1408 MPM.addPass(HotColdSplittingPass()); 1409 1410 // Search the code for similar regions of code. If enough similar regions can 1411 // be found where extracting the regions into their own function will decrease 1412 // the size of the program, we extract the regions, a deduplicate the 1413 // structurally similar regions. 1414 if (EnableIROutliner) 1415 MPM.addPass(IROutlinerPass()); 1416 1417 // Merge functions if requested. 1418 if (PTO.MergeFunctions) 1419 MPM.addPass(MergeFunctionsPass()); 1420 1421 // Now we need to do some global optimization transforms. 
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
// ordering here.
  MPM.addPass(GlobalDCEPass());
  MPM.addPass(ConstantMergePass());

  if (PTO.CallGraphProfile && !LTOPreLink)
    MPM.addPass(CGProfilePass());

  // TODO: Relative look table converter pass caused an issue when full lto is
  // enabled. See https://reviews.llvm.org/D94355 for more details.
  // Until the issue fixed, disable this pass during pre-linking phase.
  if (!LTOPreLink)
    MPM.addPass(RelLookupTableConverterPass());

  return MPM;
}

/// Build the default per-module pipeline for \p Level.
///
/// At O0 this defers entirely to buildO0DefaultPipeline. Otherwise the
/// pipeline is: annotation/attribute setup, pipeline-start callbacks, the
/// module simplification pipeline, then the module optimization pipeline.
/// When \p LTOPreLink is true both sub-pipelines are built for the
/// FullLTOPreLink phase and the required LTO pre-link passes are appended.
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                           bool LTOPreLink) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, LTOPreLink);

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  invokePipelineStartEPCallbacks(MPM, Level);

  // The same phase value is handed to both sub-pipelines below.
  const ThinOrFullLTOPhase LTOPhase = LTOPreLink
                                          ? ThinOrFullLTOPhase::FullLTOPreLink
                                          : ThinOrFullLTOPhase::None;
  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);
  return MPM;
}

/// Build the FatLTO pipeline for \p Level.
///
/// The pipeline consists of an EmbedBitcodePass constructed from \p ThinLTO,
/// \p EmitSummary and the matching (ThinLTO or full LTO) pre-link pipeline,
/// followed by the regular per-module default pipeline.
ModulePassManager
PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
                                        bool EmitSummary) {
  ModulePassManager MPM;
  MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary,
                               ThinLTO
                                   ? buildThinLTOPreLinkDefaultPipeline(Level)
                                   : buildLTOPreLinkDefaultPipeline(Level)));
  // NOTE(review): LTOPreLink is not passed here, so the declaration's default
  // applies -- presumably false; confirm against the header.
  MPM.addPass(buildPerModuleDefaultPipeline(Level));
  return MPM;
}

/// Build the ThinLTO pre-link pipeline for \p Level.
///
/// At O0 this defers to buildO0DefaultPipeline with LTOPreLink set. Otherwise
/// only the module simplification pipeline is run (no module optimization
/// pipeline), followed by the optimizer early/last callbacks and the required
/// LTO pre-link passes.
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  if (Level == OptimizationLevel::O0)
    return buildO0DefaultPipeline(Level, /*LTOPreLink=*/true);

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  invokePipelineStartEPCallbacks(MPM, Level);

  // If we are planning to perform ThinLTO later, we don't bloat the code with
  // unrolling/vectorization/... now. Just simplify the module as much as we
  // can.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPreLink));

  // Run partial inlining pass to partially inline functions that have
  // large bodies.
  // FIXME: It isn't clear whether this is really the right place to run this
  // in ThinLTO. Because there is another canonicalization and simplification
  // phase that will run after the thin link, running this here ends up with
  // less information than will be available later and it may grow functions in
  // ways that aren't beneficial.
  if (RunPartialInlining)
    MPM.addPass(PartialInlinerPass());

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
  // optimization is going to be done in PostLink stage, but clang can't add
  // callbacks there in case of in-process ThinLTO called by linker.
  invokeOptimizerEarlyEPCallbacks(MPM, Level);
  invokeOptimizerLastEPCallbacks(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  addRequiredLTOPreLinkPasses(MPM);

  return MPM;
}

/// Build the ThinLTO post-link (backend) pipeline for \p Level.
///
/// When \p ImportSummary is non-null, the summary-driven passes (optional
/// MemProf context disambiguation, WholeProgramDevirt, LowerTypeTests) are
/// scheduled first. At O0 only a minimal cleanup sequence follows; otherwise
/// the simplification and optimization pipelines are added for the
/// ThinLTOPostLink phase.
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager MPM;

  if (ImportSummary) {
    // For ThinLTO we must apply the context disambiguation decisions early, to
    // ensure we can correctly match the callsites to summary data.
    if (EnableMemProfContextDisambiguation)
      MPM.addPass(MemProfContextDisambiguation(ImportSummary));

    // These passes import type identifier resolutions for whole-program
    // devirtualization and CFI. They must run early because other passes may
    // disturb the specific instruction patterns that these passes look for,
    // creating dependencies on resolutions that may not appear in the summary.
    //
    // For example, GVN may transform the pattern assume(type.test) appearing in
    // two basic blocks into assume(phi(type.test, type.test)), which would
    // transform a dependency on a WPD resolution into a dependency on a type
    // identifier resolution for CFI.
    //
    // Also, WPD has access to more precise information than ICP and can
    // devirtualize more effectively, so it should operate on the IR first.
    //
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
  }

  // The O0 path is taken regardless of whether ImportSummary was provided.
  if (Level == OptimizationLevel::O0) {
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
    // Drop available_externally and unreferenced globals. This is necessary
    // with ThinLTO in order to avoid leaving undefined references to dead
    // globals in the object file.
    MPM.addPass(EliminateAvailableExternallyPass());
    MPM.addPass(GlobalDCEPass());
    return MPM;
  }

  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}

/// Build the full LTO pre-link pipeline for \p Level.
///
/// Currently this is just the per-module default pipeline with LTOPreLink
/// enabled.
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  // FIXME: We should use a customized pre-link pipeline!
  return buildPerModuleDefaultPipeline(Level,
                                       /* LTOPreLink */ true);
}

/// Build the full LTO post-link pipeline for \p Level.
///
/// \p ExportSummary is forwarded as the first argument to the
/// WholeProgramDevirtPass and LowerTypeTestsPass instances scheduled here.
///
/// The function has three exits: an early one at O0 (only CFI/WPD lowering),
/// an early one at O1 (after whole-program devirtualization), and the full
/// pipeline for higher levels. Every exit invokes the "last" full-LTO
/// extension-point callbacks and then adds the annotation remarks pass.
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                     ModuleSummaryIndex *ExportSummary) {
  ModulePassManager MPM;

  invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);

  // Create a function that performs CFI checks for cross-DSO calls with targets
  // in the current module.
  MPM.addPass(CrossDSOCFIPass());

  if (Level == OptimizationLevel::O0) {
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
    // Load sample profile before running the LTO optimization pipeline.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile,
                                        ThinOrFullLTOPhase::FullLTOPostLink));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
  }

  // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Remove unused virtual tables to improve the quality of code generated by
  // whole-program devirtualization and bitset lowering.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // Do basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());

  if (Level.getSpeedupLevel() > 1) {
    MPM.addPass(createModuleToFunctionPassAdaptor(
        CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));

    // Indirect call promotion. This should promote all the targets that are
    // left by the earlier promotion pass that promotes intra-module targets.
    // This two-step promotion is to save the compile time. For LTO, it should
    // produce the same result as if we only do promotion here.
    MPM.addPass(PGOIndirectCallPromotion(
        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));

    // Propagate constants at call sites into the functions they call. This
    // opens opportunities for globalopt (and inlining) by substituting function
    // pointers passed as arguments to direct uses of functions.
    // AllowFuncSpec is disabled for the size-oriented levels (Os/Oz).
    MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
                                         Level != OptimizationLevel::Os &&
                                         Level != OptimizationLevel::Oz)));

    // Attach metadata to indirect call sites indicating the set of functions
    // they may target at run-time. This should follow IPSCCP.
    MPM.addPass(CalledValuePropagationPass());
  }

  // Now deduce any function attributes based on the current code.
  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Do RPO function attribute inference across the module to forward-propagate
  // attributes where applicable.
  // FIXME: Is this really an optimization rather than a canonicalization?
  MPM.addPass(ReversePostOrderFunctionAttrsPass());

  // Use in-range annotations on GEP indices to split globals where beneficial.
  MPM.addPass(GlobalSplitPass());

  // Run whole program optimization of virtual call when the list of callees
  // is fixed.
  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));

  // Stop here at -O1.
  if (Level == OptimizationLevel::O1) {
    // The LowerTypeTestsPass needs to run to lower type metadata and the
    // type.test intrinsics. The pass does nothing if CFI is disabled.
    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP (which is performed earlier than this in the regular LTO
    // pipeline).
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

    invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

    // Emit annotation remarks.
    addAnnotationRemarksPass(MPM);

    return MPM;
  }

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Linking modules together can lead to duplicate global constants; only
  // keep one copy of each constant.
  MPM.addPass(ConstantMergePass());

  // Remove unused arguments from functions.
  MPM.addPass(DeadArgumentEliminationPass());

  // Reduce the code after globalopt and ipsccp. Both can open up significant
  // simplification opportunities, and both can propagate functions through
  // function pointers. When this happens, we often have to resolve varargs
  // calls, etc, so let instcombine do this.
  FunctionPassManager PeepholeFPM;
  PeepholeFPM.addPass(InstCombinePass());
  if (Level.getSpeedupLevel() > 1)
    PeepholeFPM.addPass(AggressiveInstCombinePass());
  invokePeepholeEPCallbacks(PeepholeFPM, Level);

  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Note: historically, the PruneEH pass was run first to deduce nounwind and
  // generally clean up exception handling overhead. It isn't clear this is
  // valuable as the inliner doesn't currently care whether it is inlining an
  // invoke or a call.
  // Run the inliner now.
  if (EnableModuleInliner) {
    MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
                                  UseInlineAdvisor,
                                  ThinOrFullLTOPhase::FullLTOPostLink));
  } else {
    MPM.addPass(ModuleInlinerWrapperPass(
        getInlineParamsFromOptLevel(Level),
        /* MandatoryFirst */ true,
        InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
                      InlinePass::CGSCCInliner}));
  }

  // Perform context disambiguation after inlining, since that would reduce the
  // amount of additional cloning required to distinguish the allocation
  // contexts.
  if (EnableMemProfContextDisambiguation)
    MPM.addPass(MemProfContextDisambiguation());

  // Optimize globals again after we ran the inliner.
  MPM.addPass(GlobalOptPass());

  // Run the OpenMPOpt pass again after global optimizations.
  MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));

  // Garbage collect dead functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  // If we didn't decide to inline a function, check to see if we can
  // transform it to pass arguments by value instead of by reference.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));

  // FPM is populated over the next several statements and only moved into MPM
  // further below.
  FunctionPassManager FPM;
  // The IPO Passes may leave cruft around. Clean up after them.
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  if (EnableConstraintElimination)
    FPM.addPass(ConstraintEliminationPass());

  FPM.addPass(JumpThreadingPass());

  // Do a post inline PGO instrumentation and use pass. This is a context
  // sensitive PGO pass.
  // Note: these passes are added to MPM directly, i.e. before the FPM adaptor
  // that is added to MPM below, even though FPM is still being populated here.
  if (PGOOpt) {
    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
                        PGOOpt->ProfileRemappingFile,
                        ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
                        /* IsCS */ true, PGOOpt->ProfileFile,
                        PGOOpt->ProfileRemappingFile,
                        ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
  }

  // Break up allocas
  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));

  // LTO provides additional opportunities for tailcall elimination due to
  // link-time inlining, and visibility of nocapture attribute.
  FPM.addPass(TailCallElimPass());

  // Run a few AA driver optimizations here and now to cleanup the code.
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
                                                PTO.EagerlyInvalidateAnalyses));

  MPM.addPass(
      createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));

  // Require the GlobalsAA analysis for the module so we can query it within
  // MainFPM.
  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
  // Invalidate AAManager so it can be recreated and pick up the newly available
  // GlobalsAA.
  MPM.addPass(
      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));

  FunctionPassManager MainFPM;
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));

  if (RunNewGVN)
    MainFPM.addPass(NewGVNPass());
  else
    MainFPM.addPass(GVNPass());

  // Remove dead memcpy()'s.
  MainFPM.addPass(MemCpyOptPass());

  // Nuke dead stores.
  MainFPM.addPass(DSEPass());
  MainFPM.addPass(MoveAutoInitPass());
  MainFPM.addPass(MergedLoadStoreMotionPass());

  LoopPassManager LPM;
  if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
    LPM.addPass(LoopFlattenPass());
  LPM.addPass(IndVarSimplifyPass());
  LPM.addPass(LoopDeletionPass());
  // FIXME: Add loop interchange.

  // Unroll small loops and perform peeling.
  LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
                                 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
                                 PTO.ForgetAllSCEVInLoopUnroll));
  // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
  // *All* loop passes must preserve it, in order to be able to use it.
  MainFPM.addPass(createFunctionToLoopPassAdaptor(
      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));

  MainFPM.addPass(LoopDistributePass());

  addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);

  // Run the OpenMPOpt CGSCC pass again late.
  // Note: this is added to MPM before the MainFPM adaptor, which is only
  // added to MPM a few statements below.
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
      OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));

  invokePeepholeEPCallbacks(MainFPM, Level);
  MainFPM.addPass(JumpThreadingPass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Lower type metadata and the type.test intrinsic. This pass supports
  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
  // to be run at link time if CFI is enabled. This pass does nothing if
  // CFI is disabled.
  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
  // Run a second time to clean up any type tests left behind by WPD for use
  // in ICP (which is performed earlier than this in the regular LTO pipeline).
  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  // Enable splitting late in the FullLTO post-link pipeline.
  if (EnableHotColdSplit)
    MPM.addPass(HotColdSplittingPass());

  // Add late LTO optimization passes.
  FunctionPassManager LateFPM;

  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
  // canonicalization pass that enables other optimizations. As a result,
  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
  // result too early.
  LateFPM.addPass(LoopSinkPass());

  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
  // flattening of blocks.
  LateFPM.addPass(DivRemPairsPass());

  // Delete basic blocks, which optimization passes may have killed.
  LateFPM.addPass(SimplifyCFGPass(
      SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
          true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));

  // Drop bodies of available externally objects to improve GlobalDCE.
  MPM.addPass(EliminateAvailableExternallyPass());

  // Now that we have optimized the program, discard unreachable functions.
  MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (PTO.CallGraphProfile)
    MPM.addPass(CGProfilePass());

  invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}

/// Build the pipeline used at O0; \p Level must be O0 (asserted).
///
/// The pipeline contains profiling instrumentation requested through PGOOpt,
/// the always-inliner, optional function merging and matrix intrinsic
/// lowering, the coroutine passes, and the registered extension-point
/// callbacks. When \p LTOPreLink is true the required LTO pre-link passes are
/// appended before the final annotation remarks pass.
ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
                                                      bool LTOPreLink) {
  assert(Level == OptimizationLevel::O0 &&
         "buildO0DefaultPipeline should only be used with O0");

  ModulePassManager MPM;

  // Perform pseudo probe instrumentation in O0 mode. This is for the
  // consistency between different build modes. For example, a LTO build can be
  // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
  // the postlink will require pseudo probe instrumentation in the prelink.
  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
    MPM.addPass(SampleProfileProbePass(TM));

  // IR-level PGO: the same helper handles both instrumentation (IRInstr) and
  // profile use (IRUse); RunProfileGen selects between them.
  if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
                 PGOOpt->Action == PGOOptions::IRUse))
    addPGOInstrPassesForO0(
        MPM,
        /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
        /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
        PGOOpt->FS);

  invokePipelineStartEPCallbacks(MPM, Level);

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  invokePipelineEarlySimplificationEPCallbacks(MPM, Level);

  // Build a minimal pipeline based on the semantics required by LLVM,
  // which is just that always inlining occurs. Further, disable generating
  // lifetime intrinsics to avoid enabling further optimizations during
  // code generation.
  MPM.addPass(AlwaysInlinerPass(
      /*InsertLifetimeIntrinsics=*/false));

  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  if (EnableMatrix)
    MPM.addPass(
        createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));

  // Extension-point callbacks are still honored at O0. For each extension
  // point a temporary pass manager is filled by the callbacks and is only
  // adapted into MPM if the callbacks actually added something.
  if (!CGSCCOptimizerLateEPCallbacks.empty()) {
    CGSCCPassManager CGPM;
    invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
    if (!CGPM.isEmpty())
      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  }
  if (!LateLoopOptimizationsEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!LoopOptimizerEndEPCallbacks.empty()) {
    LoopPassManager LPM;
    invokeLoopOptimizerEndEPCallbacks(LPM, Level);
    if (!LPM.isEmpty()) {
      MPM.addPass(createModuleToFunctionPassAdaptor(
          createFunctionToLoopPassAdaptor(std::move(LPM))));
    }
  }
  if (!ScalarOptimizerLateEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeScalarOptimizerLateEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  invokeOptimizerEarlyEPCallbacks(MPM, Level);

  if (!VectorizerStartEPCallbacks.empty()) {
    FunctionPassManager FPM;
    invokeVectorizerStartEPCallbacks(FPM, Level);
    if (!FPM.isEmpty())
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
  }

  // The coroutine passes are collected in their own module pass manager and
  // added to MPM through CoroConditionalWrapper.
  ModulePassManager CoroPM;
  CoroPM.addPass(CoroEarlyPass());
  CGSCCPassManager CGPM;
  CGPM.addPass(CoroSplitPass());
  CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
  CoroPM.addPass(CoroCleanupPass());
  CoroPM.addPass(GlobalDCEPass());
  MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));

  invokeOptimizerLastEPCallbacks(MPM, Level);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);

  // Emit annotation remarks.
  MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));

  return MPM;
}

/// Build the default alias analysis pipeline.
///
/// Registers BasicAA, ScopedNoAliasAA and TypeBasedAA (in that order), then
/// GlobalsAA when EnableGlobalAnalyses is set, and finally lets the target
/// register its own analyses when TM is non-null.
AAManager PassBuilder::buildDefaultAAPipeline() {
  AAManager AA;

  // The order in which these are registered determines their priority when
  // being queried.

  // First we register the basic alias analysis that provides the majority of
  // per-function local AA logic. This is a stateless, on-demand local set of
  // AA techniques.
  AA.registerFunctionAnalysis<BasicAA>();

  // Next we query fast, specialized alias analyses that wrap IR-embedded
  // information about aliasing.
  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
  AA.registerFunctionAnalysis<TypeBasedAA>();

  // Add support for querying global aliasing information when available.
  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
  // analysis, all that the `AAManager` can do is query for any *cached*
  // results from `GlobalsAA` through a readonly proxy.
  if (EnableGlobalAnalyses)
    AA.registerModuleAnalysis<GlobalsAA>();

  // Add target-specific alias analyses.
  if (TM)
    TM->registerDefaultAliasAnalyses(AA);

  return AA;
}