1 //===- Construction of pass pipelines -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file provides the implementation of the PassBuilder based on our 11 /// static pass registry as well as related functionality. It also provides 12 /// helpers to aid in analyzing, debugging, and testing passes and pass 13 /// pipelines. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Analysis/AliasAnalysis.h" 18 #include "llvm/Analysis/BasicAliasAnalysis.h" 19 #include "llvm/Analysis/CGSCCPassManager.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/InlineAdvisor.h" 22 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 23 #include "llvm/Analysis/ProfileSummaryInfo.h" 24 #include "llvm/Analysis/ScopedNoAliasAA.h" 25 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 26 #include "llvm/IR/PassManager.h" 27 #include "llvm/Passes/OptimizationLevel.h" 28 #include "llvm/Passes/PassBuilder.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/PGOOptions.h" 32 #include "llvm/Target/TargetMachine.h" 33 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 34 #include "llvm/Transforms/Coroutines/CoroCleanup.h" 35 #include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h" 36 #include "llvm/Transforms/Coroutines/CoroEarly.h" 37 #include "llvm/Transforms/Coroutines/CoroElide.h" 38 #include "llvm/Transforms/Coroutines/CoroSplit.h" 39 #include "llvm/Transforms/IPO/AlwaysInliner.h" 40 #include "llvm/Transforms/IPO/Annotation2Metadata.h" 41 #include "llvm/Transforms/IPO/ArgumentPromotion.h" 42 #include 
"llvm/Transforms/IPO/Attributor.h" 43 #include "llvm/Transforms/IPO/CalledValuePropagation.h" 44 #include "llvm/Transforms/IPO/ConstantMerge.h" 45 #include "llvm/Transforms/IPO/CrossDSOCFI.h" 46 #include "llvm/Transforms/IPO/DeadArgumentElimination.h" 47 #include "llvm/Transforms/IPO/ElimAvailExtern.h" 48 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" 49 #include "llvm/Transforms/IPO/FunctionAttrs.h" 50 #include "llvm/Transforms/IPO/GlobalDCE.h" 51 #include "llvm/Transforms/IPO/GlobalOpt.h" 52 #include "llvm/Transforms/IPO/GlobalSplit.h" 53 #include "llvm/Transforms/IPO/HotColdSplitting.h" 54 #include "llvm/Transforms/IPO/IROutliner.h" 55 #include "llvm/Transforms/IPO/InferFunctionAttrs.h" 56 #include "llvm/Transforms/IPO/Inliner.h" 57 #include "llvm/Transforms/IPO/LowerTypeTests.h" 58 #include "llvm/Transforms/IPO/MergeFunctions.h" 59 #include "llvm/Transforms/IPO/ModuleInliner.h" 60 #include "llvm/Transforms/IPO/OpenMPOpt.h" 61 #include "llvm/Transforms/IPO/PartialInlining.h" 62 #include "llvm/Transforms/IPO/SCCP.h" 63 #include "llvm/Transforms/IPO/SampleProfile.h" 64 #include "llvm/Transforms/IPO/SampleProfileProbe.h" 65 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" 66 #include "llvm/Transforms/IPO/WholeProgramDevirt.h" 67 #include "llvm/Transforms/InstCombine/InstCombine.h" 68 #include "llvm/Transforms/Instrumentation/CGProfile.h" 69 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 70 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" 71 #include "llvm/Transforms/Instrumentation/InstrProfiling.h" 72 #include "llvm/Transforms/Instrumentation/MemProfiler.h" 73 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" 74 #include "llvm/Transforms/Scalar/ADCE.h" 75 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" 76 #include "llvm/Transforms/Scalar/AnnotationRemarks.h" 77 #include "llvm/Transforms/Scalar/BDCE.h" 78 #include "llvm/Transforms/Scalar/CallSiteSplitting.h" 79 #include 
"llvm/Transforms/Scalar/ConstraintElimination.h" 80 #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" 81 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 82 #include "llvm/Transforms/Scalar/DeadStoreElimination.h" 83 #include "llvm/Transforms/Scalar/DivRemPairs.h" 84 #include "llvm/Transforms/Scalar/EarlyCSE.h" 85 #include "llvm/Transforms/Scalar/Float2Int.h" 86 #include "llvm/Transforms/Scalar/GVN.h" 87 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 88 #include "llvm/Transforms/Scalar/InstSimplifyPass.h" 89 #include "llvm/Transforms/Scalar/JumpThreading.h" 90 #include "llvm/Transforms/Scalar/LICM.h" 91 #include "llvm/Transforms/Scalar/LoopDeletion.h" 92 #include "llvm/Transforms/Scalar/LoopDistribute.h" 93 #include "llvm/Transforms/Scalar/LoopFlatten.h" 94 #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" 95 #include "llvm/Transforms/Scalar/LoopInstSimplify.h" 96 #include "llvm/Transforms/Scalar/LoopInterchange.h" 97 #include "llvm/Transforms/Scalar/LoopLoadElimination.h" 98 #include "llvm/Transforms/Scalar/LoopPassManager.h" 99 #include "llvm/Transforms/Scalar/LoopRotation.h" 100 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" 101 #include "llvm/Transforms/Scalar/LoopSink.h" 102 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" 103 #include "llvm/Transforms/Scalar/LoopUnrollPass.h" 104 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" 105 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" 106 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" 107 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" 108 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" 109 #include "llvm/Transforms/Scalar/NewGVN.h" 110 #include "llvm/Transforms/Scalar/Reassociate.h" 111 #include "llvm/Transforms/Scalar/SCCP.h" 112 #include "llvm/Transforms/Scalar/SROA.h" 113 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 114 #include "llvm/Transforms/Scalar/SimplifyCFG.h" 115 #include 
"llvm/Transforms/Scalar/SpeculativeExecution.h" 116 #include "llvm/Transforms/Scalar/TailRecursionElimination.h" 117 #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" 118 #include "llvm/Transforms/Utils/AddDiscriminators.h" 119 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 120 #include "llvm/Transforms/Utils/CanonicalizeAliases.h" 121 #include "llvm/Transforms/Utils/InjectTLIMappings.h" 122 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" 123 #include "llvm/Transforms/Utils/Mem2Reg.h" 124 #include "llvm/Transforms/Utils/NameAnonGlobals.h" 125 #include "llvm/Transforms/Utils/RelLookupTableConverter.h" 126 #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" 127 #include "llvm/Transforms/Vectorize/LoopVectorize.h" 128 #include "llvm/Transforms/Vectorize/SLPVectorizer.h" 129 #include "llvm/Transforms/Vectorize/VectorCombine.h" 130 131 using namespace llvm; 132 133 static cl::opt<InliningAdvisorMode> UseInlineAdvisor( 134 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, 135 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), 136 cl::values(clEnumValN(InliningAdvisorMode::Default, "default", 137 "Heuristics-based inliner version"), 138 clEnumValN(InliningAdvisorMode::Development, "development", 139 "Use development mode (runtime-loadable model)"), 140 clEnumValN(InliningAdvisorMode::Release, "release", 141 "Use release mode (AOT-compiled model)"))); 142 143 static cl::opt<bool> EnableSyntheticCounts( 144 "enable-npm-synthetic-counts", cl::Hidden, 145 cl::desc("Run synthetic function entry count generation " 146 "pass")); 147 148 /// Flag to enable inline deferral during PGO. 
149 static cl::opt<bool> 150 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), 151 cl::Hidden, 152 cl::desc("Enable inline deferral during PGO")); 153 154 static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden, 155 cl::desc("Enable memory profiler")); 156 157 static cl::opt<bool> EnableModuleInliner("enable-module-inliner", 158 cl::init(false), cl::Hidden, 159 cl::desc("Enable module inliner")); 160 161 static cl::opt<bool> PerformMandatoryInliningsFirst( 162 "mandatory-inlining-first", cl::init(true), cl::Hidden, 163 cl::desc("Perform mandatory inlinings module-wide, before performing " 164 "inlining")); 165 166 static cl::opt<bool> EnableO3NonTrivialUnswitching( 167 "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, 168 cl::desc("Enable non-trivial loop unswitching for -O3")); 169 170 static cl::opt<bool> EnableEagerlyInvalidateAnalyses( 171 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden, 172 cl::desc("Eagerly invalidate more analyses in default pipelines")); 173 174 static cl::opt<bool> EnableNoRerunSimplificationPipeline( 175 "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden, 176 cl::desc( 177 "Prevent running the simplification pipeline on a function more " 178 "than once in the case that SCC mutations cause a function to be " 179 "visited multiple times as long as the function has not been changed")); 180 181 static cl::opt<bool> EnableMergeFunctions( 182 "enable-merge-functions", cl::init(false), cl::Hidden, 183 cl::desc("Enable function merging as part of the optimization pipeline")); 184 185 static cl::opt<bool> EnablePostPGOLoopRotation( 186 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, 187 cl::desc("Run the loop rotation transformation after PGO instrumentation")); 188 189 static cl::opt<bool> EnableGlobalAnalyses( 190 "enable-global-analyses", cl::init(true), cl::Hidden, 191 cl::desc("Enable inter-procedural analyses")); 192 193 static cl::opt<bool> 194 
RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, 195 cl::desc("Run Partial inlinining pass")); 196 197 static cl::opt<bool> ExtraVectorizerPasses( 198 "extra-vectorizer-passes", cl::init(false), cl::Hidden, 199 cl::desc("Run cleanup optimization passes after vectorization")); 200 201 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, 202 cl::desc("Run the NewGVN pass")); 203 204 static cl::opt<bool> EnableLoopInterchange( 205 "enable-loopinterchange", cl::init(false), cl::Hidden, 206 cl::desc("Enable the experimental LoopInterchange Pass")); 207 208 static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam", 209 cl::init(false), cl::Hidden, 210 cl::desc("Enable Unroll And Jam Pass")); 211 212 static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false), 213 cl::Hidden, 214 cl::desc("Enable the LoopFlatten Pass")); 215 216 static cl::opt<bool> 217 EnableDFAJumpThreading("enable-dfa-jump-thread", 218 cl::desc("Enable DFA jump threading"), 219 cl::init(false), cl::Hidden); 220 221 static cl::opt<bool> 222 EnableHotColdSplit("hot-cold-split", 223 cl::desc("Enable hot-cold splitting pass")); 224 225 static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), 226 cl::Hidden, 227 cl::desc("Enable ir outliner pass")); 228 229 static cl::opt<bool> 230 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, 231 cl::desc("Disable pre-instrumentation inliner")); 232 233 static cl::opt<int> PreInlineThreshold( 234 "preinline-threshold", cl::Hidden, cl::init(75), 235 cl::desc("Control the amount of inlining in pre-instrumentation inliner " 236 "(default = 75)")); 237 238 static cl::opt<bool> 239 EnableGVNHoist("enable-gvn-hoist", 240 cl::desc("Enable the GVN hoisting pass (default = off)")); 241 242 static cl::opt<bool> 243 EnableGVNSink("enable-gvn-sink", 244 cl::desc("Enable the GVN sinking pass (default = off)")); 245 246 // This option is used in simplifying testing SampleFDO 
optimizations for 247 // profile loading. 248 static cl::opt<bool> 249 EnableCHR("enable-chr", cl::init(true), cl::Hidden, 250 cl::desc("Enable control height reduction optimization (CHR)")); 251 252 static cl::opt<bool> FlattenedProfileUsed( 253 "flattened-profile-used", cl::init(false), cl::Hidden, 254 cl::desc("Indicate the sample profile being used is flattened, i.e., " 255 "no inline hierachy exists in the profile")); 256 257 static cl::opt<bool> EnableOrderFileInstrumentation( 258 "enable-order-file-instrumentation", cl::init(false), cl::Hidden, 259 cl::desc("Enable order file instrumentation (default = off)")); 260 261 static cl::opt<bool> 262 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, 263 cl::desc("Enable lowering of the matrix intrinsics")); 264 265 static cl::opt<bool> EnableConstraintElimination( 266 "enable-constraint-elimination", cl::init(false), cl::Hidden, 267 cl::desc( 268 "Enable pass to eliminate conditions based on linear constraints")); 269 270 static cl::opt<AttributorRunOption> AttributorRun( 271 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), 272 cl::desc("Enable the attributor inter-procedural deduction pass"), 273 cl::values(clEnumValN(AttributorRunOption::ALL, "all", 274 "enable all attributor runs"), 275 clEnumValN(AttributorRunOption::MODULE, "module", 276 "enable module-wide attributor runs"), 277 clEnumValN(AttributorRunOption::CGSCC, "cgscc", 278 "enable call graph SCC attributor runs"), 279 clEnumValN(AttributorRunOption::NONE, "none", 280 "disable attributor runs"))); 281 282 PipelineTuningOptions::PipelineTuningOptions() { 283 LoopInterleaving = true; 284 LoopVectorization = true; 285 SLPVectorization = false; 286 LoopUnrolling = true; 287 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; 288 LicmMssaOptCap = SetLicmMssaOptCap; 289 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; 290 CallGraphProfile = true; 291 MergeFunctions = EnableMergeFunctions; 292 InlinerThreshold = -1; 
293 EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; 294 } 295 296 namespace llvm { 297 extern cl::opt<unsigned> MaxDevirtIterations; 298 extern cl::opt<bool> EnableKnowledgeRetention; 299 } // namespace llvm 300 301 void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, 302 OptimizationLevel Level) { 303 for (auto &C : PeepholeEPCallbacks) 304 C(FPM, Level); 305 } 306 307 // Helper to add AnnotationRemarksPass. 308 static void addAnnotationRemarksPass(ModulePassManager &MPM) { 309 MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); 310 } 311 312 // Helper to check if the current compilation phase is preparing for LTO 313 static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { 314 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || 315 Phase == ThinOrFullLTOPhase::FullLTOPreLink; 316 } 317 318 // TODO: Investigate the cost/benefit of tail call elimination on debugging. 319 FunctionPassManager 320 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, 321 ThinOrFullLTOPhase Phase) { 322 323 FunctionPassManager FPM; 324 325 // Form SSA out of local memory accesses after breaking apart aggregates into 326 // scalars. 327 FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 328 329 // Catch trivial redundancies 330 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); 331 332 // Hoisting of scalars and load expressions. 333 FPM.addPass( 334 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 335 FPM.addPass(InstCombinePass()); 336 337 FPM.addPass(LibCallsShrinkWrapPass()); 338 339 invokePeepholeEPCallbacks(FPM, Level); 340 341 FPM.addPass( 342 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 343 344 // Form canonically associated expression trees, and simplify the trees using 345 // basic mathematical properties. For example, this will form (nearly) 346 // minimal multiplication trees. 
347 FPM.addPass(ReassociatePass()); 348 349 // Add the primary loop simplification pipeline. 350 // FIXME: Currently this is split into two loop pass pipelines because we run 351 // some function passes in between them. These can and should be removed 352 // and/or replaced by scheduling the loop pass equivalents in the correct 353 // positions. But those equivalent passes aren't powerful enough yet. 354 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still 355 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to 356 // fully replace `SimplifyCFGPass`, and the closest to the other we have is 357 // `LoopInstSimplify`. 358 LoopPassManager LPM1, LPM2; 359 360 // Simplify the loop body. We do this initially to clean up after other loop 361 // passes run, either when iterating on a loop or on inner loops with 362 // implications on the outer loop. 363 LPM1.addPass(LoopInstSimplifyPass()); 364 LPM1.addPass(LoopSimplifyCFGPass()); 365 366 // Try to remove as much code from the loop header as possible, 367 // to reduce amount of IR that will have to be duplicated. However, 368 // do not perform speculative hoisting the first time as LICM 369 // will destroy metadata that may not need to be destroyed if run 370 // after loop rotation. 371 // TODO: Investigate promotion cap for O1. 372 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 373 /*AllowSpeculation=*/false)); 374 375 LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, 376 isLTOPreLink(Phase))); 377 // TODO: Investigate promotion cap for O1. 
378 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 379 /*AllowSpeculation=*/true)); 380 LPM1.addPass(SimpleLoopUnswitchPass()); 381 if (EnableLoopFlatten) 382 LPM1.addPass(LoopFlattenPass()); 383 384 LPM2.addPass(LoopIdiomRecognizePass()); 385 LPM2.addPass(IndVarSimplifyPass()); 386 387 for (auto &C : LateLoopOptimizationsEPCallbacks) 388 C(LPM2, Level); 389 390 LPM2.addPass(LoopDeletionPass()); 391 392 if (EnableLoopInterchange) 393 LPM2.addPass(LoopInterchangePass()); 394 395 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO 396 // because it changes IR to makes profile annotation in back compile 397 // inaccurate. The normal unroller doesn't pay attention to forced full unroll 398 // attributes so we need to make sure and allow the full unroll pass to pay 399 // attention to it. 400 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || 401 PGOOpt->Action != PGOOptions::SampleUse) 402 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 403 /* OnlyWhenForced= */ !PTO.LoopUnrolling, 404 PTO.ForgetAllSCEVInLoopUnroll)); 405 406 for (auto &C : LoopOptimizerEndEPCallbacks) 407 C(LPM2, Level); 408 409 // We provide the opt remark emitter pass for LICM to use. We only need to do 410 // this once as it is immutable. 411 FPM.addPass( 412 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); 413 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), 414 /*UseMemorySSA=*/true, 415 /*UseBlockFrequencyInfo=*/true)); 416 FPM.addPass( 417 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 418 FPM.addPass(InstCombinePass()); 419 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. 420 // *All* loop passes must preserve it, in order to be able to use it. 421 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), 422 /*UseMemorySSA=*/false, 423 /*UseBlockFrequencyInfo=*/false)); 424 425 // Delete small array after loop unroll. 
426 FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 427 428 // Specially optimize memory movement as it doesn't look like dataflow in SSA. 429 FPM.addPass(MemCpyOptPass()); 430 431 // Sparse conditional constant propagation. 432 // FIXME: It isn't clear why we do this *after* loop passes rather than 433 // before... 434 FPM.addPass(SCCPPass()); 435 436 // Delete dead bit computations (instcombine runs after to fold away the dead 437 // computations, and then ADCE will run later to exploit any new DCE 438 // opportunities that creates). 439 FPM.addPass(BDCEPass()); 440 441 // Run instcombine after redundancy and dead bit elimination to exploit 442 // opportunities opened up by them. 443 FPM.addPass(InstCombinePass()); 444 invokePeepholeEPCallbacks(FPM, Level); 445 446 FPM.addPass(CoroElidePass()); 447 448 for (auto &C : ScalarOptimizerLateEPCallbacks) 449 C(FPM, Level); 450 451 // Finally, do an expensive DCE pass to catch all the dead code exposed by 452 // the simplifications and basic cleanup after all the simplifications. 453 // TODO: Investigate if this is too expensive. 454 FPM.addPass(ADCEPass()); 455 FPM.addPass( 456 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 457 FPM.addPass(InstCombinePass()); 458 invokePeepholeEPCallbacks(FPM, Level); 459 460 return FPM; 461 } 462 463 FunctionPassManager 464 PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, 465 ThinOrFullLTOPhase Phase) { 466 assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); 467 468 // The O1 pipeline has a separate pipeline creation function to simplify 469 // construction readability. 470 if (Level.getSpeedupLevel() == 1) 471 return buildO1FunctionSimplificationPipeline(Level, Phase); 472 473 FunctionPassManager FPM; 474 475 // Form SSA out of local memory accesses after breaking apart aggregates into 476 // scalars. 
477 FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 478 479 // Catch trivial redundancies 480 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); 481 if (EnableKnowledgeRetention) 482 FPM.addPass(AssumeSimplifyPass()); 483 484 // Hoisting of scalars and load expressions. 485 if (EnableGVNHoist) 486 FPM.addPass(GVNHoistPass()); 487 488 // Global value numbering based sinking. 489 if (EnableGVNSink) { 490 FPM.addPass(GVNSinkPass()); 491 FPM.addPass( 492 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 493 } 494 495 // Speculative execution if the target has divergent branches; otherwise nop. 496 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); 497 498 // Optimize based on known information about branches, and cleanup afterward. 499 FPM.addPass(JumpThreadingPass()); 500 FPM.addPass(CorrelatedValuePropagationPass()); 501 502 FPM.addPass( 503 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 504 FPM.addPass(InstCombinePass()); 505 if (Level == OptimizationLevel::O3) 506 FPM.addPass(AggressiveInstCombinePass()); 507 508 if (EnableConstraintElimination) 509 FPM.addPass(ConstraintEliminationPass()); 510 511 if (!Level.isOptimizingForSize()) 512 FPM.addPass(LibCallsShrinkWrapPass()); 513 514 invokePeepholeEPCallbacks(FPM, Level); 515 516 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy 517 // using the size value profile. Don't perform this when optimizing for size. 518 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && 519 !Level.isOptimizingForSize()) 520 FPM.addPass(PGOMemOPSizeOpt()); 521 522 FPM.addPass(TailCallElimPass()); 523 FPM.addPass( 524 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 525 526 // Form canonically associated expression trees, and simplify the trees using 527 // basic mathematical properties. For example, this will form (nearly) 528 // minimal multiplication trees. 
529 FPM.addPass(ReassociatePass()); 530 531 // Add the primary loop simplification pipeline. 532 // FIXME: Currently this is split into two loop pass pipelines because we run 533 // some function passes in between them. These can and should be removed 534 // and/or replaced by scheduling the loop pass equivalents in the correct 535 // positions. But those equivalent passes aren't powerful enough yet. 536 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still 537 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to 538 // fully replace `SimplifyCFGPass`, and the closest to the other we have is 539 // `LoopInstSimplify`. 540 LoopPassManager LPM1, LPM2; 541 542 // Simplify the loop body. We do this initially to clean up after other loop 543 // passes run, either when iterating on a loop or on inner loops with 544 // implications on the outer loop. 545 LPM1.addPass(LoopInstSimplifyPass()); 546 LPM1.addPass(LoopSimplifyCFGPass()); 547 548 // Try to remove as much code from the loop header as possible, 549 // to reduce amount of IR that will have to be duplicated. However, 550 // do not perform speculative hoisting the first time as LICM 551 // will destroy metadata that may not need to be destroyed if run 552 // after loop rotation. 553 // TODO: Investigate promotion cap for O1. 554 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 555 /*AllowSpeculation=*/false)); 556 557 // Disable header duplication in loop rotation at -Oz. 558 LPM1.addPass( 559 LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); 560 // TODO: Investigate promotion cap for O1. 
561 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 562 /*AllowSpeculation=*/true)); 563 LPM1.addPass( 564 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && 565 EnableO3NonTrivialUnswitching)); 566 if (EnableLoopFlatten) 567 LPM1.addPass(LoopFlattenPass()); 568 569 LPM2.addPass(LoopIdiomRecognizePass()); 570 LPM2.addPass(IndVarSimplifyPass()); 571 572 for (auto &C : LateLoopOptimizationsEPCallbacks) 573 C(LPM2, Level); 574 575 LPM2.addPass(LoopDeletionPass()); 576 577 if (EnableLoopInterchange) 578 LPM2.addPass(LoopInterchangePass()); 579 580 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO 581 // because it changes IR to makes profile annotation in back compile 582 // inaccurate. The normal unroller doesn't pay attention to forced full unroll 583 // attributes so we need to make sure and allow the full unroll pass to pay 584 // attention to it. 585 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || 586 PGOOpt->Action != PGOOptions::SampleUse) 587 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 588 /* OnlyWhenForced= */ !PTO.LoopUnrolling, 589 PTO.ForgetAllSCEVInLoopUnroll)); 590 591 for (auto &C : LoopOptimizerEndEPCallbacks) 592 C(LPM2, Level); 593 594 // We provide the opt remark emitter pass for LICM to use. We only need to do 595 // this once as it is immutable. 596 FPM.addPass( 597 RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); 598 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), 599 /*UseMemorySSA=*/true, 600 /*UseBlockFrequencyInfo=*/true)); 601 FPM.addPass( 602 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 603 FPM.addPass(InstCombinePass()); 604 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, 605 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. 606 // *All* loop passes must preserve it, in order to be able to use it. 
607 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), 608 /*UseMemorySSA=*/false, 609 /*UseBlockFrequencyInfo=*/false)); 610 611 // Delete small array after loop unroll. 612 FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 613 614 // Try vectorization/scalarization transforms that are both improvements 615 // themselves and can allow further folds with GVN and InstCombine. 616 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true)); 617 618 // Eliminate redundancies. 619 FPM.addPass(MergedLoadStoreMotionPass()); 620 if (RunNewGVN) 621 FPM.addPass(NewGVNPass()); 622 else 623 FPM.addPass(GVNPass()); 624 625 // Sparse conditional constant propagation. 626 // FIXME: It isn't clear why we do this *after* loop passes rather than 627 // before... 628 FPM.addPass(SCCPPass()); 629 630 // Delete dead bit computations (instcombine runs after to fold away the dead 631 // computations, and then ADCE will run later to exploit any new DCE 632 // opportunities that creates). 633 FPM.addPass(BDCEPass()); 634 635 // Run instcombine after redundancy and dead bit elimination to exploit 636 // opportunities opened up by them. 637 FPM.addPass(InstCombinePass()); 638 invokePeepholeEPCallbacks(FPM, Level); 639 640 // Re-consider control flow based optimizations after redundancy elimination, 641 // redo DCE, etc. 642 if (EnableDFAJumpThreading && Level.getSizeLevel() == 0) 643 FPM.addPass(DFAJumpThreadingPass()); 644 645 FPM.addPass(JumpThreadingPass()); 646 FPM.addPass(CorrelatedValuePropagationPass()); 647 648 // Finally, do an expensive DCE pass to catch all the dead code exposed by 649 // the simplifications and basic cleanup after all the simplifications. 650 // TODO: Investigate if this is too expensive. 651 FPM.addPass(ADCEPass()); 652 653 // Specially optimize memory movement as it doesn't look like dataflow in SSA. 
// Memory cleanup (MemCpyOpt, then DSE) ahead of a MemorySSA-backed LICM run.
  FPM.addPass(MemCpyOptPass());

  FPM.addPass(DSEPass());
  FPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));

  FPM.addPass(CoroElidePass());

  for (auto &C : ScalarOptimizerLateEPCallbacks)
    C(FPM, Level);

  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .convertSwitchRangeToICmp(true)
                                  .hoistCommonInsts(true)
                                  .sinkCommonInsts(true)));
  FPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(FPM, Level);

  // Don't add CHR pass for CSIRInstr build in PostLink as the profile
  // is still the same as the PreLink compilation.
  if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
      ((PGOOpt->Action == PGOOptions::IRUse &&
        (Phase != ThinOrFullLTOPhase::ThinLTOPostLink ||
         PGOOpt->CSAction != PGOOptions::CSIRInstr)) ||
       PGOOpt->Action == PGOOptions::SampleUse))
    FPM.addPass(ControlHeightReductionPass());

  return FPM;
}

/// Append the passes the LTO pre-link pipelines add last:
/// CanonicalizeAliasesPass followed by NameAnonGlobalPass.
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}

/// Add instrumentation-based PGO passes to \p MPM for optimized (non-O0)
/// builds.
///
/// Unless \p IsCS is set or the pre-inliner is disabled, an early inliner with
/// a small function cleanup pipeline runs first, followed by GlobalDCE. After
/// that, either the profile in \p ProfileFile is applied (when
/// \p RunProfileGen is false) or instrumentation is inserted and lowered.
///
/// \param MPM pass manager to append to.
/// \param Level optimization level; must not be O0 (asserted).
/// \param RunProfileGen true to instrument, false to use an existing profile.
/// \param IsCS forwarded to the gen/use/lowering passes; also selects
///        UseBFIInPromotion for the lowering pass.
/// \param ProfileFile profile to read when using a profile (must then be
///        non-empty); when instrumenting and non-empty, used as
///        InstrProfileOutput.
/// \param ProfileRemappingFile forwarded to PGOInstrumentationUse.
/// \param LTOPhase recorded in the early inliner's InlineContext.
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
                                    OptimizationLevel Level, bool RunProfileGen,
                                    bool IsCS, std::string ProfileFile,
                                    std::string ProfileRemappingFile,
                                    ThinOrFullLTOPhase LTOPhase) {
  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
  if (!IsCS && !DisablePreInliner) {
    InlineParams IP;

    IP.DefaultThreshold = PreInlineThreshold;

    // FIXME: The hint threshold has the same value used by the regular inliner
    // when not optimizing for size. This should probably be lowered after
    // performance testing.
    // FIXME: this comment is cargo culted from the old pass manager, revisit.
    IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
    ModuleInlinerWrapperPass MIWP(
        IP, /* MandatoryFirst */ true,
        InlineContext{LTOPhase, InlinePass::EarlyInliner});
    CGSCCPassManager &CGPipeline = MIWP.getPM();

    FunctionPassManager FPM;
    FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
    FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
        true)));                    // Merge & remove basic blocks.
    FPM.addPass(InstCombinePass()); // Combine silly sequences.
    invokePeepholeEPCallbacks(FPM, Level);

    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
        std::move(FPM), PTO.EagerlyInvalidateAnalyses));

    MPM.addPass(std::move(MIWP));

    // Delete anything that is now dead to make sure that we don't instrument
    // dead code. Instrumentation can end up keeping dead code around and
    // dramatically increase code size.
    MPM.addPass(GlobalDCEPass());
  }

  if (!RunProfileGen) {
    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    return;
  }

  // Perform PGO instrumentation.
  MPM.addPass(PGOInstrumentationGen(IsCS));

  if (EnablePostPGOLoopRotation) {
    // Disable header duplication in loop rotation at -Oz.
    MPM.addPass(createModuleToFunctionPassAdaptor(
        createFunctionToLoopPassAdaptor(
            LoopRotatePass(Level != OptimizationLevel::Oz),
            /*UseMemorySSA=*/false,
            /*UseBlockFrequencyInfo=*/false),
        PTO.EagerlyInvalidateAnalyses));
  }

  // Add the profile lowering pass.
  InstrProfOptions Options;
  if (!ProfileFile.empty())
    Options.InstrProfileOutput = ProfileFile;
  // Do counter promotion at Level greater than O0.
  Options.DoCounterPromotion = true;
  Options.UseBFIInPromotion = IsCS;
  MPM.addPass(InstrProfiling(Options, IsCS));
}

/// O0 variant of addPGOInstrPasses: no pre-inlining, no loop rotation, and
/// counter promotion disabled.
///
/// When \p RunProfileGen is false, applies the profile in \p ProfileFile (which
/// must be non-empty) and caches ProfileSummaryAnalysis; otherwise adds
/// PGOInstrumentationGen followed by the InstrProfiling lowering pass.
void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
                                         bool RunProfileGen, bool IsCS,
                                         std::string ProfileFile,
                                         std::string ProfileRemappingFile) {
  if (!RunProfileGen) {
    assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    return;
  }

  // Perform PGO instrumentation.
  MPM.addPass(PGOInstrumentationGen(IsCS));
  // Add the profile lowering pass.
  InstrProfOptions Options;
  if (!ProfileFile.empty())
    Options.InstrProfileOutput = ProfileFile;
  // Do not do counter promotion at O0.
  Options.DoCounterPromotion = false;
  Options.UseBFIInPromotion = IsCS;
  MPM.addPass(InstrProfiling(Options, IsCS));
}

/// Translate \p Level into inline parameters via its speedup and size levels.
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}

/// Build the CGSCC-based inliner pipeline, wrapped in a
/// ModuleInlinerWrapperPass.
///
/// Inline parameters are derived from \p Level unless PTO.InlinerThreshold is
/// set to something other than -1, in which case that threshold is used.
/// \p Phase is recorded in the InlineContext and forwarded to the nested
/// function simplification pipeline.
ModuleInlinerWrapperPass
PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
                                  ThinOrFullLTOPhase Phase) {
  InlineParams IP;
  if (PTO.InlinerThreshold == -1)
    IP = getInlineParamsFromOptLevel(Level);
  else
    IP = getInlineParams(PTO.InlinerThreshold);
  // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
  // disable hot callsite inline (as much as possible [1]) because it makes
  // profile annotation in the backend inaccurate.
803 // 804 // [1] Note the cost of a function could be below zero due to erased 805 // prologue / epilogue. 806 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 807 PGOOpt->Action == PGOOptions::SampleUse) 808 IP.HotCallSiteThreshold = 0; 809 810 if (PGOOpt) 811 IP.EnableDeferral = EnablePGOInlineDeferral; 812 813 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, 814 InlineContext{Phase, InlinePass::CGSCCInliner}, 815 UseInlineAdvisor, MaxDevirtIterations); 816 817 // Require the GlobalsAA analysis for the module so we can query it within 818 // the CGSCC pipeline. 819 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>()); 820 // Invalidate AAManager so it can be recreated and pick up the newly available 821 // GlobalsAA. 822 MIWP.addModulePass( 823 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 824 825 // Require the ProfileSummaryAnalysis for the module so we can query it within 826 // the inliner pass. 827 MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 828 829 // Now begin the main postorder CGSCC pipeline. 830 // FIXME: The current CGSCC pipeline has its origins in the legacy pass 831 // manager and trying to emulate its precise behavior. Much of this doesn't 832 // make a lot of sense and we should revisit the core CGSCC structure. 833 CGSCCPassManager &MainCGPipeline = MIWP.getPM(); 834 835 // Note: historically, the PruneEH pass was run first to deduce nounwind and 836 // generally clean up exception handling overhead. It isn't clear this is 837 // valuable as the inliner doesn't currently care whether it is inlining an 838 // invoke or a call. 839 840 if (AttributorRun & AttributorRunOption::CGSCC) 841 MainCGPipeline.addPass(AttributorCGSCCPass()); 842 843 // Now deduce any function attributes based in the current code. 844 MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); 845 846 // When at O3 add argument promotion to the pass pipeline. 
847 // FIXME: It isn't at all clear why this should be limited to O3. 848 if (Level == OptimizationLevel::O3) 849 MainCGPipeline.addPass(ArgumentPromotionPass()); 850 851 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if 852 // there are no OpenMP runtime calls present in the module. 853 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) 854 MainCGPipeline.addPass(OpenMPOptCGSCCPass()); 855 856 for (auto &C : CGSCCOptimizerLateEPCallbacks) 857 C(MainCGPipeline, Level); 858 859 // Lastly, add the core function simplification pipeline nested inside the 860 // CGSCC walk. 861 MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( 862 buildFunctionSimplificationPipeline(Level, Phase), 863 PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline)); 864 865 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); 866 867 if (EnableNoRerunSimplificationPipeline) 868 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor( 869 InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>())); 870 871 return MIWP; 872 } 873 874 ModulePassManager 875 PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, 876 ThinOrFullLTOPhase Phase) { 877 ModulePassManager MPM; 878 879 InlineParams IP = getInlineParamsFromOptLevel(Level); 880 // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to 881 // disable hot callsite inline (as much as possible [1]) because it makes 882 // profile annotation in the backend inaccurate. 883 // 884 // [1] Note the cost of a function could be below zero due to erased 885 // prologue / epilogue. 886 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && 887 PGOOpt->Action == PGOOptions::SampleUse) 888 IP.HotCallSiteThreshold = 0; 889 890 if (PGOOpt) 891 IP.EnableDeferral = EnablePGOInlineDeferral; 892 893 // The inline deferral logic is used to avoid losing some 894 // inlining chance in future. 
It is helpful in SCC inliner, in which 895 // inlining is processed in bottom-up order. 896 // While in module inliner, the inlining order is a priority-based order 897 // by default. The inline deferral is unnecessary there. So we disable the 898 // inline deferral logic in module inliner. 899 IP.EnableDeferral = false; 900 901 MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor, Phase)); 902 903 MPM.addPass(createModuleToFunctionPassAdaptor( 904 buildFunctionSimplificationPipeline(Level, Phase), 905 PTO.EagerlyInvalidateAnalyses)); 906 907 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( 908 CoroSplitPass(Level != OptimizationLevel::O0))); 909 910 return MPM; 911 } 912 913 ModulePassManager 914 PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, 915 ThinOrFullLTOPhase Phase) { 916 ModulePassManager MPM; 917 918 // Place pseudo probe instrumentation as the first pass of the pipeline to 919 // minimize the impact of optimization changes. 920 if (PGOOpt && PGOOpt->PseudoProbeForProfiling && 921 Phase != ThinOrFullLTOPhase::ThinLTOPostLink) 922 MPM.addPass(SampleProfileProbePass(TM)); 923 924 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); 925 926 // In ThinLTO mode, when flattened profile is used, all the available 927 // profile information will be annotated in PreLink phase so there is 928 // no need to load the profile again in PostLink. 929 bool LoadSampleProfile = 930 HasSampleProfile && 931 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); 932 933 // During the ThinLTO backend phase we perform early indirect call promotion 934 // here, before globalopt. Otherwise imported available_externally functions 935 // look unreferenced and are removed. If we are going to load the sample 936 // profile then defer until later. 937 // TODO: See if we can move later and consolidate with the location where 938 // we perform ICP when we are loading a sample profile. 
// TODO: We pass HasSampleProfile (whether there was a sample profile file
  // passed to the compile) to the SamplePGO flag of ICP. This is used to
  // determine whether the new direct calls are annotated with prof metadata.
  // Ideally this should be determined from whether the IR is annotated with
  // sample profile, and not whether a sample profile was provided on the
  // command line. E.g. for flattened profiles where we will not be reloading
  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
  // provide the sample profile file.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));

  // Do basic inference of function attributes from known properties of system
  // libraries and other oracles.
  MPM.addPass(InferFunctionAttrsPass());
  MPM.addPass(CoroEarlyPass());

  // Create an early function pass manager to cleanup the output of the
  // frontend.
  FunctionPassManager EarlyFPM;
  // Lower llvm.expect to metadata before attempting transforms.
  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
  EarlyFPM.addPass(LowerExpectIntrinsicPass());
  EarlyFPM.addPass(SimplifyCFGPass());
  EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  EarlyFPM.addPass(EarlyCSEPass());
  if (Level == OptimizationLevel::O3)
    EarlyFPM.addPass(CallSiteSplittingPass());

  // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
  // to convert bitcast to direct calls so that they can be inlined during the
  // profile annotation preparation step.
  // More details about SamplePGO design can be found in:
  // https://research.google.com/pubs/pub45290.html
  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
  if (LoadSampleProfile)
    EarlyFPM.addPass(InstCombinePass());
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
                                                PTO.EagerlyInvalidateAnalyses));

  if (LoadSampleProfile) {
    // Annotate sample profile right after early FPM to ensure freshness of
    // the debug info.
    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                        PGOOpt->ProfileRemappingFile, Phase));
    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
    // RequireAnalysisPass for PSI before subsequent non-module passes.
    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
    // Do not invoke ICP in the LTOPrelink phase as it makes it hard
    // for the profile annotation to be accurate in the LTO backend.
    if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
        Phase != ThinOrFullLTOPhase::FullLTOPreLink)
      // We perform early indirect call promotion here, before globalopt.
      // This is important for the ThinLTO backend phase because otherwise
      // imported available_externally functions look unreferenced and are
      // removed.
      MPM.addPass(
          PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
  }

  // Try to perform OpenMP specific optimizations on the module. This is a
  // (quick!) no-op if there are no OpenMP runtime calls present in the module.
  if (Level != OptimizationLevel::O0)
    MPM.addPass(OpenMPOptPass());

  if (AttributorRun & AttributorRunOption::MODULE)
    MPM.addPass(AttributorPass());

  // Lower type metadata and the type.test intrinsic in the ThinLTO
  // post link pipeline after ICP. This is to enable usage of the type
  // tests in ICP sequences.
  if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));

  for (auto &C : PipelineEarlySimplificationEPCallbacks)
    C(MPM, Level);

  // Interprocedural constant propagation now that basic cleanup has occurred
  // and prior to optimizing globals.
  // FIXME: This position in the pipeline hasn't been carefully considered in
  // years, it should be re-analyzed.
  MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
                                       Level != OptimizationLevel::Os &&
                                       Level != OptimizationLevel::Oz)));

  // Attach metadata to indirect call sites indicating the set of functions
  // they may target at run-time. This should follow IPSCCP.
  MPM.addPass(CalledValuePropagationPass());

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());

  // Promote any localized globals to SSA registers.
  // FIXME: Should this instead be a run of SROA?
  // FIXME: We should probably run instcombine and simplifycfg afterward to
  // delete control flows that are dead once globals have been folded to
  // constants.
  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));

  // Create a small function pass pipeline to cleanup after all the global
  // optimizations.
  FunctionPassManager GlobalCleanupPM;
  GlobalCleanupPM.addPass(InstCombinePass());
  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);

  GlobalCleanupPM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
                                                PTO.EagerlyInvalidateAnalyses));

  // Add the passes for instrumentation PGO, if requested. This is skipped in
  // the ThinLTO post-link phase.
  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
      (PGOOpt->Action == PGOOptions::IRInstr ||
       PGOOpt->Action == PGOOptions::IRUse)) {
    addPGOInstrPasses(MPM, Level,
                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
                      /* IsCS */ false, PGOOpt->ProfileFile,
                      PGOOpt->ProfileRemappingFile, Phase);
    MPM.addPass(PGOIndirectCallPromotion(false, false));
  }
  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
      PGOOpt->CSAction == PGOOptions::CSIRInstr)
    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));

  // Synthesize function entry counts for non-PGO compilation.
  if (EnableSyntheticCounts && !PGOOpt)
    MPM.addPass(SyntheticCountsPropagation());

  // Exactly one of the two inliner pipelines is added.
  if (EnableModuleInliner)
    MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
  else
    MPM.addPass(buildInlinerPipeline(Level, Phase));

  // Remove any dead arguments exposed by cleanups, constant folding globals,
  // and argument promotion.
  MPM.addPass(DeadArgumentEliminationPass());

  MPM.addPass(CoroCleanupPass());

  if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
    MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
    MPM.addPass(ModuleMemProfilerPass());
  }

  return MPM;
}

/// Add loop and SLP vectorization plus the follow-up cleanup passes to \p FPM.
///
/// \p IsFullLTO selects between two orderings. When set, unrolling and SROA
/// run directly after the loop vectorizer, SCCP/InstCombine/BDCE run before
/// SLP vectorization, and a final InstCombine is appended. When clear, loop
/// load elimination runs after the loop vectorizer, and unrolling, SROA and
/// LICM run after VectorCombine.
///
/// TODO: Should LTO cause any differences to this set of passes?
void PassBuilder::addVectorPasses(OptimizationLevel Level,
                                  FunctionPassManager &FPM, bool IsFullLTO) {
  FPM.addPass(LoopVectorizePass(
      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));

  if (IsFullLTO) {
    // The vectorizer may have significantly shortened a loop body; unroll
    // again. Unroll small loops to hide loop backedge latency and saturate any
    // parallel execution resources of an out-of-order processor. We also then
    // need to clean up redundancies and loop invariant code.
// FIXME: It would be really good to use a loop-integrated instruction
    // combiner for cleanup here so that the unrolling and LICM can be pipelined
    // across the loop nests.
    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
    if (EnableUnrollAndJam && PTO.LoopUnrolling)
      FPM.addPass(createFunctionToLoopPassAdaptor(
          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
        PTO.ForgetAllSCEVInLoopUnroll)));
    FPM.addPass(WarnMissedTransformationsPass());
    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
    // NOTE: we are very late in the pipeline, and we don't have any LICM
    // or SimplifyCFG passes scheduled after us, that would cleanup
    // the CFG mess this may have created if allowed to modify CFG, so forbid
    // that.
    FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
  }

  if (!IsFullLTO) {
    // Eliminate loads by forwarding stores from the previous iteration to loads
    // of the current iteration.
    FPM.addPass(LoopLoadEliminationPass());
  }
  // Cleanup after the loop optimization passes.
  FPM.addPass(InstCombinePass());

  if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
    ExtraVectorPassManager ExtraPasses;
    // At higher optimization levels, try to clean up any runtime overlap and
    // alignment checks inserted by the vectorizer. We want to track correlated
    // runtime checks for two inner loops in the same outer loop, fold any
    // common computations, hoist loop-invariant aspects out of any outer loop,
    // and unswitch the runtime checks if possible. Once hoisted, we may have
    // dead (or speculatable) control flows or more combining opportunities.
    ExtraPasses.addPass(EarlyCSEPass());
    ExtraPasses.addPass(CorrelatedValuePropagationPass());
    ExtraPasses.addPass(InstCombinePass());
    LoopPassManager LPM;
    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                         /*AllowSpeculation=*/true));
    LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
                                       OptimizationLevel::O3));
    ExtraPasses.addPass(
        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
    ExtraPasses.addPass(
        createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
                                        /*UseBlockFrequencyInfo=*/true));
    ExtraPasses.addPass(
        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
    ExtraPasses.addPass(InstCombinePass());
    FPM.addPass(std::move(ExtraPasses));
  }

  // Now that we've formed fast to execute loop structures, we do further
  // optimizations. These are run afterward as they might block doing complex
  // analyses and transforms such as what are needed for loop vectorization.

  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
  // GVN, loop transforms, and others have already run, so it's now better to
  // convert to more optimized IR using more aggressive simplify CFG options.
  // The extra sinking transform can create larger basic blocks, so do this
  // before SLP vectorization.
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .forwardSwitchCondToPhi(true)
                                  .convertSwitchRangeToICmp(true)
                                  .convertSwitchToLookupTable(true)
                                  .needCanonicalLoops(false)
                                  .hoistCommonInsts(true)
                                  .sinkCommonInsts(true)));

  if (IsFullLTO) {
    FPM.addPass(SCCPPass());
    FPM.addPass(InstCombinePass());
    FPM.addPass(BDCEPass());
  }

  // Optimize parallel scalar instruction chains into SIMD instructions.
  if (PTO.SLPVectorization) {
    FPM.addPass(SLPVectorizerPass());
    if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
      FPM.addPass(EarlyCSEPass());
    }
  }
  // Enhance/cleanup vector code.
  FPM.addPass(VectorCombinePass());

  if (!IsFullLTO) {
    FPM.addPass(InstCombinePass());
    // Unroll small loops to hide loop backedge latency and saturate any
    // parallel execution resources of an out-of-order processor. We also then
    // need to clean up redundancies and loop invariant code.
    // FIXME: It would be really good to use a loop-integrated instruction
    // combiner for cleanup here so that the unrolling and LICM can be pipelined
    // across the loop nests.
    // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
    if (EnableUnrollAndJam && PTO.LoopUnrolling) {
      FPM.addPass(createFunctionToLoopPassAdaptor(
          LoopUnrollAndJamPass(Level.getSpeedupLevel())));
    }
    FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
        Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
        PTO.ForgetAllSCEVInLoopUnroll)));
    FPM.addPass(WarnMissedTransformationsPass());
    // Now that we are done with loop unrolling, be it either by LoopVectorizer,
    // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
    // become constant-offset, thus enabling SROA and alloca promotion. Do so.
    // NOTE: we are very late in the pipeline, and we don't have any LICM
    // or SimplifyCFG passes scheduled after us, that would cleanup
    // the CFG mess this may have created if allowed to modify CFG, so forbid
    // that.
    FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
    FPM.addPass(InstCombinePass());
    FPM.addPass(
        RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
    FPM.addPass(createFunctionToLoopPassAdaptor(
        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                 /*AllowSpeculation=*/true),
        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
  }

  // Now that we've vectorized and unrolled loops, we may have more refined
  // alignment information, try to re-derive it here.
  FPM.addPass(AlignmentFromAssumptionsPass());

  if (IsFullLTO)
    FPM.addPass(InstCombinePass());
}

/// Build the module optimization pipeline for \p Level and \p LTOPhase.
///
/// When \p LTOPhase is either pre-link phase (ThinLTO or full LTO), the
/// pipeline skips EliminateAvailableExternally, the context-sensitive PGO
/// passes, hot/cold splitting, CGProfile and the relative lookup table
/// converter.
ModulePassManager
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
                                             ThinOrFullLTOPhase LTOPhase) {
  const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
                           LTOPhase == ThinOrFullLTOPhase::FullLTOPreLink);
  ModulePassManager MPM;

  // Optimize globals now that the module is fully simplified.
  MPM.addPass(GlobalOptPass());
  MPM.addPass(GlobalDCEPass());

  // Run partial inlining pass to partially inline functions that have
  // large bodies.
  if (RunPartialInlining)
    MPM.addPass(PartialInlinerPass());

  // Remove avail extern fns and globals definitions since we aren't compiling
  // an object file for later LTO. For LTO we want to preserve these so they
  // are eligible for inlining at link-time. Note if they are unreferenced they
  // will be removed by GlobalDCE later, so this only impacts referenced
  // available externally globals. Eventually they will be suppressed during
  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
  // may make globals referenced by available external functions dead and saves
  // running remaining passes on the eliminated functions. These should be
  // preserved during prelinking for link-time inlining decisions.
// available_externally definitions are dropped only outside the pre-link phases.
  if (!LTOPreLink)
    MPM.addPass(EliminateAvailableExternallyPass());

  if (EnableOrderFileInstrumentation)
    MPM.addPass(InstrOrderFilePass());

  // Do RPO function attribute inference across the module to forward-propagate
  // attributes where applicable.
  // FIXME: Is this really an optimization rather than a canonicalization?
  MPM.addPass(ReversePostOrderFunctionAttrsPass());

  // Do a post inline PGO instrumentation and use pass. This is a context
  // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
  // cross-module inline has not been done yet. The context sensitive
  // instrumentation is after all the inlines are done.
  if (!LTOPreLink && PGOOpt) {
    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
                        PGOOpt->ProfileRemappingFile, LTOPhase);
    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
      addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
                        /* IsCS */ true, PGOOpt->ProfileFile,
                        PGOOpt->ProfileRemappingFile, LTOPhase);
  }

  // Re-compute GlobalsAA here prior to function passes. This is particularly
  // useful as the above will have inlined, DCE'ed, and function-attr
  // propagated everything. We should at this point have a reasonably minimal
  // and richly annotated call graph. By computing aliasing and mod/ref
  // information for all local globals here, the late loop passes and notably
  // the vectorizer will be able to use them to help recognize vectorizable
  // memory operations.
  MPM.addPass(RecomputeGlobalsAAPass());

  for (auto &C : OptimizerEarlyEPCallbacks)
    C(MPM, Level);

  FunctionPassManager OptimizePM;
  OptimizePM.addPass(Float2IntPass());
  OptimizePM.addPass(LowerConstantIntrinsicsPass());

  if (EnableMatrix) {
    OptimizePM.addPass(LowerMatrixIntrinsicsPass());
    OptimizePM.addPass(EarlyCSEPass());
  }

  // FIXME: We need to run some loop optimizations to re-rotate loops after
  // simplifycfg and others undo their rotation.

  // Optimize the loop execution. These passes operate on entire loop nests
  // rather than on each loop in an inside-out manner, and so they are actually
  // function passes.

  for (auto &C : VectorizerStartEPCallbacks)
    C(OptimizePM, Level);

  LoopPassManager LPM;
  // First rotate loops that may have been un-rotated by prior passes.
  // Disable header duplication at -Oz.
  LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
  // Some loops may have become dead by now. Try to delete them.
  // FIXME: see discussion in https://reviews.llvm.org/D112851,
  //        this may need to be revisited once we run GVN before loop deletion
  //        in the simplification pipeline.
  LPM.addPass(LoopDeletionPass());
  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
      std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));

  // Distribute loops to allow partial vectorization. I.e. isolate dependences
  // into separate loop that would otherwise inhibit vectorization. This is
  // currently only performed for loops marked with the metadata
  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
  OptimizePM.addPass(LoopDistributePass());

  // Populates the VFABI attribute with the scalar-to-vector mappings
  // from the TargetLibraryInfo.
  OptimizePM.addPass(InjectTLIMappings());

  addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);

  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
  // canonicalization pass that enables other optimizations. As a result,
  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
  // result too early.
  OptimizePM.addPass(LoopSinkPass());

  // And finally clean up LCSSA form before generating code.
  OptimizePM.addPass(InstSimplifyPass());

  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
  // flattening of blocks.
  OptimizePM.addPass(DivRemPairsPass());

  // Try to annotate calls that were created during optimization.
  OptimizePM.addPass(TailCallElimPass());

  // LoopSink (and other loop passes since the last simplifyCFG) might have
  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
  OptimizePM.addPass(
      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));

  // Add the core optimizing pipeline.
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
                                                PTO.EagerlyInvalidateAnalyses));

  for (auto &C : OptimizerLastEPCallbacks)
    C(MPM, Level);

  // Split out cold code. Splitting is done late to avoid hiding context from
  // other optimizations and inadvertently regressing performance. The tradeoff
  // is that this has a higher code size cost than splitting early.
  if (EnableHotColdSplit && !LTOPreLink)
    MPM.addPass(HotColdSplittingPass());

  // Search the code for similar regions of code. If enough similar regions can
  // be found where extracting the regions into their own function will decrease
  // the size of the program, we extract the regions, and deduplicate the
  // structurally similar regions.
  if (EnableIROutliner)
    MPM.addPass(IROutlinerPass());

  // Merge functions if requested.
  if (PTO.MergeFunctions)
    MPM.addPass(MergeFunctionsPass());

  // Now we need to do some global optimization transforms.
  // FIXME: It would seem like these should come first in the optimization
  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
  // ordering here.
  MPM.addPass(GlobalDCEPass());
  MPM.addPass(ConstantMergePass());

  if (PTO.CallGraphProfile && !LTOPreLink)
    MPM.addPass(CGProfilePass());

  // TODO: Relative lookup table converter pass caused an issue when full lto
  // is enabled. See https://reviews.llvm.org/D94355 for more details.
  // Until the issue is fixed, disable this pass during pre-linking phase.
  if (!LTOPreLink)
    MPM.addPass(RelLookupTableConverterPass());

  return MPM;
}

/// Build the default per-module pipeline for \p Level, which must not be O0
/// (asserted).
///
/// Runs the module simplification pipeline followed by the module optimization
/// pipeline. When \p LTOPreLink is set, both run with
/// ThinOrFullLTOPhase::FullLTOPreLink and the required LTO pre-link passes are
/// appended at the end; otherwise the phase is ThinOrFullLTOPhase::None.
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
                                           bool LTOPreLink) {
  assert(Level != OptimizationLevel::O0 &&
         "Must request optimizations for the default pipeline!");

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  // Apply module pipeline start EP callback.
  for (auto &C : PipelineStartEPCallbacks)
    C(MPM, Level);

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  const ThinOrFullLTOPhase LTOPhase = LTOPreLink
                                          ? ThinOrFullLTOPhase::FullLTOPreLink
                                          : ThinOrFullLTOPhase::None;
  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(Level, LTOPhase));

  // Now add the optimization pipeline.
// Run the module optimization pipeline for this LTO phase.
  MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPhase));

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  if (LTOPreLink)
    addRequiredLTOPreLinkPasses(MPM);

  return MPM;
}

/// Build the ThinLTO pre-link pipeline for \p Level, which must not be O0
/// (asserted).
///
/// Only the module simplification pipeline is run here, followed by optional
/// partial inlining, GlobalOpt, the optimizer early/last extension-point
/// callbacks, annotation remarks and the required LTO pre-link passes.
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  assert(Level != OptimizationLevel::O0 &&
         "Must request optimizations for the default pipeline!");

  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  if (PGOOpt && PGOOpt->DebugInfoForProfiling)
    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));

  // Apply module pipeline start EP callback.
  for (auto &C : PipelineStartEPCallbacks)
    C(MPM, Level);

  // If we are planning to perform ThinLTO later, we don't bloat the code with
  // unrolling/vectorization/... now. Just simplify the module as much as we
  // can.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPreLink));

  // Run partial inlining pass to partially inline functions that have
  // large bodies.
  // FIXME: It isn't clear whether this is really the right place to run this
  // in ThinLTO. Because there is another canonicalization and simplification
  // phase that will run after the thin link, running this here ends up with
  // less information than will be available later and it may grow functions in
  // ways that aren't beneficial.
  if (RunPartialInlining)
    MPM.addPass(PartialInlinerPass());

  // Reduce the size of the IR as much as possible.
  MPM.addPass(GlobalOptPass());

  if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
      PGOOpt->Action == PGOOptions::SampleUse)
    MPM.addPass(PseudoProbeUpdatePass());

  // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
  // optimization is going to be done in PostLink stage, but clang can't add
  // callbacks there in case of in-process ThinLTO called by linker.
  for (auto &C : OptimizerEarlyEPCallbacks)
    C(MPM, Level);
  for (auto &C : OptimizerLastEPCallbacks)
    C(MPM, Level);

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  addRequiredLTOPreLinkPasses(MPM);

  return MPM;
}

/// Build the ThinLTO post-link pipeline.
///
/// \param Level optimization level. At O0 the pipeline stops early after a
///        second LowerTypeTests run, EliminateAvailableExternally and
///        GlobalDCE.
/// \param ImportSummary if non-null, WholeProgramDevirt and LowerTypeTests are
///        run first with this summary.
ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
    OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
  MPM.addPass(Annotation2MetadataPass());

  if (ImportSummary) {
    // These passes import type identifier resolutions for whole-program
    // devirtualization and CFI. They must run early because other passes may
    // disturb the specific instruction patterns that these passes look for,
    // creating dependencies on resolutions that may not appear in the summary.
    //
    // For example, GVN may transform the pattern assume(type.test) appearing in
    // two basic blocks into assume(phi(type.test, type.test)), which would
    // transform a dependency on a WPD resolution into a dependency on a type
    // identifier resolution for CFI.
    //
    // Also, WPD has access to more precise information than ICP and can
    // devirtualize more effectively, so it should operate on the IR first.
    //
    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
    // metadata and intrinsics.
    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
  }

  if (Level == OptimizationLevel::O0) {
    // Run a second time to clean up any type tests left behind by WPD for use
    // in ICP.
    MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
    // Drop available_externally and unreferenced globals. This is necessary
    // with ThinLTO in order to avoid leaving undefined references to dead
    // globals in the object file.
    MPM.addPass(EliminateAvailableExternallyPass());
    MPM.addPass(GlobalDCEPass());
    return MPM;
  }

  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

  // Add the core simplification pipeline.
  MPM.addPass(buildModuleSimplificationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Now add the optimization pipeline.
  MPM.addPass(buildModuleOptimizationPipeline(
      Level, ThinOrFullLTOPhase::ThinLTOPostLink));

  // Emit annotation remarks.
  addAnnotationRemarksPass(MPM);

  return MPM;
}

/// Build the full-LTO pre-link pipeline: currently the per-module default
/// pipeline with LTOPreLink enabled. \p Level must not be O0 (asserted).
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
  assert(Level != OptimizationLevel::O0 &&
         "Must request optimizations for the default pipeline!");
  // FIXME: We should use a customized pre-link pipeline!
  return buildPerModuleDefaultPipeline(Level,
                                       /* LTOPreLink */ true);
}

ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                                     ModuleSummaryIndex *ExportSummary) {
  ModulePassManager MPM;

  // Convert @llvm.global.annotations to !annotation metadata.
1569 MPM.addPass(Annotation2MetadataPass()); 1570 1571 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks) 1572 C(MPM, Level); 1573 1574 // Create a function that performs CFI checks for cross-DSO calls with targets 1575 // in the current module. 1576 MPM.addPass(CrossDSOCFIPass()); 1577 1578 if (Level == OptimizationLevel::O0) { 1579 // The WPD and LowerTypeTest passes need to run at -O0 to lower type 1580 // metadata and intrinsics. 1581 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); 1582 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1583 // Run a second time to clean up any type tests left behind by WPD for use 1584 // in ICP. 1585 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1586 1587 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) 1588 C(MPM, Level); 1589 1590 // Emit annotation remarks. 1591 addAnnotationRemarksPass(MPM); 1592 1593 return MPM; 1594 } 1595 1596 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { 1597 // Load sample profile before running the LTO optimization pipeline. 1598 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, 1599 PGOOpt->ProfileRemappingFile, 1600 ThinOrFullLTOPhase::FullLTOPostLink)); 1601 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert 1602 // RequireAnalysisPass for PSI before subsequent non-module passes. 1603 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); 1604 } 1605 1606 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. 1607 MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); 1608 1609 // Remove unused virtual tables to improve the quality of code generated by 1610 // whole-program devirtualization and bitset lowering. 1611 MPM.addPass(GlobalDCEPass()); 1612 1613 // Force any function attributes we want the rest of the pipeline to observe. 
1614 MPM.addPass(ForceFunctionAttrsPass()); 1615 1616 // Do basic inference of function attributes from known properties of system 1617 // libraries and other oracles. 1618 MPM.addPass(InferFunctionAttrsPass()); 1619 1620 if (Level.getSpeedupLevel() > 1) { 1621 MPM.addPass(createModuleToFunctionPassAdaptor( 1622 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses)); 1623 1624 // Indirect call promotion. This should promote all the targets that are 1625 // left by the earlier promotion pass that promotes intra-module targets. 1626 // This two-step promotion is to save the compile time. For LTO, it should 1627 // produce the same result as if we only do promotion here. 1628 MPM.addPass(PGOIndirectCallPromotion( 1629 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); 1630 1631 // Propagate constants at call sites into the functions they call. This 1632 // opens opportunities for globalopt (and inlining) by substituting function 1633 // pointers passed as arguments to direct uses of functions. 1634 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/ 1635 Level != OptimizationLevel::Os && 1636 Level != OptimizationLevel::Oz))); 1637 1638 // Attach metadata to indirect call sites indicating the set of functions 1639 // they may target at run-time. This should follow IPSCCP. 1640 MPM.addPass(CalledValuePropagationPass()); 1641 } 1642 1643 // Now deduce any function attributes based in the current code. 1644 MPM.addPass( 1645 createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); 1646 1647 // Do RPO function attribute inference across the module to forward-propagate 1648 // attributes where applicable. 1649 // FIXME: Is this really an optimization rather than a canonicalization? 1650 MPM.addPass(ReversePostOrderFunctionAttrsPass()); 1651 1652 // Use in-range annotations on GEP indices to split globals where beneficial. 
1653 MPM.addPass(GlobalSplitPass()); 1654 1655 // Run whole program optimization of virtual call when the list of callees 1656 // is fixed. 1657 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); 1658 1659 // Stop here at -O1. 1660 if (Level == OptimizationLevel::O1) { 1661 // The LowerTypeTestsPass needs to run to lower type metadata and the 1662 // type.test intrinsics. The pass does nothing if CFI is disabled. 1663 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1664 // Run a second time to clean up any type tests left behind by WPD for use 1665 // in ICP (which is performed earlier than this in the regular LTO 1666 // pipeline). 1667 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1668 1669 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) 1670 C(MPM, Level); 1671 1672 // Emit annotation remarks. 1673 addAnnotationRemarksPass(MPM); 1674 1675 return MPM; 1676 } 1677 1678 // Optimize globals to try and fold them into constants. 1679 MPM.addPass(GlobalOptPass()); 1680 1681 // Promote any localized globals to SSA registers. 1682 MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); 1683 1684 // Linking modules together can lead to duplicate global constant, only 1685 // keep one copy of each constant. 1686 MPM.addPass(ConstantMergePass()); 1687 1688 // Remove unused arguments from functions. 1689 MPM.addPass(DeadArgumentEliminationPass()); 1690 1691 // Reduce the code after globalopt and ipsccp. Both can open up significant 1692 // simplification opportunities, and both can propagate functions through 1693 // function pointers. When this happens, we often have to resolve varargs 1694 // calls, etc, so let instcombine do this. 
1695 FunctionPassManager PeepholeFPM; 1696 PeepholeFPM.addPass(InstCombinePass()); 1697 if (Level == OptimizationLevel::O3) 1698 PeepholeFPM.addPass(AggressiveInstCombinePass()); 1699 invokePeepholeEPCallbacks(PeepholeFPM, Level); 1700 1701 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM), 1702 PTO.EagerlyInvalidateAnalyses)); 1703 1704 // Note: historically, the PruneEH pass was run first to deduce nounwind and 1705 // generally clean up exception handling overhead. It isn't clear this is 1706 // valuable as the inliner doesn't currently care whether it is inlining an 1707 // invoke or a call. 1708 // Run the inliner now. 1709 MPM.addPass(ModuleInlinerWrapperPass( 1710 getInlineParamsFromOptLevel(Level), 1711 /* MandatoryFirst */ true, 1712 InlineContext{ThinOrFullLTOPhase::FullLTOPostLink, 1713 InlinePass::CGSCCInliner})); 1714 1715 // Optimize globals again after we ran the inliner. 1716 MPM.addPass(GlobalOptPass()); 1717 1718 // Run the OpenMPOpt pass again after global optimizations. 1719 MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink)); 1720 1721 // Garbage collect dead functions. 1722 MPM.addPass(GlobalDCEPass()); 1723 1724 // If we didn't decide to inline a function, check to see if we can 1725 // transform it to pass arguments by value instead of by reference. 1726 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); 1727 1728 FunctionPassManager FPM; 1729 // The IPO Passes may leave cruft around. Clean up after them. 1730 FPM.addPass(InstCombinePass()); 1731 invokePeepholeEPCallbacks(FPM, Level); 1732 1733 if (EnableConstraintElimination) 1734 FPM.addPass(ConstraintEliminationPass()); 1735 1736 FPM.addPass(JumpThreadingPass()); 1737 1738 // Do a post inline PGO instrumentation and use pass. This is a context 1739 // sensitive PGO pass. 
1740 if (PGOOpt) { 1741 if (PGOOpt->CSAction == PGOOptions::CSIRInstr) 1742 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, 1743 /* IsCS */ true, PGOOpt->CSProfileGenFile, 1744 PGOOpt->ProfileRemappingFile, 1745 ThinOrFullLTOPhase::FullLTOPostLink); 1746 else if (PGOOpt->CSAction == PGOOptions::CSIRUse) 1747 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, 1748 /* IsCS */ true, PGOOpt->ProfileFile, 1749 PGOOpt->ProfileRemappingFile, 1750 ThinOrFullLTOPhase::FullLTOPostLink); 1751 } 1752 1753 // Break up allocas 1754 FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); 1755 1756 // LTO provides additional opportunities for tailcall elimination due to 1757 // link-time inlining, and visibility of nocapture attribute. 1758 FPM.addPass(TailCallElimPass()); 1759 1760 // Run a few AA driver optimizations here and now to cleanup the code. 1761 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), 1762 PTO.EagerlyInvalidateAnalyses)); 1763 1764 MPM.addPass( 1765 createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); 1766 1767 // Require the GlobalsAA analysis for the module so we can query it within 1768 // MainFPM. 1769 MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>()); 1770 // Invalidate AAManager so it can be recreated and pick up the newly available 1771 // GlobalsAA. 1772 MPM.addPass( 1773 createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); 1774 1775 FunctionPassManager MainFPM; 1776 MainFPM.addPass(createFunctionToLoopPassAdaptor( 1777 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, 1778 /*AllowSpeculation=*/true), 1779 /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); 1780 1781 if (RunNewGVN) 1782 MainFPM.addPass(NewGVNPass()); 1783 else 1784 MainFPM.addPass(GVNPass()); 1785 1786 // Remove dead memcpy()'s. 1787 MainFPM.addPass(MemCpyOptPass()); 1788 1789 // Nuke dead stores. 
1790 MainFPM.addPass(DSEPass()); 1791 MainFPM.addPass(MergedLoadStoreMotionPass()); 1792 1793 LoopPassManager LPM; 1794 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) 1795 LPM.addPass(LoopFlattenPass()); 1796 LPM.addPass(IndVarSimplifyPass()); 1797 LPM.addPass(LoopDeletionPass()); 1798 // FIXME: Add loop interchange. 1799 1800 // Unroll small loops and perform peeling. 1801 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), 1802 /* OnlyWhenForced= */ !PTO.LoopUnrolling, 1803 PTO.ForgetAllSCEVInLoopUnroll)); 1804 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. 1805 // *All* loop passes must preserve it, in order to be able to use it. 1806 MainFPM.addPass(createFunctionToLoopPassAdaptor( 1807 std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); 1808 1809 MainFPM.addPass(LoopDistributePass()); 1810 1811 addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); 1812 1813 // Run the OpenMPOpt CGSCC pass again late. 1814 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( 1815 OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink))); 1816 1817 invokePeepholeEPCallbacks(MainFPM, Level); 1818 MainFPM.addPass(JumpThreadingPass()); 1819 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), 1820 PTO.EagerlyInvalidateAnalyses)); 1821 1822 // Lower type metadata and the type.test intrinsic. This pass supports 1823 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs 1824 // to be run at link time if CFI is enabled. This pass does nothing if 1825 // CFI is disabled. 1826 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); 1827 // Run a second time to clean up any type tests left behind by WPD for use 1828 // in ICP (which is performed earlier than this in the regular LTO pipeline). 1829 MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); 1830 1831 // Enable splitting late in the FullLTO post-link pipeline. 
1832 if (EnableHotColdSplit) 1833 MPM.addPass(HotColdSplittingPass()); 1834 1835 // Add late LTO optimization passes. 1836 // Delete basic blocks, which optimization passes may have killed. 1837 MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass( 1838 SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts( 1839 true)))); 1840 1841 // Drop bodies of available eternally objects to improve GlobalDCE. 1842 MPM.addPass(EliminateAvailableExternallyPass()); 1843 1844 // Now that we have optimized the program, discard unreachable functions. 1845 MPM.addPass(GlobalDCEPass()); 1846 1847 if (PTO.MergeFunctions) 1848 MPM.addPass(MergeFunctionsPass()); 1849 1850 if (PTO.CallGraphProfile) 1851 MPM.addPass(CGProfilePass()); 1852 1853 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks) 1854 C(MPM, Level); 1855 1856 // Emit annotation remarks. 1857 addAnnotationRemarksPass(MPM); 1858 1859 return MPM; 1860 } 1861 1862 ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, 1863 bool LTOPreLink) { 1864 assert(Level == OptimizationLevel::O0 && 1865 "buildO0DefaultPipeline should only be used with O0"); 1866 1867 ModulePassManager MPM; 1868 1869 // Perform pseudo probe instrumentation in O0 mode. This is for the 1870 // consistency between different build modes. For example, a LTO build can be 1871 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in 1872 // the postlink will require pseudo probe instrumentation in the prelink. 
1873 if (PGOOpt && PGOOpt->PseudoProbeForProfiling) 1874 MPM.addPass(SampleProfileProbePass(TM)); 1875 1876 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || 1877 PGOOpt->Action == PGOOptions::IRUse)) 1878 addPGOInstrPassesForO0( 1879 MPM, 1880 /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), 1881 /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); 1882 1883 for (auto &C : PipelineStartEPCallbacks) 1884 C(MPM, Level); 1885 1886 if (PGOOpt && PGOOpt->DebugInfoForProfiling) 1887 MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); 1888 1889 for (auto &C : PipelineEarlySimplificationEPCallbacks) 1890 C(MPM, Level); 1891 1892 // Build a minimal pipeline based on the semantics required by LLVM, 1893 // which is just that always inlining occurs. Further, disable generating 1894 // lifetime intrinsics to avoid enabling further optimizations during 1895 // code generation. 1896 MPM.addPass(AlwaysInlinerPass( 1897 /*InsertLifetimeIntrinsics=*/false)); 1898 1899 if (PTO.MergeFunctions) 1900 MPM.addPass(MergeFunctionsPass()); 1901 1902 if (EnableMatrix) 1903 MPM.addPass( 1904 createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true))); 1905 1906 if (!CGSCCOptimizerLateEPCallbacks.empty()) { 1907 CGSCCPassManager CGPM; 1908 for (auto &C : CGSCCOptimizerLateEPCallbacks) 1909 C(CGPM, Level); 1910 if (!CGPM.isEmpty()) 1911 MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); 1912 } 1913 if (!LateLoopOptimizationsEPCallbacks.empty()) { 1914 LoopPassManager LPM; 1915 for (auto &C : LateLoopOptimizationsEPCallbacks) 1916 C(LPM, Level); 1917 if (!LPM.isEmpty()) { 1918 MPM.addPass(createModuleToFunctionPassAdaptor( 1919 createFunctionToLoopPassAdaptor(std::move(LPM)))); 1920 } 1921 } 1922 if (!LoopOptimizerEndEPCallbacks.empty()) { 1923 LoopPassManager LPM; 1924 for (auto &C : LoopOptimizerEndEPCallbacks) 1925 C(LPM, Level); 1926 if (!LPM.isEmpty()) { 1927 MPM.addPass(createModuleToFunctionPassAdaptor( 
1928 createFunctionToLoopPassAdaptor(std::move(LPM)))); 1929 } 1930 } 1931 if (!ScalarOptimizerLateEPCallbacks.empty()) { 1932 FunctionPassManager FPM; 1933 for (auto &C : ScalarOptimizerLateEPCallbacks) 1934 C(FPM, Level); 1935 if (!FPM.isEmpty()) 1936 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); 1937 } 1938 1939 for (auto &C : OptimizerEarlyEPCallbacks) 1940 C(MPM, Level); 1941 1942 if (!VectorizerStartEPCallbacks.empty()) { 1943 FunctionPassManager FPM; 1944 for (auto &C : VectorizerStartEPCallbacks) 1945 C(FPM, Level); 1946 if (!FPM.isEmpty()) 1947 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); 1948 } 1949 1950 ModulePassManager CoroPM; 1951 CoroPM.addPass(CoroEarlyPass()); 1952 CGSCCPassManager CGPM; 1953 CGPM.addPass(CoroSplitPass()); 1954 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); 1955 CoroPM.addPass(CoroCleanupPass()); 1956 CoroPM.addPass(GlobalDCEPass()); 1957 MPM.addPass(CoroConditionalWrapper(std::move(CoroPM))); 1958 1959 for (auto &C : OptimizerLastEPCallbacks) 1960 C(MPM, Level); 1961 1962 if (LTOPreLink) 1963 addRequiredLTOPreLinkPasses(MPM); 1964 1965 MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); 1966 1967 return MPM; 1968 } 1969 1970 AAManager PassBuilder::buildDefaultAAPipeline() { 1971 AAManager AA; 1972 1973 // The order in which these are registered determines their priority when 1974 // being queried. 1975 1976 // First we register the basic alias analysis that provides the majority of 1977 // per-function local AA logic. This is a stateless, on-demand local set of 1978 // AA techniques. 1979 AA.registerFunctionAnalysis<BasicAA>(); 1980 1981 // Next we query fast, specialized alias analyses that wrap IR-embedded 1982 // information about aliasing. 1983 AA.registerFunctionAnalysis<ScopedNoAliasAA>(); 1984 AA.registerFunctionAnalysis<TypeBasedAA>(); 1985 1986 // Add support for querying global aliasing information when available. 
1987 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module 1988 // analysis, all that the `AAManager` can do is query for any *cached* 1989 // results from `GlobalsAA` through a readonly proxy. 1990 if (EnableGlobalAnalyses) 1991 AA.registerModuleAnalysis<GlobalsAA>(); 1992 1993 // Add target-specific alias analyses. 1994 if (TM) 1995 TM->registerDefaultAliasAnalyses(AA); 1996 1997 return AA; 1998 } 1999