10b57cec5SDimitry Andric //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Top-level implementation for the NVPTX target.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "NVPTXTargetMachine.h"
140b57cec5SDimitry Andric #include "NVPTX.h"
1506c3fb27SDimitry Andric #include "NVPTXAliasAnalysis.h"
160b57cec5SDimitry Andric #include "NVPTXAllocaHoisting.h"
17fe6060f1SDimitry Andric #include "NVPTXAtomicLower.h"
1806c3fb27SDimitry Andric #include "NVPTXCtorDtorLowering.h"
190b57cec5SDimitry Andric #include "NVPTXLowerAggrCopies.h"
20bdd1243dSDimitry Andric #include "NVPTXMachineFunctionInfo.h"
210b57cec5SDimitry Andric #include "NVPTXTargetObjectFile.h"
220b57cec5SDimitry Andric #include "NVPTXTargetTransformInfo.h"
230b57cec5SDimitry Andric #include "TargetInfo/NVPTXTargetInfo.h"
240b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
28349cc55cSDimitry Andric #include "llvm/IR/IntrinsicsNVPTX.h"
29349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h"
300b57cec5SDimitry Andric #include "llvm/Pass.h"
31e8d8bef9SDimitry Andric #include "llvm/Passes/PassBuilder.h"
320b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
330b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
340b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h"
3506c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h"
36*0fca6ea1SDimitry Andric #include "llvm/Transforms/IPO/ExpandVariadics.h"
370b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
380b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/GVN.h"
3906c3fb27SDimitry Andric #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
400b57cec5SDimitry Andric #include <cassert>
41bdd1243dSDimitry Andric #include <optional>
420b57cec5SDimitry Andric #include <string>
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric using namespace llvm;
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric // LSV is still relatively new; this switch lets us turn it off in case we
470b57cec5SDimitry Andric // encounter (or suspect) a bug.
480b57cec5SDimitry Andric static cl::opt<bool>
490b57cec5SDimitry Andric DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
500b57cec5SDimitry Andric cl::desc("Disable load/store vectorizer"),
510b57cec5SDimitry Andric cl::init(false), cl::Hidden);
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric // TODO: Remove this flag when we are confident with no regressions.
540b57cec5SDimitry Andric static cl::opt<bool> DisableRequireStructuredCFG(
550b57cec5SDimitry Andric "disable-nvptx-require-structured-cfg",
560b57cec5SDimitry Andric cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
570b57cec5SDimitry Andric "structured CFG. The requirement should be disabled only when "
580b57cec5SDimitry Andric "unexpected regressions happen."),
590b57cec5SDimitry Andric cl::init(false), cl::Hidden);
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric static cl::opt<bool> UseShortPointersOpt(
620b57cec5SDimitry Andric "nvptx-short-ptr",
630b57cec5SDimitry Andric cl::desc(
640b57cec5SDimitry Andric "Use 32-bit pointers for accessing const/local/shared address spaces."),
650b57cec5SDimitry Andric cl::init(false), cl::Hidden);
660b57cec5SDimitry Andric
670b57cec5SDimitry Andric namespace llvm {
680b57cec5SDimitry Andric
6906c3fb27SDimitry Andric void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
700b57cec5SDimitry Andric void initializeNVPTXAllocaHoistingPass(PassRegistry &);
710b57cec5SDimitry Andric void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &);
72bdd1243dSDimitry Andric void initializeNVPTXAtomicLowerPass(PassRegistry &);
7306c3fb27SDimitry Andric void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
740b57cec5SDimitry Andric void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
750b57cec5SDimitry Andric void initializeNVPTXLowerAllocaPass(PassRegistry &);
7606c3fb27SDimitry Andric void initializeNVPTXLowerUnreachablePass(PassRegistry &);
7706c3fb27SDimitry Andric void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
78bdd1243dSDimitry Andric void initializeNVPTXLowerArgsPass(PassRegistry &);
790b57cec5SDimitry Andric void initializeNVPTXProxyRegErasurePass(PassRegistry &);
80bdd1243dSDimitry Andric void initializeNVVMIntrRangePass(PassRegistry &);
81bdd1243dSDimitry Andric void initializeNVVMReflectPass(PassRegistry &);
8206c3fb27SDimitry Andric void initializeNVPTXAAWrapperPassPass(PassRegistry &);
8306c3fb27SDimitry Andric void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
840b57cec5SDimitry Andric
850b57cec5SDimitry Andric } // end namespace llvm
860b57cec5SDimitry Andric
LLVMInitializeNVPTXTarget()87480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
880b57cec5SDimitry Andric // Register the target.
890b57cec5SDimitry Andric RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
900b57cec5SDimitry Andric RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
910b57cec5SDimitry Andric
92bdd1243dSDimitry Andric PassRegistry &PR = *PassRegistry::getPassRegistry();
930b57cec5SDimitry Andric // FIXME: This pass is really intended to be invoked during IR optimization,
940b57cec5SDimitry Andric // but it's very NVPTX-specific.
950b57cec5SDimitry Andric initializeNVVMReflectPass(PR);
960b57cec5SDimitry Andric initializeNVVMIntrRangePass(PR);
9706c3fb27SDimitry Andric initializeGenericToNVVMLegacyPassPass(PR);
980b57cec5SDimitry Andric initializeNVPTXAllocaHoistingPass(PR);
990b57cec5SDimitry Andric initializeNVPTXAssignValidGlobalNamesPass(PR);
100fe6060f1SDimitry Andric initializeNVPTXAtomicLowerPass(PR);
1010b57cec5SDimitry Andric initializeNVPTXLowerArgsPass(PR);
1020b57cec5SDimitry Andric initializeNVPTXLowerAllocaPass(PR);
10306c3fb27SDimitry Andric initializeNVPTXLowerUnreachablePass(PR);
10406c3fb27SDimitry Andric initializeNVPTXCtorDtorLoweringLegacyPass(PR);
1050b57cec5SDimitry Andric initializeNVPTXLowerAggrCopiesPass(PR);
1060b57cec5SDimitry Andric initializeNVPTXProxyRegErasurePass(PR);
107*0fca6ea1SDimitry Andric initializeNVPTXDAGToDAGISelLegacyPass(PR);
10806c3fb27SDimitry Andric initializeNVPTXAAWrapperPassPass(PR);
10906c3fb27SDimitry Andric initializeNVPTXExternalAAWrapperPass(PR);
1100b57cec5SDimitry Andric }
1110b57cec5SDimitry Andric
/// Builds the data layout string handed to the LLVMTargetMachine base class.
///
/// \param is64Bit           true for the 64-bit target, false for the 32-bit
///                          one.
/// \param UseShortPointers  on the 64-bit target only, adds 32-bit pointer
///                          entries for address spaces 3, 4 and 5; ignored on
///                          the 32-bit target.
/// \returns "e", followed by the optional pointer entries, followed by the
///          integer/vector/native-width entries common to both targets.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  // Select the pointer portion first; it stays empty for the plain 64-bit
  // configuration.
  const char *PointerSpec = "";
  if (!is64Bit)
    PointerSpec = "-p:32:32";
  else if (UseShortPointers)
    PointerSpec = "-p3:32:32-p4:32:32-p5:32:32";

  std::string Layout("e");
  Layout.append(PointerSpec);
  Layout.append("-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  return Layout;
}
1240b57cec5SDimitry Andric
NVPTXTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,std::optional<Reloc::Model> RM,std::optional<CodeModel::Model> CM,CodeGenOptLevel OL,bool is64bit)1250b57cec5SDimitry Andric NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
1260b57cec5SDimitry Andric StringRef CPU, StringRef FS,
1270b57cec5SDimitry Andric const TargetOptions &Options,
128bdd1243dSDimitry Andric std::optional<Reloc::Model> RM,
129bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM,
1305f757f3fSDimitry Andric CodeGenOptLevel OL, bool is64bit)
1310b57cec5SDimitry Andric // The pic relocation model is used regardless of what the client has
1320b57cec5SDimitry Andric // specified, as it is the only relocation model currently supported.
1330b57cec5SDimitry Andric : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
1340b57cec5SDimitry Andric CPU, FS, Options, Reloc::PIC_,
1350b57cec5SDimitry Andric getEffectiveCodeModel(CM, CodeModel::Small), OL),
136*0fca6ea1SDimitry Andric is64bit(is64bit), TLOF(std::make_unique<NVPTXTargetObjectFile>()),
137bdd1243dSDimitry Andric Subtarget(TT, std::string(CPU), std::string(FS), *this),
138bdd1243dSDimitry Andric StrPool(StrAlloc) {
1390b57cec5SDimitry Andric if (TT.getOS() == Triple::NVCL)
1400b57cec5SDimitry Andric drvInterface = NVPTX::NVCL;
1410b57cec5SDimitry Andric else
1420b57cec5SDimitry Andric drvInterface = NVPTX::CUDA;
1430b57cec5SDimitry Andric if (!DisableRequireStructuredCFG)
1440b57cec5SDimitry Andric setRequiresStructuredCFG(true);
1450b57cec5SDimitry Andric initAsmInfo();
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric
1480b57cec5SDimitry Andric NVPTXTargetMachine::~NVPTXTargetMachine() = default;
1490b57cec5SDimitry Andric
anchor()1500b57cec5SDimitry Andric void NVPTXTargetMachine32::anchor() {}
1510b57cec5SDimitry Andric
NVPTXTargetMachine32(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,std::optional<Reloc::Model> RM,std::optional<CodeModel::Model> CM,CodeGenOptLevel OL,bool JIT)1520b57cec5SDimitry Andric NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
1530b57cec5SDimitry Andric StringRef CPU, StringRef FS,
1540b57cec5SDimitry Andric const TargetOptions &Options,
155bdd1243dSDimitry Andric std::optional<Reloc::Model> RM,
156bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM,
1575f757f3fSDimitry Andric CodeGenOptLevel OL, bool JIT)
1580b57cec5SDimitry Andric : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
1590b57cec5SDimitry Andric
anchor()1600b57cec5SDimitry Andric void NVPTXTargetMachine64::anchor() {}
1610b57cec5SDimitry Andric
NVPTXTargetMachine64(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,std::optional<Reloc::Model> RM,std::optional<CodeModel::Model> CM,CodeGenOptLevel OL,bool JIT)1620b57cec5SDimitry Andric NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
1630b57cec5SDimitry Andric StringRef CPU, StringRef FS,
1640b57cec5SDimitry Andric const TargetOptions &Options,
165bdd1243dSDimitry Andric std::optional<Reloc::Model> RM,
166bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM,
1675f757f3fSDimitry Andric CodeGenOptLevel OL, bool JIT)
1680b57cec5SDimitry Andric : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
1690b57cec5SDimitry Andric
1700b57cec5SDimitry Andric namespace {
1710b57cec5SDimitry Andric
1720b57cec5SDimitry Andric class NVPTXPassConfig : public TargetPassConfig {
1730b57cec5SDimitry Andric public:
NVPTXPassConfig(NVPTXTargetMachine & TM,PassManagerBase & PM)1740b57cec5SDimitry Andric NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
1750b57cec5SDimitry Andric : TargetPassConfig(TM, PM) {}
1760b57cec5SDimitry Andric
getNVPTXTargetMachine() const1770b57cec5SDimitry Andric NVPTXTargetMachine &getNVPTXTargetMachine() const {
1780b57cec5SDimitry Andric return getTM<NVPTXTargetMachine>();
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric void addIRPasses() override;
1820b57cec5SDimitry Andric bool addInstSelector() override;
1830b57cec5SDimitry Andric void addPreRegAlloc() override;
1840b57cec5SDimitry Andric void addPostRegAlloc() override;
1850b57cec5SDimitry Andric void addMachineSSAOptimization() override;
1860b57cec5SDimitry Andric
1870b57cec5SDimitry Andric FunctionPass *createTargetRegisterAllocator(bool) override;
1880b57cec5SDimitry Andric void addFastRegAlloc() override;
1890b57cec5SDimitry Andric void addOptimizedRegAlloc() override;
1900b57cec5SDimitry Andric
addRegAssignAndRewriteFast()191e8d8bef9SDimitry Andric bool addRegAssignAndRewriteFast() override {
1920b57cec5SDimitry Andric llvm_unreachable("should not be used");
1930b57cec5SDimitry Andric }
1940b57cec5SDimitry Andric
addRegAssignAndRewriteOptimized()195e8d8bef9SDimitry Andric bool addRegAssignAndRewriteOptimized() override {
1960b57cec5SDimitry Andric llvm_unreachable("should not be used");
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric
1990b57cec5SDimitry Andric private:
2000b57cec5SDimitry Andric // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
2010b57cec5SDimitry Andric // function is only called in opt mode.
2020b57cec5SDimitry Andric void addEarlyCSEOrGVNPass();
2030b57cec5SDimitry Andric
2040b57cec5SDimitry Andric // Add passes that propagate special memory spaces.
2050b57cec5SDimitry Andric void addAddressSpaceInferencePasses();
2060b57cec5SDimitry Andric
2070b57cec5SDimitry Andric // Add passes that perform straight-line scalar optimizations.
2080b57cec5SDimitry Andric void addStraightLineScalarOptimizationPasses();
2090b57cec5SDimitry Andric };
2100b57cec5SDimitry Andric
2110b57cec5SDimitry Andric } // end anonymous namespace
2120b57cec5SDimitry Andric
createPassConfig(PassManagerBase & PM)2130b57cec5SDimitry Andric TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
2140b57cec5SDimitry Andric return new NVPTXPassConfig(*this, PM);
2150b57cec5SDimitry Andric }
2160b57cec5SDimitry Andric
createMachineFunctionInfo(BumpPtrAllocator & Allocator,const Function & F,const TargetSubtargetInfo * STI) const217bdd1243dSDimitry Andric MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo(
218bdd1243dSDimitry Andric BumpPtrAllocator &Allocator, const Function &F,
219bdd1243dSDimitry Andric const TargetSubtargetInfo *STI) const {
220bdd1243dSDimitry Andric return NVPTXMachineFunctionInfo::create<NVPTXMachineFunctionInfo>(Allocator,
221bdd1243dSDimitry Andric F, STI);
2220b57cec5SDimitry Andric }
2230b57cec5SDimitry Andric
registerDefaultAliasAnalyses(AAManager & AAM)22406c3fb27SDimitry Andric void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
22506c3fb27SDimitry Andric AAM.registerFunctionAnalysis<NVPTXAA>();
22606c3fb27SDimitry Andric }
22706c3fb27SDimitry Andric
registerPassBuilderCallbacks(PassBuilder & PB)228*0fca6ea1SDimitry Andric void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
229*0fca6ea1SDimitry Andric #define GET_PASS_REGISTRY "NVPTXPassRegistry.def"
230*0fca6ea1SDimitry Andric #include "llvm/Passes/TargetPassRegistry.inc"
23106c3fb27SDimitry Andric
232e8d8bef9SDimitry Andric PB.registerPipelineStartEPCallback(
233349cc55cSDimitry Andric [this](ModulePassManager &PM, OptimizationLevel Level) {
234fe6060f1SDimitry Andric FunctionPassManager FPM;
235e8d8bef9SDimitry Andric FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
236*0fca6ea1SDimitry Andric // Note: NVVMIntrRangePass was causing numerical discrepancies at one
237*0fca6ea1SDimitry Andric // point, if issues crop up, consider disabling.
238*0fca6ea1SDimitry Andric FPM.addPass(NVVMIntrRangePass());
239e8d8bef9SDimitry Andric PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
240e8d8bef9SDimitry Andric });
241e8d8bef9SDimitry Andric }
242e8d8bef9SDimitry Andric
2430b57cec5SDimitry Andric TargetTransformInfo
getTargetTransformInfo(const Function & F) const24481ad6265SDimitry Andric NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const {
2450b57cec5SDimitry Andric return TargetTransformInfo(NVPTXTTIImpl(this, F));
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric
248349cc55cSDimitry Andric std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value * V) const249349cc55cSDimitry Andric NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
250349cc55cSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(V)) {
251349cc55cSDimitry Andric switch (II->getIntrinsicID()) {
252349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_const:
253349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
254349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_global:
255349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
256349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_local:
257349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
258349cc55cSDimitry Andric case Intrinsic::nvvm_isspacep_shared:
25906c3fb27SDimitry Andric case Intrinsic::nvvm_isspacep_shared_cluster:
260349cc55cSDimitry Andric return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
261349cc55cSDimitry Andric default:
262349cc55cSDimitry Andric break;
263349cc55cSDimitry Andric }
264349cc55cSDimitry Andric }
265349cc55cSDimitry Andric return std::make_pair(nullptr, -1);
266349cc55cSDimitry Andric }
267349cc55cSDimitry Andric
addEarlyCSEOrGVNPass()2680b57cec5SDimitry Andric void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
2695f757f3fSDimitry Andric if (getOptLevel() == CodeGenOptLevel::Aggressive)
2700b57cec5SDimitry Andric addPass(createGVNPass());
2710b57cec5SDimitry Andric else
2720b57cec5SDimitry Andric addPass(createEarlyCSEPass());
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric
addAddressSpaceInferencePasses()2750b57cec5SDimitry Andric void NVPTXPassConfig::addAddressSpaceInferencePasses() {
2760b57cec5SDimitry Andric // NVPTXLowerArgs emits alloca for byval parameters which can often
2770b57cec5SDimitry Andric // be eliminated by SROA.
2780b57cec5SDimitry Andric addPass(createSROAPass());
2790b57cec5SDimitry Andric addPass(createNVPTXLowerAllocaPass());
2800b57cec5SDimitry Andric addPass(createInferAddressSpacesPass());
281fe6060f1SDimitry Andric addPass(createNVPTXAtomicLowerPass());
2820b57cec5SDimitry Andric }
2830b57cec5SDimitry Andric
addStraightLineScalarOptimizationPasses()2840b57cec5SDimitry Andric void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
2850b57cec5SDimitry Andric addPass(createSeparateConstOffsetFromGEPPass());
2860b57cec5SDimitry Andric addPass(createSpeculativeExecutionPass());
2870b57cec5SDimitry Andric // ReassociateGEPs exposes more opportunites for SLSR. See
2880b57cec5SDimitry Andric // the example in reassociate-geps-and-slsr.ll.
2890b57cec5SDimitry Andric addPass(createStraightLineStrengthReducePass());
2900b57cec5SDimitry Andric // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
2910b57cec5SDimitry Andric // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
2920b57cec5SDimitry Andric // for some of our benchmarks.
2930b57cec5SDimitry Andric addEarlyCSEOrGVNPass();
2940b57cec5SDimitry Andric // Run NaryReassociate after EarlyCSE/GVN to be more effective.
2950b57cec5SDimitry Andric addPass(createNaryReassociatePass());
2960b57cec5SDimitry Andric // NaryReassociate on GEPs creates redundant common expressions, so run
2970b57cec5SDimitry Andric // EarlyCSE after it.
2980b57cec5SDimitry Andric addPass(createEarlyCSEPass());
2990b57cec5SDimitry Andric }
3000b57cec5SDimitry Andric
addIRPasses()3010b57cec5SDimitry Andric void NVPTXPassConfig::addIRPasses() {
3020b57cec5SDimitry Andric // The following passes are known to not play well with virtual regs hanging
3030b57cec5SDimitry Andric // around after register allocation (which in our case, is *all* registers).
3040b57cec5SDimitry Andric // We explicitly disable them here. We do, however, need some functionality
3050b57cec5SDimitry Andric // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
3060b57cec5SDimitry Andric // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
3070b57cec5SDimitry Andric disablePass(&PrologEpilogCodeInserterID);
308bdd1243dSDimitry Andric disablePass(&MachineLateInstrsCleanupID);
3090b57cec5SDimitry Andric disablePass(&MachineCopyPropagationID);
3100b57cec5SDimitry Andric disablePass(&TailDuplicateID);
3110b57cec5SDimitry Andric disablePass(&StackMapLivenessID);
3120b57cec5SDimitry Andric disablePass(&LiveDebugValuesID);
3130b57cec5SDimitry Andric disablePass(&PostRAMachineSinkingID);
3140b57cec5SDimitry Andric disablePass(&PostRASchedulerID);
3150b57cec5SDimitry Andric disablePass(&FuncletLayoutID);
3160b57cec5SDimitry Andric disablePass(&PatchableFunctionID);
3170b57cec5SDimitry Andric disablePass(&ShrinkWrapID);
3180b57cec5SDimitry Andric
31906c3fb27SDimitry Andric addPass(createNVPTXAAWrapperPass());
32006c3fb27SDimitry Andric addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
32106c3fb27SDimitry Andric if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
32206c3fb27SDimitry Andric AAR.addAAResult(WrapperPass->getResult());
32306c3fb27SDimitry Andric }));
32406c3fb27SDimitry Andric
3250b57cec5SDimitry Andric // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
3260b57cec5SDimitry Andric // it here does nothing. But since we need it for correctness when lowering
3270b57cec5SDimitry Andric // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
3280b57cec5SDimitry Andric // call addEarlyAsPossiblePasses.
3290b57cec5SDimitry Andric const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
3300b57cec5SDimitry Andric addPass(createNVVMReflectPass(ST.getSmVersion()));
3310b57cec5SDimitry Andric
3325f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None)
3330b57cec5SDimitry Andric addPass(createNVPTXImageOptimizerPass());
3340b57cec5SDimitry Andric addPass(createNVPTXAssignValidGlobalNamesPass());
33506c3fb27SDimitry Andric addPass(createGenericToNVVMLegacyPass());
3360b57cec5SDimitry Andric
3370b57cec5SDimitry Andric // NVPTXLowerArgs is required for correctness and should be run right
3380b57cec5SDimitry Andric // before the address space inference passes.
33906c3fb27SDimitry Andric addPass(createNVPTXLowerArgsPass());
3405f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) {
3410b57cec5SDimitry Andric addAddressSpaceInferencePasses();
3420b57cec5SDimitry Andric addStraightLineScalarOptimizationPasses();
3430b57cec5SDimitry Andric }
3440b57cec5SDimitry Andric
345*0fca6ea1SDimitry Andric addPass(createAtomicExpandLegacyPass());
346*0fca6ea1SDimitry Andric addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
34706c3fb27SDimitry Andric addPass(createNVPTXCtorDtorLoweringLegacyPass());
34881ad6265SDimitry Andric
3490b57cec5SDimitry Andric // === LSR and other generic IR passes ===
3500b57cec5SDimitry Andric TargetPassConfig::addIRPasses();
3510b57cec5SDimitry Andric // EarlyCSE is not always strong enough to clean up what LSR produces. For
3520b57cec5SDimitry Andric // example, GVN can combine
3530b57cec5SDimitry Andric //
3540b57cec5SDimitry Andric // %0 = add %a, %b
3550b57cec5SDimitry Andric // %1 = add %b, %a
3560b57cec5SDimitry Andric //
3570b57cec5SDimitry Andric // and
3580b57cec5SDimitry Andric //
3590b57cec5SDimitry Andric // %0 = shl nsw %a, 2
3600b57cec5SDimitry Andric // %1 = shl %a, 2
3610b57cec5SDimitry Andric //
3620b57cec5SDimitry Andric // but EarlyCSE can do neither of them.
3635f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) {
3640b57cec5SDimitry Andric addEarlyCSEOrGVNPass();
3655ffd83dbSDimitry Andric if (!DisableLoadStoreVectorizer)
3665ffd83dbSDimitry Andric addPass(createLoadStoreVectorizerPass());
367349cc55cSDimitry Andric addPass(createSROAPass());
3685ffd83dbSDimitry Andric }
36906c3fb27SDimitry Andric
3705f757f3fSDimitry Andric const auto &Options = getNVPTXTargetMachine().Options;
3715f757f3fSDimitry Andric addPass(createNVPTXLowerUnreachablePass(Options.TrapUnreachable,
3725f757f3fSDimitry Andric Options.NoTrapAfterNoreturn));
3730b57cec5SDimitry Andric }
3740b57cec5SDimitry Andric
addInstSelector()3750b57cec5SDimitry Andric bool NVPTXPassConfig::addInstSelector() {
3760b57cec5SDimitry Andric const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
3770b57cec5SDimitry Andric
3780b57cec5SDimitry Andric addPass(createLowerAggrCopies());
3790b57cec5SDimitry Andric addPass(createAllocaHoisting());
3800b57cec5SDimitry Andric addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
3810b57cec5SDimitry Andric
3820b57cec5SDimitry Andric if (!ST.hasImageHandles())
3830b57cec5SDimitry Andric addPass(createNVPTXReplaceImageHandlesPass());
3840b57cec5SDimitry Andric
3850b57cec5SDimitry Andric return false;
3860b57cec5SDimitry Andric }
3870b57cec5SDimitry Andric
addPreRegAlloc()3880b57cec5SDimitry Andric void NVPTXPassConfig::addPreRegAlloc() {
3890b57cec5SDimitry Andric // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
3900b57cec5SDimitry Andric addPass(createNVPTXProxyRegErasurePass());
3910b57cec5SDimitry Andric }
3920b57cec5SDimitry Andric
addPostRegAlloc()3930b57cec5SDimitry Andric void NVPTXPassConfig::addPostRegAlloc() {
394349cc55cSDimitry Andric addPass(createNVPTXPrologEpilogPass());
3955f757f3fSDimitry Andric if (getOptLevel() != CodeGenOptLevel::None) {
3960b57cec5SDimitry Andric // NVPTXPrologEpilogPass calculates frame object offset and replace frame
3970b57cec5SDimitry Andric // index with VRFrame register. NVPTXPeephole need to be run after that and
3980b57cec5SDimitry Andric // will replace VRFrame with VRFrameLocal when possible.
3990b57cec5SDimitry Andric addPass(createNVPTXPeephole());
4000b57cec5SDimitry Andric }
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric
createTargetRegisterAllocator(bool)4030b57cec5SDimitry Andric FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
4040b57cec5SDimitry Andric return nullptr; // No reg alloc
4050b57cec5SDimitry Andric }
4060b57cec5SDimitry Andric
addFastRegAlloc()4070b57cec5SDimitry Andric void NVPTXPassConfig::addFastRegAlloc() {
4080b57cec5SDimitry Andric addPass(&PHIEliminationID);
4090b57cec5SDimitry Andric addPass(&TwoAddressInstructionPassID);
4100b57cec5SDimitry Andric }
4110b57cec5SDimitry Andric
addOptimizedRegAlloc()4120b57cec5SDimitry Andric void NVPTXPassConfig::addOptimizedRegAlloc() {
4130b57cec5SDimitry Andric addPass(&ProcessImplicitDefsID);
4140b57cec5SDimitry Andric addPass(&LiveVariablesID);
4150b57cec5SDimitry Andric addPass(&MachineLoopInfoID);
4160b57cec5SDimitry Andric addPass(&PHIEliminationID);
4170b57cec5SDimitry Andric
4180b57cec5SDimitry Andric addPass(&TwoAddressInstructionPassID);
4190b57cec5SDimitry Andric addPass(&RegisterCoalescerID);
4200b57cec5SDimitry Andric
4210b57cec5SDimitry Andric // PreRA instruction scheduling.
4220b57cec5SDimitry Andric if (addPass(&MachineSchedulerID))
4230b57cec5SDimitry Andric printAndVerify("After Machine Scheduling");
4240b57cec5SDimitry Andric
4250b57cec5SDimitry Andric addPass(&StackSlotColoringID);
4260b57cec5SDimitry Andric
4270b57cec5SDimitry Andric // FIXME: Needs physical registers
4280b57cec5SDimitry Andric // addPass(&MachineLICMID);
4290b57cec5SDimitry Andric
4300b57cec5SDimitry Andric printAndVerify("After StackSlotColoring");
4310b57cec5SDimitry Andric }
4320b57cec5SDimitry Andric
addMachineSSAOptimization()4330b57cec5SDimitry Andric void NVPTXPassConfig::addMachineSSAOptimization() {
4340b57cec5SDimitry Andric // Pre-ra tail duplication.
4350b57cec5SDimitry Andric if (addPass(&EarlyTailDuplicateID))
4360b57cec5SDimitry Andric printAndVerify("After Pre-RegAlloc TailDuplicate");
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andric // Optimize PHIs before DCE: removing dead PHI cycles may make more
4390b57cec5SDimitry Andric // instructions dead.
4400b57cec5SDimitry Andric addPass(&OptimizePHIsID);
4410b57cec5SDimitry Andric
4420b57cec5SDimitry Andric // This pass merges large allocas. StackSlotColoring is a different pass
4430b57cec5SDimitry Andric // which merges spill slots.
4440b57cec5SDimitry Andric addPass(&StackColoringID);
4450b57cec5SDimitry Andric
4460b57cec5SDimitry Andric // If the target requests it, assign local variables to stack slots relative
4470b57cec5SDimitry Andric // to one another and simplify frame index references where possible.
4480b57cec5SDimitry Andric addPass(&LocalStackSlotAllocationID);
4490b57cec5SDimitry Andric
4500b57cec5SDimitry Andric // With optimization, dead code should already be eliminated. However
4510b57cec5SDimitry Andric // there is one known exception: lowered code for arguments that are only
4520b57cec5SDimitry Andric // used by tail calls, where the tail calls reuse the incoming stack
4530b57cec5SDimitry Andric // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
4540b57cec5SDimitry Andric addPass(&DeadMachineInstructionElimID);
4550b57cec5SDimitry Andric printAndVerify("After codegen DCE pass");
4560b57cec5SDimitry Andric
4570b57cec5SDimitry Andric // Allow targets to insert passes that improve instruction level parallelism,
4580b57cec5SDimitry Andric // like if-conversion. Such passes will typically need dominator trees and
4590b57cec5SDimitry Andric // loop info, just like LICM and CSE below.
4600b57cec5SDimitry Andric if (addILPOpts())
4610b57cec5SDimitry Andric printAndVerify("After ILP optimizations");
4620b57cec5SDimitry Andric
4630b57cec5SDimitry Andric addPass(&EarlyMachineLICMID);
4640b57cec5SDimitry Andric addPass(&MachineCSEID);
4650b57cec5SDimitry Andric
4660b57cec5SDimitry Andric addPass(&MachineSinkingID);
4670b57cec5SDimitry Andric printAndVerify("After Machine LICM, CSE and Sinking passes");
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric addPass(&PeepholeOptimizerID);
4700b57cec5SDimitry Andric printAndVerify("After codegen peephole optimization pass");
4710b57cec5SDimitry Andric }
472