//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements two passes that enable HIP C++ Standard Parallelism
// Support:
//
// 1. AcceleratorCodeSelection (required): Given that only algorithms are
//    accelerated, and that the accelerated implementation exists in the form of
//    a compute kernel, we assume that only the kernel, and all functions
//    reachable from it, constitute code that the user expects the accelerator
//    to execute. Thus, we identify the set of all functions reachable from
//    kernels, and then remove all unreachable ones. This last part is necessary
//    because it is possible for code that the user did not expect to execute on
//    an accelerator to contain constructs that cannot be handled by the target
//    BE, which cannot be provably demonstrated to be dead code in general, and
//    thus can lead to mis-compilation. The degenerate case of this is when a
//    Module contains no kernels (the parent TU had no algorithm invocations fit
//    for acceleration), which we handle by completely emptying said module.
//    **NOTE**: The above does not handle indirectly reachable functions, i.e.
//              it is possible to obtain a case where the target of an indirect
//              call is otherwise unreachable and thus is removed; this
//              restriction is aligned with the current `-hipstdpar` limitations
//              and will be relaxed in the future.
//
// 2. AllocationInterposition (required only when on-demand paging is
//    unsupported): Some accelerators or operating systems might not support
//    transparent on-demand paging. Thus, they would only be able to access
//    memory that is allocated by an accelerator-aware mechanism. For such cases
//    the user can opt into enabling allocation / deallocation interposition,
//    whereby we replace calls to known allocation / deallocation functions with
//    calls to runtime implemented equivalents that forward the requests to
//    accelerator-aware interfaces. We also support freeing system allocated
//    memory that ends up in one of the runtime equivalents, since this can
//    happen if e.g. a library that was compiled without interposition returns
//    an allocation that can be validly passed to `free`.
//===----------------------------------------------------------------------===//
415f757f3fSDimitry Andric
#include "llvm/Transforms/HipStdPar/HipStdPar.h"

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <cassert>
#include <string>
#include <utility>

using namespace llvm;
605f757f3fSDimitry Andric
615f757f3fSDimitry Andric template<typename T>
eraseFromModule(T & ToErase)625f757f3fSDimitry Andric static inline void eraseFromModule(T &ToErase) {
635f757f3fSDimitry Andric ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
645f757f3fSDimitry Andric ToErase.eraseFromParent();
655f757f3fSDimitry Andric }
665f757f3fSDimitry Andric
checkIfSupported(GlobalVariable & G)675f757f3fSDimitry Andric static inline bool checkIfSupported(GlobalVariable &G) {
685f757f3fSDimitry Andric if (!G.isThreadLocal())
695f757f3fSDimitry Andric return true;
705f757f3fSDimitry Andric
715f757f3fSDimitry Andric G.dropDroppableUses();
725f757f3fSDimitry Andric
735f757f3fSDimitry Andric if (!G.isConstantUsed())
745f757f3fSDimitry Andric return true;
755f757f3fSDimitry Andric
765f757f3fSDimitry Andric std::string W;
775f757f3fSDimitry Andric raw_string_ostream OS(W);
785f757f3fSDimitry Andric
795f757f3fSDimitry Andric OS << "Accelerator does not support the thread_local variable "
805f757f3fSDimitry Andric << G.getName();
815f757f3fSDimitry Andric
825f757f3fSDimitry Andric Instruction *I = nullptr;
835f757f3fSDimitry Andric SmallVector<User *> Tmp(G.user_begin(), G.user_end());
845f757f3fSDimitry Andric SmallPtrSet<User *, 5> Visited;
855f757f3fSDimitry Andric do {
865f757f3fSDimitry Andric auto U = std::move(Tmp.back());
875f757f3fSDimitry Andric Tmp.pop_back();
885f757f3fSDimitry Andric
895f757f3fSDimitry Andric if (Visited.contains(U))
905f757f3fSDimitry Andric continue;
915f757f3fSDimitry Andric
925f757f3fSDimitry Andric if (isa<Instruction>(U))
935f757f3fSDimitry Andric I = cast<Instruction>(U);
945f757f3fSDimitry Andric else
955f757f3fSDimitry Andric Tmp.insert(Tmp.end(), U->user_begin(), U->user_end());
965f757f3fSDimitry Andric
975f757f3fSDimitry Andric Visited.insert(U);
985f757f3fSDimitry Andric } while (!I && !Tmp.empty());
995f757f3fSDimitry Andric
1005f757f3fSDimitry Andric assert(I && "thread_local global should have at least one non-constant use.");
1015f757f3fSDimitry Andric
1025f757f3fSDimitry Andric G.getContext().diagnose(
1035f757f3fSDimitry Andric DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
1045f757f3fSDimitry Andric I->getDebugLoc(), DS_Error));
1055f757f3fSDimitry Andric
1065f757f3fSDimitry Andric return false;
1075f757f3fSDimitry Andric }
1085f757f3fSDimitry Andric
clearModule(Module & M)1095f757f3fSDimitry Andric static inline void clearModule(Module &M) { // TODO: simplify.
1105f757f3fSDimitry Andric while (!M.functions().empty())
1115f757f3fSDimitry Andric eraseFromModule(*M.begin());
1125f757f3fSDimitry Andric while (!M.globals().empty())
1135f757f3fSDimitry Andric eraseFromModule(*M.globals().begin());
1145f757f3fSDimitry Andric while (!M.aliases().empty())
1155f757f3fSDimitry Andric eraseFromModule(*M.aliases().begin());
1165f757f3fSDimitry Andric while (!M.ifuncs().empty())
1175f757f3fSDimitry Andric eraseFromModule(*M.ifuncs().begin());
1185f757f3fSDimitry Andric }
1195f757f3fSDimitry Andric
maybeHandleGlobals(Module & M)1205f757f3fSDimitry Andric static inline void maybeHandleGlobals(Module &M) {
1215f757f3fSDimitry Andric unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
1225f757f3fSDimitry Andric for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
1235f757f3fSDimitry Andric if (!checkIfSupported(G))
1245f757f3fSDimitry Andric return clearModule(M);
1255f757f3fSDimitry Andric
1265f757f3fSDimitry Andric if (G.isThreadLocal())
1275f757f3fSDimitry Andric continue;
1285f757f3fSDimitry Andric if (G.isConstant())
1295f757f3fSDimitry Andric continue;
1305f757f3fSDimitry Andric if (G.getAddressSpace() != GlobAS)
1315f757f3fSDimitry Andric continue;
1325f757f3fSDimitry Andric if (G.getLinkage() != GlobalVariable::ExternalLinkage)
1335f757f3fSDimitry Andric continue;
1345f757f3fSDimitry Andric
1355f757f3fSDimitry Andric G.setLinkage(GlobalVariable::ExternalWeakLinkage);
136*0fca6ea1SDimitry Andric G.setInitializer(nullptr);
1375f757f3fSDimitry Andric G.setExternallyInitialized(true);
1385f757f3fSDimitry Andric }
1395f757f3fSDimitry Andric }
1405f757f3fSDimitry Andric
1415f757f3fSDimitry Andric template<unsigned N>
removeUnreachableFunctions(const SmallPtrSet<const Function *,N> & Reachable,Module & M)1425f757f3fSDimitry Andric static inline void removeUnreachableFunctions(
1435f757f3fSDimitry Andric const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
1445f757f3fSDimitry Andric removeFromUsedLists(M, [&](Constant *C) {
1455f757f3fSDimitry Andric if (auto F = dyn_cast<Function>(C))
1465f757f3fSDimitry Andric return !Reachable.contains(F);
1475f757f3fSDimitry Andric
1485f757f3fSDimitry Andric return false;
1495f757f3fSDimitry Andric });
1505f757f3fSDimitry Andric
1515f757f3fSDimitry Andric SmallVector<std::reference_wrapper<Function>> ToRemove;
1525f757f3fSDimitry Andric copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
1535f757f3fSDimitry Andric return !F.isIntrinsic() && !Reachable.contains(&F);
1545f757f3fSDimitry Andric });
1555f757f3fSDimitry Andric
1565f757f3fSDimitry Andric for_each(ToRemove, eraseFromModule<Function>);
1575f757f3fSDimitry Andric }
1585f757f3fSDimitry Andric
isAcceleratorExecutionRoot(const Function * F)1595f757f3fSDimitry Andric static inline bool isAcceleratorExecutionRoot(const Function *F) {
1605f757f3fSDimitry Andric if (!F)
1615f757f3fSDimitry Andric return false;
1625f757f3fSDimitry Andric
1635f757f3fSDimitry Andric return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
1645f757f3fSDimitry Andric }
1655f757f3fSDimitry Andric
checkIfSupported(const Function * F,const CallBase * CB)1665f757f3fSDimitry Andric static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
1675f757f3fSDimitry Andric const auto Dx = F->getName().rfind("__hipstdpar_unsupported");
1685f757f3fSDimitry Andric
1695f757f3fSDimitry Andric if (Dx == StringRef::npos)
1705f757f3fSDimitry Andric return true;
1715f757f3fSDimitry Andric
1725f757f3fSDimitry Andric const auto N = F->getName().substr(0, Dx);
1735f757f3fSDimitry Andric
1745f757f3fSDimitry Andric std::string W;
1755f757f3fSDimitry Andric raw_string_ostream OS(W);
1765f757f3fSDimitry Andric
1775f757f3fSDimitry Andric if (N == "__ASM")
1785f757f3fSDimitry Andric OS << "Accelerator does not support the ASM block:\n"
1795f757f3fSDimitry Andric << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
1805f757f3fSDimitry Andric else
1815f757f3fSDimitry Andric OS << "Accelerator does not support the " << N << " function.";
1825f757f3fSDimitry Andric
1835f757f3fSDimitry Andric auto Caller = CB->getParent()->getParent();
1845f757f3fSDimitry Andric
1855f757f3fSDimitry Andric Caller->getContext().diagnose(
1865f757f3fSDimitry Andric DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
1875f757f3fSDimitry Andric
1885f757f3fSDimitry Andric return false;
1895f757f3fSDimitry Andric }
1905f757f3fSDimitry Andric
1915f757f3fSDimitry Andric PreservedAnalyses
run(Module & M,ModuleAnalysisManager & MAM)1925f757f3fSDimitry Andric HipStdParAcceleratorCodeSelectionPass::run(Module &M,
1935f757f3fSDimitry Andric ModuleAnalysisManager &MAM) {
1945f757f3fSDimitry Andric auto &CGA = MAM.getResult<CallGraphAnalysis>(M);
1955f757f3fSDimitry Andric
1965f757f3fSDimitry Andric SmallPtrSet<const Function *, 32> Reachable;
1975f757f3fSDimitry Andric for (auto &&CGN : CGA) {
1985f757f3fSDimitry Andric if (!isAcceleratorExecutionRoot(CGN.first))
1995f757f3fSDimitry Andric continue;
2005f757f3fSDimitry Andric
2015f757f3fSDimitry Andric Reachable.insert(CGN.first);
2025f757f3fSDimitry Andric
2035f757f3fSDimitry Andric SmallVector<const Function *> Tmp({CGN.first});
2045f757f3fSDimitry Andric do {
2055f757f3fSDimitry Andric auto F = std::move(Tmp.back());
2065f757f3fSDimitry Andric Tmp.pop_back();
2075f757f3fSDimitry Andric
2085f757f3fSDimitry Andric for (auto &&N : *CGA[F]) {
2095f757f3fSDimitry Andric if (!N.second)
2105f757f3fSDimitry Andric continue;
2115f757f3fSDimitry Andric if (!N.second->getFunction())
2125f757f3fSDimitry Andric continue;
2135f757f3fSDimitry Andric if (Reachable.contains(N.second->getFunction()))
2145f757f3fSDimitry Andric continue;
2155f757f3fSDimitry Andric
2165f757f3fSDimitry Andric if (!checkIfSupported(N.second->getFunction(),
2175f757f3fSDimitry Andric dyn_cast<CallBase>(*N.first)))
2185f757f3fSDimitry Andric return PreservedAnalyses::none();
2195f757f3fSDimitry Andric
2205f757f3fSDimitry Andric Reachable.insert(N.second->getFunction());
2215f757f3fSDimitry Andric Tmp.push_back(N.second->getFunction());
2225f757f3fSDimitry Andric }
2235f757f3fSDimitry Andric } while (!std::empty(Tmp));
2245f757f3fSDimitry Andric }
2255f757f3fSDimitry Andric
2265f757f3fSDimitry Andric if (std::empty(Reachable))
2275f757f3fSDimitry Andric clearModule(M);
2285f757f3fSDimitry Andric else
2295f757f3fSDimitry Andric removeUnreachableFunctions(Reachable, M);
2305f757f3fSDimitry Andric
2315f757f3fSDimitry Andric maybeHandleGlobals(M);
2325f757f3fSDimitry Andric
2335f757f3fSDimitry Andric return PreservedAnalyses::none();
2345f757f3fSDimitry Andric }
2355f757f3fSDimitry Andric
2365f757f3fSDimitry Andric static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
2375f757f3fSDimitry Andric {"aligned_alloc", "__hipstdpar_aligned_alloc"},
2385f757f3fSDimitry Andric {"calloc", "__hipstdpar_calloc"},
2395f757f3fSDimitry Andric {"free", "__hipstdpar_free"},
2405f757f3fSDimitry Andric {"malloc", "__hipstdpar_malloc"},
2415f757f3fSDimitry Andric {"memalign", "__hipstdpar_aligned_alloc"},
2425f757f3fSDimitry Andric {"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
2435f757f3fSDimitry Andric {"realloc", "__hipstdpar_realloc"},
2445f757f3fSDimitry Andric {"reallocarray", "__hipstdpar_realloc_array"},
2455f757f3fSDimitry Andric {"_ZdaPv", "__hipstdpar_operator_delete"},
2465f757f3fSDimitry Andric {"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
2475f757f3fSDimitry Andric {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
2485f757f3fSDimitry Andric {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
2495f757f3fSDimitry Andric {"_ZdlPv", "__hipstdpar_operator_delete"},
2505f757f3fSDimitry Andric {"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
2515f757f3fSDimitry Andric {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
2525f757f3fSDimitry Andric {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
2535f757f3fSDimitry Andric {"_Znam", "__hipstdpar_operator_new"},
2545f757f3fSDimitry Andric {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
2555f757f3fSDimitry Andric {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
2565f757f3fSDimitry Andric {"_ZnamSt11align_val_tRKSt9nothrow_t",
2575f757f3fSDimitry Andric "__hipstdpar_operator_new_aligned_nothrow"},
2585f757f3fSDimitry Andric
2595f757f3fSDimitry Andric {"_Znwm", "__hipstdpar_operator_new"},
2605f757f3fSDimitry Andric {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
2615f757f3fSDimitry Andric {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
2625f757f3fSDimitry Andric {"_ZnwmSt11align_val_tRKSt9nothrow_t",
2635f757f3fSDimitry Andric "__hipstdpar_operator_new_aligned_nothrow"},
2645f757f3fSDimitry Andric {"__builtin_calloc", "__hipstdpar_calloc"},
2655f757f3fSDimitry Andric {"__builtin_free", "__hipstdpar_free"},
2665f757f3fSDimitry Andric {"__builtin_malloc", "__hipstdpar_malloc"},
2675f757f3fSDimitry Andric {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
2685f757f3fSDimitry Andric {"__builtin_operator_new", "__hipstdpar_operator_new"},
2695f757f3fSDimitry Andric {"__builtin_realloc", "__hipstdpar_realloc"},
2705f757f3fSDimitry Andric {"__libc_calloc", "__hipstdpar_calloc"},
2715f757f3fSDimitry Andric {"__libc_free", "__hipstdpar_free"},
2725f757f3fSDimitry Andric {"__libc_malloc", "__hipstdpar_malloc"},
2735f757f3fSDimitry Andric {"__libc_memalign", "__hipstdpar_aligned_alloc"},
2745f757f3fSDimitry Andric {"__libc_realloc", "__hipstdpar_realloc"}
2755f757f3fSDimitry Andric };
2765f757f3fSDimitry Andric
2775f757f3fSDimitry Andric PreservedAnalyses
run(Module & M,ModuleAnalysisManager &)2785f757f3fSDimitry Andric HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
2795f757f3fSDimitry Andric SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
2805f757f3fSDimitry Andric std::cend(ReplaceMap));
2815f757f3fSDimitry Andric
2825f757f3fSDimitry Andric for (auto &&F : M) {
2835f757f3fSDimitry Andric if (!F.hasName())
2845f757f3fSDimitry Andric continue;
2855f757f3fSDimitry Andric if (!AllocReplacements.contains(F.getName()))
2865f757f3fSDimitry Andric continue;
2875f757f3fSDimitry Andric
2885f757f3fSDimitry Andric if (auto R = M.getFunction(AllocReplacements[F.getName()])) {
2895f757f3fSDimitry Andric F.replaceAllUsesWith(R);
2905f757f3fSDimitry Andric } else {
2915f757f3fSDimitry Andric std::string W;
2925f757f3fSDimitry Andric raw_string_ostream OS(W);
2935f757f3fSDimitry Andric
2945f757f3fSDimitry Andric OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
2955f757f3fSDimitry Andric << ". Tried to run the allocation interposition pass without the "
2965f757f3fSDimitry Andric << "replacement functions available.";
2975f757f3fSDimitry Andric
2985f757f3fSDimitry Andric F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
2995f757f3fSDimitry Andric F.getSubprogram(),
3005f757f3fSDimitry Andric DS_Warning));
3015f757f3fSDimitry Andric }
3025f757f3fSDimitry Andric }
3035f757f3fSDimitry Andric
3045f757f3fSDimitry Andric if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
3055f757f3fSDimitry Andric auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
3065f757f3fSDimitry Andric F->getAttributes());
3075f757f3fSDimitry Andric F->replaceAllUsesWith(LibcFree.getCallee());
3085f757f3fSDimitry Andric
3095f757f3fSDimitry Andric eraseFromModule(*F);
3105f757f3fSDimitry Andric }
3115f757f3fSDimitry Andric
3125f757f3fSDimitry Andric return PreservedAnalyses::none();
3135f757f3fSDimitry Andric }
314