10b57cec5SDimitry Andric //===-- Internalize.cpp - Mark functions internal -------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass loops over all of the functions and variables in the input module.
100b57cec5SDimitry Andric // If the function or variable does not need to be preserved according to the
110b57cec5SDimitry Andric // client supplied callback, it is marked as internal.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // This transformation would not be legal in a regular compilation, but it gets
140b57cec5SDimitry Andric // extra information from the linker about what is safe.
150b57cec5SDimitry Andric //
160b57cec5SDimitry Andric // For example: Internalizing a function with external linkage. Only if we are
170b57cec5SDimitry Andric // told it is only used from within this module, it is safe to do it.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric #include "llvm/Transforms/IPO/Internalize.h"
22*06c3fb27SDimitry Andric #include "llvm/ADT/SmallString.h"
230b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
240b57cec5SDimitry Andric #include "llvm/ADT/StringSet.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h"
260b57cec5SDimitry Andric #include "llvm/IR/Module.h"
270b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
280b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
29fcaf7f86SDimitry Andric #include "llvm/Support/GlobPattern.h"
300b57cec5SDimitry Andric #include "llvm/Support/LineIterator.h"
310b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
320b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
33*06c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h"
340b57cec5SDimitry Andric #include "llvm/Transforms/IPO.h"
350b57cec5SDimitry Andric using namespace llvm;
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric #define DEBUG_TYPE "internalize"
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric STATISTIC(NumAliases, "Number of aliases internalized");
400b57cec5SDimitry Andric STATISTIC(NumFunctions, "Number of functions internalized");
410b57cec5SDimitry Andric STATISTIC(NumGlobals, "Number of global vars internalized");
420b57cec5SDimitry Andric
43fcaf7f86SDimitry Andric // APIFile - A file which contains a list of symbol glob patterns that should
44fcaf7f86SDimitry Andric // not be marked external.
450b57cec5SDimitry Andric static cl::opt<std::string>
460b57cec5SDimitry Andric APIFile("internalize-public-api-file", cl::value_desc("filename"),
470b57cec5SDimitry Andric cl::desc("A file containing list of symbol names to preserve"));
480b57cec5SDimitry Andric
49fcaf7f86SDimitry Andric // APIList - A list of symbol glob patterns that should not be marked internal.
500b57cec5SDimitry Andric static cl::list<std::string>
510b57cec5SDimitry Andric APIList("internalize-public-api-list", cl::value_desc("list"),
520b57cec5SDimitry Andric cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric namespace {
550b57cec5SDimitry Andric // Helper to load an API list to preserve from file and expose it as a functor
560b57cec5SDimitry Andric // for internalization.
570b57cec5SDimitry Andric class PreserveAPIList {
580b57cec5SDimitry Andric public:
PreserveAPIList()590b57cec5SDimitry Andric PreserveAPIList() {
600b57cec5SDimitry Andric if (!APIFile.empty())
610b57cec5SDimitry Andric LoadFile(APIFile);
62fcaf7f86SDimitry Andric for (StringRef Pattern : APIList)
63fcaf7f86SDimitry Andric addGlob(Pattern);
640b57cec5SDimitry Andric }
650b57cec5SDimitry Andric
operator ()(const GlobalValue & GV)660b57cec5SDimitry Andric bool operator()(const GlobalValue &GV) {
67fcaf7f86SDimitry Andric return llvm::any_of(
68fcaf7f86SDimitry Andric ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
690b57cec5SDimitry Andric }
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric private:
720b57cec5SDimitry Andric // Contains the set of symbols loaded from file
73fcaf7f86SDimitry Andric SmallVector<GlobPattern> ExternalNames;
74fcaf7f86SDimitry Andric
addGlob(StringRef Pattern)75fcaf7f86SDimitry Andric void addGlob(StringRef Pattern) {
76fcaf7f86SDimitry Andric auto GlobOrErr = GlobPattern::create(Pattern);
77fcaf7f86SDimitry Andric if (!GlobOrErr) {
78fcaf7f86SDimitry Andric errs() << "WARNING: when loading pattern: '"
79fcaf7f86SDimitry Andric << toString(GlobOrErr.takeError()) << "' ignoring";
80fcaf7f86SDimitry Andric return;
81fcaf7f86SDimitry Andric }
82fcaf7f86SDimitry Andric ExternalNames.emplace_back(std::move(*GlobOrErr));
83fcaf7f86SDimitry Andric }
840b57cec5SDimitry Andric
LoadFile(StringRef Filename)850b57cec5SDimitry Andric void LoadFile(StringRef Filename) {
860b57cec5SDimitry Andric // Load the APIFile...
87fcaf7f86SDimitry Andric ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
880b57cec5SDimitry Andric MemoryBuffer::getFile(Filename);
89fcaf7f86SDimitry Andric if (!BufOrErr) {
900b57cec5SDimitry Andric errs() << "WARNING: Internalize couldn't load file '" << Filename
910b57cec5SDimitry Andric << "'! Continuing as if it's empty.\n";
920b57cec5SDimitry Andric return; // Just continue as if the file were empty
930b57cec5SDimitry Andric }
94fcaf7f86SDimitry Andric Buf = std::move(*BufOrErr);
95fcaf7f86SDimitry Andric for (line_iterator I(*Buf, true), E; I != E; ++I)
96fcaf7f86SDimitry Andric addGlob(*I);
970b57cec5SDimitry Andric }
98fcaf7f86SDimitry Andric
99fcaf7f86SDimitry Andric std::shared_ptr<MemoryBuffer> Buf;
1000b57cec5SDimitry Andric };
1010b57cec5SDimitry Andric } // end anonymous namespace
1020b57cec5SDimitry Andric
shouldPreserveGV(const GlobalValue & GV)1030b57cec5SDimitry Andric bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
1040b57cec5SDimitry Andric // Function must be defined here
1050b57cec5SDimitry Andric if (GV.isDeclaration())
1060b57cec5SDimitry Andric return true;
1070b57cec5SDimitry Andric
1080b57cec5SDimitry Andric // Available externally is really just a "declaration with a body".
1090b57cec5SDimitry Andric if (GV.hasAvailableExternallyLinkage())
1100b57cec5SDimitry Andric return true;
1110b57cec5SDimitry Andric
1120b57cec5SDimitry Andric // Assume that dllexported symbols are referenced elsewhere
1130b57cec5SDimitry Andric if (GV.hasDLLExportStorageClass())
1140b57cec5SDimitry Andric return true;
1150b57cec5SDimitry Andric
116fe6060f1SDimitry Andric // As the name suggests, externally initialized variables need preserving as
117fe6060f1SDimitry Andric // they would be initialized elsewhere externally.
118fe6060f1SDimitry Andric if (const auto *G = dyn_cast<GlobalVariable>(&GV))
119fe6060f1SDimitry Andric if (G->isExternallyInitialized())
120fe6060f1SDimitry Andric return true;
121fe6060f1SDimitry Andric
1220b57cec5SDimitry Andric // Already local, has nothing to do.
1230b57cec5SDimitry Andric if (GV.hasLocalLinkage())
1240b57cec5SDimitry Andric return false;
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric // Check some special cases
1270b57cec5SDimitry Andric if (AlwaysPreserved.count(GV.getName()))
1280b57cec5SDimitry Andric return true;
1290b57cec5SDimitry Andric
1300b57cec5SDimitry Andric return MustPreserveGV(GV);
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric
maybeInternalize(GlobalValue & GV,DenseMap<const Comdat *,ComdatInfo> & ComdatMap)1330b57cec5SDimitry Andric bool InternalizePass::maybeInternalize(
134fe6060f1SDimitry Andric GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
135fe6060f1SDimitry Andric SmallString<0> ComdatName;
1360b57cec5SDimitry Andric if (Comdat *C = GV.getComdat()) {
137fe6060f1SDimitry Andric // For GlobalAlias, C is the aliasee object's comdat which may have been
138fe6060f1SDimitry Andric // redirected. So ComdatMap may not contain C.
139fe6060f1SDimitry Andric if (ComdatMap.lookup(C).External)
1400b57cec5SDimitry Andric return false;
1410b57cec5SDimitry Andric
142fe6060f1SDimitry Andric if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
143fe6060f1SDimitry Andric // If a comdat with one member is not externally visible, we can drop it.
144fe6060f1SDimitry Andric // Otherwise, the comdat can be used to establish dependencies among the
145fe6060f1SDimitry Andric // group of sections. Thus we have to keep the comdat but switch it to
146fe6060f1SDimitry Andric // nodeduplicate.
147fe6060f1SDimitry Andric // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
148fe6060f1SDimitry Andric // nodeduplicate.
149fe6060f1SDimitry Andric ComdatInfo &Info = ComdatMap.find(C)->second;
150fe6060f1SDimitry Andric if (Info.Size == 1)
1510b57cec5SDimitry Andric GO->setComdat(nullptr);
152fe6060f1SDimitry Andric else if (!IsWasm)
153fe6060f1SDimitry Andric C->setSelectionKind(Comdat::NoDeduplicate);
154fe6060f1SDimitry Andric }
1550b57cec5SDimitry Andric
1560b57cec5SDimitry Andric if (GV.hasLocalLinkage())
1570b57cec5SDimitry Andric return false;
1580b57cec5SDimitry Andric } else {
1590b57cec5SDimitry Andric if (GV.hasLocalLinkage())
1600b57cec5SDimitry Andric return false;
1610b57cec5SDimitry Andric
1620b57cec5SDimitry Andric if (shouldPreserveGV(GV))
1630b57cec5SDimitry Andric return false;
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric
1660b57cec5SDimitry Andric GV.setVisibility(GlobalValue::DefaultVisibility);
1670b57cec5SDimitry Andric GV.setLinkage(GlobalValue::InternalLinkage);
1680b57cec5SDimitry Andric return true;
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric
171fe6060f1SDimitry Andric // If GV is part of a comdat and is externally visible, update the comdat size
172fe6060f1SDimitry Andric // and keep track of its comdat so that we don't internalize any of its members.
checkComdat(GlobalValue & GV,DenseMap<const Comdat *,ComdatInfo> & ComdatMap)173fe6060f1SDimitry Andric void InternalizePass::checkComdat(
174fe6060f1SDimitry Andric GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
1750b57cec5SDimitry Andric Comdat *C = GV.getComdat();
1760b57cec5SDimitry Andric if (!C)
1770b57cec5SDimitry Andric return;
1780b57cec5SDimitry Andric
179fe6060f1SDimitry Andric ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
180fe6060f1SDimitry Andric ++Info.Size;
1810b57cec5SDimitry Andric if (shouldPreserveGV(GV))
182fe6060f1SDimitry Andric Info.External = true;
1830b57cec5SDimitry Andric }
1840b57cec5SDimitry Andric
internalizeModule(Module & M)185*06c3fb27SDimitry Andric bool InternalizePass::internalizeModule(Module &M) {
1860b57cec5SDimitry Andric bool Changed = false;
1870b57cec5SDimitry Andric
188fe6060f1SDimitry Andric SmallVector<GlobalValue *, 4> Used;
1890b57cec5SDimitry Andric collectUsedGlobalVariables(M, Used, false);
1900b57cec5SDimitry Andric
191fe6060f1SDimitry Andric // Collect comdat size and visiblity information for the module.
192fe6060f1SDimitry Andric DenseMap<const Comdat *, ComdatInfo> ComdatMap;
1930b57cec5SDimitry Andric if (!M.getComdatSymbolTable().empty()) {
1940b57cec5SDimitry Andric for (Function &F : M)
195fe6060f1SDimitry Andric checkComdat(F, ComdatMap);
1960b57cec5SDimitry Andric for (GlobalVariable &GV : M.globals())
197fe6060f1SDimitry Andric checkComdat(GV, ComdatMap);
1980b57cec5SDimitry Andric for (GlobalAlias &GA : M.aliases())
199fe6060f1SDimitry Andric checkComdat(GA, ComdatMap);
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric
2020b57cec5SDimitry Andric // We must assume that globals in llvm.used have a reference that not even
2030b57cec5SDimitry Andric // the linker can see, so we don't internalize them.
2040b57cec5SDimitry Andric // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
2050b57cec5SDimitry Andric // linker can drop those symbols. If this pass is running as part of LTO,
2060b57cec5SDimitry Andric // one might think that it could just drop llvm.compiler.used. The problem
2070b57cec5SDimitry Andric // is that even in LTO llvm doesn't see every reference. For example,
2080b57cec5SDimitry Andric // we don't see references from function local inline assembly. To be
2090b57cec5SDimitry Andric // conservative, we internalize symbols in llvm.compiler.used, but we
2100b57cec5SDimitry Andric // keep llvm.compiler.used so that the symbol is not deleted by llvm.
2110b57cec5SDimitry Andric for (GlobalValue *V : Used) {
2120b57cec5SDimitry Andric AlwaysPreserved.insert(V->getName());
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric
2150b57cec5SDimitry Andric // Never internalize the llvm.used symbol. It is used to implement
2160b57cec5SDimitry Andric // attribute((used)).
2170b57cec5SDimitry Andric // FIXME: Shouldn't this just filter on llvm.metadata section??
2180b57cec5SDimitry Andric AlwaysPreserved.insert("llvm.used");
2190b57cec5SDimitry Andric AlwaysPreserved.insert("llvm.compiler.used");
2200b57cec5SDimitry Andric
2210b57cec5SDimitry Andric // Never internalize anchors used by the machine module info, else the info
2220b57cec5SDimitry Andric // won't find them. (see MachineModuleInfo.)
2230b57cec5SDimitry Andric AlwaysPreserved.insert("llvm.global_ctors");
2240b57cec5SDimitry Andric AlwaysPreserved.insert("llvm.global_dtors");
2250b57cec5SDimitry Andric AlwaysPreserved.insert("llvm.global.annotations");
2260b57cec5SDimitry Andric
2270b57cec5SDimitry Andric // Never internalize symbols code-gen inserts.
2280b57cec5SDimitry Andric // FIXME: We should probably add this (and the __stack_chk_guard) via some
2290b57cec5SDimitry Andric // type of call-back in CodeGen.
2300b57cec5SDimitry Andric AlwaysPreserved.insert("__stack_chk_fail");
231fe6060f1SDimitry Andric if (Triple(M.getTargetTriple()).isOSAIX())
232fe6060f1SDimitry Andric AlwaysPreserved.insert("__ssp_canary_word");
233fe6060f1SDimitry Andric else
2340b57cec5SDimitry Andric AlwaysPreserved.insert("__stack_chk_guard");
2350b57cec5SDimitry Andric
236349cc55cSDimitry Andric // Mark all functions not in the api as internal.
237349cc55cSDimitry Andric IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
238349cc55cSDimitry Andric for (Function &I : M) {
239349cc55cSDimitry Andric if (!maybeInternalize(I, ComdatMap))
240349cc55cSDimitry Andric continue;
241349cc55cSDimitry Andric Changed = true;
242349cc55cSDimitry Andric
243349cc55cSDimitry Andric ++NumFunctions;
244349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
245349cc55cSDimitry Andric }
246349cc55cSDimitry Andric
2470b57cec5SDimitry Andric // Mark all global variables with initializers that are not in the api as
2480b57cec5SDimitry Andric // internal as well.
2490b57cec5SDimitry Andric for (auto &GV : M.globals()) {
250fe6060f1SDimitry Andric if (!maybeInternalize(GV, ComdatMap))
2510b57cec5SDimitry Andric continue;
2520b57cec5SDimitry Andric Changed = true;
2530b57cec5SDimitry Andric
2540b57cec5SDimitry Andric ++NumGlobals;
2550b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric
2580b57cec5SDimitry Andric // Mark all aliases that are not in the api as internal as well.
2590b57cec5SDimitry Andric for (auto &GA : M.aliases()) {
260fe6060f1SDimitry Andric if (!maybeInternalize(GA, ComdatMap))
2610b57cec5SDimitry Andric continue;
2620b57cec5SDimitry Andric Changed = true;
2630b57cec5SDimitry Andric
2640b57cec5SDimitry Andric ++NumAliases;
2650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
2660b57cec5SDimitry Andric }
2670b57cec5SDimitry Andric
2680b57cec5SDimitry Andric return Changed;
2690b57cec5SDimitry Andric }
2700b57cec5SDimitry Andric
InternalizePass()2710b57cec5SDimitry Andric InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
2720b57cec5SDimitry Andric
run(Module & M,ModuleAnalysisManager & AM)2730b57cec5SDimitry Andric PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
274*06c3fb27SDimitry Andric if (!internalizeModule(M))
2750b57cec5SDimitry Andric return PreservedAnalyses::all();
2760b57cec5SDimitry Andric
277*06c3fb27SDimitry Andric return PreservedAnalyses::none();
2780b57cec5SDimitry Andric }
279