1 //===-- Internalize.cpp - Mark functions internal -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass loops over all of the functions and variables in the input module. 10 // If the function or variable does not need to be preserved according to the 11 // client supplied callback, it is marked as internal. 12 // 13 // This transformation would not be legal in a regular compilation, but it gets 14 // extra information from the linker about what is safe. 15 // 16 // For example: Internalizing a function with external linkage. Only if we are 17 // told it is only used from within this module, it is safe to do it. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/Transforms/IPO/Internalize.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/ADT/StringSet.h" 25 #include "llvm/Analysis/CallGraph.h" 26 #include "llvm/IR/Module.h" 27 #include "llvm/InitializePasses.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/LineIterator.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include "llvm/Transforms/IPO.h" 35 #include "llvm/Transforms/Utils/GlobalStatus.h" 36 using namespace llvm; 37 38 #define DEBUG_TYPE "internalize" 39 40 STATISTIC(NumAliases, "Number of aliases internalized"); 41 STATISTIC(NumFunctions, "Number of functions internalized"); 42 STATISTIC(NumGlobals, "Number of global vars internalized"); 43 44 // APIFile - A file which contains a list of symbols that should not be marked 45 // external. 46 static cl::opt<std::string> 47 APIFile("internalize-public-api-file", cl::value_desc("filename"), 48 cl::desc("A file containing list of symbol names to preserve")); 49 50 // APIList - A list of symbols that should not be marked internal. 51 static cl::list<std::string> 52 APIList("internalize-public-api-list", cl::value_desc("list"), 53 cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); 54 55 namespace { 56 // Helper to load an API list to preserve from file and expose it as a functor 57 // for internalization. 58 class PreserveAPIList { 59 public: 60 PreserveAPIList() { 61 if (!APIFile.empty()) 62 LoadFile(APIFile); 63 ExternalNames.insert(APIList.begin(), APIList.end()); 64 } 65 66 bool operator()(const GlobalValue &GV) { 67 return ExternalNames.count(GV.getName()); 68 } 69 70 private: 71 // Contains the set of symbols loaded from file 72 StringSet<> ExternalNames; 73 74 void LoadFile(StringRef Filename) { 75 // Load the APIFile... 76 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = 77 MemoryBuffer::getFile(Filename); 78 if (!Buf) { 79 errs() << "WARNING: Internalize couldn't load file '" << Filename 80 << "'! Continuing as if it's empty.\n"; 81 return; // Just continue as if the file were empty 82 } 83 for (line_iterator I(*Buf->get(), true), E; I != E; ++I) 84 ExternalNames.insert(*I); 85 } 86 }; 87 } // end anonymous namespace 88 89 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) { 90 // Function must be defined here 91 if (GV.isDeclaration()) 92 return true; 93 94 // Available externally is really just a "declaration with a body". 95 if (GV.hasAvailableExternallyLinkage()) 96 return true; 97 98 // Assume that dllexported symbols are referenced elsewhere 99 if (GV.hasDLLExportStorageClass()) 100 return true; 101 102 // Already local, has nothing to do. 103 if (GV.hasLocalLinkage()) 104 return false; 105 106 // Check some special cases 107 if (AlwaysPreserved.count(GV.getName())) 108 return true; 109 110 return MustPreserveGV(GV); 111 } 112 113 bool InternalizePass::maybeInternalize( 114 GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) { 115 if (Comdat *C = GV.getComdat()) { 116 if (ExternalComdats.count(C)) 117 return false; 118 119 // If a comdat is not externally visible we can drop it. 120 if (auto GO = dyn_cast<GlobalObject>(&GV)) 121 GO->setComdat(nullptr); 122 123 if (GV.hasLocalLinkage()) 124 return false; 125 } else { 126 if (GV.hasLocalLinkage()) 127 return false; 128 129 if (shouldPreserveGV(GV)) 130 return false; 131 } 132 133 GV.setVisibility(GlobalValue::DefaultVisibility); 134 GV.setLinkage(GlobalValue::InternalLinkage); 135 return true; 136 } 137 138 // If GV is part of a comdat and is externally visible, keep track of its 139 // comdat so that we don't internalize any of its members. 140 void InternalizePass::checkComdatVisibility( 141 GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) { 142 Comdat *C = GV.getComdat(); 143 if (!C) 144 return; 145 146 if (shouldPreserveGV(GV)) 147 ExternalComdats.insert(C); 148 } 149 150 bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { 151 bool Changed = false; 152 CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; 153 154 SmallPtrSet<GlobalValue *, 8> Used; 155 collectUsedGlobalVariables(M, Used, false); 156 157 // Collect comdat visiblity information for the module. 158 DenseSet<const Comdat *> ExternalComdats; 159 if (!M.getComdatSymbolTable().empty()) { 160 for (Function &F : M) 161 checkComdatVisibility(F, ExternalComdats); 162 for (GlobalVariable &GV : M.globals()) 163 checkComdatVisibility(GV, ExternalComdats); 164 for (GlobalAlias &GA : M.aliases()) 165 checkComdatVisibility(GA, ExternalComdats); 166 } 167 168 // We must assume that globals in llvm.used have a reference that not even 169 // the linker can see, so we don't internalize them. 170 // For llvm.compiler.used the situation is a bit fuzzy. The assembler and 171 // linker can drop those symbols. If this pass is running as part of LTO, 172 // one might think that it could just drop llvm.compiler.used. The problem 173 // is that even in LTO llvm doesn't see every reference. For example, 174 // we don't see references from function local inline assembly. To be 175 // conservative, we internalize symbols in llvm.compiler.used, but we 176 // keep llvm.compiler.used so that the symbol is not deleted by llvm. 177 for (GlobalValue *V : Used) { 178 AlwaysPreserved.insert(V->getName()); 179 } 180 181 // Mark all functions not in the api as internal. 182 for (Function &I : M) { 183 if (!maybeInternalize(I, ExternalComdats)) 184 continue; 185 Changed = true; 186 187 if (ExternalNode) 188 // Remove a callgraph edge from the external node to this function. 189 ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); 190 191 ++NumFunctions; 192 LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); 193 } 194 195 // Never internalize the llvm.used symbol. It is used to implement 196 // attribute((used)). 197 // FIXME: Shouldn't this just filter on llvm.metadata section?? 198 AlwaysPreserved.insert("llvm.used"); 199 AlwaysPreserved.insert("llvm.compiler.used"); 200 201 // Never internalize anchors used by the machine module info, else the info 202 // won't find them. (see MachineModuleInfo.) 203 AlwaysPreserved.insert("llvm.global_ctors"); 204 AlwaysPreserved.insert("llvm.global_dtors"); 205 AlwaysPreserved.insert("llvm.global.annotations"); 206 207 // Never internalize symbols code-gen inserts. 208 // FIXME: We should probably add this (and the __stack_chk_guard) via some 209 // type of call-back in CodeGen. 210 AlwaysPreserved.insert("__stack_chk_fail"); 211 AlwaysPreserved.insert("__stack_chk_guard"); 212 213 // Mark all global variables with initializers that are not in the api as 214 // internal as well. 215 for (auto &GV : M.globals()) { 216 if (!maybeInternalize(GV, ExternalComdats)) 217 continue; 218 Changed = true; 219 220 ++NumGlobals; 221 LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n"); 222 } 223 224 // Mark all aliases that are not in the api as internal as well. 225 for (auto &GA : M.aliases()) { 226 if (!maybeInternalize(GA, ExternalComdats)) 227 continue; 228 Changed = true; 229 230 ++NumAliases; 231 LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n"); 232 } 233 234 return Changed; 235 } 236 237 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {} 238 239 PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) { 240 if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M))) 241 return PreservedAnalyses::all(); 242 243 PreservedAnalyses PA; 244 PA.preserve<CallGraphAnalysis>(); 245 return PA; 246 } 247 248 namespace { 249 class InternalizeLegacyPass : public ModulePass { 250 // Client supplied callback to control wheter a symbol must be preserved. 251 std::function<bool(const GlobalValue &)> MustPreserveGV; 252 253 public: 254 static char ID; // Pass identification, replacement for typeid 255 256 InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {} 257 258 InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV) 259 : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) { 260 initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry()); 261 } 262 263 bool runOnModule(Module &M) override { 264 if (skipModule(M)) 265 return false; 266 267 CallGraphWrapperPass *CGPass = 268 getAnalysisIfAvailable<CallGraphWrapperPass>(); 269 CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; 270 return internalizeModule(M, MustPreserveGV, CG); 271 } 272 273 void getAnalysisUsage(AnalysisUsage &AU) const override { 274 AU.setPreservesCFG(); 275 AU.addPreserved<CallGraphWrapperPass>(); 276 } 277 }; 278 } 279 280 char InternalizeLegacyPass::ID = 0; 281 INITIALIZE_PASS(InternalizeLegacyPass, "internalize", 282 "Internalize Global Symbols", false, false) 283 284 ModulePass *llvm::createInternalizePass() { 285 return new InternalizeLegacyPass(); 286 } 287 288 ModulePass *llvm::createInternalizePass( 289 std::function<bool(const GlobalValue &)> MustPreserveGV) { 290 return new InternalizeLegacyPass(std::move(MustPreserveGV)); 291 } 292