1 //===-- Internalize.cpp - Mark functions internal -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass loops over all of the functions and variables in the input module. 10 // If the function or variable does not need to be preserved according to the 11 // client supplied callback, it is marked as internal. 12 // 13 // This transformation would not be legal in a regular compilation, but it gets 14 // extra information from the linker about what is safe. 15 // 16 // For example: Internalizing a function with external linkage. Only if we are 17 // told it is only used from within this module, it is safe to do it. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "llvm/Transforms/IPO/Internalize.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/ADT/StringSet.h" 24 #include "llvm/Analysis/CallGraph.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/GlobPattern.h" 29 #include "llvm/Support/LineIterator.h" 30 #include "llvm/Support/MemoryBuffer.h" 31 #include "llvm/Support/raw_ostream.h" 32 #include "llvm/TargetParser/Triple.h" 33 #include "llvm/Transforms/IPO.h" 34 using namespace llvm; 35 36 #define DEBUG_TYPE "internalize" 37 38 STATISTIC(NumAliases, "Number of aliases internalized"); 39 STATISTIC(NumFunctions, "Number of functions internalized"); 40 STATISTIC(NumGlobals, "Number of global vars internalized"); 41 42 // APIFile - A file which contains a list of symbol glob patterns that should 43 // not be marked external. 44 static cl::opt<std::string> 45 APIFile("internalize-public-api-file", cl::value_desc("filename"), 46 cl::desc("A file containing list of symbol names to preserve")); 47 48 // APIList - A list of symbol glob patterns that should not be marked internal. 49 static cl::list<std::string> 50 APIList("internalize-public-api-list", cl::value_desc("list"), 51 cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); 52 53 namespace { 54 // Helper to load an API list to preserve from file and expose it as a functor 55 // for internalization. 56 class PreserveAPIList { 57 public: 58 PreserveAPIList() { 59 if (!APIFile.empty()) 60 LoadFile(APIFile); 61 for (StringRef Pattern : APIList) 62 addGlob(Pattern); 63 } 64 65 bool operator()(const GlobalValue &GV) { 66 return llvm::any_of( 67 ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); }); 68 } 69 70 private: 71 // Contains the set of symbols loaded from file 72 SmallVector<GlobPattern> ExternalNames; 73 74 void addGlob(StringRef Pattern) { 75 auto GlobOrErr = GlobPattern::create(Pattern); 76 if (!GlobOrErr) { 77 errs() << "WARNING: when loading pattern: '" 78 << toString(GlobOrErr.takeError()) << "' ignoring"; 79 return; 80 } 81 ExternalNames.emplace_back(std::move(*GlobOrErr)); 82 } 83 84 void LoadFile(StringRef Filename) { 85 // Load the APIFile... 86 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = 87 MemoryBuffer::getFile(Filename); 88 if (!BufOrErr) { 89 errs() << "WARNING: Internalize couldn't load file '" << Filename 90 << "'! Continuing as if it's empty.\n"; 91 return; // Just continue as if the file were empty 92 } 93 Buf = std::move(*BufOrErr); 94 for (line_iterator I(*Buf, true), E; I != E; ++I) 95 addGlob(*I); 96 } 97 98 std::shared_ptr<MemoryBuffer> Buf; 99 }; 100 } // end anonymous namespace 101 102 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) { 103 // Function must be defined here 104 if (GV.isDeclaration()) 105 return true; 106 107 // Available externally is really just a "declaration with a body". 108 if (GV.hasAvailableExternallyLinkage()) 109 return true; 110 111 // Assume that dllexported symbols are referenced elsewhere 112 if (GV.hasDLLExportStorageClass()) 113 return true; 114 115 // As the name suggests, externally initialized variables need preserving as 116 // they would be initialized elsewhere externally. 117 if (const auto *G = dyn_cast<GlobalVariable>(&GV)) 118 if (G->isExternallyInitialized()) 119 return true; 120 121 // Already local, has nothing to do. 122 if (GV.hasLocalLinkage()) 123 return false; 124 125 // Check some special cases 126 if (AlwaysPreserved.count(GV.getName())) 127 return true; 128 129 return MustPreserveGV(GV); 130 } 131 132 bool InternalizePass::maybeInternalize( 133 GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) { 134 if (Comdat *C = GV.getComdat()) { 135 // For GlobalAlias, C is the aliasee object's comdat which may have been 136 // redirected. So ComdatMap may not contain C. 137 if (ComdatMap.lookup(C).External) 138 return false; 139 140 if (auto *GO = dyn_cast<GlobalObject>(&GV)) { 141 // If a comdat with one member is not externally visible, we can drop it. 142 // Otherwise, the comdat can be used to establish dependencies among the 143 // group of sections. Thus we have to keep the comdat but switch it to 144 // nodeduplicate. 145 // Note: nodeduplicate is not necessary for COFF. wasm doesn't support 146 // nodeduplicate. 147 ComdatInfo &Info = ComdatMap.find(C)->second; 148 if (Info.Size == 1) 149 GO->setComdat(nullptr); 150 else if (!IsWasm) 151 C->setSelectionKind(Comdat::NoDeduplicate); 152 } 153 154 if (GV.hasLocalLinkage()) 155 return false; 156 } else { 157 if (GV.hasLocalLinkage()) 158 return false; 159 160 if (shouldPreserveGV(GV)) 161 return false; 162 } 163 164 GV.setVisibility(GlobalValue::DefaultVisibility); 165 GV.setLinkage(GlobalValue::InternalLinkage); 166 return true; 167 } 168 169 // If GV is part of a comdat and is externally visible, update the comdat size 170 // and keep track of its comdat so that we don't internalize any of its members. 171 void InternalizePass::checkComdat( 172 GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) { 173 Comdat *C = GV.getComdat(); 174 if (!C) 175 return; 176 177 ComdatInfo &Info = ComdatMap[C]; 178 ++Info.Size; 179 if (shouldPreserveGV(GV)) 180 Info.External = true; 181 } 182 183 bool InternalizePass::internalizeModule(Module &M) { 184 bool Changed = false; 185 186 SmallVector<GlobalValue *, 4> Used; 187 collectUsedGlobalVariables(M, Used, false); 188 189 // Collect comdat size and visiblity information for the module. 190 DenseMap<const Comdat *, ComdatInfo> ComdatMap; 191 if (!M.getComdatSymbolTable().empty()) { 192 for (Function &F : M) 193 checkComdat(F, ComdatMap); 194 for (GlobalVariable &GV : M.globals()) 195 checkComdat(GV, ComdatMap); 196 for (GlobalAlias &GA : M.aliases()) 197 checkComdat(GA, ComdatMap); 198 } 199 200 // We must assume that globals in llvm.used have a reference that not even 201 // the linker can see, so we don't internalize them. 202 // For llvm.compiler.used the situation is a bit fuzzy. The assembler and 203 // linker can drop those symbols. If this pass is running as part of LTO, 204 // one might think that it could just drop llvm.compiler.used. The problem 205 // is that even in LTO llvm doesn't see every reference. For example, 206 // we don't see references from function local inline assembly. To be 207 // conservative, we internalize symbols in llvm.compiler.used, but we 208 // keep llvm.compiler.used so that the symbol is not deleted by llvm. 209 for (GlobalValue *V : Used) { 210 AlwaysPreserved.insert(V->getName()); 211 } 212 213 // Never internalize the llvm.used symbol. It is used to implement 214 // attribute((used)). 215 // FIXME: Shouldn't this just filter on llvm.metadata section?? 216 AlwaysPreserved.insert("llvm.used"); 217 AlwaysPreserved.insert("llvm.compiler.used"); 218 219 // Never internalize anchors used by the machine module info, else the info 220 // won't find them. (see MachineModuleInfo.) 221 AlwaysPreserved.insert("llvm.global_ctors"); 222 AlwaysPreserved.insert("llvm.global_dtors"); 223 AlwaysPreserved.insert("llvm.global.annotations"); 224 225 // Never internalize symbols code-gen inserts. 226 // FIXME: We should probably add this (and the __stack_chk_guard) via some 227 // type of call-back in CodeGen. 228 AlwaysPreserved.insert("__stack_chk_fail"); 229 if (M.getTargetTriple().isOSAIX()) 230 AlwaysPreserved.insert("__ssp_canary_word"); 231 else 232 AlwaysPreserved.insert("__stack_chk_guard"); 233 234 // Preserve the RPC interface for GPU host callbacks when internalizing. 235 if (M.getTargetTriple().isNVPTX()) 236 AlwaysPreserved.insert("__llvm_rpc_client"); 237 238 // Mark all functions not in the api as internal. 239 IsWasm = M.getTargetTriple().isOSBinFormatWasm(); 240 for (Function &I : M) { 241 if (!maybeInternalize(I, ComdatMap)) 242 continue; 243 Changed = true; 244 245 ++NumFunctions; 246 LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); 247 } 248 249 // Mark all global variables with initializers that are not in the api as 250 // internal as well. 251 for (auto &GV : M.globals()) { 252 if (!maybeInternalize(GV, ComdatMap)) 253 continue; 254 Changed = true; 255 256 ++NumGlobals; 257 LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n"); 258 } 259 260 // Mark all aliases that are not in the api as internal as well. 261 for (auto &GA : M.aliases()) { 262 if (!maybeInternalize(GA, ComdatMap)) 263 continue; 264 Changed = true; 265 266 ++NumAliases; 267 LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n"); 268 } 269 270 return Changed; 271 } 272 273 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {} 274 275 PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) { 276 if (!internalizeModule(M)) 277 return PreservedAnalyses::all(); 278 279 return PreservedAnalyses::none(); 280 } 281