1 //===- llvm/Transforms/IPO/FunctionImport.h - ThinLTO importing -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H 10 #define LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H 11 12 #include "llvm/ADT/DenseSet.h" 13 #include "llvm/ADT/MapVector.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/IR/GlobalValue.h" 16 #include "llvm/IR/ModuleSummaryIndex.h" 17 #include "llvm/IR/PassManager.h" 18 #include "llvm/Support/Compiler.h" 19 #include "llvm/Support/Error.h" 20 #include <functional> 21 #include <memory> 22 #include <system_error> 23 #include <utility> 24 25 namespace llvm { 26 27 class Module; 28 29 /// The function importer is automatically importing function from other modules 30 /// based on the provided summary informations. 31 class FunctionImporter { 32 public: 33 /// The different reasons selectCallee will chose not to import a 34 /// candidate. 35 enum class ImportFailureReason { 36 None, 37 // We can encounter a global variable instead of a function in rare 38 // situations with SamplePGO. See comments where this failure type is 39 // set for more details. 40 GlobalVar, 41 // Found to be globally dead, so we don't bother importing. 42 NotLive, 43 // Instruction count over the current threshold. 44 TooLarge, 45 // Don't import something with interposable linkage as we can't inline it 46 // anyway. 47 InterposableLinkage, 48 // Generally we won't end up failing due to this reason, as we expect 49 // to find at least one summary for the GUID that is global or a local 50 // in the referenced module for direct calls. 51 LocalLinkageNotInModule, 52 // This corresponds to the NotEligibleToImport being set on the summary, 53 // which can happen in a few different cases (e.g. local that can't be 54 // renamed or promoted because it is referenced on a llvm*.used variable). 55 NotEligible, 56 // This corresponds to NoInline being set on the function summary, 57 // which will happen if it is known that the inliner will not be able 58 // to inline the function (e.g. it is marked with a NoInline attribute). 59 NoInline 60 }; 61 62 /// Information optionally tracked for candidates the importer decided 63 /// not to import. Used for optional stat printing. 64 struct ImportFailureInfo { 65 // The ValueInfo corresponding to the candidate. We save an index hash 66 // table lookup for each GUID by stashing this here. 67 ValueInfo VI; 68 // The maximum call edge hotness for all failed imports of this candidate. 69 CalleeInfo::HotnessType MaxHotness; 70 // most recent reason for failing to import (doesn't necessarily correspond 71 // to the attempt with the maximum hotness). 72 ImportFailureReason Reason; 73 // The number of times we tried to import candidate but failed. 74 unsigned Attempts; ImportFailureInfoImportFailureInfo75 ImportFailureInfo(ValueInfo VI, CalleeInfo::HotnessType MaxHotness, 76 ImportFailureReason Reason, unsigned Attempts) 77 : VI(VI), MaxHotness(MaxHotness), Reason(Reason), Attempts(Attempts) {} 78 }; 79 80 /// Map of callee GUID considered for import into a given module to a pair 81 /// consisting of the largest threshold applied when deciding whether to 82 /// import it and, if we decided to import, a pointer to the summary instance 83 /// imported. If we decided not to import, the summary will be nullptr. 84 using ImportThresholdsTy = 85 DenseMap<GlobalValue::GUID, 86 std::tuple<unsigned, const GlobalValueSummary *, 87 std::unique_ptr<ImportFailureInfo>>>; 88 89 // Issues import IDs. Each ID uniquely corresponds to a tuple of 90 // (FromModule, GUID, Definition/Declaration). 91 // 92 // The import IDs make the import list space efficient by referring to each 93 // import with a 32-bit integer ID while maintaining a central table that maps 94 // those integer IDs to tuples of (FromModule, GUID, Def/Decl). 95 // 96 // In one large application, a pair of (FromModule, GUID) is mentioned in 97 // import lists more than 50 times on average across all destination modules. 98 // Mentioning the 32-byte tuple: 99 // 100 // std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind> 101 // 102 // 50 times by value in various import lists would be costly. We can reduce 103 // the memory footprint of import lists by placing one copy in a central table 104 // and referring to it with 32-bit integer IDs. 105 // 106 // To save space within the central table, we only store pairs of 107 // (FromModule, GUID) in the central table. In the actual 32-bit integer ID, 108 // the top 31 bits index into the central table while the bottom 1 bit 109 // indicates whether an ID is for GlobalValueSummary::Declaration or 110 // GlobalValueSummary::Definition. 111 class ImportIDTable { 112 public: 113 using ImportIDTy = uint32_t; 114 115 ImportIDTable() = default; 116 117 // Something is wrong with the application logic if we need to make a copy 118 // of this and potentially make a fork. 119 ImportIDTable(const ImportIDTable &) = delete; 120 ImportIDTable &operator=(const ImportIDTable &) = delete; 121 122 // Create a pair of import IDs [Def, Decl] for a given pair of FromModule 123 // and GUID. createImportIDs(StringRef FromModule,GlobalValue::GUID GUID)124 std::pair<ImportIDTy, ImportIDTy> createImportIDs(StringRef FromModule, 125 GlobalValue::GUID GUID) { 126 auto Key = std::make_pair(FromModule, GUID); 127 auto InsertResult = TheTable.try_emplace(Key, TheTable.size()); 128 return makeIDPair(InsertResult.first->second); 129 } 130 131 // Get a pair of previously created import IDs [Def, Decl] for a given pair 132 // of FromModule and GUID. Returns std::nullopt if not available. 133 std::optional<std::pair<ImportIDTy, ImportIDTy>> getImportIDs(StringRef FromModule,GlobalValue::GUID GUID)134 getImportIDs(StringRef FromModule, GlobalValue::GUID GUID) { 135 auto Key = std::make_pair(FromModule, GUID); 136 auto It = TheTable.find(Key); 137 if (It != TheTable.end()) 138 return makeIDPair(It->second); 139 return std::nullopt; 140 } 141 142 // Return a tuple of [FromModule, GUID, Def/Decl] that a given ImportID 143 // corresponds to. 144 std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind> lookup(ImportIDTy ImportID)145 lookup(ImportIDTy ImportID) const { 146 GlobalValueSummary::ImportKind Kind = 147 (ImportID & 1) ? GlobalValueSummary::Declaration 148 : GlobalValueSummary::Definition; 149 auto It = TheTable.begin() + (ImportID >> 1); 150 StringRef FromModule = It->first.first; 151 GlobalValue::GUID GUID = It->first.second; 152 return std::make_tuple(FromModule, GUID, Kind); 153 } 154 155 // The same as lookup above. Useful for map_iterator. 156 std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind> operator()157 operator()(ImportIDTable::ImportIDTy ImportID) const { 158 return lookup(ImportID); 159 } 160 161 private: 162 // Make a pair of import IDs [Def, Decl] from an index into TheTable. makeIDPair(ImportIDTy Index)163 static std::pair<ImportIDTy, ImportIDTy> makeIDPair(ImportIDTy Index) { 164 ImportIDTy Def = Index << 1; 165 ImportIDTy Decl = Def | 1; 166 return std::make_pair(Def, Decl); 167 } 168 169 MapVector<std::pair<StringRef, GlobalValue::GUID>, ImportIDTy> TheTable; 170 }; 171 172 // Forward-declare SortedImportList for ImportMapTy. 173 class SortedImportList; 174 175 /// The map maintains the list of imports. Conceptually, it is a collection 176 /// of tuples of the form: 177 /// 178 /// (The name of the source module, GUID, Definition/Declaration) 179 /// 180 /// The name of the source module is the module identifier to pass to the 181 /// ModuleLoader. The module identifier strings must be owned elsewhere, 182 /// typically by the in-memory ModuleSummaryIndex the importing decisions are 183 /// made from (the module path for each summary is owned by the index's module 184 /// path string table). 185 class ImportMapTy { 186 public: 187 enum class AddDefinitionStatus { 188 // No change was made to the list of imports or whether each import should 189 // be imported as a declaration or definition. 190 NoChange, 191 // Successfully added the given GUID to be imported as a definition. There 192 // was no existing entry with the same GUID as a declaration. 193 Inserted, 194 // An existing with the given GUID was changed to a definition. 195 ChangedToDefinition, 196 }; 197 198 ImportMapTy() = delete; ImportMapTy(ImportIDTable & IDs)199 ImportMapTy(ImportIDTable &IDs) : IDs(IDs) {} 200 201 // Add the given GUID to ImportList as a definition. If the same GUID has 202 // been added as a declaration previously, that entry is overridden. 203 LLVM_ABI AddDefinitionStatus addDefinition(StringRef FromModule, 204 GlobalValue::GUID GUID); 205 206 // Add the given GUID to ImportList as a declaration. If the same GUID has 207 // been added as a definition previously, that entry takes precedence, and 208 // no change is made. 209 LLVM_ABI void maybeAddDeclaration(StringRef FromModule, 210 GlobalValue::GUID GUID); 211 addGUID(StringRef FromModule,GlobalValue::GUID GUID,GlobalValueSummary::ImportKind ImportKind)212 void addGUID(StringRef FromModule, GlobalValue::GUID GUID, 213 GlobalValueSummary::ImportKind ImportKind) { 214 if (ImportKind == GlobalValueSummary::Definition) 215 addDefinition(FromModule, GUID); 216 else 217 maybeAddDeclaration(FromModule, GUID); 218 } 219 220 // Return the list of source modules sorted in the ascending alphabetical 221 // order. 222 LLVM_ABI SmallVector<StringRef, 0> getSourceModules() const; 223 224 LLVM_ABI std::optional<GlobalValueSummary::ImportKind> 225 getImportType(StringRef FromModule, GlobalValue::GUID GUID) const; 226 227 // Iterate over the import list. The caller gets tuples of FromModule, 228 // GUID, and ImportKind instead of import IDs. std::cref below prevents 229 // map_iterator from deep-copying IDs. begin()230 auto begin() const { return map_iterator(Imports.begin(), std::cref(IDs)); } end()231 auto end() const { return map_iterator(Imports.end(), std::cref(IDs)); } 232 233 friend class SortedImportList; 234 235 private: 236 ImportIDTable &IDs; 237 DenseSet<ImportIDTable::ImportIDTy> Imports; 238 }; 239 240 // A read-only copy of ImportMapTy with its contents sorted according to the 241 // given comparison function. 242 class SortedImportList { 243 public: SortedImportList(const ImportMapTy & ImportMap,llvm::function_ref<bool (const std::pair<StringRef,GlobalValue::GUID> &,const std::pair<StringRef,GlobalValue::GUID> &)> Comp)244 SortedImportList(const ImportMapTy &ImportMap, 245 llvm::function_ref< 246 bool(const std::pair<StringRef, GlobalValue::GUID> &, 247 const std::pair<StringRef, GlobalValue::GUID> &)> 248 Comp) 249 : IDs(ImportMap.IDs), Imports(iterator_range(ImportMap.Imports)) { 250 llvm::sort(Imports, [&](ImportIDTable::ImportIDTy L, 251 ImportIDTable::ImportIDTy R) { 252 auto Lookup = [&](ImportIDTable::ImportIDTy Id) 253 -> std::pair<StringRef, GlobalValue::GUID> { 254 auto Tuple = IDs.lookup(Id); 255 return std::make_pair(std::get<0>(Tuple), std::get<1>(Tuple)); 256 }; 257 return Comp(Lookup(L), Lookup(R)); 258 }); 259 } 260 261 // Iterate over the import list. The caller gets tuples of FromModule, 262 // GUID, and ImportKind instead of import IDs. std::cref below prevents 263 // map_iterator from deep-copying IDs. begin()264 auto begin() const { return map_iterator(Imports.begin(), std::cref(IDs)); } end()265 auto end() const { return map_iterator(Imports.end(), std::cref(IDs)); } 266 267 private: 268 const ImportIDTable &IDs; 269 SmallVector<ImportIDTable::ImportIDTy, 0> Imports; 270 }; 271 272 // A map from destination modules to lists of imports. 273 class ImportListsTy { 274 public: ImportListsTy()275 ImportListsTy() : ImportIDs(), EmptyList(ImportIDs) {} ImportListsTy(size_t Size)276 ImportListsTy(size_t Size) 277 : ImportIDs(), EmptyList(ImportIDs), ListsImpl(Size) {} 278 279 ImportMapTy &operator[](StringRef DestMod) { 280 return ListsImpl.try_emplace(DestMod, ImportIDs).first->second; 281 } 282 lookup(StringRef DestMod)283 const ImportMapTy &lookup(StringRef DestMod) const { 284 auto It = ListsImpl.find(DestMod); 285 if (It != ListsImpl.end()) 286 return It->second; 287 return EmptyList; 288 } 289 size()290 size_t size() const { return ListsImpl.size(); } 291 292 using const_iterator = DenseMap<StringRef, ImportMapTy>::const_iterator; begin()293 const_iterator begin() const { return ListsImpl.begin(); } end()294 const_iterator end() const { return ListsImpl.end(); } 295 296 private: 297 ImportIDTable ImportIDs; 298 ImportMapTy EmptyList; 299 DenseMap<StringRef, ImportMapTy> ListsImpl; 300 }; 301 302 /// The set contains an entry for every global value that the module exports. 303 /// Depending on the user context, this container is allowed to contain 304 /// definitions, declarations or a mix of both. 305 using ExportSetTy = DenseSet<ValueInfo>; 306 307 /// A function of this type is used to load modules referenced by the index. 308 using ModuleLoaderTy = 309 std::function<Expected<std::unique_ptr<Module>>(StringRef Identifier)>; 310 311 /// Create a Function Importer. FunctionImporter(const ModuleSummaryIndex & Index,ModuleLoaderTy ModuleLoader,bool ClearDSOLocalOnDeclarations)312 FunctionImporter(const ModuleSummaryIndex &Index, ModuleLoaderTy ModuleLoader, 313 bool ClearDSOLocalOnDeclarations) 314 : Index(Index), ModuleLoader(std::move(ModuleLoader)), 315 ClearDSOLocalOnDeclarations(ClearDSOLocalOnDeclarations) {} 316 317 /// Import functions in Module \p M based on the supplied import list. 318 LLVM_ABI Expected<bool> importFunctions(Module &M, 319 const ImportMapTy &ImportList); 320 321 private: 322 /// The summaries index used to trigger importing. 323 const ModuleSummaryIndex &Index; 324 325 /// Factory function to load a Module for a given identifier 326 ModuleLoaderTy ModuleLoader; 327 328 /// See the comment of ClearDSOLocalOnDeclarations in 329 /// Utils/FunctionImportUtils.h. 330 bool ClearDSOLocalOnDeclarations; 331 }; 332 333 /// The function importing pass 334 class FunctionImportPass : public PassInfoMixin<FunctionImportPass> { 335 public: 336 LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 337 }; 338 339 /// Compute all the imports and exports for every module in the Index. 340 /// 341 /// \p ModuleToDefinedGVSummaries contains for each Module a map 342 /// (GUID -> Summary) for every global defined in the module. 343 /// 344 /// \p isPrevailing is a callback that will be called with a global value's GUID 345 /// and summary and should return whether the module corresponding to the 346 /// summary contains the linker-prevailing copy of that value. 347 /// 348 /// \p ImportLists will be populated with an entry for every Module we are 349 /// importing into. This entry is itself a map that can be passed to 350 /// FunctionImporter::importFunctions() above (see description there). 351 /// 352 /// \p ExportLists contains for each Module the set of globals (GUID) that will 353 /// be imported by another module, or referenced by such a function. I.e. this 354 /// is the set of globals that need to be promoted/renamed appropriately. 355 /// 356 /// The module identifier strings that are the keys of the above two maps 357 /// are owned by the in-memory ModuleSummaryIndex the importing decisions 358 /// are made from (the module path for each summary is owned by the index's 359 /// module path string table). 360 LLVM_ABI void ComputeCrossModuleImport( 361 const ModuleSummaryIndex &Index, 362 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 363 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 364 isPrevailing, 365 FunctionImporter::ImportListsTy &ImportLists, 366 DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists); 367 368 /// PrevailingType enum used as a return type of callback passed 369 /// to computeDeadSymbolsAndUpdateIndirectCalls. Yes and No values used when 370 /// status explicitly set by symbols resolution, otherwise status is Unknown. 371 enum class PrevailingType { Yes, No, Unknown }; 372 373 /// Update call edges for indirect calls to local functions added from 374 /// SamplePGO when needed. Normally this is done during 375 /// computeDeadSymbolsAndUpdateIndirectCalls, but can be called standalone 376 /// when that is not called (e.g. during testing). 377 LLVM_ABI void updateIndirectCalls(ModuleSummaryIndex &Index); 378 379 /// Compute all the symbols that are "dead": i.e these that can't be reached 380 /// in the graph from any of the given symbols listed in 381 /// \p GUIDPreservedSymbols. Non-prevailing symbols are symbols without a 382 /// prevailing copy anywhere in IR and are normally dead, \p isPrevailing 383 /// predicate returns status of symbol. 384 /// Also update call edges for indirect calls to local functions added from 385 /// SamplePGO when needed. 386 LLVM_ABI void computeDeadSymbolsAndUpdateIndirectCalls( 387 ModuleSummaryIndex &Index, 388 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, 389 function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing); 390 391 /// Compute dead symbols and run constant propagation in combined index 392 /// after that. 393 LLVM_ABI void computeDeadSymbolsWithConstProp( 394 ModuleSummaryIndex &Index, 395 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, 396 function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing, 397 bool ImportEnabled); 398 399 /// Converts value \p GV to declaration, or replaces with a declaration if 400 /// it is an alias. Returns true if converted, false if replaced. 401 LLVM_ABI bool convertToDeclaration(GlobalValue &GV); 402 403 /// Compute the set of summaries needed for a ThinLTO backend compilation of 404 /// \p ModulePath. 405 // 406 /// This includes summaries from that module (in case any global summary based 407 /// optimizations were recorded) and from any definitions in other modules that 408 /// should be imported. 409 // 410 /// \p ModuleToSummariesForIndex will be populated with the needed summaries 411 /// from each required module path. Use a std::map instead of StringMap to get 412 /// stable order for bitcode emission. 413 /// 414 /// \p DecSummaries will be popluated with the subset of of summary pointers 415 /// that have 'declaration' import type among all summaries the module need. 416 LLVM_ABI void gatherImportedSummariesForModule( 417 StringRef ModulePath, 418 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 419 const FunctionImporter::ImportMapTy &ImportList, 420 ModuleToSummariesForIndexTy &ModuleToSummariesForIndex, 421 GVSummaryPtrSet &DecSummaries); 422 423 /// Emit into \p OutputFilename the files module \p ModulePath will import from. 424 LLVM_ABI Error 425 EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename, 426 const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex); 427 428 /// Call \p F passing each of the files module \p ModulePath will import from. 429 LLVM_ABI void processImportsFiles( 430 StringRef ModulePath, 431 const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex, 432 function_ref<void(const std::string &)> F); 433 434 /// Based on the information recorded in the summaries during global 435 /// summary-based analysis: 436 /// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide 437 /// and consider visibility from other definitions for ELF) in \p TheModule 438 /// 2. (optional) Apply propagated function attributes to \p TheModule if 439 /// PropagateAttrs is true 440 LLVM_ABI void thinLTOFinalizeInModule(Module &TheModule, 441 const GVSummaryMapTy &DefinedGlobals, 442 bool PropagateAttrs); 443 444 /// Internalize \p TheModule based on the information recorded in the summaries 445 /// during global summary-based analysis. 446 LLVM_ABI void thinLTOInternalizeModule(Module &TheModule, 447 const GVSummaryMapTy &DefinedGlobals); 448 449 } // end namespace llvm 450 451 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H 452