1 //===-- CodeGenData.cpp ---------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for codegen data that has stable summary which 10 // can be used to optimize the code in the subsequent codegen. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Bitcode/BitcodeWriter.h" 15 #include "llvm/CGData/CodeGenDataReader.h" 16 #include "llvm/CGData/OutlinedHashTreeRecord.h" 17 #include "llvm/CGData/StableFunctionMapRecord.h" 18 #include "llvm/Object/ObjectFile.h" 19 #include "llvm/Support/Caching.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/WithColor.h" 22 23 #define DEBUG_TYPE "cg-data" 24 25 using namespace llvm; 26 using namespace cgdata; 27 28 static cl::opt<bool> 29 CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden, 30 cl::desc("Emit CodeGen Data into custom sections")); 31 static cl::opt<std::string> 32 CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, 33 cl::desc("File path to where .cgdata file is read")); 34 cl::opt<bool> CodeGenDataThinLTOTwoRounds( 35 "codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, 36 cl::desc("Enable two-round ThinLTO code generation. The first round " 37 "emits codegen data, while the second round uses the emitted " 38 "codegen data for further optimizations.")); 39 40 static std::string getCGDataErrString(cgdata_error Err, 41 const std::string &ErrMsg = "") { 42 std::string Msg; 43 raw_string_ostream OS(Msg); 44 45 switch (Err) { 46 case cgdata_error::success: 47 OS << "success"; 48 break; 49 case cgdata_error::eof: 50 OS << "end of File"; 51 break; 52 case cgdata_error::bad_magic: 53 OS << "invalid codegen data (bad magic)"; 54 break; 55 case cgdata_error::bad_header: 56 OS << "invalid codegen data (file header is corrupt)"; 57 break; 58 case cgdata_error::empty_cgdata: 59 OS << "empty codegen data"; 60 break; 61 case cgdata_error::malformed: 62 OS << "malformed codegen data"; 63 break; 64 case cgdata_error::unsupported_version: 65 OS << "unsupported codegen data version"; 66 break; 67 } 68 69 // If optional error message is not empty, append it to the message. 70 if (!ErrMsg.empty()) 71 OS << ": " << ErrMsg; 72 73 return OS.str(); 74 } 75 76 namespace { 77 78 // FIXME: This class is only here to support the transition to llvm::Error. It 79 // will be removed once this transition is complete. Clients should prefer to 80 // deal with the Error value directly, rather than converting to error_code. 81 class CGDataErrorCategoryType : public std::error_category { 82 const char *name() const noexcept override { return "llvm.cgdata"; } 83 84 std::string message(int IE) const override { 85 return getCGDataErrString(static_cast<cgdata_error>(IE)); 86 } 87 }; 88 89 } // end anonymous namespace 90 91 const std::error_category &llvm::cgdata_category() { 92 static CGDataErrorCategoryType ErrorCategory; 93 return ErrorCategory; 94 } 95 96 std::string CGDataError::message() const { 97 return getCGDataErrString(Err, Msg); 98 } 99 100 char CGDataError::ID = 0; 101 102 namespace { 103 104 const char *CodeGenDataSectNameCommon[] = { 105 #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 106 SectNameCommon, 107 #include "llvm/CGData/CodeGenData.inc" 108 }; 109 110 const char *CodeGenDataSectNameCoff[] = { 111 #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 112 SectNameCoff, 113 #include "llvm/CGData/CodeGenData.inc" 114 }; 115 116 const char *CodeGenDataSectNamePrefix[] = { 117 #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, 118 #include "llvm/CGData/CodeGenData.inc" 119 }; 120 121 } // namespace 122 123 namespace llvm { 124 125 std::string getCodeGenDataSectionName(CGDataSectKind CGSK, 126 Triple::ObjectFormatType OF, 127 bool AddSegmentInfo) { 128 std::string SectName; 129 130 if (OF == Triple::MachO && AddSegmentInfo) 131 SectName = CodeGenDataSectNamePrefix[CGSK]; 132 133 if (OF == Triple::COFF) 134 SectName += CodeGenDataSectNameCoff[CGSK]; 135 else 136 SectName += CodeGenDataSectNameCommon[CGSK]; 137 138 return SectName; 139 } 140 141 std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr; 142 std::once_flag CodeGenData::OnceFlag; 143 144 CodeGenData &CodeGenData::getInstance() { 145 std::call_once(CodeGenData::OnceFlag, []() { 146 Instance = std::unique_ptr<CodeGenData>(new CodeGenData()); 147 148 if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) 149 Instance->EmitCGData = true; 150 else if (!CodeGenDataUsePath.empty()) { 151 // Initialize the global CGData if the input file name is given. 152 // We do not error-out when failing to parse the input file. 153 // Instead, just emit an warning message and fall back as if no CGData 154 // were available. 155 auto FS = vfs::getRealFileSystem(); 156 auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS); 157 if (Error E = ReaderOrErr.takeError()) { 158 warn(std::move(E), CodeGenDataUsePath); 159 return; 160 } 161 // Publish each CGData based on the data type in the header. 162 auto Reader = ReaderOrErr->get(); 163 if (Reader->hasOutlinedHashTree()) 164 Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree()); 165 if (Reader->hasStableFunctionMap()) 166 Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap()); 167 } 168 }); 169 return *Instance; 170 } 171 172 namespace IndexedCGData { 173 174 Expected<Header> Header::readFromBuffer(const unsigned char *Curr) { 175 using namespace support; 176 177 static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>, 178 "The header should be standard layout type since we use offset " 179 "of fields to read."); 180 Header H; 181 H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr); 182 if (H.Magic != IndexedCGData::Magic) 183 return make_error<CGDataError>(cgdata_error::bad_magic); 184 H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); 185 if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) 186 return make_error<CGDataError>(cgdata_error::unsupported_version); 187 H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); 188 189 static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3, 190 "Please update the offset computation below if a new field has " 191 "been added to the header."); 192 H.OutlinedHashTreeOffset = 193 endian::readNext<uint64_t, endianness::little, unaligned>(Curr); 194 if (H.Version >= 2) 195 H.StableFunctionMapOffset = 196 endian::readNext<uint64_t, endianness::little, unaligned>(Curr); 197 198 return H; 199 } 200 201 } // end namespace IndexedCGData 202 203 namespace cgdata { 204 205 void warn(Twine Message, StringRef Whence, StringRef Hint) { 206 WithColor::warning(); 207 if (!Whence.empty()) 208 errs() << Whence << ": "; 209 errs() << Message << "\n"; 210 if (!Hint.empty()) 211 WithColor::note() << Hint << "\n"; 212 } 213 214 void warn(Error E, StringRef Whence) { 215 if (E.isA<CGDataError>()) { 216 handleAllErrors(std::move(E), [&](const CGDataError &IPE) { 217 warn(IPE.message(), Whence, ""); 218 }); 219 } 220 } 221 222 void saveModuleForTwoRounds(const Module &TheModule, unsigned Task, 223 AddStreamFn AddStream) { 224 LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier() 225 << " in Task " << Task << "\n"); 226 Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = 227 AddStream(Task, TheModule.getModuleIdentifier()); 228 if (Error Err = StreamOrErr.takeError()) 229 report_fatal_error(std::move(Err)); 230 std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; 231 232 WriteBitcodeToFile(TheModule, *Stream->OS, 233 /*ShouldPreserveUseListOrder=*/true); 234 235 if (Error Err = Stream->commit()) 236 report_fatal_error(std::move(Err)); 237 } 238 239 std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule, 240 unsigned Task, 241 LLVMContext &Context, 242 ArrayRef<StringRef> IRFiles) { 243 LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier() 244 << " in Task " << Task << "\n"); 245 auto FileBuffer = MemoryBuffer::getMemBuffer( 246 IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false); 247 auto RestoredModule = parseBitcodeFile(*FileBuffer, Context); 248 if (!RestoredModule) 249 report_fatal_error( 250 Twine("Failed to parse optimized bitcode loaded for Task: ") + 251 Twine(Task) + "\n"); 252 253 // Restore the original module identifier. 254 (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier()); 255 return std::move(*RestoredModule); 256 } 257 258 Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { 259 OutlinedHashTreeRecord GlobalOutlineRecord; 260 StableFunctionMapRecord GlobalStableFunctionMapRecord; 261 stable_hash CombinedHash = 0; 262 for (auto File : ObjFiles) { 263 if (File.empty()) 264 continue; 265 std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer( 266 File, "in-memory object file", /*RequiresNullTerminator=*/false); 267 Expected<std::unique_ptr<object::ObjectFile>> BinOrErr = 268 object::ObjectFile::createObjectFile(Buffer->getMemBufferRef()); 269 if (!BinOrErr) 270 return BinOrErr.takeError(); 271 272 std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get(); 273 if (auto E = CodeGenDataReader::mergeFromObjectFile( 274 Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord, 275 &CombinedHash)) 276 return E; 277 } 278 279 GlobalStableFunctionMapRecord.finalize(); 280 281 if (!GlobalOutlineRecord.empty()) 282 cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree)); 283 if (!GlobalStableFunctionMapRecord.empty()) 284 cgdata::publishStableFunctionMap( 285 std::move(GlobalStableFunctionMapRecord.FunctionMap)); 286 287 return CombinedHash; 288 } 289 290 } // end namespace cgdata 291 292 } // end namespace llvm 293