1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// 11 /// This file contains several definitions for the debuginfod client and server. 12 /// For the client, this file defines the fetchInfo function. For the server, 13 /// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as 14 /// well as the DebuginfodLog, DebuginfodCollection classes. The fetchInfo 15 /// function retrieves any of the three supported artifact types: (executable, 16 /// debuginfo, source file) associated with a build-id from debuginfod servers. 17 /// If a source file is to be fetched, its absolute path must be specified in 18 /// the Description argument to fetchInfo. The DebuginfodLogEntry, 19 /// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to 20 /// scan the local filesystem for binaries and serve the debuginfod protocol. 21 /// 22 //===----------------------------------------------------------------------===// 23 24 #include "llvm/Debuginfod/Debuginfod.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/BinaryFormat/Magic.h" 28 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 29 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 30 #include "llvm/Debuginfod/HTTPClient.h" 31 #include "llvm/Object/BuildID.h" 32 #include "llvm/Object/ELFObjectFile.h" 33 #include "llvm/Support/CachePruning.h" 34 #include "llvm/Support/Caching.h" 35 #include "llvm/Support/Errc.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/FileUtilities.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/ThreadPool.h" 41 #include "llvm/Support/xxhash.h" 42 43 #include <atomic> 44 #include <optional> 45 #include <thread> 46 47 namespace llvm { 48 49 using llvm::object::BuildIDRef; 50 51 namespace { 52 std::optional<SmallVector<StringRef>> DebuginfodUrls; 53 // Many Readers/Single Writer lock protecting the global debuginfod URL list. 54 llvm::sys::RWMutex UrlsMutex; 55 } // namespace 56 57 std::string getDebuginfodCacheKey(llvm::StringRef S) { 58 return utostr(xxh3_64bits(S)); 59 } 60 61 // Returns a binary BuildID as a normalized hex string. 62 // Uses lowercase for compatibility with common debuginfod servers. 63 static std::string buildIDToString(BuildIDRef ID) { 64 return llvm::toHex(ID, /*LowerCase=*/true); 65 } 66 67 bool canUseDebuginfod() { 68 return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty(); 69 } 70 71 SmallVector<StringRef> getDefaultDebuginfodUrls() { 72 std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex); 73 if (!DebuginfodUrls) { 74 // Only read from the environment variable if the user hasn't already 75 // set the value. 76 ReadGuard.unlock(); 77 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 78 DebuginfodUrls = SmallVector<StringRef>(); 79 if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) { 80 StringRef(DebuginfodUrlsEnv) 81 .split(DebuginfodUrls.value(), " ", -1, false); 82 } 83 WriteGuard.unlock(); 84 ReadGuard.lock(); 85 } 86 return DebuginfodUrls.value(); 87 } 88 89 // Set the default debuginfod URL list, override the environment variable. 90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) { 91 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 92 DebuginfodUrls = URLs; 93 } 94 95 /// Finds a default local file caching directory for the debuginfod client, 96 /// first checking DEBUGINFOD_CACHE_PATH. 97 Expected<std::string> getDefaultDebuginfodCacheDirectory() { 98 if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH")) 99 return CacheDirectoryEnv; 100 101 SmallString<64> CacheDirectory; 102 if (!sys::path::cache_directory(CacheDirectory)) 103 return createStringError( 104 errc::io_error, "Unable to determine appropriate cache directory."); 105 sys::path::append(CacheDirectory, "llvm-debuginfod", "client"); 106 return std::string(CacheDirectory); 107 } 108 109 std::chrono::milliseconds getDefaultDebuginfodTimeout() { 110 long Timeout; 111 const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT"); 112 if (DebuginfodTimeoutEnv && 113 to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10)) 114 return std::chrono::milliseconds(Timeout * 1000); 115 116 return std::chrono::milliseconds(90 * 1000); 117 } 118 119 /// The following functions fetch a debuginfod artifact to a file in a local 120 /// cache and return the cached file path. They first search the local cache, 121 /// followed by the debuginfod servers. 122 123 std::string getDebuginfodSourceUrlPath(BuildIDRef ID, 124 StringRef SourceFilePath) { 125 SmallString<64> UrlPath; 126 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 127 buildIDToString(ID), "source", 128 sys::path::convert_to_slash(SourceFilePath)); 129 return std::string(UrlPath); 130 } 131 132 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID, 133 StringRef SourceFilePath) { 134 std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath); 135 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 136 } 137 138 std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) { 139 SmallString<64> UrlPath; 140 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 141 buildIDToString(ID), "executable"); 142 return std::string(UrlPath); 143 } 144 145 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) { 146 std::string UrlPath = getDebuginfodExecutableUrlPath(ID); 147 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 148 } 149 150 std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) { 151 SmallString<64> UrlPath; 152 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 153 buildIDToString(ID), "debuginfo"); 154 return std::string(UrlPath); 155 } 156 157 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) { 158 std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID); 159 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 160 } 161 162 // General fetching function. 163 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey, 164 StringRef UrlPath) { 165 SmallString<10> CacheDir; 166 167 Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory(); 168 if (!CacheDirOrErr) 169 return CacheDirOrErr.takeError(); 170 CacheDir = *CacheDirOrErr; 171 172 return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir, 173 getDefaultDebuginfodUrls(), 174 getDefaultDebuginfodTimeout()); 175 } 176 177 namespace { 178 179 /// A simple handler which streams the returned data to a cache file. The cache 180 /// file is only created if a 200 OK status is observed. 181 class StreamedHTTPResponseHandler : public HTTPResponseHandler { 182 using CreateStreamFn = 183 std::function<Expected<std::unique_ptr<CachedFileStream>>()>; 184 CreateStreamFn CreateStream; 185 HTTPClient &Client; 186 std::unique_ptr<CachedFileStream> FileStream; 187 188 public: 189 StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client) 190 : CreateStream(CreateStream), Client(Client) {} 191 virtual ~StreamedHTTPResponseHandler() = default; 192 193 Error handleBodyChunk(StringRef BodyChunk) override; 194 }; 195 196 } // namespace 197 198 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { 199 if (!FileStream) { 200 unsigned Code = Client.responseCode(); 201 if (Code && Code != 200) 202 return Error::success(); 203 Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError = 204 CreateStream(); 205 if (!FileStreamOrError) 206 return FileStreamOrError.takeError(); 207 FileStream = std::move(*FileStreamOrError); 208 } 209 *FileStream->OS << BodyChunk; 210 return Error::success(); 211 } 212 213 // An over-accepting simplification of the HTTP RFC 7230 spec. 214 static bool isHeader(StringRef S) { 215 StringRef Name; 216 StringRef Value; 217 std::tie(Name, Value) = S.split(':'); 218 if (Name.empty() || Value.empty()) 219 return false; 220 return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) && 221 all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; }); 222 } 223 224 static SmallVector<std::string, 0> getHeaders() { 225 const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE"); 226 if (!Filename) 227 return {}; 228 ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile = 229 MemoryBuffer::getFile(Filename, /*IsText=*/true); 230 if (!HeadersFile) 231 return {}; 232 233 SmallVector<std::string, 0> Headers; 234 uint64_t LineNumber = 0; 235 for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) { 236 LineNumber++; 237 if (!Line.empty() && Line.back() == '\r') 238 Line = Line.drop_back(); 239 if (!isHeader(Line)) { 240 if (!all_of(Line, llvm::isSpace)) 241 WithColor::warning() 242 << "could not parse debuginfod header: " << Filename << ':' 243 << LineNumber << '\n'; 244 continue; 245 } 246 Headers.emplace_back(Line); 247 } 248 return Headers; 249 } 250 251 Expected<std::string> getCachedOrDownloadArtifact( 252 StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, 253 ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) { 254 SmallString<64> AbsCachedArtifactPath; 255 sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath, 256 "llvmcache-" + UniqueKey); 257 258 Expected<FileCache> CacheOrErr = 259 localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath); 260 if (!CacheOrErr) 261 return CacheOrErr.takeError(); 262 263 FileCache Cache = *CacheOrErr; 264 // We choose an arbitrary Task parameter as we do not make use of it. 265 unsigned Task = 0; 266 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, ""); 267 if (!CacheAddStreamOrErr) 268 return CacheAddStreamOrErr.takeError(); 269 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; 270 if (!CacheAddStream) 271 return std::string(AbsCachedArtifactPath); 272 // The artifact was not found in the local cache, query the debuginfod 273 // servers. 274 if (!HTTPClient::isAvailable()) 275 return createStringError(errc::io_error, 276 "No working HTTP client is available."); 277 278 if (!HTTPClient::IsInitialized) 279 return createStringError( 280 errc::io_error, 281 "A working HTTP client is available, but it is not initialized. To " 282 "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() " 283 "at the beginning of main."); 284 285 HTTPClient Client; 286 Client.setTimeout(Timeout); 287 for (StringRef ServerUrl : DebuginfodUrls) { 288 SmallString<64> ArtifactUrl; 289 sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath); 290 291 // Perform the HTTP request and if successful, write the response body to 292 // the cache. 293 { 294 StreamedHTTPResponseHandler Handler( 295 [&]() { return CacheAddStream(Task, ""); }, Client); 296 HTTPRequest Request(ArtifactUrl); 297 Request.Headers = getHeaders(); 298 Error Err = Client.perform(Request, Handler); 299 if (Err) 300 return std::move(Err); 301 302 unsigned Code = Client.responseCode(); 303 if (Code && Code != 200) 304 continue; 305 } 306 307 Expected<CachePruningPolicy> PruningPolicyOrErr = 308 parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY")); 309 if (!PruningPolicyOrErr) 310 return PruningPolicyOrErr.takeError(); 311 pruneCache(CacheDirectoryPath, *PruningPolicyOrErr); 312 313 // Return the path to the artifact on disk. 314 return std::string(AbsCachedArtifactPath); 315 } 316 317 return createStringError(errc::argument_out_of_domain, "build id not found"); 318 } 319 320 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message) 321 : Message(Message.str()) {} 322 323 void DebuginfodLog::push(const Twine &Message) { 324 push(DebuginfodLogEntry(Message)); 325 } 326 327 void DebuginfodLog::push(DebuginfodLogEntry Entry) { 328 { 329 std::lock_guard<std::mutex> Guard(QueueMutex); 330 LogEntryQueue.push(Entry); 331 } 332 QueueCondition.notify_one(); 333 } 334 335 DebuginfodLogEntry DebuginfodLog::pop() { 336 { 337 std::unique_lock<std::mutex> Guard(QueueMutex); 338 // Wait for messages to be pushed into the queue. 339 QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); }); 340 } 341 std::lock_guard<std::mutex> Guard(QueueMutex); 342 if (!LogEntryQueue.size()) 343 llvm_unreachable("Expected message in the queue."); 344 345 DebuginfodLogEntry Entry = LogEntryQueue.front(); 346 LogEntryQueue.pop(); 347 return Entry; 348 } 349 350 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef, 351 DebuginfodLog &Log, ThreadPool &Pool, 352 double MinInterval) 353 : Log(Log), Pool(Pool), MinInterval(MinInterval) { 354 for (StringRef Path : PathsRef) 355 Paths.push_back(Path.str()); 356 } 357 358 Error DebuginfodCollection::update() { 359 std::lock_guard<sys::Mutex> Guard(UpdateMutex); 360 if (UpdateTimer.isRunning()) 361 UpdateTimer.stopTimer(); 362 UpdateTimer.clear(); 363 for (const std::string &Path : Paths) { 364 Log.push("Updating binaries at path " + Path); 365 if (Error Err = findBinaries(Path)) 366 return Err; 367 } 368 Log.push("Updated collection"); 369 UpdateTimer.startTimer(); 370 return Error::success(); 371 } 372 373 Expected<bool> DebuginfodCollection::updateIfStale() { 374 if (!UpdateTimer.isRunning()) 375 return false; 376 UpdateTimer.stopTimer(); 377 double Time = UpdateTimer.getTotalTime().getWallTime(); 378 UpdateTimer.startTimer(); 379 if (Time < MinInterval) 380 return false; 381 if (Error Err = update()) 382 return std::move(Err); 383 return true; 384 } 385 386 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) { 387 while (true) { 388 if (Error Err = update()) 389 return Err; 390 std::this_thread::sleep_for(Interval); 391 } 392 llvm_unreachable("updateForever loop should never end"); 393 } 394 395 static bool hasELFMagic(StringRef FilePath) { 396 file_magic Type; 397 std::error_code EC = identify_magic(FilePath, Type); 398 if (EC) 399 return false; 400 switch (Type) { 401 case file_magic::elf: 402 case file_magic::elf_relocatable: 403 case file_magic::elf_executable: 404 case file_magic::elf_shared_object: 405 case file_magic::elf_core: 406 return true; 407 default: 408 return false; 409 } 410 } 411 412 Error DebuginfodCollection::findBinaries(StringRef Path) { 413 std::error_code EC; 414 sys::fs::recursive_directory_iterator I(Twine(Path), EC), E; 415 std::mutex IteratorMutex; 416 ThreadPoolTaskGroup IteratorGroup(Pool); 417 for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount(); 418 WorkerIndex++) { 419 IteratorGroup.async([&, this]() -> void { 420 std::string FilePath; 421 while (true) { 422 { 423 // Check if iteration is over or there is an error during iteration 424 std::lock_guard<std::mutex> Guard(IteratorMutex); 425 if (I == E || EC) 426 return; 427 // Grab a file path from the directory iterator and advance the 428 // iterator. 429 FilePath = I->path(); 430 I.increment(EC); 431 } 432 433 // Inspect the file at this path to determine if it is debuginfo. 434 if (!hasELFMagic(FilePath)) 435 continue; 436 437 Expected<object::OwningBinary<object::Binary>> BinOrErr = 438 object::createBinary(FilePath); 439 440 if (!BinOrErr) { 441 consumeError(BinOrErr.takeError()); 442 continue; 443 } 444 object::Binary *Bin = std::move(BinOrErr.get().getBinary()); 445 if (!Bin->isObject()) 446 continue; 447 448 // TODO: Support non-ELF binaries 449 object::ELFObjectFileBase *Object = 450 dyn_cast<object::ELFObjectFileBase>(Bin); 451 if (!Object) 452 continue; 453 454 BuildIDRef ID = getBuildID(Object); 455 if (ID.empty()) 456 continue; 457 458 std::string IDString = buildIDToString(ID); 459 if (Object->hasDebugInfo()) { 460 std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex); 461 (void)DebugBinaries.try_emplace(IDString, std::move(FilePath)); 462 } else { 463 std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex); 464 (void)Binaries.try_emplace(IDString, std::move(FilePath)); 465 } 466 } 467 }); 468 } 469 IteratorGroup.wait(); 470 std::unique_lock<std::mutex> Guard(IteratorMutex); 471 if (EC) 472 return errorCodeToError(EC); 473 return Error::success(); 474 } 475 476 Expected<std::optional<std::string>> 477 DebuginfodCollection::getBinaryPath(BuildIDRef ID) { 478 Log.push("getting binary path of ID " + buildIDToString(ID)); 479 std::shared_lock<sys::RWMutex> Guard(BinariesMutex); 480 auto Loc = Binaries.find(buildIDToString(ID)); 481 if (Loc != Binaries.end()) { 482 std::string Path = Loc->getValue(); 483 return Path; 484 } 485 return std::nullopt; 486 } 487 488 Expected<std::optional<std::string>> 489 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) { 490 Log.push("getting debug binary path of ID " + buildIDToString(ID)); 491 std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex); 492 auto Loc = DebugBinaries.find(buildIDToString(ID)); 493 if (Loc != DebugBinaries.end()) { 494 std::string Path = Loc->getValue(); 495 return Path; 496 } 497 return std::nullopt; 498 } 499 500 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) { 501 { 502 // Check collection; perform on-demand update if stale. 503 Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID); 504 if (!PathOrErr) 505 return PathOrErr.takeError(); 506 std::optional<std::string> Path = *PathOrErr; 507 if (!Path) { 508 Expected<bool> UpdatedOrErr = updateIfStale(); 509 if (!UpdatedOrErr) 510 return UpdatedOrErr.takeError(); 511 if (*UpdatedOrErr) { 512 // Try once more. 513 PathOrErr = getBinaryPath(ID); 514 if (!PathOrErr) 515 return PathOrErr.takeError(); 516 Path = *PathOrErr; 517 } 518 } 519 if (Path) 520 return *Path; 521 } 522 523 // Try federation. 524 Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID); 525 if (!PathOrErr) 526 consumeError(PathOrErr.takeError()); 527 528 // Fall back to debug binary. 529 return findDebugBinaryPath(ID); 530 } 531 532 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) { 533 // Check collection; perform on-demand update if stale. 534 Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID); 535 if (!PathOrErr) 536 return PathOrErr.takeError(); 537 std::optional<std::string> Path = *PathOrErr; 538 if (!Path) { 539 Expected<bool> UpdatedOrErr = updateIfStale(); 540 if (!UpdatedOrErr) 541 return UpdatedOrErr.takeError(); 542 if (*UpdatedOrErr) { 543 // Try once more. 544 PathOrErr = getBinaryPath(ID); 545 if (!PathOrErr) 546 return PathOrErr.takeError(); 547 Path = *PathOrErr; 548 } 549 } 550 if (Path) 551 return *Path; 552 553 // Try federation. 554 return getCachedOrDownloadDebuginfo(ID); 555 } 556 557 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log, 558 DebuginfodCollection &Collection) 559 : Log(Log), Collection(Collection) { 560 cantFail( 561 Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) { 562 Log.push("GET " + Request.UrlPath); 563 std::string IDString; 564 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 565 Request.setResponse( 566 {404, "text/plain", "Build ID is not a hex string\n"}); 567 return; 568 } 569 object::BuildID ID(IDString.begin(), IDString.end()); 570 Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID); 571 if (Error Err = PathOrErr.takeError()) { 572 consumeError(std::move(Err)); 573 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 574 return; 575 } 576 streamFile(Request, *PathOrErr); 577 })); 578 cantFail( 579 Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) { 580 Log.push("GET " + Request.UrlPath); 581 std::string IDString; 582 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 583 Request.setResponse( 584 {404, "text/plain", "Build ID is not a hex string\n"}); 585 return; 586 } 587 object::BuildID ID(IDString.begin(), IDString.end()); 588 Expected<std::string> PathOrErr = Collection.findBinaryPath(ID); 589 if (Error Err = PathOrErr.takeError()) { 590 consumeError(std::move(Err)); 591 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 592 return; 593 } 594 streamFile(Request, *PathOrErr); 595 })); 596 } 597 598 } // namespace llvm 599