//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its per-artifact wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The client functions
/// retrieve any of the three supported artifact types (executable, debuginfo,
/// source file) associated with a build-id from debuginfod servers. If a
/// source file is to be fetched, its absolute path must be passed to
/// getCachedOrDownloadSource. The DebuginfodLogEntry, DebuginfodLog, and
/// DebuginfodCollection are used by the DebuginfodServer to scan the local
/// filesystem for binaries and serve the debuginfod protocol.
21 /// 22 //===----------------------------------------------------------------------===// 23 24 #include "llvm/Debuginfod/Debuginfod.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/BinaryFormat/Magic.h" 28 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 29 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 30 #include "llvm/Debuginfod/HTTPClient.h" 31 #include "llvm/Object/BuildID.h" 32 #include "llvm/Object/ELFObjectFile.h" 33 #include "llvm/Support/CachePruning.h" 34 #include "llvm/Support/Caching.h" 35 #include "llvm/Support/Errc.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/FileUtilities.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/ThreadPool.h" 41 #include "llvm/Support/xxhash.h" 42 43 #include <atomic> 44 #include <optional> 45 #include <thread> 46 47 namespace llvm { 48 49 using llvm::object::BuildIDRef; 50 51 namespace { 52 std::optional<SmallVector<StringRef>> DebuginfodUrls; 53 // Many Readers/Single Writer lock protecting the global debuginfod URL list. 54 llvm::sys::RWMutex UrlsMutex; 55 } // namespace 56 57 std::string getDebuginfodCacheKey(llvm::StringRef S) { 58 return utostr(xxh3_64bits(S)); 59 } 60 61 // Returns a binary BuildID as a normalized hex string. 62 // Uses lowercase for compatibility with common debuginfod servers. 63 static std::string buildIDToString(BuildIDRef ID) { 64 return llvm::toHex(ID, /*LowerCase=*/true); 65 } 66 67 bool canUseDebuginfod() { 68 return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty(); 69 } 70 71 SmallVector<StringRef> getDefaultDebuginfodUrls() { 72 std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex); 73 if (!DebuginfodUrls) { 74 // Only read from the environment variable if the user hasn't already 75 // set the value. 
76 ReadGuard.unlock(); 77 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 78 DebuginfodUrls = SmallVector<StringRef>(); 79 if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) { 80 StringRef(DebuginfodUrlsEnv) 81 .split(DebuginfodUrls.value(), " ", -1, false); 82 } 83 WriteGuard.unlock(); 84 ReadGuard.lock(); 85 } 86 return DebuginfodUrls.value(); 87 } 88 89 // Set the default debuginfod URL list, override the environment variable. 90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) { 91 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 92 DebuginfodUrls = URLs; 93 } 94 95 /// Finds a default local file caching directory for the debuginfod client, 96 /// first checking DEBUGINFOD_CACHE_PATH. 97 Expected<std::string> getDefaultDebuginfodCacheDirectory() { 98 if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH")) 99 return CacheDirectoryEnv; 100 101 SmallString<64> CacheDirectory; 102 if (!sys::path::cache_directory(CacheDirectory)) 103 return createStringError( 104 errc::io_error, "Unable to determine appropriate cache directory."); 105 sys::path::append(CacheDirectory, "llvm-debuginfod", "client"); 106 return std::string(CacheDirectory); 107 } 108 109 std::chrono::milliseconds getDefaultDebuginfodTimeout() { 110 long Timeout; 111 const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT"); 112 if (DebuginfodTimeoutEnv && 113 to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10)) 114 return std::chrono::milliseconds(Timeout * 1000); 115 116 return std::chrono::milliseconds(90 * 1000); 117 } 118 119 /// The following functions fetch a debuginfod artifact to a file in a local 120 /// cache and return the cached file path. They first search the local cache, 121 /// followed by the debuginfod servers. 
122 123 std::string getDebuginfodSourceUrlPath(BuildIDRef ID, 124 StringRef SourceFilePath) { 125 SmallString<64> UrlPath; 126 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 127 buildIDToString(ID), "source", 128 sys::path::convert_to_slash(SourceFilePath)); 129 return std::string(UrlPath); 130 } 131 132 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID, 133 StringRef SourceFilePath) { 134 std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath); 135 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 136 } 137 138 std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) { 139 SmallString<64> UrlPath; 140 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 141 buildIDToString(ID), "executable"); 142 return std::string(UrlPath); 143 } 144 145 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) { 146 std::string UrlPath = getDebuginfodExecutableUrlPath(ID); 147 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 148 } 149 150 std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) { 151 SmallString<64> UrlPath; 152 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 153 buildIDToString(ID), "debuginfo"); 154 return std::string(UrlPath); 155 } 156 157 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) { 158 std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID); 159 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 160 } 161 162 // General fetching function. 
163 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey, 164 StringRef UrlPath) { 165 SmallString<10> CacheDir; 166 167 Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory(); 168 if (!CacheDirOrErr) 169 return CacheDirOrErr.takeError(); 170 CacheDir = *CacheDirOrErr; 171 172 return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir, 173 getDefaultDebuginfodUrls(), 174 getDefaultDebuginfodTimeout()); 175 } 176 177 namespace { 178 179 /// A simple handler which streams the returned data to a cache file. The cache 180 /// file is only created if a 200 OK status is observed. 181 class StreamedHTTPResponseHandler : public HTTPResponseHandler { 182 using CreateStreamFn = 183 std::function<Expected<std::unique_ptr<CachedFileStream>>()>; 184 CreateStreamFn CreateStream; 185 HTTPClient &Client; 186 std::unique_ptr<CachedFileStream> FileStream; 187 188 public: 189 StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client) 190 : CreateStream(CreateStream), Client(Client) {} 191 192 /// Must be called exactly once after the writes have been completed 193 /// but before the StreamedHTTPResponseHandler object is destroyed. 
194 Error commit(); 195 196 virtual ~StreamedHTTPResponseHandler() = default; 197 198 Error handleBodyChunk(StringRef BodyChunk) override; 199 }; 200 201 } // namespace 202 203 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { 204 if (!FileStream) { 205 unsigned Code = Client.responseCode(); 206 if (Code && Code != 200) 207 return Error::success(); 208 Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError = 209 CreateStream(); 210 if (!FileStreamOrError) 211 return FileStreamOrError.takeError(); 212 FileStream = std::move(*FileStreamOrError); 213 } 214 *FileStream->OS << BodyChunk; 215 return Error::success(); 216 } 217 218 Error StreamedHTTPResponseHandler::commit() { 219 if (FileStream) 220 return FileStream->commit(); 221 return Error::success(); 222 } 223 224 // An over-accepting simplification of the HTTP RFC 7230 spec. 225 static bool isHeader(StringRef S) { 226 StringRef Name; 227 StringRef Value; 228 std::tie(Name, Value) = S.split(':'); 229 if (Name.empty() || Value.empty()) 230 return false; 231 return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) && 232 all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; }); 233 } 234 235 static SmallVector<std::string, 0> getHeaders() { 236 const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE"); 237 if (!Filename) 238 return {}; 239 ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile = 240 MemoryBuffer::getFile(Filename, /*IsText=*/true); 241 if (!HeadersFile) 242 return {}; 243 244 SmallVector<std::string, 0> Headers; 245 uint64_t LineNumber = 0; 246 for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) { 247 LineNumber++; 248 Line.consume_back("\r"); 249 if (!isHeader(Line)) { 250 if (!all_of(Line, llvm::isSpace)) 251 WithColor::warning() 252 << "could not parse debuginfod header: " << Filename << ':' 253 << LineNumber << '\n'; 254 continue; 255 } 256 Headers.emplace_back(Line); 257 } 258 return Headers; 259 } 260 261 
Expected<std::string> getCachedOrDownloadArtifact( 262 StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, 263 ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) { 264 SmallString<64> AbsCachedArtifactPath; 265 sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath, 266 "llvmcache-" + UniqueKey); 267 268 Expected<FileCache> CacheOrErr = 269 localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath); 270 if (!CacheOrErr) 271 return CacheOrErr.takeError(); 272 273 FileCache Cache = *CacheOrErr; 274 // We choose an arbitrary Task parameter as we do not make use of it. 275 unsigned Task = 0; 276 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, ""); 277 if (!CacheAddStreamOrErr) 278 return CacheAddStreamOrErr.takeError(); 279 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; 280 if (!CacheAddStream) 281 return std::string(AbsCachedArtifactPath); 282 // The artifact was not found in the local cache, query the debuginfod 283 // servers. 284 if (!HTTPClient::isAvailable()) 285 return createStringError(errc::io_error, 286 "No working HTTP client is available."); 287 288 if (!HTTPClient::IsInitialized) 289 return createStringError( 290 errc::io_error, 291 "A working HTTP client is available, but it is not initialized. To " 292 "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() " 293 "at the beginning of main."); 294 295 HTTPClient Client; 296 Client.setTimeout(Timeout); 297 for (StringRef ServerUrl : DebuginfodUrls) { 298 SmallString<64> ArtifactUrl; 299 sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath); 300 301 // Perform the HTTP request and if successful, write the response body to 302 // the cache. 
303 { 304 StreamedHTTPResponseHandler Handler( 305 [&]() { return CacheAddStream(Task, ""); }, Client); 306 HTTPRequest Request(ArtifactUrl); 307 Request.Headers = getHeaders(); 308 Error Err = Client.perform(Request, Handler); 309 if (Err) 310 return std::move(Err); 311 if ((Err = Handler.commit())) 312 return std::move(Err); 313 314 unsigned Code = Client.responseCode(); 315 if (Code && Code != 200) 316 continue; 317 } 318 319 Expected<CachePruningPolicy> PruningPolicyOrErr = 320 parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY")); 321 if (!PruningPolicyOrErr) 322 return PruningPolicyOrErr.takeError(); 323 pruneCache(CacheDirectoryPath, *PruningPolicyOrErr); 324 325 // Return the path to the artifact on disk. 326 return std::string(AbsCachedArtifactPath); 327 } 328 329 return createStringError(errc::argument_out_of_domain, "build id not found"); 330 } 331 332 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message) 333 : Message(Message.str()) {} 334 335 void DebuginfodLog::push(const Twine &Message) { 336 push(DebuginfodLogEntry(Message)); 337 } 338 339 void DebuginfodLog::push(DebuginfodLogEntry Entry) { 340 { 341 std::lock_guard<std::mutex> Guard(QueueMutex); 342 LogEntryQueue.push(Entry); 343 } 344 QueueCondition.notify_one(); 345 } 346 347 DebuginfodLogEntry DebuginfodLog::pop() { 348 { 349 std::unique_lock<std::mutex> Guard(QueueMutex); 350 // Wait for messages to be pushed into the queue. 
351 QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); }); 352 } 353 std::lock_guard<std::mutex> Guard(QueueMutex); 354 if (!LogEntryQueue.size()) 355 llvm_unreachable("Expected message in the queue."); 356 357 DebuginfodLogEntry Entry = LogEntryQueue.front(); 358 LogEntryQueue.pop(); 359 return Entry; 360 } 361 362 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef, 363 DebuginfodLog &Log, 364 ThreadPoolInterface &Pool, 365 double MinInterval) 366 : Log(Log), Pool(Pool), MinInterval(MinInterval) { 367 for (StringRef Path : PathsRef) 368 Paths.push_back(Path.str()); 369 } 370 371 Error DebuginfodCollection::update() { 372 std::lock_guard<sys::Mutex> Guard(UpdateMutex); 373 if (UpdateTimer.isRunning()) 374 UpdateTimer.stopTimer(); 375 UpdateTimer.clear(); 376 for (const std::string &Path : Paths) { 377 Log.push("Updating binaries at path " + Path); 378 if (Error Err = findBinaries(Path)) 379 return Err; 380 } 381 Log.push("Updated collection"); 382 UpdateTimer.startTimer(); 383 return Error::success(); 384 } 385 386 Expected<bool> DebuginfodCollection::updateIfStale() { 387 if (!UpdateTimer.isRunning()) 388 return false; 389 UpdateTimer.stopTimer(); 390 double Time = UpdateTimer.getTotalTime().getWallTime(); 391 UpdateTimer.startTimer(); 392 if (Time < MinInterval) 393 return false; 394 if (Error Err = update()) 395 return std::move(Err); 396 return true; 397 } 398 399 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) { 400 while (true) { 401 if (Error Err = update()) 402 return Err; 403 std::this_thread::sleep_for(Interval); 404 } 405 llvm_unreachable("updateForever loop should never end"); 406 } 407 408 static bool hasELFMagic(StringRef FilePath) { 409 file_magic Type; 410 std::error_code EC = identify_magic(FilePath, Type); 411 if (EC) 412 return false; 413 switch (Type) { 414 case file_magic::elf: 415 case file_magic::elf_relocatable: 416 case file_magic::elf_executable: 417 case 
file_magic::elf_shared_object: 418 case file_magic::elf_core: 419 return true; 420 default: 421 return false; 422 } 423 } 424 425 Error DebuginfodCollection::findBinaries(StringRef Path) { 426 std::error_code EC; 427 sys::fs::recursive_directory_iterator I(Twine(Path), EC), E; 428 std::mutex IteratorMutex; 429 ThreadPoolTaskGroup IteratorGroup(Pool); 430 for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getMaxConcurrency(); 431 WorkerIndex++) { 432 IteratorGroup.async([&, this]() -> void { 433 std::string FilePath; 434 while (true) { 435 { 436 // Check if iteration is over or there is an error during iteration 437 std::lock_guard<std::mutex> Guard(IteratorMutex); 438 if (I == E || EC) 439 return; 440 // Grab a file path from the directory iterator and advance the 441 // iterator. 442 FilePath = I->path(); 443 I.increment(EC); 444 } 445 446 // Inspect the file at this path to determine if it is debuginfo. 447 if (!hasELFMagic(FilePath)) 448 continue; 449 450 Expected<object::OwningBinary<object::Binary>> BinOrErr = 451 object::createBinary(FilePath); 452 453 if (!BinOrErr) { 454 consumeError(BinOrErr.takeError()); 455 continue; 456 } 457 object::Binary *Bin = std::move(BinOrErr.get().getBinary()); 458 if (!Bin->isObject()) 459 continue; 460 461 // TODO: Support non-ELF binaries 462 object::ELFObjectFileBase *Object = 463 dyn_cast<object::ELFObjectFileBase>(Bin); 464 if (!Object) 465 continue; 466 467 BuildIDRef ID = getBuildID(Object); 468 if (ID.empty()) 469 continue; 470 471 std::string IDString = buildIDToString(ID); 472 if (Object->hasDebugInfo()) { 473 std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex); 474 (void)DebugBinaries.try_emplace(IDString, std::move(FilePath)); 475 } else { 476 std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex); 477 (void)Binaries.try_emplace(IDString, std::move(FilePath)); 478 } 479 } 480 }); 481 } 482 IteratorGroup.wait(); 483 std::unique_lock<std::mutex> Guard(IteratorMutex); 484 if (EC) 485 return 
errorCodeToError(EC); 486 return Error::success(); 487 } 488 489 Expected<std::optional<std::string>> 490 DebuginfodCollection::getBinaryPath(BuildIDRef ID) { 491 Log.push("getting binary path of ID " + buildIDToString(ID)); 492 std::shared_lock<sys::RWMutex> Guard(BinariesMutex); 493 auto Loc = Binaries.find(buildIDToString(ID)); 494 if (Loc != Binaries.end()) { 495 std::string Path = Loc->getValue(); 496 return Path; 497 } 498 return std::nullopt; 499 } 500 501 Expected<std::optional<std::string>> 502 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) { 503 Log.push("getting debug binary path of ID " + buildIDToString(ID)); 504 std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex); 505 auto Loc = DebugBinaries.find(buildIDToString(ID)); 506 if (Loc != DebugBinaries.end()) { 507 std::string Path = Loc->getValue(); 508 return Path; 509 } 510 return std::nullopt; 511 } 512 513 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) { 514 { 515 // Check collection; perform on-demand update if stale. 516 Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID); 517 if (!PathOrErr) 518 return PathOrErr.takeError(); 519 std::optional<std::string> Path = *PathOrErr; 520 if (!Path) { 521 Expected<bool> UpdatedOrErr = updateIfStale(); 522 if (!UpdatedOrErr) 523 return UpdatedOrErr.takeError(); 524 if (*UpdatedOrErr) { 525 // Try once more. 526 PathOrErr = getBinaryPath(ID); 527 if (!PathOrErr) 528 return PathOrErr.takeError(); 529 Path = *PathOrErr; 530 } 531 } 532 if (Path) 533 return *Path; 534 } 535 536 // Try federation. 537 Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID); 538 if (!PathOrErr) 539 consumeError(PathOrErr.takeError()); 540 541 // Fall back to debug binary. 542 return findDebugBinaryPath(ID); 543 } 544 545 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) { 546 // Check collection; perform on-demand update if stale. 
547 Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID); 548 if (!PathOrErr) 549 return PathOrErr.takeError(); 550 std::optional<std::string> Path = *PathOrErr; 551 if (!Path) { 552 Expected<bool> UpdatedOrErr = updateIfStale(); 553 if (!UpdatedOrErr) 554 return UpdatedOrErr.takeError(); 555 if (*UpdatedOrErr) { 556 // Try once more. 557 PathOrErr = getBinaryPath(ID); 558 if (!PathOrErr) 559 return PathOrErr.takeError(); 560 Path = *PathOrErr; 561 } 562 } 563 if (Path) 564 return *Path; 565 566 // Try federation. 567 return getCachedOrDownloadDebuginfo(ID); 568 } 569 570 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log, 571 DebuginfodCollection &Collection) 572 : Log(Log), Collection(Collection) { 573 cantFail( 574 Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) { 575 Log.push("GET " + Request.UrlPath); 576 std::string IDString; 577 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 578 Request.setResponse( 579 {404, "text/plain", "Build ID is not a hex string\n"}); 580 return; 581 } 582 object::BuildID ID(IDString.begin(), IDString.end()); 583 Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID); 584 if (Error Err = PathOrErr.takeError()) { 585 consumeError(std::move(Err)); 586 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 587 return; 588 } 589 streamFile(Request, *PathOrErr); 590 })); 591 cantFail( 592 Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) { 593 Log.push("GET " + Request.UrlPath); 594 std::string IDString; 595 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 596 Request.setResponse( 597 {404, "text/plain", "Build ID is not a hex string\n"}); 598 return; 599 } 600 object::BuildID ID(IDString.begin(), IDString.end()); 601 Expected<std::string> PathOrErr = Collection.findBinaryPath(ID); 602 if (Error Err = PathOrErr.takeError()) { 603 consumeError(std::move(Err)); 604 Request.setResponse({404, "text/plain", "Build ID not 
found\n"}); 605 return; 606 } 607 streamFile(Request, *PathOrErr); 608 })); 609 } 610 611 } // namespace llvm 612