//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its wrappers getCachedOrDownloadSource, getCachedOrDownloadExecutable
/// and getCachedOrDownloadDebuginfo. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The getCachedOrDownload*
/// functions retrieve any of the three supported artifact types (executable,
/// debuginfo, source file) associated with a build-id from debuginfod servers.
/// If a source file is to be fetched, its absolute path must be specified in
/// the SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21 /// 22 //===----------------------------------------------------------------------===// 23 24 #include "llvm/Debuginfod/Debuginfod.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/BinaryFormat/Magic.h" 28 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 29 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 30 #include "llvm/Debuginfod/HTTPClient.h" 31 #include "llvm/Object/BuildID.h" 32 #include "llvm/Object/ELFObjectFile.h" 33 #include "llvm/Support/CachePruning.h" 34 #include "llvm/Support/Caching.h" 35 #include "llvm/Support/Errc.h" 36 #include "llvm/Support/Error.h" 37 #include "llvm/Support/FileUtilities.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/ThreadPool.h" 41 #include "llvm/Support/xxhash.h" 42 43 #include <atomic> 44 #include <optional> 45 #include <thread> 46 47 namespace llvm { 48 49 using llvm::object::BuildIDRef; 50 51 namespace { 52 std::optional<SmallVector<StringRef>> DebuginfodUrls; 53 // Many Readers/Single Writer lock protecting the global debuginfod URL list. 54 llvm::sys::RWMutex UrlsMutex; 55 } // namespace 56 57 std::string getDebuginfodCacheKey(llvm::StringRef S) { 58 return utostr(xxh3_64bits(S)); 59 } 60 61 // Returns a binary BuildID as a normalized hex string. 62 // Uses lowercase for compatibility with common debuginfod servers. 63 static std::string buildIDToString(BuildIDRef ID) { 64 return llvm::toHex(ID, /*LowerCase=*/true); 65 } 66 67 bool canUseDebuginfod() { 68 return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty(); 69 } 70 71 SmallVector<StringRef> getDefaultDebuginfodUrls() { 72 std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex); 73 if (!DebuginfodUrls) { 74 // Only read from the environment variable if the user hasn't already 75 // set the value. 
76 ReadGuard.unlock(); 77 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 78 DebuginfodUrls = SmallVector<StringRef>(); 79 if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) { 80 StringRef(DebuginfodUrlsEnv) 81 .split(DebuginfodUrls.value(), " ", -1, false); 82 } 83 WriteGuard.unlock(); 84 ReadGuard.lock(); 85 } 86 return DebuginfodUrls.value(); 87 } 88 89 // Set the default debuginfod URL list, override the environment variable. 90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) { 91 std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex); 92 DebuginfodUrls = URLs; 93 } 94 95 /// Finds a default local file caching directory for the debuginfod client, 96 /// first checking DEBUGINFOD_CACHE_PATH. 97 Expected<std::string> getDefaultDebuginfodCacheDirectory() { 98 if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH")) 99 return CacheDirectoryEnv; 100 101 SmallString<64> CacheDirectory; 102 if (!sys::path::cache_directory(CacheDirectory)) 103 return createStringError( 104 errc::io_error, "Unable to determine appropriate cache directory."); 105 sys::path::append(CacheDirectory, "llvm-debuginfod", "client"); 106 return std::string(CacheDirectory); 107 } 108 109 std::chrono::milliseconds getDefaultDebuginfodTimeout() { 110 long Timeout; 111 const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT"); 112 if (DebuginfodTimeoutEnv && 113 to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10)) 114 return std::chrono::milliseconds(Timeout * 1000); 115 116 return std::chrono::milliseconds(90 * 1000); 117 } 118 119 /// The following functions fetch a debuginfod artifact to a file in a local 120 /// cache and return the cached file path. They first search the local cache, 121 /// followed by the debuginfod servers. 
122 123 std::string getDebuginfodSourceUrlPath(BuildIDRef ID, 124 StringRef SourceFilePath) { 125 SmallString<64> UrlPath; 126 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 127 buildIDToString(ID), "source", 128 sys::path::convert_to_slash(SourceFilePath)); 129 return std::string(UrlPath); 130 } 131 132 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID, 133 StringRef SourceFilePath) { 134 std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath); 135 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 136 } 137 138 std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) { 139 SmallString<64> UrlPath; 140 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 141 buildIDToString(ID), "executable"); 142 return std::string(UrlPath); 143 } 144 145 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) { 146 std::string UrlPath = getDebuginfodExecutableUrlPath(ID); 147 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 148 } 149 150 std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) { 151 SmallString<64> UrlPath; 152 sys::path::append(UrlPath, sys::path::Style::posix, "buildid", 153 buildIDToString(ID), "debuginfo"); 154 return std::string(UrlPath); 155 } 156 157 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) { 158 std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID); 159 return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath); 160 } 161 162 // General fetching function. 
163 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey, 164 StringRef UrlPath) { 165 SmallString<10> CacheDir; 166 167 Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory(); 168 if (!CacheDirOrErr) 169 return CacheDirOrErr.takeError(); 170 CacheDir = *CacheDirOrErr; 171 172 return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir, 173 getDefaultDebuginfodUrls(), 174 getDefaultDebuginfodTimeout()); 175 } 176 177 namespace { 178 179 /// A simple handler which streams the returned data to a cache file. The cache 180 /// file is only created if a 200 OK status is observed. 181 class StreamedHTTPResponseHandler : public HTTPResponseHandler { 182 using CreateStreamFn = 183 std::function<Expected<std::unique_ptr<CachedFileStream>>()>; 184 CreateStreamFn CreateStream; 185 HTTPClient &Client; 186 std::unique_ptr<CachedFileStream> FileStream; 187 188 public: 189 StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client) 190 : CreateStream(CreateStream), Client(Client) {} 191 virtual ~StreamedHTTPResponseHandler() = default; 192 193 Error handleBodyChunk(StringRef BodyChunk) override; 194 }; 195 196 } // namespace 197 198 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { 199 if (!FileStream) { 200 unsigned Code = Client.responseCode(); 201 if (Code && Code != 200) 202 return Error::success(); 203 Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError = 204 CreateStream(); 205 if (!FileStreamOrError) 206 return FileStreamOrError.takeError(); 207 FileStream = std::move(*FileStreamOrError); 208 } 209 *FileStream->OS << BodyChunk; 210 return Error::success(); 211 } 212 213 // An over-accepting simplification of the HTTP RFC 7230 spec. 
214 static bool isHeader(StringRef S) { 215 StringRef Name; 216 StringRef Value; 217 std::tie(Name, Value) = S.split(':'); 218 if (Name.empty() || Value.empty()) 219 return false; 220 return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) && 221 all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; }); 222 } 223 224 static SmallVector<std::string, 0> getHeaders() { 225 const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE"); 226 if (!Filename) 227 return {}; 228 ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile = 229 MemoryBuffer::getFile(Filename, /*IsText=*/true); 230 if (!HeadersFile) 231 return {}; 232 233 SmallVector<std::string, 0> Headers; 234 uint64_t LineNumber = 0; 235 for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) { 236 LineNumber++; 237 if (!Line.empty() && Line.back() == '\r') 238 Line = Line.drop_back(); 239 if (!isHeader(Line)) { 240 if (!all_of(Line, llvm::isSpace)) 241 WithColor::warning() 242 << "could not parse debuginfod header: " << Filename << ':' 243 << LineNumber << '\n'; 244 continue; 245 } 246 Headers.emplace_back(Line); 247 } 248 return Headers; 249 } 250 251 Expected<std::string> getCachedOrDownloadArtifact( 252 StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, 253 ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) { 254 SmallString<64> AbsCachedArtifactPath; 255 sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath, 256 "llvmcache-" + UniqueKey); 257 258 Expected<FileCache> CacheOrErr = 259 localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath); 260 if (!CacheOrErr) 261 return CacheOrErr.takeError(); 262 263 FileCache Cache = *CacheOrErr; 264 // We choose an arbitrary Task parameter as we do not make use of it. 
265 unsigned Task = 0; 266 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, ""); 267 if (!CacheAddStreamOrErr) 268 return CacheAddStreamOrErr.takeError(); 269 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; 270 if (!CacheAddStream) 271 return std::string(AbsCachedArtifactPath); 272 // The artifact was not found in the local cache, query the debuginfod 273 // servers. 274 if (!HTTPClient::isAvailable()) 275 return createStringError(errc::io_error, 276 "No working HTTP client is available."); 277 278 if (!HTTPClient::IsInitialized) 279 return createStringError( 280 errc::io_error, 281 "A working HTTP client is available, but it is not initialized. To " 282 "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() " 283 "at the beginning of main."); 284 285 HTTPClient Client; 286 Client.setTimeout(Timeout); 287 for (StringRef ServerUrl : DebuginfodUrls) { 288 SmallString<64> ArtifactUrl; 289 sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath); 290 291 // Perform the HTTP request and if successful, write the response body to 292 // the cache. 293 { 294 StreamedHTTPResponseHandler Handler( 295 [&]() { return CacheAddStream(Task, ""); }, Client); 296 HTTPRequest Request(ArtifactUrl); 297 Request.Headers = getHeaders(); 298 Error Err = Client.perform(Request, Handler); 299 if (Err) 300 return std::move(Err); 301 302 unsigned Code = Client.responseCode(); 303 if (Code && Code != 200) 304 continue; 305 } 306 307 Expected<CachePruningPolicy> PruningPolicyOrErr = 308 parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY")); 309 if (!PruningPolicyOrErr) 310 return PruningPolicyOrErr.takeError(); 311 pruneCache(CacheDirectoryPath, *PruningPolicyOrErr); 312 313 // Return the path to the artifact on disk. 
314 return std::string(AbsCachedArtifactPath); 315 } 316 317 return createStringError(errc::argument_out_of_domain, "build id not found"); 318 } 319 320 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message) 321 : Message(Message.str()) {} 322 323 void DebuginfodLog::push(const Twine &Message) { 324 push(DebuginfodLogEntry(Message)); 325 } 326 327 void DebuginfodLog::push(DebuginfodLogEntry Entry) { 328 { 329 std::lock_guard<std::mutex> Guard(QueueMutex); 330 LogEntryQueue.push(Entry); 331 } 332 QueueCondition.notify_one(); 333 } 334 335 DebuginfodLogEntry DebuginfodLog::pop() { 336 { 337 std::unique_lock<std::mutex> Guard(QueueMutex); 338 // Wait for messages to be pushed into the queue. 339 QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); }); 340 } 341 std::lock_guard<std::mutex> Guard(QueueMutex); 342 if (!LogEntryQueue.size()) 343 llvm_unreachable("Expected message in the queue."); 344 345 DebuginfodLogEntry Entry = LogEntryQueue.front(); 346 LogEntryQueue.pop(); 347 return Entry; 348 } 349 350 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef, 351 DebuginfodLog &Log, 352 ThreadPoolInterface &Pool, 353 double MinInterval) 354 : Log(Log), Pool(Pool), MinInterval(MinInterval) { 355 for (StringRef Path : PathsRef) 356 Paths.push_back(Path.str()); 357 } 358 359 Error DebuginfodCollection::update() { 360 std::lock_guard<sys::Mutex> Guard(UpdateMutex); 361 if (UpdateTimer.isRunning()) 362 UpdateTimer.stopTimer(); 363 UpdateTimer.clear(); 364 for (const std::string &Path : Paths) { 365 Log.push("Updating binaries at path " + Path); 366 if (Error Err = findBinaries(Path)) 367 return Err; 368 } 369 Log.push("Updated collection"); 370 UpdateTimer.startTimer(); 371 return Error::success(); 372 } 373 374 Expected<bool> DebuginfodCollection::updateIfStale() { 375 if (!UpdateTimer.isRunning()) 376 return false; 377 UpdateTimer.stopTimer(); 378 double Time = UpdateTimer.getTotalTime().getWallTime(); 379 UpdateTimer.startTimer(); 380 
if (Time < MinInterval) 381 return false; 382 if (Error Err = update()) 383 return std::move(Err); 384 return true; 385 } 386 387 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) { 388 while (true) { 389 if (Error Err = update()) 390 return Err; 391 std::this_thread::sleep_for(Interval); 392 } 393 llvm_unreachable("updateForever loop should never end"); 394 } 395 396 static bool hasELFMagic(StringRef FilePath) { 397 file_magic Type; 398 std::error_code EC = identify_magic(FilePath, Type); 399 if (EC) 400 return false; 401 switch (Type) { 402 case file_magic::elf: 403 case file_magic::elf_relocatable: 404 case file_magic::elf_executable: 405 case file_magic::elf_shared_object: 406 case file_magic::elf_core: 407 return true; 408 default: 409 return false; 410 } 411 } 412 413 Error DebuginfodCollection::findBinaries(StringRef Path) { 414 std::error_code EC; 415 sys::fs::recursive_directory_iterator I(Twine(Path), EC), E; 416 std::mutex IteratorMutex; 417 ThreadPoolTaskGroup IteratorGroup(Pool); 418 for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getMaxConcurrency(); 419 WorkerIndex++) { 420 IteratorGroup.async([&, this]() -> void { 421 std::string FilePath; 422 while (true) { 423 { 424 // Check if iteration is over or there is an error during iteration 425 std::lock_guard<std::mutex> Guard(IteratorMutex); 426 if (I == E || EC) 427 return; 428 // Grab a file path from the directory iterator and advance the 429 // iterator. 430 FilePath = I->path(); 431 I.increment(EC); 432 } 433 434 // Inspect the file at this path to determine if it is debuginfo. 
435 if (!hasELFMagic(FilePath)) 436 continue; 437 438 Expected<object::OwningBinary<object::Binary>> BinOrErr = 439 object::createBinary(FilePath); 440 441 if (!BinOrErr) { 442 consumeError(BinOrErr.takeError()); 443 continue; 444 } 445 object::Binary *Bin = std::move(BinOrErr.get().getBinary()); 446 if (!Bin->isObject()) 447 continue; 448 449 // TODO: Support non-ELF binaries 450 object::ELFObjectFileBase *Object = 451 dyn_cast<object::ELFObjectFileBase>(Bin); 452 if (!Object) 453 continue; 454 455 BuildIDRef ID = getBuildID(Object); 456 if (ID.empty()) 457 continue; 458 459 std::string IDString = buildIDToString(ID); 460 if (Object->hasDebugInfo()) { 461 std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex); 462 (void)DebugBinaries.try_emplace(IDString, std::move(FilePath)); 463 } else { 464 std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex); 465 (void)Binaries.try_emplace(IDString, std::move(FilePath)); 466 } 467 } 468 }); 469 } 470 IteratorGroup.wait(); 471 std::unique_lock<std::mutex> Guard(IteratorMutex); 472 if (EC) 473 return errorCodeToError(EC); 474 return Error::success(); 475 } 476 477 Expected<std::optional<std::string>> 478 DebuginfodCollection::getBinaryPath(BuildIDRef ID) { 479 Log.push("getting binary path of ID " + buildIDToString(ID)); 480 std::shared_lock<sys::RWMutex> Guard(BinariesMutex); 481 auto Loc = Binaries.find(buildIDToString(ID)); 482 if (Loc != Binaries.end()) { 483 std::string Path = Loc->getValue(); 484 return Path; 485 } 486 return std::nullopt; 487 } 488 489 Expected<std::optional<std::string>> 490 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) { 491 Log.push("getting debug binary path of ID " + buildIDToString(ID)); 492 std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex); 493 auto Loc = DebugBinaries.find(buildIDToString(ID)); 494 if (Loc != DebugBinaries.end()) { 495 std::string Path = Loc->getValue(); 496 return Path; 497 } 498 return std::nullopt; 499 } 500 501 Expected<std::string> 
DebuginfodCollection::findBinaryPath(BuildIDRef ID) { 502 { 503 // Check collection; perform on-demand update if stale. 504 Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID); 505 if (!PathOrErr) 506 return PathOrErr.takeError(); 507 std::optional<std::string> Path = *PathOrErr; 508 if (!Path) { 509 Expected<bool> UpdatedOrErr = updateIfStale(); 510 if (!UpdatedOrErr) 511 return UpdatedOrErr.takeError(); 512 if (*UpdatedOrErr) { 513 // Try once more. 514 PathOrErr = getBinaryPath(ID); 515 if (!PathOrErr) 516 return PathOrErr.takeError(); 517 Path = *PathOrErr; 518 } 519 } 520 if (Path) 521 return *Path; 522 } 523 524 // Try federation. 525 Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID); 526 if (!PathOrErr) 527 consumeError(PathOrErr.takeError()); 528 529 // Fall back to debug binary. 530 return findDebugBinaryPath(ID); 531 } 532 533 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) { 534 // Check collection; perform on-demand update if stale. 535 Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID); 536 if (!PathOrErr) 537 return PathOrErr.takeError(); 538 std::optional<std::string> Path = *PathOrErr; 539 if (!Path) { 540 Expected<bool> UpdatedOrErr = updateIfStale(); 541 if (!UpdatedOrErr) 542 return UpdatedOrErr.takeError(); 543 if (*UpdatedOrErr) { 544 // Try once more. 545 PathOrErr = getBinaryPath(ID); 546 if (!PathOrErr) 547 return PathOrErr.takeError(); 548 Path = *PathOrErr; 549 } 550 } 551 if (Path) 552 return *Path; 553 554 // Try federation. 
555 return getCachedOrDownloadDebuginfo(ID); 556 } 557 558 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log, 559 DebuginfodCollection &Collection) 560 : Log(Log), Collection(Collection) { 561 cantFail( 562 Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) { 563 Log.push("GET " + Request.UrlPath); 564 std::string IDString; 565 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 566 Request.setResponse( 567 {404, "text/plain", "Build ID is not a hex string\n"}); 568 return; 569 } 570 object::BuildID ID(IDString.begin(), IDString.end()); 571 Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID); 572 if (Error Err = PathOrErr.takeError()) { 573 consumeError(std::move(Err)); 574 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 575 return; 576 } 577 streamFile(Request, *PathOrErr); 578 })); 579 cantFail( 580 Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) { 581 Log.push("GET " + Request.UrlPath); 582 std::string IDString; 583 if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { 584 Request.setResponse( 585 {404, "text/plain", "Build ID is not a hex string\n"}); 586 return; 587 } 588 object::BuildID ID(IDString.begin(), IDString.end()); 589 Expected<std::string> PathOrErr = Collection.findBinaryPath(ID); 590 if (Error Err = PathOrErr.takeError()) { 591 consumeError(std::move(Err)); 592 Request.setResponse({404, "text/plain", "Build ID not found\n"}); 593 return; 594 } 595 streamFile(Request, *PathOrErr); 596 })); 597 } 598 599 } // namespace llvm 600