xref: /freebsd/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp (revision 96190b4fef3b4a0cc3ca0606b0c4e3e69a5e6717)
1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its convenience wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The getCachedOrDownload*
/// functions retrieve any of the three supported artifact types (executable,
/// debuginfo, source file) associated with a build-id from debuginfod servers.
/// If a source file is to be fetched, its absolute path must be specified in
/// the SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42 
43 #include <atomic>
44 #include <optional>
45 #include <thread>
46 
47 namespace llvm {
48 
49 using llvm::object::BuildIDRef;
50 
namespace {
// Process-wide list of debuginfod server URLs. It stays unset (std::nullopt)
// until getDefaultDebuginfodUrls() fills it from DEBUGINFOD_URLS or
// setDefaultDebuginfodUrls() assigns it explicitly. The elements are
// non-owning StringRefs, so the character data they refer to must outlive
// every use of this list.
std::optional<SmallVector<StringRef>> DebuginfodUrls;
// Many Readers/Single Writer lock protecting the global debuginfod URL list.
llvm::sys::RWMutex UrlsMutex;
} // namespace
56 
57 std::string getDebuginfodCacheKey(llvm::StringRef S) {
58   return utostr(xxh3_64bits(S));
59 }
60 
61 // Returns a binary BuildID as a normalized hex string.
62 // Uses lowercase for compatibility with common debuginfod servers.
63 static std::string buildIDToString(BuildIDRef ID) {
64   return llvm::toHex(ID, /*LowerCase=*/true);
65 }
66 
67 bool canUseDebuginfod() {
68   return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
69 }
70 
71 SmallVector<StringRef> getDefaultDebuginfodUrls() {
72   std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);
73   if (!DebuginfodUrls) {
74     // Only read from the environment variable if the user hasn't already
75     // set the value.
76     ReadGuard.unlock();
77     std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
78     DebuginfodUrls = SmallVector<StringRef>();
79     if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) {
80       StringRef(DebuginfodUrlsEnv)
81           .split(DebuginfodUrls.value(), " ", -1, false);
82     }
83     WriteGuard.unlock();
84     ReadGuard.lock();
85   }
86   return DebuginfodUrls.value();
87 }
88 
89 // Set the default debuginfod URL list, override the environment variable.
90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {
91   std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
92   DebuginfodUrls = URLs;
93 }
94 
95 /// Finds a default local file caching directory for the debuginfod client,
96 /// first checking DEBUGINFOD_CACHE_PATH.
97 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
98   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
99     return CacheDirectoryEnv;
100 
101   SmallString<64> CacheDirectory;
102   if (!sys::path::cache_directory(CacheDirectory))
103     return createStringError(
104         errc::io_error, "Unable to determine appropriate cache directory.");
105   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
106   return std::string(CacheDirectory);
107 }
108 
109 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
110   long Timeout;
111   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
112   if (DebuginfodTimeoutEnv &&
113       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
114     return std::chrono::milliseconds(Timeout * 1000);
115 
116   return std::chrono::milliseconds(90 * 1000);
117 }
118 
119 /// The following functions fetch a debuginfod artifact to a file in a local
120 /// cache and return the cached file path. They first search the local cache,
121 /// followed by the debuginfod servers.
122 
123 std::string getDebuginfodSourceUrlPath(BuildIDRef ID,
124                                        StringRef SourceFilePath) {
125   SmallString<64> UrlPath;
126   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
127                     buildIDToString(ID), "source",
128                     sys::path::convert_to_slash(SourceFilePath));
129   return std::string(UrlPath);
130 }
131 
132 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
133                                                 StringRef SourceFilePath) {
134   std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath);
135   return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
136 }
137 
138 std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) {
139   SmallString<64> UrlPath;
140   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
141                     buildIDToString(ID), "executable");
142   return std::string(UrlPath);
143 }
144 
145 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
146   std::string UrlPath = getDebuginfodExecutableUrlPath(ID);
147   return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
148 }
149 
150 std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) {
151   SmallString<64> UrlPath;
152   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
153                     buildIDToString(ID), "debuginfo");
154   return std::string(UrlPath);
155 }
156 
157 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
158   std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID);
159   return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
160 }
161 
162 // General fetching function.
163 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
164                                                   StringRef UrlPath) {
165   SmallString<10> CacheDir;
166 
167   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
168   if (!CacheDirOrErr)
169     return CacheDirOrErr.takeError();
170   CacheDir = *CacheDirOrErr;
171 
172   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
173                                      getDefaultDebuginfodUrls(),
174                                      getDefaultDebuginfodTimeout());
175 }
176 
177 namespace {
178 
179 /// A simple handler which streams the returned data to a cache file. The cache
180 /// file is only created if a 200 OK status is observed.
181 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
182   using CreateStreamFn =
183       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
184   CreateStreamFn CreateStream;
185   HTTPClient &Client;
186   std::unique_ptr<CachedFileStream> FileStream;
187 
188 public:
189   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
190       : CreateStream(CreateStream), Client(Client) {}
191   virtual ~StreamedHTTPResponseHandler() = default;
192 
193   Error handleBodyChunk(StringRef BodyChunk) override;
194 };
195 
196 } // namespace
197 
198 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
199   if (!FileStream) {
200     unsigned Code = Client.responseCode();
201     if (Code && Code != 200)
202       return Error::success();
203     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
204         CreateStream();
205     if (!FileStreamOrError)
206       return FileStreamOrError.takeError();
207     FileStream = std::move(*FileStreamOrError);
208   }
209   *FileStream->OS << BodyChunk;
210   return Error::success();
211 }
212 
213 // An over-accepting simplification of the HTTP RFC 7230 spec.
214 static bool isHeader(StringRef S) {
215   StringRef Name;
216   StringRef Value;
217   std::tie(Name, Value) = S.split(':');
218   if (Name.empty() || Value.empty())
219     return false;
220   return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
221          all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
222 }
223 
224 static SmallVector<std::string, 0> getHeaders() {
225   const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
226   if (!Filename)
227     return {};
228   ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
229       MemoryBuffer::getFile(Filename, /*IsText=*/true);
230   if (!HeadersFile)
231     return {};
232 
233   SmallVector<std::string, 0> Headers;
234   uint64_t LineNumber = 0;
235   for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
236     LineNumber++;
237     if (!Line.empty() && Line.back() == '\r')
238       Line = Line.drop_back();
239     if (!isHeader(Line)) {
240       if (!all_of(Line, llvm::isSpace))
241         WithColor::warning()
242             << "could not parse debuginfod header: " << Filename << ':'
243             << LineNumber << '\n';
244       continue;
245     }
246     Headers.emplace_back(Line);
247   }
248   return Headers;
249 }
250 
251 Expected<std::string> getCachedOrDownloadArtifact(
252     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
253     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
254   SmallString<64> AbsCachedArtifactPath;
255   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
256                     "llvmcache-" + UniqueKey);
257 
258   Expected<FileCache> CacheOrErr =
259       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
260   if (!CacheOrErr)
261     return CacheOrErr.takeError();
262 
263   FileCache Cache = *CacheOrErr;
264   // We choose an arbitrary Task parameter as we do not make use of it.
265   unsigned Task = 0;
266   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
267   if (!CacheAddStreamOrErr)
268     return CacheAddStreamOrErr.takeError();
269   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
270   if (!CacheAddStream)
271     return std::string(AbsCachedArtifactPath);
272   // The artifact was not found in the local cache, query the debuginfod
273   // servers.
274   if (!HTTPClient::isAvailable())
275     return createStringError(errc::io_error,
276                              "No working HTTP client is available.");
277 
278   if (!HTTPClient::IsInitialized)
279     return createStringError(
280         errc::io_error,
281         "A working HTTP client is available, but it is not initialized. To "
282         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
283         "at the beginning of main.");
284 
285   HTTPClient Client;
286   Client.setTimeout(Timeout);
287   for (StringRef ServerUrl : DebuginfodUrls) {
288     SmallString<64> ArtifactUrl;
289     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
290 
291     // Perform the HTTP request and if successful, write the response body to
292     // the cache.
293     {
294       StreamedHTTPResponseHandler Handler(
295           [&]() { return CacheAddStream(Task, ""); }, Client);
296       HTTPRequest Request(ArtifactUrl);
297       Request.Headers = getHeaders();
298       Error Err = Client.perform(Request, Handler);
299       if (Err)
300         return std::move(Err);
301 
302       unsigned Code = Client.responseCode();
303       if (Code && Code != 200)
304         continue;
305     }
306 
307     Expected<CachePruningPolicy> PruningPolicyOrErr =
308         parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));
309     if (!PruningPolicyOrErr)
310       return PruningPolicyOrErr.takeError();
311     pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);
312 
313     // Return the path to the artifact on disk.
314     return std::string(AbsCachedArtifactPath);
315   }
316 
317   return createStringError(errc::argument_out_of_domain, "build id not found");
318 }
319 
/// Creates a log entry whose text is \p Message rendered to a std::string.
DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
    : Message(Message.str()) {}
322 
323 void DebuginfodLog::push(const Twine &Message) {
324   push(DebuginfodLogEntry(Message));
325 }
326 
327 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
328   {
329     std::lock_guard<std::mutex> Guard(QueueMutex);
330     LogEntryQueue.push(Entry);
331   }
332   QueueCondition.notify_one();
333 }
334 
335 DebuginfodLogEntry DebuginfodLog::pop() {
336   {
337     std::unique_lock<std::mutex> Guard(QueueMutex);
338     // Wait for messages to be pushed into the queue.
339     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
340   }
341   std::lock_guard<std::mutex> Guard(QueueMutex);
342   if (!LogEntryQueue.size())
343     llvm_unreachable("Expected message in the queue.");
344 
345   DebuginfodLogEntry Entry = LogEntryQueue.front();
346   LogEntryQueue.pop();
347   return Entry;
348 }
349 
350 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
351                                            DebuginfodLog &Log, ThreadPool &Pool,
352                                            double MinInterval)
353     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
354   for (StringRef Path : PathsRef)
355     Paths.push_back(Path.str());
356 }
357 
358 Error DebuginfodCollection::update() {
359   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
360   if (UpdateTimer.isRunning())
361     UpdateTimer.stopTimer();
362   UpdateTimer.clear();
363   for (const std::string &Path : Paths) {
364     Log.push("Updating binaries at path " + Path);
365     if (Error Err = findBinaries(Path))
366       return Err;
367   }
368   Log.push("Updated collection");
369   UpdateTimer.startTimer();
370   return Error::success();
371 }
372 
373 Expected<bool> DebuginfodCollection::updateIfStale() {
374   if (!UpdateTimer.isRunning())
375     return false;
376   UpdateTimer.stopTimer();
377   double Time = UpdateTimer.getTotalTime().getWallTime();
378   UpdateTimer.startTimer();
379   if (Time < MinInterval)
380     return false;
381   if (Error Err = update())
382     return std::move(Err);
383   return true;
384 }
385 
386 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
387   while (true) {
388     if (Error Err = update())
389       return Err;
390     std::this_thread::sleep_for(Interval);
391   }
392   llvm_unreachable("updateForever loop should never end");
393 }
394 
395 static bool hasELFMagic(StringRef FilePath) {
396   file_magic Type;
397   std::error_code EC = identify_magic(FilePath, Type);
398   if (EC)
399     return false;
400   switch (Type) {
401   case file_magic::elf:
402   case file_magic::elf_relocatable:
403   case file_magic::elf_executable:
404   case file_magic::elf_shared_object:
405   case file_magic::elf_core:
406     return true;
407   default:
408     return false;
409   }
410 }
411 
412 Error DebuginfodCollection::findBinaries(StringRef Path) {
413   std::error_code EC;
414   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
415   std::mutex IteratorMutex;
416   ThreadPoolTaskGroup IteratorGroup(Pool);
417   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
418        WorkerIndex++) {
419     IteratorGroup.async([&, this]() -> void {
420       std::string FilePath;
421       while (true) {
422         {
423           // Check if iteration is over or there is an error during iteration
424           std::lock_guard<std::mutex> Guard(IteratorMutex);
425           if (I == E || EC)
426             return;
427           // Grab a file path from the directory iterator and advance the
428           // iterator.
429           FilePath = I->path();
430           I.increment(EC);
431         }
432 
433         // Inspect the file at this path to determine if it is debuginfo.
434         if (!hasELFMagic(FilePath))
435           continue;
436 
437         Expected<object::OwningBinary<object::Binary>> BinOrErr =
438             object::createBinary(FilePath);
439 
440         if (!BinOrErr) {
441           consumeError(BinOrErr.takeError());
442           continue;
443         }
444         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
445         if (!Bin->isObject())
446           continue;
447 
448         // TODO: Support non-ELF binaries
449         object::ELFObjectFileBase *Object =
450             dyn_cast<object::ELFObjectFileBase>(Bin);
451         if (!Object)
452           continue;
453 
454         BuildIDRef ID = getBuildID(Object);
455         if (ID.empty())
456           continue;
457 
458         std::string IDString = buildIDToString(ID);
459         if (Object->hasDebugInfo()) {
460           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
461           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
462         } else {
463           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
464           (void)Binaries.try_emplace(IDString, std::move(FilePath));
465         }
466       }
467     });
468   }
469   IteratorGroup.wait();
470   std::unique_lock<std::mutex> Guard(IteratorMutex);
471   if (EC)
472     return errorCodeToError(EC);
473   return Error::success();
474 }
475 
476 Expected<std::optional<std::string>>
477 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
478   Log.push("getting binary path of ID " + buildIDToString(ID));
479   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
480   auto Loc = Binaries.find(buildIDToString(ID));
481   if (Loc != Binaries.end()) {
482     std::string Path = Loc->getValue();
483     return Path;
484   }
485   return std::nullopt;
486 }
487 
488 Expected<std::optional<std::string>>
489 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
490   Log.push("getting debug binary path of ID " + buildIDToString(ID));
491   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
492   auto Loc = DebugBinaries.find(buildIDToString(ID));
493   if (Loc != DebugBinaries.end()) {
494     std::string Path = Loc->getValue();
495     return Path;
496   }
497   return std::nullopt;
498 }
499 
500 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
501   {
502     // Check collection; perform on-demand update if stale.
503     Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
504     if (!PathOrErr)
505       return PathOrErr.takeError();
506     std::optional<std::string> Path = *PathOrErr;
507     if (!Path) {
508       Expected<bool> UpdatedOrErr = updateIfStale();
509       if (!UpdatedOrErr)
510         return UpdatedOrErr.takeError();
511       if (*UpdatedOrErr) {
512         // Try once more.
513         PathOrErr = getBinaryPath(ID);
514         if (!PathOrErr)
515           return PathOrErr.takeError();
516         Path = *PathOrErr;
517       }
518     }
519     if (Path)
520       return *Path;
521   }
522 
523   // Try federation.
524   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
525   if (!PathOrErr)
526     consumeError(PathOrErr.takeError());
527 
528   // Fall back to debug binary.
529   return findDebugBinaryPath(ID);
530 }
531 
532 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
533   // Check collection; perform on-demand update if stale.
534   Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
535   if (!PathOrErr)
536     return PathOrErr.takeError();
537   std::optional<std::string> Path = *PathOrErr;
538   if (!Path) {
539     Expected<bool> UpdatedOrErr = updateIfStale();
540     if (!UpdatedOrErr)
541       return UpdatedOrErr.takeError();
542     if (*UpdatedOrErr) {
543       // Try once more.
544       PathOrErr = getBinaryPath(ID);
545       if (!PathOrErr)
546         return PathOrErr.takeError();
547       Path = *PathOrErr;
548     }
549   }
550   if (Path)
551     return *Path;
552 
553   // Try federation.
554   return getCachedOrDownloadDebuginfo(ID);
555 }
556 
557 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
558                                    DebuginfodCollection &Collection)
559     : Log(Log), Collection(Collection) {
560   cantFail(
561       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
562         Log.push("GET " + Request.UrlPath);
563         std::string IDString;
564         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
565           Request.setResponse(
566               {404, "text/plain", "Build ID is not a hex string\n"});
567           return;
568         }
569         object::BuildID ID(IDString.begin(), IDString.end());
570         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
571         if (Error Err = PathOrErr.takeError()) {
572           consumeError(std::move(Err));
573           Request.setResponse({404, "text/plain", "Build ID not found\n"});
574           return;
575         }
576         streamFile(Request, *PathOrErr);
577       }));
578   cantFail(
579       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
580         Log.push("GET " + Request.UrlPath);
581         std::string IDString;
582         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
583           Request.setResponse(
584               {404, "text/plain", "Build ID is not a hex string\n"});
585           return;
586         }
587         object::BuildID ID(IDString.begin(), IDString.end());
588         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
589         if (Error Err = PathOrErr.takeError()) {
590           consumeError(std::move(Err));
591           Request.setResponse({404, "text/plain", "Build ID not found\n"});
592           return;
593         }
594         streamFile(Request, *PathOrErr);
595       }));
596 }
597 
598 } // namespace llvm
599