xref: /freebsd/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its per-artifact wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The client functions
/// retrieve any of the three supported artifact types (executable, debuginfo,
/// source file) associated with a build-id from debuginfod servers. If a
/// source file is to be fetched, its absolute path must be specified in the
/// SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/BinaryFormat/Magic.h"
27 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
28 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
29 #include "llvm/Debuginfod/HTTPClient.h"
30 #include "llvm/Object/Binary.h"
31 #include "llvm/Object/ELFObjectFile.h"
32 #include "llvm/Object/ObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/Path.h"
39 #include "llvm/Support/ThreadPool.h"
40 #include "llvm/Support/xxhash.h"
41 
42 #include <atomic>
43 
44 namespace llvm {
45 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
46 
47 // Returns a binary BuildID as a normalized hex string.
48 // Uses lowercase for compatibility with common debuginfod servers.
49 static std::string buildIDToString(BuildIDRef ID) {
50   return llvm::toHex(ID, /*LowerCase=*/true);
51 }
52 
53 Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
54   const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
55   if (DebuginfodUrlsEnv == nullptr)
56     return SmallVector<StringRef>();
57 
58   SmallVector<StringRef> DebuginfodUrls;
59   StringRef(DebuginfodUrlsEnv).split(DebuginfodUrls, " ");
60   return DebuginfodUrls;
61 }
62 
63 /// Finds a default local file caching directory for the debuginfod client,
64 /// first checking DEBUGINFOD_CACHE_PATH.
65 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
66   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
67     return CacheDirectoryEnv;
68 
69   SmallString<64> CacheDirectory;
70   if (!sys::path::cache_directory(CacheDirectory))
71     return createStringError(
72         errc::io_error, "Unable to determine appropriate cache directory.");
73   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
74   return std::string(CacheDirectory);
75 }
76 
77 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
78   long Timeout;
79   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
80   if (DebuginfodTimeoutEnv &&
81       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
82     return std::chrono::milliseconds(Timeout * 1000);
83 
84   return std::chrono::milliseconds(90 * 1000);
85 }
86 
87 /// The following functions fetch a debuginfod artifact to a file in a local
88 /// cache and return the cached file path. They first search the local cache,
89 /// followed by the debuginfod servers.
90 
91 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
92                                                 StringRef SourceFilePath) {
93   SmallString<64> UrlPath;
94   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
95                     buildIDToString(ID), "source",
96                     sys::path::convert_to_slash(SourceFilePath));
97   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
98 }
99 
100 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
101   SmallString<64> UrlPath;
102   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
103                     buildIDToString(ID), "executable");
104   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
105 }
106 
107 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
108   SmallString<64> UrlPath;
109   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
110                     buildIDToString(ID), "debuginfo");
111   return getCachedOrDownloadArtifact(uniqueKey(UrlPath), UrlPath);
112 }
113 
114 // General fetching function.
115 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
116                                                   StringRef UrlPath) {
117   SmallString<10> CacheDir;
118 
119   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
120   if (!CacheDirOrErr)
121     return CacheDirOrErr.takeError();
122   CacheDir = *CacheDirOrErr;
123 
124   Expected<SmallVector<StringRef>> DebuginfodUrlsOrErr =
125       getDefaultDebuginfodUrls();
126   if (!DebuginfodUrlsOrErr)
127     return DebuginfodUrlsOrErr.takeError();
128   SmallVector<StringRef> &DebuginfodUrls = *DebuginfodUrlsOrErr;
129   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
130                                      DebuginfodUrls,
131                                      getDefaultDebuginfodTimeout());
132 }
133 
134 namespace {
135 
136 /// A simple handler which streams the returned data to a cache file. The cache
137 /// file is only created if a 200 OK status is observed.
138 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
139   using CreateStreamFn =
140       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
141   CreateStreamFn CreateStream;
142   HTTPClient &Client;
143   std::unique_ptr<CachedFileStream> FileStream;
144 
145 public:
146   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
147       : CreateStream(CreateStream), Client(Client) {}
148   virtual ~StreamedHTTPResponseHandler() = default;
149 
150   Error handleBodyChunk(StringRef BodyChunk) override;
151 };
152 
153 } // namespace
154 
155 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
156   if (!FileStream) {
157     if (Client.responseCode() != 200)
158       return Error::success();
159     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
160         CreateStream();
161     if (!FileStreamOrError)
162       return FileStreamOrError.takeError();
163     FileStream = std::move(*FileStreamOrError);
164   }
165   *FileStream->OS << BodyChunk;
166   return Error::success();
167 }
168 
169 Expected<std::string> getCachedOrDownloadArtifact(
170     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
171     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
172   SmallString<64> AbsCachedArtifactPath;
173   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
174                     "llvmcache-" + UniqueKey);
175 
176   Expected<FileCache> CacheOrErr =
177       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
178   if (!CacheOrErr)
179     return CacheOrErr.takeError();
180 
181   FileCache Cache = *CacheOrErr;
182   // We choose an arbitrary Task parameter as we do not make use of it.
183   unsigned Task = 0;
184   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey);
185   if (!CacheAddStreamOrErr)
186     return CacheAddStreamOrErr.takeError();
187   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
188   if (!CacheAddStream)
189     return std::string(AbsCachedArtifactPath);
190   // The artifact was not found in the local cache, query the debuginfod
191   // servers.
192   if (!HTTPClient::isAvailable())
193     return createStringError(errc::io_error,
194                              "No working HTTP client is available.");
195 
196   if (!HTTPClient::IsInitialized)
197     return createStringError(
198         errc::io_error,
199         "A working HTTP client is available, but it is not initialized. To "
200         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
201         "at the beginning of main.");
202 
203   HTTPClient Client;
204   Client.setTimeout(Timeout);
205   for (StringRef ServerUrl : DebuginfodUrls) {
206     SmallString<64> ArtifactUrl;
207     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
208 
209     // Perform the HTTP request and if successful, write the response body to
210     // the cache.
211     StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); },
212                                         Client);
213     HTTPRequest Request(ArtifactUrl);
214     Error Err = Client.perform(Request, Handler);
215     if (Err)
216       return std::move(Err);
217 
218     if (Client.responseCode() != 200)
219       continue;
220 
221     // Return the path to the artifact on disk.
222     return std::string(AbsCachedArtifactPath);
223   }
224 
225   return createStringError(errc::argument_out_of_domain, "build id not found");
226 }
227 
228 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
229     : Message(Message.str()) {}
230 
231 void DebuginfodLog::push(const Twine &Message) {
232   push(DebuginfodLogEntry(Message));
233 }
234 
235 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
236   {
237     std::lock_guard<std::mutex> Guard(QueueMutex);
238     LogEntryQueue.push(Entry);
239   }
240   QueueCondition.notify_one();
241 }
242 
243 DebuginfodLogEntry DebuginfodLog::pop() {
244   {
245     std::unique_lock<std::mutex> Guard(QueueMutex);
246     // Wait for messages to be pushed into the queue.
247     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
248   }
249   std::lock_guard<std::mutex> Guard(QueueMutex);
250   if (!LogEntryQueue.size())
251     llvm_unreachable("Expected message in the queue.");
252 
253   DebuginfodLogEntry Entry = LogEntryQueue.front();
254   LogEntryQueue.pop();
255   return Entry;
256 }
257 
258 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
259                                            DebuginfodLog &Log, ThreadPool &Pool,
260                                            double MinInterval)
261     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
262   for (StringRef Path : PathsRef)
263     Paths.push_back(Path.str());
264 }
265 
266 Error DebuginfodCollection::update() {
267   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
268   if (UpdateTimer.isRunning())
269     UpdateTimer.stopTimer();
270   UpdateTimer.clear();
271   for (const std::string &Path : Paths) {
272     Log.push("Updating binaries at path " + Path);
273     if (Error Err = findBinaries(Path))
274       return Err;
275   }
276   Log.push("Updated collection");
277   UpdateTimer.startTimer();
278   return Error::success();
279 }
280 
281 Expected<bool> DebuginfodCollection::updateIfStale() {
282   if (!UpdateTimer.isRunning())
283     return false;
284   UpdateTimer.stopTimer();
285   double Time = UpdateTimer.getTotalTime().getWallTime();
286   UpdateTimer.startTimer();
287   if (Time < MinInterval)
288     return false;
289   if (Error Err = update())
290     return std::move(Err);
291   return true;
292 }
293 
294 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
295   while (true) {
296     if (Error Err = update())
297       return Err;
298     std::this_thread::sleep_for(Interval);
299   }
300   llvm_unreachable("updateForever loop should never end");
301 }
302 
303 static bool isDebugBinary(object::ObjectFile *Object) {
304   // TODO: handle PDB debuginfo
305   std::unique_ptr<DWARFContext> Context = DWARFContext::create(
306       *Object, DWARFContext::ProcessDebugRelocations::Process);
307   const DWARFObject &DObj = Context->getDWARFObj();
308   unsigned NumSections = 0;
309   DObj.forEachInfoSections([&](const DWARFSection &S) { NumSections++; });
310   return NumSections;
311 }
312 
313 static bool hasELFMagic(StringRef FilePath) {
314   file_magic Type;
315   std::error_code EC = identify_magic(FilePath, Type);
316   if (EC)
317     return false;
318   switch (Type) {
319   case file_magic::elf:
320   case file_magic::elf_relocatable:
321   case file_magic::elf_executable:
322   case file_magic::elf_shared_object:
323   case file_magic::elf_core:
324     return true;
325   default:
326     return false;
327   }
328 }
329 
330 Error DebuginfodCollection::findBinaries(StringRef Path) {
331   std::error_code EC;
332   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
333   std::mutex IteratorMutex;
334   ThreadPoolTaskGroup IteratorGroup(Pool);
335   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getThreadCount();
336        WorkerIndex++) {
337     IteratorGroup.async([&, this]() -> void {
338       std::string FilePath;
339       while (true) {
340         {
341           // Check if iteration is over or there is an error during iteration
342           std::lock_guard<std::mutex> Guard(IteratorMutex);
343           if (I == E || EC)
344             return;
345           // Grab a file path from the directory iterator and advance the
346           // iterator.
347           FilePath = I->path();
348           I.increment(EC);
349         }
350 
351         // Inspect the file at this path to determine if it is debuginfo.
352         if (!hasELFMagic(FilePath))
353           continue;
354 
355         Expected<object::OwningBinary<object::Binary>> BinOrErr =
356             object::createBinary(FilePath);
357 
358         if (!BinOrErr) {
359           consumeError(BinOrErr.takeError());
360           continue;
361         }
362         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
363         if (!Bin->isObject())
364           continue;
365 
366         // TODO: Support non-ELF binaries
367         object::ELFObjectFileBase *Object =
368             dyn_cast<object::ELFObjectFileBase>(Bin);
369         if (!Object)
370           continue;
371 
372         Optional<BuildIDRef> ID = symbolize::getBuildID(Object);
373         if (!ID)
374           continue;
375 
376         std::string IDString = buildIDToString(ID.value());
377         if (isDebugBinary(Object)) {
378           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
379           DebugBinaries[IDString] = FilePath;
380         } else {
381           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
382           Binaries[IDString] = FilePath;
383         }
384       }
385     });
386   }
387   IteratorGroup.wait();
388   std::unique_lock<std::mutex> Guard(IteratorMutex);
389   if (EC)
390     return errorCodeToError(EC);
391   return Error::success();
392 }
393 
394 Expected<Optional<std::string>>
395 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
396   Log.push("getting binary path of ID " + buildIDToString(ID));
397   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
398   auto Loc = Binaries.find(buildIDToString(ID));
399   if (Loc != Binaries.end()) {
400     std::string Path = Loc->getValue();
401     return Path;
402   }
403   return None;
404 }
405 
406 Expected<Optional<std::string>>
407 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
408   Log.push("getting debug binary path of ID " + buildIDToString(ID));
409   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
410   auto Loc = DebugBinaries.find(buildIDToString(ID));
411   if (Loc != DebugBinaries.end()) {
412     std::string Path = Loc->getValue();
413     return Path;
414   }
415   return None;
416 }
417 
418 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
419   {
420     // Check collection; perform on-demand update if stale.
421     Expected<Optional<std::string>> PathOrErr = getBinaryPath(ID);
422     if (!PathOrErr)
423       return PathOrErr.takeError();
424     Optional<std::string> Path = *PathOrErr;
425     if (!Path) {
426       Expected<bool> UpdatedOrErr = updateIfStale();
427       if (!UpdatedOrErr)
428         return UpdatedOrErr.takeError();
429       if (*UpdatedOrErr) {
430         // Try once more.
431         PathOrErr = getBinaryPath(ID);
432         if (!PathOrErr)
433           return PathOrErr.takeError();
434         Path = *PathOrErr;
435       }
436     }
437     if (Path)
438       return Path.value();
439   }
440 
441   // Try federation.
442   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
443   if (!PathOrErr)
444     consumeError(PathOrErr.takeError());
445 
446   // Fall back to debug binary.
447   return findDebugBinaryPath(ID);
448 }
449 
450 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
451   // Check collection; perform on-demand update if stale.
452   Expected<Optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
453   if (!PathOrErr)
454     return PathOrErr.takeError();
455   Optional<std::string> Path = *PathOrErr;
456   if (!Path) {
457     Expected<bool> UpdatedOrErr = updateIfStale();
458     if (!UpdatedOrErr)
459       return UpdatedOrErr.takeError();
460     if (*UpdatedOrErr) {
461       // Try once more.
462       PathOrErr = getBinaryPath(ID);
463       if (!PathOrErr)
464         return PathOrErr.takeError();
465       Path = *PathOrErr;
466     }
467   }
468   if (Path)
469     return Path.value();
470 
471   // Try federation.
472   return getCachedOrDownloadDebuginfo(ID);
473 }
474 
475 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
476                                    DebuginfodCollection &Collection)
477     : Log(Log), Collection(Collection) {
478   cantFail(
479       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
480         Log.push("GET " + Request.UrlPath);
481         std::string IDString;
482         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
483           Request.setResponse(
484               {404, "text/plain", "Build ID is not a hex string\n"});
485           return;
486         }
487         BuildID ID(IDString.begin(), IDString.end());
488         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
489         if (Error Err = PathOrErr.takeError()) {
490           consumeError(std::move(Err));
491           Request.setResponse({404, "text/plain", "Build ID not found\n"});
492           return;
493         }
494         streamFile(Request, *PathOrErr);
495       }));
496   cantFail(
497       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
498         Log.push("GET " + Request.UrlPath);
499         std::string IDString;
500         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
501           Request.setResponse(
502               {404, "text/plain", "Build ID is not a hex string\n"});
503           return;
504         }
505         BuildID ID(IDString.begin(), IDString.end());
506         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
507         if (Error Err = PathOrErr.takeError()) {
508           consumeError(std::move(Err));
509           Request.setResponse({404, "text/plain", "Build ID not found\n"});
510           return;
511         }
512         streamFile(Request, *PathOrErr);
513       }));
514 }
515 
516 } // namespace llvm
517