xref: /freebsd/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 //===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 ///
/// This file contains several definitions for the debuginfod client and server.
/// For the client, this file defines the getCachedOrDownloadArtifact function
/// and its per-artifact wrappers. For the server, this file defines the
/// DebuginfodLogEntry and DebuginfodServer structs, as well as the
/// DebuginfodLog and DebuginfodCollection classes. The client functions
/// retrieve any of the three supported artifact types (executable, debuginfo,
/// source file) associated with a build-id from debuginfod servers. If a source
/// file is to be fetched, its absolute path must be passed as the
/// SourceFilePath argument to getCachedOrDownloadSource. The
/// DebuginfodLogEntry, DebuginfodLog, and DebuginfodCollection are used by the
/// DebuginfodServer to scan the local filesystem for binaries and serve the
/// debuginfod protocol.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "llvm/Debuginfod/Debuginfod.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/BinaryFormat/Magic.h"
28 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
29 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
30 #include "llvm/Debuginfod/HTTPClient.h"
31 #include "llvm/Object/BuildID.h"
32 #include "llvm/Object/ELFObjectFile.h"
33 #include "llvm/Support/CachePruning.h"
34 #include "llvm/Support/Caching.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileUtilities.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/ThreadPool.h"
41 #include "llvm/Support/xxhash.h"
42 
43 #include <atomic>
44 #include <optional>
45 #include <thread>
46 
47 namespace llvm {
48 
49 using llvm::object::BuildIDRef;
50 
namespace {
// Process-wide list of debuginfod server URLs. It stays std::nullopt until it
// is either filled in lazily by getDefaultDebuginfodUrls() or set explicitly
// through setDefaultDebuginfodUrls(). The elements are non-owning StringRefs.
std::optional<SmallVector<StringRef>> DebuginfodUrls;
// Many Readers/Single Writer lock protecting the global debuginfod URL list.
llvm::sys::RWMutex UrlsMutex;
} // namespace
56 
57 std::string getDebuginfodCacheKey(llvm::StringRef S) {
58   return utostr(xxh3_64bits(S));
59 }
60 
61 // Returns a binary BuildID as a normalized hex string.
62 // Uses lowercase for compatibility with common debuginfod servers.
63 static std::string buildIDToString(BuildIDRef ID) {
64   return llvm::toHex(ID, /*LowerCase=*/true);
65 }
66 
67 bool canUseDebuginfod() {
68   return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
69 }
70 
71 SmallVector<StringRef> getDefaultDebuginfodUrls() {
72   std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);
73   if (!DebuginfodUrls) {
74     // Only read from the environment variable if the user hasn't already
75     // set the value.
76     ReadGuard.unlock();
77     std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
78     DebuginfodUrls = SmallVector<StringRef>();
79     if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) {
80       StringRef(DebuginfodUrlsEnv)
81           .split(DebuginfodUrls.value(), " ", -1, false);
82     }
83     WriteGuard.unlock();
84     ReadGuard.lock();
85   }
86   return DebuginfodUrls.value();
87 }
88 
89 // Set the default debuginfod URL list, override the environment variable.
90 void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {
91   std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);
92   DebuginfodUrls = URLs;
93 }
94 
95 /// Finds a default local file caching directory for the debuginfod client,
96 /// first checking DEBUGINFOD_CACHE_PATH.
97 Expected<std::string> getDefaultDebuginfodCacheDirectory() {
98   if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))
99     return CacheDirectoryEnv;
100 
101   SmallString<64> CacheDirectory;
102   if (!sys::path::cache_directory(CacheDirectory))
103     return createStringError(
104         errc::io_error, "Unable to determine appropriate cache directory.");
105   sys::path::append(CacheDirectory, "llvm-debuginfod", "client");
106   return std::string(CacheDirectory);
107 }
108 
109 std::chrono::milliseconds getDefaultDebuginfodTimeout() {
110   long Timeout;
111   const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");
112   if (DebuginfodTimeoutEnv &&
113       to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))
114     return std::chrono::milliseconds(Timeout * 1000);
115 
116   return std::chrono::milliseconds(90 * 1000);
117 }
118 
119 /// The following functions fetch a debuginfod artifact to a file in a local
120 /// cache and return the cached file path. They first search the local cache,
121 /// followed by the debuginfod servers.
122 
123 std::string getDebuginfodSourceUrlPath(BuildIDRef ID,
124                                        StringRef SourceFilePath) {
125   SmallString<64> UrlPath;
126   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
127                     buildIDToString(ID), "source",
128                     sys::path::convert_to_slash(SourceFilePath));
129   return std::string(UrlPath);
130 }
131 
132 Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,
133                                                 StringRef SourceFilePath) {
134   std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath);
135   return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
136 }
137 
138 std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) {
139   SmallString<64> UrlPath;
140   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
141                     buildIDToString(ID), "executable");
142   return std::string(UrlPath);
143 }
144 
145 Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {
146   std::string UrlPath = getDebuginfodExecutableUrlPath(ID);
147   return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
148 }
149 
150 std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) {
151   SmallString<64> UrlPath;
152   sys::path::append(UrlPath, sys::path::Style::posix, "buildid",
153                     buildIDToString(ID), "debuginfo");
154   return std::string(UrlPath);
155 }
156 
157 Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {
158   std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID);
159   return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);
160 }
161 
162 // General fetching function.
163 Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
164                                                   StringRef UrlPath) {
165   SmallString<10> CacheDir;
166 
167   Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();
168   if (!CacheDirOrErr)
169     return CacheDirOrErr.takeError();
170   CacheDir = *CacheDirOrErr;
171 
172   return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
173                                      getDefaultDebuginfodUrls(),
174                                      getDefaultDebuginfodTimeout());
175 }
176 
177 namespace {
178 
179 /// A simple handler which streams the returned data to a cache file. The cache
180 /// file is only created if a 200 OK status is observed.
181 class StreamedHTTPResponseHandler : public HTTPResponseHandler {
182   using CreateStreamFn =
183       std::function<Expected<std::unique_ptr<CachedFileStream>>()>;
184   CreateStreamFn CreateStream;
185   HTTPClient &Client;
186   std::unique_ptr<CachedFileStream> FileStream;
187 
188 public:
189   StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)
190       : CreateStream(CreateStream), Client(Client) {}
191   virtual ~StreamedHTTPResponseHandler() = default;
192 
193   Error handleBodyChunk(StringRef BodyChunk) override;
194 };
195 
196 } // namespace
197 
198 Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
199   if (!FileStream) {
200     unsigned Code = Client.responseCode();
201     if (Code && Code != 200)
202       return Error::success();
203     Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
204         CreateStream();
205     if (!FileStreamOrError)
206       return FileStreamOrError.takeError();
207     FileStream = std::move(*FileStreamOrError);
208   }
209   *FileStream->OS << BodyChunk;
210   return Error::success();
211 }
212 
213 // An over-accepting simplification of the HTTP RFC 7230 spec.
214 static bool isHeader(StringRef S) {
215   StringRef Name;
216   StringRef Value;
217   std::tie(Name, Value) = S.split(':');
218   if (Name.empty() || Value.empty())
219     return false;
220   return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&
221          all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });
222 }
223 
224 static SmallVector<std::string, 0> getHeaders() {
225   const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");
226   if (!Filename)
227     return {};
228   ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =
229       MemoryBuffer::getFile(Filename, /*IsText=*/true);
230   if (!HeadersFile)
231     return {};
232 
233   SmallVector<std::string, 0> Headers;
234   uint64_t LineNumber = 0;
235   for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {
236     LineNumber++;
237     if (!Line.empty() && Line.back() == '\r')
238       Line = Line.drop_back();
239     if (!isHeader(Line)) {
240       if (!all_of(Line, llvm::isSpace))
241         WithColor::warning()
242             << "could not parse debuginfod header: " << Filename << ':'
243             << LineNumber << '\n';
244       continue;
245     }
246     Headers.emplace_back(Line);
247   }
248   return Headers;
249 }
250 
251 Expected<std::string> getCachedOrDownloadArtifact(
252     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
253     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {
254   SmallString<64> AbsCachedArtifactPath;
255   sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,
256                     "llvmcache-" + UniqueKey);
257 
258   Expected<FileCache> CacheOrErr =
259       localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);
260   if (!CacheOrErr)
261     return CacheOrErr.takeError();
262 
263   FileCache Cache = *CacheOrErr;
264   // We choose an arbitrary Task parameter as we do not make use of it.
265   unsigned Task = 0;
266   Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");
267   if (!CacheAddStreamOrErr)
268     return CacheAddStreamOrErr.takeError();
269   AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
270   if (!CacheAddStream)
271     return std::string(AbsCachedArtifactPath);
272   // The artifact was not found in the local cache, query the debuginfod
273   // servers.
274   if (!HTTPClient::isAvailable())
275     return createStringError(errc::io_error,
276                              "No working HTTP client is available.");
277 
278   if (!HTTPClient::IsInitialized)
279     return createStringError(
280         errc::io_error,
281         "A working HTTP client is available, but it is not initialized. To "
282         "allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "
283         "at the beginning of main.");
284 
285   HTTPClient Client;
286   Client.setTimeout(Timeout);
287   for (StringRef ServerUrl : DebuginfodUrls) {
288     SmallString<64> ArtifactUrl;
289     sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);
290 
291     // Perform the HTTP request and if successful, write the response body to
292     // the cache.
293     {
294       StreamedHTTPResponseHandler Handler(
295           [&]() { return CacheAddStream(Task, ""); }, Client);
296       HTTPRequest Request(ArtifactUrl);
297       Request.Headers = getHeaders();
298       Error Err = Client.perform(Request, Handler);
299       if (Err)
300         return std::move(Err);
301 
302       unsigned Code = Client.responseCode();
303       if (Code && Code != 200)
304         continue;
305     }
306 
307     Expected<CachePruningPolicy> PruningPolicyOrErr =
308         parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));
309     if (!PruningPolicyOrErr)
310       return PruningPolicyOrErr.takeError();
311     pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);
312 
313     // Return the path to the artifact on disk.
314     return std::string(AbsCachedArtifactPath);
315   }
316 
317   return createStringError(errc::argument_out_of_domain, "build id not found");
318 }
319 
320 DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)
321     : Message(Message.str()) {}
322 
323 void DebuginfodLog::push(const Twine &Message) {
324   push(DebuginfodLogEntry(Message));
325 }
326 
327 void DebuginfodLog::push(DebuginfodLogEntry Entry) {
328   {
329     std::lock_guard<std::mutex> Guard(QueueMutex);
330     LogEntryQueue.push(Entry);
331   }
332   QueueCondition.notify_one();
333 }
334 
335 DebuginfodLogEntry DebuginfodLog::pop() {
336   {
337     std::unique_lock<std::mutex> Guard(QueueMutex);
338     // Wait for messages to be pushed into the queue.
339     QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });
340   }
341   std::lock_guard<std::mutex> Guard(QueueMutex);
342   if (!LogEntryQueue.size())
343     llvm_unreachable("Expected message in the queue.");
344 
345   DebuginfodLogEntry Entry = LogEntryQueue.front();
346   LogEntryQueue.pop();
347   return Entry;
348 }
349 
350 DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
351                                            DebuginfodLog &Log,
352                                            ThreadPoolInterface &Pool,
353                                            double MinInterval)
354     : Log(Log), Pool(Pool), MinInterval(MinInterval) {
355   for (StringRef Path : PathsRef)
356     Paths.push_back(Path.str());
357 }
358 
359 Error DebuginfodCollection::update() {
360   std::lock_guard<sys::Mutex> Guard(UpdateMutex);
361   if (UpdateTimer.isRunning())
362     UpdateTimer.stopTimer();
363   UpdateTimer.clear();
364   for (const std::string &Path : Paths) {
365     Log.push("Updating binaries at path " + Path);
366     if (Error Err = findBinaries(Path))
367       return Err;
368   }
369   Log.push("Updated collection");
370   UpdateTimer.startTimer();
371   return Error::success();
372 }
373 
374 Expected<bool> DebuginfodCollection::updateIfStale() {
375   if (!UpdateTimer.isRunning())
376     return false;
377   UpdateTimer.stopTimer();
378   double Time = UpdateTimer.getTotalTime().getWallTime();
379   UpdateTimer.startTimer();
380   if (Time < MinInterval)
381     return false;
382   if (Error Err = update())
383     return std::move(Err);
384   return true;
385 }
386 
387 Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
388   while (true) {
389     if (Error Err = update())
390       return Err;
391     std::this_thread::sleep_for(Interval);
392   }
393   llvm_unreachable("updateForever loop should never end");
394 }
395 
396 static bool hasELFMagic(StringRef FilePath) {
397   file_magic Type;
398   std::error_code EC = identify_magic(FilePath, Type);
399   if (EC)
400     return false;
401   switch (Type) {
402   case file_magic::elf:
403   case file_magic::elf_relocatable:
404   case file_magic::elf_executable:
405   case file_magic::elf_shared_object:
406   case file_magic::elf_core:
407     return true;
408   default:
409     return false;
410   }
411 }
412 
413 Error DebuginfodCollection::findBinaries(StringRef Path) {
414   std::error_code EC;
415   sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;
416   std::mutex IteratorMutex;
417   ThreadPoolTaskGroup IteratorGroup(Pool);
418   for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getMaxConcurrency();
419        WorkerIndex++) {
420     IteratorGroup.async([&, this]() -> void {
421       std::string FilePath;
422       while (true) {
423         {
424           // Check if iteration is over or there is an error during iteration
425           std::lock_guard<std::mutex> Guard(IteratorMutex);
426           if (I == E || EC)
427             return;
428           // Grab a file path from the directory iterator and advance the
429           // iterator.
430           FilePath = I->path();
431           I.increment(EC);
432         }
433 
434         // Inspect the file at this path to determine if it is debuginfo.
435         if (!hasELFMagic(FilePath))
436           continue;
437 
438         Expected<object::OwningBinary<object::Binary>> BinOrErr =
439             object::createBinary(FilePath);
440 
441         if (!BinOrErr) {
442           consumeError(BinOrErr.takeError());
443           continue;
444         }
445         object::Binary *Bin = std::move(BinOrErr.get().getBinary());
446         if (!Bin->isObject())
447           continue;
448 
449         // TODO: Support non-ELF binaries
450         object::ELFObjectFileBase *Object =
451             dyn_cast<object::ELFObjectFileBase>(Bin);
452         if (!Object)
453           continue;
454 
455         BuildIDRef ID = getBuildID(Object);
456         if (ID.empty())
457           continue;
458 
459         std::string IDString = buildIDToString(ID);
460         if (Object->hasDebugInfo()) {
461           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
462           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
463         } else {
464           std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);
465           (void)Binaries.try_emplace(IDString, std::move(FilePath));
466         }
467       }
468     });
469   }
470   IteratorGroup.wait();
471   std::unique_lock<std::mutex> Guard(IteratorMutex);
472   if (EC)
473     return errorCodeToError(EC);
474   return Error::success();
475 }
476 
477 Expected<std::optional<std::string>>
478 DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
479   Log.push("getting binary path of ID " + buildIDToString(ID));
480   std::shared_lock<sys::RWMutex> Guard(BinariesMutex);
481   auto Loc = Binaries.find(buildIDToString(ID));
482   if (Loc != Binaries.end()) {
483     std::string Path = Loc->getValue();
484     return Path;
485   }
486   return std::nullopt;
487 }
488 
489 Expected<std::optional<std::string>>
490 DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
491   Log.push("getting debug binary path of ID " + buildIDToString(ID));
492   std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);
493   auto Loc = DebugBinaries.find(buildIDToString(ID));
494   if (Loc != DebugBinaries.end()) {
495     std::string Path = Loc->getValue();
496     return Path;
497   }
498   return std::nullopt;
499 }
500 
501 Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {
502   {
503     // Check collection; perform on-demand update if stale.
504     Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);
505     if (!PathOrErr)
506       return PathOrErr.takeError();
507     std::optional<std::string> Path = *PathOrErr;
508     if (!Path) {
509       Expected<bool> UpdatedOrErr = updateIfStale();
510       if (!UpdatedOrErr)
511         return UpdatedOrErr.takeError();
512       if (*UpdatedOrErr) {
513         // Try once more.
514         PathOrErr = getBinaryPath(ID);
515         if (!PathOrErr)
516           return PathOrErr.takeError();
517         Path = *PathOrErr;
518       }
519     }
520     if (Path)
521       return *Path;
522   }
523 
524   // Try federation.
525   Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);
526   if (!PathOrErr)
527     consumeError(PathOrErr.takeError());
528 
529   // Fall back to debug binary.
530   return findDebugBinaryPath(ID);
531 }
532 
533 Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {
534   // Check collection; perform on-demand update if stale.
535   Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);
536   if (!PathOrErr)
537     return PathOrErr.takeError();
538   std::optional<std::string> Path = *PathOrErr;
539   if (!Path) {
540     Expected<bool> UpdatedOrErr = updateIfStale();
541     if (!UpdatedOrErr)
542       return UpdatedOrErr.takeError();
543     if (*UpdatedOrErr) {
544       // Try once more.
545       PathOrErr = getBinaryPath(ID);
546       if (!PathOrErr)
547         return PathOrErr.takeError();
548       Path = *PathOrErr;
549     }
550   }
551   if (Path)
552     return *Path;
553 
554   // Try federation.
555   return getCachedOrDownloadDebuginfo(ID);
556 }
557 
558 DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,
559                                    DebuginfodCollection &Collection)
560     : Log(Log), Collection(Collection) {
561   cantFail(
562       Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
563         Log.push("GET " + Request.UrlPath);
564         std::string IDString;
565         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
566           Request.setResponse(
567               {404, "text/plain", "Build ID is not a hex string\n"});
568           return;
569         }
570         object::BuildID ID(IDString.begin(), IDString.end());
571         Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);
572         if (Error Err = PathOrErr.takeError()) {
573           consumeError(std::move(Err));
574           Request.setResponse({404, "text/plain", "Build ID not found\n"});
575           return;
576         }
577         streamFile(Request, *PathOrErr);
578       }));
579   cantFail(
580       Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
581         Log.push("GET " + Request.UrlPath);
582         std::string IDString;
583         if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {
584           Request.setResponse(
585               {404, "text/plain", "Build ID is not a hex string\n"});
586           return;
587         }
588         object::BuildID ID(IDString.begin(), IDString.end());
589         Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);
590         if (Error Err = PathOrErr.takeError()) {
591           consumeError(std::move(Err));
592           Request.setResponse({404, "text/plain", "Build ID not found\n"});
593           return;
594         }
595         streamFile(Request, *PathOrErr);
596       }));
597 }
598 
599 } // namespace llvm
600