xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of SanitizerBinaryMetadata.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Analysis/CaptureTracking.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include "llvm/IR/Metadata.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/ProfileData/InstrProf.h"
37 #include "llvm/Support/Allocator.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/SpecialCaseList.h"
41 #include "llvm/Support/StringSaver.h"
42 #include "llvm/Support/VirtualFileSystem.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include "llvm/Transforms/Utils/ModuleUtils.h"
45 
46 #include <array>
47 #include <cstdint>
48 #include <memory>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "sanmd"
53 
54 namespace {
55 
56 //===--- Constants --------------------------------------------------------===//
57 
// Base metadata format version; it occupies the lower 16 bits of the version
// word.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16): offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority used for the module constructors/destructors emitted by this pass.
constexpr int kCtorDtorPriority = 2;
61 
62 // Pairs of names of initialization callback functions and which section
63 // contains the relevant metadata.
64 class MetadataInfo {
65 public:
66   const StringRef FunctionPrefix;
67   const StringRef SectionSuffix;
68 
69   static const MetadataInfo Covered;
70   static const MetadataInfo Atomics;
71 
72 private:
73   // Forbid construction elsewhere.
MetadataInfo(StringRef FunctionPrefix,StringRef SectionSuffix)74   explicit constexpr MetadataInfo(StringRef FunctionPrefix,
75                                   StringRef SectionSuffix)
76       : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
77 };
78 const MetadataInfo MetadataInfo::Covered{
79     "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
80 const MetadataInfo MetadataInfo::Atomics{
81     "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
82 
83 // The only instances of MetadataInfo are the constants above, so a set of
84 // them may simply store pointers to them. To deterministically generate code,
85 // we need to use a set with stable iteration order, such as SetVector.
86 using MetadataInfoSet = SetVector<const MetadataInfo *>;
87 
88 //===--- Command-line options ---------------------------------------------===//
89 
90 cl::opt<bool> ClWeakCallbacks(
91     "sanitizer-metadata-weak-callbacks",
92     cl::desc("Declare callbacks extern weak, and only call if non-null."),
93     cl::Hidden, cl::init(true));
94 cl::opt<bool>
95     ClNoSanitize("sanitizer-metadata-nosanitize-attr",
96                  cl::desc("Mark some metadata features uncovered in functions "
97                           "with associated no_sanitize attributes."),
98                  cl::Hidden, cl::init(true));
99 
100 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
101                             cl::desc("Emit PCs for covered functions."),
102                             cl::Hidden, cl::init(false));
103 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
104                             cl::desc("Emit PCs for atomic operations."),
105                             cl::Hidden, cl::init(false));
106 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
107                         cl::desc("Emit PCs for start of functions that are "
108                                  "subject for use-after-return checking"),
109                         cl::Hidden, cl::init(false));
110 
111 //===--- Statistics -------------------------------------------------------===//
112 
113 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
114 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
115 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
116 
117 //===----------------------------------------------------------------------===//
118 
119 // Apply opt overrides.
120 SanitizerBinaryMetadataOptions &&
transformOptionsFromCl(SanitizerBinaryMetadataOptions && Opts)121 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
122   Opts.Covered |= ClEmitCovered;
123   Opts.Atomics |= ClEmitAtomics;
124   Opts.UAR |= ClEmitUAR;
125   return std::move(Opts);
126 }
127 
128 class SanitizerBinaryMetadata {
129 public:
SanitizerBinaryMetadata(Module & M,SanitizerBinaryMetadataOptions Opts,std::unique_ptr<SpecialCaseList> Ignorelist)130   SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
131                           std::unique_ptr<SpecialCaseList> Ignorelist)
132       : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
133         Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
134         VersionStr(utostr(getVersion())), IRB(M.getContext()) {
135     // FIXME: Make it work with other formats.
136     assert(TargetTriple.isOSBinFormatELF() && "ELF only");
137     assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
138            "Device targets are not supported");
139   }
140 
141   bool run();
142 
143 private:
getVersion() const144   uint32_t getVersion() const {
145     uint32_t Version = kVersionBase;
146     const auto CM = Mod.getCodeModel();
147     if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
148       Version |= kVersionPtrSizeRel;
149     return Version;
150   }
151 
152   void runOn(Function &F, MetadataInfoSet &MIS);
153 
154   // Determines which set of metadata to collect for this instruction.
155   //
156   // Returns true if covered metadata is required to unambiguously interpret
157   // other metadata. For example, if we are interested in atomics metadata, any
158   // function with memory operations (atomic or not) requires covered metadata
159   // to determine if a memory operation is atomic or not in modules compiled
160   // with SanitizerBinaryMetadata.
161   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
162              uint64_t &FeatureMask);
163 
164   // Get start/end section marker pointer.
165   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
166 
167   // Returns the target-dependent section name.
168   StringRef getSectionName(StringRef SectionSuffix);
169 
170   // Returns the section start marker name.
171   StringRef getSectionStart(StringRef SectionSuffix);
172 
173   // Returns the section end marker name.
174   StringRef getSectionEnd(StringRef SectionSuffix);
175 
176   // Returns true if the access to the address should be considered "atomic".
177   bool pretendAtomicAccess(const Value *Addr);
178 
179   Module &Mod;
180   const SanitizerBinaryMetadataOptions Options;
181   std::unique_ptr<SpecialCaseList> Ignorelist;
182   const Triple TargetTriple;
183   const std::string VersionStr;
184   IRBuilder<> IRB;
185   BumpPtrAllocator Alloc;
186   UniqueStringSaver StringPool{Alloc};
187 };
188 
run()189 bool SanitizerBinaryMetadata::run() {
190   MetadataInfoSet MIS;
191 
192   for (Function &F : Mod)
193     runOn(F, MIS);
194 
195   if (MIS.empty())
196     return false;
197 
198   //
199   // Setup constructors and call all initialization functions for requested
200   // metadata features.
201   //
202 
203   auto *PtrTy = IRB.getPtrTy();
204   auto *Int32Ty = IRB.getInt32Ty();
205   const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
206   auto *Version = ConstantInt::get(Int32Ty, getVersion());
207 
208   for (const MetadataInfo *MI : MIS) {
209     const std::array<Value *, InitTypes.size()> InitArgs = {
210         Version,
211         getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),
212         getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),
213     };
214 
215     // Calls to the initialization functions with different versions cannot be
216     // merged. Give the structors unique names based on the version, which will
217     // also be used as the COMDAT key.
218     const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();
219 
220     // We declare the _add and _del functions as weak, and only call them if
221     // there is a valid symbol linked. This allows building binaries with
222     // semantic metadata, but without having callbacks. When a tool that wants
223     // the metadata is linked which provides the callbacks, they will be called.
224     Function *Ctor =
225         createSanitizerCtorAndInitFunctions(
226             Mod, StructorPrefix + ".module_ctor",
227             (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
228             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
229             .first;
230     Function *Dtor =
231         createSanitizerCtorAndInitFunctions(
232             Mod, StructorPrefix + ".module_dtor",
233             (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
234             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
235             .first;
236     Constant *CtorComdatKey = nullptr;
237     Constant *DtorComdatKey = nullptr;
238     if (TargetTriple.supportsCOMDAT()) {
239       // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
240       // key needs to be a non-local linkage.
241       Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
242       Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
243       Ctor->setLinkage(GlobalValue::ExternalLinkage);
244       Dtor->setLinkage(GlobalValue::ExternalLinkage);
245       // DSOs should _not_ call another constructor/destructor!
246       Ctor->setVisibility(GlobalValue::HiddenVisibility);
247       Dtor->setVisibility(GlobalValue::HiddenVisibility);
248       CtorComdatKey = Ctor;
249       DtorComdatKey = Dtor;
250     }
251     appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
252     appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
253   }
254 
255   return true;
256 }
257 
runOn(Function & F,MetadataInfoSet & MIS)258 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
259   if (F.empty())
260     return;
261   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
262     return;
263   if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
264     return;
265   // Don't touch available_externally functions, their actual body is elsewhere.
266   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
267     return;
268 
269   MDBuilder MDB(F.getContext());
270 
271   // The metadata features enabled for this function, stored along covered
272   // metadata (if enabled).
273   uint64_t FeatureMask = 0;
274   // Don't emit unnecessary covered metadata for all functions to save space.
275   bool RequiresCovered = false;
276 
277   if (Options.Atomics || Options.UAR) {
278     for (BasicBlock &BB : F)
279       for (Instruction &I : BB)
280         RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
281   }
282 
283   if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
284     FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
285   if (F.isVarArg())
286     FeatureMask &= ~kSanitizerBinaryMetadataUAR;
287   if (FeatureMask & kSanitizerBinaryMetadataUAR) {
288     RequiresCovered = true;
289     NumMetadataUAR++;
290   }
291 
292   // Covered metadata is always emitted if explicitly requested, otherwise only
293   // if some other metadata requires it to unambiguously interpret it for
294   // modules compiled with SanitizerBinaryMetadata.
295   if (Options.Covered || (FeatureMask && RequiresCovered)) {
296     NumMetadataCovered++;
297     const auto *MI = &MetadataInfo::Covered;
298     MIS.insert(MI);
299     const StringRef Section = getSectionName(MI->SectionSuffix);
300     // The feature mask will be placed after the function size.
301     Constant *CFM = IRB.getInt64(FeatureMask);
302     F.setMetadata(LLVMContext::MD_pcsections,
303                   MDB.createPCSections({{Section, {CFM}}}));
304   }
305 }
306 
isUARSafeCall(CallInst * CI)307 bool isUARSafeCall(CallInst *CI) {
308   auto *F = CI->getCalledFunction();
309   // There are no intrinsic functions that leak arguments.
310   // If the called function does not return, the current function
311   // does not return as well, so no possibility of use-after-return.
312   // Sanitizer function also don't leak or don't return.
313   // It's safe to both pass pointers to local variables to them
314   // and to tail-call them.
315   return F && (F->isIntrinsic() || F->doesNotReturn() ||
316                F->getName().starts_with("__asan_") ||
317                F->getName().starts_with("__hwsan_") ||
318                F->getName().starts_with("__ubsan_") ||
319                F->getName().starts_with("__msan_") ||
320                F->getName().starts_with("__tsan_"));
321 }
322 
hasUseAfterReturnUnsafeUses(Value & V)323 bool hasUseAfterReturnUnsafeUses(Value &V) {
324   for (User *U : V.users()) {
325     if (auto *I = dyn_cast<Instruction>(U)) {
326       if (I->isLifetimeStartOrEnd() || I->isDroppable())
327         continue;
328       if (auto *CI = dyn_cast<CallInst>(U)) {
329         if (isUARSafeCall(CI))
330           continue;
331       }
332       if (isa<LoadInst>(U))
333         continue;
334       if (auto *SI = dyn_cast<StoreInst>(U)) {
335         // If storing TO the alloca, then the address isn't taken.
336         if (SI->getOperand(1) == &V)
337           continue;
338       }
339       if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
340         if (!hasUseAfterReturnUnsafeUses(*GEPI))
341           continue;
342       } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
343         if (!hasUseAfterReturnUnsafeUses(*BCI))
344           continue;
345       }
346     }
347     return true;
348   }
349   return false;
350 }
351 
useAfterReturnUnsafe(Instruction & I)352 bool useAfterReturnUnsafe(Instruction &I) {
353   if (isa<AllocaInst>(I))
354     return hasUseAfterReturnUnsafeUses(I);
355   // Tail-called functions are not necessary intercepted
356   // at runtime because there is no call instruction.
357   // So conservatively mark the caller as requiring checking.
358   else if (auto *CI = dyn_cast<CallInst>(&I))
359     return CI->isTailCall() && !isUARSafeCall(CI);
360   return false;
361 }
362 
pretendAtomicAccess(const Value * Addr)363 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
364   if (!Addr)
365     return false;
366 
367   Addr = Addr->stripInBoundsOffsets();
368   auto *GV = dyn_cast<GlobalVariable>(Addr);
369   if (!GV)
370     return false;
371 
372   // Some compiler-generated accesses are known racy, to avoid false positives
373   // in data-race analysis pretend they're atomic.
374   if (GV->hasSection()) {
375     const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
376     const auto ProfSec =
377         getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
378     if (GV->getSection().ends_with(ProfSec))
379       return true;
380   }
381   if (GV->getName().starts_with("__llvm_gcov") ||
382       GV->getName().starts_with("__llvm_gcda"))
383     return true;
384 
385   return false;
386 }
387 
388 // Returns true if the memory at `Addr` may be shared with other threads.
maybeSharedMutable(const Value * Addr)389 bool maybeSharedMutable(const Value *Addr) {
390   // By default assume memory may be shared.
391   if (!Addr)
392     return true;
393 
394   if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
395       !PointerMayBeCaptured(Addr, true, true))
396     return false; // Object is on stack but does not escape.
397 
398   Addr = Addr->stripInBoundsOffsets();
399   if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
400     if (GV->isConstant())
401       return false; // Shared, but not mutable.
402   }
403 
404   return true;
405 }
406 
runOn(Instruction & I,MetadataInfoSet & MIS,MDBuilder & MDB,uint64_t & FeatureMask)407 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
408                                     MDBuilder &MDB, uint64_t &FeatureMask) {
409   SmallVector<const MetadataInfo *, 1> InstMetadata;
410   bool RequiresCovered = false;
411 
412   // Only call if at least 1 type of metadata is requested.
413   assert(Options.UAR || Options.Atomics);
414 
415   if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
416     if (useAfterReturnUnsafe(I))
417       FeatureMask |= kSanitizerBinaryMetadataUAR;
418   }
419 
420   if (Options.Atomics) {
421     const Value *Addr = nullptr;
422     if (auto *SI = dyn_cast<StoreInst>(&I))
423       Addr = SI->getPointerOperand();
424     else if (auto *LI = dyn_cast<LoadInst>(&I))
425       Addr = LI->getPointerOperand();
426 
427     if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
428       auto SSID = getAtomicSyncScopeID(&I);
429       if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
430           pretendAtomicAccess(Addr)) {
431         NumMetadataAtomics++;
432         InstMetadata.push_back(&MetadataInfo::Atomics);
433       }
434       FeatureMask |= kSanitizerBinaryMetadataAtomics;
435       RequiresCovered = true;
436     }
437   }
438 
439   // Attach MD_pcsections to instruction.
440   if (!InstMetadata.empty()) {
441     MIS.insert(InstMetadata.begin(), InstMetadata.end());
442     SmallVector<MDBuilder::PCSection, 1> Sections;
443     for (const auto &MI : InstMetadata)
444       Sections.push_back({getSectionName(MI->SectionSuffix), {}});
445     I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
446   }
447 
448   return RequiresCovered;
449 }
450 
451 GlobalVariable *
getSectionMarker(const Twine & MarkerName,Type * Ty)452 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
453   // Use ExternalWeak so that if all sections are discarded due to section
454   // garbage collection, the linker will not report undefined symbol errors.
455   auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
456                                     GlobalVariable::ExternalWeakLinkage,
457                                     /*Initializer=*/nullptr, MarkerName);
458   Marker->setVisibility(GlobalValue::HiddenVisibility);
459   return Marker;
460 }
461 
getSectionName(StringRef SectionSuffix)462 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
463   // FIXME: Other TargetTriples.
464   // Request ULEB128 encoding for all integer constants.
465   return StringPool.save(SectionSuffix + VersionStr + "!C");
466 }
467 
getSectionStart(StringRef SectionSuffix)468 StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
469   // Twine only concatenates 2 strings; with >2 strings, concatenating them
470   // creates Twine temporaries, and returning the final Twine no longer works
471   // because we'd end up with a stack-use-after-return. So here we also use the
472   // StringPool to store the new string.
473   return StringPool.save("__start_" + SectionSuffix + VersionStr);
474 }
475 
getSectionEnd(StringRef SectionSuffix)476 StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
477   return StringPool.save("__stop_" + SectionSuffix + VersionStr);
478 }
479 
480 } // namespace
481 
SanitizerBinaryMetadataPass(SanitizerBinaryMetadataOptions Opts,ArrayRef<std::string> IgnorelistFiles)482 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
483     SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
484     : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
485 
486 PreservedAnalyses
run(Module & M,AnalysisManager<Module> & AM)487 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
488   std::unique_ptr<SpecialCaseList> Ignorelist;
489   if (!IgnorelistFiles.empty()) {
490     Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
491                                               *vfs::getRealFileSystem());
492     if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
493       return PreservedAnalyses::all();
494   }
495 
496   SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
497   if (Pass.run())
498     return PreservedAnalyses::none();
499   return PreservedAnalyses::all();
500 }
501