1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of SanitizerBinaryMetadata. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/Triple.h" 19 #include "llvm/ADT/Twine.h" 20 #include "llvm/IR/Constant.h" 21 #include "llvm/IR/DerivedTypes.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/GlobalValue.h" 24 #include "llvm/IR/GlobalVariable.h" 25 #include "llvm/IR/IRBuilder.h" 26 #include "llvm/IR/Instruction.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/LLVMContext.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/IR/Metadata.h" 31 #include "llvm/IR/Module.h" 32 #include "llvm/IR/Type.h" 33 #include "llvm/IR/Value.h" 34 #include "llvm/InitializePasses.h" 35 #include "llvm/Pass.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Transforms/Instrumentation.h" 39 #include "llvm/Transforms/Utils/ModuleUtils.h" 40 41 #include <array> 42 #include <cstdint> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "sanmd" 47 48 namespace { 49 50 //===--- Constants --------------------------------------------------------===// 51 52 constexpr uint32_t kVersionBase = 1; // occupies lower 16 bits 53 constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized 54 constexpr int kCtorDtorPriority = 2; 55 56 // Pairs of names of initialization callback functions and which section 57 // contains the relevant metadata. 58 class MetadataInfo { 59 public: 60 const StringRef FunctionPrefix; 61 const StringRef SectionSuffix; 62 const uint32_t FeatureMask; 63 64 static const MetadataInfo Covered; 65 static const MetadataInfo Atomics; 66 67 private: 68 // Forbid construction elsewhere. 69 explicit constexpr MetadataInfo(StringRef FunctionPrefix, 70 StringRef SectionSuffix, uint32_t Feature) 71 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix), 72 FeatureMask(Feature) {} 73 }; 74 const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered", 75 kSanitizerBinaryMetadataCoveredSection, 76 kSanitizerBinaryMetadataNone}; 77 const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics", 78 kSanitizerBinaryMetadataAtomicsSection, 79 kSanitizerBinaryMetadataAtomics}; 80 81 // The only instances of MetadataInfo are the constants above, so a set of 82 // them may simply store pointers to them. To deterministically generate code, 83 // we need to use a set with stable iteration order, such as SetVector. 84 using MetadataInfoSet = SetVector<const MetadataInfo *>; 85 86 //===--- Command-line options ---------------------------------------------===// 87 88 cl::opt<bool> ClWeakCallbacks( 89 "sanitizer-metadata-weak-callbacks", 90 cl::desc("Declare callbacks extern weak, and only call if non-null."), 91 cl::Hidden, cl::init(true)); 92 93 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered", 94 cl::desc("Emit PCs for covered functions."), 95 cl::Hidden, cl::init(false)); 96 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics", 97 cl::desc("Emit PCs for atomic operations."), 98 cl::Hidden, cl::init(false)); 99 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar", 100 cl::desc("Emit PCs for start of functions that are " 101 "subject for use-after-return checking"), 102 cl::Hidden, cl::init(false)); 103 104 //===--- Statistics -------------------------------------------------------===// 105 106 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions"); 107 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics"); 108 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions"); 109 110 //===----------------------------------------------------------------------===// 111 112 // Apply opt overrides. 113 SanitizerBinaryMetadataOptions && 114 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) { 115 Opts.Covered |= ClEmitCovered; 116 Opts.Atomics |= ClEmitAtomics; 117 Opts.UAR |= ClEmitUAR; 118 return std::move(Opts); 119 } 120 121 class SanitizerBinaryMetadata { 122 public: 123 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts) 124 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))), 125 TargetTriple(M.getTargetTriple()), IRB(M.getContext()) { 126 // FIXME: Make it work with other formats. 127 assert(TargetTriple.isOSBinFormatELF() && "ELF only"); 128 } 129 130 bool run(); 131 132 private: 133 // Return enabled feature mask of per-instruction metadata. 134 uint32_t getEnabledPerInstructionFeature() const { 135 uint32_t FeatureMask = 0; 136 if (Options.Atomics) 137 FeatureMask |= MetadataInfo::Atomics.FeatureMask; 138 return FeatureMask; 139 } 140 141 uint32_t getVersion() const { 142 uint32_t Version = kVersionBase; 143 const auto CM = Mod.getCodeModel(); 144 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large)) 145 Version |= kVersionPtrSizeRel; 146 return Version; 147 } 148 149 void runOn(Function &F, MetadataInfoSet &MIS); 150 151 // Determines which set of metadata to collect for this instruction. 152 // 153 // Returns true if covered metadata is required to unambiguously interpret 154 // other metadata. For example, if we are interested in atomics metadata, any 155 // function with memory operations (atomic or not) requires covered metadata 156 // to determine if a memory operation is atomic or not in modules compiled 157 // with SanitizerBinaryMetadata. 158 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB, 159 uint32_t &FeatureMask); 160 161 // Get start/end section marker pointer. 162 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty); 163 164 // Returns the target-dependent section name. 165 StringRef getSectionName(StringRef SectionSuffix); 166 167 // Returns the section start marker name. 168 Twine getSectionStart(StringRef SectionSuffix); 169 170 // Returns the section end marker name. 171 Twine getSectionEnd(StringRef SectionSuffix); 172 173 Module &Mod; 174 const SanitizerBinaryMetadataOptions Options; 175 const Triple TargetTriple; 176 IRBuilder<> IRB; 177 }; 178 179 bool SanitizerBinaryMetadata::run() { 180 MetadataInfoSet MIS; 181 182 for (Function &F : Mod) 183 runOn(F, MIS); 184 185 if (MIS.empty()) 186 return false; 187 188 // 189 // Setup constructors and call all initialization functions for requested 190 // metadata features. 191 // 192 193 auto *Int8PtrTy = IRB.getInt8PtrTy(); 194 auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy); 195 auto *Int32Ty = IRB.getInt32Ty(); 196 const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy}; 197 auto *Version = ConstantInt::get(Int32Ty, getVersion()); 198 199 for (const MetadataInfo *MI : MIS) { 200 const std::array<Value *, InitTypes.size()> InitArgs = { 201 Version, 202 getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy), 203 getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy), 204 }; 205 // We declare the _add and _del functions as weak, and only call them if 206 // there is a valid symbol linked. This allows building binaries with 207 // semantic metadata, but without having callbacks. When a tool that wants 208 // the metadata is linked which provides the callbacks, they will be called. 209 Function *Ctor = 210 createSanitizerCtorAndInitFunctions( 211 Mod, (MI->FunctionPrefix + ".module_ctor").str(), 212 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs, 213 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 214 .first; 215 Function *Dtor = 216 createSanitizerCtorAndInitFunctions( 217 Mod, (MI->FunctionPrefix + ".module_dtor").str(), 218 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs, 219 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks) 220 .first; 221 Constant *CtorData = nullptr; 222 Constant *DtorData = nullptr; 223 if (TargetTriple.supportsCOMDAT()) { 224 // Use COMDAT to deduplicate constructor/destructor function. 225 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName())); 226 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName())); 227 CtorData = Ctor; 228 DtorData = Dtor; 229 } 230 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData); 231 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData); 232 } 233 234 return true; 235 } 236 237 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) { 238 if (F.empty()) 239 return; 240 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation)) 241 return; 242 // Don't touch available_externally functions, their actual body is elsewhere. 243 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 244 return; 245 246 MDBuilder MDB(F.getContext()); 247 248 // The metadata features enabled for this function, stored along covered 249 // metadata (if enabled). 250 uint32_t FeatureMask = getEnabledPerInstructionFeature(); 251 // Don't emit unnecessary covered metadata for all functions to save space. 252 bool RequiresCovered = false; 253 // We can only understand if we need to set UAR feature after looking 254 // at the instructions. So we need to check instructions even if FeatureMask 255 // is empty. 256 if (FeatureMask || Options.UAR) { 257 for (BasicBlock &BB : F) 258 for (Instruction &I : BB) 259 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask); 260 } 261 262 if (F.isVarArg()) 263 FeatureMask &= ~kSanitizerBinaryMetadataUAR; 264 if (FeatureMask & kSanitizerBinaryMetadataUAR) { 265 RequiresCovered = true; 266 NumMetadataUAR++; 267 } 268 269 // Covered metadata is always emitted if explicitly requested, otherwise only 270 // if some other metadata requires it to unambiguously interpret it for 271 // modules compiled with SanitizerBinaryMetadata. 272 if (Options.Covered || (FeatureMask && RequiresCovered)) { 273 NumMetadataCovered++; 274 const auto *MI = &MetadataInfo::Covered; 275 MIS.insert(MI); 276 const StringRef Section = getSectionName(MI->SectionSuffix); 277 // The feature mask will be placed after the size (32 bit) of the function, 278 // so in total one covered entry will use `sizeof(void*) + 4 + 4`. 279 Constant *CFM = IRB.getInt32(FeatureMask); 280 F.setMetadata(LLVMContext::MD_pcsections, 281 MDB.createPCSections({{Section, {CFM}}})); 282 } 283 } 284 285 bool isUARSafeCall(CallInst *CI) { 286 auto *F = CI->getCalledFunction(); 287 // There are no intrinsic functions that leak arguments. 288 // If the called function does not return, the current function 289 // does not return as well, so no possibility of use-after-return. 290 // Sanitizer function also don't leak or don't return. 291 // It's safe to both pass pointers to local variables to them 292 // and to tail-call them. 293 return F && (F->isIntrinsic() || F->doesNotReturn() || 294 F->getName().startswith("__asan_") || 295 F->getName().startswith("__hwsan_") || 296 F->getName().startswith("__ubsan_") || 297 F->getName().startswith("__msan_") || 298 F->getName().startswith("__tsan_")); 299 } 300 301 bool hasUseAfterReturnUnsafeUses(Value &V) { 302 for (User *U : V.users()) { 303 if (auto *I = dyn_cast<Instruction>(U)) { 304 if (I->isLifetimeStartOrEnd() || I->isDroppable()) 305 continue; 306 if (auto *CI = dyn_cast<CallInst>(U)) { 307 if (isUARSafeCall(CI)) 308 continue; 309 } 310 if (isa<LoadInst>(U)) 311 continue; 312 if (auto *SI = dyn_cast<StoreInst>(U)) { 313 // If storing TO the alloca, then the address isn't taken. 314 if (SI->getOperand(1) == &V) 315 continue; 316 } 317 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 318 if (!hasUseAfterReturnUnsafeUses(*GEPI)) 319 continue; 320 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) { 321 if (!hasUseAfterReturnUnsafeUses(*BCI)) 322 continue; 323 } 324 } 325 return true; 326 } 327 return false; 328 } 329 330 bool useAfterReturnUnsafe(Instruction &I) { 331 if (isa<AllocaInst>(I)) 332 return hasUseAfterReturnUnsafeUses(I); 333 // Tail-called functions are not necessary intercepted 334 // at runtime because there is no call instruction. 335 // So conservatively mark the caller as requiring checking. 336 else if (auto *CI = dyn_cast<CallInst>(&I)) 337 return CI->isTailCall() && !isUARSafeCall(CI); 338 return false; 339 } 340 341 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS, 342 MDBuilder &MDB, uint32_t &FeatureMask) { 343 SmallVector<const MetadataInfo *, 1> InstMetadata; 344 bool RequiresCovered = false; 345 346 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) { 347 if (useAfterReturnUnsafe(I)) 348 FeatureMask |= kSanitizerBinaryMetadataUAR; 349 } 350 351 if (Options.Atomics && I.mayReadOrWriteMemory()) { 352 auto SSID = getAtomicSyncScopeID(&I); 353 if (SSID.has_value() && *SSID != SyncScope::SingleThread) { 354 NumMetadataAtomics++; 355 InstMetadata.push_back(&MetadataInfo::Atomics); 356 } 357 RequiresCovered = true; 358 } 359 360 // Attach MD_pcsections to instruction. 361 if (!InstMetadata.empty()) { 362 MIS.insert(InstMetadata.begin(), InstMetadata.end()); 363 SmallVector<MDBuilder::PCSection, 1> Sections; 364 for (const auto &MI : InstMetadata) 365 Sections.push_back({getSectionName(MI->SectionSuffix), {}}); 366 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections)); 367 } 368 369 return RequiresCovered; 370 } 371 372 GlobalVariable * 373 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) { 374 // Use ExternalWeak so that if all sections are discarded due to section 375 // garbage collection, the linker will not report undefined symbol errors. 376 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false, 377 GlobalVariable::ExternalWeakLinkage, 378 /*Initializer=*/nullptr, MarkerName); 379 Marker->setVisibility(GlobalValue::HiddenVisibility); 380 return Marker; 381 } 382 383 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) { 384 // FIXME: Other TargetTriple (req. string pool) 385 return SectionSuffix; 386 } 387 388 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) { 389 return "__start_" + SectionSuffix; 390 } 391 392 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) { 393 return "__stop_" + SectionSuffix; 394 } 395 396 } // namespace 397 398 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass( 399 SanitizerBinaryMetadataOptions Opts) 400 : Options(std::move(Opts)) {} 401 402 PreservedAnalyses 403 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) { 404 SanitizerBinaryMetadata Pass(M, Options); 405 if (Pass.run()) 406 return PreservedAnalyses::none(); 407 return PreservedAnalyses::all(); 408 } 409