1 //===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow 11 /// analysis. 12 /// 13 /// Unlike other Sanitizer tools, this tool is not designed to detect a specific 14 /// class of bugs on its own. Instead, it provides a generic dynamic data flow 15 /// analysis framework to be used by clients to help detect application-specific 16 /// issues within their own code. 17 /// 18 /// The analysis is based on automatic propagation of data flow labels (also 19 /// known as taint labels) through a program as it performs computation. Each 20 /// byte of application memory is backed by two bytes of shadow memory which 21 /// hold the label. On Linux/x86_64, memory is laid out as follows: 22 /// 23 /// +--------------------+ 0x800000000000 (top of memory) 24 /// | application memory | 25 /// +--------------------+ 0x700000008000 (kAppAddr) 26 /// | | 27 /// | unused | 28 /// | | 29 /// +--------------------+ 0x200200000000 (kUnusedAddr) 30 /// | union table | 31 /// +--------------------+ 0x200000000000 (kUnionTableAddr) 32 /// | shadow memory | 33 /// +--------------------+ 0x000000010000 (kShadowAddr) 34 /// | reserved by kernel | 35 /// +--------------------+ 0x000000000000 36 /// 37 /// To derive a shadow memory address from an application memory address, 38 /// bits 44-46 are cleared to bring the address into the range 39 /// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to 40 /// account for the double byte representation of shadow labels and move the 41 /// address into the shadow memory range. 
See the function 42 /// DataFlowSanitizer::getShadowAddress below. 43 /// 44 /// For more information, please refer to the design document: 45 /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html 46 // 47 //===----------------------------------------------------------------------===// 48 49 #include "llvm/ADT/DenseMap.h" 50 #include "llvm/ADT/DenseSet.h" 51 #include "llvm/ADT/DepthFirstIterator.h" 52 #include "llvm/ADT/None.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/SmallVector.h" 55 #include "llvm/ADT/StringExtras.h" 56 #include "llvm/ADT/StringRef.h" 57 #include "llvm/ADT/Triple.h" 58 #include "llvm/Transforms/Utils/Local.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/IR/Argument.h" 61 #include "llvm/IR/Attributes.h" 62 #include "llvm/IR/BasicBlock.h" 63 #include "llvm/IR/CallSite.h" 64 #include "llvm/IR/Constant.h" 65 #include "llvm/IR/Constants.h" 66 #include "llvm/IR/DataLayout.h" 67 #include "llvm/IR/DerivedTypes.h" 68 #include "llvm/IR/Dominators.h" 69 #include "llvm/IR/Function.h" 70 #include "llvm/IR/GlobalAlias.h" 71 #include "llvm/IR/GlobalValue.h" 72 #include "llvm/IR/GlobalVariable.h" 73 #include "llvm/IR/IRBuilder.h" 74 #include "llvm/IR/InlineAsm.h" 75 #include "llvm/IR/InstVisitor.h" 76 #include "llvm/IR/InstrTypes.h" 77 #include "llvm/IR/Instruction.h" 78 #include "llvm/IR/Instructions.h" 79 #include "llvm/IR/IntrinsicInst.h" 80 #include "llvm/IR/LLVMContext.h" 81 #include "llvm/IR/MDBuilder.h" 82 #include "llvm/IR/Module.h" 83 #include "llvm/IR/Type.h" 84 #include "llvm/IR/User.h" 85 #include "llvm/IR/Value.h" 86 #include "llvm/Pass.h" 87 #include "llvm/Support/Casting.h" 88 #include "llvm/Support/CommandLine.h" 89 #include "llvm/Support/ErrorHandling.h" 90 #include "llvm/Support/SpecialCaseList.h" 91 #include "llvm/Transforms/Instrumentation.h" 92 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 93 #include <algorithm> 94 #include <cassert> 95 #include <cstddef> 96 #include <cstdint> 97 #include 
<iterator> 98 #include <memory> 99 #include <set> 100 #include <string> 101 #include <utility> 102 #include <vector> 103 104 using namespace llvm; 105 106 // External symbol to be used when generating the shadow address for 107 // architectures with multiple VMAs. Instead of using a constant integer 108 // the runtime will set the external mask based on the VMA range. 109 static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask"; 110 111 // The -dfsan-preserve-alignment flag controls whether this pass assumes that 112 // alignment requirements provided by the input IR are correct. For example, 113 // if the input IR contains a load with alignment 8, this flag will cause 114 // the shadow load to have alignment 16. This flag is disabled by default as 115 // we have unfortunately encountered too much code (including Clang itself; 116 // see PR14291) which performs misaligned access. 117 static cl::opt<bool> ClPreserveAlignment( 118 "dfsan-preserve-alignment", 119 cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, 120 cl::init(false)); 121 122 // The ABI list files control how shadow parameters are passed. The pass treats 123 // every function labelled "uninstrumented" in the ABI list file as conforming 124 // to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains 125 // additional annotations for those functions, a call to one of those functions 126 // will produce a warning message, as the labelling behaviour of the function is 127 // unknown. The other supported annotations are "functional" and "discard", 128 // which are described below under DataFlowSanitizer::WrapperKind. 129 static cl::list<std::string> ClABIListFiles( 130 "dfsan-abilist", 131 cl::desc("File listing native ABI functions and how the pass treats them"), 132 cl::Hidden); 133 134 // Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented 135 // functions (see DataFlowSanitizer::InstrumentedABI below). 
136 static cl::opt<bool> ClArgsABI( 137 "dfsan-args-abi", 138 cl::desc("Use the argument ABI rather than the TLS ABI"), 139 cl::Hidden); 140 141 // Controls whether the pass includes or ignores the labels of pointers in load 142 // instructions. 143 static cl::opt<bool> ClCombinePointerLabelsOnLoad( 144 "dfsan-combine-pointer-labels-on-load", 145 cl::desc("Combine the label of the pointer with the label of the data when " 146 "loading from memory."), 147 cl::Hidden, cl::init(true)); 148 149 // Controls whether the pass includes or ignores the labels of pointers in 150 // stores instructions. 151 static cl::opt<bool> ClCombinePointerLabelsOnStore( 152 "dfsan-combine-pointer-labels-on-store", 153 cl::desc("Combine the label of the pointer with the label of the data when " 154 "storing in memory."), 155 cl::Hidden, cl::init(false)); 156 157 static cl::opt<bool> ClDebugNonzeroLabels( 158 "dfsan-debug-nonzero-labels", 159 cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " 160 "load or return with a nonzero label"), 161 cl::Hidden); 162 163 static StringRef GetGlobalTypeString(const GlobalValue &G) { 164 // Types of GlobalVariables are always pointer types. 165 Type *GType = G.getValueType(); 166 // For now we support blacklisting struct types only. 167 if (StructType *SGType = dyn_cast<StructType>(GType)) { 168 if (!SGType->isLiteral()) 169 return SGType->getName(); 170 } 171 return "<unknown type>"; 172 } 173 174 namespace { 175 176 class DFSanABIList { 177 std::unique_ptr<SpecialCaseList> SCL; 178 179 public: 180 DFSanABIList() = default; 181 182 void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); } 183 184 /// Returns whether either this function or its source file are listed in the 185 /// given category. 
186 bool isIn(const Function &F, StringRef Category) const { 187 return isIn(*F.getParent(), Category) || 188 SCL->inSection("dataflow", "fun", F.getName(), Category); 189 } 190 191 /// Returns whether this global alias is listed in the given category. 192 /// 193 /// If GA aliases a function, the alias's name is matched as a function name 194 /// would be. Similarly, aliases of globals are matched like globals. 195 bool isIn(const GlobalAlias &GA, StringRef Category) const { 196 if (isIn(*GA.getParent(), Category)) 197 return true; 198 199 if (isa<FunctionType>(GA.getValueType())) 200 return SCL->inSection("dataflow", "fun", GA.getName(), Category); 201 202 return SCL->inSection("dataflow", "global", GA.getName(), Category) || 203 SCL->inSection("dataflow", "type", GetGlobalTypeString(GA), 204 Category); 205 } 206 207 /// Returns whether this module is listed in the given category. 208 bool isIn(const Module &M, StringRef Category) const { 209 return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category); 210 } 211 }; 212 213 /// TransformedFunction is used to express the result of transforming one 214 /// function type into another. This struct is immutable. It holds metadata 215 /// useful for updating calls of the old function to the new type. 216 struct TransformedFunction { 217 TransformedFunction(FunctionType* OriginalType, 218 FunctionType* TransformedType, 219 std::vector<unsigned> ArgumentIndexMapping) 220 : OriginalType(OriginalType), 221 TransformedType(TransformedType), 222 ArgumentIndexMapping(ArgumentIndexMapping) {} 223 224 // Disallow copies. 225 TransformedFunction(const TransformedFunction&) = delete; 226 TransformedFunction& operator=(const TransformedFunction&) = delete; 227 228 // Allow moves. 229 TransformedFunction(TransformedFunction&&) = default; 230 TransformedFunction& operator=(TransformedFunction&&) = default; 231 232 /// Type of the function before the transformation. 
233 FunctionType *OriginalType; 234 235 /// Type of the function after the transformation. 236 FunctionType *TransformedType; 237 238 /// Transforming a function may change the position of arguments. This 239 /// member records the mapping from each argument's old position to its new 240 /// position. Argument positions are zero-indexed. If the transformation 241 /// from F to F' made the first argument of F into the third argument of F', 242 /// then ArgumentIndexMapping[0] will equal 2. 243 std::vector<unsigned> ArgumentIndexMapping; 244 }; 245 246 /// Given function attributes from a call site for the original function, 247 /// return function attributes appropriate for a call to the transformed 248 /// function. 249 AttributeList TransformFunctionAttributes( 250 const TransformedFunction& TransformedFunction, 251 LLVMContext& Ctx, AttributeList CallSiteAttrs) { 252 253 // Construct a vector of AttributeSet for each function argument. 254 std::vector<llvm::AttributeSet> ArgumentAttributes( 255 TransformedFunction.TransformedType->getNumParams()); 256 257 // Copy attributes from the parameter of the original function to the 258 // transformed version. 'ArgumentIndexMapping' holds the mapping from 259 // old argument position to new. 260 for (unsigned i=0, ie = TransformedFunction.ArgumentIndexMapping.size(); 261 i < ie; ++i) { 262 unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[i]; 263 ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(i); 264 } 265 266 // Copy annotations on varargs arguments. 
267 for (unsigned i = TransformedFunction.OriginalType->getNumParams(), 268 ie = CallSiteAttrs.getNumAttrSets(); i<ie; ++i) { 269 ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(i)); 270 } 271 272 return AttributeList::get( 273 Ctx, 274 CallSiteAttrs.getFnAttributes(), 275 CallSiteAttrs.getRetAttributes(), 276 llvm::makeArrayRef(ArgumentAttributes)); 277 } 278 279 class DataFlowSanitizer : public ModulePass { 280 friend struct DFSanFunction; 281 friend class DFSanVisitor; 282 283 enum { 284 ShadowWidth = 16 285 }; 286 287 /// Which ABI should be used for instrumented functions? 288 enum InstrumentedABI { 289 /// Argument and return value labels are passed through additional 290 /// arguments and by modifying the return type. 291 IA_Args, 292 293 /// Argument and return value labels are passed through TLS variables 294 /// __dfsan_arg_tls and __dfsan_retval_tls. 295 IA_TLS 296 }; 297 298 /// How should calls to uninstrumented functions be handled? 299 enum WrapperKind { 300 /// This function is present in an uninstrumented form but we don't know 301 /// how it should be handled. Print a warning and call the function anyway. 302 /// Don't label the return value. 303 WK_Warning, 304 305 /// This function does not write to (user-accessible) memory, and its return 306 /// value is unlabelled. 307 WK_Discard, 308 309 /// This function does not write to (user-accessible) memory, and the label 310 /// of its return value is the union of the label of its arguments. 311 WK_Functional, 312 313 /// Instead of calling the function, a custom wrapper __dfsw_F is called, 314 /// where F is the name of the function. This function may wrap the 315 /// original function or provide its own implementation. This is similar to 316 /// the IA_Args ABI, except that IA_Args uses a struct return type to 317 /// pass the return value shadow in a register, while WK_Custom uses an 318 /// extra pointer argument to return the shadow. 
This allows the wrapped 319 /// form of the function type to be expressed in C. 320 WK_Custom 321 }; 322 323 Module *Mod; 324 LLVMContext *Ctx; 325 IntegerType *ShadowTy; 326 PointerType *ShadowPtrTy; 327 IntegerType *IntptrTy; 328 ConstantInt *ZeroShadow; 329 ConstantInt *ShadowPtrMask; 330 ConstantInt *ShadowPtrMul; 331 Constant *ArgTLS; 332 Constant *RetvalTLS; 333 void *(*GetArgTLSPtr)(); 334 void *(*GetRetvalTLSPtr)(); 335 FunctionType *GetArgTLSTy; 336 FunctionType *GetRetvalTLSTy; 337 Constant *GetArgTLS; 338 Constant *GetRetvalTLS; 339 Constant *ExternalShadowMask; 340 FunctionType *DFSanUnionFnTy; 341 FunctionType *DFSanUnionLoadFnTy; 342 FunctionType *DFSanUnimplementedFnTy; 343 FunctionType *DFSanSetLabelFnTy; 344 FunctionType *DFSanNonzeroLabelFnTy; 345 FunctionType *DFSanVarargWrapperFnTy; 346 FunctionCallee DFSanUnionFn; 347 FunctionCallee DFSanCheckedUnionFn; 348 FunctionCallee DFSanUnionLoadFn; 349 FunctionCallee DFSanUnimplementedFn; 350 FunctionCallee DFSanSetLabelFn; 351 FunctionCallee DFSanNonzeroLabelFn; 352 FunctionCallee DFSanVarargWrapperFn; 353 MDNode *ColdCallWeights; 354 DFSanABIList ABIList; 355 DenseMap<Value *, Function *> UnwrappedFnMap; 356 AttrBuilder ReadOnlyNoneAttrs; 357 bool DFSanRuntimeShadowMask = false; 358 359 Value *getShadowAddress(Value *Addr, Instruction *Pos); 360 bool isInstrumented(const Function *F); 361 bool isInstrumented(const GlobalAlias *GA); 362 FunctionType *getArgsFunctionType(FunctionType *T); 363 FunctionType *getTrampolineFunctionType(FunctionType *T); 364 TransformedFunction getCustomFunctionType(FunctionType *T); 365 InstrumentedABI getInstrumentedABI(); 366 WrapperKind getWrapperKind(Function *F); 367 void addGlobalNamePrefix(GlobalValue *GV); 368 Function *buildWrapperFunction(Function *F, StringRef NewFName, 369 GlobalValue::LinkageTypes NewFLink, 370 FunctionType *NewFT); 371 Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); 372 373 public: 374 static char ID; 375 376 
DataFlowSanitizer( 377 const std::vector<std::string> &ABIListFiles = std::vector<std::string>(), 378 void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr); 379 380 bool doInitialization(Module &M) override; 381 bool runOnModule(Module &M) override; 382 }; 383 384 struct DFSanFunction { 385 DataFlowSanitizer &DFS; 386 Function *F; 387 DominatorTree DT; 388 DataFlowSanitizer::InstrumentedABI IA; 389 bool IsNativeABI; 390 Value *ArgTLSPtr = nullptr; 391 Value *RetvalTLSPtr = nullptr; 392 AllocaInst *LabelReturnAlloca = nullptr; 393 DenseMap<Value *, Value *> ValShadowMap; 394 DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap; 395 std::vector<std::pair<PHINode *, PHINode *>> PHIFixups; 396 DenseSet<Instruction *> SkipInsts; 397 std::vector<Value *> NonZeroChecks; 398 bool AvoidNewBlocks; 399 400 struct CachedCombinedShadow { 401 BasicBlock *Block; 402 Value *Shadow; 403 }; 404 DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow> 405 CachedCombinedShadows; 406 DenseMap<Value *, std::set<Value *>> ShadowElements; 407 408 DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) 409 : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) { 410 DT.recalculate(*F); 411 // FIXME: Need to track down the register allocator issue which causes poor 412 // performance in pathological cases with large numbers of basic blocks. 
413 AvoidNewBlocks = F->size() > 1000; 414 } 415 416 Value *getArgTLSPtr(); 417 Value *getArgTLS(unsigned Index, Instruction *Pos); 418 Value *getRetvalTLS(); 419 Value *getShadow(Value *V); 420 void setShadow(Instruction *I, Value *Shadow); 421 Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); 422 Value *combineOperandShadows(Instruction *Inst); 423 Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align, 424 Instruction *Pos); 425 void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow, 426 Instruction *Pos); 427 }; 428 429 class DFSanVisitor : public InstVisitor<DFSanVisitor> { 430 public: 431 DFSanFunction &DFSF; 432 433 DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} 434 435 const DataLayout &getDataLayout() const { 436 return DFSF.F->getParent()->getDataLayout(); 437 } 438 439 void visitOperandShadowInst(Instruction &I); 440 void visitUnaryOperator(UnaryOperator &UO); 441 void visitBinaryOperator(BinaryOperator &BO); 442 void visitCastInst(CastInst &CI); 443 void visitCmpInst(CmpInst &CI); 444 void visitGetElementPtrInst(GetElementPtrInst &GEPI); 445 void visitLoadInst(LoadInst &LI); 446 void visitStoreInst(StoreInst &SI); 447 void visitReturnInst(ReturnInst &RI); 448 void visitCallSite(CallSite CS); 449 void visitPHINode(PHINode &PN); 450 void visitExtractElementInst(ExtractElementInst &I); 451 void visitInsertElementInst(InsertElementInst &I); 452 void visitShuffleVectorInst(ShuffleVectorInst &I); 453 void visitExtractValueInst(ExtractValueInst &I); 454 void visitInsertValueInst(InsertValueInst &I); 455 void visitAllocaInst(AllocaInst &I); 456 void visitSelectInst(SelectInst &I); 457 void visitMemSetInst(MemSetInst &I); 458 void visitMemTransferInst(MemTransferInst &I); 459 }; 460 461 } // end anonymous namespace 462 463 char DataFlowSanitizer::ID; 464 465 INITIALIZE_PASS(DataFlowSanitizer, "dfsan", 466 "DataFlowSanitizer: dynamic data flow analysis.", false, false) 467 468 ModulePass * 469 
llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles, 470 void *(*getArgTLS)(), 471 void *(*getRetValTLS)()) { 472 return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS); 473 } 474 475 DataFlowSanitizer::DataFlowSanitizer( 476 const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(), 477 void *(*getRetValTLS)()) 478 : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) { 479 std::vector<std::string> AllABIListFiles(std::move(ABIListFiles)); 480 AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(), 481 ClABIListFiles.end()); 482 ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles)); 483 } 484 485 FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { 486 SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end()); 487 ArgTypes.append(T->getNumParams(), ShadowTy); 488 if (T->isVarArg()) 489 ArgTypes.push_back(ShadowPtrTy); 490 Type *RetType = T->getReturnType(); 491 if (!RetType->isVoidTy()) 492 RetType = StructType::get(RetType, ShadowTy); 493 return FunctionType::get(RetType, ArgTypes, T->isVarArg()); 494 } 495 496 FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { 497 assert(!T->isVarArg()); 498 SmallVector<Type *, 4> ArgTypes; 499 ArgTypes.push_back(T->getPointerTo()); 500 ArgTypes.append(T->param_begin(), T->param_end()); 501 ArgTypes.append(T->getNumParams(), ShadowTy); 502 Type *RetType = T->getReturnType(); 503 if (!RetType->isVoidTy()) 504 ArgTypes.push_back(ShadowPtrTy); 505 return FunctionType::get(T->getReturnType(), ArgTypes, false); 506 } 507 508 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { 509 SmallVector<Type *, 4> ArgTypes; 510 511 // Some parameters of the custom function being constructed are 512 // parameters of T. Record the mapping from parameters of T to 513 // parameters of the custom function, so that parameter attributes 514 // at call sites can be updated. 
515 std::vector<unsigned> ArgumentIndexMapping; 516 for (unsigned i = 0, ie = T->getNumParams(); i != ie; ++i) { 517 Type* param_type = T->getParamType(i); 518 FunctionType *FT; 519 if (isa<PointerType>(param_type) && (FT = dyn_cast<FunctionType>( 520 cast<PointerType>(param_type)->getElementType()))) { 521 ArgumentIndexMapping.push_back(ArgTypes.size()); 522 ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); 523 ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); 524 } else { 525 ArgumentIndexMapping.push_back(ArgTypes.size()); 526 ArgTypes.push_back(param_type); 527 } 528 } 529 for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) 530 ArgTypes.push_back(ShadowTy); 531 if (T->isVarArg()) 532 ArgTypes.push_back(ShadowPtrTy); 533 Type *RetType = T->getReturnType(); 534 if (!RetType->isVoidTy()) 535 ArgTypes.push_back(ShadowPtrTy); 536 return TransformedFunction( 537 T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()), 538 ArgumentIndexMapping); 539 } 540 541 bool DataFlowSanitizer::doInitialization(Module &M) { 542 Triple TargetTriple(M.getTargetTriple()); 543 bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; 544 bool IsMIPS64 = TargetTriple.isMIPS64(); 545 bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 || 546 TargetTriple.getArch() == Triple::aarch64_be; 547 548 const DataLayout &DL = M.getDataLayout(); 549 550 Mod = &M; 551 Ctx = &M.getContext(); 552 ShadowTy = IntegerType::get(*Ctx, ShadowWidth); 553 ShadowPtrTy = PointerType::getUnqual(ShadowTy); 554 IntptrTy = DL.getIntPtrType(*Ctx); 555 ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); 556 ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); 557 if (IsX86_64) 558 ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); 559 else if (IsMIPS64) 560 ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL); 561 // AArch64 supports multiple VMAs and the shadow mask is set at runtime. 
562 else if (IsAArch64) 563 DFSanRuntimeShadowMask = true; 564 else 565 report_fatal_error("unsupported triple"); 566 567 Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy }; 568 DFSanUnionFnTy = 569 FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false); 570 Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy }; 571 DFSanUnionLoadFnTy = 572 FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false); 573 DFSanUnimplementedFnTy = FunctionType::get( 574 Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); 575 Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy }; 576 DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), 577 DFSanSetLabelArgs, /*isVarArg=*/false); 578 DFSanNonzeroLabelFnTy = FunctionType::get( 579 Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); 580 DFSanVarargWrapperFnTy = FunctionType::get( 581 Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); 582 583 if (GetArgTLSPtr) { 584 Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); 585 ArgTLS = nullptr; 586 GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false); 587 GetArgTLS = ConstantExpr::getIntToPtr( 588 ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), 589 PointerType::getUnqual(GetArgTLSTy)); 590 } 591 if (GetRetvalTLSPtr) { 592 RetvalTLS = nullptr; 593 GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false); 594 GetRetvalTLS = ConstantExpr::getIntToPtr( 595 ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), 596 PointerType::getUnqual(GetRetvalTLSTy)); 597 } 598 599 ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); 600 return true; 601 } 602 603 bool DataFlowSanitizer::isInstrumented(const Function *F) { 604 return !ABIList.isIn(*F, "uninstrumented"); 605 } 606 607 bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { 608 return !ABIList.isIn(*GA, "uninstrumented"); 609 } 610 611 DataFlowSanitizer::InstrumentedABI 
DataFlowSanitizer::getInstrumentedABI() { 612 return ClArgsABI ? IA_Args : IA_TLS; 613 } 614 615 DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { 616 if (ABIList.isIn(*F, "functional")) 617 return WK_Functional; 618 if (ABIList.isIn(*F, "discard")) 619 return WK_Discard; 620 if (ABIList.isIn(*F, "custom")) 621 return WK_Custom; 622 623 return WK_Warning; 624 } 625 626 void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { 627 std::string GVName = GV->getName(), Prefix = "dfs$"; 628 GV->setName(Prefix + GVName); 629 630 // Try to change the name of the function in module inline asm. We only do 631 // this for specific asm directives, currently only ".symver", to try to avoid 632 // corrupting asm which happens to contain the symbol name as a substring. 633 // Note that the substitution for .symver assumes that the versioned symbol 634 // also has an instrumented name. 635 std::string Asm = GV->getParent()->getModuleInlineAsm(); 636 std::string SearchStr = ".symver " + GVName + ","; 637 size_t Pos = Asm.find(SearchStr); 638 if (Pos != std::string::npos) { 639 Asm.replace(Pos, SearchStr.size(), 640 ".symver " + Prefix + GVName + "," + Prefix); 641 GV->getParent()->setModuleInlineAsm(Asm); 642 } 643 } 644 645 Function * 646 DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, 647 GlobalValue::LinkageTypes NewFLink, 648 FunctionType *NewFT) { 649 FunctionType *FT = F->getFunctionType(); 650 Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(), 651 NewFName, F->getParent()); 652 NewF->copyAttributesFrom(F); 653 NewF->removeAttributes( 654 AttributeList::ReturnIndex, 655 AttributeFuncs::typeIncompatible(NewFT->getReturnType())); 656 657 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); 658 if (F->isVarArg()) { 659 NewF->removeAttributes(AttributeList::FunctionIndex, 660 AttrBuilder().addAttribute("split-stack")); 661 CallInst::Create(DFSanVarargWrapperFn, 662 
IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", 663 BB); 664 new UnreachableInst(*Ctx, BB); 665 } else { 666 std::vector<Value *> Args; 667 unsigned n = FT->getNumParams(); 668 for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) 669 Args.push_back(&*ai); 670 CallInst *CI = CallInst::Create(F, Args, "", BB); 671 if (FT->getReturnType()->isVoidTy()) 672 ReturnInst::Create(*Ctx, BB); 673 else 674 ReturnInst::Create(*Ctx, CI, BB); 675 } 676 677 return NewF; 678 } 679 680 Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, 681 StringRef FName) { 682 FunctionType *FTT = getTrampolineFunctionType(FT); 683 FunctionCallee C = Mod->getOrInsertFunction(FName, FTT); 684 Function *F = dyn_cast<Function>(C.getCallee()); 685 if (F && F->isDeclaration()) { 686 F->setLinkage(GlobalValue::LinkOnceODRLinkage); 687 BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); 688 std::vector<Value *> Args; 689 Function::arg_iterator AI = F->arg_begin(); ++AI; 690 for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) 691 Args.push_back(&*AI); 692 CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB); 693 ReturnInst *RI; 694 if (FT->getReturnType()->isVoidTy()) 695 RI = ReturnInst::Create(*Ctx, BB); 696 else 697 RI = ReturnInst::Create(*Ctx, CI, BB); 698 699 DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); 700 Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; 701 for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) 702 DFSF.ValShadowMap[&*ValAI] = &*ShadowAI; 703 DFSanVisitor(DFSF).visitCallInst(*CI); 704 if (!FT->getReturnType()->isVoidTy()) 705 new StoreInst(DFSF.getShadow(RI->getReturnValue()), 706 &*std::prev(F->arg_end()), RI); 707 } 708 709 return cast<Constant>(C.getCallee()); 710 } 711 712 bool DataFlowSanitizer::runOnModule(Module &M) { 713 if (ABIList.isIn(M, "skip")) 714 return false; 715 716 if (!GetArgTLSPtr) { 717 Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); 718 
ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); 719 if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) 720 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); 721 } 722 if (!GetRetvalTLSPtr) { 723 RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); 724 if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) 725 G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); 726 } 727 728 ExternalShadowMask = 729 Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); 730 731 { 732 AttributeList AL; 733 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 734 Attribute::NoUnwind); 735 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 736 Attribute::ReadNone); 737 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 738 Attribute::ZExt); 739 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 740 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); 741 DFSanUnionFn = 742 Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL); 743 } 744 745 { 746 AttributeList AL; 747 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 748 Attribute::NoUnwind); 749 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 750 Attribute::ReadNone); 751 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 752 Attribute::ZExt); 753 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 754 AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); 755 DFSanCheckedUnionFn = 756 Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL); 757 } 758 { 759 AttributeList AL; 760 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 761 Attribute::NoUnwind); 762 AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex, 763 Attribute::ReadOnly); 764 AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, 765 Attribute::ZExt); 766 DFSanUnionLoadFn = 767 
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL); 768 } 769 DFSanUnimplementedFn = 770 Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); 771 { 772 AttributeList AL; 773 AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); 774 DFSanSetLabelFn = 775 Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL); 776 } 777 DFSanNonzeroLabelFn = 778 Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); 779 DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper", 780 DFSanVarargWrapperFnTy); 781 782 std::vector<Function *> FnsToInstrument; 783 SmallPtrSet<Function *, 2> FnsWithNativeABI; 784 for (Function &i : M) { 785 if (!i.isIntrinsic() && 786 &i != DFSanUnionFn.getCallee()->stripPointerCasts() && 787 &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() && 788 &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() && 789 &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() && 790 &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() && 791 &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() && 792 &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts()) 793 FnsToInstrument.push_back(&i); 794 } 795 796 // Give function aliases prefixes when necessary, and build wrappers where the 797 // instrumentedness is inconsistent. 798 for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) { 799 GlobalAlias *GA = &*i; 800 ++i; 801 // Don't stop on weak. We assume people aren't playing games with the 802 // instrumentedness of overridden weak aliases. 803 if (auto F = dyn_cast<Function>(GA->getBaseObject())) { 804 bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); 805 if (GAInst && FInst) { 806 addGlobalNamePrefix(GA); 807 } else if (GAInst != FInst) { 808 // Non-instrumented alias of an instrumented function, or vice versa. 809 // Replace the alias with a native-ABI wrapper of the aliasee. 
The pass 810 // below will take care of instrumenting it. 811 Function *NewF = 812 buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); 813 GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); 814 NewF->takeName(GA); 815 GA->eraseFromParent(); 816 FnsToInstrument.push_back(NewF); 817 } 818 } 819 } 820 821 ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) 822 .addAttribute(Attribute::ReadNone); 823 824 // First, change the ABI of every function in the module. ABI-listed 825 // functions keep their original ABI and get a wrapper function. 826 for (std::vector<Function *>::iterator i = FnsToInstrument.begin(), 827 e = FnsToInstrument.end(); 828 i != e; ++i) { 829 Function &F = **i; 830 FunctionType *FT = F.getFunctionType(); 831 832 bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() && 833 FT->getReturnType()->isVoidTy()); 834 835 if (isInstrumented(&F)) { 836 // Instrumented functions get a 'dfs$' prefix. This allows us to more 837 // easily identify cases of mismatching ABIs. 
838 if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) { 839 FunctionType *NewFT = getArgsFunctionType(FT); 840 Function *NewF = Function::Create(NewFT, F.getLinkage(), 841 F.getAddressSpace(), "", &M); 842 NewF->copyAttributesFrom(&F); 843 NewF->removeAttributes( 844 AttributeList::ReturnIndex, 845 AttributeFuncs::typeIncompatible(NewFT->getReturnType())); 846 for (Function::arg_iterator FArg = F.arg_begin(), 847 NewFArg = NewF->arg_begin(), 848 FArgEnd = F.arg_end(); 849 FArg != FArgEnd; ++FArg, ++NewFArg) { 850 FArg->replaceAllUsesWith(&*NewFArg); 851 } 852 NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); 853 854 for (Function::user_iterator UI = F.user_begin(), UE = F.user_end(); 855 UI != UE;) { 856 BlockAddress *BA = dyn_cast<BlockAddress>(*UI); 857 ++UI; 858 if (BA) { 859 BA->replaceAllUsesWith( 860 BlockAddress::get(NewF, BA->getBasicBlock())); 861 delete BA; 862 } 863 } 864 F.replaceAllUsesWith( 865 ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT))); 866 NewF->takeName(&F); 867 F.eraseFromParent(); 868 *i = NewF; 869 addGlobalNamePrefix(NewF); 870 } else { 871 addGlobalNamePrefix(&F); 872 } 873 } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { 874 // Build a wrapper function for F. The wrapper simply calls F, and is 875 // added to FnsToInstrument so that any instrumentation according to its 876 // WrapperKind is done in the second pass below. 877 FunctionType *NewFT = getInstrumentedABI() == IA_Args 878 ? getArgsFunctionType(FT) 879 : FT; 880 881 // If the function being wrapped has local linkage, then preserve the 882 // function's linkage in the wrapper function. 883 GlobalValue::LinkageTypes wrapperLinkage = 884 F.hasLocalLinkage() 885 ? 
F.getLinkage() 886 : GlobalValue::LinkOnceODRLinkage; 887 888 Function *NewF = buildWrapperFunction( 889 &F, std::string("dfsw$") + std::string(F.getName()), 890 wrapperLinkage, NewFT); 891 if (getInstrumentedABI() == IA_TLS) 892 NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs); 893 894 Value *WrappedFnCst = 895 ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); 896 F.replaceAllUsesWith(WrappedFnCst); 897 898 UnwrappedFnMap[WrappedFnCst] = &F; 899 *i = NewF; 900 901 if (!F.isDeclaration()) { 902 // This function is probably defining an interposition of an 903 // uninstrumented function and hence needs to keep the original ABI. 904 // But any functions it may call need to use the instrumented ABI, so 905 // we instrument it in a mode which preserves the original ABI. 906 FnsWithNativeABI.insert(&F); 907 908 // This code needs to rebuild the iterators, as they may be invalidated 909 // by the push_back, taking care that the new range does not include 910 // any functions added by this code. 911 size_t N = i - FnsToInstrument.begin(), 912 Count = e - FnsToInstrument.begin(); 913 FnsToInstrument.push_back(&F); 914 i = FnsToInstrument.begin() + N; 915 e = FnsToInstrument.begin() + Count; 916 } 917 // Hopefully, nobody will try to indirectly call a vararg 918 // function... yet. 919 } else if (FT->isVarArg()) { 920 UnwrappedFnMap[&F] = &F; 921 *i = nullptr; 922 } 923 } 924 925 for (Function *i : FnsToInstrument) { 926 if (!i || i->isDeclaration()) 927 continue; 928 929 removeUnreachableBlocks(*i); 930 931 DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i)); 932 933 // DFSanVisitor may create new basic blocks, which confuses df_iterator. 934 // Build a copy of the list before iterating over it. 
935 SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock())); 936 937 for (BasicBlock *i : BBList) { 938 Instruction *Inst = &i->front(); 939 while (true) { 940 // DFSanVisitor may split the current basic block, changing the current 941 // instruction's next pointer and moving the next instruction to the 942 // tail block from which we should continue. 943 Instruction *Next = Inst->getNextNode(); 944 // DFSanVisitor may delete Inst, so keep track of whether it was a 945 // terminator. 946 bool IsTerminator = Inst->isTerminator(); 947 if (!DFSF.SkipInsts.count(Inst)) 948 DFSanVisitor(DFSF).visit(Inst); 949 if (IsTerminator) 950 break; 951 Inst = Next; 952 } 953 } 954 955 // We will not necessarily be able to compute the shadow for every phi node 956 // until we have visited every block. Therefore, the code that handles phi 957 // nodes adds them to the PHIFixups list so that they can be properly 958 // handled here. 959 for (std::vector<std::pair<PHINode *, PHINode *>>::iterator 960 i = DFSF.PHIFixups.begin(), 961 e = DFSF.PHIFixups.end(); 962 i != e; ++i) { 963 for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n; 964 ++val) { 965 i->second->setIncomingValue( 966 val, DFSF.getShadow(i->first->getIncomingValue(val))); 967 } 968 } 969 970 // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy 971 // places (i.e. instructions in basic blocks we haven't even begun visiting 972 // yet). To make our life easier, do this work in a pass after the main 973 // instrumentation. 
    if (ClDebugNonzeroLabels) {
      for (Value *V : DFSF.NonZeroChecks) {
        Instruction *Pos;
        if (Instruction *I = dyn_cast<Instruction>(V))
          Pos = I->getNextNode();
        else
          Pos = &DFSF.F->getEntryBlock().front();
        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
          Pos = Pos->getNextNode();
        IRBuilder<> IRB(Pos);
        Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
        IRBuilder<> ThenIRB(BI);
        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
      }
    }
  }

  return false;
}

// Returns the pointer through which argument shadows are accessed, caching the
// result in ArgTLSPtr. When the DFS.ArgTLS global is set it is used directly;
// otherwise a call to DFS.GetArgTLS is emitted before the first instruction of
// the entry block and its result is used.
Value *DFSanFunction::getArgTLSPtr() {
  if (ArgTLSPtr)
    return ArgTLSPtr;
  if (DFS.ArgTLS)
    return ArgTLSPtr = DFS.ArgTLS;

  IRBuilder<> IRB(&F->getEntryBlock().front());
  return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
}

// Returns the pointer through which the return-value shadow is accessed,
// caching the result in RetvalTLSPtr. Mirrors getArgTLSPtr: the DFS.RetvalTLS
// global is preferred, otherwise a call to DFS.GetRetvalTLS is emitted before
// the first instruction of the entry block.
Value *DFSanFunction::getRetvalTLS() {
  if (RetvalTLSPtr)
    return RetvalTLSPtr;
  if (DFS.RetvalTLS)
    return RetvalTLSPtr = DFS.RetvalTLS;

  IRBuilder<> IRB(&F->getEntryBlock().front());
  return RetvalTLSPtr =
             IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
}

// Emits, before Pos, a GEP addressing element Idx of the argument shadow
// array (indexed as an array of 64 ShadowTy elements) and returns it.
Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
  IRBuilder<> IRB(Pos);
  return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
                                getArgTLSPtr(), 0, Idx);
}

// Returns the shadow value associated with V.
//
// Values that are neither arguments nor instructions always map to
// DFS.ZeroShadow. Otherwise the shadow is looked up in ValShadowMap; on a miss
// an argument's shadow is materialized according to the instrumented ABI (and
// recorded in NonZeroChecks), while an instruction with no recorded shadow is
// given DFS.ZeroShadow.
Value *DFSanFunction::getShadow(Value *V) {
  if (!isa<Argument>(V) && !isa<Instruction>(V))
    return DFS.ZeroShadow;
  // Reference into the map so that the assignments below populate the cache.
  Value *&Shadow = ValShadowMap[V];
  if (!Shadow) {
    if (Argument *A = dyn_cast<Argument>(V)) {
      // Note: this early return leaves the (null) map entry created above
      // unpopulated, so native-ABI arguments take this path on every query.
      if (IsNativeABI)
        return DFS.ZeroShadow;
      switch (IA) {
      case DataFlowSanitizer::IA_TLS: {
        // Load the argument's shadow from its TLS slot. The load is placed at
        // the start of the entry block, or right after the call that produced
        // the TLS pointer when no ArgTLS global is available.
        Value *ArgTLSPtr = getArgTLSPtr();
        Instruction *ArgTLSPos =
            DFS.ArgTLS ? &*F->getEntryBlock().begin()
                       : cast<Instruction>(ArgTLSPtr)->getNextNode();
        IRBuilder<> IRB(ArgTLSPos);
        Shadow =
            IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
        break;
      }
      case DataFlowSanitizer::IA_Args: {
        // The shadow is itself a function parameter, located arg_size() / 2
        // positions after the argument it describes.
        // NOTE(review): presumably the args ABI appends one shadow parameter
        // per original parameter; confirm against getArgsFunctionType.
        unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
        Function::arg_iterator i = F->arg_begin();
        while (ArgIdx--)
          ++i;
        Shadow = &*i;
        assert(Shadow->getType() == DFS.ShadowTy);
        break;
      }
      }
      NonZeroChecks.push_back(Shadow);
    } else {
      Shadow = DFS.ZeroShadow;
    }
  }
  return Shadow;
}

// Records Shadow as the shadow of instruction I. Asserts that I has no shadow
// recorded yet and that Shadow has type DFS.ShadowTy.
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
  assert(!ValShadowMap.count(I));
  assert(Shadow->getType() == DFS.ShadowTy);
  ValShadowMap[I] = Shadow;
}

// Emits, before Pos, the IR that converts application address Addr into its
// shadow address: (ptrtoint(Addr) & mask) * ShadowPtrMul, cast to ShadowPtrTy.
// The mask is loaded from ExternalShadowMask when DFSanRuntimeShadowMask is
// set, and is the ShadowPtrMask value otherwise.
Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
  assert(Addr != RetvalTLS && "Reinstrumenting?");
  IRBuilder<> IRB(Pos);
  Value *ShadowPtrMaskValue;
  if (DFSanRuntimeShadowMask)
    ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
  else
    ShadowPtrMaskValue = ShadowPtrMask;
  // NOTE(review): the mask is also routed through CreatePtrToInt; presumably
  // this is a no-op because the mask already has type IntptrTy -- confirm.
  return IRB.CreateIntToPtr(
      IRB.CreateMul(
          IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
                        IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
          ShadowPtrMul),
      ShadowPtrTy);
}

// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. Returns the computed union Value.
1084 Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { 1085 if (V1 == DFS.ZeroShadow) 1086 return V2; 1087 if (V2 == DFS.ZeroShadow) 1088 return V1; 1089 if (V1 == V2) 1090 return V1; 1091 1092 auto V1Elems = ShadowElements.find(V1); 1093 auto V2Elems = ShadowElements.find(V2); 1094 if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) { 1095 if (std::includes(V1Elems->second.begin(), V1Elems->second.end(), 1096 V2Elems->second.begin(), V2Elems->second.end())) { 1097 return V1; 1098 } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(), 1099 V1Elems->second.begin(), V1Elems->second.end())) { 1100 return V2; 1101 } 1102 } else if (V1Elems != ShadowElements.end()) { 1103 if (V1Elems->second.count(V2)) 1104 return V1; 1105 } else if (V2Elems != ShadowElements.end()) { 1106 if (V2Elems->second.count(V1)) 1107 return V2; 1108 } 1109 1110 auto Key = std::make_pair(V1, V2); 1111 if (V1 > V2) 1112 std::swap(Key.first, Key.second); 1113 CachedCombinedShadow &CCS = CachedCombinedShadows[Key]; 1114 if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent())) 1115 return CCS.Shadow; 1116 1117 IRBuilder<> IRB(Pos); 1118 if (AvoidNewBlocks) { 1119 CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2}); 1120 Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 1121 Call->addParamAttr(0, Attribute::ZExt); 1122 Call->addParamAttr(1, Attribute::ZExt); 1123 1124 CCS.Block = Pos->getParent(); 1125 CCS.Shadow = Call; 1126 } else { 1127 BasicBlock *Head = Pos->getParent(); 1128 Value *Ne = IRB.CreateICmpNE(V1, V2); 1129 BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen( 1130 Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT)); 1131 IRBuilder<> ThenIRB(BI); 1132 CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2}); 1133 Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); 1134 Call->addParamAttr(0, Attribute::ZExt); 1135 Call->addParamAttr(1, Attribute::ZExt); 
1136 1137 BasicBlock *Tail = BI->getSuccessor(0); 1138 PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); 1139 Phi->addIncoming(Call, Call->getParent()); 1140 Phi->addIncoming(V1, Head); 1141 1142 CCS.Block = Tail; 1143 CCS.Shadow = Phi; 1144 } 1145 1146 std::set<Value *> UnionElems; 1147 if (V1Elems != ShadowElements.end()) { 1148 UnionElems = V1Elems->second; 1149 } else { 1150 UnionElems.insert(V1); 1151 } 1152 if (V2Elems != ShadowElements.end()) { 1153 UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end()); 1154 } else { 1155 UnionElems.insert(V2); 1156 } 1157 ShadowElements[CCS.Shadow] = std::move(UnionElems); 1158 1159 return CCS.Shadow; 1160 } 1161 1162 // A convenience function which folds the shadows of each of the operands 1163 // of the provided instruction Inst, inserting the IR before Inst. Returns 1164 // the computed union Value. 1165 Value *DFSanFunction::combineOperandShadows(Instruction *Inst) { 1166 if (Inst->getNumOperands() == 0) 1167 return DFS.ZeroShadow; 1168 1169 Value *Shadow = getShadow(Inst->getOperand(0)); 1170 for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) { 1171 Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst); 1172 } 1173 return Shadow; 1174 } 1175 1176 void DFSanVisitor::visitOperandShadowInst(Instruction &I) { 1177 Value *CombinedShadow = DFSF.combineOperandShadows(&I); 1178 DFSF.setShadow(&I, CombinedShadow); 1179 } 1180 1181 // Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where 1182 // Addr has alignment Align, and take the union of each of those shadows. 
Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
                                 Instruction *Pos) {
  // An alloca with an entry in AllocaShadowMap keeps its shadow in a dedicated
  // shadow alloca; read it directly.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto i = AllocaShadowMap.find(AI);
    if (i != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      return IRB.CreateLoad(DFS.ShadowTy, i->second);
    }
  }

  uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
  // If every underlying object is a function, a block address or a constant
  // global variable, the result is the zero shadow and no IR is emitted.
  SmallVector<const Value *, 2> Objs;
  GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
  bool AllConstants = true;
  for (const Value *Obj : Objs) {
    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
      continue;
    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
      continue;

    AllConstants = false;
    break;
  }
  if (AllConstants)
    return DFS.ZeroShadow;

  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
  // Small sizes are handled inline: nothing to load, a single shadow load, or
  // two loads whose results are unioned.
  switch (Size) {
  case 0:
    return DFS.ZeroShadow;
  case 1: {
    LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
    LI->setAlignment(ShadowAlign);
    return LI;
  }
  case 2: {
    IRBuilder<> IRB(Pos);
    Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
                                       ConstantInt::get(DFS.IntptrTy, 1));
    return combineShadows(
        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
  }
  }
  if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
    // Fast path for the common case where each byte has identical shadow: load
    // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
    // shadow is non-equal.
    BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
    IRBuilder<> FallbackIRB(FallbackBB);
    CallInst *FallbackCall = FallbackIRB.CreateCall(
        DFS.DFSanUnionLoadFn,
        {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
    FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);

    // Compare each of the shadows stored in the loaded 64 bits to each other,
    // by computing (WideShadow rotl ShadowWidth) == WideShadow.
    IRBuilder<> IRB(Pos);
    Value *WideAddr =
        IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
    Value *WideShadow =
        IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
    Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
    Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
    Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
    Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
    Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);

    // Split the block at Pos; everything from Pos onwards moves to Tail.
    BasicBlock *Head = Pos->getParent();
    BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());

    // Keep the dominator tree in sync: Tail is added under Head, and the
    // blocks Head used to immediately dominate are re-parented to Tail.
    if (DomTreeNode *OldNode = DT.getNode(Head)) {
      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());

      DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
      for (auto Child : Children)
        DT.changeImmediateDominator(Child, NewNode);
    }

    // In the following code LastBr will refer to the previous basic block's
    // conditional branch instruction, whose true successor is fixed up to point
    // to the next block during the loop below or to the tail after the final
    // iteration.
    BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
    ReplaceInstWithInst(Head->getTerminator(), LastBr);
    DT.addNewBlock(FallbackBB, Head);

    // Each further 64-bit word of shadow gets its own block, in which it is
    // compared against the first word; a mismatch branches to the fallback.
    for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
         Ofs += 64 / DFS.ShadowWidth) {
      BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
      DT.addNewBlock(NextBB, LastBr->getParent());
      IRBuilder<> NextIRB(NextBB);
      WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
                                   ConstantInt::get(DFS.IntptrTy, 1));
      Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
                                                        WideAddr, ShadowAlign);
      ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
      LastBr->setSuccessor(0, NextBB);
      LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
    }

    // Join both paths in Tail: the fallback call's result, or the truncated
    // first word when every comparison succeeded.
    LastBr->setSuccessor(0, Tail);
    FallbackIRB.CreateBr(Tail);
    PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
    Shadow->addIncoming(FallbackCall, FallbackBB);
    Shadow->addIncoming(TruncShadow, LastBr->getParent());
    return Shadow;
  }

  // General case: call DFSanUnionLoadFn with the shadow address and size.
  IRBuilder<> IRB(Pos);
  CallInst *FallbackCall = IRB.CreateCall(
      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
  FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
  return FallbackCall;
}

// Instruments a load: the loaded value's shadow is the union of the shadows of
// the bytes read, optionally combined with the shadow of the pointer itself
// when ClCombinePointerLabelsOnLoad is set.
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  if (Size == 0) {
    DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
    return;
  }

  // Use the load's alignment (or the ABI alignment of its type when none is
  // specified) only if requested; otherwise assume byte alignment.
  uint64_t Align;
  if (ClPreserveAlignment) {
    Align = LI.getAlignment();
    if (Align == 0)
      Align = DL.getABITypeAlignment(LI.getType());
  } else {
    Align = 1;
  }
  // NOTE(review): IRB is not referenced anywhere else in this function.
  IRBuilder<> IRB(&LI);
  Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
  if (ClCombinePointerLabelsOnLoad) {
    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
    Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI);
  }
  if (Shadow != DFSF.DFS.ZeroShadow)
    DFSF.NonZeroChecks.push_back(Shadow);

  DFSF.setShadow(&LI, Shadow);
}

// Generates IR, before Pos, that writes Shadow as the shadow of each of the
// Size bytes starting at Addr, where Addr has alignment Align.
void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
                                Value *Shadow, Instruction *Pos) {
  // An alloca with an entry in AllocaShadowMap keeps its shadow in a dedicated
  // shadow alloca; write it directly.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto i = AllocaShadowMap.find(AI);
    if (i != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      IRB.CreateStore(Shadow, i->second);
      return;
    }
  }

  uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
  IRBuilder<> IRB(Pos);
  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
  // Zero shadow: clear the whole range with a single store of an integer that
  // is Size * ShadowWidth bits wide.
  if (Shadow == DFS.ZeroShadow) {
    IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth);
    Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
    Value *ExtShadowAddr =
        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
    IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
    return;
  }

  // Non-zero shadow: store in 128-bit vector chunks while at least a full
  // vector of shadows remains, then finish with individual shadow stores.
  const unsigned ShadowVecSize = 128 / DFS.ShadowWidth;
  uint64_t Offset = 0;
  if (Size >= ShadowVecSize) {
    VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize);
    // Build a vector with Shadow in every lane.
    Value *ShadowVec = UndefValue::get(ShadowVecTy);
    for (unsigned i = 0; i != ShadowVecSize; ++i) {
      ShadowVec = IRB.CreateInsertElement(
          ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
    }
    Value *ShadowVecAddr =
        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
    do {
      Value *CurShadowVecAddr =
          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
      Size -= ShadowVecSize;
      ++Offset;
    } while (Size >= ShadowVecSize);
    // Convert the offset from vector units to shadow-element units for the
    // scalar loop below.
    Offset *= ShadowVecSize;
  }
  while (Size > 0) {
    Value *CurShadowAddr =
        IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
    IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
    --Size;
    ++Offset;
  }
}

// Instruments a store: writes the shadow of the stored value (optionally
// combined with the pointer's shadow when ClCombinePointerLabelsOnStore is
// set) to the shadow of the destination bytes.
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
  auto &DL = SI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
  if (Size == 0)
    return;

  // Same alignment policy as visitLoadInst.
  uint64_t Align;
  if (ClPreserveAlignment) {
    Align = SI.getAlignment();
    if (Align == 0)
      Align = DL.getABITypeAlignment(SI.getValueOperand()->getType());
  } else {
    Align = 1;
  }

  Value* Shadow = DFSF.getShadow(SI.getValueOperand());
  if (ClCombinePointerLabelsOnStore) {
    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
    Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
  }
  DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
}

// The visitors below all give the instruction the union of its operands'
// shadows via visitOperandShadowInst.

void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitOperandShadowInst(UO);
}

void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitOperandShadowInst(BO);
}

void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }

void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); }

void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  visitOperandShadowInst(GEPI);
}

void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitOperandShadowInst(I);
}

void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitOperandShadowInst(I);
}

void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitOperandShadowInst(I);
}

void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
  visitOperandShadowInst(I);
}

void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
  visitOperandShadowInst(I);
}

// Instruments an alloca. When every user is a load, or a store that uses the
// alloca as its pointer operand, a companion shadow alloca is created and
// registered in AllocaShadowMap so loadShadow/storeShadow can use it instead
// of shadow memory. The alloca's own (pointer) shadow is always zero.
void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
  bool AllLoadsStores = true;
  for (User *U : I.users()) {
    if (isa<LoadInst>(U))
      continue;

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // A store *of* the alloca's address (rather than *to* it) does not
      // qualify and falls through to the failure case below.
      if (SI->getPointerOperand() == &I)
        continue;
    }

    AllLoadsStores = false;
    break;
  }
  if (AllLoadsStores) {
    IRBuilder<> IRB(&I);
    DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
  }
  DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
}

// Instruments a select. With a vector condition the result shadow is the
// union of the condition, true and false shadows. Otherwise the shadow of the
// chosen operand is selected with the same condition (or reused directly when
// both operand shadows are the same Value) and unioned with the condition's
// shadow.
void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());

  if (isa<VectorType>(I.getCondition()->getType())) {
    DFSF.setShadow(
        &I,
        DFSF.combineShadows(
            CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I));
  } else {
    Value *ShadowSel;
    if (TrueShadow == FalseShadow) {
      ShadowSel = TrueShadow;
    } else {
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
    }
    DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I));
  }
}

// Instruments a memset by emitting, before it, a call to DFSanSetLabelFn with
// the shadow of the stored value, the destination cast to i8*, and the length
// converted to IntptrTy.
void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
  IRBuilder<> IRB(&I);
  Value *ValShadow = DFSF.getShadow(I.getValue());
  IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
                 {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
                                                                *DFSF.DFS.Ctx)),
                  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
}

// Instruments a memcpy/memmove-style transfer by emitting, before it, a call
// to the same callee that operates on the corresponding shadow addresses,
// with the length scaled by the shadow size in bytes (ShadowWidth / 8).
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);
  Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  Value *LenShadow = IRB.CreateMul(
      I.getLength(),
      ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  // Shadow alignment is the original alignment scaled by the shadow size, or
  // just the shadow size when alignment is not being preserved.
  if (ClPreserveAlignment) {
    MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
    MTI->setSourceAlignment(I.getSourceAlignment() * (DFSF.DFS.ShadowWidth / 8));
  } else {
    MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8);
    MTI->setSourceAlignment(DFSF.DFS.ShadowWidth / 8);
  }
}

// Instruments a return of a value in a function that is not using the native
// ABI. Under IA_TLS the value's shadow is stored to the return-value TLS
// slot; under IA_Args the returned operand is replaced by an aggregate whose
// element 0 is the original value and element 1 is its shadow.
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    switch (DFSF.IA) {
    case DataFlowSanitizer::IA_TLS: {
      Value *S = DFSF.getShadow(RI.getReturnValue());
      IRBuilder<> IRB(&RI);
      IRB.CreateStore(S, DFSF.getRetvalTLS());
      break;
    }
    case DataFlowSanitizer::IA_Args: {
      IRBuilder<> IRB(&RI);
      Type *RT = DFSF.F->getFunctionType()->getReturnType();
      Value *InsVal =
          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
      Value *InsShadow =
          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
      RI.setOperand(0, InsShadow);
      break;
    }
    }
  }
}

// Instruments a call or invoke.
//
// Intrinsics and inline asm are treated like ordinary operand-combining
// instructions. Calls whose callee has an entry in UnwrappedFnMap are
// redirected according to the callee's wrapper kind. All remaining calls are
// instrumented according to the instrumented ABI: argument shadows are passed
// through TLS slots or as extra arguments, and the return shadow is read back
// from TLS or extracted from the returned aggregate.
void DFSanVisitor::visitCallSite(CallSite CS) {
  Function *F = CS.getCalledFunction();
  if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) {
    visitOperandShadowInst(*CS.getInstruction());
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  IRBuilder<> IRB(CS.getInstruction());

  DenseMap<Value *, Function *>::iterator i =
      DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue());
  if (i != DFSF.DFS.UnwrappedFnMap.end()) {
    // Note: this F (the unwrapped function) shadows the outer F.
    Function *F = i->second;
    switch (DFSF.DFS.getWrapperKind(F)) {
    case DataFlowSanitizer::WK_Warning:
      // Call the original function, report it through DFSanUnimplementedFn,
      // and give the result a zero shadow.
      CS.setCalledFunction(F);
      IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                     IRB.CreateGlobalStringPtr(F->getName()));
      DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
      return;
    case DataFlowSanitizer::WK_Discard:
      // Call the original function and give the result a zero shadow.
      CS.setCalledFunction(F);
      DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
      return;
    case DataFlowSanitizer::WK_Functional:
      // Call the original function; the result's shadow is the union of the
      // operand shadows.
      CS.setCalledFunction(F);
      visitOperandShadowInst(*CS.getInstruction());
      return;
    case DataFlowSanitizer::WK_Custom:
      // Don't try to handle invokes of custom functions, it's too complicated.
      // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
      // wrapper.
      if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
        FunctionType *FT = F->getFunctionType();
        TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
        std::string CustomFName = "__dfsw_";
        CustomFName += F->getName();
        FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
            CustomFName, CustomFn.TransformedType);
        // Note: inside this if, CustomFn names the Function and shadows the
        // TransformedFunction declared above.
        if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
          CustomFn->copyAttributesFrom(F);

          // Custom functions returning non-void will write to the return label.
          if (!FT->getReturnType()->isVoidTy()) {
            CustomFn->removeAttributes(AttributeList::FunctionIndex,
                                       DFSF.DFS.ReadOnlyNoneAttrs);
          }
        }

        std::vector<Value *> Args;

        // First the fixed arguments. A function-pointer argument is replaced
        // by a pair: a trampoline function followed by the original pointer
        // cast to i8*.
        CallSite::arg_iterator i = CS.arg_begin();
        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
          Type *T = (*i)->getType();
          FunctionType *ParamFT;
          if (isa<PointerType>(T) &&
              (ParamFT = dyn_cast<FunctionType>(
                   cast<PointerType>(T)->getElementType()))) {
            std::string TName = "dfst";
            TName += utostr(FT->getNumParams() - n);
            TName += "$";
            TName += F->getName();
            // Note: this T (the trampoline) shadows the Type *T above.
            Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
            Args.push_back(T);
            Args.push_back(
                IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
          } else {
            Args.push_back(*i);
          }
        }

        // Then one shadow per fixed argument; remember where they start so
        // that they can be marked zeroext below.
        i = CS.arg_begin();
        const unsigned ShadowArgStart = Args.size();
        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
          Args.push_back(DFSF.getShadow(*i));

        // For a variadic callee, the shadows of the variadic arguments are
        // stored into an array alloca placed in the entry block, and a
        // pointer to its first element is passed.
        if (FT->isVarArg()) {
          auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
                                           CS.arg_size() - FT->getNumParams());
          auto *LabelVAAlloca = new AllocaInst(
              LabelVATy, getDataLayout().getAllocaAddrSpace(),
              "labelva", &DFSF.F->getEntryBlock().front());

          for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
            auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
            IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
          }

          Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
        }

        // A non-void callee also receives a pointer to a slot for the return
        // label; the slot is created once per function and reused.
        if (!FT->getReturnType()->isVoidTy()) {
          if (!DFSF.LabelReturnAlloca) {
            DFSF.LabelReturnAlloca =
              new AllocaInst(DFSF.DFS.ShadowTy,
                             getDataLayout().getAllocaAddrSpace(),
                             "labelreturn", &DFSF.F->getEntryBlock().front());
          }
          Args.push_back(DFSF.LabelReturnAlloca);
        }

        // Finally, the variadic arguments themselves.
        for (i = CS.arg_begin() + FT->getNumParams(); i != CS.arg_end(); ++i)
          Args.push_back(*i);

        CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
        CustomCI->setCallingConv(CI->getCallingConv());
        CustomCI->setAttributes(TransformFunctionAttributes(CustomFn,
            CI->getContext(), CI->getAttributes()));

        // Update the parameter attributes of the custom call instruction to
        // zero extend the shadow parameters. This is required for targets
        // which consider ShadowTy an illegal type.
        for (unsigned n = 0; n < FT->getNumParams(); n++) {
          const unsigned ArgNo = ShadowArgStart + n;
          if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
            CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
        }

        // The result's shadow is read back from the return-label slot.
        if (!FT->getReturnType()->isVoidTy()) {
          LoadInst *LabelLoad =
              IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
          DFSF.setShadow(CustomCI, LabelLoad);
        }

        CI->replaceAllUsesWith(CustomCI);
        CI->eraseFromParent();
        return;
      }
      // Invokes of custom functions fall through to the generic handling.
      break;
    }
  }

  FunctionType *FT = cast<FunctionType>(
      CS.getCalledValue()->getType()->getPointerElementType());
  // IA_TLS: store the shadow of each fixed argument to its TLS slot before
  // the call.
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
    for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
      IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)),
                      DFSF.getArgTLS(i, CS.getInstruction()));
    }
  }

  // For a call that produces a value, find the instruction before which the
  // return shadow can be materialized: the first instruction of the invoke's
  // normal destination (splitting the edge if that block has several
  // predecessors), or the instruction following a plain call.
  Instruction *Next = nullptr;
  if (!CS.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CS->getIterator() != CS->getParent()->end());
      Next = CS->getNextNode();
    }

    // IA_TLS: load the return shadow from the return-value TLS slot. The
    // load is added to SkipInsts so that it is not itself instrumented.
    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
      IRBuilder<> NextIRB(Next);
      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
      DFSF.SkipInsts.insert(LI);
      DFSF.setShadow(CS.getInstruction(), LI);
      DFSF.NonZeroChecks.push_back(LI);
    }
  }

  // Do all instrumentation for IA_Args down here to defer tampering with the
  // CFG in a way that SplitEdge may be able to detect.
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
    Value *Func =
        IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT));
    std::vector<Value *> Args;

    // Argument order: fixed arguments, their shadows, then (for variadic
    // callees) a pointer to an array of variadic shadows followed by the
    // variadic arguments themselves.
    CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
      Args.push_back(*i);

    i = CS.arg_begin();
    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
      Args.push_back(DFSF.getShadow(*i));

    if (FT->isVarArg()) {
      unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
      ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
      AllocaInst *VarArgShadow =
        new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
                       "", &DFSF.F->getEntryBlock().front());
      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
      for (unsigned n = 0; i != e; ++i, ++n) {
        IRB.CreateStore(
            DFSF.getShadow(*i),
            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
        Args.push_back(*i);
      }
    }

    // Build the replacement call/invoke with the transformed function type,
    // dropping return attributes incompatible with the new return type.
    CallSite NewCS;
    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
      NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    } else {
      NewCS = IRB.CreateCall(NewFT, Func, Args);
    }
    NewCS.setCallingConv(CS.getCallingConv());
    NewCS.setAttributes(CS.getAttributes().removeAttributes(
        *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
        AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType())));

    // Unpack the returned aggregate: element 0 replaces the original result
    // and element 1 becomes its shadow. Both extracts are added to SkipInsts
    // so that they are not themselves instrumented.
    if (Next) {
      ExtractValueInst *ExVal =
          ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next);
      DFSF.SkipInsts.insert(ExVal);
      ExtractValueInst *ExShadow =
          ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
      DFSF.SkipInsts.insert(ExShadow);
      DFSF.setShadow(ExVal, ExShadow);
      DFSF.NonZeroChecks.push_back(ExShadow);

      CS.getInstruction()->replaceAllUsesWith(ExVal);
    }

    CS.getInstruction()->eraseFromParent();
  }
}

// Instruments a phi node by creating a parallel shadow phi immediately before
// it. The shadow phi's incoming values are placeholders here; the pair is
// queued on PHIFixups so the real incoming shadows can be filled in later.
void DFSanVisitor::visitPHINode(PHINode &PN) {
  PHINode *ShadowPN =
      PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);

  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
  Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
  for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
       ++i) {
    ShadowPN->addIncoming(UndefShadow, *i);
  }

  DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
  DFSF.setShadow(&PN, ShadowPN);
}