xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/TypeSanitizer.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===----- TypeSanitizer.cpp - type-based-aliasing-violation detector -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of TypeSanitizer, a type-based-aliasing-violation
10 // detector.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/TypeSanitizer.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Analysis/MemoryLocation.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/InstIterator.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/ProfileData/InstrProf.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/Regex.h"
37 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
38 #include "llvm/Transforms/Utils/Local.h"
39 #include "llvm/Transforms/Utils/ModuleUtils.h"
40 
41 #include <cctype>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "tysan"
46 
// Names of the symbols this pass declares, defines or looks up in the module.

/// Name of the module constructor function.
static constexpr const char *kTysanModuleCtorName = "tysan.module_ctor";
/// Name of the runtime initialization entry point.
static constexpr const char *kTysanInitName = "__tysan_init";
/// Name of the runtime type-check callback.
static constexpr const char *kTysanCheckName = "__tysan_check";
/// Prefix prepended to every encoded type-descriptor global name.
static constexpr const char *kTysanGVNamePrefix = "__tysan_v1_";

/// Name of the global that is loaded to obtain the shadow memory base.
static constexpr const char *kTysanShadowMemoryAddress =
    "__tysan_shadow_memory_address";
/// Name of the global that is loaded to obtain the application memory mask.
static constexpr const char *kTysanAppMemMask = "__tysan_app_memory_mask";
55 
56 static cl::opt<bool>
57     ClWritesAlwaysSetType("tysan-writes-always-set-type",
58                           cl::desc("Writes always set the type"), cl::Hidden,
59                           cl::init(false));
60 
61 STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
62 
63 namespace {
64 
65 /// TypeSanitizer: instrument the code in module to find type-based aliasing
66 /// violations.
67 struct TypeSanitizer {
68   TypeSanitizer(Module &M);
69   bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
70   void instrumentGlobals(Module &M);
71 
72 private:
73   typedef SmallDenseMap<const MDNode *, GlobalVariable *, 8>
74       TypeDescriptorsMapTy;
75   typedef SmallDenseMap<const MDNode *, std::string, 8> TypeNameMapTy;
76 
77   void initializeCallbacks(Module &M);
78 
79   Instruction *getShadowBase(Function &F);
80   Instruction *getAppMemMask(Function &F);
81 
82   bool instrumentWithShadowUpdate(IRBuilder<> &IRB, const MDNode *TBAAMD,
83                                   Value *Ptr, uint64_t AccessSize, bool IsRead,
84                                   bool IsWrite, Value *ShadowBase,
85                                   Value *AppMemMask, bool ForceSetType,
86                                   bool SanitizeFunction,
87                                   TypeDescriptorsMapTy &TypeDescriptors,
88                                   const DataLayout &DL);
89 
90   /// Memory-related intrinsics/instructions reset the type of the destination
91   /// memory (including allocas and byval arguments).
92   bool instrumentMemInst(Value *I, Instruction *ShadowBase,
93                          Instruction *AppMemMask, const DataLayout &DL);
94 
95   std::string getAnonymousStructIdentifier(const MDNode *MD,
96                                            TypeNameMapTy &TypeNames);
97   bool generateTypeDescriptor(const MDNode *MD,
98                               TypeDescriptorsMapTy &TypeDescriptors,
99                               TypeNameMapTy &TypeNames, Module &M);
100   bool generateBaseTypeDescriptor(const MDNode *MD,
101                                   TypeDescriptorsMapTy &TypeDescriptors,
102                                   TypeNameMapTy &TypeNames, Module &M);
103 
104   const Triple TargetTriple;
105   Regex AnonNameRegex;
106   Type *IntptrTy;
107   uint64_t PtrShift;
108   IntegerType *OrdTy;
109 
110   /// Callbacks to run-time library are computed in initializeCallbacks.
111   FunctionCallee TysanCheck;
112   FunctionCallee TysanCtorFunction;
113 
114   /// Callback to set types for gloabls.
115   Function *TysanGlobalsSetTypeFunction;
116 };
117 } // namespace
118 
TypeSanitizer(Module & M)119 TypeSanitizer::TypeSanitizer(Module &M)
120     : TargetTriple(M.getTargetTriple()),
121       AnonNameRegex("^_ZTS.*N[1-9][0-9]*_GLOBAL__N") {
122   const DataLayout &DL = M.getDataLayout();
123   IntptrTy = DL.getIntPtrType(M.getContext());
124   PtrShift = countr_zero(IntptrTy->getPrimitiveSizeInBits() / 8);
125 
126   TysanGlobalsSetTypeFunction = M.getFunction("__tysan_set_globals_types");
127   initializeCallbacks(M);
128 }
129 
initializeCallbacks(Module & M)130 void TypeSanitizer::initializeCallbacks(Module &M) {
131   IRBuilder<> IRB(M.getContext());
132   OrdTy = IRB.getInt32Ty();
133 
134   AttributeList Attr;
135   Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
136   // Initialize the callbacks.
137   TysanCheck =
138       M.getOrInsertFunction(kTysanCheckName, Attr, IRB.getVoidTy(),
139                             IRB.getPtrTy(), // Pointer to data to be read.
140                             OrdTy,          // Size of the data in bytes.
141                             IRB.getPtrTy(), // Pointer to type descriptor.
142                             OrdTy           // Flags.
143       );
144 
145   TysanCtorFunction =
146       M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy());
147 }
148 
instrumentGlobals(Module & M)149 void TypeSanitizer::instrumentGlobals(Module &M) {
150   TysanGlobalsSetTypeFunction = nullptr;
151 
152   NamedMDNode *Globals = M.getNamedMetadata("llvm.tysan.globals");
153   if (!Globals)
154     return;
155 
156   TysanGlobalsSetTypeFunction = Function::Create(
157       FunctionType::get(Type::getVoidTy(M.getContext()), false),
158       GlobalValue::InternalLinkage, "__tysan_set_globals_types", &M);
159   BasicBlock *BB =
160       BasicBlock::Create(M.getContext(), "", TysanGlobalsSetTypeFunction);
161   ReturnInst::Create(M.getContext(), BB);
162 
163   const DataLayout &DL = M.getDataLayout();
164   Value *ShadowBase = getShadowBase(*TysanGlobalsSetTypeFunction);
165   Value *AppMemMask = getAppMemMask(*TysanGlobalsSetTypeFunction);
166   TypeDescriptorsMapTy TypeDescriptors;
167   TypeNameMapTy TypeNames;
168 
169   for (const auto &GMD : Globals->operands()) {
170     auto *GV = mdconst::dyn_extract_or_null<GlobalVariable>(GMD->getOperand(0));
171     if (!GV)
172       continue;
173     const MDNode *TBAAMD = cast<MDNode>(GMD->getOperand(1));
174     if (!generateBaseTypeDescriptor(TBAAMD, TypeDescriptors, TypeNames, M))
175       continue;
176 
177     IRBuilder<> IRB(
178         TysanGlobalsSetTypeFunction->getEntryBlock().getTerminator());
179     Type *AccessTy = GV->getValueType();
180     assert(AccessTy->isSized());
181     uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
182     instrumentWithShadowUpdate(IRB, TBAAMD, GV, AccessSize, false, false,
183                                ShadowBase, AppMemMask, true, false,
184                                TypeDescriptors, DL);
185   }
186 
187   if (TysanGlobalsSetTypeFunction) {
188     IRBuilder<> IRB(cast<Function>(TysanCtorFunction.getCallee())
189                         ->getEntryBlock()
190                         .getTerminator());
191     IRB.CreateCall(TysanGlobalsSetTypeFunction, {});
192   }
193 }
194 
195 static const char LUT[] = "0123456789abcdef";
196 
encodeName(StringRef Name)197 static std::string encodeName(StringRef Name) {
198   size_t Length = Name.size();
199   std::string Output = kTysanGVNamePrefix;
200   Output.reserve(Output.size() + 3 * Length);
201   for (size_t i = 0; i < Length; ++i) {
202     const unsigned char c = Name[i];
203     if (isalnum(c)) {
204       Output.push_back(c);
205       continue;
206     }
207 
208     if (c == '_') {
209       Output.append("__");
210       continue;
211     }
212 
213     Output.push_back('_');
214     Output.push_back(LUT[c >> 4]);
215     Output.push_back(LUT[c & 15]);
216   }
217 
218   return Output;
219 }
220 
221 std::string
getAnonymousStructIdentifier(const MDNode * MD,TypeNameMapTy & TypeNames)222 TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
223                                             TypeNameMapTy &TypeNames) {
224   MD5 Hash;
225 
226   for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
227     const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i));
228     if (!MemberNode)
229       return "";
230 
231     auto TNI = TypeNames.find(MemberNode);
232     std::string MemberName;
233     if (TNI != TypeNames.end()) {
234       MemberName = TNI->second;
235     } else {
236       if (MemberNode->getNumOperands() < 1)
237         return "";
238       MDString *MemberNameNode = dyn_cast<MDString>(MemberNode->getOperand(0));
239       if (!MemberNameNode)
240         return "";
241       MemberName = MemberNameNode->getString().str();
242       if (MemberName.empty())
243         MemberName = getAnonymousStructIdentifier(MemberNode, TypeNames);
244       if (MemberName.empty())
245         return "";
246       TypeNames[MemberNode] = MemberName;
247     }
248 
249     Hash.update(MemberName);
250     Hash.update("\0");
251 
252     uint64_t Offset =
253         mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue();
254     Hash.update(utostr(Offset));
255     Hash.update("\0");
256   }
257 
258   MD5::MD5Result HashResult;
259   Hash.final(HashResult);
260   return "__anonymous_" + std::string(HashResult.digest().str());
261 }
262 
generateBaseTypeDescriptor(const MDNode * MD,TypeDescriptorsMapTy & TypeDescriptors,TypeNameMapTy & TypeNames,Module & M)263 bool TypeSanitizer::generateBaseTypeDescriptor(
264     const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
265     TypeNameMapTy &TypeNames, Module &M) {
266   if (MD->getNumOperands() < 1)
267     return false;
268 
269   MDString *NameNode = dyn_cast<MDString>(MD->getOperand(0));
270   if (!NameNode)
271     return false;
272 
273   std::string Name = NameNode->getString().str();
274   if (Name.empty())
275     Name = getAnonymousStructIdentifier(MD, TypeNames);
276   if (Name.empty())
277     return false;
278   TypeNames[MD] = Name;
279   std::string EncodedName = encodeName(Name);
280 
281   GlobalVariable *GV =
282       dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName));
283   if (GV) {
284     TypeDescriptors[MD] = GV;
285     return true;
286   }
287 
288   SmallVector<std::pair<Constant *, uint64_t>> Members;
289   for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
290     const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i));
291     if (!MemberNode)
292       return false;
293 
294     Constant *Member;
295     auto TDI = TypeDescriptors.find(MemberNode);
296     if (TDI != TypeDescriptors.end()) {
297       Member = TDI->second;
298     } else {
299       if (!generateBaseTypeDescriptor(MemberNode, TypeDescriptors, TypeNames,
300                                       M))
301         return false;
302 
303       Member = TypeDescriptors[MemberNode];
304     }
305 
306     uint64_t Offset =
307         mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue();
308 
309     Members.push_back(std::make_pair(Member, Offset));
310   }
311 
312   // The descriptor for a scalar is:
313   //   [2, member count, [type pointer, offset]..., name]
314 
315   LLVMContext &C = MD->getContext();
316   Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
317   SmallVector<Type *> TDSubTys;
318   SmallVector<Constant *> TDSubData;
319 
320   auto PushTDSub = [&](Constant *C) {
321     TDSubTys.push_back(C->getType());
322     TDSubData.push_back(C);
323   };
324 
325   PushTDSub(ConstantInt::get(IntptrTy, 2));
326   PushTDSub(ConstantInt::get(IntptrTy, Members.size()));
327 
328   // Types that are in an anonymous namespace are local to this module.
329   // FIXME: This should really be marked by the frontend in the metadata
330   // instead of having us guess this from the mangled name. Moreover, the regex
331   // here can pick up (unlikely) names in the non-reserved namespace (because
332   // it needs to search into the type to pick up cases where the type in the
333   // anonymous namespace is a template parameter, etc.).
334   bool ShouldBeComdat = !AnonNameRegex.match(NameNode->getString());
335   for (auto &Member : Members) {
336     PushTDSub(Member.first);
337     PushTDSub(ConstantInt::get(IntptrTy, Member.second));
338   }
339 
340   PushTDSub(NameData);
341 
342   StructType *TDTy = StructType::get(C, TDSubTys);
343   Constant *TD = ConstantStruct::get(TDTy, TDSubData);
344 
345   GlobalVariable *TDGV =
346       new GlobalVariable(TDTy, true,
347                          !ShouldBeComdat ? GlobalValue::InternalLinkage
348                                          : GlobalValue::LinkOnceODRLinkage,
349                          TD, EncodedName);
350   M.insertGlobalVariable(TDGV);
351 
352   if (ShouldBeComdat) {
353     if (TargetTriple.isOSBinFormatELF()) {
354       Comdat *TDComdat = M.getOrInsertComdat(EncodedName);
355       TDGV->setComdat(TDComdat);
356     }
357     appendToUsed(M, TDGV);
358   }
359 
360   TypeDescriptors[MD] = TDGV;
361   return true;
362 }
363 
generateTypeDescriptor(const MDNode * MD,TypeDescriptorsMapTy & TypeDescriptors,TypeNameMapTy & TypeNames,Module & M)364 bool TypeSanitizer::generateTypeDescriptor(
365     const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
366     TypeNameMapTy &TypeNames, Module &M) {
367   // Here we need to generate a type descriptor corresponding to this TBAA
368   // metadata node. Under the current scheme there are three kinds of TBAA
369   // metadata nodes: scalar nodes, struct nodes, and struct tag nodes.
370 
371   if (MD->getNumOperands() < 3)
372     return false;
373 
374   const MDNode *BaseNode = dyn_cast<MDNode>(MD->getOperand(0));
375   if (!BaseNode)
376     return false;
377 
378   // This is a struct tag (element-access) node.
379 
380   const MDNode *AccessNode = dyn_cast<MDNode>(MD->getOperand(1));
381   if (!AccessNode)
382     return false;
383 
384   Constant *Base;
385   auto TDI = TypeDescriptors.find(BaseNode);
386   if (TDI != TypeDescriptors.end()) {
387     Base = TDI->second;
388   } else {
389     if (!generateBaseTypeDescriptor(BaseNode, TypeDescriptors, TypeNames, M))
390       return false;
391 
392     Base = TypeDescriptors[BaseNode];
393   }
394 
395   Constant *Access;
396   TDI = TypeDescriptors.find(AccessNode);
397   if (TDI != TypeDescriptors.end()) {
398     Access = TDI->second;
399   } else {
400     if (!generateBaseTypeDescriptor(AccessNode, TypeDescriptors, TypeNames, M))
401       return false;
402 
403     Access = TypeDescriptors[AccessNode];
404   }
405 
406   uint64_t Offset =
407       mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();
408   std::string EncodedName =
409       std::string(Base->getName()) + "_o_" + utostr(Offset);
410 
411   GlobalVariable *GV =
412       dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName));
413   if (GV) {
414     TypeDescriptors[MD] = GV;
415     return true;
416   }
417 
418   // The descriptor for a scalar is:
419   //   [1, base-type pointer, access-type pointer, offset]
420 
421   StructType *TDTy =
422       StructType::get(IntptrTy, Base->getType(), Access->getType(), IntptrTy);
423   Constant *TD =
424       ConstantStruct::get(TDTy, ConstantInt::get(IntptrTy, 1), Base, Access,
425                           ConstantInt::get(IntptrTy, Offset));
426 
427   bool ShouldBeComdat = cast<GlobalVariable>(Base)->getLinkage() ==
428                         GlobalValue::LinkOnceODRLinkage;
429 
430   GlobalVariable *TDGV =
431       new GlobalVariable(TDTy, true,
432                          !ShouldBeComdat ? GlobalValue::InternalLinkage
433                                          : GlobalValue::LinkOnceODRLinkage,
434                          TD, EncodedName);
435   M.insertGlobalVariable(TDGV);
436 
437   if (ShouldBeComdat) {
438     if (TargetTriple.isOSBinFormatELF()) {
439       Comdat *TDComdat = M.getOrInsertComdat(EncodedName);
440       TDGV->setComdat(TDComdat);
441     }
442     appendToUsed(M, TDGV);
443   }
444 
445   TypeDescriptors[MD] = TDGV;
446   return true;
447 }
448 
getShadowBase(Function & F)449 Instruction *TypeSanitizer::getShadowBase(Function &F) {
450   IRBuilder<> IRB(&F.front().front());
451   Constant *GlobalShadowAddress =
452       F.getParent()->getOrInsertGlobal(kTysanShadowMemoryAddress, IntptrTy);
453   return IRB.CreateLoad(IntptrTy, GlobalShadowAddress, "shadow.base");
454 }
455 
getAppMemMask(Function & F)456 Instruction *TypeSanitizer::getAppMemMask(Function &F) {
457   IRBuilder<> IRB(&F.front().front());
458   Value *GlobalAppMemMask =
459       F.getParent()->getOrInsertGlobal(kTysanAppMemMask, IntptrTy);
460   return IRB.CreateLoad(IntptrTy, GlobalAppMemMask, "app.mem.mask");
461 }
462 
463 /// Collect all loads and stores, and for what TBAA nodes we need to generate
464 /// type descriptors.
collectMemAccessInfo(Function & F,const TargetLibraryInfo & TLI,SmallVectorImpl<std::pair<Instruction *,MemoryLocation>> & MemoryAccesses,SmallSetVector<const MDNode *,8> & TBAAMetadata,SmallVectorImpl<Value * > & MemTypeResetInsts)465 void collectMemAccessInfo(
466     Function &F, const TargetLibraryInfo &TLI,
467     SmallVectorImpl<std::pair<Instruction *, MemoryLocation>> &MemoryAccesses,
468     SmallSetVector<const MDNode *, 8> &TBAAMetadata,
469     SmallVectorImpl<Value *> &MemTypeResetInsts) {
470   // Traverse all instructions, collect loads/stores/returns, check for calls.
471   for (Instruction &Inst : instructions(F)) {
472     // Skip memory accesses inserted by another instrumentation.
473     if (Inst.getMetadata(LLVMContext::MD_nosanitize))
474       continue;
475 
476     if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
477         isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst)) {
478       MemoryLocation MLoc = MemoryLocation::get(&Inst);
479 
480       // Swift errors are special (we can't introduce extra uses on them).
481       if (MLoc.Ptr->isSwiftError())
482         continue;
483 
484       // Skip non-address-space-0 pointers; we don't know how to handle them.
485       Type *PtrTy = cast<PointerType>(MLoc.Ptr->getType());
486       if (PtrTy->getPointerAddressSpace() != 0)
487         continue;
488 
489       if (MLoc.AATags.TBAA)
490         TBAAMetadata.insert(MLoc.AATags.TBAA);
491       MemoryAccesses.push_back(std::make_pair(&Inst, MLoc));
492     } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
493       if (CallInst *CI = dyn_cast<CallInst>(&Inst))
494         maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
495 
496       if (isa<MemIntrinsic, LifetimeIntrinsic>(Inst))
497         MemTypeResetInsts.push_back(&Inst);
498     } else if (isa<AllocaInst>(Inst)) {
499       MemTypeResetInsts.push_back(&Inst);
500     }
501   }
502 }
503 
sanitizeFunction(Function & F,const TargetLibraryInfo & TLI)504 bool TypeSanitizer::sanitizeFunction(Function &F,
505                                      const TargetLibraryInfo &TLI) {
506   if (F.isDeclaration())
507     return false;
508   // This is required to prevent instrumenting call to __tysan_init from within
509   // the module constructor.
510   if (&F == TysanCtorFunction.getCallee() || &F == TysanGlobalsSetTypeFunction)
511     return false;
512   initializeCallbacks(*F.getParent());
513 
514   // We need to collect all loads and stores, and know for what TBAA nodes we
515   // need to generate type descriptors.
516   SmallVector<std::pair<Instruction *, MemoryLocation>> MemoryAccesses;
517   SmallSetVector<const MDNode *, 8> TBAAMetadata;
518   SmallVector<Value *> MemTypeResetInsts;
519   collectMemAccessInfo(F, TLI, MemoryAccesses, TBAAMetadata, MemTypeResetInsts);
520 
521   // byval arguments also need their types reset (they're new stack memory,
522   // just like allocas).
523   for (auto &A : F.args())
524     if (A.hasByValAttr())
525       MemTypeResetInsts.push_back(&A);
526 
527   Module &M = *F.getParent();
528   TypeDescriptorsMapTy TypeDescriptors;
529   TypeNameMapTy TypeNames;
530   bool Res = false;
531   for (const MDNode *MD : TBAAMetadata) {
532     if (TypeDescriptors.count(MD))
533       continue;
534 
535     if (!generateTypeDescriptor(MD, TypeDescriptors, TypeNames, M))
536       return Res; // Giving up.
537 
538     Res = true;
539   }
540 
541   const DataLayout &DL = F.getParent()->getDataLayout();
542   bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeType);
543   bool NeedsInstrumentation =
544       MemTypeResetInsts.empty() && MemoryAccesses.empty();
545   Instruction *ShadowBase = NeedsInstrumentation ? nullptr : getShadowBase(F);
546   Instruction *AppMemMask = NeedsInstrumentation ? nullptr : getAppMemMask(F);
547   for (const auto &[I, MLoc] : MemoryAccesses) {
548     IRBuilder<> IRB(I);
549     assert(MLoc.Size.isPrecise());
550     if (instrumentWithShadowUpdate(
551             IRB, MLoc.AATags.TBAA, const_cast<Value *>(MLoc.Ptr),
552             MLoc.Size.getValue(), I->mayReadFromMemory(), I->mayWriteToMemory(),
553             ShadowBase, AppMemMask, false, SanitizeFunction, TypeDescriptors,
554             DL)) {
555       ++NumInstrumentedAccesses;
556       Res = true;
557     }
558   }
559 
560   for (auto Inst : MemTypeResetInsts)
561     Res |= instrumentMemInst(Inst, ShadowBase, AppMemMask, DL);
562 
563   return Res;
564 }
565 
convertToShadowDataInt(IRBuilder<> & IRB,Value * Ptr,Type * IntptrTy,uint64_t PtrShift,Value * ShadowBase,Value * AppMemMask)566 static Value *convertToShadowDataInt(IRBuilder<> &IRB, Value *Ptr,
567                                      Type *IntptrTy, uint64_t PtrShift,
568                                      Value *ShadowBase, Value *AppMemMask) {
569   return IRB.CreateAdd(
570       IRB.CreateShl(
571           IRB.CreateAnd(IRB.CreatePtrToInt(Ptr, IntptrTy, "app.ptr.int"),
572                         AppMemMask, "app.ptr.masked"),
573           PtrShift, "app.ptr.shifted"),
574       ShadowBase, "shadow.ptr.int");
575 }
576 
/// Emits, at the insertion point of \p IRB, the shadow-memory instrumentation
/// for one access of \p AccessSize bytes at \p Ptr.
///
/// The type descriptor of the access is looked up in \p TypeDescriptors using
/// \p TBAAMD; when \p TBAAMD is null a null descriptor pointer is used. The
/// shadow address is computed by convertToShadowDataInt from \p Ptr,
/// \p ShadowBase and \p AppMemMask.
///
/// One of three code shapes is generated:
///  - \p ForceSetType, or a write while ClWritesAlwaysSetType is enabled: the
///    type is stored to shadow memory unconditionally.
///  - \p SanitizeFunction is false: the type is stored only if the shadow
///    descriptor loaded for the first byte is null.
///  - otherwise: the full check described in the comment further down, which
///    may call the __tysan_check runtime callback with flags
///    (IsRead | IsWrite << 1).
///
/// "Setting the type" stores the descriptor in the shadow slot of the first
/// byte and the value -i (as a pointer) in the slot of byte i for every other
/// byte of the access.
///
/// \p DL is not used by this function. Every path returns true.
bool TypeSanitizer::instrumentWithShadowUpdate(
    IRBuilder<> &IRB, const MDNode *TBAAMD, Value *Ptr, uint64_t AccessSize,
    bool IsRead, bool IsWrite, Value *ShadowBase, Value *AppMemMask,
    bool ForceSetType, bool SanitizeFunction,
    TypeDescriptorsMapTy &TypeDescriptors, const DataLayout &DL) {
  Constant *TDGV;
  if (TBAAMD)
    TDGV = TypeDescriptors[TBAAMD];
  else
    TDGV = Constant::getNullValue(IRB.getPtrTy());

  Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy());

  Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
                                                ShadowBase, AppMemMask);
  Type *Int8PtrPtrTy = PointerType::get(IRB.getContext(), 0);
  Value *ShadowData =
      IRB.CreateIntToPtr(ShadowDataInt, Int8PtrPtrTy, "shadow.ptr");

  // Emits, at IRB's current insertion point, the stores that record this
  // access's type in shadow memory.
  auto SetType = [&]() {
    IRB.CreateStore(TD, ShadowData);

    // Now fill the remainder of the shadow memory corresponding to the
    // remainder of the bytes of the type with a bad type descriptor.
    for (uint64_t i = 1; i < AccessSize; ++i) {
      Value *BadShadowData = IRB.CreateIntToPtr(
          IRB.CreateAdd(ShadowDataInt,
                        ConstantInt::get(IntptrTy, i << PtrShift),
                        "shadow.byte." + Twine(i) + ".offset"),
          Int8PtrPtrTy, "shadow.byte." + Twine(i) + ".ptr");

      // This is the TD value, -i, which is used to indicate that the byte is
      // i bytes after the first byte of the type.
      Value *BadTD =
          IRB.CreateIntToPtr(ConstantInt::getSigned(IntptrTy, -i),
                             IRB.getPtrTy(), "bad.descriptor" + Twine(i));
      IRB.CreateStore(BadTD, BadShadowData);
    }
  };

  if (ForceSetType || (ClWritesAlwaysSetType && IsWrite)) {
    // In the mode where writes always set the type, for a write (which does
    // not also read), we just set the type.
    SetType();
    return true;
  }

  assert((!ClWritesAlwaysSetType || IsRead) &&
         "should have handled case above");
  LLVMContext &C = IRB.getContext();
  // Branch weights that mark the "then" side of the splits below as unlikely.
  MDNode *UnlikelyBW = MDBuilder(C).createBranchWeights(1, 100000);

  if (!SanitizeFunction) {
    // If we're not sanitizing this function, then we only care whether we
    // need to *set* the type.
    Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc");
    Value *NullTDCmp = IRB.CreateIsNull(LoadedTD, "desc.set");
    Instruction *NullTDTerm = SplitBlockAndInsertIfThen(
        NullTDCmp, &*IRB.GetInsertPoint(), false, UnlikelyBW);
    IRB.SetInsertPoint(NullTDTerm);
    NullTDTerm->getParent()->setName("set.type");
    SetType();
    return true;
  }
  // We need to check the type here. If the type is unknown, then the read
  // sets the type. If the type is known, then it is checked. If the type
  // doesn't match, then we call the runtime (which may yet determine that
  // the mismatch is okay).
  //
  // The checks generated below have the following structure.
  //
  //   ; First we load the descriptor for the load from shadow memory and
  //   ; compare it against the type descriptor for the current access type.
  //   %shadow.desc = load ptr %shadow.data
  //   %bad.desc = icmp ne %shadow.desc, %td
  //   br %bad.desc, %bad.bb, %good.bb
  //
  // bad.bb:
  //   %shadow.desc.null = icmp eq %shadow.desc, null
  //   br %shadow.desc.null, %null.td.bb, %good.td.bb
  //
  // null.td.bb:
  //   ; The type is unknown, set it if all bytes in the value are also unknown.
  //   ; To check, we load the shadow data for all bytes of the access. For the
  //   ; pseudo code below, assume an access of size 1.
  //   %shadow.data.int = add %shadow.data.int, 0
  //   %l = load (inttoptr %shadow.data.int)
  //   %is.not.null = icmp ne %l, null
  //   %not.all.unknown = %is.not.null
  //   br %not.all.unknown, before.set.type.bb
  //
  // before.set.type.bb:
  //   ; Call runtime to check mismatch.
  //   call void @__tysan_check()
  //   br %set.type.bb
  //
  // set.type.bb:
  //   ; Now fill the remainder of the shadow memory corresponding to the
  //   ; remainder of the bytes of the type with a bad type descriptor.
  //   store %TD, %shadow.data
  //   br %continue.bb
  //
  // good.td.bb:
  //   ; We have a non-trivial mismatch. Call the runtime.
  //   call void @__tysan_check()
  //   br %continue.bb
  //
  // good.bb:
  //  ; We appear to have the right type. Make sure that all other bytes in
  //  ; the type are still marked as interior bytes. If not, call the runtime.
  //   %shadow.data.int = add %shadow.data.int, 0
  //   %l = load (inttoptr %shadow.data.int)
  //   %not.all.interior = icmp sge %l, 0
  //   br %not.all.interior, label %check.rt.bb, label %continue.bb
  //
  //  check.rt.bb:
  //   call void @__tysan_check()
  //   br %continue.bb

  // Bit 0 of the flags is set for reads, bit 1 for writes.
  Constant *Flags = ConstantInt::get(OrdTy, int(IsRead) | (int(IsWrite) << 1));

  Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc");
  Value *BadTDCmp = IRB.CreateICmpNE(LoadedTD, TD, "bad.desc");
  Instruction *BadTDTerm, *GoodTDTerm;
  SplitBlockAndInsertIfThenElse(BadTDCmp, &*IRB.GetInsertPoint(), &BadTDTerm,
                                &GoodTDTerm, UnlikelyBW);
  IRB.SetInsertPoint(BadTDTerm);

  // We now know that the types did not match (we're on the slow path). If
  // the type is unknown, then set it.
  Value *NullTDCmp = IRB.CreateIsNull(LoadedTD);
  Instruction *NullTDTerm, *MismatchTerm;
  SplitBlockAndInsertIfThenElse(NullTDCmp, &*IRB.GetInsertPoint(), &NullTDTerm,
                                &MismatchTerm);

  // If the type is unknown, then set the type.
  IRB.SetInsertPoint(NullTDTerm);

  // We're about to set the type. Make sure that all bytes in the value are
  // also of unknown type.
  Value *Size = ConstantInt::get(OrdTy, AccessSize);
  Value *NotAllUnkTD = IRB.getFalse();
  for (uint64_t i = 1; i < AccessSize; ++i) {
    Value *UnkShadowData = IRB.CreateIntToPtr(
        IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)),
        Int8PtrPtrTy);
    Value *ILdTD = IRB.CreateLoad(IRB.getPtrTy(), UnkShadowData);
    NotAllUnkTD = IRB.CreateOr(NotAllUnkTD, IRB.CreateIsNotNull(ILdTD));
  }

  // Remember where the type-setting stores go; the split below inserts the
  // runtime call on a conditional path in front of that point.
  Instruction *BeforeSetType = &*IRB.GetInsertPoint();
  Instruction *BadUTDTerm =
      SplitBlockAndInsertIfThen(NotAllUnkTD, BeforeSetType, false, UnlikelyBW);
  IRB.SetInsertPoint(BadUTDTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});

  IRB.SetInsertPoint(BeforeSetType);
  SetType();

  // We have a non-trivial mismatch. Call the runtime.
  IRB.SetInsertPoint(MismatchTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});

  // We appear to have the right type. Make sure that all other bytes in
  // the type are still marked as interior bytes. If not, call the runtime.
  IRB.SetInsertPoint(GoodTDTerm);
  Value *NotAllBadTD = IRB.getFalse();
  for (uint64_t i = 1; i < AccessSize; ++i) {
    Value *BadShadowData = IRB.CreateIntToPtr(
        IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)),
        Int8PtrPtrTy);
    Value *ILdTD = IRB.CreatePtrToInt(
        IRB.CreateLoad(IRB.getPtrTy(), BadShadowData), IntptrTy);
    NotAllBadTD = IRB.CreateOr(
        NotAllBadTD, IRB.CreateICmpSGE(ILdTD, ConstantInt::get(IntptrTy, 0)));
  }

  Instruction *BadITDTerm = SplitBlockAndInsertIfThen(
      NotAllBadTD, &*IRB.GetInsertPoint(), false, UnlikelyBW);
  IRB.SetInsertPoint(BadITDTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});
  return true;
}
763 
instrumentMemInst(Value * V,Instruction * ShadowBase,Instruction * AppMemMask,const DataLayout & DL)764 bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
765                                       Instruction *AppMemMask,
766                                       const DataLayout &DL) {
767   BasicBlock::iterator IP;
768   BasicBlock *BB;
769   Function *F;
770 
771   if (auto *I = dyn_cast<Instruction>(V)) {
772     IP = BasicBlock::iterator(I);
773     BB = I->getParent();
774     F = BB->getParent();
775   } else {
776     auto *A = cast<Argument>(V);
777     F = A->getParent();
778     BB = &F->getEntryBlock();
779     IP = BB->getFirstInsertionPt();
780 
781     // Find the next insert point after both ShadowBase and AppMemMask.
782     if (IP->comesBefore(ShadowBase))
783       IP = ShadowBase->getNextNode()->getIterator();
784     if (IP->comesBefore(AppMemMask))
785       IP = AppMemMask->getNextNode()->getIterator();
786   }
787 
788   Value *Dest, *Size, *Src = nullptr;
789   bool NeedsMemMove = false;
790   IRBuilder<> IRB(BB, IP);
791 
792   if (auto *A = dyn_cast<Argument>(V)) {
793     assert(A->hasByValAttr() && "Type reset for non-byval argument?");
794 
795     Dest = A;
796     Size =
797         ConstantInt::get(IntptrTy, DL.getTypeAllocSize(A->getParamByValType()));
798   } else {
799     auto *I = cast<Instruction>(V);
800     if (auto *MI = dyn_cast<MemIntrinsic>(I)) {
801       if (MI->getDestAddressSpace() != 0)
802         return false;
803 
804       Dest = MI->getDest();
805       Size = MI->getLength();
806 
807       if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
808         if (MTI->getSourceAddressSpace() == 0) {
809           Src = MTI->getSource();
810           NeedsMemMove = isa<MemMoveInst>(MTI);
811         }
812       }
813     } else if (auto *II = dyn_cast<LifetimeIntrinsic>(I)) {
814       Size = II->getArgOperand(0);
815       Dest = II->getArgOperand(1);
816     } else if (auto *AI = dyn_cast<AllocaInst>(I)) {
817       // We need to clear the types for new stack allocations (or else we might
818       // read stale type information from a previous function execution).
819 
820       IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(I)));
821       IRB.SetInstDebugLocation(I);
822 
823       Size = IRB.CreateMul(
824           IRB.CreateZExtOrTrunc(AI->getArraySize(), IntptrTy),
825           ConstantInt::get(IntptrTy,
826                            DL.getTypeAllocSize(AI->getAllocatedType())));
827       Dest = I;
828     } else {
829       return false;
830     }
831   }
832 
833   if (!ShadowBase)
834     ShadowBase = getShadowBase(*F);
835   if (!AppMemMask)
836     AppMemMask = getAppMemMask(*F);
837 
838   Value *ShadowDataInt = IRB.CreateAdd(
839       IRB.CreateShl(
840           IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
841           PtrShift),
842       ShadowBase);
843   Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());
844 
845   if (!Src) {
846     IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift),
847                      Align(1ull << PtrShift));
848     return true;
849   }
850 
851   Value *SrcShadowDataInt = IRB.CreateAdd(
852       IRB.CreateShl(
853           IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
854           PtrShift),
855       ShadowBase);
856   Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());
857 
858   if (NeedsMemMove) {
859     IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
860                       Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
861   } else {
862     IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
863                      Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
864   }
865 
866   return true;
867 }
868 
run(Module & M,ModuleAnalysisManager & MAM)869 PreservedAnalyses TypeSanitizerPass::run(Module &M,
870                                          ModuleAnalysisManager &MAM) {
871   Function *TysanCtorFunction;
872   std::tie(TysanCtorFunction, std::ignore) =
873       createSanitizerCtorAndInitFunctions(M, kTysanModuleCtorName,
874                                           kTysanInitName, /*InitArgTypes=*/{},
875                                           /*InitArgs=*/{});
876 
877   TypeSanitizer TySan(M);
878   TySan.instrumentGlobals(M);
879   appendToGlobalCtors(M, TysanCtorFunction, 0);
880 
881   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
882   for (Function &F : M) {
883     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
884     TySan.sanitizeFunction(F, TLI);
885   }
886 
887   return PreservedAnalyses::none();
888 }
889