1 //===----- TypeSanitizer.cpp - type-based-aliasing-violation detector -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of TypeSanitizer, a type-based-aliasing-violation
10 // detector.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Transforms/Instrumentation/TypeSanitizer.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Analysis/MemoryLocation.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/InstIterator.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/Intrinsics.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/ProfileData/InstrProf.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/Regex.h"
37 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
38 #include "llvm/Transforms/Utils/Local.h"
39 #include "llvm/Transforms/Utils/ModuleUtils.h"
40
41 #include <cctype>
42
43 using namespace llvm;
44
45 #define DEBUG_TYPE "tysan"
46
// Name given to the module constructor function.
static const char *const kTysanModuleCtorName = "tysan.module_ctor";
// Name of the init function requested together with the module constructor.
static const char *const kTysanInitName = "__tysan_init";
// Name of the run-time check callback declared in initializeCallbacks().
static const char *const kTysanCheckName = "__tysan_check";
// Prefix that encodeName() puts in front of every encoded type name.
static const char *const kTysanGVNamePrefix = "__tysan_v1_";

// Name of the global whose value is loaded as the shadow base
// (see getShadowBase()).
static const char *const kTysanShadowMemoryAddress =
    "__tysan_shadow_memory_address";
// Name of the global whose value is loaded as the application memory mask
// (see getAppMemMask()).
static const char *const kTysanAppMemMask = "__tysan_app_memory_mask";

// Hidden option, off by default. When set, an instrumented write stores the
// type descriptor into shadow memory directly instead of emitting a check
// (see instrumentWithShadowUpdate()).
static cl::opt<bool>
    ClWritesAlwaysSetType("tysan-writes-always-set-type",
                          cl::desc("Writes always set the type"), cl::Hidden,
                          cl::init(false));

// Incremented once per memory access instrumented by sanitizeFunction().
STATISTIC(NumInstrumentedAccesses, "Number of instrumented accesses");
62
63 namespace {
64
65 /// TypeSanitizer: instrument the code in module to find type-based aliasing
66 /// violations.
67 struct TypeSanitizer {
68 TypeSanitizer(Module &M);
69 bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
70 void instrumentGlobals(Module &M);
71
72 private:
73 typedef SmallDenseMap<const MDNode *, GlobalVariable *, 8>
74 TypeDescriptorsMapTy;
75 typedef SmallDenseMap<const MDNode *, std::string, 8> TypeNameMapTy;
76
77 void initializeCallbacks(Module &M);
78
79 Instruction *getShadowBase(Function &F);
80 Instruction *getAppMemMask(Function &F);
81
82 bool instrumentWithShadowUpdate(IRBuilder<> &IRB, const MDNode *TBAAMD,
83 Value *Ptr, uint64_t AccessSize, bool IsRead,
84 bool IsWrite, Value *ShadowBase,
85 Value *AppMemMask, bool ForceSetType,
86 bool SanitizeFunction,
87 TypeDescriptorsMapTy &TypeDescriptors,
88 const DataLayout &DL);
89
90 /// Memory-related intrinsics/instructions reset the type of the destination
91 /// memory (including allocas and byval arguments).
92 bool instrumentMemInst(Value *I, Instruction *ShadowBase,
93 Instruction *AppMemMask, const DataLayout &DL);
94
95 std::string getAnonymousStructIdentifier(const MDNode *MD,
96 TypeNameMapTy &TypeNames);
97 bool generateTypeDescriptor(const MDNode *MD,
98 TypeDescriptorsMapTy &TypeDescriptors,
99 TypeNameMapTy &TypeNames, Module &M);
100 bool generateBaseTypeDescriptor(const MDNode *MD,
101 TypeDescriptorsMapTy &TypeDescriptors,
102 TypeNameMapTy &TypeNames, Module &M);
103
104 const Triple TargetTriple;
105 Regex AnonNameRegex;
106 Type *IntptrTy;
107 uint64_t PtrShift;
108 IntegerType *OrdTy;
109
110 /// Callbacks to run-time library are computed in initializeCallbacks.
111 FunctionCallee TysanCheck;
112 FunctionCallee TysanCtorFunction;
113
114 /// Callback to set types for gloabls.
115 Function *TysanGlobalsSetTypeFunction;
116 };
117 } // namespace
118
TypeSanitizer(Module & M)119 TypeSanitizer::TypeSanitizer(Module &M)
120 : TargetTriple(M.getTargetTriple()),
121 AnonNameRegex("^_ZTS.*N[1-9][0-9]*_GLOBAL__N") {
122 const DataLayout &DL = M.getDataLayout();
123 IntptrTy = DL.getIntPtrType(M.getContext());
124 PtrShift = countr_zero(IntptrTy->getPrimitiveSizeInBits() / 8);
125
126 TysanGlobalsSetTypeFunction = M.getFunction("__tysan_set_globals_types");
127 initializeCallbacks(M);
128 }
129
initializeCallbacks(Module & M)130 void TypeSanitizer::initializeCallbacks(Module &M) {
131 IRBuilder<> IRB(M.getContext());
132 OrdTy = IRB.getInt32Ty();
133
134 AttributeList Attr;
135 Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
136 // Initialize the callbacks.
137 TysanCheck =
138 M.getOrInsertFunction(kTysanCheckName, Attr, IRB.getVoidTy(),
139 IRB.getPtrTy(), // Pointer to data to be read.
140 OrdTy, // Size of the data in bytes.
141 IRB.getPtrTy(), // Pointer to type descriptor.
142 OrdTy // Flags.
143 );
144
145 TysanCtorFunction =
146 M.getOrInsertFunction(kTysanModuleCtorName, Attr, IRB.getVoidTy());
147 }
148
instrumentGlobals(Module & M)149 void TypeSanitizer::instrumentGlobals(Module &M) {
150 TysanGlobalsSetTypeFunction = nullptr;
151
152 NamedMDNode *Globals = M.getNamedMetadata("llvm.tysan.globals");
153 if (!Globals)
154 return;
155
156 TysanGlobalsSetTypeFunction = Function::Create(
157 FunctionType::get(Type::getVoidTy(M.getContext()), false),
158 GlobalValue::InternalLinkage, "__tysan_set_globals_types", &M);
159 BasicBlock *BB =
160 BasicBlock::Create(M.getContext(), "", TysanGlobalsSetTypeFunction);
161 ReturnInst::Create(M.getContext(), BB);
162
163 const DataLayout &DL = M.getDataLayout();
164 Value *ShadowBase = getShadowBase(*TysanGlobalsSetTypeFunction);
165 Value *AppMemMask = getAppMemMask(*TysanGlobalsSetTypeFunction);
166 TypeDescriptorsMapTy TypeDescriptors;
167 TypeNameMapTy TypeNames;
168
169 for (const auto &GMD : Globals->operands()) {
170 auto *GV = mdconst::dyn_extract_or_null<GlobalVariable>(GMD->getOperand(0));
171 if (!GV)
172 continue;
173 const MDNode *TBAAMD = cast<MDNode>(GMD->getOperand(1));
174 if (!generateBaseTypeDescriptor(TBAAMD, TypeDescriptors, TypeNames, M))
175 continue;
176
177 IRBuilder<> IRB(
178 TysanGlobalsSetTypeFunction->getEntryBlock().getTerminator());
179 Type *AccessTy = GV->getValueType();
180 assert(AccessTy->isSized());
181 uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
182 instrumentWithShadowUpdate(IRB, TBAAMD, GV, AccessSize, false, false,
183 ShadowBase, AppMemMask, true, false,
184 TypeDescriptors, DL);
185 }
186
187 if (TysanGlobalsSetTypeFunction) {
188 IRBuilder<> IRB(cast<Function>(TysanCtorFunction.getCallee())
189 ->getEntryBlock()
190 .getTerminator());
191 IRB.CreateCall(TysanGlobalsSetTypeFunction, {});
192 }
193 }
194
195 static const char LUT[] = "0123456789abcdef";
196
encodeName(StringRef Name)197 static std::string encodeName(StringRef Name) {
198 size_t Length = Name.size();
199 std::string Output = kTysanGVNamePrefix;
200 Output.reserve(Output.size() + 3 * Length);
201 for (size_t i = 0; i < Length; ++i) {
202 const unsigned char c = Name[i];
203 if (isalnum(c)) {
204 Output.push_back(c);
205 continue;
206 }
207
208 if (c == '_') {
209 Output.append("__");
210 continue;
211 }
212
213 Output.push_back('_');
214 Output.push_back(LUT[c >> 4]);
215 Output.push_back(LUT[c & 15]);
216 }
217
218 return Output;
219 }
220
221 std::string
getAnonymousStructIdentifier(const MDNode * MD,TypeNameMapTy & TypeNames)222 TypeSanitizer::getAnonymousStructIdentifier(const MDNode *MD,
223 TypeNameMapTy &TypeNames) {
224 MD5 Hash;
225
226 for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
227 const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i));
228 if (!MemberNode)
229 return "";
230
231 auto TNI = TypeNames.find(MemberNode);
232 std::string MemberName;
233 if (TNI != TypeNames.end()) {
234 MemberName = TNI->second;
235 } else {
236 if (MemberNode->getNumOperands() < 1)
237 return "";
238 MDString *MemberNameNode = dyn_cast<MDString>(MemberNode->getOperand(0));
239 if (!MemberNameNode)
240 return "";
241 MemberName = MemberNameNode->getString().str();
242 if (MemberName.empty())
243 MemberName = getAnonymousStructIdentifier(MemberNode, TypeNames);
244 if (MemberName.empty())
245 return "";
246 TypeNames[MemberNode] = MemberName;
247 }
248
249 Hash.update(MemberName);
250 Hash.update("\0");
251
252 uint64_t Offset =
253 mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue();
254 Hash.update(utostr(Offset));
255 Hash.update("\0");
256 }
257
258 MD5::MD5Result HashResult;
259 Hash.final(HashResult);
260 return "__anonymous_" + std::string(HashResult.digest().str());
261 }
262
generateBaseTypeDescriptor(const MDNode * MD,TypeDescriptorsMapTy & TypeDescriptors,TypeNameMapTy & TypeNames,Module & M)263 bool TypeSanitizer::generateBaseTypeDescriptor(
264 const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
265 TypeNameMapTy &TypeNames, Module &M) {
266 if (MD->getNumOperands() < 1)
267 return false;
268
269 MDString *NameNode = dyn_cast<MDString>(MD->getOperand(0));
270 if (!NameNode)
271 return false;
272
273 std::string Name = NameNode->getString().str();
274 if (Name.empty())
275 Name = getAnonymousStructIdentifier(MD, TypeNames);
276 if (Name.empty())
277 return false;
278 TypeNames[MD] = Name;
279 std::string EncodedName = encodeName(Name);
280
281 GlobalVariable *GV =
282 dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName));
283 if (GV) {
284 TypeDescriptors[MD] = GV;
285 return true;
286 }
287
288 SmallVector<std::pair<Constant *, uint64_t>> Members;
289 for (int i = 1, e = MD->getNumOperands(); i < e; i += 2) {
290 const MDNode *MemberNode = dyn_cast<MDNode>(MD->getOperand(i));
291 if (!MemberNode)
292 return false;
293
294 Constant *Member;
295 auto TDI = TypeDescriptors.find(MemberNode);
296 if (TDI != TypeDescriptors.end()) {
297 Member = TDI->second;
298 } else {
299 if (!generateBaseTypeDescriptor(MemberNode, TypeDescriptors, TypeNames,
300 M))
301 return false;
302
303 Member = TypeDescriptors[MemberNode];
304 }
305
306 uint64_t Offset =
307 mdconst::extract<ConstantInt>(MD->getOperand(i + 1))->getZExtValue();
308
309 Members.push_back(std::make_pair(Member, Offset));
310 }
311
312 // The descriptor for a scalar is:
313 // [2, member count, [type pointer, offset]..., name]
314
315 LLVMContext &C = MD->getContext();
316 Constant *NameData = ConstantDataArray::getString(C, NameNode->getString());
317 SmallVector<Type *> TDSubTys;
318 SmallVector<Constant *> TDSubData;
319
320 auto PushTDSub = [&](Constant *C) {
321 TDSubTys.push_back(C->getType());
322 TDSubData.push_back(C);
323 };
324
325 PushTDSub(ConstantInt::get(IntptrTy, 2));
326 PushTDSub(ConstantInt::get(IntptrTy, Members.size()));
327
328 // Types that are in an anonymous namespace are local to this module.
329 // FIXME: This should really be marked by the frontend in the metadata
330 // instead of having us guess this from the mangled name. Moreover, the regex
331 // here can pick up (unlikely) names in the non-reserved namespace (because
332 // it needs to search into the type to pick up cases where the type in the
333 // anonymous namespace is a template parameter, etc.).
334 bool ShouldBeComdat = !AnonNameRegex.match(NameNode->getString());
335 for (auto &Member : Members) {
336 PushTDSub(Member.first);
337 PushTDSub(ConstantInt::get(IntptrTy, Member.second));
338 }
339
340 PushTDSub(NameData);
341
342 StructType *TDTy = StructType::get(C, TDSubTys);
343 Constant *TD = ConstantStruct::get(TDTy, TDSubData);
344
345 GlobalVariable *TDGV =
346 new GlobalVariable(TDTy, true,
347 !ShouldBeComdat ? GlobalValue::InternalLinkage
348 : GlobalValue::LinkOnceODRLinkage,
349 TD, EncodedName);
350 M.insertGlobalVariable(TDGV);
351
352 if (ShouldBeComdat) {
353 if (TargetTriple.isOSBinFormatELF()) {
354 Comdat *TDComdat = M.getOrInsertComdat(EncodedName);
355 TDGV->setComdat(TDComdat);
356 }
357 appendToUsed(M, TDGV);
358 }
359
360 TypeDescriptors[MD] = TDGV;
361 return true;
362 }
363
generateTypeDescriptor(const MDNode * MD,TypeDescriptorsMapTy & TypeDescriptors,TypeNameMapTy & TypeNames,Module & M)364 bool TypeSanitizer::generateTypeDescriptor(
365 const MDNode *MD, TypeDescriptorsMapTy &TypeDescriptors,
366 TypeNameMapTy &TypeNames, Module &M) {
367 // Here we need to generate a type descriptor corresponding to this TBAA
368 // metadata node. Under the current scheme there are three kinds of TBAA
369 // metadata nodes: scalar nodes, struct nodes, and struct tag nodes.
370
371 if (MD->getNumOperands() < 3)
372 return false;
373
374 const MDNode *BaseNode = dyn_cast<MDNode>(MD->getOperand(0));
375 if (!BaseNode)
376 return false;
377
378 // This is a struct tag (element-access) node.
379
380 const MDNode *AccessNode = dyn_cast<MDNode>(MD->getOperand(1));
381 if (!AccessNode)
382 return false;
383
384 Constant *Base;
385 auto TDI = TypeDescriptors.find(BaseNode);
386 if (TDI != TypeDescriptors.end()) {
387 Base = TDI->second;
388 } else {
389 if (!generateBaseTypeDescriptor(BaseNode, TypeDescriptors, TypeNames, M))
390 return false;
391
392 Base = TypeDescriptors[BaseNode];
393 }
394
395 Constant *Access;
396 TDI = TypeDescriptors.find(AccessNode);
397 if (TDI != TypeDescriptors.end()) {
398 Access = TDI->second;
399 } else {
400 if (!generateBaseTypeDescriptor(AccessNode, TypeDescriptors, TypeNames, M))
401 return false;
402
403 Access = TypeDescriptors[AccessNode];
404 }
405
406 uint64_t Offset =
407 mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();
408 std::string EncodedName =
409 std::string(Base->getName()) + "_o_" + utostr(Offset);
410
411 GlobalVariable *GV =
412 dyn_cast_or_null<GlobalVariable>(M.getNamedValue(EncodedName));
413 if (GV) {
414 TypeDescriptors[MD] = GV;
415 return true;
416 }
417
418 // The descriptor for a scalar is:
419 // [1, base-type pointer, access-type pointer, offset]
420
421 StructType *TDTy =
422 StructType::get(IntptrTy, Base->getType(), Access->getType(), IntptrTy);
423 Constant *TD =
424 ConstantStruct::get(TDTy, ConstantInt::get(IntptrTy, 1), Base, Access,
425 ConstantInt::get(IntptrTy, Offset));
426
427 bool ShouldBeComdat = cast<GlobalVariable>(Base)->getLinkage() ==
428 GlobalValue::LinkOnceODRLinkage;
429
430 GlobalVariable *TDGV =
431 new GlobalVariable(TDTy, true,
432 !ShouldBeComdat ? GlobalValue::InternalLinkage
433 : GlobalValue::LinkOnceODRLinkage,
434 TD, EncodedName);
435 M.insertGlobalVariable(TDGV);
436
437 if (ShouldBeComdat) {
438 if (TargetTriple.isOSBinFormatELF()) {
439 Comdat *TDComdat = M.getOrInsertComdat(EncodedName);
440 TDGV->setComdat(TDComdat);
441 }
442 appendToUsed(M, TDGV);
443 }
444
445 TypeDescriptors[MD] = TDGV;
446 return true;
447 }
448
getShadowBase(Function & F)449 Instruction *TypeSanitizer::getShadowBase(Function &F) {
450 IRBuilder<> IRB(&F.front().front());
451 Constant *GlobalShadowAddress =
452 F.getParent()->getOrInsertGlobal(kTysanShadowMemoryAddress, IntptrTy);
453 return IRB.CreateLoad(IntptrTy, GlobalShadowAddress, "shadow.base");
454 }
455
getAppMemMask(Function & F)456 Instruction *TypeSanitizer::getAppMemMask(Function &F) {
457 IRBuilder<> IRB(&F.front().front());
458 Value *GlobalAppMemMask =
459 F.getParent()->getOrInsertGlobal(kTysanAppMemMask, IntptrTy);
460 return IRB.CreateLoad(IntptrTy, GlobalAppMemMask, "app.mem.mask");
461 }
462
463 /// Collect all loads and stores, and for what TBAA nodes we need to generate
464 /// type descriptors.
collectMemAccessInfo(Function & F,const TargetLibraryInfo & TLI,SmallVectorImpl<std::pair<Instruction *,MemoryLocation>> & MemoryAccesses,SmallSetVector<const MDNode *,8> & TBAAMetadata,SmallVectorImpl<Value * > & MemTypeResetInsts)465 void collectMemAccessInfo(
466 Function &F, const TargetLibraryInfo &TLI,
467 SmallVectorImpl<std::pair<Instruction *, MemoryLocation>> &MemoryAccesses,
468 SmallSetVector<const MDNode *, 8> &TBAAMetadata,
469 SmallVectorImpl<Value *> &MemTypeResetInsts) {
470 // Traverse all instructions, collect loads/stores/returns, check for calls.
471 for (Instruction &Inst : instructions(F)) {
472 // Skip memory accesses inserted by another instrumentation.
473 if (Inst.getMetadata(LLVMContext::MD_nosanitize))
474 continue;
475
476 if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
477 isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst)) {
478 MemoryLocation MLoc = MemoryLocation::get(&Inst);
479
480 // Swift errors are special (we can't introduce extra uses on them).
481 if (MLoc.Ptr->isSwiftError())
482 continue;
483
484 // Skip non-address-space-0 pointers; we don't know how to handle them.
485 Type *PtrTy = cast<PointerType>(MLoc.Ptr->getType());
486 if (PtrTy->getPointerAddressSpace() != 0)
487 continue;
488
489 if (MLoc.AATags.TBAA)
490 TBAAMetadata.insert(MLoc.AATags.TBAA);
491 MemoryAccesses.push_back(std::make_pair(&Inst, MLoc));
492 } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
493 if (CallInst *CI = dyn_cast<CallInst>(&Inst))
494 maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
495
496 if (isa<MemIntrinsic, LifetimeIntrinsic>(Inst))
497 MemTypeResetInsts.push_back(&Inst);
498 } else if (isa<AllocaInst>(Inst)) {
499 MemTypeResetInsts.push_back(&Inst);
500 }
501 }
502 }
503
sanitizeFunction(Function & F,const TargetLibraryInfo & TLI)504 bool TypeSanitizer::sanitizeFunction(Function &F,
505 const TargetLibraryInfo &TLI) {
506 if (F.isDeclaration())
507 return false;
508 // This is required to prevent instrumenting call to __tysan_init from within
509 // the module constructor.
510 if (&F == TysanCtorFunction.getCallee() || &F == TysanGlobalsSetTypeFunction)
511 return false;
512 initializeCallbacks(*F.getParent());
513
514 // We need to collect all loads and stores, and know for what TBAA nodes we
515 // need to generate type descriptors.
516 SmallVector<std::pair<Instruction *, MemoryLocation>> MemoryAccesses;
517 SmallSetVector<const MDNode *, 8> TBAAMetadata;
518 SmallVector<Value *> MemTypeResetInsts;
519 collectMemAccessInfo(F, TLI, MemoryAccesses, TBAAMetadata, MemTypeResetInsts);
520
521 // byval arguments also need their types reset (they're new stack memory,
522 // just like allocas).
523 for (auto &A : F.args())
524 if (A.hasByValAttr())
525 MemTypeResetInsts.push_back(&A);
526
527 Module &M = *F.getParent();
528 TypeDescriptorsMapTy TypeDescriptors;
529 TypeNameMapTy TypeNames;
530 bool Res = false;
531 for (const MDNode *MD : TBAAMetadata) {
532 if (TypeDescriptors.count(MD))
533 continue;
534
535 if (!generateTypeDescriptor(MD, TypeDescriptors, TypeNames, M))
536 return Res; // Giving up.
537
538 Res = true;
539 }
540
541 const DataLayout &DL = F.getParent()->getDataLayout();
542 bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeType);
543 bool NeedsInstrumentation =
544 MemTypeResetInsts.empty() && MemoryAccesses.empty();
545 Instruction *ShadowBase = NeedsInstrumentation ? nullptr : getShadowBase(F);
546 Instruction *AppMemMask = NeedsInstrumentation ? nullptr : getAppMemMask(F);
547 for (const auto &[I, MLoc] : MemoryAccesses) {
548 IRBuilder<> IRB(I);
549 assert(MLoc.Size.isPrecise());
550 if (instrumentWithShadowUpdate(
551 IRB, MLoc.AATags.TBAA, const_cast<Value *>(MLoc.Ptr),
552 MLoc.Size.getValue(), I->mayReadFromMemory(), I->mayWriteToMemory(),
553 ShadowBase, AppMemMask, false, SanitizeFunction, TypeDescriptors,
554 DL)) {
555 ++NumInstrumentedAccesses;
556 Res = true;
557 }
558 }
559
560 for (auto Inst : MemTypeResetInsts)
561 Res |= instrumentMemInst(Inst, ShadowBase, AppMemMask, DL);
562
563 return Res;
564 }
565
convertToShadowDataInt(IRBuilder<> & IRB,Value * Ptr,Type * IntptrTy,uint64_t PtrShift,Value * ShadowBase,Value * AppMemMask)566 static Value *convertToShadowDataInt(IRBuilder<> &IRB, Value *Ptr,
567 Type *IntptrTy, uint64_t PtrShift,
568 Value *ShadowBase, Value *AppMemMask) {
569 return IRB.CreateAdd(
570 IRB.CreateShl(
571 IRB.CreateAnd(IRB.CreatePtrToInt(Ptr, IntptrTy, "app.ptr.int"),
572 AppMemMask, "app.ptr.masked"),
573 PtrShift, "app.ptr.shifted"),
574 ShadowBase, "shadow.ptr.int");
575 }
576
/// Emits, at the insertion point of \p IRB, the shadow-memory code for one
/// access of \p AccessSize bytes through \p Ptr.
///
/// Depending on the arguments the generated code either
///   - unconditionally stores the type descriptor into shadow memory
///     (\p ForceSetType, or a write while ClWritesAlwaysSetType is on),
///   - stores the descriptor only if the shadow slot is still null
///     (\p SanitizeFunction is false), or
///   - compares the shadow contents against the descriptor and calls
///     __tysan_check on a mismatch (the full check).
///
/// \param TBAAMD          TBAA node of the access; when null, a null
///                        descriptor pointer is used.
/// \param IsRead, IsWrite Encoded into the flags passed to __tysan_check
///                        (bit 0 = read, bit 1 = write).
/// \param TypeDescriptors Looked up with \p TBAAMD to obtain the descriptor.
/// \param DL              Not used by this function.
/// \returns true on every path.
bool TypeSanitizer::instrumentWithShadowUpdate(
    IRBuilder<> &IRB, const MDNode *TBAAMD, Value *Ptr, uint64_t AccessSize,
    bool IsRead, bool IsWrite, Value *ShadowBase, Value *AppMemMask,
    bool ForceSetType, bool SanitizeFunction,
    TypeDescriptorsMapTy &TypeDescriptors, const DataLayout &DL) {
  Constant *TDGV;
  if (TBAAMD)
    TDGV = TypeDescriptors[TBAAMD];
  else
    TDGV = Constant::getNullValue(IRB.getPtrTy());

  Value *TD = IRB.CreateBitCast(TDGV, IRB.getPtrTy());

  // Address of the shadow slot that corresponds to the first accessed byte.
  Value *ShadowDataInt = convertToShadowDataInt(IRB, Ptr, IntptrTy, PtrShift,
                                                ShadowBase, AppMemMask);
  Type *Int8PtrPtrTy = PointerType::get(IRB.getContext(), 0);
  Value *ShadowData =
      IRB.CreateIntToPtr(ShadowDataInt, Int8PtrPtrTy, "shadow.ptr");

  // Stores TD into the first shadow slot and the marker -i into the slot of
  // every following byte i of the access.
  auto SetType = [&]() {
    IRB.CreateStore(TD, ShadowData);

    // Now fill the remainder of the shadow memory corresponding to the
    // remainder of the bytes of the type with a bad type descriptor.
    for (uint64_t i = 1; i < AccessSize; ++i) {
      Value *BadShadowData = IRB.CreateIntToPtr(
          IRB.CreateAdd(ShadowDataInt,
                        ConstantInt::get(IntptrTy, i << PtrShift),
                        "shadow.byte." + Twine(i) + ".offset"),
          Int8PtrPtrTy, "shadow.byte." + Twine(i) + ".ptr");

      // This is the TD value, -i, which is used to indicate that the byte is
      // i bytes after the first byte of the type.
      Value *BadTD =
          IRB.CreateIntToPtr(ConstantInt::getSigned(IntptrTy, -i),
                             IRB.getPtrTy(), "bad.descriptor" + Twine(i));
      IRB.CreateStore(BadTD, BadShadowData);
    }
  };

  if (ForceSetType || (ClWritesAlwaysSetType && IsWrite)) {
    // In the mode where writes always set the type, for a write (which does
    // not also read), we just set the type.
    SetType();
    return true;
  }

  assert((!ClWritesAlwaysSetType || IsRead) &&
         "should have handled case above");
  LLVMContext &C = IRB.getContext();
  // Branch weights marking the slow paths below as unlikely.
  MDNode *UnlikelyBW = MDBuilder(C).createBranchWeights(1, 100000);

  if (!SanitizeFunction) {
    // If we're not sanitizing this function, then we only care whether we
    // need to *set* the type.
    Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc");
    Value *NullTDCmp = IRB.CreateIsNull(LoadedTD, "desc.set");
    Instruction *NullTDTerm = SplitBlockAndInsertIfThen(
        NullTDCmp, &*IRB.GetInsertPoint(), false, UnlikelyBW);
    IRB.SetInsertPoint(NullTDTerm);
    NullTDTerm->getParent()->setName("set.type");
    SetType();
    return true;
  }
  // We need to check the type here. If the type is unknown, then the read
  // sets the type. If the type is known, then it is checked. If the type
  // doesn't match, then we call the runtime (which may yet determine that
  // the mismatch is okay).
  //
  // The checks generated below have the following structure.
  //
  //   ; First we load the descriptor for the load from shadow memory and
  //   ; compare it against the type descriptor for the current access type.
  //   %shadow.desc = load ptr %shadow.data
  //   %bad.desc = icmp ne %shadow.desc, %td
  //   br %bad.desc, %bad.bb, %good.bb
  //
  // bad.bb:
  //   %shadow.desc.null = icmp eq %shadow.desc, null
  //   br %shadow.desc.null, %null.td.bb, %good.td.bb
  //
  // null.td.bb:
  //   ; The type is unknown, set it if all bytes in the value are also unknown.
  //   ; To check, we load the shadow data for all bytes of the access. For the
  //   ; pseudo code below, assume an access of size 1.
  //   %shadow.data.int = add %shadow.data.int, 0
  //   %l = load (inttoptr %shadow.data.int)
  //   %is.not.null = icmp ne %l, null
  //   %not.all.unknown = %is.not.null
  //   br %not.all.unknown, before.set.type.bb
  //
  // before.set.type.bb:
  //   ; Call runtime to check mismatch.
  //   call void @__tysan_check()
  //   br %set.type.bb
  //
  // set.type.bb:
  //   ; Now fill the remainder of the shadow memory corresponding to the
  //   ; remainder of the bytes of the type with a bad type descriptor.
  //   store %TD, %shadow.data
  //   br %continue.bb
  //
  // good.td.bb:
  //   ; We have a non-trivial mismatch. Call the runtime.
  //   call void @__tysan_check()
  //   br %continue.bb
  //
  // good.bb:
  //   ; We appear to have the right type. Make sure that all other bytes in
  //   ; the type are still marked as interior bytes. If not, call the runtime.
  //   %shadow.data.int = add %shadow.data.int, 0
  //   %l = load (inttoptr %shadow.data.int)
  //   %not.all.interior = icmp sge %l, 0
  //   br %not.all.interior, label %check.rt.bb, label %continue.bb
  //
  // check.rt.bb:
  //   call void @__tysan_check()
  //   br %continue.bb

  // Bit 0 is set for reads, bit 1 for writes.
  Constant *Flags = ConstantInt::get(OrdTy, int(IsRead) | (int(IsWrite) << 1));

  Value *LoadedTD = IRB.CreateLoad(IRB.getPtrTy(), ShadowData, "shadow.desc");
  Value *BadTDCmp = IRB.CreateICmpNE(LoadedTD, TD, "bad.desc");
  Instruction *BadTDTerm, *GoodTDTerm;
  SplitBlockAndInsertIfThenElse(BadTDCmp, &*IRB.GetInsertPoint(), &BadTDTerm,
                                &GoodTDTerm, UnlikelyBW);
  IRB.SetInsertPoint(BadTDTerm);

  // We now know that the types did not match (we're on the slow path). If
  // the type is unknown, then set it.
  Value *NullTDCmp = IRB.CreateIsNull(LoadedTD);
  Instruction *NullTDTerm, *MismatchTerm;
  SplitBlockAndInsertIfThenElse(NullTDCmp, &*IRB.GetInsertPoint(), &NullTDTerm,
                                &MismatchTerm);

  // If the type is unknown, then set the type.
  IRB.SetInsertPoint(NullTDTerm);

  // We're about to set the type. Make sure that all bytes in the value are
  // also of unknown type.
  Value *Size = ConstantInt::get(OrdTy, AccessSize);
  Value *NotAllUnkTD = IRB.getFalse();
  for (uint64_t i = 1; i < AccessSize; ++i) {
    Value *UnkShadowData = IRB.CreateIntToPtr(
        IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)),
        Int8PtrPtrTy);
    Value *ILdTD = IRB.CreateLoad(IRB.getPtrTy(), UnkShadowData);
    NotAllUnkTD = IRB.CreateOr(NotAllUnkTD, IRB.CreateIsNotNull(ILdTD));
  }

  // Remember where the type gets set; the runtime call for the "not all
  // unknown" case is placed in a conditional block in front of it.
  Instruction *BeforeSetType = &*IRB.GetInsertPoint();
  Instruction *BadUTDTerm =
      SplitBlockAndInsertIfThen(NotAllUnkTD, BeforeSetType, false, UnlikelyBW);
  IRB.SetInsertPoint(BadUTDTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});

  IRB.SetInsertPoint(BeforeSetType);
  SetType();

  // We have a non-trivial mismatch. Call the runtime.
  IRB.SetInsertPoint(MismatchTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});

  // We appear to have the right type. Make sure that all other bytes in
  // the type are still marked as interior bytes. If not, call the runtime.
  IRB.SetInsertPoint(GoodTDTerm);
  Value *NotAllBadTD = IRB.getFalse();
  for (uint64_t i = 1; i < AccessSize; ++i) {
    Value *BadShadowData = IRB.CreateIntToPtr(
        IRB.CreateAdd(ShadowDataInt, ConstantInt::get(IntptrTy, i << PtrShift)),
        Int8PtrPtrTy);
    Value *ILdTD = IRB.CreatePtrToInt(
        IRB.CreateLoad(IRB.getPtrTy(), BadShadowData), IntptrTy);
    // Interior-byte markers are negative; anything >= 0 is not a marker.
    NotAllBadTD = IRB.CreateOr(
        NotAllBadTD, IRB.CreateICmpSGE(ILdTD, ConstantInt::get(IntptrTy, 0)));
  }

  Instruction *BadITDTerm = SplitBlockAndInsertIfThen(
      NotAllBadTD, &*IRB.GetInsertPoint(), false, UnlikelyBW);
  IRB.SetInsertPoint(BadITDTerm);
  IRB.CreateCall(TysanCheck, {IRB.CreateBitCast(Ptr, IRB.getPtrTy()), Size,
                              (Value *)TD, (Value *)Flags});
  return true;
}
763
/// Emits shadow-memory updates for a value that resets or transfers the type
/// of memory.
///
/// \p V is either a byval Argument or an Instruction. Handled instructions
/// are memory intrinsics, lifetime intrinsics and allocas:
///   - for byval arguments, memset-like intrinsics, lifetime intrinsics and
///     allocas the destination's shadow memory is zeroed,
///   - for memory transfers whose source is in address space 0 the source's
///     shadow memory is copied (or moved, for memmove) to the destination's.
///
/// \param ShadowBase, AppMemMask Loads of the shadow base and the application
///        mask; when null they are created on demand.
/// \returns false if nothing was emitted (destination not in address space 0
///          or unhandled instruction kind), true otherwise.
bool TypeSanitizer::instrumentMemInst(Value *V, Instruction *ShadowBase,
                                      Instruction *AppMemMask,
                                      const DataLayout &DL) {
  BasicBlock::iterator IP;
  BasicBlock *BB;
  Function *F;

  if (auto *I = dyn_cast<Instruction>(V)) {
    // Instructions are instrumented in place.
    IP = BasicBlock::iterator(I);
    BB = I->getParent();
    F = BB->getParent();
  } else {
    // Arguments are instrumented in the entry block of their function.
    auto *A = cast<Argument>(V);
    F = A->getParent();
    BB = &F->getEntryBlock();
    IP = BB->getFirstInsertionPt();

    // Find the next insert point after both ShadowBase and AppMemMask.
    if (IP->comesBefore(ShadowBase))
      IP = ShadowBase->getNextNode()->getIterator();
    if (IP->comesBefore(AppMemMask))
      IP = AppMemMask->getNextNode()->getIterator();
  }

  // Dest/Size describe the memory whose shadow is updated; Src is only set
  // for transfers from address space 0.
  Value *Dest, *Size, *Src = nullptr;
  bool NeedsMemMove = false;
  IRBuilder<> IRB(BB, IP);

  if (auto *A = dyn_cast<Argument>(V)) {
    assert(A->hasByValAttr() && "Type reset for non-byval argument?");

    Dest = A;
    Size =
        ConstantInt::get(IntptrTy, DL.getTypeAllocSize(A->getParamByValType()));
  } else {
    auto *I = cast<Instruction>(V);
    if (auto *MI = dyn_cast<MemIntrinsic>(I)) {
      if (MI->getDestAddressSpace() != 0)
        return false;

      Dest = MI->getDest();
      Size = MI->getLength();

      if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
        if (MTI->getSourceAddressSpace() == 0) {
          Src = MTI->getSource();
          NeedsMemMove = isa<MemMoveInst>(MTI);
        }
      }
    } else if (auto *II = dyn_cast<LifetimeIntrinsic>(I)) {
      // Operand 0 is used as the size and operand 1 as the pointer.
      Size = II->getArgOperand(0);
      Dest = II->getArgOperand(1);
    } else if (auto *AI = dyn_cast<AllocaInst>(I)) {
      // We need to clear the types for new stack allocations (or else we might
      // read stale type information from a previous function execution).

      // The alloca's result is the destination, so emit after the alloca.
      IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(I)));
      IRB.SetInstDebugLocation(I);

      // Size in bytes = array size * allocation size of the element type.
      Size = IRB.CreateMul(
          IRB.CreateZExtOrTrunc(AI->getArraySize(), IntptrTy),
          ConstantInt::get(IntptrTy,
                           DL.getTypeAllocSize(AI->getAllocatedType())));
      Dest = I;
    } else {
      return false;
    }
  }

  if (!ShadowBase)
    ShadowBase = getShadowBase(*F);
  if (!AppMemMask)
    AppMemMask = getAppMemMask(*F);

  // Shadow address of the destination:
  //   ((ptrtoint(Dest) & AppMemMask) << PtrShift) + ShadowBase
  Value *ShadowDataInt = IRB.CreateAdd(
      IRB.CreateShl(
          IRB.CreateAnd(IRB.CreatePtrToInt(Dest, IntptrTy), AppMemMask),
          PtrShift),
      ShadowBase);
  Value *ShadowData = IRB.CreateIntToPtr(ShadowDataInt, IRB.getPtrTy());

  if (!Src) {
    // No source to copy from: zero the destination's shadow. The byte count
    // is scaled by the same shift as the address.
    IRB.CreateMemSet(ShadowData, IRB.getInt8(0), IRB.CreateShl(Size, PtrShift),
                     Align(1ull << PtrShift));
    return true;
  }

  // Shadow address of the source, computed like the destination's.
  Value *SrcShadowDataInt = IRB.CreateAdd(
      IRB.CreateShl(
          IRB.CreateAnd(IRB.CreatePtrToInt(Src, IntptrTy), AppMemMask),
          PtrShift),
      ShadowBase);
  Value *SrcShadowData = IRB.CreateIntToPtr(SrcShadowDataInt, IRB.getPtrTy());

  // Mirror the original transfer on the shadow memory.
  if (NeedsMemMove) {
    IRB.CreateMemMove(ShadowData, Align(1ull << PtrShift), SrcShadowData,
                      Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
  } else {
    IRB.CreateMemCpy(ShadowData, Align(1ull << PtrShift), SrcShadowData,
                     Align(1ull << PtrShift), IRB.CreateShl(Size, PtrShift));
  }

  return true;
}
868
run(Module & M,ModuleAnalysisManager & MAM)869 PreservedAnalyses TypeSanitizerPass::run(Module &M,
870 ModuleAnalysisManager &MAM) {
871 Function *TysanCtorFunction;
872 std::tie(TysanCtorFunction, std::ignore) =
873 createSanitizerCtorAndInitFunctions(M, kTysanModuleCtorName,
874 kTysanInitName, /*InitArgTypes=*/{},
875 /*InitArgs=*/{});
876
877 TypeSanitizer TySan(M);
878 TySan.instrumentGlobals(M);
879 appendToGlobalCtors(M, TysanCtorFunction, 0);
880
881 auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
882 for (Function &F : M) {
883 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
884 TySan.sanitizeFunction(F, TLI);
885 }
886
887 return PreservedAnalyses::none();
888 }
889