xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (revision d5e3895ea4fe4ef9db8823774e07b4368180a23e)
1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
17 /// propagate the shadow bits through some of the arithmetic
18 /// instruction (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings the major issue
26 /// as well: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
65 /// This way it avoids needless overwriting origin of the 4-byte region on
66 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
72 /// of two disjoint locations can not be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It can be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics can be only visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach generating calls to
101 ///   __msan_instrument_asm_store(ptr, size)
102 /// , which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions. The corresponding functions check that the X-byte accesses
121 ///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
126 ///    function __msan_get_context_state() returning a pointer to that struct
127 ///    is inserted into every instrumented function before the entry block;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
134 ///
135 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
136 /// calls, making sure we're on the safe side wrt. possible false positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //
141 // FIXME: This sanitizer does not yet handle scalable vectors
142 //
143 //===----------------------------------------------------------------------===//
144 
145 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
146 #include "llvm/ADT/APInt.h"
147 #include "llvm/ADT/ArrayRef.h"
148 #include "llvm/ADT/DepthFirstIterator.h"
149 #include "llvm/ADT/SmallSet.h"
150 #include "llvm/ADT/SmallString.h"
151 #include "llvm/ADT/SmallVector.h"
152 #include "llvm/ADT/StringExtras.h"
153 #include "llvm/ADT/StringRef.h"
154 #include "llvm/ADT/Triple.h"
155 #include "llvm/Analysis/TargetLibraryInfo.h"
156 #include "llvm/IR/Argument.h"
157 #include "llvm/IR/Attributes.h"
158 #include "llvm/IR/BasicBlock.h"
159 #include "llvm/IR/CallingConv.h"
160 #include "llvm/IR/Constant.h"
161 #include "llvm/IR/Constants.h"
162 #include "llvm/IR/DataLayout.h"
163 #include "llvm/IR/DerivedTypes.h"
164 #include "llvm/IR/Function.h"
165 #include "llvm/IR/GlobalValue.h"
166 #include "llvm/IR/GlobalVariable.h"
167 #include "llvm/IR/IRBuilder.h"
168 #include "llvm/IR/InlineAsm.h"
169 #include "llvm/IR/InstVisitor.h"
170 #include "llvm/IR/InstrTypes.h"
171 #include "llvm/IR/Instruction.h"
172 #include "llvm/IR/Instructions.h"
173 #include "llvm/IR/IntrinsicInst.h"
174 #include "llvm/IR/Intrinsics.h"
175 #include "llvm/IR/IntrinsicsX86.h"
176 #include "llvm/IR/LLVMContext.h"
177 #include "llvm/IR/MDBuilder.h"
178 #include "llvm/IR/Module.h"
179 #include "llvm/IR/Type.h"
180 #include "llvm/IR/Value.h"
181 #include "llvm/IR/ValueMap.h"
182 #include "llvm/InitializePasses.h"
183 #include "llvm/Pass.h"
184 #include "llvm/Support/AtomicOrdering.h"
185 #include "llvm/Support/Casting.h"
186 #include "llvm/Support/CommandLine.h"
187 #include "llvm/Support/Compiler.h"
188 #include "llvm/Support/Debug.h"
189 #include "llvm/Support/ErrorHandling.h"
190 #include "llvm/Support/MathExtras.h"
191 #include "llvm/Support/raw_ostream.h"
192 #include "llvm/Transforms/Instrumentation.h"
193 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
194 #include "llvm/Transforms/Utils/Local.h"
195 #include "llvm/Transforms/Utils/ModuleUtils.h"
196 #include <algorithm>
197 #include <cassert>
198 #include <cstddef>
199 #include <cstdint>
200 #include <memory>
201 #include <string>
202 #include <tuple>
203 
204 using namespace llvm;
205 
206 #define DEBUG_TYPE "msan"
207 
208 static const unsigned kOriginSize = 4;
209 static const Align kMinOriginAlignment = Align(4);
210 static const Align kShadowTLSAlignment = Align(8);
211 
212 // These constants must be kept in sync with the ones in msan.h.
213 static const unsigned kParamTLSSize = 800;
214 static const unsigned kRetvalTLSSize = 800;
215 
216 // Accesses sizes are powers of two: 1, 2, 4, 8.
217 static const size_t kNumberOfAccessSizes = 4;
218 
219 /// Track origins of uninitialized values.
220 ///
221 /// Adds a section to MemorySanitizer report that points to the allocation
222 /// (stack or heap) the uninitialized bits came from originally.
223 static cl::opt<int> ClTrackOrigins("msan-track-origins",
224        cl::desc("Track origins (allocation sites) of poisoned memory"),
225        cl::Hidden, cl::init(0));
226 
227 static cl::opt<bool> ClKeepGoing("msan-keep-going",
228        cl::desc("keep going after reporting a UMR"),
229        cl::Hidden, cl::init(false));
230 
231 static cl::opt<bool> ClPoisonStack("msan-poison-stack",
232        cl::desc("poison uninitialized stack variables"),
233        cl::Hidden, cl::init(true));
234 
235 static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
236        cl::desc("poison uninitialized stack variables with a call"),
237        cl::Hidden, cl::init(false));
238 
239 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
240        cl::desc("poison uninitialized stack variables with the given pattern"),
241        cl::Hidden, cl::init(0xff));
242 
243 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
244        cl::desc("poison undef temps"),
245        cl::Hidden, cl::init(true));
246 
247 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
248        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
249        cl::Hidden, cl::init(true));
250 
251 static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
252        cl::desc("exact handling of relational integer ICmp"),
253        cl::Hidden, cl::init(false));
254 
255 static cl::opt<bool> ClHandleLifetimeIntrinsics(
256     "msan-handle-lifetime-intrinsics",
257     cl::desc(
258         "when possible, poison scoped variables at the beginning of the scope "
259         "(slower, but more precise)"),
260     cl::Hidden, cl::init(true));
261 
262 // When compiling the Linux kernel, we sometimes see false positives related to
263 // MSan being unable to understand that inline assembly calls may initialize
264 // local variables.
265 // This flag makes the compiler conservatively unpoison every memory location
266 // passed into an assembly call. Note that this may cause false positives.
267 // Because it's impossible to figure out the array sizes, we can only unpoison
268 // the first sizeof(type) bytes for each type* pointer.
269 // The instrumentation is only enabled in KMSAN builds, and only if
270 // -msan-handle-asm-conservative is on. This is done because we may want to
271 // quickly disable assembly instrumentation when it breaks.
272 static cl::opt<bool> ClHandleAsmConservative(
273     "msan-handle-asm-conservative",
274     cl::desc("conservative handling of inline assembly"), cl::Hidden,
275     cl::init(true));
276 
277 // This flag controls whether we check the shadow of the address
278 // operand of load or store. Such bugs are very rare, since load from
279 // a garbage address typically results in SEGV, but still happen
280 // (e.g. only lower bits of address are garbage, or the access happens
281 // early at program startup where malloc-ed memory is more likely to
282 // be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
283 static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
284        cl::desc("report accesses through a pointer which has poisoned shadow"),
285        cl::Hidden, cl::init(true));
286 
287 static cl::opt<bool> ClEagerChecks(
288     "msan-eager-checks",
289     cl::desc("check arguments and return values at function call boundaries"),
290     cl::Hidden, cl::init(false));
291 
292 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
293        cl::desc("print out instructions with default strict semantics"),
294        cl::Hidden, cl::init(false));
295 
296 static cl::opt<int> ClInstrumentationWithCallThreshold(
297     "msan-instrumentation-with-call-threshold",
298     cl::desc(
299         "If the function being instrumented requires more than "
300         "this number of checks and origin stores, use callbacks instead of "
301         "inline checks (-1 means never use callbacks)."),
302     cl::Hidden, cl::init(3500));
303 
304 static cl::opt<bool>
305     ClEnableKmsan("msan-kernel",
306                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
307                   cl::Hidden, cl::init(false));
308 
309 // This is an experiment to enable handling of cases where shadow is a non-zero
310 // compile-time constant. For some unexplainable reason they were silently
311 // ignored in the instrumentation.
312 static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
313        cl::desc("Insert checks for constant shadow values"),
314        cl::Hidden, cl::init(false));
315 
316 // This is off by default because of a bug in gold:
317 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
318 static cl::opt<bool> ClWithComdat("msan-with-comdat",
319        cl::desc("Place MSan constructors in comdat sections"),
320        cl::Hidden, cl::init(false));
321 
322 // These options allow to specify custom memory map parameters
323 // See MemoryMapParams for details.
324 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
325                                    cl::desc("Define custom MSan AndMask"),
326                                    cl::Hidden, cl::init(0));
327 
328 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
329                                    cl::desc("Define custom MSan XorMask"),
330                                    cl::Hidden, cl::init(0));
331 
332 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
333                                       cl::desc("Define custom MSan ShadowBase"),
334                                       cl::Hidden, cl::init(0));
335 
336 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
337                                       cl::desc("Define custom MSan OriginBase"),
338                                       cl::Hidden, cl::init(0));
339 
340 static const char *const kMsanModuleCtorName = "msan.module_ctor";
341 static const char *const kMsanInitName = "__msan_init";
342 
343 namespace {
344 
345 // Memory map parameters used in application-to-shadow address calculation.
346 // Offset = (Addr & ~AndMask) ^ XorMask
347 // Shadow = ShadowBase + Offset
348 // Origin = OriginBase + Offset
349 struct MemoryMapParams {
350   uint64_t AndMask;
351   uint64_t XorMask;
352   uint64_t ShadowBase;
353   uint64_t OriginBase;
354 };
355 
356 struct PlatformMemoryMapParams {
357   const MemoryMapParams *bits32;
358   const MemoryMapParams *bits64;
359 };
360 
361 } // end anonymous namespace
362 
363 // i386 Linux
364 static const MemoryMapParams Linux_I386_MemoryMapParams = {
365   0x000080000000,  // AndMask
366   0,               // XorMask (not used)
367   0,               // ShadowBase (not used)
368   0x000040000000,  // OriginBase
369 };
370 
371 // x86_64 Linux
372 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
373 #ifdef MSAN_LINUX_X86_64_OLD_MAPPING
374   0x400000000000,  // AndMask
375   0,               // XorMask (not used)
376   0,               // ShadowBase (not used)
377   0x200000000000,  // OriginBase
378 #else
379   0,               // AndMask (not used)
380   0x500000000000,  // XorMask
381   0,               // ShadowBase (not used)
382   0x100000000000,  // OriginBase
383 #endif
384 };
385 
386 // mips64 Linux
387 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
388   0,               // AndMask (not used)
389   0x008000000000,  // XorMask
390   0,               // ShadowBase (not used)
391   0x002000000000,  // OriginBase
392 };
393 
394 // ppc64 Linux
395 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
396   0xE00000000000,  // AndMask
397   0x100000000000,  // XorMask
398   0x080000000000,  // ShadowBase
399   0x1C0000000000,  // OriginBase
400 };
401 
402 // s390x Linux
403 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
404     0xC00000000000, // AndMask
405     0,              // XorMask (not used)
406     0x080000000000, // ShadowBase
407     0x1C0000000000, // OriginBase
408 };
409 
410 // aarch64 Linux
411 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
412   0,               // AndMask (not used)
413   0x06000000000,   // XorMask
414   0,               // ShadowBase (not used)
415   0x01000000000,   // OriginBase
416 };
417 
418 // i386 FreeBSD
419 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
420   0x000180000000,  // AndMask
421   0x000040000000,  // XorMask
422   0x000020000000,  // ShadowBase
423   0x000700000000,  // OriginBase
424 };
425 
426 // x86_64 FreeBSD
427 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
428   0xc00000000000,  // AndMask
429   0x200000000000,  // XorMask
430   0x100000000000,  // ShadowBase
431   0x380000000000,  // OriginBase
432 };
433 
434 // x86_64 NetBSD
435 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
436   0,               // AndMask
437   0x500000000000,  // XorMask
438   0,               // ShadowBase
439   0x100000000000,  // OriginBase
440 };
441 
442 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
443   &Linux_I386_MemoryMapParams,
444   &Linux_X86_64_MemoryMapParams,
445 };
446 
447 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
448   nullptr,
449   &Linux_MIPS64_MemoryMapParams,
450 };
451 
452 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
453   nullptr,
454   &Linux_PowerPC64_MemoryMapParams,
455 };
456 
457 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
458     nullptr,
459     &Linux_S390X_MemoryMapParams,
460 };
461 
462 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
463   nullptr,
464   &Linux_AArch64_MemoryMapParams,
465 };
466 
467 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
468   &FreeBSD_I386_MemoryMapParams,
469   &FreeBSD_X86_64_MemoryMapParams,
470 };
471 
472 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
473   nullptr,
474   &NetBSD_X86_64_MemoryMapParams,
475 };
476 
477 namespace {
478 
479 /// Instrument functions of a module to detect uninitialized reads.
480 ///
481 /// Instantiating MemorySanitizer inserts the msan runtime library API function
482 /// declarations into the module if they don't exist already. Instantiating
483 /// ensures the __msan_init function is in the list of global constructors for
484 /// the module.
485 class MemorySanitizer {
486 public:
487   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
488       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
489         Recover(Options.Recover) {
490     initializeModule(M);
491   }
492 
493   // MSan cannot be moved or copied because of MapParams.
494   MemorySanitizer(MemorySanitizer &&) = delete;
495   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
496   MemorySanitizer(const MemorySanitizer &) = delete;
497   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
498 
499   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
500 
501 private:
502   friend struct MemorySanitizerVisitor;
503   friend struct VarArgAMD64Helper;
504   friend struct VarArgMIPS64Helper;
505   friend struct VarArgAArch64Helper;
506   friend struct VarArgPowerPC64Helper;
507   friend struct VarArgSystemZHelper;
508 
509   void initializeModule(Module &M);
510   void initializeCallbacks(Module &M);
511   void createKernelApi(Module &M);
512   void createUserspaceApi(Module &M);
513 
514   /// True if we're compiling the Linux kernel.
515   bool CompileKernel;
516   /// Track origins (allocation points) of uninitialized values.
517   int TrackOrigins;
518   bool Recover;
519 
520   LLVMContext *C;
521   Type *IntptrTy;
522   Type *OriginTy;
523 
524   // XxxTLS variables represent the per-thread state in MSan and per-task state
525   // in KMSAN.
526   // For the userspace these point to thread-local globals. In the kernel land
527   // they point to the members of a per-task struct obtained via a call to
528   // __msan_get_context_state().
529 
530   /// Thread-local shadow storage for function parameters.
531   Value *ParamTLS;
532 
533   /// Thread-local origin storage for function parameters.
534   Value *ParamOriginTLS;
535 
536   /// Thread-local shadow storage for function return value.
537   Value *RetvalTLS;
538 
539   /// Thread-local origin storage for function return value.
540   Value *RetvalOriginTLS;
541 
542   /// Thread-local shadow storage for in-register va_arg function
543   /// parameters (x86_64-specific).
544   Value *VAArgTLS;
545 
546   /// Thread-local shadow storage for in-register va_arg function
547   /// parameters (x86_64-specific).
548   Value *VAArgOriginTLS;
549 
550   /// Thread-local shadow storage for va_arg overflow area
551   /// (x86_64-specific).
552   Value *VAArgOverflowSizeTLS;
553 
554   /// Are the instrumentation callbacks set up?
555   bool CallbacksInitialized = false;
556 
557   /// The run-time callback to print a warning.
558   FunctionCallee WarningFn;
559 
560   // These arrays are indexed by log2(AccessSize).
561   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
562   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
563 
564   /// Run-time helper that generates a new origin value for a stack
565   /// allocation.
566   FunctionCallee MsanSetAllocaOrigin4Fn;
567 
568   /// Run-time helper that poisons stack on function entry.
569   FunctionCallee MsanPoisonStackFn;
570 
571   /// Run-time helper that records a store (or any event) of an
572   /// uninitialized value and returns an updated origin id encoding this info.
573   FunctionCallee MsanChainOriginFn;
574 
575   /// MSan runtime replacements for memmove, memcpy and memset.
576   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
577 
578   /// KMSAN callback for task-local function argument shadow.
579   StructType *MsanContextStateTy;
580   FunctionCallee MsanGetContextStateFn;
581 
582   /// Functions for poisoning/unpoisoning local variables
583   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
584 
585   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
586   /// pointers.
587   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
588   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
589   FunctionCallee MsanMetadataPtrForStore_1_8[4];
590   FunctionCallee MsanInstrumentAsmStoreFn;
591 
592   /// Helper to choose between different MsanMetadataPtrXxx().
593   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
594 
595   /// Memory map parameters used in application-to-shadow calculation.
596   const MemoryMapParams *MapParams;
597 
598   /// Custom memory map parameters used when -msan-shadow-base or
599   // -msan-origin-base is provided.
600   MemoryMapParams CustomMapParams;
601 
602   MDNode *ColdCallWeights;
603 
604   /// Branch weights for origin store.
605   MDNode *OriginStoreWeights;
606 };
607 
608 void insertModuleCtor(Module &M) {
609   getOrCreateSanitizerCtorAndInitFunctions(
610       M, kMsanModuleCtorName, kMsanInitName,
611       /*InitArgTypes=*/{},
612       /*InitArgs=*/{},
613       // This callback is invoked when the functions are created the first
614       // time. Hook them into the global ctors list in that case:
615       [&](Function *Ctor, FunctionCallee) {
616         if (!ClWithComdat) {
617           appendToGlobalCtors(M, Ctor, 0);
618           return;
619         }
620         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
621         Ctor->setComdat(MsanCtorComdat);
622         appendToGlobalCtors(M, Ctor, 0, Ctor);
623       });
624 }
625 
626 /// A legacy function pass for msan instrumentation.
627 ///
628 /// Instruments functions to detect uninitialized reads.
629 struct MemorySanitizerLegacyPass : public FunctionPass {
630   // Pass identification, replacement for typeid.
631   static char ID;
632 
633   MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
634       : FunctionPass(ID), Options(Options) {
635     initializeMemorySanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
636   }
637   StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
638 
639   void getAnalysisUsage(AnalysisUsage &AU) const override {
640     AU.addRequired<TargetLibraryInfoWrapperPass>();
641   }
642 
643   bool runOnFunction(Function &F) override {
644     return MSan->sanitizeFunction(
645         F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
646   }
647   bool doInitialization(Module &M) override;
648 
649   Optional<MemorySanitizer> MSan;
650   MemorySanitizerOptions Options;
651 };
652 
653 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
654   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
655 }
656 
657 } // end anonymous namespace
658 
659 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
660     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
661       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
662       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
663 
664 PreservedAnalyses MemorySanitizerPass::run(Function &F,
665                                            FunctionAnalysisManager &FAM) {
666   MemorySanitizer Msan(*F.getParent(), Options);
667   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
668     return PreservedAnalyses::none();
669   return PreservedAnalyses::all();
670 }
671 
672 PreservedAnalyses MemorySanitizerPass::run(Module &M,
673                                            ModuleAnalysisManager &AM) {
674   if (Options.Kernel)
675     return PreservedAnalyses::all();
676   insertModuleCtor(M);
677   return PreservedAnalyses::none();
678 }
679 
680 char MemorySanitizerLegacyPass::ID = 0;
681 
682 INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
683                       "MemorySanitizer: detects uninitialized reads.", false,
684                       false)
685 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
686 INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
687                     "MemorySanitizer: detects uninitialized reads.", false,
688                     false)
689 
690 FunctionPass *
691 llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
692   return new MemorySanitizerLegacyPass(Options);
693 }
694 
695 /// Create a non-const global initialized with the given string.
696 ///
697 /// Creates a writable global for Str so that we can pass it to the
698 /// run-time lib. Runtime uses first 4 bytes of the string to store the
699 /// frame ID, so the string needs to be mutable.
700 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
701                                                             StringRef Str) {
702   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
703   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
704                             GlobalValue::PrivateLinkage, StrConst, "");
705 }
706 
707 /// Create KMSAN API callbacks.
708 void MemorySanitizer::createKernelApi(Module &M) {
709   IRBuilder<> IRB(*C);
710 
711   // These will be initialized in insertKmsanPrologue().
712   RetvalTLS = nullptr;
713   RetvalOriginTLS = nullptr;
714   ParamTLS = nullptr;
715   ParamOriginTLS = nullptr;
716   VAArgTLS = nullptr;
717   VAArgOriginTLS = nullptr;
718   VAArgOverflowSizeTLS = nullptr;
719 
720   WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
721                                     IRB.getInt32Ty());
722   // Requests the per-task context state (kmsan_context_state*) from the
723   // runtime library.
724   MsanContextStateTy = StructType::get(
725       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
726       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
727       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
728       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
729       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
730       OriginTy);
731   MsanGetContextStateFn = M.getOrInsertFunction(
732       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
733 
734   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
735                                 PointerType::get(IRB.getInt32Ty(), 0));
736 
737   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
738     std::string name_load =
739         "__msan_metadata_ptr_for_load_" + std::to_string(size);
740     std::string name_store =
741         "__msan_metadata_ptr_for_store_" + std::to_string(size);
742     MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
743         name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
744     MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
745         name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
746   }
747 
748   MsanMetadataPtrForLoadN = M.getOrInsertFunction(
749       "__msan_metadata_ptr_for_load_n", RetTy,
750       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
751   MsanMetadataPtrForStoreN = M.getOrInsertFunction(
752       "__msan_metadata_ptr_for_store_n", RetTy,
753       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
754 
755   // Functions for poisoning and unpoisoning memory.
756   MsanPoisonAllocaFn =
757       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
758                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
759   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
760       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
761 }
762 
763 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
764   return M.getOrInsertGlobal(Name, Ty, [&] {
765     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
766                               nullptr, Name, nullptr,
767                               GlobalVariable::InitialExecTLSModel);
768   });
769 }
770 
771 /// Insert declarations for userspace-specific functions and globals.
772 void MemorySanitizer::createUserspaceApi(Module &M) {
773   IRBuilder<> IRB(*C);
774 
775   // Create the callback.
776   // FIXME: this function should have "Cold" calling conv,
777   // which is not yet implemented.
778   StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
779                                     : "__msan_warning_with_origin_noreturn";
780   WarningFn =
781       M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());
782 
783   // Create the global TLS variables.
784   RetvalTLS =
785       getOrInsertGlobal(M, "__msan_retval_tls",
786                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
787 
788   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
789 
790   ParamTLS =
791       getOrInsertGlobal(M, "__msan_param_tls",
792                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
793 
794   ParamOriginTLS =
795       getOrInsertGlobal(M, "__msan_param_origin_tls",
796                         ArrayType::get(OriginTy, kParamTLSSize / 4));
797 
798   VAArgTLS =
799       getOrInsertGlobal(M, "__msan_va_arg_tls",
800                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
801 
802   VAArgOriginTLS =
803       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
804                         ArrayType::get(OriginTy, kParamTLSSize / 4));
805 
806   VAArgOverflowSizeTLS =
807       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
808 
809   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
810        AccessSizeIndex++) {
811     unsigned AccessSize = 1 << AccessSizeIndex;
812     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
813     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
814     MaybeWarningFnAttrs.push_back(std::make_pair(
815         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
816     MaybeWarningFnAttrs.push_back(std::make_pair(
817         AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
818     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
819         FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
820         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
821 
822     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
823     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
824     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
825         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
826     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
827         AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
828     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
829         FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
830         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
831         IRB.getInt32Ty());
832   }
833 
834   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
835     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
836     IRB.getInt8PtrTy(), IntptrTy);
837   MsanPoisonStackFn =
838       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
839                             IRB.getInt8PtrTy(), IntptrTy);
840 }
841 
842 /// Insert extern declaration of runtime-provided functions and globals.
843 void MemorySanitizer::initializeCallbacks(Module &M) {
844   // Only do this once.
845   if (CallbacksInitialized)
846     return;
847 
848   IRBuilder<> IRB(*C);
849   // Initialize callbacks that are common for kernel and userspace
850   // instrumentation.
851   MsanChainOriginFn = M.getOrInsertFunction(
852     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
853   MemmoveFn = M.getOrInsertFunction(
854     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
855     IRB.getInt8PtrTy(), IntptrTy);
856   MemcpyFn = M.getOrInsertFunction(
857     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
858     IntptrTy);
859   MemsetFn = M.getOrInsertFunction(
860     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
861     IntptrTy);
862 
863   MsanInstrumentAsmStoreFn =
864       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
865                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
866 
867   if (CompileKernel) {
868     createKernelApi(M);
869   } else {
870     createUserspaceApi(M);
871   }
872   CallbacksInitialized = true;
873 }
874 
875 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
876                                                              int size) {
877   FunctionCallee *Fns =
878       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
879   switch (size) {
880   case 1:
881     return Fns[0];
882   case 2:
883     return Fns[1];
884   case 4:
885     return Fns[2];
886   case 8:
887     return Fns[3];
888   default:
889     return nullptr;
890   }
891 }
892 
893 /// Module-level initialization.
894 ///
895 /// inserts a call to __msan_init to the module's constructor list.
896 void MemorySanitizer::initializeModule(Module &M) {
897   auto &DL = M.getDataLayout();
898 
899   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
900   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
901   // Check the overrides first
902   if (ShadowPassed || OriginPassed) {
903     CustomMapParams.AndMask = ClAndMask;
904     CustomMapParams.XorMask = ClXorMask;
905     CustomMapParams.ShadowBase = ClShadowBase;
906     CustomMapParams.OriginBase = ClOriginBase;
907     MapParams = &CustomMapParams;
908   } else {
909     Triple TargetTriple(M.getTargetTriple());
910     switch (TargetTriple.getOS()) {
911       case Triple::FreeBSD:
912         switch (TargetTriple.getArch()) {
913           case Triple::x86_64:
914             MapParams = FreeBSD_X86_MemoryMapParams.bits64;
915             break;
916           case Triple::x86:
917             MapParams = FreeBSD_X86_MemoryMapParams.bits32;
918             break;
919           default:
920             report_fatal_error("unsupported architecture");
921         }
922         break;
923       case Triple::NetBSD:
924         switch (TargetTriple.getArch()) {
925           case Triple::x86_64:
926             MapParams = NetBSD_X86_MemoryMapParams.bits64;
927             break;
928           default:
929             report_fatal_error("unsupported architecture");
930         }
931         break;
932       case Triple::Linux:
933         switch (TargetTriple.getArch()) {
934           case Triple::x86_64:
935             MapParams = Linux_X86_MemoryMapParams.bits64;
936             break;
937           case Triple::x86:
938             MapParams = Linux_X86_MemoryMapParams.bits32;
939             break;
940           case Triple::mips64:
941           case Triple::mips64el:
942             MapParams = Linux_MIPS_MemoryMapParams.bits64;
943             break;
944           case Triple::ppc64:
945           case Triple::ppc64le:
946             MapParams = Linux_PowerPC_MemoryMapParams.bits64;
947             break;
948           case Triple::systemz:
949             MapParams = Linux_S390_MemoryMapParams.bits64;
950             break;
951           case Triple::aarch64:
952           case Triple::aarch64_be:
953             MapParams = Linux_ARM_MemoryMapParams.bits64;
954             break;
955           default:
956             report_fatal_error("unsupported architecture");
957         }
958         break;
959       default:
960         report_fatal_error("unsupported operating system");
961     }
962   }
963 
964   C = &(M.getContext());
965   IRBuilder<> IRB(*C);
966   IntptrTy = IRB.getIntPtrTy(DL);
967   OriginTy = IRB.getInt32Ty();
968 
969   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
970   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
971 
972   if (!CompileKernel) {
973     if (TrackOrigins)
974       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
975         return new GlobalVariable(
976             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
977             IRB.getInt32(TrackOrigins), "__msan_track_origins");
978       });
979 
980     if (Recover)
981       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
982         return new GlobalVariable(M, IRB.getInt32Ty(), true,
983                                   GlobalValue::WeakODRLinkage,
984                                   IRB.getInt32(Recover), "__msan_keep_going");
985       });
986 }
987 }
988 
989 bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
990   if (!Options.Kernel)
991     insertModuleCtor(M);
992   MSan.emplace(M, Options);
993   return true;
994 }
995 
996 namespace {
997 
998 /// A helper class that handles instrumentation of VarArg
999 /// functions on a particular platform.
1000 ///
1001 /// Implementations are expected to insert the instrumentation
1002 /// necessary to propagate argument shadow through VarArg function
1003 /// calls. Visit* methods are called during an InstVisitor pass over
1004 /// the function, and should avoid creating new basic blocks. A new
1005 /// instance of this class is created for each instrumented function.
1006 struct VarArgHelper {
1007   virtual ~VarArgHelper() = default;
1008 
1009   /// Visit a CallBase.
1010   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1011 
1012   /// Visit a va_start call.
1013   virtual void visitVAStartInst(VAStartInst &I) = 0;
1014 
1015   /// Visit a va_copy call.
1016   virtual void visitVACopyInst(VACopyInst &I) = 0;
1017 
1018   /// Finalize function instrumentation.
1019   ///
1020   /// This method is called after visiting all interesting (see above)
1021   /// instructions in a function.
1022   virtual void finalizeInstrumentation() = 0;
1023 };
1024 
1025 struct MemorySanitizerVisitor;
1026 
1027 } // end anonymous namespace
1028 
1029 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1030                                         MemorySanitizerVisitor &Visitor);
1031 
1032 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
1033   if (TypeSize <= 8) return 0;
1034   return Log2_32_Ceil((TypeSize + 7) / 8);
1035 }
1036 
1037 namespace {
1038 
1039 /// This class does all the work for a given function. Store and Load
1040 /// instructions store and load corresponding shadow and origin
1041 /// values. Most instructions propagate shadow from arguments to their
1042 /// return values. Certain instructions (most importantly, BranchInst)
1043 /// test their argument shadow and print reports (with a runtime call) if it's
1044 /// non-zero.
1045 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1046   Function &F;
1047   MemorySanitizer &MS;
1048   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1049   ValueMap<Value*, Value*> ShadowMap, OriginMap;
1050   std::unique_ptr<VarArgHelper> VAHelper;
1051   const TargetLibraryInfo *TLI;
1052   BasicBlock *ActualFnStart;
1053 
1054   // The following flags disable parts of MSan instrumentation based on
1055   // exclusion list contents and command-line options.
1056   bool InsertChecks;
1057   bool PropagateShadow;
1058   bool PoisonStack;
1059   bool PoisonUndef;
1060 
1061   struct ShadowOriginAndInsertPoint {
1062     Value *Shadow;
1063     Value *Origin;
1064     Instruction *OrigIns;
1065 
1066     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1067       : Shadow(S), Origin(O), OrigIns(I) {}
1068   };
1069   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1070   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1071   SmallSet<AllocaInst *, 16> AllocaSet;
1072   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1073   SmallVector<StoreInst *, 16> StoreList;
1074 
1075   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1076                          const TargetLibraryInfo &TLI)
1077       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1078     bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
1079     InsertChecks = SanitizeFunction;
1080     PropagateShadow = SanitizeFunction;
1081     PoisonStack = SanitizeFunction && ClPoisonStack;
1082     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1083 
1084     MS.initializeCallbacks(*F.getParent());
1085     if (MS.CompileKernel)
1086       ActualFnStart = insertKmsanPrologue(F);
1087     else
1088       ActualFnStart = &F.getEntryBlock();
1089 
1090     LLVM_DEBUG(if (!InsertChecks) dbgs()
1091                << "MemorySanitizer is not inserting checks into '"
1092                << F.getName() << "'\n");
1093   }
1094 
1095   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1096     if (MS.TrackOrigins <= 1) return V;
1097     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1098   }
1099 
1100   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1101     const DataLayout &DL = F.getParent()->getDataLayout();
1102     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1103     if (IntptrSize == kOriginSize) return Origin;
1104     assert(IntptrSize == kOriginSize * 2);
1105     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1106     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1107   }
1108 
1109   /// Fill memory range with the given origin value.
1110   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1111                    unsigned Size, Align Alignment) {
1112     const DataLayout &DL = F.getParent()->getDataLayout();
1113     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1114     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1115     assert(IntptrAlignment >= kMinOriginAlignment);
1116     assert(IntptrSize >= kOriginSize);
1117 
1118     unsigned Ofs = 0;
1119     Align CurrentAlignment = Alignment;
1120     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1121       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1122       Value *IntptrOriginPtr =
1123           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1124       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1125         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1126                        : IntptrOriginPtr;
1127         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1128         Ofs += IntptrSize / kOriginSize;
1129         CurrentAlignment = IntptrAlignment;
1130       }
1131     }
1132 
1133     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1134       Value *GEP =
1135           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1136       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1137       CurrentAlignment = kMinOriginAlignment;
1138     }
1139   }
1140 
1141   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1142                    Value *OriginPtr, Align Alignment, bool AsCall) {
1143     const DataLayout &DL = F.getParent()->getDataLayout();
1144     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1145     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
1146     if (Shadow->getType()->isAggregateType()) {
1147       paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1148                   OriginAlignment);
1149     } else {
1150       Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
1151       if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1152         if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
1153           paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1154                       OriginAlignment);
1155         return;
1156       }
1157 
1158       unsigned TypeSizeInBits =
1159           DL.getTypeSizeInBits(ConvertedShadow->getType());
1160       unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1161       if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1162         FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1163         Value *ConvertedShadow2 = IRB.CreateZExt(
1164             ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1165         IRB.CreateCall(Fn, {ConvertedShadow2,
1166                             IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
1167                             Origin});
1168       } else {
1169         Value *Cmp = IRB.CreateICmpNE(
1170             ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
1171         Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1172             Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1173         IRBuilder<> IRBNew(CheckTerm);
1174         paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1175                     OriginAlignment);
1176       }
1177     }
1178   }
1179 
1180   void materializeStores(bool InstrumentWithCalls) {
1181     for (StoreInst *SI : StoreList) {
1182       IRBuilder<> IRB(SI);
1183       Value *Val = SI->getValueOperand();
1184       Value *Addr = SI->getPointerOperand();
1185       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1186       Value *ShadowPtr, *OriginPtr;
1187       Type *ShadowTy = Shadow->getType();
1188       const Align Alignment = assumeAligned(SI->getAlignment());
1189       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1190       std::tie(ShadowPtr, OriginPtr) =
1191           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1192 
1193       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1194       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1195       (void)NewSI;
1196 
1197       if (SI->isAtomic())
1198         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1199 
1200       if (MS.TrackOrigins && !SI->isAtomic())
1201         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1202                     OriginAlignment, InstrumentWithCalls);
1203     }
1204   }
1205 
1206   /// Helper function to insert a warning at IRB's current insert point.
1207   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1208     if (!Origin)
1209       Origin = (Value *)IRB.getInt32(0);
1210     assert(Origin->getType()->isIntegerTy());
1211     IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1212     // FIXME: Insert UnreachableInst if !MS.Recover?
1213     // This may invalidate some of the following checks and needs to be done
1214     // at the very end.
1215   }
1216 
1217   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
1218                            bool AsCall) {
1219     IRBuilder<> IRB(OrigIns);
1220     LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
1221     Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
1222     LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
1223 
1224     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1225       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
1226         insertWarningFn(IRB, Origin);
1227       }
1228       return;
1229     }
1230 
1231     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
1232 
1233     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1234     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1235     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1236       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1237       Value *ConvertedShadow2 =
1238           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1239       IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
1240                                                 ? Origin
1241                                                 : (Value *)IRB.getInt32(0)});
1242     } else {
1243       Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
1244                                     getCleanShadow(ConvertedShadow), "_mscmp");
1245       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1246           Cmp, OrigIns,
1247           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1248 
1249       IRB.SetInsertPoint(CheckTerm);
1250       insertWarningFn(IRB, Origin);
1251       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1252     }
1253   }
1254 
1255   void materializeChecks(bool InstrumentWithCalls) {
1256     for (const auto &ShadowData : InstrumentationList) {
1257       Instruction *OrigIns = ShadowData.OrigIns;
1258       Value *Shadow = ShadowData.Shadow;
1259       Value *Origin = ShadowData.Origin;
1260       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1261     }
1262     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1263   }
1264 
1265   BasicBlock *insertKmsanPrologue(Function &F) {
1266     BasicBlock *ret =
1267         SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
1268     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
1269     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1270     Constant *Zero = IRB.getInt32(0);
1271     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1272                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1273     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1274                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1275     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1276                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1277     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1278                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1279     MS.VAArgOverflowSizeTLS =
1280         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1281                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1282     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1283                                       {Zero, IRB.getInt32(5)}, "param_origin");
1284     MS.RetvalOriginTLS =
1285         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1286                       {Zero, IRB.getInt32(6)}, "retval_origin");
1287     return ret;
1288   }
1289 
1290   /// Add MemorySanitizer instrumentation to a function.
1291   bool runOnFunction() {
1292     // In the presence of unreachable blocks, we may see Phi nodes with
1293     // incoming nodes from such blocks. Since InstVisitor skips unreachable
1294     // blocks, such nodes will not have any shadow value associated with them.
1295     // It's easier to remove unreachable blocks than deal with missing shadow.
1296     removeUnreachableBlocks(F);
1297 
1298     // Iterate all BBs in depth-first order and create shadow instructions
1299     // for all instructions (where applicable).
1300     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1301     for (BasicBlock *BB : depth_first(ActualFnStart))
1302       visit(*BB);
1303 
1304     // Finalize PHI nodes.
1305     for (PHINode *PN : ShadowPHINodes) {
1306       PHINode *PNS = cast<PHINode>(getShadow(PN));
1307       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1308       size_t NumValues = PN->getNumIncomingValues();
1309       for (size_t v = 0; v < NumValues; v++) {
1310         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1311         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1312       }
1313     }
1314 
1315     VAHelper->finalizeInstrumentation();
1316 
1317     // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
1318     // instrumenting only allocas.
1319     if (InstrumentLifetimeStart) {
1320       for (auto Item : LifetimeStartList) {
1321         instrumentAlloca(*Item.second, Item.first);
1322         AllocaSet.erase(Item.second);
1323       }
1324     }
1325     // Poison the allocas for which we didn't instrument the corresponding
1326     // lifetime intrinsics.
1327     for (AllocaInst *AI : AllocaSet)
1328       instrumentAlloca(*AI);
1329 
1330     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
1331                                InstrumentationList.size() + StoreList.size() >
1332                                    (unsigned)ClInstrumentationWithCallThreshold;
1333 
1334     // Insert shadow value checks.
1335     materializeChecks(InstrumentWithCalls);
1336 
1337     // Delayed instrumentation of StoreInst.
1338     // This may not add new address checks.
1339     materializeStores(InstrumentWithCalls);
1340 
1341     return true;
1342   }
1343 
1344   /// Compute the shadow type that corresponds to a given Value.
1345   Type *getShadowTy(Value *V) {
1346     return getShadowTy(V->getType());
1347   }
1348 
1349   /// Compute the shadow type that corresponds to a given Type.
1350   Type *getShadowTy(Type *OrigTy) {
1351     if (!OrigTy->isSized()) {
1352       return nullptr;
1353     }
1354     // For integer type, shadow is the same as the original type.
1355     // This may return weird-sized types like i1.
1356     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1357       return IT;
1358     const DataLayout &DL = F.getParent()->getDataLayout();
1359     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1360       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1361       return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
1362                                   cast<FixedVectorType>(VT)->getNumElements());
1363     }
1364     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1365       return ArrayType::get(getShadowTy(AT->getElementType()),
1366                             AT->getNumElements());
1367     }
1368     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1369       SmallVector<Type*, 4> Elements;
1370       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1371         Elements.push_back(getShadowTy(ST->getElementType(i)));
1372       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1373       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1374       return Res;
1375     }
1376     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1377     return IntegerType::get(*MS.C, TypeSize);
1378   }
1379 
1380   /// Flatten a vector type.
1381   Type *getShadowTyNoVec(Type *ty) {
1382     if (VectorType *vt = dyn_cast<VectorType>(ty))
1383       return IntegerType::get(*MS.C,
1384                               vt->getPrimitiveSizeInBits().getFixedSize());
1385     return ty;
1386   }
1387 
1388   /// Convert a shadow value to it's flattened variant.
1389   Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
1390     Type *Ty = V->getType();
1391     Type *NoVecTy = getShadowTyNoVec(Ty);
1392     if (Ty == NoVecTy) return V;
1393     return IRB.CreateBitCast(V, NoVecTy);
1394   }
1395 
1396   /// Compute the integer shadow offset that corresponds to a given
1397   /// application address.
1398   ///
1399   /// Offset = (Addr & ~AndMask) ^ XorMask
1400   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1401     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1402 
1403     uint64_t AndMask = MS.MapParams->AndMask;
1404     if (AndMask)
1405       OffsetLong =
1406           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1407 
1408     uint64_t XorMask = MS.MapParams->XorMask;
1409     if (XorMask)
1410       OffsetLong =
1411           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1412     return OffsetLong;
1413   }
1414 
1415   /// Compute the shadow and origin addresses corresponding to a given
1416   /// application address.
1417   ///
1418   /// Shadow = ShadowBase + Offset
1419   /// Origin = (OriginBase + Offset) & ~3ULL
1420   std::pair<Value *, Value *>
1421   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1422                               MaybeAlign Alignment) {
1423     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1424     Value *ShadowLong = ShadowOffset;
1425     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1426     if (ShadowBase != 0) {
1427       ShadowLong =
1428         IRB.CreateAdd(ShadowLong,
1429                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1430     }
1431     Value *ShadowPtr =
1432         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1433     Value *OriginPtr = nullptr;
1434     if (MS.TrackOrigins) {
1435       Value *OriginLong = ShadowOffset;
1436       uint64_t OriginBase = MS.MapParams->OriginBase;
1437       if (OriginBase != 0)
1438         OriginLong = IRB.CreateAdd(OriginLong,
1439                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1440       if (!Alignment || *Alignment < kMinOriginAlignment) {
1441         uint64_t Mask = kMinOriginAlignment.value() - 1;
1442         OriginLong =
1443             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1444       }
1445       OriginPtr =
1446           IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
1447     }
1448     return std::make_pair(ShadowPtr, OriginPtr);
1449   }
1450 
1451   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1452                                                        IRBuilder<> &IRB,
1453                                                        Type *ShadowTy,
1454                                                        bool isStore) {
1455     Value *ShadowOriginPtrs;
1456     const DataLayout &DL = F.getParent()->getDataLayout();
1457     int Size = DL.getTypeStoreSize(ShadowTy);
1458 
1459     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1460     Value *AddrCast =
1461         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1462     if (Getter) {
1463       ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
1464     } else {
1465       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1466       ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
1467                                                 : MS.MsanMetadataPtrForLoadN,
1468                                         {AddrCast, SizeVal});
1469     }
1470     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1471     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1472     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1473 
1474     return std::make_pair(ShadowPtr, OriginPtr);
1475   }
1476 
1477   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1478                                                  Type *ShadowTy,
1479                                                  MaybeAlign Alignment,
1480                                                  bool isStore) {
1481     if (MS.CompileKernel)
1482       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1483     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1484   }
1485 
1486   /// Compute the shadow address for a given function argument.
1487   ///
1488   /// Shadow = ParamTLS+ArgOffset.
1489   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1490                                  int ArgOffset) {
1491     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1492     if (ArgOffset)
1493       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1494     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1495                               "_msarg");
1496   }
1497 
1498   /// Compute the origin address for a given function argument.
1499   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1500                                  int ArgOffset) {
1501     if (!MS.TrackOrigins)
1502       return nullptr;
1503     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1504     if (ArgOffset)
1505       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1506     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1507                               "_msarg_o");
1508   }
1509 
1510   /// Compute the shadow address for a retval.
1511   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1512     return IRB.CreatePointerCast(MS.RetvalTLS,
1513                                  PointerType::get(getShadowTy(A), 0),
1514                                  "_msret");
1515   }
1516 
1517   /// Compute the origin address for a retval.
1518   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1519     // We keep a single origin for the entire retval. Might be too optimistic.
1520     return MS.RetvalOriginTLS;
1521   }
1522 
1523   /// Set SV to be the shadow value for V.
1524   void setShadow(Value *V, Value *SV) {
1525     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1526     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1527   }
1528 
1529   /// Set Origin to be the origin value for V.
1530   void setOrigin(Value *V, Value *Origin) {
1531     if (!MS.TrackOrigins) return;
1532     assert(!OriginMap.count(V) && "Values may only have one origin");
1533     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1534     OriginMap[V] = Origin;
1535   }
1536 
1537   Constant *getCleanShadow(Type *OrigTy) {
1538     Type *ShadowTy = getShadowTy(OrigTy);
1539     if (!ShadowTy)
1540       return nullptr;
1541     return Constant::getNullValue(ShadowTy);
1542   }
1543 
1544   /// Create a clean shadow value for a given value.
1545   ///
1546   /// Clean shadow (all zeroes) means all bits of the value are defined
1547   /// (initialized).
1548   Constant *getCleanShadow(Value *V) {
1549     return getCleanShadow(V->getType());
1550   }
1551 
1552   /// Create a dirty shadow of a given shadow type.
1553   Constant *getPoisonedShadow(Type *ShadowTy) {
1554     assert(ShadowTy);
1555     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1556       return Constant::getAllOnesValue(ShadowTy);
1557     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1558       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1559                                       getPoisonedShadow(AT->getElementType()));
1560       return ConstantArray::get(AT, Vals);
1561     }
1562     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1563       SmallVector<Constant *, 4> Vals;
1564       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1565         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1566       return ConstantStruct::get(ST, Vals);
1567     }
1568     llvm_unreachable("Unexpected shadow type");
1569   }
1570 
1571   /// Create a dirty shadow for a given value.
1572   Constant *getPoisonedShadow(Value *V) {
1573     Type *ShadowTy = getShadowTy(V);
1574     if (!ShadowTy)
1575       return nullptr;
1576     return getPoisonedShadow(ShadowTy);
1577   }
1578 
1579   /// Create a clean (zero) origin.
1580   Value *getCleanOrigin() {
1581     return Constant::getNullValue(MS.OriginTy);
1582   }
1583 
1584   /// Get the shadow value for a given Value.
1585   ///
1586   /// This function either returns the value set earlier with setShadow,
1587   /// or extracts if from ParamTLS (for function arguments).
1588   Value *getShadow(Value *V) {
1589     if (!PropagateShadow) return getCleanShadow(V);
1590     if (Instruction *I = dyn_cast<Instruction>(V)) {
1591       if (I->getMetadata("nosanitize"))
1592         return getCleanShadow(V);
1593       // For instructions the shadow is already stored in the map.
1594       Value *Shadow = ShadowMap[V];
1595       if (!Shadow) {
1596         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1597         (void)I;
1598         assert(Shadow && "No shadow for a value");
1599       }
1600       return Shadow;
1601     }
1602     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1603       Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
1604       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1605       (void)U;
1606       return AllOnes;
1607     }
1608     if (Argument *A = dyn_cast<Argument>(V)) {
1609       // For arguments we compute the shadow on demand and store it in the map.
1610       Value **ShadowPtr = &ShadowMap[V];
1611       if (*ShadowPtr)
1612         return *ShadowPtr;
1613       Function *F = A->getParent();
1614       IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
1615       unsigned ArgOffset = 0;
1616       const DataLayout &DL = F->getParent()->getDataLayout();
1617       for (auto &FArg : F->args()) {
1618         if (!FArg.getType()->isSized()) {
1619           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1620           continue;
1621         }
1622 
1623         bool FArgByVal = FArg.hasByValAttr();
1624         bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
1625         bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef;
1626         unsigned Size =
1627             FArg.hasByValAttr()
1628                 ? DL.getTypeAllocSize(FArg.getParamByValType())
1629                 : DL.getTypeAllocSize(FArg.getType());
1630 
1631         if (A == &FArg) {
1632           bool Overflow = ArgOffset + Size > kParamTLSSize;
1633           if (FArgEagerCheck) {
1634             *ShadowPtr = getCleanShadow(V);
1635             setOrigin(A, getCleanOrigin());
1636             continue;
1637           } else if (FArgByVal) {
1638             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1639             // ByVal pointer itself has clean shadow. We copy the actual
1640             // argument shadow to the underlying memory.
1641             // Figure out maximal valid memcpy alignment.
1642             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1643                 MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
1644             Value *CpShadowPtr =
1645                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1646                                    /*isStore*/ true)
1647                     .first;
1648             // TODO(glider): need to copy origins.
1649             if (Overflow) {
1650               // ParamTLS overflow.
1651               EntryIRB.CreateMemSet(
1652                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1653                   Size, ArgAlign);
1654             } else {
1655               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1656               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1657                                                  CopyAlign, Size);
1658               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1659               (void)Cpy;
1660             }
1661             *ShadowPtr = getCleanShadow(V);
1662           } else {
1663             // Shadow over TLS
1664             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1665             if (Overflow) {
1666               // ParamTLS overflow.
1667               *ShadowPtr = getCleanShadow(V);
1668             } else {
1669               *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
1670                                                       kShadowTLSAlignment);
1671             }
1672           }
1673           LLVM_DEBUG(dbgs()
1674                      << "  ARG:    " << FArg << " ==> " << **ShadowPtr << "\n");
1675           if (MS.TrackOrigins && !Overflow) {
1676             Value *OriginPtr =
1677                 getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1678             setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
1679           } else {
1680             setOrigin(A, getCleanOrigin());
1681           }
1682         }
1683 
1684         if (!FArgEagerCheck)
1685           ArgOffset += alignTo(Size, kShadowTLSAlignment);
1686       }
1687       assert(*ShadowPtr && "Could not find shadow for an argument");
1688       return *ShadowPtr;
1689     }
1690     // For everything else the shadow is zero.
1691     return getCleanShadow(V);
1692   }
1693 
1694   /// Get the shadow for i-th argument of the instruction I.
1695   Value *getShadow(Instruction *I, int i) {
1696     return getShadow(I->getOperand(i));
1697   }
1698 
1699   /// Get the origin for a value.
1700   Value *getOrigin(Value *V) {
1701     if (!MS.TrackOrigins) return nullptr;
1702     if (!PropagateShadow) return getCleanOrigin();
1703     if (isa<Constant>(V)) return getCleanOrigin();
1704     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1705            "Unexpected value type in getOrigin()");
1706     if (Instruction *I = dyn_cast<Instruction>(V)) {
1707       if (I->getMetadata("nosanitize"))
1708         return getCleanOrigin();
1709     }
1710     Value *Origin = OriginMap[V];
1711     assert(Origin && "Missing origin");
1712     return Origin;
1713   }
1714 
1715   /// Get the origin for i-th argument of the instruction I.
1716   Value *getOrigin(Instruction *I, int i) {
1717     return getOrigin(I->getOperand(i));
1718   }
1719 
1720   /// Remember the place where a shadow check should be inserted.
1721   ///
1722   /// This location will be later instrumented with a check that will print a
1723   /// UMR warning in runtime if the shadow value is not 0.
1724   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1725     assert(Shadow);
1726     if (!InsertChecks) return;
1727 #ifndef NDEBUG
1728     Type *ShadowTy = Shadow->getType();
1729     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
1730            "Can only insert checks for integer and vector shadow types");
1731 #endif
1732     InstrumentationList.push_back(
1733         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1734   }
1735 
1736   /// Remember the place where a shadow check should be inserted.
1737   ///
1738   /// This location will be later instrumented with a check that will print a
1739   /// UMR warning in runtime if the value is not fully defined.
1740   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1741     assert(Val);
1742     Value *Shadow, *Origin;
1743     if (ClCheckConstantShadow) {
1744       Shadow = getShadow(Val);
1745       if (!Shadow) return;
1746       Origin = getOrigin(Val);
1747     } else {
1748       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1749       if (!Shadow) return;
1750       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1751     }
1752     insertShadowCheck(Shadow, Origin, OrigIns);
1753   }
1754 
1755   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1756     switch (a) {
1757       case AtomicOrdering::NotAtomic:
1758         return AtomicOrdering::NotAtomic;
1759       case AtomicOrdering::Unordered:
1760       case AtomicOrdering::Monotonic:
1761       case AtomicOrdering::Release:
1762         return AtomicOrdering::Release;
1763       case AtomicOrdering::Acquire:
1764       case AtomicOrdering::AcquireRelease:
1765         return AtomicOrdering::AcquireRelease;
1766       case AtomicOrdering::SequentiallyConsistent:
1767         return AtomicOrdering::SequentiallyConsistent;
1768     }
1769     llvm_unreachable("Unknown ordering");
1770   }
1771 
1772   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1773     switch (a) {
1774       case AtomicOrdering::NotAtomic:
1775         return AtomicOrdering::NotAtomic;
1776       case AtomicOrdering::Unordered:
1777       case AtomicOrdering::Monotonic:
1778       case AtomicOrdering::Acquire:
1779         return AtomicOrdering::Acquire;
1780       case AtomicOrdering::Release:
1781       case AtomicOrdering::AcquireRelease:
1782         return AtomicOrdering::AcquireRelease;
1783       case AtomicOrdering::SequentiallyConsistent:
1784         return AtomicOrdering::SequentiallyConsistent;
1785     }
1786     llvm_unreachable("Unknown ordering");
1787   }
1788 
1789   // ------------------- Visitors.
1790   using InstVisitor<MemorySanitizerVisitor>::visit;
1791   void visit(Instruction &I) {
1792     if (!I.getMetadata("nosanitize"))
1793       InstVisitor<MemorySanitizerVisitor>::visit(I);
1794   }
1795 
1796   /// Instrument LoadInst
1797   ///
1798   /// Loads the corresponding shadow and (optionally) origin.
1799   /// Optionally, checks that the load address is fully defined.
1800   void visitLoadInst(LoadInst &I) {
1801     assert(I.getType()->isSized() && "Load type must have size");
1802     assert(!I.getMetadata("nosanitize"));
1803     IRBuilder<> IRB(I.getNextNode());
1804     Type *ShadowTy = getShadowTy(&I);
1805     Value *Addr = I.getPointerOperand();
1806     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
1807     const Align Alignment = assumeAligned(I.getAlignment());
1808     if (PropagateShadow) {
1809       std::tie(ShadowPtr, OriginPtr) =
1810           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
1811       setShadow(&I,
1812                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
1813     } else {
1814       setShadow(&I, getCleanShadow(&I));
1815     }
1816 
1817     if (ClCheckAccessAddress)
1818       insertShadowCheck(I.getPointerOperand(), &I);
1819 
1820     if (I.isAtomic())
1821       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1822 
1823     if (MS.TrackOrigins) {
1824       if (PropagateShadow) {
1825         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1826         setOrigin(
1827             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
1828       } else {
1829         setOrigin(&I, getCleanOrigin());
1830       }
1831     }
1832   }
1833 
1834   /// Instrument StoreInst
1835   ///
1836   /// Stores the corresponding shadow and (optionally) origin.
1837   /// Optionally, checks that the store address is fully defined.
1838   void visitStoreInst(StoreInst &I) {
1839     StoreList.push_back(&I);
1840     if (ClCheckAccessAddress)
1841       insertShadowCheck(I.getPointerOperand(), &I);
1842   }
1843 
1844   void handleCASOrRMW(Instruction &I) {
1845     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1846 
1847     IRBuilder<> IRB(&I);
1848     Value *Addr = I.getOperand(0);
1849     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(), Align(1),
1850                                           /*isStore*/ true)
1851                            .first;
1852 
1853     if (ClCheckAccessAddress)
1854       insertShadowCheck(Addr, &I);
1855 
1856     // Only test the conditional argument of cmpxchg instruction.
1857     // The other argument can potentially be uninitialized, but we can not
1858     // detect this situation reliably without possible false positives.
1859     if (isa<AtomicCmpXchgInst>(I))
1860       insertShadowCheck(I.getOperand(1), &I);
1861 
1862     IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
1863 
1864     setShadow(&I, getCleanShadow(&I));
1865     setOrigin(&I, getCleanOrigin());
1866   }
1867 
1868   void visitAtomicRMWInst(AtomicRMWInst &I) {
1869     handleCASOrRMW(I);
1870     I.setOrdering(addReleaseOrdering(I.getOrdering()));
1871   }
1872 
1873   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
1874     handleCASOrRMW(I);
1875     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
1876   }
1877 
1878   // Vector manipulation.
1879   void visitExtractElementInst(ExtractElementInst &I) {
1880     insertShadowCheck(I.getOperand(1), &I);
1881     IRBuilder<> IRB(&I);
1882     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1883               "_msprop"));
1884     setOrigin(&I, getOrigin(&I, 0));
1885   }
1886 
1887   void visitInsertElementInst(InsertElementInst &I) {
1888     insertShadowCheck(I.getOperand(2), &I);
1889     IRBuilder<> IRB(&I);
1890     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1891               I.getOperand(2), "_msprop"));
1892     setOriginForNaryOp(I);
1893   }
1894 
1895   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1896     IRBuilder<> IRB(&I);
1897     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1898                                           I.getShuffleMask(), "_msprop"));
1899     setOriginForNaryOp(I);
1900   }
1901 
1902   // Casts.
1903   void visitSExtInst(SExtInst &I) {
1904     IRBuilder<> IRB(&I);
1905     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
1906     setOrigin(&I, getOrigin(&I, 0));
1907   }
1908 
1909   void visitZExtInst(ZExtInst &I) {
1910     IRBuilder<> IRB(&I);
1911     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
1912     setOrigin(&I, getOrigin(&I, 0));
1913   }
1914 
1915   void visitTruncInst(TruncInst &I) {
1916     IRBuilder<> IRB(&I);
1917     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
1918     setOrigin(&I, getOrigin(&I, 0));
1919   }
1920 
1921   void visitBitCastInst(BitCastInst &I) {
1922     // Special case: if this is the bitcast (there is exactly 1 allowed) between
1923     // a musttail call and a ret, don't instrument. New instructions are not
1924     // allowed after a musttail call.
1925     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
1926       if (CI->isMustTailCall())
1927         return;
1928     IRBuilder<> IRB(&I);
1929     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
1930     setOrigin(&I, getOrigin(&I, 0));
1931   }
1932 
1933   void visitPtrToIntInst(PtrToIntInst &I) {
1934     IRBuilder<> IRB(&I);
1935     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1936              "_msprop_ptrtoint"));
1937     setOrigin(&I, getOrigin(&I, 0));
1938   }
1939 
1940   void visitIntToPtrInst(IntToPtrInst &I) {
1941     IRBuilder<> IRB(&I);
1942     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
1943              "_msprop_inttoptr"));
1944     setOrigin(&I, getOrigin(&I, 0));
1945   }
1946 
1947   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
1948   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
1949   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
1950   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
1951   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
1952   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
1953 
1954   /// Propagate shadow for bitwise AND.
1955   ///
1956   /// This code is exact, i.e. if, for example, a bit in the left argument
1957   /// is defined and 0, then neither the value not definedness of the
1958   /// corresponding bit in B don't affect the resulting shadow.
1959   void visitAnd(BinaryOperator &I) {
1960     IRBuilder<> IRB(&I);
1961     //  "And" of 0 and a poisoned value results in unpoisoned value.
1962     //  1&1 => 1;     0&1 => 0;     p&1 => p;
1963     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
1964     //  1&p => p;     0&p => 0;     p&p => p;
1965     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
1966     Value *S1 = getShadow(&I, 0);
1967     Value *S2 = getShadow(&I, 1);
1968     Value *V1 = I.getOperand(0);
1969     Value *V2 = I.getOperand(1);
1970     if (V1->getType() != S1->getType()) {
1971       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
1972       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
1973     }
1974     Value *S1S2 = IRB.CreateAnd(S1, S2);
1975     Value *V1S2 = IRB.CreateAnd(V1, S2);
1976     Value *S1V2 = IRB.CreateAnd(S1, V2);
1977     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
1978     setOriginForNaryOp(I);
1979   }
1980 
1981   void visitOr(BinaryOperator &I) {
1982     IRBuilder<> IRB(&I);
1983     //  "Or" of 1 and a poisoned value results in unpoisoned value.
1984     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
1985     //  1|0 => 1;     0|0 => 0;     p|0 => p;
1986     //  1|p => 1;     0|p => p;     p|p => p;
1987     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
1988     Value *S1 = getShadow(&I, 0);
1989     Value *S2 = getShadow(&I, 1);
1990     Value *V1 = IRB.CreateNot(I.getOperand(0));
1991     Value *V2 = IRB.CreateNot(I.getOperand(1));
1992     if (V1->getType() != S1->getType()) {
1993       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
1994       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
1995     }
1996     Value *S1S2 = IRB.CreateAnd(S1, S2);
1997     Value *V1S2 = IRB.CreateAnd(V1, S2);
1998     Value *S1V2 = IRB.CreateAnd(S1, V2);
1999     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2000     setOriginForNaryOp(I);
2001   }
2002 
2003   /// Default propagation of shadow and/or origin.
2004   ///
2005   /// This class implements the general case of shadow propagation, used in all
2006   /// cases where we don't know and/or don't care about what the operation
2007   /// actually does. It converts all input shadow values to a common type
2008   /// (extending or truncating as necessary), and bitwise OR's them.
2009   ///
2010   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2011   /// fully initialized), and less prone to false positives.
2012   ///
2013   /// This class also implements the general case of origin propagation. For a
2014   /// Nary operation, result origin is set to the origin of an argument that is
2015   /// not entirely initialized. If there is more than one such arguments, the
2016   /// rightmost of them is picked. It does not matter which one is picked if all
2017   /// arguments are initialized.
2018   template <bool CombineShadow>
2019   class Combiner {
2020     Value *Shadow = nullptr;
2021     Value *Origin = nullptr;
2022     IRBuilder<> &IRB;
2023     MemorySanitizerVisitor *MSV;
2024 
2025   public:
2026     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2027         : IRB(IRB), MSV(MSV) {}
2028 
2029     /// Add a pair of shadow and origin values to the mix.
2030     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2031       if (CombineShadow) {
2032         assert(OpShadow);
2033         if (!Shadow)
2034           Shadow = OpShadow;
2035         else {
2036           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2037           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2038         }
2039       }
2040 
2041       if (MSV->MS.TrackOrigins) {
2042         assert(OpOrigin);
2043         if (!Origin) {
2044           Origin = OpOrigin;
2045         } else {
2046           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2047           // No point in adding something that might result in 0 origin value.
2048           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2049             Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
2050             Value *Cond =
2051                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
2052             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2053           }
2054         }
2055       }
2056       return *this;
2057     }
2058 
2059     /// Add an application value to the mix.
2060     Combiner &Add(Value *V) {
2061       Value *OpShadow = MSV->getShadow(V);
2062       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2063       return Add(OpShadow, OpOrigin);
2064     }
2065 
2066     /// Set the current combined values as the given instruction's shadow
2067     /// and origin.
2068     void Done(Instruction *I) {
2069       if (CombineShadow) {
2070         assert(Shadow);
2071         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2072         MSV->setShadow(I, Shadow);
2073       }
2074       if (MSV->MS.TrackOrigins) {
2075         assert(Origin);
2076         MSV->setOrigin(I, Origin);
2077       }
2078     }
2079   };
2080 
2081   using ShadowAndOriginCombiner = Combiner<true>;
2082   using OriginCombiner = Combiner<false>;
2083 
2084   /// Propagate origin for arbitrary operation.
2085   void setOriginForNaryOp(Instruction &I) {
2086     if (!MS.TrackOrigins) return;
2087     IRBuilder<> IRB(&I);
2088     OriginCombiner OC(this, IRB);
2089     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2090       OC.Add(OI->get());
2091     OC.Done(&I);
2092   }
2093 
2094   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2095     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2096            "Vector of pointers is not a valid shadow type");
2097     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2098                                   Ty->getScalarSizeInBits()
2099                             : Ty->getPrimitiveSizeInBits();
2100   }
2101 
2102   /// Cast between two shadow types, extending or truncating as
2103   /// necessary.
2104   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2105                           bool Signed = false) {
2106     Type *srcTy = V->getType();
2107     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2108     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2109     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2110       return IRB.CreateICmpNE(V, getCleanShadow(V));
2111 
2112     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2113       return IRB.CreateIntCast(V, dstTy, Signed);
2114     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2115         cast<FixedVectorType>(dstTy)->getNumElements() ==
2116             cast<FixedVectorType>(srcTy)->getNumElements())
2117       return IRB.CreateIntCast(V, dstTy, Signed);
2118     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2119     Value *V2 =
2120       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2121     return IRB.CreateBitCast(V2, dstTy);
2122     // TODO: handle struct types.
2123   }
2124 
2125   /// Cast an application value to the type of its own shadow.
2126   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2127     Type *ShadowTy = getShadowTy(V);
2128     if (V->getType() == ShadowTy)
2129       return V;
2130     if (V->getType()->isPtrOrPtrVectorTy())
2131       return IRB.CreatePtrToInt(V, ShadowTy);
2132     else
2133       return IRB.CreateBitCast(V, ShadowTy);
2134   }
2135 
2136   /// Propagate shadow for arbitrary operation.
2137   void handleShadowOr(Instruction &I) {
2138     IRBuilder<> IRB(&I);
2139     ShadowAndOriginCombiner SC(this, IRB);
2140     for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
2141       SC.Add(OI->get());
2142     SC.Done(&I);
2143   }
2144 
2145   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2146 
2147   // Handle multiplication by constant.
2148   //
2149   // Handle a special case of multiplication by constant that may have one or
2150   // more zeros in the lower bits. This makes corresponding number of lower bits
2151   // of the result zero as well. We model it by shifting the other operand
2152   // shadow left by the required number of bits. Effectively, we transform
2153   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2154   // We use multiplication by 2**N instead of shift to cover the case of
2155   // multiplication by 0, which may occur in some elements of a vector operand.
2156   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2157                            Value *OtherArg) {
2158     Constant *ShadowMul;
2159     Type *Ty = ConstArg->getType();
2160     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2161       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2162       Type *EltTy = VTy->getElementType();
2163       SmallVector<Constant *, 16> Elements;
2164       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2165         if (ConstantInt *Elt =
2166                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2167           const APInt &V = Elt->getValue();
2168           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2169           Elements.push_back(ConstantInt::get(EltTy, V2));
2170         } else {
2171           Elements.push_back(ConstantInt::get(EltTy, 1));
2172         }
2173       }
2174       ShadowMul = ConstantVector::get(Elements);
2175     } else {
2176       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2177         const APInt &V = Elt->getValue();
2178         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2179         ShadowMul = ConstantInt::get(Ty, V2);
2180       } else {
2181         ShadowMul = ConstantInt::get(Ty, 1);
2182       }
2183     }
2184 
2185     IRBuilder<> IRB(&I);
2186     setShadow(&I,
2187               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2188     setOrigin(&I, getOrigin(OtherArg));
2189   }
2190 
2191   void visitMul(BinaryOperator &I) {
2192     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2193     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2194     if (constOp0 && !constOp1)
2195       handleMulByConstant(I, constOp0, I.getOperand(1));
2196     else if (constOp1 && !constOp0)
2197       handleMulByConstant(I, constOp1, I.getOperand(0));
2198     else
2199       handleShadowOr(I);
2200   }
2201 
2202   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2203   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2204   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2205   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2206   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2207   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2208 
2209   void handleIntegerDiv(Instruction &I) {
2210     IRBuilder<> IRB(&I);
2211     // Strict on the second argument.
2212     insertShadowCheck(I.getOperand(1), &I);
2213     setShadow(&I, getShadow(&I, 0));
2214     setOrigin(&I, getOrigin(&I, 0));
2215   }
2216 
2217   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2218   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2219   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2220   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2221 
2222   // Floating point division is side-effect free. We can not require that the
2223   // divisor is fully initialized and must propagate shadow. See PR37523.
2224   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2225   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2226 
2227   /// Instrument == and != comparisons.
2228   ///
2229   /// Sometimes the comparison result is known even if some of the bits of the
2230   /// arguments are not.
2231   void handleEqualityComparison(ICmpInst &I) {
2232     IRBuilder<> IRB(&I);
2233     Value *A = I.getOperand(0);
2234     Value *B = I.getOperand(1);
2235     Value *Sa = getShadow(A);
2236     Value *Sb = getShadow(B);
2237 
2238     // Get rid of pointers and vectors of pointers.
2239     // For ints (and vectors of ints), types of A and Sa match,
2240     // and this is a no-op.
2241     A = IRB.CreatePointerCast(A, Sa->getType());
2242     B = IRB.CreatePointerCast(B, Sb->getType());
2243 
2244     // A == B  <==>  (C = A^B) == 0
2245     // A != B  <==>  (C = A^B) != 0
2246     // Sc = Sa | Sb
2247     Value *C = IRB.CreateXor(A, B);
2248     Value *Sc = IRB.CreateOr(Sa, Sb);
2249     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2250     // Result is defined if one of the following is true
2251     // * there is a defined 1 bit in C
2252     // * C is fully defined
2253     // Si = !(C & ~Sc) && Sc
2254     Value *Zero = Constant::getNullValue(Sc->getType());
2255     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2256     Value *Si =
2257       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
2258                     IRB.CreateICmpEQ(
2259                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
2260     Si->setName("_msprop_icmp");
2261     setShadow(&I, Si);
2262     setOriginForNaryOp(I);
2263   }
2264 
2265   /// Build the lowest possible value of V, taking into account V's
2266   ///        uninitialized bits.
2267   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2268                                 bool isSigned) {
2269     if (isSigned) {
2270       // Split shadow into sign bit and other bits.
2271       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2272       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2273       // Maximise the undefined shadow bit, minimize other undefined bits.
2274       return
2275         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2276     } else {
2277       // Minimize undefined bits.
2278       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2279     }
2280   }
2281 
2282   /// Build the highest possible value of V, taking into account V's
2283   ///        uninitialized bits.
2284   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2285                                 bool isSigned) {
2286     if (isSigned) {
2287       // Split shadow into sign bit and other bits.
2288       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2289       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2290       // Minimise the undefined shadow bit, maximise other undefined bits.
2291       return
2292         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2293     } else {
2294       // Maximize undefined bits.
2295       return IRB.CreateOr(A, Sa);
2296     }
2297   }
2298 
2299   /// Instrument relational comparisons.
2300   ///
2301   /// This function does exact shadow propagation for all relational
2302   /// comparisons of integers, pointers and vectors of those.
2303   /// FIXME: output seems suboptimal when one of the operands is a constant
2304   void handleRelationalComparisonExact(ICmpInst &I) {
2305     IRBuilder<> IRB(&I);
2306     Value *A = I.getOperand(0);
2307     Value *B = I.getOperand(1);
2308     Value *Sa = getShadow(A);
2309     Value *Sb = getShadow(B);
2310 
2311     // Get rid of pointers and vectors of pointers.
2312     // For ints (and vectors of ints), types of A and Sa match,
2313     // and this is a no-op.
2314     A = IRB.CreatePointerCast(A, Sa->getType());
2315     B = IRB.CreatePointerCast(B, Sb->getType());
2316 
2317     // Let [a0, a1] be the interval of possible values of A, taking into account
2318     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2319     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2320     bool IsSigned = I.isSigned();
2321     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2322                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2323                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2324     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2325                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2326                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2327     Value *Si = IRB.CreateXor(S1, S2);
2328     setShadow(&I, Si);
2329     setOriginForNaryOp(I);
2330   }
2331 
2332   /// Instrument signed relational comparisons.
2333   ///
2334   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2335   /// bit of the shadow. Everything else is delegated to handleShadowOr().
2336   void handleSignedRelationalComparison(ICmpInst &I) {
2337     Constant *constOp;
2338     Value *op = nullptr;
2339     CmpInst::Predicate pre;
2340     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2341       op = I.getOperand(0);
2342       pre = I.getPredicate();
2343     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2344       op = I.getOperand(1);
2345       pre = I.getSwappedPredicate();
2346     } else {
2347       handleShadowOr(I);
2348       return;
2349     }
2350 
2351     if ((constOp->isNullValue() &&
2352          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2353         (constOp->isAllOnesValue() &&
2354          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2355       IRBuilder<> IRB(&I);
2356       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2357                                         "_msprop_icmp_s");
2358       setShadow(&I, Shadow);
2359       setOrigin(&I, getOrigin(op));
2360     } else {
2361       handleShadowOr(I);
2362     }
2363   }
2364 
2365   void visitICmpInst(ICmpInst &I) {
2366     if (!ClHandleICmp) {
2367       handleShadowOr(I);
2368       return;
2369     }
2370     if (I.isEquality()) {
2371       handleEqualityComparison(I);
2372       return;
2373     }
2374 
2375     assert(I.isRelational());
2376     if (ClHandleICmpExact) {
2377       handleRelationalComparisonExact(I);
2378       return;
2379     }
2380     if (I.isSigned()) {
2381       handleSignedRelationalComparison(I);
2382       return;
2383     }
2384 
2385     assert(I.isUnsigned());
2386     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2387       handleRelationalComparisonExact(I);
2388       return;
2389     }
2390 
2391     handleShadowOr(I);
2392   }
2393 
2394   void visitFCmpInst(FCmpInst &I) {
2395     handleShadowOr(I);
2396   }
2397 
2398   void handleShift(BinaryOperator &I) {
2399     IRBuilder<> IRB(&I);
2400     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2401     // Otherwise perform the same shift on S1.
2402     Value *S1 = getShadow(&I, 0);
2403     Value *S2 = getShadow(&I, 1);
2404     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2405                                    S2->getType());
2406     Value *V2 = I.getOperand(1);
2407     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2408     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2409     setOriginForNaryOp(I);
2410   }
2411 
2412   void visitShl(BinaryOperator &I) { handleShift(I); }
2413   void visitAShr(BinaryOperator &I) { handleShift(I); }
2414   void visitLShr(BinaryOperator &I) { handleShift(I); }
2415 
2416   /// Instrument llvm.memmove
2417   ///
2418   /// At this point we don't know if llvm.memmove will be inlined or not.
2419   /// If we don't instrument it and it gets inlined,
2420   /// our interceptor will not kick in and we will lose the memmove.
2421   /// If we instrument the call here, but it does not get inlined,
2422   /// we will memove the shadow twice: which is bad in case
2423   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2424   ///
2425   /// Similar situation exists for memcpy and memset.
2426   void visitMemMoveInst(MemMoveInst &I) {
2427     IRBuilder<> IRB(&I);
2428     IRB.CreateCall(
2429         MS.MemmoveFn,
2430         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2431          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2432          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2433     I.eraseFromParent();
2434   }
2435 
2436   // Similar to memmove: avoid copying shadow twice.
2437   // This is somewhat unfortunate as it may slowdown small constant memcpys.
2438   // FIXME: consider doing manual inline for small constant sizes and proper
2439   // alignment.
2440   void visitMemCpyInst(MemCpyInst &I) {
2441     IRBuilder<> IRB(&I);
2442     IRB.CreateCall(
2443         MS.MemcpyFn,
2444         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2445          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2446          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2447     I.eraseFromParent();
2448   }
2449 
2450   // Same as memcpy.
2451   void visitMemSetInst(MemSetInst &I) {
2452     IRBuilder<> IRB(&I);
2453     IRB.CreateCall(
2454         MS.MemsetFn,
2455         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2456          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2457          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2458     I.eraseFromParent();
2459   }
2460 
2461   void visitVAStartInst(VAStartInst &I) {
2462     VAHelper->visitVAStartInst(I);
2463   }
2464 
2465   void visitVACopyInst(VACopyInst &I) {
2466     VAHelper->visitVACopyInst(I);
2467   }
2468 
2469   /// Handle vector store-like intrinsics.
2470   ///
2471   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2472   /// has 1 pointer argument and 1 vector argument, returns void.
2473   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2474     IRBuilder<> IRB(&I);
2475     Value* Addr = I.getArgOperand(0);
2476     Value *Shadow = getShadow(&I, 1);
2477     Value *ShadowPtr, *OriginPtr;
2478 
2479     // We don't know the pointer alignment (could be unaligned SSE store!).
2480     // Have to assume to worst case.
2481     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2482         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2483     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2484 
2485     if (ClCheckAccessAddress)
2486       insertShadowCheck(Addr, &I);
2487 
2488     // FIXME: factor out common code from materializeStores
2489     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2490     return true;
2491   }
2492 
2493   /// Handle vector load-like intrinsics.
2494   ///
2495   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2496   /// has 1 pointer argument, returns a vector.
2497   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2498     IRBuilder<> IRB(&I);
2499     Value *Addr = I.getArgOperand(0);
2500 
2501     Type *ShadowTy = getShadowTy(&I);
2502     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2503     if (PropagateShadow) {
2504       // We don't know the pointer alignment (could be unaligned SSE load!).
2505       // Have to assume to worst case.
2506       const Align Alignment = Align(1);
2507       std::tie(ShadowPtr, OriginPtr) =
2508           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2509       setShadow(&I,
2510                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2511     } else {
2512       setShadow(&I, getCleanShadow(&I));
2513     }
2514 
2515     if (ClCheckAccessAddress)
2516       insertShadowCheck(Addr, &I);
2517 
2518     if (MS.TrackOrigins) {
2519       if (PropagateShadow)
2520         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2521       else
2522         setOrigin(&I, getCleanOrigin());
2523     }
2524     return true;
2525   }
2526 
2527   /// Handle (SIMD arithmetic)-like intrinsics.
2528   ///
2529   /// Instrument intrinsics with any number of arguments of the same type,
2530   /// equal to the return type. The type should be simple (no aggregates or
2531   /// pointers; vectors are fine).
2532   /// Caller guarantees that this intrinsic does not access memory.
2533   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2534     Type *RetTy = I.getType();
2535     if (!(RetTy->isIntOrIntVectorTy() ||
2536           RetTy->isFPOrFPVectorTy() ||
2537           RetTy->isX86_MMXTy()))
2538       return false;
2539 
2540     unsigned NumArgOperands = I.getNumArgOperands();
2541 
2542     for (unsigned i = 0; i < NumArgOperands; ++i) {
2543       Type *Ty = I.getArgOperand(i)->getType();
2544       if (Ty != RetTy)
2545         return false;
2546     }
2547 
2548     IRBuilder<> IRB(&I);
2549     ShadowAndOriginCombiner SC(this, IRB);
2550     for (unsigned i = 0; i < NumArgOperands; ++i)
2551       SC.Add(I.getArgOperand(i));
2552     SC.Done(&I);
2553 
2554     return true;
2555   }
2556 
2557   /// Heuristically instrument unknown intrinsics.
2558   ///
2559   /// The main purpose of this code is to do something reasonable with all
2560   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2561   /// We recognize several classes of intrinsics by their argument types and
2562   /// ModRefBehaviour and apply special instrumentation when we are reasonably
2563   /// sure that we know what the intrinsic does.
2564   ///
2565   /// We special-case intrinsics where this approach fails. See llvm.bswap
2566   /// handling as an example of that.
2567   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2568     unsigned NumArgOperands = I.getNumArgOperands();
2569     if (NumArgOperands == 0)
2570       return false;
2571 
2572     if (NumArgOperands == 2 &&
2573         I.getArgOperand(0)->getType()->isPointerTy() &&
2574         I.getArgOperand(1)->getType()->isVectorTy() &&
2575         I.getType()->isVoidTy() &&
2576         !I.onlyReadsMemory()) {
2577       // This looks like a vector store.
2578       return handleVectorStoreIntrinsic(I);
2579     }
2580 
2581     if (NumArgOperands == 1 &&
2582         I.getArgOperand(0)->getType()->isPointerTy() &&
2583         I.getType()->isVectorTy() &&
2584         I.onlyReadsMemory()) {
2585       // This looks like a vector load.
2586       return handleVectorLoadIntrinsic(I);
2587     }
2588 
2589     if (I.doesNotAccessMemory())
2590       if (maybeHandleSimpleNomemIntrinsic(I))
2591         return true;
2592 
2593     // FIXME: detect and handle SSE maskstore/maskload
2594     return false;
2595   }
2596 
2597   void handleInvariantGroup(IntrinsicInst &I) {
2598     setShadow(&I, getShadow(&I, 0));
2599     setOrigin(&I, getOrigin(&I, 0));
2600   }
2601 
2602   void handleLifetimeStart(IntrinsicInst &I) {
2603     if (!PoisonStack)
2604       return;
2605     DenseMap<Value *, AllocaInst *> AllocaForValue;
2606     AllocaInst *AI =
2607         llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
2608     if (!AI)
2609       InstrumentLifetimeStart = false;
2610     LifetimeStartList.push_back(std::make_pair(&I, AI));
2611   }
2612 
2613   void handleBswap(IntrinsicInst &I) {
2614     IRBuilder<> IRB(&I);
2615     Value *Op = I.getArgOperand(0);
2616     Type *OpType = Op->getType();
2617     Function *BswapFunc = Intrinsic::getDeclaration(
2618       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2619     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2620     setOrigin(&I, getOrigin(Op));
2621   }
2622 
2623   // Instrument vector convert intrinsic.
2624   //
2625   // This function instruments intrinsics like cvtsi2ss:
2626   // %Out = int_xxx_cvtyyy(%ConvertOp)
2627   // or
2628   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
2629   // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
2630   // number \p Out elements, and (if has 2 arguments) copies the rest of the
2631   // elements from \p CopyOp.
2632   // In most cases conversion involves floating-point value which may trigger a
2633   // hardware exception when not fully initialized. For this reason we require
2634   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
2635   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2636   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2637   // return a fully initialized value.
2638   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
2639     IRBuilder<> IRB(&I);
2640     Value *CopyOp, *ConvertOp;
2641 
2642     switch (I.getNumArgOperands()) {
2643     case 3:
2644       assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
2645       LLVM_FALLTHROUGH;
2646     case 2:
2647       CopyOp = I.getArgOperand(0);
2648       ConvertOp = I.getArgOperand(1);
2649       break;
2650     case 1:
2651       ConvertOp = I.getArgOperand(0);
2652       CopyOp = nullptr;
2653       break;
2654     default:
2655       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2656     }
2657 
2658     // The first *NumUsedElements* elements of ConvertOp are converted to the
2659     // same number of output elements. The rest of the output is copied from
2660     // CopyOp, or (if not available) filled with zeroes.
2661     // Combine shadow for elements of ConvertOp that are used in this operation,
2662     // and insert a check.
2663     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2664     // int->any conversion.
2665     Value *ConvertShadow = getShadow(ConvertOp);
2666     Value *AggShadow = nullptr;
2667     if (ConvertOp->getType()->isVectorTy()) {
2668       AggShadow = IRB.CreateExtractElement(
2669           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2670       for (int i = 1; i < NumUsedElements; ++i) {
2671         Value *MoreShadow = IRB.CreateExtractElement(
2672             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2673         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2674       }
2675     } else {
2676       AggShadow = ConvertShadow;
2677     }
2678     assert(AggShadow->getType()->isIntegerTy());
2679     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2680 
2681     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2682     // ConvertOp.
2683     if (CopyOp) {
2684       assert(CopyOp->getType() == I.getType());
2685       assert(CopyOp->getType()->isVectorTy());
2686       Value *ResultShadow = getShadow(CopyOp);
2687       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
2688       for (int i = 0; i < NumUsedElements; ++i) {
2689         ResultShadow = IRB.CreateInsertElement(
2690             ResultShadow, ConstantInt::getNullValue(EltTy),
2691             ConstantInt::get(IRB.getInt32Ty(), i));
2692       }
2693       setShadow(&I, ResultShadow);
2694       setOrigin(&I, getOrigin(CopyOp));
2695     } else {
2696       setShadow(&I, getCleanShadow(&I));
2697       setOrigin(&I, getCleanOrigin());
2698     }
2699   }
2700 
2701   // Given a scalar or vector, extract lower 64 bits (or less), and return all
2702   // zeroes if it is zero, and all ones otherwise.
2703   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2704     if (S->getType()->isVectorTy())
2705       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2706     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2707     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2708     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2709   }
2710 
2711   // Given a vector, extract its first element, and return all
2712   // zeroes if it is zero, and all ones otherwise.
2713   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2714     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2715     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2716     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2717   }
2718 
2719   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2720     Type *T = S->getType();
2721     assert(T->isVectorTy());
2722     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2723     return IRB.CreateSExt(S2, T);
2724   }
2725 
2726   // Instrument vector shift intrinsic.
2727   //
2728   // This function instruments intrinsics like int_x86_avx2_psll_w.
2729   // Intrinsic shifts %In by %ShiftSize bits.
2730   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
2731   // size, and the rest is ignored. Behavior is defined even if shift size is
2732   // greater than register (or field) width.
2733   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2734     assert(I.getNumArgOperands() == 2);
2735     IRBuilder<> IRB(&I);
2736     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2737     // Otherwise perform the same shift on S1.
2738     Value *S1 = getShadow(&I, 0);
2739     Value *S2 = getShadow(&I, 1);
2740     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2741                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2742     Value *V1 = I.getOperand(0);
2743     Value *V2 = I.getOperand(1);
2744     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2745                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2746     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2747     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2748     setOriginForNaryOp(I);
2749   }
2750 
2751   // Get an X86_MMX-sized vector type.
2752   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2753     const unsigned X86_MMXSizeInBits = 64;
2754     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2755            "Illegal MMX vector element size");
2756     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2757                                 X86_MMXSizeInBits / EltSizeInBits);
2758   }
2759 
2760   // Returns a signed counterpart for an (un)signed-saturate-and-pack
2761   // intrinsic.
2762   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2763     switch (id) {
2764       case Intrinsic::x86_sse2_packsswb_128:
2765       case Intrinsic::x86_sse2_packuswb_128:
2766         return Intrinsic::x86_sse2_packsswb_128;
2767 
2768       case Intrinsic::x86_sse2_packssdw_128:
2769       case Intrinsic::x86_sse41_packusdw:
2770         return Intrinsic::x86_sse2_packssdw_128;
2771 
2772       case Intrinsic::x86_avx2_packsswb:
2773       case Intrinsic::x86_avx2_packuswb:
2774         return Intrinsic::x86_avx2_packsswb;
2775 
2776       case Intrinsic::x86_avx2_packssdw:
2777       case Intrinsic::x86_avx2_packusdw:
2778         return Intrinsic::x86_avx2_packssdw;
2779 
2780       case Intrinsic::x86_mmx_packsswb:
2781       case Intrinsic::x86_mmx_packuswb:
2782         return Intrinsic::x86_mmx_packsswb;
2783 
2784       case Intrinsic::x86_mmx_packssdw:
2785         return Intrinsic::x86_mmx_packssdw;
2786       default:
2787         llvm_unreachable("unexpected intrinsic id");
2788     }
2789   }
2790 
2791   // Instrument vector pack intrinsic.
2792   //
2793   // This function instruments intrinsics like x86_mmx_packsswb, that
2794   // packs elements of 2 input vectors into half as many bits with saturation.
2795   // Shadow is propagated with the signed variant of the same intrinsic applied
2796   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2797   // EltSizeInBits is used only for x86mmx arguments.
2798   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2799     assert(I.getNumArgOperands() == 2);
2800     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2801     IRBuilder<> IRB(&I);
2802     Value *S1 = getShadow(&I, 0);
2803     Value *S2 = getShadow(&I, 1);
2804     assert(isX86_MMX || S1->getType()->isVectorTy());
2805 
2806     // SExt and ICmpNE below must apply to individual elements of input vectors.
2807     // In case of x86mmx arguments, cast them to appropriate vector types and
2808     // back.
2809     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2810     if (isX86_MMX) {
2811       S1 = IRB.CreateBitCast(S1, T);
2812       S2 = IRB.CreateBitCast(S2, T);
2813     }
2814     Value *S1_ext = IRB.CreateSExt(
2815         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2816     Value *S2_ext = IRB.CreateSExt(
2817         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2818     if (isX86_MMX) {
2819       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2820       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2821       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2822     }
2823 
2824     Function *ShadowFn = Intrinsic::getDeclaration(
2825         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2826 
2827     Value *S =
2828         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2829     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2830     setShadow(&I, S);
2831     setOriginForNaryOp(I);
2832   }
2833 
2834   // Instrument sum-of-absolute-differences intrinsic.
2835   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2836     const unsigned SignificantBitsPerResultElement = 16;
2837     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2838     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2839     unsigned ZeroBitsPerResultElement =
2840         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2841 
2842     IRBuilder<> IRB(&I);
2843     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2844     S = IRB.CreateBitCast(S, ResTy);
2845     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2846                        ResTy);
2847     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2848     S = IRB.CreateBitCast(S, getShadowTy(&I));
2849     setShadow(&I, S);
2850     setOriginForNaryOp(I);
2851   }
2852 
2853   // Instrument multiply-add intrinsic.
2854   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2855                                   unsigned EltSizeInBits = 0) {
2856     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2857     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2858     IRBuilder<> IRB(&I);
2859     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2860     S = IRB.CreateBitCast(S, ResTy);
2861     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2862                        ResTy);
2863     S = IRB.CreateBitCast(S, getShadowTy(&I));
2864     setShadow(&I, S);
2865     setOriginForNaryOp(I);
2866   }
2867 
2868   // Instrument compare-packed intrinsic.
2869   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2870   // all-ones shadow.
2871   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2872     IRBuilder<> IRB(&I);
2873     Type *ResTy = getShadowTy(&I);
2874     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2875     Value *S = IRB.CreateSExt(
2876         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2877     setShadow(&I, S);
2878     setOriginForNaryOp(I);
2879   }
2880 
2881   // Instrument compare-scalar intrinsic.
2882   // This handles both cmp* intrinsics which return the result in the first
2883   // element of a vector, and comi* which return the result as i32.
2884   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2885     IRBuilder<> IRB(&I);
2886     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2887     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
2888     setShadow(&I, S);
2889     setOriginForNaryOp(I);
2890   }
2891 
2892   // Instrument generic vector reduction intrinsics
2893   // by ORing together all their fields.
2894   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
2895     IRBuilder<> IRB(&I);
2896     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
2897     setShadow(&I, S);
2898     setOrigin(&I, getOrigin(&I, 0));
2899   }
2900 
2901   // Instrument experimental.vector.reduce.or intrinsic.
2902   // Valid (non-poisoned) set bits in the operand pull low the
2903   // corresponding shadow bits.
2904   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
2905     IRBuilder<> IRB(&I);
2906     Value *OperandShadow = getShadow(&I, 0);
2907     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
2908     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
2909     // Bit N is clean if any field's bit N is 1 and unpoison
2910     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
2911     // Otherwise, it is clean if every field's bit N is unpoison
2912     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
2913     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
2914 
2915     setShadow(&I, S);
2916     setOrigin(&I, getOrigin(&I, 0));
2917   }
2918 
2919   // Instrument experimental.vector.reduce.or intrinsic.
2920   // Valid (non-poisoned) unset bits in the operand pull down the
2921   // corresponding shadow bits.
2922   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
2923     IRBuilder<> IRB(&I);
2924     Value *OperandShadow = getShadow(&I, 0);
2925     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
2926     // Bit N is clean if any field's bit N is 0 and unpoison
2927     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
2928     // Otherwise, it is clean if every field's bit N is unpoison
2929     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
2930     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
2931 
2932     setShadow(&I, S);
2933     setOrigin(&I, getOrigin(&I, 0));
2934   }
2935 
2936   void handleStmxcsr(IntrinsicInst &I) {
2937     IRBuilder<> IRB(&I);
2938     Value* Addr = I.getArgOperand(0);
2939     Type *Ty = IRB.getInt32Ty();
2940     Value *ShadowPtr =
2941         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
2942 
2943     IRB.CreateStore(getCleanShadow(Ty),
2944                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
2945 
2946     if (ClCheckAccessAddress)
2947       insertShadowCheck(Addr, &I);
2948   }
2949 
2950   void handleLdmxcsr(IntrinsicInst &I) {
2951     if (!InsertChecks) return;
2952 
2953     IRBuilder<> IRB(&I);
2954     Value *Addr = I.getArgOperand(0);
2955     Type *Ty = IRB.getInt32Ty();
2956     const Align Alignment = Align(1);
2957     Value *ShadowPtr, *OriginPtr;
2958     std::tie(ShadowPtr, OriginPtr) =
2959         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
2960 
2961     if (ClCheckAccessAddress)
2962       insertShadowCheck(Addr, &I);
2963 
2964     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
2965     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
2966                                     : getCleanOrigin();
2967     insertShadowCheck(Shadow, Origin, &I);
2968   }
2969 
2970   void handleMaskedStore(IntrinsicInst &I) {
2971     IRBuilder<> IRB(&I);
2972     Value *V = I.getArgOperand(0);
2973     Value *Addr = I.getArgOperand(1);
2974     const Align Alignment(
2975         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
2976     Value *Mask = I.getArgOperand(3);
2977     Value *Shadow = getShadow(V);
2978 
2979     Value *ShadowPtr;
2980     Value *OriginPtr;
2981     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2982         Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
2983 
2984     if (ClCheckAccessAddress) {
2985       insertShadowCheck(Addr, &I);
2986       // Uninitialized mask is kind of like uninitialized address, but not as
2987       // scary.
2988       insertShadowCheck(Mask, &I);
2989     }
2990 
2991     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
2992 
2993     if (MS.TrackOrigins) {
2994       auto &DL = F.getParent()->getDataLayout();
2995       paintOrigin(IRB, getOrigin(V), OriginPtr,
2996                   DL.getTypeStoreSize(Shadow->getType()),
2997                   std::max(Alignment, kMinOriginAlignment));
2998     }
2999   }
3000 
3001   bool handleMaskedLoad(IntrinsicInst &I) {
3002     IRBuilder<> IRB(&I);
3003     Value *Addr = I.getArgOperand(0);
3004     const Align Alignment(
3005         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3006     Value *Mask = I.getArgOperand(2);
3007     Value *PassThru = I.getArgOperand(3);
3008 
3009     Type *ShadowTy = getShadowTy(&I);
3010     Value *ShadowPtr, *OriginPtr;
3011     if (PropagateShadow) {
3012       std::tie(ShadowPtr, OriginPtr) =
3013           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3014       setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Alignment, Mask,
3015                                          getShadow(PassThru), "_msmaskedld"));
3016     } else {
3017       setShadow(&I, getCleanShadow(&I));
3018     }
3019 
3020     if (ClCheckAccessAddress) {
3021       insertShadowCheck(Addr, &I);
3022       insertShadowCheck(Mask, &I);
3023     }
3024 
3025     if (MS.TrackOrigins) {
3026       if (PropagateShadow) {
3027         // Choose between PassThru's and the loaded value's origins.
3028         Value *MaskedPassThruShadow = IRB.CreateAnd(
3029             getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3030 
3031         Value *Acc = IRB.CreateExtractElement(
3032             MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3033         for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
3034                                 ->getNumElements();
3035              i < N; ++i) {
3036           Value *More = IRB.CreateExtractElement(
3037               MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3038           Acc = IRB.CreateOr(Acc, More);
3039         }
3040 
3041         Value *Origin = IRB.CreateSelect(
3042             IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
3043             getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
3044 
3045         setOrigin(&I, Origin);
3046       } else {
3047         setOrigin(&I, getCleanOrigin());
3048       }
3049     }
3050     return true;
3051   }
3052 
3053   // Instrument BMI / BMI2 intrinsics.
3054   // All of these intrinsics are Z = I(X, Y)
3055   // where the types of all operands and the result match, and are either i32 or i64.
3056   // The following instrumentation happens to work for all of them:
3057   //   Sz = I(Sx, Y) | (sext (Sy != 0))
3058   void handleBmiIntrinsic(IntrinsicInst &I) {
3059     IRBuilder<> IRB(&I);
3060     Type *ShadowTy = getShadowTy(&I);
3061 
3062     // If any bit of the mask operand is poisoned, then the whole thing is.
3063     Value *SMask = getShadow(&I, 1);
3064     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3065                            ShadowTy);
3066     // Apply the same intrinsic to the shadow of the first operand.
3067     Value *S = IRB.CreateCall(I.getCalledFunction(),
3068                               {getShadow(&I, 0), I.getOperand(1)});
3069     S = IRB.CreateOr(SMask, S);
3070     setShadow(&I, S);
3071     setOriginForNaryOp(I);
3072   }
3073 
3074   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3075     SmallVector<int, 8> Mask;
3076     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3077       Mask.append(2, X);
3078     }
3079     return Mask;
3080   }
3081 
3082   // Instrument pclmul intrinsics.
3083   // These intrinsics operate either on odd or on even elements of the input
3084   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3085   // Replace the unused elements with copies of the used ones, ex:
3086   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3087   // or
3088   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3089   // and then apply the usual shadow combining logic.
3090   void handlePclmulIntrinsic(IntrinsicInst &I) {
3091     IRBuilder<> IRB(&I);
3092     Type *ShadowTy = getShadowTy(&I);
3093     unsigned Width =
3094         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3095     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3096            "pclmul 3rd operand must be a constant");
3097     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3098     Value *Shuf0 =
3099         IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy),
3100                                 getPclmulMask(Width, Imm & 0x01));
3101     Value *Shuf1 =
3102         IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy),
3103                                 getPclmulMask(Width, Imm & 0x10));
3104     ShadowAndOriginCombiner SOC(this, IRB);
3105     SOC.Add(Shuf0, getOrigin(&I, 0));
3106     SOC.Add(Shuf1, getOrigin(&I, 1));
3107     SOC.Done(&I);
3108   }
3109 
3110   // Instrument _mm_*_sd intrinsics
3111   void handleUnarySdIntrinsic(IntrinsicInst &I) {
3112     IRBuilder<> IRB(&I);
3113     Value *First = getShadow(&I, 0);
3114     Value *Second = getShadow(&I, 1);
3115     // High word of first operand, low word of second
3116     Value *Shadow =
3117         IRB.CreateShuffleVector(First, Second, llvm::makeArrayRef<int>({2, 1}));
3118 
3119     setShadow(&I, Shadow);
3120     setOriginForNaryOp(I);
3121   }
3122 
3123   void handleBinarySdIntrinsic(IntrinsicInst &I) {
3124     IRBuilder<> IRB(&I);
3125     Value *First = getShadow(&I, 0);
3126     Value *Second = getShadow(&I, 1);
3127     Value *OrShadow = IRB.CreateOr(First, Second);
3128     // High word of first operand, low word of both OR'd together
3129     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow,
3130                                             llvm::makeArrayRef<int>({2, 1}));
3131 
3132     setShadow(&I, Shadow);
3133     setOriginForNaryOp(I);
3134   }
3135 
  /// Dispatch an intrinsic call to the handler responsible for it, keyed on
  /// the intrinsic ID. Intrinsics without a dedicated case are first offered
  /// to handleUnknownIntrinsic; if that returns false they are treated like
  /// any other instruction via visitInstruction.
  void visitIntrinsicInst(IntrinsicInst &I) {
    switch (I.getIntrinsicID()) {
    // --- Target-independent intrinsics ---
    case Intrinsic::lifetime_start:
      handleLifetimeStart(I);
      break;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      handleInvariantGroup(I);
      break;
    case Intrinsic::bswap:
      handleBswap(I);
      break;
    case Intrinsic::masked_store:
      handleMaskedStore(I);
      break;
    case Intrinsic::masked_load:
      // The bool returned by handleMaskedLoad is not used here.
      handleMaskedLoad(I);
      break;
    // Vector reductions: and/or get dedicated handlers, the remaining integer
    // reductions share one.
    case Intrinsic::experimental_vector_reduce_and:
      handleVectorReduceAndIntrinsic(I);
      break;
    case Intrinsic::experimental_vector_reduce_or:
      handleVectorReduceOrIntrinsic(I);
      break;
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_mul:
      handleVectorReduceIntrinsic(I);
      break;
    // --- x86: MXCSR store / load ---
    case Intrinsic::x86_sse_stmxcsr:
      handleStmxcsr(I);
      break;
    case Intrinsic::x86_sse_ldmxcsr:
      handleLdmxcsr(I);
      break;
    // --- x86: conversions ---
    // NOTE(review): the second argument is presumably the number of source
    // elements that take part in the conversion — confirm against
    // handleVectorConvertIntrinsic.
    case Intrinsic::x86_avx512_vcvtsd2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvtusi2ss:
    case Intrinsic::x86_avx512_cvtusi642sd:
    case Intrinsic::x86_avx512_cvtusi642ss:
    case Intrinsic::x86_sse2_cvtsd2si64:
    case Intrinsic::x86_sse2_cvtsd2si:
    case Intrinsic::x86_sse2_cvtsd2ss:
    case Intrinsic::x86_sse2_cvttsd2si64:
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse_cvtss2si64:
    case Intrinsic::x86_sse_cvtss2si:
    case Intrinsic::x86_sse_cvttss2si64:
    case Intrinsic::x86_sse_cvttss2si:
      handleVectorConvertIntrinsic(I, 1);
      break;
    case Intrinsic::x86_sse_cvtps2pi:
    case Intrinsic::x86_sse_cvttps2pi:
      handleVectorConvertIntrinsic(I, 2);
      break;

    // --- x86: vector shifts dispatched with Variable == false
    // (psll/psrl/psra and their immediate "i" forms) ---
    case Intrinsic::x86_avx512_psll_w_512:
    case Intrinsic::x86_avx512_psll_d_512:
    case Intrinsic::x86_avx512_psll_q_512:
    case Intrinsic::x86_avx512_pslli_w_512:
    case Intrinsic::x86_avx512_pslli_d_512:
    case Intrinsic::x86_avx512_pslli_q_512:
    case Intrinsic::x86_avx512_psrl_w_512:
    case Intrinsic::x86_avx512_psrl_d_512:
    case Intrinsic::x86_avx512_psrl_q_512:
    case Intrinsic::x86_avx512_psra_w_512:
    case Intrinsic::x86_avx512_psra_d_512:
    case Intrinsic::x86_avx512_psra_q_512:
    case Intrinsic::x86_avx512_psrli_w_512:
    case Intrinsic::x86_avx512_psrli_d_512:
    case Intrinsic::x86_avx512_psrli_q_512:
    case Intrinsic::x86_avx512_psrai_w_512:
    case Intrinsic::x86_avx512_psrai_d_512:
    case Intrinsic::x86_avx512_psrai_q_512:
    case Intrinsic::x86_avx512_psra_q_256:
    case Intrinsic::x86_avx512_psra_q_128:
    case Intrinsic::x86_avx512_psrai_q_256:
    case Intrinsic::x86_avx512_psrai_q_128:
    case Intrinsic::x86_avx2_psll_w:
    case Intrinsic::x86_avx2_psll_d:
    case Intrinsic::x86_avx2_psll_q:
    case Intrinsic::x86_avx2_pslli_w:
    case Intrinsic::x86_avx2_pslli_d:
    case Intrinsic::x86_avx2_pslli_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psra_w:
    case Intrinsic::x86_avx2_psra_d:
    case Intrinsic::x86_avx2_psrli_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrai_w:
    case Intrinsic::x86_avx2_psrai_d:
    case Intrinsic::x86_sse2_psll_w:
    case Intrinsic::x86_sse2_psll_d:
    case Intrinsic::x86_sse2_psll_q:
    case Intrinsic::x86_sse2_pslli_w:
    case Intrinsic::x86_sse2_pslli_d:
    case Intrinsic::x86_sse2_pslli_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psra_w:
    case Intrinsic::x86_sse2_psra_d:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrai_w:
    case Intrinsic::x86_sse2_psrai_d:
    case Intrinsic::x86_mmx_psll_w:
    case Intrinsic::x86_mmx_psll_d:
    case Intrinsic::x86_mmx_psll_q:
    case Intrinsic::x86_mmx_pslli_w:
    case Intrinsic::x86_mmx_pslli_d:
    case Intrinsic::x86_mmx_pslli_q:
    case Intrinsic::x86_mmx_psrl_w:
    case Intrinsic::x86_mmx_psrl_d:
    case Intrinsic::x86_mmx_psrl_q:
    case Intrinsic::x86_mmx_psra_w:
    case Intrinsic::x86_mmx_psra_d:
    case Intrinsic::x86_mmx_psrli_w:
    case Intrinsic::x86_mmx_psrli_d:
    case Intrinsic::x86_mmx_psrli_q:
    case Intrinsic::x86_mmx_psrai_w:
    case Intrinsic::x86_mmx_psrai_d:
      handleVectorShiftIntrinsic(I, /* Variable */ false);
      break;
    // --- x86: vector shifts dispatched with Variable == true
    // (the psllv/psrlv/psrav families) ---
    case Intrinsic::x86_avx2_psllv_d:
    case Intrinsic::x86_avx2_psllv_d_256:
    case Intrinsic::x86_avx512_psllv_d_512:
    case Intrinsic::x86_avx2_psllv_q:
    case Intrinsic::x86_avx2_psllv_q_256:
    case Intrinsic::x86_avx512_psllv_q_512:
    case Intrinsic::x86_avx2_psrlv_d:
    case Intrinsic::x86_avx2_psrlv_d_256:
    case Intrinsic::x86_avx512_psrlv_d_512:
    case Intrinsic::x86_avx2_psrlv_q:
    case Intrinsic::x86_avx2_psrlv_q_256:
    case Intrinsic::x86_avx512_psrlv_q_512:
    case Intrinsic::x86_avx2_psrav_d:
    case Intrinsic::x86_avx2_psrav_d_256:
    case Intrinsic::x86_avx512_psrav_d_512:
    case Intrinsic::x86_avx512_psrav_q_128:
    case Intrinsic::x86_avx512_psrav_q_256:
    case Intrinsic::x86_avx512_psrav_q_512:
      handleVectorShiftIntrinsic(I, /* Variable */ true);
      break;

    // --- x86: pack intrinsics ---
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx2_packusdw:
      handleVectorPackIntrinsic(I);
      break;

    // The MMX variants pass an explicit second argument.
    // NOTE(review): presumably the source element width in bits — confirm
    // against handleVectorPackIntrinsic.
    case Intrinsic::x86_mmx_packsswb:
    case Intrinsic::x86_mmx_packuswb:
      handleVectorPackIntrinsic(I, 16);
      break;

    case Intrinsic::x86_mmx_packssdw:
      handleVectorPackIntrinsic(I, 32);
      break;

    // --- x86: sum of absolute differences ---
    case Intrinsic::x86_mmx_psad_bw:
    case Intrinsic::x86_sse2_psad_bw:
    case Intrinsic::x86_avx2_psad_bw:
      handleVectorSadIntrinsic(I);
      break;

    // --- x86: multiply-add ---
    case Intrinsic::x86_sse2_pmadd_wd:
    case Intrinsic::x86_avx2_pmadd_wd:
    case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
    case Intrinsic::x86_avx2_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I);
      break;

    // As with the pack intrinsics, these two variants pass an explicit second
    // argument to the handler.
    case Intrinsic::x86_ssse3_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I, 8);
      break;

    case Intrinsic::x86_mmx_pmadd_wd:
      handleVectorPmaddIntrinsic(I, 16);
      break;

    // --- x86: scalar floating-point compares ---
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_cmp_sd:
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
    case Intrinsic::x86_sse2_comilt_sd:
    case Intrinsic::x86_sse2_comile_sd:
    case Intrinsic::x86_sse2_comigt_sd:
    case Intrinsic::x86_sse2_comige_sd:
    case Intrinsic::x86_sse2_comineq_sd:
    case Intrinsic::x86_sse2_ucomieq_sd:
    case Intrinsic::x86_sse2_ucomilt_sd:
    case Intrinsic::x86_sse2_ucomile_sd:
    case Intrinsic::x86_sse2_ucomigt_sd:
    case Intrinsic::x86_sse2_ucomige_sd:
    case Intrinsic::x86_sse2_ucomineq_sd:
      handleVectorCompareScalarIntrinsic(I);
      break;

    // --- x86: packed floating-point compares (128-bit forms only) ---
    case Intrinsic::x86_sse_cmp_ps:
    case Intrinsic::x86_sse2_cmp_pd:
      // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
      // generates reasonably looking IR that fails in the backend with "Do not
      // know how to split the result of this operator!".
      handleVectorComparePackedIntrinsic(I);
      break;

    // --- x86: BMI / BMI2 ---
    case Intrinsic::x86_bmi_bextr_32:
    case Intrinsic::x86_bmi_bextr_64:
    case Intrinsic::x86_bmi_bzhi_32:
    case Intrinsic::x86_bmi_bzhi_64:
    case Intrinsic::x86_bmi_pdep_32:
    case Intrinsic::x86_bmi_pdep_64:
    case Intrinsic::x86_bmi_pext_32:
    case Intrinsic::x86_bmi_pext_64:
      handleBmiIntrinsic(I);
      break;

    // --- x86: carry-less multiplication ---
    case Intrinsic::x86_pclmulqdq:
    case Intrinsic::x86_pclmulqdq_256:
    case Intrinsic::x86_pclmulqdq_512:
      handlePclmulIntrinsic(I);
      break;

    // --- x86: *_sd intrinsics ---
    case Intrinsic::x86_sse41_round_sd:
      handleUnarySdIntrinsic(I);
      break;
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse2_min_sd:
      handleBinarySdIntrinsic(I);
      break;

    case Intrinsic::is_constant:
      // The result of llvm.is.constant() is always defined.
      setShadow(&I, getCleanShadow(&I));
      setOrigin(&I, getCleanOrigin());
      break;

    default:
      // No dedicated handler: try the generic intrinsic handling first and
      // fall back to the default instruction visitor if it declines.
      if (!handleUnknownIntrinsic(I))
        visitInstruction(I);
      break;
    }
  }
3406 
  /// Instrument a call-like instruction (call, invoke or callbr).
  ///
  /// Before the call, the shadow (and, when tracked, the origin) of each
  /// sized argument is written to the parameter TLS area at a running offset;
  /// arguments that qualify for eager checking are checked in place instead.
  /// Around the call, the return-value shadow slot is cleared beforehand and
  /// read back afterwards to become the shadow of the call's result.
  void visitCallBase(CallBase &CB) {
    assert(!CB.getMetadata("nosanitize"));
    if (CB.isInlineAsm()) {
      // For inline asm (either a call to asm function, or callbr instruction),
      // do the usual thing: check argument shadow and mark all outputs as
      // clean. Note that any side effects of the inline asm that are not
      // immediately visible in its constraints are not handled.
      if (ClHandleAsmConservative && MS.CompileKernel)
        visitAsmInstruction(CB);
      else
        visitInstruction(CB);
      return;
    }
    if (auto *Call = dyn_cast<CallInst>(&CB)) {
      assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");

      // We are going to insert code that relies on the fact that the callee
      // will become a non-readonly function after it is instrumented by us. To
      // prevent this code from being optimized out, mark that function
      // non-readonly in advance.
      if (Function *Func = Call->getCalledFunction()) {
        // Clear out readonly/readnone attributes.
        AttrBuilder B;
        B.addAttribute(Attribute::ReadOnly)
            .addAttribute(Attribute::ReadNone)
            .addAttribute(Attribute::WriteOnly)
            .addAttribute(Attribute::ArgMemOnly)
            .addAttribute(Attribute::Speculatable);
        Func->removeAttributes(AttributeList::FunctionIndex, B);
      }

      maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
    }
    IRBuilder<> IRB(&CB);

    // Running byte offset into the parameter TLS area for the next argument.
    unsigned ArgOffset = 0;
    LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
    for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
         ++ArgIt) {
      Value *A = *ArgIt;
      unsigned i = ArgIt - CB.arg_begin();
      // Unsized arguments are skipped and consume no TLS space.
      if (!A->getType()->isSized()) {
        LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
        continue;
      }
      unsigned Size = 0;
      Value *Store = nullptr;
      // Compute the Shadow for arg even if it is ByVal, because
      // in that case getShadow() will copy the actual arg shadow to
      // __msan_param_tls.
      Value *ArgShadow = getShadow(A);
      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
      LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
                        << " Shadow: " << *ArgShadow << "\n");
      bool ArgIsInitialized = false;
      const DataLayout &DL = F.getParent()->getDataLayout();

      bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
      bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
      bool EagerCheck = ClEagerChecks && !ByVal && NoUndef;

      // Eagerly checked arguments are verified here and do not advance
      // ArgOffset.
      // NOTE(review): the callee-side argument handling must skip the same
      // arguments for the offsets to line up — confirm.
      if (EagerCheck) {
        insertShadowCheck(A, &CB);
        continue;
      }
      if (ByVal) {
        // ByVal requires some special handling as it's too big for a single
        // load
        assert(A->getType()->isPointerTy() &&
               "ByVal argument is not a pointer!");
        Size = DL.getTypeAllocSize(CB.getParamByValType(i));
        // Out of TLS space: stop writing shadow for this and all following
        // arguments.
        if (ArgOffset + Size > kParamTLSSize) break;
        const MaybeAlign ParamAlignment(CB.getParamAlign(i));
        MaybeAlign Alignment = llvm::None;
        // Never claim more than the TLS slot's alignment.
        if (ParamAlignment)
          Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
        Value *AShadowPtr =
            getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
                               /*isStore*/ false)
                .first;

        // Copy the pointee's shadow bytes into the TLS slot.
        Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
                                 Alignment, Size);
        // TODO(glider): need to copy origins.
      } else {
        // Any other parameters mean we need bit-grained tracking of uninit data
        Size = DL.getTypeAllocSize(A->getType());
        // Same overflow policy as in the ByVal case above.
        if (ArgOffset + Size > kParamTLSSize) break;
        Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
                                       kShadowTLSAlignment);
        // A constant all-zero shadow means the argument is known to be fully
        // initialized, so its origin need not be stored.
        Constant *Cst = dyn_cast<Constant>(ArgShadow);
        if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
      }
      if (MS.TrackOrigins && !ArgIsInitialized)
        IRB.CreateStore(getOrigin(A),
                        getOriginPtrForArgument(A, IRB, ArgOffset));
      // Store is only consumed by the assert and debug output below.
      (void)Store;
      assert(Size != 0 && Store != nullptr);
      LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
      // Each argument's slot size is rounded up to a multiple of 8 bytes.
      ArgOffset += alignTo(Size, 8);
    }
    LLVM_DEBUG(dbgs() << "  done with call args\n");

    // Variadic calls get additional handling from the vararg helper.
    FunctionType *FT = CB.getFunctionType();
    if (FT->isVarArg()) {
      VAHelper->visitCallBase(CB, IRB);
    }

    // Now, get the shadow for the RetVal.
    if (!CB.getType()->isSized())
      return;
    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
      return;

    // With eager checks, a noundef return value is treated as fully
    // initialized without reading the retval TLS slot.
    if (ClEagerChecks && CB.hasRetAttr(Attribute::NoUndef)) {
      setShadow(&CB, getCleanShadow(&CB));
      setOrigin(&CB, getCleanOrigin());
      return;
    }

    IRBuilder<> IRBBefore(&CB);
    // Until we have full dynamic coverage, make sure the retval shadow is 0.
    Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
    IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
                                 kShadowTLSAlignment);
    // Find where to read the retval shadow back: right after a call, or at
    // the start of the normal destination of an invoke.
    BasicBlock::iterator NextInsn;
    if (isa<CallInst>(CB)) {
      NextInsn = ++CB.getIterator();
      assert(NextInsn != CB.getParent()->end());
    } else {
      BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
      if (!NormalDest->getSinglePredecessor()) {
        // FIXME: this case is tricky, so we are just conservative here.
        // Perhaps we need to split the edge between this BB and NormalDest,
        // but a naive attempt to use SplitEdge leads to a crash.
        setShadow(&CB, getCleanShadow(&CB));
        setOrigin(&CB, getCleanOrigin());
        return;
      }
      // FIXME: NextInsn is likely in a basic block that has not been visited yet.
      // Anything inserted there will be instrumented by MSan later!
      NextInsn = NormalDest->getFirstInsertionPt();
      assert(NextInsn != NormalDest->end() &&
             "Could not find insertion point for retval shadow load");
    }
    IRBuilder<> IRBAfter(&*NextInsn);
    Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
        getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
        kShadowTLSAlignment, "_msret");
    setShadow(&CB, RetvalShadow);
    if (MS.TrackOrigins)
      setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
                                         getOriginPtrForRetval(IRBAfter)));
  }
3562 
3563   bool isAMustTailRetVal(Value *RetVal) {
3564     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3565       RetVal = I->getOperand(0);
3566     }
3567     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3568       return I->isMustTailCall();
3569     }
3570     return false;
3571   }
3572 
3573   void visitReturnInst(ReturnInst &I) {
3574     IRBuilder<> IRB(&I);
3575     Value *RetVal = I.getReturnValue();
3576     if (!RetVal) return;
3577     // Don't emit the epilogue for musttail call returns.
3578     if (isAMustTailRetVal(RetVal)) return;
3579     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
3580     bool HasNoUndef =
3581         F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
3582     bool StoreShadow = !(ClEagerChecks && HasNoUndef);
3583     // FIXME: Consider using SpecialCaseList to specify a list of functions that
3584     // must always return fully initialized values. For now, we hardcode "main".
3585     bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main");
3586 
3587     Value *Shadow = getShadow(RetVal);
3588     bool StoreOrigin = true;
3589     if (EagerCheck) {
3590       insertShadowCheck(RetVal, &I);
3591       Shadow = getCleanShadow(RetVal);
3592       StoreOrigin = false;
3593     }
3594 
3595     // The caller may still expect information passed over TLS if we pass our
3596     // check
3597     if (StoreShadow) {
3598       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3599       if (MS.TrackOrigins && StoreOrigin)
3600         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
3601     }
3602   }
3603 
3604   void visitPHINode(PHINode &I) {
3605     IRBuilder<> IRB(&I);
3606     if (!PropagateShadow) {
3607       setShadow(&I, getCleanShadow(&I));
3608       setOrigin(&I, getCleanOrigin());
3609       return;
3610     }
3611 
3612     ShadowPHINodes.push_back(&I);
3613     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3614                                 "_msphi_s"));
3615     if (MS.TrackOrigins)
3616       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3617                                   "_msphi_o"));
3618   }
3619 
3620   Value *getLocalVarDescription(AllocaInst &I) {
3621     SmallString<2048> StackDescriptionStorage;
3622     raw_svector_ostream StackDescription(StackDescriptionStorage);
3623     // We create a string with a description of the stack allocation and
3624     // pass it into __msan_set_alloca_origin.
3625     // It will be printed by the run-time if stack-originated UMR is found.
3626     // The first 4 bytes of the string are set to '----' and will be replaced
3627     // by __msan_va_arg_overflow_size_tls at the first call.
3628     StackDescription << "----" << I.getName() << "@" << F.getName();
3629     return createPrivateNonConstGlobalForString(*F.getParent(),
3630                                                 StackDescription.str());
3631   }
3632 
3633   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3634     if (PoisonStack && ClPoisonStackWithCall) {
3635       IRB.CreateCall(MS.MsanPoisonStackFn,
3636                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3637     } else {
3638       Value *ShadowBase, *OriginBase;
3639       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
3640           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
3641 
3642       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3643       IRB.CreateMemSet(ShadowBase, PoisonValue, Len,
3644                        MaybeAlign(I.getAlignment()));
3645     }
3646 
3647     if (PoisonStack && MS.TrackOrigins) {
3648       Value *Descr = getLocalVarDescription(I);
3649       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3650                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3651                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3652                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3653     }
3654   }
3655 
3656   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3657     Value *Descr = getLocalVarDescription(I);
3658     if (PoisonStack) {
3659       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3660                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3661                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3662     } else {
3663       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3664                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3665     }
3666   }
3667 
3668   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3669     if (!InsPoint)
3670       InsPoint = &I;
3671     IRBuilder<> IRB(InsPoint->getNextNode());
3672     const DataLayout &DL = F.getParent()->getDataLayout();
3673     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3674     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3675     if (I.isArrayAllocation())
3676       Len = IRB.CreateMul(Len, I.getArraySize());
3677 
3678     if (MS.CompileKernel)
3679       poisonAllocaKmsan(I, IRB, Len);
3680     else
3681       poisonAllocaUserspace(I, IRB, Len);
3682   }
3683 
3684   void visitAllocaInst(AllocaInst &I) {
3685     setShadow(&I, getCleanShadow(&I));
3686     setOrigin(&I, getCleanOrigin());
3687     // We'll get to this alloca later unless it's poisoned at the corresponding
3688     // llvm.lifetime.start.
3689     AllocaSet.insert(&I);
3690   }
3691 
3692   void visitSelectInst(SelectInst& I) {
3693     IRBuilder<> IRB(&I);
3694     // a = select b, c, d
3695     Value *B = I.getCondition();
3696     Value *C = I.getTrueValue();
3697     Value *D = I.getFalseValue();
3698     Value *Sb = getShadow(B);
3699     Value *Sc = getShadow(C);
3700     Value *Sd = getShadow(D);
3701 
3702     // Result shadow if condition shadow is 0.
3703     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
3704     Value *Sa1;
3705     if (I.getType()->isAggregateType()) {
3706       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
3707       // an extra "select". This results in much more compact IR.
3708       // Sa = select Sb, poisoned, (select b, Sc, Sd)
3709       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
3710     } else {
3711       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
3712       // If Sb (condition is poisoned), look for bits in c and d that are equal
3713       // and both unpoisoned.
3714       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
3715 
3716       // Cast arguments to shadow-compatible type.
3717       C = CreateAppToShadowCast(IRB, C);
3718       D = CreateAppToShadowCast(IRB, D);
3719 
3720       // Result shadow if condition shadow is 1.
3721       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
3722     }
3723     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
3724     setShadow(&I, Sa);
3725     if (MS.TrackOrigins) {
3726       // Origins are always i32, so any vector conditions must be flattened.
3727       // FIXME: consider tracking vector origins for app vectors?
3728       if (B->getType()->isVectorTy()) {
3729         Type *FlatTy = getShadowTyNoVec(B->getType());
3730         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
3731                                 ConstantInt::getNullValue(FlatTy));
3732         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
3733                                       ConstantInt::getNullValue(FlatTy));
3734       }
3735       // a = select b, c, d
3736       // Oa = Sb ? Ob : (b ? Oc : Od)
3737       setOrigin(
3738           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
3739                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
3740                                                 getOrigin(I.getFalseValue()))));
3741     }
3742   }
3743 
3744   void visitLandingPadInst(LandingPadInst &I) {
3745     // Do nothing.
3746     // See https://github.com/google/sanitizers/issues/504
3747     setShadow(&I, getCleanShadow(&I));
3748     setOrigin(&I, getCleanOrigin());
3749   }
3750 
3751   void visitCatchSwitchInst(CatchSwitchInst &I) {
3752     setShadow(&I, getCleanShadow(&I));
3753     setOrigin(&I, getCleanOrigin());
3754   }
3755 
3756   void visitFuncletPadInst(FuncletPadInst &I) {
3757     setShadow(&I, getCleanShadow(&I));
3758     setOrigin(&I, getCleanOrigin());
3759   }
3760 
3761   void visitGetElementPtrInst(GetElementPtrInst &I) {
3762     handleShadowOr(I);
3763   }
3764 
3765   void visitExtractValueInst(ExtractValueInst &I) {
3766     IRBuilder<> IRB(&I);
3767     Value *Agg = I.getAggregateOperand();
3768     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
3769     Value *AggShadow = getShadow(Agg);
3770     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3771     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
3772     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
3773     setShadow(&I, ResShadow);
3774     setOriginForNaryOp(I);
3775   }
3776 
3777   void visitInsertValueInst(InsertValueInst &I) {
3778     IRBuilder<> IRB(&I);
3779     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
3780     Value *AggShadow = getShadow(I.getAggregateOperand());
3781     Value *InsShadow = getShadow(I.getInsertedValueOperand());
3782     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
3783     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
3784     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
3785     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
3786     setShadow(&I, Res);
3787     setOriginForNaryOp(I);
3788   }
3789 
3790   void dumpInst(Instruction &I) {
3791     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3792       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
3793     } else {
3794       errs() << "ZZZ " << I.getOpcodeName() << "\n";
3795     }
3796     errs() << "QQQ " << I << "\n";
3797   }
3798 
3799   void visitResumeInst(ResumeInst &I) {
3800     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
3801     // Nothing to do here.
3802   }
3803 
3804   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
3805     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
3806     // Nothing to do here.
3807   }
3808 
3809   void visitCatchReturnInst(CatchReturnInst &CRI) {
3810     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
3811     // Nothing to do here.
3812   }
3813 
3814   void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
3815                              const DataLayout &DL, bool isOutput) {
3816     // For each assembly argument, we check its value for being initialized.
3817     // If the argument is a pointer, we assume it points to a single element
3818     // of the corresponding type (or to a 8-byte word, if the type is unsized).
3819     // Each such pointer is instrumented with a call to the runtime library.
3820     Type *OpType = Operand->getType();
3821     // Check the operand value itself.
3822     insertShadowCheck(Operand, &I);
3823     if (!OpType->isPointerTy() || !isOutput) {
3824       assert(!isOutput);
3825       return;
3826     }
3827     Type *ElType = OpType->getPointerElementType();
3828     if (!ElType->isSized())
3829       return;
3830     int Size = DL.getTypeStoreSize(ElType);
3831     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
3832     Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
3833     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
3834   }
3835 
3836   /// Get the number of output arguments returned by pointers.
3837   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
3838     int NumRetOutputs = 0;
3839     int NumOutputs = 0;
3840     Type *RetTy = cast<Value>(CB)->getType();
3841     if (!RetTy->isVoidTy()) {
3842       // Register outputs are returned via the CallInst return value.
3843       auto *ST = dyn_cast<StructType>(RetTy);
3844       if (ST)
3845         NumRetOutputs = ST->getNumElements();
3846       else
3847         NumRetOutputs = 1;
3848     }
3849     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
3850     for (size_t i = 0, n = Constraints.size(); i < n; i++) {
3851       InlineAsm::ConstraintInfo Info = Constraints[i];
3852       switch (Info.Type) {
3853       case InlineAsm::isOutput:
3854         NumOutputs++;
3855         break;
3856       default:
3857         break;
3858       }
3859     }
3860     return NumOutputs - NumRetOutputs;
3861   }
3862 
3863   void visitAsmInstruction(Instruction &I) {
3864     // Conservative inline assembly handling: check for poisoned shadow of
3865     // asm() arguments, then unpoison the result and all the memory locations
3866     // pointed to by those arguments.
3867     // An inline asm() statement in C++ contains lists of input and output
3868     // arguments used by the assembly code. These are mapped to operands of the
3869     // CallInst as follows:
3870     //  - nR register outputs ("=r) are returned by value in a single structure
3871     //  (SSA value of the CallInst);
3872     //  - nO other outputs ("=m" and others) are returned by pointer as first
3873     // nO operands of the CallInst;
3874     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
3875     // remaining nI operands.
3876     // The total number of asm() arguments in the source is nR+nO+nI, and the
3877     // corresponding CallInst has nO+nI+1 operands (the last operand is the
3878     // function to be called).
3879     const DataLayout &DL = F.getParent()->getDataLayout();
3880     CallBase *CB = cast<CallBase>(&I);
3881     IRBuilder<> IRB(&I);
3882     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
3883     int OutputArgs = getNumOutputArgs(IA, CB);
3884     // The last operand of a CallInst is the function itself.
3885     int NumOperands = CB->getNumOperands() - 1;
3886 
3887     // Check input arguments. Doing so before unpoisoning output arguments, so
3888     // that we won't overwrite uninit values before checking them.
3889     for (int i = OutputArgs; i < NumOperands; i++) {
3890       Value *Operand = CB->getOperand(i);
3891       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
3892     }
3893     // Unpoison output arguments. This must happen before the actual InlineAsm
3894     // call, so that the shadow for memory published in the asm() statement
3895     // remains valid.
3896     for (int i = 0; i < OutputArgs; i++) {
3897       Value *Operand = CB->getOperand(i);
3898       instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
3899     }
3900 
3901     setShadow(&I, getCleanShadow(&I));
3902     setOrigin(&I, getCleanOrigin());
3903   }
3904 
3905   void visitInstruction(Instruction &I) {
3906     // Everything else: stop propagating and check for poisoned shadow.
3907     if (ClDumpStrictInstructions)
3908       dumpInst(I);
3909     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
3910     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
3911       Value *Operand = I.getOperand(i);
3912       if (Operand->getType()->isSized())
3913         insertShadowCheck(Operand, &I);
3914     }
3915     setShadow(&I, getCleanShadow(&I));
3916     setOrigin(&I, getCleanOrigin());
3917   }
3918 };
3919 
3920 /// AMD64-specific implementation of VarArgHelper.
3921 struct VarArgAMD64Helper : public VarArgHelper {
3922   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
3923   // See a comment in visitCallBase for more details.
3924   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
3925   static const unsigned AMD64FpEndOffsetSSE = 176;
3926   // If SSE is disabled, fp_offset in va_list is zero.
3927   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
3928 
3929   unsigned AMD64FpEndOffset;
3930   Function &F;
3931   MemorySanitizer &MS;
3932   MemorySanitizerVisitor &MSV;
3933   Value *VAArgTLSCopy = nullptr;
3934   Value *VAArgTLSOriginCopy = nullptr;
3935   Value *VAArgOverflowSize = nullptr;
3936 
3937   SmallVector<CallInst*, 16> VAStartInstrumentationList;
3938 
3939   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
3940 
3941   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
3942                     MemorySanitizerVisitor &MSV)
3943       : F(F), MS(MS), MSV(MSV) {
3944     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
3945     for (const auto &Attr : F.getAttributes().getFnAttributes()) {
3946       if (Attr.isStringAttribute() &&
3947           (Attr.getKindAsString() == "target-features")) {
3948         if (Attr.getValueAsString().contains("-sse"))
3949           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
3950         break;
3951       }
3952     }
3953   }
3954 
3955   ArgKind classifyArgument(Value* arg) {
3956     // A very rough approximation of X86_64 argument classification rules.
3957     Type *T = arg->getType();
3958     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
3959       return AK_FloatingPoint;
3960     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
3961       return AK_GeneralPurpose;
3962     if (T->isPointerTy())
3963       return AK_GeneralPurpose;
3964     return AK_Memory;
3965   }
3966 
3967   // For VarArg functions, store the argument shadow in an ABI-specific format
3968   // that corresponds to va_list layout.
3969   // We do this because Clang lowers va_arg in the frontend, and this pass
3970   // only sees the low level code that deals with va_list internals.
3971   // A much easier alternative (provided that Clang emits va_arg instructions)
3972   // would have been to associate each live instance of va_list with a copy of
3973   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
3974   // order.
3975   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
3976     unsigned GpOffset = 0;
3977     unsigned FpOffset = AMD64GpEndOffset;
3978     unsigned OverflowOffset = AMD64FpEndOffset;
3979     const DataLayout &DL = F.getParent()->getDataLayout();
3980     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
3981          ++ArgIt) {
3982       Value *A = *ArgIt;
3983       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
3984       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
3985       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
3986       if (IsByVal) {
3987         // ByVal arguments always go to the overflow area.
3988         // Fixed arguments passed through the overflow area will be stepped
3989         // over by va_start, so don't count them towards the offset.
3990         if (IsFixed)
3991           continue;
3992         assert(A->getType()->isPointerTy());
3993         Type *RealTy = CB.getParamByValType(ArgNo);
3994         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
3995         Value *ShadowBase = getShadowPtrForVAArgument(
3996             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
3997         Value *OriginBase = nullptr;
3998         if (MS.TrackOrigins)
3999           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4000         OverflowOffset += alignTo(ArgSize, 8);
4001         if (!ShadowBase)
4002           continue;
4003         Value *ShadowPtr, *OriginPtr;
4004         std::tie(ShadowPtr, OriginPtr) =
4005             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4006                                    /*isStore*/ false);
4007 
4008         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4009                          kShadowTLSAlignment, ArgSize);
4010         if (MS.TrackOrigins)
4011           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4012                            kShadowTLSAlignment, ArgSize);
4013       } else {
4014         ArgKind AK = classifyArgument(A);
4015         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4016           AK = AK_Memory;
4017         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4018           AK = AK_Memory;
4019         Value *ShadowBase, *OriginBase = nullptr;
4020         switch (AK) {
4021           case AK_GeneralPurpose:
4022             ShadowBase =
4023                 getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4024             if (MS.TrackOrigins)
4025               OriginBase =
4026                   getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4027             GpOffset += 8;
4028             break;
4029           case AK_FloatingPoint:
4030             ShadowBase =
4031                 getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4032             if (MS.TrackOrigins)
4033               OriginBase =
4034                   getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4035             FpOffset += 16;
4036             break;
4037           case AK_Memory:
4038             if (IsFixed)
4039               continue;
4040             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4041             ShadowBase =
4042                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4043             if (MS.TrackOrigins)
4044               OriginBase =
4045                   getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4046             OverflowOffset += alignTo(ArgSize, 8);
4047         }
4048         // Take fixed arguments into account for GpOffset and FpOffset,
4049         // but don't actually store shadows for them.
4050         // TODO(glider): don't call get*PtrForVAArgument() for them.
4051         if (IsFixed)
4052           continue;
4053         if (!ShadowBase)
4054           continue;
4055         Value *Shadow = MSV.getShadow(A);
4056         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4057         if (MS.TrackOrigins) {
4058           Value *Origin = MSV.getOrigin(A);
4059           unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4060           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4061                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4062         }
4063       }
4064     }
4065     Constant *OverflowSize =
4066       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4067     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4068   }
4069 
4070   /// Compute the shadow address for a given va_arg.
4071   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4072                                    unsigned ArgOffset, unsigned ArgSize) {
4073     // Make sure we don't overflow __msan_va_arg_tls.
4074     if (ArgOffset + ArgSize > kParamTLSSize)
4075       return nullptr;
4076     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4077     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4078     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4079                               "_msarg_va_s");
4080   }
4081 
4082   /// Compute the origin address for a given va_arg.
4083   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4084     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4085     // getOriginPtrForVAArgument() is always called after
4086     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4087     // overflow.
4088     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4089     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4090                               "_msarg_va_o");
4091   }
4092 
4093   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4094     IRBuilder<> IRB(&I);
4095     Value *VAListTag = I.getArgOperand(0);
4096     Value *ShadowPtr, *OriginPtr;
4097     const Align Alignment = Align(8);
4098     std::tie(ShadowPtr, OriginPtr) =
4099         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4100                                /*isStore*/ true);
4101 
4102     // Unpoison the whole __va_list_tag.
4103     // FIXME: magic ABI constants.
4104     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4105                      /* size */ 24, Alignment, false);
4106     // We shouldn't need to zero out the origins, as they're only checked for
4107     // nonzero shadow.
4108   }
4109 
4110   void visitVAStartInst(VAStartInst &I) override {
4111     if (F.getCallingConv() == CallingConv::Win64)
4112       return;
4113     VAStartInstrumentationList.push_back(&I);
4114     unpoisonVAListTagForInst(I);
4115   }
4116 
4117   void visitVACopyInst(VACopyInst &I) override {
4118     if (F.getCallingConv() == CallingConv::Win64) return;
4119     unpoisonVAListTagForInst(I);
4120   }
4121 
4122   void finalizeInstrumentation() override {
4123     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4124            "finalizeInstrumentation called twice");
4125     if (!VAStartInstrumentationList.empty()) {
4126       // If there is a va_start in this function, make a backup copy of
4127       // va_arg_tls somewhere in the function entry block.
4128       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4129       VAArgOverflowSize =
4130           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4131       Value *CopySize =
4132         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
4133                       VAArgOverflowSize);
4134       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4135       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4136       if (MS.TrackOrigins) {
4137         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4138         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
4139                          Align(8), CopySize);
4140       }
4141     }
4142 
4143     // Instrument va_start.
4144     // Copy va_list shadow from the backup copy of the TLS contents.
4145     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4146       CallInst *OrigInst = VAStartInstrumentationList[i];
4147       IRBuilder<> IRB(OrigInst->getNextNode());
4148       Value *VAListTag = OrigInst->getArgOperand(0);
4149 
4150       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4151       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4152           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4153                         ConstantInt::get(MS.IntptrTy, 16)),
4154           PointerType::get(RegSaveAreaPtrTy, 0));
4155       Value *RegSaveAreaPtr =
4156           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4157       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4158       const Align Alignment = Align(16);
4159       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4160           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4161                                  Alignment, /*isStore*/ true);
4162       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4163                        AMD64FpEndOffset);
4164       if (MS.TrackOrigins)
4165         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4166                          Alignment, AMD64FpEndOffset);
4167       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4168       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4169           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4170                         ConstantInt::get(MS.IntptrTy, 8)),
4171           PointerType::get(OverflowArgAreaPtrTy, 0));
4172       Value *OverflowArgAreaPtr =
4173           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4174       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4175       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4176           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4177                                  Alignment, /*isStore*/ true);
4178       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4179                                              AMD64FpEndOffset);
4180       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4181                        VAArgOverflowSize);
4182       if (MS.TrackOrigins) {
4183         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4184                                         AMD64FpEndOffset);
4185         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4186                          VAArgOverflowSize);
4187       }
4188     }
4189   }
4190 };
4191 
4192 /// MIPS64-specific implementation of VarArgHelper.
4193 struct VarArgMIPS64Helper : public VarArgHelper {
4194   Function &F;
4195   MemorySanitizer &MS;
4196   MemorySanitizerVisitor &MSV;
4197   Value *VAArgTLSCopy = nullptr;
4198   Value *VAArgSize = nullptr;
4199 
4200   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4201 
4202   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4203                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4204 
4205   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4206     unsigned VAArgOffset = 0;
4207     const DataLayout &DL = F.getParent()->getDataLayout();
4208     for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
4209               End = CB.arg_end();
4210          ArgIt != End; ++ArgIt) {
4211       Triple TargetTriple(F.getParent()->getTargetTriple());
4212       Value *A = *ArgIt;
4213       Value *Base;
4214       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4215       if (TargetTriple.getArch() == Triple::mips64) {
4216         // Adjusting the shadow for argument with size < 8 to match the placement
4217         // of bits in big endian system
4218         if (ArgSize < 8)
4219           VAArgOffset += (8 - ArgSize);
4220       }
4221       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4222       VAArgOffset += ArgSize;
4223       VAArgOffset = alignTo(VAArgOffset, 8);
4224       if (!Base)
4225         continue;
4226       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4227     }
4228 
4229     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
4230     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
4231     // a new class member i.e. it is the total size of all VarArgs.
4232     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4233   }
4234 
4235   /// Compute the shadow address for a given va_arg.
4236   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4237                                    unsigned ArgOffset, unsigned ArgSize) {
4238     // Make sure we don't overflow __msan_va_arg_tls.
4239     if (ArgOffset + ArgSize > kParamTLSSize)
4240       return nullptr;
4241     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4242     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4243     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4244                               "_msarg");
4245   }
4246 
4247   void visitVAStartInst(VAStartInst &I) override {
4248     IRBuilder<> IRB(&I);
4249     VAStartInstrumentationList.push_back(&I);
4250     Value *VAListTag = I.getArgOperand(0);
4251     Value *ShadowPtr, *OriginPtr;
4252     const Align Alignment = Align(8);
4253     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4254         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4255     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4256                      /* size */ 8, Alignment, false);
4257   }
4258 
4259   void visitVACopyInst(VACopyInst &I) override {
4260     IRBuilder<> IRB(&I);
4261     VAStartInstrumentationList.push_back(&I);
4262     Value *VAListTag = I.getArgOperand(0);
4263     Value *ShadowPtr, *OriginPtr;
4264     const Align Alignment = Align(8);
4265     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4266         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4267     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4268                      /* size */ 8, Alignment, false);
4269   }
4270 
4271   void finalizeInstrumentation() override {
4272     assert(!VAArgSize && !VAArgTLSCopy &&
4273            "finalizeInstrumentation called twice");
4274     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4275     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4276     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4277                                     VAArgSize);
4278 
4279     if (!VAStartInstrumentationList.empty()) {
4280       // If there is a va_start in this function, make a backup copy of
4281       // va_arg_tls somewhere in the function entry block.
4282       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4283       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4284     }
4285 
4286     // Instrument va_start.
4287     // Copy va_list shadow from the backup copy of the TLS contents.
4288     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4289       CallInst *OrigInst = VAStartInstrumentationList[i];
4290       IRBuilder<> IRB(OrigInst->getNextNode());
4291       Value *VAListTag = OrigInst->getArgOperand(0);
4292       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4293       Value *RegSaveAreaPtrPtr =
4294           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4295                              PointerType::get(RegSaveAreaPtrTy, 0));
4296       Value *RegSaveAreaPtr =
4297           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4298       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4299       const Align Alignment = Align(8);
4300       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4301           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4302                                  Alignment, /*isStore*/ true);
4303       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4304                        CopySize);
4305     }
4306   }
4307 };
4308 
4309 /// AArch64-specific implementation of VarArgHelper.
4310 struct VarArgAArch64Helper : public VarArgHelper {
4311   static const unsigned kAArch64GrArgSize = 64;
4312   static const unsigned kAArch64VrArgSize = 128;
4313 
4314   static const unsigned AArch64GrBegOffset = 0;
4315   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
4316   // Make VR space aligned to 16 bytes.
4317   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
4318   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
4319                                              + kAArch64VrArgSize;
4320   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
4321 
4322   Function &F;
4323   MemorySanitizer &MS;
4324   MemorySanitizerVisitor &MSV;
4325   Value *VAArgTLSCopy = nullptr;
4326   Value *VAArgOverflowSize = nullptr;
4327 
4328   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4329 
4330   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4331 
4332   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
4333                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4334 
4335   ArgKind classifyArgument(Value* arg) {
4336     Type *T = arg->getType();
4337     if (T->isFPOrFPVectorTy())
4338       return AK_FloatingPoint;
4339     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4340         || (T->isPointerTy()))
4341       return AK_GeneralPurpose;
4342     return AK_Memory;
4343   }
4344 
  // The instrumentation stores the argument shadow in a non-ABI-specific
  // format because it does not know which arguments are named (since Clang,
  // as in the x86_64 case, lowers va_arg in the frontend and this pass only
  // sees the low-level code that deals with va_list internals).
  // The shadow of the GR argument registers is saved in the first 64 bytes
  // (kAArch64GrArgSize) of the va_arg TLS array, followed by 128 bytes
  // (kAArch64VrArgSize) for the FP/SIMD argument registers, and then the
  // remaining arguments.
  // Using constant offsets within the va_arg TLS array allows a fast copy
  // in finalizeInstrumentation().
4354   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4355     unsigned GrOffset = AArch64GrBegOffset;
4356     unsigned VrOffset = AArch64VrBegOffset;
4357     unsigned OverflowOffset = AArch64VAEndOffset;
4358 
4359     const DataLayout &DL = F.getParent()->getDataLayout();
4360     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4361          ++ArgIt) {
4362       Value *A = *ArgIt;
4363       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4364       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4365       ArgKind AK = classifyArgument(A);
4366       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
4367         AK = AK_Memory;
4368       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
4369         AK = AK_Memory;
4370       Value *Base;
4371       switch (AK) {
4372         case AK_GeneralPurpose:
4373           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
4374           GrOffset += 8;
4375           break;
4376         case AK_FloatingPoint:
4377           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
4378           VrOffset += 16;
4379           break;
4380         case AK_Memory:
4381           // Don't count fixed arguments in the overflow area - va_start will
4382           // skip right over them.
4383           if (IsFixed)
4384             continue;
4385           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4386           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
4387                                            alignTo(ArgSize, 8));
4388           OverflowOffset += alignTo(ArgSize, 8);
4389           break;
4390       }
4391       // Count Gp/Vr fixed arguments to their respective offsets, but don't
4392       // bother to actually store a shadow.
4393       if (IsFixed)
4394         continue;
4395       if (!Base)
4396         continue;
4397       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4398     }
4399     Constant *OverflowSize =
4400       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
4401     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4402   }
4403 
4404   /// Compute the shadow address for a given va_arg.
4405   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4406                                    unsigned ArgOffset, unsigned ArgSize) {
4407     // Make sure we don't overflow __msan_va_arg_tls.
4408     if (ArgOffset + ArgSize > kParamTLSSize)
4409       return nullptr;
4410     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4411     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4412     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4413                               "_msarg");
4414   }
4415 
4416   void visitVAStartInst(VAStartInst &I) override {
4417     IRBuilder<> IRB(&I);
4418     VAStartInstrumentationList.push_back(&I);
4419     Value *VAListTag = I.getArgOperand(0);
4420     Value *ShadowPtr, *OriginPtr;
4421     const Align Alignment = Align(8);
4422     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4423         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4424     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4425                      /* size */ 32, Alignment, false);
4426   }
4427 
4428   void visitVACopyInst(VACopyInst &I) override {
4429     IRBuilder<> IRB(&I);
4430     VAStartInstrumentationList.push_back(&I);
4431     Value *VAListTag = I.getArgOperand(0);
4432     Value *ShadowPtr, *OriginPtr;
4433     const Align Alignment = Align(8);
4434     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4435         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4436     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4437                      /* size */ 32, Alignment, false);
4438   }
4439 
4440   // Retrieve a va_list field of 'void*' size.
4441   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4442     Value *SaveAreaPtrPtr =
4443       IRB.CreateIntToPtr(
4444         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4445                       ConstantInt::get(MS.IntptrTy, offset)),
4446         Type::getInt64PtrTy(*MS.C));
4447     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
4448   }
4449 
4450   // Retrieve a va_list field of 'int' size.
4451   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4452     Value *SaveAreaPtr =
4453       IRB.CreateIntToPtr(
4454         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4455                       ConstantInt::get(MS.IntptrTy, offset)),
4456         Type::getInt32PtrTy(*MS.C));
4457     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
4458     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
4459   }
4460 
4461   void finalizeInstrumentation() override {
4462     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4463            "finalizeInstrumentation called twice");
4464     if (!VAStartInstrumentationList.empty()) {
4465       // If there is a va_start in this function, make a backup copy of
4466       // va_arg_tls somewhere in the function entry block.
4467       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4468       VAArgOverflowSize =
4469           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4470       Value *CopySize =
4471         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
4472                       VAArgOverflowSize);
4473       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4474       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4475     }
4476 
4477     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
4478     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
4479 
4480     // Instrument va_start, copy va_list shadow from the backup copy of
4481     // the TLS contents.
4482     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4483       CallInst *OrigInst = VAStartInstrumentationList[i];
4484       IRBuilder<> IRB(OrigInst->getNextNode());
4485 
4486       Value *VAListTag = OrigInst->getArgOperand(0);
4487 
4488       // The variadic ABI for AArch64 creates two areas to save the incoming
4489       // argument registers (one for 64-bit general register xn-x7 and another
4490       // for 128-bit FP/SIMD vn-v7).
4491       // We need then to propagate the shadow arguments on both regions
4492       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
4493       // The remaining arguments are saved on shadow for 'va::stack'.
4494       // One caveat is it requires only to propagate the non-named arguments,
4495       // however on the call site instrumentation 'all' the arguments are
4496       // saved. So to copy the shadow values from the va_arg TLS array
4497       // we need to adjust the offset for both GR and VR fields based on
4498       // the __{gr,vr}_offs value (since they are stores based on incoming
4499       // named arguments).
4500 
4501       // Read the stack pointer from the va_list.
4502       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
4503 
4504       // Read both the __gr_top and __gr_off and add them up.
4505       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
4506       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
4507 
4508       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
4509 
4510       // Read both the __vr_top and __vr_off and add them up.
4511       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
4512       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
4513 
4514       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
4515 
4516       // It does not know how many named arguments is being used and, on the
4517       // callsite all the arguments were saved.  Since __gr_off is defined as
4518       // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
4519       // argument by ignoring the bytes of shadow from named arguments.
4520       Value *GrRegSaveAreaShadowPtrOff =
4521         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
4522 
4523       Value *GrRegSaveAreaShadowPtr =
4524           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4525                                  Align(8), /*isStore*/ true)
4526               .first;
4527 
4528       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4529                                               GrRegSaveAreaShadowPtrOff);
4530       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
4531 
4532       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
4533                        GrCopySize);
4534 
4535       // Again, but for FP/SIMD values.
4536       Value *VrRegSaveAreaShadowPtrOff =
4537           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
4538 
4539       Value *VrRegSaveAreaShadowPtr =
4540           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4541                                  Align(8), /*isStore*/ true)
4542               .first;
4543 
4544       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
4545         IRB.getInt8Ty(),
4546         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4547                               IRB.getInt32(AArch64VrBegOffset)),
4548         VrRegSaveAreaShadowPtrOff);
4549       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
4550 
4551       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
4552                        VrCopySize);
4553 
4554       // And finally for remaining arguments.
4555       Value *StackSaveAreaShadowPtr =
4556           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
4557                                  Align(16), /*isStore*/ true)
4558               .first;
4559 
4560       Value *StackSrcPtr =
4561         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4562                               IRB.getInt32(AArch64VAEndOffset));
4563 
4564       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
4565                        Align(16), VAArgOverflowSize);
4566     }
4567   }
4568 };
4569 
4570 /// PowerPC64-specific implementation of VarArgHelper.
4571 struct VarArgPowerPC64Helper : public VarArgHelper {
4572   Function &F;
4573   MemorySanitizer &MS;
4574   MemorySanitizerVisitor &MSV;
4575   Value *VAArgTLSCopy = nullptr;
4576   Value *VAArgSize = nullptr;
4577 
4578   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4579 
4580   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
4581                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4582 
4583   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4584     // For PowerPC, we need to deal with alignment of stack arguments -
4585     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
4586     // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
4587     // and QPX vectors are aligned to 32 bytes.  For that reason, we
4588     // compute current offset from stack pointer (which is always properly
4589     // aligned), and offset for the first vararg, then subtract them.
4590     unsigned VAArgBase;
4591     Triple TargetTriple(F.getParent()->getTargetTriple());
4592     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
4593     // and 32 bytes for ABIv2.  This is usually determined by target
4594     // endianness, but in theory could be overridden by function attribute.
4595     // For simplicity, we ignore it here (it'd only matter for QPX vectors).
4596     if (TargetTriple.getArch() == Triple::ppc64)
4597       VAArgBase = 48;
4598     else
4599       VAArgBase = 32;
4600     unsigned VAArgOffset = VAArgBase;
4601     const DataLayout &DL = F.getParent()->getDataLayout();
4602     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4603          ++ArgIt) {
4604       Value *A = *ArgIt;
4605       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4606       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4607       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4608       if (IsByVal) {
4609         assert(A->getType()->isPointerTy());
4610         Type *RealTy = CB.getParamByValType(ArgNo);
4611         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4612         MaybeAlign ArgAlign = CB.getParamAlign(ArgNo);
4613         if (!ArgAlign || *ArgAlign < Align(8))
4614           ArgAlign = Align(8);
4615         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4616         if (!IsFixed) {
4617           Value *Base = getShadowPtrForVAArgument(
4618               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
4619           if (Base) {
4620             Value *AShadowPtr, *AOriginPtr;
4621             std::tie(AShadowPtr, AOriginPtr) =
4622                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
4623                                        kShadowTLSAlignment, /*isStore*/ false);
4624 
4625             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
4626                              kShadowTLSAlignment, ArgSize);
4627           }
4628         }
4629         VAArgOffset += alignTo(ArgSize, 8);
4630       } else {
4631         Value *Base;
4632         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4633         uint64_t ArgAlign = 8;
4634         if (A->getType()->isArrayTy()) {
4635           // Arrays are aligned to element size, except for long double
4636           // arrays, which are aligned to 8 bytes.
4637           Type *ElementTy = A->getType()->getArrayElementType();
4638           if (!ElementTy->isPPC_FP128Ty())
4639             ArgAlign = DL.getTypeAllocSize(ElementTy);
4640         } else if (A->getType()->isVectorTy()) {
4641           // Vectors are naturally aligned.
4642           ArgAlign = DL.getTypeAllocSize(A->getType());
4643         }
4644         if (ArgAlign < 8)
4645           ArgAlign = 8;
4646         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4647         if (DL.isBigEndian()) {
4648           // Adjusting the shadow for argument with size < 8 to match the placement
4649           // of bits in big endian system
4650           if (ArgSize < 8)
4651             VAArgOffset += (8 - ArgSize);
4652         }
4653         if (!IsFixed) {
4654           Base = getShadowPtrForVAArgument(A->getType(), IRB,
4655                                            VAArgOffset - VAArgBase, ArgSize);
4656           if (Base)
4657             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4658         }
4659         VAArgOffset += ArgSize;
4660         VAArgOffset = alignTo(VAArgOffset, 8);
4661       }
4662       if (IsFixed)
4663         VAArgBase = VAArgOffset;
4664     }
4665 
4666     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
4667                                                 VAArgOffset - VAArgBase);
4668     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
4669     // a new class member i.e. it is the total size of all VarArgs.
4670     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4671   }
4672 
4673   /// Compute the shadow address for a given va_arg.
4674   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4675                                    unsigned ArgOffset, unsigned ArgSize) {
4676     // Make sure we don't overflow __msan_va_arg_tls.
4677     if (ArgOffset + ArgSize > kParamTLSSize)
4678       return nullptr;
4679     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4680     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4681     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4682                               "_msarg");
4683   }
4684 
4685   void visitVAStartInst(VAStartInst &I) override {
4686     IRBuilder<> IRB(&I);
4687     VAStartInstrumentationList.push_back(&I);
4688     Value *VAListTag = I.getArgOperand(0);
4689     Value *ShadowPtr, *OriginPtr;
4690     const Align Alignment = Align(8);
4691     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4692         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4693     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4694                      /* size */ 8, Alignment, false);
4695   }
4696 
4697   void visitVACopyInst(VACopyInst &I) override {
4698     IRBuilder<> IRB(&I);
4699     Value *VAListTag = I.getArgOperand(0);
4700     Value *ShadowPtr, *OriginPtr;
4701     const Align Alignment = Align(8);
4702     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4703         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4704     // Unpoison the whole __va_list_tag.
4705     // FIXME: magic ABI constants.
4706     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4707                      /* size */ 8, Alignment, false);
4708   }
4709 
4710   void finalizeInstrumentation() override {
4711     assert(!VAArgSize && !VAArgTLSCopy &&
4712            "finalizeInstrumentation called twice");
4713     IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
4714     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4715     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4716                                     VAArgSize);
4717 
4718     if (!VAStartInstrumentationList.empty()) {
4719       // If there is a va_start in this function, make a backup copy of
4720       // va_arg_tls somewhere in the function entry block.
4721       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4722       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4723     }
4724 
4725     // Instrument va_start.
4726     // Copy va_list shadow from the backup copy of the TLS contents.
4727     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4728       CallInst *OrigInst = VAStartInstrumentationList[i];
4729       IRBuilder<> IRB(OrigInst->getNextNode());
4730       Value *VAListTag = OrigInst->getArgOperand(0);
4731       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4732       Value *RegSaveAreaPtrPtr =
4733           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4734                              PointerType::get(RegSaveAreaPtrTy, 0));
4735       Value *RegSaveAreaPtr =
4736           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4737       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4738       const Align Alignment = Align(8);
4739       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4740           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4741                                  Alignment, /*isStore*/ true);
4742       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4743                        CopySize);
4744     }
4745   }
4746 };
4747 
4748 /// SystemZ-specific implementation of VarArgHelper.
4749 struct VarArgSystemZHelper : public VarArgHelper {
4750   static const unsigned SystemZGpOffset = 16;
4751   static const unsigned SystemZGpEndOffset = 56;
4752   static const unsigned SystemZFpOffset = 128;
4753   static const unsigned SystemZFpEndOffset = 160;
4754   static const unsigned SystemZMaxVrArgs = 8;
4755   static const unsigned SystemZRegSaveAreaSize = 160;
4756   static const unsigned SystemZOverflowOffset = 160;
4757   static const unsigned SystemZVAListTagSize = 32;
4758   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
4759   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
4760 
4761   Function &F;
4762   MemorySanitizer &MS;
4763   MemorySanitizerVisitor &MSV;
4764   Value *VAArgTLSCopy = nullptr;
4765   Value *VAArgTLSOriginCopy = nullptr;
4766   Value *VAArgOverflowSize = nullptr;
4767 
4768   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4769 
4770   enum class ArgKind {
4771     GeneralPurpose,
4772     FloatingPoint,
4773     Vector,
4774     Memory,
4775     Indirect,
4776   };
4777 
4778   enum class ShadowExtension { None, Zero, Sign };
4779 
4780   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
4781                       MemorySanitizerVisitor &MSV)
4782       : F(F), MS(MS), MSV(MSV) {}
4783 
4784   ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
4785     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
4786     // only a few possibilities of what it can be. In particular, enums, single
4787     // element structs and large types have already been taken care of.
4788 
4789     // Some i128 and fp128 arguments are converted to pointers only in the
4790     // back end.
4791     if (T->isIntegerTy(128) || T->isFP128Ty())
4792       return ArgKind::Indirect;
4793     if (T->isFloatingPointTy())
4794       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
4795     if (T->isIntegerTy() || T->isPointerTy())
4796       return ArgKind::GeneralPurpose;
4797     if (T->isVectorTy())
4798       return ArgKind::Vector;
4799     return ArgKind::Memory;
4800   }
4801 
4802   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
4803     // ABI says: "One of the simple integer types no more than 64 bits wide.
4804     // ... If such an argument is shorter than 64 bits, replace it by a full
4805     // 64-bit integer representing the same number, using sign or zero
4806     // extension". Shadow for an integer argument has the same type as the
4807     // argument itself, so it can be sign or zero extended as well.
4808     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
4809     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
4810     if (ZExt) {
4811       assert(!SExt);
4812       return ShadowExtension::Zero;
4813     }
4814     if (SExt) {
4815       assert(!ZExt);
4816       return ShadowExtension::Sign;
4817     }
4818     return ShadowExtension::None;
4819   }
4820 
4821   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4822     bool IsSoftFloatABI = CB.getCalledFunction()
4823                               ->getFnAttribute("use-soft-float")
4824                               .getValueAsString() == "true";
4825     unsigned GpOffset = SystemZGpOffset;
4826     unsigned FpOffset = SystemZFpOffset;
4827     unsigned VrIndex = 0;
4828     unsigned OverflowOffset = SystemZOverflowOffset;
4829     const DataLayout &DL = F.getParent()->getDataLayout();
4830     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4831          ++ArgIt) {
4832       Value *A = *ArgIt;
4833       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4834       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4835       // SystemZABIInfo does not produce ByVal parameters.
4836       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
4837       Type *T = A->getType();
4838       ArgKind AK = classifyArgument(T, IsSoftFloatABI);
4839       if (AK == ArgKind::Indirect) {
4840         T = PointerType::get(T, 0);
4841         AK = ArgKind::GeneralPurpose;
4842       }
4843       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
4844         AK = ArgKind::Memory;
4845       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
4846         AK = ArgKind::Memory;
4847       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
4848         AK = ArgKind::Memory;
4849       Value *ShadowBase = nullptr;
4850       Value *OriginBase = nullptr;
4851       ShadowExtension SE = ShadowExtension::None;
4852       switch (AK) {
4853       case ArgKind::GeneralPurpose: {
4854         // Always keep track of GpOffset, but store shadow only for varargs.
4855         uint64_t ArgSize = 8;
4856         if (GpOffset + ArgSize <= kParamTLSSize) {
4857           if (!IsFixed) {
4858             SE = getShadowExtension(CB, ArgNo);
4859             uint64_t GapSize = 0;
4860             if (SE == ShadowExtension::None) {
4861               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
4862               assert(ArgAllocSize <= ArgSize);
4863               GapSize = ArgSize - ArgAllocSize;
4864             }
4865             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
4866             if (MS.TrackOrigins)
4867               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
4868           }
4869           GpOffset += ArgSize;
4870         } else {
4871           GpOffset = kParamTLSSize;
4872         }
4873         break;
4874       }
4875       case ArgKind::FloatingPoint: {
4876         // Always keep track of FpOffset, but store shadow only for varargs.
4877         uint64_t ArgSize = 8;
4878         if (FpOffset + ArgSize <= kParamTLSSize) {
4879           if (!IsFixed) {
4880             // PoP says: "A short floating-point datum requires only the
4881             // left-most 32 bit positions of a floating-point register".
4882             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
4883             // don't extend shadow and don't mind the gap.
4884             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
4885             if (MS.TrackOrigins)
4886               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
4887           }
4888           FpOffset += ArgSize;
4889         } else {
4890           FpOffset = kParamTLSSize;
4891         }
4892         break;
4893       }
4894       case ArgKind::Vector: {
4895         // Keep track of VrIndex. No need to store shadow, since vector varargs
4896         // go through AK_Memory.
4897         assert(IsFixed);
4898         VrIndex++;
4899         break;
4900       }
4901       case ArgKind::Memory: {
4902         // Keep track of OverflowOffset and store shadow only for varargs.
4903         // Ignore fixed args, since we need to copy only the vararg portion of
4904         // the overflow area shadow.
4905         if (!IsFixed) {
4906           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
4907           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
4908           if (OverflowOffset + ArgSize <= kParamTLSSize) {
4909             SE = getShadowExtension(CB, ArgNo);
4910             uint64_t GapSize =
4911                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
4912             ShadowBase =
4913                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
4914             if (MS.TrackOrigins)
4915               OriginBase =
4916                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
4917             OverflowOffset += ArgSize;
4918           } else {
4919             OverflowOffset = kParamTLSSize;
4920           }
4921         }
4922         break;
4923       }
4924       case ArgKind::Indirect:
4925         llvm_unreachable("Indirect must be converted to GeneralPurpose");
4926       }
4927       if (ShadowBase == nullptr)
4928         continue;
4929       Value *Shadow = MSV.getShadow(A);
4930       if (SE != ShadowExtension::None)
4931         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
4932                                       /*Signed*/ SE == ShadowExtension::Sign);
4933       ShadowBase = IRB.CreateIntToPtr(
4934           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
4935       IRB.CreateStore(Shadow, ShadowBase);
4936       if (MS.TrackOrigins) {
4937         Value *Origin = MSV.getOrigin(A);
4938         unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4939         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4940                         kMinOriginAlignment);
4941       }
4942     }
4943     Constant *OverflowSize = ConstantInt::get(
4944         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
4945     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4946   }
4947 
4948   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
4949     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4950     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4951   }
4952 
4953   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
4954     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4955     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4956     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4957                               "_msarg_va_o");
4958   }
4959 
4960   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4961     IRBuilder<> IRB(&I);
4962     Value *VAListTag = I.getArgOperand(0);
4963     Value *ShadowPtr, *OriginPtr;
4964     const Align Alignment = Align(8);
4965     std::tie(ShadowPtr, OriginPtr) =
4966         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4967                                /*isStore*/ true);
4968     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4969                      SystemZVAListTagSize, Alignment, false);
4970   }
4971 
4972   void visitVAStartInst(VAStartInst &I) override {
4973     VAStartInstrumentationList.push_back(&I);
4974     unpoisonVAListTagForInst(I);
4975   }
4976 
4977   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
4978 
4979   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
4980     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4981     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4982         IRB.CreateAdd(
4983             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4984             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
4985         PointerType::get(RegSaveAreaPtrTy, 0));
4986     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4987     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4988     const Align Alignment = Align(8);
4989     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4990         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
4991                                /*isStore*/ true);
4992     // TODO(iii): copy only fragments filled by visitCallBase()
4993     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4994                      SystemZRegSaveAreaSize);
4995     if (MS.TrackOrigins)
4996       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4997                        Alignment, SystemZRegSaveAreaSize);
4998   }
4999 
5000   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5001     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5002     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5003         IRB.CreateAdd(
5004             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5005             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5006         PointerType::get(OverflowArgAreaPtrTy, 0));
5007     Value *OverflowArgAreaPtr =
5008         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5009     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5010     const Align Alignment = Align(8);
5011     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5012         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5013                                Alignment, /*isStore*/ true);
5014     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5015                                            SystemZOverflowOffset);
5016     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5017                      VAArgOverflowSize);
5018     if (MS.TrackOrigins) {
5019       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5020                                       SystemZOverflowOffset);
5021       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5022                        VAArgOverflowSize);
5023     }
5024   }
5025 
5026   void finalizeInstrumentation() override {
5027     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5028            "finalizeInstrumentation called twice");
5029     if (!VAStartInstrumentationList.empty()) {
5030       // If there is a va_start in this function, make a backup copy of
5031       // va_arg_tls somewhere in the function entry block.
5032       IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
5033       VAArgOverflowSize =
5034           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5035       Value *CopySize =
5036           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5037                         VAArgOverflowSize);
5038       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5039       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
5040       if (MS.TrackOrigins) {
5041         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5042         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
5043                          Align(8), CopySize);
5044       }
5045     }
5046 
5047     // Instrument va_start.
5048     // Copy va_list shadow from the backup copy of the TLS contents.
5049     for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
5050          VaStartNo < VaStartNum; VaStartNo++) {
5051       CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
5052       IRBuilder<> IRB(OrigInst->getNextNode());
5053       Value *VAListTag = OrigInst->getArgOperand(0);
5054       copyRegSaveArea(IRB, VAListTag);
5055       copyOverflowArea(IRB, VAListTag);
5056     }
5057   }
5058 };
5059 
5060 /// A no-op implementation of VarArgHelper.
5061 struct VarArgNoOpHelper : public VarArgHelper {
5062   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5063                    MemorySanitizerVisitor &MSV) {}
5064 
5065   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5066 
5067   void visitVAStartInst(VAStartInst &I) override {}
5068 
5069   void visitVACopyInst(VACopyInst &I) override {}
5070 
5071   void finalizeInstrumentation() override {}
5072 };
5073 
5074 } // end anonymous namespace
5075 
5076 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5077                                         MemorySanitizerVisitor &Visitor) {
5078   // VarArg handling is only implemented on AMD64. False positives are possible
5079   // on other platforms.
5080   Triple TargetTriple(Func.getParent()->getTargetTriple());
5081   if (TargetTriple.getArch() == Triple::x86_64)
5082     return new VarArgAMD64Helper(Func, Msan, Visitor);
5083   else if (TargetTriple.isMIPS64())
5084     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5085   else if (TargetTriple.getArch() == Triple::aarch64)
5086     return new VarArgAArch64Helper(Func, Msan, Visitor);
5087   else if (TargetTriple.getArch() == Triple::ppc64 ||
5088            TargetTriple.getArch() == Triple::ppc64le)
5089     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5090   else if (TargetTriple.getArch() == Triple::systemz)
5091     return new VarArgSystemZHelper(Func, Msan, Visitor);
5092   else
5093     return new VarArgNoOpHelper(Func, Msan, Visitor);
5094 }
5095 
5096 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5097   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5098     return false;
5099 
5100   MemorySanitizerVisitor Visitor(F, *this, TLI);
5101 
5102   // Clear out readonly/readnone attributes.
5103   AttrBuilder B;
5104   B.addAttribute(Attribute::ReadOnly)
5105       .addAttribute(Attribute::ReadNone)
5106       .addAttribute(Attribute::WriteOnly)
5107       .addAttribute(Attribute::ArgMemOnly)
5108       .addAttribute(Attribute::Speculatable);
5109   F.removeAttributes(AttributeList::FunctionIndex, B);
5110 
5111   return Visitor.runOnFunction();
5112 }
5113