xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5)
1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
17 /// propagate the shadow bits through some of the arithmetic
18 /// instructions (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings the major issue
26 /// as well: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
65 /// This way it avoids needless overwriting origin of the 4-byte region on
66 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
72 /// of two disjoint locations can not be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It can be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics can be only visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach generating calls to
101 ///   __msan_instrument_asm_store(ptr, size)
102 /// , which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions. The corresponding functions check that the X-byte accesses
121 ///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///  - TLS variables are stored in a single per-task struct. A call to a
126 ///    function __msan_get_context_state() returning a pointer to that struct
127 ///    is inserted into every instrumented function before the entry block;
128 ///  - __msan_warning() takes a 32-bit origin parameter;
129 ///  - local variables are poisoned with __msan_poison_alloca() upon function
130 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
131 ///    function;
132 ///  - the pass doesn't declare any global variables or add global constructors
133 ///    to the translation unit.
134 ///
135 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
136 /// calls, making sure we're on the safe side wrt. possible false positives.
137 ///
138 ///  KernelMemorySanitizer only supports X86_64 at the moment.
139 ///
140 //
141 // FIXME: This sanitizer does not yet handle scalable vectors
142 //
143 //===----------------------------------------------------------------------===//
144 
145 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
146 #include "llvm/ADT/APInt.h"
147 #include "llvm/ADT/ArrayRef.h"
148 #include "llvm/ADT/DepthFirstIterator.h"
149 #include "llvm/ADT/SmallSet.h"
150 #include "llvm/ADT/SmallString.h"
151 #include "llvm/ADT/SmallVector.h"
152 #include "llvm/ADT/StringExtras.h"
153 #include "llvm/ADT/StringRef.h"
154 #include "llvm/ADT/Triple.h"
155 #include "llvm/Analysis/TargetLibraryInfo.h"
156 #include "llvm/Analysis/ValueTracking.h"
157 #include "llvm/IR/Argument.h"
158 #include "llvm/IR/Attributes.h"
159 #include "llvm/IR/BasicBlock.h"
160 #include "llvm/IR/CallingConv.h"
161 #include "llvm/IR/Constant.h"
162 #include "llvm/IR/Constants.h"
163 #include "llvm/IR/DataLayout.h"
164 #include "llvm/IR/DerivedTypes.h"
165 #include "llvm/IR/Function.h"
166 #include "llvm/IR/GlobalValue.h"
167 #include "llvm/IR/GlobalVariable.h"
168 #include "llvm/IR/IRBuilder.h"
169 #include "llvm/IR/InlineAsm.h"
170 #include "llvm/IR/InstVisitor.h"
171 #include "llvm/IR/InstrTypes.h"
172 #include "llvm/IR/Instruction.h"
173 #include "llvm/IR/Instructions.h"
174 #include "llvm/IR/IntrinsicInst.h"
175 #include "llvm/IR/Intrinsics.h"
176 #include "llvm/IR/IntrinsicsX86.h"
177 #include "llvm/IR/MDBuilder.h"
178 #include "llvm/IR/Module.h"
179 #include "llvm/IR/Type.h"
180 #include "llvm/IR/Value.h"
181 #include "llvm/IR/ValueMap.h"
182 #include "llvm/Support/Alignment.h"
183 #include "llvm/Support/AtomicOrdering.h"
184 #include "llvm/Support/Casting.h"
185 #include "llvm/Support/CommandLine.h"
186 #include "llvm/Support/Debug.h"
187 #include "llvm/Support/ErrorHandling.h"
188 #include "llvm/Support/MathExtras.h"
189 #include "llvm/Support/raw_ostream.h"
190 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
191 #include "llvm/Transforms/Utils/Local.h"
192 #include "llvm/Transforms/Utils/ModuleUtils.h"
193 #include <algorithm>
194 #include <cassert>
195 #include <cstddef>
196 #include <cstdint>
197 #include <memory>
198 #include <string>
199 #include <tuple>
200 
201 using namespace llvm;
202 
203 #define DEBUG_TYPE "msan"
204 
205 static const unsigned kOriginSize = 4;
206 static const Align kMinOriginAlignment = Align(4);
207 static const Align kShadowTLSAlignment = Align(8);
208 
209 // These constants must be kept in sync with the ones in msan.h.
210 static const unsigned kParamTLSSize = 800;
211 static const unsigned kRetvalTLSSize = 800;
212 
213 // Accesses sizes are powers of two: 1, 2, 4, 8.
214 static const size_t kNumberOfAccessSizes = 4;
215 
216 /// Track origins of uninitialized values.
217 ///
218 /// Adds a section to MemorySanitizer report that points to the allocation
219 /// (stack or heap) the uninitialized bits came from originally.
220 static cl::opt<int> ClTrackOrigins("msan-track-origins",
221        cl::desc("Track origins (allocation sites) of poisoned memory"),
222        cl::Hidden, cl::init(0));
223 
224 static cl::opt<bool> ClKeepGoing("msan-keep-going",
225        cl::desc("keep going after reporting a UMR"),
226        cl::Hidden, cl::init(false));
227 
228 static cl::opt<bool> ClPoisonStack("msan-poison-stack",
229        cl::desc("poison uninitialized stack variables"),
230        cl::Hidden, cl::init(true));
231 
232 static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
233        cl::desc("poison uninitialized stack variables with a call"),
234        cl::Hidden, cl::init(false));
235 
236 static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
237        cl::desc("poison uninitialized stack variables with the given pattern"),
238        cl::Hidden, cl::init(0xff));
239 
240 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
241        cl::desc("poison undef temps"),
242        cl::Hidden, cl::init(true));
243 
244 static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
245        cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
246        cl::Hidden, cl::init(true));
247 
248 static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
249        cl::desc("exact handling of relational integer ICmp"),
250        cl::Hidden, cl::init(false));
251 
252 static cl::opt<bool> ClHandleLifetimeIntrinsics(
253     "msan-handle-lifetime-intrinsics",
254     cl::desc(
255         "when possible, poison scoped variables at the beginning of the scope "
256         "(slower, but more precise)"),
257     cl::Hidden, cl::init(true));
258 
259 // When compiling the Linux kernel, we sometimes see false positives related to
260 // MSan being unable to understand that inline assembly calls may initialize
261 // local variables.
262 // This flag makes the compiler conservatively unpoison every memory location
263 // passed into an assembly call. Note that this may cause false positives.
264 // Because it's impossible to figure out the array sizes, we can only unpoison
265 // the first sizeof(type) bytes for each type* pointer.
266 // The instrumentation is only enabled in KMSAN builds, and only if
267 // -msan-handle-asm-conservative is on. This is done because we may want to
268 // quickly disable assembly instrumentation when it breaks.
269 static cl::opt<bool> ClHandleAsmConservative(
270     "msan-handle-asm-conservative",
271     cl::desc("conservative handling of inline assembly"), cl::Hidden,
272     cl::init(true));
273 
274 // This flag controls whether we check the shadow of the address
275 // operand of load or store. Such bugs are very rare, since load from
276 // a garbage address typically results in SEGV, but still happen
277 // (e.g. only lower bits of address are garbage, or the access happens
278 // early at program startup where malloc-ed memory is more likely to
279 // be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
280 static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
281        cl::desc("report accesses through a pointer which has poisoned shadow"),
282        cl::Hidden, cl::init(true));
283 
284 static cl::opt<bool> ClEagerChecks(
285     "msan-eager-checks",
286     cl::desc("check arguments and return values at function call boundaries"),
287     cl::Hidden, cl::init(false));
288 
289 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
290        cl::desc("print out instructions with default strict semantics"),
291        cl::Hidden, cl::init(false));
292 
293 static cl::opt<int> ClInstrumentationWithCallThreshold(
294     "msan-instrumentation-with-call-threshold",
295     cl::desc(
296         "If the function being instrumented requires more than "
297         "this number of checks and origin stores, use callbacks instead of "
298         "inline checks (-1 means never use callbacks)."),
299     cl::Hidden, cl::init(3500));
300 
301 static cl::opt<bool>
302     ClEnableKmsan("msan-kernel",
303                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
304                   cl::Hidden, cl::init(false));
305 
306 static cl::opt<bool>
307     ClDisableChecks("msan-disable-checks",
308                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
309                     cl::init(false));
310 
311 // This is an experiment to enable handling of cases where shadow is a non-zero
312 // compile-time constant. For some unexplainable reason they were silently
313 // ignored in the instrumentation.
314 static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
315        cl::desc("Insert checks for constant shadow values"),
316        cl::Hidden, cl::init(false));
317 
318 // This is off by default because of a bug in gold:
319 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
320 static cl::opt<bool> ClWithComdat("msan-with-comdat",
321        cl::desc("Place MSan constructors in comdat sections"),
322        cl::Hidden, cl::init(false));
323 
324 // These options allow to specify custom memory map parameters
325 // See MemoryMapParams for details.
326 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
327                                    cl::desc("Define custom MSan AndMask"),
328                                    cl::Hidden, cl::init(0));
329 
330 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
331                                    cl::desc("Define custom MSan XorMask"),
332                                    cl::Hidden, cl::init(0));
333 
334 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
335                                       cl::desc("Define custom MSan ShadowBase"),
336                                       cl::Hidden, cl::init(0));
337 
338 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
339                                       cl::desc("Define custom MSan OriginBase"),
340                                       cl::Hidden, cl::init(0));
341 
// Name of the module constructor created by insertModuleCtor(); also used as
// the comdat name when -msan-with-comdat is set.
const char kMsanModuleCtorName[] = "msan.module_ctor";
// Name of the runtime initialization function called from that constructor.
const char kMsanInitName[] = "__msan_init";
344 
namespace {

/// Parameters of the application-to-shadow address calculation:
///
///   Offset = (Addr & ~AndMask) ^ XorMask
///   Shadow = ShadowBase + Offset
///   Origin = OriginBase + Offset
struct MemoryMapParams {
  /// Bits cleared from the application address before the XOR step.
  uint64_t AndMask;
  /// Value XOR-ed into the masked address to form the offset.
  uint64_t XorMask;
  /// Base added to the offset to obtain the shadow address.
  uint64_t ShadowBase;
  /// Base added to the offset to obtain the origin address.
  uint64_t OriginBase;
};

/// Memory map parameters of one platform, split by pointer width. An entry is
/// null when no parameters are defined for that width.
struct PlatformMemoryMapParams {
  /// Parameters for the 32-bit variant of the platform.
  const MemoryMapParams *bits32;
  /// Parameters for the 64-bit variant of the platform.
  const MemoryMapParams *bits64;
};

} // end anonymous namespace
364 
365 // i386 Linux
366 static const MemoryMapParams Linux_I386_MemoryMapParams = {
367   0x000080000000,  // AndMask
368   0,               // XorMask (not used)
369   0,               // ShadowBase (not used)
370   0x000040000000,  // OriginBase
371 };
372 
373 // x86_64 Linux
374 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
375 #ifdef MSAN_LINUX_X86_64_OLD_MAPPING
376   0x400000000000,  // AndMask
377   0,               // XorMask (not used)
378   0,               // ShadowBase (not used)
379   0x200000000000,  // OriginBase
380 #else
381   0,               // AndMask (not used)
382   0x500000000000,  // XorMask
383   0,               // ShadowBase (not used)
384   0x100000000000,  // OriginBase
385 #endif
386 };
387 
388 // mips64 Linux
389 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
390   0,               // AndMask (not used)
391   0x008000000000,  // XorMask
392   0,               // ShadowBase (not used)
393   0x002000000000,  // OriginBase
394 };
395 
396 // ppc64 Linux
397 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
398   0xE00000000000,  // AndMask
399   0x100000000000,  // XorMask
400   0x080000000000,  // ShadowBase
401   0x1C0000000000,  // OriginBase
402 };
403 
404 // s390x Linux
405 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
406     0xC00000000000, // AndMask
407     0,              // XorMask (not used)
408     0x080000000000, // ShadowBase
409     0x1C0000000000, // OriginBase
410 };
411 
412 // aarch64 Linux
413 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
414   0,               // AndMask (not used)
415   0x06000000000,   // XorMask
416   0,               // ShadowBase (not used)
417   0x01000000000,   // OriginBase
418 };
419 
420 // aarch64 FreeBSD
421 static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
422   0x1800000000000,  // AndMask
423   0x0400000000000,  // XorMask
424   0x0200000000000,  // ShadowBase
425   0x0700000000000,  // OriginBase
426 };
427 
428 // i386 FreeBSD
429 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
430   0x000180000000,  // AndMask
431   0x000040000000,  // XorMask
432   0x000020000000,  // ShadowBase
433   0x000700000000,  // OriginBase
434 };
435 
436 // x86_64 FreeBSD
437 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
438   0xc00000000000,  // AndMask
439   0x200000000000,  // XorMask
440   0x100000000000,  // ShadowBase
441   0x380000000000,  // OriginBase
442 };
443 
444 // x86_64 NetBSD
445 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
446   0,               // AndMask
447   0x500000000000,  // XorMask
448   0,               // ShadowBase
449   0x100000000000,  // OriginBase
450 };
451 
452 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
453   &Linux_I386_MemoryMapParams,
454   &Linux_X86_64_MemoryMapParams,
455 };
456 
457 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
458   nullptr,
459   &Linux_MIPS64_MemoryMapParams,
460 };
461 
462 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
463   nullptr,
464   &Linux_PowerPC64_MemoryMapParams,
465 };
466 
467 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
468     nullptr,
469     &Linux_S390X_MemoryMapParams,
470 };
471 
472 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
473   nullptr,
474   &Linux_AArch64_MemoryMapParams,
475 };
476 
477 static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
478   nullptr,
479   &FreeBSD_AArch64_MemoryMapParams,
480 };
481 
482 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
483   &FreeBSD_I386_MemoryMapParams,
484   &FreeBSD_X86_64_MemoryMapParams,
485 };
486 
487 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
488   nullptr,
489   &NetBSD_X86_64_MemoryMapParams,
490 };
491 
492 namespace {
493 
494 /// Instrument functions of a module to detect uninitialized reads.
495 ///
496 /// Instantiating MemorySanitizer inserts the msan runtime library API function
497 /// declarations into the module if they don't exist already. Instantiating
498 /// ensures the __msan_init function is in the list of global constructors for
499 /// the module.
500 class MemorySanitizer {
501 public:
502   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
503       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
504         Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
505     initializeModule(M);
506   }
507 
508   // MSan cannot be moved or copied because of MapParams.
509   MemorySanitizer(MemorySanitizer &&) = delete;
510   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
511   MemorySanitizer(const MemorySanitizer &) = delete;
512   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
513 
514   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
515 
516 private:
517   friend struct MemorySanitizerVisitor;
518   friend struct VarArgAMD64Helper;
519   friend struct VarArgMIPS64Helper;
520   friend struct VarArgAArch64Helper;
521   friend struct VarArgPowerPC64Helper;
522   friend struct VarArgSystemZHelper;
523 
524   void initializeModule(Module &M);
525   void initializeCallbacks(Module &M);
526   void createKernelApi(Module &M);
527   void createUserspaceApi(Module &M);
528 
529   /// True if we're compiling the Linux kernel.
530   bool CompileKernel;
531   /// Track origins (allocation points) of uninitialized values.
532   int TrackOrigins;
533   bool Recover;
534   bool EagerChecks;
535 
536   LLVMContext *C;
537   Type *IntptrTy;
538   Type *OriginTy;
539 
540   // XxxTLS variables represent the per-thread state in MSan and per-task state
541   // in KMSAN.
542   // For the userspace these point to thread-local globals. In the kernel land
543   // they point to the members of a per-task struct obtained via a call to
544   // __msan_get_context_state().
545 
546   /// Thread-local shadow storage for function parameters.
547   Value *ParamTLS;
548 
549   /// Thread-local origin storage for function parameters.
550   Value *ParamOriginTLS;
551 
552   /// Thread-local shadow storage for function return value.
553   Value *RetvalTLS;
554 
555   /// Thread-local origin storage for function return value.
556   Value *RetvalOriginTLS;
557 
558   /// Thread-local shadow storage for in-register va_arg function
559   /// parameters (x86_64-specific).
560   Value *VAArgTLS;
561 
562   /// Thread-local shadow storage for in-register va_arg function
563   /// parameters (x86_64-specific).
564   Value *VAArgOriginTLS;
565 
566   /// Thread-local shadow storage for va_arg overflow area
567   /// (x86_64-specific).
568   Value *VAArgOverflowSizeTLS;
569 
570   /// Are the instrumentation callbacks set up?
571   bool CallbacksInitialized = false;
572 
573   /// The run-time callback to print a warning.
574   FunctionCallee WarningFn;
575 
576   // These arrays are indexed by log2(AccessSize).
577   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
578   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
579 
580   /// Run-time helper that generates a new origin value for a stack
581   /// allocation.
582   FunctionCallee MsanSetAllocaOrigin4Fn;
583 
584   /// Run-time helper that poisons stack on function entry.
585   FunctionCallee MsanPoisonStackFn;
586 
587   /// Run-time helper that records a store (or any event) of an
588   /// uninitialized value and returns an updated origin id encoding this info.
589   FunctionCallee MsanChainOriginFn;
590 
591   /// Run-time helper that paints an origin over a region.
592   FunctionCallee MsanSetOriginFn;
593 
594   /// MSan runtime replacements for memmove, memcpy and memset.
595   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
596 
597   /// KMSAN callback for task-local function argument shadow.
598   StructType *MsanContextStateTy;
599   FunctionCallee MsanGetContextStateFn;
600 
601   /// Functions for poisoning/unpoisoning local variables
602   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
603 
604   /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
605   /// pointers.
606   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
607   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
608   FunctionCallee MsanMetadataPtrForStore_1_8[4];
609   FunctionCallee MsanInstrumentAsmStoreFn;
610 
611   /// Helper to choose between different MsanMetadataPtrXxx().
612   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
613 
614   /// Memory map parameters used in application-to-shadow calculation.
615   const MemoryMapParams *MapParams;
616 
617   /// Custom memory map parameters used when -msan-shadow-base or
618   // -msan-origin-base is provided.
619   MemoryMapParams CustomMapParams;
620 
621   MDNode *ColdCallWeights;
622 
623   /// Branch weights for origin store.
624   MDNode *OriginStoreWeights;
625 };
626 
627 void insertModuleCtor(Module &M) {
628   getOrCreateSanitizerCtorAndInitFunctions(
629       M, kMsanModuleCtorName, kMsanInitName,
630       /*InitArgTypes=*/{},
631       /*InitArgs=*/{},
632       // This callback is invoked when the functions are created the first
633       // time. Hook them into the global ctors list in that case:
634       [&](Function *Ctor, FunctionCallee) {
635         if (!ClWithComdat) {
636           appendToGlobalCtors(M, Ctor, 0);
637           return;
638         }
639         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
640         Ctor->setComdat(MsanCtorComdat);
641         appendToGlobalCtors(M, Ctor, 0, Ctor);
642       });
643 }
644 
645 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
646   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
647 }
648 
649 } // end anonymous namespace
650 
/// Build the effective options from the caller-supplied defaults.
///
/// Each command-line flag, when it was passed explicitly, overrides the
/// corresponding argument (see getOptOrDefault()).
///
/// \param TO          default origin-tracking level.
/// \param R           default for recovery (keep going after a report).
/// \param K           default for kernel (KMSAN) mode.
/// \param EagerChecks default for eager checks.
MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
                                               bool EagerChecks)
    // Kernel is computed first because the two initializers after it read it:
    // kernel mode defaults origin tracking to 2 and recovery to true.
    : Kernel(getOptOrDefault(ClEnableKmsan, K)),
      TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
      Recover(getOptOrDefault(ClKeepGoing, Kernel || R)),
      // Inside the argument list, EagerChecks names the constructor parameter.
      EagerChecks(getOptOrDefault(ClEagerChecks, EagerChecks)) {}
657 
658 PreservedAnalyses MemorySanitizerPass::run(Function &F,
659                                            FunctionAnalysisManager &FAM) {
660   MemorySanitizer Msan(*F.getParent(), Options);
661   if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
662     return PreservedAnalyses::none();
663   return PreservedAnalyses::all();
664 }
665 
666 PreservedAnalyses
667 ModuleMemorySanitizerPass::run(Module &M, ModuleAnalysisManager &AM) {
668   if (Options.Kernel)
669     return PreservedAnalyses::all();
670   insertModuleCtor(M);
671   return PreservedAnalyses::none();
672 }
673 
674 void MemorySanitizerPass::printPipeline(
675     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
676   static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
677       OS, MapClassName2PassName);
678   OS << "<";
679   if (Options.Recover)
680     OS << "recover;";
681   if (Options.Kernel)
682     OS << "kernel;";
683   if (Options.EagerChecks)
684     OS << "eager-checks;";
685   OS << "track-origins=" << Options.TrackOrigins;
686   OS << ">";
687 }
688 
689 /// Create a non-const global initialized with the given string.
690 ///
691 /// Creates a writable global for Str so that we can pass it to the
692 /// run-time lib. Runtime uses first 4 bytes of the string to store the
693 /// frame ID, so the string needs to be mutable.
694 static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
695                                                             StringRef Str) {
696   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
697   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
698                             GlobalValue::PrivateLinkage, StrConst, "");
699 }
700 
701 /// Create KMSAN API callbacks.
702 void MemorySanitizer::createKernelApi(Module &M) {
703   IRBuilder<> IRB(*C);
704 
705   // These will be initialized in insertKmsanPrologue().
706   RetvalTLS = nullptr;
707   RetvalOriginTLS = nullptr;
708   ParamTLS = nullptr;
709   ParamOriginTLS = nullptr;
710   VAArgTLS = nullptr;
711   VAArgOriginTLS = nullptr;
712   VAArgOverflowSizeTLS = nullptr;
713 
714   WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
715                                     IRB.getInt32Ty());
716   // Requests the per-task context state (kmsan_context_state*) from the
717   // runtime library.
718   MsanContextStateTy = StructType::get(
719       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
720       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
721       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
722       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
723       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
724       OriginTy);
725   MsanGetContextStateFn = M.getOrInsertFunction(
726       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
727 
728   Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
729                                 PointerType::get(IRB.getInt32Ty(), 0));
730 
731   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
732     std::string name_load =
733         "__msan_metadata_ptr_for_load_" + std::to_string(size);
734     std::string name_store =
735         "__msan_metadata_ptr_for_store_" + std::to_string(size);
736     MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
737         name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
738     MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
739         name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
740   }
741 
742   MsanMetadataPtrForLoadN = M.getOrInsertFunction(
743       "__msan_metadata_ptr_for_load_n", RetTy,
744       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
745   MsanMetadataPtrForStoreN = M.getOrInsertFunction(
746       "__msan_metadata_ptr_for_store_n", RetTy,
747       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
748 
749   // Functions for poisoning and unpoisoning memory.
750   MsanPoisonAllocaFn =
751       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
752                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
753   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
754       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
755 }
756 
757 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
758   return M.getOrInsertGlobal(Name, Ty, [&] {
759     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
760                               nullptr, Name, nullptr,
761                               GlobalVariable::InitialExecTLSModel);
762   });
763 }
764 
765 /// Insert declarations for userspace-specific functions and globals.
766 void MemorySanitizer::createUserspaceApi(Module &M) {
767   IRBuilder<> IRB(*C);
768 
769   // Create the callback.
770   // FIXME: this function should have "Cold" calling conv,
771   // which is not yet implemented.
772   StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
773                                     : "__msan_warning_with_origin_noreturn";
774   WarningFn =
775       M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), IRB.getInt32Ty());
776 
777   // Create the global TLS variables.
778   RetvalTLS =
779       getOrInsertGlobal(M, "__msan_retval_tls",
780                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
781 
782   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
783 
784   ParamTLS =
785       getOrInsertGlobal(M, "__msan_param_tls",
786                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
787 
788   ParamOriginTLS =
789       getOrInsertGlobal(M, "__msan_param_origin_tls",
790                         ArrayType::get(OriginTy, kParamTLSSize / 4));
791 
792   VAArgTLS =
793       getOrInsertGlobal(M, "__msan_va_arg_tls",
794                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
795 
796   VAArgOriginTLS =
797       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
798                         ArrayType::get(OriginTy, kParamTLSSize / 4));
799 
800   VAArgOverflowSizeTLS =
801       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
802 
803   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
804        AccessSizeIndex++) {
805     unsigned AccessSize = 1 << AccessSizeIndex;
806     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
807     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeWarningFnAttrs;
808     MaybeWarningFnAttrs.push_back(std::make_pair(
809         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
810     MaybeWarningFnAttrs.push_back(std::make_pair(
811         AttributeList::FirstArgIndex + 1, Attribute::get(*C, Attribute::ZExt)));
812     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
813         FunctionName, AttributeList::get(*C, MaybeWarningFnAttrs),
814         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
815 
816     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
817     SmallVector<std::pair<unsigned, Attribute>, 2> MaybeStoreOriginFnAttrs;
818     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
819         AttributeList::FirstArgIndex, Attribute::get(*C, Attribute::ZExt)));
820     MaybeStoreOriginFnAttrs.push_back(std::make_pair(
821         AttributeList::FirstArgIndex + 2, Attribute::get(*C, Attribute::ZExt)));
822     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
823         FunctionName, AttributeList::get(*C, MaybeStoreOriginFnAttrs),
824         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
825         IRB.getInt32Ty());
826   }
827 
828   MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
829     "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
830     IRB.getInt8PtrTy(), IntptrTy);
831   MsanPoisonStackFn =
832       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
833                             IRB.getInt8PtrTy(), IntptrTy);
834 }
835 
836 /// Insert extern declaration of runtime-provided functions and globals.
837 void MemorySanitizer::initializeCallbacks(Module &M) {
838   // Only do this once.
839   if (CallbacksInitialized)
840     return;
841 
842   IRBuilder<> IRB(*C);
843   // Initialize callbacks that are common for kernel and userspace
844   // instrumentation.
845   MsanChainOriginFn = M.getOrInsertFunction(
846     "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
847   MsanSetOriginFn =
848       M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(),
849                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
850   MemmoveFn = M.getOrInsertFunction(
851     "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
852     IRB.getInt8PtrTy(), IntptrTy);
853   MemcpyFn = M.getOrInsertFunction(
854     "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
855     IntptrTy);
856   MemsetFn = M.getOrInsertFunction(
857     "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
858     IntptrTy);
859 
860   MsanInstrumentAsmStoreFn =
861       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
862                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
863 
864   if (CompileKernel) {
865     createKernelApi(M);
866   } else {
867     createUserspaceApi(M);
868   }
869   CallbacksInitialized = true;
870 }
871 
872 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
873                                                              int size) {
874   FunctionCallee *Fns =
875       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
876   switch (size) {
877   case 1:
878     return Fns[0];
879   case 2:
880     return Fns[1];
881   case 4:
882     return Fns[2];
883   case 8:
884     return Fns[3];
885   default:
886     return nullptr;
887   }
888 }
889 
890 /// Module-level initialization.
891 ///
892 /// inserts a call to __msan_init to the module's constructor list.
893 void MemorySanitizer::initializeModule(Module &M) {
894   auto &DL = M.getDataLayout();
895 
896   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
897   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
898   // Check the overrides first
899   if (ShadowPassed || OriginPassed) {
900     CustomMapParams.AndMask = ClAndMask;
901     CustomMapParams.XorMask = ClXorMask;
902     CustomMapParams.ShadowBase = ClShadowBase;
903     CustomMapParams.OriginBase = ClOriginBase;
904     MapParams = &CustomMapParams;
905   } else {
906     Triple TargetTriple(M.getTargetTriple());
907     switch (TargetTriple.getOS()) {
908       case Triple::FreeBSD:
909         switch (TargetTriple.getArch()) {
910           case Triple::aarch64:
911             MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
912             break;
913           case Triple::x86_64:
914             MapParams = FreeBSD_X86_MemoryMapParams.bits64;
915             break;
916           case Triple::x86:
917             MapParams = FreeBSD_X86_MemoryMapParams.bits32;
918             break;
919           default:
920             report_fatal_error("unsupported architecture");
921         }
922         break;
923       case Triple::NetBSD:
924         switch (TargetTriple.getArch()) {
925           case Triple::x86_64:
926             MapParams = NetBSD_X86_MemoryMapParams.bits64;
927             break;
928           default:
929             report_fatal_error("unsupported architecture");
930         }
931         break;
932       case Triple::Linux:
933         switch (TargetTriple.getArch()) {
934           case Triple::x86_64:
935             MapParams = Linux_X86_MemoryMapParams.bits64;
936             break;
937           case Triple::x86:
938             MapParams = Linux_X86_MemoryMapParams.bits32;
939             break;
940           case Triple::mips64:
941           case Triple::mips64el:
942             MapParams = Linux_MIPS_MemoryMapParams.bits64;
943             break;
944           case Triple::ppc64:
945           case Triple::ppc64le:
946             MapParams = Linux_PowerPC_MemoryMapParams.bits64;
947             break;
948           case Triple::systemz:
949             MapParams = Linux_S390_MemoryMapParams.bits64;
950             break;
951           case Triple::aarch64:
952           case Triple::aarch64_be:
953             MapParams = Linux_ARM_MemoryMapParams.bits64;
954             break;
955           default:
956             report_fatal_error("unsupported architecture");
957         }
958         break;
959       default:
960         report_fatal_error("unsupported operating system");
961     }
962   }
963 
964   C = &(M.getContext());
965   IRBuilder<> IRB(*C);
966   IntptrTy = IRB.getIntPtrTy(DL);
967   OriginTy = IRB.getInt32Ty();
968 
969   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
970   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
971 
972   if (!CompileKernel) {
973     if (TrackOrigins)
974       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
975         return new GlobalVariable(
976             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
977             IRB.getInt32(TrackOrigins), "__msan_track_origins");
978       });
979 
980     if (Recover)
981       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
982         return new GlobalVariable(M, IRB.getInt32Ty(), true,
983                                   GlobalValue::WeakODRLinkage,
984                                   IRB.getInt32(Recover), "__msan_keep_going");
985       });
986 }
987 }
988 
989 namespace {
990 
991 /// A helper class that handles instrumentation of VarArg
992 /// functions on a particular platform.
993 ///
994 /// Implementations are expected to insert the instrumentation
995 /// necessary to propagate argument shadow through VarArg function
996 /// calls. Visit* methods are called during an InstVisitor pass over
997 /// the function, and should avoid creating new basic blocks. A new
998 /// instance of this class is created for each instrumented function.
999 struct VarArgHelper {
1000   virtual ~VarArgHelper() = default;
1001 
1002   /// Visit a CallBase.
1003   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1004 
1005   /// Visit a va_start call.
1006   virtual void visitVAStartInst(VAStartInst &I) = 0;
1007 
1008   /// Visit a va_copy call.
1009   virtual void visitVACopyInst(VACopyInst &I) = 0;
1010 
1011   /// Finalize function instrumentation.
1012   ///
1013   /// This method is called after visiting all interesting (see above)
1014   /// instructions in a function.
1015   virtual void finalizeInstrumentation() = 0;
1016 };
1017 
1018 struct MemorySanitizerVisitor;
1019 
1020 } // end anonymous namespace
1021 
1022 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1023                                         MemorySanitizerVisitor &Visitor);
1024 
1025 static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
1026   if (TypeSize <= 8) return 0;
1027   return Log2_32_Ceil((TypeSize + 7) / 8);
1028 }
1029 
1030 namespace {
1031 
1032 /// This class does all the work for a given function. Store and Load
1033 /// instructions store and load corresponding shadow and origin
1034 /// values. Most instructions propagate shadow from arguments to their
1035 /// return values. Certain instructions (most importantly, BranchInst)
1036 /// test their argument shadow and print reports (with a runtime call) if it's
1037 /// non-zero.
1038 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1039   Function &F;
1040   MemorySanitizer &MS;
1041   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1042   ValueMap<Value*, Value*> ShadowMap, OriginMap;
1043   std::unique_ptr<VarArgHelper> VAHelper;
1044   const TargetLibraryInfo *TLI;
1045   Instruction *FnPrologueEnd;
1046 
1047   // The following flags disable parts of MSan instrumentation based on
1048   // exclusion list contents and command-line options.
1049   bool InsertChecks;
1050   bool PropagateShadow;
1051   bool PoisonStack;
1052   bool PoisonUndef;
1053 
1054   struct ShadowOriginAndInsertPoint {
1055     Value *Shadow;
1056     Value *Origin;
1057     Instruction *OrigIns;
1058 
1059     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1060       : Shadow(S), Origin(O), OrigIns(I) {}
1061   };
1062   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1063   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1064   SmallSet<AllocaInst *, 16> AllocaSet;
1065   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1066   SmallVector<StoreInst *, 16> StoreList;
1067 
1068   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1069                          const TargetLibraryInfo &TLI)
1070       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1071     bool SanitizeFunction =
1072         F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
1073     InsertChecks = SanitizeFunction;
1074     PropagateShadow = SanitizeFunction;
1075     PoisonStack = SanitizeFunction && ClPoisonStack;
1076     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1077 
1078     // In the presence of unreachable blocks, we may see Phi nodes with
1079     // incoming nodes from such blocks. Since InstVisitor skips unreachable
1080     // blocks, such nodes will not have any shadow value associated with them.
1081     // It's easier to remove unreachable blocks than deal with missing shadow.
1082     removeUnreachableBlocks(F);
1083 
1084     MS.initializeCallbacks(*F.getParent());
1085     FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
1086                         .CreateIntrinsic(Intrinsic::donothing, {}, {});
1087 
1088     if (MS.CompileKernel) {
1089       IRBuilder<> IRB(FnPrologueEnd);
1090       insertKmsanPrologue(IRB);
1091     }
1092 
1093     LLVM_DEBUG(if (!InsertChecks) dbgs()
1094                << "MemorySanitizer is not inserting checks into '"
1095                << F.getName() << "'\n");
1096   }
1097 
1098   bool isInPrologue(Instruction &I) {
1099     return I.getParent() == FnPrologueEnd->getParent() &&
1100            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
1101   }
1102 
1103   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1104     if (MS.TrackOrigins <= 1) return V;
1105     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1106   }
1107 
1108   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1109     const DataLayout &DL = F.getParent()->getDataLayout();
1110     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1111     if (IntptrSize == kOriginSize) return Origin;
1112     assert(IntptrSize == kOriginSize * 2);
1113     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1114     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1115   }
1116 
1117   /// Fill memory range with the given origin value.
1118   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1119                    unsigned Size, Align Alignment) {
1120     const DataLayout &DL = F.getParent()->getDataLayout();
1121     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1122     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1123     assert(IntptrAlignment >= kMinOriginAlignment);
1124     assert(IntptrSize >= kOriginSize);
1125 
1126     unsigned Ofs = 0;
1127     Align CurrentAlignment = Alignment;
1128     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1129       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1130       Value *IntptrOriginPtr =
1131           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1132       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1133         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1134                        : IntptrOriginPtr;
1135         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1136         Ofs += IntptrSize / kOriginSize;
1137         CurrentAlignment = IntptrAlignment;
1138       }
1139     }
1140 
1141     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1142       Value *GEP =
1143           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1144       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1145       CurrentAlignment = kMinOriginAlignment;
1146     }
1147   }
1148 
1149   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1150                    Value *OriginPtr, Align Alignment, bool AsCall) {
1151     const DataLayout &DL = F.getParent()->getDataLayout();
1152     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1153     unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
1154     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1155     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1156       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
1157         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1158                     OriginAlignment);
1159       return;
1160     }
1161 
1162     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1163     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1164     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1165       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1166       Value *ConvertedShadow2 =
1167           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1168       CallBase *CB = IRB.CreateCall(
1169           Fn, {ConvertedShadow2,
1170                IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
1171       CB->addParamAttr(0, Attribute::ZExt);
1172       CB->addParamAttr(2, Attribute::ZExt);
1173     } else {
1174       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1175       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1176           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1177       IRBuilder<> IRBNew(CheckTerm);
1178       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1179                   OriginAlignment);
1180     }
1181   }
1182 
1183   void materializeStores(bool InstrumentWithCalls) {
1184     for (StoreInst *SI : StoreList) {
1185       IRBuilder<> IRB(SI);
1186       Value *Val = SI->getValueOperand();
1187       Value *Addr = SI->getPointerOperand();
1188       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1189       Value *ShadowPtr, *OriginPtr;
1190       Type *ShadowTy = Shadow->getType();
1191       const Align Alignment = SI->getAlign();
1192       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1193       std::tie(ShadowPtr, OriginPtr) =
1194           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1195 
1196       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1197       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1198       (void)NewSI;
1199 
1200       if (SI->isAtomic())
1201         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1202 
1203       if (MS.TrackOrigins && !SI->isAtomic())
1204         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1205                     OriginAlignment, InstrumentWithCalls);
1206     }
1207   }
1208 
1209   /// Helper function to insert a warning at IRB's current insert point.
1210   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1211     if (!Origin)
1212       Origin = (Value *)IRB.getInt32(0);
1213     assert(Origin->getType()->isIntegerTy());
1214     IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1215     // FIXME: Insert UnreachableInst if !MS.Recover?
1216     // This may invalidate some of the following checks and needs to be done
1217     // at the very end.
1218   }
1219 
1220   void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
1221                            bool AsCall) {
1222     IRBuilder<> IRB(OrigIns);
1223     LLVM_DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
1224     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1225     LLVM_DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
1226 
1227     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1228       if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
1229         insertWarningFn(IRB, Origin);
1230       }
1231       return;
1232     }
1233 
1234     const DataLayout &DL = OrigIns->getModule()->getDataLayout();
1235 
1236     unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1237     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1238     if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1239       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1240       Value *ConvertedShadow2 =
1241           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1242       CallBase *CB = IRB.CreateCall(
1243           Fn, {ConvertedShadow2,
1244                MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
1245       CB->addParamAttr(0, Attribute::ZExt);
1246       CB->addParamAttr(1, Attribute::ZExt);
1247     } else {
1248       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1249       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1250           Cmp, OrigIns,
1251           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1252 
1253       IRB.SetInsertPoint(CheckTerm);
1254       insertWarningFn(IRB, Origin);
1255       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1256     }
1257   }
1258 
1259   void materializeChecks(bool InstrumentWithCalls) {
1260     for (const auto &ShadowData : InstrumentationList) {
1261       Instruction *OrigIns = ShadowData.OrigIns;
1262       Value *Shadow = ShadowData.Shadow;
1263       Value *Origin = ShadowData.Origin;
1264       materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
1265     }
1266     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1267   }
1268 
1269   // Returns the last instruction in the new prologue
1270   void insertKmsanPrologue(IRBuilder<> &IRB) {
1271     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1272     Constant *Zero = IRB.getInt32(0);
1273     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1274                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1275     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1276                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1277     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1278                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1279     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1280                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1281     MS.VAArgOverflowSizeTLS =
1282         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1283                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1284     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1285                                       {Zero, IRB.getInt32(5)}, "param_origin");
1286     MS.RetvalOriginTLS =
1287         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1288                       {Zero, IRB.getInt32(6)}, "retval_origin");
1289   }
1290 
1291   /// Add MemorySanitizer instrumentation to a function.
1292   bool runOnFunction() {
1293     // Iterate all BBs in depth-first order and create shadow instructions
1294     // for all instructions (where applicable).
1295     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1296     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
1297       visit(*BB);
1298 
1299     // Finalize PHI nodes.
1300     for (PHINode *PN : ShadowPHINodes) {
1301       PHINode *PNS = cast<PHINode>(getShadow(PN));
1302       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1303       size_t NumValues = PN->getNumIncomingValues();
1304       for (size_t v = 0; v < NumValues; v++) {
1305         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1306         if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1307       }
1308     }
1309 
1310     VAHelper->finalizeInstrumentation();
1311 
1312     // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
1313     // instrumenting only allocas.
1314     if (InstrumentLifetimeStart) {
1315       for (auto Item : LifetimeStartList) {
1316         instrumentAlloca(*Item.second, Item.first);
1317         AllocaSet.erase(Item.second);
1318       }
1319     }
1320     // Poison the allocas for which we didn't instrument the corresponding
1321     // lifetime intrinsics.
1322     for (AllocaInst *AI : AllocaSet)
1323       instrumentAlloca(*AI);
1324 
1325     bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
1326                                InstrumentationList.size() + StoreList.size() >
1327                                    (unsigned)ClInstrumentationWithCallThreshold;
1328 
1329     // Insert shadow value checks.
1330     materializeChecks(InstrumentWithCalls);
1331 
1332     // Delayed instrumentation of StoreInst.
1333     // This may not add new address checks.
1334     materializeStores(InstrumentWithCalls);
1335 
1336     return true;
1337   }
1338 
1339   /// Compute the shadow type that corresponds to a given Value.
1340   Type *getShadowTy(Value *V) {
1341     return getShadowTy(V->getType());
1342   }
1343 
1344   /// Compute the shadow type that corresponds to a given Type.
1345   Type *getShadowTy(Type *OrigTy) {
1346     if (!OrigTy->isSized()) {
1347       return nullptr;
1348     }
1349     // For integer type, shadow is the same as the original type.
1350     // This may return weird-sized types like i1.
1351     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1352       return IT;
1353     const DataLayout &DL = F.getParent()->getDataLayout();
1354     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1355       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1356       return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
1357                                   cast<FixedVectorType>(VT)->getNumElements());
1358     }
1359     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1360       return ArrayType::get(getShadowTy(AT->getElementType()),
1361                             AT->getNumElements());
1362     }
1363     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1364       SmallVector<Type*, 4> Elements;
1365       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1366         Elements.push_back(getShadowTy(ST->getElementType(i)));
1367       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1368       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1369       return Res;
1370     }
1371     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1372     return IntegerType::get(*MS.C, TypeSize);
1373   }
1374 
1375   /// Flatten a vector type.
1376   Type *getShadowTyNoVec(Type *ty) {
1377     if (VectorType *vt = dyn_cast<VectorType>(ty))
1378       return IntegerType::get(*MS.C,
1379                               vt->getPrimitiveSizeInBits().getFixedSize());
1380     return ty;
1381   }
1382 
1383   /// Extract combined shadow of struct elements as a bool
1384   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1385                               IRBuilder<> &IRB) {
1386     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1387     Value *Aggregator = FalseVal;
1388 
1389     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1390       // Combine by ORing together each element's bool shadow
1391       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1392       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1393       Value *ShadowBool = convertToBool(ShadowInner, IRB);
1394 
1395       if (Aggregator != FalseVal)
1396         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1397       else
1398         Aggregator = ShadowBool;
1399     }
1400 
1401     return Aggregator;
1402   }
1403 
1404   // Extract combined shadow of array elements
1405   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1406                              IRBuilder<> &IRB) {
1407     if (!Array->getNumElements())
1408       return IRB.getIntN(/* width */ 1, /* value */ 0);
1409 
1410     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1411     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1412 
1413     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1414       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1415       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1416       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1417     }
1418     return Aggregator;
1419   }
1420 
1421   /// Convert a shadow value to it's flattened variant. The resulting
1422   /// shadow may not necessarily have the same bit width as the input
1423   /// value, but it will always be comparable to zero.
1424   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1425     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1426       return collapseStructShadow(Struct, V, IRB);
1427     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1428       return collapseArrayShadow(Array, V, IRB);
1429     Type *Ty = V->getType();
1430     Type *NoVecTy = getShadowTyNoVec(Ty);
1431     if (Ty == NoVecTy) return V;
1432     return IRB.CreateBitCast(V, NoVecTy);
1433   }
1434 
1435   // Convert a scalar value to an i1 by comparing with 0
1436   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1437     Type *VTy = V->getType();
1438     assert(VTy->isIntegerTy());
1439     if (VTy->getIntegerBitWidth() == 1)
1440       // Just converting a bool to a bool, so do nothing.
1441       return V;
1442     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1443   }
1444 
1445   /// Compute the integer shadow offset that corresponds to a given
1446   /// application address.
1447   ///
1448   /// Offset = (Addr & ~AndMask) ^ XorMask
1449   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1450     Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
1451 
1452     uint64_t AndMask = MS.MapParams->AndMask;
1453     if (AndMask)
1454       OffsetLong =
1455           IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
1456 
1457     uint64_t XorMask = MS.MapParams->XorMask;
1458     if (XorMask)
1459       OffsetLong =
1460           IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
1461     return OffsetLong;
1462   }
1463 
1464   /// Compute the shadow and origin addresses corresponding to a given
1465   /// application address.
1466   ///
1467   /// Shadow = ShadowBase + Offset
1468   /// Origin = (OriginBase + Offset) & ~3ULL
1469   std::pair<Value *, Value *>
1470   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1471                               MaybeAlign Alignment) {
1472     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1473     Value *ShadowLong = ShadowOffset;
1474     uint64_t ShadowBase = MS.MapParams->ShadowBase;
1475     if (ShadowBase != 0) {
1476       ShadowLong =
1477         IRB.CreateAdd(ShadowLong,
1478                       ConstantInt::get(MS.IntptrTy, ShadowBase));
1479     }
1480     Value *ShadowPtr =
1481         IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1482     Value *OriginPtr = nullptr;
1483     if (MS.TrackOrigins) {
1484       Value *OriginLong = ShadowOffset;
1485       uint64_t OriginBase = MS.MapParams->OriginBase;
1486       if (OriginBase != 0)
1487         OriginLong = IRB.CreateAdd(OriginLong,
1488                                    ConstantInt::get(MS.IntptrTy, OriginBase));
1489       if (!Alignment || *Alignment < kMinOriginAlignment) {
1490         uint64_t Mask = kMinOriginAlignment.value() - 1;
1491         OriginLong =
1492             IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
1493       }
1494       OriginPtr =
1495           IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
1496     }
1497     return std::make_pair(ShadowPtr, OriginPtr);
1498   }
1499 
1500   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1501                                                        IRBuilder<> &IRB,
1502                                                        Type *ShadowTy,
1503                                                        bool isStore) {
1504     Value *ShadowOriginPtrs;
1505     const DataLayout &DL = F.getParent()->getDataLayout();
1506     int Size = DL.getTypeStoreSize(ShadowTy);
1507 
1508     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1509     Value *AddrCast =
1510         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1511     if (Getter) {
1512       ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
1513     } else {
1514       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1515       ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
1516                                                 : MS.MsanMetadataPtrForLoadN,
1517                                         {AddrCast, SizeVal});
1518     }
1519     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1520     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1521     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1522 
1523     return std::make_pair(ShadowPtr, OriginPtr);
1524   }
1525 
1526   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1527                                                  Type *ShadowTy,
1528                                                  MaybeAlign Alignment,
1529                                                  bool isStore) {
1530     if (MS.CompileKernel)
1531       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1532     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1533   }
1534 
1535   /// Compute the shadow address for a given function argument.
1536   ///
1537   /// Shadow = ParamTLS+ArgOffset.
1538   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
1539                                  int ArgOffset) {
1540     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1541     if (ArgOffset)
1542       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1543     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1544                               "_msarg");
1545   }
1546 
1547   /// Compute the origin address for a given function argument.
1548   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
1549                                  int ArgOffset) {
1550     if (!MS.TrackOrigins)
1551       return nullptr;
1552     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1553     if (ArgOffset)
1554       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1555     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1556                               "_msarg_o");
1557   }
1558 
1559   /// Compute the shadow address for a retval.
1560   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1561     return IRB.CreatePointerCast(MS.RetvalTLS,
1562                                  PointerType::get(getShadowTy(A), 0),
1563                                  "_msret");
1564   }
1565 
1566   /// Compute the origin address for a retval.
1567   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1568     // We keep a single origin for the entire retval. Might be too optimistic.
1569     return MS.RetvalOriginTLS;
1570   }
1571 
1572   /// Set SV to be the shadow value for V.
1573   void setShadow(Value *V, Value *SV) {
1574     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1575     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1576   }
1577 
1578   /// Set Origin to be the origin value for V.
1579   void setOrigin(Value *V, Value *Origin) {
1580     if (!MS.TrackOrigins) return;
1581     assert(!OriginMap.count(V) && "Values may only have one origin");
1582     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1583     OriginMap[V] = Origin;
1584   }
1585 
1586   Constant *getCleanShadow(Type *OrigTy) {
1587     Type *ShadowTy = getShadowTy(OrigTy);
1588     if (!ShadowTy)
1589       return nullptr;
1590     return Constant::getNullValue(ShadowTy);
1591   }
1592 
1593   /// Create a clean shadow value for a given value.
1594   ///
1595   /// Clean shadow (all zeroes) means all bits of the value are defined
1596   /// (initialized).
1597   Constant *getCleanShadow(Value *V) {
1598     return getCleanShadow(V->getType());
1599   }
1600 
1601   /// Create a dirty shadow of a given shadow type.
1602   Constant *getPoisonedShadow(Type *ShadowTy) {
1603     assert(ShadowTy);
1604     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1605       return Constant::getAllOnesValue(ShadowTy);
1606     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1607       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1608                                       getPoisonedShadow(AT->getElementType()));
1609       return ConstantArray::get(AT, Vals);
1610     }
1611     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1612       SmallVector<Constant *, 4> Vals;
1613       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1614         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1615       return ConstantStruct::get(ST, Vals);
1616     }
1617     llvm_unreachable("Unexpected shadow type");
1618   }
1619 
1620   /// Create a dirty shadow for a given value.
1621   Constant *getPoisonedShadow(Value *V) {
1622     Type *ShadowTy = getShadowTy(V);
1623     if (!ShadowTy)
1624       return nullptr;
1625     return getPoisonedShadow(ShadowTy);
1626   }
1627 
1628   /// Create a clean (zero) origin.
1629   Value *getCleanOrigin() {
1630     return Constant::getNullValue(MS.OriginTy);
1631   }
1632 
  /// Get the shadow value for a given Value.
  ///
  /// This function either returns the value set earlier with setShadow,
  /// or extracts it from ParamTLS (for function arguments).
  ///
  /// Summary by kind of \p V:
  ///  - Instruction: the shadow recorded in ShadowMap, or a clean shadow when
  ///    propagation is disabled or the instruction is marked nosanitize.
  ///  - UndefValue: a poisoned shadow when both PropagateShadow and
  ///    PoisonUndef are set, otherwise a clean shadow.
  ///  - Argument: computed on first request by emitting code at
  ///    FnPrologueEnd, then cached in ShadowMap.
  ///  - Anything else: a clean shadow.
  Value *getShadow(Value *V) {
    if (Instruction *I = dyn_cast<Instruction>(V)) {
      if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize))
        return getCleanShadow(V);
      // For instructions the shadow is already stored in the map.
      Value *Shadow = ShadowMap[V];
      if (!Shadow) {
        LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
        (void)I;
        assert(Shadow && "No shadow for a value");
      }
      return Shadow;
    }
    if (UndefValue *U = dyn_cast<UndefValue>(V)) {
      // Despite its name, AllOnes holds a clean shadow when undef poisoning
      // is not in effect.
      Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
                                                        : getCleanShadow(V);
      LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
      (void)U;
      return AllOnes;
    }
    if (Argument *A = dyn_cast<Argument>(V)) {
      // For arguments we compute the shadow on demand and store it in the map.
      // ShadowPtr is a reference into the map, so assigning to it below also
      // caches the result.
      Value *&ShadowPtr = ShadowMap[V];
      if (ShadowPtr)
        return ShadowPtr;
      Function *F = A->getParent();
      // All code for argument shadow is emitted at the end of the prologue.
      IRBuilder<> EntryIRB(FnPrologueEnd);
      // Running offset of the current argument within the parameter TLS.
      unsigned ArgOffset = 0;
      const DataLayout &DL = F->getParent()->getDataLayout();
      // Walk the arguments in order to find the TLS offset of A.
      for (auto &FArg : F->args()) {
        if (!FArg.getType()->isSized()) {
          // Unsized arguments do not advance the offset.
          LLVM_DEBUG(dbgs() << "Arg is not sized\n");
          continue;
        }

        // For byval arguments the size is that of the pointee type, not of
        // the pointer.
        unsigned Size = FArg.hasByValAttr()
                            ? DL.getTypeAllocSize(FArg.getParamByValType())
                            : DL.getTypeAllocSize(FArg.getType());

        if (A == &FArg) {
          // The argument's shadow does not fit in the parameter TLS.
          bool Overflow = ArgOffset + Size > kParamTLSSize;
          if (FArg.hasByValAttr()) {
            // ByVal pointer itself has clean shadow. We copy the actual
            // argument shadow to the underlying memory.
            // Figure out maximal valid memcpy alignment.
            const Align ArgAlign = DL.getValueOrABITypeAlignment(
                MaybeAlign(FArg.getParamAlignment()), FArg.getParamByValType());
            Value *CpShadowPtr, *CpOriginPtr;
            std::tie(CpShadowPtr, CpOriginPtr) =
                getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
                                   /*isStore*/ true);
            if (!PropagateShadow || Overflow) {
              // ParamTLS overflow.
              // Also taken when propagation is off: the pointee's shadow is
              // zeroed (marked initialized).
              EntryIRB.CreateMemSet(
                  CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
                  Size, ArgAlign);
            } else {
              // Copy the shadow from the parameter TLS into the shadow of the
              // byval memory.
              Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
              const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
              Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
                                                 CopyAlign, Size);
              LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
              (void)Cpy;

              if (MS.TrackOrigins) {
                // Copy the origins alongside the shadow.
                Value *OriginPtr =
                    getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
                // FIXME: OriginSize should be:
                // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
                unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
                EntryIRB.CreateMemCpy(
                    CpOriginPtr,
                    /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
                    /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
                    OriginSize);
              }
            }
          }

          // The argument value itself gets a clean shadow and origin when
          // propagation is off, the TLS overflowed, the argument is byval, or
          // eager checks are enabled and the argument is noundef.
          if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
              (MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) {
            ShadowPtr = getCleanShadow(V);
            setOrigin(A, getCleanOrigin());
          } else {
            // Shadow over TLS
            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
            ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
                                                    kShadowTLSAlignment);
            if (MS.TrackOrigins) {
              Value *OriginPtr =
                  getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
              setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
            }
          }
          LLVM_DEBUG(dbgs()
                     << "  ARG:    " << FArg << " ==> " << *ShadowPtr << "\n");
          break;
        }

        // Not the argument we are looking for: skip over its TLS slot.
        ArgOffset += alignTo(Size, kShadowTLSAlignment);
      }
      assert(ShadowPtr && "Could not find shadow for an argument");
      return ShadowPtr;
    }
    // For everything else the shadow is zero.
    return getCleanShadow(V);
  }
1744 
1745   /// Get the shadow for i-th argument of the instruction I.
1746   Value *getShadow(Instruction *I, int i) {
1747     return getShadow(I->getOperand(i));
1748   }
1749 
1750   /// Get the origin for a value.
1751   Value *getOrigin(Value *V) {
1752     if (!MS.TrackOrigins) return nullptr;
1753     if (!PropagateShadow) return getCleanOrigin();
1754     if (isa<Constant>(V)) return getCleanOrigin();
1755     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
1756            "Unexpected value type in getOrigin()");
1757     if (Instruction *I = dyn_cast<Instruction>(V)) {
1758       if (I->getMetadata(LLVMContext::MD_nosanitize))
1759         return getCleanOrigin();
1760     }
1761     Value *Origin = OriginMap[V];
1762     assert(Origin && "Missing origin");
1763     return Origin;
1764   }
1765 
1766   /// Get the origin for i-th argument of the instruction I.
1767   Value *getOrigin(Instruction *I, int i) {
1768     return getOrigin(I->getOperand(i));
1769   }
1770 
1771   /// Remember the place where a shadow check should be inserted.
1772   ///
1773   /// This location will be later instrumented with a check that will print a
1774   /// UMR warning in runtime if the shadow value is not 0.
1775   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
1776     assert(Shadow);
1777     if (!InsertChecks) return;
1778 #ifndef NDEBUG
1779     Type *ShadowTy = Shadow->getType();
1780     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
1781             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
1782            "Can only insert checks for integer, vector, and aggregate shadow "
1783            "types");
1784 #endif
1785     InstrumentationList.push_back(
1786         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
1787   }
1788 
1789   /// Remember the place where a shadow check should be inserted.
1790   ///
1791   /// This location will be later instrumented with a check that will print a
1792   /// UMR warning in runtime if the value is not fully defined.
1793   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
1794     assert(Val);
1795     Value *Shadow, *Origin;
1796     if (ClCheckConstantShadow) {
1797       Shadow = getShadow(Val);
1798       if (!Shadow) return;
1799       Origin = getOrigin(Val);
1800     } else {
1801       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
1802       if (!Shadow) return;
1803       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
1804     }
1805     insertShadowCheck(Shadow, Origin, OrigIns);
1806   }
1807 
1808   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
1809     switch (a) {
1810       case AtomicOrdering::NotAtomic:
1811         return AtomicOrdering::NotAtomic;
1812       case AtomicOrdering::Unordered:
1813       case AtomicOrdering::Monotonic:
1814       case AtomicOrdering::Release:
1815         return AtomicOrdering::Release;
1816       case AtomicOrdering::Acquire:
1817       case AtomicOrdering::AcquireRelease:
1818         return AtomicOrdering::AcquireRelease;
1819       case AtomicOrdering::SequentiallyConsistent:
1820         return AtomicOrdering::SequentiallyConsistent;
1821     }
1822     llvm_unreachable("Unknown ordering");
1823   }
1824 
1825   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
1826     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
1827     uint32_t OrderingTable[NumOrderings] = {};
1828 
1829     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
1830         OrderingTable[(int)AtomicOrderingCABI::release] =
1831             (int)AtomicOrderingCABI::release;
1832     OrderingTable[(int)AtomicOrderingCABI::consume] =
1833         OrderingTable[(int)AtomicOrderingCABI::acquire] =
1834             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
1835                 (int)AtomicOrderingCABI::acq_rel;
1836     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
1837         (int)AtomicOrderingCABI::seq_cst;
1838 
1839     return ConstantDataVector::get(IRB.getContext(),
1840                                    makeArrayRef(OrderingTable, NumOrderings));
1841   }
1842 
1843   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
1844     switch (a) {
1845       case AtomicOrdering::NotAtomic:
1846         return AtomicOrdering::NotAtomic;
1847       case AtomicOrdering::Unordered:
1848       case AtomicOrdering::Monotonic:
1849       case AtomicOrdering::Acquire:
1850         return AtomicOrdering::Acquire;
1851       case AtomicOrdering::Release:
1852       case AtomicOrdering::AcquireRelease:
1853         return AtomicOrdering::AcquireRelease;
1854       case AtomicOrdering::SequentiallyConsistent:
1855         return AtomicOrdering::SequentiallyConsistent;
1856     }
1857     llvm_unreachable("Unknown ordering");
1858   }
1859 
1860   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
1861     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
1862     uint32_t OrderingTable[NumOrderings] = {};
1863 
1864     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
1865         OrderingTable[(int)AtomicOrderingCABI::acquire] =
1866             OrderingTable[(int)AtomicOrderingCABI::consume] =
1867                 (int)AtomicOrderingCABI::acquire;
1868     OrderingTable[(int)AtomicOrderingCABI::release] =
1869         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
1870             (int)AtomicOrderingCABI::acq_rel;
1871     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
1872         (int)AtomicOrderingCABI::seq_cst;
1873 
1874     return ConstantDataVector::get(IRB.getContext(),
1875                                    makeArrayRef(OrderingTable, NumOrderings));
1876   }
1877 
1878   // ------------------- Visitors.
1879   using InstVisitor<MemorySanitizerVisitor>::visit;
1880   void visit(Instruction &I) {
1881     if (I.getMetadata(LLVMContext::MD_nosanitize))
1882       return;
1883     // Don't want to visit if we're in the prologue
1884     if (isInPrologue(I))
1885       return;
1886     InstVisitor<MemorySanitizerVisitor>::visit(I);
1887   }
1888 
1889   /// Instrument LoadInst
1890   ///
1891   /// Loads the corresponding shadow and (optionally) origin.
1892   /// Optionally, checks that the load address is fully defined.
1893   void visitLoadInst(LoadInst &I) {
1894     assert(I.getType()->isSized() && "Load type must have size");
1895     assert(!I.getMetadata(LLVMContext::MD_nosanitize));
1896     IRBuilder<> IRB(I.getNextNode());
1897     Type *ShadowTy = getShadowTy(&I);
1898     Value *Addr = I.getPointerOperand();
1899     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
1900     const Align Alignment = I.getAlign();
1901     if (PropagateShadow) {
1902       std::tie(ShadowPtr, OriginPtr) =
1903           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
1904       setShadow(&I,
1905                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
1906     } else {
1907       setShadow(&I, getCleanShadow(&I));
1908     }
1909 
1910     if (ClCheckAccessAddress)
1911       insertShadowCheck(I.getPointerOperand(), &I);
1912 
1913     if (I.isAtomic())
1914       I.setOrdering(addAcquireOrdering(I.getOrdering()));
1915 
1916     if (MS.TrackOrigins) {
1917       if (PropagateShadow) {
1918         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1919         setOrigin(
1920             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
1921       } else {
1922         setOrigin(&I, getCleanOrigin());
1923       }
1924     }
1925   }
1926 
1927   /// Instrument StoreInst
1928   ///
1929   /// Stores the corresponding shadow and (optionally) origin.
1930   /// Optionally, checks that the store address is fully defined.
1931   void visitStoreInst(StoreInst &I) {
1932     StoreList.push_back(&I);
1933     if (ClCheckAccessAddress)
1934       insertShadowCheck(I.getPointerOperand(), &I);
1935   }
1936 
1937   void handleCASOrRMW(Instruction &I) {
1938     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
1939 
1940     IRBuilder<> IRB(&I);
1941     Value *Addr = I.getOperand(0);
1942     Value *Val = I.getOperand(1);
1943     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, Val->getType(), Align(1),
1944                                           /*isStore*/ true)
1945                            .first;
1946 
1947     if (ClCheckAccessAddress)
1948       insertShadowCheck(Addr, &I);
1949 
1950     // Only test the conditional argument of cmpxchg instruction.
1951     // The other argument can potentially be uninitialized, but we can not
1952     // detect this situation reliably without possible false positives.
1953     if (isa<AtomicCmpXchgInst>(I))
1954       insertShadowCheck(Val, &I);
1955 
1956     IRB.CreateStore(getCleanShadow(Val), ShadowPtr);
1957 
1958     setShadow(&I, getCleanShadow(&I));
1959     setOrigin(&I, getCleanOrigin());
1960   }
1961 
1962   void visitAtomicRMWInst(AtomicRMWInst &I) {
1963     handleCASOrRMW(I);
1964     I.setOrdering(addReleaseOrdering(I.getOrdering()));
1965   }
1966 
1967   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
1968     handleCASOrRMW(I);
1969     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
1970   }
1971 
1972   // Vector manipulation.
1973   void visitExtractElementInst(ExtractElementInst &I) {
1974     insertShadowCheck(I.getOperand(1), &I);
1975     IRBuilder<> IRB(&I);
1976     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
1977               "_msprop"));
1978     setOrigin(&I, getOrigin(&I, 0));
1979   }
1980 
1981   void visitInsertElementInst(InsertElementInst &I) {
1982     insertShadowCheck(I.getOperand(2), &I);
1983     IRBuilder<> IRB(&I);
1984     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
1985               I.getOperand(2), "_msprop"));
1986     setOriginForNaryOp(I);
1987   }
1988 
1989   void visitShuffleVectorInst(ShuffleVectorInst &I) {
1990     IRBuilder<> IRB(&I);
1991     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
1992                                           I.getShuffleMask(), "_msprop"));
1993     setOriginForNaryOp(I);
1994   }
1995 
1996   // Casts.
1997   void visitSExtInst(SExtInst &I) {
1998     IRBuilder<> IRB(&I);
1999     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
2000     setOrigin(&I, getOrigin(&I, 0));
2001   }
2002 
2003   void visitZExtInst(ZExtInst &I) {
2004     IRBuilder<> IRB(&I);
2005     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2006     setOrigin(&I, getOrigin(&I, 0));
2007   }
2008 
2009   void visitTruncInst(TruncInst &I) {
2010     IRBuilder<> IRB(&I);
2011     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2012     setOrigin(&I, getOrigin(&I, 0));
2013   }
2014 
2015   void visitBitCastInst(BitCastInst &I) {
2016     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2017     // a musttail call and a ret, don't instrument. New instructions are not
2018     // allowed after a musttail call.
2019     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2020       if (CI->isMustTailCall())
2021         return;
2022     IRBuilder<> IRB(&I);
2023     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2024     setOrigin(&I, getOrigin(&I, 0));
2025   }
2026 
2027   void visitPtrToIntInst(PtrToIntInst &I) {
2028     IRBuilder<> IRB(&I);
2029     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2030              "_msprop_ptrtoint"));
2031     setOrigin(&I, getOrigin(&I, 0));
2032   }
2033 
2034   void visitIntToPtrInst(IntToPtrInst &I) {
2035     IRBuilder<> IRB(&I);
2036     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2037              "_msprop_inttoptr"));
2038     setOrigin(&I, getOrigin(&I, 0));
2039   }
2040 
2041   void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
2042   void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
2043   void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
2044   void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
2045   void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
2046   void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
2047 
2048   /// Propagate shadow for bitwise AND.
2049   ///
2050   /// This code is exact, i.e. if, for example, a bit in the left argument
2051   /// is defined and 0, then neither the value not definedness of the
2052   /// corresponding bit in B don't affect the resulting shadow.
2053   void visitAnd(BinaryOperator &I) {
2054     IRBuilder<> IRB(&I);
2055     //  "And" of 0 and a poisoned value results in unpoisoned value.
2056     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2057     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2058     //  1&p => p;     0&p => 0;     p&p => p;
2059     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
2060     Value *S1 = getShadow(&I, 0);
2061     Value *S2 = getShadow(&I, 1);
2062     Value *V1 = I.getOperand(0);
2063     Value *V2 = I.getOperand(1);
2064     if (V1->getType() != S1->getType()) {
2065       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2066       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2067     }
2068     Value *S1S2 = IRB.CreateAnd(S1, S2);
2069     Value *V1S2 = IRB.CreateAnd(V1, S2);
2070     Value *S1V2 = IRB.CreateAnd(S1, V2);
2071     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2072     setOriginForNaryOp(I);
2073   }
2074 
2075   void visitOr(BinaryOperator &I) {
2076     IRBuilder<> IRB(&I);
2077     //  "Or" of 1 and a poisoned value results in unpoisoned value.
2078     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2079     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2080     //  1|p => 1;     0|p => p;     p|p => p;
2081     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
2082     Value *S1 = getShadow(&I, 0);
2083     Value *S2 = getShadow(&I, 1);
2084     Value *V1 = IRB.CreateNot(I.getOperand(0));
2085     Value *V2 = IRB.CreateNot(I.getOperand(1));
2086     if (V1->getType() != S1->getType()) {
2087       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2088       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2089     }
2090     Value *S1S2 = IRB.CreateAnd(S1, S2);
2091     Value *V1S2 = IRB.CreateAnd(V1, S2);
2092     Value *S1V2 = IRB.CreateAnd(S1, V2);
2093     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2094     setOriginForNaryOp(I);
2095   }
2096 
2097   /// Default propagation of shadow and/or origin.
2098   ///
2099   /// This class implements the general case of shadow propagation, used in all
2100   /// cases where we don't know and/or don't care about what the operation
2101   /// actually does. It converts all input shadow values to a common type
2102   /// (extending or truncating as necessary), and bitwise OR's them.
2103   ///
2104   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2105   /// fully initialized), and less prone to false positives.
2106   ///
2107   /// This class also implements the general case of origin propagation. For a
2108   /// Nary operation, result origin is set to the origin of an argument that is
2109   /// not entirely initialized. If there is more than one such arguments, the
2110   /// rightmost of them is picked. It does not matter which one is picked if all
2111   /// arguments are initialized.
2112   template <bool CombineShadow>
2113   class Combiner {
2114     Value *Shadow = nullptr;
2115     Value *Origin = nullptr;
2116     IRBuilder<> &IRB;
2117     MemorySanitizerVisitor *MSV;
2118 
2119   public:
2120     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2121         : IRB(IRB), MSV(MSV) {}
2122 
2123     /// Add a pair of shadow and origin values to the mix.
2124     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2125       if (CombineShadow) {
2126         assert(OpShadow);
2127         if (!Shadow)
2128           Shadow = OpShadow;
2129         else {
2130           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2131           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2132         }
2133       }
2134 
2135       if (MSV->MS.TrackOrigins) {
2136         assert(OpOrigin);
2137         if (!Origin) {
2138           Origin = OpOrigin;
2139         } else {
2140           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2141           // No point in adding something that might result in 0 origin value.
2142           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2143             Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
2144             Value *Cond =
2145                 IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
2146             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2147           }
2148         }
2149       }
2150       return *this;
2151     }
2152 
2153     /// Add an application value to the mix.
2154     Combiner &Add(Value *V) {
2155       Value *OpShadow = MSV->getShadow(V);
2156       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2157       return Add(OpShadow, OpOrigin);
2158     }
2159 
2160     /// Set the current combined values as the given instruction's shadow
2161     /// and origin.
2162     void Done(Instruction *I) {
2163       if (CombineShadow) {
2164         assert(Shadow);
2165         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2166         MSV->setShadow(I, Shadow);
2167       }
2168       if (MSV->MS.TrackOrigins) {
2169         assert(Origin);
2170         MSV->setOrigin(I, Origin);
2171       }
2172     }
2173   };
2174 
  // Combiner that ORs operand shadows together and also propagates origins.
  using ShadowAndOriginCombiner = Combiner<true>;
  // Combiner that propagates origins only; it neither combines nor sets the
  // shadow.
  using OriginCombiner = Combiner<false>;
2177 
2178   /// Propagate origin for arbitrary operation.
2179   void setOriginForNaryOp(Instruction &I) {
2180     if (!MS.TrackOrigins) return;
2181     IRBuilder<> IRB(&I);
2182     OriginCombiner OC(this, IRB);
2183     for (Use &Op : I.operands())
2184       OC.Add(Op.get());
2185     OC.Done(&I);
2186   }
2187 
2188   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2189     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2190            "Vector of pointers is not a valid shadow type");
2191     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2192                                   Ty->getScalarSizeInBits()
2193                             : Ty->getPrimitiveSizeInBits();
2194   }
2195 
2196   /// Cast between two shadow types, extending or truncating as
2197   /// necessary.
2198   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2199                           bool Signed = false) {
2200     Type *srcTy = V->getType();
2201     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2202     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2203     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2204       return IRB.CreateICmpNE(V, getCleanShadow(V));
2205 
2206     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2207       return IRB.CreateIntCast(V, dstTy, Signed);
2208     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2209         cast<FixedVectorType>(dstTy)->getNumElements() ==
2210             cast<FixedVectorType>(srcTy)->getNumElements())
2211       return IRB.CreateIntCast(V, dstTy, Signed);
2212     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2213     Value *V2 =
2214       IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2215     return IRB.CreateBitCast(V2, dstTy);
2216     // TODO: handle struct types.
2217   }
2218 
2219   /// Cast an application value to the type of its own shadow.
2220   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2221     Type *ShadowTy = getShadowTy(V);
2222     if (V->getType() == ShadowTy)
2223       return V;
2224     if (V->getType()->isPtrOrPtrVectorTy())
2225       return IRB.CreatePtrToInt(V, ShadowTy);
2226     else
2227       return IRB.CreateBitCast(V, ShadowTy);
2228   }
2229 
2230   /// Propagate shadow for arbitrary operation.
2231   void handleShadowOr(Instruction &I) {
2232     IRBuilder<> IRB(&I);
2233     ShadowAndOriginCombiner SC(this, IRB);
2234     for (Use &Op : I.operands())
2235       SC.Add(Op.get());
2236     SC.Done(&I);
2237   }
2238 
2239   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2240 
2241   // Handle multiplication by constant.
2242   //
2243   // Handle a special case of multiplication by constant that may have one or
2244   // more zeros in the lower bits. This makes corresponding number of lower bits
2245   // of the result zero as well. We model it by shifting the other operand
2246   // shadow left by the required number of bits. Effectively, we transform
2247   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2248   // We use multiplication by 2**N instead of shift to cover the case of
2249   // multiplication by 0, which may occur in some elements of a vector operand.
2250   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2251                            Value *OtherArg) {
2252     Constant *ShadowMul;
2253     Type *Ty = ConstArg->getType();
2254     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2255       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2256       Type *EltTy = VTy->getElementType();
2257       SmallVector<Constant *, 16> Elements;
2258       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2259         if (ConstantInt *Elt =
2260                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2261           const APInt &V = Elt->getValue();
2262           APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2263           Elements.push_back(ConstantInt::get(EltTy, V2));
2264         } else {
2265           Elements.push_back(ConstantInt::get(EltTy, 1));
2266         }
2267       }
2268       ShadowMul = ConstantVector::get(Elements);
2269     } else {
2270       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2271         const APInt &V = Elt->getValue();
2272         APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
2273         ShadowMul = ConstantInt::get(Ty, V2);
2274       } else {
2275         ShadowMul = ConstantInt::get(Ty, 1);
2276       }
2277     }
2278 
2279     IRBuilder<> IRB(&I);
2280     setShadow(&I,
2281               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2282     setOrigin(&I, getOrigin(OtherArg));
2283   }
2284 
2285   void visitMul(BinaryOperator &I) {
2286     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2287     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2288     if (constOp0 && !constOp1)
2289       handleMulByConstant(I, constOp0, I.getOperand(1));
2290     else if (constOp1 && !constOp0)
2291       handleMulByConstant(I, constOp1, I.getOperand(0));
2292     else
2293       handleShadowOr(I);
2294   }
2295 
2296   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2297   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2298   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2299   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2300   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2301   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2302 
2303   void handleIntegerDiv(Instruction &I) {
2304     IRBuilder<> IRB(&I);
2305     // Strict on the second argument.
2306     insertShadowCheck(I.getOperand(1), &I);
2307     setShadow(&I, getShadow(&I, 0));
2308     setOrigin(&I, getOrigin(&I, 0));
2309   }
2310 
2311   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2312   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2313   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2314   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2315 
2316   // Floating point division is side-effect free. We can not require that the
2317   // divisor is fully initialized and must propagate shadow. See PR37523.
2318   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2319   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2320 
2321   /// Instrument == and != comparisons.
2322   ///
2323   /// Sometimes the comparison result is known even if some of the bits of the
2324   /// arguments are not.
2325   void handleEqualityComparison(ICmpInst &I) {
2326     IRBuilder<> IRB(&I);
2327     Value *A = I.getOperand(0);
2328     Value *B = I.getOperand(1);
2329     Value *Sa = getShadow(A);
2330     Value *Sb = getShadow(B);
2331 
2332     // Get rid of pointers and vectors of pointers.
2333     // For ints (and vectors of ints), types of A and Sa match,
2334     // and this is a no-op.
2335     A = IRB.CreatePointerCast(A, Sa->getType());
2336     B = IRB.CreatePointerCast(B, Sb->getType());
2337 
2338     // A == B  <==>  (C = A^B) == 0
2339     // A != B  <==>  (C = A^B) != 0
2340     // Sc = Sa | Sb
2341     Value *C = IRB.CreateXor(A, B);
2342     Value *Sc = IRB.CreateOr(Sa, Sb);
2343     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2344     // Result is defined if one of the following is true
2345     // * there is a defined 1 bit in C
2346     // * C is fully defined
2347     // Si = !(C & ~Sc) && Sc
2348     Value *Zero = Constant::getNullValue(Sc->getType());
2349     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2350     Value *Si =
2351       IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
2352                     IRB.CreateICmpEQ(
2353                       IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
2354     Si->setName("_msprop_icmp");
2355     setShadow(&I, Si);
2356     setOriginForNaryOp(I);
2357   }
2358 
2359   /// Build the lowest possible value of V, taking into account V's
2360   ///        uninitialized bits.
2361   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2362                                 bool isSigned) {
2363     if (isSigned) {
2364       // Split shadow into sign bit and other bits.
2365       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2366       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2367       // Maximise the undefined shadow bit, minimize other undefined bits.
2368       return
2369         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
2370     } else {
2371       // Minimize undefined bits.
2372       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2373     }
2374   }
2375 
2376   /// Build the highest possible value of V, taking into account V's
2377   ///        uninitialized bits.
2378   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2379                                 bool isSigned) {
2380     if (isSigned) {
2381       // Split shadow into sign bit and other bits.
2382       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2383       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2384       // Minimise the undefined shadow bit, maximise other undefined bits.
2385       return
2386         IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
2387     } else {
2388       // Maximize undefined bits.
2389       return IRB.CreateOr(A, Sa);
2390     }
2391   }
2392 
2393   /// Instrument relational comparisons.
2394   ///
2395   /// This function does exact shadow propagation for all relational
2396   /// comparisons of integers, pointers and vectors of those.
2397   /// FIXME: output seems suboptimal when one of the operands is a constant
2398   void handleRelationalComparisonExact(ICmpInst &I) {
2399     IRBuilder<> IRB(&I);
2400     Value *A = I.getOperand(0);
2401     Value *B = I.getOperand(1);
2402     Value *Sa = getShadow(A);
2403     Value *Sb = getShadow(B);
2404 
2405     // Get rid of pointers and vectors of pointers.
2406     // For ints (and vectors of ints), types of A and Sa match,
2407     // and this is a no-op.
2408     A = IRB.CreatePointerCast(A, Sa->getType());
2409     B = IRB.CreatePointerCast(B, Sb->getType());
2410 
2411     // Let [a0, a1] be the interval of possible values of A, taking into account
2412     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2413     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2414     bool IsSigned = I.isSigned();
2415     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2416                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2417                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2418     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2419                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2420                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2421     Value *Si = IRB.CreateXor(S1, S2);
2422     setShadow(&I, Si);
2423     setOriginForNaryOp(I);
2424   }
2425 
2426   /// Instrument signed relational comparisons.
2427   ///
2428   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2429   /// bit of the shadow. Everything else is delegated to handleShadowOr().
2430   void handleSignedRelationalComparison(ICmpInst &I) {
2431     Constant *constOp;
2432     Value *op = nullptr;
2433     CmpInst::Predicate pre;
2434     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2435       op = I.getOperand(0);
2436       pre = I.getPredicate();
2437     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2438       op = I.getOperand(1);
2439       pre = I.getSwappedPredicate();
2440     } else {
2441       handleShadowOr(I);
2442       return;
2443     }
2444 
2445     if ((constOp->isNullValue() &&
2446          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2447         (constOp->isAllOnesValue() &&
2448          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2449       IRBuilder<> IRB(&I);
2450       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2451                                         "_msprop_icmp_s");
2452       setShadow(&I, Shadow);
2453       setOrigin(&I, getOrigin(op));
2454     } else {
2455       handleShadowOr(I);
2456     }
2457   }
2458 
2459   void visitICmpInst(ICmpInst &I) {
2460     if (!ClHandleICmp) {
2461       handleShadowOr(I);
2462       return;
2463     }
2464     if (I.isEquality()) {
2465       handleEqualityComparison(I);
2466       return;
2467     }
2468 
2469     assert(I.isRelational());
2470     if (ClHandleICmpExact) {
2471       handleRelationalComparisonExact(I);
2472       return;
2473     }
2474     if (I.isSigned()) {
2475       handleSignedRelationalComparison(I);
2476       return;
2477     }
2478 
2479     assert(I.isUnsigned());
2480     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2481       handleRelationalComparisonExact(I);
2482       return;
2483     }
2484 
2485     handleShadowOr(I);
2486   }
2487 
2488   void visitFCmpInst(FCmpInst &I) {
2489     handleShadowOr(I);
2490   }
2491 
2492   void handleShift(BinaryOperator &I) {
2493     IRBuilder<> IRB(&I);
2494     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2495     // Otherwise perform the same shift on S1.
2496     Value *S1 = getShadow(&I, 0);
2497     Value *S2 = getShadow(&I, 1);
2498     Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
2499                                    S2->getType());
2500     Value *V2 = I.getOperand(1);
2501     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2502     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2503     setOriginForNaryOp(I);
2504   }
2505 
2506   void visitShl(BinaryOperator &I) { handleShift(I); }
2507   void visitAShr(BinaryOperator &I) { handleShift(I); }
2508   void visitLShr(BinaryOperator &I) { handleShift(I); }
2509 
2510   void handleFunnelShift(IntrinsicInst &I) {
2511     IRBuilder<> IRB(&I);
2512     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2513     // Otherwise perform the same shift on S0 and S1.
2514     Value *S0 = getShadow(&I, 0);
2515     Value *S1 = getShadow(&I, 1);
2516     Value *S2 = getShadow(&I, 2);
2517     Value *S2Conv =
2518         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2519     Value *V2 = I.getOperand(2);
2520     Function *Intrin = Intrinsic::getDeclaration(
2521         I.getModule(), I.getIntrinsicID(), S2Conv->getType());
2522     Value *Shift = IRB.CreateCall(Intrin, {S0, S1, V2});
2523     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2524     setOriginForNaryOp(I);
2525   }
2526 
2527   /// Instrument llvm.memmove
2528   ///
2529   /// At this point we don't know if llvm.memmove will be inlined or not.
2530   /// If we don't instrument it and it gets inlined,
2531   /// our interceptor will not kick in and we will lose the memmove.
2532   /// If we instrument the call here, but it does not get inlined,
2533   /// we will memove the shadow twice: which is bad in case
2534   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2535   ///
2536   /// Similar situation exists for memcpy and memset.
2537   void visitMemMoveInst(MemMoveInst &I) {
2538     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2539     IRBuilder<> IRB(&I);
2540     IRB.CreateCall(
2541         MS.MemmoveFn,
2542         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2543          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2544          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2545     I.eraseFromParent();
2546   }
2547 
2548   // Similar to memmove: avoid copying shadow twice.
2549   // This is somewhat unfortunate as it may slowdown small constant memcpys.
2550   // FIXME: consider doing manual inline for small constant sizes and proper
2551   // alignment.
2552   void visitMemCpyInst(MemCpyInst &I) {
2553     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2554     IRBuilder<> IRB(&I);
2555     IRB.CreateCall(
2556         MS.MemcpyFn,
2557         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2558          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2559          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2560     I.eraseFromParent();
2561   }
2562 
2563   // Same as memcpy.
2564   void visitMemSetInst(MemSetInst &I) {
2565     IRBuilder<> IRB(&I);
2566     IRB.CreateCall(
2567         MS.MemsetFn,
2568         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2569          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2570          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2571     I.eraseFromParent();
2572   }
2573 
2574   void visitVAStartInst(VAStartInst &I) {
2575     VAHelper->visitVAStartInst(I);
2576   }
2577 
2578   void visitVACopyInst(VACopyInst &I) {
2579     VAHelper->visitVACopyInst(I);
2580   }
2581 
2582   /// Handle vector store-like intrinsics.
2583   ///
2584   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2585   /// has 1 pointer argument and 1 vector argument, returns void.
2586   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2587     IRBuilder<> IRB(&I);
2588     Value* Addr = I.getArgOperand(0);
2589     Value *Shadow = getShadow(&I, 1);
2590     Value *ShadowPtr, *OriginPtr;
2591 
2592     // We don't know the pointer alignment (could be unaligned SSE store!).
2593     // Have to assume to worst case.
2594     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2595         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2596     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2597 
2598     if (ClCheckAccessAddress)
2599       insertShadowCheck(Addr, &I);
2600 
2601     // FIXME: factor out common code from materializeStores
2602     if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2603     return true;
2604   }
2605 
2606   /// Handle vector load-like intrinsics.
2607   ///
2608   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2609   /// has 1 pointer argument, returns a vector.
2610   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2611     IRBuilder<> IRB(&I);
2612     Value *Addr = I.getArgOperand(0);
2613 
2614     Type *ShadowTy = getShadowTy(&I);
2615     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2616     if (PropagateShadow) {
2617       // We don't know the pointer alignment (could be unaligned SSE load!).
2618       // Have to assume to worst case.
2619       const Align Alignment = Align(1);
2620       std::tie(ShadowPtr, OriginPtr) =
2621           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2622       setShadow(&I,
2623                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2624     } else {
2625       setShadow(&I, getCleanShadow(&I));
2626     }
2627 
2628     if (ClCheckAccessAddress)
2629       insertShadowCheck(Addr, &I);
2630 
2631     if (MS.TrackOrigins) {
2632       if (PropagateShadow)
2633         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2634       else
2635         setOrigin(&I, getCleanOrigin());
2636     }
2637     return true;
2638   }
2639 
2640   /// Handle (SIMD arithmetic)-like intrinsics.
2641   ///
2642   /// Instrument intrinsics with any number of arguments of the same type,
2643   /// equal to the return type. The type should be simple (no aggregates or
2644   /// pointers; vectors are fine).
2645   /// Caller guarantees that this intrinsic does not access memory.
2646   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2647     Type *RetTy = I.getType();
2648     if (!(RetTy->isIntOrIntVectorTy() ||
2649           RetTy->isFPOrFPVectorTy() ||
2650           RetTy->isX86_MMXTy()))
2651       return false;
2652 
2653     unsigned NumArgOperands = I.arg_size();
2654     for (unsigned i = 0; i < NumArgOperands; ++i) {
2655       Type *Ty = I.getArgOperand(i)->getType();
2656       if (Ty != RetTy)
2657         return false;
2658     }
2659 
2660     IRBuilder<> IRB(&I);
2661     ShadowAndOriginCombiner SC(this, IRB);
2662     for (unsigned i = 0; i < NumArgOperands; ++i)
2663       SC.Add(I.getArgOperand(i));
2664     SC.Done(&I);
2665 
2666     return true;
2667   }
2668 
2669   /// Heuristically instrument unknown intrinsics.
2670   ///
2671   /// The main purpose of this code is to do something reasonable with all
2672   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2673   /// We recognize several classes of intrinsics by their argument types and
2674   /// ModRefBehaviour and apply special instrumentation when we are reasonably
2675   /// sure that we know what the intrinsic does.
2676   ///
2677   /// We special-case intrinsics where this approach fails. See llvm.bswap
2678   /// handling as an example of that.
2679   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2680     unsigned NumArgOperands = I.arg_size();
2681     if (NumArgOperands == 0)
2682       return false;
2683 
2684     if (NumArgOperands == 2 &&
2685         I.getArgOperand(0)->getType()->isPointerTy() &&
2686         I.getArgOperand(1)->getType()->isVectorTy() &&
2687         I.getType()->isVoidTy() &&
2688         !I.onlyReadsMemory()) {
2689       // This looks like a vector store.
2690       return handleVectorStoreIntrinsic(I);
2691     }
2692 
2693     if (NumArgOperands == 1 &&
2694         I.getArgOperand(0)->getType()->isPointerTy() &&
2695         I.getType()->isVectorTy() &&
2696         I.onlyReadsMemory()) {
2697       // This looks like a vector load.
2698       return handleVectorLoadIntrinsic(I);
2699     }
2700 
2701     if (I.doesNotAccessMemory())
2702       if (maybeHandleSimpleNomemIntrinsic(I))
2703         return true;
2704 
2705     // FIXME: detect and handle SSE maskstore/maskload
2706     return false;
2707   }
2708 
2709   void handleInvariantGroup(IntrinsicInst &I) {
2710     setShadow(&I, getShadow(&I, 0));
2711     setOrigin(&I, getOrigin(&I, 0));
2712   }
2713 
2714   void handleLifetimeStart(IntrinsicInst &I) {
2715     if (!PoisonStack)
2716       return;
2717     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
2718     if (!AI)
2719       InstrumentLifetimeStart = false;
2720     LifetimeStartList.push_back(std::make_pair(&I, AI));
2721   }
2722 
2723   void handleBswap(IntrinsicInst &I) {
2724     IRBuilder<> IRB(&I);
2725     Value *Op = I.getArgOperand(0);
2726     Type *OpType = Op->getType();
2727     Function *BswapFunc = Intrinsic::getDeclaration(
2728       F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
2729     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
2730     setOrigin(&I, getOrigin(Op));
2731   }
2732 
2733   // Instrument vector convert intrinsic.
2734   //
2735   // This function instruments intrinsics like cvtsi2ss:
2736   // %Out = int_xxx_cvtyyy(%ConvertOp)
2737   // or
2738   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
2739   // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
2740   // number \p Out elements, and (if has 2 arguments) copies the rest of the
2741   // elements from \p CopyOp.
2742   // In most cases conversion involves floating-point value which may trigger a
2743   // hardware exception when not fully initialized. For this reason we require
2744   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
2745   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
2746   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
2747   // return a fully initialized value.
2748   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
2749                                     bool HasRoundingMode = false) {
2750     IRBuilder<> IRB(&I);
2751     Value *CopyOp, *ConvertOp;
2752 
2753     assert((!HasRoundingMode ||
2754             isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
2755            "Invalid rounding mode");
2756 
2757     switch (I.arg_size() - HasRoundingMode) {
2758     case 2:
2759       CopyOp = I.getArgOperand(0);
2760       ConvertOp = I.getArgOperand(1);
2761       break;
2762     case 1:
2763       ConvertOp = I.getArgOperand(0);
2764       CopyOp = nullptr;
2765       break;
2766     default:
2767       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
2768     }
2769 
2770     // The first *NumUsedElements* elements of ConvertOp are converted to the
2771     // same number of output elements. The rest of the output is copied from
2772     // CopyOp, or (if not available) filled with zeroes.
2773     // Combine shadow for elements of ConvertOp that are used in this operation,
2774     // and insert a check.
2775     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
2776     // int->any conversion.
2777     Value *ConvertShadow = getShadow(ConvertOp);
2778     Value *AggShadow = nullptr;
2779     if (ConvertOp->getType()->isVectorTy()) {
2780       AggShadow = IRB.CreateExtractElement(
2781           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
2782       for (int i = 1; i < NumUsedElements; ++i) {
2783         Value *MoreShadow = IRB.CreateExtractElement(
2784             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
2785         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
2786       }
2787     } else {
2788       AggShadow = ConvertShadow;
2789     }
2790     assert(AggShadow->getType()->isIntegerTy());
2791     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
2792 
2793     // Build result shadow by zero-filling parts of CopyOp shadow that come from
2794     // ConvertOp.
2795     if (CopyOp) {
2796       assert(CopyOp->getType() == I.getType());
2797       assert(CopyOp->getType()->isVectorTy());
2798       Value *ResultShadow = getShadow(CopyOp);
2799       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
2800       for (int i = 0; i < NumUsedElements; ++i) {
2801         ResultShadow = IRB.CreateInsertElement(
2802             ResultShadow, ConstantInt::getNullValue(EltTy),
2803             ConstantInt::get(IRB.getInt32Ty(), i));
2804       }
2805       setShadow(&I, ResultShadow);
2806       setOrigin(&I, getOrigin(CopyOp));
2807     } else {
2808       setShadow(&I, getCleanShadow(&I));
2809       setOrigin(&I, getCleanOrigin());
2810     }
2811   }
2812 
2813   // Given a scalar or vector, extract lower 64 bits (or less), and return all
2814   // zeroes if it is zero, and all ones otherwise.
2815   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2816     if (S->getType()->isVectorTy())
2817       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
2818     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
2819     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2820     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2821   }
2822 
2823   // Given a vector, extract its first element, and return all
2824   // zeroes if it is zero, and all ones otherwise.
2825   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
2826     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
2827     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
2828     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
2829   }
2830 
2831   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
2832     Type *T = S->getType();
2833     assert(T->isVectorTy());
2834     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
2835     return IRB.CreateSExt(S2, T);
2836   }
2837 
2838   // Instrument vector shift intrinsic.
2839   //
2840   // This function instruments intrinsics like int_x86_avx2_psll_w.
2841   // Intrinsic shifts %In by %ShiftSize bits.
2842   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
2843   // size, and the rest is ignored. Behavior is defined even if shift size is
2844   // greater than register (or field) width.
2845   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
2846     assert(I.arg_size() == 2);
2847     IRBuilder<> IRB(&I);
2848     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2849     // Otherwise perform the same shift on S1.
2850     Value *S1 = getShadow(&I, 0);
2851     Value *S2 = getShadow(&I, 1);
2852     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
2853                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
2854     Value *V1 = I.getOperand(0);
2855     Value *V2 = I.getOperand(1);
2856     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
2857                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
2858     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
2859     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2860     setOriginForNaryOp(I);
2861   }
2862 
2863   // Get an X86_MMX-sized vector type.
2864   Type *getMMXVectorTy(unsigned EltSizeInBits) {
2865     const unsigned X86_MMXSizeInBits = 64;
2866     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
2867            "Illegal MMX vector element size");
2868     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
2869                                 X86_MMXSizeInBits / EltSizeInBits);
2870   }
2871 
2872   // Returns a signed counterpart for an (un)signed-saturate-and-pack
2873   // intrinsic.
2874   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
2875     switch (id) {
2876       case Intrinsic::x86_sse2_packsswb_128:
2877       case Intrinsic::x86_sse2_packuswb_128:
2878         return Intrinsic::x86_sse2_packsswb_128;
2879 
2880       case Intrinsic::x86_sse2_packssdw_128:
2881       case Intrinsic::x86_sse41_packusdw:
2882         return Intrinsic::x86_sse2_packssdw_128;
2883 
2884       case Intrinsic::x86_avx2_packsswb:
2885       case Intrinsic::x86_avx2_packuswb:
2886         return Intrinsic::x86_avx2_packsswb;
2887 
2888       case Intrinsic::x86_avx2_packssdw:
2889       case Intrinsic::x86_avx2_packusdw:
2890         return Intrinsic::x86_avx2_packssdw;
2891 
2892       case Intrinsic::x86_mmx_packsswb:
2893       case Intrinsic::x86_mmx_packuswb:
2894         return Intrinsic::x86_mmx_packsswb;
2895 
2896       case Intrinsic::x86_mmx_packssdw:
2897         return Intrinsic::x86_mmx_packssdw;
2898       default:
2899         llvm_unreachable("unexpected intrinsic id");
2900     }
2901   }
2902 
2903   // Instrument vector pack intrinsic.
2904   //
2905   // This function instruments intrinsics like x86_mmx_packsswb, that
2906   // packs elements of 2 input vectors into half as many bits with saturation.
2907   // Shadow is propagated with the signed variant of the same intrinsic applied
2908   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
2909   // EltSizeInBits is used only for x86mmx arguments.
2910   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
2911     assert(I.arg_size() == 2);
2912     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2913     IRBuilder<> IRB(&I);
2914     Value *S1 = getShadow(&I, 0);
2915     Value *S2 = getShadow(&I, 1);
2916     assert(isX86_MMX || S1->getType()->isVectorTy());
2917 
2918     // SExt and ICmpNE below must apply to individual elements of input vectors.
2919     // In case of x86mmx arguments, cast them to appropriate vector types and
2920     // back.
2921     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
2922     if (isX86_MMX) {
2923       S1 = IRB.CreateBitCast(S1, T);
2924       S2 = IRB.CreateBitCast(S2, T);
2925     }
2926     Value *S1_ext = IRB.CreateSExt(
2927         IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
2928     Value *S2_ext = IRB.CreateSExt(
2929         IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
2930     if (isX86_MMX) {
2931       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
2932       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
2933       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
2934     }
2935 
2936     Function *ShadowFn = Intrinsic::getDeclaration(
2937         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
2938 
2939     Value *S =
2940         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
2941     if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
2942     setShadow(&I, S);
2943     setOriginForNaryOp(I);
2944   }
2945 
2946   // Instrument sum-of-absolute-differences intrinsic.
2947   void handleVectorSadIntrinsic(IntrinsicInst &I) {
2948     const unsigned SignificantBitsPerResultElement = 16;
2949     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2950     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
2951     unsigned ZeroBitsPerResultElement =
2952         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
2953 
2954     IRBuilder<> IRB(&I);
2955     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2956     S = IRB.CreateBitCast(S, ResTy);
2957     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2958                        ResTy);
2959     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
2960     S = IRB.CreateBitCast(S, getShadowTy(&I));
2961     setShadow(&I, S);
2962     setOriginForNaryOp(I);
2963   }
2964 
2965   // Instrument multiply-add intrinsic.
       // The two operand shadows are OR'ed and viewed as the result type; each
       // result element becomes fully poisoned if any bit of it is poisoned in
       // that combined shadow, and clean otherwise.
       // EltSizeInBits is only consulted for x86_mmx operands, where the result
       // type is taken to be getMMXVectorTy(EltSizeInBits * 2).
2966   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
2967                                   unsigned EltSizeInBits = 0) {
2968     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
2969     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
2970     IRBuilder<> IRB(&I);
2971     Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2972     S = IRB.CreateBitCast(S, ResTy);
         // Per element: all-ones if any bit is poisoned, zero otherwise.
2973     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
2974                        ResTy);
         // Back to the shadow type of the instruction (differs in the MMX case).
2975     S = IRB.CreateBitCast(S, getShadowTy(&I));
2976     setShadow(&I, S);
2977     setOriginForNaryOp(I);
2978   }
2979 
2980   // Instrument compare-packed intrinsic.
2981   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
2982   // all-ones shadow.
       // The comparison against zero is done on the OR of both operand shadows,
       // using the shadow type of the result as the element layout.
2983   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
2984     IRBuilder<> IRB(&I);
2985     Type *ResTy = getShadowTy(&I);
2986     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2987     Value *S = IRB.CreateSExt(
2988         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
2989     setShadow(&I, S);
2990     setOriginForNaryOp(I);
2991   }
2992 
2993   // Instrument compare-scalar intrinsic.
2994   // This handles both cmp* intrinsics which return the result in the first
2995   // element of a vector, and comi* which return the result as i32.
       // The OR of both operand shadows is passed to LowerElementShadowExtend
       // together with the result's shadow type; that helper is defined elsewhere
       // in this file (presumably it derives the result shadow from the lowest
       // element only -- confirm against its definition).
2996   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
2997     IRBuilder<> IRB(&I);
2998     Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
2999     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
3000     setShadow(&I, S);
3001     setOriginForNaryOp(I);
3002   }
3003 
3004   // Instrument generic vector reduction intrinsics
3005   // by ORing together all their fields.
       // Bit N of the result shadow is set if bit N is poisoned in any field of
       // the operand. The origin is copied from the (single) vector operand.
3006   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
3007     IRBuilder<> IRB(&I);
3008     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
3009     setShadow(&I, S);
3010     setOrigin(&I, getOrigin(&I, 0));
3011   }
3012 
3013   // Instrument vector.reduce.or intrinsic.
3014   // Valid (non-poisoned) set bits in the operand pull low the
3015   // corresponding shadow bits.
       // Result shadow = AndReduce(~Operand | Shadow) & OrReduce(Shadow).
       // The origin is copied from the vector operand.
3016   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
3017     IRBuilder<> IRB(&I);
3018     Value *OperandShadow = getShadow(&I, 0);
3019     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
         // A bit is 1 here when the operand bit is 0 or the bit is poisoned, so
         // it is 0 only for a bit that is both set and unpoisoned.
3020     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
3021     // Bit N is clean if any field's bit N is 1 and unpoison
3022     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
3023     // Otherwise, it is clean if every field's bit N is unpoison
3024     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3025     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3026 
3027     setShadow(&I, S);
3028     setOrigin(&I, getOrigin(&I, 0));
3029   }
3030 
3031   // Instrument vector.reduce.and intrinsic.
3032   // Valid (non-poisoned) unset bits in the operand pull down the
3033   // corresponding shadow bits.
       // Result shadow = AndReduce(Operand | Shadow) & OrReduce(Shadow).
       // The origin is copied from the vector operand.
3034   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3035     IRBuilder<> IRB(&I);
3036     Value *OperandShadow = getShadow(&I, 0);
         // A bit is 1 here when the operand bit is 1 or the bit is poisoned, so
         // it is 0 only for a bit that is both unset and unpoisoned.
3037     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
3038     // Bit N is clean if any field's bit N is 0 and unpoison
3039     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
3040     // Otherwise, it is clean if every field's bit N is unpoison
3041     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3042     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3043 
3044     setShadow(&I, S);
3045     setOrigin(&I, getOrigin(&I, 0));
3046   }
3047 
       // Instrument x86_sse_stmxcsr (see the dispatch in visitIntrinsicInst).
       // The 32-bit location addressed by operand 0 is marked initialized by
       // storing a clean i32 shadow to its shadow address. When
       // ClCheckAccessAddress is set, the address operand itself is also checked.
3048   void handleStmxcsr(IntrinsicInst &I) {
3049     IRBuilder<> IRB(&I);
3050     Value* Addr = I.getArgOperand(0);
3051     Type *Ty = IRB.getInt32Ty();
         // Only the shadow pointer is needed; the origin pointer is discarded.
3052     Value *ShadowPtr =
3053         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3054 
3055     IRB.CreateStore(getCleanShadow(Ty),
3056                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
3057 
3058     if (ClCheckAccessAddress)
3059       insertShadowCheck(Addr, &I);
3060   }
3061 
       // Instrument x86_sse_ldmxcsr (see the dispatch in visitIntrinsicInst).
       // The i32 shadow of the memory addressed by operand 0 is loaded and
       // checked, so reading an uninitialized value here is reported. Nothing is
       // emitted when InsertChecks is false.
3062   void handleLdmxcsr(IntrinsicInst &I) {
3063     if (!InsertChecks) return;
3064 
3065     IRBuilder<> IRB(&I);
3066     Value *Addr = I.getArgOperand(0);
3067     Type *Ty = IRB.getInt32Ty();
3068     const Align Alignment = Align(1);
3069     Value *ShadowPtr, *OriginPtr;
3070     std::tie(ShadowPtr, OriginPtr) =
3071         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3072 
3073     if (ClCheckAccessAddress)
3074       insertShadowCheck(Addr, &I);
3075 
3076     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
         // The origin is only loaded when origin tracking is enabled; otherwise a
         // clean origin accompanies the check.
3077     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3078                                     : getCleanOrigin();
3079     insertShadowCheck(Shadow, Origin, &I);
3080   }
3081 
       // Instrument llvm.masked.store.
       // Operands as read below: 0 = value, 1 = address, 2 = constant alignment,
       // 3 = mask. The value's shadow is written to shadow memory with a masked
       // store using the same mask and alignment as the original store.
3082   void handleMaskedStore(IntrinsicInst &I) {
3083     IRBuilder<> IRB(&I);
3084     Value *V = I.getArgOperand(0);
3085     Value *Addr = I.getArgOperand(1);
3086     const Align Alignment(
3087         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3088     Value *Mask = I.getArgOperand(3);
3089     Value *Shadow = getShadow(V);
3090 
3091     Value *ShadowPtr;
3092     Value *OriginPtr;
3093     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3094         Addr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3095 
3096     if (ClCheckAccessAddress) {
3097       insertShadowCheck(Addr, &I);
3098       // Uninitialized mask is kind of like uninitialized address, but not as
3099       // scary.
3100       insertShadowCheck(Mask, &I);
3101     }
3102 
3103     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3104 
3105     if (MS.TrackOrigins) {
3106       auto &DL = F.getParent()->getDataLayout();
           // NOTE: the origin is painted over the full store size of the shadow
           // type; the mask is not taken into account here.
3107       paintOrigin(IRB, getOrigin(V), OriginPtr,
3108                   DL.getTypeStoreSize(Shadow->getType()),
3109                   std::max(Alignment, kMinOriginAlignment));
3110     }
3111   }
3112 
3113   bool handleMaskedLoad(IntrinsicInst &I) {
3114     IRBuilder<> IRB(&I);
3115     Value *Addr = I.getArgOperand(0);
3116     const Align Alignment(
3117         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3118     Value *Mask = I.getArgOperand(2);
3119     Value *PassThru = I.getArgOperand(3);
3120 
3121     Type *ShadowTy = getShadowTy(&I);
3122     Value *ShadowPtr, *OriginPtr;
3123     if (PropagateShadow) {
3124       std::tie(ShadowPtr, OriginPtr) =
3125           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3126       setShadow(&I, IRB.CreateMaskedLoad(ShadowTy, ShadowPtr, Alignment, Mask,
3127                                          getShadow(PassThru), "_msmaskedld"));
3128     } else {
3129       setShadow(&I, getCleanShadow(&I));
3130     }
3131 
3132     if (ClCheckAccessAddress) {
3133       insertShadowCheck(Addr, &I);
3134       insertShadowCheck(Mask, &I);
3135     }
3136 
3137     if (MS.TrackOrigins) {
3138       if (PropagateShadow) {
3139         // Choose between PassThru's and the loaded value's origins.
3140         Value *MaskedPassThruShadow = IRB.CreateAnd(
3141             getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3142 
3143         Value *Acc = IRB.CreateExtractElement(
3144             MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3145         for (int i = 1, N = cast<FixedVectorType>(PassThru->getType())
3146                                 ->getNumElements();
3147              i < N; ++i) {
3148           Value *More = IRB.CreateExtractElement(
3149               MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3150           Acc = IRB.CreateOr(Acc, More);
3151         }
3152 
3153         Value *Origin = IRB.CreateSelect(
3154             IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
3155             getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
3156 
3157         setOrigin(&I, Origin);
3158       } else {
3159         setOrigin(&I, getCleanOrigin());
3160       }
3161     }
3162     return true;
3163   }
3164 
3165   // Instrument BMI / BMI2 intrinsics.
3166   // All of these intrinsics are Z = I(X, Y)
3167   // where the types of all operands and the result match, and are either i32 or i64.
3168   // The following instrumentation happens to work for all of them:
3169   //   Sz = I(Sx, Y) | (sext (Sy != 0))
       // i.e. the same intrinsic is re-applied to the shadow of X with the real
       // value of Y, and the result is fully poisoned if Y has any poisoned bit.
3170   void handleBmiIntrinsic(IntrinsicInst &I) {
3171     IRBuilder<> IRB(&I);
3172     Type *ShadowTy = getShadowTy(&I);
3173 
3174     // If any bit of the mask operand is poisoned, then the whole thing is.
3175     Value *SMask = getShadow(&I, 1);
3176     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3177                            ShadowTy);
3178     // Apply the same intrinsic to the shadow of the first operand.
3179     Value *S = IRB.CreateCall(I.getCalledFunction(),
3180                               {getShadow(&I, 0), I.getOperand(1)});
3181     S = IRB.CreateOr(SMask, S);
3182     setShadow(&I, S);
3183     setOriginForNaryOp(I);
3184   }
3185 
       // Build a shuffle mask for handlePclmulIntrinsic in which every index is
       // repeated twice, starting at 0 (or at 1 when OddElements is true) and
       // stepping by 2 while below Width. For Width == 4 this yields (0, 0, 2, 2)
       // or (1, 1, 3, 3).
3186   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3187     SmallVector<int, 8> Mask;
3188     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3189       Mask.append(2, X);
3190     }
3191     return Mask;
3192   }
3193 
3194   // Instrument pclmul intrinsics.
3195   // These intrinsics operate either on odd or on even elements of the input
3196   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3197   // Replace the unused elements with copies of the used ones, ex:
3198   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3199   // or
3200   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3201   // and then apply the usual shadow combining logic.
3202   void handlePclmulIntrinsic(IntrinsicInst &I) {
3203     IRBuilder<> IRB(&I);
3204     unsigned Width =
3205         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3206     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3207            "pclmul 3rd operand must be a constant");
3208     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
         // Bit 0 of the immediate picks odd/even elements for operand 0, bit 4
         // (0x10) does the same for operand 1.
3209     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
3210                                            getPclmulMask(Width, Imm & 0x01));
3211     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
3212                                            getPclmulMask(Width, Imm & 0x10));
         // Combine the shuffled shadows together with the operand origins.
3213     ShadowAndOriginCombiner SOC(this, IRB);
3214     SOC.Add(Shuf0, getOrigin(&I, 0));
3215     SOC.Add(Shuf1, getOrigin(&I, 1));
3216     SOC.Done(&I);
3217   }
3218 
3219   // Instrument _mm_*_sd|ss intrinsics
       // Unary form: the result shadow takes element 0 from the shadow of the
       // second operand and all other elements from the shadow of the first.
3220   void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
3221     IRBuilder<> IRB(&I);
3222     unsigned Width =
3223         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3224     Value *First = getShadow(&I, 0);
3225     Value *Second = getShadow(&I, 1);
3226     // First element of second operand, remaining elements of first operand
         // (shuffle index Width refers to element 0 of the second shuffle input).
3227     SmallVector<int, 16> Mask;
3228     Mask.push_back(Width);
3229     for (unsigned i = 1; i < Width; i++)
3230       Mask.push_back(i);
3231     Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
3232 
3233     setShadow(&I, Shadow);
3234     setOriginForNaryOp(I);
3235   }
3236 
       // Instrument binary _sd|_ss intrinsics (used for the scalar min/max cases
       // in visitIntrinsicInst). Element 0 of the result shadow is the OR of
       // element 0 of both operand shadows; the remaining elements come from the
       // shadow of the first operand.
3237   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
3238     IRBuilder<> IRB(&I);
3239     unsigned Width =
3240         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3241     Value *First = getShadow(&I, 0);
3242     Value *Second = getShadow(&I, 1);
3243     Value *OrShadow = IRB.CreateOr(First, Second);
3244     // First element of both OR'd together, remaining elements of first operand
         // (shuffle index Width refers to element 0 of OrShadow).
3245     SmallVector<int, 16> Mask;
3246     Mask.push_back(Width);
3247     for (unsigned i = 1; i < Width; i++)
3248       Mask.push_back(i);
3249     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
3250 
3251     setShadow(&I, Shadow);
3252     setOriginForNaryOp(I);
3253   }
3254 
3255   // Instrument abs intrinsic.
3256   // handleUnknownIntrinsic can't handle it because of the last
3257   // is_int_min_poison argument which does not match the result type.
       // The shadow and origin of the first operand are forwarded unchanged to
       // the result; no new instructions are emitted here, so no IRBuilder is
       // needed.
3258   void handleAbsIntrinsic(IntrinsicInst &I) {
3259     assert(I.getType()->isIntOrIntVectorTy());
3260     assert(I.getArgOperand(0)->getType() == I.getType());
3261 
3262     // FIXME: Handle is_int_min_poison.
3264     setShadow(&I, getShadow(&I, 0));
3265     setOrigin(&I, getOrigin(&I, 0));
3266   }
3267 
       // Dispatch an intrinsic call to the matching shadow/origin handler, keyed
       // on the intrinsic ID. Intrinsics without an explicit case are passed to
       // handleUnknownIntrinsic, and to visitInstruction if that returns false.
3268   void visitIntrinsicInst(IntrinsicInst &I) {
3269     switch (I.getIntrinsicID()) {
3270     case Intrinsic::abs:
3271       handleAbsIntrinsic(I);
3272       break;
3273     case Intrinsic::lifetime_start:
3274       handleLifetimeStart(I);
3275       break;
3276     case Intrinsic::launder_invariant_group:
3277     case Intrinsic::strip_invariant_group:
3278       handleInvariantGroup(I);
3279       break;
3280     case Intrinsic::bswap:
3281       handleBswap(I);
3282       break;
3283     case Intrinsic::masked_store:
3284       handleMaskedStore(I);
3285       break;
3286     case Intrinsic::masked_load:
           // The bool result of handleMaskedLoad is not used here.
3287       handleMaskedLoad(I);
3288       break;
3289     case Intrinsic::vector_reduce_and:
3290       handleVectorReduceAndIntrinsic(I);
3291       break;
3292     case Intrinsic::vector_reduce_or:
3293       handleVectorReduceOrIntrinsic(I);
3294       break;
3295     case Intrinsic::vector_reduce_add:
3296     case Intrinsic::vector_reduce_xor:
3297     case Intrinsic::vector_reduce_mul:
3298       handleVectorReduceIntrinsic(I);
3299       break;
3300     case Intrinsic::x86_sse_stmxcsr:
3301       handleStmxcsr(I);
3302       break;
3303     case Intrinsic::x86_sse_ldmxcsr:
3304       handleLdmxcsr(I);
3305       break;
3306     case Intrinsic::x86_avx512_vcvtsd2usi64:
3307     case Intrinsic::x86_avx512_vcvtsd2usi32:
3308     case Intrinsic::x86_avx512_vcvtss2usi64:
3309     case Intrinsic::x86_avx512_vcvtss2usi32:
3310     case Intrinsic::x86_avx512_cvttss2usi64:
3311     case Intrinsic::x86_avx512_cvttss2usi:
3312     case Intrinsic::x86_avx512_cvttsd2usi64:
3313     case Intrinsic::x86_avx512_cvttsd2usi:
3314     case Intrinsic::x86_avx512_cvtusi2ss:
3315     case Intrinsic::x86_avx512_cvtusi642sd:
3316     case Intrinsic::x86_avx512_cvtusi642ss:
3317       handleVectorConvertIntrinsic(I, 1, true);
3318       break;
3319     case Intrinsic::x86_sse2_cvtsd2si64:
3320     case Intrinsic::x86_sse2_cvtsd2si:
3321     case Intrinsic::x86_sse2_cvtsd2ss:
3322     case Intrinsic::x86_sse2_cvttsd2si64:
3323     case Intrinsic::x86_sse2_cvttsd2si:
3324     case Intrinsic::x86_sse_cvtss2si64:
3325     case Intrinsic::x86_sse_cvtss2si:
3326     case Intrinsic::x86_sse_cvttss2si64:
3327     case Intrinsic::x86_sse_cvttss2si:
3328       handleVectorConvertIntrinsic(I, 1);
3329       break;
3330     case Intrinsic::x86_sse_cvtps2pi:
3331     case Intrinsic::x86_sse_cvttps2pi:
3332       handleVectorConvertIntrinsic(I, 2);
3333       break;
3334 
         // Vector shifts handled with Variable == false.
3335     case Intrinsic::x86_avx512_psll_w_512:
3336     case Intrinsic::x86_avx512_psll_d_512:
3337     case Intrinsic::x86_avx512_psll_q_512:
3338     case Intrinsic::x86_avx512_pslli_w_512:
3339     case Intrinsic::x86_avx512_pslli_d_512:
3340     case Intrinsic::x86_avx512_pslli_q_512:
3341     case Intrinsic::x86_avx512_psrl_w_512:
3342     case Intrinsic::x86_avx512_psrl_d_512:
3343     case Intrinsic::x86_avx512_psrl_q_512:
3344     case Intrinsic::x86_avx512_psra_w_512:
3345     case Intrinsic::x86_avx512_psra_d_512:
3346     case Intrinsic::x86_avx512_psra_q_512:
3347     case Intrinsic::x86_avx512_psrli_w_512:
3348     case Intrinsic::x86_avx512_psrli_d_512:
3349     case Intrinsic::x86_avx512_psrli_q_512:
3350     case Intrinsic::x86_avx512_psrai_w_512:
3351     case Intrinsic::x86_avx512_psrai_d_512:
3352     case Intrinsic::x86_avx512_psrai_q_512:
3353     case Intrinsic::x86_avx512_psra_q_256:
3354     case Intrinsic::x86_avx512_psra_q_128:
3355     case Intrinsic::x86_avx512_psrai_q_256:
3356     case Intrinsic::x86_avx512_psrai_q_128:
3357     case Intrinsic::x86_avx2_psll_w:
3358     case Intrinsic::x86_avx2_psll_d:
3359     case Intrinsic::x86_avx2_psll_q:
3360     case Intrinsic::x86_avx2_pslli_w:
3361     case Intrinsic::x86_avx2_pslli_d:
3362     case Intrinsic::x86_avx2_pslli_q:
3363     case Intrinsic::x86_avx2_psrl_w:
3364     case Intrinsic::x86_avx2_psrl_d:
3365     case Intrinsic::x86_avx2_psrl_q:
3366     case Intrinsic::x86_avx2_psra_w:
3367     case Intrinsic::x86_avx2_psra_d:
3368     case Intrinsic::x86_avx2_psrli_w:
3369     case Intrinsic::x86_avx2_psrli_d:
3370     case Intrinsic::x86_avx2_psrli_q:
3371     case Intrinsic::x86_avx2_psrai_w:
3372     case Intrinsic::x86_avx2_psrai_d:
3373     case Intrinsic::x86_sse2_psll_w:
3374     case Intrinsic::x86_sse2_psll_d:
3375     case Intrinsic::x86_sse2_psll_q:
3376     case Intrinsic::x86_sse2_pslli_w:
3377     case Intrinsic::x86_sse2_pslli_d:
3378     case Intrinsic::x86_sse2_pslli_q:
3379     case Intrinsic::x86_sse2_psrl_w:
3380     case Intrinsic::x86_sse2_psrl_d:
3381     case Intrinsic::x86_sse2_psrl_q:
3382     case Intrinsic::x86_sse2_psra_w:
3383     case Intrinsic::x86_sse2_psra_d:
3384     case Intrinsic::x86_sse2_psrli_w:
3385     case Intrinsic::x86_sse2_psrli_d:
3386     case Intrinsic::x86_sse2_psrli_q:
3387     case Intrinsic::x86_sse2_psrai_w:
3388     case Intrinsic::x86_sse2_psrai_d:
3389     case Intrinsic::x86_mmx_psll_w:
3390     case Intrinsic::x86_mmx_psll_d:
3391     case Intrinsic::x86_mmx_psll_q:
3392     case Intrinsic::x86_mmx_pslli_w:
3393     case Intrinsic::x86_mmx_pslli_d:
3394     case Intrinsic::x86_mmx_pslli_q:
3395     case Intrinsic::x86_mmx_psrl_w:
3396     case Intrinsic::x86_mmx_psrl_d:
3397     case Intrinsic::x86_mmx_psrl_q:
3398     case Intrinsic::x86_mmx_psra_w:
3399     case Intrinsic::x86_mmx_psra_d:
3400     case Intrinsic::x86_mmx_psrli_w:
3401     case Intrinsic::x86_mmx_psrli_d:
3402     case Intrinsic::x86_mmx_psrli_q:
3403     case Intrinsic::x86_mmx_psrai_w:
3404     case Intrinsic::x86_mmx_psrai_d:
3405       handleVectorShiftIntrinsic(I, /* Variable */ false);
3406       break;
         // Vector shifts handled with Variable == true.
3407     case Intrinsic::x86_avx2_psllv_d:
3408     case Intrinsic::x86_avx2_psllv_d_256:
3409     case Intrinsic::x86_avx512_psllv_d_512:
3410     case Intrinsic::x86_avx2_psllv_q:
3411     case Intrinsic::x86_avx2_psllv_q_256:
3412     case Intrinsic::x86_avx512_psllv_q_512:
3413     case Intrinsic::x86_avx2_psrlv_d:
3414     case Intrinsic::x86_avx2_psrlv_d_256:
3415     case Intrinsic::x86_avx512_psrlv_d_512:
3416     case Intrinsic::x86_avx2_psrlv_q:
3417     case Intrinsic::x86_avx2_psrlv_q_256:
3418     case Intrinsic::x86_avx512_psrlv_q_512:
3419     case Intrinsic::x86_avx2_psrav_d:
3420     case Intrinsic::x86_avx2_psrav_d_256:
3421     case Intrinsic::x86_avx512_psrav_d_512:
3422     case Intrinsic::x86_avx512_psrav_q_128:
3423     case Intrinsic::x86_avx512_psrav_q_256:
3424     case Intrinsic::x86_avx512_psrav_q_512:
3425       handleVectorShiftIntrinsic(I, /* Variable */ true);
3426       break;
3427 
3428     case Intrinsic::x86_sse2_packsswb_128:
3429     case Intrinsic::x86_sse2_packssdw_128:
3430     case Intrinsic::x86_sse2_packuswb_128:
3431     case Intrinsic::x86_sse41_packusdw:
3432     case Intrinsic::x86_avx2_packsswb:
3433     case Intrinsic::x86_avx2_packssdw:
3434     case Intrinsic::x86_avx2_packuswb:
3435     case Intrinsic::x86_avx2_packusdw:
3436       handleVectorPackIntrinsic(I);
3437       break;
3438 
         // MMX pack variants pass an explicit element size in bits.
3439     case Intrinsic::x86_mmx_packsswb:
3440     case Intrinsic::x86_mmx_packuswb:
3441       handleVectorPackIntrinsic(I, 16);
3442       break;
3443 
3444     case Intrinsic::x86_mmx_packssdw:
3445       handleVectorPackIntrinsic(I, 32);
3446       break;
3447 
3448     case Intrinsic::x86_mmx_psad_bw:
3449     case Intrinsic::x86_sse2_psad_bw:
3450     case Intrinsic::x86_avx2_psad_bw:
3451       handleVectorSadIntrinsic(I);
3452       break;
3453 
3454     case Intrinsic::x86_sse2_pmadd_wd:
3455     case Intrinsic::x86_avx2_pmadd_wd:
3456     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3457     case Intrinsic::x86_avx2_pmadd_ub_sw:
3458       handleVectorPmaddIntrinsic(I);
3459       break;
3460 
         // The remaining pmadd variants pass an explicit input element size in
         // bits to handleVectorPmaddIntrinsic.
3461     case Intrinsic::x86_ssse3_pmadd_ub_sw:
3462       handleVectorPmaddIntrinsic(I, 8);
3463       break;
3464 
3465     case Intrinsic::x86_mmx_pmadd_wd:
3466       handleVectorPmaddIntrinsic(I, 16);
3467       break;
3468 
3469     case Intrinsic::x86_sse_cmp_ss:
3470     case Intrinsic::x86_sse2_cmp_sd:
3471     case Intrinsic::x86_sse_comieq_ss:
3472     case Intrinsic::x86_sse_comilt_ss:
3473     case Intrinsic::x86_sse_comile_ss:
3474     case Intrinsic::x86_sse_comigt_ss:
3475     case Intrinsic::x86_sse_comige_ss:
3476     case Intrinsic::x86_sse_comineq_ss:
3477     case Intrinsic::x86_sse_ucomieq_ss:
3478     case Intrinsic::x86_sse_ucomilt_ss:
3479     case Intrinsic::x86_sse_ucomile_ss:
3480     case Intrinsic::x86_sse_ucomigt_ss:
3481     case Intrinsic::x86_sse_ucomige_ss:
3482     case Intrinsic::x86_sse_ucomineq_ss:
3483     case Intrinsic::x86_sse2_comieq_sd:
3484     case Intrinsic::x86_sse2_comilt_sd:
3485     case Intrinsic::x86_sse2_comile_sd:
3486     case Intrinsic::x86_sse2_comigt_sd:
3487     case Intrinsic::x86_sse2_comige_sd:
3488     case Intrinsic::x86_sse2_comineq_sd:
3489     case Intrinsic::x86_sse2_ucomieq_sd:
3490     case Intrinsic::x86_sse2_ucomilt_sd:
3491     case Intrinsic::x86_sse2_ucomile_sd:
3492     case Intrinsic::x86_sse2_ucomigt_sd:
3493     case Intrinsic::x86_sse2_ucomige_sd:
3494     case Intrinsic::x86_sse2_ucomineq_sd:
3495       handleVectorCompareScalarIntrinsic(I);
3496       break;
3497 
3498     case Intrinsic::x86_sse_cmp_ps:
3499     case Intrinsic::x86_sse2_cmp_pd:
3500       // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
3501       // generates reasonably looking IR that fails in the backend with "Do not
3502       // know how to split the result of this operator!".
3503       handleVectorComparePackedIntrinsic(I);
3504       break;
3505 
3506     case Intrinsic::x86_bmi_bextr_32:
3507     case Intrinsic::x86_bmi_bextr_64:
3508     case Intrinsic::x86_bmi_bzhi_32:
3509     case Intrinsic::x86_bmi_bzhi_64:
3510     case Intrinsic::x86_bmi_pdep_32:
3511     case Intrinsic::x86_bmi_pdep_64:
3512     case Intrinsic::x86_bmi_pext_32:
3513     case Intrinsic::x86_bmi_pext_64:
3514       handleBmiIntrinsic(I);
3515       break;
3516 
3517     case Intrinsic::x86_pclmulqdq:
3518     case Intrinsic::x86_pclmulqdq_256:
3519     case Intrinsic::x86_pclmulqdq_512:
3520       handlePclmulIntrinsic(I);
3521       break;
3522 
3523     case Intrinsic::x86_sse41_round_sd:
3524     case Intrinsic::x86_sse41_round_ss:
3525       handleUnarySdSsIntrinsic(I);
3526       break;
3527     case Intrinsic::x86_sse2_max_sd:
3528     case Intrinsic::x86_sse_max_ss:
3529     case Intrinsic::x86_sse2_min_sd:
3530     case Intrinsic::x86_sse_min_ss:
3531       handleBinarySdSsIntrinsic(I);
3532       break;
3533 
3534     case Intrinsic::fshl:
3535     case Intrinsic::fshr:
3536       handleFunnelShift(I);
3537       break;
3538 
3539     case Intrinsic::is_constant:
3540       // The result of llvm.is.constant() is always defined.
3541       setShadow(&I, getCleanShadow(&I));
3542       setOrigin(&I, getCleanOrigin());
3543       break;
3544 
3545     default:
           // Fall back to the generic heuristics, then to the conservative
           // default instruction handling.
3546       if (!handleUnknownIntrinsic(I))
3547         visitInstruction(I);
3548       break;
3549     }
3550   }
3551 
       // Instrument a call recognised as LibFunc_atomic_load (see visitCallBase).
       // Call operands as read below: 0 = size (used as the byte count of the
       // shadow copy), 1 = source pointer, 2 = destination pointer, 3 = memory
       // ordering. The ordering operand is rewritten through the add-acquire
       // table, and after the call the source shadow is copied to the
       // destination shadow. With origin tracking, the origin loaded from the
       // source is passed through updateOrigin and applied to the destination
       // via MsanSetOriginFn.
3552   void visitLibAtomicLoad(CallBase &CB) {
3553     // Since we use getNextNode here, we can't have CB terminate the BB.
3554     assert(isa<CallInst>(CB));
3555 
3556     IRBuilder<> IRB(&CB);
3557     Value *Size = CB.getArgOperand(0);
3558     Value *SrcPtr = CB.getArgOperand(1);
3559     Value *DstPtr = CB.getArgOperand(2);
3560     Value *Ordering = CB.getArgOperand(3);
3561     // Convert the call to have at least Acquire ordering to make sure
3562     // the shadow operations aren't reordered before it.
3563     Value *NewOrdering =
3564         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
3565     CB.setArgOperand(3, NewOrdering);
3566 
         // Everything below is inserted after the call, reusing its debug
         // location.
3567     IRBuilder<> NextIRB(CB.getNextNode());
3568     NextIRB.SetCurrentDebugLocation(CB.getDebugLoc());
3569 
3570     Value *SrcShadowPtr, *SrcOriginPtr;
3571     std::tie(SrcShadowPtr, SrcOriginPtr) =
3572         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
3573                            /*isStore*/ false);
3574     Value *DstShadowPtr =
3575         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
3576                            /*isStore*/ true)
3577             .first;
3578 
3579     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
3580     if (MS.TrackOrigins) {
           // A single origin value is loaded from the source and applied to the
           // whole destination range.
3581       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
3582                                                    kMinOriginAlignment);
3583       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
3584       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
3585     }
3586   }
3587 
       // Instrument a call recognised as LibFunc_atomic_store (see
       // visitCallBase). Call operands as read below: 0 = size (used as the byte
       // count of the shadow memset), 2 = destination pointer, 3 = memory
       // ordering. The ordering operand is rewritten through the add-release
       // table and, before the call, the destination shadow is set to clean.
3588   void visitLibAtomicStore(CallBase &CB) {
3589     IRBuilder<> IRB(&CB);
3590     Value *Size = CB.getArgOperand(0);
3591     Value *DstPtr = CB.getArgOperand(2);
3592     Value *Ordering = CB.getArgOperand(3);
3593     // Convert the call to have at least Release ordering to make sure
3594     // the shadow operations aren't reordered after it.
3595     Value *NewOrdering =
3596         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
3597     CB.setArgOperand(3, NewOrdering);
3598 
         // Only the shadow pointer is needed; the origin pointer is discarded.
3599     Value *DstShadowPtr =
3600         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
3601                            /*isStore*/ true)
3602             .first;
3603 
3604     // Atomic store always paints clean shadow/origin. See file header.
3605     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
3606                      Align(1));
3607   }
3608 
3609   void visitCallBase(CallBase &CB) {
3610     assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
3611     if (CB.isInlineAsm()) {
3612       // For inline asm (either a call to asm function, or callbr instruction),
3613       // do the usual thing: check argument shadow and mark all outputs as
3614       // clean. Note that any side effects of the inline asm that are not
3615       // immediately visible in its constraints are not handled.
3616       if (ClHandleAsmConservative && MS.CompileKernel)
3617         visitAsmInstruction(CB);
3618       else
3619         visitInstruction(CB);
3620       return;
3621     }
3622     LibFunc LF;
3623     if (TLI->getLibFunc(CB, LF)) {
3624       // libatomic.a functions need to have special handling because there isn't
3625       // a good way to intercept them or compile the library with
3626       // instrumentation.
3627       switch (LF) {
3628       case LibFunc_atomic_load:
3629         if (!isa<CallInst>(CB)) {
3630           llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load."
3631                           "Ignoring!\n";
3632           break;
3633         }
3634         visitLibAtomicLoad(CB);
3635         return;
3636       case LibFunc_atomic_store:
3637         visitLibAtomicStore(CB);
3638         return;
3639       default:
3640         break;
3641       }
3642     }
3643 
3644     if (auto *Call = dyn_cast<CallInst>(&CB)) {
3645       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
3646 
3647       // We are going to insert code that relies on the fact that the callee
3648       // will become a non-readonly function after it is instrumented by us. To
3649       // prevent this code from being optimized out, mark that function
3650       // non-readonly in advance.
3651       AttributeMask B;
3652       B.addAttribute(Attribute::ReadOnly)
3653           .addAttribute(Attribute::ReadNone)
3654           .addAttribute(Attribute::WriteOnly)
3655           .addAttribute(Attribute::ArgMemOnly)
3656           .addAttribute(Attribute::Speculatable);
3657 
3658       Call->removeFnAttrs(B);
3659       if (Function *Func = Call->getCalledFunction()) {
3660         Func->removeFnAttrs(B);
3661       }
3662 
3663       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
3664     }
3665     IRBuilder<> IRB(&CB);
3666     bool MayCheckCall = MS.EagerChecks;
3667     if (Function *Func = CB.getCalledFunction()) {
3668       // __sanitizer_unaligned_{load,store} functions may be called by users
3669       // and always expects shadows in the TLS. So don't check them.
3670       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
3671     }
3672 
3673     unsigned ArgOffset = 0;
3674     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
3675     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
3676          ++ArgIt) {
3677       Value *A = *ArgIt;
3678       unsigned i = ArgIt - CB.arg_begin();
3679       if (!A->getType()->isSized()) {
3680         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
3681         continue;
3682       }
3683       unsigned Size = 0;
3684       const DataLayout &DL = F.getParent()->getDataLayout();
3685 
3686       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
3687       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
3688       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
3689 
3690       if (EagerCheck) {
3691         insertShadowCheck(A, &CB);
3692         Size = DL.getTypeAllocSize(A->getType());
3693       } else {
3694         Value *Store = nullptr;
3695         // Compute the Shadow for arg even if it is ByVal, because
3696         // in that case getShadow() will copy the actual arg shadow to
3697         // __msan_param_tls.
3698         Value *ArgShadow = getShadow(A);
3699         Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
3700         LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
3701                           << " Shadow: " << *ArgShadow << "\n");
3702         if (ByVal) {
3703           // ByVal requires some special handling as it's too big for a single
3704           // load
3705           assert(A->getType()->isPointerTy() &&
3706                  "ByVal argument is not a pointer!");
3707           Size = DL.getTypeAllocSize(CB.getParamByValType(i));
3708           if (ArgOffset + Size > kParamTLSSize)
3709             break;
3710           const MaybeAlign ParamAlignment(CB.getParamAlign(i));
3711           MaybeAlign Alignment = llvm::None;
3712           if (ParamAlignment)
3713             Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
3714           Value *AShadowPtr, *AOriginPtr;
3715           std::tie(AShadowPtr, AOriginPtr) =
3716               getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
3717                                  /*isStore*/ false);
3718           if (!PropagateShadow) {
3719             Store = IRB.CreateMemSet(ArgShadowBase,
3720                                      Constant::getNullValue(IRB.getInt8Ty()),
3721                                      Size, Alignment);
3722           } else {
3723             Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
3724                                      Alignment, Size);
3725             if (MS.TrackOrigins) {
3726               Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
3727               // FIXME: OriginSize should be:
3728               // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
3729               unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
3730               IRB.CreateMemCpy(
3731                   ArgOriginBase,
3732                   /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
3733                   AOriginPtr,
3734                   /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
3735             }
3736           }
3737         } else {
3738           // Any other parameters mean we need bit-grained tracking of uninit
3739           // data
3740           Size = DL.getTypeAllocSize(A->getType());
3741           if (ArgOffset + Size > kParamTLSSize)
3742             break;
3743           Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
3744                                          kShadowTLSAlignment);
3745           Constant *Cst = dyn_cast<Constant>(ArgShadow);
3746           if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
3747             IRB.CreateStore(getOrigin(A),
3748                             getOriginPtrForArgument(A, IRB, ArgOffset));
3749           }
3750         }
3751         (void)Store;
3752         assert(Store != nullptr);
3753         LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
3754       }
3755       assert(Size != 0);
3756       ArgOffset += alignTo(Size, kShadowTLSAlignment);
3757     }
3758     LLVM_DEBUG(dbgs() << "  done with call args\n");
3759 
3760     FunctionType *FT = CB.getFunctionType();
3761     if (FT->isVarArg()) {
3762       VAHelper->visitCallBase(CB, IRB);
3763     }
3764 
3765     // Now, get the shadow for the RetVal.
3766     if (!CB.getType()->isSized())
3767       return;
3768     // Don't emit the epilogue for musttail call returns.
3769     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
3770       return;
3771 
3772     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
3773       setShadow(&CB, getCleanShadow(&CB));
3774       setOrigin(&CB, getCleanOrigin());
3775       return;
3776     }
3777 
3778     IRBuilder<> IRBBefore(&CB);
3779     // Until we have full dynamic coverage, make sure the retval shadow is 0.
3780     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
3781     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
3782                                  kShadowTLSAlignment);
3783     BasicBlock::iterator NextInsn;
3784     if (isa<CallInst>(CB)) {
3785       NextInsn = ++CB.getIterator();
3786       assert(NextInsn != CB.getParent()->end());
3787     } else {
3788       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
3789       if (!NormalDest->getSinglePredecessor()) {
3790         // FIXME: this case is tricky, so we are just conservative here.
3791         // Perhaps we need to split the edge between this BB and NormalDest,
3792         // but a naive attempt to use SplitEdge leads to a crash.
3793         setShadow(&CB, getCleanShadow(&CB));
3794         setOrigin(&CB, getCleanOrigin());
3795         return;
3796       }
3797       // FIXME: NextInsn is likely in a basic block that has not been visited yet.
3798       // Anything inserted there will be instrumented by MSan later!
3799       NextInsn = NormalDest->getFirstInsertionPt();
3800       assert(NextInsn != NormalDest->end() &&
3801              "Could not find insertion point for retval shadow load");
3802     }
3803     IRBuilder<> IRBAfter(&*NextInsn);
3804     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
3805         getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
3806         kShadowTLSAlignment, "_msret");
3807     setShadow(&CB, RetvalShadow);
3808     if (MS.TrackOrigins)
3809       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
3810                                          getOriginPtrForRetval(IRBAfter)));
3811   }
3812 
3813   bool isAMustTailRetVal(Value *RetVal) {
3814     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
3815       RetVal = I->getOperand(0);
3816     }
3817     if (auto *I = dyn_cast<CallInst>(RetVal)) {
3818       return I->isMustTailCall();
3819     }
3820     return false;
3821   }
3822 
3823   void visitReturnInst(ReturnInst &I) {
3824     IRBuilder<> IRB(&I);
3825     Value *RetVal = I.getReturnValue();
3826     if (!RetVal) return;
3827     // Don't emit the epilogue for musttail call returns.
3828     if (isAMustTailRetVal(RetVal)) return;
3829     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
3830     bool HasNoUndef =
3831         F.hasRetAttribute(Attribute::NoUndef);
3832     bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
3833     // FIXME: Consider using SpecialCaseList to specify a list of functions that
3834     // must always return fully initialized values. For now, we hardcode "main".
3835     bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
3836 
3837     Value *Shadow = getShadow(RetVal);
3838     bool StoreOrigin = true;
3839     if (EagerCheck) {
3840       insertShadowCheck(RetVal, &I);
3841       Shadow = getCleanShadow(RetVal);
3842       StoreOrigin = false;
3843     }
3844 
3845     // The caller may still expect information passed over TLS if we pass our
3846     // check
3847     if (StoreShadow) {
3848       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
3849       if (MS.TrackOrigins && StoreOrigin)
3850         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
3851     }
3852   }
3853 
3854   void visitPHINode(PHINode &I) {
3855     IRBuilder<> IRB(&I);
3856     if (!PropagateShadow) {
3857       setShadow(&I, getCleanShadow(&I));
3858       setOrigin(&I, getCleanOrigin());
3859       return;
3860     }
3861 
3862     ShadowPHINodes.push_back(&I);
3863     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
3864                                 "_msphi_s"));
3865     if (MS.TrackOrigins)
3866       setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
3867                                   "_msphi_o"));
3868   }
3869 
3870   Value *getLocalVarDescription(AllocaInst &I) {
3871     SmallString<2048> StackDescriptionStorage;
3872     raw_svector_ostream StackDescription(StackDescriptionStorage);
3873     // We create a string with a description of the stack allocation and
3874     // pass it into __msan_set_alloca_origin.
3875     // It will be printed by the run-time if stack-originated UMR is found.
3876     // The first 4 bytes of the string are set to '----' and will be replaced
3877     // by __msan_va_arg_overflow_size_tls at the first call.
3878     StackDescription << "----" << I.getName() << "@" << F.getName();
3879     return createPrivateNonConstGlobalForString(*F.getParent(),
3880                                                 StackDescription.str());
3881   }
3882 
3883   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3884     if (PoisonStack && ClPoisonStackWithCall) {
3885       IRB.CreateCall(MS.MsanPoisonStackFn,
3886                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3887     } else {
3888       Value *ShadowBase, *OriginBase;
3889       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
3890           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
3891 
3892       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
3893       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
3894     }
3895 
3896     if (PoisonStack && MS.TrackOrigins) {
3897       Value *Descr = getLocalVarDescription(I);
3898       IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
3899                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3900                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
3901                       IRB.CreatePointerCast(&F, MS.IntptrTy)});
3902     }
3903   }
3904 
3905   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
3906     Value *Descr = getLocalVarDescription(I);
3907     if (PoisonStack) {
3908       IRB.CreateCall(MS.MsanPoisonAllocaFn,
3909                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
3910                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
3911     } else {
3912       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
3913                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
3914     }
3915   }
3916 
3917   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
3918     if (!InsPoint)
3919       InsPoint = &I;
3920     IRBuilder<> IRB(InsPoint->getNextNode());
3921     const DataLayout &DL = F.getParent()->getDataLayout();
3922     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
3923     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
3924     if (I.isArrayAllocation())
3925       Len = IRB.CreateMul(Len, I.getArraySize());
3926 
3927     if (MS.CompileKernel)
3928       poisonAllocaKmsan(I, IRB, Len);
3929     else
3930       poisonAllocaUserspace(I, IRB, Len);
3931   }
3932 
3933   void visitAllocaInst(AllocaInst &I) {
3934     setShadow(&I, getCleanShadow(&I));
3935     setOrigin(&I, getCleanOrigin());
3936     // We'll get to this alloca later unless it's poisoned at the corresponding
3937     // llvm.lifetime.start.
3938     AllocaSet.insert(&I);
3939   }
3940 
3941   void visitSelectInst(SelectInst& I) {
3942     IRBuilder<> IRB(&I);
3943     // a = select b, c, d
3944     Value *B = I.getCondition();
3945     Value *C = I.getTrueValue();
3946     Value *D = I.getFalseValue();
3947     Value *Sb = getShadow(B);
3948     Value *Sc = getShadow(C);
3949     Value *Sd = getShadow(D);
3950 
3951     // Result shadow if condition shadow is 0.
3952     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
3953     Value *Sa1;
3954     if (I.getType()->isAggregateType()) {
3955       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
3956       // an extra "select". This results in much more compact IR.
3957       // Sa = select Sb, poisoned, (select b, Sc, Sd)
3958       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
3959     } else {
3960       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
3961       // If Sb (condition is poisoned), look for bits in c and d that are equal
3962       // and both unpoisoned.
3963       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
3964 
3965       // Cast arguments to shadow-compatible type.
3966       C = CreateAppToShadowCast(IRB, C);
3967       D = CreateAppToShadowCast(IRB, D);
3968 
3969       // Result shadow if condition shadow is 1.
3970       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
3971     }
3972     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
3973     setShadow(&I, Sa);
3974     if (MS.TrackOrigins) {
3975       // Origins are always i32, so any vector conditions must be flattened.
3976       // FIXME: consider tracking vector origins for app vectors?
3977       if (B->getType()->isVectorTy()) {
3978         Type *FlatTy = getShadowTyNoVec(B->getType());
3979         B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
3980                                 ConstantInt::getNullValue(FlatTy));
3981         Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
3982                                       ConstantInt::getNullValue(FlatTy));
3983       }
3984       // a = select b, c, d
3985       // Oa = Sb ? Ob : (b ? Oc : Od)
3986       setOrigin(
3987           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
3988                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
3989                                                 getOrigin(I.getFalseValue()))));
3990     }
3991   }
3992 
3993   void visitLandingPadInst(LandingPadInst &I) {
3994     // Do nothing.
3995     // See https://github.com/google/sanitizers/issues/504
3996     setShadow(&I, getCleanShadow(&I));
3997     setOrigin(&I, getCleanOrigin());
3998   }
3999 
4000   void visitCatchSwitchInst(CatchSwitchInst &I) {
4001     setShadow(&I, getCleanShadow(&I));
4002     setOrigin(&I, getCleanOrigin());
4003   }
4004 
4005   void visitFuncletPadInst(FuncletPadInst &I) {
4006     setShadow(&I, getCleanShadow(&I));
4007     setOrigin(&I, getCleanOrigin());
4008   }
4009 
4010   void visitGetElementPtrInst(GetElementPtrInst &I) {
4011     handleShadowOr(I);
4012   }
4013 
4014   void visitExtractValueInst(ExtractValueInst &I) {
4015     IRBuilder<> IRB(&I);
4016     Value *Agg = I.getAggregateOperand();
4017     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
4018     Value *AggShadow = getShadow(Agg);
4019     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4020     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
4021     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
4022     setShadow(&I, ResShadow);
4023     setOriginForNaryOp(I);
4024   }
4025 
4026   void visitInsertValueInst(InsertValueInst &I) {
4027     IRBuilder<> IRB(&I);
4028     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
4029     Value *AggShadow = getShadow(I.getAggregateOperand());
4030     Value *InsShadow = getShadow(I.getInsertedValueOperand());
4031     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4032     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
4033     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
4034     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
4035     setShadow(&I, Res);
4036     setOriginForNaryOp(I);
4037   }
4038 
4039   void dumpInst(Instruction &I) {
4040     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
4041       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
4042     } else {
4043       errs() << "ZZZ " << I.getOpcodeName() << "\n";
4044     }
4045     errs() << "QQQ " << I << "\n";
4046   }
4047 
4048   void visitResumeInst(ResumeInst &I) {
4049     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
4050     // Nothing to do here.
4051   }
4052 
4053   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
4054     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
4055     // Nothing to do here.
4056   }
4057 
4058   void visitCatchReturnInst(CatchReturnInst &CRI) {
4059     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
4060     // Nothing to do here.
4061   }
4062 
4063   void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
4064                              IRBuilder<> &IRB, const DataLayout &DL,
4065                              bool isOutput) {
4066     // For each assembly argument, we check its value for being initialized.
4067     // If the argument is a pointer, we assume it points to a single element
4068     // of the corresponding type (or to a 8-byte word, if the type is unsized).
4069     // Each such pointer is instrumented with a call to the runtime library.
4070     Type *OpType = Operand->getType();
4071     // Check the operand value itself.
4072     insertShadowCheck(Operand, &I);
4073     if (!OpType->isPointerTy() || !isOutput) {
4074       assert(!isOutput);
4075       return;
4076     }
4077     if (!ElemTy->isSized())
4078       return;
4079     int Size = DL.getTypeStoreSize(ElemTy);
4080     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
4081     Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
4082     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
4083   }
4084 
4085   /// Get the number of output arguments returned by pointers.
4086   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
4087     int NumRetOutputs = 0;
4088     int NumOutputs = 0;
4089     Type *RetTy = cast<Value>(CB)->getType();
4090     if (!RetTy->isVoidTy()) {
4091       // Register outputs are returned via the CallInst return value.
4092       auto *ST = dyn_cast<StructType>(RetTy);
4093       if (ST)
4094         NumRetOutputs = ST->getNumElements();
4095       else
4096         NumRetOutputs = 1;
4097     }
4098     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
4099     for (const InlineAsm::ConstraintInfo &Info : Constraints) {
4100       switch (Info.Type) {
4101       case InlineAsm::isOutput:
4102         NumOutputs++;
4103         break;
4104       default:
4105         break;
4106       }
4107     }
4108     return NumOutputs - NumRetOutputs;
4109   }
4110 
4111   void visitAsmInstruction(Instruction &I) {
4112     // Conservative inline assembly handling: check for poisoned shadow of
4113     // asm() arguments, then unpoison the result and all the memory locations
4114     // pointed to by those arguments.
4115     // An inline asm() statement in C++ contains lists of input and output
4116     // arguments used by the assembly code. These are mapped to operands of the
4117     // CallInst as follows:
4118     //  - nR register outputs ("=r) are returned by value in a single structure
4119     //  (SSA value of the CallInst);
4120     //  - nO other outputs ("=m" and others) are returned by pointer as first
4121     // nO operands of the CallInst;
4122     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
4123     // remaining nI operands.
4124     // The total number of asm() arguments in the source is nR+nO+nI, and the
4125     // corresponding CallInst has nO+nI+1 operands (the last operand is the
4126     // function to be called).
4127     const DataLayout &DL = F.getParent()->getDataLayout();
4128     CallBase *CB = cast<CallBase>(&I);
4129     IRBuilder<> IRB(&I);
4130     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
4131     int OutputArgs = getNumOutputArgs(IA, CB);
4132     // The last operand of a CallInst is the function itself.
4133     int NumOperands = CB->getNumOperands() - 1;
4134 
4135     // Check input arguments. Doing so before unpoisoning output arguments, so
4136     // that we won't overwrite uninit values before checking them.
4137     for (int i = OutputArgs; i < NumOperands; i++) {
4138       Value *Operand = CB->getOperand(i);
4139       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4140                             /*isOutput*/ false);
4141     }
4142     // Unpoison output arguments. This must happen before the actual InlineAsm
4143     // call, so that the shadow for memory published in the asm() statement
4144     // remains valid.
4145     for (int i = 0; i < OutputArgs; i++) {
4146       Value *Operand = CB->getOperand(i);
4147       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4148                             /*isOutput*/ true);
4149     }
4150 
4151     setShadow(&I, getCleanShadow(&I));
4152     setOrigin(&I, getCleanOrigin());
4153   }
4154 
4155   void visitFreezeInst(FreezeInst &I) {
4156     // Freeze always returns a fully defined value.
4157     setShadow(&I, getCleanShadow(&I));
4158     setOrigin(&I, getCleanOrigin());
4159   }
4160 
4161   void visitInstruction(Instruction &I) {
4162     // Everything else: stop propagating and check for poisoned shadow.
4163     if (ClDumpStrictInstructions)
4164       dumpInst(I);
4165     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4166     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4167       Value *Operand = I.getOperand(i);
4168       if (Operand->getType()->isSized())
4169         insertShadowCheck(Operand, &I);
4170     }
4171     setShadow(&I, getCleanShadow(&I));
4172     setOrigin(&I, getCleanOrigin());
4173   }
4174 };
4175 
4176 /// AMD64-specific implementation of VarArgHelper.
4177 struct VarArgAMD64Helper : public VarArgHelper {
4178   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
4179   // See a comment in visitCallBase for more details.
4180   static const unsigned AMD64GpEndOffset = 48;  // AMD64 ABI Draft 0.99.6 p3.5.7
4181   static const unsigned AMD64FpEndOffsetSSE = 176;
4182   // If SSE is disabled, fp_offset in va_list is zero.
4183   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
4184 
4185   unsigned AMD64FpEndOffset;
4186   Function &F;
4187   MemorySanitizer &MS;
4188   MemorySanitizerVisitor &MSV;
4189   Value *VAArgTLSCopy = nullptr;
4190   Value *VAArgTLSOriginCopy = nullptr;
4191   Value *VAArgOverflowSize = nullptr;
4192 
4193   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4194 
4195   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4196 
4197   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
4198                     MemorySanitizerVisitor &MSV)
4199       : F(F), MS(MS), MSV(MSV) {
4200     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
4201     for (const auto &Attr : F.getAttributes().getFnAttrs()) {
4202       if (Attr.isStringAttribute() &&
4203           (Attr.getKindAsString() == "target-features")) {
4204         if (Attr.getValueAsString().contains("-sse"))
4205           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
4206         break;
4207       }
4208     }
4209   }
4210 
4211   ArgKind classifyArgument(Value* arg) {
4212     // A very rough approximation of X86_64 argument classification rules.
4213     Type *T = arg->getType();
4214     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
4215       return AK_FloatingPoint;
4216     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4217       return AK_GeneralPurpose;
4218     if (T->isPointerTy())
4219       return AK_GeneralPurpose;
4220     return AK_Memory;
4221   }
4222 
4223   // For VarArg functions, store the argument shadow in an ABI-specific format
4224   // that corresponds to va_list layout.
4225   // We do this because Clang lowers va_arg in the frontend, and this pass
4226   // only sees the low level code that deals with va_list internals.
4227   // A much easier alternative (provided that Clang emits va_arg instructions)
4228   // would have been to associate each live instance of va_list with a copy of
4229   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
4230   // order.
4231   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4232     unsigned GpOffset = 0;
4233     unsigned FpOffset = AMD64GpEndOffset;
4234     unsigned OverflowOffset = AMD64FpEndOffset;
4235     const DataLayout &DL = F.getParent()->getDataLayout();
4236     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4237          ++ArgIt) {
4238       Value *A = *ArgIt;
4239       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4240       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4241       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4242       if (IsByVal) {
4243         // ByVal arguments always go to the overflow area.
4244         // Fixed arguments passed through the overflow area will be stepped
4245         // over by va_start, so don't count them towards the offset.
4246         if (IsFixed)
4247           continue;
4248         assert(A->getType()->isPointerTy());
4249         Type *RealTy = CB.getParamByValType(ArgNo);
4250         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4251         Value *ShadowBase = getShadowPtrForVAArgument(
4252             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
4253         Value *OriginBase = nullptr;
4254         if (MS.TrackOrigins)
4255           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4256         OverflowOffset += alignTo(ArgSize, 8);
4257         if (!ShadowBase)
4258           continue;
4259         Value *ShadowPtr, *OriginPtr;
4260         std::tie(ShadowPtr, OriginPtr) =
4261             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4262                                    /*isStore*/ false);
4263 
4264         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4265                          kShadowTLSAlignment, ArgSize);
4266         if (MS.TrackOrigins)
4267           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4268                            kShadowTLSAlignment, ArgSize);
4269       } else {
4270         ArgKind AK = classifyArgument(A);
4271         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4272           AK = AK_Memory;
4273         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4274           AK = AK_Memory;
4275         Value *ShadowBase, *OriginBase = nullptr;
4276         switch (AK) {
4277           case AK_GeneralPurpose:
4278             ShadowBase =
4279                 getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4280             if (MS.TrackOrigins)
4281               OriginBase =
4282                   getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4283             GpOffset += 8;
4284             break;
4285           case AK_FloatingPoint:
4286             ShadowBase =
4287                 getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4288             if (MS.TrackOrigins)
4289               OriginBase =
4290                   getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4291             FpOffset += 16;
4292             break;
4293           case AK_Memory:
4294             if (IsFixed)
4295               continue;
4296             uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4297             ShadowBase =
4298                 getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4299             if (MS.TrackOrigins)
4300               OriginBase =
4301                   getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4302             OverflowOffset += alignTo(ArgSize, 8);
4303         }
4304         // Take fixed arguments into account for GpOffset and FpOffset,
4305         // but don't actually store shadows for them.
4306         // TODO(glider): don't call get*PtrForVAArgument() for them.
4307         if (IsFixed)
4308           continue;
4309         if (!ShadowBase)
4310           continue;
4311         Value *Shadow = MSV.getShadow(A);
4312         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4313         if (MS.TrackOrigins) {
4314           Value *Origin = MSV.getOrigin(A);
4315           unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
4316           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4317                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4318         }
4319       }
4320     }
4321     Constant *OverflowSize =
4322       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4323     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4324   }
4325 
4326   /// Compute the shadow address for a given va_arg.
4327   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4328                                    unsigned ArgOffset, unsigned ArgSize) {
4329     // Make sure we don't overflow __msan_va_arg_tls.
4330     if (ArgOffset + ArgSize > kParamTLSSize)
4331       return nullptr;
4332     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4333     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4334     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4335                               "_msarg_va_s");
4336   }
4337 
4338   /// Compute the origin address for a given va_arg.
4339   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4340     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4341     // getOriginPtrForVAArgument() is always called after
4342     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4343     // overflow.
4344     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4345     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4346                               "_msarg_va_o");
4347   }
4348 
4349   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4350     IRBuilder<> IRB(&I);
4351     Value *VAListTag = I.getArgOperand(0);
4352     Value *ShadowPtr, *OriginPtr;
4353     const Align Alignment = Align(8);
4354     std::tie(ShadowPtr, OriginPtr) =
4355         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4356                                /*isStore*/ true);
4357 
4358     // Unpoison the whole __va_list_tag.
4359     // FIXME: magic ABI constants.
4360     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4361                      /* size */ 24, Alignment, false);
4362     // We shouldn't need to zero out the origins, as they're only checked for
4363     // nonzero shadow.
4364   }
4365 
4366   void visitVAStartInst(VAStartInst &I) override {
4367     if (F.getCallingConv() == CallingConv::Win64)
4368       return;
4369     VAStartInstrumentationList.push_back(&I);
4370     unpoisonVAListTagForInst(I);
4371   }
4372 
4373   void visitVACopyInst(VACopyInst &I) override {
4374     if (F.getCallingConv() == CallingConv::Win64) return;
4375     unpoisonVAListTagForInst(I);
4376   }
4377 
4378   void finalizeInstrumentation() override {
4379     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4380            "finalizeInstrumentation called twice");
4381     if (!VAStartInstrumentationList.empty()) {
4382       // If there is a va_start in this function, make a backup copy of
4383       // va_arg_tls somewhere in the function entry block.
4384       IRBuilder<> IRB(MSV.FnPrologueEnd);
4385       VAArgOverflowSize =
4386           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4387       Value *CopySize =
4388         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
4389                       VAArgOverflowSize);
4390       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4391       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4392       if (MS.TrackOrigins) {
4393         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4394         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
4395                          Align(8), CopySize);
4396       }
4397     }
4398 
4399     // Instrument va_start.
4400     // Copy va_list shadow from the backup copy of the TLS contents.
4401     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4402       CallInst *OrigInst = VAStartInstrumentationList[i];
4403       IRBuilder<> IRB(OrigInst->getNextNode());
4404       Value *VAListTag = OrigInst->getArgOperand(0);
4405 
4406       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4407       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4408           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4409                         ConstantInt::get(MS.IntptrTy, 16)),
4410           PointerType::get(RegSaveAreaPtrTy, 0));
4411       Value *RegSaveAreaPtr =
4412           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4413       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4414       const Align Alignment = Align(16);
4415       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4416           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4417                                  Alignment, /*isStore*/ true);
4418       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4419                        AMD64FpEndOffset);
4420       if (MS.TrackOrigins)
4421         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4422                          Alignment, AMD64FpEndOffset);
4423       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4424       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4425           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4426                         ConstantInt::get(MS.IntptrTy, 8)),
4427           PointerType::get(OverflowArgAreaPtrTy, 0));
4428       Value *OverflowArgAreaPtr =
4429           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4430       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4431       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4432           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4433                                  Alignment, /*isStore*/ true);
4434       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4435                                              AMD64FpEndOffset);
4436       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4437                        VAArgOverflowSize);
4438       if (MS.TrackOrigins) {
4439         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4440                                         AMD64FpEndOffset);
4441         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4442                          VAArgOverflowSize);
4443       }
4444     }
4445   }
4446 };
4447 
4448 /// MIPS64-specific implementation of VarArgHelper.
4449 struct VarArgMIPS64Helper : public VarArgHelper {
4450   Function &F;
4451   MemorySanitizer &MS;
4452   MemorySanitizerVisitor &MSV;
4453   Value *VAArgTLSCopy = nullptr;
4454   Value *VAArgSize = nullptr;
4455 
4456   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4457 
4458   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4459                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4460 
4461   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4462     unsigned VAArgOffset = 0;
4463     const DataLayout &DL = F.getParent()->getDataLayout();
4464     for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
4465               End = CB.arg_end();
4466          ArgIt != End; ++ArgIt) {
4467       Triple TargetTriple(F.getParent()->getTargetTriple());
4468       Value *A = *ArgIt;
4469       Value *Base;
4470       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4471       if (TargetTriple.getArch() == Triple::mips64) {
4472         // Adjusting the shadow for argument with size < 8 to match the placement
4473         // of bits in big endian system
4474         if (ArgSize < 8)
4475           VAArgOffset += (8 - ArgSize);
4476       }
4477       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4478       VAArgOffset += ArgSize;
4479       VAArgOffset = alignTo(VAArgOffset, 8);
4480       if (!Base)
4481         continue;
4482       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4483     }
4484 
4485     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
4486     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
4487     // a new class member i.e. it is the total size of all VarArgs.
4488     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4489   }
4490 
4491   /// Compute the shadow address for a given va_arg.
4492   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4493                                    unsigned ArgOffset, unsigned ArgSize) {
4494     // Make sure we don't overflow __msan_va_arg_tls.
4495     if (ArgOffset + ArgSize > kParamTLSSize)
4496       return nullptr;
4497     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4498     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4499     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4500                               "_msarg");
4501   }
4502 
4503   void visitVAStartInst(VAStartInst &I) override {
4504     IRBuilder<> IRB(&I);
4505     VAStartInstrumentationList.push_back(&I);
4506     Value *VAListTag = I.getArgOperand(0);
4507     Value *ShadowPtr, *OriginPtr;
4508     const Align Alignment = Align(8);
4509     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4510         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4511     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4512                      /* size */ 8, Alignment, false);
4513   }
4514 
4515   void visitVACopyInst(VACopyInst &I) override {
4516     IRBuilder<> IRB(&I);
4517     VAStartInstrumentationList.push_back(&I);
4518     Value *VAListTag = I.getArgOperand(0);
4519     Value *ShadowPtr, *OriginPtr;
4520     const Align Alignment = Align(8);
4521     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4522         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4523     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4524                      /* size */ 8, Alignment, false);
4525   }
4526 
4527   void finalizeInstrumentation() override {
4528     assert(!VAArgSize && !VAArgTLSCopy &&
4529            "finalizeInstrumentation called twice");
4530     IRBuilder<> IRB(MSV.FnPrologueEnd);
4531     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4532     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4533                                     VAArgSize);
4534 
4535     if (!VAStartInstrumentationList.empty()) {
4536       // If there is a va_start in this function, make a backup copy of
4537       // va_arg_tls somewhere in the function entry block.
4538       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4539       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4540     }
4541 
4542     // Instrument va_start.
4543     // Copy va_list shadow from the backup copy of the TLS contents.
4544     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4545       CallInst *OrigInst = VAStartInstrumentationList[i];
4546       IRBuilder<> IRB(OrigInst->getNextNode());
4547       Value *VAListTag = OrigInst->getArgOperand(0);
4548       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4549       Value *RegSaveAreaPtrPtr =
4550           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4551                              PointerType::get(RegSaveAreaPtrTy, 0));
4552       Value *RegSaveAreaPtr =
4553           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4554       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4555       const Align Alignment = Align(8);
4556       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4557           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4558                                  Alignment, /*isStore*/ true);
4559       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4560                        CopySize);
4561     }
4562   }
4563 };
4564 
4565 /// AArch64-specific implementation of VarArgHelper.
4566 struct VarArgAArch64Helper : public VarArgHelper {
4567   static const unsigned kAArch64GrArgSize = 64;
4568   static const unsigned kAArch64VrArgSize = 128;
4569 
4570   static const unsigned AArch64GrBegOffset = 0;
4571   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
4572   // Make VR space aligned to 16 bytes.
4573   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
4574   static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
4575                                              + kAArch64VrArgSize;
4576   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
4577 
4578   Function &F;
4579   MemorySanitizer &MS;
4580   MemorySanitizerVisitor &MSV;
4581   Value *VAArgTLSCopy = nullptr;
4582   Value *VAArgOverflowSize = nullptr;
4583 
4584   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4585 
4586   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4587 
4588   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
4589                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4590 
4591   ArgKind classifyArgument(Value* arg) {
4592     Type *T = arg->getType();
4593     if (T->isFPOrFPVectorTy())
4594       return AK_FloatingPoint;
4595     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4596         || (T->isPointerTy()))
4597       return AK_GeneralPurpose;
4598     return AK_Memory;
4599   }
4600 
4601   // The instrumentation stores the argument shadow in a non ABI-specific
4602   // format because it does not know which argument is named (since Clang,
4603   // like x86_64 case, lowers the va_args in the frontend and this pass only
4604   // sees the low level code that deals with va_list internals).
4605   // The first seven GR registers are saved in the first 56 bytes of the
4606   // va_arg tls arra, followers by the first 8 FP/SIMD registers, and then
4607   // the remaining arguments.
4608   // Using constant offset within the va_arg TLS array allows fast copy
4609   // in the finalize instrumentation.
4610   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4611     unsigned GrOffset = AArch64GrBegOffset;
4612     unsigned VrOffset = AArch64VrBegOffset;
4613     unsigned OverflowOffset = AArch64VAEndOffset;
4614 
4615     const DataLayout &DL = F.getParent()->getDataLayout();
4616     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4617          ++ArgIt) {
4618       Value *A = *ArgIt;
4619       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4620       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4621       ArgKind AK = classifyArgument(A);
4622       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
4623         AK = AK_Memory;
4624       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
4625         AK = AK_Memory;
4626       Value *Base;
4627       switch (AK) {
4628         case AK_GeneralPurpose:
4629           Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
4630           GrOffset += 8;
4631           break;
4632         case AK_FloatingPoint:
4633           Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
4634           VrOffset += 16;
4635           break;
4636         case AK_Memory:
4637           // Don't count fixed arguments in the overflow area - va_start will
4638           // skip right over them.
4639           if (IsFixed)
4640             continue;
4641           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4642           Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
4643                                            alignTo(ArgSize, 8));
4644           OverflowOffset += alignTo(ArgSize, 8);
4645           break;
4646       }
4647       // Count Gp/Vr fixed arguments to their respective offsets, but don't
4648       // bother to actually store a shadow.
4649       if (IsFixed)
4650         continue;
4651       if (!Base)
4652         continue;
4653       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4654     }
4655     Constant *OverflowSize =
4656       ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
4657     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4658   }
4659 
4660   /// Compute the shadow address for a given va_arg.
4661   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4662                                    unsigned ArgOffset, unsigned ArgSize) {
4663     // Make sure we don't overflow __msan_va_arg_tls.
4664     if (ArgOffset + ArgSize > kParamTLSSize)
4665       return nullptr;
4666     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4667     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4668     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4669                               "_msarg");
4670   }
4671 
4672   void visitVAStartInst(VAStartInst &I) override {
4673     IRBuilder<> IRB(&I);
4674     VAStartInstrumentationList.push_back(&I);
4675     Value *VAListTag = I.getArgOperand(0);
4676     Value *ShadowPtr, *OriginPtr;
4677     const Align Alignment = Align(8);
4678     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4679         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4680     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4681                      /* size */ 32, Alignment, false);
4682   }
4683 
4684   void visitVACopyInst(VACopyInst &I) override {
4685     IRBuilder<> IRB(&I);
4686     VAStartInstrumentationList.push_back(&I);
4687     Value *VAListTag = I.getArgOperand(0);
4688     Value *ShadowPtr, *OriginPtr;
4689     const Align Alignment = Align(8);
4690     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4691         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4692     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4693                      /* size */ 32, Alignment, false);
4694   }
4695 
4696   // Retrieve a va_list field of 'void*' size.
4697   Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4698     Value *SaveAreaPtrPtr =
4699       IRB.CreateIntToPtr(
4700         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4701                       ConstantInt::get(MS.IntptrTy, offset)),
4702         Type::getInt64PtrTy(*MS.C));
4703     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
4704   }
4705 
4706   // Retrieve a va_list field of 'int' size.
4707   Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
4708     Value *SaveAreaPtr =
4709       IRB.CreateIntToPtr(
4710         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4711                       ConstantInt::get(MS.IntptrTy, offset)),
4712         Type::getInt32PtrTy(*MS.C));
4713     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
4714     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
4715   }
4716 
4717   void finalizeInstrumentation() override {
4718     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4719            "finalizeInstrumentation called twice");
4720     if (!VAStartInstrumentationList.empty()) {
4721       // If there is a va_start in this function, make a backup copy of
4722       // va_arg_tls somewhere in the function entry block.
4723       IRBuilder<> IRB(MSV.FnPrologueEnd);
4724       VAArgOverflowSize =
4725           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4726       Value *CopySize =
4727         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
4728                       VAArgOverflowSize);
4729       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4730       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4731     }
4732 
4733     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
4734     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
4735 
4736     // Instrument va_start, copy va_list shadow from the backup copy of
4737     // the TLS contents.
4738     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4739       CallInst *OrigInst = VAStartInstrumentationList[i];
4740       IRBuilder<> IRB(OrigInst->getNextNode());
4741 
4742       Value *VAListTag = OrigInst->getArgOperand(0);
4743 
4744       // The variadic ABI for AArch64 creates two areas to save the incoming
4745       // argument registers (one for 64-bit general register xn-x7 and another
4746       // for 128-bit FP/SIMD vn-v7).
4747       // We need then to propagate the shadow arguments on both regions
4748       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
4749       // The remaining arguments are saved on shadow for 'va::stack'.
4750       // One caveat is it requires only to propagate the non-named arguments,
4751       // however on the call site instrumentation 'all' the arguments are
4752       // saved. So to copy the shadow values from the va_arg TLS array
4753       // we need to adjust the offset for both GR and VR fields based on
4754       // the __{gr,vr}_offs value (since they are stores based on incoming
4755       // named arguments).
4756 
4757       // Read the stack pointer from the va_list.
4758       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
4759 
4760       // Read both the __gr_top and __gr_off and add them up.
4761       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
4762       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
4763 
4764       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
4765 
4766       // Read both the __vr_top and __vr_off and add them up.
4767       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
4768       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
4769 
4770       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
4771 
4772       // It does not know how many named arguments is being used and, on the
4773       // callsite all the arguments were saved.  Since __gr_off is defined as
4774       // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
4775       // argument by ignoring the bytes of shadow from named arguments.
4776       Value *GrRegSaveAreaShadowPtrOff =
4777         IRB.CreateAdd(GrArgSize, GrOffSaveArea);
4778 
4779       Value *GrRegSaveAreaShadowPtr =
4780           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4781                                  Align(8), /*isStore*/ true)
4782               .first;
4783 
4784       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4785                                               GrRegSaveAreaShadowPtrOff);
4786       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
4787 
4788       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
4789                        GrCopySize);
4790 
4791       // Again, but for FP/SIMD values.
4792       Value *VrRegSaveAreaShadowPtrOff =
4793           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
4794 
4795       Value *VrRegSaveAreaShadowPtr =
4796           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4797                                  Align(8), /*isStore*/ true)
4798               .first;
4799 
4800       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
4801         IRB.getInt8Ty(),
4802         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4803                               IRB.getInt32(AArch64VrBegOffset)),
4804         VrRegSaveAreaShadowPtrOff);
4805       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
4806 
4807       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
4808                        VrCopySize);
4809 
4810       // And finally for remaining arguments.
4811       Value *StackSaveAreaShadowPtr =
4812           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
4813                                  Align(16), /*isStore*/ true)
4814               .first;
4815 
4816       Value *StackSrcPtr =
4817         IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
4818                               IRB.getInt32(AArch64VAEndOffset));
4819 
4820       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
4821                        Align(16), VAArgOverflowSize);
4822     }
4823   }
4824 };
4825 
4826 /// PowerPC64-specific implementation of VarArgHelper.
4827 struct VarArgPowerPC64Helper : public VarArgHelper {
4828   Function &F;
4829   MemorySanitizer &MS;
4830   MemorySanitizerVisitor &MSV;
4831   Value *VAArgTLSCopy = nullptr;
4832   Value *VAArgSize = nullptr;
4833 
4834   SmallVector<CallInst*, 16> VAStartInstrumentationList;
4835 
4836   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
4837                     MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
4838 
4839   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4840     // For PowerPC, we need to deal with alignment of stack arguments -
4841     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
4842     // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
4843     // For that reason, we compute current offset from stack pointer (which is
4844     // always properly aligned), and offset for the first vararg, then subtract
4845     // them.
4846     unsigned VAArgBase;
4847     Triple TargetTriple(F.getParent()->getTargetTriple());
4848     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
4849     // and 32 bytes for ABIv2.  This is usually determined by target
4850     // endianness, but in theory could be overridden by function attribute.
4851     if (TargetTriple.getArch() == Triple::ppc64)
4852       VAArgBase = 48;
4853     else
4854       VAArgBase = 32;
4855     unsigned VAArgOffset = VAArgBase;
4856     const DataLayout &DL = F.getParent()->getDataLayout();
4857     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
4858          ++ArgIt) {
4859       Value *A = *ArgIt;
4860       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
4861       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4862       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4863       if (IsByVal) {
4864         assert(A->getType()->isPointerTy());
4865         Type *RealTy = CB.getParamByValType(ArgNo);
4866         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4867         Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8));
4868         if (ArgAlign < 8)
4869           ArgAlign = Align(8);
4870         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4871         if (!IsFixed) {
4872           Value *Base = getShadowPtrForVAArgument(
4873               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
4874           if (Base) {
4875             Value *AShadowPtr, *AOriginPtr;
4876             std::tie(AShadowPtr, AOriginPtr) =
4877                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
4878                                        kShadowTLSAlignment, /*isStore*/ false);
4879 
4880             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
4881                              kShadowTLSAlignment, ArgSize);
4882           }
4883         }
4884         VAArgOffset += alignTo(ArgSize, Align(8));
4885       } else {
4886         Value *Base;
4887         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4888         Align ArgAlign = Align(8);
4889         if (A->getType()->isArrayTy()) {
4890           // Arrays are aligned to element size, except for long double
4891           // arrays, which are aligned to 8 bytes.
4892           Type *ElementTy = A->getType()->getArrayElementType();
4893           if (!ElementTy->isPPC_FP128Ty())
4894             ArgAlign = Align(DL.getTypeAllocSize(ElementTy));
4895         } else if (A->getType()->isVectorTy()) {
4896           // Vectors are naturally aligned.
4897           ArgAlign = Align(ArgSize);
4898         }
4899         if (ArgAlign < 8)
4900           ArgAlign = Align(8);
4901         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
4902         if (DL.isBigEndian()) {
4903           // Adjusting the shadow for argument with size < 8 to match the
4904           // placement of bits in big endian system
4905           if (ArgSize < 8)
4906             VAArgOffset += (8 - ArgSize);
4907         }
4908         if (!IsFixed) {
4909           Base = getShadowPtrForVAArgument(A->getType(), IRB,
4910                                            VAArgOffset - VAArgBase, ArgSize);
4911           if (Base)
4912             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4913         }
4914         VAArgOffset += ArgSize;
4915         VAArgOffset = alignTo(VAArgOffset, Align(8));
4916       }
4917       if (IsFixed)
4918         VAArgBase = VAArgOffset;
4919     }
4920 
4921     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
4922                                                 VAArgOffset - VAArgBase);
4923     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
4924     // a new class member i.e. it is the total size of all VarArgs.
4925     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4926   }
4927 
4928   /// Compute the shadow address for a given va_arg.
4929   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4930                                    unsigned ArgOffset, unsigned ArgSize) {
4931     // Make sure we don't overflow __msan_va_arg_tls.
4932     if (ArgOffset + ArgSize > kParamTLSSize)
4933       return nullptr;
4934     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4935     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4936     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4937                               "_msarg");
4938   }
4939 
4940   void visitVAStartInst(VAStartInst &I) override {
4941     IRBuilder<> IRB(&I);
4942     VAStartInstrumentationList.push_back(&I);
4943     Value *VAListTag = I.getArgOperand(0);
4944     Value *ShadowPtr, *OriginPtr;
4945     const Align Alignment = Align(8);
4946     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4947         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4948     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4949                      /* size */ 8, Alignment, false);
4950   }
4951 
4952   void visitVACopyInst(VACopyInst &I) override {
4953     IRBuilder<> IRB(&I);
4954     Value *VAListTag = I.getArgOperand(0);
4955     Value *ShadowPtr, *OriginPtr;
4956     const Align Alignment = Align(8);
4957     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
4958         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4959     // Unpoison the whole __va_list_tag.
4960     // FIXME: magic ABI constants.
4961     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4962                      /* size */ 8, Alignment, false);
4963   }
4964 
4965   void finalizeInstrumentation() override {
4966     assert(!VAArgSize && !VAArgTLSCopy &&
4967            "finalizeInstrumentation called twice");
4968     IRBuilder<> IRB(MSV.FnPrologueEnd);
4969     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4970     Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
4971                                     VAArgSize);
4972 
4973     if (!VAStartInstrumentationList.empty()) {
4974       // If there is a va_start in this function, make a backup copy of
4975       // va_arg_tls somewhere in the function entry block.
4976       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4977       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
4978     }
4979 
4980     // Instrument va_start.
4981     // Copy va_list shadow from the backup copy of the TLS contents.
4982     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4983       CallInst *OrigInst = VAStartInstrumentationList[i];
4984       IRBuilder<> IRB(OrigInst->getNextNode());
4985       Value *VAListTag = OrigInst->getArgOperand(0);
4986       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4987       Value *RegSaveAreaPtrPtr =
4988           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4989                              PointerType::get(RegSaveAreaPtrTy, 0));
4990       Value *RegSaveAreaPtr =
4991           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4992       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4993       const Align Alignment = Align(8);
4994       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4995           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4996                                  Alignment, /*isStore*/ true);
4997       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4998                        CopySize);
4999     }
5000   }
5001 };
5002 
5003 /// SystemZ-specific implementation of VarArgHelper.
5004 struct VarArgSystemZHelper : public VarArgHelper {
5005   static const unsigned SystemZGpOffset = 16;
5006   static const unsigned SystemZGpEndOffset = 56;
5007   static const unsigned SystemZFpOffset = 128;
5008   static const unsigned SystemZFpEndOffset = 160;
5009   static const unsigned SystemZMaxVrArgs = 8;
5010   static const unsigned SystemZRegSaveAreaSize = 160;
5011   static const unsigned SystemZOverflowOffset = 160;
5012   static const unsigned SystemZVAListTagSize = 32;
5013   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
5014   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
5015 
5016   Function &F;
5017   MemorySanitizer &MS;
5018   MemorySanitizerVisitor &MSV;
5019   Value *VAArgTLSCopy = nullptr;
5020   Value *VAArgTLSOriginCopy = nullptr;
5021   Value *VAArgOverflowSize = nullptr;
5022 
5023   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5024 
5025   enum class ArgKind {
5026     GeneralPurpose,
5027     FloatingPoint,
5028     Vector,
5029     Memory,
5030     Indirect,
5031   };
5032 
5033   enum class ShadowExtension { None, Zero, Sign };
5034 
5035   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
5036                       MemorySanitizerVisitor &MSV)
5037       : F(F), MS(MS), MSV(MSV) {}
5038 
5039   ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
5040     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
5041     // only a few possibilities of what it can be. In particular, enums, single
5042     // element structs and large types have already been taken care of.
5043 
5044     // Some i128 and fp128 arguments are converted to pointers only in the
5045     // back end.
5046     if (T->isIntegerTy(128) || T->isFP128Ty())
5047       return ArgKind::Indirect;
5048     if (T->isFloatingPointTy())
5049       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
5050     if (T->isIntegerTy() || T->isPointerTy())
5051       return ArgKind::GeneralPurpose;
5052     if (T->isVectorTy())
5053       return ArgKind::Vector;
5054     return ArgKind::Memory;
5055   }
5056 
5057   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
5058     // ABI says: "One of the simple integer types no more than 64 bits wide.
5059     // ... If such an argument is shorter than 64 bits, replace it by a full
5060     // 64-bit integer representing the same number, using sign or zero
5061     // extension". Shadow for an integer argument has the same type as the
5062     // argument itself, so it can be sign or zero extended as well.
5063     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
5064     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
5065     if (ZExt) {
5066       assert(!SExt);
5067       return ShadowExtension::Zero;
5068     }
5069     if (SExt) {
5070       assert(!ZExt);
5071       return ShadowExtension::Sign;
5072     }
5073     return ShadowExtension::None;
5074   }
5075 
5076   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5077     bool IsSoftFloatABI = CB.getCalledFunction()
5078                               ->getFnAttribute("use-soft-float")
5079                               .getValueAsBool();
5080     unsigned GpOffset = SystemZGpOffset;
5081     unsigned FpOffset = SystemZFpOffset;
5082     unsigned VrIndex = 0;
5083     unsigned OverflowOffset = SystemZOverflowOffset;
5084     const DataLayout &DL = F.getParent()->getDataLayout();
5085     for (auto ArgIt = CB.arg_begin(), End = CB.arg_end(); ArgIt != End;
5086          ++ArgIt) {
5087       Value *A = *ArgIt;
5088       unsigned ArgNo = CB.getArgOperandNo(ArgIt);
5089       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5090       // SystemZABIInfo does not produce ByVal parameters.
5091       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
5092       Type *T = A->getType();
5093       ArgKind AK = classifyArgument(T, IsSoftFloatABI);
5094       if (AK == ArgKind::Indirect) {
5095         T = PointerType::get(T, 0);
5096         AK = ArgKind::GeneralPurpose;
5097       }
5098       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
5099         AK = ArgKind::Memory;
5100       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
5101         AK = ArgKind::Memory;
5102       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
5103         AK = ArgKind::Memory;
5104       Value *ShadowBase = nullptr;
5105       Value *OriginBase = nullptr;
5106       ShadowExtension SE = ShadowExtension::None;
5107       switch (AK) {
5108       case ArgKind::GeneralPurpose: {
5109         // Always keep track of GpOffset, but store shadow only for varargs.
5110         uint64_t ArgSize = 8;
5111         if (GpOffset + ArgSize <= kParamTLSSize) {
5112           if (!IsFixed) {
5113             SE = getShadowExtension(CB, ArgNo);
5114             uint64_t GapSize = 0;
5115             if (SE == ShadowExtension::None) {
5116               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5117               assert(ArgAllocSize <= ArgSize);
5118               GapSize = ArgSize - ArgAllocSize;
5119             }
5120             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
5121             if (MS.TrackOrigins)
5122               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
5123           }
5124           GpOffset += ArgSize;
5125         } else {
5126           GpOffset = kParamTLSSize;
5127         }
5128         break;
5129       }
5130       case ArgKind::FloatingPoint: {
5131         // Always keep track of FpOffset, but store shadow only for varargs.
5132         uint64_t ArgSize = 8;
5133         if (FpOffset + ArgSize <= kParamTLSSize) {
5134           if (!IsFixed) {
5135             // PoP says: "A short floating-point datum requires only the
5136             // left-most 32 bit positions of a floating-point register".
5137             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
5138             // don't extend shadow and don't mind the gap.
5139             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
5140             if (MS.TrackOrigins)
5141               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5142           }
5143           FpOffset += ArgSize;
5144         } else {
5145           FpOffset = kParamTLSSize;
5146         }
5147         break;
5148       }
5149       case ArgKind::Vector: {
5150         // Keep track of VrIndex. No need to store shadow, since vector varargs
5151         // go through AK_Memory.
5152         assert(IsFixed);
5153         VrIndex++;
5154         break;
5155       }
5156       case ArgKind::Memory: {
5157         // Keep track of OverflowOffset and store shadow only for varargs.
5158         // Ignore fixed args, since we need to copy only the vararg portion of
5159         // the overflow area shadow.
5160         if (!IsFixed) {
5161           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5162           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5163           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5164             SE = getShadowExtension(CB, ArgNo);
5165             uint64_t GapSize =
5166                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5167             ShadowBase =
5168                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5169             if (MS.TrackOrigins)
5170               OriginBase =
5171                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5172             OverflowOffset += ArgSize;
5173           } else {
5174             OverflowOffset = kParamTLSSize;
5175           }
5176         }
5177         break;
5178       }
5179       case ArgKind::Indirect:
5180         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5181       }
5182       if (ShadowBase == nullptr)
5183         continue;
5184       Value *Shadow = MSV.getShadow(A);
5185       if (SE != ShadowExtension::None)
5186         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5187                                       /*Signed*/ SE == ShadowExtension::Sign);
5188       ShadowBase = IRB.CreateIntToPtr(
5189           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5190       IRB.CreateStore(Shadow, ShadowBase);
5191       if (MS.TrackOrigins) {
5192         Value *Origin = MSV.getOrigin(A);
5193         unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
5194         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5195                         kMinOriginAlignment);
5196       }
5197     }
5198     Constant *OverflowSize = ConstantInt::get(
5199         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5200     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5201   }
5202 
5203   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5204     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5205     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5206   }
5207 
5208   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5209     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5210     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5211     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5212                               "_msarg_va_o");
5213   }
5214 
5215   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5216     IRBuilder<> IRB(&I);
5217     Value *VAListTag = I.getArgOperand(0);
5218     Value *ShadowPtr, *OriginPtr;
5219     const Align Alignment = Align(8);
5220     std::tie(ShadowPtr, OriginPtr) =
5221         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5222                                /*isStore*/ true);
5223     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5224                      SystemZVAListTagSize, Alignment, false);
5225   }
5226 
5227   void visitVAStartInst(VAStartInst &I) override {
5228     VAStartInstrumentationList.push_back(&I);
5229     unpoisonVAListTagForInst(I);
5230   }
5231 
5232   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5233 
5234   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5235     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5236     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5237         IRB.CreateAdd(
5238             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5239             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5240         PointerType::get(RegSaveAreaPtrTy, 0));
5241     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5242     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5243     const Align Alignment = Align(8);
5244     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5245         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5246                                /*isStore*/ true);
5247     // TODO(iii): copy only fragments filled by visitCallBase()
5248     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5249                      SystemZRegSaveAreaSize);
5250     if (MS.TrackOrigins)
5251       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5252                        Alignment, SystemZRegSaveAreaSize);
5253   }
5254 
5255   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5256     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5257     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5258         IRB.CreateAdd(
5259             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5260             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5261         PointerType::get(OverflowArgAreaPtrTy, 0));
5262     Value *OverflowArgAreaPtr =
5263         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5264     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5265     const Align Alignment = Align(8);
5266     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5267         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5268                                Alignment, /*isStore*/ true);
5269     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5270                                            SystemZOverflowOffset);
5271     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5272                      VAArgOverflowSize);
5273     if (MS.TrackOrigins) {
5274       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5275                                       SystemZOverflowOffset);
5276       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5277                        VAArgOverflowSize);
5278     }
5279   }
5280 
5281   void finalizeInstrumentation() override {
5282     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5283            "finalizeInstrumentation called twice");
5284     if (!VAStartInstrumentationList.empty()) {
5285       // If there is a va_start in this function, make a backup copy of
5286       // va_arg_tls somewhere in the function entry block.
5287       IRBuilder<> IRB(MSV.FnPrologueEnd);
5288       VAArgOverflowSize =
5289           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5290       Value *CopySize =
5291           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5292                         VAArgOverflowSize);
5293       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5294       IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
5295       if (MS.TrackOrigins) {
5296         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5297         IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
5298                          Align(8), CopySize);
5299       }
5300     }
5301 
5302     // Instrument va_start.
5303     // Copy va_list shadow from the backup copy of the TLS contents.
5304     for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
5305          VaStartNo < VaStartNum; VaStartNo++) {
5306       CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
5307       IRBuilder<> IRB(OrigInst->getNextNode());
5308       Value *VAListTag = OrigInst->getArgOperand(0);
5309       copyRegSaveArea(IRB, VAListTag);
5310       copyOverflowArea(IRB, VAListTag);
5311     }
5312   }
5313 };
5314 
5315 /// A no-op implementation of VarArgHelper.
5316 struct VarArgNoOpHelper : public VarArgHelper {
5317   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5318                    MemorySanitizerVisitor &MSV) {}
5319 
5320   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5321 
5322   void visitVAStartInst(VAStartInst &I) override {}
5323 
5324   void visitVACopyInst(VACopyInst &I) override {}
5325 
5326   void finalizeInstrumentation() override {}
5327 };
5328 
5329 } // end anonymous namespace
5330 
5331 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5332                                         MemorySanitizerVisitor &Visitor) {
5333   // VarArg handling is only implemented on AMD64. False positives are possible
5334   // on other platforms.
5335   Triple TargetTriple(Func.getParent()->getTargetTriple());
5336   if (TargetTriple.getArch() == Triple::x86_64)
5337     return new VarArgAMD64Helper(Func, Msan, Visitor);
5338   else if (TargetTriple.isMIPS64())
5339     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5340   else if (TargetTriple.getArch() == Triple::aarch64)
5341     return new VarArgAArch64Helper(Func, Msan, Visitor);
5342   else if (TargetTriple.getArch() == Triple::ppc64 ||
5343            TargetTriple.getArch() == Triple::ppc64le)
5344     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5345   else if (TargetTriple.getArch() == Triple::systemz)
5346     return new VarArgSystemZHelper(Func, Msan, Visitor);
5347   else
5348     return new VarArgNoOpHelper(Func, Msan, Visitor);
5349 }
5350 
5351 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5352   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5353     return false;
5354 
5355   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
5356     return false;
5357 
5358   MemorySanitizerVisitor Visitor(F, *this, TLI);
5359 
5360   // Clear out readonly/readnone attributes.
5361   AttributeMask B;
5362   B.addAttribute(Attribute::ReadOnly)
5363       .addAttribute(Attribute::ReadNone)
5364       .addAttribute(Attribute::WriteOnly)
5365       .addAttribute(Attribute::ArgMemOnly)
5366       .addAttribute(Attribute::Speculatable);
5367   F.removeFnAttrs(B);
5368 
5369   return Visitor.runOnFunction();
5370 }
5371