xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (revision f126890ac5386406dadf7c4cfa9566cbb56537c5)
1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
17 /// propagate the shadow bits through some of the arithmetic
18 /// instructions (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
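///
/// For example, shadow propagation through an integer addition is, roughly,
/// a bitwise OR of the operand shadows (a conservative over-approximation):
///   %c = add i32 %a, %b   =>   shadow(%c) = shadow(%a) | shadow(%b)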
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings the major issue
26 /// as well: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with slow-path
35 /// storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every aligned group of 4 consecutive bytes of application memory has one
58 /// origin value associated with it. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
65 /// This way it avoids needlessly overwriting the origin of the 4-byte region
66 /// on a short (i.e. 1-byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
72 /// of two disjoint locations can not be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
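///
/// Schematically, an atomic store of %v to %p is instrumented as:
///   store <clean shadow> to shadow(%p)     ; shadow store goes first
///   store atomic %v, %p                    ; app store, ordering upgraded to
///                                          ; at least release
/// and an atomic load loads its shadow only after the application load.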
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code, LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It may be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics can be only visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach by generating calls to
101 ///   __msan_instrument_asm_store(ptr, size),
102 /// which defers the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions. The corresponding functions check that the X-byte accesses
121 ///    are possible and return the shadow and origin pointers (sketch below).
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///    Note that the sanitizer code has to deal with how shadow/origin pairs
126 ///    returned by these functions are represented in different ABIs. In
127 ///    the X86_64 ABI they are returned in RDX:RAX, and in the SystemZ ABI they
128 ///    are written to memory pointed to by a hidden parameter.
129 ///  - TLS variables are stored in a single per-task struct. A call to a
130 ///    function __msan_get_context_state() returning a pointer to that struct
131 ///    is inserted into every instrumented function before the entry block;
132 ///  - __msan_warning() takes a 32-bit origin parameter;
133 ///  - local variables are poisoned with __msan_poison_alloca() upon function
134 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
135 ///    function;
136 ///  - the pass doesn't declare any global variables or add global constructors
137 ///    to the translation unit.
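///
/// A rough sketch (x86_64, where the shadow/origin pair is returned in
/// RDX:RAX) of how a 4-byte load from %p is instrumented:
///   %pair       = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr %p)
///   %shadow_ptr = extractvalue { ptr, ptr } %pair, 0
///   %origin_ptr = extractvalue { ptr, ptr } %pair, 1
///   %shadow     = load i32, ptr %shadow_ptr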
138 ///
139 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
140 /// calls, making sure we're on the safe side wrt. possible false positives.
141 ///
142 ///  KernelMemorySanitizer only supports X86_64 and SystemZ at the moment.
143 ///
144 //
145 // FIXME: This sanitizer does not yet handle scalable vectors
146 //
147 //===----------------------------------------------------------------------===//
148 
149 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
150 #include "llvm/ADT/APInt.h"
151 #include "llvm/ADT/ArrayRef.h"
152 #include "llvm/ADT/DenseMap.h"
153 #include "llvm/ADT/DepthFirstIterator.h"
154 #include "llvm/ADT/SetVector.h"
155 #include "llvm/ADT/SmallString.h"
156 #include "llvm/ADT/SmallVector.h"
157 #include "llvm/ADT/StringExtras.h"
158 #include "llvm/ADT/StringRef.h"
159 #include "llvm/Analysis/GlobalsModRef.h"
160 #include "llvm/Analysis/TargetLibraryInfo.h"
161 #include "llvm/Analysis/ValueTracking.h"
162 #include "llvm/IR/Argument.h"
163 #include "llvm/IR/AttributeMask.h"
164 #include "llvm/IR/Attributes.h"
165 #include "llvm/IR/BasicBlock.h"
166 #include "llvm/IR/CallingConv.h"
167 #include "llvm/IR/Constant.h"
168 #include "llvm/IR/Constants.h"
169 #include "llvm/IR/DataLayout.h"
170 #include "llvm/IR/DerivedTypes.h"
171 #include "llvm/IR/Function.h"
172 #include "llvm/IR/GlobalValue.h"
173 #include "llvm/IR/GlobalVariable.h"
174 #include "llvm/IR/IRBuilder.h"
175 #include "llvm/IR/InlineAsm.h"
176 #include "llvm/IR/InstVisitor.h"
177 #include "llvm/IR/InstrTypes.h"
178 #include "llvm/IR/Instruction.h"
179 #include "llvm/IR/Instructions.h"
180 #include "llvm/IR/IntrinsicInst.h"
181 #include "llvm/IR/Intrinsics.h"
182 #include "llvm/IR/IntrinsicsX86.h"
183 #include "llvm/IR/MDBuilder.h"
184 #include "llvm/IR/Module.h"
185 #include "llvm/IR/Type.h"
186 #include "llvm/IR/Value.h"
187 #include "llvm/IR/ValueMap.h"
188 #include "llvm/Support/Alignment.h"
189 #include "llvm/Support/AtomicOrdering.h"
190 #include "llvm/Support/Casting.h"
191 #include "llvm/Support/CommandLine.h"
192 #include "llvm/Support/Debug.h"
193 #include "llvm/Support/DebugCounter.h"
194 #include "llvm/Support/ErrorHandling.h"
195 #include "llvm/Support/MathExtras.h"
196 #include "llvm/Support/raw_ostream.h"
197 #include "llvm/TargetParser/Triple.h"
198 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
199 #include "llvm/Transforms/Utils/Local.h"
200 #include "llvm/Transforms/Utils/ModuleUtils.h"
201 #include <algorithm>
202 #include <cassert>
203 #include <cstddef>
204 #include <cstdint>
205 #include <memory>
206 #include <string>
207 #include <tuple>
208 
209 using namespace llvm;
210 
211 #define DEBUG_TYPE "msan"
212 
213 DEBUG_COUNTER(DebugInsertCheck, "msan-insert-check",
214               "Controls which checks to insert");
215 
216 static const unsigned kOriginSize = 4;
217 static const Align kMinOriginAlignment = Align(4);
218 static const Align kShadowTLSAlignment = Align(8);
219 
220 // These constants must be kept in sync with the ones in msan.h.
221 static const unsigned kParamTLSSize = 800;
222 static const unsigned kRetvalTLSSize = 800;
223 
224 // Access sizes are powers of two: 1, 2, 4, 8.
225 static const size_t kNumberOfAccessSizes = 4;
226 
227 /// Track origins of uninitialized values.
228 ///
229 /// Adds a section to MemorySanitizer report that points to the allocation
230 /// (stack or heap) the uninitialized bits came from originally.
231 static cl::opt<int> ClTrackOrigins(
232     "msan-track-origins",
233     cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden,
234     cl::init(0));
235 
236 static cl::opt<bool> ClKeepGoing("msan-keep-going",
237                                  cl::desc("keep going after reporting a UMR"),
238                                  cl::Hidden, cl::init(false));
239 
240 static cl::opt<bool>
241     ClPoisonStack("msan-poison-stack",
242                   cl::desc("poison uninitialized stack variables"), cl::Hidden,
243                   cl::init(true));
244 
245 static cl::opt<bool> ClPoisonStackWithCall(
246     "msan-poison-stack-with-call",
247     cl::desc("poison uninitialized stack variables with a call"), cl::Hidden,
248     cl::init(false));
249 
250 static cl::opt<int> ClPoisonStackPattern(
251     "msan-poison-stack-pattern",
252     cl::desc("poison uninitialized stack variables with the given pattern"),
253     cl::Hidden, cl::init(0xff));
254 
255 static cl::opt<bool>
256     ClPrintStackNames("msan-print-stack-names",
257                       cl::desc("Print name of local stack variable"),
258                       cl::Hidden, cl::init(true));
259 
260 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
261                                    cl::desc("poison undef temps"), cl::Hidden,
262                                    cl::init(true));
263 
264 static cl::opt<bool>
265     ClHandleICmp("msan-handle-icmp",
266                  cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
267                  cl::Hidden, cl::init(true));
268 
269 static cl::opt<bool>
270     ClHandleICmpExact("msan-handle-icmp-exact",
271                       cl::desc("exact handling of relational integer ICmp"),
272                       cl::Hidden, cl::init(false));
273 
274 static cl::opt<bool> ClHandleLifetimeIntrinsics(
275     "msan-handle-lifetime-intrinsics",
276     cl::desc(
277         "when possible, poison scoped variables at the beginning of the scope "
278         "(slower, but more precise)"),
279     cl::Hidden, cl::init(true));
280 
281 // When compiling the Linux kernel, we sometimes see false positives related to
282 // MSan being unable to understand that inline assembly calls may initialize
283 // local variables.
284 // This flag makes the compiler conservatively unpoison every memory location
285 // passed into an assembly call. Note that this may cause false negatives.
286 // Because it's impossible to figure out the array sizes, we can only unpoison
287 // the first sizeof(type) bytes for each type* pointer.
288 // The instrumentation is only enabled in KMSAN builds, and only if
289 // -msan-handle-asm-conservative is on. This is done because we may want to
290 // quickly disable assembly instrumentation when it breaks.
291 static cl::opt<bool> ClHandleAsmConservative(
292     "msan-handle-asm-conservative",
293     cl::desc("conservative handling of inline assembly"), cl::Hidden,
294     cl::init(true));
295 
296 // This flag controls whether we check the shadow of the address
297 // operand of a load or store. Such bugs are very rare, since a load from
298 // a garbage address typically results in SEGV, but still happen
299 // (e.g. only lower bits of address are garbage, or the access happens
300 // early at program startup where malloc-ed memory is more likely to
301 // be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
302 static cl::opt<bool> ClCheckAccessAddress(
303     "msan-check-access-address",
304     cl::desc("report accesses through a pointer which has poisoned shadow"),
305     cl::Hidden, cl::init(true));
306 
307 static cl::opt<bool> ClEagerChecks(
308     "msan-eager-checks",
309     cl::desc("check arguments and return values at function call boundaries"),
310     cl::Hidden, cl::init(false));
311 
312 static cl::opt<bool> ClDumpStrictInstructions(
313     "msan-dump-strict-instructions",
314     cl::desc("print out instructions with default strict semantics"),
315     cl::Hidden, cl::init(false));
316 
317 static cl::opt<int> ClInstrumentationWithCallThreshold(
318     "msan-instrumentation-with-call-threshold",
319     cl::desc(
320         "If the function being instrumented requires more than "
321         "this number of checks and origin stores, use callbacks instead of "
322         "inline checks (-1 means never use callbacks)."),
323     cl::Hidden, cl::init(3500));
324 
325 static cl::opt<bool>
326     ClEnableKmsan("msan-kernel",
327                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
328                   cl::Hidden, cl::init(false));
329 
330 static cl::opt<bool>
331     ClDisableChecks("msan-disable-checks",
332                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
333                     cl::init(false));
334 
335 static cl::opt<bool>
336     ClCheckConstantShadow("msan-check-constant-shadow",
337                           cl::desc("Insert checks for constant shadow values"),
338                           cl::Hidden, cl::init(true));
339 
340 // This is off by default because of a bug in gold:
341 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
342 static cl::opt<bool>
343     ClWithComdat("msan-with-comdat",
344                  cl::desc("Place MSan constructors in comdat sections"),
345                  cl::Hidden, cl::init(false));
346 
347 // These options allow specifying custom memory map parameters.
348 // See MemoryMapParams for details.
349 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
350                                    cl::desc("Define custom MSan AndMask"),
351                                    cl::Hidden, cl::init(0));
352 
353 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
354                                    cl::desc("Define custom MSan XorMask"),
355                                    cl::Hidden, cl::init(0));
356 
357 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
358                                       cl::desc("Define custom MSan ShadowBase"),
359                                       cl::Hidden, cl::init(0));
360 
361 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
362                                       cl::desc("Define custom MSan OriginBase"),
363                                       cl::Hidden, cl::init(0));
364 
365 static cl::opt<int>
366     ClDisambiguateWarning("msan-disambiguate-warning-threshold",
367                           cl::desc("Define threshold for number of checks per "
368                                    "debug location to force origin update."),
369                           cl::Hidden, cl::init(3));
370 
371 const char kMsanModuleCtorName[] = "msan.module_ctor";
372 const char kMsanInitName[] = "__msan_init";
373 
374 namespace {
375 
376 // Memory map parameters used in application-to-shadow address calculation.
377 // Offset = (Addr & ~AndMask) ^ XorMask
378 // Shadow = ShadowBase + Offset
379 // Origin = OriginBase + Offset
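//
// For example, with the x86_64 Linux parameters below (AndMask = 0,
// XorMask = 0x500000000000, ShadowBase = 0, OriginBase = 0x100000000000),
// an application address Addr = 0x7fff80001234 maps to:
//   Offset = 0x7fff80001234 ^ 0x500000000000 = 0x2fff80001234
//   Shadow = ShadowBase + Offset = 0x2fff80001234
//   Origin = OriginBase + Offset = 0x3fff80001234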
380 struct MemoryMapParams {
381   uint64_t AndMask;
382   uint64_t XorMask;
383   uint64_t ShadowBase;
384   uint64_t OriginBase;
385 };
386 
387 struct PlatformMemoryMapParams {
388   const MemoryMapParams *bits32;
389   const MemoryMapParams *bits64;
390 };
391 
392 } // end anonymous namespace
393 
394 // i386 Linux
395 static const MemoryMapParams Linux_I386_MemoryMapParams = {
396     0x000080000000, // AndMask
397     0,              // XorMask (not used)
398     0,              // ShadowBase (not used)
399     0x000040000000, // OriginBase
400 };
401 
402 // x86_64 Linux
403 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
404     0,              // AndMask (not used)
405     0x500000000000, // XorMask
406     0,              // ShadowBase (not used)
407     0x100000000000, // OriginBase
408 };
409 
410 // mips64 Linux
411 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
412     0,              // AndMask (not used)
413     0x008000000000, // XorMask
414     0,              // ShadowBase (not used)
415     0x002000000000, // OriginBase
416 };
417 
418 // ppc64 Linux
419 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
420     0xE00000000000, // AndMask
421     0x100000000000, // XorMask
422     0x080000000000, // ShadowBase
423     0x1C0000000000, // OriginBase
424 };
425 
426 // s390x Linux
427 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
428     0xC00000000000, // AndMask
429     0,              // XorMask (not used)
430     0x080000000000, // ShadowBase
431     0x1C0000000000, // OriginBase
432 };
433 
434 // aarch64 Linux
435 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
436     0,               // AndMask (not used)
437     0x0B00000000000, // XorMask
438     0,               // ShadowBase (not used)
439     0x0200000000000, // OriginBase
440 };
441 
442 // loongarch64 Linux
443 static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
444     0,              // AndMask (not used)
445     0x500000000000, // XorMask
446     0,              // ShadowBase (not used)
447     0x100000000000, // OriginBase
448 };
449 
450 // aarch64 FreeBSD
451 static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
452     0x1800000000000, // AndMask
453     0x0400000000000, // XorMask
454     0x0200000000000, // ShadowBase
455     0x0700000000000, // OriginBase
456 };
457 
458 // i386 FreeBSD
459 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
460     0x000180000000, // AndMask
461     0x000040000000, // XorMask
462     0x000020000000, // ShadowBase
463     0x000700000000, // OriginBase
464 };
465 
466 // x86_64 FreeBSD
467 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
468     0xc00000000000, // AndMask
469     0x200000000000, // XorMask
470     0x100000000000, // ShadowBase
471     0x380000000000, // OriginBase
472 };
473 
474 // x86_64 NetBSD
475 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
476     0,              // AndMask
477     0x500000000000, // XorMask
478     0,              // ShadowBase
479     0x100000000000, // OriginBase
480 };
481 
482 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
483     &Linux_I386_MemoryMapParams,
484     &Linux_X86_64_MemoryMapParams,
485 };
486 
487 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
488     nullptr,
489     &Linux_MIPS64_MemoryMapParams,
490 };
491 
492 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
493     nullptr,
494     &Linux_PowerPC64_MemoryMapParams,
495 };
496 
497 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
498     nullptr,
499     &Linux_S390X_MemoryMapParams,
500 };
501 
502 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
503     nullptr,
504     &Linux_AArch64_MemoryMapParams,
505 };
506 
507 static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
508     nullptr,
509     &Linux_LoongArch64_MemoryMapParams,
510 };
511 
512 static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
513     nullptr,
514     &FreeBSD_AArch64_MemoryMapParams,
515 };
516 
517 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
518     &FreeBSD_I386_MemoryMapParams,
519     &FreeBSD_X86_64_MemoryMapParams,
520 };
521 
522 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
523     nullptr,
524     &NetBSD_X86_64_MemoryMapParams,
525 };
526 
527 namespace {
528 
529 /// Instrument functions of a module to detect uninitialized reads.
530 ///
531 /// Instantiating MemorySanitizer inserts the msan runtime library API function
532 /// declarations into the module if they don't exist already. Instantiating
533 /// ensures the __msan_init function is in the list of global constructors for
534 /// the module.
535 class MemorySanitizer {
536 public:
537   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
538       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
539         Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
540     initializeModule(M);
541   }
542 
543   // MSan cannot be moved or copied because of MapParams.
544   MemorySanitizer(MemorySanitizer &&) = delete;
545   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
546   MemorySanitizer(const MemorySanitizer &) = delete;
547   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
548 
549   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
550 
551 private:
552   friend struct MemorySanitizerVisitor;
553   friend struct VarArgAMD64Helper;
554   friend struct VarArgMIPS64Helper;
555   friend struct VarArgAArch64Helper;
556   friend struct VarArgPowerPC64Helper;
557   friend struct VarArgSystemZHelper;
558 
559   void initializeModule(Module &M);
560   void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI);
561   void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
562   void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);
563 
564   template <typename... ArgsTy>
565   FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
566                                                  ArgsTy... Args);
567 
568   /// True if we're compiling the Linux kernel.
569   bool CompileKernel;
570   /// Track origins (allocation points) of uninitialized values.
571   int TrackOrigins;
572   bool Recover;
573   bool EagerChecks;
574 
575   Triple TargetTriple;
576   LLVMContext *C;
577   Type *IntptrTy;
578   Type *OriginTy;
579 
580   // XxxTLS variables represent the per-thread state in MSan and per-task state
581   // in KMSAN.
582   // For userspace these point to thread-local globals. In kernel land
583   // they point to the members of a per-task struct obtained via a call to
584   // __msan_get_context_state().
585 
586   /// Thread-local shadow storage for function parameters.
587   Value *ParamTLS;
588 
589   /// Thread-local origin storage for function parameters.
590   Value *ParamOriginTLS;
591 
592   /// Thread-local shadow storage for function return value.
593   Value *RetvalTLS;
594 
595   /// Thread-local origin storage for function return value.
596   Value *RetvalOriginTLS;
597 
598   /// Thread-local shadow storage for in-register va_arg function
599   /// parameters (x86_64-specific).
600   Value *VAArgTLS;
601 
602   /// Thread-local origin storage for in-register va_arg function
603   /// parameters (x86_64-specific).
604   Value *VAArgOriginTLS;
605 
606   /// Thread-local storage for the size of the va_arg overflow area
607   /// (x86_64-specific).
608   Value *VAArgOverflowSizeTLS;
609 
610   /// Are the instrumentation callbacks set up?
611   bool CallbacksInitialized = false;
612 
613   /// The run-time callback to print a warning.
614   FunctionCallee WarningFn;
615 
616   // These arrays are indexed by log2(AccessSize).
617   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
618   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
619 
620   /// Run-time helper that generates a new origin value for a stack
621   /// allocation.
622   FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
623   // No description version
624   FunctionCallee MsanSetAllocaOriginNoDescriptionFn;
625 
626   /// Run-time helper that poisons stack on function entry.
627   FunctionCallee MsanPoisonStackFn;
628 
629   /// Run-time helper that records a store (or any event) of an
630   /// uninitialized value and returns an updated origin id encoding this info.
631   FunctionCallee MsanChainOriginFn;
632 
633   /// Run-time helper that paints an origin over a region.
634   FunctionCallee MsanSetOriginFn;
635 
636   /// MSan runtime replacements for memmove, memcpy and memset.
637   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
638 
639   /// KMSAN callback for task-local function argument shadow.
640   StructType *MsanContextStateTy;
641   FunctionCallee MsanGetContextStateFn;
642 
643   /// Functions for poisoning/unpoisoning local variables
644   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
645 
646   /// Pair of shadow/origin pointers.
647   Type *MsanMetadata;
648 
649   /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
650   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
651   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
652   FunctionCallee MsanMetadataPtrForStore_1_8[4];
653   FunctionCallee MsanInstrumentAsmStoreFn;
654 
655   /// Storage for return values of the MsanMetadataPtrXxx functions.
656   Value *MsanMetadataAlloca;
657 
658   /// Helper to choose between different MsanMetadataPtrXxx().
659   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
660 
661   /// Memory map parameters used in application-to-shadow calculation.
662   const MemoryMapParams *MapParams;
663 
664   /// Custom memory map parameters used when -msan-shadow-base or
665   /// -msan-origin-base is provided.
666   MemoryMapParams CustomMapParams;
667 
668   MDNode *ColdCallWeights;
669 
670   /// Branch weights for origin store.
671   MDNode *OriginStoreWeights;
672 };
673 
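// Create the msan.module_ctor function, which calls __msan_init, and register
// it in the module's global constructor list (in a comdat when
// -msan-with-comdat is enabled).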
674 void insertModuleCtor(Module &M) {
675   getOrCreateSanitizerCtorAndInitFunctions(
676       M, kMsanModuleCtorName, kMsanInitName,
677       /*InitArgTypes=*/{},
678       /*InitArgs=*/{},
679       // This callback is invoked when the functions are created the first
680       // time. Hook them into the global ctors list in that case:
681       [&](Function *Ctor, FunctionCallee) {
682         if (!ClWithComdat) {
683           appendToGlobalCtors(M, Ctor, 0);
684           return;
685         }
686         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
687         Ctor->setComdat(MsanCtorComdat);
688         appendToGlobalCtors(M, Ctor, 0, Ctor);
689       });
690 }
691 
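// Prefer the command-line flag when it was explicitly passed; otherwise fall
// back to the value supplied through the pass options.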
692 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
693   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
694 }
695 
696 } // end anonymous namespace
697 
698 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
699                                                bool EagerChecks)
700     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
701       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
702       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)),
703       EagerChecks(getOptOrDefault(ClEagerChecks, EagerChecks)) {}
704 
705 PreservedAnalyses MemorySanitizerPass::run(Module &M,
706                                            ModuleAnalysisManager &AM) {
707   bool Modified = false;
708   if (!Options.Kernel) {
709     insertModuleCtor(M);
710     Modified = true;
711   }
712 
713   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
714   for (Function &F : M) {
715     if (F.empty())
716       continue;
717     MemorySanitizer Msan(*F.getParent(), Options);
718     Modified |=
719         Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
720   }
721 
722   if (!Modified)
723     return PreservedAnalyses::all();
724 
725   PreservedAnalyses PA = PreservedAnalyses::none();
726   // GlobalsAA is considered stateless and does not get invalidated unless
727   // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
728   // make changes that require GlobalsAA to be invalidated.
729   PA.abandon<GlobalsAA>();
730   return PA;
731 }
732 
733 void MemorySanitizerPass::printPipeline(
734     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
735   static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
736       OS, MapClassName2PassName);
737   OS << '<';
738   if (Options.Recover)
739     OS << "recover;";
740   if (Options.Kernel)
741     OS << "kernel;";
742   if (Options.EagerChecks)
743     OS << "eager-checks;";
744   OS << "track-origins=" << Options.TrackOrigins;
745   OS << '>';
746 }
747 
748 /// Create a constant global initialized with the given string.
749 ///
750 /// Creates a private, read-only global for Str so that its address can be
751 /// passed to the run-time library (e.g. as the description of a stack
752 /// variable in origin reports).
753 static GlobalVariable *createPrivateConstGlobalForString(Module &M,
754                                                          StringRef Str) {
755   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
756   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
757                             GlobalValue::PrivateLinkage, StrConst, "");
758 }
759 
760 template <typename... ArgsTy>
761 FunctionCallee
762 MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
763                                                  ArgsTy... Args) {
764   if (TargetTriple.getArch() == Triple::systemz) {
765     // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
766     return M.getOrInsertFunction(Name, Type::getVoidTy(*C),
767                                  PointerType::get(MsanMetadata, 0),
768                                  std::forward<ArgsTy>(Args)...);
769   }
770 
771   return M.getOrInsertFunction(Name, MsanMetadata,
772                                std::forward<ArgsTy>(Args)...);
773 }
774 
775 /// Create KMSAN API callbacks.
776 void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
777   IRBuilder<> IRB(*C);
778 
779   // These will be initialized in insertKmsanPrologue().
780   RetvalTLS = nullptr;
781   RetvalOriginTLS = nullptr;
782   ParamTLS = nullptr;
783   ParamOriginTLS = nullptr;
784   VAArgTLS = nullptr;
785   VAArgOriginTLS = nullptr;
786   VAArgOverflowSizeTLS = nullptr;
787 
788   WarningFn = M.getOrInsertFunction("__msan_warning",
789                                     TLI.getAttrList(C, {0}, /*Signed=*/false),
790                                     IRB.getVoidTy(), IRB.getInt32Ty());
791 
792   // Requests the per-task context state (kmsan_context_state*) from the
793   // runtime library.
794   MsanContextStateTy = StructType::get(
795       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
796       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
797       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
798       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
799       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
800       OriginTy);
801   MsanGetContextStateFn = M.getOrInsertFunction(
802       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
803 
804   MsanMetadata = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
805                                  PointerType::get(IRB.getInt32Ty(), 0));
806 
807   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
808     std::string name_load =
809         "__msan_metadata_ptr_for_load_" + std::to_string(size);
810     std::string name_store =
811         "__msan_metadata_ptr_for_store_" + std::to_string(size);
812     MsanMetadataPtrForLoad_1_8[ind] = getOrInsertMsanMetadataFunction(
813         M, name_load, PointerType::get(IRB.getInt8Ty(), 0));
814     MsanMetadataPtrForStore_1_8[ind] = getOrInsertMsanMetadataFunction(
815         M, name_store, PointerType::get(IRB.getInt8Ty(), 0));
816   }
817 
818   MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
819       M, "__msan_metadata_ptr_for_load_n", PointerType::get(IRB.getInt8Ty(), 0),
820       IRB.getInt64Ty());
821   MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
822       M, "__msan_metadata_ptr_for_store_n",
823       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
824 
825   // Functions for poisoning and unpoisoning memory.
826   MsanPoisonAllocaFn =
827       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
828                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
829   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
830       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
831 }
832 
833 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
834   return M.getOrInsertGlobal(Name, Ty, [&] {
835     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
836                               nullptr, Name, nullptr,
837                               GlobalVariable::InitialExecTLSModel);
838   });
839 }
840 
841 /// Insert declarations for userspace-specific functions and globals.
842 void MemorySanitizer::createUserspaceApi(Module &M, const TargetLibraryInfo &TLI) {
843   IRBuilder<> IRB(*C);
844 
845   // Create the callback.
846   // FIXME: this function should have "Cold" calling conv,
847   // which is not yet implemented.
848   if (TrackOrigins) {
849     StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
850                                       : "__msan_warning_with_origin_noreturn";
851     WarningFn = M.getOrInsertFunction(WarningFnName,
852                                       TLI.getAttrList(C, {0}, /*Signed=*/false),
853                                       IRB.getVoidTy(), IRB.getInt32Ty());
854   } else {
855     StringRef WarningFnName =
856         Recover ? "__msan_warning" : "__msan_warning_noreturn";
857     WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
858   }
859 
860   // Create the global TLS variables.
861   RetvalTLS =
862       getOrInsertGlobal(M, "__msan_retval_tls",
863                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
864 
865   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
866 
867   ParamTLS =
868       getOrInsertGlobal(M, "__msan_param_tls",
869                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
870 
871   ParamOriginTLS =
872       getOrInsertGlobal(M, "__msan_param_origin_tls",
873                         ArrayType::get(OriginTy, kParamTLSSize / 4));
874 
875   VAArgTLS =
876       getOrInsertGlobal(M, "__msan_va_arg_tls",
877                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
878 
879   VAArgOriginTLS =
880       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
881                         ArrayType::get(OriginTy, kParamTLSSize / 4));
882 
883   VAArgOverflowSizeTLS =
884       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
885 
886   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
887        AccessSizeIndex++) {
888     unsigned AccessSize = 1 << AccessSizeIndex;
889     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
890     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
891         FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
892         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
893 
894     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
895     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
896         FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
897         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
898         IRB.getInt32Ty());
899   }
900 
901   MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction(
902       "__msan_set_alloca_origin_with_descr", IRB.getVoidTy(),
903       IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
904   MsanSetAllocaOriginNoDescriptionFn = M.getOrInsertFunction(
905       "__msan_set_alloca_origin_no_descr", IRB.getVoidTy(), IRB.getInt8PtrTy(),
906       IntptrTy, IRB.getInt8PtrTy());
907   MsanPoisonStackFn = M.getOrInsertFunction(
908       "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
909 }
910 
911 /// Insert extern declarations of runtime-provided functions and globals.
912 void MemorySanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo &TLI) {
913   // Only do this once.
914   if (CallbacksInitialized)
915     return;
916 
917   IRBuilder<> IRB(*C);
918   // Initialize callbacks that are common for kernel and userspace
919   // instrumentation.
920   MsanChainOriginFn = M.getOrInsertFunction(
921       "__msan_chain_origin",
922       TLI.getAttrList(C, {0}, /*Signed=*/false, /*Ret=*/true), IRB.getInt32Ty(),
923       IRB.getInt32Ty());
924   MsanSetOriginFn = M.getOrInsertFunction(
925       "__msan_set_origin", TLI.getAttrList(C, {2}, /*Signed=*/false),
926       IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
927   MemmoveFn =
928       M.getOrInsertFunction("__msan_memmove", IRB.getInt8PtrTy(),
929                             IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
930   MemcpyFn =
931       M.getOrInsertFunction("__msan_memcpy", IRB.getInt8PtrTy(),
932                             IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
933   MemsetFn = M.getOrInsertFunction(
934       "__msan_memset", TLI.getAttrList(C, {1}, /*Signed=*/true),
935       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
936 
937   MsanInstrumentAsmStoreFn =
938       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
939                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
940 
941   if (CompileKernel) {
942     createKernelApi(M, TLI);
943   } else {
944     createUserspaceApi(M, TLI);
945   }
946   CallbacksInitialized = true;
947 }
948 
949 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
950                                                              int size) {
951   FunctionCallee *Fns =
952       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
953   switch (size) {
954   case 1:
955     return Fns[0];
956   case 2:
957     return Fns[1];
958   case 4:
959     return Fns[2];
960   case 8:
961     return Fns[3];
962   default:
963     return nullptr;
964   }
965 }
966 
967 /// Module-level initialization.
968 ///
969 /// Selects the memory map parameters and sets up commonly used types/globals.
970 void MemorySanitizer::initializeModule(Module &M) {
971   auto &DL = M.getDataLayout();
972 
973   TargetTriple = Triple(M.getTargetTriple());
974 
975   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
976   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
977   // Check the overrides first
978   if (ShadowPassed || OriginPassed) {
979     CustomMapParams.AndMask = ClAndMask;
980     CustomMapParams.XorMask = ClXorMask;
981     CustomMapParams.ShadowBase = ClShadowBase;
982     CustomMapParams.OriginBase = ClOriginBase;
983     MapParams = &CustomMapParams;
984   } else {
985     switch (TargetTriple.getOS()) {
986     case Triple::FreeBSD:
987       switch (TargetTriple.getArch()) {
988       case Triple::aarch64:
989         MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
990         break;
991       case Triple::x86_64:
992         MapParams = FreeBSD_X86_MemoryMapParams.bits64;
993         break;
994       case Triple::x86:
995         MapParams = FreeBSD_X86_MemoryMapParams.bits32;
996         break;
997       default:
998         report_fatal_error("unsupported architecture");
999       }
1000       break;
1001     case Triple::NetBSD:
1002       switch (TargetTriple.getArch()) {
1003       case Triple::x86_64:
1004         MapParams = NetBSD_X86_MemoryMapParams.bits64;
1005         break;
1006       default:
1007         report_fatal_error("unsupported architecture");
1008       }
1009       break;
1010     case Triple::Linux:
1011       switch (TargetTriple.getArch()) {
1012       case Triple::x86_64:
1013         MapParams = Linux_X86_MemoryMapParams.bits64;
1014         break;
1015       case Triple::x86:
1016         MapParams = Linux_X86_MemoryMapParams.bits32;
1017         break;
1018       case Triple::mips64:
1019       case Triple::mips64el:
1020         MapParams = Linux_MIPS_MemoryMapParams.bits64;
1021         break;
1022       case Triple::ppc64:
1023       case Triple::ppc64le:
1024         MapParams = Linux_PowerPC_MemoryMapParams.bits64;
1025         break;
1026       case Triple::systemz:
1027         MapParams = Linux_S390_MemoryMapParams.bits64;
1028         break;
1029       case Triple::aarch64:
1030       case Triple::aarch64_be:
1031         MapParams = Linux_ARM_MemoryMapParams.bits64;
1032         break;
1033       case Triple::loongarch64:
1034         MapParams = Linux_LoongArch_MemoryMapParams.bits64;
1035         break;
1036       default:
1037         report_fatal_error("unsupported architecture");
1038       }
1039       break;
1040     default:
1041       report_fatal_error("unsupported operating system");
1042     }
1043   }
1044 
1045   C = &(M.getContext());
1046   IRBuilder<> IRB(*C);
1047   IntptrTy = IRB.getIntPtrTy(DL);
1048   OriginTy = IRB.getInt32Ty();
1049 
1050   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
1051   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
1052 
1053   if (!CompileKernel) {
1054     if (TrackOrigins)
1055       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
1056         return new GlobalVariable(
1057             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
1058             IRB.getInt32(TrackOrigins), "__msan_track_origins");
1059       });
1060 
1061     if (Recover)
1062       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
1063         return new GlobalVariable(M, IRB.getInt32Ty(), true,
1064                                   GlobalValue::WeakODRLinkage,
1065                                   IRB.getInt32(Recover), "__msan_keep_going");
1066       });
1067   }
1068 }
1069 
1070 namespace {
1071 
1072 /// A helper class that handles instrumentation of VarArg
1073 /// functions on a particular platform.
1074 ///
1075 /// Implementations are expected to insert the instrumentation
1076 /// necessary to propagate argument shadow through VarArg function
1077 /// calls. Visit* methods are called during an InstVisitor pass over
1078 /// the function, and should avoid creating new basic blocks. A new
1079 /// instance of this class is created for each instrumented function.
1080 struct VarArgHelper {
1081   virtual ~VarArgHelper() = default;
1082 
1083   /// Visit a CallBase.
1084   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1085 
1086   /// Visit a va_start call.
1087   virtual void visitVAStartInst(VAStartInst &I) = 0;
1088 
1089   /// Visit a va_copy call.
1090   virtual void visitVACopyInst(VACopyInst &I) = 0;
1091 
1092   /// Finalize function instrumentation.
1093   ///
1094   /// This method is called after visiting all interesting (see above)
1095   /// instructions in a function.
1096   virtual void finalizeInstrumentation() = 0;
1097 };
1098 
1099 struct MemorySanitizerVisitor;
1100 
1101 } // end anonymous namespace
1102 
1103 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1104                                         MemorySanitizerVisitor &Visitor);
1105 
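// Map a type size in bits to an index into the per-access-size callback
// arrays: 0 for up to 1 byte, 1 for 2 bytes, 2 for 4 bytes, 3 for 8 bytes.
// Scalable sizes map to kNumberOfAccessSizes, forcing the generic slow path.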
1106 static unsigned TypeSizeToSizeIndex(TypeSize TS) {
1107   if (TS.isScalable())
1108     // Scalable types unconditionally take slowpaths.
1109     return kNumberOfAccessSizes;
1110   unsigned TypeSizeFixed = TS.getFixedValue();
1111   if (TypeSizeFixed <= 8)
1112     return 0;
1113   return Log2_32_Ceil((TypeSizeFixed + 7) / 8);
1114 }
1115 
1116 namespace {
1117 
1118 /// Helper class to attach the debug information of the given instruction to
1119 /// new instructions inserted after it.
1120 class NextNodeIRBuilder : public IRBuilder<> {
1121 public:
1122   explicit NextNodeIRBuilder(Instruction *IP) : IRBuilder<>(IP->getNextNode()) {
1123     SetCurrentDebugLocation(IP->getDebugLoc());
1124   }
1125 };
1126 
1127 /// This class does all the work for a given function. Store and Load
1128 /// instructions store and load corresponding shadow and origin
1129 /// values. Most instructions propagate shadow from arguments to their
1130 /// return values. Certain instructions (most importantly, BranchInst)
1131 /// test their argument shadow and print reports (with a runtime call) if it's
1132 /// non-zero.
1133 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1134   Function &F;
1135   MemorySanitizer &MS;
1136   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1137   ValueMap<Value *, Value *> ShadowMap, OriginMap;
1138   std::unique_ptr<VarArgHelper> VAHelper;
1139   const TargetLibraryInfo *TLI;
1140   Instruction *FnPrologueEnd;
1141 
1142   // The following flags disable parts of MSan instrumentation based on
1143   // exclusion list contents and command-line options.
1144   bool InsertChecks;
1145   bool PropagateShadow;
1146   bool PoisonStack;
1147   bool PoisonUndef;
1148 
1149   struct ShadowOriginAndInsertPoint {
1150     Value *Shadow;
1151     Value *Origin;
1152     Instruction *OrigIns;
1153 
1154     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1155         : Shadow(S), Origin(O), OrigIns(I) {}
1156   };
1157   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1158   DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
1159   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1160   SmallSetVector<AllocaInst *, 16> AllocaSet;
1161   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1162   SmallVector<StoreInst *, 16> StoreList;
1163   int64_t SplittableBlocksCount = 0;
1164 
1165   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1166                          const TargetLibraryInfo &TLI)
1167       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1168     bool SanitizeFunction =
1169         F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
1170     InsertChecks = SanitizeFunction;
1171     PropagateShadow = SanitizeFunction;
1172     PoisonStack = SanitizeFunction && ClPoisonStack;
1173     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1174 
1175     // In the presence of unreachable blocks, we may see Phi nodes with
1176     // incoming values from such blocks. Since InstVisitor skips unreachable
1177     // blocks, such nodes will not have any shadow value associated with them.
1178     // It's easier to remove unreachable blocks than deal with missing shadow.
1179     removeUnreachableBlocks(F);
1180 
1181     MS.initializeCallbacks(*F.getParent(), TLI);
1182     FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
1183                         .CreateIntrinsic(Intrinsic::donothing, {}, {});
1184 
1185     if (MS.CompileKernel) {
1186       IRBuilder<> IRB(FnPrologueEnd);
1187       insertKmsanPrologue(IRB);
1188     }
1189 
1190     LLVM_DEBUG(if (!InsertChecks) dbgs()
1191                << "MemorySanitizer is not inserting checks into '"
1192                << F.getName() << "'\n");
1193   }
1194 
1195   bool instrumentWithCalls(Value *V) {
1196     // Constants likely will be eliminated by follow-up passes.
1197     if (isa<Constant>(V))
1198       return false;
1199 
1200     ++SplittableBlocksCount;
1201     return ClInstrumentationWithCallThreshold >= 0 &&
1202            SplittableBlocksCount > ClInstrumentationWithCallThreshold;
1203   }
1204 
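  // Returns true if I is part of the pass-inserted function prologue, i.e. it
  // is at or before the FnPrologueEnd marker in the entry block.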
1205   bool isInPrologue(Instruction &I) {
1206     return I.getParent() == FnPrologueEnd->getParent() &&
1207            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
1208   }
1209 
1210   // Creates a new origin and records the stack trace. In general we can call
1211   // this function for any origin manipulation we like. However, it costs
1212   // runtime resources, so use it sparingly and only where it can provide
1213   // additional information helpful to the user.
1214   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1215     if (MS.TrackOrigins <= 1)
1216       return V;
1217     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1218   }
1219 
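  // Widen a 4-byte origin to pointer width by replicating it into both halves
  // (on 64-bit targets), so that paintOrigin can write origins in
  // pointer-sized chunks.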
1220   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1221     const DataLayout &DL = F.getParent()->getDataLayout();
1222     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1223     if (IntptrSize == kOriginSize)
1224       return Origin;
1225     assert(IntptrSize == kOriginSize * 2);
1226     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1227     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1228   }
1229 
1230   /// Fill memory range with the given origin value.
1231   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1232                    TypeSize TS, Align Alignment) {
1233     const DataLayout &DL = F.getParent()->getDataLayout();
1234     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1235     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1236     assert(IntptrAlignment >= kMinOriginAlignment);
1237     assert(IntptrSize >= kOriginSize);
1238 
1239     // Note: The loop-based formulation works for fixed-length vectors too,
1240     // however we prefer to unroll and specialize alignment below.
1241     if (TS.isScalable()) {
1242       Value *Size = IRB.CreateTypeSize(IRB.getInt32Ty(), TS);
1243       Value *RoundUp = IRB.CreateAdd(Size, IRB.getInt32(kOriginSize - 1));
1244       Value *End = IRB.CreateUDiv(RoundUp, IRB.getInt32(kOriginSize));
1245       auto [InsertPt, Index] =
1246         SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint());
1247       IRB.SetInsertPoint(InsertPt);
1248 
1249       Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index);
1250       IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment);
1251       return;
1252     }
1253 
1254     unsigned Size = TS.getFixedValue();
1255 
1256     unsigned Ofs = 0;
1257     Align CurrentAlignment = Alignment;
1258     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1259       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1260       Value *IntptrOriginPtr =
1261           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1262       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1263         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1264                        : IntptrOriginPtr;
1265         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1266         Ofs += IntptrSize / kOriginSize;
1267         CurrentAlignment = IntptrAlignment;
1268       }
1269     }
1270 
1271     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1272       Value *GEP =
1273           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1274       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1275       CurrentAlignment = kMinOriginAlignment;
1276     }
1277   }
1278 
1279   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1280                    Value *OriginPtr, Align Alignment) {
1281     const DataLayout &DL = F.getParent()->getDataLayout();
1282     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1283     TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
1284     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1285     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1286       if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
1287         // Origin is not needed: value is initialized or const shadow is
1288         // ignored.
1289         return;
1290       }
1291       if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
1292         // Copy origin as the value is definitely uninitialized.
1293         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1294                     OriginAlignment);
1295         return;
1296       }
1297       // Fallback to runtime check, which still can be optimized out later.
1298     }
1299 
1300     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1301     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1302     if (instrumentWithCalls(ConvertedShadow) &&
1303         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1304       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1305       Value *ConvertedShadow2 =
1306           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1307       CallBase *CB = IRB.CreateCall(
1308           Fn, {ConvertedShadow2,
1309                IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
1310       CB->addParamAttr(0, Attribute::ZExt);
1311       CB->addParamAttr(2, Attribute::ZExt);
1312     } else {
1313       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1314       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1315           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1316       IRBuilder<> IRBNew(CheckTerm);
1317       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1318                   OriginAlignment);
1319     }
1320   }
1321 
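  // Emit the shadow store (and, when tracking origins, the origin store) for
  // every application store collected during the instruction visit.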
1322   void materializeStores() {
1323     for (StoreInst *SI : StoreList) {
1324       IRBuilder<> IRB(SI);
1325       Value *Val = SI->getValueOperand();
1326       Value *Addr = SI->getPointerOperand();
1327       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1328       Value *ShadowPtr, *OriginPtr;
1329       Type *ShadowTy = Shadow->getType();
1330       const Align Alignment = SI->getAlign();
1331       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1332       std::tie(ShadowPtr, OriginPtr) =
1333           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1334 
1335       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1336       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1337       (void)NewSI;
1338 
1339       if (SI->isAtomic())
1340         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1341 
1342       if (MS.TrackOrigins && !SI->isAtomic())
1343         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1344                     OriginAlignment);
1345     }
1346   }
1347 
1348   // Returns true if Debug Location corresponds to multiple warnings.
1349   bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
1350     if (MS.TrackOrigins < 2)
1351       return false;
1352 
1353     if (LazyWarningDebugLocationCount.empty())
1354       for (const auto &I : InstrumentationList)
1355         ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];
1356 
1357     return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
1358   }
1359 
1360   /// Helper function to insert a warning at IRB's current insert point.
1361   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1362     if (!Origin)
1363       Origin = (Value *)IRB.getInt32(0);
1364     assert(Origin->getType()->isIntegerTy());
1365 
1366     if (shouldDisambiguateWarningLocation(IRB.getCurrentDebugLocation())) {
1367       // Try to create additional origin with debug info of the last origin
1368       // instruction. It may provide additional information to the user.
1369       if (Instruction *OI = dyn_cast_or_null<Instruction>(Origin)) {
1370         assert(MS.TrackOrigins);
1371         auto NewDebugLoc = OI->getDebugLoc();
1372         // Origin update with missing or the same debug location provides no
1373         // additional value.
1374         if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
1375           // Insert update just before the check, so we call runtime only just
1376           // before the report.
1377           IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
1378           IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
1379           Origin = updateOrigin(Origin, IRBOrigin);
1380         }
1381       }
1382     }
1383 
1384     if (MS.CompileKernel || MS.TrackOrigins)
1385       IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1386     else
1387       IRB.CreateCall(MS.WarningFn)->setCannotMerge();
1388     // FIXME: Insert UnreachableInst if !MS.Recover?
1389     // This may invalidate some of the following checks and needs to be done
1390     // at the very end.
1391   }
1392 
1393   void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
1394                            Value *Origin) {
1395     const DataLayout &DL = F.getParent()->getDataLayout();
1396     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1397     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1398     if (instrumentWithCalls(ConvertedShadow) &&
1399         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1400       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1401       Value *ConvertedShadow2 =
1402           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1403       CallBase *CB = IRB.CreateCall(
1404           Fn, {ConvertedShadow2,
1405                MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
1406       CB->addParamAttr(0, Attribute::ZExt);
1407       CB->addParamAttr(1, Attribute::ZExt);
1408     } else {
1409       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1410       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1411           Cmp, &*IRB.GetInsertPoint(),
1412           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1413 
1414       IRB.SetInsertPoint(CheckTerm);
1415       insertWarningFn(IRB, Origin);
1416       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1417     }
1418   }
1419 
1420   void materializeInstructionChecks(
1421       ArrayRef<ShadowOriginAndInsertPoint> InstructionChecks) {
1422     const DataLayout &DL = F.getParent()->getDataLayout();
1423     // Disable combining in some cases. TrackOrigins checks each shadow to pick
1424     // correct origin.
1425     bool Combine = !MS.TrackOrigins;
1426     Instruction *Instruction = InstructionChecks.front().OrigIns;
1427     Value *Shadow = nullptr;
1428     for (const auto &ShadowData : InstructionChecks) {
1429       assert(ShadowData.OrigIns == Instruction);
1430       IRBuilder<> IRB(Instruction);
1431 
1432       Value *ConvertedShadow = ShadowData.Shadow;
1433 
1434       if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1435         if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
1436           // Skip, value is initialized or const shadow is ignored.
1437           continue;
1438         }
1439         if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
1440           // Report as the value is definitely uninitialized.
1441           insertWarningFn(IRB, ShadowData.Origin);
1442           if (!MS.Recover)
1443             return; // Always fail and stop here, no need to check the rest.
1444           // Skip the entire instruction.
1445           continue;
1446         }
1447         // Fallback to runtime check, which still can be optimized out later.
1448       }
1449 
1450       if (!Combine) {
1451         materializeOneCheck(IRB, ConvertedShadow, ShadowData.Origin);
1452         continue;
1453       }
1454 
1455       if (!Shadow) {
1456         Shadow = ConvertedShadow;
1457         continue;
1458       }
1459 
1460       Shadow = convertToBool(Shadow, IRB, "_mscmp");
1461       ConvertedShadow = convertToBool(ConvertedShadow, IRB, "_mscmp");
1462       Shadow = IRB.CreateOr(Shadow, ConvertedShadow, "_msor");
1463     }
1464 
1465     if (Shadow) {
1466       assert(Combine);
1467       IRBuilder<> IRB(Instruction);
1468       materializeOneCheck(IRB, Shadow, nullptr);
1469     }
1470   }
1471 
1472   void materializeChecks() {
1473     llvm::stable_sort(InstrumentationList,
1474                       [](const ShadowOriginAndInsertPoint &L,
1475                          const ShadowOriginAndInsertPoint &R) {
1476                         return L.OrigIns < R.OrigIns;
1477                       });
1478 
1479     for (auto I = InstrumentationList.begin();
1480          I != InstrumentationList.end();) {
1481       auto J =
1482           std::find_if(I + 1, InstrumentationList.end(),
1483                        [L = I->OrigIns](const ShadowOriginAndInsertPoint &R) {
1484                          return L != R.OrigIns;
1485                        });
1486       // Process all checks of instruction at once.
1487       materializeInstructionChecks(ArrayRef<ShadowOriginAndInsertPoint>(I, J));
1488       I = J;
1489     }
1490 
1491     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1492   }
1493 
1494   // Set up the KMSAN prologue: fetch the per-task context state.
1495   void insertKmsanPrologue(IRBuilder<> &IRB) {
1496     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1497     Constant *Zero = IRB.getInt32(0);
1498     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1499                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1500     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1501                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1502     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1503                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1504     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1505                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1506     MS.VAArgOverflowSizeTLS =
1507         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1508                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1509     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1510                                       {Zero, IRB.getInt32(5)}, "param_origin");
1511     MS.RetvalOriginTLS =
1512         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1513                       {Zero, IRB.getInt32(6)}, "retval_origin");
1514     if (MS.TargetTriple.getArch() == Triple::systemz)
1515       MS.MsanMetadataAlloca = IRB.CreateAlloca(MS.MsanMetadata, 0u);
1516   }
1517 
1518   /// Add MemorySanitizer instrumentation to a function.
1519   bool runOnFunction() {
1520     // Iterate all BBs in depth-first order and create shadow instructions
1521     // for all instructions (where applicable).
1522     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1523     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
1524       visit(*BB);
1525 
1526     // Finalize PHI nodes.
1527     for (PHINode *PN : ShadowPHINodes) {
1528       PHINode *PNS = cast<PHINode>(getShadow(PN));
1529       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1530       size_t NumValues = PN->getNumIncomingValues();
1531       for (size_t v = 0; v < NumValues; v++) {
1532         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1533         if (PNO)
1534           PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1535       }
1536     }
1537 
1538     VAHelper->finalizeInstrumentation();
1539 
1540     // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
1541     // instrumenting only allocas.
1542     if (InstrumentLifetimeStart) {
1543       for (auto Item : LifetimeStartList) {
1544         instrumentAlloca(*Item.second, Item.first);
1545         AllocaSet.remove(Item.second);
1546       }
1547     }
1548     // Poison the allocas for which we didn't instrument the corresponding
1549     // lifetime intrinsics.
1550     for (AllocaInst *AI : AllocaSet)
1551       instrumentAlloca(*AI);
1552 
1553     // Insert shadow value checks.
1554     materializeChecks();
1555 
1556     // Delayed instrumentation of StoreInst.
1557     // This may not add new address checks.
1558     materializeStores();
1559 
1560     return true;
1561   }
1562 
1563   /// Compute the shadow type that corresponds to a given Value.
1564   Type *getShadowTy(Value *V) { return getShadowTy(V->getType()); }
1565 
1566   /// Compute the shadow type that corresponds to a given Type.
1567   Type *getShadowTy(Type *OrigTy) {
1568     if (!OrigTy->isSized()) {
1569       return nullptr;
1570     }
1571     // For integer type, shadow is the same as the original type.
1572     // This may return weird-sized types like i1.
1573     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1574       return IT;
1575     const DataLayout &DL = F.getParent()->getDataLayout();
1576     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1577       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1578       return VectorType::get(IntegerType::get(*MS.C, EltSize),
1579                              VT->getElementCount());
1580     }
1581     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1582       return ArrayType::get(getShadowTy(AT->getElementType()),
1583                             AT->getNumElements());
1584     }
1585     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1586       SmallVector<Type *, 4> Elements;
1587       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1588         Elements.push_back(getShadowTy(ST->getElementType(i)));
1589       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1590       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1591       return Res;
1592     }
1593     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1594     return IntegerType::get(*MS.C, TypeSize);
1595   }
1596 
1597   /// Extract combined shadow of struct elements as a bool
1598   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1599                               IRBuilder<> &IRB) {
1600     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1601     Value *Aggregator = FalseVal;
1602 
1603     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1604       // Combine by ORing together each element's bool shadow
1605       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1606       Value *ShadowBool = convertToBool(ShadowItem, IRB);
1607 
1608       if (Aggregator != FalseVal)
1609         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1610       else
1611         Aggregator = ShadowBool;
1612     }
1613 
1614     return Aggregator;
1615   }
1616 
1617   // Extract combined shadow of array elements
1618   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1619                              IRBuilder<> &IRB) {
1620     if (!Array->getNumElements())
1621       return IRB.getIntN(/* width */ 1, /* value */ 0);
1622 
1623     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1624     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1625 
1626     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1627       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1628       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1629       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1630     }
1631     return Aggregator;
1632   }
1633 
1634   /// Convert a shadow value to its flattened variant. The resulting
1635   /// shadow may not necessarily have the same bit width as the input
1636   /// value, but it will always be comparable to zero.
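       /// For example, an integer shadow is returned unchanged, a fixed-width
       /// vector shadow is bitcast to one integer of the same total width, and
       /// struct or array shadows are collapsed by OR-ing their elements.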
1637   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1638     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1639       return collapseStructShadow(Struct, V, IRB);
1640     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1641       return collapseArrayShadow(Array, V, IRB);
1642     if (isa<VectorType>(V->getType())) {
1643       if (isa<ScalableVectorType>(V->getType()))
1644         return convertShadowToScalar(IRB.CreateOrReduce(V), IRB);
1645       unsigned BitWidth =
1646         V->getType()->getPrimitiveSizeInBits().getFixedValue();
1647       return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth));
1648     }
1649     return V;
1650   }
1651 
1652   // Convert a scalar value to an i1 by comparing with 0
1653   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1654     Type *VTy = V->getType();
1655     if (!VTy->isIntegerTy())
1656       return convertToBool(convertShadowToScalar(V, IRB), IRB, name);
1657     if (VTy->getIntegerBitWidth() == 1)
1658       // Just converting a bool to a bool, so do nothing.
1659       return V;
1660     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1661   }
1662 
1663   Type *ptrToIntPtrType(Type *PtrTy) const {
1664     if (VectorType *VectTy = dyn_cast<VectorType>(PtrTy)) {
1665       return VectorType::get(ptrToIntPtrType(VectTy->getElementType()),
1666                              VectTy->getElementCount());
1667     }
1668     assert(PtrTy->isIntOrPtrTy());
1669     return MS.IntptrTy;
1670   }
1671 
1672   Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
1673     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1674       return VectorType::get(
1675           getPtrToShadowPtrType(VectTy->getElementType(), ShadowTy),
1676           VectTy->getElementCount());
1677     }
1678     assert(IntPtrTy == MS.IntptrTy);
1679     return ShadowTy->getPointerTo();
1680   }
1681 
1682   Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
1683     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1684       return ConstantVector::getSplat(
1685           VectTy->getElementCount(), constToIntPtr(VectTy->getElementType(), C));
1686     }
1687     assert(IntPtrTy == MS.IntptrTy);
1688     return ConstantInt::get(MS.IntptrTy, C);
1689   }
1690 
1691   /// Compute the integer shadow offset that corresponds to a given
1692   /// application address.
1693   ///
1694   /// Offset = (Addr & ~AndMask) ^ XorMask
1695   /// Addr can be a ptr or <N x ptr>. The result is an intptr-sized offset
1696   /// (or a vector of such offsets for a vector of addresses) into the
1697   /// shadow mapping.
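       /// For example, with typical x86_64 Linux parameters (AndMask == 0,
       /// XorMask == 0x500000000000, ShadowBase == 0), the shadow of application
       /// address A lives at A ^ 0x500000000000.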
1698   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1699     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1700     Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1701 
1702     if (uint64_t AndMask = MS.MapParams->AndMask)
1703       OffsetLong = IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask));
1704 
1705     if (uint64_t XorMask = MS.MapParams->XorMask)
1706       OffsetLong = IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask));
1707     return OffsetLong;
1708   }
1709 
1710   /// Compute the shadow and origin addresses corresponding to a given
1711   /// application address.
1712   ///
1713   /// Shadow = ShadowBase + Offset
1714   /// Origin = (OriginBase + Offset) & ~3ULL
1715   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy is the shadow
1716   /// type of a single pointee.
1717   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1718   std::pair<Value *, Value *>
1719   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1720                               MaybeAlign Alignment) {
1721     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1722     if (!VectTy) {
1723       assert(Addr->getType()->isPointerTy());
1724     } else {
1725       assert(VectTy->getElementType()->isPointerTy());
1726     }
1727     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1728     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1729     Value *ShadowLong = ShadowOffset;
1730     if (uint64_t ShadowBase = MS.MapParams->ShadowBase) {
1731       ShadowLong =
1732           IRB.CreateAdd(ShadowLong, constToIntPtr(IntptrTy, ShadowBase));
1733     }
1734     Value *ShadowPtr = IRB.CreateIntToPtr(
1735         ShadowLong, getPtrToShadowPtrType(IntptrTy, ShadowTy));
1736 
1737     Value *OriginPtr = nullptr;
1738     if (MS.TrackOrigins) {
1739       Value *OriginLong = ShadowOffset;
1740       uint64_t OriginBase = MS.MapParams->OriginBase;
1741       if (OriginBase != 0)
1742         OriginLong =
1743             IRB.CreateAdd(OriginLong, constToIntPtr(IntptrTy, OriginBase));
1744       if (!Alignment || *Alignment < kMinOriginAlignment) {
1745         uint64_t Mask = kMinOriginAlignment.value() - 1;
1746         OriginLong = IRB.CreateAnd(OriginLong, constToIntPtr(IntptrTy, ~Mask));
1747       }
1748       OriginPtr = IRB.CreateIntToPtr(
1749           OriginLong, getPtrToShadowPtrType(IntptrTy, MS.OriginTy));
1750     }
1751     return std::make_pair(ShadowPtr, OriginPtr);
1752   }
1753 
1754   template <typename... ArgsTy>
1755   Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
1756                             ArgsTy... Args) {
1757     if (MS.TargetTriple.getArch() == Triple::systemz) {
1758       IRB.CreateCall(Callee,
1759                      {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
1760       return IRB.CreateLoad(MS.MsanMetadata, MS.MsanMetadataAlloca);
1761     }
1762 
1763     return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
1764   }
1765 
1766   std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
1767                                                             IRBuilder<> &IRB,
1768                                                             Type *ShadowTy,
1769                                                             bool isStore) {
1770     Value *ShadowOriginPtrs;
1771     const DataLayout &DL = F.getParent()->getDataLayout();
1772     TypeSize Size = DL.getTypeStoreSize(ShadowTy);
1773 
1774     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1775     Value *AddrCast =
1776         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1777     if (Getter) {
1778       ShadowOriginPtrs = createMetadataCall(IRB, Getter, AddrCast);
1779     } else {
1780       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1781       ShadowOriginPtrs = createMetadataCall(
1782           IRB,
1783           isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
1784           AddrCast, SizeVal);
1785     }
1786     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1787     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1788     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1789 
1790     return std::make_pair(ShadowPtr, OriginPtr);
1791   }
1792 
1793   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy is the shadow
1794   /// type of a single pointee.
1795   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1796   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1797                                                        IRBuilder<> &IRB,
1798                                                        Type *ShadowTy,
1799                                                        bool isStore) {
1800     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1801     if (!VectTy) {
1802       assert(Addr->getType()->isPointerTy());
1803       return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
1804     }
1805 
1806     // TODO: Support callbacks with vectors of addresses.
1807     unsigned NumElements = cast<FixedVectorType>(VectTy)->getNumElements();
1808     Value *ShadowPtrs = ConstantInt::getNullValue(
1809         FixedVectorType::get(ShadowTy->getPointerTo(), NumElements));
1810     Value *OriginPtrs = nullptr;
1811     if (MS.TrackOrigins)
1812       OriginPtrs = ConstantInt::getNullValue(
1813           FixedVectorType::get(MS.OriginTy->getPointerTo(), NumElements));
1814     for (unsigned i = 0; i < NumElements; ++i) {
1815       Value *OneAddr =
1816           IRB.CreateExtractElement(Addr, ConstantInt::get(IRB.getInt32Ty(), i));
1817       auto [ShadowPtr, OriginPtr] =
1818           getShadowOriginPtrKernelNoVec(OneAddr, IRB, ShadowTy, isStore);
1819 
1820       ShadowPtrs = IRB.CreateInsertElement(
1821           ShadowPtrs, ShadowPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1822       if (MS.TrackOrigins)
1823         OriginPtrs = IRB.CreateInsertElement(
1824             OriginPtrs, OriginPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1825     }
1826     return {ShadowPtrs, OriginPtrs};
1827   }
1828 
1829   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1830                                                  Type *ShadowTy,
1831                                                  MaybeAlign Alignment,
1832                                                  bool isStore) {
1833     if (MS.CompileKernel)
1834       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1835     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1836   }
1837 
1838   /// Compute the shadow address for a given function argument.
1839   ///
1840   /// Shadow = ParamTLS+ArgOffset.
1841   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) {
1842     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1843     if (ArgOffset)
1844       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1845     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1846                               "_msarg");
1847   }
1848 
1849   /// Compute the origin address for a given function argument.
1850   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) {
1851     if (!MS.TrackOrigins)
1852       return nullptr;
1853     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1854     if (ArgOffset)
1855       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1856     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1857                               "_msarg_o");
1858   }
1859 
1860   /// Compute the shadow address for a retval.
1861   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1862     return IRB.CreatePointerCast(MS.RetvalTLS,
1863                                  PointerType::get(getShadowTy(A), 0), "_msret");
1864   }
1865 
1866   /// Compute the origin address for a retval.
1867   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1868     // We keep a single origin for the entire retval. Might be too optimistic.
1869     return MS.RetvalOriginTLS;
1870   }
1871 
1872   /// Set SV to be the shadow value for V.
1873   void setShadow(Value *V, Value *SV) {
1874     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1875     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1876   }
1877 
1878   /// Set Origin to be the origin value for V.
1879   void setOrigin(Value *V, Value *Origin) {
1880     if (!MS.TrackOrigins)
1881       return;
1882     assert(!OriginMap.count(V) && "Values may only have one origin");
1883     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1884     OriginMap[V] = Origin;
1885   }
1886 
1887   Constant *getCleanShadow(Type *OrigTy) {
1888     Type *ShadowTy = getShadowTy(OrigTy);
1889     if (!ShadowTy)
1890       return nullptr;
1891     return Constant::getNullValue(ShadowTy);
1892   }
1893 
1894   /// Create a clean shadow value for a given value.
1895   ///
1896   /// Clean shadow (all zeroes) means all bits of the value are defined
1897   /// (initialized).
1898   Constant *getCleanShadow(Value *V) { return getCleanShadow(V->getType()); }
1899 
1900   /// Create a dirty shadow of a given shadow type.
1901   Constant *getPoisonedShadow(Type *ShadowTy) {
1902     assert(ShadowTy);
1903     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1904       return Constant::getAllOnesValue(ShadowTy);
1905     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1906       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1907                                       getPoisonedShadow(AT->getElementType()));
1908       return ConstantArray::get(AT, Vals);
1909     }
1910     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1911       SmallVector<Constant *, 4> Vals;
1912       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1913         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1914       return ConstantStruct::get(ST, Vals);
1915     }
1916     llvm_unreachable("Unexpected shadow type");
1917   }
1918 
1919   /// Create a dirty shadow for a given value.
1920   Constant *getPoisonedShadow(Value *V) {
1921     Type *ShadowTy = getShadowTy(V);
1922     if (!ShadowTy)
1923       return nullptr;
1924     return getPoisonedShadow(ShadowTy);
1925   }
1926 
1927   /// Create a clean (zero) origin.
1928   Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); }
1929 
1930   /// Get the shadow value for a given Value.
1931   ///
1932   /// This function either returns the value set earlier with setShadow,
1933   /// or extracts it from ParamTLS (for function arguments).
1934   Value *getShadow(Value *V) {
1935     if (Instruction *I = dyn_cast<Instruction>(V)) {
1936       if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize))
1937         return getCleanShadow(V);
1938       // For instructions the shadow is already stored in the map.
1939       Value *Shadow = ShadowMap[V];
1940       if (!Shadow) {
1941         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1942         (void)I;
1943         assert(Shadow && "No shadow for a value");
1944       }
1945       return Shadow;
1946     }
1947     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1948       Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
1949                                                         : getCleanShadow(V);
1950       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1951       (void)U;
1952       return AllOnes;
1953     }
1954     if (Argument *A = dyn_cast<Argument>(V)) {
1955       // For arguments we compute the shadow on demand and store it in the map.
1956       Value *&ShadowPtr = ShadowMap[V];
1957       if (ShadowPtr)
1958         return ShadowPtr;
1959       Function *F = A->getParent();
1960       IRBuilder<> EntryIRB(FnPrologueEnd);
1961       unsigned ArgOffset = 0;
1962       const DataLayout &DL = F->getParent()->getDataLayout();
1963       for (auto &FArg : F->args()) {
1964         if (!FArg.getType()->isSized()) {
1965           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1966           continue;
1967         }
1968 
1969         unsigned Size = FArg.hasByValAttr()
1970                             ? DL.getTypeAllocSize(FArg.getParamByValType())
1971                             : DL.getTypeAllocSize(FArg.getType());
1972 
1973         if (A == &FArg) {
1974           bool Overflow = ArgOffset + Size > kParamTLSSize;
1975           if (FArg.hasByValAttr()) {
1976             // ByVal pointer itself has clean shadow. We copy the actual
1977             // argument shadow to the underlying memory.
1978             // Figure out maximal valid memcpy alignment.
1979             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1980                 FArg.getParamAlign(), FArg.getParamByValType());
1981             Value *CpShadowPtr, *CpOriginPtr;
1982             std::tie(CpShadowPtr, CpOriginPtr) =
1983                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1984                                    /*isStore*/ true);
1985             if (!PropagateShadow || Overflow) {
1986               // ParamTLS overflow.
1987               EntryIRB.CreateMemSet(
1988                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1989                   Size, ArgAlign);
1990             } else {
1991               Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1992               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1993               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1994                                                  CopyAlign, Size);
1995               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1996               (void)Cpy;
1997 
1998               if (MS.TrackOrigins) {
1999                 Value *OriginPtr =
2000                     getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
2001                 // FIXME: OriginSize should be:
2002                 // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
2003                 unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
2004                 EntryIRB.CreateMemCpy(
2005                     CpOriginPtr,
2006                     /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
2007                     /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
2008                     OriginSize);
2009               }
2010             }
2011           }
2012 
2013           if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
2014               (MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) {
2015             ShadowPtr = getCleanShadow(V);
2016             setOrigin(A, getCleanOrigin());
2017           } else {
2018             // Shadow over TLS
2019             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
2020             ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
2021                                                    kShadowTLSAlignment);
2022             if (MS.TrackOrigins) {
2023               Value *OriginPtr =
2024                   getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
2025               setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
2026             }
2027           }
2028           LLVM_DEBUG(dbgs()
2029                      << "  ARG:    " << FArg << " ==> " << *ShadowPtr << "\n");
2030           break;
2031         }
2032 
2033         ArgOffset += alignTo(Size, kShadowTLSAlignment);
2034       }
2035       assert(ShadowPtr && "Could not find shadow for an argument");
2036       return ShadowPtr;
2037     }
2038     // For everything else the shadow is zero.
2039     return getCleanShadow(V);
2040   }
2041 
2042   /// Get the shadow for i-th argument of the instruction I.
2043   Value *getShadow(Instruction *I, int i) {
2044     return getShadow(I->getOperand(i));
2045   }
2046 
2047   /// Get the origin for a value.
2048   Value *getOrigin(Value *V) {
2049     if (!MS.TrackOrigins)
2050       return nullptr;
2051     if (!PropagateShadow || isa<Constant>(V) || isa<InlineAsm>(V))
2052       return getCleanOrigin();
2053     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
2054            "Unexpected value type in getOrigin()");
2055     if (Instruction *I = dyn_cast<Instruction>(V)) {
2056       if (I->getMetadata(LLVMContext::MD_nosanitize))
2057         return getCleanOrigin();
2058     }
2059     Value *Origin = OriginMap[V];
2060     assert(Origin && "Missing origin");
2061     return Origin;
2062   }
2063 
2064   /// Get the origin for i-th argument of the instruction I.
2065   Value *getOrigin(Instruction *I, int i) {
2066     return getOrigin(I->getOperand(i));
2067   }
2068 
2069   /// Remember the place where a shadow check should be inserted.
2070   ///
2071   /// This location will be later instrumented with a check that will print a
2072   /// UMR warning at runtime if the shadow value is not 0.
2073   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
2074     assert(Shadow);
2075     if (!InsertChecks)
2076       return;
2077 
2078     if (!DebugCounter::shouldExecute(DebugInsertCheck)) {
2079       LLVM_DEBUG(dbgs() << "Skipping check of " << *Shadow << " before "
2080                         << *OrigIns << "\n");
2081       return;
2082     }
2083 #ifndef NDEBUG
2084     Type *ShadowTy = Shadow->getType();
2085     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
2086             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
2087            "Can only insert checks for integer, vector, and aggregate shadow "
2088            "types");
2089 #endif
2090     InstrumentationList.push_back(
2091         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
2092   }
2093 
2094   /// Remember the place where a shadow check should be inserted.
2095   ///
2096   /// This location will be later instrumented with a check that will print a
2097   /// UMR warning at runtime if the value is not fully defined.
2098   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
2099     assert(Val);
2100     Value *Shadow, *Origin;
2101     if (ClCheckConstantShadow) {
2102       Shadow = getShadow(Val);
2103       if (!Shadow)
2104         return;
2105       Origin = getOrigin(Val);
2106     } else {
2107       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
2108       if (!Shadow)
2109         return;
2110       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
2111     }
2112     insertShadowCheck(Shadow, Origin, OrigIns);
2113   }
2114 
2115   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
2116     switch (a) {
2117     case AtomicOrdering::NotAtomic:
2118       return AtomicOrdering::NotAtomic;
2119     case AtomicOrdering::Unordered:
2120     case AtomicOrdering::Monotonic:
2121     case AtomicOrdering::Release:
2122       return AtomicOrdering::Release;
2123     case AtomicOrdering::Acquire:
2124     case AtomicOrdering::AcquireRelease:
2125       return AtomicOrdering::AcquireRelease;
2126     case AtomicOrdering::SequentiallyConsistent:
2127       return AtomicOrdering::SequentiallyConsistent;
2128     }
2129     llvm_unreachable("Unknown ordering");
2130   }
2131 
2132   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
2133     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2134     uint32_t OrderingTable[NumOrderings] = {};
2135 
2136     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2137         OrderingTable[(int)AtomicOrderingCABI::release] =
2138             (int)AtomicOrderingCABI::release;
2139     OrderingTable[(int)AtomicOrderingCABI::consume] =
2140         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2141             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2142                 (int)AtomicOrderingCABI::acq_rel;
2143     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2144         (int)AtomicOrderingCABI::seq_cst;
2145 
2146     return ConstantDataVector::get(IRB.getContext(),
2147                                    ArrayRef(OrderingTable, NumOrderings));
2148   }
2149 
2150   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
2151     switch (a) {
2152     case AtomicOrdering::NotAtomic:
2153       return AtomicOrdering::NotAtomic;
2154     case AtomicOrdering::Unordered:
2155     case AtomicOrdering::Monotonic:
2156     case AtomicOrdering::Acquire:
2157       return AtomicOrdering::Acquire;
2158     case AtomicOrdering::Release:
2159     case AtomicOrdering::AcquireRelease:
2160       return AtomicOrdering::AcquireRelease;
2161     case AtomicOrdering::SequentiallyConsistent:
2162       return AtomicOrdering::SequentiallyConsistent;
2163     }
2164     llvm_unreachable("Unknown ordering");
2165   }
2166 
2167   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
2168     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2169     uint32_t OrderingTable[NumOrderings] = {};
2170 
2171     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2172         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2173             OrderingTable[(int)AtomicOrderingCABI::consume] =
2174                 (int)AtomicOrderingCABI::acquire;
2175     OrderingTable[(int)AtomicOrderingCABI::release] =
2176         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2177             (int)AtomicOrderingCABI::acq_rel;
2178     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2179         (int)AtomicOrderingCABI::seq_cst;
2180 
2181     return ConstantDataVector::get(IRB.getContext(),
2182                                    ArrayRef(OrderingTable, NumOrderings));
2183   }
2184 
2185   // ------------------- Visitors.
2186   using InstVisitor<MemorySanitizerVisitor>::visit;
2187   void visit(Instruction &I) {
2188     if (I.getMetadata(LLVMContext::MD_nosanitize))
2189       return;
2190     // Don't want to visit if we're in the prologue
2191     if (isInPrologue(I))
2192       return;
2193     InstVisitor<MemorySanitizerVisitor>::visit(I);
2194   }
2195 
2196   /// Instrument LoadInst
2197   ///
2198   /// Loads the corresponding shadow and (optionally) origin.
2199   /// Optionally, checks that the load address is fully defined.
2200   void visitLoadInst(LoadInst &I) {
2201     assert(I.getType()->isSized() && "Load type must have size");
2202     assert(!I.getMetadata(LLVMContext::MD_nosanitize));
2203     NextNodeIRBuilder IRB(&I);
2204     Type *ShadowTy = getShadowTy(&I);
2205     Value *Addr = I.getPointerOperand();
2206     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2207     const Align Alignment = I.getAlign();
2208     if (PropagateShadow) {
2209       std::tie(ShadowPtr, OriginPtr) =
2210           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2211       setShadow(&I,
2212                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2213     } else {
2214       setShadow(&I, getCleanShadow(&I));
2215     }
2216 
2217     if (ClCheckAccessAddress)
2218       insertShadowCheck(I.getPointerOperand(), &I);
2219 
2220     if (I.isAtomic())
2221       I.setOrdering(addAcquireOrdering(I.getOrdering()));
2222 
2223     if (MS.TrackOrigins) {
2224       if (PropagateShadow) {
2225         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
2226         setOrigin(
2227             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
2228       } else {
2229         setOrigin(&I, getCleanOrigin());
2230       }
2231     }
2232   }
2233 
2234   /// Instrument StoreInst
2235   ///
2236   /// Stores the corresponding shadow and (optionally) origin.
2237   /// Optionally, checks that the store address is fully defined.
2238   void visitStoreInst(StoreInst &I) {
2239     StoreList.push_back(&I);
2240     if (ClCheckAccessAddress)
2241       insertShadowCheck(I.getPointerOperand(), &I);
2242   }
2243 
2244   void handleCASOrRMW(Instruction &I) {
2245     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2246 
2247     IRBuilder<> IRB(&I);
2248     Value *Addr = I.getOperand(0);
2249     Value *Val = I.getOperand(1);
2250     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, getShadowTy(Val), Align(1),
2251                                           /*isStore*/ true)
2252                            .first;
2253 
2254     if (ClCheckAccessAddress)
2255       insertShadowCheck(Addr, &I);
2256 
2257     // Only test the conditional argument of cmpxchg instruction.
2258     // The other argument can potentially be uninitialized, but we cannot
2259     // detect this situation reliably without possible false positives.
2260     if (isa<AtomicCmpXchgInst>(I))
2261       insertShadowCheck(Val, &I);
2262 
2263     IRB.CreateStore(getCleanShadow(Val), ShadowPtr);
2264 
2265     setShadow(&I, getCleanShadow(&I));
2266     setOrigin(&I, getCleanOrigin());
2267   }
2268 
2269   void visitAtomicRMWInst(AtomicRMWInst &I) {
2270     handleCASOrRMW(I);
2271     I.setOrdering(addReleaseOrdering(I.getOrdering()));
2272   }
2273 
2274   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2275     handleCASOrRMW(I);
2276     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2277   }
2278 
2279   // Vector manipulation.
2280   void visitExtractElementInst(ExtractElementInst &I) {
2281     insertShadowCheck(I.getOperand(1), &I);
2282     IRBuilder<> IRB(&I);
2283     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
2284                                            "_msprop"));
2285     setOrigin(&I, getOrigin(&I, 0));
2286   }
2287 
2288   void visitInsertElementInst(InsertElementInst &I) {
2289     insertShadowCheck(I.getOperand(2), &I);
2290     IRBuilder<> IRB(&I);
2291     auto *Shadow0 = getShadow(&I, 0);
2292     auto *Shadow1 = getShadow(&I, 1);
2293     setShadow(&I, IRB.CreateInsertElement(Shadow0, Shadow1, I.getOperand(2),
2294                                           "_msprop"));
2295     setOriginForNaryOp(I);
2296   }
2297 
2298   void visitShuffleVectorInst(ShuffleVectorInst &I) {
2299     IRBuilder<> IRB(&I);
2300     auto *Shadow0 = getShadow(&I, 0);
2301     auto *Shadow1 = getShadow(&I, 1);
2302     setShadow(&I, IRB.CreateShuffleVector(Shadow0, Shadow1, I.getShuffleMask(),
2303                                           "_msprop"));
2304     setOriginForNaryOp(I);
2305   }
2306 
2307   // Casts.
2308   void visitSExtInst(SExtInst &I) {
2309     IRBuilder<> IRB(&I);
2310     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
2311     setOrigin(&I, getOrigin(&I, 0));
2312   }
2313 
2314   void visitZExtInst(ZExtInst &I) {
2315     IRBuilder<> IRB(&I);
2316     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2317     setOrigin(&I, getOrigin(&I, 0));
2318   }
2319 
2320   void visitTruncInst(TruncInst &I) {
2321     IRBuilder<> IRB(&I);
2322     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2323     setOrigin(&I, getOrigin(&I, 0));
2324   }
2325 
2326   void visitBitCastInst(BitCastInst &I) {
2327     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2328     // a musttail call and a ret, don't instrument. New instructions are not
2329     // allowed after a musttail call.
2330     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2331       if (CI->isMustTailCall())
2332         return;
2333     IRBuilder<> IRB(&I);
2334     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2335     setOrigin(&I, getOrigin(&I, 0));
2336   }
2337 
2338   void visitPtrToIntInst(PtrToIntInst &I) {
2339     IRBuilder<> IRB(&I);
2340     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2341                                     "_msprop_ptrtoint"));
2342     setOrigin(&I, getOrigin(&I, 0));
2343   }
2344 
2345   void visitIntToPtrInst(IntToPtrInst &I) {
2346     IRBuilder<> IRB(&I);
2347     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2348                                     "_msprop_inttoptr"));
2349     setOrigin(&I, getOrigin(&I, 0));
2350   }
2351 
2352   void visitFPToSIInst(CastInst &I) { handleShadowOr(I); }
2353   void visitFPToUIInst(CastInst &I) { handleShadowOr(I); }
2354   void visitSIToFPInst(CastInst &I) { handleShadowOr(I); }
2355   void visitUIToFPInst(CastInst &I) { handleShadowOr(I); }
2356   void visitFPExtInst(CastInst &I) { handleShadowOr(I); }
2357   void visitFPTruncInst(CastInst &I) { handleShadowOr(I); }
2358 
2359   /// Propagate shadow for bitwise AND.
2360   ///
2361   /// This code is exact, i.e. if, for example, a bit in the left argument
2362   /// is defined and 0, then neither the value nor the definedness of the
2363   /// corresponding bit in the other argument affects the resulting shadow.
2364   void visitAnd(BinaryOperator &I) {
2365     IRBuilder<> IRB(&I);
2366     //  "And" of 0 and a poisoned value results in unpoisoned value.
2367     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2368     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2369     //  1&p => p;     0&p => 0;     p&p => p;
2370     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
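         //  Illustration: V1 = 0b1100, S1 = 0b0010 (bit 1 of V1 undefined),
         //  V2 = 0b0101 fully defined (S2 = 0). Then S = 0 | 0 | (0b0010 & 0b0101)
         //  = 0: the undefined bit of V1 meets a defined 0 bit of V2, so the
         //  result is fully defined.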
2371     Value *S1 = getShadow(&I, 0);
2372     Value *S2 = getShadow(&I, 1);
2373     Value *V1 = I.getOperand(0);
2374     Value *V2 = I.getOperand(1);
2375     if (V1->getType() != S1->getType()) {
2376       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2377       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2378     }
2379     Value *S1S2 = IRB.CreateAnd(S1, S2);
2380     Value *V1S2 = IRB.CreateAnd(V1, S2);
2381     Value *S1V2 = IRB.CreateAnd(S1, V2);
2382     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2383     setOriginForNaryOp(I);
2384   }
2385 
2386   void visitOr(BinaryOperator &I) {
2387     IRBuilder<> IRB(&I);
2388     //  "Or" of 1 and a poisoned value results in unpoisoned value.
2389     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2390     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2391     //  1|p => 1;     0|p => p;     p|p => p;
2392     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
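         //  Illustration: an undefined bit OR-ed with a bit that is defined and 1
         //  yields a defined 1; the ~V terms in the formula capture exactly that.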
2393     Value *S1 = getShadow(&I, 0);
2394     Value *S2 = getShadow(&I, 1);
2395     Value *V1 = IRB.CreateNot(I.getOperand(0));
2396     Value *V2 = IRB.CreateNot(I.getOperand(1));
2397     if (V1->getType() != S1->getType()) {
2398       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2399       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2400     }
2401     Value *S1S2 = IRB.CreateAnd(S1, S2);
2402     Value *V1S2 = IRB.CreateAnd(V1, S2);
2403     Value *S1V2 = IRB.CreateAnd(S1, V2);
2404     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2405     setOriginForNaryOp(I);
2406   }
2407 
2408   /// Default propagation of shadow and/or origin.
2409   ///
2410   /// This class implements the general case of shadow propagation, used in all
2411   /// cases where we don't know and/or don't care about what the operation
2412   /// actually does. It converts all input shadow values to a common type
2413   /// (extending or truncating as necessary), and bitwise OR's them.
2414   ///
2415   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2416   /// fully initialized), and less prone to false positives.
2417   ///
2418   /// This class also implements the general case of origin propagation. For an
2419   /// N-ary operation, the result origin is set to the origin of an argument that
2420   /// is not entirely initialized. If there is more than one such argument, the
2421   /// rightmost of them is picked. It does not matter which one is picked if all
2422   /// arguments are initialized.
2423   template <bool CombineShadow> class Combiner {
2424     Value *Shadow = nullptr;
2425     Value *Origin = nullptr;
2426     IRBuilder<> &IRB;
2427     MemorySanitizerVisitor *MSV;
2428 
2429   public:
2430     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2431         : IRB(IRB), MSV(MSV) {}
2432 
2433     /// Add a pair of shadow and origin values to the mix.
2434     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2435       if (CombineShadow) {
2436         assert(OpShadow);
2437         if (!Shadow)
2438           Shadow = OpShadow;
2439         else {
2440           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2441           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2442         }
2443       }
2444 
2445       if (MSV->MS.TrackOrigins) {
2446         assert(OpOrigin);
2447         if (!Origin) {
2448           Origin = OpOrigin;
2449         } else {
2450           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2451           // No point in adding something that might result in 0 origin value.
2452           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2453             Value *Cond = MSV->convertToBool(OpShadow, IRB);
2454             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2455           }
2456         }
2457       }
2458       return *this;
2459     }
2460 
2461     /// Add an application value to the mix.
2462     Combiner &Add(Value *V) {
2463       Value *OpShadow = MSV->getShadow(V);
2464       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2465       return Add(OpShadow, OpOrigin);
2466     }
2467 
2468     /// Set the current combined values as the given instruction's shadow
2469     /// and origin.
2470     void Done(Instruction *I) {
2471       if (CombineShadow) {
2472         assert(Shadow);
2473         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2474         MSV->setShadow(I, Shadow);
2475       }
2476       if (MSV->MS.TrackOrigins) {
2477         assert(Origin);
2478         MSV->setOrigin(I, Origin);
2479       }
2480     }
2481   };
2482 
2483   using ShadowAndOriginCombiner = Combiner<true>;
2484   using OriginCombiner = Combiner<false>;
2485 
2486   /// Propagate origin for arbitrary operation.
2487   void setOriginForNaryOp(Instruction &I) {
2488     if (!MS.TrackOrigins)
2489       return;
2490     IRBuilder<> IRB(&I);
2491     OriginCombiner OC(this, IRB);
2492     for (Use &Op : I.operands())
2493       OC.Add(Op.get());
2494     OC.Done(&I);
2495   }
2496 
2497   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2498     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2499            "Vector of pointers is not a valid shadow type");
2500     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2501                                   Ty->getScalarSizeInBits()
2502                             : Ty->getPrimitiveSizeInBits();
2503   }
2504 
2505   /// Cast between two shadow types, extending or truncating as
2506   /// necessary.
2507   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2508                           bool Signed = false) {
2509     Type *srcTy = V->getType();
2510     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2511     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2512     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2513       return IRB.CreateICmpNE(V, getCleanShadow(V));
2514 
2515     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2516       return IRB.CreateIntCast(V, dstTy, Signed);
2517     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2518         cast<VectorType>(dstTy)->getElementCount() ==
2519             cast<VectorType>(srcTy)->getElementCount())
2520       return IRB.CreateIntCast(V, dstTy, Signed);
2521     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2522     Value *V2 =
2523         IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2524     return IRB.CreateBitCast(V2, dstTy);
2525     // TODO: handle struct types.
2526   }
2527 
2528   /// Cast an application value to the type of its own shadow.
2529   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2530     Type *ShadowTy = getShadowTy(V);
2531     if (V->getType() == ShadowTy)
2532       return V;
2533     if (V->getType()->isPtrOrPtrVectorTy())
2534       return IRB.CreatePtrToInt(V, ShadowTy);
2535     else
2536       return IRB.CreateBitCast(V, ShadowTy);
2537   }
2538 
2539   /// Propagate shadow for arbitrary operation.
2540   void handleShadowOr(Instruction &I) {
2541     IRBuilder<> IRB(&I);
2542     ShadowAndOriginCombiner SC(this, IRB);
2543     for (Use &Op : I.operands())
2544       SC.Add(Op.get());
2545     SC.Done(&I);
2546   }
2547 
2548   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2549 
2550   // Handle multiplication by constant.
2551   //
2552   // Handle a special case of multiplication by constant that may have one or
2553   // more zeros in the lower bits. This makes corresponding number of lower bits
2554   // more zeros in the lower bits. This makes the corresponding number of lower
2555   // bits of the result zero as well. We model it by shifting the other operand
2556   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2557   // We use multiplication by 2**N instead of shift to cover the case of
2558   // multiplication by 0, which may occur in some elements of a vector operand.
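       //
       // Illustration: for X * 12, 12 == 3 * 2**2, so ShadowMul is 4 and the
       // result shadow is Sx * 4 == Sx << 2; the two low shadow bits are cleared
       // because the two low bits of X * 12 are always zero.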
2559   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2560                            Value *OtherArg) {
2561     Constant *ShadowMul;
2562     Type *Ty = ConstArg->getType();
2563     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2564       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2565       Type *EltTy = VTy->getElementType();
2566       SmallVector<Constant *, 16> Elements;
2567       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2568         if (ConstantInt *Elt =
2569                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2570           const APInt &V = Elt->getValue();
2571           APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2572           Elements.push_back(ConstantInt::get(EltTy, V2));
2573         } else {
2574           Elements.push_back(ConstantInt::get(EltTy, 1));
2575         }
2576       }
2577       ShadowMul = ConstantVector::get(Elements);
2578     } else {
2579       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2580         const APInt &V = Elt->getValue();
2581         APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2582         ShadowMul = ConstantInt::get(Ty, V2);
2583       } else {
2584         ShadowMul = ConstantInt::get(Ty, 1);
2585       }
2586     }
2587 
2588     IRBuilder<> IRB(&I);
2589     setShadow(&I,
2590               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2591     setOrigin(&I, getOrigin(OtherArg));
2592   }
2593 
2594   void visitMul(BinaryOperator &I) {
2595     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2596     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2597     if (constOp0 && !constOp1)
2598       handleMulByConstant(I, constOp0, I.getOperand(1));
2599     else if (constOp1 && !constOp0)
2600       handleMulByConstant(I, constOp1, I.getOperand(0));
2601     else
2602       handleShadowOr(I);
2603   }
2604 
2605   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2606   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2607   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2608   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2609   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2610   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2611 
2612   void handleIntegerDiv(Instruction &I) {
2613     IRBuilder<> IRB(&I);
2614     // Strict on the second argument.
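         // An uninitialized divisor can change observable behavior (e.g. trap on
         // division by zero), so it is checked eagerly; the dividend's shadow
         // simply propagates to the result.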
2615     insertShadowCheck(I.getOperand(1), &I);
2616     setShadow(&I, getShadow(&I, 0));
2617     setOrigin(&I, getOrigin(&I, 0));
2618   }
2619 
2620   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2621   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2622   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2623   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2624 
2625   // Floating point division is side-effect free, so we do not require that the
2626   // divisor is fully initialized and instead propagate the shadow. See PR37523.
2627   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2628   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2629 
2630   /// Instrument == and != comparisons.
2631   ///
2632   /// Sometimes the comparison result is known even if some of the bits of the
2633   /// arguments are not.
2634   void handleEqualityComparison(ICmpInst &I) {
2635     IRBuilder<> IRB(&I);
2636     Value *A = I.getOperand(0);
2637     Value *B = I.getOperand(1);
2638     Value *Sa = getShadow(A);
2639     Value *Sb = getShadow(B);
2640 
2641     // Get rid of pointers and vectors of pointers.
2642     // For ints (and vectors of ints), types of A and Sa match,
2643     // and this is a no-op.
2644     A = IRB.CreatePointerCast(A, Sa->getType());
2645     B = IRB.CreatePointerCast(B, Sb->getType());
2646 
2647     // A == B  <==>  (C = A^B) == 0
2648     // A != B  <==>  (C = A^B) != 0
2649     // Sc = Sa | Sb
2650     Value *C = IRB.CreateXor(A, B);
2651     Value *Sc = IRB.CreateOr(Sa, Sb);
2652     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2653     // Result is defined if one of the following is true
2654     // * there is a defined 1 bit in C
2655     // * C is fully defined
2656     // Si = !(C & ~Sc) && Sc
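         // For example, if A = 0b10?? (Sa = 0b0011) and B = 0 (Sb = 0), then C
         // has a defined 1 bit (C & ~Sc = 0b1000), so A != B is known to be
         // true and Si = 0 even though A is partly uninitialized.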
2657     Value *Zero = Constant::getNullValue(Sc->getType());
2658     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2659     Value *LHS = IRB.CreateICmpNE(Sc, Zero);
2660     Value *RHS =
2661         IRB.CreateICmpEQ(IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero);
2662     Value *Si = IRB.CreateAnd(LHS, RHS);
2663     Si->setName("_msprop_icmp");
2664     setShadow(&I, Si);
2665     setOriginForNaryOp(I);
2666   }
2667 
2668   /// Build the lowest possible value of A, taking into account A's
2669   /// uninitialized bits.
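       /// For example, for an unsigned A = 0b101? with Sa = 0b0001 the lowest
       /// possible value is 0b1010 (the unknown bit cleared); in the signed
       /// case an unknown sign bit is set instead, since that minimizes the
       /// value.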
2670   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2671                                 bool isSigned) {
2672     if (isSigned) {
2673       // Split shadow into sign bit and other bits.
2674       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2675       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2676       // Maximize the undefined sign bit, minimize the other undefined bits.
2677       return IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)),
2678                           SaSignBit);
2679     } else {
2680       // Minimize undefined bits.
2681       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2682     }
2683   }
2684 
2685   /// Build the highest possible value of A, taking into account A's
2686   /// uninitialized bits.
2687   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2688                                  bool isSigned) {
2689     if (isSigned) {
2690       // Split shadow into sign bit and other bits.
2691       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2692       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2693       // Minimize the undefined sign bit, maximize the other undefined bits.
2694       return IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)),
2695                           SaOtherBits);
2696     } else {
2697       // Maximize undefined bits.
2698       return IRB.CreateOr(A, Sa);
2699     }
2700   }
2701 
2702   /// Instrument relational comparisons.
2703   ///
2704   /// This function does exact shadow propagation for all relational
2705   /// comparisons of integers, pointers and vectors of those.
2706   /// FIXME: output seems suboptimal when one of the operands is a constant
2707   void handleRelationalComparisonExact(ICmpInst &I) {
2708     IRBuilder<> IRB(&I);
2709     Value *A = I.getOperand(0);
2710     Value *B = I.getOperand(1);
2711     Value *Sa = getShadow(A);
2712     Value *Sb = getShadow(B);
2713 
2714     // Get rid of pointers and vectors of pointers.
2715     // For ints (and vectors of ints), types of A and Sa match,
2716     // and this is a no-op.
2717     A = IRB.CreatePointerCast(A, Sa->getType());
2718     B = IRB.CreatePointerCast(B, Sb->getType());
2719 
2720     // Let [a0, a1] be the interval of possible values of A, taking into account
2721     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2722     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
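         // For example, with an unsigned A = 0b010? (so [a0, a1] = [4, 5]) and
         // a constant B = 7, both 4 < 7 and 5 < 7 hold, so A < B is defined
         // and Si = S1 xor S2 = 0.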
2723     bool IsSigned = I.isSigned();
2724     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2725                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2726                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2727     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2728                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2729                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2730     Value *Si = IRB.CreateXor(S1, S2);
2731     setShadow(&I, Si);
2732     setOriginForNaryOp(I);
2733   }
2734 
2735   /// Instrument signed relational comparisons.
2736   ///
2737   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2738   /// bit of the shadow. Everything else is delegated to handleShadowOr().
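       /// E.g. the result of x < 0 depends only on the sign bit of x, so it is
       /// poisoned exactly when the sign bit of the shadow is poisoned; the
       /// signed comparison of the shadow against zero below computes just
       /// that bit.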
2739   void handleSignedRelationalComparison(ICmpInst &I) {
2740     Constant *constOp;
2741     Value *op = nullptr;
2742     CmpInst::Predicate pre;
2743     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2744       op = I.getOperand(0);
2745       pre = I.getPredicate();
2746     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2747       op = I.getOperand(1);
2748       pre = I.getSwappedPredicate();
2749     } else {
2750       handleShadowOr(I);
2751       return;
2752     }
2753 
2754     if ((constOp->isNullValue() &&
2755          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2756         (constOp->isAllOnesValue() &&
2757          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2758       IRBuilder<> IRB(&I);
2759       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2760                                         "_msprop_icmp_s");
2761       setShadow(&I, Shadow);
2762       setOrigin(&I, getOrigin(op));
2763     } else {
2764       handleShadowOr(I);
2765     }
2766   }
2767 
2768   void visitICmpInst(ICmpInst &I) {
2769     if (!ClHandleICmp) {
2770       handleShadowOr(I);
2771       return;
2772     }
2773     if (I.isEquality()) {
2774       handleEqualityComparison(I);
2775       return;
2776     }
2777 
2778     assert(I.isRelational());
2779     if (ClHandleICmpExact) {
2780       handleRelationalComparisonExact(I);
2781       return;
2782     }
2783     if (I.isSigned()) {
2784       handleSignedRelationalComparison(I);
2785       return;
2786     }
2787 
2788     assert(I.isUnsigned());
2789     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2790       handleRelationalComparisonExact(I);
2791       return;
2792     }
2793 
2794     handleShadowOr(I);
2795   }
2796 
2797   void visitFCmpInst(FCmpInst &I) { handleShadowOr(I); }
2798 
2799   void handleShift(BinaryOperator &I) {
2800     IRBuilder<> IRB(&I);
2801     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2802     // Otherwise perform the same shift on S1.
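         // E.g. for %r = shl i32 %x, %n: a poisoned %n poisons every bit of %r
         // (via the sext'ed compare), while a fully defined %n simply shifts
         // %x's shadow by the same amount.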
2803     Value *S1 = getShadow(&I, 0);
2804     Value *S2 = getShadow(&I, 1);
2805     Value *S2Conv =
2806         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2807     Value *V2 = I.getOperand(1);
2808     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2809     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2810     setOriginForNaryOp(I);
2811   }
2812 
2813   void visitShl(BinaryOperator &I) { handleShift(I); }
2814   void visitAShr(BinaryOperator &I) { handleShift(I); }
2815   void visitLShr(BinaryOperator &I) { handleShift(I); }
2816 
2817   void handleFunnelShift(IntrinsicInst &I) {
2818     IRBuilder<> IRB(&I);
2819     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2820     // Otherwise perform the same shift on S0 and S1.
2821     Value *S0 = getShadow(&I, 0);
2822     Value *S1 = getShadow(&I, 1);
2823     Value *S2 = getShadow(&I, 2);
2824     Value *S2Conv =
2825         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2826     Value *V2 = I.getOperand(2);
2827     Function *Intrin = Intrinsic::getDeclaration(
2828         I.getModule(), I.getIntrinsicID(), S2Conv->getType());
2829     Value *Shift = IRB.CreateCall(Intrin, {S0, S1, V2});
2830     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2831     setOriginForNaryOp(I);
2832   }
2833 
2834   /// Instrument llvm.memmove
2835   ///
2836   /// At this point we don't know if llvm.memmove will be inlined or not.
2837   /// If we don't instrument it and it gets inlined,
2838   /// our interceptor will not kick in and we will lose the memmove.
2839   /// If we instrument the call here, but it does not get inlined,
2840   /// we will memmove the shadow twice, which is bad in the case
2841   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2842   ///
2843   /// A similar situation exists for memcpy and memset.
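       /// The call emitted below targets MS.MemmoveFn, i.e. the runtime's
       /// __msan_memmove(dst, src, n), which moves the application bytes and
       /// keeps the corresponding shadow consistent inside the runtime.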
2844   void visitMemMoveInst(MemMoveInst &I) {
2845     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2846     IRBuilder<> IRB(&I);
2847     IRB.CreateCall(
2848         MS.MemmoveFn,
2849         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2850          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2851          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2852     I.eraseFromParent();
2853   }
2854 
2855   /// Instrument memcpy
2856   ///
2857   /// Similar to memmove: avoid copying shadow twice. This is somewhat
2858   /// unfortunate as it may slow down small constant memcpys.
2859   /// FIXME: consider doing manual inline for small constant sizes and proper
2860   /// alignment.
2861   ///
2862   /// Note: This also handles memcpy.inline, which promises no calls to external
2863   /// functions as an optimization. However, with instrumentation enabled this
2864   /// is difficult to promise; additionally, we know that the MSan runtime
2865   /// exists and provides __msan_memcpy(). Therefore, we assume that with
2866   /// instrumentation it's safe to turn memcpy.inline into a call to
2867   /// __msan_memcpy(). Should this be wrong, such as when implementing memcpy()
2868   /// itself, instrumentation should be disabled with the no_sanitize attribute.
2869   void visitMemCpyInst(MemCpyInst &I) {
2870     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2871     IRBuilder<> IRB(&I);
2872     IRB.CreateCall(
2873         MS.MemcpyFn,
2874         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2875          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2876          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2877     I.eraseFromParent();
2878   }
2879 
2880   // Same as memcpy.
2881   void visitMemSetInst(MemSetInst &I) {
2882     IRBuilder<> IRB(&I);
2883     IRB.CreateCall(
2884         MS.MemsetFn,
2885         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2886          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2887          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2888     I.eraseFromParent();
2889   }
2890 
2891   void visitVAStartInst(VAStartInst &I) { VAHelper->visitVAStartInst(I); }
2892 
2893   void visitVACopyInst(VACopyInst &I) { VAHelper->visitVACopyInst(I); }
2894 
2895   /// Handle vector store-like intrinsics.
2896   ///
2897   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2898   /// has 1 pointer argument and 1 vector argument, returns void.
2899   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2900     IRBuilder<> IRB(&I);
2901     Value *Addr = I.getArgOperand(0);
2902     Value *Shadow = getShadow(&I, 1);
2903     Value *ShadowPtr, *OriginPtr;
2904 
2905     // We don't know the pointer alignment (could be unaligned SSE store!).
2906     // Have to assume the worst case.
2907     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2908         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2909     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2910 
2911     if (ClCheckAccessAddress)
2912       insertShadowCheck(Addr, &I);
2913 
2914     // FIXME: factor out common code from materializeStores
2915     if (MS.TrackOrigins)
2916       IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2917     return true;
2918   }
2919 
2920   /// Handle vector load-like intrinsics.
2921   ///
2922   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2923   /// has 1 pointer argument, returns a vector.
2924   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2925     IRBuilder<> IRB(&I);
2926     Value *Addr = I.getArgOperand(0);
2927 
2928     Type *ShadowTy = getShadowTy(&I);
2929     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2930     if (PropagateShadow) {
2931       // We don't know the pointer alignment (could be unaligned SSE load!).
2932       // Have to assume the worst case.
2933       const Align Alignment = Align(1);
2934       std::tie(ShadowPtr, OriginPtr) =
2935           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2936       setShadow(&I,
2937                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2938     } else {
2939       setShadow(&I, getCleanShadow(&I));
2940     }
2941 
2942     if (ClCheckAccessAddress)
2943       insertShadowCheck(Addr, &I);
2944 
2945     if (MS.TrackOrigins) {
2946       if (PropagateShadow)
2947         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2948       else
2949         setOrigin(&I, getCleanOrigin());
2950     }
2951     return true;
2952   }
2953 
2954   /// Handle (SIMD arithmetic)-like intrinsics.
2955   ///
2956   /// Instrument intrinsics with any number of arguments of the same type,
2957   /// equal to the return type. The type should be simple (no aggregates or
2958   /// pointers; vectors are fine).
2959   /// Caller guarantees that this intrinsic does not access memory.
2960   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2961     Type *RetTy = I.getType();
2962     if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy() ||
2963           RetTy->isX86_MMXTy()))
2964       return false;
2965 
2966     unsigned NumArgOperands = I.arg_size();
2967     for (unsigned i = 0; i < NumArgOperands; ++i) {
2968       Type *Ty = I.getArgOperand(i)->getType();
2969       if (Ty != RetTy)
2970         return false;
2971     }
2972 
2973     IRBuilder<> IRB(&I);
2974     ShadowAndOriginCombiner SC(this, IRB);
2975     for (unsigned i = 0; i < NumArgOperands; ++i)
2976       SC.Add(I.getArgOperand(i));
2977     SC.Done(&I);
2978 
2979     return true;
2980   }
2981 
2982   /// Heuristically instrument unknown intrinsics.
2983   ///
2984   /// The main purpose of this code is to do something reasonable with all
2985   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2986   /// We recognize several classes of intrinsics by their argument types and
2987   /// ModRefBehaviour and apply special instrumentation when we are reasonably
2988   /// sure that we know what the intrinsic does.
2989   ///
2990   /// We special-case intrinsics where this approach fails. See llvm.bswap
2991   /// handling as an example of that.
2992   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2993     unsigned NumArgOperands = I.arg_size();
2994     if (NumArgOperands == 0)
2995       return false;
2996 
2997     if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() &&
2998         I.getArgOperand(1)->getType()->isVectorTy() &&
2999         I.getType()->isVoidTy() && !I.onlyReadsMemory()) {
3000       // This looks like a vector store.
3001       return handleVectorStoreIntrinsic(I);
3002     }
3003 
3004     if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() &&
3005         I.getType()->isVectorTy() && I.onlyReadsMemory()) {
3006       // This looks like a vector load.
3007       return handleVectorLoadIntrinsic(I);
3008     }
3009 
3010     if (I.doesNotAccessMemory())
3011       if (maybeHandleSimpleNomemIntrinsic(I))
3012         return true;
3013 
3014     // FIXME: detect and handle SSE maskstore/maskload
3015     return false;
3016   }
3017 
3018   void handleInvariantGroup(IntrinsicInst &I) {
3019     setShadow(&I, getShadow(&I, 0));
3020     setOrigin(&I, getOrigin(&I, 0));
3021   }
3022 
3023   void handleLifetimeStart(IntrinsicInst &I) {
3024     if (!PoisonStack)
3025       return;
3026     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
3027     if (!AI)
3028       InstrumentLifetimeStart = false;
3029     LifetimeStartList.push_back(std::make_pair(&I, AI));
3030   }
3031 
3032   void handleBswap(IntrinsicInst &I) {
3033     IRBuilder<> IRB(&I);
3034     Value *Op = I.getArgOperand(0);
3035     Type *OpType = Op->getType();
3036     Function *BswapFunc = Intrinsic::getDeclaration(
3037         F.getParent(), Intrinsic::bswap, ArrayRef(&OpType, 1));
3038     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
3039     setOrigin(&I, getOrigin(Op));
3040   }
3041 
3042   void handleCountZeroes(IntrinsicInst &I) {
3043     IRBuilder<> IRB(&I);
3044     Value *Src = I.getArgOperand(0);
3045 
3046     // Set the output shadow based on the input shadow.
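         // If any bit of Src's shadow is set, the leading/trailing-zero count
         // is conservatively treated as undefined, so the whole result is
         // poisoned. Additionally, when is_zero_poison is set, a Src that is
         // actually zero also poisons the result (the IsNull check below).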
3047     Value *BoolShadow = IRB.CreateIsNotNull(getShadow(Src), "_mscz_bs");
3048 
3049     // If zero poison is requested, mix in with the shadow
3050     Constant *IsZeroPoison = cast<Constant>(I.getOperand(1));
3051     if (!IsZeroPoison->isZeroValue()) {
3052       Value *BoolZeroPoison = IRB.CreateIsNull(Src, "_mscz_bzp");
3053       BoolShadow = IRB.CreateOr(BoolShadow, BoolZeroPoison, "_mscz_bs");
3054     }
3055 
3056     Value *OutputShadow =
3057         IRB.CreateSExt(BoolShadow, getShadowTy(Src), "_mscz_os");
3058 
3059     setShadow(&I, OutputShadow);
3060     setOriginForNaryOp(I);
3061   }
3062 
3063   // Instrument vector convert intrinsic.
3064   //
3065   // This function instruments intrinsics like cvtsi2ss:
3066   // %Out = int_xxx_cvtyyy(%ConvertOp)
3067   // or
3068   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
3069   // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
3070   // number of \p Out elements, and (if it has 2 arguments) copies the rest of the
3071   // elements from \p CopyOp.
3072   // In most cases the conversion involves a floating-point value which may trigger a
3073   // hardware exception when not fully initialized. For this reason we require
3074   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
3075   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
3076   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
3077   // return a fully initialized value.
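       // For example, for a two-argument cvtsd2ss-style call with
       // NumUsedElements == 1: element 0 of ConvertOp is checked (a report is
       // issued at run time if it is poisoned); element 0 of the result shadow
       // is cleared, and the remaining elements keep CopyOp's shadow.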
3078   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
3079                                     bool HasRoundingMode = false) {
3080     IRBuilder<> IRB(&I);
3081     Value *CopyOp, *ConvertOp;
3082 
3083     assert((!HasRoundingMode ||
3084             isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
3085            "Invalid rounding mode");
3086 
3087     switch (I.arg_size() - HasRoundingMode) {
3088     case 2:
3089       CopyOp = I.getArgOperand(0);
3090       ConvertOp = I.getArgOperand(1);
3091       break;
3092     case 1:
3093       ConvertOp = I.getArgOperand(0);
3094       CopyOp = nullptr;
3095       break;
3096     default:
3097       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
3098     }
3099 
3100     // The first *NumUsedElements* elements of ConvertOp are converted to the
3101     // same number of output elements. The rest of the output is copied from
3102     // CopyOp, or (if not available) filled with zeroes.
3103     // Combine shadow for elements of ConvertOp that are used in this operation,
3104     // and insert a check.
3105     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
3106     // int->any conversion.
3107     Value *ConvertShadow = getShadow(ConvertOp);
3108     Value *AggShadow = nullptr;
3109     if (ConvertOp->getType()->isVectorTy()) {
3110       AggShadow = IRB.CreateExtractElement(
3111           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3112       for (int i = 1; i < NumUsedElements; ++i) {
3113         Value *MoreShadow = IRB.CreateExtractElement(
3114             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3115         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
3116       }
3117     } else {
3118       AggShadow = ConvertShadow;
3119     }
3120     assert(AggShadow->getType()->isIntegerTy());
3121     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
3122 
3123     // Build result shadow by zero-filling parts of CopyOp shadow that come from
3124     // ConvertOp.
3125     if (CopyOp) {
3126       assert(CopyOp->getType() == I.getType());
3127       assert(CopyOp->getType()->isVectorTy());
3128       Value *ResultShadow = getShadow(CopyOp);
3129       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
3130       for (int i = 0; i < NumUsedElements; ++i) {
3131         ResultShadow = IRB.CreateInsertElement(
3132             ResultShadow, ConstantInt::getNullValue(EltTy),
3133             ConstantInt::get(IRB.getInt32Ty(), i));
3134       }
3135       setShadow(&I, ResultShadow);
3136       setOrigin(&I, getOrigin(CopyOp));
3137     } else {
3138       setShadow(&I, getCleanShadow(&I));
3139       setOrigin(&I, getCleanOrigin());
3140     }
3141   }
3142 
3143   // Given a scalar or vector, extract lower 64 bits (or less), and return all
3144   // zeroes if it is zero, and all ones otherwise.
3145   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3146     if (S->getType()->isVectorTy())
3147       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
3148     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
3149     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3150     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3151   }
3152 
3153   // Given a vector, extract its first element, and return all
3154   // zeroes if it is zero, and all ones otherwise.
3155   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3156     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
3157     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
3158     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3159   }
3160 
3161   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
3162     Type *T = S->getType();
3163     assert(T->isVectorTy());
3164     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3165     return IRB.CreateSExt(S2, T);
3166   }
3167 
3168   // Instrument vector shift intrinsic.
3169   //
3170   // This function instruments intrinsics like int_x86_avx2_psll_w.
3171   // Intrinsic shifts %In by %ShiftSize bits.
3172   // %ShiftSize may be a vector; in that case the lower 64 bits determine the
3173   // shift size, and the rest is ignored. Behavior is defined even if the shift
3174   // size is greater than the register (or field) width.
3175   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
3176     assert(I.arg_size() == 2);
3177     IRBuilder<> IRB(&I);
3178     // If any of the S2 bits are poisoned, the whole thing is poisoned.
3179     // Otherwise perform the same shift on S1.
3180     Value *S1 = getShadow(&I, 0);
3181     Value *S2 = getShadow(&I, 1);
3182     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
3183                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
3184     Value *V1 = I.getOperand(0);
3185     Value *V2 = I.getOperand(1);
3186     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
3187                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
3188     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
3189     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
3190     setOriginForNaryOp(I);
3191   }
3192 
3193   // Get an X86_MMX-sized vector type.
3194   Type *getMMXVectorTy(unsigned EltSizeInBits) {
3195     const unsigned X86_MMXSizeInBits = 64;
3196     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
3197            "Illegal MMX vector element size");
3198     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
3199                                 X86_MMXSizeInBits / EltSizeInBits);
3200   }
3201 
3202   // Returns a signed counterpart for an (un)signed-saturate-and-pack
3203   // intrinsic.
3204   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
3205     switch (id) {
3206     case Intrinsic::x86_sse2_packsswb_128:
3207     case Intrinsic::x86_sse2_packuswb_128:
3208       return Intrinsic::x86_sse2_packsswb_128;
3209 
3210     case Intrinsic::x86_sse2_packssdw_128:
3211     case Intrinsic::x86_sse41_packusdw:
3212       return Intrinsic::x86_sse2_packssdw_128;
3213 
3214     case Intrinsic::x86_avx2_packsswb:
3215     case Intrinsic::x86_avx2_packuswb:
3216       return Intrinsic::x86_avx2_packsswb;
3217 
3218     case Intrinsic::x86_avx2_packssdw:
3219     case Intrinsic::x86_avx2_packusdw:
3220       return Intrinsic::x86_avx2_packssdw;
3221 
3222     case Intrinsic::x86_mmx_packsswb:
3223     case Intrinsic::x86_mmx_packuswb:
3224       return Intrinsic::x86_mmx_packsswb;
3225 
3226     case Intrinsic::x86_mmx_packssdw:
3227       return Intrinsic::x86_mmx_packssdw;
3228     default:
3229       llvm_unreachable("unexpected intrinsic id");
3230     }
3231   }
3232 
3233   // Instrument vector pack intrinsic.
3234   //
3235   // This function instruments intrinsics like x86_mmx_packsswb, which
3236   // pack elements of 2 input vectors into half as many bits with saturation.
3237   // Shadow is propagated with the signed variant of the same intrinsic applied
3238   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
3239   // EltSizeInBits is used only for x86mmx arguments.
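       // The signed variant is used because sext(Sa != 0) turns a poisoned
       // element into all-ones (i.e. -1): signed saturation keeps it as an
       // all-ones narrow element, whereas unsigned saturation would clamp the
       // negative value to 0 and silently drop the poison.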
3240   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
3241     assert(I.arg_size() == 2);
3242     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3243     IRBuilder<> IRB(&I);
3244     Value *S1 = getShadow(&I, 0);
3245     Value *S2 = getShadow(&I, 1);
3246     assert(isX86_MMX || S1->getType()->isVectorTy());
3247 
3248     // SExt and ICmpNE below must apply to individual elements of input vectors.
3249     // In case of x86mmx arguments, cast them to appropriate vector types and
3250     // back.
3251     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
3252     if (isX86_MMX) {
3253       S1 = IRB.CreateBitCast(S1, T);
3254       S2 = IRB.CreateBitCast(S2, T);
3255     }
3256     Value *S1_ext =
3257         IRB.CreateSExt(IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
3258     Value *S2_ext =
3259         IRB.CreateSExt(IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
3260     if (isX86_MMX) {
3261       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
3262       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
3263       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
3264     }
3265 
3266     Function *ShadowFn = Intrinsic::getDeclaration(
3267         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
3268 
3269     Value *S =
3270         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
3271     if (isX86_MMX)
3272       S = IRB.CreateBitCast(S, getShadowTy(&I));
3273     setShadow(&I, S);
3274     setOriginForNaryOp(I);
3275   }
3276 
3277   // Instrument sum-of-absolute-differences intrinsic.
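       // psad sums absolute differences of groups of bytes into the low 16
       // bits of each (wider) result element and zeroes the remaining bits, so
       // poison among the contributing bytes can only affect those low 16
       // bits; the final LShr below clears the shadow of the always-zero high
       // bits.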
3278   void handleVectorSadIntrinsic(IntrinsicInst &I) {
3279     const unsigned SignificantBitsPerResultElement = 16;
3280     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3281     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
3282     unsigned ZeroBitsPerResultElement =
3283         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
3284 
3285     IRBuilder<> IRB(&I);
3286     auto *Shadow0 = getShadow(&I, 0);
3287     auto *Shadow1 = getShadow(&I, 1);
3288     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3289     S = IRB.CreateBitCast(S, ResTy);
3290     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3291                        ResTy);
3292     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
3293     S = IRB.CreateBitCast(S, getShadowTy(&I));
3294     setShadow(&I, S);
3295     setOriginForNaryOp(I);
3296   }
3297 
3298   // Instrument multiply-add intrinsic.
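       // pmadd multiplies adjacent pairs of input elements and adds each pair
       // of products into a result element of twice the width, so poison in
       // any contributing element conservatively poisons the whole
       // corresponding double-width result element.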
3299   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
3300                                   unsigned EltSizeInBits = 0) {
3301     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3302     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
3303     IRBuilder<> IRB(&I);
3304     auto *Shadow0 = getShadow(&I, 0);
3305     auto *Shadow1 = getShadow(&I, 1);
3306     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3307     S = IRB.CreateBitCast(S, ResTy);
3308     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3309                        ResTy);
3310     S = IRB.CreateBitCast(S, getShadowTy(&I));
3311     setShadow(&I, S);
3312     setOriginForNaryOp(I);
3313   }
3314 
3315   // Instrument compare-packed intrinsic.
3316   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
3317   // all-ones shadow.
3318   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
3319     IRBuilder<> IRB(&I);
3320     Type *ResTy = getShadowTy(&I);
3321     auto *Shadow0 = getShadow(&I, 0);
3322     auto *Shadow1 = getShadow(&I, 1);
3323     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3324     Value *S = IRB.CreateSExt(
3325         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
3326     setShadow(&I, S);
3327     setOriginForNaryOp(I);
3328   }
3329 
3330   // Instrument compare-scalar intrinsic.
3331   // This handles both cmp* intrinsics which return the result in the first
3332   // element of a vector, and comi* which return the result as i32.
3333   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
3334     IRBuilder<> IRB(&I);
3335     auto *Shadow0 = getShadow(&I, 0);
3336     auto *Shadow1 = getShadow(&I, 1);
3337     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3338     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
3339     setShadow(&I, S);
3340     setOriginForNaryOp(I);
3341   }
3342 
3343   // Instrument generic vector reduction intrinsics
3344   // by ORing together all their fields.
3345   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
3346     IRBuilder<> IRB(&I);
3347     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
3348     setShadow(&I, S);
3349     setOrigin(&I, getOrigin(&I, 0));
3350   }
3351 
3352   // Instrument vector.reduce.or intrinsic.
3353   // Valid (non-poisoned) set bits in the operand pull low the
3354   // corresponding shadow bits.
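       // For example, a fully initialized all-ones element forces every bit of
       // the OR reduction to 1, so the result is clean regardless of poison in
       // the other elements.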
3355   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
3356     IRBuilder<> IRB(&I);
3357     Value *OperandShadow = getShadow(&I, 0);
3358     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
3359     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
3360     // Bit N is clean if any field's bit N is 1 and unpoisoned
3361     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
3362     // Otherwise, it is clean if every field's bit N is unpoisoned
3363     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3364     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3365 
3366     setShadow(&I, S);
3367     setOrigin(&I, getOrigin(&I, 0));
3368   }
3369 
3370   // Instrument vector.reduce.and intrinsic.
3371   // Valid (non-poisoned) unset bits in the operand pull down the
3372   // corresponding shadow bits.
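       // Dually to the OR case above: a fully initialized all-zeroes element
       // forces every bit of the AND reduction to 0, so the result is clean
       // regardless of poison in the other elements.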
3373   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3374     IRBuilder<> IRB(&I);
3375     Value *OperandShadow = getShadow(&I, 0);
3376     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
3377     // Bit N is clean if any field's bit N is 0 and unpoisoned
3378     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
3379     // Otherwise, it is clean if every field's bit N is unpoisoned
3380     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3381     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3382 
3383     setShadow(&I, S);
3384     setOrigin(&I, getOrigin(&I, 0));
3385   }
3386 
3387   void handleStmxcsr(IntrinsicInst &I) {
3388     IRBuilder<> IRB(&I);
3389     Value *Addr = I.getArgOperand(0);
3390     Type *Ty = IRB.getInt32Ty();
3391     Value *ShadowPtr =
3392         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3393 
3394     IRB.CreateStore(getCleanShadow(Ty),
3395                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
3396 
3397     if (ClCheckAccessAddress)
3398       insertShadowCheck(Addr, &I);
3399   }
3400 
3401   void handleLdmxcsr(IntrinsicInst &I) {
3402     if (!InsertChecks)
3403       return;
3404 
3405     IRBuilder<> IRB(&I);
3406     Value *Addr = I.getArgOperand(0);
3407     Type *Ty = IRB.getInt32Ty();
3408     const Align Alignment = Align(1);
3409     Value *ShadowPtr, *OriginPtr;
3410     std::tie(ShadowPtr, OriginPtr) =
3411         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3412 
3413     if (ClCheckAccessAddress)
3414       insertShadowCheck(Addr, &I);
3415 
3416     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3417     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3418                                     : getCleanOrigin();
3419     insertShadowCheck(Shadow, Origin, &I);
3420   }
3421 
3422   void handleMaskedExpandLoad(IntrinsicInst &I) {
3423     IRBuilder<> IRB(&I);
3424     Value *Ptr = I.getArgOperand(0);
3425     Value *Mask = I.getArgOperand(1);
3426     Value *PassThru = I.getArgOperand(2);
3427 
3428     if (ClCheckAccessAddress) {
3429       insertShadowCheck(Ptr, &I);
3430       insertShadowCheck(Mask, &I);
3431     }
3432 
3433     if (!PropagateShadow) {
3434       setShadow(&I, getCleanShadow(&I));
3435       setOrigin(&I, getCleanOrigin());
3436       return;
3437     }
3438 
3439     Type *ShadowTy = getShadowTy(&I);
3440     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3441     auto [ShadowPtr, OriginPtr] =
3442         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ false);
3443 
3444     Value *Shadow = IRB.CreateMaskedExpandLoad(
3445         ShadowTy, ShadowPtr, Mask, getShadow(PassThru), "_msmaskedexpload");
3446 
3447     setShadow(&I, Shadow);
3448 
3449     // TODO: Store origins.
3450     setOrigin(&I, getCleanOrigin());
3451   }
3452 
3453   void handleMaskedCompressStore(IntrinsicInst &I) {
3454     IRBuilder<> IRB(&I);
3455     Value *Values = I.getArgOperand(0);
3456     Value *Ptr = I.getArgOperand(1);
3457     Value *Mask = I.getArgOperand(2);
3458 
3459     if (ClCheckAccessAddress) {
3460       insertShadowCheck(Ptr, &I);
3461       insertShadowCheck(Mask, &I);
3462     }
3463 
3464     Value *Shadow = getShadow(Values);
3465     Type *ElementShadowTy =
3466         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3467     auto [ShadowPtr, OriginPtrs] =
3468         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ true);
3469 
3470     IRB.CreateMaskedCompressStore(Shadow, ShadowPtr, Mask);
3471 
3472     // TODO: Store origins.
3473   }
3474 
3475   void handleMaskedGather(IntrinsicInst &I) {
3476     IRBuilder<> IRB(&I);
3477     Value *Ptrs = I.getArgOperand(0);
3478     const Align Alignment(
3479         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3480     Value *Mask = I.getArgOperand(2);
3481     Value *PassThru = I.getArgOperand(3);
3482 
3483     Type *PtrsShadowTy = getShadowTy(Ptrs);
3484     if (ClCheckAccessAddress) {
3485       insertShadowCheck(Mask, &I);
3486       Value *MaskedPtrShadow = IRB.CreateSelect(
3487           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3488           "_msmaskedptrs");
3489       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3490     }
3491 
3492     if (!PropagateShadow) {
3493       setShadow(&I, getCleanShadow(&I));
3494       setOrigin(&I, getCleanOrigin());
3495       return;
3496     }
3497 
3498     Type *ShadowTy = getShadowTy(&I);
3499     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3500     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3501         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ false);
3502 
3503     Value *Shadow =
3504         IRB.CreateMaskedGather(ShadowTy, ShadowPtrs, Alignment, Mask,
3505                                getShadow(PassThru), "_msmaskedgather");
3506 
3507     setShadow(&I, Shadow);
3508 
3509     // TODO: Store origins.
3510     setOrigin(&I, getCleanOrigin());
3511   }
3512 
3513   void handleMaskedScatter(IntrinsicInst &I) {
3514     IRBuilder<> IRB(&I);
3515     Value *Values = I.getArgOperand(0);
3516     Value *Ptrs = I.getArgOperand(1);
3517     const Align Alignment(
3518         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3519     Value *Mask = I.getArgOperand(3);
3520 
3521     Type *PtrsShadowTy = getShadowTy(Ptrs);
3522     if (ClCheckAccessAddress) {
3523       insertShadowCheck(Mask, &I);
3524       Value *MaskedPtrShadow = IRB.CreateSelect(
3525           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3526           "_msmaskedptrs");
3527       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3528     }
3529 
3530     Value *Shadow = getShadow(Values);
3531     Type *ElementShadowTy =
3532         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3533     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3534         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ true);
3535 
3536     IRB.CreateMaskedScatter(Shadow, ShadowPtrs, Alignment, Mask);
3537 
3538     // TODO: Store origin.
3539   }
3540 
3541   void handleMaskedStore(IntrinsicInst &I) {
3542     IRBuilder<> IRB(&I);
3543     Value *V = I.getArgOperand(0);
3544     Value *Ptr = I.getArgOperand(1);
3545     const Align Alignment(
3546         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3547     Value *Mask = I.getArgOperand(3);
3548     Value *Shadow = getShadow(V);
3549 
3550     if (ClCheckAccessAddress) {
3551       insertShadowCheck(Ptr, &I);
3552       insertShadowCheck(Mask, &I);
3553     }
3554 
3555     Value *ShadowPtr;
3556     Value *OriginPtr;
3557     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3558         Ptr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3559 
3560     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3561 
3562     if (!MS.TrackOrigins)
3563       return;
3564 
3565     auto &DL = F.getParent()->getDataLayout();
3566     paintOrigin(IRB, getOrigin(V), OriginPtr,
3567                 DL.getTypeStoreSize(Shadow->getType()),
3568                 std::max(Alignment, kMinOriginAlignment));
3569   }
3570 
3571   void handleMaskedLoad(IntrinsicInst &I) {
3572     IRBuilder<> IRB(&I);
3573     Value *Ptr = I.getArgOperand(0);
3574     const Align Alignment(
3575         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3576     Value *Mask = I.getArgOperand(2);
3577     Value *PassThru = I.getArgOperand(3);
3578 
3579     if (ClCheckAccessAddress) {
3580       insertShadowCheck(Ptr, &I);
3581       insertShadowCheck(Mask, &I);
3582     }
3583 
3584     if (!PropagateShadow) {
3585       setShadow(&I, getCleanShadow(&I));
3586       setOrigin(&I, getCleanOrigin());
3587       return;
3588     }
3589 
3590     Type *ShadowTy = getShadowTy(&I);
3591     Value *ShadowPtr, *OriginPtr;
3592     std::tie(ShadowPtr, OriginPtr) =
3593         getShadowOriginPtr(Ptr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3594     setShadow(&I, IRB.CreateMaskedLoad(ShadowTy, ShadowPtr, Alignment, Mask,
3595                                        getShadow(PassThru), "_msmaskedld"));
3596 
3597     if (!MS.TrackOrigins)
3598       return;
3599 
3600     // Choose between PassThru's and the loaded value's origins.
3601     Value *MaskedPassThruShadow = IRB.CreateAnd(
3602         getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3603 
3604     Value *NotNull = convertToBool(MaskedPassThruShadow, IRB, "_mscmp");
3605 
3606     Value *PtrOrigin = IRB.CreateLoad(MS.OriginTy, OriginPtr);
3607     Value *Origin = IRB.CreateSelect(NotNull, getOrigin(PassThru), PtrOrigin);
3608 
3609     setOrigin(&I, Origin);
3610   }
3611 
3612   // Instrument BMI / BMI2 intrinsics.
3613   // All of these intrinsics are Z = I(X, Y)
3614   // where the types of all operands and the result match, and are either i32 or
3615   // i64. The following instrumentation happens to work for all of them:
3616   //   Sz = I(Sx, Y) | (sext (Sy != 0))
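       // E.g. for pdep(X, Mask) this deposits X's shadow bits into the same
       // positions that X's bits are deposited into, while any poison in Mask
       // poisons the whole result via the sext term.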
3617   void handleBmiIntrinsic(IntrinsicInst &I) {
3618     IRBuilder<> IRB(&I);
3619     Type *ShadowTy = getShadowTy(&I);
3620 
3621     // If any bit of the mask operand is poisoned, then the whole thing is.
3622     Value *SMask = getShadow(&I, 1);
3623     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3624                            ShadowTy);
3625     // Apply the same intrinsic to the shadow of the first operand.
3626     Value *S = IRB.CreateCall(I.getCalledFunction(),
3627                               {getShadow(&I, 0), I.getOperand(1)});
3628     S = IRB.CreateOr(SMask, S);
3629     setShadow(&I, S);
3630     setOriginForNaryOp(I);
3631   }
3632 
3633   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3634     SmallVector<int, 8> Mask;
3635     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3636       Mask.append(2, X);
3637     }
3638     return Mask;
3639   }
3640 
3641   // Instrument pclmul intrinsics.
3642   // These intrinsics operate either on odd or on even elements of the input
3643   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3644   // Replace the unused elements with copies of the used ones, ex:
3645   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3646   // or
3647   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3648   // and then apply the usual shadow combining logic.
3649   void handlePclmulIntrinsic(IntrinsicInst &I) {
3650     IRBuilder<> IRB(&I);
3651     unsigned Width =
3652         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3653     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3654            "pclmul 3rd operand must be a constant");
3655     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3656     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
3657                                            getPclmulMask(Width, Imm & 0x01));
3658     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
3659                                            getPclmulMask(Width, Imm & 0x10));
3660     ShadowAndOriginCombiner SOC(this, IRB);
3661     SOC.Add(Shuf0, getOrigin(&I, 0));
3662     SOC.Add(Shuf1, getOrigin(&I, 1));
3663     SOC.Done(&I);
3664   }
3665 
3666   // Instrument _mm_*_sd|ss intrinsics
3667   void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
3668     IRBuilder<> IRB(&I);
3669     unsigned Width =
3670         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3671     Value *First = getShadow(&I, 0);
3672     Value *Second = getShadow(&I, 1);
3673     // First element of second operand, remaining elements of first operand
3674     SmallVector<int, 16> Mask;
3675     Mask.push_back(Width);
3676     for (unsigned i = 1; i < Width; i++)
3677       Mask.push_back(i);
3678     Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
3679 
3680     setShadow(&I, Shadow);
3681     setOriginForNaryOp(I);
3682   }
3683 
3684   void handleVtestIntrinsic(IntrinsicInst &I) {
3685     IRBuilder<> IRB(&I);
3686     Value *Shadow0 = getShadow(&I, 0);
3687     Value *Shadow1 = getShadow(&I, 1);
3688     Value *Or = IRB.CreateOr(Shadow0, Shadow1);
3689     Value *NZ = IRB.CreateICmpNE(Or, Constant::getNullValue(Or->getType()));
3690     Value *Scalar = convertShadowToScalar(NZ, IRB);
3691     Value *Shadow = IRB.CreateZExt(Scalar, getShadowTy(&I));
3692 
3693     setShadow(&I, Shadow);
3694     setOriginForNaryOp(I);
3695   }
3696 
3697   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
3698     IRBuilder<> IRB(&I);
3699     unsigned Width =
3700         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3701     Value *First = getShadow(&I, 0);
3702     Value *Second = getShadow(&I, 1);
3703     Value *OrShadow = IRB.CreateOr(First, Second);
3704     // First element of both OR'd together, remaining elements of first operand
3705     SmallVector<int, 16> Mask;
3706     Mask.push_back(Width);
3707     for (unsigned i = 1; i < Width; i++)
3708       Mask.push_back(i);
3709     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
3710 
3711     setShadow(&I, Shadow);
3712     setOriginForNaryOp(I);
3713   }
3714 
3715   // Instrument abs intrinsic.
3716   // handleUnknownIntrinsic can't handle it because of the last
3717   // is_int_min_poison argument which does not match the result type.
3718   void handleAbsIntrinsic(IntrinsicInst &I) {
3719     assert(I.getType()->isIntOrIntVectorTy());
3720     assert(I.getArgOperand(0)->getType() == I.getType());
3721 
3722     // FIXME: Handle is_int_min_poison.
3723     IRBuilder<> IRB(&I);
3724     setShadow(&I, getShadow(&I, 0));
3725     setOrigin(&I, getOrigin(&I, 0));
3726   }
3727 
3728   void handleIsFpClass(IntrinsicInst &I) {
3729     IRBuilder<> IRB(&I);
3730     Value *Shadow = getShadow(&I, 0);
3731     setShadow(&I, IRB.CreateICmpNE(Shadow, getCleanShadow(Shadow)));
3732     setOrigin(&I, getOrigin(&I, 0));
3733   }
3734 
3735   void visitIntrinsicInst(IntrinsicInst &I) {
3736     switch (I.getIntrinsicID()) {
3737     case Intrinsic::abs:
3738       handleAbsIntrinsic(I);
3739       break;
3740     case Intrinsic::is_fpclass:
3741       handleIsFpClass(I);
3742       break;
3743     case Intrinsic::lifetime_start:
3744       handleLifetimeStart(I);
3745       break;
3746     case Intrinsic::launder_invariant_group:
3747     case Intrinsic::strip_invariant_group:
3748       handleInvariantGroup(I);
3749       break;
3750     case Intrinsic::bswap:
3751       handleBswap(I);
3752       break;
3753     case Intrinsic::ctlz:
3754     case Intrinsic::cttz:
3755       handleCountZeroes(I);
3756       break;
3757     case Intrinsic::masked_compressstore:
3758       handleMaskedCompressStore(I);
3759       break;
3760     case Intrinsic::masked_expandload:
3761       handleMaskedExpandLoad(I);
3762       break;
3763     case Intrinsic::masked_gather:
3764       handleMaskedGather(I);
3765       break;
3766     case Intrinsic::masked_scatter:
3767       handleMaskedScatter(I);
3768       break;
3769     case Intrinsic::masked_store:
3770       handleMaskedStore(I);
3771       break;
3772     case Intrinsic::masked_load:
3773       handleMaskedLoad(I);
3774       break;
3775     case Intrinsic::vector_reduce_and:
3776       handleVectorReduceAndIntrinsic(I);
3777       break;
3778     case Intrinsic::vector_reduce_or:
3779       handleVectorReduceOrIntrinsic(I);
3780       break;
3781     case Intrinsic::vector_reduce_add:
3782     case Intrinsic::vector_reduce_xor:
3783     case Intrinsic::vector_reduce_mul:
3784       handleVectorReduceIntrinsic(I);
3785       break;
3786     case Intrinsic::x86_sse_stmxcsr:
3787       handleStmxcsr(I);
3788       break;
3789     case Intrinsic::x86_sse_ldmxcsr:
3790       handleLdmxcsr(I);
3791       break;
3792     case Intrinsic::x86_avx512_vcvtsd2usi64:
3793     case Intrinsic::x86_avx512_vcvtsd2usi32:
3794     case Intrinsic::x86_avx512_vcvtss2usi64:
3795     case Intrinsic::x86_avx512_vcvtss2usi32:
3796     case Intrinsic::x86_avx512_cvttss2usi64:
3797     case Intrinsic::x86_avx512_cvttss2usi:
3798     case Intrinsic::x86_avx512_cvttsd2usi64:
3799     case Intrinsic::x86_avx512_cvttsd2usi:
3800     case Intrinsic::x86_avx512_cvtusi2ss:
3801     case Intrinsic::x86_avx512_cvtusi642sd:
3802     case Intrinsic::x86_avx512_cvtusi642ss:
3803       handleVectorConvertIntrinsic(I, 1, true);
3804       break;
3805     case Intrinsic::x86_sse2_cvtsd2si64:
3806     case Intrinsic::x86_sse2_cvtsd2si:
3807     case Intrinsic::x86_sse2_cvtsd2ss:
3808     case Intrinsic::x86_sse2_cvttsd2si64:
3809     case Intrinsic::x86_sse2_cvttsd2si:
3810     case Intrinsic::x86_sse_cvtss2si64:
3811     case Intrinsic::x86_sse_cvtss2si:
3812     case Intrinsic::x86_sse_cvttss2si64:
3813     case Intrinsic::x86_sse_cvttss2si:
3814       handleVectorConvertIntrinsic(I, 1);
3815       break;
3816     case Intrinsic::x86_sse_cvtps2pi:
3817     case Intrinsic::x86_sse_cvttps2pi:
3818       handleVectorConvertIntrinsic(I, 2);
3819       break;
3820 
3821     case Intrinsic::x86_avx512_psll_w_512:
3822     case Intrinsic::x86_avx512_psll_d_512:
3823     case Intrinsic::x86_avx512_psll_q_512:
3824     case Intrinsic::x86_avx512_pslli_w_512:
3825     case Intrinsic::x86_avx512_pslli_d_512:
3826     case Intrinsic::x86_avx512_pslli_q_512:
3827     case Intrinsic::x86_avx512_psrl_w_512:
3828     case Intrinsic::x86_avx512_psrl_d_512:
3829     case Intrinsic::x86_avx512_psrl_q_512:
3830     case Intrinsic::x86_avx512_psra_w_512:
3831     case Intrinsic::x86_avx512_psra_d_512:
3832     case Intrinsic::x86_avx512_psra_q_512:
3833     case Intrinsic::x86_avx512_psrli_w_512:
3834     case Intrinsic::x86_avx512_psrli_d_512:
3835     case Intrinsic::x86_avx512_psrli_q_512:
3836     case Intrinsic::x86_avx512_psrai_w_512:
3837     case Intrinsic::x86_avx512_psrai_d_512:
3838     case Intrinsic::x86_avx512_psrai_q_512:
3839     case Intrinsic::x86_avx512_psra_q_256:
3840     case Intrinsic::x86_avx512_psra_q_128:
3841     case Intrinsic::x86_avx512_psrai_q_256:
3842     case Intrinsic::x86_avx512_psrai_q_128:
3843     case Intrinsic::x86_avx2_psll_w:
3844     case Intrinsic::x86_avx2_psll_d:
3845     case Intrinsic::x86_avx2_psll_q:
3846     case Intrinsic::x86_avx2_pslli_w:
3847     case Intrinsic::x86_avx2_pslli_d:
3848     case Intrinsic::x86_avx2_pslli_q:
3849     case Intrinsic::x86_avx2_psrl_w:
3850     case Intrinsic::x86_avx2_psrl_d:
3851     case Intrinsic::x86_avx2_psrl_q:
3852     case Intrinsic::x86_avx2_psra_w:
3853     case Intrinsic::x86_avx2_psra_d:
3854     case Intrinsic::x86_avx2_psrli_w:
3855     case Intrinsic::x86_avx2_psrli_d:
3856     case Intrinsic::x86_avx2_psrli_q:
3857     case Intrinsic::x86_avx2_psrai_w:
3858     case Intrinsic::x86_avx2_psrai_d:
3859     case Intrinsic::x86_sse2_psll_w:
3860     case Intrinsic::x86_sse2_psll_d:
3861     case Intrinsic::x86_sse2_psll_q:
3862     case Intrinsic::x86_sse2_pslli_w:
3863     case Intrinsic::x86_sse2_pslli_d:
3864     case Intrinsic::x86_sse2_pslli_q:
3865     case Intrinsic::x86_sse2_psrl_w:
3866     case Intrinsic::x86_sse2_psrl_d:
3867     case Intrinsic::x86_sse2_psrl_q:
3868     case Intrinsic::x86_sse2_psra_w:
3869     case Intrinsic::x86_sse2_psra_d:
3870     case Intrinsic::x86_sse2_psrli_w:
3871     case Intrinsic::x86_sse2_psrli_d:
3872     case Intrinsic::x86_sse2_psrli_q:
3873     case Intrinsic::x86_sse2_psrai_w:
3874     case Intrinsic::x86_sse2_psrai_d:
3875     case Intrinsic::x86_mmx_psll_w:
3876     case Intrinsic::x86_mmx_psll_d:
3877     case Intrinsic::x86_mmx_psll_q:
3878     case Intrinsic::x86_mmx_pslli_w:
3879     case Intrinsic::x86_mmx_pslli_d:
3880     case Intrinsic::x86_mmx_pslli_q:
3881     case Intrinsic::x86_mmx_psrl_w:
3882     case Intrinsic::x86_mmx_psrl_d:
3883     case Intrinsic::x86_mmx_psrl_q:
3884     case Intrinsic::x86_mmx_psra_w:
3885     case Intrinsic::x86_mmx_psra_d:
3886     case Intrinsic::x86_mmx_psrli_w:
3887     case Intrinsic::x86_mmx_psrli_d:
3888     case Intrinsic::x86_mmx_psrli_q:
3889     case Intrinsic::x86_mmx_psrai_w:
3890     case Intrinsic::x86_mmx_psrai_d:
3891       handleVectorShiftIntrinsic(I, /* Variable */ false);
3892       break;
3893     case Intrinsic::x86_avx2_psllv_d:
3894     case Intrinsic::x86_avx2_psllv_d_256:
3895     case Intrinsic::x86_avx512_psllv_d_512:
3896     case Intrinsic::x86_avx2_psllv_q:
3897     case Intrinsic::x86_avx2_psllv_q_256:
3898     case Intrinsic::x86_avx512_psllv_q_512:
3899     case Intrinsic::x86_avx2_psrlv_d:
3900     case Intrinsic::x86_avx2_psrlv_d_256:
3901     case Intrinsic::x86_avx512_psrlv_d_512:
3902     case Intrinsic::x86_avx2_psrlv_q:
3903     case Intrinsic::x86_avx2_psrlv_q_256:
3904     case Intrinsic::x86_avx512_psrlv_q_512:
3905     case Intrinsic::x86_avx2_psrav_d:
3906     case Intrinsic::x86_avx2_psrav_d_256:
3907     case Intrinsic::x86_avx512_psrav_d_512:
3908     case Intrinsic::x86_avx512_psrav_q_128:
3909     case Intrinsic::x86_avx512_psrav_q_256:
3910     case Intrinsic::x86_avx512_psrav_q_512:
3911       handleVectorShiftIntrinsic(I, /* Variable */ true);
3912       break;
3913 
3914     case Intrinsic::x86_sse2_packsswb_128:
3915     case Intrinsic::x86_sse2_packssdw_128:
3916     case Intrinsic::x86_sse2_packuswb_128:
3917     case Intrinsic::x86_sse41_packusdw:
3918     case Intrinsic::x86_avx2_packsswb:
3919     case Intrinsic::x86_avx2_packssdw:
3920     case Intrinsic::x86_avx2_packuswb:
3921     case Intrinsic::x86_avx2_packusdw:
3922       handleVectorPackIntrinsic(I);
3923       break;
3924 
3925     case Intrinsic::x86_mmx_packsswb:
3926     case Intrinsic::x86_mmx_packuswb:
3927       handleVectorPackIntrinsic(I, 16);
3928       break;
3929 
3930     case Intrinsic::x86_mmx_packssdw:
3931       handleVectorPackIntrinsic(I, 32);
3932       break;
3933 
3934     case Intrinsic::x86_mmx_psad_bw:
3935     case Intrinsic::x86_sse2_psad_bw:
3936     case Intrinsic::x86_avx2_psad_bw:
3937       handleVectorSadIntrinsic(I);
3938       break;
3939 
3940     case Intrinsic::x86_sse2_pmadd_wd:
3941     case Intrinsic::x86_avx2_pmadd_wd:
3942     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3943     case Intrinsic::x86_avx2_pmadd_ub_sw:
3944       handleVectorPmaddIntrinsic(I);
3945       break;
3946 
3947     case Intrinsic::x86_ssse3_pmadd_ub_sw:
3948       handleVectorPmaddIntrinsic(I, 8);
3949       break;
3950 
3951     case Intrinsic::x86_mmx_pmadd_wd:
3952       handleVectorPmaddIntrinsic(I, 16);
3953       break;
3954 
3955     case Intrinsic::x86_sse_cmp_ss:
3956     case Intrinsic::x86_sse2_cmp_sd:
3957     case Intrinsic::x86_sse_comieq_ss:
3958     case Intrinsic::x86_sse_comilt_ss:
3959     case Intrinsic::x86_sse_comile_ss:
3960     case Intrinsic::x86_sse_comigt_ss:
3961     case Intrinsic::x86_sse_comige_ss:
3962     case Intrinsic::x86_sse_comineq_ss:
3963     case Intrinsic::x86_sse_ucomieq_ss:
3964     case Intrinsic::x86_sse_ucomilt_ss:
3965     case Intrinsic::x86_sse_ucomile_ss:
3966     case Intrinsic::x86_sse_ucomigt_ss:
3967     case Intrinsic::x86_sse_ucomige_ss:
3968     case Intrinsic::x86_sse_ucomineq_ss:
3969     case Intrinsic::x86_sse2_comieq_sd:
3970     case Intrinsic::x86_sse2_comilt_sd:
3971     case Intrinsic::x86_sse2_comile_sd:
3972     case Intrinsic::x86_sse2_comigt_sd:
3973     case Intrinsic::x86_sse2_comige_sd:
3974     case Intrinsic::x86_sse2_comineq_sd:
3975     case Intrinsic::x86_sse2_ucomieq_sd:
3976     case Intrinsic::x86_sse2_ucomilt_sd:
3977     case Intrinsic::x86_sse2_ucomile_sd:
3978     case Intrinsic::x86_sse2_ucomigt_sd:
3979     case Intrinsic::x86_sse2_ucomige_sd:
3980     case Intrinsic::x86_sse2_ucomineq_sd:
3981       handleVectorCompareScalarIntrinsic(I);
3982       break;
3983 
3984     case Intrinsic::x86_avx_cmp_pd_256:
3985     case Intrinsic::x86_avx_cmp_ps_256:
3986     case Intrinsic::x86_sse2_cmp_pd:
3987     case Intrinsic::x86_sse_cmp_ps:
3988       handleVectorComparePackedIntrinsic(I);
3989       break;
3990 
3991     case Intrinsic::x86_bmi_bextr_32:
3992     case Intrinsic::x86_bmi_bextr_64:
3993     case Intrinsic::x86_bmi_bzhi_32:
3994     case Intrinsic::x86_bmi_bzhi_64:
3995     case Intrinsic::x86_bmi_pdep_32:
3996     case Intrinsic::x86_bmi_pdep_64:
3997     case Intrinsic::x86_bmi_pext_32:
3998     case Intrinsic::x86_bmi_pext_64:
3999       handleBmiIntrinsic(I);
4000       break;
4001 
4002     case Intrinsic::x86_pclmulqdq:
4003     case Intrinsic::x86_pclmulqdq_256:
4004     case Intrinsic::x86_pclmulqdq_512:
4005       handlePclmulIntrinsic(I);
4006       break;
4007 
4008     case Intrinsic::x86_sse41_round_sd:
4009     case Intrinsic::x86_sse41_round_ss:
4010       handleUnarySdSsIntrinsic(I);
4011       break;
4012     case Intrinsic::x86_sse2_max_sd:
4013     case Intrinsic::x86_sse_max_ss:
4014     case Intrinsic::x86_sse2_min_sd:
4015     case Intrinsic::x86_sse_min_ss:
4016       handleBinarySdSsIntrinsic(I);
4017       break;
4018 
4019     case Intrinsic::x86_avx_vtestc_pd:
4020     case Intrinsic::x86_avx_vtestc_pd_256:
4021     case Intrinsic::x86_avx_vtestc_ps:
4022     case Intrinsic::x86_avx_vtestc_ps_256:
4023     case Intrinsic::x86_avx_vtestnzc_pd:
4024     case Intrinsic::x86_avx_vtestnzc_pd_256:
4025     case Intrinsic::x86_avx_vtestnzc_ps:
4026     case Intrinsic::x86_avx_vtestnzc_ps_256:
4027     case Intrinsic::x86_avx_vtestz_pd:
4028     case Intrinsic::x86_avx_vtestz_pd_256:
4029     case Intrinsic::x86_avx_vtestz_ps:
4030     case Intrinsic::x86_avx_vtestz_ps_256:
4031     case Intrinsic::x86_avx_ptestc_256:
4032     case Intrinsic::x86_avx_ptestnzc_256:
4033     case Intrinsic::x86_avx_ptestz_256:
4034     case Intrinsic::x86_sse41_ptestc:
4035     case Intrinsic::x86_sse41_ptestnzc:
4036     case Intrinsic::x86_sse41_ptestz:
4037       handleVtestIntrinsic(I);
4038       break;
4039 
4040     case Intrinsic::fshl:
4041     case Intrinsic::fshr:
4042       handleFunnelShift(I);
4043       break;
4044 
4045     case Intrinsic::is_constant:
4046       // The result of llvm.is.constant() is always defined.
4047       setShadow(&I, getCleanShadow(&I));
4048       setOrigin(&I, getCleanOrigin());
4049       break;
4050 
4051     default:
4052       if (!handleUnknownIntrinsic(I))
4053         visitInstruction(I);
4054       break;
4055     }
4056   }
4057 
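  // Illustrative sketch only (not verbatim output): for a libatomic call
  //   __atomic_load(size, src, dst, ordering)
  // the handler below upgrades the ordering to at least Acquire and then
  // mirrors the data copy in shadow (and origin) space, roughly:
  //   __atomic_load(size, src, dst, max(ordering, ACQUIRE));
  //   memcpy(shadow(dst), shadow(src), size);
  //   if (track_origins) __msan_set_origin(dst, size, origin(src));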
4058   void visitLibAtomicLoad(CallBase &CB) {
4059     // Since we use getNextNode here, we can't have CB terminate the BB.
4060     assert(isa<CallInst>(CB));
4061 
4062     IRBuilder<> IRB(&CB);
4063     Value *Size = CB.getArgOperand(0);
4064     Value *SrcPtr = CB.getArgOperand(1);
4065     Value *DstPtr = CB.getArgOperand(2);
4066     Value *Ordering = CB.getArgOperand(3);
4067     // Convert the call to have at least Acquire ordering to make sure
4068     // the shadow operations aren't reordered before it.
4069     Value *NewOrdering =
4070         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
4071     CB.setArgOperand(3, NewOrdering);
4072 
4073     NextNodeIRBuilder NextIRB(&CB);
4074     Value *SrcShadowPtr, *SrcOriginPtr;
4075     std::tie(SrcShadowPtr, SrcOriginPtr) =
4076         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4077                            /*isStore*/ false);
4078     Value *DstShadowPtr =
4079         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4080                            /*isStore*/ true)
4081             .first;
4082 
4083     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
4084     if (MS.TrackOrigins) {
4085       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
4086                                                    kMinOriginAlignment);
4087       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
4088       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
4089     }
4090   }
4091 
4092   void visitLibAtomicStore(CallBase &CB) {
4093     IRBuilder<> IRB(&CB);
4094     Value *Size = CB.getArgOperand(0);
4095     Value *DstPtr = CB.getArgOperand(2);
4096     Value *Ordering = CB.getArgOperand(3);
4097     // Convert the call to have at least Release ordering to make sure
4098     // the shadow operations aren't reordered after it.
4099     Value *NewOrdering =
4100         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
4101     CB.setArgOperand(3, NewOrdering);
4102 
4103     Value *DstShadowPtr =
4104         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
4105                            /*isStore*/ true)
4106             .first;
4107 
4108     // Atomic store always paints clean shadow/origin. See file header.
4109     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
4110                      Align(1));
4111   }
4112 
4113   void visitCallBase(CallBase &CB) {
4114     assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
4115     if (CB.isInlineAsm()) {
4116       // For inline asm (either a call to asm function, or callbr instruction),
4117       // do the usual thing: check argument shadow and mark all outputs as
4118       // clean. Note that any side effects of the inline asm that are not
4119       // immediately visible in its constraints are not handled.
4120       if (ClHandleAsmConservative && MS.CompileKernel)
4121         visitAsmInstruction(CB);
4122       else
4123         visitInstruction(CB);
4124       return;
4125     }
4126     LibFunc LF;
4127     if (TLI->getLibFunc(CB, LF)) {
4128       // libatomic.a functions need to have special handling because there isn't
4129       // a good way to intercept them or compile the library with
4130       // instrumentation.
4131       switch (LF) {
4132       case LibFunc_atomic_load:
4133         if (!isa<CallInst>(CB)) {
4134           llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load. "
4135                           "Ignoring!\n";
4136           break;
4137         }
4138         visitLibAtomicLoad(CB);
4139         return;
4140       case LibFunc_atomic_store:
4141         visitLibAtomicStore(CB);
4142         return;
4143       default:
4144         break;
4145       }
4146     }
4147 
4148     if (auto *Call = dyn_cast<CallInst>(&CB)) {
4149       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
4150 
4151       // We are going to insert code that relies on the fact that the callee
4152       // will become a non-readonly function after it is instrumented by us. To
4153       // prevent this code from being optimized out, mark that function
4154       // non-readonly in advance.
4155       // TODO: We can likely do better than dropping memory() completely here.
4156       AttributeMask B;
4157       B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
4158 
4159       Call->removeFnAttrs(B);
4160       if (Function *Func = Call->getCalledFunction()) {
4161         Func->removeFnAttrs(B);
4162       }
4163 
4164       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
4165     }
4166     IRBuilder<> IRB(&CB);
4167     bool MayCheckCall = MS.EagerChecks;
4168     if (Function *Func = CB.getCalledFunction()) {
4169       // __sanitizer_unaligned_{load,store} functions may be called by users
4170       // and always expect shadows in the TLS. So don't check them.
4171       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
4172     }
4173 
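    // Worked example (a sketch, assuming kShadowTLSAlignment == 8): for a call
    //   f(i32 %x, i64 %y, <4 x i32> %v)
    // the loop below stores the argument shadows at __msan_param_tls offsets
    // 0, 8 and 16 respectively, each slot rounded up to an 8-byte boundary.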
4174     unsigned ArgOffset = 0;
4175     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
4176     for (const auto &[i, A] : llvm::enumerate(CB.args())) {
4177       if (!A->getType()->isSized()) {
4178         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
4179         continue;
4180       }
4181       unsigned Size = 0;
4182       const DataLayout &DL = F.getParent()->getDataLayout();
4183 
4184       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
4185       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
4186       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
4187 
4188       if (EagerCheck) {
4189         insertShadowCheck(A, &CB);
4190         Size = DL.getTypeAllocSize(A->getType());
4191       } else {
4192         Value *Store = nullptr;
4193         // Compute the Shadow for arg even if it is ByVal, because
4194         // in that case getShadow() will copy the actual arg shadow to
4195         // __msan_param_tls.
4196         Value *ArgShadow = getShadow(A);
4197         Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
4198         LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
4199                           << " Shadow: " << *ArgShadow << "\n");
4200         if (ByVal) {
4201           // ByVal requires some special handling as it's too big for a single
4202           // load
4203           assert(A->getType()->isPointerTy() &&
4204                  "ByVal argument is not a pointer!");
4205           Size = DL.getTypeAllocSize(CB.getParamByValType(i));
4206           if (ArgOffset + Size > kParamTLSSize)
4207             break;
4208           const MaybeAlign ParamAlignment(CB.getParamAlign(i));
4209           MaybeAlign Alignment = std::nullopt;
4210           if (ParamAlignment)
4211             Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
4212           Value *AShadowPtr, *AOriginPtr;
4213           std::tie(AShadowPtr, AOriginPtr) =
4214               getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
4215                                  /*isStore*/ false);
4216           if (!PropagateShadow) {
4217             Store = IRB.CreateMemSet(ArgShadowBase,
4218                                      Constant::getNullValue(IRB.getInt8Ty()),
4219                                      Size, Alignment);
4220           } else {
4221             Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
4222                                      Alignment, Size);
4223             if (MS.TrackOrigins) {
4224               Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
4225               // FIXME: OriginSize should be:
4226               // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
4227               unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
4228               IRB.CreateMemCpy(
4229                   ArgOriginBase,
4230                   /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
4231                   AOriginPtr,
4232                   /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
4233             }
4234           }
4235         } else {
4236           // Any other parameters mean we need bit-grained tracking of uninit
4237           // data
4238           Size = DL.getTypeAllocSize(A->getType());
4239           if (ArgOffset + Size > kParamTLSSize)
4240             break;
4241           Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
4242                                          kShadowTLSAlignment);
4243           Constant *Cst = dyn_cast<Constant>(ArgShadow);
4244           if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
4245             IRB.CreateStore(getOrigin(A),
4246                             getOriginPtrForArgument(A, IRB, ArgOffset));
4247           }
4248         }
4249         (void)Store;
4250         assert(Store != nullptr);
4251         LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
4252       }
4253       assert(Size != 0);
4254       ArgOffset += alignTo(Size, kShadowTLSAlignment);
4255     }
4256     LLVM_DEBUG(dbgs() << "  done with call args\n");
4257 
4258     FunctionType *FT = CB.getFunctionType();
4259     if (FT->isVarArg()) {
4260       VAHelper->visitCallBase(CB, IRB);
4261     }
4262 
4263     // Now, get the shadow for the RetVal.
4264     if (!CB.getType()->isSized())
4265       return;
4266     // Don't emit the epilogue for musttail call returns.
4267     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
4268       return;
4269 
4270     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
4271       setShadow(&CB, getCleanShadow(&CB));
4272       setOrigin(&CB, getCleanOrigin());
4273       return;
4274     }
4275 
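    // Sketch of the emitted IR around a call %r = call i32 @f(...)
    // (exact types depend on the shadow type of the return value):
    //   store i32 0, ptr @__msan_retval_tls        ; clean shadow before call
    //   %r = call i32 @f(...)
    //   %_msret = load i32, ptr @__msan_retval_tls ; retval shadow after call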
4276     IRBuilder<> IRBBefore(&CB);
4277     // Until we have full dynamic coverage, make sure the retval shadow is 0.
4278     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
4279     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
4280                                  kShadowTLSAlignment);
4281     BasicBlock::iterator NextInsn;
4282     if (isa<CallInst>(CB)) {
4283       NextInsn = ++CB.getIterator();
4284       assert(NextInsn != CB.getParent()->end());
4285     } else {
4286       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
4287       if (!NormalDest->getSinglePredecessor()) {
4288         // FIXME: this case is tricky, so we are just conservative here.
4289         // Perhaps we need to split the edge between this BB and NormalDest,
4290         // but a naive attempt to use SplitEdge leads to a crash.
4291         setShadow(&CB, getCleanShadow(&CB));
4292         setOrigin(&CB, getCleanOrigin());
4293         return;
4294       }
4295       // FIXME: NextInsn is likely in a basic block that has not been visited
4296       // yet. Anything inserted there will be instrumented by MSan later!
4297       NextInsn = NormalDest->getFirstInsertionPt();
4298       assert(NextInsn != NormalDest->end() &&
4299              "Could not find insertion point for retval shadow load");
4300     }
4301     IRBuilder<> IRBAfter(&*NextInsn);
4302     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
4303         getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
4304         kShadowTLSAlignment, "_msret");
4305     setShadow(&CB, RetvalShadow);
4306     if (MS.TrackOrigins)
4307       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
4308                                          getOriginPtrForRetval(IRBAfter)));
4309   }
4310 
4311   bool isAMustTailRetVal(Value *RetVal) {
4312     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
4313       RetVal = I->getOperand(0);
4314     }
4315     if (auto *I = dyn_cast<CallInst>(RetVal)) {
4316       return I->isMustTailCall();
4317     }
4318     return false;
4319   }
4320 
4321   void visitReturnInst(ReturnInst &I) {
4322     IRBuilder<> IRB(&I);
4323     Value *RetVal = I.getReturnValue();
4324     if (!RetVal)
4325       return;
4326     // Don't emit the epilogue for musttail call returns.
4327     if (isAMustTailRetVal(RetVal))
4328       return;
4329     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
4330     bool HasNoUndef = F.hasRetAttribute(Attribute::NoUndef);
4331     bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
4332     // FIXME: Consider using SpecialCaseList to specify a list of functions that
4333     // must always return fully initialized values. For now, we hardcode "main".
4334     bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
4335 
4336     Value *Shadow = getShadow(RetVal);
4337     bool StoreOrigin = true;
4338     if (EagerCheck) {
4339       insertShadowCheck(RetVal, &I);
4340       Shadow = getCleanShadow(RetVal);
4341       StoreOrigin = false;
4342     }
4343 
4344     // The caller may still expect information passed over TLS if we pass our
4345     // check.
4346     if (StoreShadow) {
4347       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
4348       if (MS.TrackOrigins && StoreOrigin)
4349         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
4350     }
4351   }
4352 
4353   void visitPHINode(PHINode &I) {
4354     IRBuilder<> IRB(&I);
4355     if (!PropagateShadow) {
4356       setShadow(&I, getCleanShadow(&I));
4357       setOrigin(&I, getCleanOrigin());
4358       return;
4359     }
4360 
4361     ShadowPHINodes.push_back(&I);
4362     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
4363                                 "_msphi_s"));
4364     if (MS.TrackOrigins)
4365       setOrigin(
4366           &I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), "_msphi_o"));
4367   }
4368 
4369   Value *getLocalVarIdptr(AllocaInst &I) {
4370     ConstantInt *IntConst =
4371         ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0);
4372     return new GlobalVariable(*F.getParent(), IntConst->getType(),
4373                               /*isConstant=*/false, GlobalValue::PrivateLinkage,
4374                               IntConst);
4375   }
4376 
4377   Value *getLocalVarDescription(AllocaInst &I) {
4378     return createPrivateConstGlobalForString(*F.getParent(), I.getName());
4379   }
4380 
4381   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
4382     if (PoisonStack && ClPoisonStackWithCall) {
4383       IRB.CreateCall(MS.MsanPoisonStackFn,
4384                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
4385     } else {
4386       Value *ShadowBase, *OriginBase;
4387       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
4388           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
4389 
4390       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
4391       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
4392     }
4393 
4394     if (PoisonStack && MS.TrackOrigins) {
4395       Value *Idptr = getLocalVarIdptr(I);
4396       if (ClPrintStackNames) {
4397         Value *Descr = getLocalVarDescription(I);
4398         IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn,
4399                        {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
4400                         IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy()),
4401                         IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
4402       } else {
4403         IRB.CreateCall(MS.MsanSetAllocaOriginNoDescriptionFn,
4404                        {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
4405                         IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy())});
4406       }
4407     }
4408   }
4409 
4410   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
4411     Value *Descr = getLocalVarDescription(I);
4412     if (PoisonStack) {
4413       IRB.CreateCall(MS.MsanPoisonAllocaFn,
4414                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
4415                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
4416     } else {
4417       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
4418                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
4419     }
4420   }
4421 
4422   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
4423     if (!InsPoint)
4424       InsPoint = &I;
4425     NextNodeIRBuilder IRB(InsPoint);
4426     const DataLayout &DL = F.getParent()->getDataLayout();
4427     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
4428     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
4429     if (I.isArrayAllocation())
4430       Len = IRB.CreateMul(Len,
4431                           IRB.CreateZExtOrTrunc(I.getArraySize(), MS.IntptrTy));
4432 
4433     if (MS.CompileKernel)
4434       poisonAllocaKmsan(I, IRB, Len);
4435     else
4436       poisonAllocaUserspace(I, IRB, Len);
4437   }
4438 
4439   void visitAllocaInst(AllocaInst &I) {
4440     setShadow(&I, getCleanShadow(&I));
4441     setOrigin(&I, getCleanOrigin());
4442     // We'll get to this alloca later unless it's poisoned at the corresponding
4443     // llvm.lifetime.start.
4444     AllocaSet.insert(&I);
4445   }
4446 
4447   void visitSelectInst(SelectInst &I) {
4448     IRBuilder<> IRB(&I);
4449     // a = select b, c, d
4450     Value *B = I.getCondition();
4451     Value *C = I.getTrueValue();
4452     Value *D = I.getFalseValue();
4453     Value *Sb = getShadow(B);
4454     Value *Sc = getShadow(C);
4455     Value *Sd = getShadow(D);
4456 
4457     // Result shadow if condition shadow is 0.
4458     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
4459     Value *Sa1;
4460     if (I.getType()->isAggregateType()) {
4461       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
4462       // an extra "select". This results in much more compact IR.
4463       // Sa = select Sb, poisoned, (select b, Sc, Sd)
4464       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
4465     } else {
4466       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
4467       // If Sb (condition is poisoned), look for bits in c and d that are equal
4468       // and both unpoisoned.
4469       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
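      // Worked example (scalar bits, a sketch): if Sb = 1 but c == d and
      // Sc = Sd = 0, then (c^d) | Sc | Sd == 0, so the result is considered
      // clean: the select's value does not depend on the poisoned condition.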
4470 
4471       // Cast arguments to shadow-compatible type.
4472       C = CreateAppToShadowCast(IRB, C);
4473       D = CreateAppToShadowCast(IRB, D);
4474 
4475       // Result shadow if condition shadow is 1.
4476       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
4477     }
4478     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
4479     setShadow(&I, Sa);
4480     if (MS.TrackOrigins) {
4481       // Origins are always i32, so any vector conditions must be flattened.
4482       // FIXME: consider tracking vector origins for app vectors?
4483       if (B->getType()->isVectorTy()) {
4484         B = convertToBool(B, IRB);
4485         Sb = convertToBool(Sb, IRB);
4486       }
4487       // a = select b, c, d
4488       // Oa = Sb ? Ob : (b ? Oc : Od)
4489       setOrigin(
4490           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
4491                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
4492                                                 getOrigin(I.getFalseValue()))));
4493     }
4494   }
4495 
4496   void visitLandingPadInst(LandingPadInst &I) {
4497     // Do nothing.
4498     // See https://github.com/google/sanitizers/issues/504
4499     setShadow(&I, getCleanShadow(&I));
4500     setOrigin(&I, getCleanOrigin());
4501   }
4502 
4503   void visitCatchSwitchInst(CatchSwitchInst &I) {
4504     setShadow(&I, getCleanShadow(&I));
4505     setOrigin(&I, getCleanOrigin());
4506   }
4507 
4508   void visitFuncletPadInst(FuncletPadInst &I) {
4509     setShadow(&I, getCleanShadow(&I));
4510     setOrigin(&I, getCleanOrigin());
4511   }
4512 
4513   void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); }
4514 
4515   void visitExtractValueInst(ExtractValueInst &I) {
4516     IRBuilder<> IRB(&I);
4517     Value *Agg = I.getAggregateOperand();
4518     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
4519     Value *AggShadow = getShadow(Agg);
4520     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4521     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
4522     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
4523     setShadow(&I, ResShadow);
4524     setOriginForNaryOp(I);
4525   }
4526 
4527   void visitInsertValueInst(InsertValueInst &I) {
4528     IRBuilder<> IRB(&I);
4529     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
4530     Value *AggShadow = getShadow(I.getAggregateOperand());
4531     Value *InsShadow = getShadow(I.getInsertedValueOperand());
4532     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4533     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
4534     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
4535     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
4536     setShadow(&I, Res);
4537     setOriginForNaryOp(I);
4538   }
4539 
4540   void dumpInst(Instruction &I) {
4541     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
4542       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
4543     } else {
4544       errs() << "ZZZ " << I.getOpcodeName() << "\n";
4545     }
4546     errs() << "QQQ " << I << "\n";
4547   }
4548 
4549   void visitResumeInst(ResumeInst &I) {
4550     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
4551     // Nothing to do here.
4552   }
4553 
4554   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
4555     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
4556     // Nothing to do here.
4557   }
4558 
4559   void visitCatchReturnInst(CatchReturnInst &CRI) {
4560     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
4561     // Nothing to do here.
4562   }
4563 
4564   void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
4565                              IRBuilder<> &IRB, const DataLayout &DL,
4566                              bool isOutput) {
4567     // For each assembly argument, we check its value for being initialized.
4568     // If the argument is a pointer, we assume it points to a single element
4569     // of the corresponding type (or to an 8-byte word, if the type is unsized).
4570     // Each such pointer is instrumented with a call to the runtime library.
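    // E.g. (a sketch) for an output operand "=m"(*(int *)p) this emits
    // roughly __msan_instrument_asm_store(p, 4), unpoisoning the 4 bytes
    // the asm statement may write.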
4571     Type *OpType = Operand->getType();
4572     // Check the operand value itself.
4573     insertShadowCheck(Operand, &I);
4574     if (!OpType->isPointerTy() || !isOutput) {
4575       assert(!isOutput);
4576       return;
4577     }
4578     if (!ElemTy->isSized())
4579       return;
4580     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
4581     Value *SizeVal =
4582       IRB.CreateTypeSize(MS.IntptrTy, DL.getTypeStoreSize(ElemTy));
4583     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
4584   }
4585 
4586   /// Get the number of output arguments returned by pointers.
4587   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
4588     int NumRetOutputs = 0;
4589     int NumOutputs = 0;
4590     Type *RetTy = cast<Value>(CB)->getType();
4591     if (!RetTy->isVoidTy()) {
4592       // Register outputs are returned via the CallInst return value.
4593       auto *ST = dyn_cast<StructType>(RetTy);
4594       if (ST)
4595         NumRetOutputs = ST->getNumElements();
4596       else
4597         NumRetOutputs = 1;
4598     }
4599     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
4600     for (const InlineAsm::ConstraintInfo &Info : Constraints) {
4601       switch (Info.Type) {
4602       case InlineAsm::isOutput:
4603         NumOutputs++;
4604         break;
4605       default:
4606         break;
4607       }
4608     }
4609     return NumOutputs - NumRetOutputs;
4610   }
4611 
4612   void visitAsmInstruction(Instruction &I) {
4613     // Conservative inline assembly handling: check for poisoned shadow of
4614     // asm() arguments, then unpoison the result and all the memory locations
4615     // pointed to by those arguments.
4616     // An inline asm() statement in C++ contains lists of input and output
4617     // arguments used by the assembly code. These are mapped to operands of the
4618     // CallInst as follows:
4619     //  - nR register outputs ("=r") are returned by value in a single structure
4620     //  (SSA value of the CallInst);
4621     //  - nO other outputs ("=m" and others) are returned by pointer as first
4622     // nO operands of the CallInst;
4623     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
4624     // remaining nI operands.
4625     // The total number of asm() arguments in the source is nR+nO+nI, and the
4626     // corresponding CallInst has nO+nI+1 operands (the last operand is the
4627     // function to be called).
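    // Example (a sketch): for
    //   asm("..." : "=r"(ret), "=m"(*p) : "r"(x), "m"(*q));
    // nR = 1 (ret), nO = 1 (*p), nI = 2 (x, *q), so the CallInst returns ret
    // by value and its operands are { p, x, q, <the inline asm callee> }.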
4628     const DataLayout &DL = F.getParent()->getDataLayout();
4629     CallBase *CB = cast<CallBase>(&I);
4630     IRBuilder<> IRB(&I);
4631     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
4632     int OutputArgs = getNumOutputArgs(IA, CB);
4633     // The last operand of a CallInst is the function itself.
4634     int NumOperands = CB->getNumOperands() - 1;
4635 
4636     // Check input arguments. Doing so before unpoisoning output arguments, so
4637     // that we won't overwrite uninit values before checking them.
4638     for (int i = OutputArgs; i < NumOperands; i++) {
4639       Value *Operand = CB->getOperand(i);
4640       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4641                             /*isOutput*/ false);
4642     }
4643     // Unpoison output arguments. This must happen before the actual InlineAsm
4644     // call, so that the shadow for memory published in the asm() statement
4645     // remains valid.
4646     for (int i = 0; i < OutputArgs; i++) {
4647       Value *Operand = CB->getOperand(i);
4648       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4649                             /*isOutput*/ true);
4650     }
4651 
4652     setShadow(&I, getCleanShadow(&I));
4653     setOrigin(&I, getCleanOrigin());
4654   }
4655 
4656   void visitFreezeInst(FreezeInst &I) {
4657     // Freeze always returns a fully defined value.
4658     setShadow(&I, getCleanShadow(&I));
4659     setOrigin(&I, getCleanOrigin());
4660   }
4661 
4662   void visitInstruction(Instruction &I) {
4663     // Everything else: stop propagating and check for poisoned shadow.
4664     if (ClDumpStrictInstructions)
4665       dumpInst(I);
4666     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4667     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4668       Value *Operand = I.getOperand(i);
4669       if (Operand->getType()->isSized())
4670         insertShadowCheck(Operand, &I);
4671     }
4672     setShadow(&I, getCleanShadow(&I));
4673     setOrigin(&I, getCleanOrigin());
4674   }
4675 };
4676 
4677 /// AMD64-specific implementation of VarArgHelper.
4678 struct VarArgAMD64Helper : public VarArgHelper {
4679   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
4680   // See a comment in visitCallBase for more details.
4681   static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
4682   static const unsigned AMD64FpEndOffsetSSE = 176;
4683   // If SSE is disabled, fp_offset in va_list is zero.
4684   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
4685 
4686   unsigned AMD64FpEndOffset;
4687   Function &F;
4688   MemorySanitizer &MS;
4689   MemorySanitizerVisitor &MSV;
4690   AllocaInst *VAArgTLSCopy = nullptr;
4691   AllocaInst *VAArgTLSOriginCopy = nullptr;
4692   Value *VAArgOverflowSize = nullptr;
4693 
4694   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4695 
4696   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4697 
4698   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
4699                     MemorySanitizerVisitor &MSV)
4700       : F(F), MS(MS), MSV(MSV) {
4701     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
4702     for (const auto &Attr : F.getAttributes().getFnAttrs()) {
4703       if (Attr.isStringAttribute() &&
4704           (Attr.getKindAsString() == "target-features")) {
4705         if (Attr.getValueAsString().contains("-sse"))
4706           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
4707         break;
4708       }
4709     }
4710   }
4711 
4712   ArgKind classifyArgument(Value *arg) {
4713     // A very rough approximation of X86_64 argument classification rules.
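    // E.g. (a sketch): double or <4 x float> -> AK_FloatingPoint; i32 or a
    // pointer -> AK_GeneralPurpose; i128 or an aggregate -> AK_Memory.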
4714     Type *T = arg->getType();
4715     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
4716       return AK_FloatingPoint;
4717     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4718       return AK_GeneralPurpose;
4719     if (T->isPointerTy())
4720       return AK_GeneralPurpose;
4721     return AK_Memory;
4722   }
4723 
4724   // For VarArg functions, store the argument shadow in an ABI-specific format
4725   // that corresponds to va_list layout.
4726   // We do this because Clang lowers va_arg in the frontend, and this pass
4727   // only sees the low level code that deals with va_list internals.
4728   // A much easier alternative (provided that Clang emits va_arg instructions)
4729   // would have been to associate each live instance of va_list with a copy of
4730   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
4731   // order.
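  // Layout sketch of the va_arg TLS area written below (assuming SSE is
  // enabled, i.e. AMD64FpEndOffset == AMD64FpEndOffsetSSE):
  //   bytes   0..47   shadow for GP-register arguments (6 regs x 8 bytes)
  //   bytes  48..175  shadow for FP-register arguments (8 regs x 16 bytes)
  //   bytes 176..     shadow for arguments passed in memory (overflow area)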
4732   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4733     unsigned GpOffset = 0;
4734     unsigned FpOffset = AMD64GpEndOffset;
4735     unsigned OverflowOffset = AMD64FpEndOffset;
4736     const DataLayout &DL = F.getParent()->getDataLayout();
4737     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
4738       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4739       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4740       if (IsByVal) {
4741         // ByVal arguments always go to the overflow area.
4742         // Fixed arguments passed through the overflow area will be stepped
4743         // over by va_start, so don't count them towards the offset.
4744         if (IsFixed)
4745           continue;
4746         assert(A->getType()->isPointerTy());
4747         Type *RealTy = CB.getParamByValType(ArgNo);
4748         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4749         Value *ShadowBase = getShadowPtrForVAArgument(
4750             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
4751         Value *OriginBase = nullptr;
4752         if (MS.TrackOrigins)
4753           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4754         OverflowOffset += alignTo(ArgSize, 8);
4755         if (!ShadowBase)
4756           continue;
4757         Value *ShadowPtr, *OriginPtr;
4758         std::tie(ShadowPtr, OriginPtr) =
4759             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4760                                    /*isStore*/ false);
4761 
4762         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4763                          kShadowTLSAlignment, ArgSize);
4764         if (MS.TrackOrigins)
4765           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4766                            kShadowTLSAlignment, ArgSize);
4767       } else {
4768         ArgKind AK = classifyArgument(A);
4769         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4770           AK = AK_Memory;
4771         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4772           AK = AK_Memory;
4773         Value *ShadowBase, *OriginBase = nullptr;
4774         switch (AK) {
4775         case AK_GeneralPurpose:
4776           ShadowBase =
4777               getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4778           if (MS.TrackOrigins)
4779             OriginBase = getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4780           GpOffset += 8;
4781           break;
4782         case AK_FloatingPoint:
4783           ShadowBase =
4784               getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4785           if (MS.TrackOrigins)
4786             OriginBase = getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4787           FpOffset += 16;
4788           break;
4789         case AK_Memory:
4790           if (IsFixed)
4791             continue;
4792           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4793           ShadowBase =
4794               getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4795           if (MS.TrackOrigins)
4796             OriginBase =
4797                 getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4798           OverflowOffset += alignTo(ArgSize, 8);
4799         }
4800         // Take fixed arguments into account for GpOffset and FpOffset,
4801         // but don't actually store shadows for them.
4802         // TODO(glider): don't call get*PtrForVAArgument() for them.
4803         if (IsFixed)
4804           continue;
4805         if (!ShadowBase)
4806           continue;
4807         Value *Shadow = MSV.getShadow(A);
4808         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4809         if (MS.TrackOrigins) {
4810           Value *Origin = MSV.getOrigin(A);
4811           TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
4812           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4813                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4814         }
4815       }
4816     }
4817     Constant *OverflowSize =
4818         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4819     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4820   }
4821 
4822   /// Compute the shadow address for a given va_arg.
4823   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4824                                    unsigned ArgOffset, unsigned ArgSize) {
4825     // Make sure we don't overflow __msan_va_arg_tls.
4826     if (ArgOffset + ArgSize > kParamTLSSize)
4827       return nullptr;
4828     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4829     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4830     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4831                               "_msarg_va_s");
4832   }
4833 
4834   /// Compute the origin address for a given va_arg.
4835   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4836     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4837     // getOriginPtrForVAArgument() is always called after
4838     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4839     // overflow.
4840     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4841     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4842                               "_msarg_va_o");
4843   }
4844 
4845   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4846     IRBuilder<> IRB(&I);
4847     Value *VAListTag = I.getArgOperand(0);
4848     Value *ShadowPtr, *OriginPtr;
4849     const Align Alignment = Align(8);
4850     std::tie(ShadowPtr, OriginPtr) =
4851         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4852                                /*isStore*/ true);
4853 
4854     // Unpoison the whole __va_list_tag.
4855     // FIXME: magic ABI constants.
4856     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4857                      /* size */ 24, Alignment, false);
4858     // We shouldn't need to zero out the origins, as they're only checked for
4859     // nonzero shadow.
4860   }
4861 
4862   void visitVAStartInst(VAStartInst &I) override {
4863     if (F.getCallingConv() == CallingConv::Win64)
4864       return;
4865     VAStartInstrumentationList.push_back(&I);
4866     unpoisonVAListTagForInst(I);
4867   }
4868 
4869   void visitVACopyInst(VACopyInst &I) override {
4870     if (F.getCallingConv() == CallingConv::Win64)
4871       return;
4872     unpoisonVAListTagForInst(I);
4873   }
4874 
4875   void finalizeInstrumentation() override {
4876     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4877            "finalizeInstrumentation called twice");
4878     if (!VAStartInstrumentationList.empty()) {
4879       // If there is a va_start in this function, make a backup copy of
4880       // va_arg_tls somewhere in the function entry block.
4881       IRBuilder<> IRB(MSV.FnPrologueEnd);
4882       VAArgOverflowSize =
4883           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4884       Value *CopySize = IRB.CreateAdd(
4885           ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), VAArgOverflowSize);
4886       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4887       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
4888       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
4889                        CopySize, kShadowTLSAlignment, false);
4890 
4891       Value *SrcSize = IRB.CreateBinaryIntrinsic(
4892           Intrinsic::umin, CopySize,
4893           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
4894       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
4895                        kShadowTLSAlignment, SrcSize);
4896       if (MS.TrackOrigins) {
4897         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4898         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
4899         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
4900                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
4901       }
4902     }
4903 
4904     // Instrument va_start.
4905     // Copy va_list shadow from the backup copy of the TLS contents.
4906     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4907       CallInst *OrigInst = VAStartInstrumentationList[i];
4908       NextNodeIRBuilder IRB(OrigInst);
4909       Value *VAListTag = OrigInst->getArgOperand(0);
4910 
4911       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4912       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4913           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4914                         ConstantInt::get(MS.IntptrTy, 16)),
4915           PointerType::get(RegSaveAreaPtrTy, 0));
4916       Value *RegSaveAreaPtr =
4917           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4918       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4919       const Align Alignment = Align(16);
4920       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4921           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4922                                  Alignment, /*isStore*/ true);
4923       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4924                        AMD64FpEndOffset);
4925       if (MS.TrackOrigins)
4926         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4927                          Alignment, AMD64FpEndOffset);
4928       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4929       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4930           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4931                         ConstantInt::get(MS.IntptrTy, 8)),
4932           PointerType::get(OverflowArgAreaPtrTy, 0));
4933       Value *OverflowArgAreaPtr =
4934           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4935       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4936       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4937           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4938                                  Alignment, /*isStore*/ true);
4939       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4940                                              AMD64FpEndOffset);
4941       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4942                        VAArgOverflowSize);
4943       if (MS.TrackOrigins) {
4944         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4945                                         AMD64FpEndOffset);
4946         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4947                          VAArgOverflowSize);
4948       }
4949     }
4950   }
4951 };
4952 
4953 /// MIPS64-specific implementation of VarArgHelper.
4954 struct VarArgMIPS64Helper : public VarArgHelper {
4955   Function &F;
4956   MemorySanitizer &MS;
4957   MemorySanitizerVisitor &MSV;
4958   AllocaInst *VAArgTLSCopy = nullptr;
4959   Value *VAArgSize = nullptr;
4960 
4961   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4962 
4963   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4964                      MemorySanitizerVisitor &MSV)
4965       : F(F), MS(MS), MSV(MSV) {}
4966 
4967   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4968     unsigned VAArgOffset = 0;
4969     const DataLayout &DL = F.getParent()->getDataLayout();
4970     for (Value *A :
4971          llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) {
4972       Triple TargetTriple(F.getParent()->getTargetTriple());
4973       Value *Base;
4974       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4975       if (TargetTriple.getArch() == Triple::mips64) {
4976         // Adjust the shadow for arguments with size < 8 to match the
4977         // placement of bits in a big-endian system.
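        // E.g. (a sketch) an i32 vararg occupies the high 4 bytes of its
        // 8-byte slot on big-endian mips64, so its shadow is stored at
        // VAArgOffset + 4 rather than at VAArgOffset.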
4978         if (ArgSize < 8)
4979           VAArgOffset += (8 - ArgSize);
4980       }
4981       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4982       VAArgOffset += ArgSize;
4983       VAArgOffset = alignTo(VAArgOffset, 8);
4984       if (!Base)
4985         continue;
4986       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4987     }
4988 
4989     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
4990     // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a new
4991     // class member, i.e. it holds the total size of all VarArgs.
4992     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4993   }
4994 
4995   /// Compute the shadow address for a given va_arg.
4996   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4997                                    unsigned ArgOffset, unsigned ArgSize) {
4998     // Make sure we don't overflow __msan_va_arg_tls.
4999     if (ArgOffset + ArgSize > kParamTLSSize)
5000       return nullptr;
5001     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5002     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5003     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
5004                               "_msarg");
5005   }
5006 
5007   void visitVAStartInst(VAStartInst &I) override {
5008     IRBuilder<> IRB(&I);
5009     VAStartInstrumentationList.push_back(&I);
5010     Value *VAListTag = I.getArgOperand(0);
5011     Value *ShadowPtr, *OriginPtr;
5012     const Align Alignment = Align(8);
5013     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5014         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5015     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5016                      /* size */ 8, Alignment, false);
5017   }
5018 
5019   void visitVACopyInst(VACopyInst &I) override {
5020     IRBuilder<> IRB(&I);
5021     VAStartInstrumentationList.push_back(&I);
5022     Value *VAListTag = I.getArgOperand(0);
5023     Value *ShadowPtr, *OriginPtr;
5024     const Align Alignment = Align(8);
5025     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5026         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5027     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5028                      /* size */ 8, Alignment, false);
5029   }
5030 
5031   void finalizeInstrumentation() override {
5032     assert(!VAArgSize && !VAArgTLSCopy &&
5033            "finalizeInstrumentation called twice");
5034     IRBuilder<> IRB(MSV.FnPrologueEnd);
5035     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5036     Value *CopySize =
5037         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize);
5038 
5039     if (!VAStartInstrumentationList.empty()) {
5040       // If there is a va_start in this function, make a backup copy of
5041       // va_arg_tls somewhere in the function entry block.
5042       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5043       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5044       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5045                        CopySize, kShadowTLSAlignment, false);
5046 
5047       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5048           Intrinsic::umin, CopySize,
5049           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5050       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5051                        kShadowTLSAlignment, SrcSize);
5052     }
5053 
5054     // Instrument va_start.
5055     // Copy va_list shadow from the backup copy of the TLS contents.
5056     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
5057       CallInst *OrigInst = VAStartInstrumentationList[i];
5058       NextNodeIRBuilder IRB(OrigInst);
5059       Value *VAListTag = OrigInst->getArgOperand(0);
5060       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5061       Value *RegSaveAreaPtrPtr =
5062           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5063                              PointerType::get(RegSaveAreaPtrTy, 0));
5064       Value *RegSaveAreaPtr =
5065           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5066       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5067       const Align Alignment = Align(8);
5068       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5069           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5070                                  Alignment, /*isStore*/ true);
5071       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5072                        CopySize);
5073     }
5074   }
5075 };
5076 
5077 /// AArch64-specific implementation of VarArgHelper.
5078 struct VarArgAArch64Helper : public VarArgHelper {
5079   static const unsigned kAArch64GrArgSize = 64;
5080   static const unsigned kAArch64VrArgSize = 128;
5081 
5082   static const unsigned AArch64GrBegOffset = 0;
5083   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
5084   // Make VR space aligned to 16 bytes.
5085   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
5086   static const unsigned AArch64VrEndOffset =
5087       AArch64VrBegOffset + kAArch64VrArgSize;
5088   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
5089 
5090   Function &F;
5091   MemorySanitizer &MS;
5092   MemorySanitizerVisitor &MSV;
5093   AllocaInst *VAArgTLSCopy = nullptr;
5094   Value *VAArgOverflowSize = nullptr;
5095 
5096   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5097 
5098   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
5099 
5100   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
5101                       MemorySanitizerVisitor &MSV)
5102       : F(F), MS(MS), MSV(MSV) {}
5103 
5104   ArgKind classifyArgument(Value *arg) {
5105     Type *T = arg->getType();
5106     if (T->isFPOrFPVectorTy())
5107       return AK_FloatingPoint;
5108     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) ||
5109         (T->isPointerTy()))
5110       return AK_GeneralPurpose;
5111     return AK_Memory;
5112   }
5113 
5114   // The instrumentation stores the argument shadow in a non-ABI-specific
5115   // format because it does not know which arguments are named (since Clang,
5116   // as in the x86_64 case, lowers va_arg in the frontend and this pass only
5117   // sees the low-level code that deals with va_list internals).
5118   // The first seven GR registers are saved in the first 56 bytes of the
5119   // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
5120   // the remaining arguments.
5121   // Using constant offsets within the va_arg TLS array allows fast copying
5122   // in the finalize instrumentation.
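  // Layout sketch of the va_arg TLS area written below (offsets match the
  // kAArch64* constants above):
  //   bytes   0..63   shadow for GP-register arguments      (kAArch64GrArgSize)
  //   bytes  64..191  shadow for FP/SIMD-register arguments (kAArch64VrArgSize)
  //   bytes 192..     shadow for stack (overflow) arguments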
5123   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5124     unsigned GrOffset = AArch64GrBegOffset;
5125     unsigned VrOffset = AArch64VrBegOffset;
5126     unsigned OverflowOffset = AArch64VAEndOffset;
5127 
5128     const DataLayout &DL = F.getParent()->getDataLayout();
5129     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5130       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5131       ArgKind AK = classifyArgument(A);
5132       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
5133         AK = AK_Memory;
5134       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
5135         AK = AK_Memory;
5136       Value *Base;
5137       switch (AK) {
5138       case AK_GeneralPurpose:
5139         Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
5140         GrOffset += 8;
5141         break;
5142       case AK_FloatingPoint:
5143         Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
5144         VrOffset += 16;
5145         break;
5146       case AK_Memory:
5147         // Don't count fixed arguments in the overflow area - va_start will
5148         // skip right over them.
5149         if (IsFixed)
5150           continue;
5151         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5152         Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
5153                                          alignTo(ArgSize, 8));
5154         OverflowOffset += alignTo(ArgSize, 8);
5155         break;
5156       }
5157       // Count Gp/Vr fixed arguments to their respective offsets, but don't
5158       // bother to actually store a shadow.
5159       if (IsFixed)
5160         continue;
5161       if (!Base)
5162         continue;
5163       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5164     }
5165     Constant *OverflowSize =
5166         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
5167     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5168   }
5169 
5170   /// Compute the shadow address for a given va_arg.
5171   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
5172                                    unsigned ArgOffset, unsigned ArgSize) {
5173     // Make sure we don't overflow __msan_va_arg_tls.
5174     if (ArgOffset + ArgSize > kParamTLSSize)
5175       return nullptr;
5176     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5177     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5178     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
5179                               "_msarg");
5180   }
5181 
5182   void visitVAStartInst(VAStartInst &I) override {
5183     IRBuilder<> IRB(&I);
5184     VAStartInstrumentationList.push_back(&I);
5185     Value *VAListTag = I.getArgOperand(0);
5186     Value *ShadowPtr, *OriginPtr;
5187     const Align Alignment = Align(8);
5188     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5189         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5190     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5191                      /* size */ 32, Alignment, false);
5192   }
5193 
5194   void visitVACopyInst(VACopyInst &I) override {
5195     IRBuilder<> IRB(&I);
5196     VAStartInstrumentationList.push_back(&I);
5197     Value *VAListTag = I.getArgOperand(0);
5198     Value *ShadowPtr, *OriginPtr;
5199     const Align Alignment = Align(8);
5200     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5201         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5202     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5203                      /* size */ 32, Alignment, false);
5204   }
5205 
5206   // Retrieve a va_list field of 'void*' size.
5207   Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5208     Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
5209         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5210                       ConstantInt::get(MS.IntptrTy, offset)),
5211         Type::getInt64PtrTy(*MS.C));
5212     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
5213   }
5214 
5215   // Retrieve a va_list field of 'int' size.
5216   Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5217     Value *SaveAreaPtr = IRB.CreateIntToPtr(
5218         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5219                       ConstantInt::get(MS.IntptrTy, offset)),
5220         Type::getInt32PtrTy(*MS.C));
5221     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
5222     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
5223   }
5224 
5225   void finalizeInstrumentation() override {
5226     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5227            "finalizeInstrumentation called twice");
5228     if (!VAStartInstrumentationList.empty()) {
5229       // If there is a va_start in this function, make a backup copy of
5230       // va_arg_tls somewhere in the function entry block.
5231       IRBuilder<> IRB(MSV.FnPrologueEnd);
5232       VAArgOverflowSize =
5233           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5234       Value *CopySize = IRB.CreateAdd(
5235           ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), VAArgOverflowSize);
5236       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5237       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5238       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5239                        CopySize, kShadowTLSAlignment, false);
5240 
5241       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5242           Intrinsic::umin, CopySize,
5243           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5244       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5245                        kShadowTLSAlignment, SrcSize);
5246     }
5247 
5248     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
5249     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
5250 
5251     // Instrument va_start: copy va_list shadow from the backup copy of
5252     // the TLS contents.
5253     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
5254       CallInst *OrigInst = VAStartInstrumentationList[i];
5255       NextNodeIRBuilder IRB(OrigInst);
5256 
5257       Value *VAListTag = OrigInst->getArgOperand(0);
5258 
5259       // The AArch64 variadic ABI creates two areas in which incoming
5260       // argument registers are saved: one for the 64-bit general-purpose
5261       // registers (x0-x7) and another for the 128-bit FP/SIMD registers
5262       // (v0-v7).
5263       // We therefore propagate shadow into both regions,
5264       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs',
5265       // and the remaining arguments get their shadow via 'va::__stack'.
5266       // One caveat: only the unnamed (variadic) arguments need shadow, but
5267       // the call-site instrumentation saves shadow for *all* arguments. So
5268       // when copying shadow values from the va_arg TLS array we adjust the
5269       // offsets into both the GR and VR areas by the __{gr,vr}_offs values,
5270       // which account for the incoming named register arguments.
5271       Type *RegSaveAreaPtrTy = IRB.getInt8PtrTy();
5272 
5273       // Read the stack pointer from the va_list.
5274       Value *StackSaveAreaPtr =
5275           IRB.CreateIntToPtr(getVAField64(IRB, VAListTag, 0), RegSaveAreaPtrTy);
5276 
5277       // Read both the __gr_top and __gr_off and add them up.
5278       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
5279       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
5280 
5281       Value *GrRegSaveAreaPtr = IRB.CreateIntToPtr(
5282           IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea), RegSaveAreaPtrTy);
5283 
5284       // Read both the __vr_top and __vr_off and add them up.
5285       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
5286       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
5287 
5288       Value *VrRegSaveAreaPtr = IRB.CreateIntToPtr(
5289           IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea), RegSaveAreaPtrTy);
5290 
5291       // We do not know how many named arguments were passed in registers,
5292       // and the call site saved shadow for all of them.  Since __gr_offs is
5293       // defined as '0 - ((8 - named_gr) * 8)', we propagate shadow only for
5294       // the variadic arguments by skipping the shadow of the named ones.
5295       Value *GrRegSaveAreaShadowPtrOff =
5296           IRB.CreateAdd(GrArgSize, GrOffSaveArea);
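      // A hypothetical example: with 3 named GP register arguments, __gr_offs
      // starts at -(8 - 3) * 8 = -40, so (assuming kAArch64GrArgSize is 64,
      // i.e. 8 registers x 8 bytes) the first variadic argument's shadow lives
      // 64 - 40 = 24 bytes into the GR portion of the TLS copy.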
5297 
5298       Value *GrRegSaveAreaShadowPtr =
5299           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5300                                  Align(8), /*isStore*/ true)
5301               .first;
5302 
5303       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
5304                                               GrRegSaveAreaShadowPtrOff);
5305       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
5306 
5307       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
5308                        GrCopySize);
5309 
5310       // Again, but for FP/SIMD values.
5311       Value *VrRegSaveAreaShadowPtrOff =
5312           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
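      // Same arithmetic for the 16-byte FP/SIMD slots: e.g. with 1 named
      // FP/SIMD argument, __vr_offs = -(8 - 1) * 16 = -112, so the variadic
      // shadow starts 128 - 112 = 16 bytes into the VR portion (assuming
      // kAArch64VrArgSize is 128).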
5313 
5314       Value *VrRegSaveAreaShadowPtr =
5315           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5316                                  Align(8), /*isStore*/ true)
5317               .first;
5318 
5319       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
5320           IRB.getInt8Ty(),
5321           IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
5322                                 IRB.getInt32(AArch64VrBegOffset)),
5323           VrRegSaveAreaShadowPtrOff);
5324       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
5325 
5326       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
5327                        VrCopySize);
5328 
5329       // And finally for remaining arguments.
5330       Value *StackSaveAreaShadowPtr =
5331           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
5332                                  Align(16), /*isStore*/ true)
5333               .first;
5334 
5335       Value *StackSrcPtr = IRB.CreateInBoundsGEP(
5336           IRB.getInt8Ty(), VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset));
5337 
5338       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
5339                        Align(16), VAArgOverflowSize);
5340     }
5341   }
5342 };
5343 
5344 /// PowerPC64-specific implementation of VarArgHelper.
5345 struct VarArgPowerPC64Helper : public VarArgHelper {
5346   Function &F;
5347   MemorySanitizer &MS;
5348   MemorySanitizerVisitor &MSV;
5349   AllocaInst *VAArgTLSCopy = nullptr;
5350   Value *VAArgSize = nullptr;
5351 
5352   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5353 
5354   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
5355                         MemorySanitizerVisitor &MSV)
5356       : F(F), MS(MS), MSV(MSV) {}
5357 
5358   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5359     // For PowerPC, we need to deal with the alignment of stack arguments:
5360     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
5361     // are aligned to 16 bytes, and byvals can be aligned to 8 or 16 bytes.
5362     // For that reason, we compute the current offset from the stack pointer
5363     // (which is always properly aligned) and the offset of the first vararg,
5364     // then subtract them.
5365     unsigned VAArgBase;
5366     Triple TargetTriple(F.getParent()->getTargetTriple());
5367     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
5368     // and 32 bytes for ABIv2.  This is usually determined by target
5369     // endianness, but in theory could be overridden by function attribute.
5370     if (TargetTriple.getArch() == Triple::ppc64)
5371       VAArgBase = 48;
5372     else
5373       VAArgBase = 32;
5374     unsigned VAArgOffset = VAArgBase;
5375     const DataLayout &DL = F.getParent()->getDataLayout();
5376     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5377       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5378       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
5379       if (IsByVal) {
5380         assert(A->getType()->isPointerTy());
5381         Type *RealTy = CB.getParamByValType(ArgNo);
5382         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
5383         Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8));
5384         if (ArgAlign < 8)
5385           ArgAlign = Align(8);
5386         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
5387         if (!IsFixed) {
5388           Value *Base = getShadowPtrForVAArgument(
5389               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
5390           if (Base) {
5391             Value *AShadowPtr, *AOriginPtr;
5392             std::tie(AShadowPtr, AOriginPtr) =
5393                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
5394                                        kShadowTLSAlignment, /*isStore*/ false);
5395 
5396             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
5397                              kShadowTLSAlignment, ArgSize);
5398           }
5399         }
5400         VAArgOffset += alignTo(ArgSize, Align(8));
5401       } else {
5402         Value *Base;
5403         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5404         Align ArgAlign = Align(8);
5405         if (A->getType()->isArrayTy()) {
5406           // Arrays are aligned to element size, except for long double
5407           // arrays, which are aligned to 8 bytes.
5408           Type *ElementTy = A->getType()->getArrayElementType();
5409           if (!ElementTy->isPPC_FP128Ty())
5410             ArgAlign = Align(DL.getTypeAllocSize(ElementTy));
5411         } else if (A->getType()->isVectorTy()) {
5412           // Vectors are naturally aligned.
5413           ArgAlign = Align(ArgSize);
5414         }
5415         if (ArgAlign < 8)
5416           ArgAlign = Align(8);
5417         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
5418         if (DL.isBigEndian()) {
5419           // Adjust the shadow for arguments smaller than 8 bytes to match
5420           // the placement of bits in a big-endian system.
5421           if (ArgSize < 8)
5422             VAArgOffset += (8 - ArgSize);
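          // E.g. a 4-byte 'int' occupies the high-address half of its 8-byte
          // doubleword on a big-endian target, so its shadow starts 4 bytes
          // into the slot.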
5423         }
5424         if (!IsFixed) {
5425           Base = getShadowPtrForVAArgument(A->getType(), IRB,
5426                                            VAArgOffset - VAArgBase, ArgSize);
5427           if (Base)
5428             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5429         }
5430         VAArgOffset += ArgSize;
5431         VAArgOffset = alignTo(VAArgOffset, Align(8));
5432       }
5433       if (IsFixed)
5434         VAArgBase = VAArgOffset;
5435     }
5436 
5437     Constant *TotalVAArgSize =
5438         ConstantInt::get(IRB.getInt64Ty(), VAArgOffset - VAArgBase);
5439     // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating
5440     // a new class member; i.e. it holds the total size of all varargs.
5441     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
5442   }
5443 
5444   /// Compute the shadow address for a given va_arg.
5445   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
5446                                    unsigned ArgOffset, unsigned ArgSize) {
5447     // Make sure we don't overflow __msan_va_arg_tls.
5448     if (ArgOffset + ArgSize > kParamTLSSize)
5449       return nullptr;
5450     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5451     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5452     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
5453                               "_msarg");
5454   }
5455 
5456   void visitVAStartInst(VAStartInst &I) override {
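  // Note: on PowerPC64 ELF, va_list appears to be a plain pointer into the
  // parameter save area rather than a struct, hence the 8-byte unpoison in
  // the two handlers below (contrast with the 32-byte va_list tags on AArch64
  // and SystemZ).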
5457     IRBuilder<> IRB(&I);
5458     VAStartInstrumentationList.push_back(&I);
5459     Value *VAListTag = I.getArgOperand(0);
5460     Value *ShadowPtr, *OriginPtr;
5461     const Align Alignment = Align(8);
5462     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5463         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5464     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5465                      /* size */ 8, Alignment, false);
5466   }
5467 
5468   void visitVACopyInst(VACopyInst &I) override {
5469     IRBuilder<> IRB(&I);
5470     Value *VAListTag = I.getArgOperand(0);
5471     Value *ShadowPtr, *OriginPtr;
5472     const Align Alignment = Align(8);
5473     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5474         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5475     // Unpoison the whole __va_list_tag.
5476     // FIXME: magic ABI constants.
5477     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5478                      /* size */ 8, Alignment, false);
5479   }
5480 
5481   void finalizeInstrumentation() override {
5482     assert(!VAArgSize && !VAArgTLSCopy &&
5483            "finalizeInstrumentation called twice");
5484     IRBuilder<> IRB(MSV.FnPrologueEnd);
5485     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5486     Value *CopySize =
5487         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize);
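    // (Adding 0 mirrors the other targets, where a fixed register-save-area
    // offset is added to the dynamic vararg size; on PowerPC64 the whole
    // region is dynamically sized.)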
5488 
5489     if (!VAStartInstrumentationList.empty()) {
5490       // If there is a va_start in this function, make a backup copy of
5491       // va_arg_tls somewhere in the function entry block.
5492 
5493       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5494       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5495       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5496                        CopySize, kShadowTLSAlignment, false);
5497 
5498       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5499           Intrinsic::umin, CopySize,
5500           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5501       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5502                        kShadowTLSAlignment, SrcSize);
5503     }
5504 
5505     // Instrument va_start.
5506     // Copy va_list shadow from the backup copy of the TLS contents.
5507     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
5508       CallInst *OrigInst = VAStartInstrumentationList[i];
5509       NextNodeIRBuilder IRB(OrigInst);
5510       Value *VAListTag = OrigInst->getArgOperand(0);
5511       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5512       Value *RegSaveAreaPtrPtr =
5513           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5514                              PointerType::get(RegSaveAreaPtrTy, 0));
5515       Value *RegSaveAreaPtr =
5516           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5517       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5518       const Align Alignment = Align(8);
5519       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5520           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5521                                  Alignment, /*isStore*/ true);
5522       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5523                        CopySize);
5524     }
5525   }
5526 };
5527 
5528 /// SystemZ-specific implementation of VarArgHelper.
5529 struct VarArgSystemZHelper : public VarArgHelper {
5530   static const unsigned SystemZGpOffset = 16;
5531   static const unsigned SystemZGpEndOffset = 56;
5532   static const unsigned SystemZFpOffset = 128;
5533   static const unsigned SystemZFpEndOffset = 160;
5534   static const unsigned SystemZMaxVrArgs = 8;
5535   static const unsigned SystemZRegSaveAreaSize = 160;
5536   static const unsigned SystemZOverflowOffset = 160;
5537   static const unsigned SystemZVAListTagSize = 32;
5538   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
5539   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
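  // These constants appear to encode the s390x ELF ABI layout: the 160-byte
  // register save area holds the argument GPRs r2-r6 at offsets 16-56 and the
  // argument FPRs f0/f2/f4/f6 at offsets 128-160, with overflow (stack)
  // arguments modeled past offset 160.  The 32-byte va_list tag keeps its
  // __overflow_arg_area pointer at offset 16 and its __reg_save_area pointer
  // at offset 24.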
5540 
5541   Function &F;
5542   MemorySanitizer &MS;
5543   MemorySanitizerVisitor &MSV;
5544   bool IsSoftFloatABI;
5545   AllocaInst *VAArgTLSCopy = nullptr;
5546   AllocaInst *VAArgTLSOriginCopy = nullptr;
5547   Value *VAArgOverflowSize = nullptr;
5548 
5549   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5550 
5551   enum class ArgKind {
5552     GeneralPurpose,
5553     FloatingPoint,
5554     Vector,
5555     Memory,
5556     Indirect,
5557   };
5558 
5559   enum class ShadowExtension { None, Zero, Sign };
5560 
5561   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
5562                       MemorySanitizerVisitor &MSV)
5563       : F(F), MS(MS), MSV(MSV),
5564         IsSoftFloatABI(F.getFnAttribute("use-soft-float").getValueAsBool()) {}
5565 
5566   ArgKind classifyArgument(Type *T) {
5567     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
5568     // only a few possibilities of what it can be. In particular, enums, single
5569     // element structs and large types have already been taken care of.
5570 
5571     // Some i128 and fp128 arguments are converted to pointers only in the
5572     // back end.
5573     if (T->isIntegerTy(128) || T->isFP128Ty())
5574       return ArgKind::Indirect;
5575     if (T->isFloatingPointTy())
5576       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
5577     if (T->isIntegerTy() || T->isPointerTy())
5578       return ArgKind::GeneralPurpose;
5579     if (T->isVectorTy())
5580       return ArgKind::Vector;
5581     return ArgKind::Memory;
5582   }
5583 
5584   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
5585     // ABI says: "One of the simple integer types no more than 64 bits wide.
5586     // ... If such an argument is shorter than 64 bits, replace it by a full
5587     // 64-bit integer representing the same number, using sign or zero
5588     // extension". Shadow for an integer argument has the same type as the
5589     // argument itself, so it can be sign or zero extended as well.
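    // For example, a 'short' vararg carrying the SExt attribute is passed as a
    // sign-extended 64-bit value, so its i16 shadow is likewise sign-extended
    // to i64 (via CreateShadowCast in visitCallBase below) before being stored.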
5590     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
5591     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
5592     if (ZExt) {
5593       assert(!SExt);
5594       return ShadowExtension::Zero;
5595     }
5596     if (SExt) {
5597       assert(!ZExt);
5598       return ShadowExtension::Sign;
5599     }
5600     return ShadowExtension::None;
5601   }
5602 
5603   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5604     unsigned GpOffset = SystemZGpOffset;
5605     unsigned FpOffset = SystemZFpOffset;
5606     unsigned VrIndex = 0;
5607     unsigned OverflowOffset = SystemZOverflowOffset;
5608     const DataLayout &DL = F.getParent()->getDataLayout();
5609     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5610       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5611       // SystemZABIInfo does not produce ByVal parameters.
5612       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
5613       Type *T = A->getType();
5614       ArgKind AK = classifyArgument(T);
5615       if (AK == ArgKind::Indirect) {
5616         T = PointerType::get(T, 0);
5617         AK = ArgKind::GeneralPurpose;
5618       }
5619       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
5620         AK = ArgKind::Memory;
5621       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
5622         AK = ArgKind::Memory;
5623       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
5624         AK = ArgKind::Memory;
5625       Value *ShadowBase = nullptr;
5626       Value *OriginBase = nullptr;
5627       ShadowExtension SE = ShadowExtension::None;
5628       switch (AK) {
5629       case ArgKind::GeneralPurpose: {
5630         // Always keep track of GpOffset, but store shadow only for varargs.
5631         uint64_t ArgSize = 8;
5632         if (GpOffset + ArgSize <= kParamTLSSize) {
5633           if (!IsFixed) {
5634             SE = getShadowExtension(CB, ArgNo);
5635             uint64_t GapSize = 0;
5636             if (SE == ShadowExtension::None) {
5637               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5638               assert(ArgAllocSize <= ArgSize);
5639               GapSize = ArgSize - ArgAllocSize;
5640             }
5641             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
5642             if (MS.TrackOrigins)
5643               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
5644           }
5645           GpOffset += ArgSize;
5646         } else {
5647           GpOffset = kParamTLSSize;
5648         }
5649         break;
5650       }
5651       case ArgKind::FloatingPoint: {
5652         // Always keep track of FpOffset, but store shadow only for varargs.
5653         uint64_t ArgSize = 8;
5654         if (FpOffset + ArgSize <= kParamTLSSize) {
5655           if (!IsFixed) {
5656             // PoP says: "A short floating-point datum requires only the
5657             // left-most 32 bit positions of a floating-point register".
5658             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
5659             // don't extend shadow and don't mind the gap.
5660             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
5661             if (MS.TrackOrigins)
5662               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5663           }
5664           FpOffset += ArgSize;
5665         } else {
5666           FpOffset = kParamTLSSize;
5667         }
5668         break;
5669       }
5670       case ArgKind::Vector: {
5671         // Keep track of VrIndex. No need to store shadow, since vector varargs
5672         // go through AK_Memory.
5673         assert(IsFixed);
5674         VrIndex++;
5675         break;
5676       }
5677       case ArgKind::Memory: {
5678         // Keep track of OverflowOffset and store shadow only for varargs.
5679         // Ignore fixed args, since we need to copy only the vararg portion of
5680         // the overflow area shadow.
5681         if (!IsFixed) {
5682           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5683           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5684           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5685             SE = getShadowExtension(CB, ArgNo);
5686             uint64_t GapSize =
5687                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5688             ShadowBase =
5689                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5690             if (MS.TrackOrigins)
5691               OriginBase =
5692                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5693             OverflowOffset += ArgSize;
5694           } else {
5695             OverflowOffset = kParamTLSSize;
5696           }
5697         }
5698         break;
5699       }
5700       case ArgKind::Indirect:
5701         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5702       }
5703       if (ShadowBase == nullptr)
5704         continue;
5705       Value *Shadow = MSV.getShadow(A);
5706       if (SE != ShadowExtension::None)
5707         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5708                                       /*Signed*/ SE == ShadowExtension::Sign);
5709       ShadowBase = IRB.CreateIntToPtr(
5710           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5711       IRB.CreateStore(Shadow, ShadowBase);
5712       if (MS.TrackOrigins) {
5713         Value *Origin = MSV.getOrigin(A);
5714         TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
5715         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5716                         kMinOriginAlignment);
5717       }
5718     }
5719     Constant *OverflowSize = ConstantInt::get(
5720         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5721     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5722   }
5723 
5724   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
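  // Unlike the getShadowPtrForVAArgument() helpers of the other targets, this
  // returns a raw integer address (converted to a pointer later in
  // visitCallBase()) and performs no overflow check of its own; the callers
  // above already bound each offset by kParamTLSSize.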
5725     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5726     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5727   }
5728 
5729   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5730     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5731     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5732     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5733                               "_msarg_va_o");
5734   }
5735 
5736   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5737     IRBuilder<> IRB(&I);
5738     Value *VAListTag = I.getArgOperand(0);
5739     Value *ShadowPtr, *OriginPtr;
5740     const Align Alignment = Align(8);
5741     std::tie(ShadowPtr, OriginPtr) =
5742         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5743                                /*isStore*/ true);
5744     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5745                      SystemZVAListTagSize, Alignment, false);
5746   }
5747 
5748   void visitVAStartInst(VAStartInst &I) override {
5749     VAStartInstrumentationList.push_back(&I);
5750     unpoisonVAListTagForInst(I);
5751   }
5752 
5753   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5754 
5755   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5756     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5757     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5758         IRB.CreateAdd(
5759             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5760             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5761         PointerType::get(RegSaveAreaPtrTy, 0));
5762     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5763     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5764     const Align Alignment = Align(8);
5765     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5766         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5767                                /*isStore*/ true);
5768     // TODO(iii): copy only fragments filled by visitCallBase()
5769     // TODO(iii): support packed-stack && !use-soft-float
5770     // For use-soft-float functions, it is enough to copy just the GPRs.
5771     unsigned RegSaveAreaSize =
5772         IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
5773     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5774                      RegSaveAreaSize);
5775     if (MS.TrackOrigins)
5776       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5777                        Alignment, RegSaveAreaSize);
5778   }
5779 
5780   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5781     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5782     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5783         IRB.CreateAdd(
5784             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5785             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5786         PointerType::get(OverflowArgAreaPtrTy, 0));
5787     Value *OverflowArgAreaPtr =
5788         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5789     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5790     const Align Alignment = Align(8);
5791     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5792         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5793                                Alignment, /*isStore*/ true);
5794     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5795                                            SystemZOverflowOffset);
5796     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5797                      VAArgOverflowSize);
5798     if (MS.TrackOrigins) {
5799       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5800                                       SystemZOverflowOffset);
5801       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5802                        VAArgOverflowSize);
5803     }
5804   }
5805 
5806   void finalizeInstrumentation() override {
5807     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5808            "finalizeInstrumentation called twice");
5809     if (!VAStartInstrumentationList.empty()) {
5810       // If there is a va_start in this function, make a backup copy of
5811       // va_arg_tls somewhere in the function entry block.
5812       IRBuilder<> IRB(MSV.FnPrologueEnd);
5813       VAArgOverflowSize =
5814           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5815       Value *CopySize =
5816           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5817                         VAArgOverflowSize);
5818       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5819       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5820       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5821                        CopySize, kShadowTLSAlignment, false);
5822 
5823       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5824           Intrinsic::umin, CopySize,
5825           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5826       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5827                        kShadowTLSAlignment, SrcSize);
5828       if (MS.TrackOrigins) {
5829         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5830         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
5831         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
5832                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
5833       }
5834     }
5835 
5836     // Instrument va_start.
5837     // Copy va_list shadow from the backup copy of the TLS contents.
5838     for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
5839          VaStartNo < VaStartNum; VaStartNo++) {
5840       CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
5841       NextNodeIRBuilder IRB(OrigInst);
5842       Value *VAListTag = OrigInst->getArgOperand(0);
5843       copyRegSaveArea(IRB, VAListTag);
5844       copyOverflowArea(IRB, VAListTag);
5845     }
5846   }
5847 };
5848 
5849 /// A no-op implementation of VarArgHelper.
5850 struct VarArgNoOpHelper : public VarArgHelper {
5851   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5852                    MemorySanitizerVisitor &MSV) {}
5853 
5854   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5855 
5856   void visitVAStartInst(VAStartInst &I) override {}
5857 
5858   void visitVACopyInst(VACopyInst &I) override {}
5859 
5860   void finalizeInstrumentation() override {}
5861 };
5862 
5863 } // end anonymous namespace
5864 
5865 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5866                                         MemorySanitizerVisitor &Visitor) {
5867   // VarArg handling is only implemented on the platforms listed below; on
5868   // other platforms false positives are possible with variadic functions.
5869   Triple TargetTriple(Func.getParent()->getTargetTriple());
5870   if (TargetTriple.getArch() == Triple::x86_64)
5871     return new VarArgAMD64Helper(Func, Msan, Visitor);
5872   else if (TargetTriple.isMIPS64())
5873     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5874   else if (TargetTriple.getArch() == Triple::aarch64)
5875     return new VarArgAArch64Helper(Func, Msan, Visitor);
5876   else if (TargetTriple.getArch() == Triple::ppc64 ||
5877            TargetTriple.getArch() == Triple::ppc64le)
5878     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5879   else if (TargetTriple.getArch() == Triple::systemz)
5880     return new VarArgSystemZHelper(Func, Msan, Visitor);
5881   else
5882     return new VarArgNoOpHelper(Func, Msan, Visitor);
5883 }
5884 
5885 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5886   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5887     return false;
5888 
5889   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
5890     return false;
5891 
5892   MemorySanitizerVisitor Visitor(F, *this, TLI);
5893 
5894   // Clear out memory attributes.
5895   AttributeMask B;
5896   B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
5897   F.removeFnAttrs(B);
5898 
5899   return Visitor.runOnFunction();
5900 }
5901