xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (https://static.usenix.org/event/usenix05/tech/general/full_papers/seward/seward_html/usenix2005.html)
/// We associate a few shadow bits with every byte of the application memory,
/// poison the shadow of the malloc-ed or alloca-ed memory, load the shadow
/// bits on every memory read, propagate the shadow bits through some of the
/// arithmetic instructions (including MOV), store the shadow bits on every
/// memory write, and report a bug on some other instructions (e.g. JMP) if the
/// associated shadow is poisoned.
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
25 /// optimizations and a fast start-up. But this brings the major issue
26 /// as well: msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
33 /// shadow updates (Memcheck is single-threaded so races are not a
34 /// concern there. Memcheck uses 2 shadow bits per byte with a slow
35 /// path storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
56 ///
57 /// Every 4 aligned, consecutive bytes of application memory have one origin
58 /// value associated with them. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
64 /// avoids storing origin to memory when a fully initialized value is stored.
65 /// This way it avoids needless overwriting origin of the 4-byte region on
66 /// a short (i.e. 1 byte) clean store, and it is also good for performance.
67 ///
68 ///                            Atomic handling.
69 ///
70 /// Ideally, every atomic store of application value should update the
71 /// corresponding shadow location in an atomic way. Unfortunately, atomic store
72 /// of two disjoint locations can not be done without severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
87 /// after the app operation. Computers don't work this way. Current
88 /// implementation ignores the load aspect of CAS/RMW, always returning a clean
89 /// value. It implements the store part as a simple atomic store by storing a
90 /// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
95 /// become initialized depending on the arguments. It can be possible to figure
96 /// out which arguments are meant to point to inputs and outputs, but the
97 /// actual semantics can be only visible at runtime. In the Linux kernel it's
98 /// also possible that the arguments only indicate the offset for a base taken
99 /// from a segment register, so it's dangerous to treat any asm() arguments as
100 /// pointers. We take a conservative approach generating calls to
101 ///   __msan_instrument_asm_store(ptr, size)
102 /// , which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
///    Note that the sanitizer code has to deal with how shadow/origin pairs
///    returned by these functions are represented in different ABIs. In
127 ///    the X86_64 ABI they are returned in RDX:RAX, in PowerPC64 they are
128 ///    returned in r3 and r4, and in the SystemZ ABI they are written to memory
129 ///    pointed to by a hidden parameter.
130 ///  - TLS variables are stored in a single per-task struct. A call to a
131 ///    function __msan_get_context_state() returning a pointer to that struct
132 ///    is inserted into every instrumented function before the entry block;
133 ///  - __msan_warning() takes a 32-bit origin parameter;
134 ///  - local variables are poisoned with __msan_poison_alloca() upon function
135 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
136 ///    function;
137 ///  - the pass doesn't declare any global variables or add global constructors
138 ///    to the translation unit.
139 ///
140 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
141 /// calls, making sure we're on the safe side wrt. possible false positives.
142 ///
143 ///  KernelMemorySanitizer only supports X86_64, SystemZ and PowerPC64 at the
144 ///  moment.
145 ///
146 //
147 // FIXME: This sanitizer does not yet handle scalable vectors
148 //
149 //===----------------------------------------------------------------------===//
150 
151 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
152 #include "llvm/ADT/APInt.h"
153 #include "llvm/ADT/ArrayRef.h"
154 #include "llvm/ADT/DenseMap.h"
155 #include "llvm/ADT/DepthFirstIterator.h"
156 #include "llvm/ADT/SetVector.h"
157 #include "llvm/ADT/SmallPtrSet.h"
158 #include "llvm/ADT/SmallVector.h"
159 #include "llvm/ADT/StringExtras.h"
160 #include "llvm/ADT/StringRef.h"
161 #include "llvm/Analysis/GlobalsModRef.h"
162 #include "llvm/Analysis/TargetLibraryInfo.h"
163 #include "llvm/Analysis/ValueTracking.h"
164 #include "llvm/IR/Argument.h"
165 #include "llvm/IR/AttributeMask.h"
166 #include "llvm/IR/Attributes.h"
167 #include "llvm/IR/BasicBlock.h"
168 #include "llvm/IR/CallingConv.h"
169 #include "llvm/IR/Constant.h"
170 #include "llvm/IR/Constants.h"
171 #include "llvm/IR/DataLayout.h"
172 #include "llvm/IR/DerivedTypes.h"
173 #include "llvm/IR/Function.h"
174 #include "llvm/IR/GlobalValue.h"
175 #include "llvm/IR/GlobalVariable.h"
176 #include "llvm/IR/IRBuilder.h"
177 #include "llvm/IR/InlineAsm.h"
178 #include "llvm/IR/InstVisitor.h"
179 #include "llvm/IR/InstrTypes.h"
180 #include "llvm/IR/Instruction.h"
181 #include "llvm/IR/Instructions.h"
182 #include "llvm/IR/IntrinsicInst.h"
183 #include "llvm/IR/Intrinsics.h"
184 #include "llvm/IR/IntrinsicsAArch64.h"
185 #include "llvm/IR/IntrinsicsX86.h"
186 #include "llvm/IR/MDBuilder.h"
187 #include "llvm/IR/Module.h"
188 #include "llvm/IR/Type.h"
189 #include "llvm/IR/Value.h"
190 #include "llvm/IR/ValueMap.h"
191 #include "llvm/Support/Alignment.h"
192 #include "llvm/Support/AtomicOrdering.h"
193 #include "llvm/Support/Casting.h"
194 #include "llvm/Support/CommandLine.h"
195 #include "llvm/Support/Debug.h"
196 #include "llvm/Support/DebugCounter.h"
197 #include "llvm/Support/ErrorHandling.h"
198 #include "llvm/Support/MathExtras.h"
199 #include "llvm/Support/raw_ostream.h"
200 #include "llvm/TargetParser/Triple.h"
201 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
202 #include "llvm/Transforms/Utils/Local.h"
203 #include "llvm/Transforms/Utils/ModuleUtils.h"
204 #include <algorithm>
205 #include <cassert>
206 #include <cstddef>
207 #include <cstdint>
208 #include <memory>
209 #include <string>
210 #include <tuple>
211 
212 using namespace llvm;
213 
214 #define DEBUG_TYPE "msan"
215 
// Debug counters for this pass, registered under the names
// "msan-insert-check" and "msan-instrument-instruction".
DEBUG_COUNTER(DebugInsertCheck, "msan-insert-check",
              "Controls which checks to insert");

DEBUG_COUNTER(DebugInstrumentInstruction, "msan-instrument-instruction",
              "Controls which instruction to instrument");

// Size in bytes of one origin value: origins are 4-byte values (see the
// "Origin tracking" section of the file header).
static const unsigned kOriginSize = 4;
// One origin value covers 4 aligned, consecutive application bytes (see the
// file header), hence the 4-byte minimum alignment.
static const Align kMinOriginAlignment = Align(4);
// NOTE(review): presumably the alignment used for accesses to the shadow TLS
// arrays -- confirm at the uses, which are outside this part of the file.
static const Align kShadowTLSAlignment = Align(8);

// These constants must be kept in sync with the ones in msan.h.
// NOTE(review): presumably the sizes in bytes of __msan_param_tls and
// __msan_retval_tls -- confirm against msan.h.
static const unsigned kParamTLSSize = 800;
static const unsigned kRetvalTLSSize = 800;

// Access sizes are powers of two: 1, 2, 4, 8.
// Used below as the length of the per-access-size callback arrays.
static const size_t kNumberOfAccessSizes = 4;
232 
/// Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
/// (stack or heap) the uninitialized bits came from originally.
///
/// The value is an integer; the default of 0 leaves origin tracking disabled.
/// When given on the command line, it overrides the TrackOrigins value that
/// the MemorySanitizerOptions constructor would otherwise use.
static cl::opt<int> ClTrackOrigins(
    "msan-track-origins",
    cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden,
    cl::init(0));

// When given on the command line, overrides the Recover value that the
// MemorySanitizerOptions constructor would otherwise use.
// NOTE(review): "UMR" presumably stands for "uninitialized memory read".
static cl::opt<bool> ClKeepGoing("msan-keep-going",
                                 cl::desc("keep going after reporting a UMR"),
                                 cl::Hidden, cl::init(false));

// Stack poisoning controls: whether to poison at all, whether to do it through
// a call, and which byte pattern to use (default 0xff).
static cl::opt<bool>
    ClPoisonStack("msan-poison-stack",
                  cl::desc("poison uninitialized stack variables"), cl::Hidden,
                  cl::init(true));

static cl::opt<bool> ClPoisonStackWithCall(
    "msan-poison-stack-with-call",
    cl::desc("poison uninitialized stack variables with a call"), cl::Hidden,
    cl::init(false));

static cl::opt<int> ClPoisonStackPattern(
    "msan-poison-stack-pattern",
    cl::desc("poison uninitialized stack variables with the given pattern"),
    cl::Hidden, cl::init(0xff));

static cl::opt<bool>
    ClPrintStackNames("msan-print-stack-names",
                      cl::desc("Print name of local stack variable"),
                      cl::Hidden, cl::init(true));

static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
                                   cl::desc("poison undef temps"), cl::Hidden,
                                   cl::init(true));

// Integer comparison handling: shadow propagation through ICmpEQ/ICmpNE is on
// by default, exact handling of relational comparisons is off by default.
static cl::opt<bool>
    ClHandleICmp("msan-handle-icmp",
                 cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
                 cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClHandleICmpExact("msan-handle-icmp-exact",
                      cl::desc("exact handling of relational integer ICmp"),
                      cl::Hidden, cl::init(false));

static cl::opt<bool> ClHandleLifetimeIntrinsics(
    "msan-handle-lifetime-intrinsics",
    cl::desc(
        "when possible, poison scoped variables at the beginning of the scope "
        "(slower, but more precise)"),
    cl::Hidden, cl::init(true));

// When compiling the Linux kernel, we sometimes see false positives related to
// MSan being unable to understand that inline assembly calls may initialize
// local variables.
// This flag makes the compiler conservatively unpoison every memory location
// passed into an assembly call. Note that this may cause false positives.
// Because it's impossible to figure out the array sizes, we can only unpoison
// the first sizeof(type) bytes for each type* pointer.
static cl::opt<bool> ClHandleAsmConservative(
    "msan-handle-asm-conservative",
    cl::desc("conservative handling of inline assembly"), cl::Hidden,
    cl::init(true));

// This flag controls whether we check the shadow of the address
// operand of load or store. Such bugs are very rare, since load from
// a garbage address typically results in SEGV, but still happen
// (e.g. only lower bits of address are garbage, or the access happens
// early at program startup where malloc-ed memory is more likely to
// be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
static cl::opt<bool> ClCheckAccessAddress(
    "msan-check-access-address",
    cl::desc("report accesses through a pointer which has poisoned shadow"),
    cl::Hidden, cl::init(true));
309 
// When given on the command line, overrides the EagerChecks value that the
// MemorySanitizerOptions constructor would otherwise use.
static cl::opt<bool> ClEagerChecks(
    "msan-eager-checks",
    cl::desc("check arguments and return values at function call boundaries"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClDumpStrictInstructions(
    "msan-dump-strict-instructions",
    cl::desc("print out instructions with default strict semantics"),
    cl::Hidden, cl::init(false));

// Per-function limit on inline checks and origin stores (default 3500) above
// which callbacks are used instead; -1 disables the callbacks entirely.
static cl::opt<int> ClInstrumentationWithCallThreshold(
    "msan-instrumentation-with-call-threshold",
    cl::desc(
        "If the function being instrumented requires more than "
        "this number of checks and origin stores, use callbacks instead of "
        "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// When given on the command line, overrides the Kernel value that the
// MemorySanitizerOptions constructor would otherwise use.
static cl::opt<bool>
    ClEnableKmsan("msan-kernel",
                  cl::desc("Enable KernelMemorySanitizer instrumentation"),
                  cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClDisableChecks("msan-disable-checks",
                    cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
                    cl::init(false));

static cl::opt<bool>
    ClCheckConstantShadow("msan-check-constant-shadow",
                          cl::desc("Insert checks for constant shadow values"),
                          cl::Hidden, cl::init(true));

// This is off by default because of a bug in gold:
// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
// When set, insertModuleCtor() puts the module constructor into a comdat
// before adding it to the global constructors list.
static cl::opt<bool>
    ClWithComdat("msan-with-comdat",
                 cl::desc("Place MSan constructors in comdat sections"),
                 cl::Hidden, cl::init(false));

// These options allow specifying custom memory map parameters.
// See MemoryMapParams for details. All four default to 0.
static cl::opt<uint64_t> ClAndMask("msan-and-mask",
                                   cl::desc("Define custom MSan AndMask"),
                                   cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
                                   cl::desc("Define custom MSan XorMask"),
                                   cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
                                      cl::desc("Define custom MSan ShadowBase"),
                                      cl::Hidden, cl::init(0));

static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
                                      cl::desc("Define custom MSan OriginBase"),
                                      cl::Hidden, cl::init(0));

// Number of checks per debug location (default 3) beyond which an origin
// update is forced, per the option description.
static cl::opt<int>
    ClDisambiguateWarning("msan-disambiguate-warning-threshold",
                          cl::desc("Define threshold for number of checks per "
                                   "debug location to force origin update."),
                          cl::Hidden, cl::init(3));

// Name of the module constructor handled by insertModuleCtor(); the same
// string is used as the comdat name when -msan-with-comdat is set.
const char kMsanModuleCtorName[] = "msan.module_ctor";
// Name of the runtime initialization function passed to
// getOrCreateSanitizerCtorAndInitFunctions() in insertModuleCtor().
const char kMsanInitName[] = "__msan_init";
376 
namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = OriginBase + Offset
//
// The per-target tables below use positional aggregate initialization, so the
// field order here must not change.
struct MemoryMapParams {
  // Bits cleared from the application address before the XOR step.
  uint64_t AndMask;
  // Value XORed into the masked address to form Offset.
  uint64_t XorMask;
  // Added to Offset to form the shadow address.
  uint64_t ShadowBase;
  // Added to Offset to form the origin address.
  uint64_t OriginBase;
};

// Pair of memory map parameter sets for one platform. The tables below leave
// bits32 as nullptr when they define no 32-bit parameter set.
// NOTE(review): presumably the entry is picked by target pointer width --
// confirm in initializeModule(), which is outside this part of the file.
struct PlatformMemoryMapParams {
  const MemoryMapParams *bits32;
  const MemoryMapParams *bits64;
};

} // end anonymous namespace
396 
// Per-target memory map parameters. Each initializer lists the fields in
// MemoryMapParams declaration order: AndMask, XorMask, ShadowBase, OriginBase.
// Fields left at 0 are annotated "(not used)": a zero mask or base leaves the
// address computation described at MemoryMapParams unchanged.

// i386 Linux
static const MemoryMapParams Linux_I386_MemoryMapParams = {
    0x000080000000, // AndMask
    0,              // XorMask (not used)
    0,              // ShadowBase (not used)
    0x000040000000, // OriginBase
};

// x86_64 Linux
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};

// mips64 Linux
static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x008000000000, // XorMask
    0,              // ShadowBase (not used)
    0x002000000000, // OriginBase
};

// ppc64 Linux
static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
    0xE00000000000, // AndMask
    0x100000000000, // XorMask
    0x080000000000, // ShadowBase
    0x1C0000000000, // OriginBase
};

// s390x Linux
static const MemoryMapParams Linux_S390X_MemoryMapParams = {
    0xC00000000000, // AndMask
    0,              // XorMask (not used)
    0x080000000000, // ShadowBase
    0x1C0000000000, // OriginBase
};

// aarch64 Linux
static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
    0,               // AndMask (not used)
    0x0B00000000000, // XorMask
    0,               // ShadowBase (not used)
    0x0200000000000, // OriginBase
};

// loongarch64 Linux
static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};

// aarch64 FreeBSD
static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
    0x1800000000000, // AndMask
    0x0400000000000, // XorMask
    0x0200000000000, // ShadowBase
    0x0700000000000, // OriginBase
};

// i386 FreeBSD
static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
    0x000180000000, // AndMask
    0x000040000000, // XorMask
    0x000020000000, // ShadowBase
    0x000700000000, // OriginBase
};

// x86_64 FreeBSD
static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
    0xc00000000000, // AndMask
    0x200000000000, // XorMask
    0x100000000000, // ShadowBase
    0x380000000000, // OriginBase
};

// x86_64 NetBSD (same values as x86_64 Linux)
static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
484 
// Per-platform parameter pairs. Each initializer is {bits32, bits64}, in
// PlatformMemoryMapParams declaration order; a nullptr first entry means no
// 32-bit parameter set is defined for that platform in this file.

// Linux x86: i386 and x86_64.
static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
    &Linux_I386_MemoryMapParams,
    &Linux_X86_64_MemoryMapParams,
};

// Linux MIPS: mips64 only.
static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
    nullptr,
    &Linux_MIPS64_MemoryMapParams,
};

// Linux PowerPC: ppc64 only.
static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
    nullptr,
    &Linux_PowerPC64_MemoryMapParams,
};

// Linux S390: s390x only.
static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
    nullptr,
    &Linux_S390X_MemoryMapParams,
};

// Linux ARM: aarch64 only.
static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
    nullptr,
    &Linux_AArch64_MemoryMapParams,
};

// Linux LoongArch: loongarch64 only.
static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
    nullptr,
    &Linux_LoongArch64_MemoryMapParams,
};

// FreeBSD ARM: aarch64 only.
static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
    nullptr,
    &FreeBSD_AArch64_MemoryMapParams,
};

// FreeBSD x86: i386 and x86_64.
static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
    &FreeBSD_I386_MemoryMapParams,
    &FreeBSD_X86_64_MemoryMapParams,
};

// NetBSD x86: x86_64 only.
static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
    nullptr,
    &NetBSD_X86_64_MemoryMapParams,
};
529 
530 namespace {
531 
/// Instrument functions of a module to detect uninitialized reads.
///
/// Instantiating MemorySanitizer inserts the msan runtime library API function
/// declarations into the module if they don't exist already. Instantiating
/// ensures the __msan_init function is in the list of global constructors for
/// the module.
///
/// NOTE(review): in the code visible here the constructor only calls
/// initializeModule(); the module constructor is registered separately by
/// insertModuleCtor(), called from MemorySanitizerPass::run(). Confirm whether
/// the last sentence above still holds.
class MemorySanitizer {
public:
  /// Copies the Kernel, TrackOrigins, Recover and EagerChecks settings from
  /// \p Options and then runs initializeModule() on \p M.
  MemorySanitizer(Module &M, MemorySanitizerOptions Options)
      : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
        Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
    initializeModule(M);
  }

  // MSan cannot be moved or copied because of MapParams.
  MemorySanitizer(MemorySanitizer &&) = delete;
  MemorySanitizer &operator=(MemorySanitizer &&) = delete;
  MemorySanitizer(const MemorySanitizer &) = delete;
  MemorySanitizer &operator=(const MemorySanitizer &) = delete;

  /// Instruments \p F. The return value is OR-ed into the "module modified"
  /// flag by MemorySanitizerPass::run().
  bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);

private:
  // The visitor and the per-target vararg helpers access the private state
  // below directly.
  friend struct MemorySanitizerVisitor;
  friend struct VarArgHelperBase;
  friend struct VarArgAMD64Helper;
  friend struct VarArgMIPS64Helper;
  friend struct VarArgAArch64Helper;
  friend struct VarArgPowerPC64Helper;
  friend struct VarArgSystemZHelper;

  void initializeModule(Module &M);
  void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI);
  void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
  void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);

  /// Declares (or finds) the runtime function \p Name that yields a
  /// MsanMetadata pair, using the return convention of the target.
  template <typename... ArgsTy>
  FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
                                                 ArgsTy... Args);

  /// True if we're compiling the Linux kernel.
  bool CompileKernel;
  /// Track origins (allocation points) of uninitialized values.
  int TrackOrigins;
  /// Copy of MemorySanitizerOptions::Recover.
  bool Recover;
  /// Copy of MemorySanitizerOptions::EagerChecks.
  bool EagerChecks;

  Triple TargetTriple;
  /// Context used when creating IR types (e.g. Type::getVoidTy(*C)).
  LLVMContext *C;
  Type *IntptrTy;  ///< Integer type with the size of a ptr in default AS.
  Type *OriginTy;
  /// Pointer type. NOTE(review): presumably a pointer in the default AS --
  /// confirm in initializeModule().
  PointerType *PtrTy;

  // XxxTLS variables represent the per-thread state in MSan and per-task state
  // in KMSAN.
  // For the userspace these point to thread-local globals. In the kernel land
  // they point to the members of a per-task struct obtained via a call to
  // __msan_get_context_state().

  /// Thread-local shadow storage for function parameters.
  Value *ParamTLS;

  /// Thread-local origin storage for function parameters.
  Value *ParamOriginTLS;

  /// Thread-local shadow storage for function return value.
  Value *RetvalTLS;

  /// Thread-local origin storage for function return value.
  Value *RetvalOriginTLS;

  /// Thread-local shadow storage for in-register va_arg function.
  Value *VAArgTLS;

  /// Thread-local origin storage for in-register va_arg function.
  /// NOTE(review): "origin" inferred from the name, by analogy with
  /// ParamOriginTLS; the previous comment duplicated the VAArgTLS one.
  Value *VAArgOriginTLS;

  /// Thread-local storage for the size of the va_arg overflow area.
  /// NOTE(review): inferred from the name -- confirm at its uses.
  Value *VAArgOverflowSizeTLS;

  /// Are the instrumentation callbacks set up?
  bool CallbacksInitialized = false;

  /// The run-time callback to print a warning.
  FunctionCallee WarningFn;

  // These arrays are indexed by log2(AccessSize).
  FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
  FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];

  /// Run-time helper that generates a new origin value for a stack
  /// allocation.
  FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
  // No description version
  FunctionCallee MsanSetAllocaOriginNoDescriptionFn;

  /// Run-time helper that poisons stack on function entry.
  FunctionCallee MsanPoisonStackFn;

  /// Run-time helper that records a store (or any event) of an
  /// uninitialized value and returns an updated origin id encoding this info.
  FunctionCallee MsanChainOriginFn;

  /// Run-time helper that paints an origin over a region.
  FunctionCallee MsanSetOriginFn;

  /// MSan runtime replacements for memmove, memcpy and memset.
  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;

  /// KMSAN per-task context state: the struct type, and the runtime callback
  /// that returns a pointer to it (__msan_get_context_state(), per the file
  /// header).
  StructType *MsanContextStateTy;
  FunctionCallee MsanGetContextStateFn;

  /// Functions for poisoning/unpoisoning local variables
  FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;

  /// Pair of shadow/origin pointers.
  Type *MsanMetadata;

  /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
  /// NOTE(review): the two 4-element arrays presumably hold the 1-, 2-, 4- and
  /// 8-byte variants in that order -- confirm in createKernelApi().
  FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
  FunctionCallee MsanMetadataPtrForLoad_1_8[4];
  FunctionCallee MsanMetadataPtrForStore_1_8[4];
  FunctionCallee MsanInstrumentAsmStoreFn;

  /// Storage for return values of the MsanMetadataPtrXxx functions.
  Value *MsanMetadataAlloca;

  /// Helper to choose between different MsanMetadataPtrXxx().
  FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);

  /// Memory map parameters used in application-to-shadow calculation.
  const MemoryMapParams *MapParams;

  /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
  MemoryMapParams CustomMapParams;

  /// Branch-weight metadata.
  /// NOTE(review): presumably attached to the rarely-taken branch leading to
  /// the warning call -- confirm where it is created and used.
  MDNode *ColdCallWeights;

  /// Branch weights for origin store.
  MDNode *OriginStoreWeights;
};
675 
676 void insertModuleCtor(Module &M) {
677   getOrCreateSanitizerCtorAndInitFunctions(
678       M, kMsanModuleCtorName, kMsanInitName,
679       /*InitArgTypes=*/{},
680       /*InitArgs=*/{},
681       // This callback is invoked when the functions are created the first
682       // time. Hook them into the global ctors list in that case:
683       [&](Function *Ctor, FunctionCallee) {
684         if (!ClWithComdat) {
685           appendToGlobalCtors(M, Ctor, 0);
686           return;
687         }
688         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
689         Ctor->setComdat(MsanCtorComdat);
690         appendToGlobalCtors(M, Ctor, 0, Ctor);
691       });
692 }
693 
694 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
695   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
696 }
697 
698 } // end anonymous namespace
699 
/// Builds the effective pass options.
///
/// Each field takes the value of its command-line flag when that flag was
/// given (see getOptOrDefault), and otherwise a default derived from the
/// constructor arguments:
///  - Kernel:       \p K (flag: msan-kernel);
///  - TrackOrigins: 2 if Kernel is set, else \p TO (flag: msan-track-origins);
///  - Recover:      true if Kernel is set, else \p R (flag: msan-keep-going);
///  - EagerChecks:  \p EagerChecks (flag: msan-eager-checks).
MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
                                               bool EagerChecks)
    : Kernel(getOptOrDefault(ClEnableKmsan, K)),
      // `Kernel` in the next two initializers is the member set just above,
      // not the parameter K, so a command-line msan-kernel also changes these
      // defaults. NOTE(review): this relies on Kernel being declared before
      // TrackOrigins and Recover in the struct -- confirm in the header.
      TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
      Recover(getOptOrDefault(ClKeepGoing, Kernel || R)),
      // Inside the initializer expression `EagerChecks` names the parameter.
      EagerChecks(getOptOrDefault(ClEagerChecks, EagerChecks)) {}
706 
707 PreservedAnalyses MemorySanitizerPass::run(Module &M,
708                                            ModuleAnalysisManager &AM) {
709   bool Modified = false;
710   if (!Options.Kernel) {
711     insertModuleCtor(M);
712     Modified = true;
713   }
714 
715   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
716   for (Function &F : M) {
717     if (F.empty())
718       continue;
719     MemorySanitizer Msan(*F.getParent(), Options);
720     Modified |=
721         Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
722   }
723 
724   if (!Modified)
725     return PreservedAnalyses::all();
726 
727   PreservedAnalyses PA = PreservedAnalyses::none();
728   // GlobalsAA is considered stateless and does not get invalidated unless
729   // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
730   // make changes that require GlobalsAA to be invalidated.
731   PA.abandon<GlobalsAA>();
732   return PA;
733 }
734 
735 void MemorySanitizerPass::printPipeline(
736     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
737   static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
738       OS, MapClassName2PassName);
739   OS << '<';
740   if (Options.Recover)
741     OS << "recover;";
742   if (Options.Kernel)
743     OS << "kernel;";
744   if (Options.EagerChecks)
745     OS << "eager-checks;";
746   OS << "track-origins=" << Options.TrackOrigins;
747   OS << '>';
748 }
749 
750 /// Create a non-const global initialized with the given string.
751 ///
752 /// Creates a writable global for Str so that we can pass it to the
753 /// run-time lib. Runtime uses first 4 bytes of the string to store the
754 /// frame ID, so the string needs to be mutable.
755 static GlobalVariable *createPrivateConstGlobalForString(Module &M,
756                                                          StringRef Str) {
757   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
758   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
759                             GlobalValue::PrivateLinkage, StrConst, "");
760 }
761 
762 template <typename... ArgsTy>
763 FunctionCallee
764 MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
765                                                  ArgsTy... Args) {
766   if (TargetTriple.getArch() == Triple::systemz) {
767     // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
768     return M.getOrInsertFunction(Name, Type::getVoidTy(*C),
769                                  PointerType::get(MsanMetadata, 0),
770                                  std::forward<ArgsTy>(Args)...);
771   }
772 
773   return M.getOrInsertFunction(Name, MsanMetadata,
774                                std::forward<ArgsTy>(Args)...);
775 }
776 
777 /// Create KMSAN API callbacks.
778 void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
779   IRBuilder<> IRB(*C);
780 
781   // These will be initialized in insertKmsanPrologue().
782   RetvalTLS = nullptr;
783   RetvalOriginTLS = nullptr;
784   ParamTLS = nullptr;
785   ParamOriginTLS = nullptr;
786   VAArgTLS = nullptr;
787   VAArgOriginTLS = nullptr;
788   VAArgOverflowSizeTLS = nullptr;
789 
790   WarningFn = M.getOrInsertFunction("__msan_warning",
791                                     TLI.getAttrList(C, {0}, /*Signed=*/false),
792                                     IRB.getVoidTy(), IRB.getInt32Ty());
793 
794   // Requests the per-task context state (kmsan_context_state*) from the
795   // runtime library.
796   MsanContextStateTy = StructType::get(
797       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
798       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
799       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
800       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
801       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
802       OriginTy);
803   MsanGetContextStateFn = M.getOrInsertFunction(
804       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
805 
806   MsanMetadata = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
807                                  PointerType::get(IRB.getInt32Ty(), 0));
808 
809   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
810     std::string name_load =
811         "__msan_metadata_ptr_for_load_" + std::to_string(size);
812     std::string name_store =
813         "__msan_metadata_ptr_for_store_" + std::to_string(size);
814     MsanMetadataPtrForLoad_1_8[ind] = getOrInsertMsanMetadataFunction(
815         M, name_load, PointerType::get(IRB.getInt8Ty(), 0));
816     MsanMetadataPtrForStore_1_8[ind] = getOrInsertMsanMetadataFunction(
817         M, name_store, PointerType::get(IRB.getInt8Ty(), 0));
818   }
819 
820   MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
821       M, "__msan_metadata_ptr_for_load_n", PointerType::get(IRB.getInt8Ty(), 0),
822       IRB.getInt64Ty());
823   MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
824       M, "__msan_metadata_ptr_for_store_n",
825       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
826 
827   // Functions for poisoning and unpoisoning memory.
828   MsanPoisonAllocaFn = M.getOrInsertFunction(
829       "__msan_poison_alloca", IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy);
830   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
831       "__msan_unpoison_alloca", IRB.getVoidTy(), PtrTy, IntptrTy);
832 }
833 
834 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
835   return M.getOrInsertGlobal(Name, Ty, [&] {
836     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
837                               nullptr, Name, nullptr,
838                               GlobalVariable::InitialExecTLSModel);
839   });
840 }
841 
842 /// Insert declarations for userspace-specific functions and globals.
843 void MemorySanitizer::createUserspaceApi(Module &M, const TargetLibraryInfo &TLI) {
844   IRBuilder<> IRB(*C);
845 
846   // Create the callback.
847   // FIXME: this function should have "Cold" calling conv,
848   // which is not yet implemented.
849   if (TrackOrigins) {
850     StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
851                                       : "__msan_warning_with_origin_noreturn";
852     WarningFn = M.getOrInsertFunction(WarningFnName,
853                                       TLI.getAttrList(C, {0}, /*Signed=*/false),
854                                       IRB.getVoidTy(), IRB.getInt32Ty());
855   } else {
856     StringRef WarningFnName =
857         Recover ? "__msan_warning" : "__msan_warning_noreturn";
858     WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
859   }
860 
861   // Create the global TLS variables.
862   RetvalTLS =
863       getOrInsertGlobal(M, "__msan_retval_tls",
864                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
865 
866   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
867 
868   ParamTLS =
869       getOrInsertGlobal(M, "__msan_param_tls",
870                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
871 
872   ParamOriginTLS =
873       getOrInsertGlobal(M, "__msan_param_origin_tls",
874                         ArrayType::get(OriginTy, kParamTLSSize / 4));
875 
876   VAArgTLS =
877       getOrInsertGlobal(M, "__msan_va_arg_tls",
878                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
879 
880   VAArgOriginTLS =
881       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
882                         ArrayType::get(OriginTy, kParamTLSSize / 4));
883 
884   VAArgOverflowSizeTLS =
885       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
886 
887   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
888        AccessSizeIndex++) {
889     unsigned AccessSize = 1 << AccessSizeIndex;
890     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
891     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
892         FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
893         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
894 
895     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
896     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
897         FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
898         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), PtrTy,
899         IRB.getInt32Ty());
900   }
901 
902   MsanSetAllocaOriginWithDescriptionFn =
903       M.getOrInsertFunction("__msan_set_alloca_origin_with_descr",
904                             IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy, PtrTy);
905   MsanSetAllocaOriginNoDescriptionFn =
906       M.getOrInsertFunction("__msan_set_alloca_origin_no_descr",
907                             IRB.getVoidTy(), PtrTy, IntptrTy, PtrTy);
908   MsanPoisonStackFn = M.getOrInsertFunction("__msan_poison_stack",
909                                             IRB.getVoidTy(), PtrTy, IntptrTy);
910 }
911 
912 /// Insert extern declaration of runtime-provided functions and globals.
913 void MemorySanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo &TLI) {
914   // Only do this once.
915   if (CallbacksInitialized)
916     return;
917 
918   IRBuilder<> IRB(*C);
919   // Initialize callbacks that are common for kernel and userspace
920   // instrumentation.
921   MsanChainOriginFn = M.getOrInsertFunction(
922       "__msan_chain_origin",
923       TLI.getAttrList(C, {0}, /*Signed=*/false, /*Ret=*/true), IRB.getInt32Ty(),
924       IRB.getInt32Ty());
925   MsanSetOriginFn = M.getOrInsertFunction(
926       "__msan_set_origin", TLI.getAttrList(C, {2}, /*Signed=*/false),
927       IRB.getVoidTy(), PtrTy, IntptrTy, IRB.getInt32Ty());
928   MemmoveFn =
929       M.getOrInsertFunction("__msan_memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
930   MemcpyFn =
931       M.getOrInsertFunction("__msan_memcpy", PtrTy, PtrTy, PtrTy, IntptrTy);
932   MemsetFn = M.getOrInsertFunction("__msan_memset",
933                                    TLI.getAttrList(C, {1}, /*Signed=*/true),
934                                    PtrTy, PtrTy, IRB.getInt32Ty(), IntptrTy);
935 
936   MsanInstrumentAsmStoreFn =
937       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
938                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
939 
940   if (CompileKernel) {
941     createKernelApi(M, TLI);
942   } else {
943     createUserspaceApi(M, TLI);
944   }
945   CallbacksInitialized = true;
946 }
947 
948 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
949                                                              int size) {
950   FunctionCallee *Fns =
951       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
952   switch (size) {
953   case 1:
954     return Fns[0];
955   case 2:
956     return Fns[1];
957   case 4:
958     return Fns[2];
959   case 8:
960     return Fns[3];
961   default:
962     return nullptr;
963   }
964 }
965 
/// Module-level initialization.
///
/// Selects the shadow/origin memory map parameters (either from the
/// command-line overrides or from the target OS and architecture), caches the
/// commonly used types and branch weights, and, for userspace
/// instrumentation, emits the __msan_track_origins and __msan_keep_going
/// globals when the corresponding option is enabled.
///
/// Aborts via report_fatal_error() for an unsupported OS or architecture.
void MemorySanitizer::initializeModule(Module &M) {
  auto &DL = M.getDataLayout();

  TargetTriple = Triple(M.getTargetTriple());

  // An explicit shadow or origin base on the command line switches the whole
  // mapping to the custom parameters.
  bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
  bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
  // Check the overrides first
  if (ShadowPassed || OriginPassed) {
    CustomMapParams.AndMask = ClAndMask;
    CustomMapParams.XorMask = ClXorMask;
    CustomMapParams.ShadowBase = ClShadowBase;
    CustomMapParams.OriginBase = ClOriginBase;
    MapParams = &CustomMapParams;
  } else {
    // Otherwise pick the predefined layout for the target OS/architecture.
    switch (TargetTriple.getOS()) {
    case Triple::FreeBSD:
      switch (TargetTriple.getArch()) {
      case Triple::aarch64:
        MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
        break;
      case Triple::x86_64:
        MapParams = FreeBSD_X86_MemoryMapParams.bits64;
        break;
      case Triple::x86:
        MapParams = FreeBSD_X86_MemoryMapParams.bits32;
        break;
      default:
        report_fatal_error("unsupported architecture");
      }
      break;
    case Triple::NetBSD:
      switch (TargetTriple.getArch()) {
      case Triple::x86_64:
        MapParams = NetBSD_X86_MemoryMapParams.bits64;
        break;
      default:
        report_fatal_error("unsupported architecture");
      }
      break;
    case Triple::Linux:
      switch (TargetTriple.getArch()) {
      case Triple::x86_64:
        MapParams = Linux_X86_MemoryMapParams.bits64;
        break;
      case Triple::x86:
        MapParams = Linux_X86_MemoryMapParams.bits32;
        break;
      case Triple::mips64:
      case Triple::mips64el:
        MapParams = Linux_MIPS_MemoryMapParams.bits64;
        break;
      case Triple::ppc64:
      case Triple::ppc64le:
        MapParams = Linux_PowerPC_MemoryMapParams.bits64;
        break;
      case Triple::systemz:
        MapParams = Linux_S390_MemoryMapParams.bits64;
        break;
      case Triple::aarch64:
      case Triple::aarch64_be:
        MapParams = Linux_ARM_MemoryMapParams.bits64;
        break;
      case Triple::loongarch64:
        MapParams = Linux_LoongArch_MemoryMapParams.bits64;
        break;
      default:
        report_fatal_error("unsupported architecture");
      }
      break;
    default:
      report_fatal_error("unsupported operating system");
    }
  }

  // Cache the context and the types used throughout the instrumentation.
  C = &(M.getContext());
  IRBuilder<> IRB(*C);
  IntptrTy = IRB.getIntPtrTy(DL);
  OriginTy = IRB.getInt32Ty();
  PtrTy = IRB.getPtrTy();

  // Both the warning path and the origin store path are marked as unlikely.
  ColdCallWeights = MDBuilder(*C).createUnlikelyBranchWeights();
  OriginStoreWeights = MDBuilder(*C).createUnlikelyBranchWeights();

  if (!CompileKernel) {
    // Each global is a constant i32 with weak ODR linkage holding the option
    // value; it is only created when the option is non-zero.
    if (TrackOrigins)
      M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
        return new GlobalVariable(
            M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
            IRB.getInt32(TrackOrigins), "__msan_track_origins");
      });

    if (Recover)
      M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
        return new GlobalVariable(M, IRB.getInt32Ty(), true,
                                  GlobalValue::WeakODRLinkage,
                                  IRB.getInt32(Recover), "__msan_keep_going");
      });
  }
}
1069 
1070 namespace {
1071 
/// A helper class that handles instrumentation of VarArg
/// functions on a particular platform.
///
/// Implementations are expected to insert the instrumentation
/// necessary to propagate argument shadow through VarArg function
/// calls. Visit* methods are called during an InstVisitor pass over
/// the function, and should avoid creating new basic blocks. A new
/// instance of this class is created for each instrumented function.
struct VarArgHelper {
  /// Virtual because the visitor holds its helper through
  /// std::unique_ptr<VarArgHelper>, so destruction goes through this base
  /// type.
  virtual ~VarArgHelper() = default;

  /// Visit a CallBase.
  ///
  /// \param CB the call site being visited.
  /// \param IRB builder supplied by the caller (presumably positioned at the
  /// call site -- confirm against the callers).
  virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;

  /// Visit a va_start call.
  virtual void visitVAStartInst(VAStartInst &I) = 0;

  /// Visit a va_copy call.
  virtual void visitVACopyInst(VACopyInst &I) = 0;

  /// Finalize function instrumentation.
  ///
  /// This method is called after visiting all interesting (see above)
  /// instructions in a function.
  virtual void finalizeInstrumentation() = 0;
};
1098 
1099 struct MemorySanitizerVisitor;
1100 
1101 } // end anonymous namespace
1102 
1103 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1104                                         MemorySanitizerVisitor &Visitor);
1105 
1106 static unsigned TypeSizeToSizeIndex(TypeSize TS) {
1107   if (TS.isScalable())
1108     // Scalable types unconditionally take slowpaths.
1109     return kNumberOfAccessSizes;
1110   unsigned TypeSizeFixed = TS.getFixedValue();
1111   if (TypeSizeFixed <= 8)
1112     return 0;
1113   return Log2_32_Ceil((TypeSizeFixed + 7) / 8);
1114 }
1115 
1116 namespace {
1117 
1118 /// Helper class to attach debug information of the given instruction onto new
1119 /// instructions inserted after.
1120 class NextNodeIRBuilder : public IRBuilder<> {
1121 public:
1122   explicit NextNodeIRBuilder(Instruction *IP) : IRBuilder<>(IP->getNextNode()) {
1123     SetCurrentDebugLocation(IP->getDebugLoc());
1124   }
1125 };
1126 
1127 /// This class does all the work for a given function. Store and Load
1128 /// instructions store and load corresponding shadow and origin
1129 /// values. Most instructions propagate shadow from arguments to their
1130 /// return values. Certain instructions (most importantly, BranchInst)
1131 /// test their argument shadow and print reports (with a runtime call) if it's
1132 /// non-zero.
1133 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1134   Function &F;
1135   MemorySanitizer &MS;
1136   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1137   ValueMap<Value *, Value *> ShadowMap, OriginMap;
1138   std::unique_ptr<VarArgHelper> VAHelper;
1139   const TargetLibraryInfo *TLI;
1140   Instruction *FnPrologueEnd;
1141   SmallVector<Instruction *, 16> Instructions;
1142 
1143   // The following flags disable parts of MSan instrumentation based on
1144   // exclusion list contents and command-line options.
1145   bool InsertChecks;
1146   bool PropagateShadow;
1147   bool PoisonStack;
1148   bool PoisonUndef;
1149 
  /// A pending shadow check recorded in InstrumentationList and later
  /// materialized by materializeChecks().
  struct ShadowOriginAndInsertPoint {
    /// Shadow value to be tested.
    Value *Shadow;
    /// Origin associated with Shadow; it is forwarded to the warning call and
    /// is handled as optional there (null is replaced by a zero origin).
    Value *Origin;
    /// Original instruction the check belongs to. Checks are grouped by this
    /// instruction and the check code is inserted in front of it.
    Instruction *OrigIns;

    ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
        : Shadow(S), Origin(O), OrigIns(I) {}
  };
1158   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1159   DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
1160   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1161   SmallSetVector<AllocaInst *, 16> AllocaSet;
1162   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1163   SmallVector<StoreInst *, 16> StoreList;
1164   int64_t SplittableBlocksCount = 0;
1165 
  /// Prepare the instrumentation of \p F.
  ///
  /// Derives the per-function instrumentation flags, removes unreachable
  /// blocks, makes sure the runtime callbacks are declared, and marks the end
  /// of the function prologue. In kernel mode the KMSAN prologue is emitted
  /// as well.
  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
                         const TargetLibraryInfo &TLI)
      : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
    // All flags are off unless the function carries the sanitize_memory
    // attribute and checks are not disabled on the command line.
    bool SanitizeFunction =
        F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
    InsertChecks = SanitizeFunction;
    PropagateShadow = SanitizeFunction;
    PoisonStack = SanitizeFunction && ClPoisonStack;
    PoisonUndef = SanitizeFunction && ClPoisonUndef;

    // In the presence of unreachable blocks, we may see Phi nodes with
    // incoming nodes from such blocks. Since InstVisitor skips unreachable
    // blocks, such nodes will not have any shadow value associated with them.
    // It's easier to remove unreachable blocks than deal with missing shadow.
    removeUnreachableBlocks(F);

    MS.initializeCallbacks(*F.getParent(), TLI);
    // A llvm.donothing call placed at the first non-PHI instruction of the
    // entry block serves as the marker for the end of the prologue (see
    // isInPrologue()).
    FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
                        .CreateIntrinsic(Intrinsic::donothing, {}, {});

    if (MS.CompileKernel) {
      // The KMSAN prologue is emitted in front of the marker, i.e. it is part
      // of the prologue.
      IRBuilder<> IRB(FnPrologueEnd);
      insertKmsanPrologue(IRB);
    }

    LLVM_DEBUG(if (!InsertChecks) dbgs()
               << "MemorySanitizer is not inserting checks into '"
               << F.getName() << "'\n");
  }
1195 
1196   bool instrumentWithCalls(Value *V) {
1197     // Constants likely will be eliminated by follow-up passes.
1198     if (isa<Constant>(V))
1199       return false;
1200 
1201     ++SplittableBlocksCount;
1202     return ClInstrumentationWithCallThreshold >= 0 &&
1203            SplittableBlocksCount > ClInstrumentationWithCallThreshold;
1204   }
1205 
1206   bool isInPrologue(Instruction &I) {
1207     return I.getParent() == FnPrologueEnd->getParent() &&
1208            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
1209   }
1210 
1211   // Creates a new origin and records the stack trace. In general we can call
1212   // this function for any origin manipulation we like. However it will cost
1213   // runtime resources. So use this wisely only if it can provide additional
1214   // information helpful to a user.
1215   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1216     if (MS.TrackOrigins <= 1)
1217       return V;
1218     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1219   }
1220 
1221   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1222     const DataLayout &DL = F.getDataLayout();
1223     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1224     if (IntptrSize == kOriginSize)
1225       return Origin;
1226     assert(IntptrSize == kOriginSize * 2);
1227     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1228     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1229   }
1230 
  /// Fill memory range with the given origin value.
  ///
  /// \param IRB builder used for the stores. In the scalable case its insert
  /// point is moved into the body of the generated loop.
  /// \param Origin origin value to store.
  /// \param OriginPtr address of the first origin slot.
  /// \param TS size in bytes of the range to cover; it is rounded up to a
  /// whole number of origin slots.
  /// \param Alignment alignment assumed for the first store.
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
                   TypeSize TS, Align Alignment) {
    const DataLayout &DL = F.getDataLayout();
    const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
    unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
    assert(IntptrAlignment >= kMinOriginAlignment);
    assert(IntptrSize >= kOriginSize);

    // Note: The loop based formation works for fixed length vectors too,
    // however we prefer to unroll and specialize alignment below.
    if (TS.isScalable()) {
      // The number of origin slots is only known at run time:
      // End = (Size + kOriginSize - 1) / kOriginSize.
      Value *Size = IRB.CreateTypeSize(MS.IntptrTy, TS);
      Value *RoundUp =
          IRB.CreateAdd(Size, ConstantInt::get(MS.IntptrTy, kOriginSize - 1));
      Value *End =
          IRB.CreateUDiv(RoundUp, ConstantInt::get(MS.IntptrTy, kOriginSize));
      auto [InsertPt, Index] =
        SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint());
      IRB.SetInsertPoint(InsertPt);

      // Loop body: store the origin into slot Index.
      Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index);
      IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment);
      return;
    }

    unsigned Size = TS.getFixedValue();

    // Ofs counts the origin slots already written by the wide stores below.
    unsigned Ofs = 0;
    Align CurrentAlignment = Alignment;
    if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
      // Sufficiently aligned and IntptrTy is wider than an origin: write
      // IntptrSize bytes per store using the widened origin.
      Value *IntptrOrigin = originToIntptr(IRB, Origin);
      Value *IntptrOriginPtr =
          IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
      for (unsigned i = 0; i < Size / IntptrSize; ++i) {
        Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
                       : IntptrOriginPtr;
        IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
        Ofs += IntptrSize / kOriginSize;
        // Only the first store may use the caller-provided alignment.
        CurrentAlignment = IntptrAlignment;
      }
    }

    // Cover the remaining slots, up to Size rounded up to kOriginSize, with
    // origin-sized stores.
    for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
      Value *GEP =
          i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
      IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
      CurrentAlignment = kMinOriginAlignment;
    }
  }
1281 
  /// Emit code that stores \p Origin for a value whose shadow is \p Shadow.
  ///
  /// Nothing is emitted when the shadow is a constant that is zero or when
  /// constant shadow checking is disabled. A constant shadow known to be
  /// non-zero leads to an unconditional origin store. In all other cases the
  /// store is guarded at run time, either through the matching
  /// MS.MaybeStoreOriginFn callback or through an inline branch.
  ///
  /// \param Addr application address; passed to the callback.
  /// \param OriginPtr address of the origin slot for the inline stores.
  /// \param Alignment alignment of the origin store; raised to at least
  /// kMinOriginAlignment.
  void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
                   Value *OriginPtr, Align Alignment) {
    const DataLayout &DL = F.getDataLayout();
    const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
    TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
    // ZExt cannot convert between vector and scalar
    Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
    if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
      if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
        // Origin is not needed: value is initialized or const shadow is
        // ignored.
        return;
      }
      if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
        // Copy origin as the value is definitely uninitialized.
        paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
                    OriginAlignment);
        return;
      }
      // Fallback to runtime check, which still can be optimized out later.
    }

    TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
    unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
    // The callback form is only used in userspace mode and only when a
    // callback exists for this shadow size.
    if (instrumentWithCalls(ConvertedShadow) &&
        SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
      FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
      // Widen the shadow to the integer type the callback expects.
      Value *ConvertedShadow2 =
          IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
      CallBase *CB = IRB.CreateCall(Fn, {ConvertedShadow2, Addr, Origin});
      CB->addParamAttr(0, Attribute::ZExt);
      CB->addParamAttr(2, Attribute::ZExt);
    } else {
      // Inline form: branch on "shadow != 0" and paint the origin in the
      // conditionally executed block, which is weighted as unlikely.
      Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
          Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
      IRBuilder<> IRBNew(CheckTerm);
      paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
                  OriginAlignment);
    }
  }
1323 
1324   void materializeStores() {
1325     for (StoreInst *SI : StoreList) {
1326       IRBuilder<> IRB(SI);
1327       Value *Val = SI->getValueOperand();
1328       Value *Addr = SI->getPointerOperand();
1329       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1330       Value *ShadowPtr, *OriginPtr;
1331       Type *ShadowTy = Shadow->getType();
1332       const Align Alignment = SI->getAlign();
1333       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1334       std::tie(ShadowPtr, OriginPtr) =
1335           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1336 
1337       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1338       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1339       (void)NewSI;
1340 
1341       if (SI->isAtomic())
1342         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1343 
1344       if (MS.TrackOrigins && !SI->isAtomic())
1345         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1346                     OriginAlignment);
1347     }
1348   }
1349 
1350   // Returns true if Debug Location corresponds to multiple warnings.
1351   bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
1352     if (MS.TrackOrigins < 2)
1353       return false;
1354 
1355     if (LazyWarningDebugLocationCount.empty())
1356       for (const auto &I : InstrumentationList)
1357         ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];
1358 
1359     return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
1360   }
1361 
1362   /// Helper function to insert a warning at IRB's current insert point.
1363   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1364     if (!Origin)
1365       Origin = (Value *)IRB.getInt32(0);
1366     assert(Origin->getType()->isIntegerTy());
1367 
1368     if (shouldDisambiguateWarningLocation(IRB.getCurrentDebugLocation())) {
1369       // Try to create additional origin with debug info of the last origin
1370       // instruction. It may provide additional information to the user.
1371       if (Instruction *OI = dyn_cast_or_null<Instruction>(Origin)) {
1372         assert(MS.TrackOrigins);
1373         auto NewDebugLoc = OI->getDebugLoc();
1374         // Origin update with missing or the same debug location provides no
1375         // additional value.
1376         if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
1377           // Insert update just before the check, so we call runtime only just
1378           // before the report.
1379           IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
1380           IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
1381           Origin = updateOrigin(Origin, IRBOrigin);
1382         }
1383       }
1384     }
1385 
1386     if (MS.CompileKernel || MS.TrackOrigins)
1387       IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1388     else
1389       IRB.CreateCall(MS.WarningFn)->setCannotMerge();
1390     // FIXME: Insert UnreachableInst if !MS.Recover?
1391     // This may invalidate some of the following checks and needs to be done
1392     // at the very end.
1393   }
1394 
  /// Emit one runtime check of \p ConvertedShadow at IRB's insert point.
  ///
  /// The check is either a call to the matching MS.MaybeWarningFn callback
  /// or an inline compare-and-branch to a warning call. In the inline case
  /// IRB's insert point is moved into the newly created block.
  ///
  /// \param Origin origin to report; may be null.
  void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
                           Value *Origin) {
    const DataLayout &DL = F.getDataLayout();
    TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
    unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
    // The callback form is only used in userspace mode and only when a
    // callback exists for this shadow size.
    if (instrumentWithCalls(ConvertedShadow) &&
        SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
      FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
      // ZExt cannot convert between vector and scalar
      ConvertedShadow = convertShadowToScalar(ConvertedShadow, IRB);
      Value *ConvertedShadow2 =
          IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
      // A zero origin is passed when origins are not tracked or none is
      // available.
      CallBase *CB = IRB.CreateCall(
          Fn, {ConvertedShadow2,
               MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
      CB->addParamAttr(0, Attribute::ZExt);
      CB->addParamAttr(1, Attribute::ZExt);
    } else {
      // Inline form: branch on "shadow != 0" into a block weighted as cold.
      // Without Recover that block is terminated by unreachable.
      Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
      Instruction *CheckTerm = SplitBlockAndInsertIfThen(
          Cmp, &*IRB.GetInsertPoint(),
          /* Unreachable */ !MS.Recover, MS.ColdCallWeights);

      IRB.SetInsertPoint(CheckTerm);
      insertWarningFn(IRB, Origin);
      LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
    }
  }
1423 
  /// Emit all checks that belong to a single original instruction.
  ///
  /// \param InstructionChecks non-empty list of checks sharing the same
  /// OrigIns.
  ///
  /// Without origin tracking the runtime checks are OR-ed together and a
  /// single combined check is emitted. With origin tracking every shadow gets
  /// its own check so that the matching origin can be reported.
  void materializeInstructionChecks(
      ArrayRef<ShadowOriginAndInsertPoint> InstructionChecks) {
    const DataLayout &DL = F.getDataLayout();
    // Disable combining in some cases. TrackOrigins checks each shadow to pick
    // correct origin.
    bool Combine = !MS.TrackOrigins;
    Instruction *Instruction = InstructionChecks.front().OrigIns;
    // Accumulates the combined shadow; stays null until the first shadow that
    // needs a runtime check is seen (only used when Combine is set).
    Value *Shadow = nullptr;
    for (const auto &ShadowData : InstructionChecks) {
      assert(ShadowData.OrigIns == Instruction);
      IRBuilder<> IRB(Instruction);

      Value *ConvertedShadow = ShadowData.Shadow;

      if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
        if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
          // Skip, value is initialized or const shadow is ignored.
          continue;
        }
        if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
          // Report as the value is definitely uninitialized.
          insertWarningFn(IRB, ShadowData.Origin);
          if (!MS.Recover)
            return; // Always fail and stop here, not need to check the rest.
          // Skip entire instruction,
          continue;
        }
        // Fallback to runtime check, which still can be optimized out later.
      }

      if (!Combine) {
        materializeOneCheck(IRB, ConvertedShadow, ShadowData.Origin);
        continue;
      }

      if (!Shadow) {
        // First shadow to combine: keep it as is; it is converted to a
        // boolean only if another shadow follows.
        Shadow = ConvertedShadow;
        continue;
      }

      // Reduce both sides to booleans and OR them together.
      Shadow = convertToBool(Shadow, IRB, "_mscmp");
      ConvertedShadow = convertToBool(ConvertedShadow, IRB, "_mscmp");
      Shadow = IRB.CreateOr(Shadow, ConvertedShadow, "_msor");
    }

    if (Shadow) {
      // Emit the single combined check; no origin is available here.
      assert(Combine);
      IRBuilder<> IRB(Instruction);
      materializeOneCheck(IRB, Shadow, nullptr);
    }
  }
1475 
1476   void materializeChecks() {
1477 #ifndef NDEBUG
1478     // For assert below.
1479     SmallPtrSet<Instruction *, 16> Done;
1480 #endif
1481 
1482     for (auto I = InstrumentationList.begin();
1483          I != InstrumentationList.end();) {
1484       auto OrigIns = I->OrigIns;
1485       // Checks are grouped by the original instruction. We call all
1486       // `insertShadowCheck` for an instruction at once.
1487       assert(Done.insert(OrigIns).second);
1488       auto J = std::find_if(I + 1, InstrumentationList.end(),
1489                             [OrigIns](const ShadowOriginAndInsertPoint &R) {
1490                               return OrigIns != R.OrigIns;
1491                             });
1492       // Process all checks of instruction at once.
1493       materializeInstructionChecks(ArrayRef<ShadowOriginAndInsertPoint>(I, J));
1494       I = J;
1495     }
1496 
1497     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1498   }
1499 
1500   // Returns the last instruction in the new prologue
1501   void insertKmsanPrologue(IRBuilder<> &IRB) {
1502     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1503     Constant *Zero = IRB.getInt32(0);
1504     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1505                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1506     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1507                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1508     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1509                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1510     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1511                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1512     MS.VAArgOverflowSizeTLS =
1513         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1514                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1515     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1516                                       {Zero, IRB.getInt32(5)}, "param_origin");
1517     MS.RetvalOriginTLS =
1518         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1519                       {Zero, IRB.getInt32(6)}, "retval_origin");
1520     if (MS.TargetTriple.getArch() == Triple::systemz)
1521       MS.MsanMetadataAlloca = IRB.CreateAlloca(MS.MsanMetadata, 0u);
1522   }
1523 
1524   /// Add MemorySanitizer instrumentation to a function.
1525   bool runOnFunction() {
1526     // Iterate all BBs in depth-first order and create shadow instructions
1527     // for all instructions (where applicable).
1528     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1529     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
1530       visit(*BB);
1531 
1532     // `visit` above only collects instructions. Process them after iterating
1533     // CFG to avoid requirement on CFG transformations.
1534     for (Instruction *I : Instructions)
1535       InstVisitor<MemorySanitizerVisitor>::visit(*I);
1536 
1537     // Finalize PHI nodes.
1538     for (PHINode *PN : ShadowPHINodes) {
1539       PHINode *PNS = cast<PHINode>(getShadow(PN));
1540       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1541       size_t NumValues = PN->getNumIncomingValues();
1542       for (size_t v = 0; v < NumValues; v++) {
1543         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1544         if (PNO)
1545           PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1546       }
1547     }
1548 
1549     VAHelper->finalizeInstrumentation();
1550 
1551     // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
1552     // instrumenting only allocas.
1553     if (InstrumentLifetimeStart) {
1554       for (auto Item : LifetimeStartList) {
1555         instrumentAlloca(*Item.second, Item.first);
1556         AllocaSet.remove(Item.second);
1557       }
1558     }
1559     // Poison the allocas for which we didn't instrument the corresponding
1560     // lifetime intrinsics.
1561     for (AllocaInst *AI : AllocaSet)
1562       instrumentAlloca(*AI);
1563 
1564     // Insert shadow value checks.
1565     materializeChecks();
1566 
1567     // Delayed instrumentation of StoreInst.
1568     // This may not add new address checks.
1569     materializeStores();
1570 
1571     return true;
1572   }
1573 
1574   /// Compute the shadow type that corresponds to a given Value.
1575   Type *getShadowTy(Value *V) { return getShadowTy(V->getType()); }
1576 
1577   /// Compute the shadow type that corresponds to a given Type.
1578   Type *getShadowTy(Type *OrigTy) {
1579     if (!OrigTy->isSized()) {
1580       return nullptr;
1581     }
1582     // For integer type, shadow is the same as the original type.
1583     // This may return weird-sized types like i1.
1584     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1585       return IT;
1586     const DataLayout &DL = F.getDataLayout();
1587     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1588       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1589       return VectorType::get(IntegerType::get(*MS.C, EltSize),
1590                              VT->getElementCount());
1591     }
1592     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1593       return ArrayType::get(getShadowTy(AT->getElementType()),
1594                             AT->getNumElements());
1595     }
1596     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1597       SmallVector<Type *, 4> Elements;
1598       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1599         Elements.push_back(getShadowTy(ST->getElementType(i)));
1600       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1601       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1602       return Res;
1603     }
1604     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1605     return IntegerType::get(*MS.C, TypeSize);
1606   }
1607 
1608   /// Extract combined shadow of struct elements as a bool
1609   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1610                               IRBuilder<> &IRB) {
1611     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1612     Value *Aggregator = FalseVal;
1613 
1614     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1615       // Combine by ORing together each element's bool shadow
1616       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1617       Value *ShadowBool = convertToBool(ShadowItem, IRB);
1618 
1619       if (Aggregator != FalseVal)
1620         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1621       else
1622         Aggregator = ShadowBool;
1623     }
1624 
1625     return Aggregator;
1626   }
1627 
1628   // Extract combined shadow of array elements
1629   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1630                              IRBuilder<> &IRB) {
1631     if (!Array->getNumElements())
1632       return IRB.getIntN(/* width */ 1, /* value */ 0);
1633 
1634     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1635     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1636 
1637     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1638       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1639       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1640       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1641     }
1642     return Aggregator;
1643   }
1644 
1645   /// Convert a shadow value to it's flattened variant. The resulting
1646   /// shadow may not necessarily have the same bit width as the input
1647   /// value, but it will always be comparable to zero.
1648   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1649     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1650       return collapseStructShadow(Struct, V, IRB);
1651     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1652       return collapseArrayShadow(Array, V, IRB);
1653     if (isa<VectorType>(V->getType())) {
1654       if (isa<ScalableVectorType>(V->getType()))
1655         return convertShadowToScalar(IRB.CreateOrReduce(V), IRB);
1656       unsigned BitWidth =
1657         V->getType()->getPrimitiveSizeInBits().getFixedValue();
1658       return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth));
1659     }
1660     return V;
1661   }
1662 
1663   // Convert a scalar value to an i1 by comparing with 0
1664   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1665     Type *VTy = V->getType();
1666     if (!VTy->isIntegerTy())
1667       return convertToBool(convertShadowToScalar(V, IRB), IRB, name);
1668     if (VTy->getIntegerBitWidth() == 1)
1669       // Just converting a bool to a bool, so do nothing.
1670       return V;
1671     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1672   }
1673 
1674   Type *ptrToIntPtrType(Type *PtrTy) const {
1675     if (VectorType *VectTy = dyn_cast<VectorType>(PtrTy)) {
1676       return VectorType::get(ptrToIntPtrType(VectTy->getElementType()),
1677                              VectTy->getElementCount());
1678     }
1679     assert(PtrTy->isIntOrPtrTy());
1680     return MS.IntptrTy;
1681   }
1682 
1683   Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
1684     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1685       return VectorType::get(
1686           getPtrToShadowPtrType(VectTy->getElementType(), ShadowTy),
1687           VectTy->getElementCount());
1688     }
1689     assert(IntPtrTy == MS.IntptrTy);
1690     return PointerType::get(*MS.C, 0);
1691   }
1692 
1693   Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
1694     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1695       return ConstantVector::getSplat(
1696           VectTy->getElementCount(), constToIntPtr(VectTy->getElementType(), C));
1697     }
1698     assert(IntPtrTy == MS.IntptrTy);
1699     return ConstantInt::get(MS.IntptrTy, C);
1700   }
1701 
1702   /// Compute the integer shadow offset that corresponds to a given
1703   /// application address.
1704   ///
1705   /// Offset = (Addr & ~AndMask) ^ XorMask
1706   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
1707   /// a single pointee.
1708   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1709   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1710     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1711     Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1712 
1713     if (uint64_t AndMask = MS.MapParams->AndMask)
1714       OffsetLong = IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask));
1715 
1716     if (uint64_t XorMask = MS.MapParams->XorMask)
1717       OffsetLong = IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask));
1718     return OffsetLong;
1719   }
1720 
1721   /// Compute the shadow and origin addresses corresponding to a given
1722   /// application address.
1723   ///
1724   /// Shadow = ShadowBase + Offset
1725   /// Origin = (OriginBase + Offset) & ~3ULL
1726   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
1727   /// a single pointee.
1728   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1729   std::pair<Value *, Value *>
1730   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1731                               MaybeAlign Alignment) {
1732     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1733     if (!VectTy) {
1734       assert(Addr->getType()->isPointerTy());
1735     } else {
1736       assert(VectTy->getElementType()->isPointerTy());
1737     }
1738     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1739     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1740     Value *ShadowLong = ShadowOffset;
1741     if (uint64_t ShadowBase = MS.MapParams->ShadowBase) {
1742       ShadowLong =
1743           IRB.CreateAdd(ShadowLong, constToIntPtr(IntptrTy, ShadowBase));
1744     }
1745     Value *ShadowPtr = IRB.CreateIntToPtr(
1746         ShadowLong, getPtrToShadowPtrType(IntptrTy, ShadowTy));
1747 
1748     Value *OriginPtr = nullptr;
1749     if (MS.TrackOrigins) {
1750       Value *OriginLong = ShadowOffset;
1751       uint64_t OriginBase = MS.MapParams->OriginBase;
1752       if (OriginBase != 0)
1753         OriginLong =
1754             IRB.CreateAdd(OriginLong, constToIntPtr(IntptrTy, OriginBase));
1755       if (!Alignment || *Alignment < kMinOriginAlignment) {
1756         uint64_t Mask = kMinOriginAlignment.value() - 1;
1757         OriginLong = IRB.CreateAnd(OriginLong, constToIntPtr(IntptrTy, ~Mask));
1758       }
1759       OriginPtr = IRB.CreateIntToPtr(
1760           OriginLong, getPtrToShadowPtrType(IntptrTy, MS.OriginTy));
1761     }
1762     return std::make_pair(ShadowPtr, OriginPtr);
1763   }
1764 
1765   template <typename... ArgsTy>
1766   Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
1767                             ArgsTy... Args) {
1768     if (MS.TargetTriple.getArch() == Triple::systemz) {
1769       IRB.CreateCall(Callee,
1770                      {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
1771       return IRB.CreateLoad(MS.MsanMetadata, MS.MsanMetadataAlloca);
1772     }
1773 
1774     return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
1775   }
1776 
1777   std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
1778                                                             IRBuilder<> &IRB,
1779                                                             Type *ShadowTy,
1780                                                             bool isStore) {
1781     Value *ShadowOriginPtrs;
1782     const DataLayout &DL = F.getDataLayout();
1783     TypeSize Size = DL.getTypeStoreSize(ShadowTy);
1784 
1785     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1786     Value *AddrCast =
1787         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1788     if (Getter) {
1789       ShadowOriginPtrs = createMetadataCall(IRB, Getter, AddrCast);
1790     } else {
1791       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1792       ShadowOriginPtrs = createMetadataCall(
1793           IRB,
1794           isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
1795           AddrCast, SizeVal);
1796     }
1797     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1798     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1799     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1800 
1801     return std::make_pair(ShadowPtr, OriginPtr);
1802   }
1803 
1804   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy the shadow type of
1805   /// a single pointee.
1806   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1807   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1808                                                        IRBuilder<> &IRB,
1809                                                        Type *ShadowTy,
1810                                                        bool isStore) {
1811     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1812     if (!VectTy) {
1813       assert(Addr->getType()->isPointerTy());
1814       return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
1815     }
1816 
1817     // TODO: Support callbacs with vectors of addresses.
1818     unsigned NumElements = cast<FixedVectorType>(VectTy)->getNumElements();
1819     Value *ShadowPtrs = ConstantInt::getNullValue(
1820         FixedVectorType::get(IRB.getPtrTy(), NumElements));
1821     Value *OriginPtrs = nullptr;
1822     if (MS.TrackOrigins)
1823       OriginPtrs = ConstantInt::getNullValue(
1824           FixedVectorType::get(IRB.getPtrTy(), NumElements));
1825     for (unsigned i = 0; i < NumElements; ++i) {
1826       Value *OneAddr =
1827           IRB.CreateExtractElement(Addr, ConstantInt::get(IRB.getInt32Ty(), i));
1828       auto [ShadowPtr, OriginPtr] =
1829           getShadowOriginPtrKernelNoVec(OneAddr, IRB, ShadowTy, isStore);
1830 
1831       ShadowPtrs = IRB.CreateInsertElement(
1832           ShadowPtrs, ShadowPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1833       if (MS.TrackOrigins)
1834         OriginPtrs = IRB.CreateInsertElement(
1835             OriginPtrs, OriginPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1836     }
1837     return {ShadowPtrs, OriginPtrs};
1838   }
1839 
1840   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1841                                                  Type *ShadowTy,
1842                                                  MaybeAlign Alignment,
1843                                                  bool isStore) {
1844     if (MS.CompileKernel)
1845       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1846     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1847   }
1848 
1849   /// Compute the shadow address for a given function argument.
1850   ///
1851   /// Shadow = ParamTLS+ArgOffset.
1852   Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1853     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1854     if (ArgOffset)
1855       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1856     return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg");
1857   }
1858 
1859   /// Compute the origin address for a given function argument.
1860   Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
1861     if (!MS.TrackOrigins)
1862       return nullptr;
1863     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1864     if (ArgOffset)
1865       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1866     return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg_o");
1867   }
1868 
1869   /// Compute the shadow address for a retval.
1870   Value *getShadowPtrForRetval(IRBuilder<> &IRB) {
1871     return IRB.CreatePointerCast(MS.RetvalTLS, IRB.getPtrTy(0), "_msret");
1872   }
1873 
1874   /// Compute the origin address for a retval.
1875   Value *getOriginPtrForRetval() {
1876     // We keep a single origin for the entire retval. Might be too optimistic.
1877     return MS.RetvalOriginTLS;
1878   }
1879 
1880   /// Set SV to be the shadow value for V.
1881   void setShadow(Value *V, Value *SV) {
1882     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1883     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1884   }
1885 
1886   /// Set Origin to be the origin value for V.
1887   void setOrigin(Value *V, Value *Origin) {
1888     if (!MS.TrackOrigins)
1889       return;
1890     assert(!OriginMap.count(V) && "Values may only have one origin");
1891     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1892     OriginMap[V] = Origin;
1893   }
1894 
1895   Constant *getCleanShadow(Type *OrigTy) {
1896     Type *ShadowTy = getShadowTy(OrigTy);
1897     if (!ShadowTy)
1898       return nullptr;
1899     return Constant::getNullValue(ShadowTy);
1900   }
1901 
1902   /// Create a clean shadow value for a given value.
1903   ///
1904   /// Clean shadow (all zeroes) means all bits of the value are defined
1905   /// (initialized).
1906   Constant *getCleanShadow(Value *V) { return getCleanShadow(V->getType()); }
1907 
1908   /// Create a dirty shadow of a given shadow type.
1909   Constant *getPoisonedShadow(Type *ShadowTy) {
1910     assert(ShadowTy);
1911     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1912       return Constant::getAllOnesValue(ShadowTy);
1913     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1914       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1915                                       getPoisonedShadow(AT->getElementType()));
1916       return ConstantArray::get(AT, Vals);
1917     }
1918     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1919       SmallVector<Constant *, 4> Vals;
1920       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1921         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1922       return ConstantStruct::get(ST, Vals);
1923     }
1924     llvm_unreachable("Unexpected shadow type");
1925   }
1926 
1927   /// Create a dirty shadow for a given value.
1928   Constant *getPoisonedShadow(Value *V) {
1929     Type *ShadowTy = getShadowTy(V);
1930     if (!ShadowTy)
1931       return nullptr;
1932     return getPoisonedShadow(ShadowTy);
1933   }
1934 
1935   /// Create a clean (zero) origin.
1936   Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); }
1937 
1938   /// Get the shadow value for a given Value.
1939   ///
1940   /// This function either returns the value set earlier with setShadow,
1941   /// or extracts if from ParamTLS (for function arguments).
1942   Value *getShadow(Value *V) {
1943     if (Instruction *I = dyn_cast<Instruction>(V)) {
1944       if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize))
1945         return getCleanShadow(V);
1946       // For instructions the shadow is already stored in the map.
1947       Value *Shadow = ShadowMap[V];
1948       if (!Shadow) {
1949         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1950         (void)I;
1951         assert(Shadow && "No shadow for a value");
1952       }
1953       return Shadow;
1954     }
1955     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1956       Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
1957                                                         : getCleanShadow(V);
1958       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1959       (void)U;
1960       return AllOnes;
1961     }
1962     if (Argument *A = dyn_cast<Argument>(V)) {
1963       // For arguments we compute the shadow on demand and store it in the map.
1964       Value *&ShadowPtr = ShadowMap[V];
1965       if (ShadowPtr)
1966         return ShadowPtr;
1967       Function *F = A->getParent();
1968       IRBuilder<> EntryIRB(FnPrologueEnd);
1969       unsigned ArgOffset = 0;
1970       const DataLayout &DL = F->getDataLayout();
1971       for (auto &FArg : F->args()) {
1972         if (!FArg.getType()->isSized() || FArg.getType()->isScalableTy()) {
1973           LLVM_DEBUG(dbgs() << (FArg.getType()->isScalableTy()
1974                                     ? "vscale not fully supported\n"
1975                                     : "Arg is not sized\n"));
1976           if (A == &FArg) {
1977             ShadowPtr = getCleanShadow(V);
1978             setOrigin(A, getCleanOrigin());
1979             break;
1980           }
1981           continue;
1982         }
1983 
1984         unsigned Size = FArg.hasByValAttr()
1985                             ? DL.getTypeAllocSize(FArg.getParamByValType())
1986                             : DL.getTypeAllocSize(FArg.getType());
1987 
1988         if (A == &FArg) {
1989           bool Overflow = ArgOffset + Size > kParamTLSSize;
1990           if (FArg.hasByValAttr()) {
1991             // ByVal pointer itself has clean shadow. We copy the actual
1992             // argument shadow to the underlying memory.
1993             // Figure out maximal valid memcpy alignment.
1994             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1995                 FArg.getParamAlign(), FArg.getParamByValType());
1996             Value *CpShadowPtr, *CpOriginPtr;
1997             std::tie(CpShadowPtr, CpOriginPtr) =
1998                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1999                                    /*isStore*/ true);
2000             if (!PropagateShadow || Overflow) {
2001               // ParamTLS overflow.
2002               EntryIRB.CreateMemSet(
2003                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
2004                   Size, ArgAlign);
2005             } else {
2006               Value *Base = getShadowPtrForArgument(EntryIRB, ArgOffset);
2007               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
2008               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
2009                                                  CopyAlign, Size);
2010               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
2011               (void)Cpy;
2012 
2013               if (MS.TrackOrigins) {
2014                 Value *OriginPtr =
2015                     getOriginPtrForArgument(EntryIRB, ArgOffset);
2016                 // FIXME: OriginSize should be:
2017                 // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
2018                 unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
2019                 EntryIRB.CreateMemCpy(
2020                     CpOriginPtr,
2021                     /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
2022                     /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
2023                     OriginSize);
2024               }
2025             }
2026           }
2027 
2028           if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
2029               (MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) {
2030             ShadowPtr = getCleanShadow(V);
2031             setOrigin(A, getCleanOrigin());
2032           } else {
2033             // Shadow over TLS
2034             Value *Base = getShadowPtrForArgument(EntryIRB, ArgOffset);
2035             ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
2036                                                    kShadowTLSAlignment);
2037             if (MS.TrackOrigins) {
2038               Value *OriginPtr =
2039                   getOriginPtrForArgument(EntryIRB, ArgOffset);
2040               setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
2041             }
2042           }
2043           LLVM_DEBUG(dbgs()
2044                      << "  ARG:    " << FArg << " ==> " << *ShadowPtr << "\n");
2045           break;
2046         }
2047 
2048         ArgOffset += alignTo(Size, kShadowTLSAlignment);
2049       }
2050       assert(ShadowPtr && "Could not find shadow for an argument");
2051       return ShadowPtr;
2052     }
2053     // For everything else the shadow is zero.
2054     return getCleanShadow(V);
2055   }
2056 
2057   /// Get the shadow for i-th argument of the instruction I.
2058   Value *getShadow(Instruction *I, int i) {
2059     return getShadow(I->getOperand(i));
2060   }
2061 
2062   /// Get the origin for a value.
2063   Value *getOrigin(Value *V) {
2064     if (!MS.TrackOrigins)
2065       return nullptr;
2066     if (!PropagateShadow || isa<Constant>(V) || isa<InlineAsm>(V))
2067       return getCleanOrigin();
2068     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
2069            "Unexpected value type in getOrigin()");
2070     if (Instruction *I = dyn_cast<Instruction>(V)) {
2071       if (I->getMetadata(LLVMContext::MD_nosanitize))
2072         return getCleanOrigin();
2073     }
2074     Value *Origin = OriginMap[V];
2075     assert(Origin && "Missing origin");
2076     return Origin;
2077   }
2078 
2079   /// Get the origin for i-th argument of the instruction I.
2080   Value *getOrigin(Instruction *I, int i) {
2081     return getOrigin(I->getOperand(i));
2082   }
2083 
2084   /// Remember the place where a shadow check should be inserted.
2085   ///
2086   /// This location will be later instrumented with a check that will print a
2087   /// UMR warning in runtime if the shadow value is not 0.
2088   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
2089     assert(Shadow);
2090     if (!InsertChecks)
2091       return;
2092 
2093     if (!DebugCounter::shouldExecute(DebugInsertCheck)) {
2094       LLVM_DEBUG(dbgs() << "Skipping check of " << *Shadow << " before "
2095                         << *OrigIns << "\n");
2096       return;
2097     }
2098 #ifndef NDEBUG
2099     Type *ShadowTy = Shadow->getType();
2100     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
2101             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
2102            "Can only insert checks for integer, vector, and aggregate shadow "
2103            "types");
2104 #endif
2105     InstrumentationList.push_back(
2106         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
2107   }
2108 
2109   /// Remember the place where a shadow check should be inserted.
2110   ///
2111   /// This location will be later instrumented with a check that will print a
2112   /// UMR warning in runtime if the value is not fully defined.
2113   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
2114     assert(Val);
2115     Value *Shadow, *Origin;
2116     if (ClCheckConstantShadow) {
2117       Shadow = getShadow(Val);
2118       if (!Shadow)
2119         return;
2120       Origin = getOrigin(Val);
2121     } else {
2122       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
2123       if (!Shadow)
2124         return;
2125       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
2126     }
2127     insertShadowCheck(Shadow, Origin, OrigIns);
2128   }
2129 
2130   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
2131     switch (a) {
2132     case AtomicOrdering::NotAtomic:
2133       return AtomicOrdering::NotAtomic;
2134     case AtomicOrdering::Unordered:
2135     case AtomicOrdering::Monotonic:
2136     case AtomicOrdering::Release:
2137       return AtomicOrdering::Release;
2138     case AtomicOrdering::Acquire:
2139     case AtomicOrdering::AcquireRelease:
2140       return AtomicOrdering::AcquireRelease;
2141     case AtomicOrdering::SequentiallyConsistent:
2142       return AtomicOrdering::SequentiallyConsistent;
2143     }
2144     llvm_unreachable("Unknown ordering");
2145   }
2146 
2147   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
2148     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2149     uint32_t OrderingTable[NumOrderings] = {};
2150 
2151     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2152         OrderingTable[(int)AtomicOrderingCABI::release] =
2153             (int)AtomicOrderingCABI::release;
2154     OrderingTable[(int)AtomicOrderingCABI::consume] =
2155         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2156             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2157                 (int)AtomicOrderingCABI::acq_rel;
2158     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2159         (int)AtomicOrderingCABI::seq_cst;
2160 
2161     return ConstantDataVector::get(IRB.getContext(), OrderingTable);
2162   }
2163 
2164   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
2165     switch (a) {
2166     case AtomicOrdering::NotAtomic:
2167       return AtomicOrdering::NotAtomic;
2168     case AtomicOrdering::Unordered:
2169     case AtomicOrdering::Monotonic:
2170     case AtomicOrdering::Acquire:
2171       return AtomicOrdering::Acquire;
2172     case AtomicOrdering::Release:
2173     case AtomicOrdering::AcquireRelease:
2174       return AtomicOrdering::AcquireRelease;
2175     case AtomicOrdering::SequentiallyConsistent:
2176       return AtomicOrdering::SequentiallyConsistent;
2177     }
2178     llvm_unreachable("Unknown ordering");
2179   }
2180 
2181   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
2182     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2183     uint32_t OrderingTable[NumOrderings] = {};
2184 
2185     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2186         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2187             OrderingTable[(int)AtomicOrderingCABI::consume] =
2188                 (int)AtomicOrderingCABI::acquire;
2189     OrderingTable[(int)AtomicOrderingCABI::release] =
2190         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2191             (int)AtomicOrderingCABI::acq_rel;
2192     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2193         (int)AtomicOrderingCABI::seq_cst;
2194 
2195     return ConstantDataVector::get(IRB.getContext(), OrderingTable);
2196   }
2197 
2198   // ------------------- Visitors.
2199   using InstVisitor<MemorySanitizerVisitor>::visit;
2200   void visit(Instruction &I) {
2201     if (I.getMetadata(LLVMContext::MD_nosanitize))
2202       return;
2203     // Don't want to visit if we're in the prologue
2204     if (isInPrologue(I))
2205       return;
2206     if (!DebugCounter::shouldExecute(DebugInstrumentInstruction)) {
2207       LLVM_DEBUG(dbgs() << "Skipping instruction: " << I << "\n");
2208       // We still need to set the shadow and origin to clean values.
2209       setShadow(&I, getCleanShadow(&I));
2210       setOrigin(&I, getCleanOrigin());
2211       return;
2212     }
2213 
2214     Instructions.push_back(&I);
2215   }
2216 
2217   /// Instrument LoadInst
2218   ///
2219   /// Loads the corresponding shadow and (optionally) origin.
2220   /// Optionally, checks that the load address is fully defined.
2221   void visitLoadInst(LoadInst &I) {
2222     assert(I.getType()->isSized() && "Load type must have size");
2223     assert(!I.getMetadata(LLVMContext::MD_nosanitize));
2224     NextNodeIRBuilder IRB(&I);
2225     Type *ShadowTy = getShadowTy(&I);
2226     Value *Addr = I.getPointerOperand();
2227     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2228     const Align Alignment = I.getAlign();
2229     if (PropagateShadow) {
2230       std::tie(ShadowPtr, OriginPtr) =
2231           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2232       setShadow(&I,
2233                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2234     } else {
2235       setShadow(&I, getCleanShadow(&I));
2236     }
2237 
2238     if (ClCheckAccessAddress)
2239       insertShadowCheck(I.getPointerOperand(), &I);
2240 
2241     if (I.isAtomic())
2242       I.setOrdering(addAcquireOrdering(I.getOrdering()));
2243 
2244     if (MS.TrackOrigins) {
2245       if (PropagateShadow) {
2246         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
2247         setOrigin(
2248             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
2249       } else {
2250         setOrigin(&I, getCleanOrigin());
2251       }
2252     }
2253   }
2254 
2255   /// Instrument StoreInst
2256   ///
2257   /// Stores the corresponding shadow and (optionally) origin.
2258   /// Optionally, checks that the store address is fully defined.
2259   void visitStoreInst(StoreInst &I) {
2260     StoreList.push_back(&I);
2261     if (ClCheckAccessAddress)
2262       insertShadowCheck(I.getPointerOperand(), &I);
2263   }
2264 
2265   void handleCASOrRMW(Instruction &I) {
2266     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2267 
2268     IRBuilder<> IRB(&I);
2269     Value *Addr = I.getOperand(0);
2270     Value *Val = I.getOperand(1);
2271     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, getShadowTy(Val), Align(1),
2272                                           /*isStore*/ true)
2273                            .first;
2274 
2275     if (ClCheckAccessAddress)
2276       insertShadowCheck(Addr, &I);
2277 
2278     // Only test the conditional argument of cmpxchg instruction.
2279     // The other argument can potentially be uninitialized, but we can not
2280     // detect this situation reliably without possible false positives.
2281     if (isa<AtomicCmpXchgInst>(I))
2282       insertShadowCheck(Val, &I);
2283 
2284     IRB.CreateStore(getCleanShadow(Val), ShadowPtr);
2285 
2286     setShadow(&I, getCleanShadow(&I));
2287     setOrigin(&I, getCleanOrigin());
2288   }
2289 
2290   void visitAtomicRMWInst(AtomicRMWInst &I) {
2291     handleCASOrRMW(I);
2292     I.setOrdering(addReleaseOrdering(I.getOrdering()));
2293   }
2294 
2295   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2296     handleCASOrRMW(I);
2297     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2298   }
2299 
2300   // Vector manipulation.
2301   void visitExtractElementInst(ExtractElementInst &I) {
2302     insertShadowCheck(I.getOperand(1), &I);
2303     IRBuilder<> IRB(&I);
2304     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
2305                                            "_msprop"));
2306     setOrigin(&I, getOrigin(&I, 0));
2307   }
2308 
2309   void visitInsertElementInst(InsertElementInst &I) {
2310     insertShadowCheck(I.getOperand(2), &I);
2311     IRBuilder<> IRB(&I);
2312     auto *Shadow0 = getShadow(&I, 0);
2313     auto *Shadow1 = getShadow(&I, 1);
2314     setShadow(&I, IRB.CreateInsertElement(Shadow0, Shadow1, I.getOperand(2),
2315                                           "_msprop"));
2316     setOriginForNaryOp(I);
2317   }
2318 
2319   void visitShuffleVectorInst(ShuffleVectorInst &I) {
2320     IRBuilder<> IRB(&I);
2321     auto *Shadow0 = getShadow(&I, 0);
2322     auto *Shadow1 = getShadow(&I, 1);
2323     setShadow(&I, IRB.CreateShuffleVector(Shadow0, Shadow1, I.getShuffleMask(),
2324                                           "_msprop"));
2325     setOriginForNaryOp(I);
2326   }
2327 
2328   // Casts.
2329   void visitSExtInst(SExtInst &I) {
2330     IRBuilder<> IRB(&I);
2331     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
2332     setOrigin(&I, getOrigin(&I, 0));
2333   }
2334 
2335   void visitZExtInst(ZExtInst &I) {
2336     IRBuilder<> IRB(&I);
2337     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2338     setOrigin(&I, getOrigin(&I, 0));
2339   }
2340 
2341   void visitTruncInst(TruncInst &I) {
2342     IRBuilder<> IRB(&I);
2343     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2344     setOrigin(&I, getOrigin(&I, 0));
2345   }
2346 
2347   void visitBitCastInst(BitCastInst &I) {
2348     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2349     // a musttail call and a ret, don't instrument. New instructions are not
2350     // allowed after a musttail call.
2351     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2352       if (CI->isMustTailCall())
2353         return;
2354     IRBuilder<> IRB(&I);
2355     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2356     setOrigin(&I, getOrigin(&I, 0));
2357   }
2358 
2359   void visitPtrToIntInst(PtrToIntInst &I) {
2360     IRBuilder<> IRB(&I);
2361     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2362                                     "_msprop_ptrtoint"));
2363     setOrigin(&I, getOrigin(&I, 0));
2364   }
2365 
2366   void visitIntToPtrInst(IntToPtrInst &I) {
2367     IRBuilder<> IRB(&I);
2368     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2369                                     "_msprop_inttoptr"));
2370     setOrigin(&I, getOrigin(&I, 0));
2371   }
2372 
2373   void visitFPToSIInst(CastInst &I) { handleShadowOr(I); }
2374   void visitFPToUIInst(CastInst &I) { handleShadowOr(I); }
2375   void visitSIToFPInst(CastInst &I) { handleShadowOr(I); }
2376   void visitUIToFPInst(CastInst &I) { handleShadowOr(I); }
2377   void visitFPExtInst(CastInst &I) { handleShadowOr(I); }
2378   void visitFPTruncInst(CastInst &I) { handleShadowOr(I); }
2379 
2380   /// Propagate shadow for bitwise AND.
2381   ///
2382   /// This code is exact, i.e. if, for example, a bit in the left argument
  /// is defined and 0, then neither the value nor the definedness of the
  /// corresponding bit in the right argument affects the resulting shadow.
2385   void visitAnd(BinaryOperator &I) {
2386     IRBuilder<> IRB(&I);
2387     //  "And" of 0 and a poisoned value results in unpoisoned value.
2388     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2389     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2390     //  1&p => p;     0&p => 0;     p&p => p;
2391     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
2392     Value *S1 = getShadow(&I, 0);
2393     Value *S2 = getShadow(&I, 1);
2394     Value *V1 = I.getOperand(0);
2395     Value *V2 = I.getOperand(1);
2396     if (V1->getType() != S1->getType()) {
2397       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2398       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2399     }
2400     Value *S1S2 = IRB.CreateAnd(S1, S2);
2401     Value *V1S2 = IRB.CreateAnd(V1, S2);
2402     Value *S1V2 = IRB.CreateAnd(S1, V2);
2403     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2404     setOriginForNaryOp(I);
2405   }
2406 
2407   void visitOr(BinaryOperator &I) {
2408     IRBuilder<> IRB(&I);
2409     //  "Or" of 1 and a poisoned value results in unpoisoned value.
2410     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2411     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2412     //  1|p => 1;     0|p => p;     p|p => p;
2413     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
2414     Value *S1 = getShadow(&I, 0);
2415     Value *S2 = getShadow(&I, 1);
2416     Value *V1 = IRB.CreateNot(I.getOperand(0));
2417     Value *V2 = IRB.CreateNot(I.getOperand(1));
2418     if (V1->getType() != S1->getType()) {
2419       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2420       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2421     }
2422     Value *S1S2 = IRB.CreateAnd(S1, S2);
2423     Value *V1S2 = IRB.CreateAnd(V1, S2);
2424     Value *S1V2 = IRB.CreateAnd(S1, V2);
2425     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2426     setOriginForNaryOp(I);
2427   }
2428 
2429   /// Default propagation of shadow and/or origin.
2430   ///
2431   /// This class implements the general case of shadow propagation, used in all
2432   /// cases where we don't know and/or don't care about what the operation
2433   /// actually does. It converts all input shadow values to a common type
2434   /// (extending or truncating as necessary), and bitwise OR's them.
2435   ///
2436   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2437   /// fully initialized), and less prone to false positives.
2438   ///
2439   /// This class also implements the general case of origin propagation. For a
2440   /// Nary operation, result origin is set to the origin of an argument that is
2441   /// not entirely initialized. If there is more than one such arguments, the
2442   /// rightmost of them is picked. It does not matter which one is picked if all
2443   /// arguments are initialized.
2444   template <bool CombineShadow> class Combiner {
2445     Value *Shadow = nullptr;
2446     Value *Origin = nullptr;
2447     IRBuilder<> &IRB;
2448     MemorySanitizerVisitor *MSV;
2449 
2450   public:
2451     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2452         : IRB(IRB), MSV(MSV) {}
2453 
2454     /// Add a pair of shadow and origin values to the mix.
2455     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2456       if (CombineShadow) {
2457         assert(OpShadow);
2458         if (!Shadow)
2459           Shadow = OpShadow;
2460         else {
2461           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2462           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2463         }
2464       }
2465 
2466       if (MSV->MS.TrackOrigins) {
2467         assert(OpOrigin);
2468         if (!Origin) {
2469           Origin = OpOrigin;
2470         } else {
2471           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2472           // No point in adding something that might result in 0 origin value.
2473           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2474             Value *Cond = MSV->convertToBool(OpShadow, IRB);
2475             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2476           }
2477         }
2478       }
2479       return *this;
2480     }
2481 
2482     /// Add an application value to the mix.
2483     Combiner &Add(Value *V) {
2484       Value *OpShadow = MSV->getShadow(V);
2485       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2486       return Add(OpShadow, OpOrigin);
2487     }
2488 
2489     /// Set the current combined values as the given instruction's shadow
2490     /// and origin.
2491     void Done(Instruction *I) {
2492       if (CombineShadow) {
2493         assert(Shadow);
2494         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2495         MSV->setShadow(I, Shadow);
2496       }
2497       if (MSV->MS.TrackOrigins) {
2498         assert(Origin);
2499         MSV->setOrigin(I, Origin);
2500       }
2501     }
2502 
2503     /// Store the current combined value at the specified origin
2504     /// location.
2505     void DoneAndStoreOrigin(TypeSize TS, Value *OriginPtr) {
2506       if (MSV->MS.TrackOrigins) {
2507         assert(Origin);
2508         MSV->paintOrigin(IRB, Origin, OriginPtr, TS, kMinOriginAlignment);
2509       }
2510     }
2511   };
2512 
2513   using ShadowAndOriginCombiner = Combiner<true>;
2514   using OriginCombiner = Combiner<false>;
2515 
2516   /// Propagate origin for arbitrary operation.
2517   void setOriginForNaryOp(Instruction &I) {
2518     if (!MS.TrackOrigins)
2519       return;
2520     IRBuilder<> IRB(&I);
2521     OriginCombiner OC(this, IRB);
2522     for (Use &Op : I.operands())
2523       OC.Add(Op.get());
2524     OC.Done(&I);
2525   }
2526 
2527   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2528     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2529            "Vector of pointers is not a valid shadow type");
2530     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2531                                   Ty->getScalarSizeInBits()
2532                             : Ty->getPrimitiveSizeInBits();
2533   }
2534 
2535   /// Cast between two shadow types, extending or truncating as
2536   /// necessary.
2537   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2538                           bool Signed = false) {
2539     Type *srcTy = V->getType();
2540     if (srcTy == dstTy)
2541       return V;
2542     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2543     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2544     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2545       return IRB.CreateICmpNE(V, getCleanShadow(V));
2546 
2547     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2548       return IRB.CreateIntCast(V, dstTy, Signed);
2549     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2550         cast<VectorType>(dstTy)->getElementCount() ==
2551             cast<VectorType>(srcTy)->getElementCount())
2552       return IRB.CreateIntCast(V, dstTy, Signed);
2553     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2554     Value *V2 =
2555         IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2556     return IRB.CreateBitCast(V2, dstTy);
2557     // TODO: handle struct types.
2558   }
2559 
2560   /// Cast an application value to the type of its own shadow.
2561   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2562     Type *ShadowTy = getShadowTy(V);
2563     if (V->getType() == ShadowTy)
2564       return V;
2565     if (V->getType()->isPtrOrPtrVectorTy())
2566       return IRB.CreatePtrToInt(V, ShadowTy);
2567     else
2568       return IRB.CreateBitCast(V, ShadowTy);
2569   }
2570 
2571   /// Propagate shadow for arbitrary operation.
2572   void handleShadowOr(Instruction &I) {
2573     IRBuilder<> IRB(&I);
2574     ShadowAndOriginCombiner SC(this, IRB);
2575     for (Use &Op : I.operands())
2576       SC.Add(Op.get());
2577     SC.Done(&I);
2578   }
2579 
2580   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2581 
2582   // Handle multiplication by constant.
2583   //
2584   // Handle a special case of multiplication by constant that may have one or
2585   // more zeros in the lower bits. This makes corresponding number of lower bits
2586   // of the result zero as well. We model it by shifting the other operand
2587   // shadow left by the required number of bits. Effectively, we transform
2588   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2589   // We use multiplication by 2**N instead of shift to cover the case of
2590   // multiplication by 0, which may occur in some elements of a vector operand.
2591   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2592                            Value *OtherArg) {
2593     Constant *ShadowMul;
2594     Type *Ty = ConstArg->getType();
2595     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2596       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2597       Type *EltTy = VTy->getElementType();
2598       SmallVector<Constant *, 16> Elements;
2599       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2600         if (ConstantInt *Elt =
2601                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2602           const APInt &V = Elt->getValue();
2603           APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2604           Elements.push_back(ConstantInt::get(EltTy, V2));
2605         } else {
2606           Elements.push_back(ConstantInt::get(EltTy, 1));
2607         }
2608       }
2609       ShadowMul = ConstantVector::get(Elements);
2610     } else {
2611       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2612         const APInt &V = Elt->getValue();
2613         APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2614         ShadowMul = ConstantInt::get(Ty, V2);
2615       } else {
2616         ShadowMul = ConstantInt::get(Ty, 1);
2617       }
2618     }
2619 
2620     IRBuilder<> IRB(&I);
2621     setShadow(&I,
2622               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2623     setOrigin(&I, getOrigin(OtherArg));
2624   }
2625 
2626   void visitMul(BinaryOperator &I) {
2627     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2628     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2629     if (constOp0 && !constOp1)
2630       handleMulByConstant(I, constOp0, I.getOperand(1));
2631     else if (constOp1 && !constOp0)
2632       handleMulByConstant(I, constOp1, I.getOperand(0));
2633     else
2634       handleShadowOr(I);
2635   }
2636 
2637   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2638   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2639   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2640   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2641   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2642   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2643 
2644   void handleIntegerDiv(Instruction &I) {
2645     IRBuilder<> IRB(&I);
2646     // Strict on the second argument.
2647     insertShadowCheck(I.getOperand(1), &I);
2648     setShadow(&I, getShadow(&I, 0));
2649     setOrigin(&I, getOrigin(&I, 0));
2650   }
2651 
2652   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2653   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2654   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2655   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2656 
2657   // Floating point division is side-effect free. We can not require that the
2658   // divisor is fully initialized and must propagate shadow. See PR37523.
2659   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2660   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2661 
2662   /// Instrument == and != comparisons.
2663   ///
2664   /// Sometimes the comparison result is known even if some of the bits of the
2665   /// arguments are not.
2666   void handleEqualityComparison(ICmpInst &I) {
2667     IRBuilder<> IRB(&I);
2668     Value *A = I.getOperand(0);
2669     Value *B = I.getOperand(1);
2670     Value *Sa = getShadow(A);
2671     Value *Sb = getShadow(B);
2672 
2673     // Get rid of pointers and vectors of pointers.
2674     // For ints (and vectors of ints), types of A and Sa match,
2675     // and this is a no-op.
2676     A = IRB.CreatePointerCast(A, Sa->getType());
2677     B = IRB.CreatePointerCast(B, Sb->getType());
2678 
2679     // A == B  <==>  (C = A^B) == 0
2680     // A != B  <==>  (C = A^B) != 0
2681     // Sc = Sa | Sb
2682     Value *C = IRB.CreateXor(A, B);
2683     Value *Sc = IRB.CreateOr(Sa, Sb);
2684     // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
2685     // Result is defined if one of the following is true
2686     // * there is a defined 1 bit in C
2687     // * C is fully defined
2688     // Si = !(C & ~Sc) && Sc
2689     Value *Zero = Constant::getNullValue(Sc->getType());
2690     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2691     Value *LHS = IRB.CreateICmpNE(Sc, Zero);
2692     Value *RHS =
2693         IRB.CreateICmpEQ(IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero);
2694     Value *Si = IRB.CreateAnd(LHS, RHS);
2695     Si->setName("_msprop_icmp");
2696     setShadow(&I, Si);
2697     setOriginForNaryOp(I);
2698   }
2699 
2700   /// Build the lowest possible value of V, taking into account V's
2701   ///        uninitialized bits.
2702   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2703                                 bool isSigned) {
2704     if (isSigned) {
2705       // Split shadow into sign bit and other bits.
2706       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2707       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2708       // Maximise the undefined shadow bit, minimize other undefined bits.
2709       return IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)),
2710                           SaSignBit);
2711     } else {
2712       // Minimize undefined bits.
2713       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2714     }
2715   }
2716 
2717   /// Build the highest possible value of V, taking into account V's
2718   ///        uninitialized bits.
2719   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2720                                  bool isSigned) {
2721     if (isSigned) {
2722       // Split shadow into sign bit and other bits.
2723       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2724       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
2725       // Minimise the undefined shadow bit, maximise other undefined bits.
2726       return IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)),
2727                           SaOtherBits);
2728     } else {
2729       // Maximize undefined bits.
2730       return IRB.CreateOr(A, Sa);
2731     }
2732   }
2733 
2734   /// Instrument relational comparisons.
2735   ///
2736   /// This function does exact shadow propagation for all relational
2737   /// comparisons of integers, pointers and vectors of those.
2738   /// FIXME: output seems suboptimal when one of the operands is a constant
2739   void handleRelationalComparisonExact(ICmpInst &I) {
2740     IRBuilder<> IRB(&I);
2741     Value *A = I.getOperand(0);
2742     Value *B = I.getOperand(1);
2743     Value *Sa = getShadow(A);
2744     Value *Sb = getShadow(B);
2745 
2746     // Get rid of pointers and vectors of pointers.
2747     // For ints (and vectors of ints), types of A and Sa match,
2748     // and this is a no-op.
2749     A = IRB.CreatePointerCast(A, Sa->getType());
2750     B = IRB.CreatePointerCast(B, Sb->getType());
2751 
2752     // Let [a0, a1] be the interval of possible values of A, taking into account
2753     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2754     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
2755     bool IsSigned = I.isSigned();
2756     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2757                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2758                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2759     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2760                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2761                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2762     Value *Si = IRB.CreateXor(S1, S2);
2763     setShadow(&I, Si);
2764     setOriginForNaryOp(I);
2765   }
2766 
2767   /// Instrument signed relational comparisons.
2768   ///
2769   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2770   /// bit of the shadow. Everything else is delegated to handleShadowOr().
2771   void handleSignedRelationalComparison(ICmpInst &I) {
2772     Constant *constOp;
2773     Value *op = nullptr;
2774     CmpInst::Predicate pre;
2775     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2776       op = I.getOperand(0);
2777       pre = I.getPredicate();
2778     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2779       op = I.getOperand(1);
2780       pre = I.getSwappedPredicate();
2781     } else {
2782       handleShadowOr(I);
2783       return;
2784     }
2785 
2786     if ((constOp->isNullValue() &&
2787          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2788         (constOp->isAllOnesValue() &&
2789          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2790       IRBuilder<> IRB(&I);
2791       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2792                                         "_msprop_icmp_s");
2793       setShadow(&I, Shadow);
2794       setOrigin(&I, getOrigin(op));
2795     } else {
2796       handleShadowOr(I);
2797     }
2798   }
2799 
2800   void visitICmpInst(ICmpInst &I) {
2801     if (!ClHandleICmp) {
2802       handleShadowOr(I);
2803       return;
2804     }
2805     if (I.isEquality()) {
2806       handleEqualityComparison(I);
2807       return;
2808     }
2809 
2810     assert(I.isRelational());
2811     if (ClHandleICmpExact) {
2812       handleRelationalComparisonExact(I);
2813       return;
2814     }
2815     if (I.isSigned()) {
2816       handleSignedRelationalComparison(I);
2817       return;
2818     }
2819 
2820     assert(I.isUnsigned());
2821     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2822       handleRelationalComparisonExact(I);
2823       return;
2824     }
2825 
2826     handleShadowOr(I);
2827   }
2828 
2829   void visitFCmpInst(FCmpInst &I) { handleShadowOr(I); }
2830 
2831   void handleShift(BinaryOperator &I) {
2832     IRBuilder<> IRB(&I);
2833     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2834     // Otherwise perform the same shift on S1.
2835     Value *S1 = getShadow(&I, 0);
2836     Value *S2 = getShadow(&I, 1);
2837     Value *S2Conv =
2838         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2839     Value *V2 = I.getOperand(1);
2840     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2841     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2842     setOriginForNaryOp(I);
2843   }
2844 
2845   void visitShl(BinaryOperator &I) { handleShift(I); }
2846   void visitAShr(BinaryOperator &I) { handleShift(I); }
2847   void visitLShr(BinaryOperator &I) { handleShift(I); }
2848 
2849   void handleFunnelShift(IntrinsicInst &I) {
2850     IRBuilder<> IRB(&I);
2851     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2852     // Otherwise perform the same shift on S0 and S1.
2853     Value *S0 = getShadow(&I, 0);
2854     Value *S1 = getShadow(&I, 1);
2855     Value *S2 = getShadow(&I, 2);
2856     Value *S2Conv =
2857         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2858     Value *V2 = I.getOperand(2);
2859     Function *Intrin = Intrinsic::getDeclaration(
2860         I.getModule(), I.getIntrinsicID(), S2Conv->getType());
2861     Value *Shift = IRB.CreateCall(Intrin, {S0, S1, V2});
2862     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2863     setOriginForNaryOp(I);
2864   }
2865 
2866   /// Instrument llvm.memmove
2867   ///
2868   /// At this point we don't know if llvm.memmove will be inlined or not.
2869   /// If we don't instrument it and it gets inlined,
2870   /// our interceptor will not kick in and we will lose the memmove.
2871   /// If we instrument the call here, but it does not get inlined,
  /// we will memmove the shadow twice, which is bad in case
2873   /// of overlapping regions. So, we simply lower the intrinsic to a call.
2874   ///
2875   /// Similar situation exists for memcpy and memset.
2876   void visitMemMoveInst(MemMoveInst &I) {
2877     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2878     IRBuilder<> IRB(&I);
2879     IRB.CreateCall(MS.MemmoveFn,
2880                    {I.getArgOperand(0), I.getArgOperand(1),
2881                     IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2882     I.eraseFromParent();
2883   }
2884 
2885   /// Instrument memcpy
2886   ///
2887   /// Similar to memmove: avoid copying shadow twice. This is somewhat
2888   /// unfortunate as it may slowdown small constant memcpys.
2889   /// FIXME: consider doing manual inline for small constant sizes and proper
2890   /// alignment.
2891   ///
2892   /// Note: This also handles memcpy.inline, which promises no calls to external
2893   /// functions as an optimization. However, with instrumentation enabled this
2894   /// is difficult to promise; additionally, we know that the MSan runtime
2895   /// exists and provides __msan_memcpy(). Therefore, we assume that with
2896   /// instrumentation it's safe to turn memcpy.inline into a call to
2897   /// __msan_memcpy(). Should this be wrong, such as when implementing memcpy()
2898   /// itself, instrumentation should be disabled with the no_sanitize attribute.
2899   void visitMemCpyInst(MemCpyInst &I) {
2900     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2901     IRBuilder<> IRB(&I);
2902     IRB.CreateCall(MS.MemcpyFn,
2903                    {I.getArgOperand(0), I.getArgOperand(1),
2904                     IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2905     I.eraseFromParent();
2906   }
2907 
2908   // Same as memcpy.
2909   void visitMemSetInst(MemSetInst &I) {
2910     IRBuilder<> IRB(&I);
2911     IRB.CreateCall(
2912         MS.MemsetFn,
2913         {I.getArgOperand(0),
2914          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2915          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2916     I.eraseFromParent();
2917   }
2918 
2919   void visitVAStartInst(VAStartInst &I) { VAHelper->visitVAStartInst(I); }
2920 
2921   void visitVACopyInst(VACopyInst &I) { VAHelper->visitVACopyInst(I); }
2922 
2923   /// Handle vector store-like intrinsics.
2924   ///
2925   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2926   /// has 1 pointer argument and 1 vector argument, returns void.
2927   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2928     IRBuilder<> IRB(&I);
2929     Value *Addr = I.getArgOperand(0);
2930     Value *Shadow = getShadow(&I, 1);
2931     Value *ShadowPtr, *OriginPtr;
2932 
2933     // We don't know the pointer alignment (could be unaligned SSE store!).
2934     // Have to assume to worst case.
2935     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2936         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2937     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2938 
2939     if (ClCheckAccessAddress)
2940       insertShadowCheck(Addr, &I);
2941 
2942     // FIXME: factor out common code from materializeStores
2943     if (MS.TrackOrigins)
2944       IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2945     return true;
2946   }
2947 
2948   /// Handle vector load-like intrinsics.
2949   ///
2950   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2951   /// has 1 pointer argument, returns a vector.
2952   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2953     IRBuilder<> IRB(&I);
2954     Value *Addr = I.getArgOperand(0);
2955 
2956     Type *ShadowTy = getShadowTy(&I);
2957     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2958     if (PropagateShadow) {
2959       // We don't know the pointer alignment (could be unaligned SSE load!).
2960       // Have to assume to worst case.
2961       const Align Alignment = Align(1);
2962       std::tie(ShadowPtr, OriginPtr) =
2963           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2964       setShadow(&I,
2965                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2966     } else {
2967       setShadow(&I, getCleanShadow(&I));
2968     }
2969 
2970     if (ClCheckAccessAddress)
2971       insertShadowCheck(Addr, &I);
2972 
2973     if (MS.TrackOrigins) {
2974       if (PropagateShadow)
2975         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2976       else
2977         setOrigin(&I, getCleanOrigin());
2978     }
2979     return true;
2980   }
2981 
2982   /// Handle (SIMD arithmetic)-like intrinsics.
2983   ///
2984   /// Instrument intrinsics with any number of arguments of the same type,
2985   /// equal to the return type. The type should be simple (no aggregates or
2986   /// pointers; vectors are fine).
2987   /// Caller guarantees that this intrinsic does not access memory.
2988   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2989     Type *RetTy = I.getType();
2990     if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy() ||
2991           RetTy->isX86_MMXTy()))
2992       return false;
2993 
2994     unsigned NumArgOperands = I.arg_size();
2995     for (unsigned i = 0; i < NumArgOperands; ++i) {
2996       Type *Ty = I.getArgOperand(i)->getType();
2997       if (Ty != RetTy)
2998         return false;
2999     }
3000 
3001     IRBuilder<> IRB(&I);
3002     ShadowAndOriginCombiner SC(this, IRB);
3003     for (unsigned i = 0; i < NumArgOperands; ++i)
3004       SC.Add(I.getArgOperand(i));
3005     SC.Done(&I);
3006 
3007     return true;
3008   }
3009 
3010   /// Heuristically instrument unknown intrinsics.
3011   ///
3012   /// The main purpose of this code is to do something reasonable with all
3013   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
3014   /// We recognize several classes of intrinsics by their argument types and
3015   /// ModRefBehaviour and apply special instrumentation when we are reasonably
3016   /// sure that we know what the intrinsic does.
3017   ///
3018   /// We special-case intrinsics where this approach fails. See llvm.bswap
3019   /// handling as an example of that.
3020   bool handleUnknownIntrinsic(IntrinsicInst &I) {
3021     unsigned NumArgOperands = I.arg_size();
3022     if (NumArgOperands == 0)
3023       return false;
3024 
3025     if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() &&
3026         I.getArgOperand(1)->getType()->isVectorTy() &&
3027         I.getType()->isVoidTy() && !I.onlyReadsMemory()) {
3028       // This looks like a vector store.
3029       return handleVectorStoreIntrinsic(I);
3030     }
3031 
3032     if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() &&
3033         I.getType()->isVectorTy() && I.onlyReadsMemory()) {
3034       // This looks like a vector load.
3035       return handleVectorLoadIntrinsic(I);
3036     }
3037 
3038     if (I.doesNotAccessMemory())
3039       if (maybeHandleSimpleNomemIntrinsic(I))
3040         return true;
3041 
3042     // FIXME: detect and handle SSE maskstore/maskload
3043     return false;
3044   }
3045 
3046   void handleInvariantGroup(IntrinsicInst &I) {
3047     setShadow(&I, getShadow(&I, 0));
3048     setOrigin(&I, getOrigin(&I, 0));
3049   }
3050 
3051   void handleLifetimeStart(IntrinsicInst &I) {
3052     if (!PoisonStack)
3053       return;
3054     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
3055     if (!AI)
3056       InstrumentLifetimeStart = false;
3057     LifetimeStartList.push_back(std::make_pair(&I, AI));
3058   }
3059 
3060   void handleBswap(IntrinsicInst &I) {
3061     IRBuilder<> IRB(&I);
3062     Value *Op = I.getArgOperand(0);
3063     Type *OpType = Op->getType();
3064     Function *BswapFunc = Intrinsic::getDeclaration(
3065         F.getParent(), Intrinsic::bswap, ArrayRef(&OpType, 1));
3066     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
3067     setOrigin(&I, getOrigin(Op));
3068   }
3069 
3070   void handleCountZeroes(IntrinsicInst &I) {
3071     IRBuilder<> IRB(&I);
3072     Value *Src = I.getArgOperand(0);
3073 
3074     // Set the Output shadow based on input Shadow
3075     Value *BoolShadow = IRB.CreateIsNotNull(getShadow(Src), "_mscz_bs");
3076 
3077     // If zero poison is requested, mix in with the shadow
3078     Constant *IsZeroPoison = cast<Constant>(I.getOperand(1));
3079     if (!IsZeroPoison->isZeroValue()) {
3080       Value *BoolZeroPoison = IRB.CreateIsNull(Src, "_mscz_bzp");
3081       BoolShadow = IRB.CreateOr(BoolShadow, BoolZeroPoison, "_mscz_bs");
3082     }
3083 
3084     Value *OutputShadow =
3085         IRB.CreateSExt(BoolShadow, getShadowTy(Src), "_mscz_os");
3086 
3087     setShadow(&I, OutputShadow);
3088     setOriginForNaryOp(I);
3089   }
3090 
3091   // Instrument vector convert intrinsic.
3092   //
3093   // This function instruments intrinsics like cvtsi2ss:
3094   // %Out = int_xxx_cvtyyy(%ConvertOp)
3095   // or
3096   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
3097   // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
3098   // number \p Out elements, and (if has 2 arguments) copies the rest of the
3099   // elements from \p CopyOp.
3100   // In most cases conversion involves floating-point value which may trigger a
3101   // hardware exception when not fully initialized. For this reason we require
3102   // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
3103   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
3104   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
3105   // return a fully initialized value.
3106   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
3107                                     bool HasRoundingMode = false) {
3108     IRBuilder<> IRB(&I);
3109     Value *CopyOp, *ConvertOp;
3110 
3111     assert((!HasRoundingMode ||
3112             isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
3113            "Invalid rounding mode");
3114 
3115     switch (I.arg_size() - HasRoundingMode) {
3116     case 2:
3117       CopyOp = I.getArgOperand(0);
3118       ConvertOp = I.getArgOperand(1);
3119       break;
3120     case 1:
3121       ConvertOp = I.getArgOperand(0);
3122       CopyOp = nullptr;
3123       break;
3124     default:
3125       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
3126     }
3127 
3128     // The first *NumUsedElements* elements of ConvertOp are converted to the
3129     // same number of output elements. The rest of the output is copied from
3130     // CopyOp, or (if not available) filled with zeroes.
3131     // Combine shadow for elements of ConvertOp that are used in this operation,
3132     // and insert a check.
3133     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
3134     // int->any conversion.
3135     Value *ConvertShadow = getShadow(ConvertOp);
3136     Value *AggShadow = nullptr;
3137     if (ConvertOp->getType()->isVectorTy()) {
3138       AggShadow = IRB.CreateExtractElement(
3139           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3140       for (int i = 1; i < NumUsedElements; ++i) {
3141         Value *MoreShadow = IRB.CreateExtractElement(
3142             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3143         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
3144       }
3145     } else {
3146       AggShadow = ConvertShadow;
3147     }
3148     assert(AggShadow->getType()->isIntegerTy());
3149     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
3150 
3151     // Build result shadow by zero-filling parts of CopyOp shadow that come from
3152     // ConvertOp.
3153     if (CopyOp) {
3154       assert(CopyOp->getType() == I.getType());
3155       assert(CopyOp->getType()->isVectorTy());
3156       Value *ResultShadow = getShadow(CopyOp);
3157       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
3158       for (int i = 0; i < NumUsedElements; ++i) {
3159         ResultShadow = IRB.CreateInsertElement(
3160             ResultShadow, ConstantInt::getNullValue(EltTy),
3161             ConstantInt::get(IRB.getInt32Ty(), i));
3162       }
3163       setShadow(&I, ResultShadow);
3164       setOrigin(&I, getOrigin(CopyOp));
3165     } else {
3166       setShadow(&I, getCleanShadow(&I));
3167       setOrigin(&I, getCleanOrigin());
3168     }
3169   }
3170 
3171   // Given a scalar or vector, extract lower 64 bits (or less), and return all
3172   // zeroes if it is zero, and all ones otherwise.
3173   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3174     if (S->getType()->isVectorTy())
3175       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
3176     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
3177     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3178     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3179   }
3180 
3181   // Given a vector, extract its first element, and return all
3182   // zeroes if it is zero, and all ones otherwise.
3183   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3184     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
3185     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
3186     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3187   }
3188 
3189   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
3190     Type *T = S->getType();
3191     assert(T->isVectorTy());
3192     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3193     return IRB.CreateSExt(S2, T);
3194   }
3195 
3196   // Instrument vector shift intrinsic.
3197   //
3198   // This function instruments intrinsics like int_x86_avx2_psll_w.
3199   // Intrinsic shifts %In by %ShiftSize bits.
3200   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
3201   // size, and the rest is ignored. Behavior is defined even if shift size is
3202   // greater than register (or field) width.
3203   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
3204     assert(I.arg_size() == 2);
3205     IRBuilder<> IRB(&I);
3206     // If any of the S2 bits are poisoned, the whole thing is poisoned.
3207     // Otherwise perform the same shift on S1.
3208     Value *S1 = getShadow(&I, 0);
3209     Value *S2 = getShadow(&I, 1);
3210     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
3211                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
3212     Value *V1 = I.getOperand(0);
3213     Value *V2 = I.getOperand(1);
3214     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
3215                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
3216     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
3217     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
3218     setOriginForNaryOp(I);
3219   }
3220 
3221   // Get an X86_MMX-sized vector type.
3222   Type *getMMXVectorTy(unsigned EltSizeInBits) {
3223     const unsigned X86_MMXSizeInBits = 64;
3224     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
3225            "Illegal MMX vector element size");
3226     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
3227                                 X86_MMXSizeInBits / EltSizeInBits);
3228   }
3229 
3230   // Returns a signed counterpart for an (un)signed-saturate-and-pack
3231   // intrinsic.
3232   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
3233     switch (id) {
3234     case Intrinsic::x86_sse2_packsswb_128:
3235     case Intrinsic::x86_sse2_packuswb_128:
3236       return Intrinsic::x86_sse2_packsswb_128;
3237 
3238     case Intrinsic::x86_sse2_packssdw_128:
3239     case Intrinsic::x86_sse41_packusdw:
3240       return Intrinsic::x86_sse2_packssdw_128;
3241 
3242     case Intrinsic::x86_avx2_packsswb:
3243     case Intrinsic::x86_avx2_packuswb:
3244       return Intrinsic::x86_avx2_packsswb;
3245 
3246     case Intrinsic::x86_avx2_packssdw:
3247     case Intrinsic::x86_avx2_packusdw:
3248       return Intrinsic::x86_avx2_packssdw;
3249 
3250     case Intrinsic::x86_mmx_packsswb:
3251     case Intrinsic::x86_mmx_packuswb:
3252       return Intrinsic::x86_mmx_packsswb;
3253 
3254     case Intrinsic::x86_mmx_packssdw:
3255       return Intrinsic::x86_mmx_packssdw;
3256     default:
3257       llvm_unreachable("unexpected intrinsic id");
3258     }
3259   }
3260 
3261   // Instrument vector pack intrinsic.
3262   //
3263   // This function instruments intrinsics like x86_mmx_packsswb, that
3264   // packs elements of 2 input vectors into half as many bits with saturation.
3265   // Shadow is propagated with the signed variant of the same intrinsic applied
3266   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
3267   // EltSizeInBits is used only for x86mmx arguments.
3268   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
3269     assert(I.arg_size() == 2);
3270     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3271     IRBuilder<> IRB(&I);
3272     Value *S1 = getShadow(&I, 0);
3273     Value *S2 = getShadow(&I, 1);
3274     assert(isX86_MMX || S1->getType()->isVectorTy());
3275 
3276     // SExt and ICmpNE below must apply to individual elements of input vectors.
3277     // In case of x86mmx arguments, cast them to appropriate vector types and
3278     // back.
3279     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
3280     if (isX86_MMX) {
3281       S1 = IRB.CreateBitCast(S1, T);
3282       S2 = IRB.CreateBitCast(S2, T);
3283     }
3284     Value *S1_ext =
3285         IRB.CreateSExt(IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
3286     Value *S2_ext =
3287         IRB.CreateSExt(IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
3288     if (isX86_MMX) {
3289       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
3290       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
3291       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
3292     }
3293 
3294     Function *ShadowFn = Intrinsic::getDeclaration(
3295         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
3296 
3297     Value *S =
3298         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
3299     if (isX86_MMX)
3300       S = IRB.CreateBitCast(S, getShadowTy(&I));
3301     setShadow(&I, S);
3302     setOriginForNaryOp(I);
3303   }
3304 
3305   // Convert `Mask` into `<n x i1>`.
3306   Constant *createDppMask(unsigned Width, unsigned Mask) {
3307     SmallVector<Constant *, 4> R(Width);
3308     for (auto &M : R) {
3309       M = ConstantInt::getBool(F.getContext(), Mask & 1);
3310       Mask >>= 1;
3311     }
3312     return ConstantVector::get(R);
3313   }
3314 
3315   // Calculate output shadow as array of booleans `<n x i1>`, assuming if any
3316   // arg is poisoned, entire dot product is poisoned.
3317   Value *findDppPoisonedOutput(IRBuilder<> &IRB, Value *S, unsigned SrcMask,
3318                                unsigned DstMask) {
3319     const unsigned Width =
3320         cast<FixedVectorType>(S->getType())->getNumElements();
3321 
3322     S = IRB.CreateSelect(createDppMask(Width, SrcMask), S,
3323                          Constant::getNullValue(S->getType()));
3324     Value *SElem = IRB.CreateOrReduce(S);
3325     Value *IsClean = IRB.CreateIsNull(SElem, "_msdpp");
3326     Value *DstMaskV = createDppMask(Width, DstMask);
3327 
3328     return IRB.CreateSelect(
3329         IsClean, Constant::getNullValue(DstMaskV->getType()), DstMaskV);
3330   }
3331 
3332   // See `Intel Intrinsics Guide` for `_dp_p*` instructions.
3333   //
3334   // 2 and 4 element versions produce single scalar of dot product, and then
3335   // puts it into elements of output vector, selected by 4 lowest bits of the
3336   // mask. Top 4 bits of the mask control which elements of input to use for dot
3337   // product.
3338   //
3339   // 8 element version mask still has only 4 bit for input, and 4 bit for output
3340   // mask. According to the spec it just operates as 4 element version on first
3341   // 4 elements of inputs and output, and then on last 4 elements of inputs and
3342   // output.
3343   void handleDppIntrinsic(IntrinsicInst &I) {
3344     IRBuilder<> IRB(&I);
3345 
3346     Value *S0 = getShadow(&I, 0);
3347     Value *S1 = getShadow(&I, 1);
3348     Value *S = IRB.CreateOr(S0, S1);
3349 
3350     const unsigned Width =
3351         cast<FixedVectorType>(S->getType())->getNumElements();
3352     assert(Width == 2 || Width == 4 || Width == 8);
3353 
3354     const unsigned Mask = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3355     const unsigned SrcMask = Mask >> 4;
3356     const unsigned DstMask = Mask & 0xf;
3357 
3358     // Calculate shadow as `<n x i1>`.
3359     Value *SI1 = findDppPoisonedOutput(IRB, S, SrcMask, DstMask);
3360     if (Width == 8) {
3361       // First 4 elements of shadow are already calculated. `makeDppShadow`
3362       // operats on 32 bit masks, so we can just shift masks, and repeat.
3363       SI1 = IRB.CreateOr(
3364           SI1, findDppPoisonedOutput(IRB, S, SrcMask << 4, DstMask << 4));
3365     }
3366     // Extend to real size of shadow, poisoning either all or none bits of an
3367     // element.
3368     S = IRB.CreateSExt(SI1, S->getType(), "_msdpp");
3369 
3370     setShadow(&I, S);
3371     setOriginForNaryOp(I);
3372   }
3373 
3374   Value *convertBlendvToSelectMask(IRBuilder<> &IRB, Value *C) {
3375     C = CreateAppToShadowCast(IRB, C);
3376     FixedVectorType *FVT = cast<FixedVectorType>(C->getType());
3377     unsigned ElSize = FVT->getElementType()->getPrimitiveSizeInBits();
3378     C = IRB.CreateAShr(C, ElSize - 1);
3379     FVT = FixedVectorType::get(IRB.getInt1Ty(), FVT->getNumElements());
3380     return IRB.CreateTrunc(C, FVT);
3381   }
3382 
3383   // `blendv(f, t, c)` is effectively `select(c[top_bit], t, f)`.
3384   void handleBlendvIntrinsic(IntrinsicInst &I) {
3385     Value *C = I.getOperand(2);
3386     Value *T = I.getOperand(1);
3387     Value *F = I.getOperand(0);
3388 
3389     Value *Sc = getShadow(&I, 2);
3390     Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr;
3391 
3392     {
3393       IRBuilder<> IRB(&I);
3394       // Extract top bit from condition and its shadow.
3395       C = convertBlendvToSelectMask(IRB, C);
3396       Sc = convertBlendvToSelectMask(IRB, Sc);
3397 
3398       setShadow(C, Sc);
3399       setOrigin(C, Oc);
3400     }
3401 
3402     handleSelectLikeInst(I, C, T, F);
3403   }
3404 
3405   // Instrument sum-of-absolute-differences intrinsic.
3406   void handleVectorSadIntrinsic(IntrinsicInst &I) {
3407     const unsigned SignificantBitsPerResultElement = 16;
3408     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3409     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
3410     unsigned ZeroBitsPerResultElement =
3411         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
3412 
3413     IRBuilder<> IRB(&I);
3414     auto *Shadow0 = getShadow(&I, 0);
3415     auto *Shadow1 = getShadow(&I, 1);
3416     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3417     S = IRB.CreateBitCast(S, ResTy);
3418     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3419                        ResTy);
3420     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
3421     S = IRB.CreateBitCast(S, getShadowTy(&I));
3422     setShadow(&I, S);
3423     setOriginForNaryOp(I);
3424   }
3425 
3426   // Instrument multiply-add intrinsic.
3427   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
3428                                   unsigned EltSizeInBits = 0) {
3429     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3430     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
3431     IRBuilder<> IRB(&I);
3432     auto *Shadow0 = getShadow(&I, 0);
3433     auto *Shadow1 = getShadow(&I, 1);
3434     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3435     S = IRB.CreateBitCast(S, ResTy);
3436     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3437                        ResTy);
3438     S = IRB.CreateBitCast(S, getShadowTy(&I));
3439     setShadow(&I, S);
3440     setOriginForNaryOp(I);
3441   }
3442 
3443   // Instrument compare-packed intrinsic.
3444   // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
3445   // all-ones shadow.
3446   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
3447     IRBuilder<> IRB(&I);
3448     Type *ResTy = getShadowTy(&I);
3449     auto *Shadow0 = getShadow(&I, 0);
3450     auto *Shadow1 = getShadow(&I, 1);
3451     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3452     Value *S = IRB.CreateSExt(
3453         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
3454     setShadow(&I, S);
3455     setOriginForNaryOp(I);
3456   }
3457 
3458   // Instrument compare-scalar intrinsic.
3459   // This handles both cmp* intrinsics which return the result in the first
3460   // element of a vector, and comi* which return the result as i32.
3461   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
3462     IRBuilder<> IRB(&I);
3463     auto *Shadow0 = getShadow(&I, 0);
3464     auto *Shadow1 = getShadow(&I, 1);
3465     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3466     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
3467     setShadow(&I, S);
3468     setOriginForNaryOp(I);
3469   }
3470 
3471   // Instrument generic vector reduction intrinsics
3472   // by ORing together all their fields.
3473   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
3474     IRBuilder<> IRB(&I);
3475     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
3476     setShadow(&I, S);
3477     setOrigin(&I, getOrigin(&I, 0));
3478   }
3479 
3480   // Instrument vector.reduce.or intrinsic.
3481   // Valid (non-poisoned) set bits in the operand pull low the
3482   // corresponding shadow bits.
3483   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
3484     IRBuilder<> IRB(&I);
3485     Value *OperandShadow = getShadow(&I, 0);
3486     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
3487     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
3488     // Bit N is clean if any field's bit N is 1 and unpoison
3489     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
3490     // Otherwise, it is clean if every field's bit N is unpoison
3491     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3492     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3493 
3494     setShadow(&I, S);
3495     setOrigin(&I, getOrigin(&I, 0));
3496   }
3497 
3498   // Instrument vector.reduce.and intrinsic.
3499   // Valid (non-poisoned) unset bits in the operand pull down the
3500   // corresponding shadow bits.
3501   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3502     IRBuilder<> IRB(&I);
3503     Value *OperandShadow = getShadow(&I, 0);
3504     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
3505     // Bit N is clean if any field's bit N is 0 and unpoison
3506     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
3507     // Otherwise, it is clean if every field's bit N is unpoison
3508     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3509     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3510 
3511     setShadow(&I, S);
3512     setOrigin(&I, getOrigin(&I, 0));
3513   }
3514 
3515   void handleStmxcsr(IntrinsicInst &I) {
3516     IRBuilder<> IRB(&I);
3517     Value *Addr = I.getArgOperand(0);
3518     Type *Ty = IRB.getInt32Ty();
3519     Value *ShadowPtr =
3520         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3521 
3522     IRB.CreateStore(getCleanShadow(Ty), ShadowPtr);
3523 
3524     if (ClCheckAccessAddress)
3525       insertShadowCheck(Addr, &I);
3526   }
3527 
3528   void handleLdmxcsr(IntrinsicInst &I) {
3529     if (!InsertChecks)
3530       return;
3531 
3532     IRBuilder<> IRB(&I);
3533     Value *Addr = I.getArgOperand(0);
3534     Type *Ty = IRB.getInt32Ty();
3535     const Align Alignment = Align(1);
3536     Value *ShadowPtr, *OriginPtr;
3537     std::tie(ShadowPtr, OriginPtr) =
3538         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3539 
3540     if (ClCheckAccessAddress)
3541       insertShadowCheck(Addr, &I);
3542 
3543     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3544     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3545                                     : getCleanOrigin();
3546     insertShadowCheck(Shadow, Origin, &I);
3547   }
3548 
3549   void handleMaskedExpandLoad(IntrinsicInst &I) {
3550     IRBuilder<> IRB(&I);
3551     Value *Ptr = I.getArgOperand(0);
3552     Value *Mask = I.getArgOperand(1);
3553     Value *PassThru = I.getArgOperand(2);
3554 
3555     if (ClCheckAccessAddress) {
3556       insertShadowCheck(Ptr, &I);
3557       insertShadowCheck(Mask, &I);
3558     }
3559 
3560     if (!PropagateShadow) {
3561       setShadow(&I, getCleanShadow(&I));
3562       setOrigin(&I, getCleanOrigin());
3563       return;
3564     }
3565 
3566     Type *ShadowTy = getShadowTy(&I);
3567     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3568     auto [ShadowPtr, OriginPtr] =
3569         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ false);
3570 
3571     Value *Shadow = IRB.CreateMaskedExpandLoad(
3572         ShadowTy, ShadowPtr, Mask, getShadow(PassThru), "_msmaskedexpload");
3573 
3574     setShadow(&I, Shadow);
3575 
3576     // TODO: Store origins.
3577     setOrigin(&I, getCleanOrigin());
3578   }
3579 
3580   void handleMaskedCompressStore(IntrinsicInst &I) {
3581     IRBuilder<> IRB(&I);
3582     Value *Values = I.getArgOperand(0);
3583     Value *Ptr = I.getArgOperand(1);
3584     Value *Mask = I.getArgOperand(2);
3585 
3586     if (ClCheckAccessAddress) {
3587       insertShadowCheck(Ptr, &I);
3588       insertShadowCheck(Mask, &I);
3589     }
3590 
3591     Value *Shadow = getShadow(Values);
3592     Type *ElementShadowTy =
3593         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3594     auto [ShadowPtr, OriginPtrs] =
3595         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ true);
3596 
3597     IRB.CreateMaskedCompressStore(Shadow, ShadowPtr, Mask);
3598 
3599     // TODO: Store origins.
3600   }
3601 
3602   void handleMaskedGather(IntrinsicInst &I) {
3603     IRBuilder<> IRB(&I);
3604     Value *Ptrs = I.getArgOperand(0);
3605     const Align Alignment(
3606         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3607     Value *Mask = I.getArgOperand(2);
3608     Value *PassThru = I.getArgOperand(3);
3609 
3610     Type *PtrsShadowTy = getShadowTy(Ptrs);
3611     if (ClCheckAccessAddress) {
3612       insertShadowCheck(Mask, &I);
3613       Value *MaskedPtrShadow = IRB.CreateSelect(
3614           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3615           "_msmaskedptrs");
3616       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3617     }
3618 
3619     if (!PropagateShadow) {
3620       setShadow(&I, getCleanShadow(&I));
3621       setOrigin(&I, getCleanOrigin());
3622       return;
3623     }
3624 
3625     Type *ShadowTy = getShadowTy(&I);
3626     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3627     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3628         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ false);
3629 
3630     Value *Shadow =
3631         IRB.CreateMaskedGather(ShadowTy, ShadowPtrs, Alignment, Mask,
3632                                getShadow(PassThru), "_msmaskedgather");
3633 
3634     setShadow(&I, Shadow);
3635 
3636     // TODO: Store origins.
3637     setOrigin(&I, getCleanOrigin());
3638   }
3639 
3640   void handleMaskedScatter(IntrinsicInst &I) {
3641     IRBuilder<> IRB(&I);
3642     Value *Values = I.getArgOperand(0);
3643     Value *Ptrs = I.getArgOperand(1);
3644     const Align Alignment(
3645         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3646     Value *Mask = I.getArgOperand(3);
3647 
3648     Type *PtrsShadowTy = getShadowTy(Ptrs);
3649     if (ClCheckAccessAddress) {
3650       insertShadowCheck(Mask, &I);
3651       Value *MaskedPtrShadow = IRB.CreateSelect(
3652           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3653           "_msmaskedptrs");
3654       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3655     }
3656 
3657     Value *Shadow = getShadow(Values);
3658     Type *ElementShadowTy =
3659         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3660     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3661         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ true);
3662 
3663     IRB.CreateMaskedScatter(Shadow, ShadowPtrs, Alignment, Mask);
3664 
3665     // TODO: Store origin.
3666   }
3667 
3668   void handleMaskedStore(IntrinsicInst &I) {
3669     IRBuilder<> IRB(&I);
3670     Value *V = I.getArgOperand(0);
3671     Value *Ptr = I.getArgOperand(1);
3672     const Align Alignment(
3673         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3674     Value *Mask = I.getArgOperand(3);
3675     Value *Shadow = getShadow(V);
3676 
3677     if (ClCheckAccessAddress) {
3678       insertShadowCheck(Ptr, &I);
3679       insertShadowCheck(Mask, &I);
3680     }
3681 
3682     Value *ShadowPtr;
3683     Value *OriginPtr;
3684     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3685         Ptr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3686 
3687     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3688 
3689     if (!MS.TrackOrigins)
3690       return;
3691 
3692     auto &DL = F.getDataLayout();
3693     paintOrigin(IRB, getOrigin(V), OriginPtr,
3694                 DL.getTypeStoreSize(Shadow->getType()),
3695                 std::max(Alignment, kMinOriginAlignment));
3696   }
3697 
3698   void handleMaskedLoad(IntrinsicInst &I) {
3699     IRBuilder<> IRB(&I);
3700     Value *Ptr = I.getArgOperand(0);
3701     const Align Alignment(
3702         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3703     Value *Mask = I.getArgOperand(2);
3704     Value *PassThru = I.getArgOperand(3);
3705 
3706     if (ClCheckAccessAddress) {
3707       insertShadowCheck(Ptr, &I);
3708       insertShadowCheck(Mask, &I);
3709     }
3710 
3711     if (!PropagateShadow) {
3712       setShadow(&I, getCleanShadow(&I));
3713       setOrigin(&I, getCleanOrigin());
3714       return;
3715     }
3716 
3717     Type *ShadowTy = getShadowTy(&I);
3718     Value *ShadowPtr, *OriginPtr;
3719     std::tie(ShadowPtr, OriginPtr) =
3720         getShadowOriginPtr(Ptr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3721     setShadow(&I, IRB.CreateMaskedLoad(ShadowTy, ShadowPtr, Alignment, Mask,
3722                                        getShadow(PassThru), "_msmaskedld"));
3723 
3724     if (!MS.TrackOrigins)
3725       return;
3726 
3727     // Choose between PassThru's and the loaded value's origins.
3728     Value *MaskedPassThruShadow = IRB.CreateAnd(
3729         getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3730 
3731     Value *NotNull = convertToBool(MaskedPassThruShadow, IRB, "_mscmp");
3732 
3733     Value *PtrOrigin = IRB.CreateLoad(MS.OriginTy, OriginPtr);
3734     Value *Origin = IRB.CreateSelect(NotNull, getOrigin(PassThru), PtrOrigin);
3735 
3736     setOrigin(&I, Origin);
3737   }
3738 
3739   // Instrument BMI / BMI2 intrinsics.
3740   // All of these intrinsics are Z = I(X, Y)
3741   // where the types of all operands and the result match, and are either i32 or
3742   // i64. The following instrumentation happens to work for all of them:
3743   //   Sz = I(Sx, Y) | (sext (Sy != 0))
3744   void handleBmiIntrinsic(IntrinsicInst &I) {
3745     IRBuilder<> IRB(&I);
3746     Type *ShadowTy = getShadowTy(&I);
3747 
3748     // If any bit of the mask operand is poisoned, then the whole thing is.
3749     Value *SMask = getShadow(&I, 1);
3750     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3751                            ShadowTy);
3752     // Apply the same intrinsic to the shadow of the first operand.
3753     Value *S = IRB.CreateCall(I.getCalledFunction(),
3754                               {getShadow(&I, 0), I.getOperand(1)});
3755     S = IRB.CreateOr(SMask, S);
3756     setShadow(&I, S);
3757     setOriginForNaryOp(I);
3758   }
3759 
3760   static SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3761     SmallVector<int, 8> Mask;
3762     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3763       Mask.append(2, X);
3764     }
3765     return Mask;
3766   }
3767 
3768   // Instrument pclmul intrinsics.
3769   // These intrinsics operate either on odd or on even elements of the input
3770   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3771   // Replace the unused elements with copies of the used ones, ex:
3772   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3773   // or
3774   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3775   // and then apply the usual shadow combining logic.
3776   void handlePclmulIntrinsic(IntrinsicInst &I) {
3777     IRBuilder<> IRB(&I);
3778     unsigned Width =
3779         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3780     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3781            "pclmul 3rd operand must be a constant");
3782     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3783     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
3784                                            getPclmulMask(Width, Imm & 0x01));
3785     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
3786                                            getPclmulMask(Width, Imm & 0x10));
3787     ShadowAndOriginCombiner SOC(this, IRB);
3788     SOC.Add(Shuf0, getOrigin(&I, 0));
3789     SOC.Add(Shuf1, getOrigin(&I, 1));
3790     SOC.Done(&I);
3791   }
3792 
3793   // Instrument _mm_*_sd|ss intrinsics
3794   void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
3795     IRBuilder<> IRB(&I);
3796     unsigned Width =
3797         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3798     Value *First = getShadow(&I, 0);
3799     Value *Second = getShadow(&I, 1);
3800     // First element of second operand, remaining elements of first operand
3801     SmallVector<int, 16> Mask;
3802     Mask.push_back(Width);
3803     for (unsigned i = 1; i < Width; i++)
3804       Mask.push_back(i);
3805     Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
3806 
3807     setShadow(&I, Shadow);
3808     setOriginForNaryOp(I);
3809   }
3810 
3811   void handleVtestIntrinsic(IntrinsicInst &I) {
3812     IRBuilder<> IRB(&I);
3813     Value *Shadow0 = getShadow(&I, 0);
3814     Value *Shadow1 = getShadow(&I, 1);
3815     Value *Or = IRB.CreateOr(Shadow0, Shadow1);
3816     Value *NZ = IRB.CreateICmpNE(Or, Constant::getNullValue(Or->getType()));
3817     Value *Scalar = convertShadowToScalar(NZ, IRB);
3818     Value *Shadow = IRB.CreateZExt(Scalar, getShadowTy(&I));
3819 
3820     setShadow(&I, Shadow);
3821     setOriginForNaryOp(I);
3822   }
3823 
3824   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
3825     IRBuilder<> IRB(&I);
3826     unsigned Width =
3827         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3828     Value *First = getShadow(&I, 0);
3829     Value *Second = getShadow(&I, 1);
3830     Value *OrShadow = IRB.CreateOr(First, Second);
3831     // First element of both OR'd together, remaining elements of first operand
3832     SmallVector<int, 16> Mask;
3833     Mask.push_back(Width);
3834     for (unsigned i = 1; i < Width; i++)
3835       Mask.push_back(i);
3836     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
3837 
3838     setShadow(&I, Shadow);
3839     setOriginForNaryOp(I);
3840   }
3841 
3842   // Instrument abs intrinsic.
3843   // handleUnknownIntrinsic can't handle it because of the last
3844   // is_int_min_poison argument which does not match the result type.
3845   void handleAbsIntrinsic(IntrinsicInst &I) {
3846     assert(I.getType()->isIntOrIntVectorTy());
3847     assert(I.getArgOperand(0)->getType() == I.getType());
3848 
3849     // FIXME: Handle is_int_min_poison.
3850     IRBuilder<> IRB(&I);
3851     setShadow(&I, getShadow(&I, 0));
3852     setOrigin(&I, getOrigin(&I, 0));
3853   }
3854 
3855   void handleIsFpClass(IntrinsicInst &I) {
3856     IRBuilder<> IRB(&I);
3857     Value *Shadow = getShadow(&I, 0);
3858     setShadow(&I, IRB.CreateICmpNE(Shadow, getCleanShadow(Shadow)));
3859     setOrigin(&I, getOrigin(&I, 0));
3860   }
3861 
3862   void handleArithmeticWithOverflow(IntrinsicInst &I) {
3863     IRBuilder<> IRB(&I);
3864     Value *Shadow0 = getShadow(&I, 0);
3865     Value *Shadow1 = getShadow(&I, 1);
3866     Value *ShadowElt0 = IRB.CreateOr(Shadow0, Shadow1);
3867     Value *ShadowElt1 =
3868         IRB.CreateICmpNE(ShadowElt0, getCleanShadow(ShadowElt0));
3869 
3870     Value *Shadow = PoisonValue::get(getShadowTy(&I));
3871     Shadow = IRB.CreateInsertValue(Shadow, ShadowElt0, 0);
3872     Shadow = IRB.CreateInsertValue(Shadow, ShadowElt1, 1);
3873 
3874     setShadow(&I, Shadow);
3875     setOriginForNaryOp(I);
3876   }
3877 
3878   /// Handle Arm NEON vector store intrinsics (vst{2,3,4}).
3879   ///
3880   /// Arm NEON vector store intrinsics have the output address (pointer) as the
3881   /// last argument, with the initial arguments being the inputs. They return
3882   /// void.
3883   void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
3884     IRBuilder<> IRB(&I);
3885 
3886     // Don't use getNumOperands() because it includes the callee
3887     int numArgOperands = I.arg_size();
3888     assert(numArgOperands >= 1);
3889 
3890     // The last arg operand is the output
3891     Value *Addr = I.getArgOperand(numArgOperands - 1);
3892     assert(Addr->getType()->isPointerTy());
3893 
3894     if (ClCheckAccessAddress)
3895       insertShadowCheck(Addr, &I);
3896 
3897     // Every arg operand, other than the last one, is an input vector
3898     IntrinsicInst *ShadowI = cast<IntrinsicInst>(I.clone());
3899     for (int i = 0; i < numArgOperands - 1; i++) {
3900       assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
3901       ShadowI->setArgOperand(i, getShadow(&I, i));
3902     }
3903 
3904     // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
3905     // e.g., for:
3906     //     [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3907     // we know the type of the output (and its shadow) is <16 x i8>.
3908     //
3909     // Arm NEON VST is unusual because the last argument is the output address:
3910     //     define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) {
3911     //         call void @llvm.aarch64.neon.st2.v16i8.p0
3912     //                   (<16 x i8> [[A]], <16 x i8> [[B]], ptr [[P]])
3913     // and we have no type information about P's operand. We must manually
3914     // compute the type (<16 x i8> x 2).
3915     FixedVectorType *OutputVectorTy = FixedVectorType::get(
3916         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
3917         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
3918             (numArgOperands - 1));
3919     Type *ShadowTy = getShadowTy(OutputVectorTy);
3920     Value *ShadowPtr, *OriginPtr;
3921     // AArch64 NEON does not need alignment (unless OS requires it)
3922     std::tie(ShadowPtr, OriginPtr) =
3923         getShadowOriginPtr(Addr, IRB, ShadowTy, Align(1), /*isStore*/ true);
3924     ShadowI->setArgOperand(numArgOperands - 1, ShadowPtr);
3925     ShadowI->insertAfter(&I);
3926 
3927     if (MS.TrackOrigins) {
3928       // TODO: if we modelled the vst* instruction more precisely, we could
3929       // more accurately track the origins (e.g., if both inputs are
3930       // uninitialized for vst2, we currently blame the second input, even
3931       // though part of the output depends only on the first input).
3932       OriginCombiner OC(this, IRB);
3933       for (int i = 0; i < numArgOperands - 1; i++)
3934         OC.Add(I.getArgOperand(i));
3935 
3936       const DataLayout &DL = F.getDataLayout();
3937       OC.DoneAndStoreOrigin(DL.getTypeStoreSize(OutputVectorTy), OriginPtr);
3938     }
3939   }
3940 
  /// Dispatch an intrinsic call to the handler that instruments it.
  ///
  /// Each group of cases below forwards \p I to one dedicated handle*()
  /// helper. Intrinsics without a dedicated case are offered to
  /// handleUnknownIntrinsic(); if that returns false, the call falls back to
  /// visitInstruction().
  void visitIntrinsicInst(IntrinsicInst &I) {
    switch (I.getIntrinsicID()) {
    // Target-independent intrinsics.
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::usub_with_overflow:
    case Intrinsic::ssub_with_overflow:
    case Intrinsic::umul_with_overflow:
    case Intrinsic::smul_with_overflow:
      handleArithmeticWithOverflow(I);
      break;
    case Intrinsic::abs:
      handleAbsIntrinsic(I);
      break;
    case Intrinsic::is_fpclass:
      handleIsFpClass(I);
      break;
    case Intrinsic::lifetime_start:
      handleLifetimeStart(I);
      break;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      handleInvariantGroup(I);
      break;
    case Intrinsic::bswap:
      handleBswap(I);
      break;
    case Intrinsic::ctlz:
    case Intrinsic::cttz:
      handleCountZeroes(I);
      break;
    // Masked memory operations.
    case Intrinsic::masked_compressstore:
      handleMaskedCompressStore(I);
      break;
    case Intrinsic::masked_expandload:
      handleMaskedExpandLoad(I);
      break;
    case Intrinsic::masked_gather:
      handleMaskedGather(I);
      break;
    case Intrinsic::masked_scatter:
      handleMaskedScatter(I);
      break;
    case Intrinsic::masked_store:
      handleMaskedStore(I);
      break;
    case Intrinsic::masked_load:
      handleMaskedLoad(I);
      break;
    // Vector reductions.
    case Intrinsic::vector_reduce_and:
      handleVectorReduceAndIntrinsic(I);
      break;
    case Intrinsic::vector_reduce_or:
      handleVectorReduceOrIntrinsic(I);
      break;
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_mul:
      handleVectorReduceIntrinsic(I);
      break;
    // x86 MXCSR store / load.
    case Intrinsic::x86_sse_stmxcsr:
      handleStmxcsr(I);
      break;
    case Intrinsic::x86_sse_ldmxcsr:
      handleLdmxcsr(I);
      break;
    // x86 conversion intrinsics. NOTE(review): the extra arguments are
    // interpreted by handleVectorConvertIntrinsic (defined elsewhere) --
    // confirm their meaning there.
    case Intrinsic::x86_avx512_vcvtsd2usi64:
    case Intrinsic::x86_avx512_vcvtsd2usi32:
    case Intrinsic::x86_avx512_vcvtss2usi64:
    case Intrinsic::x86_avx512_vcvtss2usi32:
    case Intrinsic::x86_avx512_cvttss2usi64:
    case Intrinsic::x86_avx512_cvttss2usi:
    case Intrinsic::x86_avx512_cvttsd2usi64:
    case Intrinsic::x86_avx512_cvttsd2usi:
    case Intrinsic::x86_avx512_cvtusi2ss:
    case Intrinsic::x86_avx512_cvtusi642sd:
    case Intrinsic::x86_avx512_cvtusi642ss:
      handleVectorConvertIntrinsic(I, 1, true);
      break;
    case Intrinsic::x86_sse2_cvtsd2si64:
    case Intrinsic::x86_sse2_cvtsd2si:
    case Intrinsic::x86_sse2_cvtsd2ss:
    case Intrinsic::x86_sse2_cvttsd2si64:
    case Intrinsic::x86_sse2_cvttsd2si:
    case Intrinsic::x86_sse_cvtss2si64:
    case Intrinsic::x86_sse_cvtss2si:
    case Intrinsic::x86_sse_cvttss2si64:
    case Intrinsic::x86_sse_cvttss2si:
      handleVectorConvertIntrinsic(I, 1);
      break;
    case Intrinsic::x86_sse_cvtps2pi:
    case Intrinsic::x86_sse_cvttps2pi:
      handleVectorConvertIntrinsic(I, 2);
      break;

    // x86 vector shifts handled with Variable == false.
    case Intrinsic::x86_avx512_psll_w_512:
    case Intrinsic::x86_avx512_psll_d_512:
    case Intrinsic::x86_avx512_psll_q_512:
    case Intrinsic::x86_avx512_pslli_w_512:
    case Intrinsic::x86_avx512_pslli_d_512:
    case Intrinsic::x86_avx512_pslli_q_512:
    case Intrinsic::x86_avx512_psrl_w_512:
    case Intrinsic::x86_avx512_psrl_d_512:
    case Intrinsic::x86_avx512_psrl_q_512:
    case Intrinsic::x86_avx512_psra_w_512:
    case Intrinsic::x86_avx512_psra_d_512:
    case Intrinsic::x86_avx512_psra_q_512:
    case Intrinsic::x86_avx512_psrli_w_512:
    case Intrinsic::x86_avx512_psrli_d_512:
    case Intrinsic::x86_avx512_psrli_q_512:
    case Intrinsic::x86_avx512_psrai_w_512:
    case Intrinsic::x86_avx512_psrai_d_512:
    case Intrinsic::x86_avx512_psrai_q_512:
    case Intrinsic::x86_avx512_psra_q_256:
    case Intrinsic::x86_avx512_psra_q_128:
    case Intrinsic::x86_avx512_psrai_q_256:
    case Intrinsic::x86_avx512_psrai_q_128:
    case Intrinsic::x86_avx2_psll_w:
    case Intrinsic::x86_avx2_psll_d:
    case Intrinsic::x86_avx2_psll_q:
    case Intrinsic::x86_avx2_pslli_w:
    case Intrinsic::x86_avx2_pslli_d:
    case Intrinsic::x86_avx2_pslli_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psra_w:
    case Intrinsic::x86_avx2_psra_d:
    case Intrinsic::x86_avx2_psrli_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrai_w:
    case Intrinsic::x86_avx2_psrai_d:
    case Intrinsic::x86_sse2_psll_w:
    case Intrinsic::x86_sse2_psll_d:
    case Intrinsic::x86_sse2_psll_q:
    case Intrinsic::x86_sse2_pslli_w:
    case Intrinsic::x86_sse2_pslli_d:
    case Intrinsic::x86_sse2_pslli_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psra_w:
    case Intrinsic::x86_sse2_psra_d:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrai_w:
    case Intrinsic::x86_sse2_psrai_d:
    case Intrinsic::x86_mmx_psll_w:
    case Intrinsic::x86_mmx_psll_d:
    case Intrinsic::x86_mmx_psll_q:
    case Intrinsic::x86_mmx_pslli_w:
    case Intrinsic::x86_mmx_pslli_d:
    case Intrinsic::x86_mmx_pslli_q:
    case Intrinsic::x86_mmx_psrl_w:
    case Intrinsic::x86_mmx_psrl_d:
    case Intrinsic::x86_mmx_psrl_q:
    case Intrinsic::x86_mmx_psra_w:
    case Intrinsic::x86_mmx_psra_d:
    case Intrinsic::x86_mmx_psrli_w:
    case Intrinsic::x86_mmx_psrli_d:
    case Intrinsic::x86_mmx_psrli_q:
    case Intrinsic::x86_mmx_psrai_w:
    case Intrinsic::x86_mmx_psrai_d:
      handleVectorShiftIntrinsic(I, /* Variable */ false);
      break;
    // x86 vector shifts handled with Variable == true.
    case Intrinsic::x86_avx2_psllv_d:
    case Intrinsic::x86_avx2_psllv_d_256:
    case Intrinsic::x86_avx512_psllv_d_512:
    case Intrinsic::x86_avx2_psllv_q:
    case Intrinsic::x86_avx2_psllv_q_256:
    case Intrinsic::x86_avx512_psllv_q_512:
    case Intrinsic::x86_avx2_psrlv_d:
    case Intrinsic::x86_avx2_psrlv_d_256:
    case Intrinsic::x86_avx512_psrlv_d_512:
    case Intrinsic::x86_avx2_psrlv_q:
    case Intrinsic::x86_avx2_psrlv_q_256:
    case Intrinsic::x86_avx512_psrlv_q_512:
    case Intrinsic::x86_avx2_psrav_d:
    case Intrinsic::x86_avx2_psrav_d_256:
    case Intrinsic::x86_avx512_psrav_d_512:
    case Intrinsic::x86_avx512_psrav_q_128:
    case Intrinsic::x86_avx512_psrav_q_256:
    case Intrinsic::x86_avx512_psrav_q_512:
      handleVectorShiftIntrinsic(I, /* Variable */ true);
      break;

    // x86 SSE/AVX pack intrinsics.
    case Intrinsic::x86_sse2_packsswb_128:
    case Intrinsic::x86_sse2_packssdw_128:
    case Intrinsic::x86_sse2_packuswb_128:
    case Intrinsic::x86_sse41_packusdw:
    case Intrinsic::x86_avx2_packsswb:
    case Intrinsic::x86_avx2_packssdw:
    case Intrinsic::x86_avx2_packuswb:
    case Intrinsic::x86_avx2_packusdw:
      handleVectorPackIntrinsic(I);
      break;

    // x86 variable blends.
    case Intrinsic::x86_sse41_pblendvb:
    case Intrinsic::x86_sse41_blendvpd:
    case Intrinsic::x86_sse41_blendvps:
    case Intrinsic::x86_avx_blendv_pd_256:
    case Intrinsic::x86_avx_blendv_ps_256:
    case Intrinsic::x86_avx2_pblendvb:
      handleBlendvIntrinsic(I);
      break;

    // x86 dot products.
    case Intrinsic::x86_avx_dp_ps_256:
    case Intrinsic::x86_sse41_dppd:
    case Intrinsic::x86_sse41_dpps:
      handleDppIntrinsic(I);
      break;

    // MMX pack intrinsics pass an explicit second argument.
    // NOTE(review): presumably an element width in bits -- confirm against
    // handleVectorPackIntrinsic.
    case Intrinsic::x86_mmx_packsswb:
    case Intrinsic::x86_mmx_packuswb:
      handleVectorPackIntrinsic(I, 16);
      break;

    case Intrinsic::x86_mmx_packssdw:
      handleVectorPackIntrinsic(I, 32);
      break;

    // x86 sum of absolute differences.
    case Intrinsic::x86_mmx_psad_bw:
    case Intrinsic::x86_sse2_psad_bw:
    case Intrinsic::x86_avx2_psad_bw:
      handleVectorSadIntrinsic(I);
      break;

    // x86 multiply-add intrinsics. NOTE(review): the explicit second argument
    // used for the MMX-width variants is interpreted by
    // handleVectorPmaddIntrinsic -- confirm its meaning there.
    case Intrinsic::x86_sse2_pmadd_wd:
    case Intrinsic::x86_avx2_pmadd_wd:
    case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
    case Intrinsic::x86_avx2_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I);
      break;

    case Intrinsic::x86_ssse3_pmadd_ub_sw:
      handleVectorPmaddIntrinsic(I, 8);
      break;

    case Intrinsic::x86_mmx_pmadd_wd:
      handleVectorPmaddIntrinsic(I, 16);
      break;

    // x86 scalar floating-point comparisons.
    case Intrinsic::x86_sse_cmp_ss:
    case Intrinsic::x86_sse2_cmp_sd:
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
    case Intrinsic::x86_sse2_comilt_sd:
    case Intrinsic::x86_sse2_comile_sd:
    case Intrinsic::x86_sse2_comigt_sd:
    case Intrinsic::x86_sse2_comige_sd:
    case Intrinsic::x86_sse2_comineq_sd:
    case Intrinsic::x86_sse2_ucomieq_sd:
    case Intrinsic::x86_sse2_ucomilt_sd:
    case Intrinsic::x86_sse2_ucomile_sd:
    case Intrinsic::x86_sse2_ucomigt_sd:
    case Intrinsic::x86_sse2_ucomige_sd:
    case Intrinsic::x86_sse2_ucomineq_sd:
      handleVectorCompareScalarIntrinsic(I);
      break;

    // x86 packed floating-point comparisons.
    case Intrinsic::x86_avx_cmp_pd_256:
    case Intrinsic::x86_avx_cmp_ps_256:
    case Intrinsic::x86_sse2_cmp_pd:
    case Intrinsic::x86_sse_cmp_ps:
      handleVectorComparePackedIntrinsic(I);
      break;

    // x86 BMI / BMI2.
    case Intrinsic::x86_bmi_bextr_32:
    case Intrinsic::x86_bmi_bextr_64:
    case Intrinsic::x86_bmi_bzhi_32:
    case Intrinsic::x86_bmi_bzhi_64:
    case Intrinsic::x86_bmi_pdep_32:
    case Intrinsic::x86_bmi_pdep_64:
    case Intrinsic::x86_bmi_pext_32:
    case Intrinsic::x86_bmi_pext_64:
      handleBmiIntrinsic(I);
      break;

    // x86 carry-less multiplication.
    case Intrinsic::x86_pclmulqdq:
    case Intrinsic::x86_pclmulqdq_256:
    case Intrinsic::x86_pclmulqdq_512:
      handlePclmulIntrinsic(I);
      break;

    // x86 scalar-in-vector (sd/ss) operations.
    case Intrinsic::x86_sse41_round_sd:
    case Intrinsic::x86_sse41_round_ss:
      handleUnarySdSsIntrinsic(I);
      break;
    case Intrinsic::x86_sse2_max_sd:
    case Intrinsic::x86_sse_max_ss:
    case Intrinsic::x86_sse2_min_sd:
    case Intrinsic::x86_sse_min_ss:
      handleBinarySdSsIntrinsic(I);
      break;

    // x86 vtest / ptest.
    case Intrinsic::x86_avx_vtestc_pd:
    case Intrinsic::x86_avx_vtestc_pd_256:
    case Intrinsic::x86_avx_vtestc_ps:
    case Intrinsic::x86_avx_vtestc_ps_256:
    case Intrinsic::x86_avx_vtestnzc_pd:
    case Intrinsic::x86_avx_vtestnzc_pd_256:
    case Intrinsic::x86_avx_vtestnzc_ps:
    case Intrinsic::x86_avx_vtestnzc_ps_256:
    case Intrinsic::x86_avx_vtestz_pd:
    case Intrinsic::x86_avx_vtestz_pd_256:
    case Intrinsic::x86_avx_vtestz_ps:
    case Intrinsic::x86_avx_vtestz_ps_256:
    case Intrinsic::x86_avx_ptestc_256:
    case Intrinsic::x86_avx_ptestnzc_256:
    case Intrinsic::x86_avx_ptestz_256:
    case Intrinsic::x86_sse41_ptestc:
    case Intrinsic::x86_sse41_ptestnzc:
    case Intrinsic::x86_sse41_ptestz:
      handleVtestIntrinsic(I);
      break;

    // Funnel shifts.
    case Intrinsic::fshl:
    case Intrinsic::fshr:
      handleFunnelShift(I);
      break;

    case Intrinsic::is_constant:
      // The result of llvm.is.constant() is always defined.
      setShadow(&I, getCleanShadow(&I));
      setOrigin(&I, getCleanOrigin());
      break;

    // AArch64 NEON multi-vector stores.
    case Intrinsic::aarch64_neon_st2:
    case Intrinsic::aarch64_neon_st3:
    case Intrinsic::aarch64_neon_st4: {
      handleNEONVectorStoreIntrinsic(I);
      break;
    }

    // Anything else: try the generic heuristics first, then fall back to the
    // default instruction handling.
    default:
      if (!handleUnknownIntrinsic(I))
        visitInstruction(I);
      break;
    }
  }
4293 
4294   void visitLibAtomicLoad(CallBase &CB) {
4295     // Since we use getNextNode here, we can't have CB terminate the BB.
4296     assert(isa<CallInst>(CB));
4297 
4298     IRBuilder<> IRB(&CB);
4299     Value *Size = CB.getArgOperand(0);
4300     Value *SrcPtr = CB.getArgOperand(1);
4301     Value *DstPtr = CB.getArgOperand(2);
4302     Value *Ordering = CB.getArgOperand(3);
4303     // Convert the call to have at least Acquire ordering to make sure
4304     // the shadow operations aren't reordered before it.
4305     Value *NewOrdering =
4306         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
4307     CB.setArgOperand(3, NewOrdering);
4308 
4309     NextNodeIRBuilder NextIRB(&CB);
4310     Value *SrcShadowPtr, *SrcOriginPtr;
4311     std::tie(SrcShadowPtr, SrcOriginPtr) =
4312         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4313                            /*isStore*/ false);
4314     Value *DstShadowPtr =
4315         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4316                            /*isStore*/ true)
4317             .first;
4318 
4319     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
4320     if (MS.TrackOrigins) {
4321       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
4322                                                    kMinOriginAlignment);
4323       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
4324       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
4325     }
4326   }
4327 
4328   void visitLibAtomicStore(CallBase &CB) {
4329     IRBuilder<> IRB(&CB);
4330     Value *Size = CB.getArgOperand(0);
4331     Value *DstPtr = CB.getArgOperand(2);
4332     Value *Ordering = CB.getArgOperand(3);
4333     // Convert the call to have at least Release ordering to make sure
4334     // the shadow operations aren't reordered after it.
4335     Value *NewOrdering =
4336         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
4337     CB.setArgOperand(3, NewOrdering);
4338 
4339     Value *DstShadowPtr =
4340         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
4341                            /*isStore*/ true)
4342             .first;
4343 
4344     // Atomic store always paints clean shadow/origin. See file header.
4345     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
4346                      Align(1));
4347   }
4348 
4349   void visitCallBase(CallBase &CB) {
4350     assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
4351     if (CB.isInlineAsm()) {
4352       // For inline asm (either a call to asm function, or callbr instruction),
4353       // do the usual thing: check argument shadow and mark all outputs as
4354       // clean. Note that any side effects of the inline asm that are not
4355       // immediately visible in its constraints are not handled.
4356       if (ClHandleAsmConservative)
4357         visitAsmInstruction(CB);
4358       else
4359         visitInstruction(CB);
4360       return;
4361     }
4362     LibFunc LF;
4363     if (TLI->getLibFunc(CB, LF)) {
4364       // libatomic.a functions need to have special handling because there isn't
4365       // a good way to intercept them or compile the library with
4366       // instrumentation.
4367       switch (LF) {
4368       case LibFunc_atomic_load:
4369         if (!isa<CallInst>(CB)) {
4370           llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load."
4371                           "Ignoring!\n";
4372           break;
4373         }
4374         visitLibAtomicLoad(CB);
4375         return;
4376       case LibFunc_atomic_store:
4377         visitLibAtomicStore(CB);
4378         return;
4379       default:
4380         break;
4381       }
4382     }
4383 
4384     if (auto *Call = dyn_cast<CallInst>(&CB)) {
4385       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
4386 
4387       // We are going to insert code that relies on the fact that the callee
4388       // will become a non-readonly function after it is instrumented by us. To
4389       // prevent this code from being optimized out, mark that function
4390       // non-readonly in advance.
4391       // TODO: We can likely do better than dropping memory() completely here.
4392       AttributeMask B;
4393       B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
4394 
4395       Call->removeFnAttrs(B);
4396       if (Function *Func = Call->getCalledFunction()) {
4397         Func->removeFnAttrs(B);
4398       }
4399 
4400       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
4401     }
4402     IRBuilder<> IRB(&CB);
4403     bool MayCheckCall = MS.EagerChecks;
4404     if (Function *Func = CB.getCalledFunction()) {
4405       // __sanitizer_unaligned_{load,store} functions may be called by users
4406       // and always expects shadows in the TLS. So don't check them.
4407       MayCheckCall &= !Func->getName().starts_with("__sanitizer_unaligned_");
4408     }
4409 
4410     unsigned ArgOffset = 0;
4411     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
4412     for (const auto &[i, A] : llvm::enumerate(CB.args())) {
4413       if (!A->getType()->isSized()) {
4414         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
4415         continue;
4416       }
4417 
4418       if (A->getType()->isScalableTy()) {
4419         LLVM_DEBUG(dbgs() << "Arg  " << i << " is vscale: " << CB << "\n");
4420         // Handle as noundef, but don't reserve tls slots.
4421         insertShadowCheck(A, &CB);
4422         continue;
4423       }
4424 
4425       unsigned Size = 0;
4426       const DataLayout &DL = F.getDataLayout();
4427 
4428       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
4429       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
4430       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
4431 
4432       if (EagerCheck) {
4433         insertShadowCheck(A, &CB);
4434         Size = DL.getTypeAllocSize(A->getType());
4435       } else {
4436         Value *Store = nullptr;
4437         // Compute the Shadow for arg even if it is ByVal, because
4438         // in that case getShadow() will copy the actual arg shadow to
4439         // __msan_param_tls.
4440         Value *ArgShadow = getShadow(A);
4441         Value *ArgShadowBase = getShadowPtrForArgument(IRB, ArgOffset);
4442         LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
4443                           << " Shadow: " << *ArgShadow << "\n");
4444         if (ByVal) {
4445           // ByVal requires some special handling as it's too big for a single
4446           // load
4447           assert(A->getType()->isPointerTy() &&
4448                  "ByVal argument is not a pointer!");
4449           Size = DL.getTypeAllocSize(CB.getParamByValType(i));
4450           if (ArgOffset + Size > kParamTLSSize)
4451             break;
4452           const MaybeAlign ParamAlignment(CB.getParamAlign(i));
4453           MaybeAlign Alignment = std::nullopt;
4454           if (ParamAlignment)
4455             Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
4456           Value *AShadowPtr, *AOriginPtr;
4457           std::tie(AShadowPtr, AOriginPtr) =
4458               getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
4459                                  /*isStore*/ false);
4460           if (!PropagateShadow) {
4461             Store = IRB.CreateMemSet(ArgShadowBase,
4462                                      Constant::getNullValue(IRB.getInt8Ty()),
4463                                      Size, Alignment);
4464           } else {
4465             Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
4466                                      Alignment, Size);
4467             if (MS.TrackOrigins) {
4468               Value *ArgOriginBase = getOriginPtrForArgument(IRB, ArgOffset);
4469               // FIXME: OriginSize should be:
4470               // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
4471               unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
4472               IRB.CreateMemCpy(
4473                   ArgOriginBase,
4474                   /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
4475                   AOriginPtr,
4476                   /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
4477             }
4478           }
4479         } else {
4480           // Any other parameters mean we need bit-grained tracking of uninit
4481           // data
4482           Size = DL.getTypeAllocSize(A->getType());
4483           if (ArgOffset + Size > kParamTLSSize)
4484             break;
4485           Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
4486                                          kShadowTLSAlignment);
4487           Constant *Cst = dyn_cast<Constant>(ArgShadow);
4488           if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
4489             IRB.CreateStore(getOrigin(A),
4490                             getOriginPtrForArgument(IRB, ArgOffset));
4491           }
4492         }
4493         (void)Store;
4494         assert(Store != nullptr);
4495         LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
4496       }
4497       assert(Size != 0);
4498       ArgOffset += alignTo(Size, kShadowTLSAlignment);
4499     }
4500     LLVM_DEBUG(dbgs() << "  done with call args\n");
4501 
4502     FunctionType *FT = CB.getFunctionType();
4503     if (FT->isVarArg()) {
4504       VAHelper->visitCallBase(CB, IRB);
4505     }
4506 
4507     // Now, get the shadow for the RetVal.
4508     if (!CB.getType()->isSized())
4509       return;
4510     // Don't emit the epilogue for musttail call returns.
4511     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
4512       return;
4513 
4514     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
4515       setShadow(&CB, getCleanShadow(&CB));
4516       setOrigin(&CB, getCleanOrigin());
4517       return;
4518     }
4519 
4520     IRBuilder<> IRBBefore(&CB);
4521     // Until we have full dynamic coverage, make sure the retval shadow is 0.
4522     Value *Base = getShadowPtrForRetval(IRBBefore);
4523     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
4524                                  kShadowTLSAlignment);
4525     BasicBlock::iterator NextInsn;
4526     if (isa<CallInst>(CB)) {
4527       NextInsn = ++CB.getIterator();
4528       assert(NextInsn != CB.getParent()->end());
4529     } else {
4530       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
4531       if (!NormalDest->getSinglePredecessor()) {
4532         // FIXME: this case is tricky, so we are just conservative here.
4533         // Perhaps we need to split the edge between this BB and NormalDest,
4534         // but a naive attempt to use SplitEdge leads to a crash.
4535         setShadow(&CB, getCleanShadow(&CB));
4536         setOrigin(&CB, getCleanOrigin());
4537         return;
4538       }
4539       // FIXME: NextInsn is likely in a basic block that has not been visited
4540       // yet. Anything inserted there will be instrumented by MSan later!
4541       NextInsn = NormalDest->getFirstInsertionPt();
4542       assert(NextInsn != NormalDest->end() &&
4543              "Could not find insertion point for retval shadow load");
4544     }
4545     IRBuilder<> IRBAfter(&*NextInsn);
4546     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
4547         getShadowTy(&CB), getShadowPtrForRetval(IRBAfter),
4548         kShadowTLSAlignment, "_msret");
4549     setShadow(&CB, RetvalShadow);
4550     if (MS.TrackOrigins)
4551       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
4552                                          getOriginPtrForRetval()));
4553   }
4554 
4555   bool isAMustTailRetVal(Value *RetVal) {
4556     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
4557       RetVal = I->getOperand(0);
4558     }
4559     if (auto *I = dyn_cast<CallInst>(RetVal)) {
4560       return I->isMustTailCall();
4561     }
4562     return false;
4563   }
4564 
4565   void visitReturnInst(ReturnInst &I) {
4566     IRBuilder<> IRB(&I);
4567     Value *RetVal = I.getReturnValue();
4568     if (!RetVal)
4569       return;
4570     // Don't emit the epilogue for musttail call returns.
4571     if (isAMustTailRetVal(RetVal))
4572       return;
4573     Value *ShadowPtr = getShadowPtrForRetval(IRB);
4574     bool HasNoUndef = F.hasRetAttribute(Attribute::NoUndef);
4575     bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
4576     // FIXME: Consider using SpecialCaseList to specify a list of functions that
4577     // must always return fully initialized values. For now, we hardcode "main".
4578     bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
4579 
4580     Value *Shadow = getShadow(RetVal);
4581     bool StoreOrigin = true;
4582     if (EagerCheck) {
4583       insertShadowCheck(RetVal, &I);
4584       Shadow = getCleanShadow(RetVal);
4585       StoreOrigin = false;
4586     }
4587 
4588     // The caller may still expect information passed over TLS if we pass our
4589     // check
4590     if (StoreShadow) {
4591       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
4592       if (MS.TrackOrigins && StoreOrigin)
4593         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval());
4594     }
4595   }
4596 
4597   void visitPHINode(PHINode &I) {
4598     IRBuilder<> IRB(&I);
4599     if (!PropagateShadow) {
4600       setShadow(&I, getCleanShadow(&I));
4601       setOrigin(&I, getCleanOrigin());
4602       return;
4603     }
4604 
4605     ShadowPHINodes.push_back(&I);
4606     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
4607                                 "_msphi_s"));
4608     if (MS.TrackOrigins)
4609       setOrigin(
4610           &I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), "_msphi_o"));
4611   }
4612 
4613   Value *getLocalVarIdptr(AllocaInst &I) {
4614     ConstantInt *IntConst =
4615         ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0);
4616     return new GlobalVariable(*F.getParent(), IntConst->getType(),
4617                               /*isConstant=*/false, GlobalValue::PrivateLinkage,
4618                               IntConst);
4619   }
4620 
4621   Value *getLocalVarDescription(AllocaInst &I) {
4622     return createPrivateConstGlobalForString(*F.getParent(), I.getName());
4623   }
4624 
4625   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
4626     if (PoisonStack && ClPoisonStackWithCall) {
4627       IRB.CreateCall(MS.MsanPoisonStackFn, {&I, Len});
4628     } else {
4629       Value *ShadowBase, *OriginBase;
4630       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
4631           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
4632 
4633       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
4634       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
4635     }
4636 
4637     if (PoisonStack && MS.TrackOrigins) {
4638       Value *Idptr = getLocalVarIdptr(I);
4639       if (ClPrintStackNames) {
4640         Value *Descr = getLocalVarDescription(I);
4641         IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn,
4642                        {&I, Len, Idptr, Descr});
4643       } else {
4644         IRB.CreateCall(MS.MsanSetAllocaOriginNoDescriptionFn, {&I, Len, Idptr});
4645       }
4646     }
4647   }
4648 
4649   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
4650     Value *Descr = getLocalVarDescription(I);
4651     if (PoisonStack) {
4652       IRB.CreateCall(MS.MsanPoisonAllocaFn, {&I, Len, Descr});
4653     } else {
4654       IRB.CreateCall(MS.MsanUnpoisonAllocaFn, {&I, Len});
4655     }
4656   }
4657 
4658   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
4659     if (!InsPoint)
4660       InsPoint = &I;
4661     NextNodeIRBuilder IRB(InsPoint);
4662     const DataLayout &DL = F.getDataLayout();
4663     TypeSize TS = DL.getTypeAllocSize(I.getAllocatedType());
4664     Value *Len = IRB.CreateTypeSize(MS.IntptrTy, TS);
4665     if (I.isArrayAllocation())
4666       Len = IRB.CreateMul(Len,
4667                           IRB.CreateZExtOrTrunc(I.getArraySize(), MS.IntptrTy));
4668 
4669     if (MS.CompileKernel)
4670       poisonAllocaKmsan(I, IRB, Len);
4671     else
4672       poisonAllocaUserspace(I, IRB, Len);
4673   }
4674 
4675   void visitAllocaInst(AllocaInst &I) {
4676     setShadow(&I, getCleanShadow(&I));
4677     setOrigin(&I, getCleanOrigin());
4678     // We'll get to this alloca later unless it's poisoned at the corresponding
4679     // llvm.lifetime.start.
4680     AllocaSet.insert(&I);
4681   }
4682 
4683   void visitSelectInst(SelectInst &I) {
4684     // a = select b, c, d
4685     Value *B = I.getCondition();
4686     Value *C = I.getTrueValue();
4687     Value *D = I.getFalseValue();
4688 
4689     handleSelectLikeInst(I, B, C, D);
4690   }
4691 
4692   void handleSelectLikeInst(Instruction &I, Value *B, Value *C, Value *D) {
4693     IRBuilder<> IRB(&I);
4694 
4695     Value *Sb = getShadow(B);
4696     Value *Sc = getShadow(C);
4697     Value *Sd = getShadow(D);
4698 
4699     Value *Ob = MS.TrackOrigins ? getOrigin(B) : nullptr;
4700     Value *Oc = MS.TrackOrigins ? getOrigin(C) : nullptr;
4701     Value *Od = MS.TrackOrigins ? getOrigin(D) : nullptr;
4702 
4703     // Result shadow if condition shadow is 0.
4704     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
4705     Value *Sa1;
4706     if (I.getType()->isAggregateType()) {
4707       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
4708       // an extra "select". This results in much more compact IR.
4709       // Sa = select Sb, poisoned, (select b, Sc, Sd)
4710       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
4711     } else {
4712       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
4713       // If Sb (condition is poisoned), look for bits in c and d that are equal
4714       // and both unpoisoned.
4715       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
4716 
4717       // Cast arguments to shadow-compatible type.
4718       C = CreateAppToShadowCast(IRB, C);
4719       D = CreateAppToShadowCast(IRB, D);
4720 
4721       // Result shadow if condition shadow is 1.
4722       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
4723     }
4724     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
4725     setShadow(&I, Sa);
4726     if (MS.TrackOrigins) {
4727       // Origins are always i32, so any vector conditions must be flattened.
4728       // FIXME: consider tracking vector origins for app vectors?
4729       if (B->getType()->isVectorTy()) {
4730         B = convertToBool(B, IRB);
4731         Sb = convertToBool(Sb, IRB);
4732       }
4733       // a = select b, c, d
4734       // Oa = Sb ? Ob : (b ? Oc : Od)
4735       setOrigin(&I, IRB.CreateSelect(Sb, Ob, IRB.CreateSelect(B, Oc, Od)));
4736     }
4737   }
4738 
4739   void visitLandingPadInst(LandingPadInst &I) {
4740     // Do nothing.
4741     // See https://github.com/google/sanitizers/issues/504
4742     setShadow(&I, getCleanShadow(&I));
4743     setOrigin(&I, getCleanOrigin());
4744   }
4745 
4746   void visitCatchSwitchInst(CatchSwitchInst &I) {
4747     setShadow(&I, getCleanShadow(&I));
4748     setOrigin(&I, getCleanOrigin());
4749   }
4750 
4751   void visitFuncletPadInst(FuncletPadInst &I) {
4752     setShadow(&I, getCleanShadow(&I));
4753     setOrigin(&I, getCleanOrigin());
4754   }
4755 
4756   void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); }
4757 
4758   void visitExtractValueInst(ExtractValueInst &I) {
4759     IRBuilder<> IRB(&I);
4760     Value *Agg = I.getAggregateOperand();
4761     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
4762     Value *AggShadow = getShadow(Agg);
4763     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4764     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
4765     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
4766     setShadow(&I, ResShadow);
4767     setOriginForNaryOp(I);
4768   }
4769 
4770   void visitInsertValueInst(InsertValueInst &I) {
4771     IRBuilder<> IRB(&I);
4772     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
4773     Value *AggShadow = getShadow(I.getAggregateOperand());
4774     Value *InsShadow = getShadow(I.getInsertedValueOperand());
4775     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4776     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
4777     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
4778     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
4779     setShadow(&I, Res);
4780     setOriginForNaryOp(I);
4781   }
4782 
4783   void dumpInst(Instruction &I) {
4784     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
4785       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
4786     } else {
4787       errs() << "ZZZ " << I.getOpcodeName() << "\n";
4788     }
4789     errs() << "QQQ " << I << "\n";
4790   }
4791 
4792   void visitResumeInst(ResumeInst &I) {
4793     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
4794     // Nothing to do here.
4795   }
4796 
4797   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
4798     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
4799     // Nothing to do here.
4800   }
4801 
4802   void visitCatchReturnInst(CatchReturnInst &CRI) {
4803     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
4804     // Nothing to do here.
4805   }
4806 
4807   void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
4808                              IRBuilder<> &IRB, const DataLayout &DL,
4809                              bool isOutput) {
4810     // For each assembly argument, we check its value for being initialized.
4811     // If the argument is a pointer, we assume it points to a single element
4812     // of the corresponding type (or to a 8-byte word, if the type is unsized).
4813     // Each such pointer is instrumented with a call to the runtime library.
4814     Type *OpType = Operand->getType();
4815     // Check the operand value itself.
4816     insertShadowCheck(Operand, &I);
4817     if (!OpType->isPointerTy() || !isOutput) {
4818       assert(!isOutput);
4819       return;
4820     }
4821     if (!ElemTy->isSized())
4822       return;
4823     auto Size = DL.getTypeStoreSize(ElemTy);
4824     Value *SizeVal = IRB.CreateTypeSize(MS.IntptrTy, Size);
4825     if (MS.CompileKernel) {
4826       IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Operand, SizeVal});
4827     } else {
4828       // ElemTy, derived from elementtype(), does not encode the alignment of
4829       // the pointer. Conservatively assume that the shadow memory is unaligned.
4830       // When Size is large, avoid StoreInst as it would expand to many
4831       // instructions.
4832       auto [ShadowPtr, _] =
4833           getShadowOriginPtrUserspace(Operand, IRB, IRB.getInt8Ty(), Align(1));
4834       if (Size <= 32)
4835         IRB.CreateAlignedStore(getCleanShadow(ElemTy), ShadowPtr, Align(1));
4836       else
4837         IRB.CreateMemSet(ShadowPtr, ConstantInt::getNullValue(IRB.getInt8Ty()),
4838                          SizeVal, Align(1));
4839     }
4840   }
4841 
4842   /// Get the number of output arguments returned by pointers.
4843   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
4844     int NumRetOutputs = 0;
4845     int NumOutputs = 0;
4846     Type *RetTy = cast<Value>(CB)->getType();
4847     if (!RetTy->isVoidTy()) {
4848       // Register outputs are returned via the CallInst return value.
4849       auto *ST = dyn_cast<StructType>(RetTy);
4850       if (ST)
4851         NumRetOutputs = ST->getNumElements();
4852       else
4853         NumRetOutputs = 1;
4854     }
4855     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
4856     for (const InlineAsm::ConstraintInfo &Info : Constraints) {
4857       switch (Info.Type) {
4858       case InlineAsm::isOutput:
4859         NumOutputs++;
4860         break;
4861       default:
4862         break;
4863       }
4864     }
4865     return NumOutputs - NumRetOutputs;
4866   }
4867 
4868   void visitAsmInstruction(Instruction &I) {
4869     // Conservative inline assembly handling: check for poisoned shadow of
4870     // asm() arguments, then unpoison the result and all the memory locations
4871     // pointed to by those arguments.
4872     // An inline asm() statement in C++ contains lists of input and output
4873     // arguments used by the assembly code. These are mapped to operands of the
4874     // CallInst as follows:
4875     //  - nR register outputs ("=r) are returned by value in a single structure
4876     //  (SSA value of the CallInst);
4877     //  - nO other outputs ("=m" and others) are returned by pointer as first
4878     // nO operands of the CallInst;
4879     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
4880     // remaining nI operands.
4881     // The total number of asm() arguments in the source is nR+nO+nI, and the
4882     // corresponding CallInst has nO+nI+1 operands (the last operand is the
4883     // function to be called).
4884     const DataLayout &DL = F.getDataLayout();
4885     CallBase *CB = cast<CallBase>(&I);
4886     IRBuilder<> IRB(&I);
4887     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
4888     int OutputArgs = getNumOutputArgs(IA, CB);
4889     // The last operand of a CallInst is the function itself.
4890     int NumOperands = CB->getNumOperands() - 1;
4891 
4892     // Check input arguments. Doing so before unpoisoning output arguments, so
4893     // that we won't overwrite uninit values before checking them.
4894     for (int i = OutputArgs; i < NumOperands; i++) {
4895       Value *Operand = CB->getOperand(i);
4896       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4897                             /*isOutput*/ false);
4898     }
4899     // Unpoison output arguments. This must happen before the actual InlineAsm
4900     // call, so that the shadow for memory published in the asm() statement
4901     // remains valid.
4902     for (int i = 0; i < OutputArgs; i++) {
4903       Value *Operand = CB->getOperand(i);
4904       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4905                             /*isOutput*/ true);
4906     }
4907 
4908     setShadow(&I, getCleanShadow(&I));
4909     setOrigin(&I, getCleanOrigin());
4910   }
4911 
4912   void visitFreezeInst(FreezeInst &I) {
4913     // Freeze always returns a fully defined value.
4914     setShadow(&I, getCleanShadow(&I));
4915     setOrigin(&I, getCleanOrigin());
4916   }
4917 
4918   void visitInstruction(Instruction &I) {
4919     // Everything else: stop propagating and check for poisoned shadow.
4920     if (ClDumpStrictInstructions)
4921       dumpInst(I);
4922     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4923     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4924       Value *Operand = I.getOperand(i);
4925       if (Operand->getType()->isSized())
4926         insertShadowCheck(Operand, &I);
4927     }
4928     setShadow(&I, getCleanShadow(&I));
4929     setOrigin(&I, getCleanOrigin());
4930   }
4931 };
4932 
4933 struct VarArgHelperBase : public VarArgHelper {
4934   Function &F;
4935   MemorySanitizer &MS;
4936   MemorySanitizerVisitor &MSV;
4937   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4938   const unsigned VAListTagSize;
4939 
4940   VarArgHelperBase(Function &F, MemorySanitizer &MS,
4941                    MemorySanitizerVisitor &MSV, unsigned VAListTagSize)
4942       : F(F), MS(MS), MSV(MSV), VAListTagSize(VAListTagSize) {}
4943 
4944   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
4945     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4946     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4947   }
4948 
4949   /// Compute the shadow address for a given va_arg.
4950   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4951                                    unsigned ArgOffset) {
4952     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4953     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4954     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4955                               "_msarg_va_s");
4956   }
4957 
4958   /// Compute the shadow address for a given va_arg.
4959   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4960                                    unsigned ArgOffset, unsigned ArgSize) {
4961     // Make sure we don't overflow __msan_va_arg_tls.
4962     if (ArgOffset + ArgSize > kParamTLSSize)
4963       return nullptr;
4964     return getShadowPtrForVAArgument(Ty, IRB, ArgOffset);
4965   }
4966 
4967   /// Compute the origin address for a given va_arg.
4968   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
4969     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4970     // getOriginPtrForVAArgument() is always called after
4971     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4972     // overflow.
4973     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4974     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4975                               "_msarg_va_o");
4976   }
4977 
4978   void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
4979                       unsigned BaseOffset) {
4980     // The tails of __msan_va_arg_tls is not large enough to fit full
4981     // value shadow, but it will be copied to backup anyway. Make it
4982     // clean.
4983     if (BaseOffset >= kParamTLSSize)
4984       return;
4985     Value *TailSize =
4986         ConstantInt::getSigned(IRB.getInt32Ty(), kParamTLSSize - BaseOffset);
4987     IRB.CreateMemSet(ShadowBase, ConstantInt::getNullValue(IRB.getInt8Ty()),
4988                      TailSize, Align(8));
4989   }
4990 
4991   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4992     IRBuilder<> IRB(&I);
4993     Value *VAListTag = I.getArgOperand(0);
4994     const Align Alignment = Align(8);
4995     auto [ShadowPtr, OriginPtr] = MSV.getShadowOriginPtr(
4996         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
4997     // Unpoison the whole __va_list_tag.
4998     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4999                      VAListTagSize, Alignment, false);
5000   }
5001 
5002   void visitVAStartInst(VAStartInst &I) override {
5003     if (F.getCallingConv() == CallingConv::Win64)
5004       return;
5005     VAStartInstrumentationList.push_back(&I);
5006     unpoisonVAListTagForInst(I);
5007   }
5008 
5009   void visitVACopyInst(VACopyInst &I) override {
5010     if (F.getCallingConv() == CallingConv::Win64)
5011       return;
5012     unpoisonVAListTagForInst(I);
5013   }
5014 };
5015 
5016 /// AMD64-specific implementation of VarArgHelper.
5017 struct VarArgAMD64Helper : public VarArgHelperBase {
5018   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
5019   // See a comment in visitCallBase for more details.
5020   static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
5021   static const unsigned AMD64FpEndOffsetSSE = 176;
5022   // If SSE is disabled, fp_offset in va_list is zero.
5023   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
5024 
5025   unsigned AMD64FpEndOffset;
5026   AllocaInst *VAArgTLSCopy = nullptr;
5027   AllocaInst *VAArgTLSOriginCopy = nullptr;
5028   Value *VAArgOverflowSize = nullptr;
5029 
5030   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
5031 
5032   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
5033                     MemorySanitizerVisitor &MSV)
5034       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/24) {
5035     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
5036     for (const auto &Attr : F.getAttributes().getFnAttrs()) {
5037       if (Attr.isStringAttribute() &&
5038           (Attr.getKindAsString() == "target-features")) {
5039         if (Attr.getValueAsString().contains("-sse"))
5040           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
5041         break;
5042       }
5043     }
5044   }
5045 
5046   ArgKind classifyArgument(Value *arg) {
5047     // A very rough approximation of X86_64 argument classification rules.
5048     Type *T = arg->getType();
5049     if (T->isX86_FP80Ty())
5050       return AK_Memory;
5051     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
5052       return AK_FloatingPoint;
5053     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
5054       return AK_GeneralPurpose;
5055     if (T->isPointerTy())
5056       return AK_GeneralPurpose;
5057     return AK_Memory;
5058   }
5059 
5060   // For VarArg functions, store the argument shadow in an ABI-specific format
5061   // that corresponds to va_list layout.
5062   // We do this because Clang lowers va_arg in the frontend, and this pass
5063   // only sees the low level code that deals with va_list internals.
5064   // A much easier alternative (provided that Clang emits va_arg instructions)
5065   // would have been to associate each live instance of va_list with a copy of
5066   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
5067   // order.
5068   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5069     unsigned GpOffset = 0;
5070     unsigned FpOffset = AMD64GpEndOffset;
5071     unsigned OverflowOffset = AMD64FpEndOffset;
5072     const DataLayout &DL = F.getDataLayout();
5073 
5074     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5075       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5076       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
5077       if (IsByVal) {
5078         // ByVal arguments always go to the overflow area.
5079         // Fixed arguments passed through the overflow area will be stepped
5080         // over by va_start, so don't count them towards the offset.
5081         if (IsFixed)
5082           continue;
5083         assert(A->getType()->isPointerTy());
5084         Type *RealTy = CB.getParamByValType(ArgNo);
5085         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
5086         uint64_t AlignedSize = alignTo(ArgSize, 8);
5087         unsigned BaseOffset = OverflowOffset;
5088         Value *ShadowBase =
5089             getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
5090         Value *OriginBase = nullptr;
5091         if (MS.TrackOrigins)
5092           OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset);
5093         OverflowOffset += AlignedSize;
5094 
5095         if (OverflowOffset > kParamTLSSize) {
5096           CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
5097           continue; // We have no space to copy shadow there.
5098         }
5099 
5100         Value *ShadowPtr, *OriginPtr;
5101         std::tie(ShadowPtr, OriginPtr) =
5102             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
5103                                    /*isStore*/ false);
5104         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
5105                          kShadowTLSAlignment, ArgSize);
5106         if (MS.TrackOrigins)
5107           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
5108                            kShadowTLSAlignment, ArgSize);
5109       } else {
5110         ArgKind AK = classifyArgument(A);
5111         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
5112           AK = AK_Memory;
5113         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
5114           AK = AK_Memory;
5115         Value *ShadowBase, *OriginBase = nullptr;
5116         switch (AK) {
5117         case AK_GeneralPurpose:
5118           ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset);
5119           if (MS.TrackOrigins)
5120             OriginBase = getOriginPtrForVAArgument(IRB, GpOffset);
5121           GpOffset += 8;
5122           assert(GpOffset <= kParamTLSSize);
5123           break;
5124         case AK_FloatingPoint:
5125           ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset);
5126           if (MS.TrackOrigins)
5127             OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5128           FpOffset += 16;
5129           assert(FpOffset <= kParamTLSSize);
5130           break;
5131         case AK_Memory:
5132           if (IsFixed)
5133             continue;
5134           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5135           uint64_t AlignedSize = alignTo(ArgSize, 8);
5136           unsigned BaseOffset = OverflowOffset;
5137           ShadowBase =
5138               getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
5139           if (MS.TrackOrigins) {
5140             OriginBase = getOriginPtrForVAArgument(IRB, OverflowOffset);
5141           }
5142           OverflowOffset += AlignedSize;
5143           if (OverflowOffset > kParamTLSSize) {
5144             // We have no space to copy shadow there.
5145             CleanUnusedTLS(IRB, ShadowBase, BaseOffset);
5146             continue;
5147           }
5148         }
5149         // Take fixed arguments into account for GpOffset and FpOffset,
5150         // but don't actually store shadows for them.
5151         // TODO(glider): don't call get*PtrForVAArgument() for them.
5152         if (IsFixed)
5153           continue;
5154         Value *Shadow = MSV.getShadow(A);
5155         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
5156         if (MS.TrackOrigins) {
5157           Value *Origin = MSV.getOrigin(A);
5158           TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
5159           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5160                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
5161         }
5162       }
5163     }
5164     Constant *OverflowSize =
5165         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
5166     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5167   }
5168 
5169   void finalizeInstrumentation() override {
5170     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5171            "finalizeInstrumentation called twice");
5172     if (!VAStartInstrumentationList.empty()) {
5173       // If there is a va_start in this function, make a backup copy of
5174       // va_arg_tls somewhere in the function entry block.
5175       IRBuilder<> IRB(MSV.FnPrologueEnd);
5176       VAArgOverflowSize =
5177           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5178       Value *CopySize = IRB.CreateAdd(
5179           ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), VAArgOverflowSize);
5180       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5181       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5182       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5183                        CopySize, kShadowTLSAlignment, false);
5184 
5185       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5186           Intrinsic::umin, CopySize,
5187           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5188       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5189                        kShadowTLSAlignment, SrcSize);
5190       if (MS.TrackOrigins) {
5191         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5192         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
5193         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
5194                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
5195       }
5196     }
5197 
5198     // Instrument va_start.
5199     // Copy va_list shadow from the backup copy of the TLS contents.
5200     for (CallInst *OrigInst : VAStartInstrumentationList) {
5201       NextNodeIRBuilder IRB(OrigInst);
5202       Value *VAListTag = OrigInst->getArgOperand(0);
5203 
5204       Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
5205       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5206           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5207                         ConstantInt::get(MS.IntptrTy, 16)),
5208           PointerType::get(RegSaveAreaPtrTy, 0));
5209       Value *RegSaveAreaPtr =
5210           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5211       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5212       const Align Alignment = Align(16);
5213       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5214           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5215                                  Alignment, /*isStore*/ true);
5216       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5217                        AMD64FpEndOffset);
5218       if (MS.TrackOrigins)
5219         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5220                          Alignment, AMD64FpEndOffset);
5221       Type *OverflowArgAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
5222       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5223           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5224                         ConstantInt::get(MS.IntptrTy, 8)),
5225           PointerType::get(OverflowArgAreaPtrTy, 0));
5226       Value *OverflowArgAreaPtr =
5227           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5228       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5229       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5230           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5231                                  Alignment, /*isStore*/ true);
5232       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5233                                              AMD64FpEndOffset);
5234       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5235                        VAArgOverflowSize);
5236       if (MS.TrackOrigins) {
5237         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5238                                         AMD64FpEndOffset);
5239         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5240                          VAArgOverflowSize);
5241       }
5242     }
5243   }
5244 };
5245 
5246 /// MIPS64-specific implementation of VarArgHelper.
5247 /// NOTE: This is also used for LoongArch64.
5248 struct VarArgMIPS64Helper : public VarArgHelperBase {
5249   AllocaInst *VAArgTLSCopy = nullptr;
5250   Value *VAArgSize = nullptr;
5251 
5252   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
5253                      MemorySanitizerVisitor &MSV)
5254       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/8) {}
5255 
5256   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5257     unsigned VAArgOffset = 0;
5258     const DataLayout &DL = F.getDataLayout();
5259     for (Value *A :
5260          llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) {
5261       Triple TargetTriple(F.getParent()->getTargetTriple());
5262       Value *Base;
5263       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5264       if (TargetTriple.getArch() == Triple::mips64) {
5265         // Adjusting the shadow for argument with size < 8 to match the
5266         // placement of bits in big endian system
5267         if (ArgSize < 8)
5268           VAArgOffset += (8 - ArgSize);
5269       }
5270       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
5271       VAArgOffset += ArgSize;
5272       VAArgOffset = alignTo(VAArgOffset, 8);
5273       if (!Base)
5274         continue;
5275       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5276     }
5277 
5278     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
5279     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
5280     // a new class member i.e. it is the total size of all VarArgs.
5281     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
5282   }
5283 
5284   void finalizeInstrumentation() override {
5285     assert(!VAArgSize && !VAArgTLSCopy &&
5286            "finalizeInstrumentation called twice");
5287     IRBuilder<> IRB(MSV.FnPrologueEnd);
5288     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5289     Value *CopySize =
5290         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize);
5291 
5292     if (!VAStartInstrumentationList.empty()) {
5293       // If there is a va_start in this function, make a backup copy of
5294       // va_arg_tls somewhere in the function entry block.
5295       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5296       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5297       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5298                        CopySize, kShadowTLSAlignment, false);
5299 
5300       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5301           Intrinsic::umin, CopySize,
5302           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5303       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5304                        kShadowTLSAlignment, SrcSize);
5305     }
5306 
5307     // Instrument va_start.
5308     // Copy va_list shadow from the backup copy of the TLS contents.
5309     for (CallInst *OrigInst : VAStartInstrumentationList) {
5310       NextNodeIRBuilder IRB(OrigInst);
5311       Value *VAListTag = OrigInst->getArgOperand(0);
5312       Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
5313       Value *RegSaveAreaPtrPtr =
5314           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5315                              PointerType::get(RegSaveAreaPtrTy, 0));
5316       Value *RegSaveAreaPtr =
5317           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5318       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5319       const Align Alignment = Align(8);
5320       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5321           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5322                                  Alignment, /*isStore*/ true);
5323       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5324                        CopySize);
5325     }
5326   }
5327 };
5328 
5329 /// AArch64-specific implementation of VarArgHelper.
5330 struct VarArgAArch64Helper : public VarArgHelperBase {
5331   static const unsigned kAArch64GrArgSize = 64;
5332   static const unsigned kAArch64VrArgSize = 128;
5333 
5334   static const unsigned AArch64GrBegOffset = 0;
5335   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
5336   // Make VR space aligned to 16 bytes.
5337   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
5338   static const unsigned AArch64VrEndOffset =
5339       AArch64VrBegOffset + kAArch64VrArgSize;
5340   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
5341 
5342   AllocaInst *VAArgTLSCopy = nullptr;
5343   Value *VAArgOverflowSize = nullptr;
5344 
5345   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
5346 
5347   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
5348                       MemorySanitizerVisitor &MSV)
5349       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/32) {}
5350 
5351   // A very rough approximation of aarch64 argument classification rules.
5352   std::pair<ArgKind, uint64_t> classifyArgument(Type *T) {
5353     if (T->isIntOrPtrTy() && T->getPrimitiveSizeInBits() <= 64)
5354       return {AK_GeneralPurpose, 1};
5355     if (T->isFloatingPointTy() && T->getPrimitiveSizeInBits() <= 128)
5356       return {AK_FloatingPoint, 1};
5357 
5358     if (T->isArrayTy()) {
5359       auto R = classifyArgument(T->getArrayElementType());
5360       R.second *= T->getScalarType()->getArrayNumElements();
5361       return R;
5362     }
5363 
5364     if (const FixedVectorType *FV = dyn_cast<FixedVectorType>(T)) {
5365       auto R = classifyArgument(FV->getScalarType());
5366       R.second *= FV->getNumElements();
5367       return R;
5368     }
5369 
5370     LLVM_DEBUG(errs() << "Unknown vararg type: " << *T << "\n");
5371     return {AK_Memory, 0};
5372   }
5373 
5374   // The instrumentation stores the argument shadow in a non ABI-specific
5375   // format because it does not know which argument is named (since Clang,
5376   // like x86_64 case, lowers the va_args in the frontend and this pass only
5377   // sees the low level code that deals with va_list internals).
5378   // The first seven GR registers are saved in the first 56 bytes of the
5379   // va_arg tls arra, followed by the first 8 FP/SIMD registers, and then
5380   // the remaining arguments.
5381   // Using constant offset within the va_arg TLS array allows fast copy
5382   // in the finalize instrumentation.
5383   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5384     unsigned GrOffset = AArch64GrBegOffset;
5385     unsigned VrOffset = AArch64VrBegOffset;
5386     unsigned OverflowOffset = AArch64VAEndOffset;
5387 
5388     const DataLayout &DL = F.getDataLayout();
5389     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5390       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5391       auto [AK, RegNum] = classifyArgument(A->getType());
5392       if (AK == AK_GeneralPurpose &&
5393           (GrOffset + RegNum * 8) > AArch64GrEndOffset)
5394         AK = AK_Memory;
5395       if (AK == AK_FloatingPoint &&
5396           (VrOffset + RegNum * 16) > AArch64VrEndOffset)
5397         AK = AK_Memory;
5398       Value *Base;
5399       switch (AK) {
5400       case AK_GeneralPurpose:
5401         Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
5402         GrOffset += 8 * RegNum;
5403         break;
5404       case AK_FloatingPoint:
5405         Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
5406         VrOffset += 16 * RegNum;
5407         break;
5408       case AK_Memory:
5409         // Don't count fixed arguments in the overflow area - va_start will
5410         // skip right over them.
5411         if (IsFixed)
5412           continue;
5413         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5414         uint64_t AlignedSize = alignTo(ArgSize, 8);
5415         unsigned BaseOffset = OverflowOffset;
5416         Base = getShadowPtrForVAArgument(A->getType(), IRB, BaseOffset);
5417         OverflowOffset += AlignedSize;
5418         if (OverflowOffset > kParamTLSSize) {
5419           // We have no space to copy shadow there.
5420           CleanUnusedTLS(IRB, Base, BaseOffset);
5421           continue;
5422         }
5423         break;
5424       }
5425       // Count Gp/Vr fixed arguments to their respective offsets, but don't
5426       // bother to actually store a shadow.
5427       if (IsFixed)
5428         continue;
5429       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5430     }
5431     Constant *OverflowSize =
5432         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
5433     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5434   }
5435 
5436   // Retrieve a va_list field of 'void*' size.
5437   Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5438     Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
5439         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5440                       ConstantInt::get(MS.IntptrTy, offset)),
5441         PointerType::get(*MS.C, 0));
5442     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
5443   }
5444 
5445   // Retrieve a va_list field of 'int' size.
5446   Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5447     Value *SaveAreaPtr = IRB.CreateIntToPtr(
5448         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5449                       ConstantInt::get(MS.IntptrTy, offset)),
5450         PointerType::get(*MS.C, 0));
5451     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
5452     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
5453   }
5454 
5455   void finalizeInstrumentation() override {
5456     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5457            "finalizeInstrumentation called twice");
5458     if (!VAStartInstrumentationList.empty()) {
5459       // If there is a va_start in this function, make a backup copy of
5460       // va_arg_tls somewhere in the function entry block.
5461       IRBuilder<> IRB(MSV.FnPrologueEnd);
5462       VAArgOverflowSize =
5463           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5464       Value *CopySize = IRB.CreateAdd(
5465           ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), VAArgOverflowSize);
5466       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5467       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5468       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5469                        CopySize, kShadowTLSAlignment, false);
5470 
5471       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5472           Intrinsic::umin, CopySize,
5473           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5474       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5475                        kShadowTLSAlignment, SrcSize);
5476     }
5477 
5478     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
5479     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
5480 
5481     // Instrument va_start, copy va_list shadow from the backup copy of
5482     // the TLS contents.
5483     for (CallInst *OrigInst : VAStartInstrumentationList) {
5484       NextNodeIRBuilder IRB(OrigInst);
5485 
5486       Value *VAListTag = OrigInst->getArgOperand(0);
5487 
5488       // The variadic ABI for AArch64 creates two areas to save the incoming
5489       // argument registers (one for 64-bit general register xn-x7 and another
5490       // for 128-bit FP/SIMD vn-v7).
5491       // We need then to propagate the shadow arguments on both regions
5492       // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
5493       // The remaining arguments are saved on shadow for 'va::stack'.
5494       // One caveat is it requires only to propagate the non-named arguments,
5495       // however on the call site instrumentation 'all' the arguments are
5496       // saved. So to copy the shadow values from the va_arg TLS array
5497       // we need to adjust the offset for both GR and VR fields based on
5498       // the __{gr,vr}_offs value (since they are stores based on incoming
5499       // named arguments).
5500       Type *RegSaveAreaPtrTy = IRB.getPtrTy();
5501 
5502       // Read the stack pointer from the va_list.
5503       Value *StackSaveAreaPtr =
5504           IRB.CreateIntToPtr(getVAField64(IRB, VAListTag, 0), RegSaveAreaPtrTy);
5505 
5506       // Read both the __gr_top and __gr_off and add them up.
5507       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
5508       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
5509 
5510       Value *GrRegSaveAreaPtr = IRB.CreateIntToPtr(
5511           IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea), RegSaveAreaPtrTy);
5512 
5513       // Read both the __vr_top and __vr_off and add them up.
5514       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
5515       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
5516 
5517       Value *VrRegSaveAreaPtr = IRB.CreateIntToPtr(
5518           IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea), RegSaveAreaPtrTy);
5519 
5520       // It does not know how many named arguments is being used and, on the
5521       // callsite all the arguments were saved.  Since __gr_off is defined as
5522       // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
5523       // argument by ignoring the bytes of shadow from named arguments.
5524       Value *GrRegSaveAreaShadowPtrOff =
5525           IRB.CreateAdd(GrArgSize, GrOffSaveArea);
5526 
5527       Value *GrRegSaveAreaShadowPtr =
5528           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5529                                  Align(8), /*isStore*/ true)
5530               .first;
5531 
5532       Value *GrSrcPtr =
5533           IRB.CreateInBoundsPtrAdd(VAArgTLSCopy, GrRegSaveAreaShadowPtrOff);
5534       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
5535 
5536       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
5537                        GrCopySize);
5538 
5539       // Again, but for FP/SIMD values.
5540       Value *VrRegSaveAreaShadowPtrOff =
5541           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
5542 
5543       Value *VrRegSaveAreaShadowPtr =
5544           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5545                                  Align(8), /*isStore*/ true)
5546               .first;
5547 
5548       Value *VrSrcPtr = IRB.CreateInBoundsPtrAdd(
5549           IRB.CreateInBoundsPtrAdd(VAArgTLSCopy,
5550                                    IRB.getInt32(AArch64VrBegOffset)),
5551           VrRegSaveAreaShadowPtrOff);
5552       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
5553 
5554       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
5555                        VrCopySize);
5556 
5557       // And finally for remaining arguments.
5558       Value *StackSaveAreaShadowPtr =
5559           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
5560                                  Align(16), /*isStore*/ true)
5561               .first;
5562 
5563       Value *StackSrcPtr = IRB.CreateInBoundsPtrAdd(
5564           VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset));
5565 
5566       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
5567                        Align(16), VAArgOverflowSize);
5568     }
5569   }
5570 };
5571 
5572 /// PowerPC64-specific implementation of VarArgHelper.
5573 struct VarArgPowerPC64Helper : public VarArgHelperBase {
5574   AllocaInst *VAArgTLSCopy = nullptr;
5575   Value *VAArgSize = nullptr;
5576 
5577   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
5578                         MemorySanitizerVisitor &MSV)
5579       : VarArgHelperBase(F, MS, MSV, /*VAListTagSize=*/8) {}
5580 
5581   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5582     // For PowerPC, we need to deal with alignment of stack arguments -
5583     // they are mostly aligned to 8 bytes, but vectors and i128 arrays
5584     // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
5585     // For that reason, we compute current offset from stack pointer (which is
5586     // always properly aligned), and offset for the first vararg, then subtract
5587     // them.
5588     unsigned VAArgBase;
5589     Triple TargetTriple(F.getParent()->getTargetTriple());
5590     // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
5591     // and 32 bytes for ABIv2.  This is usually determined by target
5592     // endianness, but in theory could be overridden by function attribute.
5593     if (TargetTriple.getArch() == Triple::ppc64)
5594       VAArgBase = 48;
5595     else
5596       VAArgBase = 32;
5597     unsigned VAArgOffset = VAArgBase;
5598     const DataLayout &DL = F.getDataLayout();
5599     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5600       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5601       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
5602       if (IsByVal) {
5603         assert(A->getType()->isPointerTy());
5604         Type *RealTy = CB.getParamByValType(ArgNo);
5605         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
5606         Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8));
5607         if (ArgAlign < 8)
5608           ArgAlign = Align(8);
5609         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
5610         if (!IsFixed) {
5611           Value *Base = getShadowPtrForVAArgument(
5612               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
5613           if (Base) {
5614             Value *AShadowPtr, *AOriginPtr;
5615             std::tie(AShadowPtr, AOriginPtr) =
5616                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
5617                                        kShadowTLSAlignment, /*isStore*/ false);
5618 
5619             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
5620                              kShadowTLSAlignment, ArgSize);
5621           }
5622         }
5623         VAArgOffset += alignTo(ArgSize, Align(8));
5624       } else {
5625         Value *Base;
5626         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5627         Align ArgAlign = Align(8);
5628         if (A->getType()->isArrayTy()) {
5629           // Arrays are aligned to element size, except for long double
5630           // arrays, which are aligned to 8 bytes.
5631           Type *ElementTy = A->getType()->getArrayElementType();
5632           if (!ElementTy->isPPC_FP128Ty())
5633             ArgAlign = Align(DL.getTypeAllocSize(ElementTy));
5634         } else if (A->getType()->isVectorTy()) {
5635           // Vectors are naturally aligned.
5636           ArgAlign = Align(ArgSize);
5637         }
5638         if (ArgAlign < 8)
5639           ArgAlign = Align(8);
5640         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
5641         if (DL.isBigEndian()) {
5642           // Adjusting the shadow for argument with size < 8 to match the
5643           // placement of bits in big endian system
5644           if (ArgSize < 8)
5645             VAArgOffset += (8 - ArgSize);
5646         }
5647         if (!IsFixed) {
5648           Base = getShadowPtrForVAArgument(A->getType(), IRB,
5649                                            VAArgOffset - VAArgBase, ArgSize);
5650           if (Base)
5651             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5652         }
5653         VAArgOffset += ArgSize;
5654         VAArgOffset = alignTo(VAArgOffset, Align(8));
5655       }
5656       if (IsFixed)
5657         VAArgBase = VAArgOffset;
5658     }
5659 
5660     Constant *TotalVAArgSize =
5661         ConstantInt::get(IRB.getInt64Ty(), VAArgOffset - VAArgBase);
5662     // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of
5663     // a new class member i.e. it is the total size of all VarArgs.
5664     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
5665   }
5666 
5667   void finalizeInstrumentation() override {
5668     assert(!VAArgSize && !VAArgTLSCopy &&
5669            "finalizeInstrumentation called twice");
5670     IRBuilder<> IRB(MSV.FnPrologueEnd);
5671     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5672     Value *CopySize =
5673         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize);
5674 
5675     if (!VAStartInstrumentationList.empty()) {
5676       // If there is a va_start in this function, make a backup copy of
5677       // va_arg_tls somewhere in the function entry block.
5678 
5679       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5680       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5681       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5682                        CopySize, kShadowTLSAlignment, false);
5683 
5684       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5685           Intrinsic::umin, CopySize,
5686           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5687       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5688                        kShadowTLSAlignment, SrcSize);
5689     }
5690 
5691     // Instrument va_start.
5692     // Copy va_list shadow from the backup copy of the TLS contents.
5693     for (CallInst *OrigInst : VAStartInstrumentationList) {
5694       NextNodeIRBuilder IRB(OrigInst);
5695       Value *VAListTag = OrigInst->getArgOperand(0);
5696       Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
5697       Value *RegSaveAreaPtrPtr =
5698           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5699                              PointerType::get(RegSaveAreaPtrTy, 0));
5700       Value *RegSaveAreaPtr =
5701           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5702       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5703       const Align Alignment = Align(8);
5704       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5705           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5706                                  Alignment, /*isStore*/ true);
5707       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5708                        CopySize);
5709     }
5710   }
5711 };
5712 
5713 /// SystemZ-specific implementation of VarArgHelper.
5714 struct VarArgSystemZHelper : public VarArgHelperBase {
5715   static const unsigned SystemZGpOffset = 16;
5716   static const unsigned SystemZGpEndOffset = 56;
5717   static const unsigned SystemZFpOffset = 128;
5718   static const unsigned SystemZFpEndOffset = 160;
5719   static const unsigned SystemZMaxVrArgs = 8;
5720   static const unsigned SystemZRegSaveAreaSize = 160;
5721   static const unsigned SystemZOverflowOffset = 160;
5722   static const unsigned SystemZVAListTagSize = 32;
5723   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
5724   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
5725 
5726   bool IsSoftFloatABI;
5727   AllocaInst *VAArgTLSCopy = nullptr;
5728   AllocaInst *VAArgTLSOriginCopy = nullptr;
5729   Value *VAArgOverflowSize = nullptr;
5730 
5731   enum class ArgKind {
5732     GeneralPurpose,
5733     FloatingPoint,
5734     Vector,
5735     Memory,
5736     Indirect,
5737   };
5738 
5739   enum class ShadowExtension { None, Zero, Sign };
5740 
5741   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
5742                       MemorySanitizerVisitor &MSV)
5743       : VarArgHelperBase(F, MS, MSV, SystemZVAListTagSize),
5744         IsSoftFloatABI(F.getFnAttribute("use-soft-float").getValueAsBool()) {}
5745 
5746   ArgKind classifyArgument(Type *T) {
5747     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
5748     // only a few possibilities of what it can be. In particular, enums, single
5749     // element structs and large types have already been taken care of.
5750 
5751     // Some i128 and fp128 arguments are converted to pointers only in the
5752     // back end.
5753     if (T->isIntegerTy(128) || T->isFP128Ty())
5754       return ArgKind::Indirect;
5755     if (T->isFloatingPointTy())
5756       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
5757     if (T->isIntegerTy() || T->isPointerTy())
5758       return ArgKind::GeneralPurpose;
5759     if (T->isVectorTy())
5760       return ArgKind::Vector;
5761     return ArgKind::Memory;
5762   }
5763 
5764   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
5765     // ABI says: "One of the simple integer types no more than 64 bits wide.
5766     // ... If such an argument is shorter than 64 bits, replace it by a full
5767     // 64-bit integer representing the same number, using sign or zero
5768     // extension". Shadow for an integer argument has the same type as the
5769     // argument itself, so it can be sign or zero extended as well.
5770     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
5771     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
5772     if (ZExt) {
5773       assert(!SExt);
5774       return ShadowExtension::Zero;
5775     }
5776     if (SExt) {
5777       assert(!ZExt);
5778       return ShadowExtension::Sign;
5779     }
5780     return ShadowExtension::None;
5781   }
5782 
5783   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5784     unsigned GpOffset = SystemZGpOffset;
5785     unsigned FpOffset = SystemZFpOffset;
5786     unsigned VrIndex = 0;
5787     unsigned OverflowOffset = SystemZOverflowOffset;
5788     const DataLayout &DL = F.getDataLayout();
5789     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5790       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5791       // SystemZABIInfo does not produce ByVal parameters.
5792       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
5793       Type *T = A->getType();
5794       ArgKind AK = classifyArgument(T);
5795       if (AK == ArgKind::Indirect) {
5796         T = PointerType::get(T, 0);
5797         AK = ArgKind::GeneralPurpose;
5798       }
5799       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
5800         AK = ArgKind::Memory;
5801       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
5802         AK = ArgKind::Memory;
5803       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
5804         AK = ArgKind::Memory;
5805       Value *ShadowBase = nullptr;
5806       Value *OriginBase = nullptr;
5807       ShadowExtension SE = ShadowExtension::None;
5808       switch (AK) {
5809       case ArgKind::GeneralPurpose: {
5810         // Always keep track of GpOffset, but store shadow only for varargs.
5811         uint64_t ArgSize = 8;
5812         if (GpOffset + ArgSize <= kParamTLSSize) {
5813           if (!IsFixed) {
5814             SE = getShadowExtension(CB, ArgNo);
5815             uint64_t GapSize = 0;
5816             if (SE == ShadowExtension::None) {
5817               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5818               assert(ArgAllocSize <= ArgSize);
5819               GapSize = ArgSize - ArgAllocSize;
5820             }
5821             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
5822             if (MS.TrackOrigins)
5823               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
5824           }
5825           GpOffset += ArgSize;
5826         } else {
5827           GpOffset = kParamTLSSize;
5828         }
5829         break;
5830       }
5831       case ArgKind::FloatingPoint: {
5832         // Always keep track of FpOffset, but store shadow only for varargs.
5833         uint64_t ArgSize = 8;
5834         if (FpOffset + ArgSize <= kParamTLSSize) {
5835           if (!IsFixed) {
5836             // PoP says: "A short floating-point datum requires only the
5837             // left-most 32 bit positions of a floating-point register".
5838             // Therefore, in contrast to AK_GeneralPurpose and AK_Memory,
5839             // don't extend shadow and don't mind the gap.
5840             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
5841             if (MS.TrackOrigins)
5842               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5843           }
5844           FpOffset += ArgSize;
5845         } else {
5846           FpOffset = kParamTLSSize;
5847         }
5848         break;
5849       }
5850       case ArgKind::Vector: {
5851         // Keep track of VrIndex. No need to store shadow, since vector varargs
5852         // go through AK_Memory.
5853         assert(IsFixed);
5854         VrIndex++;
5855         break;
5856       }
5857       case ArgKind::Memory: {
5858         // Keep track of OverflowOffset and store shadow only for varargs.
5859         // Ignore fixed args, since we need to copy only the vararg portion of
5860         // the overflow area shadow.
5861         if (!IsFixed) {
5862           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5863           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5864           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5865             SE = getShadowExtension(CB, ArgNo);
5866             uint64_t GapSize =
5867                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5868             ShadowBase =
5869                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5870             if (MS.TrackOrigins)
5871               OriginBase =
5872                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5873             OverflowOffset += ArgSize;
5874           } else {
5875             OverflowOffset = kParamTLSSize;
5876           }
5877         }
5878         break;
5879       }
5880       case ArgKind::Indirect:
5881         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5882       }
5883       if (ShadowBase == nullptr)
5884         continue;
5885       Value *Shadow = MSV.getShadow(A);
5886       if (SE != ShadowExtension::None)
5887         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5888                                       /*Signed*/ SE == ShadowExtension::Sign);
5889       ShadowBase = IRB.CreateIntToPtr(
5890           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5891       IRB.CreateStore(Shadow, ShadowBase);
5892       if (MS.TrackOrigins) {
5893         Value *Origin = MSV.getOrigin(A);
5894         TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
5895         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5896                         kMinOriginAlignment);
5897       }
5898     }
5899     Constant *OverflowSize = ConstantInt::get(
5900         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5901     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5902   }
5903 
5904   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5905     Type *RegSaveAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
5906     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5907         IRB.CreateAdd(
5908             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5909             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5910         PointerType::get(RegSaveAreaPtrTy, 0));
5911     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5912     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5913     const Align Alignment = Align(8);
5914     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5915         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5916                                /*isStore*/ true);
5917     // TODO(iii): copy only fragments filled by visitCallBase()
5918     // TODO(iii): support packed-stack && !use-soft-float
5919     // For use-soft-float functions, it is enough to copy just the GPRs.
5920     unsigned RegSaveAreaSize =
5921         IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
5922     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5923                      RegSaveAreaSize);
5924     if (MS.TrackOrigins)
5925       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5926                        Alignment, RegSaveAreaSize);
5927   }
5928 
5929   // FIXME: This implementation limits OverflowOffset to kParamTLSSize, so we
5930   // don't know real overflow size and can't clear shadow beyond kParamTLSSize.
5931   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5932     Type *OverflowArgAreaPtrTy = PointerType::getUnqual(*MS.C); // i64*
5933     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5934         IRB.CreateAdd(
5935             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5936             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5937         PointerType::get(OverflowArgAreaPtrTy, 0));
5938     Value *OverflowArgAreaPtr =
5939         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5940     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5941     const Align Alignment = Align(8);
5942     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5943         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5944                                Alignment, /*isStore*/ true);
5945     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5946                                            SystemZOverflowOffset);
5947     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5948                      VAArgOverflowSize);
5949     if (MS.TrackOrigins) {
5950       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5951                                       SystemZOverflowOffset);
5952       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5953                        VAArgOverflowSize);
5954     }
5955   }
5956 
5957   void finalizeInstrumentation() override {
5958     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5959            "finalizeInstrumentation called twice");
5960     if (!VAStartInstrumentationList.empty()) {
5961       // If there is a va_start in this function, make a backup copy of
5962       // va_arg_tls somewhere in the function entry block.
5963       IRBuilder<> IRB(MSV.FnPrologueEnd);
5964       VAArgOverflowSize =
5965           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5966       Value *CopySize =
5967           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5968                         VAArgOverflowSize);
5969       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5970       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5971       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5972                        CopySize, kShadowTLSAlignment, false);
5973 
5974       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5975           Intrinsic::umin, CopySize,
5976           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5977       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5978                        kShadowTLSAlignment, SrcSize);
5979       if (MS.TrackOrigins) {
5980         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5981         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
5982         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
5983                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
5984       }
5985     }
5986 
5987     // Instrument va_start.
5988     // Copy va_list shadow from the backup copy of the TLS contents.
5989     for (CallInst *OrigInst : VAStartInstrumentationList) {
5990       NextNodeIRBuilder IRB(OrigInst);
5991       Value *VAListTag = OrigInst->getArgOperand(0);
5992       copyRegSaveArea(IRB, VAListTag);
5993       copyOverflowArea(IRB, VAListTag);
5994     }
5995   }
5996 };
5997 
// LoongArch64 is not MIPS, but its current varargs calling convention matches
// the MIPS64 one, so the MIPS64 helper is reused as-is.
6000 using VarArgLoongArch64Helper = VarArgMIPS64Helper;
6001 
6002 /// A no-op implementation of VarArgHelper.
6003 struct VarArgNoOpHelper : public VarArgHelper {
6004   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
6005                    MemorySanitizerVisitor &MSV) {}
6006 
6007   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
6008 
6009   void visitVAStartInst(VAStartInst &I) override {}
6010 
6011   void visitVACopyInst(VACopyInst &I) override {}
6012 
6013   void finalizeInstrumentation() override {}
6014 };
6015 
6016 } // end anonymous namespace
6017 
6018 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
6019                                         MemorySanitizerVisitor &Visitor) {
6020   // VarArg handling is only implemented on AMD64. False positives are possible
6021   // on other platforms.
6022   Triple TargetTriple(Func.getParent()->getTargetTriple());
6023   if (TargetTriple.getArch() == Triple::x86_64)
6024     return new VarArgAMD64Helper(Func, Msan, Visitor);
6025   else if (TargetTriple.isMIPS64())
6026     return new VarArgMIPS64Helper(Func, Msan, Visitor);
6027   else if (TargetTriple.getArch() == Triple::aarch64)
6028     return new VarArgAArch64Helper(Func, Msan, Visitor);
6029   else if (TargetTriple.getArch() == Triple::ppc64 ||
6030            TargetTriple.getArch() == Triple::ppc64le)
6031     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
6032   else if (TargetTriple.getArch() == Triple::systemz)
6033     return new VarArgSystemZHelper(Func, Msan, Visitor);
6034   else if (TargetTriple.isLoongArch64())
6035     return new VarArgLoongArch64Helper(Func, Msan, Visitor);
6036   else
6037     return new VarArgNoOpHelper(Func, Msan, Visitor);
6038 }
6039 
6040 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
6041   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
6042     return false;
6043 
6044   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
6045     return false;
6046 
6047   MemorySanitizerVisitor Visitor(F, *this, TLI);
6048 
6049   // Clear out memory attributes.
6050   AttributeMask B;
6051   B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
6052   F.removeFnAttrs(B);
6053 
6054   return Visitor.runOnFunction();
6055 }
6056