xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the instrumentation pass for the numerical sanitizer.
10 // Conceptually the pass injects shadow computations using higher precision
11 // types and inserts consistency checks. For details see the paper
12 // https://arxiv.org/abs/2102.12782.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
17 
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/Analysis/TargetLibraryInfo.h"
23 #include "llvm/Analysis/ValueTracking.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Regex.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39 #include "llvm/Transforms/Utils/Instrumentation.h"
40 #include "llvm/Transforms/Utils/Local.h"
41 #include "llvm/Transforms/Utils/ModuleUtils.h"
42 
43 #include <cstdint>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "nsan"
48 
49 STATISTIC(NumInstrumentedFTLoads,
50           "Number of instrumented floating-point loads");
51 
52 STATISTIC(NumInstrumentedFTCalls,
53           "Number of instrumented floating-point calls");
54 STATISTIC(NumInstrumentedFTRets,
55           "Number of instrumented floating-point returns");
56 STATISTIC(NumInstrumentedFTStores,
57           "Number of instrumented floating-point stores");
58 STATISTIC(NumInstrumentedNonFTStores,
59           "Number of instrumented non floating-point stores");
60 STATISTIC(
61     NumInstrumentedNonFTMemcpyStores,
62     "Number of instrumented non floating-point stores with memcpy semantics");
63 STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
64 
65 // Using smaller shadow types types can help improve speed. For example, `dlq`
66 // is 3x slower to 5x faster in opt mode and 2-6x faster in dbg mode compared to
67 // `dqq`.
68 static cl::opt<std::string> ClShadowMapping(
69     "nsan-shadow-type-mapping", cl::init("dqq"),
70     cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
71              "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
72              "ppc_fp128 (extended double) respectively. The default is to "
73              "shadow `float` as `double`, and `double` and `x86_fp80` as "
74              "`fp128`"),
75     cl::Hidden);
76 
77 static cl::opt<bool>
78     ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
79                      cl::desc("Instrument floating-point comparisons"),
80                      cl::Hidden);
81 
82 static cl::opt<std::string> ClCheckFunctionsFilter(
83     "check-functions-filter",
84     cl::desc("Only emit checks for arguments of functions "
85              "whose names match the given regular expression"),
86     cl::value_desc("regex"));
87 
88 static cl::opt<bool> ClTruncateFCmpEq(
89     "nsan-truncate-fcmp-eq", cl::init(true),
90     cl::desc(
91         "This flag controls the behaviour of fcmp equality comparisons."
92         "For equality comparisons such as `x == 0.0f`, we can perform the "
93         "shadow check in the shadow (`x_shadow == 0.0) == (x == 0.0f)`) or app "
94         " domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). This helps "
95         "catch the case when `x_shadow` is accurate enough (and therefore "
96         "close enough to zero) so that `trunc(x_shadow)` is zero even though "
97         "both `x` and `x_shadow` are not"),
98     cl::Hidden);
99 
100 // When there is external, uninstrumented code writing to memory, the shadow
101 // memory can get out of sync with the application memory. Enabling this flag
102 // emits consistency checks for loads to catch this situation.
103 // When everything is instrumented, this is not strictly necessary because any
104 // load should have a corresponding store, but can help debug cases when the
105 // framework did a bad job at tracking shadow memory modifications by failing on
106 // load rather than store.
107 // TODO: provide a way to resume computations from the FT value when the load
108 // is inconsistent. This ensures that further computations are not polluted.
109 static cl::opt<bool> ClCheckLoads("nsan-check-loads",
110                                   cl::desc("Check floating-point load"),
111                                   cl::Hidden);
112 
113 static cl::opt<bool> ClCheckStores("nsan-check-stores", cl::init(true),
114                                    cl::desc("Check floating-point stores"),
115                                    cl::Hidden);
116 
117 static cl::opt<bool> ClCheckRet("nsan-check-ret", cl::init(true),
118                                 cl::desc("Check floating-point return values"),
119                                 cl::Hidden);
120 
121 // LLVM may store constant floats as bitcasted ints.
122 // It's not really necessary to shadow such stores,
123 // if the shadow value is unknown the framework will re-extend it on load
124 // anyway. Moreover, because of size collisions (e.g. bf16 vs f16) it is
125 // impossible to determine the floating-point type based on the size.
126 // However, for debugging purposes it can be useful to model such stores.
127 static cl::opt<bool> ClPropagateNonFTConstStoresAsFT(
128     "nsan-propagate-non-ft-const-stores-as-ft",
129     cl::desc(
130         "Propagate non floating-point const stores as floating point values."
131         "For debugging purposes only"),
132     cl::Hidden);
133 
134 constexpr StringLiteral kNsanModuleCtorName("nsan.module_ctor");
135 constexpr StringLiteral kNsanInitName("__nsan_init");
136 
137 // The following values must be kept in sync with the runtime.
138 constexpr int kShadowScale = 2;
139 constexpr int kMaxVectorWidth = 8;
140 constexpr int kMaxNumArgs = 128;
141 constexpr int kMaxShadowTypeSizeBytes = 16; // fp128
142 
143 namespace {
144 
145 // Defines the characteristics (type id, type, and floating-point semantics)
146 // attached for all possible shadow types.
147 class ShadowTypeConfig {
148 public:
149   static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
150 
151   // The LLVM Type corresponding to the shadow type.
152   virtual Type *getType(LLVMContext &Context) const = 0;
153 
154   // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
155   virtual char getNsanTypeId() const = 0;
156 
157   virtual ~ShadowTypeConfig() = default;
158 };
159 
160 template <char NsanTypeId>
161 class ShadowTypeConfigImpl : public ShadowTypeConfig {
162 public:
getNsanTypeId() const163   char getNsanTypeId() const override { return NsanTypeId; }
164   static constexpr const char kNsanTypeId = NsanTypeId;
165 };
166 
167 // `double` (`d`) shadow type.
168 class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
getType(LLVMContext & Context) const169   Type *getType(LLVMContext &Context) const override {
170     return Type::getDoubleTy(Context);
171   }
172 };
173 
174 // `x86_fp80` (`l`) shadow type: X86 long double.
175 class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
getType(LLVMContext & Context) const176   Type *getType(LLVMContext &Context) const override {
177     return Type::getX86_FP80Ty(Context);
178   }
179 };
180 
181 // `fp128` (`q`) shadow type.
182 class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
getType(LLVMContext & Context) const183   Type *getType(LLVMContext &Context) const override {
184     return Type::getFP128Ty(Context);
185   }
186 };
187 
188 // `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
189 class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
getType(LLVMContext & Context) const190   Type *getType(LLVMContext &Context) const override {
191     return Type::getPPC_FP128Ty(Context);
192   }
193 };
194 
195 // Creates a ShadowTypeConfig given its type id.
196 std::unique_ptr<ShadowTypeConfig>
fromNsanTypeId(const char TypeId)197 ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
198   switch (TypeId) {
199   case F64ShadowConfig::kNsanTypeId:
200     return std::make_unique<F64ShadowConfig>();
201   case F80ShadowConfig::kNsanTypeId:
202     return std::make_unique<F80ShadowConfig>();
203   case F128ShadowConfig::kNsanTypeId:
204     return std::make_unique<F128ShadowConfig>();
205   case PPC128ShadowConfig::kNsanTypeId:
206     return std::make_unique<PPC128ShadowConfig>();
207   }
208   report_fatal_error("nsan: invalid shadow type id '" + Twine(TypeId) + "'");
209 }
210 
// The application floating-point value types that can be shadowed. The
// enumerators double as array indices, which is why this is a plain `enum`
// rather than an `enum class`; `kNumValueTypes` is the number of real entries.
enum FTValueType {
  kFloat = 0,
  kDouble = 1,
  kLongDouble = 2,
  kNumValueTypes = 3
};
214 
215 // If `FT` corresponds to a primitive FTValueType, return it.
ftValueTypeFromType(Type * FT)216 static std::optional<FTValueType> ftValueTypeFromType(Type *FT) {
217   if (FT->isFloatTy())
218     return kFloat;
219   if (FT->isDoubleTy())
220     return kDouble;
221   if (FT->isX86_FP80Ty())
222     return kLongDouble;
223   return {};
224 }
225 
226 // Returns the LLVM type for an FTValueType.
typeFromFTValueType(FTValueType VT,LLVMContext & Context)227 static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
228   switch (VT) {
229   case kFloat:
230     return Type::getFloatTy(Context);
231   case kDouble:
232     return Type::getDoubleTy(Context);
233   case kLongDouble:
234     return Type::getX86_FP80Ty(Context);
235   case kNumValueTypes:
236     return nullptr;
237   }
238   llvm_unreachable("Unhandled FTValueType enum");
239 }
240 
241 // Returns the type name for an FTValueType.
typeNameFromFTValueType(FTValueType VT)242 static const char *typeNameFromFTValueType(FTValueType VT) {
243   switch (VT) {
244   case kFloat:
245     return "float";
246   case kDouble:
247     return "double";
248   case kLongDouble:
249     return "longdouble";
250   case kNumValueTypes:
251     return nullptr;
252   }
253   llvm_unreachable("Unhandled FTValueType enum");
254 }
255 
256 // A specific mapping configuration of application type to shadow type for nsan
257 // (see -nsan-shadow-mapping flag).
258 class MappingConfig {
259 public:
MappingConfig(LLVMContext & C)260   explicit MappingConfig(LLVMContext &C) : Context(C) {
261     if (ClShadowMapping.size() != 3)
262       report_fatal_error("Invalid nsan mapping: " + Twine(ClShadowMapping));
263     unsigned ShadowTypeSizeBits[kNumValueTypes];
264     for (int VT = 0; VT < kNumValueTypes; ++VT) {
265       auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
266       if (!Config)
267         report_fatal_error("Failed to get ShadowTypeConfig for " +
268                            Twine(ClShadowMapping[VT]));
269       const unsigned AppTypeSize =
270           typeFromFTValueType(static_cast<FTValueType>(VT), Context)
271               ->getScalarSizeInBits();
272       const unsigned ShadowTypeSize =
273           Config->getType(Context)->getScalarSizeInBits();
274       // Check that the shadow type size is at most kShadowScale times the
275       // application type size, so that shadow memory compoutations are valid.
276       if (ShadowTypeSize > kShadowScale * AppTypeSize)
277         report_fatal_error("Invalid nsan mapping f" + Twine(AppTypeSize) +
278                            "->f" + Twine(ShadowTypeSize) +
279                            ": The shadow type size should be at most " +
280                            Twine(kShadowScale) +
281                            " times the application type size");
282       ShadowTypeSizeBits[VT] = ShadowTypeSize;
283       Configs[VT] = std::move(Config);
284     }
285 
286     // Check that the mapping is monotonous. This is required because if one
287     // does an fpextend of `float->long double` in application code, nsan is
288     // going to do an fpextend of `shadow(float) -> shadow(long double)` in
289     // shadow code. This will fail in `qql` mode, since nsan would be
290     // fpextending `f128->long`, which is invalid.
291     // TODO: Relax this.
292     if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
293         ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble])
294       report_fatal_error("Invalid nsan mapping: { float->f" +
295                          Twine(ShadowTypeSizeBits[kFloat]) + "; double->f" +
296                          Twine(ShadowTypeSizeBits[kDouble]) +
297                          "; long double->f" +
298                          Twine(ShadowTypeSizeBits[kLongDouble]) + " }");
299   }
300 
byValueType(FTValueType VT) const301   const ShadowTypeConfig &byValueType(FTValueType VT) const {
302     assert(VT < FTValueType::kNumValueTypes && "invalid value type");
303     return *Configs[VT];
304   }
305 
306   // Returns the extended shadow type for a given application type.
getExtendedFPType(Type * FT) const307   Type *getExtendedFPType(Type *FT) const {
308     if (const auto VT = ftValueTypeFromType(FT))
309       return Configs[*VT]->getType(Context);
310     if (FT->isVectorTy()) {
311       auto *VecTy = cast<VectorType>(FT);
312       // TODO: add support for scalable vector types.
313       if (VecTy->isScalableTy())
314         return nullptr;
315       Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
316       return ExtendedScalar
317                  ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
318                  : nullptr;
319     }
320     return nullptr;
321   }
322 
323 private:
324   LLVMContext &Context;
325   std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
326 };
327 
328 // The memory extents of a type specifies how many elements of a given
329 // FTValueType needs to be stored when storing this type.
330 struct MemoryExtents {
331   FTValueType ValueType;
332   uint64_t NumElts;
333 };
334 
getMemoryExtentsOrDie(Type * FT)335 static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
336   if (const auto VT = ftValueTypeFromType(FT))
337     return {*VT, 1};
338   if (auto *VecTy = dyn_cast<VectorType>(FT)) {
339     const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType());
340     return {ScalarExtents.ValueType,
341             ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
342   }
343   llvm_unreachable("invalid value type");
344 }
345 
346 // The location of a check. Passed as parameters to runtime checking functions.
347 class CheckLoc {
348 public:
349   // Creates a location that references an application memory location.
makeStore(Value * Address)350   static CheckLoc makeStore(Value *Address) {
351     CheckLoc Result(kStore);
352     Result.Address = Address;
353     return Result;
354   }
makeLoad(Value * Address)355   static CheckLoc makeLoad(Value *Address) {
356     CheckLoc Result(kLoad);
357     Result.Address = Address;
358     return Result;
359   }
360 
361   // Creates a location that references an argument, given by id.
makeArg(int ArgId)362   static CheckLoc makeArg(int ArgId) {
363     CheckLoc Result(kArg);
364     Result.ArgId = ArgId;
365     return Result;
366   }
367 
368   // Creates a location that references the return value of a function.
makeRet()369   static CheckLoc makeRet() { return CheckLoc(kRet); }
370 
371   // Creates a location that references a vector insert.
makeInsert()372   static CheckLoc makeInsert() { return CheckLoc(kInsert); }
373 
374   // Returns the CheckType of location this refers to, as an integer-typed LLVM
375   // IR value.
getType(LLVMContext & C) const376   Value *getType(LLVMContext &C) const {
377     return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
378   }
379 
380   // Returns a CheckType-specific value representing details of the location
381   // (e.g. application address for loads or stores), as an `IntptrTy`-typed LLVM
382   // IR value.
getValue(Type * IntptrTy,IRBuilder<> & Builder) const383   Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
384     switch (CheckTy) {
385     case kUnknown:
386       llvm_unreachable("unknown type");
387     case kRet:
388     case kInsert:
389       return ConstantInt::get(IntptrTy, 0);
390     case kArg:
391       return ConstantInt::get(IntptrTy, ArgId);
392     case kLoad:
393     case kStore:
394       return Builder.CreatePtrToInt(Address, IntptrTy);
395     }
396     llvm_unreachable("Unhandled CheckType enum");
397   }
398 
399 private:
400   // Must be kept in sync with the runtime,
401   // see compiler-rt/lib/nsan/nsan_stats.h
402   enum CheckType {
403     kUnknown = 0,
404     kRet,
405     kArg,
406     kLoad,
407     kStore,
408     kInsert,
409   };
CheckLoc(CheckType CheckTy)410   explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
411 
412   Value *Address = nullptr;
413   const CheckType CheckTy;
414   int ArgId = -1;
415 };
416 
417 // A map of LLVM IR values to shadow LLVM IR values.
418 class ValueToShadowMap {
419 public:
ValueToShadowMap(const MappingConfig & Config)420   explicit ValueToShadowMap(const MappingConfig &Config) : Config(Config) {}
421 
422   ValueToShadowMap(const ValueToShadowMap &) = delete;
423   ValueToShadowMap &operator=(const ValueToShadowMap &) = delete;
424 
425   // Sets the shadow value for a value. Asserts that the value does not already
426   // have a value.
setShadow(Value & V,Value & Shadow)427   void setShadow(Value &V, Value &Shadow) {
428     [[maybe_unused]] const bool Inserted = Map.try_emplace(&V, &Shadow).second;
429     LLVM_DEBUG({
430       if (!Inserted) {
431         if (auto *I = dyn_cast<Instruction>(&V))
432           errs() << I->getFunction()->getName() << ": ";
433         errs() << "duplicate shadow (" << &V << "): ";
434         V.dump();
435       }
436     });
437     assert(Inserted && "duplicate shadow");
438   }
439 
440   // Returns true if the value already has a shadow (including if the value is a
441   // constant). If true, calling getShadow() is valid.
hasShadow(Value * V) const442   bool hasShadow(Value *V) const { return isa<Constant>(V) || Map.contains(V); }
443 
444   // Returns the shadow value for a given value. Asserts that the value has
445   // a shadow value. Lazily creates shadows for constant values.
getShadow(Value * V) const446   Value *getShadow(Value *V) const {
447     if (Constant *C = dyn_cast<Constant>(V))
448       return getShadowConstant(C);
449     return Map.find(V)->second;
450   }
451 
empty() const452   bool empty() const { return Map.empty(); }
453 
454 private:
455   // Extends a constant application value to its shadow counterpart.
extendConstantFP(APFloat CV,const fltSemantics & To) const456   APFloat extendConstantFP(APFloat CV, const fltSemantics &To) const {
457     bool LosesInfo = false;
458     CV.convert(To, APFloatBase::rmTowardZero, &LosesInfo);
459     return CV;
460   }
461 
462   // Returns the shadow constant for the given application constant.
getShadowConstant(Constant * C) const463   Constant *getShadowConstant(Constant *C) const {
464     if (UndefValue *U = dyn_cast<UndefValue>(C)) {
465       return UndefValue::get(Config.getExtendedFPType(U->getType()));
466     }
467     if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
468       // Floating-point constants.
469       Type *Ty = Config.getExtendedFPType(CFP->getType());
470       return ConstantFP::get(
471           Ty, extendConstantFP(CFP->getValueAPF(), Ty->getFltSemantics()));
472     }
473     // Vector, array, or aggregate constants.
474     if (C->getType()->isVectorTy()) {
475       SmallVector<Constant *, 8> Elements;
476       for (int I = 0, E = cast<VectorType>(C->getType())
477                               ->getElementCount()
478                               .getFixedValue();
479            I < E; ++I)
480         Elements.push_back(getShadowConstant(C->getAggregateElement(I)));
481       return ConstantVector::get(Elements);
482     }
483     llvm_unreachable("unimplemented");
484   }
485 
486   const MappingConfig &Config;
487   DenseMap<Value *, Value *> Map;
488 };
489 
490 class NsanMemOpFn {
491 public:
492   NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized, StringRef Fallback,
493               size_t NumArgs);
494   FunctionCallee getFunctionFor(uint64_t MemOpSize) const;
495   FunctionCallee getFallback() const;
496 
497 private:
498   SmallVector<FunctionCallee> Funcs;
499   size_t NumSizedFuncs;
500 };
501 
NsanMemOpFn(Module & M,ArrayRef<StringRef> Sized,StringRef Fallback,size_t NumArgs)502 NsanMemOpFn::NsanMemOpFn(Module &M, ArrayRef<StringRef> Sized,
503                          StringRef Fallback, size_t NumArgs) {
504   LLVMContext &Ctx = M.getContext();
505   AttributeList Attr;
506   Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
507   Type *PtrTy = PointerType::getUnqual(Ctx);
508   Type *VoidTy = Type::getVoidTy(Ctx);
509   IntegerType *IntptrTy = M.getDataLayout().getIntPtrType(Ctx);
510   FunctionType *SizedFnTy = nullptr;
511 
512   NumSizedFuncs = Sized.size();
513 
514   // First entry is fallback function
515   if (NumArgs == 3) {
516     Funcs.push_back(
517         M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, PtrTy, IntptrTy));
518     SizedFnTy = FunctionType::get(VoidTy, {PtrTy, PtrTy}, false);
519   } else if (NumArgs == 2) {
520     Funcs.push_back(
521         M.getOrInsertFunction(Fallback, Attr, VoidTy, PtrTy, IntptrTy));
522     SizedFnTy = FunctionType::get(VoidTy, {PtrTy}, false);
523   } else {
524     llvm_unreachable("Unexpected value of sized functions arguments");
525   }
526 
527   for (size_t i = 0; i < NumSizedFuncs; ++i)
528     Funcs.push_back(M.getOrInsertFunction(Sized[i], SizedFnTy, Attr));
529 }
530 
getFunctionFor(uint64_t MemOpSize) const531 FunctionCallee NsanMemOpFn::getFunctionFor(uint64_t MemOpSize) const {
532   // Now `getFunctionFor` operates on `Funcs` of size 4 (at least) and the
533   // following code assumes that the number of functions in `Func` is sufficient
534   assert(NumSizedFuncs >= 3 && "Unexpected number of sized functions");
535 
536   size_t Idx =
537       MemOpSize == 4 ? 1 : (MemOpSize == 8 ? 2 : (MemOpSize == 16 ? 3 : 0));
538 
539   return Funcs[Idx];
540 }
541 
getFallback() const542 FunctionCallee NsanMemOpFn::getFallback() const { return Funcs[0]; }
543 
544 /// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
545 /// API function declarations into the module if they don't exist already.
546 /// Instantiating ensures the __nsan_init function is in the list of global
547 /// constructors for the module.
548 class NumericalStabilitySanitizer {
549 public:
550   NumericalStabilitySanitizer(Module &M);
551   bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
552 
553 private:
554   bool instrumentMemIntrinsic(MemIntrinsic *MI);
555   void maybeAddSuffixForNsanInterface(CallBase *CI);
556   bool addrPointsToConstantData(Value *Addr);
557   void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
558                               ValueToShadowMap &Map);
559   Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
560                                                 const TargetLibraryInfo &TLI,
561                                                 const ValueToShadowMap &Map);
562   PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
563   void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
564                              ValueToShadowMap &Map);
565 
566   void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
567                            const ValueToShadowMap &Map);
568 
569   void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
570                              const ValueToShadowMap &Map);
571   Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
572                    CheckLoc Loc);
573   Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
574                            CheckLoc Loc);
575   void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);
576 
577   // Value creation handlers.
578   Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
579   Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
580                         const TargetLibraryInfo &TLI,
581                         const ValueToShadowMap &Map, IRBuilder<> &Builder);
582   Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
583                                   const TargetLibraryInfo &TLI,
584                                   const ValueToShadowMap &Map,
585                                   IRBuilder<> &Builder);
586   Value *handleTrunc(const FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
587                      const ValueToShadowMap &Map, IRBuilder<> &Builder);
588   Value *handleExt(const FPExtInst &Ext, Type *VT, Type *ExtendedVT,
589                    const ValueToShadowMap &Map, IRBuilder<> &Builder);
590 
591   // Value propagation handlers.
592   void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
593                         const ValueToShadowMap &Map);
594   void propagateNonFTStore(StoreInst &Store, Type *VT,
595                            const ValueToShadowMap &Map);
596 
597   const DataLayout &DL;
598   LLVMContext &Context;
599   MappingConfig Config;
600   IntegerType *IntptrTy = nullptr;
601 
602   // TODO: Use std::array instead?
603   FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes] = {};
604   FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes] = {};
605   FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes] = {};
606   FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes] = {};
607 
608   NsanMemOpFn NsanCopyFns;
609   NsanMemOpFn NsanSetUnknownFns;
610 
611   FunctionCallee NsanGetRawShadowTypePtr;
612   FunctionCallee NsanGetRawShadowPtr;
613   GlobalValue *NsanShadowRetTag = nullptr;
614 
615   Type *NsanShadowRetType = nullptr;
616   GlobalValue *NsanShadowRetPtr = nullptr;
617 
618   GlobalValue *NsanShadowArgsTag = nullptr;
619 
620   Type *NsanShadowArgsType = nullptr;
621   GlobalValue *NsanShadowArgsPtr = nullptr;
622 
623   std::optional<Regex> CheckFunctionsFilter;
624 };
625 } // end anonymous namespace
626 
627 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & MAM)628 NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
629   getOrCreateSanitizerCtorAndInitFunctions(
630       M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{},
631       /*InitArgs=*/{},
632       // This callback is invoked when the functions are created the first
633       // time. Hook them into the global ctors list in that case:
634       [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
635 
636   NumericalStabilitySanitizer Nsan(M);
637   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
638   for (Function &F : M)
639     Nsan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
640 
641   return PreservedAnalyses::none();
642 }
643 
createThreadLocalGV(const char * Name,Module & M,Type * Ty)644 static GlobalValue *createThreadLocalGV(const char *Name, Module &M, Type *Ty) {
645   return M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] {
646     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
647                               nullptr, Name, nullptr,
648                               GlobalVariable::InitialExecTLSModel);
649   });
650 }
651 
NumericalStabilitySanitizer(Module & M)652 NumericalStabilitySanitizer::NumericalStabilitySanitizer(Module &M)
653     : DL(M.getDataLayout()), Context(M.getContext()), Config(Context),
654       NsanCopyFns(M, {"__nsan_copy_4", "__nsan_copy_8", "__nsan_copy_16"},
655                   "__nsan_copy_values", /*NumArgs=*/3),
656       NsanSetUnknownFns(M,
657                         {"__nsan_set_value_unknown_4",
658                          "__nsan_set_value_unknown_8",
659                          "__nsan_set_value_unknown_16"},
660                         "__nsan_set_value_unknown", /*NumArgs=*/2) {
661   IntptrTy = DL.getIntPtrType(Context);
662   Type *PtrTy = PointerType::getUnqual(Context);
663   Type *Int32Ty = Type::getInt32Ty(Context);
664   Type *Int1Ty = Type::getInt1Ty(Context);
665   Type *VoidTy = Type::getVoidTy(Context);
666 
667   AttributeList Attr;
668   Attr = Attr.addFnAttribute(Context, Attribute::NoUnwind);
669   // Initialize the runtime values (functions and global variables).
670   for (int I = 0; I < kNumValueTypes; ++I) {
671     const FTValueType VT = static_cast<FTValueType>(I);
672     const char *VTName = typeNameFromFTValueType(VT);
673     Type *VTTy = typeFromFTValueType(VT, Context);
674 
675     // Load/store.
676     const std::string GetterPrefix =
677         std::string("__nsan_get_shadow_ptr_for_") + VTName;
678     NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
679         GetterPrefix + "_store", Attr, PtrTy, PtrTy, IntptrTy);
680     NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
681         GetterPrefix + "_load", Attr, PtrTy, PtrTy, IntptrTy);
682 
683     // Check.
684     const auto &ShadowConfig = Config.byValueType(VT);
685     Type *ShadowTy = ShadowConfig.getType(Context);
686     NsanCheckValue[VT] =
687         M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
688                                   "_" + ShadowConfig.getNsanTypeId(),
689                               Attr, Int32Ty, VTTy, ShadowTy, Int32Ty, IntptrTy);
690     NsanFCmpFail[VT] = M.getOrInsertFunction(
691         std::string("__nsan_fcmp_fail_") + VTName + "_" +
692             ShadowConfig.getNsanTypeId(),
693         Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty, Int1Ty);
694   }
695 
696   // TODO: Add attributes nofree, nosync, readnone, readonly,
697   NsanGetRawShadowTypePtr = M.getOrInsertFunction(
698       "__nsan_internal_get_raw_shadow_type_ptr", Attr, PtrTy, PtrTy);
699   NsanGetRawShadowPtr = M.getOrInsertFunction(
700       "__nsan_internal_get_raw_shadow_ptr", Attr, PtrTy, PtrTy);
701 
702   NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);
703 
704   NsanShadowRetType = ArrayType::get(Type::getInt8Ty(Context),
705                                      kMaxVectorWidth * kMaxShadowTypeSizeBytes);
706   NsanShadowRetPtr =
707       createThreadLocalGV("__nsan_shadow_ret_ptr", M, NsanShadowRetType);
708 
709   NsanShadowArgsTag =
710       createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);
711 
712   NsanShadowArgsType =
713       ArrayType::get(Type::getInt8Ty(Context),
714                      kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes);
715 
716   NsanShadowArgsPtr =
717       createThreadLocalGV("__nsan_shadow_args_ptr", M, NsanShadowArgsType);
718 
719   if (!ClCheckFunctionsFilter.empty()) {
720     Regex R = Regex(ClCheckFunctionsFilter);
721     std::string RegexError;
722     assert(R.isValid(RegexError));
723     CheckFunctionsFilter = std::move(R);
724   }
725 }
726 
727 // Returns true if the given LLVM Value points to constant data (typically, a
728 // global variable reference).
addrPointsToConstantData(Value * Addr)729 bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
730   // If this is a GEP, just analyze its pointer operand.
731   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
732     Addr = GEP->getPointerOperand();
733 
734   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr))
735     return GV->isConstant();
736   return false;
737 }
738 
739 // This instruments the function entry to create shadow arguments.
740 // Pseudocode:
741 //   if (this_fn_ptr == __nsan_shadow_args_tag) {
742 //     s(arg0) = LOAD<sizeof(arg0)>(__nsan_shadow_args);
743 //     s(arg1) = LOAD<sizeof(arg1)>(__nsan_shadow_args + sizeof(arg0));
744 //     ...
745 //     __nsan_shadow_args_tag = 0;
746 //   } else {
747 //     s(arg0) = fext(arg0);
748 //     s(arg1) = fext(arg1);
749 //     ...
750 //   }
createShadowArguments(Function & F,const TargetLibraryInfo & TLI,ValueToShadowMap & Map)751 void NumericalStabilitySanitizer::createShadowArguments(
752     Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
753   assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
754 
755   // Do not bother if there are no FP args.
756   if (all_of(F.args(), [this](const Argument &Arg) {
757         return Config.getExtendedFPType(Arg.getType()) == nullptr;
758       }))
759     return;
760 
761   IRBuilder<> Builder(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHIIt());
762   // The function has shadow args if the shadow args tag matches the function
763   // address.
764   Value *HasShadowArgs = Builder.CreateICmpEQ(
765       Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false),
766       Builder.CreatePtrToInt(&F, IntptrTy));
767 
768   unsigned ShadowArgsOffsetBytes = 0;
769   for (Argument &Arg : F.args()) {
770     Type *VT = Arg.getType();
771     Type *ExtendedVT = Config.getExtendedFPType(VT);
772     if (ExtendedVT == nullptr)
773       continue; // Not an FT value.
774     Value *L = Builder.CreateAlignedLoad(
775         ExtendedVT,
776         Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
777                                    ShadowArgsOffsetBytes),
778         Align(1), /*isVolatile=*/false);
779     Value *Shadow = Builder.CreateSelect(HasShadowArgs, L,
780                                          Builder.CreateFPExt(&Arg, ExtendedVT));
781     Map.setShadow(Arg, *Shadow);
782     TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
783     assert(!SlotSize.isScalable() && "unsupported");
784     ShadowArgsOffsetBytes += SlotSize;
785   }
786   Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
787 }
788 
789 // Returns true if the instrumentation should emit code to check arguments
790 // before a function call.
shouldCheckArgs(CallBase & CI,const TargetLibraryInfo & TLI,const std::optional<Regex> & CheckFunctionsFilter)791 static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI,
792                             const std::optional<Regex> &CheckFunctionsFilter) {
793 
794   Function *Fn = CI.getCalledFunction();
795 
796   if (CheckFunctionsFilter) {
797     // Skip checking args of indirect calls.
798     if (Fn == nullptr)
799       return false;
800     if (CheckFunctionsFilter->match(Fn->getName()))
801       return true;
802     return false;
803   }
804 
805   if (Fn == nullptr)
806     return true; // Always check args of indirect calls.
807 
808   // Never check nsan functions, the user called them for a reason.
809   if (Fn->getName().starts_with("__nsan_"))
810     return false;
811 
812   const auto ID = Fn->getIntrinsicID();
813   LibFunc LFunc = LibFunc::NumLibFuncs;
814   // Always check args of unknown functions.
815   if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc))
816     return true;
817 
818   // Do not check args of an `fabs` call that is used for a comparison.
819   // This is typically used for `fabs(a-b) < tolerance`, where what matters is
820   // the result of the comparison, which is already caught be the fcmp checks.
821   if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
822       LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
823     for (const auto &U : CI.users())
824       if (isa<CmpInst>(U))
825         return false;
826 
827   return true; // Default is check.
828 }
829 
830 // Populates the shadow call stack (which contains shadow values for every
831 // floating-point parameter to the function).
populateShadowStack(CallBase & CI,const TargetLibraryInfo & TLI,const ValueToShadowMap & Map)832 void NumericalStabilitySanitizer::populateShadowStack(
833     CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
834   // Do not create a shadow stack for inline asm.
835   if (CI.isInlineAsm())
836     return;
837 
838   // Do not bother if there are no FP args.
839   if (all_of(CI.operands(), [this](const Value *Arg) {
840         return Config.getExtendedFPType(Arg->getType()) == nullptr;
841       }))
842     return;
843 
844   IRBuilder<> Builder(&CI);
845   SmallVector<Value *, 8> ArgShadows;
846   const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI, CheckFunctionsFilter);
847   for (auto [ArgIdx, Arg] : enumerate(CI.operands())) {
848     if (Config.getExtendedFPType(Arg->getType()) == nullptr)
849       continue; // Not an FT value.
850     Value *ArgShadow = Map.getShadow(Arg);
851     ArgShadows.push_back(ShouldCheckArgs ? emitCheck(Arg, ArgShadow, Builder,
852                                                      CheckLoc::makeArg(ArgIdx))
853                                          : ArgShadow);
854   }
855 
856   // Do not create shadow stacks for intrinsics/known lib funcs.
857   if (Function *Fn = CI.getCalledFunction()) {
858     LibFunc LFunc;
859     if (Fn->isIntrinsic() || TLI.getLibFunc(*Fn, LFunc))
860       return;
861   }
862 
863   // Set the shadow stack tag.
864   Builder.CreateStore(CI.getCalledOperand(), NsanShadowArgsTag);
865   TypeSize ShadowArgsOffsetBytes = TypeSize::getFixed(0);
866 
867   unsigned ShadowArgId = 0;
868   for (const Value *Arg : CI.operands()) {
869     Type *VT = Arg->getType();
870     Type *ExtendedVT = Config.getExtendedFPType(VT);
871     if (ExtendedVT == nullptr)
872       continue; // Not an FT value.
873     Builder.CreateAlignedStore(
874         ArgShadows[ShadowArgId++],
875         Builder.CreateConstGEP2_64(NsanShadowArgsType, NsanShadowArgsPtr, 0,
876                                    ShadowArgsOffsetBytes),
877         Align(1), /*isVolatile=*/false);
878     TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
879     assert(!SlotSize.isScalable() && "unsupported");
880     ShadowArgsOffsetBytes += SlotSize;
881   }
882 }
883 
// Result of emitCheckInternal(): tells emitCheck() how the shadow computation
// proceeds after a check, i.e. whether it keeps using the current shadow or
// restarts from a fresh fext of the original value.
// The numeric values are shared with the runtime: keep them in sync.
enum class ContinuationType {
  // Keep computing with the existing shadow value.
  ContinueWithShadow = 0,
  // Discard the shadow and re-extend the original value.
  ResumeFromValue = 1,
};
891 
emitCheckInternal(Value * V,Value * ShadowV,IRBuilder<> & Builder,CheckLoc Loc)892 Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V, Value *ShadowV,
893                                                       IRBuilder<> &Builder,
894                                                       CheckLoc Loc) {
895   // Do not emit checks for constant values, this is redundant.
896   if (isa<Constant>(V))
897     return ConstantInt::get(
898         Builder.getInt32Ty(),
899         static_cast<int>(ContinuationType::ContinueWithShadow));
900 
901   Type *Ty = V->getType();
902   if (const auto VT = ftValueTypeFromType(Ty))
903     return Builder.CreateCall(
904         NsanCheckValue[*VT],
905         {V, ShadowV, Loc.getType(Context), Loc.getValue(IntptrTy, Builder)});
906 
907   if (Ty->isVectorTy()) {
908     auto *VecTy = cast<VectorType>(Ty);
909     // We currently skip scalable vector types in MappingConfig,
910     // thus we should not encounter any such types here.
911     assert(!VecTy->isScalableTy() &&
912            "Scalable vector types are not supported yet");
913     Value *CheckResult = nullptr;
914     for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E; ++I) {
915       // We resume if any element resumes. Another option would be to create a
916       // vector shuffle with the array of ContinueWithShadow, but that is too
917       // complex.
918       Value *ExtractV = Builder.CreateExtractElement(V, I);
919       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
920       Value *ComponentCheckResult =
921           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
922       CheckResult = CheckResult
923                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
924                         : ComponentCheckResult;
925     }
926     return CheckResult;
927   }
928   if (Ty->isArrayTy()) {
929     Value *CheckResult = nullptr;
930     for (auto I : seq(Ty->getArrayNumElements())) {
931       Value *ExtractV = Builder.CreateExtractElement(V, I);
932       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
933       Value *ComponentCheckResult =
934           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
935       CheckResult = CheckResult
936                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
937                         : ComponentCheckResult;
938     }
939     return CheckResult;
940   }
941   if (Ty->isStructTy()) {
942     Value *CheckResult = nullptr;
943     for (auto I : seq(Ty->getStructNumElements())) {
944       if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
945         continue; // Only check FT values.
946       Value *ExtractV = Builder.CreateExtractValue(V, I);
947       Value *ExtractShadowV = Builder.CreateExtractElement(ShadowV, I);
948       Value *ComponentCheckResult =
949           emitCheckInternal(ExtractV, ExtractShadowV, Builder, Loc);
950       CheckResult = CheckResult
951                         ? Builder.CreateOr(CheckResult, ComponentCheckResult)
952                         : ComponentCheckResult;
953     }
954     if (!CheckResult)
955       return ConstantInt::get(
956           Builder.getInt32Ty(),
957           static_cast<int>(ContinuationType::ContinueWithShadow));
958     return CheckResult;
959   }
960 
961   llvm_unreachable("not implemented");
962 }
963 
964 // Inserts a runtime check of V against its shadow value ShadowV.
965 // We check values whenever they escape: on return, call, stores, and
966 // insertvalue.
967 // Returns the shadow value that should be used to continue the computations,
968 // depending on the answer from the runtime.
969 // TODO: Should we check on select ? phi ?
emitCheck(Value * V,Value * ShadowV,IRBuilder<> & Builder,CheckLoc Loc)970 Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
971                                               IRBuilder<> &Builder,
972                                               CheckLoc Loc) {
973   // Do not emit checks for constant values, this is redundant.
974   if (isa<Constant>(V))
975     return ShadowV;
976 
977   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
978     Function *F = Inst->getFunction();
979     if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName())) {
980       return ShadowV;
981     }
982   }
983 
984   Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
985   Value *ICmpEQ = Builder.CreateICmpEQ(
986       CheckResult,
987       ConstantInt::get(Builder.getInt32Ty(),
988                        static_cast<int>(ContinuationType::ResumeFromValue)));
989   return Builder.CreateSelect(
990       ICmpEQ, Builder.CreateFPExt(V, Config.getExtendedFPType(V->getType())),
991       ShadowV);
992 }
993 
994 // Inserts a check that fcmp on shadow values are consistent with that on base
995 // values.
emitFCmpCheck(FCmpInst & FCmp,const ValueToShadowMap & Map)996 void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
997                                                 const ValueToShadowMap &Map) {
998   if (!ClInstrumentFCmp)
999     return;
1000 
1001   Function *F = FCmp.getFunction();
1002   if (CheckFunctionsFilter && !CheckFunctionsFilter->match(F->getName()))
1003     return;
1004 
1005   Value *LHS = FCmp.getOperand(0);
1006   if (Config.getExtendedFPType(LHS->getType()) == nullptr)
1007     return;
1008   Value *RHS = FCmp.getOperand(1);
1009 
1010   // Split the basic block. On mismatch, we'll jump to the new basic block with
1011   // a call to the runtime for error reporting.
1012   BasicBlock *FCmpBB = FCmp.getParent();
1013   BasicBlock *NextBB = FCmpBB->splitBasicBlock(FCmp.getNextNode());
1014   // Remove the newly created terminator unconditional branch.
1015   FCmpBB->back().eraseFromParent();
1016   BasicBlock *FailBB =
1017       BasicBlock::Create(Context, "", FCmpBB->getParent(), NextBB);
1018 
1019   // Create the shadow fcmp and comparison between the fcmps.
1020   IRBuilder<> FCmpBuilder(FCmpBB);
1021   FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
1022   Value *ShadowLHS = Map.getShadow(LHS);
1023   Value *ShadowRHS = Map.getShadow(RHS);
1024   // See comment on ClTruncateFCmpEq.
1025   if (FCmp.isEquality() && ClTruncateFCmpEq) {
1026     Type *Ty = ShadowLHS->getType();
1027     ShadowLHS = FCmpBuilder.CreateFPExt(
1028         FCmpBuilder.CreateFPTrunc(ShadowLHS, LHS->getType()), Ty);
1029     ShadowRHS = FCmpBuilder.CreateFPExt(
1030         FCmpBuilder.CreateFPTrunc(ShadowRHS, RHS->getType()), Ty);
1031   }
1032   Value *ShadowFCmp =
1033       FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
1034   Value *OriginalAndShadowFcmpMatch =
1035       FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);
1036 
1037   if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
1038     // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of i1,
1039     // where an element is true if the corresponding elements in original and
1040     // shadow are the same. We want all elements to be 1.
1041     OriginalAndShadowFcmpMatch =
1042         FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
1043   }
1044 
1045   // Use MDBuilder(*C).createLikelyBranchWeights() because "match" is the common
1046   // case.
1047   FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB,
1048                            MDBuilder(Context).createLikelyBranchWeights());
1049 
1050   // Fill in FailBB.
1051   IRBuilder<> FailBuilder(FailBB);
1052   FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
1053 
1054   const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
1055                              &FailBuilder](Value *L, Value *R, Value *ShadowL,
1056                                            Value *ShadowR, Value *Result,
1057                                            Value *ShadowResult) {
1058     Type *FT = L->getType();
1059     FunctionCallee *Callee = nullptr;
1060     if (FT->isFloatTy()) {
1061       Callee = &(NsanFCmpFail[kFloat]);
1062     } else if (FT->isDoubleTy()) {
1063       Callee = &(NsanFCmpFail[kDouble]);
1064     } else if (FT->isX86_FP80Ty()) {
1065       // TODO: make NsanFCmpFailLongDouble work.
1066       Callee = &(NsanFCmpFail[kDouble]);
1067       L = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
1068       R = FailBuilder.CreateFPTrunc(L, Type::getDoubleTy(Context));
1069     } else {
1070       llvm_unreachable("not implemented");
1071     }
1072     FailBuilder.CreateCall(*Callee, {L, R, ShadowL, ShadowR,
1073                                      ConstantInt::get(FCmpBuilder.getInt32Ty(),
1074                                                       FCmp.getPredicate()),
1075                                      Result, ShadowResult});
1076   };
1077   if (LHS->getType()->isVectorTy()) {
1078     for (int I = 0, E = cast<VectorType>(LHS->getType())
1079                             ->getElementCount()
1080                             .getFixedValue();
1081          I < E; ++I) {
1082       Value *ExtractLHS = FailBuilder.CreateExtractElement(LHS, I);
1083       Value *ExtractRHS = FailBuilder.CreateExtractElement(RHS, I);
1084       Value *ExtractShaodwLHS = FailBuilder.CreateExtractElement(ShadowLHS, I);
1085       Value *ExtractShaodwRHS = FailBuilder.CreateExtractElement(ShadowRHS, I);
1086       Value *ExtractFCmp = FailBuilder.CreateExtractElement(&FCmp, I);
1087       Value *ExtractShadowFCmp =
1088           FailBuilder.CreateExtractElement(ShadowFCmp, I);
1089       EmitFailCall(ExtractLHS, ExtractRHS, ExtractShaodwLHS, ExtractShaodwRHS,
1090                    ExtractFCmp, ExtractShadowFCmp);
1091     }
1092   } else {
1093     EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
1094   }
1095   FailBuilder.CreateBr(NextBB);
1096 
1097   ++NumInstrumentedFCmp;
1098 }
1099 
1100 // Creates a shadow phi value for any phi that defines a value of FT type.
maybeCreateShadowPhi(PHINode & Phi,const TargetLibraryInfo & TLI)1101 PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
1102     PHINode &Phi, const TargetLibraryInfo &TLI) {
1103   Type *VT = Phi.getType();
1104   Type *ExtendedVT = Config.getExtendedFPType(VT);
1105   if (ExtendedVT == nullptr)
1106     return nullptr; // Not an FT value.
1107   // The phi operands are shadow values and are not available when the phi is
1108   // created. They will be populated in a final phase, once all shadow values
1109   // have been created.
1110   PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues());
1111   Shadow->insertAfter(Phi.getIterator());
1112   return Shadow;
1113 }
1114 
handleLoad(LoadInst & Load,Type * VT,Type * ExtendedVT)1115 Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
1116                                                Type *ExtendedVT) {
1117   IRBuilder<> Builder(Load.getNextNode());
1118   Builder.SetCurrentDebugLocation(Load.getDebugLoc());
1119   if (addrPointsToConstantData(Load.getPointerOperand())) {
1120     // No need to look into the shadow memory, the value is a constant. Just
1121     // convert from FT to 2FT.
1122     return Builder.CreateFPExt(&Load, ExtendedVT);
1123   }
1124 
1125   // if (%shadowptr == &)
1126   //    %shadow = fpext %v
1127   // else
1128   //    %shadow = load (ptrcast %shadow_ptr))
1129   // Considered options here:
1130   //  - Have `NsanGetShadowPtrForLoad` return a fixed address
1131   //    &__nsan_unknown_value_shadow_address that is valid to load from, and
1132   //    use a select. This has the advantage that the generated IR is simpler.
1133   //  - Have `NsanGetShadowPtrForLoad` return nullptr.  Because `select` does
1134   //    not short-circuit, dereferencing the returned pointer is no longer an
1135   //    option, have to split and create a separate basic block. This has the
1136   //    advantage of being easier to debug because it crashes if we ever mess
1137   //    up.
1138 
1139   const auto Extents = getMemoryExtentsOrDie(VT);
1140   Value *ShadowPtr = Builder.CreateCall(
1141       NsanGetShadowPtrForLoad[Extents.ValueType],
1142       {Load.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1143   ++NumInstrumentedFTLoads;
1144 
1145   // Split the basic block.
1146   BasicBlock *LoadBB = Load.getParent();
1147   BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
1148   // Create the two options for creating the shadow value.
1149   BasicBlock *ShadowLoadBB =
1150       BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
1151   BasicBlock *FExtBB =
1152       BasicBlock::Create(Context, "", LoadBB->getParent(), NextBB);
1153 
1154   // Replace the newly created terminator unconditional branch by a conditional
1155   // branch to one of the options.
1156   {
1157     LoadBB->back().eraseFromParent();
1158     IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
1159     LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1160     LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
1161                                ShadowLoadBB);
1162   }
1163 
1164   // Fill in ShadowLoadBB.
1165   IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
1166   ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1167   Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
1168       ExtendedVT, ShadowPtr, Align(1), Load.isVolatile());
1169   if (ClCheckLoads) {
1170     ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
1171                            CheckLoc::makeLoad(Load.getPointerOperand()));
1172   }
1173   ShadowLoadBBBuilder.CreateBr(NextBB);
1174 
1175   // Fill in FExtBB.
1176   IRBuilder<> FExtBBBuilder(FExtBB);
1177   FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1178   Value *FExt = FExtBBBuilder.CreateFPExt(&Load, ExtendedVT);
1179   FExtBBBuilder.CreateBr(NextBB);
1180 
1181   // The shadow value come from any of the options.
1182   IRBuilder<> NextBBBuilder(&*NextBB->begin());
1183   NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
1184   PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
1185   ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
1186   ShadowPhi->addIncoming(FExt, FExtBB);
1187   return ShadowPhi;
1188 }
1189 
handleTrunc(const FPTruncInst & Trunc,Type * VT,Type * ExtendedVT,const ValueToShadowMap & Map,IRBuilder<> & Builder)1190 Value *NumericalStabilitySanitizer::handleTrunc(const FPTruncInst &Trunc,
1191                                                 Type *VT, Type *ExtendedVT,
1192                                                 const ValueToShadowMap &Map,
1193                                                 IRBuilder<> &Builder) {
1194   Value *OrigSource = Trunc.getOperand(0);
1195   Type *OrigSourceTy = OrigSource->getType();
1196   Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
1197 
1198   // When truncating:
1199   //  - (A) If the source has a shadow, we truncate from the shadow, else we
1200   //    truncate from the original source.
1201   //  - (B) If the shadow of the source is larger than the shadow of the dest,
1202   //    we still need a truncate. Else, the shadow of the source is the same
1203   //    type as the shadow of the dest (because mappings are non-decreasing), so
1204   //   we don't need to emit a truncate.
1205   // Examples,
1206   //   with a mapping of {f32->f64;f64->f80;f80->f128}
1207   //     fptrunc double   %1 to float     ->  fptrunc x86_fp80 s(%1) to double
1208   //     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
1209   //     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
1210   //     fptrunc x86_fp80 %1 to double    ->  x86_fp80 s(%1)
1211   //     fptrunc fp128    %1 to double    ->  fptrunc fp128 %1 to x86_fp80
1212   //     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
1213   //   with a mapping of {f32->f64;f64->f128;f80->f128}
1214   //     fptrunc double   %1 to float     ->  fptrunc fp128    s(%1) to double
1215   //     fptrunc x86_fp80 %1 to float     ->  fptrunc fp128    s(%1) to double
1216   //     fptrunc fp128    %1 to float     ->  fptrunc fp128    %1    to double
1217   //     fptrunc x86_fp80 %1 to double    ->  fp128 %1
1218   //     fptrunc fp128    %1 to double    ->  fp128 %1
1219   //     fptrunc fp128    %1 to x86_fp80  ->  fp128 %1
1220   //   with a mapping of {f32->f32;f64->f32;f80->f64}
1221   //     fptrunc double   %1 to float     ->  float s(%1)
1222   //     fptrunc x86_fp80 %1 to float     ->  fptrunc double    s(%1) to float
1223   //     fptrunc fp128    %1 to float     ->  fptrunc fp128     %1    to float
1224   //     fptrunc x86_fp80 %1 to double    ->  fptrunc double    s(%1) to float
1225   //     fptrunc fp128    %1 to double    ->  fptrunc fp128     %1    to float
1226   //     fptrunc fp128    %1 to x86_fp80  ->  fptrunc fp128     %1    to double
1227 
1228   // See (A) above.
1229   Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
1230   Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1231   // See (B) above.
1232   if (SourceTy == ExtendedVT)
1233     return Source;
1234 
1235   return Builder.CreateFPTrunc(Source, ExtendedVT);
1236 }
1237 
handleExt(const FPExtInst & Ext,Type * VT,Type * ExtendedVT,const ValueToShadowMap & Map,IRBuilder<> & Builder)1238 Value *NumericalStabilitySanitizer::handleExt(const FPExtInst &Ext, Type *VT,
1239                                               Type *ExtendedVT,
1240                                               const ValueToShadowMap &Map,
1241                                               IRBuilder<> &Builder) {
1242   Value *OrigSource = Ext.getOperand(0);
1243   Type *OrigSourceTy = OrigSource->getType();
1244   Type *ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
1245   // When extending:
1246   //  - (A) If the source has a shadow, we extend from the shadow, else we
1247   //    extend from the original source.
1248   //  - (B) If the shadow of the dest is larger than the shadow of the source,
1249   //    we still need an extend. Else, the shadow of the source is the same
1250   //    type as the shadow of the dest (because mappings are non-decreasing), so
1251   //    we don't need to emit an extend.
1252   // Examples,
1253   //   with a mapping of {f32->f64;f64->f80;f80->f128}
1254   //     fpext half    %1 to float     ->  fpext half     %1    to double
1255   //     fpext half    %1 to double    ->  fpext half     %1    to x86_fp80
1256   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to fp128
1257   //     fpext float   %1 to double    ->  double s(%1)
1258   //     fpext float   %1 to x86_fp80  ->  fpext double   s(%1) to fp128
1259   //     fpext double  %1 to x86_fp80  ->  fpext x86_fp80 s(%1) to fp128
1260   //   with a mapping of {f32->f64;f64->f128;f80->f128}
1261   //     fpext half    %1 to float     ->  fpext half     %1    to double
1262   //     fpext half    %1 to double    ->  fpext half     %1    to fp128
1263   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to fp128
1264   //     fpext float   %1 to double    ->  fpext double   s(%1) to fp128
1265   //     fpext float   %1 to x86_fp80  ->  fpext double   s(%1) to fp128
1266   //     fpext double  %1 to x86_fp80  ->  fp128 s(%1)
1267   //   with a mapping of {f32->f32;f64->f32;f80->f64}
1268   //     fpext half    %1 to float     ->  fpext half     %1    to float
1269   //     fpext half    %1 to double    ->  fpext half     %1    to float
1270   //     fpext half    %1 to x86_fp80  ->  fpext half     %1    to double
1271   //     fpext float   %1 to double    ->  s(%1)
1272   //     fpext float   %1 to x86_fp80  ->  fpext float    s(%1) to double
1273   //     fpext double  %1 to x86_fp80  ->  fpext float    s(%1) to double
1274 
1275   // See (A) above.
1276   Value *Source = ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
1277   Type *SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
1278   // See (B) above.
1279   if (SourceTy == ExtendedVT)
1280     return Source;
1281 
1282   return Builder.CreateFPExt(Source, ExtendedVT);
1283 }
1284 
1285 namespace {
1286 // TODO: This should be tablegen-ed.
1287 struct KnownIntrinsic {
1288   struct WidenedIntrinsic {
1289     const char *NarrowName;
1290     Intrinsic::ID ID; // wide id.
1291     using FnTypeFactory = FunctionType *(*)(LLVMContext &);
1292     FnTypeFactory MakeFnTy;
1293   };
1294 
1295   static const char *get(LibFunc LFunc);
1296 
1297   // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
1298   // that applies the same operation on the shadow argument.
1299   // Options are:
1300   //  - pass in the ID and full function type,
1301   //  - pass in the name, which includes the function type through mangling.
1302   static const WidenedIntrinsic *widen(StringRef Name);
1303 
1304 private:
1305   struct LFEntry {
1306     LibFunc LFunc;
1307     const char *IntrinsicName;
1308   };
1309   static const LFEntry kLibfuncIntrinsics[];
1310 
1311   static const WidenedIntrinsic kWidenedIntrinsics[];
1312 };
1313 } // namespace
1314 
makeDoubleDouble(LLVMContext & C)1315 static FunctionType *makeDoubleDouble(LLVMContext &C) {
1316   return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)}, false);
1317 }
1318 
makeX86FP80X86FP80(LLVMContext & C)1319 static FunctionType *makeX86FP80X86FP80(LLVMContext &C) {
1320   return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)},
1321                            false);
1322 }
1323 
makeDoubleDoubleI32(LLVMContext & C)1324 static FunctionType *makeDoubleDoubleI32(LLVMContext &C) {
1325   return FunctionType::get(Type::getDoubleTy(C),
1326                            {Type::getDoubleTy(C), Type::getInt32Ty(C)}, false);
1327 }
1328 
makeX86FP80X86FP80I32(LLVMContext & C)1329 static FunctionType *makeX86FP80X86FP80I32(LLVMContext &C) {
1330   return FunctionType::get(Type::getX86_FP80Ty(C),
1331                            {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
1332                            false);
1333 }
1334 
makeDoubleDoubleDouble(LLVMContext & C)1335 static FunctionType *makeDoubleDoubleDouble(LLVMContext &C) {
1336   return FunctionType::get(Type::getDoubleTy(C),
1337                            {Type::getDoubleTy(C), Type::getDoubleTy(C)}, false);
1338 }
1339 
makeX86FP80X86FP80X86FP80(LLVMContext & C)1340 static FunctionType *makeX86FP80X86FP80X86FP80(LLVMContext &C) {
1341   return FunctionType::get(Type::getX86_FP80Ty(C),
1342                            {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1343                            false);
1344 }
1345 
makeDoubleDoubleDoubleDouble(LLVMContext & C)1346 static FunctionType *makeDoubleDoubleDoubleDouble(LLVMContext &C) {
1347   return FunctionType::get(
1348       Type::getDoubleTy(C),
1349       {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
1350       false);
1351 }
1352 
makeX86FP80X86FP80X86FP80X86FP80(LLVMContext & C)1353 static FunctionType *makeX86FP80X86FP80X86FP80X86FP80(LLVMContext &C) {
1354   return FunctionType::get(
1355       Type::getX86_FP80Ty(C),
1356       {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
1357       false);
1358 }
1359 
1360 const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
1361     // TODO: Right now we ignore vector intrinsics.
1362     // This is hard because we have to model the semantics of the intrinsics,
1363     // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert back.
1364     // Intrinsics that take any non-vector FT types:
1365     // NOTE: Right now because of
1366     // https://github.com/llvm/llvm-project/issues/44744
1367     // for f128 we need to use makeX86FP80X86FP80 (go to a lower precision and
1368     // come back).
1369     {"llvm.sqrt.f32", Intrinsic::sqrt, makeDoubleDouble},
1370     {"llvm.sqrt.f64", Intrinsic::sqrt, makeX86FP80X86FP80},
1371     {"llvm.sqrt.f80", Intrinsic::sqrt, makeX86FP80X86FP80},
1372     {"llvm.powi.f32", Intrinsic::powi, makeDoubleDoubleI32},
1373     {"llvm.powi.f64", Intrinsic::powi, makeX86FP80X86FP80I32},
1374     {"llvm.powi.f80", Intrinsic::powi, makeX86FP80X86FP80I32},
1375     {"llvm.sin.f32", Intrinsic::sin, makeDoubleDouble},
1376     {"llvm.sin.f64", Intrinsic::sin, makeX86FP80X86FP80},
1377     {"llvm.sin.f80", Intrinsic::sin, makeX86FP80X86FP80},
1378     {"llvm.cos.f32", Intrinsic::cos, makeDoubleDouble},
1379     {"llvm.cos.f64", Intrinsic::cos, makeX86FP80X86FP80},
1380     {"llvm.cos.f80", Intrinsic::cos, makeX86FP80X86FP80},
1381     {"llvm.pow.f32", Intrinsic::pow, makeDoubleDoubleDouble},
1382     {"llvm.pow.f64", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1383     {"llvm.pow.f80", Intrinsic::pow, makeX86FP80X86FP80X86FP80},
1384     {"llvm.exp.f32", Intrinsic::exp, makeDoubleDouble},
1385     {"llvm.exp.f64", Intrinsic::exp, makeX86FP80X86FP80},
1386     {"llvm.exp.f80", Intrinsic::exp, makeX86FP80X86FP80},
1387     {"llvm.exp2.f32", Intrinsic::exp2, makeDoubleDouble},
1388     {"llvm.exp2.f64", Intrinsic::exp2, makeX86FP80X86FP80},
1389     {"llvm.exp2.f80", Intrinsic::exp2, makeX86FP80X86FP80},
1390     {"llvm.log.f32", Intrinsic::log, makeDoubleDouble},
1391     {"llvm.log.f64", Intrinsic::log, makeX86FP80X86FP80},
1392     {"llvm.log.f80", Intrinsic::log, makeX86FP80X86FP80},
1393     {"llvm.log10.f32", Intrinsic::log10, makeDoubleDouble},
1394     {"llvm.log10.f64", Intrinsic::log10, makeX86FP80X86FP80},
1395     {"llvm.log10.f80", Intrinsic::log10, makeX86FP80X86FP80},
1396     {"llvm.log2.f32", Intrinsic::log2, makeDoubleDouble},
1397     {"llvm.log2.f64", Intrinsic::log2, makeX86FP80X86FP80},
1398     {"llvm.log2.f80", Intrinsic::log2, makeX86FP80X86FP80},
1399     {"llvm.fma.f32", Intrinsic::fma, makeDoubleDoubleDoubleDouble},
1400 
1401     {"llvm.fmuladd.f32", Intrinsic::fmuladd, makeDoubleDoubleDoubleDouble},
1402 
1403     {"llvm.fma.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1404 
1405     {"llvm.fmuladd.f64", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1406 
1407     {"llvm.fma.f80", Intrinsic::fma, makeX86FP80X86FP80X86FP80X86FP80},
1408     {"llvm.fabs.f32", Intrinsic::fabs, makeDoubleDouble},
1409     {"llvm.fabs.f64", Intrinsic::fabs, makeX86FP80X86FP80},
1410     {"llvm.fabs.f80", Intrinsic::fabs, makeX86FP80X86FP80},
1411     {"llvm.minnum.f32", Intrinsic::minnum, makeDoubleDoubleDouble},
1412     {"llvm.minnum.f64", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1413     {"llvm.minnum.f80", Intrinsic::minnum, makeX86FP80X86FP80X86FP80},
1414     {"llvm.maxnum.f32", Intrinsic::maxnum, makeDoubleDoubleDouble},
1415     {"llvm.maxnum.f64", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1416     {"llvm.maxnum.f80", Intrinsic::maxnum, makeX86FP80X86FP80X86FP80},
1417     {"llvm.minimum.f32", Intrinsic::minimum, makeDoubleDoubleDouble},
1418     {"llvm.minimum.f64", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1419     {"llvm.minimum.f80", Intrinsic::minimum, makeX86FP80X86FP80X86FP80},
1420     {"llvm.maximum.f32", Intrinsic::maximum, makeDoubleDoubleDouble},
1421     {"llvm.maximum.f64", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1422     {"llvm.maximum.f80", Intrinsic::maximum, makeX86FP80X86FP80X86FP80},
1423     {"llvm.copysign.f32", Intrinsic::copysign, makeDoubleDoubleDouble},
1424     {"llvm.copysign.f64", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1425     {"llvm.copysign.f80", Intrinsic::copysign, makeX86FP80X86FP80X86FP80},
1426     {"llvm.floor.f32", Intrinsic::floor, makeDoubleDouble},
1427     {"llvm.floor.f64", Intrinsic::floor, makeX86FP80X86FP80},
1428     {"llvm.floor.f80", Intrinsic::floor, makeX86FP80X86FP80},
1429     {"llvm.ceil.f32", Intrinsic::ceil, makeDoubleDouble},
1430     {"llvm.ceil.f64", Intrinsic::ceil, makeX86FP80X86FP80},
1431     {"llvm.ceil.f80", Intrinsic::ceil, makeX86FP80X86FP80},
1432     {"llvm.trunc.f32", Intrinsic::trunc, makeDoubleDouble},
1433     {"llvm.trunc.f64", Intrinsic::trunc, makeX86FP80X86FP80},
1434     {"llvm.trunc.f80", Intrinsic::trunc, makeX86FP80X86FP80},
1435     {"llvm.rint.f32", Intrinsic::rint, makeDoubleDouble},
1436     {"llvm.rint.f64", Intrinsic::rint, makeX86FP80X86FP80},
1437     {"llvm.rint.f80", Intrinsic::rint, makeX86FP80X86FP80},
1438     {"llvm.nearbyint.f32", Intrinsic::nearbyint, makeDoubleDouble},
1439     {"llvm.nearbyint.f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1440     {"llvm.nearbyin80f64", Intrinsic::nearbyint, makeX86FP80X86FP80},
1441     {"llvm.round.f32", Intrinsic::round, makeDoubleDouble},
1442     {"llvm.round.f64", Intrinsic::round, makeX86FP80X86FP80},
1443     {"llvm.round.f80", Intrinsic::round, makeX86FP80X86FP80},
1444     {"llvm.lround.f32", Intrinsic::lround, makeDoubleDouble},
1445     {"llvm.lround.f64", Intrinsic::lround, makeX86FP80X86FP80},
1446     {"llvm.lround.f80", Intrinsic::lround, makeX86FP80X86FP80},
1447     {"llvm.llround.f32", Intrinsic::llround, makeDoubleDouble},
1448     {"llvm.llround.f64", Intrinsic::llround, makeX86FP80X86FP80},
1449     {"llvm.llround.f80", Intrinsic::llround, makeX86FP80X86FP80},
1450     {"llvm.lrint.f32", Intrinsic::lrint, makeDoubleDouble},
1451     {"llvm.lrint.f64", Intrinsic::lrint, makeX86FP80X86FP80},
1452     {"llvm.lrint.f80", Intrinsic::lrint, makeX86FP80X86FP80},
1453     {"llvm.llrint.f32", Intrinsic::llrint, makeDoubleDouble},
1454     {"llvm.llrint.f64", Intrinsic::llrint, makeX86FP80X86FP80},
1455     {"llvm.llrint.f80", Intrinsic::llrint, makeX86FP80X86FP80},
1456 };
1457 
1458 const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = {
1459     {LibFunc_sqrtf, "llvm.sqrt.f32"},
1460     {LibFunc_sqrt, "llvm.sqrt.f64"},
1461     {LibFunc_sqrtl, "llvm.sqrt.f80"},
1462     {LibFunc_sinf, "llvm.sin.f32"},
1463     {LibFunc_sin, "llvm.sin.f64"},
1464     {LibFunc_sinl, "llvm.sin.f80"},
1465     {LibFunc_cosf, "llvm.cos.f32"},
1466     {LibFunc_cos, "llvm.cos.f64"},
1467     {LibFunc_cosl, "llvm.cos.f80"},
1468     {LibFunc_powf, "llvm.pow.f32"},
1469     {LibFunc_pow, "llvm.pow.f64"},
1470     {LibFunc_powl, "llvm.pow.f80"},
1471     {LibFunc_expf, "llvm.exp.f32"},
1472     {LibFunc_exp, "llvm.exp.f64"},
1473     {LibFunc_expl, "llvm.exp.f80"},
1474     {LibFunc_exp2f, "llvm.exp2.f32"},
1475     {LibFunc_exp2, "llvm.exp2.f64"},
1476     {LibFunc_exp2l, "llvm.exp2.f80"},
1477     {LibFunc_logf, "llvm.log.f32"},
1478     {LibFunc_log, "llvm.log.f64"},
1479     {LibFunc_logl, "llvm.log.f80"},
1480     {LibFunc_log10f, "llvm.log10.f32"},
1481     {LibFunc_log10, "llvm.log10.f64"},
1482     {LibFunc_log10l, "llvm.log10.f80"},
1483     {LibFunc_log2f, "llvm.log2.f32"},
1484     {LibFunc_log2, "llvm.log2.f64"},
1485     {LibFunc_log2l, "llvm.log2.f80"},
1486     {LibFunc_fabsf, "llvm.fabs.f32"},
1487     {LibFunc_fabs, "llvm.fabs.f64"},
1488     {LibFunc_fabsl, "llvm.fabs.f80"},
1489     {LibFunc_copysignf, "llvm.copysign.f32"},
1490     {LibFunc_copysign, "llvm.copysign.f64"},
1491     {LibFunc_copysignl, "llvm.copysign.f80"},
1492     {LibFunc_floorf, "llvm.floor.f32"},
1493     {LibFunc_floor, "llvm.floor.f64"},
1494     {LibFunc_floorl, "llvm.floor.f80"},
1495     {LibFunc_fmaxf, "llvm.maxnum.f32"},
1496     {LibFunc_fmax, "llvm.maxnum.f64"},
1497     {LibFunc_fmaxl, "llvm.maxnum.f80"},
1498     {LibFunc_fminf, "llvm.minnum.f32"},
1499     {LibFunc_fmin, "llvm.minnum.f64"},
1500     {LibFunc_fminl, "llvm.minnum.f80"},
1501     {LibFunc_ceilf, "llvm.ceil.f32"},
1502     {LibFunc_ceil, "llvm.ceil.f64"},
1503     {LibFunc_ceill, "llvm.ceil.f80"},
1504     {LibFunc_truncf, "llvm.trunc.f32"},
1505     {LibFunc_trunc, "llvm.trunc.f64"},
1506     {LibFunc_truncl, "llvm.trunc.f80"},
1507     {LibFunc_rintf, "llvm.rint.f32"},
1508     {LibFunc_rint, "llvm.rint.f64"},
1509     {LibFunc_rintl, "llvm.rint.f80"},
1510     {LibFunc_nearbyintf, "llvm.nearbyint.f32"},
1511     {LibFunc_nearbyint, "llvm.nearbyint.f64"},
1512     {LibFunc_nearbyintl, "llvm.nearbyint.f80"},
1513     {LibFunc_roundf, "llvm.round.f32"},
1514     {LibFunc_round, "llvm.round.f64"},
1515     {LibFunc_roundl, "llvm.round.f80"},
1516 };
1517 
get(LibFunc LFunc)1518 const char *KnownIntrinsic::get(LibFunc LFunc) {
1519   for (const auto &E : kLibfuncIntrinsics) {
1520     if (E.LFunc == LFunc)
1521       return E.IntrinsicName;
1522   }
1523   return nullptr;
1524 }
1525 
widen(StringRef Name)1526 const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) {
1527   for (const auto &E : kWidenedIntrinsics) {
1528     if (E.NarrowName == Name)
1529       return &E;
1530   }
1531   return nullptr;
1532 }
1533 
1534 // Returns the name of the LLVM intrinsic corresponding to the given function.
getIntrinsicFromLibfunc(Function & Fn,Type * VT,const TargetLibraryInfo & TLI)1535 static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT,
1536                                            const TargetLibraryInfo &TLI) {
1537   LibFunc LFunc;
1538   if (!TLI.getLibFunc(Fn, LFunc))
1539     return nullptr;
1540 
1541   if (const char *Name = KnownIntrinsic::get(LFunc))
1542     return Name;
1543 
1544   LLVM_DEBUG(errs() << "TODO: LibFunc: " << TLI.getName(LFunc) << "\n");
1545   return nullptr;
1546 }
1547 
1548 // Try to handle a known function call.
maybeHandleKnownCallBase(CallBase & Call,Type * VT,Type * ExtendedVT,const TargetLibraryInfo & TLI,const ValueToShadowMap & Map,IRBuilder<> & Builder)1549 Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase(
1550     CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
1551     const ValueToShadowMap &Map, IRBuilder<> &Builder) {
1552   Function *Fn = Call.getCalledFunction();
1553   if (Fn == nullptr)
1554     return nullptr;
1555 
1556   Intrinsic::ID WidenedId = Intrinsic::ID();
1557   FunctionType *WidenedFnTy = nullptr;
1558   if (const auto ID = Fn->getIntrinsicID()) {
1559     const auto *Widened = KnownIntrinsic::widen(Fn->getName());
1560     if (Widened) {
1561       WidenedId = Widened->ID;
1562       WidenedFnTy = Widened->MakeFnTy(Context);
1563     } else {
1564       // If we don't know how to widen the intrinsic, we have no choice but to
1565       // call the non-wide version on a truncated shadow and extend again
1566       // afterwards.
1567       WidenedId = ID;
1568       WidenedFnTy = Fn->getFunctionType();
1569     }
1570   } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) {
1571     // We might have a call to a library function that we can replace with a
1572     // wider Intrinsic.
1573     const auto *Widened = KnownIntrinsic::widen(Name);
1574     assert(Widened && "make sure KnownIntrinsic entries are consistent");
1575     WidenedId = Widened->ID;
1576     WidenedFnTy = Widened->MakeFnTy(Context);
1577   } else {
1578     // This is not a known library function or intrinsic.
1579     return nullptr;
1580   }
1581 
1582   // Check that the widened intrinsic is valid.
1583   SmallVector<Intrinsic::IITDescriptor, 8> Table;
1584   getIntrinsicInfoTableEntries(WidenedId, Table);
1585   SmallVector<Type *, 4> ArgTys;
1586   ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
1587   [[maybe_unused]] Intrinsic::MatchIntrinsicTypesResult MatchResult =
1588       Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys);
1589   assert(MatchResult == Intrinsic::MatchIntrinsicTypes_Match &&
1590          "invalid widened intrinsic");
1591   // For known intrinsic functions, we create a second call to the same
1592   // intrinsic with a different type.
1593   SmallVector<Value *, 4> Args;
1594   // The last operand is the intrinsic itself, skip it.
1595   for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) {
1596     Value *Arg = Call.getOperand(I);
1597     Type *OrigArgTy = Arg->getType();
1598     Type *IntrinsicArgTy = WidenedFnTy->getParamType(I);
1599     if (OrigArgTy == IntrinsicArgTy) {
1600       Args.push_back(Arg); // The arg is passed as is.
1601       continue;
1602     }
1603     Type *ShadowArgTy = Config.getExtendedFPType(Arg->getType());
1604     assert(ShadowArgTy &&
1605            "don't know how to get the shadow value for a non-FT");
1606     Value *Shadow = Map.getShadow(Arg);
1607     if (ShadowArgTy == IntrinsicArgTy) {
1608       // The shadow is the right type for the intrinsic.
1609       assert(Shadow->getType() == ShadowArgTy);
1610       Args.push_back(Shadow);
1611       continue;
1612     }
1613     // There is no intrinsic with his level of precision, truncate the shadow.
1614     Args.push_back(Builder.CreateFPTrunc(Shadow, IntrinsicArgTy));
1615   }
1616   Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
1617   return WidenedFnTy->getReturnType() == ExtendedVT
1618              ? IntrinsicCall
1619              : Builder.CreateFPExt(IntrinsicCall, ExtendedVT);
1620 }
1621 
1622 // Handle a CallBase, i.e. a function call, an inline asm sequence, or an
1623 // invoke.
handleCallBase(CallBase & Call,Type * VT,Type * ExtendedVT,const TargetLibraryInfo & TLI,const ValueToShadowMap & Map,IRBuilder<> & Builder)1624 Value *NumericalStabilitySanitizer::handleCallBase(CallBase &Call, Type *VT,
1625                                                    Type *ExtendedVT,
1626                                                    const TargetLibraryInfo &TLI,
1627                                                    const ValueToShadowMap &Map,
1628                                                    IRBuilder<> &Builder) {
1629   // We cannot look inside inline asm, just expand the result again.
1630   if (Call.isInlineAsm())
1631     return Builder.CreateFPExt(&Call, ExtendedVT);
1632 
1633   // Intrinsics and library functions (e.g. sin, exp) are handled
1634   // specifically, because we know their semantics and can do better than
1635   // blindly calling them (e.g. compute the sinus in the actual shadow domain).
1636   if (Value *V =
1637           maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
1638     return V;
1639 
1640   // If the return tag matches that of the called function, read the extended
1641   // return value from the shadow ret ptr. Else, just extend the return value.
1642   Value *L =
1643       Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false);
1644   Value *HasShadowRet = Builder.CreateICmpEQ(
1645       L, Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy));
1646 
1647   Value *ShadowRetVal = Builder.CreateLoad(
1648       ExtendedVT,
1649       Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0),
1650       /*isVolatile=*/false);
1651   Value *Shadow = Builder.CreateSelect(HasShadowRet, ShadowRetVal,
1652                                        Builder.CreateFPExt(&Call, ExtendedVT));
1653   ++NumInstrumentedFTCalls;
1654   return Shadow;
1655 }
1656 
1657 // Creates a shadow value for the given FT value. At that point all operands are
1658 // guaranteed to be available.
createShadowValueWithOperandsAvailable(Instruction & Inst,const TargetLibraryInfo & TLI,const ValueToShadowMap & Map)1659 Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
1660     Instruction &Inst, const TargetLibraryInfo &TLI,
1661     const ValueToShadowMap &Map) {
1662   Type *VT = Inst.getType();
1663   Type *ExtendedVT = Config.getExtendedFPType(VT);
1664   assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
1665 
1666   if (auto *Load = dyn_cast<LoadInst>(&Inst))
1667     return handleLoad(*Load, VT, ExtendedVT);
1668 
1669   if (auto *Call = dyn_cast<CallInst>(&Inst)) {
1670     // Insert after the call.
1671     BasicBlock::iterator It(Inst);
1672     IRBuilder<> Builder(Call->getParent(), ++It);
1673     Builder.SetCurrentDebugLocation(Call->getDebugLoc());
1674     return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
1675   }
1676 
1677   if (auto *Invoke = dyn_cast<InvokeInst>(&Inst)) {
1678     // The Invoke terminates the basic block, create a new basic block in
1679     // between the successful invoke and the next block.
1680     BasicBlock *InvokeBB = Invoke->getParent();
1681     BasicBlock *NextBB = Invoke->getNormalDest();
1682     BasicBlock *NewBB =
1683         BasicBlock::Create(Context, "", NextBB->getParent(), NextBB);
1684     Inst.replaceSuccessorWith(NextBB, NewBB);
1685 
1686     IRBuilder<> Builder(NewBB);
1687     Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
1688     Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map, Builder);
1689     Builder.CreateBr(NextBB);
1690     NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
1691     return Shadow;
1692   }
1693 
1694   IRBuilder<> Builder(Inst.getNextNode());
1695   Builder.SetCurrentDebugLocation(Inst.getDebugLoc());
1696 
1697   if (auto *Trunc = dyn_cast<FPTruncInst>(&Inst))
1698     return handleTrunc(*Trunc, VT, ExtendedVT, Map, Builder);
1699   if (auto *Ext = dyn_cast<FPExtInst>(&Inst))
1700     return handleExt(*Ext, VT, ExtendedVT, Map, Builder);
1701 
1702   if (auto *UnaryOp = dyn_cast<UnaryOperator>(&Inst))
1703     return Builder.CreateUnOp(UnaryOp->getOpcode(),
1704                               Map.getShadow(UnaryOp->getOperand(0)));
1705 
1706   if (auto *BinOp = dyn_cast<BinaryOperator>(&Inst))
1707     return Builder.CreateBinOp(BinOp->getOpcode(),
1708                                Map.getShadow(BinOp->getOperand(0)),
1709                                Map.getShadow(BinOp->getOperand(1)));
1710 
1711   if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
1712     auto *Cast = cast<CastInst>(&Inst);
1713     return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
1714                               ExtendedVT);
1715   }
1716 
1717   if (auto *S = dyn_cast<SelectInst>(&Inst))
1718     return Builder.CreateSelect(S->getCondition(),
1719                                 Map.getShadow(S->getTrueValue()),
1720                                 Map.getShadow(S->getFalseValue()));
1721 
1722   if (auto *Freeze = dyn_cast<FreezeInst>(&Inst))
1723     return Builder.CreateFreeze(Map.getShadow(Freeze->getOperand(0)));
1724 
1725   if (auto *Extract = dyn_cast<ExtractElementInst>(&Inst))
1726     return Builder.CreateExtractElement(
1727         Map.getShadow(Extract->getVectorOperand()), Extract->getIndexOperand());
1728 
1729   if (auto *Insert = dyn_cast<InsertElementInst>(&Inst))
1730     return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
1731                                        Map.getShadow(Insert->getOperand(1)),
1732                                        Insert->getOperand(2));
1733 
1734   if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst))
1735     return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
1736                                        Map.getShadow(Shuffle->getOperand(1)),
1737                                        Shuffle->getShuffleMask());
1738   // TODO: We could make aggregate object first class citizens. For now we
1739   // just extend the extracted value.
1740   if (auto *Extract = dyn_cast<ExtractValueInst>(&Inst))
1741     return Builder.CreateFPExt(Extract, ExtendedVT);
1742 
1743   if (auto *BC = dyn_cast<BitCastInst>(&Inst))
1744     return Builder.CreateFPExt(BC, ExtendedVT);
1745 
1746   report_fatal_error("Unimplemented support for " +
1747                      Twine(Inst.getOpcodeName()));
1748 }
1749 
1750 // Creates a shadow value for an instruction that defines a value of FT type.
1751 // FT operands that do not already have shadow values are created recursively.
1752 // The DFS is guaranteed to not loop as phis and arguments already have
1753 // shadows.
maybeCreateShadowValue(Instruction & Root,const TargetLibraryInfo & TLI,ValueToShadowMap & Map)1754 void NumericalStabilitySanitizer::maybeCreateShadowValue(
1755     Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
1756   Type *VT = Root.getType();
1757   Type *ExtendedVT = Config.getExtendedFPType(VT);
1758   if (ExtendedVT == nullptr)
1759     return; // Not an FT value.
1760 
1761   if (Map.hasShadow(&Root))
1762     return; // Shadow already exists.
1763 
1764   assert(!isa<PHINode>(Root) && "phi nodes should already have shadows");
1765 
1766   std::vector<Instruction *> DfsStack(1, &Root);
1767   while (!DfsStack.empty()) {
1768     // Ensure that all operands to the instruction have shadows before
1769     // proceeding.
1770     Instruction *I = DfsStack.back();
1771     // The shadow for the instruction might have been created deeper in the DFS,
1772     // see `forward_use_with_two_uses` test.
1773     if (Map.hasShadow(I)) {
1774       DfsStack.pop_back();
1775       continue;
1776     }
1777 
1778     bool MissingShadow = false;
1779     for (Value *Op : I->operands()) {
1780       Type *VT = Op->getType();
1781       if (!Config.getExtendedFPType(VT))
1782         continue; // Not an FT value.
1783       if (Map.hasShadow(Op))
1784         continue; // Shadow is already available.
1785       MissingShadow = true;
1786       DfsStack.push_back(cast<Instruction>(Op));
1787     }
1788     if (MissingShadow)
1789       continue; // Process operands and come back to this instruction later.
1790 
1791     // All operands have shadows. Create a shadow for the current value.
1792     Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
1793     Map.setShadow(*I, *Shadow);
1794     DfsStack.pop_back();
1795   }
1796 }
1797 
1798 // A floating-point store needs its value and type written to shadow memory.
propagateFTStore(StoreInst & Store,Type * VT,Type * ExtendedVT,const ValueToShadowMap & Map)1799 void NumericalStabilitySanitizer::propagateFTStore(
1800     StoreInst &Store, Type *VT, Type *ExtendedVT, const ValueToShadowMap &Map) {
1801   Value *StoredValue = Store.getValueOperand();
1802   IRBuilder<> Builder(&Store);
1803   Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1804   const auto Extents = getMemoryExtentsOrDie(VT);
1805   Value *ShadowPtr = Builder.CreateCall(
1806       NsanGetShadowPtrForStore[Extents.ValueType],
1807       {Store.getPointerOperand(), ConstantInt::get(IntptrTy, Extents.NumElts)});
1808 
1809   Value *StoredShadow = Map.getShadow(StoredValue);
1810   if (!Store.getParent()->getParent()->hasOptNone()) {
1811     // Only check stores when optimizing, because non-optimized code generates
1812     // too many stores to the stack, creating false positives.
1813     if (ClCheckStores) {
1814       StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
1815                                CheckLoc::makeStore(Store.getPointerOperand()));
1816       ++NumInstrumentedFTStores;
1817     }
1818   }
1819 
1820   Builder.CreateAlignedStore(StoredShadow, ShadowPtr, Align(1),
1821                              Store.isVolatile());
1822 }
1823 
1824 // A non-ft store needs to invalidate shadow memory. Exceptions are:
1825 //   - memory transfers of floating-point data through other pointer types (llvm
1826 //     optimization passes transform `*(float*)a = *(float*)b` into
1827 //     `*(i32*)a = *(i32*)b` ). These have the same semantics as memcpy.
1828 //   - Writes of FT-sized constants. LLVM likes to do float stores as bitcasted
1829 //     ints. Note that this is not really necessary because if the value is
1830 //     unknown the framework will re-extend it on load anyway. It just felt
1831 //     easier to debug tests with vectors of FTs.
propagateNonFTStore(StoreInst & Store,Type * VT,const ValueToShadowMap & Map)1832 void NumericalStabilitySanitizer::propagateNonFTStore(
1833     StoreInst &Store, Type *VT, const ValueToShadowMap &Map) {
1834   Value *PtrOp = Store.getPointerOperand();
1835   IRBuilder<> Builder(Store.getNextNode());
1836   Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1837   Value *Dst = PtrOp;
1838   TypeSize SlotSize = DL.getTypeStoreSize(VT);
1839   assert(!SlotSize.isScalable() && "unsupported");
1840   const auto LoadSizeBytes = SlotSize.getFixedValue();
1841   Value *ValueSize = Constant::getIntegerValue(
1842       IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes));
1843 
1844   ++NumInstrumentedNonFTStores;
1845   Value *StoredValue = Store.getValueOperand();
1846   if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
1847     // TODO: Handle the case when the value is from a phi.
1848     // This is a memory transfer with memcpy semantics. Copy the type and
1849     // value from the source. Note that we cannot use __nsan_copy_values()
1850     // here, because that will not work when there is a write to memory in
1851     // between the load and the store, e.g. in the case of a swap.
1852     Type *ShadowTypeIntTy = Type::getIntNTy(Context, 8 * LoadSizeBytes);
1853     Type *ShadowValueIntTy =
1854         Type::getIntNTy(Context, 8 * kShadowScale * LoadSizeBytes);
1855     IRBuilder<> LoadBuilder(Load->getNextNode());
1856     Builder.SetCurrentDebugLocation(Store.getDebugLoc());
1857     Value *LoadSrc = Load->getPointerOperand();
1858     // Read the shadow type and value at load time. The type has the same size
1859     // as the FT value, the value has twice its size.
1860     // TODO: cache them to avoid re-creating them when a load is used by
1861     // several stores. Maybe create them like the FT shadows when a load is
1862     // encountered.
1863     Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
1864         ShadowTypeIntTy,
1865         LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}), Align(1),
1866         /*isVolatile=*/false);
1867     Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
1868         ShadowValueIntTy,
1869         LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}), Align(1),
1870         /*isVolatile=*/false);
1871 
1872     // Write back the shadow type and value at store time.
1873     Builder.CreateAlignedStore(
1874         RawShadowType, Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
1875         Align(1),
1876         /*isVolatile=*/false);
1877     Builder.CreateAlignedStore(RawShadowValue,
1878                                Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
1879                                Align(1),
1880                                /*isVolatile=*/false);
1881 
1882     ++NumInstrumentedNonFTMemcpyStores;
1883     return;
1884   }
1885   // ClPropagateNonFTConstStoresAsFT is by default false.
1886   if (Constant *C; ClPropagateNonFTConstStoresAsFT &&
1887                    (C = dyn_cast<Constant>(StoredValue))) {
1888     // This might be a fp constant stored as an int. Bitcast and store if it has
1889     // appropriate size.
1890     Type *BitcastTy = nullptr; // The FT type to bitcast to.
1891     if (auto *CInt = dyn_cast<ConstantInt>(C)) {
1892       switch (CInt->getType()->getScalarSizeInBits()) {
1893       case 32:
1894         BitcastTy = Type::getFloatTy(Context);
1895         break;
1896       case 64:
1897         BitcastTy = Type::getDoubleTy(Context);
1898         break;
1899       case 80:
1900         BitcastTy = Type::getX86_FP80Ty(Context);
1901         break;
1902       default:
1903         break;
1904       }
1905     } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
1906       const int NumElements =
1907           cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
1908       switch (CDV->getType()->getScalarSizeInBits()) {
1909       case 32:
1910         BitcastTy =
1911             VectorType::get(Type::getFloatTy(Context), NumElements, false);
1912         break;
1913       case 64:
1914         BitcastTy =
1915             VectorType::get(Type::getDoubleTy(Context), NumElements, false);
1916         break;
1917       case 80:
1918         BitcastTy =
1919             VectorType::get(Type::getX86_FP80Ty(Context), NumElements, false);
1920         break;
1921       default:
1922         break;
1923       }
1924     }
1925     if (BitcastTy) {
1926       const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
1927       Value *ShadowPtr = Builder.CreateCall(
1928           NsanGetShadowPtrForStore[Extents.ValueType],
1929           {PtrOp, ConstantInt::get(IntptrTy, Extents.NumElts)});
1930       // Bitcast the integer value to the appropriate FT type and extend to 2FT.
1931       Type *ExtVT = Config.getExtendedFPType(BitcastTy);
1932       Value *Shadow =
1933           Builder.CreateFPExt(Builder.CreateBitCast(C, BitcastTy), ExtVT);
1934       Builder.CreateAlignedStore(Shadow, ShadowPtr, Align(1),
1935                                  Store.isVolatile());
1936       return;
1937     }
1938   }
1939   // All other stores just reset the shadow value to unknown.
1940   Builder.CreateCall(NsanSetUnknownFns.getFallback(), {Dst, ValueSize});
1941 }
1942 
propagateShadowValues(Instruction & Inst,const TargetLibraryInfo & TLI,const ValueToShadowMap & Map)1943 void NumericalStabilitySanitizer::propagateShadowValues(
1944     Instruction &Inst, const TargetLibraryInfo &TLI,
1945     const ValueToShadowMap &Map) {
1946   if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
1947     Value *StoredValue = Store->getValueOperand();
1948     Type *VT = StoredValue->getType();
1949     Type *ExtendedVT = Config.getExtendedFPType(VT);
1950     if (ExtendedVT == nullptr)
1951       return propagateNonFTStore(*Store, VT, Map);
1952     return propagateFTStore(*Store, VT, ExtendedVT, Map);
1953   }
1954 
1955   if (auto *FCmp = dyn_cast<FCmpInst>(&Inst)) {
1956     emitFCmpCheck(*FCmp, Map);
1957     return;
1958   }
1959 
1960   if (auto *CB = dyn_cast<CallBase>(&Inst)) {
1961     maybeAddSuffixForNsanInterface(CB);
1962     if (CallInst *CI = dyn_cast<CallInst>(&Inst))
1963       maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
1964     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
1965       instrumentMemIntrinsic(MI);
1966       return;
1967     }
1968     populateShadowStack(*CB, TLI, Map);
1969     return;
1970   }
1971 
1972   if (auto *RetInst = dyn_cast<ReturnInst>(&Inst)) {
1973     if (!ClCheckRet)
1974       return;
1975 
1976     Value *RV = RetInst->getReturnValue();
1977     if (RV == nullptr)
1978       return; // This is a `ret void`.
1979     Type *VT = RV->getType();
1980     Type *ExtendedVT = Config.getExtendedFPType(VT);
1981     if (ExtendedVT == nullptr)
1982       return; // Not an FT ret.
1983     Value *RVShadow = Map.getShadow(RV);
1984     IRBuilder<> Builder(RetInst);
1985 
1986     RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
1987     ++NumInstrumentedFTRets;
1988     // Store tag.
1989     Value *FnAddr =
1990         Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
1991     Builder.CreateStore(FnAddr, NsanShadowRetTag);
1992     // Store value.
1993     Value *ShadowRetValPtr =
1994         Builder.CreateConstGEP2_64(NsanShadowRetType, NsanShadowRetPtr, 0, 0);
1995     Builder.CreateStore(RVShadow, ShadowRetValPtr);
1996     return;
1997   }
1998 
1999   if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
2000     Value *V = Insert->getOperand(1);
2001     Type *VT = V->getType();
2002     Type *ExtendedVT = Config.getExtendedFPType(VT);
2003     if (ExtendedVT == nullptr)
2004       return;
2005     IRBuilder<> Builder(Insert);
2006     emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
2007     return;
2008   }
2009 }
2010 
2011 // Moves fast math flags from the function to individual instructions, and
2012 // removes the attribute from the function.
2013 // TODO: Make this controllable with a flag.
moveFastMathFlags(Function & F,std::vector<Instruction * > & Instructions)2014 static void moveFastMathFlags(Function &F,
2015                               std::vector<Instruction *> &Instructions) {
2016   FastMathFlags FMF;
2017 #define MOVE_FLAG(attr, setter)                                                \
2018   if (F.getFnAttribute(attr).getValueAsString() == "true") {                   \
2019     F.removeFnAttr(attr);                                                      \
2020     FMF.set##setter();                                                         \
2021   }
2022   MOVE_FLAG("unsafe-fp-math", Fast)
2023   MOVE_FLAG("no-infs-fp-math", NoInfs)
2024   MOVE_FLAG("no-nans-fp-math", NoNaNs)
2025   MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
2026 #undef MOVE_FLAG
2027 
2028   for (Instruction *I : Instructions)
2029     if (isa<FPMathOperator>(I))
2030       I->setFastMathFlags(FMF);
2031 }
2032 
sanitizeFunction(Function & F,const TargetLibraryInfo & TLI)2033 bool NumericalStabilitySanitizer::sanitizeFunction(
2034     Function &F, const TargetLibraryInfo &TLI) {
2035   if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability) ||
2036       F.isDeclaration())
2037     return false;
2038 
2039   // This is required to prevent instrumenting call to __nsan_init from within
2040   // the module constructor.
2041   if (F.getName() == kNsanModuleCtorName)
2042     return false;
2043 
2044   // The instrumentation maintains:
2045   //  - for each IR value `v` of floating-point (or vector floating-point) type
2046   //    FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
2047   //    double for float and f128 for double).
2048   //  - A shadow memory, which stores `s(v)` for any `v` that has been stored,
2049   //    along with a shadow memory tag, which stores whether the value in the
2050   //    corresponding shadow memory is valid. Note that this might be
2051   //    incorrect if a non-instrumented function stores to memory, or if
2052   //    memory is stored to through a char pointer.
2053   //  - A shadow stack, which holds `s(v)` for any floating-point argument `v`
2054   //    of a call to an instrumented function. This allows
2055   //    instrumented functions to retrieve the shadow values for their
2056   //    arguments.
2057   //    Because instrumented functions can be called from non-instrumented
2058   //    functions, the stack needs to include a tag so that the instrumented
2059   //    function knows whether shadow values are available for their
2060   //    parameters (i.e. whether is was called by an instrumented function).
2061   //    When shadow arguments are not available, they have to be recreated by
2062   //    extending the precision of the non-shadow arguments to the non-shadow
2063   //    value. Non-instrumented functions do not modify (or even know about) the
2064   //    shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
2065   //    stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
2066   //    for the function (we use the address of the function). Both variables
2067   //    are thread local.
2068   //    Example:
2069   //     calls                             shadow stack tag      shadow stack
2070   //     =======================================================================
2071   //     non_instrumented_1()              0                     0
2072   //             |
2073   //             v
2074   //     instrumented_2(float a)           0                     0
2075   //             |
2076   //             v
2077   //     instrumented_3(float b, double c) &instrumented_3       s(b),s(c)
2078   //             |
2079   //             v
2080   //     instrumented_4(float d)           &instrumented_4       s(d)
2081   //             |
2082   //             v
2083   //     non_instrumented_5(float e)       &non_instrumented_5   s(e)
2084   //             |
2085   //             v
2086   //     instrumented_6(float f)           &non_instrumented_5   s(e)
2087   //
2088   //   On entry, instrumented_2 checks whether the tag corresponds to its
2089   //   function ptr.
2090   //   Note that functions reset the tag to 0 after reading shadow parameters.
2091   //   This ensures that the function does not erroneously read invalid data if
2092   //   called twice in the same stack, once from an instrumented function and
2093   //   once from an uninstrumented one. For example, in the following example,
2094   //   resetting the tag in (A) ensures that (B) does not reuse the same the
2095   //   shadow arguments (which would be incorrect).
2096   //      instrumented_1(float a)
2097   //             |
2098   //             v
2099   //      instrumented_2(float b)  (A)
2100   //             |
2101   //             v
2102   //      non_instrumented_3()
2103   //             |
2104   //             v
2105   //      instrumented_2(float b)  (B)
2106   //
2107   //  - A shadow return slot. Any function that returns a floating-point value
2108   //    places a shadow return value in __nsan_shadow_ret_val. Again, because
2109   //    we might be calling non-instrumented functions, this value is guarded
2110   //    by __nsan_shadow_ret_tag marker indicating which instrumented function
2111   //    placed the value in __nsan_shadow_ret_val, so that the caller can check
2112   //    that this corresponds to the callee. Both variables are thread local.
2113   //
2114   //    For example, in the following example, the instrumentation in
2115   //    `instrumented_1` rejects the shadow return value from `instrumented_3`
2116   //    because is is not tagged as expected (`&instrumented_3` instead of
2117   //    `non_instrumented_2`):
2118   //
2119   //        instrumented_1()
2120   //            |
2121   //            v
2122   //        float non_instrumented_2()
2123   //            |
2124   //            v
2125   //        float instrumented_3()
2126   //
2127   // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
2128   // their overload on the shadow type.
2129 
2130   // Collect all instructions before processing, as creating shadow values
2131   // creates new instructions inside the function.
2132   std::vector<Instruction *> OriginalInstructions;
2133   for (BasicBlock &BB : F)
2134     for (Instruction &Inst : BB)
2135       OriginalInstructions.emplace_back(&Inst);
2136 
2137   moveFastMathFlags(F, OriginalInstructions);
2138   ValueToShadowMap ValueToShadow(Config);
2139 
2140   // In the first pass, we create shadow values for all FT function arguments
2141   // and all phis. This ensures that the DFS of the next pass does not have
2142   // any loops.
2143   std::vector<PHINode *> OriginalPhis;
2144   createShadowArguments(F, TLI, ValueToShadow);
2145   for (Instruction *I : OriginalInstructions) {
2146     if (PHINode *Phi = dyn_cast<PHINode>(I)) {
2147       if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
2148         OriginalPhis.push_back(Phi);
2149         ValueToShadow.setShadow(*Phi, *Shadow);
2150       }
2151     }
2152   }
2153 
2154   // Create shadow values for all instructions creating FT values.
2155   for (Instruction *I : OriginalInstructions)
2156     maybeCreateShadowValue(*I, TLI, ValueToShadow);
2157 
2158   // Propagate shadow values across stores, calls and rets.
2159   for (Instruction *I : OriginalInstructions)
2160     propagateShadowValues(*I, TLI, ValueToShadow);
2161 
2162   // The last pass populates shadow phis with shadow values.
2163   for (PHINode *Phi : OriginalPhis) {
2164     PHINode *ShadowPhi = cast<PHINode>(ValueToShadow.getShadow(Phi));
2165     for (unsigned I : seq(Phi->getNumOperands())) {
2166       Value *V = Phi->getOperand(I);
2167       Value *Shadow = ValueToShadow.getShadow(V);
2168       BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
2169       // For some instructions (e.g. invoke), we create the shadow in a separate
2170       // block, different from the block where the original value is created.
2171       // In that case, the shadow phi might need to refer to this block instead
2172       // of the original block.
2173       // Note that this can only happen for instructions as constant shadows are
2174       // always created in the same block.
2175       ShadowPhi->addIncoming(Shadow, IncomingBB);
2176     }
2177   }
2178 
2179   return !ValueToShadow.empty();
2180 }
2181 
GetMemOpSize(Value * V)2182 static uint64_t GetMemOpSize(Value *V) {
2183   uint64_t OpSize = 0;
2184   if (Constant *C = dyn_cast<Constant>(V)) {
2185     auto *CInt = dyn_cast<ConstantInt>(C);
2186     if (CInt && CInt->getValue().getBitWidth() <= 64)
2187       OpSize = CInt->getValue().getZExtValue();
2188   }
2189 
2190   return OpSize;
2191 }
2192 
2193 // Instrument the memory intrinsics so that they properly modify the shadow
2194 // memory.
instrumentMemIntrinsic(MemIntrinsic * MI)2195 bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
2196   IRBuilder<> Builder(MI);
2197   if (auto *M = dyn_cast<MemSetInst>(MI)) {
2198     FunctionCallee SetUnknownFn =
2199         NsanSetUnknownFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2200     if (SetUnknownFn.getFunctionType()->getNumParams() == 1)
2201       Builder.CreateCall(SetUnknownFn, {/*Address=*/M->getArgOperand(0)});
2202     else
2203       Builder.CreateCall(SetUnknownFn,
2204                          {/*Address=*/M->getArgOperand(0),
2205                           /*Size=*/Builder.CreateIntCast(M->getArgOperand(2),
2206                                                          IntptrTy, false)});
2207 
2208   } else if (auto *M = dyn_cast<MemTransferInst>(MI)) {
2209     FunctionCallee CopyFn =
2210         NsanCopyFns.getFunctionFor(GetMemOpSize(M->getArgOperand(2)));
2211 
2212     if (CopyFn.getFunctionType()->getNumParams() == 2)
2213       Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2214                                   /*Source=*/M->getArgOperand(1)});
2215     else
2216       Builder.CreateCall(CopyFn, {/*Destination=*/M->getArgOperand(0),
2217                                   /*Source=*/M->getArgOperand(1),
2218                                   /*Size=*/
2219                                   Builder.CreateIntCast(M->getArgOperand(2),
2220                                                         IntptrTy, false)});
2221   }
2222   return false;
2223 }
2224 
maybeAddSuffixForNsanInterface(CallBase * CI)2225 void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
2226   Function *Fn = CI->getCalledFunction();
2227   if (Fn == nullptr)
2228     return;
2229 
2230   if (!Fn->getName().starts_with("__nsan_"))
2231     return;
2232 
2233   if (Fn->getName() == "__nsan_dump_shadow_mem") {
2234     assert(CI->arg_size() == 4 &&
2235            "invalid prototype for __nsan_dump_shadow_mem");
2236     // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
2237     // configuration:
2238     // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
2239     // | shadow_type_id_for_double
2240     const uint64_t shadow_value_type_ids =
2241         (static_cast<size_t>(Config.byValueType(kLongDouble).getNsanTypeId())
2242          << 16) |
2243         (static_cast<size_t>(Config.byValueType(kDouble).getNsanTypeId())
2244          << 8) |
2245         static_cast<size_t>(Config.byValueType(kFloat).getNsanTypeId());
2246     CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));
2247   }
2248 }
2249