//===- PoisonChecking.cpp - -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements a transform pass which instruments IR such that poison semantics
// are made explicit. That is, it provides a (possibly partial) executable
// semantics for every instruction w.r.t. poison as specified in the LLVM
// LangRef. There are obvious parallels to the sanitizer tools, but this pass
// is focused purely on the semantics of LLVM IR, not any particular source
// language. If you're looking for something to see if your C/C++ contains
// UB, this is not it.
//
// The rewritten semantics of each instruction will include the following
// components:
//
// 1) The original instruction, unmodified.
// 2) A propagation rule which translates dynamic information about the poison
//    state of each input to whether the dynamic output of the instruction
//    produces poison.
// 3) A creation rule which validates any poison producing flags on the
//    instruction itself (e.g. checks for overflow on nsw).
// 4) A check rule which traps (to a handler function) if this instruction must
//    execute undefined behavior given the poison state of its inputs.
//
// This is a must analysis based transform; that is, the resulting code may
// produce a false negative result (not report UB when it actually exists
// according to the LangRef spec), but should never produce a false positive
// (report UB where it doesn't exist).
//
// Use cases for this pass include:
// - Understanding (and testing!) the implications of the definition of poison
//   from the LangRef.
// - Validating the output of an IR fuzzer to ensure that all programs produced
//   are well defined on the specific input used.
// - Finding/confirming poison specific miscompiles by checking the poison
//   status of an input/IR pair is the same before and after an optimization
//   transform.
// - Checking that a bugpoint reduction does not introduce UB which didn't
//   exist in the original program being reduced.
//
// The major sources of inaccuracy are currently:
// - Most validation rules not yet implemented for instructions with poison
//   relevant flags. At the moment, only nsw/nuw on add/sub/mul, exact on
//   udiv/sdiv, out-of-range shift amounts, and out-of-range indices on
//   insertelement/extractelement (fixed-width vectors) are checked.
// - UB which is control dependent on a branch on poison is not yet
//   reported. Currently, only data flow dependence is modeled.
// - Poison which is propagated through memory is not modeled. As such,
//   storing poison to memory and then reloading it will cause a false negative
//   as we consider the reloaded value to not be poisoned.
// - Poison propagation across function boundaries is not modeled. At the
//   moment, all arguments and return values are assumed not to be poison.
// - Undef is not modeled. In particular, the optimizer's freedom to pick
//   concrete values for undef bits so as to maximize potential for producing
//   poison is not modeled.
58 // 59 //===----------------------------------------------------------------------===// 60 61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h" 62 #include "llvm/ADT/DenseMap.h" 63 #include "llvm/ADT/Statistic.h" 64 #include "llvm/Analysis/MemoryBuiltins.h" 65 #include "llvm/Analysis/ValueTracking.h" 66 #include "llvm/IR/IRBuilder.h" 67 #include "llvm/IR/InstVisitor.h" 68 #include "llvm/IR/IntrinsicInst.h" 69 #include "llvm/IR/PatternMatch.h" 70 #include "llvm/Support/CommandLine.h" 71 #include "llvm/Support/Debug.h" 72 73 using namespace llvm; 74 75 #define DEBUG_TYPE "poison-checking" 76 77 static cl::opt<bool> 78 LocalCheck("poison-checking-function-local", 79 cl::init(false), 80 cl::desc("Check that returns are non-poison (for testing)")); 81 82 83 static bool isConstantFalse(Value* V) { 84 assert(V->getType()->isIntegerTy(1)); 85 if (auto *CI = dyn_cast<ConstantInt>(V)) 86 return CI->isZero(); 87 return false; 88 } 89 90 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { 91 if (Ops.size() == 0) 92 return B.getFalse(); 93 unsigned i = 0; 94 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} 95 if (i == Ops.size()) 96 return B.getFalse(); 97 Value *Accum = Ops[i++]; 98 for (; i < Ops.size(); i++) 99 if (!isConstantFalse(Ops[i])) 100 Accum = B.CreateOr(Accum, Ops[i]); 101 return Accum; 102 } 103 104 static void generateCreationChecksForBinOp(Instruction &I, 105 SmallVectorImpl<Value*> &Checks) { 106 assert(isa<BinaryOperator>(I)); 107 108 IRBuilder<> B(&I); 109 Value *LHS = I.getOperand(0); 110 Value *RHS = I.getOperand(1); 111 switch (I.getOpcode()) { 112 default: 113 return; 114 case Instruction::Add: { 115 if (I.hasNoSignedWrap()) { 116 auto *OverflowOp = 117 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); 118 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 119 } 120 if (I.hasNoUnsignedWrap()) { 121 auto *OverflowOp = 122 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); 123 
Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 124 } 125 break; 126 } 127 case Instruction::Sub: { 128 if (I.hasNoSignedWrap()) { 129 auto *OverflowOp = 130 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); 131 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 132 } 133 if (I.hasNoUnsignedWrap()) { 134 auto *OverflowOp = 135 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); 136 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 137 } 138 break; 139 } 140 case Instruction::Mul: { 141 if (I.hasNoSignedWrap()) { 142 auto *OverflowOp = 143 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); 144 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 145 } 146 if (I.hasNoUnsignedWrap()) { 147 auto *OverflowOp = 148 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); 149 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 150 } 151 break; 152 } 153 case Instruction::UDiv: { 154 if (I.isExact()) { 155 auto *Check = 156 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), 157 ConstantInt::get(LHS->getType(), 0)); 158 Checks.push_back(Check); 159 } 160 break; 161 } 162 case Instruction::SDiv: { 163 if (I.isExact()) { 164 auto *Check = 165 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), 166 ConstantInt::get(LHS->getType(), 0)); 167 Checks.push_back(Check); 168 } 169 break; 170 } 171 case Instruction::AShr: 172 case Instruction::LShr: 173 case Instruction::Shl: { 174 Value *ShiftCheck = 175 B.CreateICmp(ICmpInst::ICMP_UGE, RHS, 176 ConstantInt::get(RHS->getType(), 177 LHS->getType()->getScalarSizeInBits())); 178 Checks.push_back(ShiftCheck); 179 break; 180 } 181 }; 182 } 183 184 /// Given an instruction which can produce poison on non-poison inputs 185 /// (i.e. canCreatePoison returns true), generate runtime checks to produce 186 /// boolean indicators of when poison would result. 
187 static void generateCreationChecks(Instruction &I, 188 SmallVectorImpl<Value*> &Checks) { 189 IRBuilder<> B(&I); 190 if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) 191 generateCreationChecksForBinOp(I, Checks); 192 193 // Handle non-binops separately 194 switch (I.getOpcode()) { 195 default: 196 // Note there are a couple of missing cases here, once implemented, this 197 // should become an llvm_unreachable. 198 break; 199 case Instruction::ExtractElement: { 200 Value *Vec = I.getOperand(0); 201 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 202 if (!VecVTy) 203 break; 204 Value *Idx = I.getOperand(1); 205 unsigned NumElts = VecVTy->getNumElements(); 206 Value *Check = 207 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 208 ConstantInt::get(Idx->getType(), NumElts)); 209 Checks.push_back(Check); 210 break; 211 } 212 case Instruction::InsertElement: { 213 Value *Vec = I.getOperand(0); 214 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 215 if (!VecVTy) 216 break; 217 Value *Idx = I.getOperand(2); 218 unsigned NumElts = VecVTy->getNumElements(); 219 Value *Check = 220 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 221 ConstantInt::get(Idx->getType(), NumElts)); 222 Checks.push_back(Check); 223 break; 224 } 225 }; 226 } 227 228 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { 229 auto Itr = ValToPoison.find(V); 230 if (Itr != ValToPoison.end()) 231 return Itr->second; 232 if (isa<Constant>(V)) { 233 return ConstantInt::getFalse(V->getContext()); 234 } 235 // Return false for unknwon values - this implements a non-strict mode where 236 // unhandled IR constructs are simply considered to never produce poison. At 237 // some point in the future, we probably want a "strict mode" for testing if 238 // nothing else. 
239 return ConstantInt::getFalse(V->getContext()); 240 } 241 242 static void CreateAssert(IRBuilder<> &B, Value *Cond) { 243 assert(Cond->getType()->isIntegerTy(1)); 244 if (auto *CI = dyn_cast<ConstantInt>(Cond)) 245 if (CI->isAllOnesValue()) 246 return; 247 248 Module *M = B.GetInsertBlock()->getModule(); 249 M->getOrInsertFunction("__poison_checker_assert", 250 Type::getVoidTy(M->getContext()), 251 Type::getInt1Ty(M->getContext())); 252 Function *TrapFunc = M->getFunction("__poison_checker_assert"); 253 B.CreateCall(TrapFunc, Cond); 254 } 255 256 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { 257 assert(Cond->getType()->isIntegerTy(1)); 258 CreateAssert(B, B.CreateNot(Cond)); 259 } 260 261 static bool rewrite(Function &F) { 262 auto * const Int1Ty = Type::getInt1Ty(F.getContext()); 263 264 DenseMap<Value *, Value *> ValToPoison; 265 266 for (BasicBlock &BB : F) 267 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 268 auto *OldPHI = cast<PHINode>(&*I); 269 auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues()); 270 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) 271 NewPHI->addIncoming(UndefValue::get(Int1Ty), 272 OldPHI->getIncomingBlock(i)); 273 NewPHI->insertBefore(OldPHI); 274 ValToPoison[OldPHI] = NewPHI; 275 } 276 277 for (BasicBlock &BB : F) 278 for (Instruction &I : BB) { 279 if (isa<PHINode>(I)) continue; 280 281 IRBuilder<> B(cast<Instruction>(&I)); 282 283 // Note: There are many more sources of documented UB, but this pass only 284 // attempts to find UB triggered by propagation of poison. 
285 if (Value *Op = const_cast<Value*>(getGuaranteedNonPoisonOp(&I))) 286 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 287 288 if (LocalCheck) 289 if (auto *RI = dyn_cast<ReturnInst>(&I)) 290 if (RI->getNumOperands() != 0) { 291 Value *Op = RI->getOperand(0); 292 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 293 } 294 295 SmallVector<Value*, 4> Checks; 296 if (propagatesPoison(&I)) 297 for (Value *V : I.operands()) 298 Checks.push_back(getPoisonFor(ValToPoison, V)); 299 300 if (canCreatePoison(&I)) 301 generateCreationChecks(I, Checks); 302 ValToPoison[&I] = buildOrChain(B, Checks); 303 } 304 305 for (BasicBlock &BB : F) 306 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 307 auto *OldPHI = cast<PHINode>(&*I); 308 if (!ValToPoison.count(OldPHI)) 309 continue; // skip the newly inserted phis 310 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); 311 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { 312 auto *OldVal = OldPHI->getIncomingValue(i); 313 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); 314 } 315 } 316 return true; 317 } 318 319 320 PreservedAnalyses PoisonCheckingPass::run(Module &M, 321 ModuleAnalysisManager &AM) { 322 bool Changed = false; 323 for (auto &F : M) 324 Changed |= rewrite(F); 325 326 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 327 } 328 329 PreservedAnalyses PoisonCheckingPass::run(Function &F, 330 FunctionAnalysisManager &AM) { 331 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); 332 } 333 334 /* Major TODO Items: 335 - Control dependent poison UB 336 - Strict mode - (i.e. must analyze every operand) 337 - Poison through memory 338 - Function ABIs 339 - Full coverage of intrinsics, etc.. (ouch) 340 341 Instructions w/Unclear Semantics: 342 - shufflevector - It would seem reasonable for an out of bounds mask element 343 to produce poison, but the LangRef does not state. 
344 - all binary ops w/vector operands - The likely interpretation would be that 345 any element overflowing should produce poison for the entire result, but 346 the LangRef does not state. 347 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems 348 strange that only certian flags should be documented as producing poison. 349 350 Cases of clear poison semantics not yet implemented: 351 - Exact flags on ashr/lshr produce poison 352 - NSW/NUW flags on shl produce poison 353 - Inbounds flag on getelementptr produce poison 354 - fptosi/fptoui (out of bounds input) produce poison 355 - Scalable vector types for insertelement/extractelement 356 - Floating point binary ops w/fmf nnan/noinfs flags produce poison 357 */ 358