1 //===- PoisonChecking.cpp - -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implements a transform pass which instruments IR such that poison semantics 10 // are made explicit. That is, it provides a (possibly partial) executable 11 // semantics for every instruction w.r.t. poison as specified in the LLVM 12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass 13 // is focused purely on the semantics of LLVM IR, not any particular source 14 // language. If you're looking for something to see if your C/C++ contains 15 // UB, this is not it. 16 // 17 // The rewritten semantics of each instruction will include the following 18 // components: 19 // 20 // 1) The original instruction, unmodified. 21 // 2) A propagation rule which translates dynamic information about the poison 22 // state of each input to whether the dynamic output of the instruction 23 // produces poison. 24 // 3) A creation rule which validates any poison producing flags on the 25 // instruction itself (e.g. checks for overflow on nsw). 26 // 4) A check rule which traps (to a handler function) if this instruction must 27 // execute undefined behavior given the poison state of it's inputs. 28 // 29 // This is a must analysis based transform; that is, the resulting code may 30 // produce a false negative result (not report UB when actually exists 31 // according to the LangRef spec), but should never produce a false positive 32 // (report UB where it doesn't exist). 33 // 34 // Use cases for this pass include: 35 // - Understanding (and testing!) the implications of the definition of poison 36 // from the LangRef. 37 // - Validating the output of a IR fuzzer to ensure that all programs produced 38 // are well defined on the specific input used. 39 // - Finding/confirming poison specific miscompiles by checking the poison 40 // status of an input/IR pair is the same before and after an optimization 41 // transform. 42 // - Checking that a bugpoint reduction does not introduce UB which didn't 43 // exist in the original program being reduced. 44 // 45 // The major sources of inaccuracy are currently: 46 // - Most validation rules not yet implemented for instructions with poison 47 // relavant flags. At the moment, only nsw/nuw on add/sub are supported. 48 // - UB which is control dependent on a branch on poison is not yet 49 // reported. Currently, only data flow dependence is modeled. 50 // - Poison which is propagated through memory is not modeled. As such, 51 // storing poison to memory and then reloading it will cause a false negative 52 // as we consider the reloaded value to not be poisoned. 53 // - Poison propagation across function boundaries is not modeled. At the 54 // moment, all arguments and return values are assumed not to be poison. 55 // - Undef is not modeled. In particular, the optimizer's freedom to pick 56 // concrete values for undef bits so as to maximize potential for producing 57 // poison is not modeled. 58 // 59 //===----------------------------------------------------------------------===// 60 61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h" 62 #include "llvm/ADT/DenseMap.h" 63 #include "llvm/Analysis/ValueTracking.h" 64 #include "llvm/IR/IRBuilder.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/Support/CommandLine.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "poison-checking" 71 72 static cl::opt<bool> 73 LocalCheck("poison-checking-function-local", 74 cl::init(false), 75 cl::desc("Check that returns are non-poison (for testing)")); 76 77 78 static bool isConstantFalse(Value* V) { 79 assert(V->getType()->isIntegerTy(1)); 80 if (auto *CI = dyn_cast<ConstantInt>(V)) 81 return CI->isZero(); 82 return false; 83 } 84 85 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { 86 if (Ops.size() == 0) 87 return B.getFalse(); 88 unsigned i = 0; 89 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} 90 if (i == Ops.size()) 91 return B.getFalse(); 92 Value *Accum = Ops[i++]; 93 for (Value *Op : llvm::drop_begin(Ops, i)) 94 if (!isConstantFalse(Op)) 95 Accum = B.CreateOr(Accum, Op); 96 return Accum; 97 } 98 99 static void generateCreationChecksForBinOp(Instruction &I, 100 SmallVectorImpl<Value*> &Checks) { 101 assert(isa<BinaryOperator>(I)); 102 103 IRBuilder<> B(&I); 104 Value *LHS = I.getOperand(0); 105 Value *RHS = I.getOperand(1); 106 switch (I.getOpcode()) { 107 default: 108 return; 109 case Instruction::Add: { 110 if (I.hasNoSignedWrap()) { 111 auto *OverflowOp = 112 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); 113 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 114 } 115 if (I.hasNoUnsignedWrap()) { 116 auto *OverflowOp = 117 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); 118 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 119 } 120 break; 121 } 122 case Instruction::Sub: { 123 if (I.hasNoSignedWrap()) { 124 auto *OverflowOp = 125 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); 126 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 127 } 128 if (I.hasNoUnsignedWrap()) { 129 auto *OverflowOp = 130 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); 131 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 132 } 133 break; 134 } 135 case Instruction::Mul: { 136 if (I.hasNoSignedWrap()) { 137 auto *OverflowOp = 138 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); 139 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 140 } 141 if (I.hasNoUnsignedWrap()) { 142 auto *OverflowOp = 143 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); 144 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 145 } 146 break; 147 } 148 case Instruction::UDiv: { 149 if (I.isExact()) { 150 auto *Check = 151 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), 152 ConstantInt::get(LHS->getType(), 0)); 153 Checks.push_back(Check); 154 } 155 break; 156 } 157 case Instruction::SDiv: { 158 if (I.isExact()) { 159 auto *Check = 160 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), 161 ConstantInt::get(LHS->getType(), 0)); 162 Checks.push_back(Check); 163 } 164 break; 165 } 166 case Instruction::AShr: 167 case Instruction::LShr: 168 case Instruction::Shl: { 169 Value *ShiftCheck = 170 B.CreateICmp(ICmpInst::ICMP_UGE, RHS, 171 ConstantInt::get(RHS->getType(), 172 LHS->getType()->getScalarSizeInBits())); 173 Checks.push_back(ShiftCheck); 174 break; 175 } 176 }; 177 } 178 179 /// Given an instruction which can produce poison on non-poison inputs 180 /// (i.e. canCreatePoison returns true), generate runtime checks to produce 181 /// boolean indicators of when poison would result. 182 static void generateCreationChecks(Instruction &I, 183 SmallVectorImpl<Value*> &Checks) { 184 IRBuilder<> B(&I); 185 if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) 186 generateCreationChecksForBinOp(I, Checks); 187 188 // Handle non-binops separately 189 switch (I.getOpcode()) { 190 default: 191 // Note there are a couple of missing cases here, once implemented, this 192 // should become an llvm_unreachable. 193 break; 194 case Instruction::ExtractElement: { 195 Value *Vec = I.getOperand(0); 196 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 197 if (!VecVTy) 198 break; 199 Value *Idx = I.getOperand(1); 200 unsigned NumElts = VecVTy->getNumElements(); 201 Value *Check = 202 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 203 ConstantInt::get(Idx->getType(), NumElts)); 204 Checks.push_back(Check); 205 break; 206 } 207 case Instruction::InsertElement: { 208 Value *Vec = I.getOperand(0); 209 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 210 if (!VecVTy) 211 break; 212 Value *Idx = I.getOperand(2); 213 unsigned NumElts = VecVTy->getNumElements(); 214 Value *Check = 215 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 216 ConstantInt::get(Idx->getType(), NumElts)); 217 Checks.push_back(Check); 218 break; 219 } 220 }; 221 } 222 223 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { 224 auto Itr = ValToPoison.find(V); 225 if (Itr != ValToPoison.end()) 226 return Itr->second; 227 if (isa<Constant>(V)) { 228 return ConstantInt::getFalse(V->getContext()); 229 } 230 // Return false for unknwon values - this implements a non-strict mode where 231 // unhandled IR constructs are simply considered to never produce poison. At 232 // some point in the future, we probably want a "strict mode" for testing if 233 // nothing else. 234 return ConstantInt::getFalse(V->getContext()); 235 } 236 237 static void CreateAssert(IRBuilder<> &B, Value *Cond) { 238 assert(Cond->getType()->isIntegerTy(1)); 239 if (auto *CI = dyn_cast<ConstantInt>(Cond)) 240 if (CI->isAllOnesValue()) 241 return; 242 243 Module *M = B.GetInsertBlock()->getModule(); 244 M->getOrInsertFunction("__poison_checker_assert", 245 Type::getVoidTy(M->getContext()), 246 Type::getInt1Ty(M->getContext())); 247 Function *TrapFunc = M->getFunction("__poison_checker_assert"); 248 B.CreateCall(TrapFunc, Cond); 249 } 250 251 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { 252 assert(Cond->getType()->isIntegerTy(1)); 253 CreateAssert(B, B.CreateNot(Cond)); 254 } 255 256 static bool rewrite(Function &F) { 257 auto * const Int1Ty = Type::getInt1Ty(F.getContext()); 258 259 DenseMap<Value *, Value *> ValToPoison; 260 261 for (BasicBlock &BB : F) 262 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 263 auto *OldPHI = cast<PHINode>(&*I); 264 auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues()); 265 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) 266 NewPHI->addIncoming(UndefValue::get(Int1Ty), 267 OldPHI->getIncomingBlock(i)); 268 NewPHI->insertBefore(OldPHI); 269 ValToPoison[OldPHI] = NewPHI; 270 } 271 272 for (BasicBlock &BB : F) 273 for (Instruction &I : BB) { 274 if (isa<PHINode>(I)) continue; 275 276 IRBuilder<> B(cast<Instruction>(&I)); 277 278 // Note: There are many more sources of documented UB, but this pass only 279 // attempts to find UB triggered by propagation of poison. 280 SmallVector<const Value *, 4> NonPoisonOps; 281 SmallPtrSet<const Value *, 4> SeenNonPoisonOps; 282 getGuaranteedNonPoisonOps(&I, NonPoisonOps); 283 for (const Value *Op : NonPoisonOps) 284 if (SeenNonPoisonOps.insert(Op).second) 285 CreateAssertNot(B, 286 getPoisonFor(ValToPoison, const_cast<Value *>(Op))); 287 288 if (LocalCheck) 289 if (auto *RI = dyn_cast<ReturnInst>(&I)) 290 if (RI->getNumOperands() != 0) { 291 Value *Op = RI->getOperand(0); 292 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 293 } 294 295 SmallVector<Value*, 4> Checks; 296 for (const Use &U : I.operands()) { 297 if (ValToPoison.count(U) && propagatesPoison(U)) 298 Checks.push_back(getPoisonFor(ValToPoison, U)); 299 } 300 301 if (canCreatePoison(cast<Operator>(&I))) 302 generateCreationChecks(I, Checks); 303 ValToPoison[&I] = buildOrChain(B, Checks); 304 } 305 306 for (BasicBlock &BB : F) 307 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 308 auto *OldPHI = cast<PHINode>(&*I); 309 if (!ValToPoison.count(OldPHI)) 310 continue; // skip the newly inserted phis 311 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); 312 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { 313 auto *OldVal = OldPHI->getIncomingValue(i); 314 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); 315 } 316 } 317 return true; 318 } 319 320 321 PreservedAnalyses PoisonCheckingPass::run(Module &M, 322 ModuleAnalysisManager &AM) { 323 bool Changed = false; 324 for (auto &F : M) 325 Changed |= rewrite(F); 326 327 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 328 } 329 330 PreservedAnalyses PoisonCheckingPass::run(Function &F, 331 FunctionAnalysisManager &AM) { 332 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); 333 } 334 335 /* Major TODO Items: 336 - Control dependent poison UB 337 - Strict mode - (i.e. must analyze every operand) 338 - Poison through memory 339 - Function ABIs 340 - Full coverage of intrinsics, etc.. (ouch) 341 342 Instructions w/Unclear Semantics: 343 - shufflevector - It would seem reasonable for an out of bounds mask element 344 to produce poison, but the LangRef does not state. 345 - all binary ops w/vector operands - The likely interpretation would be that 346 any element overflowing should produce poison for the entire result, but 347 the LangRef does not state. 348 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems 349 strange that only certian flags should be documented as producing poison. 350 351 Cases of clear poison semantics not yet implemented: 352 - Exact flags on ashr/lshr produce poison 353 - NSW/NUW flags on shl produce poison 354 - Inbounds flag on getelementptr produce poison 355 - fptosi/fptoui (out of bounds input) produce poison 356 - Scalable vector types for insertelement/extractelement 357 - Floating point binary ops w/fmf nnan/noinfs flags produce poison 358 */ 359