1 //===- PoisonChecking.cpp - -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implements a transform pass which instruments IR such that poison semantics 10 // are made explicit. That is, it provides a (possibly partial) executable 11 // semantics for every instruction w.r.t. poison as specified in the LLVM 12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass 13 // is focused purely on the semantics of LLVM IR, not any particular source 14 // language. If you're looking for something to see if your C/C++ contains 15 // UB, this is not it. 16 // 17 // The rewritten semantics of each instruction will include the following 18 // components: 19 // 20 // 1) The original instruction, unmodified. 21 // 2) A propagation rule which translates dynamic information about the poison 22 // state of each input to whether the dynamic output of the instruction 23 // produces poison. 24 // 3) A creation rule which validates any poison producing flags on the 25 // instruction itself (e.g. checks for overflow on nsw). 26 // 4) A check rule which traps (to a handler function) if this instruction must 27 // execute undefined behavior given the poison state of it's inputs. 28 // 29 // This is a must analysis based transform; that is, the resulting code may 30 // produce a false negative result (not report UB when actually exists 31 // according to the LangRef spec), but should never produce a false positive 32 // (report UB where it doesn't exist). 33 // 34 // Use cases for this pass include: 35 // - Understanding (and testing!) the implications of the definition of poison 36 // from the LangRef. 37 // - Validating the output of a IR fuzzer to ensure that all programs produced 38 // are well defined on the specific input used. 39 // - Finding/confirming poison specific miscompiles by checking the poison 40 // status of an input/IR pair is the same before and after an optimization 41 // transform. 42 // - Checking that a bugpoint reduction does not introduce UB which didn't 43 // exist in the original program being reduced. 44 // 45 // The major sources of inaccuracy are currently: 46 // - Most validation rules not yet implemented for instructions with poison 47 // relavant flags. At the moment, only nsw/nuw on add/sub are supported. 48 // - UB which is control dependent on a branch on poison is not yet 49 // reported. Currently, only data flow dependence is modeled. 50 // - Poison which is propagated through memory is not modeled. As such, 51 // storing poison to memory and then reloading it will cause a false negative 52 // as we consider the reloaded value to not be poisoned. 53 // - Poison propagation across function boundaries is not modeled. At the 54 // moment, all arguments and return values are assumed not to be poison. 55 // - Undef is not modeled. In particular, the optimizer's freedom to pick 56 // concrete values for undef bits so as to maximize potential for producing 57 // poison is not modeled. 58 // 59 //===----------------------------------------------------------------------===// 60 61 #include "llvm/Transforms/Instrumentation/PoisonChecking.h" 62 #include "llvm/ADT/DenseMap.h" 63 #include "llvm/Analysis/ValueTracking.h" 64 #include "llvm/IR/IRBuilder.h" 65 #include "llvm/Support/CommandLine.h" 66 67 using namespace llvm; 68 69 #define DEBUG_TYPE "poison-checking" 70 71 static cl::opt<bool> 72 LocalCheck("poison-checking-function-local", 73 cl::init(false), 74 cl::desc("Check that returns are non-poison (for testing)")); 75 76 77 static bool isConstantFalse(Value* V) { 78 assert(V->getType()->isIntegerTy(1)); 79 if (auto *CI = dyn_cast<ConstantInt>(V)) 80 return CI->isZero(); 81 return false; 82 } 83 84 static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) { 85 if (Ops.size() == 0) 86 return B.getFalse(); 87 unsigned i = 0; 88 for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {} 89 if (i == Ops.size()) 90 return B.getFalse(); 91 Value *Accum = Ops[i++]; 92 for (Value *Op : llvm::drop_begin(Ops, i)) 93 if (!isConstantFalse(Op)) 94 Accum = B.CreateOr(Accum, Op); 95 return Accum; 96 } 97 98 static void generateCreationChecksForBinOp(Instruction &I, 99 SmallVectorImpl<Value*> &Checks) { 100 assert(isa<BinaryOperator>(I)); 101 102 IRBuilder<> B(&I); 103 Value *LHS = I.getOperand(0); 104 Value *RHS = I.getOperand(1); 105 switch (I.getOpcode()) { 106 default: 107 return; 108 case Instruction::Add: { 109 if (I.hasNoSignedWrap()) { 110 auto *OverflowOp = 111 B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS); 112 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 113 } 114 if (I.hasNoUnsignedWrap()) { 115 auto *OverflowOp = 116 B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS); 117 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 118 } 119 break; 120 } 121 case Instruction::Sub: { 122 if (I.hasNoSignedWrap()) { 123 auto *OverflowOp = 124 B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS); 125 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 126 } 127 if (I.hasNoUnsignedWrap()) { 128 auto *OverflowOp = 129 B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS); 130 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 131 } 132 break; 133 } 134 case Instruction::Mul: { 135 if (I.hasNoSignedWrap()) { 136 auto *OverflowOp = 137 B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS); 138 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 139 } 140 if (I.hasNoUnsignedWrap()) { 141 auto *OverflowOp = 142 B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS); 143 Checks.push_back(B.CreateExtractValue(OverflowOp, 1)); 144 } 145 break; 146 } 147 case Instruction::UDiv: { 148 if (I.isExact()) { 149 auto *Check = 150 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS), 151 ConstantInt::get(LHS->getType(), 0)); 152 Checks.push_back(Check); 153 } 154 break; 155 } 156 case Instruction::SDiv: { 157 if (I.isExact()) { 158 auto *Check = 159 B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS), 160 ConstantInt::get(LHS->getType(), 0)); 161 Checks.push_back(Check); 162 } 163 break; 164 } 165 case Instruction::AShr: 166 case Instruction::LShr: 167 case Instruction::Shl: { 168 Value *ShiftCheck = 169 B.CreateICmp(ICmpInst::ICMP_UGE, RHS, 170 ConstantInt::get(RHS->getType(), 171 LHS->getType()->getScalarSizeInBits())); 172 Checks.push_back(ShiftCheck); 173 break; 174 } 175 }; 176 } 177 178 /// Given an instruction which can produce poison on non-poison inputs 179 /// (i.e. canCreatePoison returns true), generate runtime checks to produce 180 /// boolean indicators of when poison would result. 181 static void generateCreationChecks(Instruction &I, 182 SmallVectorImpl<Value*> &Checks) { 183 IRBuilder<> B(&I); 184 if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy()) 185 generateCreationChecksForBinOp(I, Checks); 186 187 // Handle non-binops separately 188 switch (I.getOpcode()) { 189 default: 190 // Note there are a couple of missing cases here, once implemented, this 191 // should become an llvm_unreachable. 192 break; 193 case Instruction::ExtractElement: { 194 Value *Vec = I.getOperand(0); 195 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 196 if (!VecVTy) 197 break; 198 Value *Idx = I.getOperand(1); 199 unsigned NumElts = VecVTy->getNumElements(); 200 Value *Check = 201 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 202 ConstantInt::get(Idx->getType(), NumElts)); 203 Checks.push_back(Check); 204 break; 205 } 206 case Instruction::InsertElement: { 207 Value *Vec = I.getOperand(0); 208 auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType()); 209 if (!VecVTy) 210 break; 211 Value *Idx = I.getOperand(2); 212 unsigned NumElts = VecVTy->getNumElements(); 213 Value *Check = 214 B.CreateICmp(ICmpInst::ICMP_UGE, Idx, 215 ConstantInt::get(Idx->getType(), NumElts)); 216 Checks.push_back(Check); 217 break; 218 } 219 }; 220 } 221 222 static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) { 223 auto Itr = ValToPoison.find(V); 224 if (Itr != ValToPoison.end()) 225 return Itr->second; 226 if (isa<Constant>(V)) { 227 return ConstantInt::getFalse(V->getContext()); 228 } 229 // Return false for unknwon values - this implements a non-strict mode where 230 // unhandled IR constructs are simply considered to never produce poison. At 231 // some point in the future, we probably want a "strict mode" for testing if 232 // nothing else. 233 return ConstantInt::getFalse(V->getContext()); 234 } 235 236 static void CreateAssert(IRBuilder<> &B, Value *Cond) { 237 assert(Cond->getType()->isIntegerTy(1)); 238 if (auto *CI = dyn_cast<ConstantInt>(Cond)) 239 if (CI->isAllOnesValue()) 240 return; 241 242 Module *M = B.GetInsertBlock()->getModule(); 243 M->getOrInsertFunction("__poison_checker_assert", 244 Type::getVoidTy(M->getContext()), 245 Type::getInt1Ty(M->getContext())); 246 Function *TrapFunc = M->getFunction("__poison_checker_assert"); 247 B.CreateCall(TrapFunc, Cond); 248 } 249 250 static void CreateAssertNot(IRBuilder<> &B, Value *Cond) { 251 assert(Cond->getType()->isIntegerTy(1)); 252 CreateAssert(B, B.CreateNot(Cond)); 253 } 254 255 static bool rewrite(Function &F) { 256 auto * const Int1Ty = Type::getInt1Ty(F.getContext()); 257 258 DenseMap<Value *, Value *> ValToPoison; 259 260 for (BasicBlock &BB : F) 261 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 262 auto *OldPHI = cast<PHINode>(&*I); 263 auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues()); 264 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) 265 NewPHI->addIncoming(UndefValue::get(Int1Ty), 266 OldPHI->getIncomingBlock(i)); 267 NewPHI->insertBefore(OldPHI); 268 ValToPoison[OldPHI] = NewPHI; 269 } 270 271 for (BasicBlock &BB : F) 272 for (Instruction &I : BB) { 273 if (isa<PHINode>(I)) continue; 274 275 IRBuilder<> B(cast<Instruction>(&I)); 276 277 // Note: There are many more sources of documented UB, but this pass only 278 // attempts to find UB triggered by propagation of poison. 279 SmallVector<const Value *, 4> NonPoisonOps; 280 SmallPtrSet<const Value *, 4> SeenNonPoisonOps; 281 getGuaranteedNonPoisonOps(&I, NonPoisonOps); 282 for (const Value *Op : NonPoisonOps) 283 if (SeenNonPoisonOps.insert(Op).second) 284 CreateAssertNot(B, 285 getPoisonFor(ValToPoison, const_cast<Value *>(Op))); 286 287 if (LocalCheck) 288 if (auto *RI = dyn_cast<ReturnInst>(&I)) 289 if (RI->getNumOperands() != 0) { 290 Value *Op = RI->getOperand(0); 291 CreateAssertNot(B, getPoisonFor(ValToPoison, Op)); 292 } 293 294 SmallVector<Value*, 4> Checks; 295 for (const Use &U : I.operands()) { 296 if (ValToPoison.count(U) && propagatesPoison(U)) 297 Checks.push_back(getPoisonFor(ValToPoison, U)); 298 } 299 300 if (canCreatePoison(cast<Operator>(&I))) 301 generateCreationChecks(I, Checks); 302 ValToPoison[&I] = buildOrChain(B, Checks); 303 } 304 305 for (BasicBlock &BB : F) 306 for (auto I = BB.begin(); isa<PHINode>(&*I); I++) { 307 auto *OldPHI = cast<PHINode>(&*I); 308 if (!ValToPoison.count(OldPHI)) 309 continue; // skip the newly inserted phis 310 auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]); 311 for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) { 312 auto *OldVal = OldPHI->getIncomingValue(i); 313 NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal)); 314 } 315 } 316 return true; 317 } 318 319 320 PreservedAnalyses PoisonCheckingPass::run(Module &M, 321 ModuleAnalysisManager &AM) { 322 bool Changed = false; 323 for (auto &F : M) 324 Changed |= rewrite(F); 325 326 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 327 } 328 329 PreservedAnalyses PoisonCheckingPass::run(Function &F, 330 FunctionAnalysisManager &AM) { 331 return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all(); 332 } 333 334 /* Major TODO Items: 335 - Control dependent poison UB 336 - Strict mode - (i.e. must analyze every operand) 337 - Poison through memory 338 - Function ABIs 339 - Full coverage of intrinsics, etc.. (ouch) 340 341 Instructions w/Unclear Semantics: 342 - shufflevector - It would seem reasonable for an out of bounds mask element 343 to produce poison, but the LangRef does not state. 344 - all binary ops w/vector operands - The likely interpretation would be that 345 any element overflowing should produce poison for the entire result, but 346 the LangRef does not state. 347 - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems 348 strange that only certian flags should be documented as producing poison. 349 350 Cases of clear poison semantics not yet implemented: 351 - Exact flags on ashr/lshr produce poison 352 - NSW/NUW flags on shl produce poison 353 - Inbounds flag on getelementptr produce poison 354 - fptosi/fptoui (out of bounds input) produce poison 355 - Scalable vector types for insertelement/extractelement 356 - Floating point binary ops w/fmf nnan/noinfs flags produce poison 357 */ 358