1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass statically checks for common and easily-identified constructs 10 // which produce undefined or likely unintended behavior in LLVM IR. 11 // 12 // It is not a guarantee of correctness, in two ways. First, it isn't 13 // comprehensive. There are checks which could be done statically which are 14 // not yet implemented. Some of these are indicated by TODO comments, but 15 // those aren't comprehensive either. Second, many conditions cannot be 16 // checked statically. This pass does no dynamic instrumentation, so it 17 // can't check for all possible problems. 18 // 19 // Another limitation is that it assumes all code will be executed. A store 20 // through a null pointer in a basic block which is never reached is harmless, 21 // but this pass will warn about it anyway. This is the main reason why most 22 // of these checks live here instead of in the Verifier pass. 23 // 24 // Optimization passes may make conditions that this pass checks for more or 25 // less obvious. If an optimization pass appears to be introducing a warning, 26 // it may be that the optimization pass is merely exposing an existing 27 // condition in the code. 28 // 29 // This code may be run before instcombine. In many cases, instcombine checks 30 // for the same kinds of things and turns instructions with undefined behavior 31 // into unreachable (or equivalent). Because of this, this pass makes some 32 // effort to look through bitcasts and so on. 33 // 34 //===----------------------------------------------------------------------===// 35 36 #include "llvm/Analysis/Lint.h" 37 #include "llvm/ADT/APInt.h" 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/ADT/SmallPtrSet.h" 40 #include "llvm/ADT/Twine.h" 41 #include "llvm/Analysis/AliasAnalysis.h" 42 #include "llvm/Analysis/AssumptionCache.h" 43 #include "llvm/Analysis/ConstantFolding.h" 44 #include "llvm/Analysis/InstructionSimplify.h" 45 #include "llvm/Analysis/Loads.h" 46 #include "llvm/Analysis/MemoryLocation.h" 47 #include "llvm/Analysis/Passes.h" 48 #include "llvm/Analysis/TargetLibraryInfo.h" 49 #include "llvm/Analysis/ValueTracking.h" 50 #include "llvm/IR/Argument.h" 51 #include "llvm/IR/BasicBlock.h" 52 #include "llvm/IR/Constant.h" 53 #include "llvm/IR/Constants.h" 54 #include "llvm/IR/DataLayout.h" 55 #include "llvm/IR/DerivedTypes.h" 56 #include "llvm/IR/Dominators.h" 57 #include "llvm/IR/Function.h" 58 #include "llvm/IR/GlobalVariable.h" 59 #include "llvm/IR/InstVisitor.h" 60 #include "llvm/IR/InstrTypes.h" 61 #include "llvm/IR/Instruction.h" 62 #include "llvm/IR/Instructions.h" 63 #include "llvm/IR/IntrinsicInst.h" 64 #include "llvm/IR/LegacyPassManager.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/IR/Type.h" 67 #include "llvm/IR/Value.h" 68 #include "llvm/InitializePasses.h" 69 #include "llvm/Pass.h" 70 #include "llvm/Support/Casting.h" 71 #include "llvm/Support/Debug.h" 72 #include "llvm/Support/KnownBits.h" 73 #include "llvm/Support/MathExtras.h" 74 #include "llvm/Support/raw_ostream.h" 75 #include <cassert> 76 #include <cstdint> 77 #include <iterator> 78 #include <string> 79 80 using namespace llvm; 81 82 namespace { 83 namespace MemRef { 84 static const unsigned Read = 1; 85 static const unsigned Write = 2; 86 static const unsigned Callee = 4; 87 static const unsigned Branchee = 8; 88 } // end namespace MemRef 89 90 class Lint : public FunctionPass, public InstVisitor<Lint> { 91 friend class InstVisitor<Lint>; 92 93 void visitFunction(Function &F); 94 95 void visitCallBase(CallBase &CB); 96 void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, 97 MaybeAlign Alignment, Type *Ty, unsigned Flags); 98 void visitEHBeginCatch(IntrinsicInst *II); 99 void visitEHEndCatch(IntrinsicInst *II); 100 101 void visitReturnInst(ReturnInst &I); 102 void visitLoadInst(LoadInst &I); 103 void visitStoreInst(StoreInst &I); 104 void visitXor(BinaryOperator &I); 105 void visitSub(BinaryOperator &I); 106 void visitLShr(BinaryOperator &I); 107 void visitAShr(BinaryOperator &I); 108 void visitShl(BinaryOperator &I); 109 void visitSDiv(BinaryOperator &I); 110 void visitUDiv(BinaryOperator &I); 111 void visitSRem(BinaryOperator &I); 112 void visitURem(BinaryOperator &I); 113 void visitAllocaInst(AllocaInst &I); 114 void visitVAArgInst(VAArgInst &I); 115 void visitIndirectBrInst(IndirectBrInst &I); 116 void visitExtractElementInst(ExtractElementInst &I); 117 void visitInsertElementInst(InsertElementInst &I); 118 void visitUnreachableInst(UnreachableInst &I); 119 120 Value *findValue(Value *V, bool OffsetOk) const; 121 Value *findValueImpl(Value *V, bool OffsetOk, 122 SmallPtrSetImpl<Value *> &Visited) const; 123 124 public: 125 Module *Mod; 126 const DataLayout *DL; 127 AliasAnalysis *AA; 128 AssumptionCache *AC; 129 DominatorTree *DT; 130 TargetLibraryInfo *TLI; 131 132 std::string Messages; 133 raw_string_ostream MessagesStr; 134 135 static char ID; // Pass identification, replacement for typeid 136 Lint() : FunctionPass(ID), MessagesStr(Messages) { 137 initializeLintPass(*PassRegistry::getPassRegistry()); 138 } 139 140 bool runOnFunction(Function &F) override; 141 142 void getAnalysisUsage(AnalysisUsage &AU) const override { 143 AU.setPreservesAll(); 144 AU.addRequired<AAResultsWrapperPass>(); 145 AU.addRequired<AssumptionCacheTracker>(); 146 AU.addRequired<TargetLibraryInfoWrapperPass>(); 147 AU.addRequired<DominatorTreeWrapperPass>(); 148 } 149 void print(raw_ostream &O, const Module *M) const override {} 150 151 void WriteValues(ArrayRef<const Value *> Vs) { 152 for (const Value *V : Vs) { 153 if (!V) 154 continue; 155 if (isa<Instruction>(V)) { 156 MessagesStr << *V << '\n'; 157 } else { 158 V->printAsOperand(MessagesStr, true, Mod); 159 MessagesStr << '\n'; 160 } 161 } 162 } 163 164 /// A check failed, so printout out the condition and the message. 165 /// 166 /// This provides a nice place to put a breakpoint if you want to see why 167 /// something is not correct. 168 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } 169 170 /// A check failed (with values to print). 171 /// 172 /// This calls the Message-only version so that the above is easier to set 173 /// a breakpoint on. 174 template <typename T1, typename... Ts> 175 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { 176 CheckFailed(Message); 177 WriteValues({V1, Vs...}); 178 } 179 }; 180 } // end anonymous namespace 181 182 char Lint::ID = 0; 183 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", 184 false, true) 185 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 186 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 187 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 188 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 189 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", 190 false, true) 191 192 // Assert - We know that cond should be true, if not print an error message. 193 #define Assert(C, ...) \ 194 do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false) 195 196 // Lint::run - This is the main Analysis entry point for a 197 // function. 198 // 199 bool Lint::runOnFunction(Function &F) { 200 Mod = F.getParent(); 201 DL = &F.getParent()->getDataLayout(); 202 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 203 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 204 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 205 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 206 visit(F); 207 dbgs() << MessagesStr.str(); 208 Messages.clear(); 209 return false; 210 } 211 212 void Lint::visitFunction(Function &F) { 213 // This isn't undefined behavior, it's just a little unusual, and it's a 214 // fairly common mistake to neglect to name a function. 215 Assert(F.hasName() || F.hasLocalLinkage(), 216 "Unusual: Unnamed function with non-local linkage", &F); 217 218 // TODO: Check for irreducible control flow. 219 } 220 221 void Lint::visitCallBase(CallBase &I) { 222 Value *Callee = I.getCalledOperand(); 223 224 visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, None, nullptr, 225 MemRef::Callee); 226 227 if (Function *F = dyn_cast<Function>(findValue(Callee, 228 /*OffsetOk=*/false))) { 229 Assert(I.getCallingConv() == F->getCallingConv(), 230 "Undefined behavior: Caller and callee calling convention differ", 231 &I); 232 233 FunctionType *FT = F->getFunctionType(); 234 unsigned NumActualArgs = I.arg_size(); 235 236 Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs 237 : FT->getNumParams() == NumActualArgs, 238 "Undefined behavior: Call argument count mismatches callee " 239 "argument count", 240 &I); 241 242 Assert(FT->getReturnType() == I.getType(), 243 "Undefined behavior: Call return type mismatches " 244 "callee return type", 245 &I); 246 247 // Check argument types (in case the callee was casted) and attributes. 248 // TODO: Verify that caller and callee attributes are compatible. 249 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 250 auto AI = I.arg_begin(), AE = I.arg_end(); 251 for (; AI != AE; ++AI) { 252 Value *Actual = *AI; 253 if (PI != PE) { 254 Argument *Formal = &*PI++; 255 Assert(Formal->getType() == Actual->getType(), 256 "Undefined behavior: Call argument type mismatches " 257 "callee parameter type", 258 &I); 259 260 // Check that noalias arguments don't alias other arguments. This is 261 // not fully precise because we don't know the sizes of the dereferenced 262 // memory regions. 263 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { 264 AttributeList PAL = I.getAttributes(); 265 unsigned ArgNo = 0; 266 for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { 267 // Skip ByVal arguments since they will be memcpy'd to the callee's 268 // stack so we're not really passing the pointer anyway. 269 if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal)) 270 continue; 271 // If both arguments are readonly, they have no dependence. 272 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo)) 273 continue; 274 if (AI != BI && (*BI)->getType()->isPointerTy()) { 275 AliasResult Result = AA->alias(*AI, *BI); 276 Assert(Result != MustAlias && Result != PartialAlias, 277 "Unusual: noalias argument aliases another argument", &I); 278 } 279 } 280 } 281 282 // Check that an sret argument points to valid memory. 283 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 284 Type *Ty = 285 cast<PointerType>(Formal->getType())->getElementType(); 286 visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), 287 DL->getABITypeAlign(Ty), Ty, 288 MemRef::Read | MemRef::Write); 289 } 290 } 291 } 292 } 293 294 if (const auto *CI = dyn_cast<CallInst>(&I)) { 295 if (CI->isTailCall()) { 296 const AttributeList &PAL = CI->getAttributes(); 297 unsigned ArgNo = 0; 298 for (Value *Arg : I.args()) { 299 // Skip ByVal arguments since they will be memcpy'd to the callee's 300 // stack anyway. 301 if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) 302 continue; 303 Value *Obj = findValue(Arg, /*OffsetOk=*/true); 304 Assert(!isa<AllocaInst>(Obj), 305 "Undefined behavior: Call with \"tail\" keyword references " 306 "alloca", 307 &I); 308 } 309 } 310 } 311 312 313 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 314 switch (II->getIntrinsicID()) { 315 default: break; 316 317 // TODO: Check more intrinsics 318 319 case Intrinsic::memcpy: { 320 MemCpyInst *MCI = cast<MemCpyInst>(&I); 321 // TODO: If the size is known, use it. 322 visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, 323 MCI->getDestAlign(), nullptr, MemRef::Write); 324 visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, 325 MCI->getSourceAlign(), nullptr, MemRef::Read); 326 327 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 328 // isn't expressive enough for what we really want to do. Known partial 329 // overlap is not distinguished from the case where nothing is known. 330 auto Size = LocationSize::unknown(); 331 if (const ConstantInt *Len = 332 dyn_cast<ConstantInt>(findValue(MCI->getLength(), 333 /*OffsetOk=*/false))) 334 if (Len->getValue().isIntN(32)) 335 Size = LocationSize::precise(Len->getValue().getZExtValue()); 336 Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 337 MustAlias, 338 "Undefined behavior: memcpy source and destination overlap", &I); 339 break; 340 } 341 case Intrinsic::memcpy_inline: { 342 MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); 343 const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); 344 visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlign(), 345 nullptr, MemRef::Write); 346 visitMemoryReference(I, MCII->getSource(), Size, MCII->getSourceAlign(), 347 nullptr, MemRef::Read); 348 349 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 350 // isn't expressive enough for what we really want to do. Known partial 351 // overlap is not distinguished from the case where nothing is known. 352 const LocationSize LS = LocationSize::precise(Size); 353 Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias, 354 "Undefined behavior: memcpy source and destination overlap", &I); 355 break; 356 } 357 case Intrinsic::memmove: { 358 MemMoveInst *MMI = cast<MemMoveInst>(&I); 359 // TODO: If the size is known, use it. 360 visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, 361 MMI->getDestAlign(), nullptr, MemRef::Write); 362 visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, 363 MMI->getSourceAlign(), nullptr, MemRef::Read); 364 break; 365 } 366 case Intrinsic::memset: { 367 MemSetInst *MSI = cast<MemSetInst>(&I); 368 // TODO: If the size is known, use it. 369 visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, 370 MSI->getDestAlign(), nullptr, MemRef::Write); 371 break; 372 } 373 374 case Intrinsic::vastart: 375 Assert(I.getParent()->getParent()->isVarArg(), 376 "Undefined behavior: va_start called in a non-varargs function", 377 &I); 378 379 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 380 None, nullptr, MemRef::Read | MemRef::Write); 381 break; 382 case Intrinsic::vacopy: 383 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 384 None, nullptr, MemRef::Write); 385 visitMemoryReference(I, I.getArgOperand(1), MemoryLocation::UnknownSize, 386 None, nullptr, MemRef::Read); 387 break; 388 case Intrinsic::vaend: 389 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 390 None, nullptr, MemRef::Read | MemRef::Write); 391 break; 392 393 case Intrinsic::stackrestore: 394 // Stackrestore doesn't read or write memory, but it sets the 395 // stack pointer, which the compiler may read from or write to 396 // at any time, so check it for both readability and writeability. 397 visitMemoryReference(I, I.getArgOperand(0), MemoryLocation::UnknownSize, 398 None, nullptr, MemRef::Read | MemRef::Write); 399 break; 400 } 401 } 402 403 void Lint::visitReturnInst(ReturnInst &I) { 404 Function *F = I.getParent()->getParent(); 405 Assert(!F->doesNotReturn(), 406 "Unusual: Return statement in function with noreturn attribute", &I); 407 408 if (Value *V = I.getReturnValue()) { 409 Value *Obj = findValue(V, /*OffsetOk=*/true); 410 Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); 411 } 412 } 413 414 // TODO: Check that the reference is in bounds. 415 // TODO: Check readnone/readonly function attributes. 416 void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, 417 MaybeAlign Align, Type *Ty, unsigned Flags) { 418 // If no memory is being referenced, it doesn't matter if the pointer 419 // is valid. 420 if (Size == 0) 421 return; 422 423 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 424 Assert(!isa<ConstantPointerNull>(UnderlyingObject), 425 "Undefined behavior: Null pointer dereference", &I); 426 Assert(!isa<UndefValue>(UnderlyingObject), 427 "Undefined behavior: Undef pointer dereference", &I); 428 Assert(!isa<ConstantInt>(UnderlyingObject) || 429 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), 430 "Unusual: All-ones pointer dereference", &I); 431 Assert(!isa<ConstantInt>(UnderlyingObject) || 432 !cast<ConstantInt>(UnderlyingObject)->isOne(), 433 "Unusual: Address one pointer dereference", &I); 434 435 if (Flags & MemRef::Write) { 436 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 437 Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory", 438 &I); 439 Assert(!isa<Function>(UnderlyingObject) && 440 !isa<BlockAddress>(UnderlyingObject), 441 "Undefined behavior: Write to text section", &I); 442 } 443 if (Flags & MemRef::Read) { 444 Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", 445 &I); 446 Assert(!isa<BlockAddress>(UnderlyingObject), 447 "Undefined behavior: Load from block address", &I); 448 } 449 if (Flags & MemRef::Callee) { 450 Assert(!isa<BlockAddress>(UnderlyingObject), 451 "Undefined behavior: Call to block address", &I); 452 } 453 if (Flags & MemRef::Branchee) { 454 Assert(!isa<Constant>(UnderlyingObject) || 455 isa<BlockAddress>(UnderlyingObject), 456 "Undefined behavior: Branch to non-blockaddress", &I); 457 } 458 459 // Check for buffer overflows and misalignment. 460 // Only handles memory references that read/write something simple like an 461 // alloca instruction or a global variable. 462 int64_t Offset = 0; 463 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { 464 // OK, so the access is to a constant offset from Ptr. Check that Ptr is 465 // something we can handle and if so extract the size of this base object 466 // along with its alignment. 467 uint64_t BaseSize = MemoryLocation::UnknownSize; 468 MaybeAlign BaseAlign; 469 470 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 471 Type *ATy = AI->getAllocatedType(); 472 if (!AI->isArrayAllocation() && ATy->isSized()) 473 BaseSize = DL->getTypeAllocSize(ATy); 474 BaseAlign = AI->getAlign(); 475 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 476 // If the global may be defined differently in another compilation unit 477 // then don't warn about funky memory accesses. 478 if (GV->hasDefinitiveInitializer()) { 479 Type *GTy = GV->getValueType(); 480 if (GTy->isSized()) 481 BaseSize = DL->getTypeAllocSize(GTy); 482 BaseAlign = GV->getAlign(); 483 if (!BaseAlign && GTy->isSized()) 484 BaseAlign = DL->getABITypeAlign(GTy); 485 } 486 } 487 488 // Accesses from before the start or after the end of the object are not 489 // defined. 490 Assert(Size == MemoryLocation::UnknownSize || 491 BaseSize == MemoryLocation::UnknownSize || 492 (Offset >= 0 && Offset + Size <= BaseSize), 493 "Undefined behavior: Buffer overflow", &I); 494 495 // Accesses that say that the memory is more aligned than it is are not 496 // defined. 497 if (!Align && Ty && Ty->isSized()) 498 Align = DL->getABITypeAlign(Ty); 499 if (BaseAlign && Align) 500 Assert(*Align <= commonAlignment(*BaseAlign, Offset), 501 "Undefined behavior: Memory reference address is misaligned", &I); 502 } 503 } 504 505 void Lint::visitLoadInst(LoadInst &I) { 506 visitMemoryReference(I, I.getPointerOperand(), 507 DL->getTypeStoreSize(I.getType()), I.getAlign(), 508 I.getType(), MemRef::Read); 509 } 510 511 void Lint::visitStoreInst(StoreInst &I) { 512 visitMemoryReference(I, I.getPointerOperand(), 513 DL->getTypeStoreSize(I.getOperand(0)->getType()), 514 I.getAlign(), I.getOperand(0)->getType(), MemRef::Write); 515 } 516 517 void Lint::visitXor(BinaryOperator &I) { 518 Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 519 "Undefined result: xor(undef, undef)", &I); 520 } 521 522 void Lint::visitSub(BinaryOperator &I) { 523 Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 524 "Undefined result: sub(undef, undef)", &I); 525 } 526 527 void Lint::visitLShr(BinaryOperator &I) { 528 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), 529 /*OffsetOk=*/false))) 530 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 531 "Undefined result: Shift count out of range", &I); 532 } 533 534 void Lint::visitAShr(BinaryOperator &I) { 535 if (ConstantInt *CI = 536 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 537 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 538 "Undefined result: Shift count out of range", &I); 539 } 540 541 void Lint::visitShl(BinaryOperator &I) { 542 if (ConstantInt *CI = 543 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 544 Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 545 "Undefined result: Shift count out of range", &I); 546 } 547 548 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, 549 AssumptionCache *AC) { 550 // Assume undef could be zero. 551 if (isa<UndefValue>(V)) 552 return true; 553 554 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 555 if (!VecTy) { 556 KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); 557 return Known.isZero(); 558 } 559 560 // Per-component check doesn't work with zeroinitializer 561 Constant *C = dyn_cast<Constant>(V); 562 if (!C) 563 return false; 564 565 if (C->isZeroValue()) 566 return true; 567 568 // For a vector, KnownZero will only be true if all values are zero, so check 569 // this per component 570 for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { 571 Constant *Elem = C->getAggregateElement(I); 572 if (isa<UndefValue>(Elem)) 573 return true; 574 575 KnownBits Known = computeKnownBits(Elem, DL); 576 if (Known.isZero()) 577 return true; 578 } 579 580 return false; 581 } 582 583 void Lint::visitSDiv(BinaryOperator &I) { 584 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 585 "Undefined behavior: Division by zero", &I); 586 } 587 588 void Lint::visitUDiv(BinaryOperator &I) { 589 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 590 "Undefined behavior: Division by zero", &I); 591 } 592 593 void Lint::visitSRem(BinaryOperator &I) { 594 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 595 "Undefined behavior: Division by zero", &I); 596 } 597 598 void Lint::visitURem(BinaryOperator &I) { 599 Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 600 "Undefined behavior: Division by zero", &I); 601 } 602 603 void Lint::visitAllocaInst(AllocaInst &I) { 604 if (isa<ConstantInt>(I.getArraySize())) 605 // This isn't undefined behavior, it's just an obvious pessimization. 606 Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 607 "Pessimization: Static alloca outside of entry block", &I); 608 609 // TODO: Check for an unusual size (MSB set?) 610 } 611 612 void Lint::visitVAArgInst(VAArgInst &I) { 613 visitMemoryReference(I, I.getOperand(0), MemoryLocation::UnknownSize, None, 614 nullptr, MemRef::Read | MemRef::Write); 615 } 616 617 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 618 visitMemoryReference(I, I.getAddress(), MemoryLocation::UnknownSize, None, 619 nullptr, MemRef::Branchee); 620 621 Assert(I.getNumDestinations() != 0, 622 "Undefined behavior: indirectbr with no destinations", &I); 623 } 624 625 void Lint::visitExtractElementInst(ExtractElementInst &I) { 626 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 627 /*OffsetOk=*/false))) 628 Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), 629 "Undefined result: extractelement index out of range", &I); 630 } 631 632 void Lint::visitInsertElementInst(InsertElementInst &I) { 633 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), 634 /*OffsetOk=*/false))) 635 Assert(CI->getValue().ult(I.getType()->getNumElements()), 636 "Undefined result: insertelement index out of range", &I); 637 } 638 639 void Lint::visitUnreachableInst(UnreachableInst &I) { 640 // This isn't undefined behavior, it's merely suspicious. 641 Assert(&I == &I.getParent()->front() || 642 std::prev(I.getIterator())->mayHaveSideEffects(), 643 "Unusual: unreachable immediately preceded by instruction without " 644 "side effects", 645 &I); 646 } 647 648 /// findValue - Look through bitcasts and simple memory reference patterns 649 /// to identify an equivalent, but more informative, value. If OffsetOk 650 /// is true, look through getelementptrs with non-zero offsets too. 651 /// 652 /// Most analysis passes don't require this logic, because instcombine 653 /// will simplify most of these kinds of things away. But it's a goal of 654 /// this Lint pass to be useful even on non-optimized IR. 655 Value *Lint::findValue(Value *V, bool OffsetOk) const { 656 SmallPtrSet<Value *, 4> Visited; 657 return findValueImpl(V, OffsetOk, Visited); 658 } 659 660 /// findValueImpl - Implementation helper for findValue. 661 Value *Lint::findValueImpl(Value *V, bool OffsetOk, 662 SmallPtrSetImpl<Value *> &Visited) const { 663 // Detect self-referential values. 664 if (!Visited.insert(V).second) 665 return UndefValue::get(V->getType()); 666 667 // TODO: Look through sext or zext cast, when the result is known to 668 // be interpreted as signed or unsigned, respectively. 669 // TODO: Look through eliminable cast pairs. 670 // TODO: Look through calls with unique return values. 671 // TODO: Look through vector insert/extract/shuffle. 672 V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts(); 673 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 674 BasicBlock::iterator BBI = L->getIterator(); 675 BasicBlock *BB = L->getParent(); 676 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 677 for (;;) { 678 if (!VisitedBlocks.insert(BB).second) 679 break; 680 if (Value *U = 681 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) 682 return findValueImpl(U, OffsetOk, Visited); 683 if (BBI != BB->begin()) break; 684 BB = BB->getUniquePredecessor(); 685 if (!BB) break; 686 BBI = BB->end(); 687 } 688 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 689 if (Value *W = PN->hasConstantValue()) 690 return findValueImpl(W, OffsetOk, Visited); 691 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 692 if (CI->isNoopCast(*DL)) 693 return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 694 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 695 if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), 696 Ex->getIndices())) 697 if (W != V) 698 return findValueImpl(W, OffsetOk, Visited); 699 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 700 // Same as above, but for ConstantExpr instead of Instruction. 701 if (Instruction::isCast(CE->getOpcode())) { 702 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 703 CE->getOperand(0)->getType(), CE->getType(), 704 *DL)) 705 return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 706 } else if (CE->getOpcode() == Instruction::ExtractValue) { 707 ArrayRef<unsigned> Indices = CE->getIndices(); 708 if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) 709 if (W != V) 710 return findValueImpl(W, OffsetOk, Visited); 711 } 712 } 713 714 // As a last resort, try SimplifyInstruction or constant folding. 715 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 716 if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC})) 717 return findValueImpl(W, OffsetOk, Visited); 718 } else if (auto *C = dyn_cast<Constant>(V)) { 719 Value *W = ConstantFoldConstant(C, *DL, TLI); 720 if (W != V) 721 return findValueImpl(W, OffsetOk, Visited); 722 } 723 724 return V; 725 } 726 727 //===----------------------------------------------------------------------===// 728 // Implement the public interfaces to this file... 729 //===----------------------------------------------------------------------===// 730 731 FunctionPass *llvm::createLintPass() { 732 return new Lint(); 733 } 734 735 /// lintFunction - Check a function for errors, printing messages on stderr. 736 /// 737 void llvm::lintFunction(const Function &f) { 738 Function &F = const_cast<Function&>(f); 739 assert(!F.isDeclaration() && "Cannot lint external functions"); 740 741 legacy::FunctionPassManager FPM(F.getParent()); 742 Lint *V = new Lint(); 743 FPM.add(V); 744 FPM.run(F); 745 } 746 747 /// lintModule - Check a module for errors, printing messages on stderr. 748 /// 749 void llvm::lintModule(const Module &M) { 750 legacy::PassManager PM; 751 Lint *V = new Lint(); 752 PM.add(V); 753 PM.run(const_cast<Module&>(M)); 754 } 755