//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass statically checks for common and easily-identified constructs
// which produce undefined or likely unintended behavior in LLVM IR.
//
// It is not a guarantee of correctness, in two ways. First, it isn't
// comprehensive. There are checks which could be done statically which are
// not yet implemented. Some of these are indicated by TODO comments, but
// those aren't comprehensive either. Second, many conditions cannot be
// checked statically. This pass does no dynamic instrumentation, so it
// can't check for all possible problems.
//
// Another limitation is that it assumes all code will be executed. A store
// through a null pointer in a basic block which is never reached is harmless,
// but this pass will warn about it anyway. This is the main reason why most
// of these checks live here instead of in the Verifier pass.
//
// Optimization passes may make conditions that this pass checks for more or
// less obvious. If an optimization pass appears to be introducing a warning,
// it may be that the optimization pass is merely exposing an existing
// condition in the code.
//
// This code may be run before instcombine. In many cases, instcombine checks
// for the same kinds of things and turns instructions with undefined behavior
// into unreachable (or equivalent). Because of this, this pass makes some
// effort to look through bitcasts and so on.
33 // 34 //===----------------------------------------------------------------------===// 35 36 #include "llvm/Analysis/Lint.h" 37 #include "llvm/ADT/APInt.h" 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/ADT/SmallPtrSet.h" 40 #include "llvm/ADT/Twine.h" 41 #include "llvm/Analysis/AliasAnalysis.h" 42 #include "llvm/Analysis/AssumptionCache.h" 43 #include "llvm/Analysis/ConstantFolding.h" 44 #include "llvm/Analysis/InstructionSimplify.h" 45 #include "llvm/Analysis/Loads.h" 46 #include "llvm/Analysis/MemoryLocation.h" 47 #include "llvm/Analysis/TargetLibraryInfo.h" 48 #include "llvm/Analysis/ValueTracking.h" 49 #include "llvm/IR/Argument.h" 50 #include "llvm/IR/BasicBlock.h" 51 #include "llvm/IR/Constant.h" 52 #include "llvm/IR/Constants.h" 53 #include "llvm/IR/DataLayout.h" 54 #include "llvm/IR/DerivedTypes.h" 55 #include "llvm/IR/Dominators.h" 56 #include "llvm/IR/Function.h" 57 #include "llvm/IR/GlobalVariable.h" 58 #include "llvm/IR/InstVisitor.h" 59 #include "llvm/IR/InstrTypes.h" 60 #include "llvm/IR/Instruction.h" 61 #include "llvm/IR/Instructions.h" 62 #include "llvm/IR/IntrinsicInst.h" 63 #include "llvm/IR/LegacyPassManager.h" 64 #include "llvm/IR/Module.h" 65 #include "llvm/IR/PassManager.h" 66 #include "llvm/IR/Type.h" 67 #include "llvm/IR/Value.h" 68 #include "llvm/InitializePasses.h" 69 #include "llvm/Pass.h" 70 #include "llvm/Support/Casting.h" 71 #include "llvm/Support/KnownBits.h" 72 #include "llvm/Support/raw_ostream.h" 73 #include <cassert> 74 #include <cstdint> 75 #include <iterator> 76 #include <string> 77 78 using namespace llvm; 79 80 namespace { 81 namespace MemRef { 82 static const unsigned Read = 1; 83 static const unsigned Write = 2; 84 static const unsigned Callee = 4; 85 static const unsigned Branchee = 8; 86 } // end namespace MemRef 87 88 class Lint : public InstVisitor<Lint> { 89 friend class InstVisitor<Lint>; 90 91 void visitFunction(Function &F); 92 93 void visitCallBase(CallBase &CB); 94 void visitMemoryReference(Instruction 
&I, const MemoryLocation &Loc, 95 MaybeAlign Alignment, Type *Ty, unsigned Flags); 96 void visitEHBeginCatch(IntrinsicInst *II); 97 void visitEHEndCatch(IntrinsicInst *II); 98 99 void visitReturnInst(ReturnInst &I); 100 void visitLoadInst(LoadInst &I); 101 void visitStoreInst(StoreInst &I); 102 void visitXor(BinaryOperator &I); 103 void visitSub(BinaryOperator &I); 104 void visitLShr(BinaryOperator &I); 105 void visitAShr(BinaryOperator &I); 106 void visitShl(BinaryOperator &I); 107 void visitSDiv(BinaryOperator &I); 108 void visitUDiv(BinaryOperator &I); 109 void visitSRem(BinaryOperator &I); 110 void visitURem(BinaryOperator &I); 111 void visitAllocaInst(AllocaInst &I); 112 void visitVAArgInst(VAArgInst &I); 113 void visitIndirectBrInst(IndirectBrInst &I); 114 void visitExtractElementInst(ExtractElementInst &I); 115 void visitInsertElementInst(InsertElementInst &I); 116 void visitUnreachableInst(UnreachableInst &I); 117 118 Value *findValue(Value *V, bool OffsetOk) const; 119 Value *findValueImpl(Value *V, bool OffsetOk, 120 SmallPtrSetImpl<Value *> &Visited) const; 121 122 public: 123 Module *Mod; 124 const DataLayout *DL; 125 AliasAnalysis *AA; 126 AssumptionCache *AC; 127 DominatorTree *DT; 128 TargetLibraryInfo *TLI; 129 130 std::string Messages; 131 raw_string_ostream MessagesStr; 132 133 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, 134 AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) 135 : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), 136 MessagesStr(Messages) {} 137 138 void WriteValues(ArrayRef<const Value *> Vs) { 139 for (const Value *V : Vs) { 140 if (!V) 141 continue; 142 if (isa<Instruction>(V)) { 143 MessagesStr << *V << '\n'; 144 } else { 145 V->printAsOperand(MessagesStr, true, Mod); 146 MessagesStr << '\n'; 147 } 148 } 149 } 150 151 /// A check failed, so printout out the condition and the message. 
152 /// 153 /// This provides a nice place to put a breakpoint if you want to see why 154 /// something is not correct. 155 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } 156 157 /// A check failed (with values to print). 158 /// 159 /// This calls the Message-only version so that the above is easier to set 160 /// a breakpoint on. 161 template <typename T1, typename... Ts> 162 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { 163 CheckFailed(Message); 164 WriteValues({V1, Vs...}); 165 } 166 }; 167 } // end anonymous namespace 168 169 // Check - We know that cond should be true, if not print an error message. 170 #define Check(C, ...) \ 171 do { \ 172 if (!(C)) { \ 173 CheckFailed(__VA_ARGS__); \ 174 return; \ 175 } \ 176 } while (false) 177 178 void Lint::visitFunction(Function &F) { 179 // This isn't undefined behavior, it's just a little unusual, and it's a 180 // fairly common mistake to neglect to name a function. 181 Check(F.hasName() || F.hasLocalLinkage(), 182 "Unusual: Unnamed function with non-local linkage", &F); 183 184 // TODO: Check for irreducible control flow. 185 } 186 187 void Lint::visitCallBase(CallBase &I) { 188 Value *Callee = I.getCalledOperand(); 189 190 visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt, 191 nullptr, MemRef::Callee); 192 193 if (Function *F = dyn_cast<Function>(findValue(Callee, 194 /*OffsetOk=*/false))) { 195 Check(I.getCallingConv() == F->getCallingConv(), 196 "Undefined behavior: Caller and callee calling convention differ", 197 &I); 198 199 FunctionType *FT = F->getFunctionType(); 200 unsigned NumActualArgs = I.arg_size(); 201 202 Check(FT->isVarArg() ? 
FT->getNumParams() <= NumActualArgs 203 : FT->getNumParams() == NumActualArgs, 204 "Undefined behavior: Call argument count mismatches callee " 205 "argument count", 206 &I); 207 208 Check(FT->getReturnType() == I.getType(), 209 "Undefined behavior: Call return type mismatches " 210 "callee return type", 211 &I); 212 213 // Check argument types (in case the callee was casted) and attributes. 214 // TODO: Verify that caller and callee attributes are compatible. 215 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 216 auto AI = I.arg_begin(), AE = I.arg_end(); 217 for (; AI != AE; ++AI) { 218 Value *Actual = *AI; 219 if (PI != PE) { 220 Argument *Formal = &*PI++; 221 Check(Formal->getType() == Actual->getType(), 222 "Undefined behavior: Call argument type mismatches " 223 "callee parameter type", 224 &I); 225 226 // Check that noalias arguments don't alias other arguments. This is 227 // not fully precise because we don't know the sizes of the dereferenced 228 // memory regions. 229 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { 230 AttributeList PAL = I.getAttributes(); 231 unsigned ArgNo = 0; 232 for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { 233 // Skip ByVal arguments since they will be memcpy'd to the callee's 234 // stack so we're not really passing the pointer anyway. 235 if (PAL.hasParamAttr(ArgNo, Attribute::ByVal)) 236 continue; 237 // If both arguments are readonly, they have no dependence. 238 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo)) 239 continue; 240 if (AI != BI && (*BI)->getType()->isPointerTy()) { 241 AliasResult Result = AA->alias(*AI, *BI); 242 Check(Result != AliasResult::MustAlias && 243 Result != AliasResult::PartialAlias, 244 "Unusual: noalias argument aliases another argument", &I); 245 } 246 } 247 } 248 249 // Check that an sret argument points to valid memory. 
250 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 251 Type *Ty = Formal->getParamStructRetType(); 252 MemoryLocation Loc( 253 Actual, LocationSize::precise(DL->getTypeStoreSize(Ty))); 254 visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty, 255 MemRef::Read | MemRef::Write); 256 } 257 } 258 } 259 } 260 261 if (const auto *CI = dyn_cast<CallInst>(&I)) { 262 if (CI->isTailCall()) { 263 const AttributeList &PAL = CI->getAttributes(); 264 unsigned ArgNo = 0; 265 for (Value *Arg : I.args()) { 266 // Skip ByVal arguments since they will be memcpy'd to the callee's 267 // stack anyway. 268 if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal)) 269 continue; 270 Value *Obj = findValue(Arg, /*OffsetOk=*/true); 271 Check(!isa<AllocaInst>(Obj), 272 "Undefined behavior: Call with \"tail\" keyword references " 273 "alloca", 274 &I); 275 } 276 } 277 } 278 279 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 280 switch (II->getIntrinsicID()) { 281 default: 282 break; 283 284 // TODO: Check more intrinsics 285 286 case Intrinsic::memcpy: { 287 MemCpyInst *MCI = cast<MemCpyInst>(&I); 288 visitMemoryReference(I, MemoryLocation::getForDest(MCI), 289 MCI->getDestAlign(), nullptr, MemRef::Write); 290 visitMemoryReference(I, MemoryLocation::getForSource(MCI), 291 MCI->getSourceAlign(), nullptr, MemRef::Read); 292 293 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 294 // isn't expressive enough for what we really want to do. Known partial 295 // overlap is not distinguished from the case where nothing is known. 
296 auto Size = LocationSize::afterPointer(); 297 if (const ConstantInt *Len = 298 dyn_cast<ConstantInt>(findValue(MCI->getLength(), 299 /*OffsetOk=*/false))) 300 if (Len->getValue().isIntN(32)) 301 Size = LocationSize::precise(Len->getValue().getZExtValue()); 302 Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 303 AliasResult::MustAlias, 304 "Undefined behavior: memcpy source and destination overlap", &I); 305 break; 306 } 307 case Intrinsic::memcpy_inline: { 308 MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); 309 const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); 310 visitMemoryReference(I, MemoryLocation::getForDest(MCII), 311 MCII->getDestAlign(), nullptr, MemRef::Write); 312 visitMemoryReference(I, MemoryLocation::getForSource(MCII), 313 MCII->getSourceAlign(), nullptr, MemRef::Read); 314 315 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 316 // isn't expressive enough for what we really want to do. Known partial 317 // overlap is not distinguished from the case where nothing is known. 
318 const LocationSize LS = LocationSize::precise(Size); 319 Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != 320 AliasResult::MustAlias, 321 "Undefined behavior: memcpy source and destination overlap", &I); 322 break; 323 } 324 case Intrinsic::memmove: { 325 MemMoveInst *MMI = cast<MemMoveInst>(&I); 326 visitMemoryReference(I, MemoryLocation::getForDest(MMI), 327 MMI->getDestAlign(), nullptr, MemRef::Write); 328 visitMemoryReference(I, MemoryLocation::getForSource(MMI), 329 MMI->getSourceAlign(), nullptr, MemRef::Read); 330 break; 331 } 332 case Intrinsic::memset: { 333 MemSetInst *MSI = cast<MemSetInst>(&I); 334 visitMemoryReference(I, MemoryLocation::getForDest(MSI), 335 MSI->getDestAlign(), nullptr, MemRef::Write); 336 break; 337 } 338 case Intrinsic::memset_inline: { 339 MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I); 340 visitMemoryReference(I, MemoryLocation::getForDest(MSII), 341 MSII->getDestAlign(), nullptr, MemRef::Write); 342 break; 343 } 344 345 case Intrinsic::vastart: 346 Check(I.getParent()->getParent()->isVarArg(), 347 "Undefined behavior: va_start called in a non-varargs function", 348 &I); 349 350 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 351 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 352 break; 353 case Intrinsic::vacopy: 354 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 355 std::nullopt, nullptr, MemRef::Write); 356 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), 357 std::nullopt, nullptr, MemRef::Read); 358 break; 359 case Intrinsic::vaend: 360 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 361 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 362 break; 363 364 case Intrinsic::stackrestore: 365 // Stackrestore doesn't read or write memory, but it sets the 366 // stack pointer, which the compiler may read from or write to 367 // at any time, so check it for both readability and writeability. 
368 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 369 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 370 break; 371 case Intrinsic::get_active_lane_mask: 372 if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1))) 373 Check(!TripCount->isZero(), 374 "get_active_lane_mask: operand #2 " 375 "must be greater than 0", 376 &I); 377 break; 378 } 379 } 380 381 void Lint::visitReturnInst(ReturnInst &I) { 382 Function *F = I.getParent()->getParent(); 383 Check(!F->doesNotReturn(), 384 "Unusual: Return statement in function with noreturn attribute", &I); 385 386 if (Value *V = I.getReturnValue()) { 387 Value *Obj = findValue(V, /*OffsetOk=*/true); 388 Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); 389 } 390 } 391 392 // TODO: Check that the reference is in bounds. 393 // TODO: Check readnone/readonly function attributes. 394 void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, 395 MaybeAlign Align, Type *Ty, unsigned Flags) { 396 // If no memory is being referenced, it doesn't matter if the pointer 397 // is valid. 
398 if (Loc.Size.isZero()) 399 return; 400 401 Value *Ptr = const_cast<Value *>(Loc.Ptr); 402 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 403 Check(!isa<ConstantPointerNull>(UnderlyingObject), 404 "Undefined behavior: Null pointer dereference", &I); 405 Check(!isa<UndefValue>(UnderlyingObject), 406 "Undefined behavior: Undef pointer dereference", &I); 407 Check(!isa<ConstantInt>(UnderlyingObject) || 408 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), 409 "Unusual: All-ones pointer dereference", &I); 410 Check(!isa<ConstantInt>(UnderlyingObject) || 411 !cast<ConstantInt>(UnderlyingObject)->isOne(), 412 "Unusual: Address one pointer dereference", &I); 413 414 if (Flags & MemRef::Write) { 415 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 416 Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory", 417 &I); 418 Check(!isa<Function>(UnderlyingObject) && 419 !isa<BlockAddress>(UnderlyingObject), 420 "Undefined behavior: Write to text section", &I); 421 } 422 if (Flags & MemRef::Read) { 423 Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", 424 &I); 425 Check(!isa<BlockAddress>(UnderlyingObject), 426 "Undefined behavior: Load from block address", &I); 427 } 428 if (Flags & MemRef::Callee) { 429 Check(!isa<BlockAddress>(UnderlyingObject), 430 "Undefined behavior: Call to block address", &I); 431 } 432 if (Flags & MemRef::Branchee) { 433 Check(!isa<Constant>(UnderlyingObject) || 434 isa<BlockAddress>(UnderlyingObject), 435 "Undefined behavior: Branch to non-blockaddress", &I); 436 } 437 438 // Check for buffer overflows and misalignment. 439 // Only handles memory references that read/write something simple like an 440 // alloca instruction or a global variable. 441 int64_t Offset = 0; 442 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { 443 // OK, so the access is to a constant offset from Ptr. 
Check that Ptr is 444 // something we can handle and if so extract the size of this base object 445 // along with its alignment. 446 uint64_t BaseSize = MemoryLocation::UnknownSize; 447 MaybeAlign BaseAlign; 448 449 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 450 Type *ATy = AI->getAllocatedType(); 451 if (!AI->isArrayAllocation() && ATy->isSized()) 452 BaseSize = DL->getTypeAllocSize(ATy); 453 BaseAlign = AI->getAlign(); 454 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 455 // If the global may be defined differently in another compilation unit 456 // then don't warn about funky memory accesses. 457 if (GV->hasDefinitiveInitializer()) { 458 Type *GTy = GV->getValueType(); 459 if (GTy->isSized()) 460 BaseSize = DL->getTypeAllocSize(GTy); 461 BaseAlign = GV->getAlign(); 462 if (!BaseAlign && GTy->isSized()) 463 BaseAlign = DL->getABITypeAlign(GTy); 464 } 465 } 466 467 // Accesses from before the start or after the end of the object are not 468 // defined. 469 Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize || 470 (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), 471 "Undefined behavior: Buffer overflow", &I); 472 473 // Accesses that say that the memory is more aligned than it is are not 474 // defined. 
475 if (!Align && Ty && Ty->isSized()) 476 Align = DL->getABITypeAlign(Ty); 477 if (BaseAlign && Align) 478 Check(*Align <= commonAlignment(*BaseAlign, Offset), 479 "Undefined behavior: Memory reference address is misaligned", &I); 480 } 481 } 482 483 void Lint::visitLoadInst(LoadInst &I) { 484 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(), 485 MemRef::Read); 486 } 487 488 void Lint::visitStoreInst(StoreInst &I) { 489 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), 490 I.getOperand(0)->getType(), MemRef::Write); 491 } 492 493 void Lint::visitXor(BinaryOperator &I) { 494 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 495 "Undefined result: xor(undef, undef)", &I); 496 } 497 498 void Lint::visitSub(BinaryOperator &I) { 499 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 500 "Undefined result: sub(undef, undef)", &I); 501 } 502 503 void Lint::visitLShr(BinaryOperator &I) { 504 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), 505 /*OffsetOk=*/false))) 506 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 507 "Undefined result: Shift count out of range", &I); 508 } 509 510 void Lint::visitAShr(BinaryOperator &I) { 511 if (ConstantInt *CI = 512 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 513 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 514 "Undefined result: Shift count out of range", &I); 515 } 516 517 void Lint::visitShl(BinaryOperator &I) { 518 if (ConstantInt *CI = 519 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 520 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 521 "Undefined result: Shift count out of range", &I); 522 } 523 524 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, 525 AssumptionCache *AC) { 526 // Assume undef could be zero. 
527 if (isa<UndefValue>(V)) 528 return true; 529 530 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 531 if (!VecTy) { 532 KnownBits Known = 533 computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); 534 return Known.isZero(); 535 } 536 537 // Per-component check doesn't work with zeroinitializer 538 Constant *C = dyn_cast<Constant>(V); 539 if (!C) 540 return false; 541 542 if (C->isZeroValue()) 543 return true; 544 545 // For a vector, KnownZero will only be true if all values are zero, so check 546 // this per component 547 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements(); 548 I != N; ++I) { 549 Constant *Elem = C->getAggregateElement(I); 550 if (isa<UndefValue>(Elem)) 551 return true; 552 553 KnownBits Known = computeKnownBits(Elem, DL); 554 if (Known.isZero()) 555 return true; 556 } 557 558 return false; 559 } 560 561 void Lint::visitSDiv(BinaryOperator &I) { 562 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 563 "Undefined behavior: Division by zero", &I); 564 } 565 566 void Lint::visitUDiv(BinaryOperator &I) { 567 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 568 "Undefined behavior: Division by zero", &I); 569 } 570 571 void Lint::visitSRem(BinaryOperator &I) { 572 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 573 "Undefined behavior: Division by zero", &I); 574 } 575 576 void Lint::visitURem(BinaryOperator &I) { 577 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 578 "Undefined behavior: Division by zero", &I); 579 } 580 581 void Lint::visitAllocaInst(AllocaInst &I) { 582 if (isa<ConstantInt>(I.getArraySize())) 583 // This isn't undefined behavior, it's just an obvious pessimization. 584 Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 585 "Pessimization: Static alloca outside of entry block", &I); 586 587 // TODO: Check for an unusual size (MSB set?) 
588 } 589 590 void Lint::visitVAArgInst(VAArgInst &I) { 591 visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr, 592 MemRef::Read | MemRef::Write); 593 } 594 595 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 596 visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), 597 std::nullopt, nullptr, MemRef::Branchee); 598 599 Check(I.getNumDestinations() != 0, 600 "Undefined behavior: indirectbr with no destinations", &I); 601 } 602 603 void Lint::visitExtractElementInst(ExtractElementInst &I) { 604 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 605 /*OffsetOk=*/false))) 606 Check( 607 CI->getValue().ult( 608 cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()), 609 "Undefined result: extractelement index out of range", &I); 610 } 611 612 void Lint::visitInsertElementInst(InsertElementInst &I) { 613 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), 614 /*OffsetOk=*/false))) 615 Check(CI->getValue().ult( 616 cast<FixedVectorType>(I.getType())->getNumElements()), 617 "Undefined result: insertelement index out of range", &I); 618 } 619 620 void Lint::visitUnreachableInst(UnreachableInst &I) { 621 // This isn't undefined behavior, it's merely suspicious. 622 Check(&I == &I.getParent()->front() || 623 std::prev(I.getIterator())->mayHaveSideEffects(), 624 "Unusual: unreachable immediately preceded by instruction without " 625 "side effects", 626 &I); 627 } 628 629 /// findValue - Look through bitcasts and simple memory reference patterns 630 /// to identify an equivalent, but more informative, value. If OffsetOk 631 /// is true, look through getelementptrs with non-zero offsets too. 632 /// 633 /// Most analysis passes don't require this logic, because instcombine 634 /// will simplify most of these kinds of things away. But it's a goal of 635 /// this Lint pass to be useful even on non-optimized IR. 
636 Value *Lint::findValue(Value *V, bool OffsetOk) const { 637 SmallPtrSet<Value *, 4> Visited; 638 return findValueImpl(V, OffsetOk, Visited); 639 } 640 641 /// findValueImpl - Implementation helper for findValue. 642 Value *Lint::findValueImpl(Value *V, bool OffsetOk, 643 SmallPtrSetImpl<Value *> &Visited) const { 644 // Detect self-referential values. 645 if (!Visited.insert(V).second) 646 return UndefValue::get(V->getType()); 647 648 // TODO: Look through sext or zext cast, when the result is known to 649 // be interpreted as signed or unsigned, respectively. 650 // TODO: Look through eliminable cast pairs. 651 // TODO: Look through calls with unique return values. 652 // TODO: Look through vector insert/extract/shuffle. 653 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts(); 654 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 655 BasicBlock::iterator BBI = L->getIterator(); 656 BasicBlock *BB = L->getParent(); 657 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 658 for (;;) { 659 if (!VisitedBlocks.insert(BB).second) 660 break; 661 if (Value *U = 662 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) 663 return findValueImpl(U, OffsetOk, Visited); 664 if (BBI != BB->begin()) 665 break; 666 BB = BB->getUniquePredecessor(); 667 if (!BB) 668 break; 669 BBI = BB->end(); 670 } 671 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 672 if (Value *W = PN->hasConstantValue()) 673 return findValueImpl(W, OffsetOk, Visited); 674 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 675 if (CI->isNoopCast(*DL)) 676 return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 677 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 678 if (Value *W = 679 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) 680 if (W != V) 681 return findValueImpl(W, OffsetOk, Visited); 682 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 683 // Same as above, but for ConstantExpr instead of Instruction. 
684 if (Instruction::isCast(CE->getOpcode())) { 685 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 686 CE->getOperand(0)->getType(), CE->getType(), 687 *DL)) 688 return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 689 } 690 } 691 692 // As a last resort, try SimplifyInstruction or constant folding. 693 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 694 if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC})) 695 return findValueImpl(W, OffsetOk, Visited); 696 } else if (auto *C = dyn_cast<Constant>(V)) { 697 Value *W = ConstantFoldConstant(C, *DL, TLI); 698 if (W != V) 699 return findValueImpl(W, OffsetOk, Visited); 700 } 701 702 return V; 703 } 704 705 PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { 706 auto *Mod = F.getParent(); 707 auto *DL = &F.getParent()->getDataLayout(); 708 auto *AA = &AM.getResult<AAManager>(F); 709 auto *AC = &AM.getResult<AssumptionAnalysis>(F); 710 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); 711 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 712 Lint L(Mod, DL, AA, AC, DT, TLI); 713 L.visit(F); 714 dbgs() << L.MessagesStr.str(); 715 return PreservedAnalyses::all(); 716 } 717 718 namespace { 719 class LintLegacyPass : public FunctionPass { 720 public: 721 static char ID; // Pass identification, replacement for typeid 722 LintLegacyPass() : FunctionPass(ID) { 723 initializeLintLegacyPassPass(*PassRegistry::getPassRegistry()); 724 } 725 726 bool runOnFunction(Function &F) override; 727 728 void getAnalysisUsage(AnalysisUsage &AU) const override { 729 AU.setPreservesAll(); 730 AU.addRequired<AAResultsWrapperPass>(); 731 AU.addRequired<AssumptionCacheTracker>(); 732 AU.addRequired<TargetLibraryInfoWrapperPass>(); 733 AU.addRequired<DominatorTreeWrapperPass>(); 734 } 735 void print(raw_ostream &O, const Module *M) const override {} 736 }; 737 } // namespace 738 739 char LintLegacyPass::ID = 0; 740 INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks 
LLVM IR", 741 false, true) 742 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 743 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 744 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 745 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 746 INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", 747 false, true) 748 749 bool LintLegacyPass::runOnFunction(Function &F) { 750 auto *Mod = F.getParent(); 751 auto *DL = &F.getParent()->getDataLayout(); 752 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 753 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 754 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 755 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 756 Lint L(Mod, DL, AA, AC, DT, TLI); 757 L.visit(F); 758 dbgs() << L.MessagesStr.str(); 759 return false; 760 } 761 762 //===----------------------------------------------------------------------===// 763 // Implement the public interfaces to this file... 764 //===----------------------------------------------------------------------===// 765 766 FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); } 767 768 /// lintFunction - Check a function for errors, printing messages on stderr. 769 /// 770 void llvm::lintFunction(const Function &f) { 771 Function &F = const_cast<Function &>(f); 772 assert(!F.isDeclaration() && "Cannot lint external functions"); 773 774 legacy::FunctionPassManager FPM(F.getParent()); 775 auto *V = new LintLegacyPass(); 776 FPM.add(V); 777 FPM.run(F); 778 } 779 780 /// lintModule - Check a module for errors, printing messages on stderr. 781 /// 782 void llvm::lintModule(const Module &M) { 783 legacy::PassManager PM; 784 auto *V = new LintLegacyPass(); 785 PM.add(V); 786 PM.run(const_cast<Module &>(M)); 787 } 788