1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass statically checks for common and easily-identified constructs 10 // which produce undefined or likely unintended behavior in LLVM IR. 11 // 12 // It is not a guarantee of correctness, in two ways. First, it isn't 13 // comprehensive. There are checks which could be done statically which are 14 // not yet implemented. Some of these are indicated by TODO comments, but 15 // those aren't comprehensive either. Second, many conditions cannot be 16 // checked statically. This pass does no dynamic instrumentation, so it 17 // can't check for all possible problems. 18 // 19 // Another limitation is that it assumes all code will be executed. A store 20 // through a null pointer in a basic block which is never reached is harmless, 21 // but this pass will warn about it anyway. This is the main reason why most 22 // of these checks live here instead of in the Verifier pass. 23 // 24 // Optimization passes may make conditions that this pass checks for more or 25 // less obvious. If an optimization pass appears to be introducing a warning, 26 // it may be that the optimization pass is merely exposing an existing 27 // condition in the code. 28 // 29 // This code may be run before instcombine. In many cases, instcombine checks 30 // for the same kinds of things and turns instructions with undefined behavior 31 // into unreachable (or equivalent). Because of this, this pass makes some 32 // effort to look through bitcasts and so on. 33 // 34 //===----------------------------------------------------------------------===// 35 36 #include "llvm/Analysis/Lint.h" 37 #include "llvm/ADT/APInt.h" 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/ADT/SmallPtrSet.h" 40 #include "llvm/ADT/Twine.h" 41 #include "llvm/Analysis/AliasAnalysis.h" 42 #include "llvm/Analysis/AssumptionCache.h" 43 #include "llvm/Analysis/BasicAliasAnalysis.h" 44 #include "llvm/Analysis/ConstantFolding.h" 45 #include "llvm/Analysis/InstructionSimplify.h" 46 #include "llvm/Analysis/Loads.h" 47 #include "llvm/Analysis/MemoryLocation.h" 48 #include "llvm/Analysis/ScopedNoAliasAA.h" 49 #include "llvm/Analysis/TargetLibraryInfo.h" 50 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/Argument.h" 53 #include "llvm/IR/BasicBlock.h" 54 #include "llvm/IR/Constant.h" 55 #include "llvm/IR/Constants.h" 56 #include "llvm/IR/DataLayout.h" 57 #include "llvm/IR/DerivedTypes.h" 58 #include "llvm/IR/Dominators.h" 59 #include "llvm/IR/Function.h" 60 #include "llvm/IR/GlobalVariable.h" 61 #include "llvm/IR/InstVisitor.h" 62 #include "llvm/IR/InstrTypes.h" 63 #include "llvm/IR/Instruction.h" 64 #include "llvm/IR/Instructions.h" 65 #include "llvm/IR/IntrinsicInst.h" 66 #include "llvm/IR/Module.h" 67 #include "llvm/IR/PassManager.h" 68 #include "llvm/IR/Type.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/Support/Casting.h" 71 #include "llvm/Support/KnownBits.h" 72 #include "llvm/Support/raw_ostream.h" 73 #include <cassert> 74 #include <cstdint> 75 #include <iterator> 76 #include <string> 77 78 using namespace llvm; 79 80 namespace { 81 namespace MemRef { 82 static const unsigned Read = 1; 83 static const unsigned Write = 2; 84 static const unsigned Callee = 4; 85 static const unsigned Branchee = 8; 86 } // end namespace MemRef 87 88 class Lint : public InstVisitor<Lint> { 89 friend class InstVisitor<Lint>; 90 91 void visitFunction(Function &F); 92 93 void visitCallBase(CallBase &CB); 94 void visitMemoryReference(Instruction &I, const MemoryLocation &Loc, 95 MaybeAlign Alignment, Type *Ty, unsigned Flags); 96 97 void visitReturnInst(ReturnInst &I); 98 void visitLoadInst(LoadInst &I); 99 void visitStoreInst(StoreInst &I); 100 void visitXor(BinaryOperator &I); 101 void visitSub(BinaryOperator &I); 102 void visitLShr(BinaryOperator &I); 103 void visitAShr(BinaryOperator &I); 104 void visitShl(BinaryOperator &I); 105 void visitSDiv(BinaryOperator &I); 106 void visitUDiv(BinaryOperator &I); 107 void visitSRem(BinaryOperator &I); 108 void visitURem(BinaryOperator &I); 109 void visitAllocaInst(AllocaInst &I); 110 void visitVAArgInst(VAArgInst &I); 111 void visitIndirectBrInst(IndirectBrInst &I); 112 void visitExtractElementInst(ExtractElementInst &I); 113 void visitInsertElementInst(InsertElementInst &I); 114 void visitUnreachableInst(UnreachableInst &I); 115 116 Value *findValue(Value *V, bool OffsetOk) const; 117 Value *findValueImpl(Value *V, bool OffsetOk, 118 SmallPtrSetImpl<Value *> &Visited) const; 119 120 public: 121 Module *Mod; 122 const DataLayout *DL; 123 AliasAnalysis *AA; 124 AssumptionCache *AC; 125 DominatorTree *DT; 126 TargetLibraryInfo *TLI; 127 128 std::string Messages; 129 raw_string_ostream MessagesStr; 130 131 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, 132 AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) 133 : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI), 134 MessagesStr(Messages) {} 135 136 void WriteValues(ArrayRef<const Value *> Vs) { 137 for (const Value *V : Vs) { 138 if (!V) 139 continue; 140 if (isa<Instruction>(V)) { 141 MessagesStr << *V << '\n'; 142 } else { 143 V->printAsOperand(MessagesStr, true, Mod); 144 MessagesStr << '\n'; 145 } 146 } 147 } 148 149 /// A check failed, so printout out the condition and the message. 150 /// 151 /// This provides a nice place to put a breakpoint if you want to see why 152 /// something is not correct. 153 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } 154 155 /// A check failed (with values to print). 156 /// 157 /// This calls the Message-only version so that the above is easier to set 158 /// a breakpoint on. 159 template <typename T1, typename... Ts> 160 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { 161 CheckFailed(Message); 162 WriteValues({V1, Vs...}); 163 } 164 }; 165 } // end anonymous namespace 166 167 // Check - We know that cond should be true, if not print an error message. 168 #define Check(C, ...) \ 169 do { \ 170 if (!(C)) { \ 171 CheckFailed(__VA_ARGS__); \ 172 return; \ 173 } \ 174 } while (false) 175 176 void Lint::visitFunction(Function &F) { 177 // This isn't undefined behavior, it's just a little unusual, and it's a 178 // fairly common mistake to neglect to name a function. 179 Check(F.hasName() || F.hasLocalLinkage(), 180 "Unusual: Unnamed function with non-local linkage", &F); 181 182 // TODO: Check for irreducible control flow. 183 } 184 185 void Lint::visitCallBase(CallBase &I) { 186 Value *Callee = I.getCalledOperand(); 187 188 visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt, 189 nullptr, MemRef::Callee); 190 191 if (Function *F = dyn_cast<Function>(findValue(Callee, 192 /*OffsetOk=*/false))) { 193 Check(I.getCallingConv() == F->getCallingConv(), 194 "Undefined behavior: Caller and callee calling convention differ", 195 &I); 196 197 FunctionType *FT = F->getFunctionType(); 198 unsigned NumActualArgs = I.arg_size(); 199 200 Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs 201 : FT->getNumParams() == NumActualArgs, 202 "Undefined behavior: Call argument count mismatches callee " 203 "argument count", 204 &I); 205 206 Check(FT->getReturnType() == I.getType(), 207 "Undefined behavior: Call return type mismatches " 208 "callee return type", 209 &I); 210 211 // Check argument types (in case the callee was casted) and attributes. 212 // TODO: Verify that caller and callee attributes are compatible. 213 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 214 auto AI = I.arg_begin(), AE = I.arg_end(); 215 for (; AI != AE; ++AI) { 216 Value *Actual = *AI; 217 if (PI != PE) { 218 Argument *Formal = &*PI++; 219 Check(Formal->getType() == Actual->getType(), 220 "Undefined behavior: Call argument type mismatches " 221 "callee parameter type", 222 &I); 223 224 // Check that noalias arguments don't alias other arguments. This is 225 // not fully precise because we don't know the sizes of the dereferenced 226 // memory regions. 227 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { 228 AttributeList PAL = I.getAttributes(); 229 unsigned ArgNo = 0; 230 for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { 231 // Skip ByVal arguments since they will be memcpy'd to the callee's 232 // stack so we're not really passing the pointer anyway. 233 if (PAL.hasParamAttr(ArgNo, Attribute::ByVal)) 234 continue; 235 // If both arguments are readonly, they have no dependence. 236 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo)) 237 continue; 238 if (AI != BI && (*BI)->getType()->isPointerTy()) { 239 AliasResult Result = AA->alias(*AI, *BI); 240 Check(Result != AliasResult::MustAlias && 241 Result != AliasResult::PartialAlias, 242 "Unusual: noalias argument aliases another argument", &I); 243 } 244 } 245 } 246 247 // Check that an sret argument points to valid memory. 248 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 249 Type *Ty = Formal->getParamStructRetType(); 250 MemoryLocation Loc( 251 Actual, LocationSize::precise(DL->getTypeStoreSize(Ty))); 252 visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty, 253 MemRef::Read | MemRef::Write); 254 } 255 } 256 } 257 } 258 259 if (const auto *CI = dyn_cast<CallInst>(&I)) { 260 if (CI->isTailCall()) { 261 const AttributeList &PAL = CI->getAttributes(); 262 unsigned ArgNo = 0; 263 for (Value *Arg : I.args()) { 264 // Skip ByVal arguments since they will be memcpy'd to the callee's 265 // stack anyway. 266 if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal)) 267 continue; 268 Value *Obj = findValue(Arg, /*OffsetOk=*/true); 269 Check(!isa<AllocaInst>(Obj), 270 "Undefined behavior: Call with \"tail\" keyword references " 271 "alloca", 272 &I); 273 } 274 } 275 } 276 277 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 278 switch (II->getIntrinsicID()) { 279 default: 280 break; 281 282 // TODO: Check more intrinsics 283 284 case Intrinsic::memcpy: { 285 MemCpyInst *MCI = cast<MemCpyInst>(&I); 286 visitMemoryReference(I, MemoryLocation::getForDest(MCI), 287 MCI->getDestAlign(), nullptr, MemRef::Write); 288 visitMemoryReference(I, MemoryLocation::getForSource(MCI), 289 MCI->getSourceAlign(), nullptr, MemRef::Read); 290 291 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 292 // isn't expressive enough for what we really want to do. Known partial 293 // overlap is not distinguished from the case where nothing is known. 294 auto Size = LocationSize::afterPointer(); 295 if (const ConstantInt *Len = 296 dyn_cast<ConstantInt>(findValue(MCI->getLength(), 297 /*OffsetOk=*/false))) 298 if (Len->getValue().isIntN(32)) 299 Size = LocationSize::precise(Len->getValue().getZExtValue()); 300 Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 301 AliasResult::MustAlias, 302 "Undefined behavior: memcpy source and destination overlap", &I); 303 break; 304 } 305 case Intrinsic::memcpy_inline: { 306 MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); 307 const uint64_t Size = MCII->getLength()->getValue().getLimitedValue(); 308 visitMemoryReference(I, MemoryLocation::getForDest(MCII), 309 MCII->getDestAlign(), nullptr, MemRef::Write); 310 visitMemoryReference(I, MemoryLocation::getForSource(MCII), 311 MCII->getSourceAlign(), nullptr, MemRef::Read); 312 313 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 314 // isn't expressive enough for what we really want to do. Known partial 315 // overlap is not distinguished from the case where nothing is known. 316 const LocationSize LS = LocationSize::precise(Size); 317 Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != 318 AliasResult::MustAlias, 319 "Undefined behavior: memcpy source and destination overlap", &I); 320 break; 321 } 322 case Intrinsic::memmove: { 323 MemMoveInst *MMI = cast<MemMoveInst>(&I); 324 visitMemoryReference(I, MemoryLocation::getForDest(MMI), 325 MMI->getDestAlign(), nullptr, MemRef::Write); 326 visitMemoryReference(I, MemoryLocation::getForSource(MMI), 327 MMI->getSourceAlign(), nullptr, MemRef::Read); 328 break; 329 } 330 case Intrinsic::memset: { 331 MemSetInst *MSI = cast<MemSetInst>(&I); 332 visitMemoryReference(I, MemoryLocation::getForDest(MSI), 333 MSI->getDestAlign(), nullptr, MemRef::Write); 334 break; 335 } 336 case Intrinsic::memset_inline: { 337 MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I); 338 visitMemoryReference(I, MemoryLocation::getForDest(MSII), 339 MSII->getDestAlign(), nullptr, MemRef::Write); 340 break; 341 } 342 343 case Intrinsic::vastart: 344 Check(I.getParent()->getParent()->isVarArg(), 345 "Undefined behavior: va_start called in a non-varargs function", 346 &I); 347 348 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 349 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 350 break; 351 case Intrinsic::vacopy: 352 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 353 std::nullopt, nullptr, MemRef::Write); 354 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), 355 std::nullopt, nullptr, MemRef::Read); 356 break; 357 case Intrinsic::vaend: 358 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 359 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 360 break; 361 362 case Intrinsic::stackrestore: 363 // Stackrestore doesn't read or write memory, but it sets the 364 // stack pointer, which the compiler may read from or write to 365 // at any time, so check it for both readability and writeability. 366 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 367 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 368 break; 369 case Intrinsic::get_active_lane_mask: 370 if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1))) 371 Check(!TripCount->isZero(), 372 "get_active_lane_mask: operand #2 " 373 "must be greater than 0", 374 &I); 375 break; 376 } 377 } 378 379 void Lint::visitReturnInst(ReturnInst &I) { 380 Function *F = I.getParent()->getParent(); 381 Check(!F->doesNotReturn(), 382 "Unusual: Return statement in function with noreturn attribute", &I); 383 384 if (Value *V = I.getReturnValue()) { 385 Value *Obj = findValue(V, /*OffsetOk=*/true); 386 Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); 387 } 388 } 389 390 // TODO: Check that the reference is in bounds. 391 // TODO: Check readnone/readonly function attributes. 392 void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, 393 MaybeAlign Align, Type *Ty, unsigned Flags) { 394 // If no memory is being referenced, it doesn't matter if the pointer 395 // is valid. 396 if (Loc.Size.isZero()) 397 return; 398 399 Value *Ptr = const_cast<Value *>(Loc.Ptr); 400 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 401 Check(!isa<ConstantPointerNull>(UnderlyingObject), 402 "Undefined behavior: Null pointer dereference", &I); 403 Check(!isa<UndefValue>(UnderlyingObject), 404 "Undefined behavior: Undef pointer dereference", &I); 405 Check(!isa<ConstantInt>(UnderlyingObject) || 406 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), 407 "Unusual: All-ones pointer dereference", &I); 408 Check(!isa<ConstantInt>(UnderlyingObject) || 409 !cast<ConstantInt>(UnderlyingObject)->isOne(), 410 "Unusual: Address one pointer dereference", &I); 411 412 if (Flags & MemRef::Write) { 413 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 414 Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory", 415 &I); 416 Check(!isa<Function>(UnderlyingObject) && 417 !isa<BlockAddress>(UnderlyingObject), 418 "Undefined behavior: Write to text section", &I); 419 } 420 if (Flags & MemRef::Read) { 421 Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", 422 &I); 423 Check(!isa<BlockAddress>(UnderlyingObject), 424 "Undefined behavior: Load from block address", &I); 425 } 426 if (Flags & MemRef::Callee) { 427 Check(!isa<BlockAddress>(UnderlyingObject), 428 "Undefined behavior: Call to block address", &I); 429 } 430 if (Flags & MemRef::Branchee) { 431 Check(!isa<Constant>(UnderlyingObject) || 432 isa<BlockAddress>(UnderlyingObject), 433 "Undefined behavior: Branch to non-blockaddress", &I); 434 } 435 436 // Check for buffer overflows and misalignment. 437 // Only handles memory references that read/write something simple like an 438 // alloca instruction or a global variable. 439 int64_t Offset = 0; 440 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { 441 // OK, so the access is to a constant offset from Ptr. Check that Ptr is 442 // something we can handle and if so extract the size of this base object 443 // along with its alignment. 444 uint64_t BaseSize = MemoryLocation::UnknownSize; 445 MaybeAlign BaseAlign; 446 447 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 448 Type *ATy = AI->getAllocatedType(); 449 if (!AI->isArrayAllocation() && ATy->isSized()) 450 BaseSize = DL->getTypeAllocSize(ATy); 451 BaseAlign = AI->getAlign(); 452 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 453 // If the global may be defined differently in another compilation unit 454 // then don't warn about funky memory accesses. 455 if (GV->hasDefinitiveInitializer()) { 456 Type *GTy = GV->getValueType(); 457 if (GTy->isSized()) 458 BaseSize = DL->getTypeAllocSize(GTy); 459 BaseAlign = GV->getAlign(); 460 if (!BaseAlign && GTy->isSized()) 461 BaseAlign = DL->getABITypeAlign(GTy); 462 } 463 } 464 465 // Accesses from before the start or after the end of the object are not 466 // defined. 467 Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize || 468 (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), 469 "Undefined behavior: Buffer overflow", &I); 470 471 // Accesses that say that the memory is more aligned than it is are not 472 // defined. 473 if (!Align && Ty && Ty->isSized()) 474 Align = DL->getABITypeAlign(Ty); 475 if (BaseAlign && Align) 476 Check(*Align <= commonAlignment(*BaseAlign, Offset), 477 "Undefined behavior: Memory reference address is misaligned", &I); 478 } 479 } 480 481 void Lint::visitLoadInst(LoadInst &I) { 482 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(), 483 MemRef::Read); 484 } 485 486 void Lint::visitStoreInst(StoreInst &I) { 487 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), 488 I.getOperand(0)->getType(), MemRef::Write); 489 } 490 491 void Lint::visitXor(BinaryOperator &I) { 492 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 493 "Undefined result: xor(undef, undef)", &I); 494 } 495 496 void Lint::visitSub(BinaryOperator &I) { 497 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 498 "Undefined result: sub(undef, undef)", &I); 499 } 500 501 void Lint::visitLShr(BinaryOperator &I) { 502 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), 503 /*OffsetOk=*/false))) 504 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 505 "Undefined result: Shift count out of range", &I); 506 } 507 508 void Lint::visitAShr(BinaryOperator &I) { 509 if (ConstantInt *CI = 510 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 511 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 512 "Undefined result: Shift count out of range", &I); 513 } 514 515 void Lint::visitShl(BinaryOperator &I) { 516 if (ConstantInt *CI = 517 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 518 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 519 "Undefined result: Shift count out of range", &I); 520 } 521 522 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, 523 AssumptionCache *AC) { 524 // Assume undef could be zero. 525 if (isa<UndefValue>(V)) 526 return true; 527 528 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 529 if (!VecTy) { 530 KnownBits Known = 531 computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT); 532 return Known.isZero(); 533 } 534 535 // Per-component check doesn't work with zeroinitializer 536 Constant *C = dyn_cast<Constant>(V); 537 if (!C) 538 return false; 539 540 if (C->isZeroValue()) 541 return true; 542 543 // For a vector, KnownZero will only be true if all values are zero, so check 544 // this per component 545 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements(); 546 I != N; ++I) { 547 Constant *Elem = C->getAggregateElement(I); 548 if (isa<UndefValue>(Elem)) 549 return true; 550 551 KnownBits Known = computeKnownBits(Elem, DL); 552 if (Known.isZero()) 553 return true; 554 } 555 556 return false; 557 } 558 559 void Lint::visitSDiv(BinaryOperator &I) { 560 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 561 "Undefined behavior: Division by zero", &I); 562 } 563 564 void Lint::visitUDiv(BinaryOperator &I) { 565 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 566 "Undefined behavior: Division by zero", &I); 567 } 568 569 void Lint::visitSRem(BinaryOperator &I) { 570 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 571 "Undefined behavior: Division by zero", &I); 572 } 573 574 void Lint::visitURem(BinaryOperator &I) { 575 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC), 576 "Undefined behavior: Division by zero", &I); 577 } 578 579 void Lint::visitAllocaInst(AllocaInst &I) { 580 if (isa<ConstantInt>(I.getArraySize())) 581 // This isn't undefined behavior, it's just an obvious pessimization. 582 Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 583 "Pessimization: Static alloca outside of entry block", &I); 584 585 // TODO: Check for an unusual size (MSB set?) 586 } 587 588 void Lint::visitVAArgInst(VAArgInst &I) { 589 visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr, 590 MemRef::Read | MemRef::Write); 591 } 592 593 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 594 visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), 595 std::nullopt, nullptr, MemRef::Branchee); 596 597 Check(I.getNumDestinations() != 0, 598 "Undefined behavior: indirectbr with no destinations", &I); 599 } 600 601 void Lint::visitExtractElementInst(ExtractElementInst &I) { 602 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 603 /*OffsetOk=*/false))) 604 Check( 605 CI->getValue().ult( 606 cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()), 607 "Undefined result: extractelement index out of range", &I); 608 } 609 610 void Lint::visitInsertElementInst(InsertElementInst &I) { 611 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), 612 /*OffsetOk=*/false))) 613 Check(CI->getValue().ult( 614 cast<FixedVectorType>(I.getType())->getNumElements()), 615 "Undefined result: insertelement index out of range", &I); 616 } 617 618 void Lint::visitUnreachableInst(UnreachableInst &I) { 619 // This isn't undefined behavior, it's merely suspicious. 620 Check(&I == &I.getParent()->front() || 621 std::prev(I.getIterator())->mayHaveSideEffects(), 622 "Unusual: unreachable immediately preceded by instruction without " 623 "side effects", 624 &I); 625 } 626 627 /// findValue - Look through bitcasts and simple memory reference patterns 628 /// to identify an equivalent, but more informative, value. If OffsetOk 629 /// is true, look through getelementptrs with non-zero offsets too. 630 /// 631 /// Most analysis passes don't require this logic, because instcombine 632 /// will simplify most of these kinds of things away. But it's a goal of 633 /// this Lint pass to be useful even on non-optimized IR. 634 Value *Lint::findValue(Value *V, bool OffsetOk) const { 635 SmallPtrSet<Value *, 4> Visited; 636 return findValueImpl(V, OffsetOk, Visited); 637 } 638 639 /// findValueImpl - Implementation helper for findValue. 640 Value *Lint::findValueImpl(Value *V, bool OffsetOk, 641 SmallPtrSetImpl<Value *> &Visited) const { 642 // Detect self-referential values. 643 if (!Visited.insert(V).second) 644 return UndefValue::get(V->getType()); 645 646 // TODO: Look through sext or zext cast, when the result is known to 647 // be interpreted as signed or unsigned, respectively. 648 // TODO: Look through eliminable cast pairs. 649 // TODO: Look through calls with unique return values. 650 // TODO: Look through vector insert/extract/shuffle. 651 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts(); 652 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 653 BasicBlock::iterator BBI = L->getIterator(); 654 BasicBlock *BB = L->getParent(); 655 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 656 for (;;) { 657 if (!VisitedBlocks.insert(BB).second) 658 break; 659 if (Value *U = 660 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, AA)) 661 return findValueImpl(U, OffsetOk, Visited); 662 if (BBI != BB->begin()) 663 break; 664 BB = BB->getUniquePredecessor(); 665 if (!BB) 666 break; 667 BBI = BB->end(); 668 } 669 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 670 if (Value *W = PN->hasConstantValue()) 671 return findValueImpl(W, OffsetOk, Visited); 672 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 673 if (CI->isNoopCast(*DL)) 674 return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 675 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 676 if (Value *W = 677 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) 678 if (W != V) 679 return findValueImpl(W, OffsetOk, Visited); 680 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 681 // Same as above, but for ConstantExpr instead of Instruction. 682 if (Instruction::isCast(CE->getOpcode())) { 683 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 684 CE->getOperand(0)->getType(), CE->getType(), 685 *DL)) 686 return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 687 } 688 } 689 690 // As a last resort, try SimplifyInstruction or constant folding. 691 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 692 if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC})) 693 return findValueImpl(W, OffsetOk, Visited); 694 } else if (auto *C = dyn_cast<Constant>(V)) { 695 Value *W = ConstantFoldConstant(C, *DL, TLI); 696 if (W != V) 697 return findValueImpl(W, OffsetOk, Visited); 698 } 699 700 return V; 701 } 702 703 PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { 704 auto *Mod = F.getParent(); 705 auto *DL = &F.getParent()->getDataLayout(); 706 auto *AA = &AM.getResult<AAManager>(F); 707 auto *AC = &AM.getResult<AssumptionAnalysis>(F); 708 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); 709 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 710 Lint L(Mod, DL, AA, AC, DT, TLI); 711 L.visit(F); 712 dbgs() << L.MessagesStr.str(); 713 return PreservedAnalyses::all(); 714 } 715 716 //===----------------------------------------------------------------------===// 717 // Implement the public interfaces to this file... 718 //===----------------------------------------------------------------------===// 719 720 /// lintFunction - Check a function for errors, printing messages on stderr. 721 /// 722 void llvm::lintFunction(const Function &f) { 723 Function &F = const_cast<Function &>(f); 724 assert(!F.isDeclaration() && "Cannot lint external functions"); 725 726 FunctionAnalysisManager FAM; 727 FAM.registerPass([&] { return TargetLibraryAnalysis(); }); 728 FAM.registerPass([&] { return DominatorTreeAnalysis(); }); 729 FAM.registerPass([&] { return AssumptionAnalysis(); }); 730 FAM.registerPass([&] { 731 AAManager AA; 732 AA.registerFunctionAnalysis<BasicAA>(); 733 AA.registerFunctionAnalysis<ScopedNoAliasAA>(); 734 AA.registerFunctionAnalysis<TypeBasedAA>(); 735 return AA; 736 }); 737 LintPass().run(F, FAM); 738 } 739 740 /// lintModule - Check a module for errors, printing messages on stderr. 741 /// 742 void llvm::lintModule(const Module &M) { 743 for (const Function &F : M) { 744 if (!F.isDeclaration()) 745 lintFunction(F); 746 } 747 } 748