1 //===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass statically checks for common and easily-identified constructs 10 // which produce undefined or likely unintended behavior in LLVM IR. 11 // 12 // It is not a guarantee of correctness, in two ways. First, it isn't 13 // comprehensive. There are checks which could be done statically which are 14 // not yet implemented. Some of these are indicated by TODO comments, but 15 // those aren't comprehensive either. Second, many conditions cannot be 16 // checked statically. This pass does no dynamic instrumentation, so it 17 // can't check for all possible problems. 18 // 19 // Another limitation is that it assumes all code will be executed. A store 20 // through a null pointer in a basic block which is never reached is harmless, 21 // but this pass will warn about it anyway. This is the main reason why most 22 // of these checks live here instead of in the Verifier pass. 23 // 24 // Optimization passes may make conditions that this pass checks for more or 25 // less obvious. If an optimization pass appears to be introducing a warning, 26 // it may be that the optimization pass is merely exposing an existing 27 // condition in the code. 28 // 29 // This code may be run before instcombine. In many cases, instcombine checks 30 // for the same kinds of things and turns instructions with undefined behavior 31 // into unreachable (or equivalent). Because of this, this pass makes some 32 // effort to look through bitcasts and so on. 33 // 34 //===----------------------------------------------------------------------===// 35 36 #include "llvm/Analysis/Lint.h" 37 #include "llvm/ADT/APInt.h" 38 #include "llvm/ADT/ArrayRef.h" 39 #include "llvm/ADT/SmallPtrSet.h" 40 #include "llvm/ADT/Twine.h" 41 #include "llvm/Analysis/AliasAnalysis.h" 42 #include "llvm/Analysis/AssumptionCache.h" 43 #include "llvm/Analysis/BasicAliasAnalysis.h" 44 #include "llvm/Analysis/ConstantFolding.h" 45 #include "llvm/Analysis/InstructionSimplify.h" 46 #include "llvm/Analysis/Loads.h" 47 #include "llvm/Analysis/MemoryLocation.h" 48 #include "llvm/Analysis/ScopedNoAliasAA.h" 49 #include "llvm/Analysis/TargetLibraryInfo.h" 50 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/Argument.h" 53 #include "llvm/IR/BasicBlock.h" 54 #include "llvm/IR/Constant.h" 55 #include "llvm/IR/Constants.h" 56 #include "llvm/IR/DataLayout.h" 57 #include "llvm/IR/DerivedTypes.h" 58 #include "llvm/IR/Dominators.h" 59 #include "llvm/IR/Function.h" 60 #include "llvm/IR/GlobalVariable.h" 61 #include "llvm/IR/InstVisitor.h" 62 #include "llvm/IR/InstrTypes.h" 63 #include "llvm/IR/Instruction.h" 64 #include "llvm/IR/Instructions.h" 65 #include "llvm/IR/IntrinsicInst.h" 66 #include "llvm/IR/Module.h" 67 #include "llvm/IR/PassManager.h" 68 #include "llvm/IR/Type.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/Support/AMDGPUAddrSpace.h" 71 #include "llvm/Support/Casting.h" 72 #include "llvm/Support/KnownBits.h" 73 #include "llvm/Support/raw_ostream.h" 74 #include <cassert> 75 #include <cstdint> 76 #include <iterator> 77 #include <string> 78 79 using namespace llvm; 80 81 namespace { 82 namespace MemRef { 83 static const unsigned Read = 1; 84 static const unsigned Write = 2; 85 static const unsigned Callee = 4; 86 static const unsigned Branchee = 8; 87 } // end namespace MemRef 88 89 class Lint : public InstVisitor<Lint> { 90 friend class InstVisitor<Lint>; 91 92 void visitFunction(Function &F); 93 94 void visitCallBase(CallBase &CB); 95 void visitMemoryReference(Instruction &I, const MemoryLocation &Loc, 96 MaybeAlign Alignment, Type *Ty, unsigned Flags); 97 98 void visitReturnInst(ReturnInst &I); 99 void visitLoadInst(LoadInst &I); 100 void visitStoreInst(StoreInst &I); 101 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); 102 void visitAtomicRMWInst(AtomicRMWInst &I); 103 void visitXor(BinaryOperator &I); 104 void visitSub(BinaryOperator &I); 105 void visitLShr(BinaryOperator &I); 106 void visitAShr(BinaryOperator &I); 107 void visitShl(BinaryOperator &I); 108 void visitSDiv(BinaryOperator &I); 109 void visitUDiv(BinaryOperator &I); 110 void visitSRem(BinaryOperator &I); 111 void visitURem(BinaryOperator &I); 112 void visitAllocaInst(AllocaInst &I); 113 void visitVAArgInst(VAArgInst &I); 114 void visitIndirectBrInst(IndirectBrInst &I); 115 void visitExtractElementInst(ExtractElementInst &I); 116 void visitInsertElementInst(InsertElementInst &I); 117 void visitUnreachableInst(UnreachableInst &I); 118 119 Value *findValue(Value *V, bool OffsetOk) const; 120 Value *findValueImpl(Value *V, bool OffsetOk, 121 SmallPtrSetImpl<Value *> &Visited) const; 122 123 public: 124 Module *Mod; 125 const Triple &TT; 126 const DataLayout *DL; 127 AliasAnalysis *AA; 128 AssumptionCache *AC; 129 DominatorTree *DT; 130 TargetLibraryInfo *TLI; 131 132 std::string Messages; 133 raw_string_ostream MessagesStr; 134 135 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA, 136 AssumptionCache *AC, DominatorTree *DT, TargetLibraryInfo *TLI) 137 : Mod(Mod), TT(Mod->getTargetTriple()), DL(DL), AA(AA), AC(AC), DT(DT), 138 TLI(TLI), MessagesStr(Messages) {} 139 140 void WriteValues(ArrayRef<const Value *> Vs) { 141 for (const Value *V : Vs) { 142 if (!V) 143 continue; 144 if (isa<Instruction>(V)) { 145 MessagesStr << *V << '\n'; 146 } else { 147 V->printAsOperand(MessagesStr, true, Mod); 148 MessagesStr << '\n'; 149 } 150 } 151 } 152 153 /// A check failed, so printout out the condition and the message. 154 /// 155 /// This provides a nice place to put a breakpoint if you want to see why 156 /// something is not correct. 157 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; } 158 159 /// A check failed (with values to print). 160 /// 161 /// This calls the Message-only version so that the above is easier to set 162 /// a breakpoint on. 163 template <typename T1, typename... Ts> 164 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { 165 CheckFailed(Message); 166 WriteValues({V1, Vs...}); 167 } 168 }; 169 } // end anonymous namespace 170 171 // Check - We know that cond should be true, if not print an error message. 172 #define Check(C, ...) \ 173 do { \ 174 if (!(C)) { \ 175 CheckFailed(__VA_ARGS__); \ 176 return; \ 177 } \ 178 } while (false) 179 180 void Lint::visitFunction(Function &F) { 181 // This isn't undefined behavior, it's just a little unusual, and it's a 182 // fairly common mistake to neglect to name a function. 183 Check(F.hasName() || F.hasLocalLinkage(), 184 "Unusual: Unnamed function with non-local linkage", &F); 185 186 // TODO: Check for irreducible control flow. 187 } 188 189 void Lint::visitCallBase(CallBase &I) { 190 Value *Callee = I.getCalledOperand(); 191 192 visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt, 193 nullptr, MemRef::Callee); 194 195 if (Function *F = dyn_cast<Function>(findValue(Callee, 196 /*OffsetOk=*/false))) { 197 Check(I.getCallingConv() == F->getCallingConv(), 198 "Undefined behavior: Caller and callee calling convention differ", 199 &I); 200 201 FunctionType *FT = F->getFunctionType(); 202 unsigned NumActualArgs = I.arg_size(); 203 204 Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs 205 : FT->getNumParams() == NumActualArgs, 206 "Undefined behavior: Call argument count mismatches callee " 207 "argument count", 208 &I); 209 210 Check(FT->getReturnType() == I.getType(), 211 "Undefined behavior: Call return type mismatches " 212 "callee return type", 213 &I); 214 215 // Check argument types (in case the callee was casted) and attributes. 216 // TODO: Verify that caller and callee attributes are compatible. 217 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 218 auto AI = I.arg_begin(), AE = I.arg_end(); 219 for (; AI != AE; ++AI) { 220 Value *Actual = *AI; 221 if (PI != PE) { 222 Argument *Formal = &*PI++; 223 Check(Formal->getType() == Actual->getType(), 224 "Undefined behavior: Call argument type mismatches " 225 "callee parameter type", 226 &I); 227 228 // Check that noalias arguments don't alias other arguments. This is 229 // not fully precise because we don't know the sizes of the dereferenced 230 // memory regions. 231 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { 232 AttributeList PAL = I.getAttributes(); 233 unsigned ArgNo = 0; 234 for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { 235 // Skip ByVal arguments since they will be memcpy'd to the callee's 236 // stack so we're not really passing the pointer anyway. 237 if (PAL.hasParamAttr(ArgNo, Attribute::ByVal)) 238 continue; 239 // If both arguments are readonly, they have no dependence. 240 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo)) 241 continue; 242 // Skip readnone arguments since those are guaranteed not to be 243 // dereferenced anyway. 244 if (I.doesNotAccessMemory(ArgNo)) 245 continue; 246 if (AI != BI && (*BI)->getType()->isPointerTy() && 247 !isa<ConstantPointerNull>(*BI)) { 248 AliasResult Result = AA->alias(*AI, *BI); 249 Check(Result != AliasResult::MustAlias && 250 Result != AliasResult::PartialAlias, 251 "Unusual: noalias argument aliases another argument", &I); 252 } 253 } 254 } 255 256 // Check that an sret argument points to valid memory. 257 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 258 Type *Ty = Formal->getParamStructRetType(); 259 MemoryLocation Loc( 260 Actual, LocationSize::precise(DL->getTypeStoreSize(Ty))); 261 visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty, 262 MemRef::Read | MemRef::Write); 263 } 264 265 // Check that ABI attributes for the function and call-site match. 266 unsigned ArgNo = AI->getOperandNo(); 267 Attribute::AttrKind ABIAttributes[] = { 268 Attribute::ZExt, Attribute::SExt, Attribute::InReg, 269 Attribute::ByVal, Attribute::ByRef, Attribute::InAlloca, 270 Attribute::Preallocated, Attribute::StructRet}; 271 AttributeList CallAttrs = I.getAttributes(); 272 for (Attribute::AttrKind Attr : ABIAttributes) { 273 Attribute CallAttr = CallAttrs.getParamAttr(ArgNo, Attr); 274 Attribute FnAttr = F->getParamAttribute(ArgNo, Attr); 275 Check(CallAttr.isValid() == FnAttr.isValid(), 276 Twine("Undefined behavior: ABI attribute ") + 277 Attribute::getNameFromAttrKind(Attr) + 278 " not present on both function and call-site", 279 &I); 280 if (CallAttr.isValid() && FnAttr.isValid()) { 281 Check(CallAttr == FnAttr, 282 Twine("Undefined behavior: ABI attribute ") + 283 Attribute::getNameFromAttrKind(Attr) + 284 " does not have same argument for function and call-site", 285 &I); 286 } 287 } 288 } 289 } 290 } 291 292 if (const auto *CI = dyn_cast<CallInst>(&I)) { 293 if (CI->isTailCall()) { 294 const AttributeList &PAL = CI->getAttributes(); 295 unsigned ArgNo = 0; 296 for (Value *Arg : I.args()) { 297 // Skip ByVal arguments since they will be memcpy'd to the callee's 298 // stack anyway. 299 if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal)) 300 continue; 301 Value *Obj = findValue(Arg, /*OffsetOk=*/true); 302 Check(!isa<AllocaInst>(Obj), 303 "Undefined behavior: Call with \"tail\" keyword references " 304 "alloca", 305 &I); 306 } 307 } 308 } 309 310 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 311 switch (II->getIntrinsicID()) { 312 default: 313 break; 314 315 // TODO: Check more intrinsics 316 317 case Intrinsic::memcpy: 318 case Intrinsic::memcpy_inline: { 319 MemCpyInst *MCI = cast<MemCpyInst>(&I); 320 visitMemoryReference(I, MemoryLocation::getForDest(MCI), 321 MCI->getDestAlign(), nullptr, MemRef::Write); 322 visitMemoryReference(I, MemoryLocation::getForSource(MCI), 323 MCI->getSourceAlign(), nullptr, MemRef::Read); 324 325 // Check that the memcpy arguments don't overlap. The AliasAnalysis API 326 // isn't expressive enough for what we really want to do. Known partial 327 // overlap is not distinguished from the case where nothing is known. 328 auto Size = LocationSize::afterPointer(); 329 if (const ConstantInt *Len = 330 dyn_cast<ConstantInt>(findValue(MCI->getLength(), 331 /*OffsetOk=*/false))) 332 if (Len->getValue().isIntN(32)) 333 Size = LocationSize::precise(Len->getValue().getZExtValue()); 334 Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 335 AliasResult::MustAlias, 336 "Undefined behavior: memcpy source and destination overlap", &I); 337 break; 338 } 339 case Intrinsic::memmove: { 340 MemMoveInst *MMI = cast<MemMoveInst>(&I); 341 visitMemoryReference(I, MemoryLocation::getForDest(MMI), 342 MMI->getDestAlign(), nullptr, MemRef::Write); 343 visitMemoryReference(I, MemoryLocation::getForSource(MMI), 344 MMI->getSourceAlign(), nullptr, MemRef::Read); 345 break; 346 } 347 case Intrinsic::memset: 348 case Intrinsic::memset_inline: { 349 MemSetInst *MSI = cast<MemSetInst>(&I); 350 visitMemoryReference(I, MemoryLocation::getForDest(MSI), 351 MSI->getDestAlign(), nullptr, MemRef::Write); 352 break; 353 } 354 case Intrinsic::vastart: 355 // vastart in non-varargs function is rejected by the verifier 356 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 357 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 358 break; 359 case Intrinsic::vacopy: 360 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 361 std::nullopt, nullptr, MemRef::Write); 362 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI), 363 std::nullopt, nullptr, MemRef::Read); 364 break; 365 case Intrinsic::vaend: 366 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 367 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 368 break; 369 370 case Intrinsic::stackrestore: 371 // Stackrestore doesn't read or write memory, but it sets the 372 // stack pointer, which the compiler may read from or write to 373 // at any time, so check it for both readability and writeability. 374 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI), 375 std::nullopt, nullptr, MemRef::Read | MemRef::Write); 376 break; 377 case Intrinsic::get_active_lane_mask: 378 if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1))) 379 Check(!TripCount->isZero(), 380 "get_active_lane_mask: operand #2 " 381 "must be greater than 0", 382 &I); 383 break; 384 } 385 } 386 387 void Lint::visitReturnInst(ReturnInst &I) { 388 Function *F = I.getParent()->getParent(); 389 Check(!F->doesNotReturn(), 390 "Unusual: Return statement in function with noreturn attribute", &I); 391 392 if (Value *V = I.getReturnValue()) { 393 Value *Obj = findValue(V, /*OffsetOk=*/true); 394 Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I); 395 } 396 } 397 398 // TODO: Check that the reference is in bounds. 399 // TODO: Check readnone/readonly function attributes. 400 void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc, 401 MaybeAlign Align, Type *Ty, unsigned Flags) { 402 // If no memory is being referenced, it doesn't matter if the pointer 403 // is valid. 404 if (Loc.Size.isZero()) 405 return; 406 407 Value *Ptr = const_cast<Value *>(Loc.Ptr); 408 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 409 Check(!isa<ConstantPointerNull>(UnderlyingObject), 410 "Undefined behavior: Null pointer dereference", &I); 411 Check(!isa<UndefValue>(UnderlyingObject), 412 "Undefined behavior: Undef pointer dereference", &I); 413 Check(!isa<ConstantInt>(UnderlyingObject) || 414 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), 415 "Unusual: All-ones pointer dereference", &I); 416 Check(!isa<ConstantInt>(UnderlyingObject) || 417 !cast<ConstantInt>(UnderlyingObject)->isOne(), 418 "Unusual: Address one pointer dereference", &I); 419 420 if (Flags & MemRef::Write) { 421 if (TT.isAMDGPU()) 422 Check(!AMDGPU::isConstantAddressSpace( 423 UnderlyingObject->getType()->getPointerAddressSpace()), 424 "Undefined behavior: Write to memory in const addrspace", &I); 425 426 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 427 Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory", 428 &I); 429 Check(!isa<Function>(UnderlyingObject) && 430 !isa<BlockAddress>(UnderlyingObject), 431 "Undefined behavior: Write to text section", &I); 432 } 433 if (Flags & MemRef::Read) { 434 Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body", 435 &I); 436 Check(!isa<BlockAddress>(UnderlyingObject), 437 "Undefined behavior: Load from block address", &I); 438 } 439 if (Flags & MemRef::Callee) { 440 Check(!isa<BlockAddress>(UnderlyingObject), 441 "Undefined behavior: Call to block address", &I); 442 } 443 if (Flags & MemRef::Branchee) { 444 Check(!isa<Constant>(UnderlyingObject) || 445 isa<BlockAddress>(UnderlyingObject), 446 "Undefined behavior: Branch to non-blockaddress", &I); 447 } 448 449 // Check for buffer overflows and misalignment. 450 // Only handles memory references that read/write something simple like an 451 // alloca instruction or a global variable. 452 int64_t Offset = 0; 453 if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { 454 // OK, so the access is to a constant offset from Ptr. Check that Ptr is 455 // something we can handle and if so extract the size of this base object 456 // along with its alignment. 457 uint64_t BaseSize = MemoryLocation::UnknownSize; 458 MaybeAlign BaseAlign; 459 460 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 461 Type *ATy = AI->getAllocatedType(); 462 if (!AI->isArrayAllocation() && ATy->isSized() && !ATy->isScalableTy()) 463 BaseSize = DL->getTypeAllocSize(ATy).getFixedValue(); 464 BaseAlign = AI->getAlign(); 465 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 466 // If the global may be defined differently in another compilation unit 467 // then don't warn about funky memory accesses. 468 if (GV->hasDefinitiveInitializer()) { 469 Type *GTy = GV->getValueType(); 470 if (GTy->isSized()) 471 BaseSize = DL->getTypeAllocSize(GTy); 472 BaseAlign = GV->getAlign(); 473 if (!BaseAlign && GTy->isSized()) 474 BaseAlign = DL->getABITypeAlign(GTy); 475 } 476 } 477 478 // Accesses from before the start or after the end of the object are not 479 // defined. 480 Check(!Loc.Size.hasValue() || Loc.Size.isScalable() || 481 BaseSize == MemoryLocation::UnknownSize || 482 (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize), 483 "Undefined behavior: Buffer overflow", &I); 484 485 // Accesses that say that the memory is more aligned than it is are not 486 // defined. 487 if (!Align && Ty && Ty->isSized()) 488 Align = DL->getABITypeAlign(Ty); 489 if (BaseAlign && Align) 490 Check(*Align <= commonAlignment(*BaseAlign, Offset), 491 "Undefined behavior: Memory reference address is misaligned", &I); 492 } 493 } 494 495 void Lint::visitLoadInst(LoadInst &I) { 496 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(), 497 MemRef::Read); 498 } 499 500 void Lint::visitStoreInst(StoreInst &I) { 501 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), 502 I.getOperand(0)->getType(), MemRef::Write); 503 } 504 505 void Lint::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { 506 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), 507 I.getOperand(0)->getType(), MemRef::Write); 508 } 509 510 void Lint::visitAtomicRMWInst(AtomicRMWInst &I) { 511 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), 512 I.getOperand(0)->getType(), MemRef::Write); 513 } 514 515 void Lint::visitXor(BinaryOperator &I) { 516 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 517 "Undefined result: xor(undef, undef)", &I); 518 } 519 520 void Lint::visitSub(BinaryOperator &I) { 521 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)), 522 "Undefined result: sub(undef, undef)", &I); 523 } 524 525 void Lint::visitLShr(BinaryOperator &I) { 526 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), 527 /*OffsetOk=*/false))) 528 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 529 "Undefined result: Shift count out of range", &I); 530 } 531 532 void Lint::visitAShr(BinaryOperator &I) { 533 if (ConstantInt *CI = 534 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 535 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 536 "Undefined result: Shift count out of range", &I); 537 } 538 539 void Lint::visitShl(BinaryOperator &I) { 540 if (ConstantInt *CI = 541 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 542 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 543 "Undefined result: Shift count out of range", &I); 544 } 545 546 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, 547 AssumptionCache *AC) { 548 // Assume undef could be zero. 549 if (isa<UndefValue>(V)) 550 return true; 551 552 VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 553 if (!VecTy) { 554 KnownBits Known = computeKnownBits(V, DL, AC, dyn_cast<Instruction>(V), DT); 555 return Known.isZero(); 556 } 557 558 // Per-component check doesn't work with zeroinitializer 559 Constant *C = dyn_cast<Constant>(V); 560 if (!C) 561 return false; 562 563 if (C->isZeroValue()) 564 return true; 565 566 // For a vector, KnownZero will only be true if all values are zero, so check 567 // this per component 568 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements(); 569 I != N; ++I) { 570 Constant *Elem = C->getAggregateElement(I); 571 if (isa<UndefValue>(Elem)) 572 return true; 573 574 KnownBits Known = computeKnownBits(Elem, DL); 575 if (Known.isZero()) 576 return true; 577 } 578 579 return false; 580 } 581 582 void Lint::visitSDiv(BinaryOperator &I) { 583 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC), 584 "Undefined behavior: Division by zero", &I); 585 } 586 587 void Lint::visitUDiv(BinaryOperator &I) { 588 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC), 589 "Undefined behavior: Division by zero", &I); 590 } 591 592 void Lint::visitSRem(BinaryOperator &I) { 593 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC), 594 "Undefined behavior: Division by zero", &I); 595 } 596 597 void Lint::visitURem(BinaryOperator &I) { 598 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC), 599 "Undefined behavior: Division by zero", &I); 600 } 601 602 void Lint::visitAllocaInst(AllocaInst &I) { 603 if (isa<ConstantInt>(I.getArraySize())) 604 // This isn't undefined behavior, it's just an obvious pessimization. 605 Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 606 "Pessimization: Static alloca outside of entry block", &I); 607 608 // TODO: Check for an unusual size (MSB set?) 609 } 610 611 void Lint::visitVAArgInst(VAArgInst &I) { 612 visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr, 613 MemRef::Read | MemRef::Write); 614 } 615 616 void Lint::visitIndirectBrInst(IndirectBrInst &I) { 617 visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()), 618 std::nullopt, nullptr, MemRef::Branchee); 619 620 Check(I.getNumDestinations() != 0, 621 "Undefined behavior: indirectbr with no destinations", &I); 622 } 623 624 void Lint::visitExtractElementInst(ExtractElementInst &I) { 625 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 626 /*OffsetOk=*/false))) { 627 ElementCount EC = I.getVectorOperandType()->getElementCount(); 628 Check(EC.isScalable() || CI->getValue().ult(EC.getFixedValue()), 629 "Undefined result: extractelement index out of range", &I); 630 } 631 } 632 633 void Lint::visitInsertElementInst(InsertElementInst &I) { 634 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), 635 /*OffsetOk=*/false))) { 636 ElementCount EC = I.getType()->getElementCount(); 637 Check(EC.isScalable() || CI->getValue().ult(EC.getFixedValue()), 638 "Undefined result: insertelement index out of range", &I); 639 } 640 } 641 642 void Lint::visitUnreachableInst(UnreachableInst &I) { 643 // This isn't undefined behavior, it's merely suspicious. 644 Check(&I == &I.getParent()->front() || 645 std::prev(I.getIterator())->mayHaveSideEffects(), 646 "Unusual: unreachable immediately preceded by instruction without " 647 "side effects", 648 &I); 649 } 650 651 /// findValue - Look through bitcasts and simple memory reference patterns 652 /// to identify an equivalent, but more informative, value. If OffsetOk 653 /// is true, look through getelementptrs with non-zero offsets too. 654 /// 655 /// Most analysis passes don't require this logic, because instcombine 656 /// will simplify most of these kinds of things away. But it's a goal of 657 /// this Lint pass to be useful even on non-optimized IR. 658 Value *Lint::findValue(Value *V, bool OffsetOk) const { 659 SmallPtrSet<Value *, 4> Visited; 660 return findValueImpl(V, OffsetOk, Visited); 661 } 662 663 /// findValueImpl - Implementation helper for findValue. 664 Value *Lint::findValueImpl(Value *V, bool OffsetOk, 665 SmallPtrSetImpl<Value *> &Visited) const { 666 // Detect self-referential values. 667 if (!Visited.insert(V).second) 668 return PoisonValue::get(V->getType()); 669 670 // TODO: Look through sext or zext cast, when the result is known to 671 // be interpreted as signed or unsigned, respectively. 672 // TODO: Look through eliminable cast pairs. 673 // TODO: Look through calls with unique return values. 674 // TODO: Look through vector insert/extract/shuffle. 675 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts(); 676 if (LoadInst *L = dyn_cast<LoadInst>(V)) { 677 BasicBlock::iterator BBI = L->getIterator(); 678 BasicBlock *BB = L->getParent(); 679 SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 680 BatchAAResults BatchAA(*AA); 681 for (;;) { 682 if (!VisitedBlocks.insert(BB).second) 683 break; 684 if (Value *U = 685 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA)) 686 return findValueImpl(U, OffsetOk, Visited); 687 if (BBI != BB->begin()) 688 break; 689 BB = BB->getUniquePredecessor(); 690 if (!BB) 691 break; 692 BBI = BB->end(); 693 } 694 } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 695 if (Value *W = PN->hasConstantValue()) 696 return findValueImpl(W, OffsetOk, Visited); 697 } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 698 if (CI->isNoopCast(*DL)) 699 return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 700 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 701 if (Value *W = 702 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices())) 703 if (W != V) 704 return findValueImpl(W, OffsetOk, Visited); 705 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 706 // Same as above, but for ConstantExpr instead of Instruction. 707 if (Instruction::isCast(CE->getOpcode())) { 708 if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 709 CE->getOperand(0)->getType(), CE->getType(), 710 *DL)) 711 return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 712 } 713 } 714 715 // As a last resort, try SimplifyInstruction or constant folding. 716 if (Instruction *Inst = dyn_cast<Instruction>(V)) { 717 if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC})) 718 return findValueImpl(W, OffsetOk, Visited); 719 } else if (auto *C = dyn_cast<Constant>(V)) { 720 Value *W = ConstantFoldConstant(C, *DL, TLI); 721 if (W != V) 722 return findValueImpl(W, OffsetOk, Visited); 723 } 724 725 return V; 726 } 727 728 PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { 729 auto *Mod = F.getParent(); 730 auto *DL = &F.getDataLayout(); 731 auto *AA = &AM.getResult<AAManager>(F); 732 auto *AC = &AM.getResult<AssumptionAnalysis>(F); 733 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); 734 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 735 Lint L(Mod, DL, AA, AC, DT, TLI); 736 L.visit(F); 737 dbgs() << L.MessagesStr.str(); 738 if (AbortOnError && !L.MessagesStr.str().empty()) 739 report_fatal_error( 740 "linter found errors, aborting. (enabled by abort-on-error)", false); 741 return PreservedAnalyses::all(); 742 } 743 744 void LintPass::printPipeline( 745 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 746 PassInfoMixin<LintPass>::printPipeline(OS, MapClassName2PassName); 747 if (AbortOnError) 748 OS << "<abort-on-error>"; 749 } 750 751 //===----------------------------------------------------------------------===// 752 // Implement the public interfaces to this file... 753 //===----------------------------------------------------------------------===// 754 755 /// lintFunction - Check a function for errors, printing messages on stderr. 756 /// 757 void llvm::lintFunction(const Function &f, bool AbortOnError) { 758 Function &F = const_cast<Function &>(f); 759 assert(!F.isDeclaration() && "Cannot lint external functions"); 760 761 FunctionAnalysisManager FAM; 762 FAM.registerPass([&] { return TargetLibraryAnalysis(); }); 763 FAM.registerPass([&] { return DominatorTreeAnalysis(); }); 764 FAM.registerPass([&] { return AssumptionAnalysis(); }); 765 FAM.registerPass([&] { 766 AAManager AA; 767 AA.registerFunctionAnalysis<BasicAA>(); 768 AA.registerFunctionAnalysis<ScopedNoAliasAA>(); 769 AA.registerFunctionAnalysis<TypeBasedAA>(); 770 return AA; 771 }); 772 LintPass(AbortOnError).run(F, FAM); 773 } 774 775 /// lintModule - Check a module for errors, printing messages on stderr. 776 /// 777 void llvm::lintModule(const Module &M, bool AbortOnError) { 778 for (const Function &F : M) { 779 if (!F.isDeclaration()) 780 lintFunction(F, AbortOnError); 781 } 782 } 783