1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check 10 // which looks for an out-of-bound array element access. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/CharUnits.h" 15 #include "clang/AST/ParentMapContext.h" 16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 17 #include "clang/StaticAnalyzer/Checkers/Taint.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/Support/FormatVariadic.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <optional> 29 30 using namespace clang; 31 using namespace ento; 32 using namespace taint; 33 using llvm::formatv; 34 35 namespace { 36 /// If `E` is a "clean" array subscript expression, return the type of the 37 /// accessed element. If the base of the subscript expression is modified by 38 /// pointer arithmetic (and not the beginning of a "full" memory region), this 39 /// always returns nullopt because that's the right (or the least bad) thing to 40 /// do for the diagnostic output that's relying on this. 41 static std::optional<QualType> determineElementType(const Expr *E, 42 const CheckerContext &C) { 43 const auto *ASE = dyn_cast<ArraySubscriptExpr>(E); 44 if (!ASE) 45 return std::nullopt; 46 47 const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion(); 48 if (!SubscriptBaseReg) 49 return std::nullopt; 50 51 // The base of the subscript expression is affected by pointer arithmetics, 52 // so we want to report byte offsets instead of indices. 53 if (isa<ElementRegion>(SubscriptBaseReg->StripCasts())) 54 return std::nullopt; 55 56 return ASE->getType(); 57 } 58 59 static std::optional<int64_t> 60 determineElementSize(const std::optional<QualType> T, const CheckerContext &C) { 61 if (!T) 62 return std::nullopt; 63 return C.getASTContext().getTypeSizeInChars(*T).getQuantity(); 64 } 65 66 class StateUpdateReporter { 67 const SubRegion *Reg; 68 const NonLoc ByteOffsetVal; 69 const std::optional<QualType> ElementType; 70 const std::optional<int64_t> ElementSize; 71 bool AssumedNonNegative = false; 72 std::optional<NonLoc> AssumedUpperBound = std::nullopt; 73 74 public: 75 StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E, 76 CheckerContext &C) 77 : Reg(R), ByteOffsetVal(ByteOffsVal), 78 ElementType(determineElementType(E, C)), 79 ElementSize(determineElementSize(ElementType, C)) {} 80 81 void recordNonNegativeAssumption() { AssumedNonNegative = true; } 82 void recordUpperBoundAssumption(NonLoc UpperBoundVal) { 83 AssumedUpperBound = UpperBoundVal; 84 } 85 86 bool assumedNonNegative() { return AssumedNonNegative; } 87 88 const NoteTag *createNoteTag(CheckerContext &C) const; 89 90 private: 91 std::string getMessage(PathSensitiveBugReport &BR) const; 92 93 /// Return true if information about the value of `Sym` can put constraints 94 /// on some symbol which is interesting within the bug report `BR`. 95 /// In particular, this returns true when `Sym` is interesting within `BR`; 96 /// but it also returns true if `Sym` is an expression that contains integer 97 /// constants and a single symbolic operand which is interesting (in `BR`). 98 /// We need to use this instead of plain `BR.isInteresting()` because if we 99 /// are analyzing code like 100 /// int array[10]; 101 /// int f(int arg) { 102 /// return array[arg] && array[arg + 10]; 103 /// } 104 /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not 105 /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough 106 /// to detect this out of bounds access). 107 static bool providesInformationAboutInteresting(SymbolRef Sym, 108 PathSensitiveBugReport &BR); 109 static bool providesInformationAboutInteresting(SVal SV, 110 PathSensitiveBugReport &BR) { 111 return providesInformationAboutInteresting(SV.getAsSymbol(), BR); 112 } 113 }; 114 115 struct Messages { 116 std::string Short, Full; 117 }; 118 119 // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` 120 // instead of `PreStmt` because the current implementation passes the whole 121 // expression to `CheckerContext::getSVal()` which only works after the 122 // symbolic evaluation of the expression. (To turn them into `PreStmt` 123 // callbacks, we'd need to duplicate the logic that evaluates these 124 // expressions.) The `MemberExpr` callback would work as `PreStmt` but it's 125 // defined as `PostStmt` for the sake of consistency with the other callbacks. 126 class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>, 127 check::PostStmt<UnaryOperator>, 128 check::PostStmt<MemberExpr>> { 129 BugType BT{this, "Out-of-bound access"}; 130 BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; 131 132 void performCheck(const Expr *E, CheckerContext &C) const; 133 134 void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs, 135 NonLoc Offset, std::optional<NonLoc> Extent, 136 bool IsTaintBug = false) const; 137 138 static void markPartsInteresting(PathSensitiveBugReport &BR, 139 ProgramStateRef ErrorState, NonLoc Val, 140 bool MarkTaint); 141 142 static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); 143 144 static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State, 145 NonLoc Offset, NonLoc Limit, 146 CheckerContext &C); 147 static bool isInAddressOf(const Stmt *S, ASTContext &AC); 148 149 public: 150 void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const { 151 performCheck(E, C); 152 } 153 void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const { 154 if (E->getOpcode() == UO_Deref) 155 performCheck(E, C); 156 } 157 void checkPostStmt(const MemberExpr *E, CheckerContext &C) const { 158 if (E->isArrow()) 159 performCheck(E->getBase(), C); 160 } 161 }; 162 163 } // anonymous namespace 164 165 /// For a given Location that can be represented as a symbolic expression 166 /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block 167 /// Arr and the distance of Location from the beginning of Arr (expressed in a 168 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these 169 /// cannot be determined. 170 static std::optional<std::pair<const SubRegion *, NonLoc>> 171 computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { 172 QualType T = SVB.getArrayIndexType(); 173 auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { 174 // We will use this utility to add and multiply values. 175 return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>(); 176 }; 177 178 const SubRegion *OwnerRegion = nullptr; 179 std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex(); 180 181 const ElementRegion *CurRegion = 182 dyn_cast_or_null<ElementRegion>(Location.getAsRegion()); 183 184 while (CurRegion) { 185 const auto Index = CurRegion->getIndex().getAs<NonLoc>(); 186 if (!Index) 187 return std::nullopt; 188 189 QualType ElemType = CurRegion->getElementType(); 190 191 // FIXME: The following early return was presumably added to safeguard the 192 // getTypeSizeInChars() call (which doesn't accept an incomplete type), but 193 // it seems that `ElemType` cannot be incomplete at this point. 194 if (ElemType->isIncompleteType()) 195 return std::nullopt; 196 197 // Calculate Delta = Index * sizeof(ElemType). 198 NonLoc Size = SVB.makeArrayIndex( 199 SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); 200 auto Delta = EvalBinOp(BO_Mul, *Index, Size); 201 if (!Delta) 202 return std::nullopt; 203 204 // Perform Offset += Delta. 205 Offset = EvalBinOp(BO_Add, *Offset, *Delta); 206 if (!Offset) 207 return std::nullopt; 208 209 OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>(); 210 // When this is just another ElementRegion layer, we need to continue the 211 // offset calculations: 212 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion); 213 } 214 215 if (OwnerRegion) 216 return std::make_pair(OwnerRegion, *Offset); 217 218 return std::nullopt; 219 } 220 221 // NOTE: This function is the "heart" of this checker. It simplifies 222 // inequalities with transformations that are valid (and very elementary) in 223 // pure mathematics, but become invalid if we use them in C++ number model 224 // where the calculations may overflow. 225 // Due to the overflow issues I think it's impossible (or at least not 226 // practical) to integrate this kind of simplification into the resolution of 227 // arbitrary inequalities (i.e. the code of `evalBinOp`); but this function 228 // produces valid results when the calculations are handling memory offsets 229 // and every value is well below SIZE_MAX. 230 // TODO: This algorithm should be moved to a central location where it's 231 // available for other checkers that need to compare memory offsets. 232 // NOTE: the simplification preserves the order of the two operands in a 233 // mathematical sense, but it may change the result produced by a C++ 234 // comparison operator (and the automatic type conversions). 235 // For example, consider a comparison "X+1 < 0", where the LHS is stored as a 236 // size_t and the RHS is stored in an int. (As size_t is unsigned, this 237 // comparison is false for all values of "X".) However, the simplification may 238 // turn it into "X < -1", which is still always false in a mathematical sense, 239 // but can produce a true result when evaluated by `evalBinOp` (which follows 240 // the rules of C++ and casts -1 to SIZE_MAX). 241 static std::pair<NonLoc, nonloc::ConcreteInt> 242 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, 243 SValBuilder &svalBuilder) { 244 std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); 245 if (SymVal && SymVal->isExpression()) { 246 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { 247 llvm::APSInt constant = 248 APSIntType(extent.getValue()).convert(SIE->getRHS()); 249 switch (SIE->getOpcode()) { 250 case BO_Mul: 251 // The constant should never be 0 here, becasue multiplication by zero 252 // is simplified by the engine. 253 if ((extent.getValue() % constant) != 0) 254 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 255 else 256 return getSimplifiedOffsets( 257 nonloc::SymbolVal(SIE->getLHS()), 258 svalBuilder.makeIntVal(extent.getValue() / constant), 259 svalBuilder); 260 case BO_Add: 261 return getSimplifiedOffsets( 262 nonloc::SymbolVal(SIE->getLHS()), 263 svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); 264 default: 265 break; 266 } 267 } 268 } 269 270 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 271 } 272 273 static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) { 274 const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value); 275 return MaxV && MaxV->isNegative(); 276 } 277 278 static bool isUnsigned(SValBuilder &SVB, NonLoc Value) { 279 QualType T = Value.getType(SVB.getContext()); 280 return T->isUnsignedIntegerType(); 281 } 282 283 // Evaluate the comparison Value < Threshold with the help of the custom 284 // simplification algorithm defined for this checker. Return a pair of states, 285 // where the first one corresponds to "value below threshold" and the second 286 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in 287 // the case when the evaluation fails. 288 // If the optional argument CheckEquality is true, then use BO_EQ instead of 289 // the default BO_LT after consistently applying the same simplification steps. 290 static std::pair<ProgramStateRef, ProgramStateRef> 291 compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, 292 SValBuilder &SVB, bool CheckEquality = false) { 293 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 294 std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); 295 } 296 297 // We want to perform a _mathematical_ comparison between the numbers `Value` 298 // and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may 299 // perform automatic conversions. For example the number -1 is less than the 300 // number 1000, but -1 < `1000ull` will evaluate to `false` because the `int` 301 // -1 is converted to ULONGLONG_MAX. 302 // To avoid automatic conversions, we evaluate the "obvious" cases without 303 // calling `evalBinOpNN`: 304 if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) { 305 if (CheckEquality) { 306 // negative_value == unsigned_threshold is always false 307 return {nullptr, State}; 308 } 309 // negative_value < unsigned_threshold is always true 310 return {State, nullptr}; 311 } 312 if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) { 313 // unsigned_value == negative_threshold and 314 // unsigned_value < negative_threshold are both always false 315 return {nullptr, State}; 316 } 317 // FIXME: These special cases are sufficient for handling real-world 318 // comparisons, but in theory there could be contrived situations where 319 // automatic conversion of a symbolic value (which can be negative and can be 320 // positive) leads to incorrect results. 321 // NOTE: We NEED to use the `evalBinOpNN` call in the "common" case, because 322 // we want to ensure that assumptions coming from this precondition and 323 // assumptions coming from regular C/C++ operator calls are represented by 324 // constraints on the same symbolic expression. A solution that would 325 // evaluate these "mathematical" compariosns through a separate pathway would 326 // be a step backwards in this sense. 327 328 const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT; 329 auto BelowThreshold = 330 SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType()) 331 .getAs<NonLoc>(); 332 333 if (BelowThreshold) 334 return State->assume(*BelowThreshold); 335 336 return {nullptr, nullptr}; 337 } 338 339 static std::string getRegionName(const SubRegion *Region) { 340 if (std::string RegName = Region->getDescriptiveName(); !RegName.empty()) 341 return RegName; 342 343 // Field regions only have descriptive names when their parent has a 344 // descriptive name; so we provide a fallback representation for them: 345 if (const auto *FR = Region->getAs<FieldRegion>()) { 346 if (StringRef Name = FR->getDecl()->getName(); !Name.empty()) 347 return formatv("the field '{0}'", Name); 348 return "the unnamed field"; 349 } 350 351 if (isa<AllocaRegion>(Region)) 352 return "the memory returned by 'alloca'"; 353 354 if (isa<SymbolicRegion>(Region) && 355 isa<HeapSpaceRegion>(Region->getMemorySpace())) 356 return "the heap area"; 357 358 if (isa<StringRegion>(Region)) 359 return "the string literal"; 360 361 return "the region"; 362 } 363 364 static std::optional<int64_t> getConcreteValue(NonLoc SV) { 365 if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) { 366 return ConcreteVal->getValue().tryExtValue(); 367 } 368 return std::nullopt; 369 } 370 371 static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { 372 return SV ? getConcreteValue(*SV) : std::nullopt; 373 } 374 375 static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { 376 std::string RegName = getRegionName(Region), OffsetStr = ""; 377 378 if (auto ConcreteOffset = getConcreteValue(Offset)) 379 OffsetStr = formatv(" {0}", ConcreteOffset); 380 381 return { 382 formatv("Out of bound access to memory preceding {0}", RegName), 383 formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)}; 384 } 385 386 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if 387 /// it can be performed (`Divisor` is nonzero and there is no remainder). The 388 /// values `Val1` and `Val2` may be nullopt and in that case the corresponding 389 /// division is considered to be successful. 390 static bool tryDividePair(std::optional<int64_t> &Val1, 391 std::optional<int64_t> &Val2, int64_t Divisor) { 392 if (!Divisor) 393 return false; 394 const bool Val1HasRemainder = Val1 && *Val1 % Divisor; 395 const bool Val2HasRemainder = Val2 && *Val2 % Divisor; 396 if (!Val1HasRemainder && !Val2HasRemainder) { 397 if (Val1) 398 *Val1 /= Divisor; 399 if (Val2) 400 *Val2 /= Divisor; 401 return true; 402 } 403 return false; 404 } 405 406 static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, 407 NonLoc Offset, NonLoc Extent, SVal Location, 408 bool AlsoMentionUnderflow) { 409 std::string RegName = getRegionName(Region); 410 const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); 411 assert(EReg && "this checker only handles element access"); 412 QualType ElemType = EReg->getElementType(); 413 414 std::optional<int64_t> OffsetN = getConcreteValue(Offset); 415 std::optional<int64_t> ExtentN = getConcreteValue(Extent); 416 417 int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity(); 418 419 bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize); 420 const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index"; 421 422 SmallString<256> Buf; 423 llvm::raw_svector_ostream Out(Buf); 424 Out << "Access of "; 425 if (!ExtentN && !UseByteOffsets) 426 Out << "'" << ElemType.getAsString() << "' element in "; 427 Out << RegName << " at "; 428 if (AlsoMentionUnderflow) { 429 Out << "a negative or overflowing " << OffsetOrIndex; 430 } else if (OffsetN) { 431 Out << OffsetOrIndex << " " << *OffsetN; 432 } else { 433 Out << "an overflowing " << OffsetOrIndex; 434 } 435 if (ExtentN) { 436 Out << ", while it holds only "; 437 if (*ExtentN != 1) 438 Out << *ExtentN; 439 else 440 Out << "a single"; 441 if (UseByteOffsets) 442 Out << " byte"; 443 else 444 Out << " '" << ElemType.getAsString() << "' element"; 445 446 if (*ExtentN > 1) 447 Out << "s"; 448 } 449 450 return {formatv("Out of bound access to memory {0} {1}", 451 AlsoMentionUnderflow ? "around" : "after the end of", 452 RegName), 453 std::string(Buf)}; 454 } 455 456 static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName, 457 bool AlsoMentionUnderflow) { 458 std::string RegName = getRegionName(Region); 459 return {formatv("Potential out of bound access to {0} with tainted {1}", 460 RegName, OffsetName), 461 formatv("Access of {0} with a tainted {1} that may be {2}too large", 462 RegName, OffsetName, 463 AlsoMentionUnderflow ? "negative or " : "")}; 464 } 465 466 const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const { 467 // Don't create a note tag if we didn't assume anything: 468 if (!AssumedNonNegative && !AssumedUpperBound) 469 return nullptr; 470 471 return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string { 472 return getMessage(BR); 473 }); 474 } 475 476 std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const { 477 bool ShouldReportNonNegative = AssumedNonNegative; 478 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) { 479 if (AssumedUpperBound && 480 providesInformationAboutInteresting(*AssumedUpperBound, BR)) { 481 // Even if the byte offset isn't interesting (e.g. it's a constant value), 482 // the assumption can still be interesting if it provides information 483 // about an interesting symbolic upper bound. 484 ShouldReportNonNegative = false; 485 } else { 486 // We don't have anything interesting, don't report the assumption. 487 return ""; 488 } 489 } 490 491 std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal); 492 std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound); 493 494 const bool UseIndex = 495 ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize); 496 497 SmallString<256> Buf; 498 llvm::raw_svector_ostream Out(Buf); 499 Out << "Assuming "; 500 if (UseIndex) { 501 Out << "index "; 502 if (OffsetN) 503 Out << "'" << OffsetN << "' "; 504 } else if (AssumedUpperBound) { 505 Out << "byte offset "; 506 if (OffsetN) 507 Out << "'" << OffsetN << "' "; 508 } else { 509 Out << "offset "; 510 } 511 512 Out << "is"; 513 if (ShouldReportNonNegative) { 514 Out << " non-negative"; 515 } 516 if (AssumedUpperBound) { 517 if (ShouldReportNonNegative) 518 Out << " and"; 519 Out << " less than "; 520 if (ExtentN) 521 Out << *ExtentN << ", "; 522 if (UseIndex && ElementType) 523 Out << "the number of '" << ElementType->getAsString() 524 << "' elements in "; 525 else 526 Out << "the extent of "; 527 Out << getRegionName(Reg); 528 } 529 return std::string(Out.str()); 530 } 531 532 bool StateUpdateReporter::providesInformationAboutInteresting( 533 SymbolRef Sym, PathSensitiveBugReport &BR) { 534 if (!Sym) 535 return false; 536 for (SymbolRef PartSym : Sym->symbols()) { 537 // The interestingess mark may appear on any layer as we're stripping off 538 // the SymIntExpr, UnarySymExpr etc. layers... 539 if (BR.isInteresting(PartSym)) 540 return true; 541 // ...but if both sides of the expression are symbolic, then there is no 542 // practical algorithm to produce separate constraints for the two 543 // operands (from the single combined result). 544 if (isa<SymSymExpr>(PartSym)) 545 return false; 546 } 547 return false; 548 } 549 550 void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { 551 const SVal Location = C.getSVal(E); 552 553 // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as 554 // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) 555 // and incomplete analysis of these leads to false positives. As even 556 // accurate reports would be confusing for the users, just disable reports 557 // from these macros: 558 if (isFromCtypeMacro(E, C.getASTContext())) 559 return; 560 561 ProgramStateRef State = C.getState(); 562 SValBuilder &SVB = C.getSValBuilder(); 563 564 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset = 565 computeOffset(State, SVB, Location); 566 567 if (!RawOffset) 568 return; 569 570 auto [Reg, ByteOffset] = *RawOffset; 571 572 // The state updates will be reported as a single note tag, which will be 573 // composed by this helper class. 574 StateUpdateReporter SUR(Reg, ByteOffset, E, C); 575 576 // CHECK LOWER BOUND 577 const MemSpaceRegion *Space = Reg->getMemorySpace(); 578 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) { 579 // A symbolic region in unknown space represents an unknown pointer that 580 // may point into the middle of an array, so we don't look for underflows. 581 // Both conditions are significant because we want to check underflows in 582 // symbolic regions on the heap (which may be introduced by checkers like 583 // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and 584 // non-symbolic regions (e.g. a field subregion of a symbolic region) in 585 // unknown space. 586 auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold( 587 State, ByteOffset, SVB.makeZeroArrayIndex(), SVB); 588 589 if (PrecedesLowerBound) { 590 // The offset may be invalid (negative)... 591 if (!WithinLowerBound) { 592 // ...and it cannot be valid (>= 0), so report an error. 593 Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); 594 reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); 595 return; 596 } 597 // ...but it can be valid as well, so the checker will (optimistically) 598 // assume that it's valid and mention this in the note tag. 599 SUR.recordNonNegativeAssumption(); 600 } 601 602 // Actually update the state. The "if" only fails in the extremely unlikely 603 // case when compareValueToThreshold returns {nullptr, nullptr} becasue 604 // evalBinOpNN fails to evaluate the less-than operator. 605 if (WithinLowerBound) 606 State = WithinLowerBound; 607 } 608 609 // CHECK UPPER BOUND 610 DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB); 611 if (auto KnownSize = Size.getAs<NonLoc>()) { 612 // In a situation where both underflow and overflow are possible (but the 613 // index is either tainted or known to be invalid), the logic of this 614 // checker will first assume that the offset is non-negative, and then 615 // (with this additional assumption) it will detect an overflow error. 616 // In this situation the warning message should mention both possibilities. 617 bool AlsoMentionUnderflow = SUR.assumedNonNegative(); 618 619 auto [WithinUpperBound, ExceedsUpperBound] = 620 compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); 621 622 if (ExceedsUpperBound) { 623 // The offset may be invalid (>= Size)... 624 if (!WithinUpperBound) { 625 // ...and it cannot be within bounds, so report an error, unless we can 626 // definitely determine that this is an idiomatic `&array[size]` 627 // expression that calculates the past-the-end pointer. 628 if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, 629 *KnownSize, C)) { 630 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); 631 return; 632 } 633 634 Messages Msgs = 635 getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize, 636 Location, AlsoMentionUnderflow); 637 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); 638 return; 639 } 640 // ...and it can be valid as well... 641 if (isTainted(State, ByteOffset)) { 642 // ...but it's tainted, so report an error. 643 644 // Diagnostic detail: saying "tainted offset" is always correct, but 645 // the common case is that 'idx' is tainted in 'arr[idx]' and then it's 646 // nicer to say "tainted index". 647 const char *OffsetName = "offset"; 648 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) 649 if (isTainted(State, ASE->getIdx(), C.getLocationContext())) 650 OffsetName = "index"; 651 652 Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow); 653 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize, 654 /*IsTaintBug=*/true); 655 return; 656 } 657 // ...and it isn't tainted, so the checker will (optimistically) assume 658 // that the offset is in bounds and mention this in the note tag. 659 SUR.recordUpperBoundAssumption(*KnownSize); 660 } 661 662 // Actually update the state. The "if" only fails in the extremely unlikely 663 // case when compareValueToThreshold returns {nullptr, nullptr} becasue 664 // evalBinOpNN fails to evaluate the less-than operator. 665 if (WithinUpperBound) 666 State = WithinUpperBound; 667 } 668 669 // Add a transition, reporting the state updates that we accumulated. 670 C.addTransition(State, SUR.createNoteTag(C)); 671 } 672 673 void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR, 674 ProgramStateRef ErrorState, 675 NonLoc Val, bool MarkTaint) { 676 if (SymbolRef Sym = Val.getAsSymbol()) { 677 // If the offset is a symbolic value, iterate over its "parts" with 678 // `SymExpr::symbols()` and mark each of them as interesting. 679 // For example, if the offset is `x*4 + y` then we put interestingness onto 680 // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols 681 // `x` and `y`. 682 for (SymbolRef PartSym : Sym->symbols()) 683 BR.markInteresting(PartSym); 684 } 685 686 if (MarkTaint) { 687 // If the issue that we're reporting depends on the taintedness of the 688 // offset, then put interestingness onto symbols that could be the origin 689 // of the taint. Note that this may find symbols that did not appear in 690 // `Sym->symbols()` (because they're only loosely connected to `Val`). 691 for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val)) 692 BR.markInteresting(Sym); 693 } 694 } 695 696 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, 697 ProgramStateRef ErrorState, Messages Msgs, 698 NonLoc Offset, std::optional<NonLoc> Extent, 699 bool IsTaintBug /*=false*/) const { 700 701 ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); 702 if (!ErrorNode) 703 return; 704 705 auto BR = std::make_unique<PathSensitiveBugReport>( 706 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); 707 708 // FIXME: ideally we would just call trackExpressionValue() and that would 709 // "do the right thing": mark the relevant symbols as interesting, track the 710 // control dependencies and statements storing the relevant values and add 711 // helpful diagnostic pieces. However, right now trackExpressionValue() is 712 // a heap of unreliable heuristics, so it would cause several issues: 713 // - Interestingness is not applied consistently, e.g. if `array[x+10]` 714 // causes an overflow, then `x` is not marked as interesting. 715 // - We get irrelevant diagnostic pieces, e.g. in the code 716 // `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;` 717 // it places a "Storing uninitialized value" note on the `malloc` call 718 // (which is technically true, but irrelevant). 719 // If trackExpressionValue() becomes reliable, it should be applied instead 720 // of this custom markPartsInteresting(). 721 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug); 722 if (Extent) 723 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug); 724 725 C.emitReport(std::move(BR)); 726 } 727 728 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { 729 SourceLocation Loc = S->getBeginLoc(); 730 if (!Loc.isMacroID()) 731 return false; 732 733 StringRef MacroName = Lexer::getImmediateMacroName( 734 Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); 735 736 if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's') 737 return false; 738 739 return ((MacroName == "isalnum") || (MacroName == "isalpha") || 740 (MacroName == "isblank") || (MacroName == "isdigit") || 741 (MacroName == "isgraph") || (MacroName == "islower") || 742 (MacroName == "isnctrl") || (MacroName == "isprint") || 743 (MacroName == "ispunct") || (MacroName == "isspace") || 744 (MacroName == "isupper") || (MacroName == "isxdigit")); 745 } 746 747 bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) { 748 ParentMapContext &ParentCtx = ACtx.getParentMapContext(); 749 do { 750 const DynTypedNodeList Parents = ParentCtx.getParents(*S); 751 if (Parents.empty()) 752 return false; 753 S = Parents[0].get<Stmt>(); 754 } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S)); 755 const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S); 756 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf; 757 } 758 759 bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E, 760 ProgramStateRef State, 761 NonLoc Offset, NonLoc Limit, 762 CheckerContext &C) { 763 if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { 764 auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold( 765 State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true); 766 return EqualsToThreshold && !NotEqualToThreshold; 767 } 768 return false; 769 } 770 771 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { 772 mgr.registerChecker<ArrayBoundCheckerV2>(); 773 } 774 775 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) { 776 return true; 777 } 778