10b57cec5SDimitry Andric //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file defines ArrayBoundCheckerV2, which is a path-sensitive check 100b57cec5SDimitry Andric // which looks for an out-of-bound array element access. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "clang/AST/CharUnits.h" 15*5f757f3fSDimitry Andric #include "clang/AST/ParentMapContext.h" 165ffd83dbSDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 1781ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h" 180b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 190b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h" 200b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h" 210b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 23fe6060f1SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 250b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 26*5f757f3fSDimitry Andric #include "llvm/Support/FormatVariadic.h" 270b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 28bdd1243dSDimitry Andric #include <optional> 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric using namespace clang; 310b57cec5SDimitry Andric using namespace ento; 320b57cec5SDimitry Andric using namespace taint; 33*5f757f3fSDimitry Andric using llvm::formatv; 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric namespace { 36*5f757f3fSDimitry Andric enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; 370b57cec5SDimitry Andric 38*5f757f3fSDimitry Andric struct Messages { 39*5f757f3fSDimitry Andric std::string Short, Full; 40*5f757f3fSDimitry Andric }; 410b57cec5SDimitry Andric 42*5f757f3fSDimitry Andric // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` 43*5f757f3fSDimitry Andric // instead of `PreStmt` because the current implementation passes the whole 44*5f757f3fSDimitry Andric // expression to `CheckerContext::getSVal()` which only works after the 45*5f757f3fSDimitry Andric // symbolic evaluation of the expression. (To turn them into `PreStmt` 46*5f757f3fSDimitry Andric // callbacks, we'd need to duplicate the logic that evaluates these 47*5f757f3fSDimitry Andric // expressions.) The `MemberExpr` callback would work as `PreStmt` but it's 48*5f757f3fSDimitry Andric // defined as `PostStmt` for the sake of consistency with the other callbacks. 49*5f757f3fSDimitry Andric class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>, 50*5f757f3fSDimitry Andric check::PostStmt<UnaryOperator>, 51*5f757f3fSDimitry Andric check::PostStmt<MemberExpr>> { 52*5f757f3fSDimitry Andric BugType BT{this, "Out-of-bound access"}; 53*5f757f3fSDimitry Andric BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; 54*5f757f3fSDimitry Andric 55*5f757f3fSDimitry Andric void performCheck(const Expr *E, CheckerContext &C) const; 56*5f757f3fSDimitry Andric 57*5f757f3fSDimitry Andric void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, 58*5f757f3fSDimitry Andric NonLoc Offset, Messages Msgs) const; 5906c3fb27SDimitry Andric 6006c3fb27SDimitry Andric static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); 610b57cec5SDimitry Andric 62*5f757f3fSDimitry Andric static bool isInAddressOf(const Stmt *S, ASTContext &AC); 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric public: 65*5f757f3fSDimitry Andric void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const { 66*5f757f3fSDimitry Andric performCheck(E, C); 67*5f757f3fSDimitry Andric } 68*5f757f3fSDimitry Andric void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const { 69*5f757f3fSDimitry Andric if (E->getOpcode() == UO_Deref) 70*5f757f3fSDimitry Andric performCheck(E, C); 71*5f757f3fSDimitry Andric } 72*5f757f3fSDimitry Andric void checkPostStmt(const MemberExpr *E, CheckerContext &C) const { 73*5f757f3fSDimitry Andric if (E->isArrow()) 74*5f757f3fSDimitry Andric performCheck(E->getBase(), C); 75*5f757f3fSDimitry Andric } 760b57cec5SDimitry Andric }; 77*5f757f3fSDimitry Andric 78*5f757f3fSDimitry Andric } // anonymous namespace 79*5f757f3fSDimitry Andric 80*5f757f3fSDimitry Andric /// For a given Location that can be represented as a symbolic expression 81*5f757f3fSDimitry Andric /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block 82*5f757f3fSDimitry Andric /// Arr and the distance of Location from the beginning of Arr (expressed in a 83*5f757f3fSDimitry Andric /// NonLoc that specifies the number of CharUnits). Returns nullopt when these 84*5f757f3fSDimitry Andric /// cannot be determined. 85*5f757f3fSDimitry Andric static std::optional<std::pair<const SubRegion *, NonLoc>> 86*5f757f3fSDimitry Andric computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { 87*5f757f3fSDimitry Andric QualType T = SVB.getArrayIndexType(); 88*5f757f3fSDimitry Andric auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { 89*5f757f3fSDimitry Andric // We will use this utility to add and multiply values. 90*5f757f3fSDimitry Andric return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>(); 91*5f757f3fSDimitry Andric }; 92*5f757f3fSDimitry Andric 93*5f757f3fSDimitry Andric const SubRegion *OwnerRegion = nullptr; 94*5f757f3fSDimitry Andric std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex(); 95*5f757f3fSDimitry Andric 96*5f757f3fSDimitry Andric const ElementRegion *CurRegion = 97*5f757f3fSDimitry Andric dyn_cast_or_null<ElementRegion>(Location.getAsRegion()); 98*5f757f3fSDimitry Andric 99*5f757f3fSDimitry Andric while (CurRegion) { 100*5f757f3fSDimitry Andric const auto Index = CurRegion->getIndex().getAs<NonLoc>(); 101*5f757f3fSDimitry Andric if (!Index) 102*5f757f3fSDimitry Andric return std::nullopt; 103*5f757f3fSDimitry Andric 104*5f757f3fSDimitry Andric QualType ElemType = CurRegion->getElementType(); 105*5f757f3fSDimitry Andric 106*5f757f3fSDimitry Andric // FIXME: The following early return was presumably added to safeguard the 107*5f757f3fSDimitry Andric // getTypeSizeInChars() call (which doesn't accept an incomplete type), but 108*5f757f3fSDimitry Andric // it seems that `ElemType` cannot be incomplete at this point. 109*5f757f3fSDimitry Andric if (ElemType->isIncompleteType()) 110*5f757f3fSDimitry Andric return std::nullopt; 111*5f757f3fSDimitry Andric 112*5f757f3fSDimitry Andric // Calculate Delta = Index * sizeof(ElemType). 113*5f757f3fSDimitry Andric NonLoc Size = SVB.makeArrayIndex( 114*5f757f3fSDimitry Andric SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); 115*5f757f3fSDimitry Andric auto Delta = EvalBinOp(BO_Mul, *Index, Size); 116*5f757f3fSDimitry Andric if (!Delta) 117*5f757f3fSDimitry Andric return std::nullopt; 118*5f757f3fSDimitry Andric 119*5f757f3fSDimitry Andric // Perform Offset += Delta. 120*5f757f3fSDimitry Andric Offset = EvalBinOp(BO_Add, *Offset, *Delta); 121*5f757f3fSDimitry Andric if (!Offset) 122*5f757f3fSDimitry Andric return std::nullopt; 123*5f757f3fSDimitry Andric 124*5f757f3fSDimitry Andric OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>(); 125*5f757f3fSDimitry Andric // When this is just another ElementRegion layer, we need to continue the 126*5f757f3fSDimitry Andric // offset calculations: 127*5f757f3fSDimitry Andric CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion); 128*5f757f3fSDimitry Andric } 129*5f757f3fSDimitry Andric 130*5f757f3fSDimitry Andric if (OwnerRegion) 131*5f757f3fSDimitry Andric return std::make_pair(OwnerRegion, *Offset); 132*5f757f3fSDimitry Andric 133*5f757f3fSDimitry Andric return std::nullopt; 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric // TODO: once the constraint manager is smart enough to handle non simplified 1370b57cec5SDimitry Andric // symbolic expressions remove this function. Note that this can not be used in 1380b57cec5SDimitry Andric // the constraint manager as is, since this does not handle overflows. It is 1390b57cec5SDimitry Andric // safe to assume, however, that memory offsets will not overflow. 14006c3fb27SDimitry Andric // NOTE: callers of this function need to be aware of the effects of overflows 14106c3fb27SDimitry Andric // and signed<->unsigned conversions! 1420b57cec5SDimitry Andric static std::pair<NonLoc, nonloc::ConcreteInt> 1430b57cec5SDimitry Andric getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, 1440b57cec5SDimitry Andric SValBuilder &svalBuilder) { 145bdd1243dSDimitry Andric std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); 1460b57cec5SDimitry Andric if (SymVal && SymVal->isExpression()) { 1470b57cec5SDimitry Andric if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { 1480b57cec5SDimitry Andric llvm::APSInt constant = 1490b57cec5SDimitry Andric APSIntType(extent.getValue()).convert(SIE->getRHS()); 1500b57cec5SDimitry Andric switch (SIE->getOpcode()) { 1510b57cec5SDimitry Andric case BO_Mul: 152*5f757f3fSDimitry Andric // The constant should never be 0 here, becasue multiplication by zero 153*5f757f3fSDimitry Andric // is simplified by the engine. 1540b57cec5SDimitry Andric if ((extent.getValue() % constant) != 0) 1550b57cec5SDimitry Andric return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 1560b57cec5SDimitry Andric else 1570b57cec5SDimitry Andric return getSimplifiedOffsets( 1580b57cec5SDimitry Andric nonloc::SymbolVal(SIE->getLHS()), 1590b57cec5SDimitry Andric svalBuilder.makeIntVal(extent.getValue() / constant), 1600b57cec5SDimitry Andric svalBuilder); 1610b57cec5SDimitry Andric case BO_Add: 1620b57cec5SDimitry Andric return getSimplifiedOffsets( 1630b57cec5SDimitry Andric nonloc::SymbolVal(SIE->getLHS()), 1640b57cec5SDimitry Andric svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); 1650b57cec5SDimitry Andric default: 1660b57cec5SDimitry Andric break; 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 1720b57cec5SDimitry Andric } 1730b57cec5SDimitry Andric 17406c3fb27SDimitry Andric // Evaluate the comparison Value < Threshold with the help of the custom 17506c3fb27SDimitry Andric // simplification algorithm defined for this checker. Return a pair of states, 17606c3fb27SDimitry Andric // where the first one corresponds to "value below threshold" and the second 17706c3fb27SDimitry Andric // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in 17806c3fb27SDimitry Andric // the case when the evaluation fails. 179*5f757f3fSDimitry Andric // If the optional argument CheckEquality is true, then use BO_EQ instead of 180*5f757f3fSDimitry Andric // the default BO_LT after consistently applying the same simplification steps. 18106c3fb27SDimitry Andric static std::pair<ProgramStateRef, ProgramStateRef> 18206c3fb27SDimitry Andric compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, 183*5f757f3fSDimitry Andric SValBuilder &SVB, bool CheckEquality = false) { 18406c3fb27SDimitry Andric if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 18506c3fb27SDimitry Andric std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); 18606c3fb27SDimitry Andric } 18706c3fb27SDimitry Andric if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 18806c3fb27SDimitry Andric QualType T = Value.getType(SVB.getContext()); 18906c3fb27SDimitry Andric if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) { 19006c3fb27SDimitry Andric // In this case we reduced the bound check to a comparison of the form 19106c3fb27SDimitry Andric // (symbol or value with unsigned type) < (negative number) 19206c3fb27SDimitry Andric // which is always false. We are handling these cases separately because 19306c3fb27SDimitry Andric // evalBinOpNN can perform a signed->unsigned conversion that turns the 19406c3fb27SDimitry Andric // negative number into a huge positive value and leads to wildly 19506c3fb27SDimitry Andric // inaccurate conclusions. 19606c3fb27SDimitry Andric return {nullptr, State}; 19706c3fb27SDimitry Andric } 19806c3fb27SDimitry Andric } 199*5f757f3fSDimitry Andric const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT; 20006c3fb27SDimitry Andric auto BelowThreshold = 201*5f757f3fSDimitry Andric SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType()) 202*5f757f3fSDimitry Andric .getAs<NonLoc>(); 20306c3fb27SDimitry Andric 20406c3fb27SDimitry Andric if (BelowThreshold) 20506c3fb27SDimitry Andric return State->assume(*BelowThreshold); 20606c3fb27SDimitry Andric 20706c3fb27SDimitry Andric return {nullptr, nullptr}; 20806c3fb27SDimitry Andric } 20906c3fb27SDimitry Andric 210*5f757f3fSDimitry Andric static std::string getRegionName(const SubRegion *Region) { 211*5f757f3fSDimitry Andric if (std::string RegName = Region->getDescriptiveName(); !RegName.empty()) 212*5f757f3fSDimitry Andric return RegName; 2130b57cec5SDimitry Andric 214*5f757f3fSDimitry Andric // Field regions only have descriptive names when their parent has a 215*5f757f3fSDimitry Andric // descriptive name; so we provide a fallback representation for them: 216*5f757f3fSDimitry Andric if (const auto *FR = Region->getAs<FieldRegion>()) { 217*5f757f3fSDimitry Andric if (StringRef Name = FR->getDecl()->getName(); !Name.empty()) 218*5f757f3fSDimitry Andric return formatv("the field '{0}'", Name); 219*5f757f3fSDimitry Andric return "the unnamed field"; 220*5f757f3fSDimitry Andric } 221*5f757f3fSDimitry Andric 222*5f757f3fSDimitry Andric if (isa<AllocaRegion>(Region)) 223*5f757f3fSDimitry Andric return "the memory returned by 'alloca'"; 224*5f757f3fSDimitry Andric 225*5f757f3fSDimitry Andric if (isa<SymbolicRegion>(Region) && 226*5f757f3fSDimitry Andric isa<HeapSpaceRegion>(Region->getMemorySpace())) 227*5f757f3fSDimitry Andric return "the heap area"; 228*5f757f3fSDimitry Andric 229*5f757f3fSDimitry Andric if (isa<StringRegion>(Region)) 230*5f757f3fSDimitry Andric return "the string literal"; 231*5f757f3fSDimitry Andric 232*5f757f3fSDimitry Andric return "the region"; 233*5f757f3fSDimitry Andric } 234*5f757f3fSDimitry Andric 235*5f757f3fSDimitry Andric static std::optional<int64_t> getConcreteValue(NonLoc SV) { 236*5f757f3fSDimitry Andric if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) { 237*5f757f3fSDimitry Andric return ConcreteVal->getValue().tryExtValue(); 238*5f757f3fSDimitry Andric } 239*5f757f3fSDimitry Andric return std::nullopt; 240*5f757f3fSDimitry Andric } 241*5f757f3fSDimitry Andric 242*5f757f3fSDimitry Andric static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { 243*5f757f3fSDimitry Andric static const char *ShortMsgTemplates[] = { 244*5f757f3fSDimitry Andric "Out of bound access to memory preceding {0}", 245*5f757f3fSDimitry Andric "Out of bound access to memory after the end of {0}", 246*5f757f3fSDimitry Andric "Potential out of bound access to {0} with tainted offset"}; 247*5f757f3fSDimitry Andric 248*5f757f3fSDimitry Andric return formatv(ShortMsgTemplates[Kind], RegName); 249*5f757f3fSDimitry Andric } 250*5f757f3fSDimitry Andric 251*5f757f3fSDimitry Andric static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { 252*5f757f3fSDimitry Andric std::string RegName = getRegionName(Region); 253*5f757f3fSDimitry Andric SmallString<128> Buf; 254*5f757f3fSDimitry Andric llvm::raw_svector_ostream Out(Buf); 255*5f757f3fSDimitry Andric Out << "Access of " << RegName << " at negative byte offset"; 256*5f757f3fSDimitry Andric if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>()) 257*5f757f3fSDimitry Andric Out << ' ' << ConcreteIdx->getValue(); 258*5f757f3fSDimitry Andric return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)}; 259*5f757f3fSDimitry Andric } 260*5f757f3fSDimitry Andric 261*5f757f3fSDimitry Andric static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, 262*5f757f3fSDimitry Andric NonLoc Offset, NonLoc Extent, SVal Location) { 263*5f757f3fSDimitry Andric std::string RegName = getRegionName(Region); 264*5f757f3fSDimitry Andric const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); 265*5f757f3fSDimitry Andric assert(EReg && "this checker only handles element access"); 266*5f757f3fSDimitry Andric QualType ElemType = EReg->getElementType(); 267*5f757f3fSDimitry Andric 268*5f757f3fSDimitry Andric std::optional<int64_t> OffsetN = getConcreteValue(Offset); 269*5f757f3fSDimitry Andric std::optional<int64_t> ExtentN = getConcreteValue(Extent); 270*5f757f3fSDimitry Andric 271*5f757f3fSDimitry Andric bool UseByteOffsets = true; 272*5f757f3fSDimitry Andric if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) { 273*5f757f3fSDimitry Andric const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize; 274*5f757f3fSDimitry Andric const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize; 275*5f757f3fSDimitry Andric if (!OffsetHasRemainder && !ExtentHasRemainder) { 276*5f757f3fSDimitry Andric UseByteOffsets = false; 277*5f757f3fSDimitry Andric if (OffsetN) 278*5f757f3fSDimitry Andric *OffsetN /= ElemSize; 279*5f757f3fSDimitry Andric if (ExtentN) 280*5f757f3fSDimitry Andric *ExtentN /= ElemSize; 281*5f757f3fSDimitry Andric } 282*5f757f3fSDimitry Andric } 283*5f757f3fSDimitry Andric 284*5f757f3fSDimitry Andric SmallString<256> Buf; 285*5f757f3fSDimitry Andric llvm::raw_svector_ostream Out(Buf); 286*5f757f3fSDimitry Andric Out << "Access of "; 287*5f757f3fSDimitry Andric if (!ExtentN && !UseByteOffsets) 288*5f757f3fSDimitry Andric Out << "'" << ElemType.getAsString() << "' element in "; 289*5f757f3fSDimitry Andric Out << RegName << " at "; 290*5f757f3fSDimitry Andric if (OffsetN) { 291*5f757f3fSDimitry Andric Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN; 292*5f757f3fSDimitry Andric } else { 293*5f757f3fSDimitry Andric Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index"); 294*5f757f3fSDimitry Andric } 295*5f757f3fSDimitry Andric if (ExtentN) { 296*5f757f3fSDimitry Andric Out << ", while it holds only "; 297*5f757f3fSDimitry Andric if (*ExtentN != 1) 298*5f757f3fSDimitry Andric Out << *ExtentN; 299*5f757f3fSDimitry Andric else 300*5f757f3fSDimitry Andric Out << "a single"; 301*5f757f3fSDimitry Andric if (UseByteOffsets) 302*5f757f3fSDimitry Andric Out << " byte"; 303*5f757f3fSDimitry Andric else 304*5f757f3fSDimitry Andric Out << " '" << ElemType.getAsString() << "' element"; 305*5f757f3fSDimitry Andric 306*5f757f3fSDimitry Andric if (*ExtentN > 1) 307*5f757f3fSDimitry Andric Out << "s"; 308*5f757f3fSDimitry Andric } 309*5f757f3fSDimitry Andric 310*5f757f3fSDimitry Andric return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)}; 311*5f757f3fSDimitry Andric } 312*5f757f3fSDimitry Andric 313*5f757f3fSDimitry Andric static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { 314*5f757f3fSDimitry Andric std::string RegName = getRegionName(Region); 315*5f757f3fSDimitry Andric return {formatv("Potential out of bound access to {0} with tainted {1}", 316*5f757f3fSDimitry Andric RegName, OffsetName), 317*5f757f3fSDimitry Andric formatv("Access of {0} with a tainted {1} that may be too large", 318*5f757f3fSDimitry Andric RegName, OffsetName)}; 319*5f757f3fSDimitry Andric } 320*5f757f3fSDimitry Andric 321*5f757f3fSDimitry Andric void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { 3220b57cec5SDimitry Andric // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping 3230b57cec5SDimitry Andric // some new logic here that reasons directly about memory region extents. 3240b57cec5SDimitry Andric // Once that logic is more mature, we can bring it back to assumeInBound() 3250b57cec5SDimitry Andric // for all clients to use. 3260b57cec5SDimitry Andric // 3270b57cec5SDimitry Andric // The algorithm we are using here for bounds checking is to see if the 3280b57cec5SDimitry Andric // memory access is within the extent of the base region. Since we 3290b57cec5SDimitry Andric // have some flexibility in defining the base region, we can achieve 3300b57cec5SDimitry Andric // various levels of conservatism in our buffer overflow checking. 33106c3fb27SDimitry Andric 332*5f757f3fSDimitry Andric const SVal Location = C.getSVal(E); 333*5f757f3fSDimitry Andric 33406c3fb27SDimitry Andric // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as 33506c3fb27SDimitry Andric // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) 33606c3fb27SDimitry Andric // and incomplete analysis of these leads to false positives. As even 33706c3fb27SDimitry Andric // accurate reports would be confusing for the users, just disable reports 33806c3fb27SDimitry Andric // from these macros: 339*5f757f3fSDimitry Andric if (isFromCtypeMacro(E, C.getASTContext())) 34006c3fb27SDimitry Andric return; 34106c3fb27SDimitry Andric 342*5f757f3fSDimitry Andric ProgramStateRef State = C.getState(); 343*5f757f3fSDimitry Andric SValBuilder &SVB = C.getSValBuilder(); 3440b57cec5SDimitry Andric 345*5f757f3fSDimitry Andric const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset = 346*5f757f3fSDimitry Andric computeOffset(State, SVB, Location); 3470b57cec5SDimitry Andric 34806c3fb27SDimitry Andric if (!RawOffset) 3490b57cec5SDimitry Andric return; 3500b57cec5SDimitry Andric 351*5f757f3fSDimitry Andric auto [Reg, ByteOffset] = *RawOffset; 3520b57cec5SDimitry Andric 35306c3fb27SDimitry Andric // CHECK LOWER BOUND 354*5f757f3fSDimitry Andric const MemSpaceRegion *Space = Reg->getMemorySpace(); 355*5f757f3fSDimitry Andric if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) { 356*5f757f3fSDimitry Andric // A symbolic region in unknown space represents an unknown pointer that 357*5f757f3fSDimitry Andric // may point into the middle of an array, so we don't look for underflows. 358*5f757f3fSDimitry Andric // Both conditions are significant because we want to check underflows in 359*5f757f3fSDimitry Andric // symbolic regions on the heap (which may be introduced by checkers like 360*5f757f3fSDimitry Andric // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and 361*5f757f3fSDimitry Andric // non-symbolic regions (e.g. a field subregion of a symbolic region) in 362*5f757f3fSDimitry Andric // unknown space. 363*5f757f3fSDimitry Andric auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold( 364*5f757f3fSDimitry Andric State, ByteOffset, SVB.makeZeroArrayIndex(), SVB); 3650b57cec5SDimitry Andric 366*5f757f3fSDimitry Andric if (PrecedesLowerBound && !WithinLowerBound) { 36706c3fb27SDimitry Andric // We know that the index definitely precedes the lower bound. 368*5f757f3fSDimitry Andric Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); 369*5f757f3fSDimitry Andric reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs); 3700b57cec5SDimitry Andric return; 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric 373*5f757f3fSDimitry Andric if (WithinLowerBound) 374*5f757f3fSDimitry Andric State = WithinLowerBound; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 37706c3fb27SDimitry Andric // CHECK UPPER BOUND 378*5f757f3fSDimitry Andric DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB); 37906c3fb27SDimitry Andric if (auto KnownSize = Size.getAs<NonLoc>()) { 380*5f757f3fSDimitry Andric auto [WithinUpperBound, ExceedsUpperBound] = 381*5f757f3fSDimitry Andric compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); 3820b57cec5SDimitry Andric 383*5f757f3fSDimitry Andric if (ExceedsUpperBound) { 384*5f757f3fSDimitry Andric if (!WithinUpperBound) { 38506c3fb27SDimitry Andric // We know that the index definitely exceeds the upper bound. 386*5f757f3fSDimitry Andric if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { 387*5f757f3fSDimitry Andric // ...but this is within an addressof expression, so we need to check 388*5f757f3fSDimitry Andric // for the exceptional case that `&array[size]` is valid. 389*5f757f3fSDimitry Andric auto [EqualsToThreshold, NotEqualToThreshold] = 390*5f757f3fSDimitry Andric compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, 391*5f757f3fSDimitry Andric SVB, /*CheckEquality=*/true); 392*5f757f3fSDimitry Andric if (EqualsToThreshold && !NotEqualToThreshold) { 393*5f757f3fSDimitry Andric // We are definitely in the exceptional case, so return early 394*5f757f3fSDimitry Andric // instead of reporting a bug. 395*5f757f3fSDimitry Andric C.addTransition(EqualsToThreshold); 3960b57cec5SDimitry Andric return; 3970b57cec5SDimitry Andric } 398*5f757f3fSDimitry Andric } 399*5f757f3fSDimitry Andric Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, 400*5f757f3fSDimitry Andric *KnownSize, Location); 401*5f757f3fSDimitry Andric reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); 402*5f757f3fSDimitry Andric return; 403*5f757f3fSDimitry Andric } 404*5f757f3fSDimitry Andric if (isTainted(State, ByteOffset)) { 405*5f757f3fSDimitry Andric // Both cases are possible, but the offset is tainted, so report. 406*5f757f3fSDimitry Andric std::string RegName = getRegionName(Reg); 407*5f757f3fSDimitry Andric 408*5f757f3fSDimitry Andric // Diagnostic detail: "tainted offset" is always correct, but the 409*5f757f3fSDimitry Andric // common case is that 'idx' is tainted in 'arr[idx]' and then it's 410*5f757f3fSDimitry Andric // nicer to say "tainted index". 411*5f757f3fSDimitry Andric const char *OffsetName = "offset"; 412*5f757f3fSDimitry Andric if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) 413*5f757f3fSDimitry Andric if (isTainted(State, ASE->getIdx(), C.getLocationContext())) 414*5f757f3fSDimitry Andric OffsetName = "index"; 415*5f757f3fSDimitry Andric 416*5f757f3fSDimitry Andric Messages Msgs = getTaintMsgs(Reg, OffsetName); 417*5f757f3fSDimitry Andric reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); 41806c3fb27SDimitry Andric return; 41906c3fb27SDimitry Andric } 42006c3fb27SDimitry Andric } 4210b57cec5SDimitry Andric 422*5f757f3fSDimitry Andric if (WithinUpperBound) 423*5f757f3fSDimitry Andric State = WithinUpperBound; 4240b57cec5SDimitry Andric } 4250b57cec5SDimitry Andric 426*5f757f3fSDimitry Andric C.addTransition(State); 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric 429*5f757f3fSDimitry Andric void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, 430*5f757f3fSDimitry Andric ProgramStateRef ErrorState, OOB_Kind Kind, 431*5f757f3fSDimitry Andric NonLoc Offset, Messages Msgs) const { 432*5f757f3fSDimitry Andric 433*5f757f3fSDimitry Andric ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); 434*5f757f3fSDimitry Andric if (!ErrorNode) 43506c3fb27SDimitry Andric return; 43606c3fb27SDimitry Andric 437*5f757f3fSDimitry Andric auto BR = std::make_unique<PathSensitiveBugReport>( 438*5f757f3fSDimitry Andric Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); 43906c3fb27SDimitry Andric 44006c3fb27SDimitry Andric // Track back the propagation of taintedness. 441*5f757f3fSDimitry Andric if (Kind == OOB_Taint) 442*5f757f3fSDimitry Andric for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset)) 44306c3fb27SDimitry Andric BR->markInteresting(Sym); 44406c3fb27SDimitry Andric 445*5f757f3fSDimitry Andric C.emitReport(std::move(BR)); 4460b57cec5SDimitry Andric } 4470b57cec5SDimitry Andric 44806c3fb27SDimitry Andric bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { 44906c3fb27SDimitry Andric SourceLocation Loc = S->getBeginLoc(); 45006c3fb27SDimitry Andric if (!Loc.isMacroID()) 45106c3fb27SDimitry Andric return false; 45206c3fb27SDimitry Andric 45306c3fb27SDimitry Andric StringRef MacroName = Lexer::getImmediateMacroName( 45406c3fb27SDimitry Andric Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); 45506c3fb27SDimitry Andric 45606c3fb27SDimitry Andric if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's') 45706c3fb27SDimitry Andric return false; 45806c3fb27SDimitry Andric 45906c3fb27SDimitry Andric return ((MacroName == "isalnum") || (MacroName == "isalpha") || 46006c3fb27SDimitry Andric (MacroName == "isblank") || (MacroName == "isdigit") || 46106c3fb27SDimitry Andric (MacroName == "isgraph") || (MacroName == "islower") || 46206c3fb27SDimitry Andric (MacroName == "isnctrl") || (MacroName == "isprint") || 46306c3fb27SDimitry Andric (MacroName == "ispunct") || (MacroName == "isspace") || 46406c3fb27SDimitry Andric (MacroName == "isupper") || (MacroName == "isxdigit")); 4650b57cec5SDimitry Andric } 4660b57cec5SDimitry Andric 467*5f757f3fSDimitry Andric bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) { 468*5f757f3fSDimitry Andric ParentMapContext &ParentCtx = ACtx.getParentMapContext(); 469*5f757f3fSDimitry Andric do { 470*5f757f3fSDimitry Andric const DynTypedNodeList Parents = ParentCtx.getParents(*S); 471*5f757f3fSDimitry Andric if (Parents.empty()) 472*5f757f3fSDimitry Andric return false; 473*5f757f3fSDimitry Andric S = Parents[0].get<Stmt>(); 474*5f757f3fSDimitry Andric } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S)); 475*5f757f3fSDimitry Andric const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S); 476*5f757f3fSDimitry Andric return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf; 4770b57cec5SDimitry Andric } 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { 4800b57cec5SDimitry Andric mgr.registerChecker<ArrayBoundCheckerV2>(); 4810b57cec5SDimitry Andric } 4820b57cec5SDimitry Andric 4835ffd83dbSDimitry Andric bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) { 4840b57cec5SDimitry Andric return true; 4850b57cec5SDimitry Andric } 486