10b57cec5SDimitry Andric //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
100b57cec5SDimitry Andric // which looks for an out-of-bound array element access.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "clang/AST/CharUnits.h"
155f757f3fSDimitry Andric #include "clang/AST/ParentMapContext.h"
165ffd83dbSDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
1781ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h"
180b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
190b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
200b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
210b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23fe6060f1SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
250b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
265f757f3fSDimitry Andric #include "llvm/Support/FormatVariadic.h"
270b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
28bdd1243dSDimitry Andric #include <optional>
290b57cec5SDimitry Andric
300b57cec5SDimitry Andric using namespace clang;
310b57cec5SDimitry Andric using namespace ento;
320b57cec5SDimitry Andric using namespace taint;
335f757f3fSDimitry Andric using llvm::formatv;
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric namespace {
36*0fca6ea1SDimitry Andric /// If `E` is a "clean" array subscript expression, return the type of the
37*0fca6ea1SDimitry Andric /// accessed element. If the base of the subscript expression is modified by
38*0fca6ea1SDimitry Andric /// pointer arithmetic (and not the beginning of a "full" memory region), this
39*0fca6ea1SDimitry Andric /// always returns nullopt because that's the right (or the least bad) thing to
40*0fca6ea1SDimitry Andric /// do for the diagnostic output that's relying on this.
determineElementType(const Expr * E,const CheckerContext & C)41*0fca6ea1SDimitry Andric static std::optional<QualType> determineElementType(const Expr *E,
42*0fca6ea1SDimitry Andric const CheckerContext &C) {
43*0fca6ea1SDimitry Andric const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
44*0fca6ea1SDimitry Andric if (!ASE)
45*0fca6ea1SDimitry Andric return std::nullopt;
46*0fca6ea1SDimitry Andric
47*0fca6ea1SDimitry Andric const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
48*0fca6ea1SDimitry Andric if (!SubscriptBaseReg)
49*0fca6ea1SDimitry Andric return std::nullopt;
50*0fca6ea1SDimitry Andric
51*0fca6ea1SDimitry Andric // The base of the subscript expression is affected by pointer arithmetics,
52*0fca6ea1SDimitry Andric // so we want to report byte offsets instead of indices.
53*0fca6ea1SDimitry Andric if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
54*0fca6ea1SDimitry Andric return std::nullopt;
55*0fca6ea1SDimitry Andric
56*0fca6ea1SDimitry Andric return ASE->getType();
57*0fca6ea1SDimitry Andric }
58*0fca6ea1SDimitry Andric
59*0fca6ea1SDimitry Andric static std::optional<int64_t>
determineElementSize(const std::optional<QualType> T,const CheckerContext & C)60*0fca6ea1SDimitry Andric determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
61*0fca6ea1SDimitry Andric if (!T)
62*0fca6ea1SDimitry Andric return std::nullopt;
63*0fca6ea1SDimitry Andric return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
64*0fca6ea1SDimitry Andric }
65*0fca6ea1SDimitry Andric
66*0fca6ea1SDimitry Andric class StateUpdateReporter {
67*0fca6ea1SDimitry Andric const SubRegion *Reg;
68*0fca6ea1SDimitry Andric const NonLoc ByteOffsetVal;
69*0fca6ea1SDimitry Andric const std::optional<QualType> ElementType;
70*0fca6ea1SDimitry Andric const std::optional<int64_t> ElementSize;
71*0fca6ea1SDimitry Andric bool AssumedNonNegative = false;
72*0fca6ea1SDimitry Andric std::optional<NonLoc> AssumedUpperBound = std::nullopt;
73*0fca6ea1SDimitry Andric
74*0fca6ea1SDimitry Andric public:
StateUpdateReporter(const SubRegion * R,NonLoc ByteOffsVal,const Expr * E,CheckerContext & C)75*0fca6ea1SDimitry Andric StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
76*0fca6ea1SDimitry Andric CheckerContext &C)
77*0fca6ea1SDimitry Andric : Reg(R), ByteOffsetVal(ByteOffsVal),
78*0fca6ea1SDimitry Andric ElementType(determineElementType(E, C)),
79*0fca6ea1SDimitry Andric ElementSize(determineElementSize(ElementType, C)) {}
80*0fca6ea1SDimitry Andric
recordNonNegativeAssumption()81*0fca6ea1SDimitry Andric void recordNonNegativeAssumption() { AssumedNonNegative = true; }
recordUpperBoundAssumption(NonLoc UpperBoundVal)82*0fca6ea1SDimitry Andric void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
83*0fca6ea1SDimitry Andric AssumedUpperBound = UpperBoundVal;
84*0fca6ea1SDimitry Andric }
85*0fca6ea1SDimitry Andric
assumedNonNegative()86*0fca6ea1SDimitry Andric bool assumedNonNegative() { return AssumedNonNegative; }
87*0fca6ea1SDimitry Andric
88*0fca6ea1SDimitry Andric const NoteTag *createNoteTag(CheckerContext &C) const;
89*0fca6ea1SDimitry Andric
90*0fca6ea1SDimitry Andric private:
91*0fca6ea1SDimitry Andric std::string getMessage(PathSensitiveBugReport &BR) const;
92*0fca6ea1SDimitry Andric
93*0fca6ea1SDimitry Andric /// Return true if information about the value of `Sym` can put constraints
94*0fca6ea1SDimitry Andric /// on some symbol which is interesting within the bug report `BR`.
95*0fca6ea1SDimitry Andric /// In particular, this returns true when `Sym` is interesting within `BR`;
96*0fca6ea1SDimitry Andric /// but it also returns true if `Sym` is an expression that contains integer
97*0fca6ea1SDimitry Andric /// constants and a single symbolic operand which is interesting (in `BR`).
98*0fca6ea1SDimitry Andric /// We need to use this instead of plain `BR.isInteresting()` because if we
99*0fca6ea1SDimitry Andric /// are analyzing code like
100*0fca6ea1SDimitry Andric /// int array[10];
101*0fca6ea1SDimitry Andric /// int f(int arg) {
102*0fca6ea1SDimitry Andric /// return array[arg] && array[arg + 10];
103*0fca6ea1SDimitry Andric /// }
104*0fca6ea1SDimitry Andric /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
105*0fca6ea1SDimitry Andric /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
106*0fca6ea1SDimitry Andric /// to detect this out of bounds access).
107*0fca6ea1SDimitry Andric static bool providesInformationAboutInteresting(SymbolRef Sym,
108*0fca6ea1SDimitry Andric PathSensitiveBugReport &BR);
providesInformationAboutInteresting(SVal SV,PathSensitiveBugReport & BR)109*0fca6ea1SDimitry Andric static bool providesInformationAboutInteresting(SVal SV,
110*0fca6ea1SDimitry Andric PathSensitiveBugReport &BR) {
111*0fca6ea1SDimitry Andric return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
112*0fca6ea1SDimitry Andric }
113*0fca6ea1SDimitry Andric };
1140b57cec5SDimitry Andric
1155f757f3fSDimitry Andric struct Messages {
1165f757f3fSDimitry Andric std::string Short, Full;
1175f757f3fSDimitry Andric };
1180b57cec5SDimitry Andric
1195f757f3fSDimitry Andric // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
1205f757f3fSDimitry Andric // instead of `PreStmt` because the current implementation passes the whole
1215f757f3fSDimitry Andric // expression to `CheckerContext::getSVal()` which only works after the
1225f757f3fSDimitry Andric // symbolic evaluation of the expression. (To turn them into `PreStmt`
1235f757f3fSDimitry Andric // callbacks, we'd need to duplicate the logic that evaluates these
1245f757f3fSDimitry Andric // expressions.) The `MemberExpr` callback would work as `PreStmt` but it's
1255f757f3fSDimitry Andric // defined as `PostStmt` for the sake of consistency with the other callbacks.
1265f757f3fSDimitry Andric class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
1275f757f3fSDimitry Andric check::PostStmt<UnaryOperator>,
1285f757f3fSDimitry Andric check::PostStmt<MemberExpr>> {
1295f757f3fSDimitry Andric BugType BT{this, "Out-of-bound access"};
1305f757f3fSDimitry Andric BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
1315f757f3fSDimitry Andric
1325f757f3fSDimitry Andric void performCheck(const Expr *E, CheckerContext &C) const;
1335f757f3fSDimitry Andric
134*0fca6ea1SDimitry Andric void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
135*0fca6ea1SDimitry Andric NonLoc Offset, std::optional<NonLoc> Extent,
136*0fca6ea1SDimitry Andric bool IsTaintBug = false) const;
137*0fca6ea1SDimitry Andric
138*0fca6ea1SDimitry Andric static void markPartsInteresting(PathSensitiveBugReport &BR,
139*0fca6ea1SDimitry Andric ProgramStateRef ErrorState, NonLoc Val,
140*0fca6ea1SDimitry Andric bool MarkTaint);
14106c3fb27SDimitry Andric
14206c3fb27SDimitry Andric static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
1430b57cec5SDimitry Andric
144*0fca6ea1SDimitry Andric static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
145*0fca6ea1SDimitry Andric NonLoc Offset, NonLoc Limit,
146*0fca6ea1SDimitry Andric CheckerContext &C);
1475f757f3fSDimitry Andric static bool isInAddressOf(const Stmt *S, ASTContext &AC);
1480b57cec5SDimitry Andric
1490b57cec5SDimitry Andric public:
checkPostStmt(const ArraySubscriptExpr * E,CheckerContext & C) const1505f757f3fSDimitry Andric void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {
1515f757f3fSDimitry Andric performCheck(E, C);
1525f757f3fSDimitry Andric }
checkPostStmt(const UnaryOperator * E,CheckerContext & C) const1535f757f3fSDimitry Andric void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {
1545f757f3fSDimitry Andric if (E->getOpcode() == UO_Deref)
1555f757f3fSDimitry Andric performCheck(E, C);
1565f757f3fSDimitry Andric }
checkPostStmt(const MemberExpr * E,CheckerContext & C) const1575f757f3fSDimitry Andric void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {
1585f757f3fSDimitry Andric if (E->isArrow())
1595f757f3fSDimitry Andric performCheck(E->getBase(), C);
1605f757f3fSDimitry Andric }
1610b57cec5SDimitry Andric };
1625f757f3fSDimitry Andric
1635f757f3fSDimitry Andric } // anonymous namespace
1645f757f3fSDimitry Andric
1655f757f3fSDimitry Andric /// For a given Location that can be represented as a symbolic expression
1665f757f3fSDimitry Andric /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
1675f757f3fSDimitry Andric /// Arr and the distance of Location from the beginning of Arr (expressed in a
1685f757f3fSDimitry Andric /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
1695f757f3fSDimitry Andric /// cannot be determined.
1705f757f3fSDimitry Andric static std::optional<std::pair<const SubRegion *, NonLoc>>
computeOffset(ProgramStateRef State,SValBuilder & SVB,SVal Location)1715f757f3fSDimitry Andric computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
1725f757f3fSDimitry Andric QualType T = SVB.getArrayIndexType();
1735f757f3fSDimitry Andric auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
1745f757f3fSDimitry Andric // We will use this utility to add and multiply values.
1755f757f3fSDimitry Andric return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
1765f757f3fSDimitry Andric };
1775f757f3fSDimitry Andric
1785f757f3fSDimitry Andric const SubRegion *OwnerRegion = nullptr;
1795f757f3fSDimitry Andric std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
1805f757f3fSDimitry Andric
1815f757f3fSDimitry Andric const ElementRegion *CurRegion =
1825f757f3fSDimitry Andric dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
1835f757f3fSDimitry Andric
1845f757f3fSDimitry Andric while (CurRegion) {
1855f757f3fSDimitry Andric const auto Index = CurRegion->getIndex().getAs<NonLoc>();
1865f757f3fSDimitry Andric if (!Index)
1875f757f3fSDimitry Andric return std::nullopt;
1885f757f3fSDimitry Andric
1895f757f3fSDimitry Andric QualType ElemType = CurRegion->getElementType();
1905f757f3fSDimitry Andric
1915f757f3fSDimitry Andric // FIXME: The following early return was presumably added to safeguard the
1925f757f3fSDimitry Andric // getTypeSizeInChars() call (which doesn't accept an incomplete type), but
1935f757f3fSDimitry Andric // it seems that `ElemType` cannot be incomplete at this point.
1945f757f3fSDimitry Andric if (ElemType->isIncompleteType())
1955f757f3fSDimitry Andric return std::nullopt;
1965f757f3fSDimitry Andric
1975f757f3fSDimitry Andric // Calculate Delta = Index * sizeof(ElemType).
1985f757f3fSDimitry Andric NonLoc Size = SVB.makeArrayIndex(
1995f757f3fSDimitry Andric SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
2005f757f3fSDimitry Andric auto Delta = EvalBinOp(BO_Mul, *Index, Size);
2015f757f3fSDimitry Andric if (!Delta)
2025f757f3fSDimitry Andric return std::nullopt;
2035f757f3fSDimitry Andric
2045f757f3fSDimitry Andric // Perform Offset += Delta.
2055f757f3fSDimitry Andric Offset = EvalBinOp(BO_Add, *Offset, *Delta);
2065f757f3fSDimitry Andric if (!Offset)
2075f757f3fSDimitry Andric return std::nullopt;
2085f757f3fSDimitry Andric
2095f757f3fSDimitry Andric OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
2105f757f3fSDimitry Andric // When this is just another ElementRegion layer, we need to continue the
2115f757f3fSDimitry Andric // offset calculations:
2125f757f3fSDimitry Andric CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
2135f757f3fSDimitry Andric }
2145f757f3fSDimitry Andric
2155f757f3fSDimitry Andric if (OwnerRegion)
2165f757f3fSDimitry Andric return std::make_pair(OwnerRegion, *Offset);
2175f757f3fSDimitry Andric
2185f757f3fSDimitry Andric return std::nullopt;
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
221*0fca6ea1SDimitry Andric // NOTE: This function is the "heart" of this checker. It simplifies
222*0fca6ea1SDimitry Andric // inequalities with transformations that are valid (and very elementary) in
223*0fca6ea1SDimitry Andric // pure mathematics, but become invalid if we use them in C++ number model
224*0fca6ea1SDimitry Andric // where the calculations may overflow.
225*0fca6ea1SDimitry Andric // Due to the overflow issues I think it's impossible (or at least not
226*0fca6ea1SDimitry Andric // practical) to integrate this kind of simplification into the resolution of
227*0fca6ea1SDimitry Andric // arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
228*0fca6ea1SDimitry Andric // produces valid results when the calculations are handling memory offsets
229*0fca6ea1SDimitry Andric // and every value is well below SIZE_MAX.
230*0fca6ea1SDimitry Andric // TODO: This algorithm should be moved to a central location where it's
231*0fca6ea1SDimitry Andric // available for other checkers that need to compare memory offsets.
232*0fca6ea1SDimitry Andric // NOTE: the simplification preserves the order of the two operands in a
233*0fca6ea1SDimitry Andric // mathematical sense, but it may change the result produced by a C++
234*0fca6ea1SDimitry Andric // comparison operator (and the automatic type conversions).
235*0fca6ea1SDimitry Andric // For example, consider a comparison "X+1 < 0", where the LHS is stored as a
236*0fca6ea1SDimitry Andric // size_t and the RHS is stored in an int. (As size_t is unsigned, this
237*0fca6ea1SDimitry Andric // comparison is false for all values of "X".) However, the simplification may
238*0fca6ea1SDimitry Andric // turn it into "X < -1", which is still always false in a mathematical sense,
239*0fca6ea1SDimitry Andric // but can produce a true result when evaluated by `evalBinOp` (which follows
240*0fca6ea1SDimitry Andric // the rules of C++ and casts -1 to SIZE_MAX).
2410b57cec5SDimitry Andric static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset,nonloc::ConcreteInt extent,SValBuilder & svalBuilder)2420b57cec5SDimitry Andric getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
2430b57cec5SDimitry Andric SValBuilder &svalBuilder) {
244bdd1243dSDimitry Andric std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
2450b57cec5SDimitry Andric if (SymVal && SymVal->isExpression()) {
2460b57cec5SDimitry Andric if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
2470b57cec5SDimitry Andric llvm::APSInt constant =
2480b57cec5SDimitry Andric APSIntType(extent.getValue()).convert(SIE->getRHS());
2490b57cec5SDimitry Andric switch (SIE->getOpcode()) {
2500b57cec5SDimitry Andric case BO_Mul:
2515f757f3fSDimitry Andric // The constant should never be 0 here, becasue multiplication by zero
2525f757f3fSDimitry Andric // is simplified by the engine.
2530b57cec5SDimitry Andric if ((extent.getValue() % constant) != 0)
2540b57cec5SDimitry Andric return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
2550b57cec5SDimitry Andric else
2560b57cec5SDimitry Andric return getSimplifiedOffsets(
2570b57cec5SDimitry Andric nonloc::SymbolVal(SIE->getLHS()),
2580b57cec5SDimitry Andric svalBuilder.makeIntVal(extent.getValue() / constant),
2590b57cec5SDimitry Andric svalBuilder);
2600b57cec5SDimitry Andric case BO_Add:
2610b57cec5SDimitry Andric return getSimplifiedOffsets(
2620b57cec5SDimitry Andric nonloc::SymbolVal(SIE->getLHS()),
2630b57cec5SDimitry Andric svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
2640b57cec5SDimitry Andric default:
2650b57cec5SDimitry Andric break;
2660b57cec5SDimitry Andric }
2670b57cec5SDimitry Andric }
2680b57cec5SDimitry Andric }
2690b57cec5SDimitry Andric
2700b57cec5SDimitry Andric return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
2710b57cec5SDimitry Andric }
2720b57cec5SDimitry Andric
isNegative(SValBuilder & SVB,ProgramStateRef State,NonLoc Value)273*0fca6ea1SDimitry Andric static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {
274*0fca6ea1SDimitry Andric const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value);
275*0fca6ea1SDimitry Andric return MaxV && MaxV->isNegative();
276*0fca6ea1SDimitry Andric }
277*0fca6ea1SDimitry Andric
isUnsigned(SValBuilder & SVB,NonLoc Value)278*0fca6ea1SDimitry Andric static bool isUnsigned(SValBuilder &SVB, NonLoc Value) {
279*0fca6ea1SDimitry Andric QualType T = Value.getType(SVB.getContext());
280*0fca6ea1SDimitry Andric return T->isUnsignedIntegerType();
281*0fca6ea1SDimitry Andric }
282*0fca6ea1SDimitry Andric
28306c3fb27SDimitry Andric // Evaluate the comparison Value < Threshold with the help of the custom
28406c3fb27SDimitry Andric // simplification algorithm defined for this checker. Return a pair of states,
28506c3fb27SDimitry Andric // where the first one corresponds to "value below threshold" and the second
28606c3fb27SDimitry Andric // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
28706c3fb27SDimitry Andric // the case when the evaluation fails.
2885f757f3fSDimitry Andric // If the optional argument CheckEquality is true, then use BO_EQ instead of
2895f757f3fSDimitry Andric // the default BO_LT after consistently applying the same simplification steps.
29006c3fb27SDimitry Andric static std::pair<ProgramStateRef, ProgramStateRef>
compareValueToThreshold(ProgramStateRef State,NonLoc Value,NonLoc Threshold,SValBuilder & SVB,bool CheckEquality=false)29106c3fb27SDimitry Andric compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
2925f757f3fSDimitry Andric SValBuilder &SVB, bool CheckEquality = false) {
29306c3fb27SDimitry Andric if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
29406c3fb27SDimitry Andric std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
29506c3fb27SDimitry Andric }
296*0fca6ea1SDimitry Andric
297*0fca6ea1SDimitry Andric // We want to perform a _mathematical_ comparison between the numbers `Value`
298*0fca6ea1SDimitry Andric // and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may
299*0fca6ea1SDimitry Andric // perform automatic conversions. For example the number -1 is less than the
300*0fca6ea1SDimitry Andric // number 1000, but -1 < `1000ull` will evaluate to `false` because the `int`
301*0fca6ea1SDimitry Andric // -1 is converted to ULONGLONG_MAX.
302*0fca6ea1SDimitry Andric // To avoid automatic conversions, we evaluate the "obvious" cases without
303*0fca6ea1SDimitry Andric // calling `evalBinOpNN`:
304*0fca6ea1SDimitry Andric if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) {
305*0fca6ea1SDimitry Andric if (CheckEquality) {
306*0fca6ea1SDimitry Andric // negative_value == unsigned_threshold is always false
30706c3fb27SDimitry Andric return {nullptr, State};
30806c3fb27SDimitry Andric }
309*0fca6ea1SDimitry Andric // negative_value < unsigned_threshold is always true
310*0fca6ea1SDimitry Andric return {State, nullptr};
31106c3fb27SDimitry Andric }
312*0fca6ea1SDimitry Andric if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) {
313*0fca6ea1SDimitry Andric // unsigned_value == negative_threshold and
314*0fca6ea1SDimitry Andric // unsigned_value < negative_threshold are both always false
315*0fca6ea1SDimitry Andric return {nullptr, State};
316*0fca6ea1SDimitry Andric }
317*0fca6ea1SDimitry Andric // FIXME: These special cases are sufficient for handling real-world
318*0fca6ea1SDimitry Andric // comparisons, but in theory there could be contrived situations where
319*0fca6ea1SDimitry Andric // automatic conversion of a symbolic value (which can be negative and can be
320*0fca6ea1SDimitry Andric // positive) leads to incorrect results.
321*0fca6ea1SDimitry Andric // NOTE: We NEED to use the `evalBinOpNN` call in the "common" case, because
322*0fca6ea1SDimitry Andric // we want to ensure that assumptions coming from this precondition and
323*0fca6ea1SDimitry Andric // assumptions coming from regular C/C++ operator calls are represented by
324*0fca6ea1SDimitry Andric // constraints on the same symbolic expression. A solution that would
325*0fca6ea1SDimitry Andric // evaluate these "mathematical" compariosns through a separate pathway would
326*0fca6ea1SDimitry Andric // be a step backwards in this sense.
327*0fca6ea1SDimitry Andric
3285f757f3fSDimitry Andric const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
32906c3fb27SDimitry Andric auto BelowThreshold =
3305f757f3fSDimitry Andric SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())
3315f757f3fSDimitry Andric .getAs<NonLoc>();
33206c3fb27SDimitry Andric
33306c3fb27SDimitry Andric if (BelowThreshold)
33406c3fb27SDimitry Andric return State->assume(*BelowThreshold);
33506c3fb27SDimitry Andric
33606c3fb27SDimitry Andric return {nullptr, nullptr};
33706c3fb27SDimitry Andric }
33806c3fb27SDimitry Andric
getRegionName(const SubRegion * Region)3395f757f3fSDimitry Andric static std::string getRegionName(const SubRegion *Region) {
3405f757f3fSDimitry Andric if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
3415f757f3fSDimitry Andric return RegName;
3420b57cec5SDimitry Andric
3435f757f3fSDimitry Andric // Field regions only have descriptive names when their parent has a
3445f757f3fSDimitry Andric // descriptive name; so we provide a fallback representation for them:
3455f757f3fSDimitry Andric if (const auto *FR = Region->getAs<FieldRegion>()) {
3465f757f3fSDimitry Andric if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
3475f757f3fSDimitry Andric return formatv("the field '{0}'", Name);
3485f757f3fSDimitry Andric return "the unnamed field";
3495f757f3fSDimitry Andric }
3505f757f3fSDimitry Andric
3515f757f3fSDimitry Andric if (isa<AllocaRegion>(Region))
3525f757f3fSDimitry Andric return "the memory returned by 'alloca'";
3535f757f3fSDimitry Andric
3545f757f3fSDimitry Andric if (isa<SymbolicRegion>(Region) &&
3555f757f3fSDimitry Andric isa<HeapSpaceRegion>(Region->getMemorySpace()))
3565f757f3fSDimitry Andric return "the heap area";
3575f757f3fSDimitry Andric
3585f757f3fSDimitry Andric if (isa<StringRegion>(Region))
3595f757f3fSDimitry Andric return "the string literal";
3605f757f3fSDimitry Andric
3615f757f3fSDimitry Andric return "the region";
3625f757f3fSDimitry Andric }
3635f757f3fSDimitry Andric
getConcreteValue(NonLoc SV)3645f757f3fSDimitry Andric static std::optional<int64_t> getConcreteValue(NonLoc SV) {
3655f757f3fSDimitry Andric if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
3665f757f3fSDimitry Andric return ConcreteVal->getValue().tryExtValue();
3675f757f3fSDimitry Andric }
3685f757f3fSDimitry Andric return std::nullopt;
3695f757f3fSDimitry Andric }
3705f757f3fSDimitry Andric
getConcreteValue(std::optional<NonLoc> SV)371*0fca6ea1SDimitry Andric static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
372*0fca6ea1SDimitry Andric return SV ? getConcreteValue(*SV) : std::nullopt;
3735f757f3fSDimitry Andric }
3745f757f3fSDimitry Andric
getPrecedesMsgs(const SubRegion * Region,NonLoc Offset)3755f757f3fSDimitry Andric static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
376*0fca6ea1SDimitry Andric std::string RegName = getRegionName(Region), OffsetStr = "";
377*0fca6ea1SDimitry Andric
378*0fca6ea1SDimitry Andric if (auto ConcreteOffset = getConcreteValue(Offset))
379*0fca6ea1SDimitry Andric OffsetStr = formatv(" {0}", ConcreteOffset);
380*0fca6ea1SDimitry Andric
381*0fca6ea1SDimitry Andric return {
382*0fca6ea1SDimitry Andric formatv("Out of bound access to memory preceding {0}", RegName),
383*0fca6ea1SDimitry Andric formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
384*0fca6ea1SDimitry Andric }
385*0fca6ea1SDimitry Andric
386*0fca6ea1SDimitry Andric /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
387*0fca6ea1SDimitry Andric /// it can be performed (`Divisor` is nonzero and there is no remainder). The
388*0fca6ea1SDimitry Andric /// values `Val1` and `Val2` may be nullopt and in that case the corresponding
389*0fca6ea1SDimitry Andric /// division is considered to be successful.
tryDividePair(std::optional<int64_t> & Val1,std::optional<int64_t> & Val2,int64_t Divisor)390*0fca6ea1SDimitry Andric static bool tryDividePair(std::optional<int64_t> &Val1,
391*0fca6ea1SDimitry Andric std::optional<int64_t> &Val2, int64_t Divisor) {
392*0fca6ea1SDimitry Andric if (!Divisor)
393*0fca6ea1SDimitry Andric return false;
394*0fca6ea1SDimitry Andric const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
395*0fca6ea1SDimitry Andric const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
396*0fca6ea1SDimitry Andric if (!Val1HasRemainder && !Val2HasRemainder) {
397*0fca6ea1SDimitry Andric if (Val1)
398*0fca6ea1SDimitry Andric *Val1 /= Divisor;
399*0fca6ea1SDimitry Andric if (Val2)
400*0fca6ea1SDimitry Andric *Val2 /= Divisor;
401*0fca6ea1SDimitry Andric return true;
402*0fca6ea1SDimitry Andric }
403*0fca6ea1SDimitry Andric return false;
4045f757f3fSDimitry Andric }
4055f757f3fSDimitry Andric
getExceedsMsgs(ASTContext & ACtx,const SubRegion * Region,NonLoc Offset,NonLoc Extent,SVal Location,bool AlsoMentionUnderflow)4065f757f3fSDimitry Andric static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
407*0fca6ea1SDimitry Andric NonLoc Offset, NonLoc Extent, SVal Location,
408*0fca6ea1SDimitry Andric bool AlsoMentionUnderflow) {
4095f757f3fSDimitry Andric std::string RegName = getRegionName(Region);
4105f757f3fSDimitry Andric const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
4115f757f3fSDimitry Andric assert(EReg && "this checker only handles element access");
4125f757f3fSDimitry Andric QualType ElemType = EReg->getElementType();
4135f757f3fSDimitry Andric
4145f757f3fSDimitry Andric std::optional<int64_t> OffsetN = getConcreteValue(Offset);
4155f757f3fSDimitry Andric std::optional<int64_t> ExtentN = getConcreteValue(Extent);
4165f757f3fSDimitry Andric
417*0fca6ea1SDimitry Andric int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
418*0fca6ea1SDimitry Andric
419*0fca6ea1SDimitry Andric bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
420*0fca6ea1SDimitry Andric const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
4215f757f3fSDimitry Andric
4225f757f3fSDimitry Andric SmallString<256> Buf;
4235f757f3fSDimitry Andric llvm::raw_svector_ostream Out(Buf);
4245f757f3fSDimitry Andric Out << "Access of ";
4255f757f3fSDimitry Andric if (!ExtentN && !UseByteOffsets)
4265f757f3fSDimitry Andric Out << "'" << ElemType.getAsString() << "' element in ";
4275f757f3fSDimitry Andric Out << RegName << " at ";
428*0fca6ea1SDimitry Andric if (AlsoMentionUnderflow) {
429*0fca6ea1SDimitry Andric Out << "a negative or overflowing " << OffsetOrIndex;
430*0fca6ea1SDimitry Andric } else if (OffsetN) {
431*0fca6ea1SDimitry Andric Out << OffsetOrIndex << " " << *OffsetN;
4325f757f3fSDimitry Andric } else {
433*0fca6ea1SDimitry Andric Out << "an overflowing " << OffsetOrIndex;
4345f757f3fSDimitry Andric }
4355f757f3fSDimitry Andric if (ExtentN) {
4365f757f3fSDimitry Andric Out << ", while it holds only ";
4375f757f3fSDimitry Andric if (*ExtentN != 1)
4385f757f3fSDimitry Andric Out << *ExtentN;
4395f757f3fSDimitry Andric else
4405f757f3fSDimitry Andric Out << "a single";
4415f757f3fSDimitry Andric if (UseByteOffsets)
4425f757f3fSDimitry Andric Out << " byte";
4435f757f3fSDimitry Andric else
4445f757f3fSDimitry Andric Out << " '" << ElemType.getAsString() << "' element";
4455f757f3fSDimitry Andric
4465f757f3fSDimitry Andric if (*ExtentN > 1)
4475f757f3fSDimitry Andric Out << "s";
4485f757f3fSDimitry Andric }
4495f757f3fSDimitry Andric
450*0fca6ea1SDimitry Andric return {formatv("Out of bound access to memory {0} {1}",
451*0fca6ea1SDimitry Andric AlsoMentionUnderflow ? "around" : "after the end of",
452*0fca6ea1SDimitry Andric RegName),
453*0fca6ea1SDimitry Andric std::string(Buf)};
4545f757f3fSDimitry Andric }
4555f757f3fSDimitry Andric
getTaintMsgs(const SubRegion * Region,const char * OffsetName,bool AlsoMentionUnderflow)456*0fca6ea1SDimitry Andric static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
457*0fca6ea1SDimitry Andric bool AlsoMentionUnderflow) {
4585f757f3fSDimitry Andric std::string RegName = getRegionName(Region);
4595f757f3fSDimitry Andric return {formatv("Potential out of bound access to {0} with tainted {1}",
4605f757f3fSDimitry Andric RegName, OffsetName),
461*0fca6ea1SDimitry Andric formatv("Access of {0} with a tainted {1} that may be {2}too large",
462*0fca6ea1SDimitry Andric RegName, OffsetName,
463*0fca6ea1SDimitry Andric AlsoMentionUnderflow ? "negative or " : "")};
464*0fca6ea1SDimitry Andric }
465*0fca6ea1SDimitry Andric
createNoteTag(CheckerContext & C) const466*0fca6ea1SDimitry Andric const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
467*0fca6ea1SDimitry Andric // Don't create a note tag if we didn't assume anything:
468*0fca6ea1SDimitry Andric if (!AssumedNonNegative && !AssumedUpperBound)
469*0fca6ea1SDimitry Andric return nullptr;
470*0fca6ea1SDimitry Andric
471*0fca6ea1SDimitry Andric return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {
472*0fca6ea1SDimitry Andric return getMessage(BR);
473*0fca6ea1SDimitry Andric });
474*0fca6ea1SDimitry Andric }
475*0fca6ea1SDimitry Andric
getMessage(PathSensitiveBugReport & BR) const476*0fca6ea1SDimitry Andric std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
477*0fca6ea1SDimitry Andric bool ShouldReportNonNegative = AssumedNonNegative;
478*0fca6ea1SDimitry Andric if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
479*0fca6ea1SDimitry Andric if (AssumedUpperBound &&
480*0fca6ea1SDimitry Andric providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
481*0fca6ea1SDimitry Andric // Even if the byte offset isn't interesting (e.g. it's a constant value),
482*0fca6ea1SDimitry Andric // the assumption can still be interesting if it provides information
483*0fca6ea1SDimitry Andric // about an interesting symbolic upper bound.
484*0fca6ea1SDimitry Andric ShouldReportNonNegative = false;
485*0fca6ea1SDimitry Andric } else {
486*0fca6ea1SDimitry Andric // We don't have anything interesting, don't report the assumption.
487*0fca6ea1SDimitry Andric return "";
488*0fca6ea1SDimitry Andric }
489*0fca6ea1SDimitry Andric }
490*0fca6ea1SDimitry Andric
491*0fca6ea1SDimitry Andric std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
492*0fca6ea1SDimitry Andric std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
493*0fca6ea1SDimitry Andric
494*0fca6ea1SDimitry Andric const bool UseIndex =
495*0fca6ea1SDimitry Andric ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
496*0fca6ea1SDimitry Andric
497*0fca6ea1SDimitry Andric SmallString<256> Buf;
498*0fca6ea1SDimitry Andric llvm::raw_svector_ostream Out(Buf);
499*0fca6ea1SDimitry Andric Out << "Assuming ";
500*0fca6ea1SDimitry Andric if (UseIndex) {
501*0fca6ea1SDimitry Andric Out << "index ";
502*0fca6ea1SDimitry Andric if (OffsetN)
503*0fca6ea1SDimitry Andric Out << "'" << OffsetN << "' ";
504*0fca6ea1SDimitry Andric } else if (AssumedUpperBound) {
505*0fca6ea1SDimitry Andric Out << "byte offset ";
506*0fca6ea1SDimitry Andric if (OffsetN)
507*0fca6ea1SDimitry Andric Out << "'" << OffsetN << "' ";
508*0fca6ea1SDimitry Andric } else {
509*0fca6ea1SDimitry Andric Out << "offset ";
510*0fca6ea1SDimitry Andric }
511*0fca6ea1SDimitry Andric
512*0fca6ea1SDimitry Andric Out << "is";
513*0fca6ea1SDimitry Andric if (ShouldReportNonNegative) {
514*0fca6ea1SDimitry Andric Out << " non-negative";
515*0fca6ea1SDimitry Andric }
516*0fca6ea1SDimitry Andric if (AssumedUpperBound) {
517*0fca6ea1SDimitry Andric if (ShouldReportNonNegative)
518*0fca6ea1SDimitry Andric Out << " and";
519*0fca6ea1SDimitry Andric Out << " less than ";
520*0fca6ea1SDimitry Andric if (ExtentN)
521*0fca6ea1SDimitry Andric Out << *ExtentN << ", ";
522*0fca6ea1SDimitry Andric if (UseIndex && ElementType)
523*0fca6ea1SDimitry Andric Out << "the number of '" << ElementType->getAsString()
524*0fca6ea1SDimitry Andric << "' elements in ";
525*0fca6ea1SDimitry Andric else
526*0fca6ea1SDimitry Andric Out << "the extent of ";
527*0fca6ea1SDimitry Andric Out << getRegionName(Reg);
528*0fca6ea1SDimitry Andric }
529*0fca6ea1SDimitry Andric return std::string(Out.str());
530*0fca6ea1SDimitry Andric }
531*0fca6ea1SDimitry Andric
providesInformationAboutInteresting(SymbolRef Sym,PathSensitiveBugReport & BR)532*0fca6ea1SDimitry Andric bool StateUpdateReporter::providesInformationAboutInteresting(
533*0fca6ea1SDimitry Andric SymbolRef Sym, PathSensitiveBugReport &BR) {
534*0fca6ea1SDimitry Andric if (!Sym)
535*0fca6ea1SDimitry Andric return false;
536*0fca6ea1SDimitry Andric for (SymbolRef PartSym : Sym->symbols()) {
537*0fca6ea1SDimitry Andric // The interestingess mark may appear on any layer as we're stripping off
538*0fca6ea1SDimitry Andric // the SymIntExpr, UnarySymExpr etc. layers...
539*0fca6ea1SDimitry Andric if (BR.isInteresting(PartSym))
540*0fca6ea1SDimitry Andric return true;
541*0fca6ea1SDimitry Andric // ...but if both sides of the expression are symbolic, then there is no
542*0fca6ea1SDimitry Andric // practical algorithm to produce separate constraints for the two
543*0fca6ea1SDimitry Andric // operands (from the single combined result).
544*0fca6ea1SDimitry Andric if (isa<SymSymExpr>(PartSym))
545*0fca6ea1SDimitry Andric return false;
546*0fca6ea1SDimitry Andric }
547*0fca6ea1SDimitry Andric return false;
5485f757f3fSDimitry Andric }
5495f757f3fSDimitry Andric
performCheck(const Expr * E,CheckerContext & C) const5505f757f3fSDimitry Andric void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
5515f757f3fSDimitry Andric const SVal Location = C.getSVal(E);
5525f757f3fSDimitry Andric
55306c3fb27SDimitry Andric // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
55406c3fb27SDimitry Andric // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
55506c3fb27SDimitry Andric // and incomplete analysis of these leads to false positives. As even
55606c3fb27SDimitry Andric // accurate reports would be confusing for the users, just disable reports
55706c3fb27SDimitry Andric // from these macros:
5585f757f3fSDimitry Andric if (isFromCtypeMacro(E, C.getASTContext()))
55906c3fb27SDimitry Andric return;
56006c3fb27SDimitry Andric
5615f757f3fSDimitry Andric ProgramStateRef State = C.getState();
5625f757f3fSDimitry Andric SValBuilder &SVB = C.getSValBuilder();
5630b57cec5SDimitry Andric
5645f757f3fSDimitry Andric const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
5655f757f3fSDimitry Andric computeOffset(State, SVB, Location);
5660b57cec5SDimitry Andric
56706c3fb27SDimitry Andric if (!RawOffset)
5680b57cec5SDimitry Andric return;
5690b57cec5SDimitry Andric
5705f757f3fSDimitry Andric auto [Reg, ByteOffset] = *RawOffset;
5710b57cec5SDimitry Andric
572*0fca6ea1SDimitry Andric // The state updates will be reported as a single note tag, which will be
573*0fca6ea1SDimitry Andric // composed by this helper class.
574*0fca6ea1SDimitry Andric StateUpdateReporter SUR(Reg, ByteOffset, E, C);
575*0fca6ea1SDimitry Andric
57606c3fb27SDimitry Andric // CHECK LOWER BOUND
5775f757f3fSDimitry Andric const MemSpaceRegion *Space = Reg->getMemorySpace();
5785f757f3fSDimitry Andric if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
5795f757f3fSDimitry Andric // A symbolic region in unknown space represents an unknown pointer that
5805f757f3fSDimitry Andric // may point into the middle of an array, so we don't look for underflows.
5815f757f3fSDimitry Andric // Both conditions are significant because we want to check underflows in
5825f757f3fSDimitry Andric // symbolic regions on the heap (which may be introduced by checkers like
5835f757f3fSDimitry Andric // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
5845f757f3fSDimitry Andric // non-symbolic regions (e.g. a field subregion of a symbolic region) in
5855f757f3fSDimitry Andric // unknown space.
5865f757f3fSDimitry Andric auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
5875f757f3fSDimitry Andric State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
5880b57cec5SDimitry Andric
589*0fca6ea1SDimitry Andric if (PrecedesLowerBound) {
590*0fca6ea1SDimitry Andric // The offset may be invalid (negative)...
591*0fca6ea1SDimitry Andric if (!WithinLowerBound) {
592*0fca6ea1SDimitry Andric // ...and it cannot be valid (>= 0), so report an error.
5935f757f3fSDimitry Andric Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
594*0fca6ea1SDimitry Andric reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
5950b57cec5SDimitry Andric return;
5960b57cec5SDimitry Andric }
597*0fca6ea1SDimitry Andric // ...but it can be valid as well, so the checker will (optimistically)
598*0fca6ea1SDimitry Andric // assume that it's valid and mention this in the note tag.
599*0fca6ea1SDimitry Andric SUR.recordNonNegativeAssumption();
600*0fca6ea1SDimitry Andric }
6010b57cec5SDimitry Andric
602*0fca6ea1SDimitry Andric // Actually update the state. The "if" only fails in the extremely unlikely
603*0fca6ea1SDimitry Andric // case when compareValueToThreshold returns {nullptr, nullptr} becasue
604*0fca6ea1SDimitry Andric // evalBinOpNN fails to evaluate the less-than operator.
6055f757f3fSDimitry Andric if (WithinLowerBound)
6065f757f3fSDimitry Andric State = WithinLowerBound;
6070b57cec5SDimitry Andric }
6080b57cec5SDimitry Andric
60906c3fb27SDimitry Andric // CHECK UPPER BOUND
6105f757f3fSDimitry Andric DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
61106c3fb27SDimitry Andric if (auto KnownSize = Size.getAs<NonLoc>()) {
612*0fca6ea1SDimitry Andric // In a situation where both underflow and overflow are possible (but the
613*0fca6ea1SDimitry Andric // index is either tainted or known to be invalid), the logic of this
614*0fca6ea1SDimitry Andric // checker will first assume that the offset is non-negative, and then
615*0fca6ea1SDimitry Andric // (with this additional assumption) it will detect an overflow error.
616*0fca6ea1SDimitry Andric // In this situation the warning message should mention both possibilities.
617*0fca6ea1SDimitry Andric bool AlsoMentionUnderflow = SUR.assumedNonNegative();
618*0fca6ea1SDimitry Andric
6195f757f3fSDimitry Andric auto [WithinUpperBound, ExceedsUpperBound] =
6205f757f3fSDimitry Andric compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
6210b57cec5SDimitry Andric
6225f757f3fSDimitry Andric if (ExceedsUpperBound) {
623*0fca6ea1SDimitry Andric // The offset may be invalid (>= Size)...
6245f757f3fSDimitry Andric if (!WithinUpperBound) {
625*0fca6ea1SDimitry Andric // ...and it cannot be within bounds, so report an error, unless we can
626*0fca6ea1SDimitry Andric // definitely determine that this is an idiomatic `&array[size]`
627*0fca6ea1SDimitry Andric // expression that calculates the past-the-end pointer.
628*0fca6ea1SDimitry Andric if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
629*0fca6ea1SDimitry Andric *KnownSize, C)) {
630*0fca6ea1SDimitry Andric C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
6310b57cec5SDimitry Andric return;
6320b57cec5SDimitry Andric }
6335f757f3fSDimitry Andric
634*0fca6ea1SDimitry Andric Messages Msgs =
635*0fca6ea1SDimitry Andric getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
636*0fca6ea1SDimitry Andric Location, AlsoMentionUnderflow);
637*0fca6ea1SDimitry Andric reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
638*0fca6ea1SDimitry Andric return;
639*0fca6ea1SDimitry Andric }
640*0fca6ea1SDimitry Andric // ...and it can be valid as well...
641*0fca6ea1SDimitry Andric if (isTainted(State, ByteOffset)) {
642*0fca6ea1SDimitry Andric // ...but it's tainted, so report an error.
643*0fca6ea1SDimitry Andric
644*0fca6ea1SDimitry Andric // Diagnostic detail: saying "tainted offset" is always correct, but
645*0fca6ea1SDimitry Andric // the common case is that 'idx' is tainted in 'arr[idx]' and then it's
6465f757f3fSDimitry Andric // nicer to say "tainted index".
6475f757f3fSDimitry Andric const char *OffsetName = "offset";
6485f757f3fSDimitry Andric if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
6495f757f3fSDimitry Andric if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
6505f757f3fSDimitry Andric OffsetName = "index";
6515f757f3fSDimitry Andric
652*0fca6ea1SDimitry Andric Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
653*0fca6ea1SDimitry Andric reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
654*0fca6ea1SDimitry Andric /*IsTaintBug=*/true);
65506c3fb27SDimitry Andric return;
65606c3fb27SDimitry Andric }
657*0fca6ea1SDimitry Andric // ...and it isn't tainted, so the checker will (optimistically) assume
658*0fca6ea1SDimitry Andric // that the offset is in bounds and mention this in the note tag.
659*0fca6ea1SDimitry Andric SUR.recordUpperBoundAssumption(*KnownSize);
66006c3fb27SDimitry Andric }
6610b57cec5SDimitry Andric
662*0fca6ea1SDimitry Andric // Actually update the state. The "if" only fails in the extremely unlikely
663*0fca6ea1SDimitry Andric // case when compareValueToThreshold returns {nullptr, nullptr} becasue
664*0fca6ea1SDimitry Andric // evalBinOpNN fails to evaluate the less-than operator.
6655f757f3fSDimitry Andric if (WithinUpperBound)
6665f757f3fSDimitry Andric State = WithinUpperBound;
6670b57cec5SDimitry Andric }
6680b57cec5SDimitry Andric
669*0fca6ea1SDimitry Andric // Add a transition, reporting the state updates that we accumulated.
670*0fca6ea1SDimitry Andric C.addTransition(State, SUR.createNoteTag(C));
671*0fca6ea1SDimitry Andric }
672*0fca6ea1SDimitry Andric
markPartsInteresting(PathSensitiveBugReport & BR,ProgramStateRef ErrorState,NonLoc Val,bool MarkTaint)673*0fca6ea1SDimitry Andric void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR,
674*0fca6ea1SDimitry Andric ProgramStateRef ErrorState,
675*0fca6ea1SDimitry Andric NonLoc Val, bool MarkTaint) {
676*0fca6ea1SDimitry Andric if (SymbolRef Sym = Val.getAsSymbol()) {
677*0fca6ea1SDimitry Andric // If the offset is a symbolic value, iterate over its "parts" with
678*0fca6ea1SDimitry Andric // `SymExpr::symbols()` and mark each of them as interesting.
679*0fca6ea1SDimitry Andric // For example, if the offset is `x*4 + y` then we put interestingness onto
680*0fca6ea1SDimitry Andric // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
681*0fca6ea1SDimitry Andric // `x` and `y`.
682*0fca6ea1SDimitry Andric for (SymbolRef PartSym : Sym->symbols())
683*0fca6ea1SDimitry Andric BR.markInteresting(PartSym);
684*0fca6ea1SDimitry Andric }
685*0fca6ea1SDimitry Andric
686*0fca6ea1SDimitry Andric if (MarkTaint) {
687*0fca6ea1SDimitry Andric // If the issue that we're reporting depends on the taintedness of the
688*0fca6ea1SDimitry Andric // offset, then put interestingness onto symbols that could be the origin
689*0fca6ea1SDimitry Andric // of the taint. Note that this may find symbols that did not appear in
690*0fca6ea1SDimitry Andric // `Sym->symbols()` (because they're only loosely connected to `Val`).
691*0fca6ea1SDimitry Andric for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val))
692*0fca6ea1SDimitry Andric BR.markInteresting(Sym);
693*0fca6ea1SDimitry Andric }
6940b57cec5SDimitry Andric }
6950b57cec5SDimitry Andric
reportOOB(CheckerContext & C,ProgramStateRef ErrorState,Messages Msgs,NonLoc Offset,std::optional<NonLoc> Extent,bool IsTaintBug) const6965f757f3fSDimitry Andric void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
697*0fca6ea1SDimitry Andric ProgramStateRef ErrorState, Messages Msgs,
698*0fca6ea1SDimitry Andric NonLoc Offset, std::optional<NonLoc> Extent,
699*0fca6ea1SDimitry Andric bool IsTaintBug /*=false*/) const {
7005f757f3fSDimitry Andric
7015f757f3fSDimitry Andric ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
7025f757f3fSDimitry Andric if (!ErrorNode)
70306c3fb27SDimitry Andric return;
70406c3fb27SDimitry Andric
7055f757f3fSDimitry Andric auto BR = std::make_unique<PathSensitiveBugReport>(
706*0fca6ea1SDimitry Andric IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
70706c3fb27SDimitry Andric
708*0fca6ea1SDimitry Andric // FIXME: ideally we would just call trackExpressionValue() and that would
709*0fca6ea1SDimitry Andric // "do the right thing": mark the relevant symbols as interesting, track the
710*0fca6ea1SDimitry Andric // control dependencies and statements storing the relevant values and add
711*0fca6ea1SDimitry Andric // helpful diagnostic pieces. However, right now trackExpressionValue() is
712*0fca6ea1SDimitry Andric // a heap of unreliable heuristics, so it would cause several issues:
713*0fca6ea1SDimitry Andric // - Interestingness is not applied consistently, e.g. if `array[x+10]`
714*0fca6ea1SDimitry Andric // causes an overflow, then `x` is not marked as interesting.
715*0fca6ea1SDimitry Andric // - We get irrelevant diagnostic pieces, e.g. in the code
716*0fca6ea1SDimitry Andric // `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;`
717*0fca6ea1SDimitry Andric // it places a "Storing uninitialized value" note on the `malloc` call
718*0fca6ea1SDimitry Andric // (which is technically true, but irrelevant).
719*0fca6ea1SDimitry Andric // If trackExpressionValue() becomes reliable, it should be applied instead
720*0fca6ea1SDimitry Andric // of this custom markPartsInteresting().
721*0fca6ea1SDimitry Andric markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
722*0fca6ea1SDimitry Andric if (Extent)
723*0fca6ea1SDimitry Andric markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
72406c3fb27SDimitry Andric
7255f757f3fSDimitry Andric C.emitReport(std::move(BR));
7260b57cec5SDimitry Andric }
7270b57cec5SDimitry Andric
isFromCtypeMacro(const Stmt * S,ASTContext & ACtx)72806c3fb27SDimitry Andric bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
72906c3fb27SDimitry Andric SourceLocation Loc = S->getBeginLoc();
73006c3fb27SDimitry Andric if (!Loc.isMacroID())
73106c3fb27SDimitry Andric return false;
73206c3fb27SDimitry Andric
73306c3fb27SDimitry Andric StringRef MacroName = Lexer::getImmediateMacroName(
73406c3fb27SDimitry Andric Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
73506c3fb27SDimitry Andric
73606c3fb27SDimitry Andric if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
73706c3fb27SDimitry Andric return false;
73806c3fb27SDimitry Andric
73906c3fb27SDimitry Andric return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
74006c3fb27SDimitry Andric (MacroName == "isblank") || (MacroName == "isdigit") ||
74106c3fb27SDimitry Andric (MacroName == "isgraph") || (MacroName == "islower") ||
74206c3fb27SDimitry Andric (MacroName == "isnctrl") || (MacroName == "isprint") ||
74306c3fb27SDimitry Andric (MacroName == "ispunct") || (MacroName == "isspace") ||
74406c3fb27SDimitry Andric (MacroName == "isupper") || (MacroName == "isxdigit"));
7450b57cec5SDimitry Andric }
7460b57cec5SDimitry Andric
isInAddressOf(const Stmt * S,ASTContext & ACtx)7475f757f3fSDimitry Andric bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
7485f757f3fSDimitry Andric ParentMapContext &ParentCtx = ACtx.getParentMapContext();
7495f757f3fSDimitry Andric do {
7505f757f3fSDimitry Andric const DynTypedNodeList Parents = ParentCtx.getParents(*S);
7515f757f3fSDimitry Andric if (Parents.empty())
7525f757f3fSDimitry Andric return false;
7535f757f3fSDimitry Andric S = Parents[0].get<Stmt>();
7545f757f3fSDimitry Andric } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
7555f757f3fSDimitry Andric const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
7565f757f3fSDimitry Andric return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
7570b57cec5SDimitry Andric }
7580b57cec5SDimitry Andric
isIdiomaticPastTheEndPtr(const Expr * E,ProgramStateRef State,NonLoc Offset,NonLoc Limit,CheckerContext & C)759*0fca6ea1SDimitry Andric bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,
760*0fca6ea1SDimitry Andric ProgramStateRef State,
761*0fca6ea1SDimitry Andric NonLoc Offset, NonLoc Limit,
762*0fca6ea1SDimitry Andric CheckerContext &C) {
763*0fca6ea1SDimitry Andric if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
764*0fca6ea1SDimitry Andric auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(
765*0fca6ea1SDimitry Andric State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true);
766*0fca6ea1SDimitry Andric return EqualsToThreshold && !NotEqualToThreshold;
767*0fca6ea1SDimitry Andric }
768*0fca6ea1SDimitry Andric return false;
769*0fca6ea1SDimitry Andric }
770*0fca6ea1SDimitry Andric
registerArrayBoundCheckerV2(CheckerManager & mgr)7710b57cec5SDimitry Andric void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
7720b57cec5SDimitry Andric mgr.registerChecker<ArrayBoundCheckerV2>();
7730b57cec5SDimitry Andric }
7740b57cec5SDimitry Andric
shouldRegisterArrayBoundCheckerV2(const CheckerManager & mgr)7755ffd83dbSDimitry Andric bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
7760b57cec5SDimitry Andric return true;
7770b57cec5SDimitry Andric }
778