xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
100b57cec5SDimitry Andric // which looks for an out-of-bound array element access.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "clang/AST/CharUnits.h"
15*5f757f3fSDimitry Andric #include "clang/AST/ParentMapContext.h"
165ffd83dbSDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
1781ad6265SDimitry Andric #include "clang/StaticAnalyzer/Checkers/Taint.h"
180b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
190b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
200b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
210b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23fe6060f1SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
240b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
250b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
26*5f757f3fSDimitry Andric #include "llvm/Support/FormatVariadic.h"
270b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
28bdd1243dSDimitry Andric #include <optional>
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace clang;
310b57cec5SDimitry Andric using namespace ento;
320b57cec5SDimitry Andric using namespace taint;
33*5f757f3fSDimitry Andric using llvm::formatv;
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric namespace {
36*5f757f3fSDimitry Andric enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
370b57cec5SDimitry Andric 
38*5f757f3fSDimitry Andric struct Messages {
39*5f757f3fSDimitry Andric   std::string Short, Full;
40*5f757f3fSDimitry Andric };
410b57cec5SDimitry Andric 
42*5f757f3fSDimitry Andric // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
43*5f757f3fSDimitry Andric // instead of `PreStmt` because the current implementation passes the whole
44*5f757f3fSDimitry Andric // expression to `CheckerContext::getSVal()` which only works after the
45*5f757f3fSDimitry Andric // symbolic evaluation of the expression. (To turn them into `PreStmt`
46*5f757f3fSDimitry Andric // callbacks, we'd need to duplicate the logic that evaluates these
47*5f757f3fSDimitry Andric // expressions.) The `MemberExpr` callback would work as `PreStmt` but it's
48*5f757f3fSDimitry Andric // defined as `PostStmt` for the sake of consistency with the other callbacks.
49*5f757f3fSDimitry Andric class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
50*5f757f3fSDimitry Andric                                            check::PostStmt<UnaryOperator>,
51*5f757f3fSDimitry Andric                                            check::PostStmt<MemberExpr>> {
52*5f757f3fSDimitry Andric   BugType BT{this, "Out-of-bound access"};
53*5f757f3fSDimitry Andric   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
54*5f757f3fSDimitry Andric 
55*5f757f3fSDimitry Andric   void performCheck(const Expr *E, CheckerContext &C) const;
56*5f757f3fSDimitry Andric 
57*5f757f3fSDimitry Andric   void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
58*5f757f3fSDimitry Andric                  NonLoc Offset, Messages Msgs) const;
5906c3fb27SDimitry Andric 
6006c3fb27SDimitry Andric   static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
610b57cec5SDimitry Andric 
62*5f757f3fSDimitry Andric   static bool isInAddressOf(const Stmt *S, ASTContext &AC);
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric public:
65*5f757f3fSDimitry Andric   void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {
66*5f757f3fSDimitry Andric     performCheck(E, C);
67*5f757f3fSDimitry Andric   }
68*5f757f3fSDimitry Andric   void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {
69*5f757f3fSDimitry Andric     if (E->getOpcode() == UO_Deref)
70*5f757f3fSDimitry Andric       performCheck(E, C);
71*5f757f3fSDimitry Andric   }
72*5f757f3fSDimitry Andric   void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {
73*5f757f3fSDimitry Andric     if (E->isArrow())
74*5f757f3fSDimitry Andric       performCheck(E->getBase(), C);
75*5f757f3fSDimitry Andric   }
760b57cec5SDimitry Andric };
77*5f757f3fSDimitry Andric 
78*5f757f3fSDimitry Andric } // anonymous namespace
79*5f757f3fSDimitry Andric 
80*5f757f3fSDimitry Andric /// For a given Location that can be represented as a symbolic expression
81*5f757f3fSDimitry Andric /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
82*5f757f3fSDimitry Andric /// Arr and the distance of Location from the beginning of Arr (expressed in a
83*5f757f3fSDimitry Andric /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
84*5f757f3fSDimitry Andric /// cannot be determined.
85*5f757f3fSDimitry Andric static std::optional<std::pair<const SubRegion *, NonLoc>>
86*5f757f3fSDimitry Andric computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
87*5f757f3fSDimitry Andric   QualType T = SVB.getArrayIndexType();
88*5f757f3fSDimitry Andric   auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
89*5f757f3fSDimitry Andric     // We will use this utility to add and multiply values.
90*5f757f3fSDimitry Andric     return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
91*5f757f3fSDimitry Andric   };
92*5f757f3fSDimitry Andric 
93*5f757f3fSDimitry Andric   const SubRegion *OwnerRegion = nullptr;
94*5f757f3fSDimitry Andric   std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
95*5f757f3fSDimitry Andric 
96*5f757f3fSDimitry Andric   const ElementRegion *CurRegion =
97*5f757f3fSDimitry Andric       dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
98*5f757f3fSDimitry Andric 
99*5f757f3fSDimitry Andric   while (CurRegion) {
100*5f757f3fSDimitry Andric     const auto Index = CurRegion->getIndex().getAs<NonLoc>();
101*5f757f3fSDimitry Andric     if (!Index)
102*5f757f3fSDimitry Andric       return std::nullopt;
103*5f757f3fSDimitry Andric 
104*5f757f3fSDimitry Andric     QualType ElemType = CurRegion->getElementType();
105*5f757f3fSDimitry Andric 
106*5f757f3fSDimitry Andric     // FIXME: The following early return was presumably added to safeguard the
107*5f757f3fSDimitry Andric     // getTypeSizeInChars() call (which doesn't accept an incomplete type), but
108*5f757f3fSDimitry Andric     // it seems that `ElemType` cannot be incomplete at this point.
109*5f757f3fSDimitry Andric     if (ElemType->isIncompleteType())
110*5f757f3fSDimitry Andric       return std::nullopt;
111*5f757f3fSDimitry Andric 
112*5f757f3fSDimitry Andric     // Calculate Delta = Index * sizeof(ElemType).
113*5f757f3fSDimitry Andric     NonLoc Size = SVB.makeArrayIndex(
114*5f757f3fSDimitry Andric         SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
115*5f757f3fSDimitry Andric     auto Delta = EvalBinOp(BO_Mul, *Index, Size);
116*5f757f3fSDimitry Andric     if (!Delta)
117*5f757f3fSDimitry Andric       return std::nullopt;
118*5f757f3fSDimitry Andric 
119*5f757f3fSDimitry Andric     // Perform Offset += Delta.
120*5f757f3fSDimitry Andric     Offset = EvalBinOp(BO_Add, *Offset, *Delta);
121*5f757f3fSDimitry Andric     if (!Offset)
122*5f757f3fSDimitry Andric       return std::nullopt;
123*5f757f3fSDimitry Andric 
124*5f757f3fSDimitry Andric     OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
125*5f757f3fSDimitry Andric     // When this is just another ElementRegion layer, we need to continue the
126*5f757f3fSDimitry Andric     // offset calculations:
127*5f757f3fSDimitry Andric     CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
128*5f757f3fSDimitry Andric   }
129*5f757f3fSDimitry Andric 
130*5f757f3fSDimitry Andric   if (OwnerRegion)
131*5f757f3fSDimitry Andric     return std::make_pair(OwnerRegion, *Offset);
132*5f757f3fSDimitry Andric 
133*5f757f3fSDimitry Andric   return std::nullopt;
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric // TODO: once the constraint manager is smart enough to handle non simplified
1370b57cec5SDimitry Andric // symbolic expressions remove this function. Note that this can not be used in
1380b57cec5SDimitry Andric // the constraint manager as is, since this does not handle overflows. It is
1390b57cec5SDimitry Andric // safe to assume, however, that memory offsets will not overflow.
14006c3fb27SDimitry Andric // NOTE: callers of this function need to be aware of the effects of overflows
14106c3fb27SDimitry Andric // and signed<->unsigned conversions!
1420b57cec5SDimitry Andric static std::pair<NonLoc, nonloc::ConcreteInt>
1430b57cec5SDimitry Andric getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
1440b57cec5SDimitry Andric                      SValBuilder &svalBuilder) {
145bdd1243dSDimitry Andric   std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
1460b57cec5SDimitry Andric   if (SymVal && SymVal->isExpression()) {
1470b57cec5SDimitry Andric     if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
1480b57cec5SDimitry Andric       llvm::APSInt constant =
1490b57cec5SDimitry Andric           APSIntType(extent.getValue()).convert(SIE->getRHS());
1500b57cec5SDimitry Andric       switch (SIE->getOpcode()) {
1510b57cec5SDimitry Andric       case BO_Mul:
152*5f757f3fSDimitry Andric         // The constant should never be 0 here, becasue multiplication by zero
153*5f757f3fSDimitry Andric         // is simplified by the engine.
1540b57cec5SDimitry Andric         if ((extent.getValue() % constant) != 0)
1550b57cec5SDimitry Andric           return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
1560b57cec5SDimitry Andric         else
1570b57cec5SDimitry Andric           return getSimplifiedOffsets(
1580b57cec5SDimitry Andric               nonloc::SymbolVal(SIE->getLHS()),
1590b57cec5SDimitry Andric               svalBuilder.makeIntVal(extent.getValue() / constant),
1600b57cec5SDimitry Andric               svalBuilder);
1610b57cec5SDimitry Andric       case BO_Add:
1620b57cec5SDimitry Andric         return getSimplifiedOffsets(
1630b57cec5SDimitry Andric             nonloc::SymbolVal(SIE->getLHS()),
1640b57cec5SDimitry Andric             svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
1650b57cec5SDimitry Andric       default:
1660b57cec5SDimitry Andric         break;
1670b57cec5SDimitry Andric       }
1680b57cec5SDimitry Andric     }
1690b57cec5SDimitry Andric   }
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric   return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
1720b57cec5SDimitry Andric }
1730b57cec5SDimitry Andric 
17406c3fb27SDimitry Andric // Evaluate the comparison Value < Threshold with the help of the custom
17506c3fb27SDimitry Andric // simplification algorithm defined for this checker. Return a pair of states,
17606c3fb27SDimitry Andric // where the first one corresponds to "value below threshold" and the second
17706c3fb27SDimitry Andric // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
17806c3fb27SDimitry Andric // the case when the evaluation fails.
179*5f757f3fSDimitry Andric // If the optional argument CheckEquality is true, then use BO_EQ instead of
180*5f757f3fSDimitry Andric // the default BO_LT after consistently applying the same simplification steps.
18106c3fb27SDimitry Andric static std::pair<ProgramStateRef, ProgramStateRef>
18206c3fb27SDimitry Andric compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
183*5f757f3fSDimitry Andric                         SValBuilder &SVB, bool CheckEquality = false) {
18406c3fb27SDimitry Andric   if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
18506c3fb27SDimitry Andric     std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
18606c3fb27SDimitry Andric   }
18706c3fb27SDimitry Andric   if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
18806c3fb27SDimitry Andric     QualType T = Value.getType(SVB.getContext());
18906c3fb27SDimitry Andric     if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) {
19006c3fb27SDimitry Andric       // In this case we reduced the bound check to a comparison of the form
19106c3fb27SDimitry Andric       //   (symbol or value with unsigned type) < (negative number)
19206c3fb27SDimitry Andric       // which is always false. We are handling these cases separately because
19306c3fb27SDimitry Andric       // evalBinOpNN can perform a signed->unsigned conversion that turns the
19406c3fb27SDimitry Andric       // negative number into a huge positive value and leads to wildly
19506c3fb27SDimitry Andric       // inaccurate conclusions.
19606c3fb27SDimitry Andric       return {nullptr, State};
19706c3fb27SDimitry Andric     }
19806c3fb27SDimitry Andric   }
199*5f757f3fSDimitry Andric   const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
20006c3fb27SDimitry Andric   auto BelowThreshold =
201*5f757f3fSDimitry Andric       SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())
202*5f757f3fSDimitry Andric           .getAs<NonLoc>();
20306c3fb27SDimitry Andric 
20406c3fb27SDimitry Andric   if (BelowThreshold)
20506c3fb27SDimitry Andric     return State->assume(*BelowThreshold);
20606c3fb27SDimitry Andric 
20706c3fb27SDimitry Andric   return {nullptr, nullptr};
20806c3fb27SDimitry Andric }
20906c3fb27SDimitry Andric 
210*5f757f3fSDimitry Andric static std::string getRegionName(const SubRegion *Region) {
211*5f757f3fSDimitry Andric   if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
212*5f757f3fSDimitry Andric     return RegName;
2130b57cec5SDimitry Andric 
214*5f757f3fSDimitry Andric   // Field regions only have descriptive names when their parent has a
215*5f757f3fSDimitry Andric   // descriptive name; so we provide a fallback representation for them:
216*5f757f3fSDimitry Andric   if (const auto *FR = Region->getAs<FieldRegion>()) {
217*5f757f3fSDimitry Andric     if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
218*5f757f3fSDimitry Andric       return formatv("the field '{0}'", Name);
219*5f757f3fSDimitry Andric     return "the unnamed field";
220*5f757f3fSDimitry Andric   }
221*5f757f3fSDimitry Andric 
222*5f757f3fSDimitry Andric   if (isa<AllocaRegion>(Region))
223*5f757f3fSDimitry Andric     return "the memory returned by 'alloca'";
224*5f757f3fSDimitry Andric 
225*5f757f3fSDimitry Andric   if (isa<SymbolicRegion>(Region) &&
226*5f757f3fSDimitry Andric       isa<HeapSpaceRegion>(Region->getMemorySpace()))
227*5f757f3fSDimitry Andric     return "the heap area";
228*5f757f3fSDimitry Andric 
229*5f757f3fSDimitry Andric   if (isa<StringRegion>(Region))
230*5f757f3fSDimitry Andric     return "the string literal";
231*5f757f3fSDimitry Andric 
232*5f757f3fSDimitry Andric   return "the region";
233*5f757f3fSDimitry Andric }
234*5f757f3fSDimitry Andric 
235*5f757f3fSDimitry Andric static std::optional<int64_t> getConcreteValue(NonLoc SV) {
236*5f757f3fSDimitry Andric   if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
237*5f757f3fSDimitry Andric     return ConcreteVal->getValue().tryExtValue();
238*5f757f3fSDimitry Andric   }
239*5f757f3fSDimitry Andric   return std::nullopt;
240*5f757f3fSDimitry Andric }
241*5f757f3fSDimitry Andric 
242*5f757f3fSDimitry Andric static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
243*5f757f3fSDimitry Andric   static const char *ShortMsgTemplates[] = {
244*5f757f3fSDimitry Andric       "Out of bound access to memory preceding {0}",
245*5f757f3fSDimitry Andric       "Out of bound access to memory after the end of {0}",
246*5f757f3fSDimitry Andric       "Potential out of bound access to {0} with tainted offset"};
247*5f757f3fSDimitry Andric 
248*5f757f3fSDimitry Andric   return formatv(ShortMsgTemplates[Kind], RegName);
249*5f757f3fSDimitry Andric }
250*5f757f3fSDimitry Andric 
251*5f757f3fSDimitry Andric static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
252*5f757f3fSDimitry Andric   std::string RegName = getRegionName(Region);
253*5f757f3fSDimitry Andric   SmallString<128> Buf;
254*5f757f3fSDimitry Andric   llvm::raw_svector_ostream Out(Buf);
255*5f757f3fSDimitry Andric   Out << "Access of " << RegName << " at negative byte offset";
256*5f757f3fSDimitry Andric   if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
257*5f757f3fSDimitry Andric     Out << ' ' << ConcreteIdx->getValue();
258*5f757f3fSDimitry Andric   return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)};
259*5f757f3fSDimitry Andric }
260*5f757f3fSDimitry Andric 
261*5f757f3fSDimitry Andric static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
262*5f757f3fSDimitry Andric                                NonLoc Offset, NonLoc Extent, SVal Location) {
263*5f757f3fSDimitry Andric   std::string RegName = getRegionName(Region);
264*5f757f3fSDimitry Andric   const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
265*5f757f3fSDimitry Andric   assert(EReg && "this checker only handles element access");
266*5f757f3fSDimitry Andric   QualType ElemType = EReg->getElementType();
267*5f757f3fSDimitry Andric 
268*5f757f3fSDimitry Andric   std::optional<int64_t> OffsetN = getConcreteValue(Offset);
269*5f757f3fSDimitry Andric   std::optional<int64_t> ExtentN = getConcreteValue(Extent);
270*5f757f3fSDimitry Andric 
271*5f757f3fSDimitry Andric   bool UseByteOffsets = true;
272*5f757f3fSDimitry Andric   if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
273*5f757f3fSDimitry Andric     const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
274*5f757f3fSDimitry Andric     const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
275*5f757f3fSDimitry Andric     if (!OffsetHasRemainder && !ExtentHasRemainder) {
276*5f757f3fSDimitry Andric       UseByteOffsets = false;
277*5f757f3fSDimitry Andric       if (OffsetN)
278*5f757f3fSDimitry Andric         *OffsetN /= ElemSize;
279*5f757f3fSDimitry Andric       if (ExtentN)
280*5f757f3fSDimitry Andric         *ExtentN /= ElemSize;
281*5f757f3fSDimitry Andric     }
282*5f757f3fSDimitry Andric   }
283*5f757f3fSDimitry Andric 
284*5f757f3fSDimitry Andric   SmallString<256> Buf;
285*5f757f3fSDimitry Andric   llvm::raw_svector_ostream Out(Buf);
286*5f757f3fSDimitry Andric   Out << "Access of ";
287*5f757f3fSDimitry Andric   if (!ExtentN && !UseByteOffsets)
288*5f757f3fSDimitry Andric     Out << "'" << ElemType.getAsString() << "' element in ";
289*5f757f3fSDimitry Andric   Out << RegName << " at ";
290*5f757f3fSDimitry Andric   if (OffsetN) {
291*5f757f3fSDimitry Andric     Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN;
292*5f757f3fSDimitry Andric   } else {
293*5f757f3fSDimitry Andric     Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index");
294*5f757f3fSDimitry Andric   }
295*5f757f3fSDimitry Andric   if (ExtentN) {
296*5f757f3fSDimitry Andric     Out << ", while it holds only ";
297*5f757f3fSDimitry Andric     if (*ExtentN != 1)
298*5f757f3fSDimitry Andric       Out << *ExtentN;
299*5f757f3fSDimitry Andric     else
300*5f757f3fSDimitry Andric       Out << "a single";
301*5f757f3fSDimitry Andric     if (UseByteOffsets)
302*5f757f3fSDimitry Andric       Out << " byte";
303*5f757f3fSDimitry Andric     else
304*5f757f3fSDimitry Andric       Out << " '" << ElemType.getAsString() << "' element";
305*5f757f3fSDimitry Andric 
306*5f757f3fSDimitry Andric     if (*ExtentN > 1)
307*5f757f3fSDimitry Andric       Out << "s";
308*5f757f3fSDimitry Andric   }
309*5f757f3fSDimitry Andric 
310*5f757f3fSDimitry Andric   return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)};
311*5f757f3fSDimitry Andric }
312*5f757f3fSDimitry Andric 
313*5f757f3fSDimitry Andric static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
314*5f757f3fSDimitry Andric   std::string RegName = getRegionName(Region);
315*5f757f3fSDimitry Andric   return {formatv("Potential out of bound access to {0} with tainted {1}",
316*5f757f3fSDimitry Andric                   RegName, OffsetName),
317*5f757f3fSDimitry Andric           formatv("Access of {0} with a tainted {1} that may be too large",
318*5f757f3fSDimitry Andric                   RegName, OffsetName)};
319*5f757f3fSDimitry Andric }
320*5f757f3fSDimitry Andric 
321*5f757f3fSDimitry Andric void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
3220b57cec5SDimitry Andric   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
3230b57cec5SDimitry Andric   // some new logic here that reasons directly about memory region extents.
3240b57cec5SDimitry Andric   // Once that logic is more mature, we can bring it back to assumeInBound()
3250b57cec5SDimitry Andric   // for all clients to use.
3260b57cec5SDimitry Andric   //
3270b57cec5SDimitry Andric   // The algorithm we are using here for bounds checking is to see if the
3280b57cec5SDimitry Andric   // memory access is within the extent of the base region.  Since we
3290b57cec5SDimitry Andric   // have some flexibility in defining the base region, we can achieve
3300b57cec5SDimitry Andric   // various levels of conservatism in our buffer overflow checking.
33106c3fb27SDimitry Andric 
332*5f757f3fSDimitry Andric   const SVal Location = C.getSVal(E);
333*5f757f3fSDimitry Andric 
33406c3fb27SDimitry Andric   // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
33506c3fb27SDimitry Andric   //   #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
33606c3fb27SDimitry Andric   // and incomplete analysis of these leads to false positives. As even
33706c3fb27SDimitry Andric   // accurate reports would be confusing for the users, just disable reports
33806c3fb27SDimitry Andric   // from these macros:
339*5f757f3fSDimitry Andric   if (isFromCtypeMacro(E, C.getASTContext()))
34006c3fb27SDimitry Andric     return;
34106c3fb27SDimitry Andric 
342*5f757f3fSDimitry Andric   ProgramStateRef State = C.getState();
343*5f757f3fSDimitry Andric   SValBuilder &SVB = C.getSValBuilder();
3440b57cec5SDimitry Andric 
345*5f757f3fSDimitry Andric   const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
346*5f757f3fSDimitry Andric       computeOffset(State, SVB, Location);
3470b57cec5SDimitry Andric 
34806c3fb27SDimitry Andric   if (!RawOffset)
3490b57cec5SDimitry Andric     return;
3500b57cec5SDimitry Andric 
351*5f757f3fSDimitry Andric   auto [Reg, ByteOffset] = *RawOffset;
3520b57cec5SDimitry Andric 
35306c3fb27SDimitry Andric   // CHECK LOWER BOUND
354*5f757f3fSDimitry Andric   const MemSpaceRegion *Space = Reg->getMemorySpace();
355*5f757f3fSDimitry Andric   if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
356*5f757f3fSDimitry Andric     // A symbolic region in unknown space represents an unknown pointer that
357*5f757f3fSDimitry Andric     // may point into the middle of an array, so we don't look for underflows.
358*5f757f3fSDimitry Andric     // Both conditions are significant because we want to check underflows in
359*5f757f3fSDimitry Andric     // symbolic regions on the heap (which may be introduced by checkers like
360*5f757f3fSDimitry Andric     // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
361*5f757f3fSDimitry Andric     // non-symbolic regions (e.g. a field subregion of a symbolic region) in
362*5f757f3fSDimitry Andric     // unknown space.
363*5f757f3fSDimitry Andric     auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
364*5f757f3fSDimitry Andric         State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
3650b57cec5SDimitry Andric 
366*5f757f3fSDimitry Andric     if (PrecedesLowerBound && !WithinLowerBound) {
36706c3fb27SDimitry Andric       // We know that the index definitely precedes the lower bound.
368*5f757f3fSDimitry Andric       Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
369*5f757f3fSDimitry Andric       reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs);
3700b57cec5SDimitry Andric       return;
3710b57cec5SDimitry Andric     }
3720b57cec5SDimitry Andric 
373*5f757f3fSDimitry Andric     if (WithinLowerBound)
374*5f757f3fSDimitry Andric       State = WithinLowerBound;
3750b57cec5SDimitry Andric   }
3760b57cec5SDimitry Andric 
37706c3fb27SDimitry Andric   // CHECK UPPER BOUND
378*5f757f3fSDimitry Andric   DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
37906c3fb27SDimitry Andric   if (auto KnownSize = Size.getAs<NonLoc>()) {
380*5f757f3fSDimitry Andric     auto [WithinUpperBound, ExceedsUpperBound] =
381*5f757f3fSDimitry Andric         compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
3820b57cec5SDimitry Andric 
383*5f757f3fSDimitry Andric     if (ExceedsUpperBound) {
384*5f757f3fSDimitry Andric       if (!WithinUpperBound) {
38506c3fb27SDimitry Andric         // We know that the index definitely exceeds the upper bound.
386*5f757f3fSDimitry Andric         if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
387*5f757f3fSDimitry Andric           // ...but this is within an addressof expression, so we need to check
388*5f757f3fSDimitry Andric           // for the exceptional case that `&array[size]` is valid.
389*5f757f3fSDimitry Andric           auto [EqualsToThreshold, NotEqualToThreshold] =
390*5f757f3fSDimitry Andric               compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize,
391*5f757f3fSDimitry Andric                                       SVB, /*CheckEquality=*/true);
392*5f757f3fSDimitry Andric           if (EqualsToThreshold && !NotEqualToThreshold) {
393*5f757f3fSDimitry Andric             // We are definitely in the exceptional case, so return early
394*5f757f3fSDimitry Andric             // instead of reporting a bug.
395*5f757f3fSDimitry Andric             C.addTransition(EqualsToThreshold);
3960b57cec5SDimitry Andric             return;
3970b57cec5SDimitry Andric           }
398*5f757f3fSDimitry Andric         }
399*5f757f3fSDimitry Andric         Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
400*5f757f3fSDimitry Andric                                        *KnownSize, Location);
401*5f757f3fSDimitry Andric         reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
402*5f757f3fSDimitry Andric         return;
403*5f757f3fSDimitry Andric       }
404*5f757f3fSDimitry Andric       if (isTainted(State, ByteOffset)) {
405*5f757f3fSDimitry Andric         // Both cases are possible, but the offset is tainted, so report.
406*5f757f3fSDimitry Andric         std::string RegName = getRegionName(Reg);
407*5f757f3fSDimitry Andric 
408*5f757f3fSDimitry Andric         // Diagnostic detail: "tainted offset" is always correct, but the
409*5f757f3fSDimitry Andric         // common case is that 'idx' is tainted in 'arr[idx]' and then it's
410*5f757f3fSDimitry Andric         // nicer to say "tainted index".
411*5f757f3fSDimitry Andric         const char *OffsetName = "offset";
412*5f757f3fSDimitry Andric         if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
413*5f757f3fSDimitry Andric           if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
414*5f757f3fSDimitry Andric             OffsetName = "index";
415*5f757f3fSDimitry Andric 
416*5f757f3fSDimitry Andric         Messages Msgs = getTaintMsgs(Reg, OffsetName);
417*5f757f3fSDimitry Andric         reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
41806c3fb27SDimitry Andric         return;
41906c3fb27SDimitry Andric       }
42006c3fb27SDimitry Andric     }
4210b57cec5SDimitry Andric 
422*5f757f3fSDimitry Andric     if (WithinUpperBound)
423*5f757f3fSDimitry Andric       State = WithinUpperBound;
4240b57cec5SDimitry Andric   }
4250b57cec5SDimitry Andric 
426*5f757f3fSDimitry Andric   C.addTransition(State);
4270b57cec5SDimitry Andric }
4280b57cec5SDimitry Andric 
429*5f757f3fSDimitry Andric void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
430*5f757f3fSDimitry Andric                                     ProgramStateRef ErrorState, OOB_Kind Kind,
431*5f757f3fSDimitry Andric                                     NonLoc Offset, Messages Msgs) const {
432*5f757f3fSDimitry Andric 
433*5f757f3fSDimitry Andric   ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
434*5f757f3fSDimitry Andric   if (!ErrorNode)
43506c3fb27SDimitry Andric     return;
43606c3fb27SDimitry Andric 
437*5f757f3fSDimitry Andric   auto BR = std::make_unique<PathSensitiveBugReport>(
438*5f757f3fSDimitry Andric       Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
43906c3fb27SDimitry Andric 
44006c3fb27SDimitry Andric   // Track back the propagation of taintedness.
441*5f757f3fSDimitry Andric   if (Kind == OOB_Taint)
442*5f757f3fSDimitry Andric     for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
44306c3fb27SDimitry Andric       BR->markInteresting(Sym);
44406c3fb27SDimitry Andric 
445*5f757f3fSDimitry Andric   C.emitReport(std::move(BR));
4460b57cec5SDimitry Andric }
4470b57cec5SDimitry Andric 
44806c3fb27SDimitry Andric bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
44906c3fb27SDimitry Andric   SourceLocation Loc = S->getBeginLoc();
45006c3fb27SDimitry Andric   if (!Loc.isMacroID())
45106c3fb27SDimitry Andric     return false;
45206c3fb27SDimitry Andric 
45306c3fb27SDimitry Andric   StringRef MacroName = Lexer::getImmediateMacroName(
45406c3fb27SDimitry Andric       Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
45506c3fb27SDimitry Andric 
45606c3fb27SDimitry Andric   if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
45706c3fb27SDimitry Andric     return false;
45806c3fb27SDimitry Andric 
45906c3fb27SDimitry Andric   return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
46006c3fb27SDimitry Andric           (MacroName == "isblank") || (MacroName == "isdigit") ||
46106c3fb27SDimitry Andric           (MacroName == "isgraph") || (MacroName == "islower") ||
46206c3fb27SDimitry Andric           (MacroName == "isnctrl") || (MacroName == "isprint") ||
46306c3fb27SDimitry Andric           (MacroName == "ispunct") || (MacroName == "isspace") ||
46406c3fb27SDimitry Andric           (MacroName == "isupper") || (MacroName == "isxdigit"));
4650b57cec5SDimitry Andric }
4660b57cec5SDimitry Andric 
467*5f757f3fSDimitry Andric bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
468*5f757f3fSDimitry Andric   ParentMapContext &ParentCtx = ACtx.getParentMapContext();
469*5f757f3fSDimitry Andric   do {
470*5f757f3fSDimitry Andric     const DynTypedNodeList Parents = ParentCtx.getParents(*S);
471*5f757f3fSDimitry Andric     if (Parents.empty())
472*5f757f3fSDimitry Andric       return false;
473*5f757f3fSDimitry Andric     S = Parents[0].get<Stmt>();
474*5f757f3fSDimitry Andric   } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
475*5f757f3fSDimitry Andric   const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
476*5f757f3fSDimitry Andric   return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
4770b57cec5SDimitry Andric }
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
4800b57cec5SDimitry Andric   mgr.registerChecker<ArrayBoundCheckerV2>();
4810b57cec5SDimitry Andric }
4820b57cec5SDimitry Andric 
4835ffd83dbSDimitry Andric bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
4840b57cec5SDimitry Andric   return true;
4850b57cec5SDimitry Andric }
486