xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This defines CStringChecker, which is an assortment of checks on calls
10  // to functions in <string.h>.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "InterCheckerAPI.h"
15  #include "clang/AST/OperationKinds.h"
16  #include "clang/Basic/Builtins.h"
17  #include "clang/Basic/CharInfo.h"
18  #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19  #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20  #include "clang/StaticAnalyzer/Core/Checker.h"
21  #include "clang/StaticAnalyzer/Core/CheckerManager.h"
22  #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
23  #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
24  #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25  #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
26  #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
27  #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28  #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
29  #include "llvm/ADT/APSInt.h"
30  #include "llvm/ADT/STLExtras.h"
31  #include "llvm/ADT/StringExtras.h"
32  #include "llvm/Support/Casting.h"
33  #include "llvm/Support/raw_ostream.h"
34  #include <functional>
35  #include <optional>
36  
37  using namespace clang;
38  using namespace ento;
39  using namespace std::placeholders;
40  
41  namespace {
/// An argument of the call being modeled: the expression itself plus its
/// zero-based position in the argument list. Diagnostics in this file print
/// the position as ArgumentIndex + 1.
struct AnyArgExpr {
  const Expr *Expression;
  unsigned ArgumentIndex;
};
// Distinct types for the different argument roles. They add no members; they
// only let signatures state which role (source, destination, size) an
// argument plays.
struct SourceArgExpr : AnyArgExpr {};
struct DestinationArgExpr : AnyArgExpr {};
struct SizeArgExpr : AnyArgExpr {};
49  
// Buffer type in which the text of a diagnostic is assembled.
using ErrorMessage = SmallString<128>;
// Kind of buffer access being checked; selects the wording of out-of-bounds
// messages and whether the buffer is additionally checked for initialization.
enum class AccessKind { write, read };
52  
createOutOfBoundErrorMsg(StringRef FunctionDescription,AccessKind Access)53  static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
54                                               AccessKind Access) {
55    ErrorMessage Message;
56    llvm::raw_svector_ostream Os(Message);
57  
58    // Function classification like: Memory copy function
59    Os << toUppercase(FunctionDescription.front())
60       << &FunctionDescription.data()[1];
61  
62    if (Access == AccessKind::write) {
63      Os << " overflows the destination buffer";
64    } else { // read access
65      Os << " accesses out-of-bound array element";
66    }
67  
68    return Message;
69  }
70  
// Which concatenation function, if any, is being modeled; passed to
// evalStrcpyCommon as its `appendK` argument.
enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };

// Character width of the modeled function: regular (char) or wide (wchar_t).
enum class CharKind { Regular = 0, Wide };
// Short aliases for the two CharKind values, used in the callback table.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
76  
getCharPtrType(ASTContext & Ctx,CharKind CK)77  static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
78    return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
79                                                      : Ctx.WideCharTy);
80  }
81  
/// Checker for calls to C string and memory functions. It subscribes to the
/// eval::Call, PreStmt<DeclStmt>, LiveSymbols, DeadSymbols and RegionChanges
/// callbacks and dispatches recognized calls through the `Callbacks` table.
class CStringChecker : public Checker< eval::Call,
                                         check::PreStmt<DeclStmt>,
                                         check::LiveSymbols,
                                         check::DeadSymbols,
                                         check::RegionChanges
                                         > {
  // One bug type per diagnostic category. They are mutable so that the const
  // member functions below can assign them.
  // NOTE(review): presumably created lazily on first report -- confirm in the
  // emit*Bug definitions.
  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
      BT_NotCString, BT_AdditionOverflow, BT_UninitRead;

  // Description of the function currently being modeled; the re-usable checks
  // read it when composing diagnostic messages.
  mutable const char *CurrentFunctionDescription = nullptr;

public:
  /// The filter is used to filter out the diagnostics which are not enabled by
  /// the user.
  struct CStringChecksFilter {
    // Enable flags, one per diagnostic category; all off by default.
    bool CheckCStringNullArg = false;
    bool CheckCStringOutOfBounds = false;
    bool CheckCStringBufferOverlap = false;
    bool CheckCStringNotNullTerm = false;
    bool CheckCStringUninitializedRead = false;

    // NOTE(review): presumably the registered checker name used for each
    // category's reports -- confirm at the registration site.
    CheckerNameRef CheckNameCStringNullArg;
    CheckerNameRef CheckNameCStringOutOfBounds;
    CheckerNameRef CheckNameCStringBufferOverlap;
    CheckerNameRef CheckNameCStringNotNullTerm;
    CheckerNameRef CheckNameCStringUninitializedRead;
  };

  CStringChecksFilter Filter;

  // Returns the address of a function-local static, i.e. a pointer value
  // unique to this checker.
  static void *getTag() { static int tag; return &tag; }

  // Checker callbacks corresponding to the Checker<> template arguments.
  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;

  ProgramStateRef
    checkRegionChanges(ProgramStateRef state,
                       const InvalidatedSymbols *,
                       ArrayRef<const MemRegion *> ExplicitRegions,
                       ArrayRef<const MemRegion *> Regions,
                       const LocationContext *LCtx,
                       const CallEvent *Call) const;

  // Common signature of every modeling callback: the checker instance, the
  // checker context and the call being evaluated.
  using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
                                     const CallEvent &)>;

  // Maps each recognized function to the member function that models it.
  // Handlers shared between regular and wide variants take a trailing
  // CharKind, which std::bind supplies here.
  CallDescriptionMap<FnCheck> Callbacks = {
      {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
      {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
      {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
      {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
      {{CDM::CLibrary, {"memcmp"}, 3},
       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
      {{CDM::CLibrary, {"wmemcmp"}, 3},
       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
      {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
      {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
      {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
       &CStringChecker::evalMemset},
      {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
      // FIXME: C23 introduces 'memset_explicit', maybe also model that
      {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
       &CStringChecker::evalStrcpy},
      {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
       &CStringChecker::evalStrncpy},
      {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
       &CStringChecker::evalStpcpy},
      {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
       &CStringChecker::evalStrlcpy},
      {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
       &CStringChecker::evalStrcat},
      {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
       &CStringChecker::evalStrncat},
      {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
       &CStringChecker::evalStrlcat},
      {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
       &CStringChecker::evalstrLength},
      {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
      {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
       &CStringChecker::evalstrnLength},
      {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
      {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
      {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
      {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
      {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
      {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
      {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
      {{CDM::CLibrary, {"bcmp"}, 3},
       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
      {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
      {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
       &CStringChecker::evalBzero},

      // When recognizing calls to the following variadic functions, we accept
      // any number of arguments in the call (std::nullopt = accept any
      // number), but check that in the declaration there are 2 and 3
      // parameters respectively. (Note that the parameter count does not
      // include the "...". Calls where the number of arguments is too small
      // will be discarded by the callback.)
      {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
       &CStringChecker::evalSprintf},
      {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
       &CStringChecker::evalSnprintf},
  };

  // These require a bit of special handling.
  // Descriptions of three-argument std::copy and std::copy_backward; they are
  // kept outside the Callbacks table.
  CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
      StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};

  // Selects the modeling callback for a call.
  FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;

  // Memory copy family; evalCopyCommon is the shared implementation.
  void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
  void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
  void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
  void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
  void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
                      ProgramStateRef state, SizeArgExpr Size,
                      DestinationArgExpr Dest, SourceArgExpr Source,
                      bool Restricted, bool IsMempcpy, CharKind CK) const;

  void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;

  // String length family; evalstrLengthCommon is the shared implementation.
  void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
  void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
  void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
                           bool IsStrnlen = false) const;

  // String copy family; evalStrcpyCommon is the shared implementation and
  // also takes the concatenation kind.
  void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
  void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
  void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
  void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
  void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
                        bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
                        bool returnPtr = true) const;

  void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
  void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
  void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;

  // String comparison family; evalStrcmpCommon is the shared implementation.
  void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
  void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
  void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
  void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
  void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
                        bool IsBounded = false, bool IgnoreCase = false) const;

  void evalStrsep(CheckerContext &C, const CallEvent &Call) const;

  void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
  void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
  void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
  void evalMemset(CheckerContext &C, const CallEvent &Call) const;
  void evalBzero(CheckerContext &C, const CallEvent &Call) const;

  // Formatted output family; evalSprintfCommon is the shared implementation.
  void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
  void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
  void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
                         bool IsBounded) const;

  // Utility methods
  // Splits `state` on whether V equals the zero value of type Ty; the first
  // element of the pair is the "is zero" state, the second the "non-zero" one.
  std::pair<ProgramStateRef , ProgramStateRef >
  static assumeZero(CheckerContext &C,
                    ProgramStateRef state, SVal V, QualType Ty);

  static ProgramStateRef setCStringLength(ProgramStateRef state,
                                              const MemRegion *MR,
                                              SVal strLength);
  static SVal getCStringLengthForRegion(CheckerContext &C,
                                        ProgramStateRef &state,
                                        const Expr *Ex,
                                        const MemRegion *MR,
                                        bool hypothetical);
  SVal getCStringLength(CheckerContext &C,
                        ProgramStateRef &state,
                        const Expr *Ex,
                        SVal Buf,
                        bool hypothetical = false) const;

  const StringLiteral *getCStringLiteral(CheckerContext &C,
                                         ProgramStateRef &state,
                                         const Expr *expr,
                                         SVal val) const;

  /// Invalidate the destination buffer determined by characters copied.
  static ProgramStateRef
  invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
                                    const Expr *BufE, SVal BufV, SVal SizeV,
                                    QualType SizeTy);

  /// Operation never overflows, do not invalidate the super region.
  static ProgramStateRef invalidateDestinationBufferNeverOverflows(
      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);

  /// We do not know whether the operation can overflow (e.g. size is unknown),
  /// invalidate the super region and escape related pointers.
  static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);

  /// Invalidate the source buffer for escaping pointers.
  static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
                                                ProgramStateRef S,
                                                const Expr *BufE, SVal BufV);

  /// @param InvalidationTraitOperations Determine how to invalidate the
  /// MemRegion by setting the invalidation traits. Return true to cause pointer
  /// escape, or false otherwise.
  static ProgramStateRef invalidateBufferAux(
      CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
      llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
                              const MemRegion *)>
          InvalidationTraitOperations);

  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
                              const MemRegion *MR);

  static bool memsetAux(const Expr *DstBuffer, SVal CharE,
                        const Expr *Size, CheckerContext &C,
                        ProgramStateRef &State);

  // Re-usable checks
  // Each check returns the state to continue with, or a null state when the
  // check failed; a null input state is propagated unchanged.
  ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
                               AnyArgExpr Arg, SVal l) const;
  // Check whether the origin region behind \p Element (like the actual array
  // region \p Element is from) is initialized.
  ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
                            AnyArgExpr Buffer, SVal Element, SVal Size) const;
  ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
                                AnyArgExpr Buffer, SVal Element,
                                AccessKind Access,
                                CharKind CK = CharKind::Regular) const;
  ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
                                    AnyArgExpr Buffer, SizeArgExpr Size,
                                    AccessKind Access,
                                    CharKind CK = CharKind::Regular) const;
  ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
                               SizeArgExpr Size, AnyArgExpr First,
                               AnyArgExpr Second,
                               CharKind CK = CharKind::Regular) const;
  void emitOverlapBug(CheckerContext &C,
                      ProgramStateRef state,
                      const Stmt *First,
                      const Stmt *Second) const;

  // Report emitters, one per diagnostic category.
  void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
                      StringRef WarningMsg) const;
  void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
                          const Stmt *S, StringRef WarningMsg) const;
  void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
                         const Stmt *S, StringRef WarningMsg) const;
  void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
  void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
                                const Expr *E, StringRef Msg) const;
  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
                                            ProgramStateRef state,
                                            NonLoc left,
                                            NonLoc right) const;

  // Return true if the destination buffer of the copy function may be in bound.
  // Expects SVal of Size to be positive and unsigned.
  // Expects SVal of FirstBuf to be a FieldRegion.
  static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
                                SVal BufVal, QualType BufTy, SVal LengthVal,
                                QualType LengthTy);
};
353  
354  } //end anonymous namespace
355  
// Program-state map named CStringLength, keyed by `const MemRegion *` with
// `SVal` values.
// NOTE(review): presumably the string length tracked for each region (cf.
// setCStringLength) -- confirm against its users.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
357  
358  //===----------------------------------------------------------------------===//
359  // Individual checks and utility methods.
360  //===----------------------------------------------------------------------===//
361  
362  std::pair<ProgramStateRef, ProgramStateRef>
363  CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
364                             QualType Ty) {
365    std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
366    if (!val)
367      return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
368  
369    SValBuilder &svalBuilder = C.getSValBuilder();
370    DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
371    return State->assume(svalBuilder.evalEQ(State, *val, zero));
372  }
373  
checkNonNull(CheckerContext & C,ProgramStateRef State,AnyArgExpr Arg,SVal l) const374  ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
375                                               ProgramStateRef State,
376                                               AnyArgExpr Arg, SVal l) const {
377    // If a previous check has failed, propagate the failure.
378    if (!State)
379      return nullptr;
380  
381    ProgramStateRef stateNull, stateNonNull;
382    std::tie(stateNull, stateNonNull) =
383        assumeZero(C, State, l, Arg.Expression->getType());
384  
385    if (stateNull && !stateNonNull) {
386      if (Filter.CheckCStringNullArg) {
387        SmallString<80> buf;
388        llvm::raw_svector_ostream OS(buf);
389        assert(CurrentFunctionDescription);
390        OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
391           << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
392           << CurrentFunctionDescription;
393  
394        emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
395      }
396      return nullptr;
397    }
398  
399    // From here on, assume that the value is non-null.
400    assert(stateNonNull);
401    return stateNonNull;
402  }
403  
getIndex(ProgramStateRef State,const ElementRegion * ER,CharKind CK)404  static std::optional<NonLoc> getIndex(ProgramStateRef State,
405                                        const ElementRegion *ER, CharKind CK) {
406    SValBuilder &SVB = State->getStateManager().getSValBuilder();
407    ASTContext &Ctx = SVB.getContext();
408  
409    if (CK == CharKind::Regular) {
410      if (ER->getValueType() != Ctx.CharTy)
411        return {};
412      return ER->getIndex();
413    }
414  
415    if (ER->getValueType() != Ctx.WideCharTy)
416      return {};
417  
418    QualType SizeTy = Ctx.getSizeType();
419    NonLoc WideSize =
420        SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
421                       SizeTy)
422            .castAs<NonLoc>();
423    SVal Offset =
424        SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
425    if (Offset.isUnknown())
426      return {};
427    return Offset.castAs<NonLoc>();
428  }
429  
430  // Basically 1 -> 1st, 12 -> 12th, etc.
printIdxWithOrdinalSuffix(llvm::raw_ostream & Os,unsigned Idx)431  static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
432    Os << Idx << llvm::getOrdinalSuffix(Idx);
433  }
434  
checkInit(CheckerContext & C,ProgramStateRef State,AnyArgExpr Buffer,SVal Element,SVal Size) const435  ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
436                                            ProgramStateRef State,
437                                            AnyArgExpr Buffer, SVal Element,
438                                            SVal Size) const {
439  
440    // If a previous check has failed, propagate the failure.
441    if (!State)
442      return nullptr;
443  
444    const MemRegion *R = Element.getAsRegion();
445    const auto *ER = dyn_cast_or_null<ElementRegion>(R);
446    if (!ER)
447      return State;
448  
449    const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
450    if (!SuperR)
451      return State;
452  
453    // FIXME: We ought to able to check objects as well. Maybe
454    // UninitializedObjectChecker could help?
455    if (!SuperR->getValueType()->isArrayType())
456      return State;
457  
458    SValBuilder &SVB = C.getSValBuilder();
459    ASTContext &Ctx = SVB.getContext();
460  
461    const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
462    const NonLoc Zero = SVB.makeZeroArrayIndex();
463  
464    std::optional<Loc> FirstElementVal =
465        State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
466    if (!FirstElementVal)
467      return State;
468  
469    // Ensure that we wouldn't read uninitialized value.
470    if (Filter.CheckCStringUninitializedRead &&
471        State->getSVal(*FirstElementVal).isUndef()) {
472      llvm::SmallString<258> Buf;
473      llvm::raw_svector_ostream OS(Buf);
474      OS << "The first element of the ";
475      printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
476      OS << " argument is undefined";
477      emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
478      return nullptr;
479    }
480  
481    // We won't check whether the entire region is fully initialized -- lets just
482    // check that the first and the last element is. So, onto checking the last
483    // element:
484    const QualType IdxTy = SVB.getArrayIndexType();
485  
486    NonLoc ElemSize =
487        SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
488            .castAs<NonLoc>();
489  
490    // FIXME: Check that the size arg to the cstring function is divisible by
491    // size of the actual element type?
492  
493    // The type of the argument to the cstring function is either char or wchar,
494    // but thats not the type of the original array (or memory region).
495    // Suppose the following:
496    //   int t[5];
497    //   memcpy(dst, t, sizeof(t) / sizeof(t[0]));
498    // When checking whether t is fully initialized, we see it as char array of
499    // size sizeof(int)*5. If we check the last element as a character, we read
500    // the last byte of an integer, which will be undefined. But just because
501    // that value is undefined, it doesn't mean that the element is uninitialized!
502    // For this reason, we need to retrieve the actual last element with the
503    // correct type.
504  
505    // Divide the size argument to the cstring function by the actual element
506    // type. This value will be size of the array, or the index to the
507    // past-the-end element.
508    std::optional<NonLoc> Offset =
509        SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
510                        IdxTy)
511            .getAs<NonLoc>();
512  
513    // Retrieve the index of the last element.
514    const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
515    SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
516  
517    if (!Offset)
518      return State;
519  
520    SVal LastElementVal =
521        State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
522    if (!isa<Loc>(LastElementVal))
523      return State;
524  
525    if (Filter.CheckCStringUninitializedRead &&
526        State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
527      const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
528      // If we can't get emit a sensible last element index, just bail out --
529      // prefer to emit nothing in favour of emitting garbage quality reports.
530      if (!IdxInt) {
531        C.addSink();
532        return nullptr;
533      }
534      llvm::SmallString<258> Buf;
535      llvm::raw_svector_ostream OS(Buf);
536      OS << "The last accessed element (at index ";
537      OS << IdxInt->getExtValue();
538      OS << ") in the ";
539      printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
540      OS << " argument is undefined";
541      emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
542      return nullptr;
543    }
544    return State;
545  }
546  
547  // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
CheckLocation(CheckerContext & C,ProgramStateRef state,AnyArgExpr Buffer,SVal Element,AccessKind Access,CharKind CK) const548  ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
549                                                ProgramStateRef state,
550                                                AnyArgExpr Buffer, SVal Element,
551                                                AccessKind Access,
552                                                CharKind CK) const {
553  
554    // If a previous check has failed, propagate the failure.
555    if (!state)
556      return nullptr;
557  
558    // Check for out of bound array element access.
559    const MemRegion *R = Element.getAsRegion();
560    if (!R)
561      return state;
562  
563    const auto *ER = dyn_cast<ElementRegion>(R);
564    if (!ER)
565      return state;
566  
567    // Get the index of the accessed element.
568    std::optional<NonLoc> Idx = getIndex(state, ER, CK);
569    if (!Idx)
570      return state;
571  
572    // Get the size of the array.
573    const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
574    DefinedOrUnknownSVal Size =
575        getDynamicExtent(state, superReg, C.getSValBuilder());
576  
577    auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
578    if (StOutBound && !StInBound) {
579      // These checks are either enabled by the CString out-of-bounds checker
580      // explicitly or implicitly by the Malloc checker.
581      // In the latter case we only do modeling but do not emit warning.
582      if (!Filter.CheckCStringOutOfBounds)
583        return nullptr;
584  
585      // Emit a bug report.
586      ErrorMessage Message =
587          createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
588      emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
589      return nullptr;
590    }
591  
592    // Array bound check succeeded.  From this point forward the array bound
593    // should always succeed.
594    return StInBound;
595  }
596  
597  ProgramStateRef
CheckBufferAccess(CheckerContext & C,ProgramStateRef State,AnyArgExpr Buffer,SizeArgExpr Size,AccessKind Access,CharKind CK) const598  CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
599                                    AnyArgExpr Buffer, SizeArgExpr Size,
600                                    AccessKind Access, CharKind CK) const {
601    // If a previous check has failed, propagate the failure.
602    if (!State)
603      return nullptr;
604  
605    SValBuilder &svalBuilder = C.getSValBuilder();
606    ASTContext &Ctx = svalBuilder.getContext();
607  
608    QualType SizeTy = Size.Expression->getType();
609    QualType PtrTy = getCharPtrType(Ctx, CK);
610  
611    // Check that the first buffer is non-null.
612    SVal BufVal = C.getSVal(Buffer.Expression);
613    State = checkNonNull(C, State, Buffer, BufVal);
614    if (!State)
615      return nullptr;
616  
617    // If out-of-bounds checking is turned off, skip the rest.
618    if (!Filter.CheckCStringOutOfBounds)
619      return State;
620  
621    SVal BufStart =
622        svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
623  
624    // Check if the first byte of the buffer is accessible.
625    State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
626  
627    if (!State)
628      return nullptr;
629  
630    // Get the access length and make sure it is known.
631    // FIXME: This assumes the caller has already checked that the access length
632    // is positive. And that it's unsigned.
633    SVal LengthVal = C.getSVal(Size.Expression);
634    std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
635    if (!Length)
636      return State;
637  
638    // Compute the offset of the last element to be accessed: size-1.
639    NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
640    SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
641    if (Offset.isUnknown())
642      return nullptr;
643    NonLoc LastOffset = Offset.castAs<NonLoc>();
644  
645    // Check that the first buffer is sufficiently long.
646    if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
647  
648      SVal BufEnd =
649          svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
650      State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
651      if (Access == AccessKind::read)
652        State = checkInit(C, State, Buffer, BufEnd, *Length);
653  
654      // If the buffer isn't large enough, abort.
655      if (!State)
656        return nullptr;
657    }
658  
659    // Large enough or not, return this state!
660    return State;
661  }
662  
CheckOverlap(CheckerContext & C,ProgramStateRef state,SizeArgExpr Size,AnyArgExpr First,AnyArgExpr Second,CharKind CK) const663  ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
664                                               ProgramStateRef state,
665                                               SizeArgExpr Size, AnyArgExpr First,
666                                               AnyArgExpr Second,
667                                               CharKind CK) const {
668    if (!Filter.CheckCStringBufferOverlap)
669      return state;
670  
671    // Do a simple check for overlap: if the two arguments are from the same
672    // buffer, see if the end of the first is greater than the start of the second
673    // or vice versa.
674  
675    // If a previous check has failed, propagate the failure.
676    if (!state)
677      return nullptr;
678  
679    ProgramStateRef stateTrue, stateFalse;
680  
681    // Assume different address spaces cannot overlap.
682    if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
683        Second.Expression->getType()->getPointeeType().getAddressSpace())
684      return state;
685  
686    // Get the buffer values and make sure they're known locations.
687    const LocationContext *LCtx = C.getLocationContext();
688    SVal firstVal = state->getSVal(First.Expression, LCtx);
689    SVal secondVal = state->getSVal(Second.Expression, LCtx);
690  
691    std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
692    if (!firstLoc)
693      return state;
694  
695    std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
696    if (!secondLoc)
697      return state;
698  
699    // Are the two values the same?
700    SValBuilder &svalBuilder = C.getSValBuilder();
701    std::tie(stateTrue, stateFalse) =
702        state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
703  
704    if (stateTrue && !stateFalse) {
705      // If the values are known to be equal, that's automatically an overlap.
706      emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
707      return nullptr;
708    }
709  
710    // assume the two expressions are not equal.
711    assert(stateFalse);
712    state = stateFalse;
713  
714    // Which value comes first?
715    QualType cmpTy = svalBuilder.getConditionType();
716    SVal reverse =
717        svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
718    std::optional<DefinedOrUnknownSVal> reverseTest =
719        reverse.getAs<DefinedOrUnknownSVal>();
720    if (!reverseTest)
721      return state;
722  
723    std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
724    if (stateTrue) {
725      if (stateFalse) {
726        // If we don't know which one comes first, we can't perform this test.
727        return state;
728      } else {
729        // Switch the values so that firstVal is before secondVal.
730        std::swap(firstLoc, secondLoc);
731  
732        // Switch the Exprs as well, so that they still correspond.
733        std::swap(First, Second);
734      }
735    }
736  
737    // Get the length, and make sure it too is known.
738    SVal LengthVal = state->getSVal(Size.Expression, LCtx);
739    std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
740    if (!Length)
741      return state;
742  
743    // Convert the first buffer's start address to char*.
744    // Bail out if the cast fails.
745    ASTContext &Ctx = svalBuilder.getContext();
746    QualType CharPtrTy = getCharPtrType(Ctx, CK);
747    SVal FirstStart =
748        svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
749    std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
750    if (!FirstStartLoc)
751      return state;
752  
753    // Compute the end of the first buffer. Bail out if THAT fails.
754    SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
755                                            *Length, CharPtrTy);
756    std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
757    if (!FirstEndLoc)
758      return state;
759  
760    // Is the end of the first buffer past the start of the second buffer?
761    SVal Overlap =
762        svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
763    std::optional<DefinedOrUnknownSVal> OverlapTest =
764        Overlap.getAs<DefinedOrUnknownSVal>();
765    if (!OverlapTest)
766      return state;
767  
768    std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
769  
770    if (stateTrue && !stateFalse) {
771      // Overlap!
772      emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
773      return nullptr;
774    }
775  
776    // assume the two expressions don't overlap.
777    assert(stateFalse);
778    return stateFalse;
779  }
780  
emitOverlapBug(CheckerContext & C,ProgramStateRef state,const Stmt * First,const Stmt * Second) const781  void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
782                                    const Stmt *First, const Stmt *Second) const {
783    ExplodedNode *N = C.generateErrorNode(state);
784    if (!N)
785      return;
786  
787    if (!BT_Overlap)
788      BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
789                                   categories::UnixAPI, "Improper arguments"));
790  
791    // Generate a report for this bug.
792    auto report = std::make_unique<PathSensitiveBugReport>(
793        *BT_Overlap, "Arguments must not be overlapping buffers", N);
794    report->addRange(First->getSourceRange());
795    report->addRange(Second->getSourceRange());
796  
797    C.emitReport(std::move(report));
798  }
799  
emitNullArgBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const800  void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
801                                      const Stmt *S, StringRef WarningMsg) const {
802    if (ExplodedNode *N = C.generateErrorNode(State)) {
803      if (!BT_Null) {
804        // FIXME: This call uses the string constant 'categories::UnixAPI' as the
805        // description of the bug; it should be replaced by a real description.
806        BT_Null.reset(
807            new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
808      }
809  
810      auto Report =
811          std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N);
812      Report->addRange(S->getSourceRange());
813      if (const auto *Ex = dyn_cast<Expr>(S))
814        bugreporter::trackExpressionValue(N, Ex, *Report);
815      C.emitReport(std::move(Report));
816    }
817  }
818  
emitUninitializedReadBug(CheckerContext & C,ProgramStateRef State,const Expr * E,StringRef Msg) const819  void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
820                                                ProgramStateRef State,
821                                                const Expr *E,
822                                                StringRef Msg) const {
823    if (ExplodedNode *N = C.generateErrorNode(State)) {
824      if (!BT_UninitRead)
825        BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
826                                        "Accessing unitialized/garbage values"));
827  
828      auto Report =
829          std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
830      Report->addNote("Other elements might also be undefined",
831                      Report->getLocation());
832      Report->addRange(E->getSourceRange());
833      bugreporter::trackExpressionValue(N, E, *Report);
834      C.emitReport(std::move(Report));
835    }
836  }
837  
emitOutOfBoundsBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const838  void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
839                                          ProgramStateRef State, const Stmt *S,
840                                          StringRef WarningMsg) const {
841    if (ExplodedNode *N = C.generateErrorNode(State)) {
842      if (!BT_Bounds)
843        BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds
844                                        ? Filter.CheckNameCStringOutOfBounds
845                                        : Filter.CheckNameCStringNullArg,
846                                    "Out-of-bound array access"));
847  
848      // FIXME: It would be nice to eventually make this diagnostic more clear,
849      // e.g., by referencing the original declaration or by saying *why* this
850      // reference is outside the range.
851      auto Report =
852          std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
853      Report->addRange(S->getSourceRange());
854      C.emitReport(std::move(Report));
855    }
856  }
857  
emitNotCStringBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const858  void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
859                                         const Stmt *S,
860                                         StringRef WarningMsg) const {
861    if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
862      if (!BT_NotCString) {
863        // FIXME: This call uses the string constant 'categories::UnixAPI' as the
864        // description of the bug; it should be replaced by a real description.
865        BT_NotCString.reset(
866            new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
867      }
868  
869      auto Report =
870          std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
871  
872      Report->addRange(S->getSourceRange());
873      C.emitReport(std::move(Report));
874    }
875  }
876  
emitAdditionOverflowBug(CheckerContext & C,ProgramStateRef State) const877  void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
878                                               ProgramStateRef State) const {
879    if (ExplodedNode *N = C.generateErrorNode(State)) {
880      if (!BT_AdditionOverflow) {
881        // FIXME: This call uses the word "API" as the description of the bug;
882        // it should be replaced by a better error message (if this unlikely
883        // situation continues to exist as a separate bug type).
884        BT_AdditionOverflow.reset(
885            new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
886      }
887  
888      // This isn't a great error message, but this should never occur in real
889      // code anyway -- you'd have to create a buffer longer than a size_t can
890      // represent, which is sort of a contradiction.
891      const char *WarningMsg =
892          "This expression will create a string whose length is too big to "
893          "be represented as a size_t";
894  
895      auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
896                                                             WarningMsg, N);
897      C.emitReport(std::move(Report));
898    }
899  }
900  
checkAdditionOverflow(CheckerContext & C,ProgramStateRef state,NonLoc left,NonLoc right) const901  ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
902                                                       ProgramStateRef state,
903                                                       NonLoc left,
904                                                       NonLoc right) const {
905    // If out-of-bounds checking is turned off, skip the rest.
906    if (!Filter.CheckCStringOutOfBounds)
907      return state;
908  
909    // If a previous check has failed, propagate the failure.
910    if (!state)
911      return nullptr;
912  
913    SValBuilder &svalBuilder = C.getSValBuilder();
914    BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
915  
916    QualType sizeTy = svalBuilder.getContext().getSizeType();
917    const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
918    NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
919  
920    SVal maxMinusRight;
921    if (isa<nonloc::ConcreteInt>(right)) {
922      maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
923                                                   sizeTy);
924    } else {
925      // Try switching the operands. (The order of these two assignments is
926      // important!)
927      maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
928                                              sizeTy);
929      left = right;
930    }
931  
932    if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
933      QualType cmpTy = svalBuilder.getConditionType();
934      // If left > max - right, we have an overflow.
935      SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
936                                                  *maxMinusRightNL, cmpTy);
937  
938      ProgramStateRef stateOverflow, stateOkay;
939      std::tie(stateOverflow, stateOkay) =
940        state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
941  
942      if (stateOverflow && !stateOkay) {
943        // We have an overflow. Emit a bug report.
944        emitAdditionOverflowBug(C, stateOverflow);
945        return nullptr;
946      }
947  
948      // From now on, assume an overflow didn't occur.
949      assert(stateOkay);
950      state = stateOkay;
951    }
952  
953    return state;
954  }
955  
setCStringLength(ProgramStateRef state,const MemRegion * MR,SVal strLength)956  ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
957                                                  const MemRegion *MR,
958                                                  SVal strLength) {
959    assert(!strLength.isUndef() && "Attempt to set an undefined string length");
960  
961    MR = MR->StripCasts();
962  
963    switch (MR->getKind()) {
964    case MemRegion::StringRegionKind:
965      // FIXME: This can happen if we strcpy() into a string region. This is
966      // undefined [C99 6.4.5p6], but we should still warn about it.
967      return state;
968  
969    case MemRegion::SymbolicRegionKind:
970    case MemRegion::AllocaRegionKind:
971    case MemRegion::NonParamVarRegionKind:
972    case MemRegion::ParamVarRegionKind:
973    case MemRegion::FieldRegionKind:
974    case MemRegion::ObjCIvarRegionKind:
975      // These are the types we can currently track string lengths for.
976      break;
977  
978    case MemRegion::ElementRegionKind:
979      // FIXME: Handle element regions by upper-bounding the parent region's
980      // string length.
981      return state;
982  
983    default:
984      // Other regions (mostly non-data) can't have a reliable C string length.
985      // For now, just ignore the change.
986      // FIXME: These are rare but not impossible. We should output some kind of
987      // warning for things like strcpy((char[]){'a', 0}, "b");
988      return state;
989    }
990  
991    if (strLength.isUnknown())
992      return state->remove<CStringLength>(MR);
993  
994    return state->set<CStringLength>(MR, strLength);
995  }
996  
getCStringLengthForRegion(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,const MemRegion * MR,bool hypothetical)997  SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
998                                                 ProgramStateRef &state,
999                                                 const Expr *Ex,
1000                                                 const MemRegion *MR,
1001                                                 bool hypothetical) {
1002    if (!hypothetical) {
1003      // If there's a recorded length, go ahead and return it.
1004      const SVal *Recorded = state->get<CStringLength>(MR);
1005      if (Recorded)
1006        return *Recorded;
1007    }
1008  
1009    // Otherwise, get a new symbol and update the state.
1010    SValBuilder &svalBuilder = C.getSValBuilder();
1011    QualType sizeTy = svalBuilder.getContext().getSizeType();
1012    SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
1013                                                      MR, Ex, sizeTy,
1014                                                      C.getLocationContext(),
1015                                                      C.blockCount());
1016  
1017    if (!hypothetical) {
1018      if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1019        // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1020        BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1021        const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
1022        llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
1023        const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
1024                                                          fourInt);
1025        NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
1026        SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
1027                                                  svalBuilder.getConditionType());
1028        state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
1029      }
1030      state = state->set<CStringLength>(MR, strLength);
1031    }
1032  
1033    return strLength;
1034  }
1035  
getCStringLength(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,SVal Buf,bool hypothetical) const1036  SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1037                                        const Expr *Ex, SVal Buf,
1038                                        bool hypothetical) const {
1039    const MemRegion *MR = Buf.getAsRegion();
1040    if (!MR) {
1041      // If we can't get a region, see if it's something we /know/ isn't a
1042      // C string. In the context of locations, the only time we can issue such
1043      // a warning is for labels.
1044      if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1045        if (Filter.CheckCStringNotNullTerm) {
1046          SmallString<120> buf;
1047          llvm::raw_svector_ostream os(buf);
1048          assert(CurrentFunctionDescription);
1049          os << "Argument to " << CurrentFunctionDescription
1050             << " is the address of the label '" << Label->getLabel()->getName()
1051             << "', which is not a null-terminated string";
1052  
1053          emitNotCStringBug(C, state, Ex, os.str());
1054        }
1055        return UndefinedVal();
1056      }
1057  
1058      // If it's not a region and not a label, give up.
1059      return UnknownVal();
1060    }
1061  
1062    // If we have a region, strip casts from it and see if we can figure out
1063    // its length. For anything we can't figure out, just return UnknownVal.
1064    MR = MR->StripCasts();
1065  
1066    switch (MR->getKind()) {
1067    case MemRegion::StringRegionKind: {
1068      // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1069      // so we can assume that the byte length is the correct C string length.
1070      SValBuilder &svalBuilder = C.getSValBuilder();
1071      QualType sizeTy = svalBuilder.getContext().getSizeType();
1072      const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
1073      return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
1074    }
1075    case MemRegion::NonParamVarRegionKind: {
1076      // If we have a global constant with a string literal initializer,
1077      // compute the initializer's length.
1078      const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1079      if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1080        if (const Expr *Init = Decl->getInit()) {
1081          if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
1082            SValBuilder &SvalBuilder = C.getSValBuilder();
1083            QualType SizeTy = SvalBuilder.getContext().getSizeType();
1084            return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
1085          }
1086        }
1087      }
1088      [[fallthrough]];
1089    }
1090    case MemRegion::SymbolicRegionKind:
1091    case MemRegion::AllocaRegionKind:
1092    case MemRegion::ParamVarRegionKind:
1093    case MemRegion::FieldRegionKind:
1094    case MemRegion::ObjCIvarRegionKind:
1095      return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1096    case MemRegion::CompoundLiteralRegionKind:
1097      // FIXME: Can we track this? Is it necessary?
1098      return UnknownVal();
1099    case MemRegion::ElementRegionKind:
1100      // FIXME: How can we handle this? It's not good enough to subtract the
1101      // offset from the base string length; consider "123\x00567" and &a[5].
1102      return UnknownVal();
1103    default:
1104      // Other regions (mostly non-data) can't have a reliable C string length.
1105      // In this case, an error is emitted and UndefinedVal is returned.
1106      // The caller should always be prepared to handle this case.
1107      if (Filter.CheckCStringNotNullTerm) {
1108        SmallString<120> buf;
1109        llvm::raw_svector_ostream os(buf);
1110  
1111        assert(CurrentFunctionDescription);
1112        os << "Argument to " << CurrentFunctionDescription << " is ";
1113  
1114        if (SummarizeRegion(os, C.getASTContext(), MR))
1115          os << ", which is not a null-terminated string";
1116        else
1117          os << "not a null-terminated string";
1118  
1119        emitNotCStringBug(C, state, Ex, os.str());
1120      }
1121      return UndefinedVal();
1122    }
1123  }
1124  
getCStringLiteral(CheckerContext & C,ProgramStateRef & state,const Expr * expr,SVal val) const1125  const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1126    ProgramStateRef &state, const Expr *expr, SVal val) const {
1127  
1128    // Get the memory region pointed to by the val.
1129    const MemRegion *bufRegion = val.getAsRegion();
1130    if (!bufRegion)
1131      return nullptr;
1132  
1133    // Strip casts off the memory region.
1134    bufRegion = bufRegion->StripCasts();
1135  
1136    // Cast the memory region to a string region.
1137    const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1138    if (!strRegion)
1139      return nullptr;
1140  
1141    // Return the actual string in the string region.
1142    return strRegion->getStringLiteral();
1143  }
1144  
isFirstBufInBound(CheckerContext & C,ProgramStateRef State,SVal BufVal,QualType BufTy,SVal LengthVal,QualType LengthTy)1145  bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1146                                         SVal BufVal, QualType BufTy,
1147                                         SVal LengthVal, QualType LengthTy) {
1148    // If we do not know that the buffer is long enough we return 'true'.
1149    // Otherwise the parent region of this field region would also get
1150    // invalidated, which would lead to warnings based on an unknown state.
1151  
1152    if (LengthVal.isUnknown())
1153      return false;
1154  
1155    // Originally copied from CheckBufferAccess and CheckLocation.
1156    SValBuilder &SB = C.getSValBuilder();
1157    ASTContext &Ctx = C.getASTContext();
1158  
1159    QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1160  
1161    std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1162    if (!Length)
1163      return true; // cf top comment.
1164  
1165    // Compute the offset of the last element to be accessed: size-1.
1166    NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1167    SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1168    if (Offset.isUnknown())
1169      return true; // cf top comment
1170    NonLoc LastOffset = Offset.castAs<NonLoc>();
1171  
1172    // Check that the first buffer is sufficiently long.
1173    SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1174    std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1175    if (!BufLoc)
1176      return true; // cf top comment.
1177  
1178    SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1179  
1180    // Check for out of bound array element access.
1181    const MemRegion *R = BufEnd.getAsRegion();
1182    if (!R)
1183      return true; // cf top comment.
1184  
1185    const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1186    if (!ER)
1187      return true; // cf top comment.
1188  
1189    // FIXME: Does this crash when a non-standard definition
1190    // of a library function is encountered?
1191    assert(ER->getValueType() == C.getASTContext().CharTy &&
1192           "isFirstBufInBound should only be called with char* ElementRegions");
1193  
1194    // Get the size of the array.
1195    const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1196    DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1197  
1198    // Get the index of the accessed element.
1199    DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1200  
1201    ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1202  
1203    return static_cast<bool>(StInBound);
1204  }
1205  
invalidateDestinationBufferBySize(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV,SVal SizeV,QualType SizeTy)1206  ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1207      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1208      SVal SizeV, QualType SizeTy) {
1209    auto InvalidationTraitOperations =
1210        [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1211         SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1212          // If destination buffer is a field region and access is in bound, do
1213          // not invalidate its super region.
1214          if (MemRegion::FieldRegionKind == R->getKind() &&
1215              isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1216            ITraits.setTrait(
1217                R,
1218                RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1219          }
1220          return false;
1221        };
1222  
1223    return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1224  }
1225  
1226  ProgramStateRef
invalidateDestinationBufferAlwaysEscapeSuperRegion(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1227  CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1228      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1229    auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1230                                          const MemRegion *R) {
1231      return isa<FieldRegion>(R);
1232    };
1233  
1234    return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1235  }
1236  
invalidateDestinationBufferNeverOverflows(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1237  ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1238      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1239    auto InvalidationTraitOperations =
1240        [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1241          if (MemRegion::FieldRegionKind == R->getKind())
1242            ITraits.setTrait(
1243                R,
1244                RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1245          return false;
1246        };
1247  
1248    return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1249  }
1250  
invalidateSourceBuffer(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1251  ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1252                                                         ProgramStateRef S,
1253                                                         const Expr *BufE,
1254                                                         SVal BufV) {
1255    auto InvalidationTraitOperations =
1256        [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1257          ITraits.setTrait(
1258              R->getBaseRegion(),
1259              RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1260          ITraits.setTrait(R,
1261                           RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1262          return true;
1263        };
1264  
1265    return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1266  }
1267  
invalidateBufferAux(CheckerContext & C,ProgramStateRef State,const Expr * E,SVal V,llvm::function_ref<bool (RegionAndSymbolInvalidationTraits &,const MemRegion *)> InvalidationTraitOperations)1268  ProgramStateRef CStringChecker::invalidateBufferAux(
1269      CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1270      llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1271                              const MemRegion *)>
1272          InvalidationTraitOperations) {
1273    std::optional<Loc> L = V.getAs<Loc>();
1274    if (!L)
1275      return State;
1276  
1277    // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1278    // some assumptions about the value that CFRefCount can't. Even so, it should
1279    // probably be refactored.
1280    if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1281      const MemRegion *R = MR->getRegion()->StripCasts();
1282  
1283      // Are we dealing with an ElementRegion?  If so, we should be invalidating
1284      // the super-region.
1285      if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1286        R = ER->getSuperRegion();
1287        // FIXME: What about layers of ElementRegions?
1288      }
1289  
1290      // Invalidate this region.
1291      const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1292      RegionAndSymbolInvalidationTraits ITraits;
1293      bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1294  
1295      return State->invalidateRegions(R, E, C.blockCount(), LCtx,
1296                                      CausesPointerEscape, nullptr, nullptr,
1297                                      &ITraits);
1298    }
1299  
1300    // If we have a non-region value by chance, just remove the binding.
1301    // FIXME: is this necessary or correct? This handles the non-Region
1302    //  cases.  Is it ever valid to store to these?
1303    return State->killBinding(*L);
1304  }
1305  
SummarizeRegion(raw_ostream & os,ASTContext & Ctx,const MemRegion * MR)1306  bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1307                                       const MemRegion *MR) {
1308    switch (MR->getKind()) {
1309    case MemRegion::FunctionCodeRegionKind: {
1310      if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1311        os << "the address of the function '" << *FD << '\'';
1312      else
1313        os << "the address of a function";
1314      return true;
1315    }
1316    case MemRegion::BlockCodeRegionKind:
1317      os << "block text";
1318      return true;
1319    case MemRegion::BlockDataRegionKind:
1320      os << "a block";
1321      return true;
1322    case MemRegion::CXXThisRegionKind:
1323    case MemRegion::CXXTempObjectRegionKind:
1324      os << "a C++ temp object of type "
1325         << cast<TypedValueRegion>(MR)->getValueType();
1326      return true;
1327    case MemRegion::NonParamVarRegionKind:
1328      os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1329      return true;
1330    case MemRegion::ParamVarRegionKind:
1331      os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1332      return true;
1333    case MemRegion::FieldRegionKind:
1334      os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1335      return true;
1336    case MemRegion::ObjCIvarRegionKind:
1337      os << "an instance variable of type "
1338         << cast<TypedValueRegion>(MR)->getValueType();
1339      return true;
1340    default:
1341      return false;
1342    }
1343  }
1344  
memsetAux(const Expr * DstBuffer,SVal CharVal,const Expr * Size,CheckerContext & C,ProgramStateRef & State)1345  bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1346                                 const Expr *Size, CheckerContext &C,
1347                                 ProgramStateRef &State) {
1348    SVal MemVal = C.getSVal(DstBuffer);
1349    SVal SizeVal = C.getSVal(Size);
1350    const MemRegion *MR = MemVal.getAsRegion();
1351    if (!MR)
1352      return false;
1353  
1354    // We're about to model memset by producing a "default binding" in the Store.
1355    // Our current implementation - RegionStore - doesn't support default bindings
1356    // that don't cover the whole base region. So we should first get the offset
1357    // and the base region to figure out whether the offset of buffer is 0.
1358    RegionOffset Offset = MR->getAsOffset();
1359    const MemRegion *BR = Offset.getRegion();
1360  
1361    std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1362    if (!SizeNL)
1363      return false;
1364  
1365    SValBuilder &svalBuilder = C.getSValBuilder();
1366    ASTContext &Ctx = C.getASTContext();
1367  
1368    // void *memset(void *dest, int ch, size_t count);
1369    // For now we can only handle the case of offset is 0 and concrete char value.
1370    if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1371        Offset.getOffset() == 0) {
1372      // Get the base region's size.
1373      DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1374  
1375      ProgramStateRef StateWholeReg, StateNotWholeReg;
1376      std::tie(StateWholeReg, StateNotWholeReg) =
1377          State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1378  
1379      // With the semantic of 'memset()', we should convert the CharVal to
1380      // unsigned char.
1381      CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1382  
1383      ProgramStateRef StateNullChar, StateNonNullChar;
1384      std::tie(StateNullChar, StateNonNullChar) =
1385          assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1386  
1387      if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1388          !StateNonNullChar) {
1389        // If the 'memset()' acts on the whole region of destination buffer and
1390        // the value of the second argument of 'memset()' is zero, bind the second
1391        // argument's value to the destination buffer with 'default binding'.
1392        // FIXME: Since there is no perfect way to bind the non-zero character, we
1393        // can only deal with zero value here. In the future, we need to deal with
1394        // the binding of non-zero value in the case of whole region.
1395        State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1396                                       C.getLocationContext());
1397      } else {
1398        // If the destination buffer's extent is not equal to the value of
1399        // third argument, just invalidate buffer.
1400        State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1401                                                  SizeVal, Size->getType());
1402      }
1403  
1404      if (StateNullChar && !StateNonNullChar) {
1405        // If the value of the second argument of 'memset()' is zero, set the
1406        // string length of destination buffer to 0 directly.
1407        State = setCStringLength(State, MR,
1408                                 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1409      } else if (!StateNullChar && StateNonNullChar) {
1410        SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1411            CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1412            C.getLocationContext(), C.blockCount());
1413  
1414        // If the value of second argument is not zero, then the string length
1415        // is at least the size argument.
1416        SVal NewStrLenGESize = svalBuilder.evalBinOp(
1417            State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1418  
1419        State = setCStringLength(
1420            State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1421            MR, NewStrLen);
1422      }
1423    } else {
1424      // If the offset is not zero and char value is not concrete, we can do
1425      // nothing but invalidate the buffer.
1426      State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1427                                                SizeVal, Size->getType());
1428    }
1429    return true;
1430  }
1431  
1432  //===----------------------------------------------------------------------===//
1433  // evaluation of individual function calls.
1434  //===----------------------------------------------------------------------===//
1435  
evalCopyCommon(CheckerContext & C,const CallEvent & Call,ProgramStateRef state,SizeArgExpr Size,DestinationArgExpr Dest,SourceArgExpr Source,bool Restricted,bool IsMempcpy,CharKind CK) const1436  void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1437                                      ProgramStateRef state, SizeArgExpr Size,
1438                                      DestinationArgExpr Dest,
1439                                      SourceArgExpr Source, bool Restricted,
1440                                      bool IsMempcpy, CharKind CK) const {
1441    CurrentFunctionDescription = "memory copy function";
1442  
1443    // See if the size argument is zero.
1444    const LocationContext *LCtx = C.getLocationContext();
1445    SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1446    QualType sizeTy = Size.Expression->getType();
1447  
1448    ProgramStateRef stateZeroSize, stateNonZeroSize;
1449    std::tie(stateZeroSize, stateNonZeroSize) =
1450        assumeZero(C, state, sizeVal, sizeTy);
1451  
1452    // Get the value of the Dest.
1453    SVal destVal = state->getSVal(Dest.Expression, LCtx);
1454  
1455    // If the size is zero, there won't be any actual memory access, so
1456    // just bind the return value to the destination buffer and return.
1457    if (stateZeroSize && !stateNonZeroSize) {
1458      stateZeroSize =
1459          stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1460      C.addTransition(stateZeroSize);
1461      return;
1462    }
1463  
1464    // If the size can be nonzero, we have to check the other arguments.
1465    if (stateNonZeroSize) {
1466      // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1467      // to the size of the destination buffer, then emit a warning
1468      // that an attacker may provoke a buffer overflow error.
1469      state = stateNonZeroSize;
1470  
1471      // Ensure the destination is not null. If it is NULL there will be a
1472      // NULL pointer dereference.
1473      state = checkNonNull(C, state, Dest, destVal);
1474      if (!state)
1475        return;
1476  
1477      // Get the value of the Src.
1478      SVal srcVal = state->getSVal(Source.Expression, LCtx);
1479  
1480      // Ensure the source is not null. If it is NULL there will be a
1481      // NULL pointer dereference.
1482      state = checkNonNull(C, state, Source, srcVal);
1483      if (!state)
1484        return;
1485  
1486      // Ensure the accesses are valid and that the buffers do not overlap.
1487      state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1488      state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1489  
1490      if (Restricted)
1491        state = CheckOverlap(C, state, Size, Dest, Source, CK);
1492  
1493      if (!state)
1494        return;
1495  
1496      // If this is mempcpy, get the byte after the last byte copied and
1497      // bind the expr.
1498      if (IsMempcpy) {
1499        // Get the byte after the last byte copied.
1500        SValBuilder &SvalBuilder = C.getSValBuilder();
1501        ASTContext &Ctx = SvalBuilder.getContext();
1502        QualType CharPtrTy = getCharPtrType(Ctx, CK);
1503        SVal DestRegCharVal =
1504            SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1505        SVal lastElement = C.getSValBuilder().evalBinOp(
1506            state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1507        // If we don't know how much we copied, we can at least
1508        // conjure a return value for later.
1509        if (lastElement.isUnknown())
1510          lastElement = C.getSValBuilder().conjureSymbolVal(
1511              nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1512  
1513        // The byte after the last byte copied is the return value.
1514        state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1515      } else {
1516        // All other copies return the destination buffer.
1517        // (Well, bcopy() has a void return type, but this won't hurt.)
1518        state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1519      }
1520  
1521      // Invalidate the destination (regular invalidation without pointer-escaping
1522      // the address of the top-level region).
1523      // FIXME: Even if we can't perfectly model the copy, we should see if we
1524      // can use LazyCompoundVals to copy the source values into the destination.
1525      // This would probably remove any existing bindings past the end of the
1526      // copied region, but that's still an improvement over blank invalidation.
1527      state = invalidateDestinationBufferBySize(
1528          C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1529          Size.Expression->getType());
1530  
1531      // Invalidate the source (const-invalidation without const-pointer-escaping
1532      // the address of the top-level region).
1533      state = invalidateSourceBuffer(C, state, Source.Expression,
1534                                     C.getSVal(Source.Expression));
1535  
1536      C.addTransition(state);
1537    }
1538  }
1539  
evalMemcpy(CheckerContext & C,const CallEvent & Call,CharKind CK) const1540  void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1541                                  CharKind CK) const {
1542    // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1543    // The return value is the address of the destination buffer.
1544    DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1545    SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1546    SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1547  
1548    ProgramStateRef State = C.getState();
1549  
1550    constexpr bool IsRestricted = true;
1551    constexpr bool IsMempcpy = false;
1552    evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1553  }
1554  
evalMempcpy(CheckerContext & C,const CallEvent & Call,CharKind CK) const1555  void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1556                                   CharKind CK) const {
1557    // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1558    // The return value is a pointer to the byte following the last written byte.
1559    DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1560    SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1561    SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1562  
1563    constexpr bool IsRestricted = true;
1564    constexpr bool IsMempcpy = true;
1565    evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1566                   IsMempcpy, CK);
1567  }
1568  
evalMemmove(CheckerContext & C,const CallEvent & Call,CharKind CK) const1569  void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1570                                   CharKind CK) const {
1571    // void *memmove(void *dst, const void *src, size_t n);
1572    // The return value is the address of the destination buffer.
1573    DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1574    SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1575    SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1576  
1577    constexpr bool IsRestricted = false;
1578    constexpr bool IsMempcpy = false;
1579    evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1580                   IsMempcpy, CK);
1581  }
1582  
evalBcopy(CheckerContext & C,const CallEvent & Call) const1583  void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1584    // void bcopy(const void *src, void *dst, size_t n);
1585    SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1586    DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1587    SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1588  
1589    constexpr bool IsRestricted = false;
1590    constexpr bool IsMempcpy = false;
1591    evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1592                   IsMempcpy, CharKind::Regular);
1593  }
1594  
evalMemcmp(CheckerContext & C,const CallEvent & Call,CharKind CK) const1595  void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1596                                  CharKind CK) const {
1597    // int memcmp(const void *s1, const void *s2, size_t n);
1598    CurrentFunctionDescription = "memory comparison function";
1599  
1600    AnyArgExpr Left = {Call.getArgExpr(0), 0};
1601    AnyArgExpr Right = {Call.getArgExpr(1), 1};
1602    SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1603  
1604    ProgramStateRef State = C.getState();
1605    SValBuilder &Builder = C.getSValBuilder();
1606    const LocationContext *LCtx = C.getLocationContext();
1607  
1608    // See if the size argument is zero.
1609    SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1610    QualType sizeTy = Size.Expression->getType();
1611  
1612    ProgramStateRef stateZeroSize, stateNonZeroSize;
1613    std::tie(stateZeroSize, stateNonZeroSize) =
1614        assumeZero(C, State, sizeVal, sizeTy);
1615  
1616    // If the size can be zero, the result will be 0 in that case, and we don't
1617    // have to check either of the buffers.
1618    if (stateZeroSize) {
1619      State = stateZeroSize;
1620      State = State->BindExpr(Call.getOriginExpr(), LCtx,
1621                              Builder.makeZeroVal(Call.getResultType()));
1622      C.addTransition(State);
1623    }
1624  
1625    // If the size can be nonzero, we have to check the other arguments.
1626    if (stateNonZeroSize) {
1627      State = stateNonZeroSize;
1628      // If we know the two buffers are the same, we know the result is 0.
1629      // First, get the two buffers' addresses. Another checker will have already
1630      // made sure they're not undefined.
1631      DefinedOrUnknownSVal LV =
1632          State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1633      DefinedOrUnknownSVal RV =
1634          State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1635  
1636      // See if they are the same.
1637      ProgramStateRef SameBuffer, NotSameBuffer;
1638      std::tie(SameBuffer, NotSameBuffer) =
1639          State->assume(Builder.evalEQ(State, LV, RV));
1640  
1641      // If the two arguments are the same buffer, we know the result is 0,
1642      // and we only need to check one size.
1643      if (SameBuffer && !NotSameBuffer) {
1644        State = SameBuffer;
1645        State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1646        if (State) {
1647          State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1648                                       Builder.makeZeroVal(Call.getResultType()));
1649          C.addTransition(State);
1650        }
1651        return;
1652      }
1653  
1654      // If the two arguments might be different buffers, we have to check
1655      // the size of both of them.
1656      assert(NotSameBuffer);
1657      State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1658      State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1659      if (State) {
1660        // The return value is the comparison result, which we don't know.
1661        SVal CmpV = Builder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
1662                                             C.blockCount());
1663        State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1664        C.addTransition(State);
1665      }
1666    }
1667  }
1668  
evalstrLength(CheckerContext & C,const CallEvent & Call) const1669  void CStringChecker::evalstrLength(CheckerContext &C,
1670                                     const CallEvent &Call) const {
1671    // size_t strlen(const char *s);
1672    evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1673  }
1674  
evalstrnLength(CheckerContext & C,const CallEvent & Call) const1675  void CStringChecker::evalstrnLength(CheckerContext &C,
1676                                      const CallEvent &Call) const {
1677    // size_t strnlen(const char *s, size_t maxlen);
1678    evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1679  }
1680  
evalstrLengthCommon(CheckerContext & C,const CallEvent & Call,bool IsStrnlen) const1681  void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1682                                           const CallEvent &Call,
1683                                           bool IsStrnlen) const {
1684    CurrentFunctionDescription = "string length function";
1685    ProgramStateRef state = C.getState();
1686    const LocationContext *LCtx = C.getLocationContext();
1687  
1688    if (IsStrnlen) {
1689      const Expr *maxlenExpr = Call.getArgExpr(1);
1690      SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1691  
1692      ProgramStateRef stateZeroSize, stateNonZeroSize;
1693      std::tie(stateZeroSize, stateNonZeroSize) =
1694        assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1695  
1696      // If the size can be zero, the result will be 0 in that case, and we don't
1697      // have to check the string itself.
1698      if (stateZeroSize) {
1699        SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1700        stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1701        C.addTransition(stateZeroSize);
1702      }
1703  
1704      // If the size is GUARANTEED to be zero, we're done!
1705      if (!stateNonZeroSize)
1706        return;
1707  
1708      // Otherwise, record the assumption that the size is nonzero.
1709      state = stateNonZeroSize;
1710    }
1711  
1712    // Check that the string argument is non-null.
1713    AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1714    SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1715    state = checkNonNull(C, state, Arg, ArgVal);
1716  
1717    if (!state)
1718      return;
1719  
1720    SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1721  
1722    // If the argument isn't a valid C string, there's no valid state to
1723    // transition to.
1724    if (strLength.isUndef())
1725      return;
1726  
1727    DefinedOrUnknownSVal result = UnknownVal();
1728  
1729    // If the check is for strnlen() then bind the return value to no more than
1730    // the maxlen value.
1731    if (IsStrnlen) {
1732      QualType cmpTy = C.getSValBuilder().getConditionType();
1733  
1734      // It's a little unfortunate to be getting this again,
1735      // but it's not that expensive...
1736      const Expr *maxlenExpr = Call.getArgExpr(1);
1737      SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1738  
1739      std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1740      std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1741  
1742      if (strLengthNL && maxlenValNL) {
1743        ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1744  
1745        // Check if the strLength is greater than the maxlen.
1746        std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1747            C.getSValBuilder()
1748                .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1749                .castAs<DefinedOrUnknownSVal>());
1750  
1751        if (stateStringTooLong && !stateStringNotTooLong) {
1752          // If the string is longer than maxlen, return maxlen.
1753          result = *maxlenValNL;
1754        } else if (stateStringNotTooLong && !stateStringTooLong) {
1755          // If the string is shorter than maxlen, return its length.
1756          result = *strLengthNL;
1757        }
1758      }
1759  
1760      if (result.isUnknown()) {
1761        // If we don't have enough information for a comparison, there's
1762        // no guarantee the full string length will actually be returned.
1763        // All we know is the return value is the min of the string length
1764        // and the limit. This is better than nothing.
1765        result = C.getSValBuilder().conjureSymbolVal(
1766            nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1767        NonLoc resultNL = result.castAs<NonLoc>();
1768  
1769        if (strLengthNL) {
1770          state = state->assume(C.getSValBuilder().evalBinOpNN(
1771                                    state, BO_LE, resultNL, *strLengthNL, cmpTy)
1772                                    .castAs<DefinedOrUnknownSVal>(), true);
1773        }
1774  
1775        if (maxlenValNL) {
1776          state = state->assume(C.getSValBuilder().evalBinOpNN(
1777                                    state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1778                                    .castAs<DefinedOrUnknownSVal>(), true);
1779        }
1780      }
1781  
1782    } else {
1783      // This is a plain strlen(), not strnlen().
1784      result = strLength.castAs<DefinedOrUnknownSVal>();
1785  
1786      // If we don't know the length of the string, conjure a return
1787      // value, so it can be used in constraints, at least.
1788      if (result.isUnknown()) {
1789        result = C.getSValBuilder().conjureSymbolVal(
1790            nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1791      }
1792    }
1793  
1794    // Bind the return value.
1795    assert(!result.isUnknown() && "Should have conjured a value by now");
1796    state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1797    C.addTransition(state);
1798  }
1799  
evalStrcpy(CheckerContext & C,const CallEvent & Call) const1800  void CStringChecker::evalStrcpy(CheckerContext &C,
1801                                  const CallEvent &Call) const {
1802    // char *strcpy(char *restrict dst, const char *restrict src);
1803    evalStrcpyCommon(C, Call,
1804                     /* ReturnEnd = */ false,
1805                     /* IsBounded = */ false,
1806                     /* appendK = */ ConcatFnKind::none);
1807  }
1808  
evalStrncpy(CheckerContext & C,const CallEvent & Call) const1809  void CStringChecker::evalStrncpy(CheckerContext &C,
1810                                   const CallEvent &Call) const {
1811    // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1812    evalStrcpyCommon(C, Call,
1813                     /* ReturnEnd = */ false,
1814                     /* IsBounded = */ true,
1815                     /* appendK = */ ConcatFnKind::none);
1816  }
1817  
evalStpcpy(CheckerContext & C,const CallEvent & Call) const1818  void CStringChecker::evalStpcpy(CheckerContext &C,
1819                                  const CallEvent &Call) const {
1820    // char *stpcpy(char *restrict dst, const char *restrict src);
1821    evalStrcpyCommon(C, Call,
1822                     /* ReturnEnd = */ true,
1823                     /* IsBounded = */ false,
1824                     /* appendK = */ ConcatFnKind::none);
1825  }
1826  
evalStrlcpy(CheckerContext & C,const CallEvent & Call) const1827  void CStringChecker::evalStrlcpy(CheckerContext &C,
1828                                   const CallEvent &Call) const {
1829    // size_t strlcpy(char *dest, const char *src, size_t size);
1830    evalStrcpyCommon(C, Call,
1831                     /* ReturnEnd = */ true,
1832                     /* IsBounded = */ true,
1833                     /* appendK = */ ConcatFnKind::none,
1834                     /* returnPtr = */ false);
1835  }
1836  
evalStrcat(CheckerContext & C,const CallEvent & Call) const1837  void CStringChecker::evalStrcat(CheckerContext &C,
1838                                  const CallEvent &Call) const {
1839    // char *strcat(char *restrict s1, const char *restrict s2);
1840    evalStrcpyCommon(C, Call,
1841                     /* ReturnEnd = */ false,
1842                     /* IsBounded = */ false,
1843                     /* appendK = */ ConcatFnKind::strcat);
1844  }
1845  
evalStrncat(CheckerContext & C,const CallEvent & Call) const1846  void CStringChecker::evalStrncat(CheckerContext &C,
1847                                   const CallEvent &Call) const {
1848    // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1849    evalStrcpyCommon(C, Call,
1850                     /* ReturnEnd = */ false,
1851                     /* IsBounded = */ true,
1852                     /* appendK = */ ConcatFnKind::strcat);
1853  }
1854  
evalStrlcat(CheckerContext & C,const CallEvent & Call) const1855  void CStringChecker::evalStrlcat(CheckerContext &C,
1856                                   const CallEvent &Call) const {
1857    // size_t strlcat(char *dst, const char *src, size_t size);
1858    // It will append at most size - strlen(dst) - 1 bytes,
1859    // NULL-terminating the result.
1860    evalStrcpyCommon(C, Call,
1861                     /* ReturnEnd = */ false,
1862                     /* IsBounded = */ true,
1863                     /* appendK = */ ConcatFnKind::strlcat,
1864                     /* returnPtr = */ false);
1865  }
1866  
evalStrcpyCommon(CheckerContext & C,const CallEvent & Call,bool ReturnEnd,bool IsBounded,ConcatFnKind appendK,bool returnPtr) const1867  void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1868                                        bool ReturnEnd, bool IsBounded,
1869                                        ConcatFnKind appendK,
1870                                        bool returnPtr) const {
1871    if (appendK == ConcatFnKind::none)
1872      CurrentFunctionDescription = "string copy function";
1873    else
1874      CurrentFunctionDescription = "string concatenation function";
1875  
1876    ProgramStateRef state = C.getState();
1877    const LocationContext *LCtx = C.getLocationContext();
1878  
1879    // Check that the destination is non-null.
1880    DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1881    SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1882    state = checkNonNull(C, state, Dst, DstVal);
1883    if (!state)
1884      return;
1885  
1886    // Check that the source is non-null.
1887    SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1888    SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1889    state = checkNonNull(C, state, srcExpr, srcVal);
1890    if (!state)
1891      return;
1892  
1893    // Get the string length of the source.
1894    SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1895    std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1896  
1897    // Get the string length of the destination buffer.
1898    SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1899    std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1900  
1901    // If the source isn't a valid C string, give up.
1902    if (strLength.isUndef())
1903      return;
1904  
1905    SValBuilder &svalBuilder = C.getSValBuilder();
1906    QualType cmpTy = svalBuilder.getConditionType();
1907    QualType sizeTy = svalBuilder.getContext().getSizeType();
1908  
1909    // These two values allow checking two kinds of errors:
1910    // - actual overflows caused by a source that doesn't fit in the destination
1911    // - potential overflows caused by a bound that could exceed the destination
1912    SVal amountCopied = UnknownVal();
1913    SVal maxLastElementIndex = UnknownVal();
1914    const char *boundWarning = nullptr;
1915  
1916    // FIXME: Why do we choose the srcExpr if the access has no size?
1917    //  Note that the 3rd argument of the call would be the size parameter.
1918    SizeArgExpr SrcExprAsSizeDummy = {
1919        {srcExpr.Expression, srcExpr.ArgumentIndex}};
1920    state = CheckOverlap(
1921        C, state,
1922        (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1923        Dst, srcExpr);
1924  
1925    if (!state)
1926      return;
1927  
1928    // If the function is strncpy, strncat, etc... it is bounded.
1929    if (IsBounded) {
1930      // Get the max number of characters to copy.
1931      SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1932      SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1933  
1934      // Protect against misdeclared strncpy().
1935      lenVal =
1936          svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1937  
1938      std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1939  
1940      // If we know both values, we might be able to figure out how much
1941      // we're copying.
1942      if (strLengthNL && lenValNL) {
1943        switch (appendK) {
1944        case ConcatFnKind::none:
1945        case ConcatFnKind::strcat: {
1946          ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1947          // Check if the max number to copy is less than the length of the src.
1948          // If the bound is equal to the source length, strncpy won't null-
1949          // terminate the result!
1950          std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1951              svalBuilder
1952                  .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1953                  .castAs<DefinedOrUnknownSVal>());
1954  
1955          if (stateSourceTooLong && !stateSourceNotTooLong) {
1956            // Max number to copy is less than the length of the src, so the
1957            // actual strLength copied is the max number arg.
1958            state = stateSourceTooLong;
1959            amountCopied = lenVal;
1960  
1961          } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1962            // The source buffer entirely fits in the bound.
1963            state = stateSourceNotTooLong;
1964            amountCopied = strLength;
1965          }
1966          break;
1967        }
1968        case ConcatFnKind::strlcat:
1969          if (!dstStrLengthNL)
1970            return;
1971  
1972          // amountCopied = min (size - dstLen - 1 , srcLen)
1973          SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1974                                                   *dstStrLengthNL, sizeTy);
1975          if (!isa<NonLoc>(freeSpace))
1976            return;
1977          freeSpace =
1978              svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1979                                    svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1980          std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1981  
1982          // While unlikely, it is possible that the subtraction is
1983          // too complex to compute, let's check whether it succeeded.
1984          if (!freeSpaceNL)
1985            return;
1986          SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1987              state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1988  
1989          ProgramStateRef TrueState, FalseState;
1990          std::tie(TrueState, FalseState) =
1991              state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1992  
1993          // srcStrLength <= size - dstStrLength -1
1994          if (TrueState && !FalseState) {
1995            amountCopied = strLength;
1996          }
1997  
1998          // srcStrLength > size - dstStrLength -1
1999          if (!TrueState && FalseState) {
2000            amountCopied = freeSpace;
2001          }
2002  
2003          if (TrueState && FalseState)
2004            amountCopied = UnknownVal();
2005          break;
2006        }
2007      }
2008      // We still want to know if the bound is known to be too large.
2009      if (lenValNL) {
2010        switch (appendK) {
2011        case ConcatFnKind::strcat:
2012          // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
2013  
2014          // Get the string length of the destination. If the destination is
2015          // memory that can't have a string length, we shouldn't be copying
2016          // into it anyway.
2017          if (dstStrLength.isUndef())
2018            return;
2019  
2020          if (dstStrLengthNL) {
2021            maxLastElementIndex = svalBuilder.evalBinOpNN(
2022                state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
2023  
2024            boundWarning = "Size argument is greater than the free space in the "
2025                           "destination buffer";
2026          }
2027          break;
2028        case ConcatFnKind::none:
2029        case ConcatFnKind::strlcat:
2030          // For strncpy and strlcat, this is just checking
2031          //  that lenVal <= sizeof(dst).
2032          // (Yes, strncpy and strncat differ in how they treat termination.
2033          // strncat ALWAYS terminates, but strncpy doesn't.)
2034  
2035          // We need a special case for when the copy size is zero, in which
2036          // case strncpy will do no work at all. Our bounds check uses n-1
2037          // as the last element accessed, so n == 0 is problematic.
2038          ProgramStateRef StateZeroSize, StateNonZeroSize;
2039          std::tie(StateZeroSize, StateNonZeroSize) =
2040              assumeZero(C, state, *lenValNL, sizeTy);
2041  
2042          // If the size is known to be zero, we're done.
2043          if (StateZeroSize && !StateNonZeroSize) {
2044            if (returnPtr) {
2045              StateZeroSize =
2046                  StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
2047            } else {
2048              if (appendK == ConcatFnKind::none) {
2049                // strlcpy returns strlen(src)
2050                StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
2051                                                        LCtx, strLength);
2052              } else {
2053                // strlcat returns strlen(src) + strlen(dst)
2054                SVal retSize = svalBuilder.evalBinOp(
2055                    state, BO_Add, strLength, dstStrLength, sizeTy);
2056                StateZeroSize =
2057                    StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2058              }
2059            }
2060            C.addTransition(StateZeroSize);
2061            return;
2062          }
2063  
2064          // Otherwise, go ahead and figure out the last element we'll touch.
2065          // We don't record the non-zero assumption here because we can't
2066          // be sure. We won't warn on a possible zero.
2067          NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2068          maxLastElementIndex =
2069              svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2070          boundWarning = "Size argument is greater than the length of the "
2071                         "destination buffer";
2072          break;
2073        }
2074      }
2075    } else {
2076      // The function isn't bounded. The amount copied should match the length
2077      // of the source buffer.
2078      amountCopied = strLength;
2079    }
2080  
2081    assert(state);
2082  
2083    // This represents the number of characters copied into the destination
2084    // buffer. (It may not actually be the strlen if the destination buffer
2085    // is not terminated.)
2086    SVal finalStrLength = UnknownVal();
2087    SVal strlRetVal = UnknownVal();
2088  
2089    if (appendK == ConcatFnKind::none && !returnPtr) {
2090      // strlcpy returns the sizeof(src)
2091      strlRetVal = strLength;
2092    }
2093  
2094    // If this is an appending function (strcat, strncat...) then set the
2095    // string length to strlen(src) + strlen(dst) since the buffer will
2096    // ultimately contain both.
2097    if (appendK != ConcatFnKind::none) {
2098      // Get the string length of the destination. If the destination is memory
2099      // that can't have a string length, we shouldn't be copying into it anyway.
2100      if (dstStrLength.isUndef())
2101        return;
2102  
2103      if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2104        strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2105                                             *dstStrLengthNL, sizeTy);
2106      }
2107  
2108      std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2109  
2110      // If we know both string lengths, we might know the final string length.
2111      if (amountCopiedNL && dstStrLengthNL) {
2112        // Make sure the two lengths together don't overflow a size_t.
2113        state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2114        if (!state)
2115          return;
2116  
2117        finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2118                                                 *dstStrLengthNL, sizeTy);
2119      }
2120  
2121      // If we couldn't get a single value for the final string length,
2122      // we can at least bound it by the individual lengths.
2123      if (finalStrLength.isUnknown()) {
2124        // Try to get a "hypothetical" string length symbol, which we can later
2125        // set as a real value if that turns out to be the case.
2126        finalStrLength =
2127            getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2128        assert(!finalStrLength.isUndef());
2129  
2130        if (std::optional<NonLoc> finalStrLengthNL =
2131                finalStrLength.getAs<NonLoc>()) {
2132          if (amountCopiedNL && appendK == ConcatFnKind::none) {
2133            // we overwrite dst string with the src
2134            // finalStrLength >= srcStrLength
2135            SVal sourceInResult = svalBuilder.evalBinOpNN(
2136                state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2137            state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2138                                  true);
2139            if (!state)
2140              return;
2141          }
2142  
2143          if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2144            // we extend the dst string with the src
2145            // finalStrLength >= dstStrLength
2146            SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2147                                                        *finalStrLengthNL,
2148                                                        *dstStrLengthNL,
2149                                                        cmpTy);
2150            state =
2151                state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2152            if (!state)
2153              return;
2154          }
2155        }
2156      }
2157  
2158    } else {
2159      // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2160      // the final string length will match the input string length.
2161      finalStrLength = amountCopied;
2162    }
2163  
2164    SVal Result;
2165  
2166    if (returnPtr) {
2167      // The final result of the function will either be a pointer past the last
2168      // copied element, or a pointer to the start of the destination buffer.
2169      Result = (ReturnEnd ? UnknownVal() : DstVal);
2170    } else {
2171      if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2172        //strlcpy, strlcat
2173        Result = strlRetVal;
2174      else
2175        Result = finalStrLength;
2176    }
2177  
2178    assert(state);
2179  
2180    // If the destination is a MemRegion, try to check for a buffer overflow and
2181    // record the new string length.
2182    if (std::optional<loc::MemRegionVal> dstRegVal =
2183            DstVal.getAs<loc::MemRegionVal>()) {
2184      QualType ptrTy = Dst.Expression->getType();
2185  
2186      // If we have an exact value on a bounded copy, use that to check for
2187      // overflows, rather than our estimate about how much is actually copied.
2188      if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2189        SVal maxLastElement =
2190            svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2191  
2192        // Check if the first byte of the destination is writable.
2193        state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2194        if (!state)
2195          return;
2196        // Check if the last byte of the destination is writable.
2197        state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2198        if (!state)
2199          return;
2200      }
2201  
2202      // Then, if the final length is known...
2203      if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2204        SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2205            *knownStrLength, ptrTy);
2206  
2207        // ...and we haven't checked the bound, we'll check the actual copy.
2208        if (!boundWarning) {
2209          // Check if the first byte of the destination is writable.
2210          state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2211          if (!state)
2212            return;
2213          // Check if the last byte of the destination is writable.
2214          state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2215          if (!state)
2216            return;
2217        }
2218  
2219        // If this is a stpcpy-style copy, the last element is the return value.
2220        if (returnPtr && ReturnEnd)
2221          Result = lastElement;
2222      }
2223  
2224      // Invalidate the destination (regular invalidation without pointer-escaping
2225      // the address of the top-level region). This must happen before we set the
2226      // C string length because invalidation will clear the length.
2227      // FIXME: Even if we can't perfectly model the copy, we should see if we
2228      // can use LazyCompoundVals to copy the source values into the destination.
2229      // This would probably remove any existing bindings past the end of the
2230      // string, but that's still an improvement over blank invalidation.
2231      state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
2232                                                *dstRegVal, amountCopied,
2233                                                C.getASTContext().getSizeType());
2234  
2235      // Invalidate the source (const-invalidation without const-pointer-escaping
2236      // the address of the top-level region).
2237      state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);
2238  
2239      // Set the C string length of the destination, if we know it.
2240      if (IsBounded && (appendK == ConcatFnKind::none)) {
2241        // strncpy is annoying in that it doesn't guarantee to null-terminate
2242        // the result string. If the original string didn't fit entirely inside
2243        // the bound (including the null-terminator), we don't know how long the
2244        // result is.
2245        if (amountCopied != strLength)
2246          finalStrLength = UnknownVal();
2247      }
2248      state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2249    }
2250  
2251    assert(state);
2252  
2253    if (returnPtr) {
2254      // If this is a stpcpy-style copy, but we were unable to check for a buffer
2255      // overflow, we still need a result. Conjure a return value.
2256      if (ReturnEnd && Result.isUnknown()) {
2257        Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
2258                                              C.blockCount());
2259      }
2260    }
2261    // Set the return value.
2262    state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2263    C.addTransition(state);
2264  }
2265  
evalStrcmp(CheckerContext & C,const CallEvent & Call) const2266  void CStringChecker::evalStrcmp(CheckerContext &C,
2267                                  const CallEvent &Call) const {
2268    //int strcmp(const char *s1, const char *s2);
2269    evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2270  }
2271  
evalStrncmp(CheckerContext & C,const CallEvent & Call) const2272  void CStringChecker::evalStrncmp(CheckerContext &C,
2273                                   const CallEvent &Call) const {
2274    //int strncmp(const char *s1, const char *s2, size_t n);
2275    evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2276  }
2277  
evalStrcasecmp(CheckerContext & C,const CallEvent & Call) const2278  void CStringChecker::evalStrcasecmp(CheckerContext &C,
2279                                      const CallEvent &Call) const {
2280    //int strcasecmp(const char *s1, const char *s2);
2281    evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2282  }
2283  
evalStrncasecmp(CheckerContext & C,const CallEvent & Call) const2284  void CStringChecker::evalStrncasecmp(CheckerContext &C,
2285                                       const CallEvent &Call) const {
2286    //int strncasecmp(const char *s1, const char *s2, size_t n);
2287    evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2288  }
2289  
evalStrcmpCommon(CheckerContext & C,const CallEvent & Call,bool IsBounded,bool IgnoreCase) const2290  void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2291                                        bool IsBounded, bool IgnoreCase) const {
2292    CurrentFunctionDescription = "string comparison function";
2293    ProgramStateRef state = C.getState();
2294    const LocationContext *LCtx = C.getLocationContext();
2295  
2296    // Check that the first string is non-null
2297    AnyArgExpr Left = {Call.getArgExpr(0), 0};
2298    SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2299    state = checkNonNull(C, state, Left, LeftVal);
2300    if (!state)
2301      return;
2302  
2303    // Check that the second string is non-null.
2304    AnyArgExpr Right = {Call.getArgExpr(1), 1};
2305    SVal RightVal = state->getSVal(Right.Expression, LCtx);
2306    state = checkNonNull(C, state, Right, RightVal);
2307    if (!state)
2308      return;
2309  
2310    // Get the string length of the first string or give up.
2311    SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2312    if (LeftLength.isUndef())
2313      return;
2314  
2315    // Get the string length of the second string or give up.
2316    SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2317    if (RightLength.isUndef())
2318      return;
2319  
2320    // If we know the two buffers are the same, we know the result is 0.
2321    // First, get the two buffers' addresses. Another checker will have already
2322    // made sure they're not undefined.
2323    DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2324    DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2325  
2326    // See if they are the same.
2327    SValBuilder &svalBuilder = C.getSValBuilder();
2328    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2329    ProgramStateRef StSameBuf, StNotSameBuf;
2330    std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2331  
2332    // If the two arguments might be the same buffer, we know the result is 0,
2333    // and we only need to check one size.
2334    if (StSameBuf) {
2335      StSameBuf =
2336          StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2337                              svalBuilder.makeZeroVal(Call.getResultType()));
2338      C.addTransition(StSameBuf);
2339  
2340      // If the two arguments are GUARANTEED to be the same, we're done!
2341      if (!StNotSameBuf)
2342        return;
2343    }
2344  
2345    assert(StNotSameBuf);
2346    state = StNotSameBuf;
2347  
2348    // At this point we can go about comparing the two buffers.
2349    // For now, we only do this if they're both known string literals.
2350  
2351    // Attempt to extract string literals from both expressions.
2352    const StringLiteral *LeftStrLiteral =
2353        getCStringLiteral(C, state, Left.Expression, LeftVal);
2354    const StringLiteral *RightStrLiteral =
2355        getCStringLiteral(C, state, Right.Expression, RightVal);
2356    bool canComputeResult = false;
2357    SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(),
2358                                                  LCtx, C.blockCount());
2359  
2360    if (LeftStrLiteral && RightStrLiteral) {
2361      StringRef LeftStrRef = LeftStrLiteral->getString();
2362      StringRef RightStrRef = RightStrLiteral->getString();
2363  
2364      if (IsBounded) {
2365        // Get the max number of characters to compare.
2366        const Expr *lenExpr = Call.getArgExpr(2);
2367        SVal lenVal = state->getSVal(lenExpr, LCtx);
2368  
2369        // If the length is known, we can get the right substrings.
2370        if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2371          // Create substrings of each to compare the prefix.
2372          LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2373          RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2374          canComputeResult = true;
2375        }
2376      } else {
2377        // This is a normal, unbounded strcmp.
2378        canComputeResult = true;
2379      }
2380  
2381      if (canComputeResult) {
2382        // Real strcmp stops at null characters.
2383        size_t s1Term = LeftStrRef.find('\0');
2384        if (s1Term != StringRef::npos)
2385          LeftStrRef = LeftStrRef.substr(0, s1Term);
2386  
2387        size_t s2Term = RightStrRef.find('\0');
2388        if (s2Term != StringRef::npos)
2389          RightStrRef = RightStrRef.substr(0, s2Term);
2390  
2391        // Use StringRef's comparison methods to compute the actual result.
2392        int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2393                                    : LeftStrRef.compare(RightStrRef);
2394  
2395        // The strcmp function returns an integer greater than, equal to, or less
2396        // than zero, [c11, p7.24.4.2].
2397        if (compareRes == 0) {
2398          resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2399        }
2400        else {
2401          DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2402          // Constrain strcmp's result range based on the result of StringRef's
2403          // comparison methods.
2404          BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2405          SVal compareWithZero =
2406            svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2407                svalBuilder.getConditionType());
2408          DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2409          state = state->assume(compareWithZeroVal, true);
2410        }
2411      }
2412    }
2413  
2414    state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2415  
2416    // Record this as a possible path.
2417    C.addTransition(state);
2418  }
2419  
evalStrsep(CheckerContext & C,const CallEvent & Call) const2420  void CStringChecker::evalStrsep(CheckerContext &C,
2421                                  const CallEvent &Call) const {
2422    // char *strsep(char **stringp, const char *delim);
2423    // Verify whether the search string parameter matches the return type.
2424    SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2425  
2426    QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2427    if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2428                                  CharPtrTy.getUnqualifiedType())
2429      return;
2430  
2431    CurrentFunctionDescription = "strsep()";
2432    ProgramStateRef State = C.getState();
2433    const LocationContext *LCtx = C.getLocationContext();
2434  
2435    // Check that the search string pointer is non-null (though it may point to
2436    // a null string).
2437    SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2438    State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2439    if (!State)
2440      return;
2441  
2442    // Check that the delimiter string is non-null.
2443    AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2444    SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2445    State = checkNonNull(C, State, DelimStr, DelimStrVal);
2446    if (!State)
2447      return;
2448  
2449    SValBuilder &SVB = C.getSValBuilder();
2450    SVal Result;
2451    if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2452      // Get the current value of the search string pointer, as a char*.
2453      Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2454  
2455      // Invalidate the search string, representing the change of one delimiter
2456      // character to NUL.
2457      // As the replacement never overflows, do not invalidate its super region.
2458      State = invalidateDestinationBufferNeverOverflows(
2459          C, State, SearchStrPtr.Expression, Result);
2460  
2461      // Overwrite the search string pointer. The new value is either an address
2462      // further along in the same string, or NULL if there are no more tokens.
2463      State =
2464          State->bindLoc(*SearchStrLoc,
2465                         SVB.conjureSymbolVal(getTag(), Call.getOriginExpr(),
2466                                              LCtx, CharPtrTy, C.blockCount()),
2467                         LCtx);
2468    } else {
2469      assert(SearchStrVal.isUnknown());
2470      // Conjure a symbolic value. It's the best we can do.
2471      Result = SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
2472                                    C.blockCount());
2473    }
2474  
2475    // Set the return value, and finish.
2476    State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2477    C.addTransition(State);
2478  }
2479  
2480  // These should probably be moved into a C++ standard library checker.
evalStdCopy(CheckerContext & C,const CallEvent & Call) const2481  void CStringChecker::evalStdCopy(CheckerContext &C,
2482                                   const CallEvent &Call) const {
2483    evalStdCopyCommon(C, Call);
2484  }
2485  
evalStdCopyBackward(CheckerContext & C,const CallEvent & Call) const2486  void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2487                                           const CallEvent &Call) const {
2488    evalStdCopyCommon(C, Call);
2489  }
2490  
evalStdCopyCommon(CheckerContext & C,const CallEvent & Call) const2491  void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2492                                         const CallEvent &Call) const {
2493    if (!Call.getArgExpr(2)->getType()->isPointerType())
2494      return;
2495  
2496    ProgramStateRef State = C.getState();
2497  
2498    const LocationContext *LCtx = C.getLocationContext();
2499  
2500    // template <class _InputIterator, class _OutputIterator>
2501    // _OutputIterator
2502    // copy(_InputIterator __first, _InputIterator __last,
2503    //        _OutputIterator __result)
2504  
2505    // Invalidate the destination buffer
2506    const Expr *Dst = Call.getArgExpr(2);
2507    SVal DstVal = State->getSVal(Dst, LCtx);
2508    // FIXME: As we do not know how many items are copied, we also invalidate the
2509    // super region containing the target location.
2510    State =
2511        invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
2512  
2513    SValBuilder &SVB = C.getSValBuilder();
2514  
2515    SVal ResultVal =
2516        SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
2517    State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2518  
2519    C.addTransition(State);
2520  }
2521  
evalMemset(CheckerContext & C,const CallEvent & Call) const2522  void CStringChecker::evalMemset(CheckerContext &C,
2523                                  const CallEvent &Call) const {
2524    // void *memset(void *s, int c, size_t n);
2525    CurrentFunctionDescription = "memory set function";
2526  
2527    DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2528    AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2529    SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2530  
2531    ProgramStateRef State = C.getState();
2532  
2533    // See if the size argument is zero.
2534    const LocationContext *LCtx = C.getLocationContext();
2535    SVal SizeVal = C.getSVal(Size.Expression);
2536    QualType SizeTy = Size.Expression->getType();
2537  
2538    ProgramStateRef ZeroSize, NonZeroSize;
2539    std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2540  
2541    // Get the value of the memory area.
2542    SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2543  
2544    // If the size is zero, there won't be any actual memory access, so
2545    // just bind the return value to the buffer and return.
2546    if (ZeroSize && !NonZeroSize) {
2547      ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2548      C.addTransition(ZeroSize);
2549      return;
2550    }
2551  
2552    // Ensure the memory area is not null.
2553    // If it is NULL there will be a NULL pointer dereference.
2554    State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2555    if (!State)
2556      return;
2557  
2558    State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2559    if (!State)
2560      return;
2561  
2562    // According to the values of the arguments, bind the value of the second
2563    // argument to the destination buffer and set string length, or just
2564    // invalidate the destination buffer.
2565    if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2566                   Size.Expression, C, State))
2567      return;
2568  
2569    State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2570    C.addTransition(State);
2571  }
2572  
evalBzero(CheckerContext & C,const CallEvent & Call) const2573  void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2574    CurrentFunctionDescription = "memory clearance function";
2575  
2576    DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2577    SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2578    SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2579  
2580    ProgramStateRef State = C.getState();
2581  
2582    // See if the size argument is zero.
2583    SVal SizeVal = C.getSVal(Size.Expression);
2584    QualType SizeTy = Size.Expression->getType();
2585  
2586    ProgramStateRef StateZeroSize, StateNonZeroSize;
2587    std::tie(StateZeroSize, StateNonZeroSize) =
2588      assumeZero(C, State, SizeVal, SizeTy);
2589  
2590    // If the size is zero, there won't be any actual memory access,
2591    // In this case we just return.
2592    if (StateZeroSize && !StateNonZeroSize) {
2593      C.addTransition(StateZeroSize);
2594      return;
2595    }
2596  
2597    // Get the value of the memory area.
2598    SVal MemVal = C.getSVal(Buffer.Expression);
2599  
2600    // Ensure the memory area is not null.
2601    // If it is NULL there will be a NULL pointer dereference.
2602    State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2603    if (!State)
2604      return;
2605  
2606    State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2607    if (!State)
2608      return;
2609  
2610    if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2611      return;
2612  
2613    C.addTransition(State);
2614  }
2615  
evalSprintf(CheckerContext & C,const CallEvent & Call) const2616  void CStringChecker::evalSprintf(CheckerContext &C,
2617                                   const CallEvent &Call) const {
2618    CurrentFunctionDescription = "'sprintf'";
2619    evalSprintfCommon(C, Call, /* IsBounded = */ false);
2620  }
2621  
evalSnprintf(CheckerContext & C,const CallEvent & Call) const2622  void CStringChecker::evalSnprintf(CheckerContext &C,
2623                                    const CallEvent &Call) const {
2624    CurrentFunctionDescription = "'snprintf'";
2625    evalSprintfCommon(C, Call, /* IsBounded = */ true);
2626  }
2627  
evalSprintfCommon(CheckerContext & C,const CallEvent & Call,bool IsBounded) const2628  void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2629                                         bool IsBounded) const {
2630    ProgramStateRef State = C.getState();
2631    const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2632    DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2633  
2634    const auto NumParams = Call.parameters().size();
2635    if (CE->getNumArgs() < NumParams) {
2636      // This is an invalid call, let's just ignore it.
2637      return;
2638    }
2639  
2640    const auto AllArguments =
2641        llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2642    const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2643  
2644    for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2645      // We consider only string buffers
2646      if (const QualType type = ArgExpr->getType();
2647          !type->isAnyPointerType() ||
2648          !type->getPointeeType()->isAnyCharacterType())
2649        continue;
2650      SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2651  
2652      // Ensure the buffers do not overlap.
2653      SizeArgExpr SrcExprAsSizeDummy = {
2654          {Source.Expression, Source.ArgumentIndex}};
2655      State = CheckOverlap(
2656          C, State,
2657          (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2658          Dest, Source);
2659      if (!State)
2660        return;
2661    }
2662  
2663    C.addTransition(State);
2664  }
2665  
2666  //===----------------------------------------------------------------------===//
2667  // The driver method, and other Checker callbacks.
2668  //===----------------------------------------------------------------------===//
2669  
identifyCall(const CallEvent & Call,CheckerContext & C) const2670  CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2671                                                       CheckerContext &C) const {
2672    const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2673    if (!CE)
2674      return nullptr;
2675  
2676    const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2677    if (!FD)
2678      return nullptr;
2679  
2680    if (StdCopy.matches(Call))
2681      return &CStringChecker::evalStdCopy;
2682    if (StdCopyBackward.matches(Call))
2683      return &CStringChecker::evalStdCopyBackward;
2684  
2685    // Pro-actively check that argument types are safe to do arithmetic upon.
2686    // We do not want to crash if someone accidentally passes a structure
2687    // into, say, a C++ overload of any of these functions. We could not check
2688    // that for std::copy because they may have arguments of other types.
2689    for (auto I : CE->arguments()) {
2690      QualType T = I->getType();
2691      if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2692        return nullptr;
2693    }
2694  
2695    const FnCheck *Callback = Callbacks.lookup(Call);
2696    if (Callback)
2697      return *Callback;
2698  
2699    return nullptr;
2700  }
2701  
evalCall(const CallEvent & Call,CheckerContext & C) const2702  bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2703    FnCheck Callback = identifyCall(Call, C);
2704  
2705    // If the callee isn't a string function, let another checker handle it.
2706    if (!Callback)
2707      return false;
2708  
2709    // Check and evaluate the call.
2710    assert(isa<CallExpr>(Call.getOriginExpr()));
2711    Callback(this, C, Call);
2712  
2713    // If the evaluate call resulted in no change, chain to the next eval call
2714    // handler.
2715    // Note, the custom CString evaluation calls assume that basic safety
2716    // properties are held. However, if the user chooses to turn off some of these
2717    // checks, we ignore the issues and leave the call evaluation to a generic
2718    // handler.
2719    return C.isDifferent();
2720  }
2721  
checkPreStmt(const DeclStmt * DS,CheckerContext & C) const2722  void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2723    // Record string length for char a[] = "abc";
2724    ProgramStateRef state = C.getState();
2725  
2726    for (const auto *I : DS->decls()) {
2727      const VarDecl *D = dyn_cast<VarDecl>(I);
2728      if (!D)
2729        continue;
2730  
2731      // FIXME: Handle array fields of structs.
2732      if (!D->getType()->isArrayType())
2733        continue;
2734  
2735      const Expr *Init = D->getInit();
2736      if (!Init)
2737        continue;
2738      if (!isa<StringLiteral>(Init))
2739        continue;
2740  
2741      Loc VarLoc = state->getLValue(D, C.getLocationContext());
2742      const MemRegion *MR = VarLoc.getAsRegion();
2743      if (!MR)
2744        continue;
2745  
2746      SVal StrVal = C.getSVal(Init);
2747      assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2748      DefinedOrUnknownSVal strLength =
2749        getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2750  
2751      state = state->set<CStringLength>(MR, strLength);
2752    }
2753  
2754    C.addTransition(state);
2755  }
2756  
2757  ProgramStateRef
checkRegionChanges(ProgramStateRef state,const InvalidatedSymbols *,ArrayRef<const MemRegion * > ExplicitRegions,ArrayRef<const MemRegion * > Regions,const LocationContext * LCtx,const CallEvent * Call) const2758  CStringChecker::checkRegionChanges(ProgramStateRef state,
2759      const InvalidatedSymbols *,
2760      ArrayRef<const MemRegion *> ExplicitRegions,
2761      ArrayRef<const MemRegion *> Regions,
2762      const LocationContext *LCtx,
2763      const CallEvent *Call) const {
2764    CStringLengthTy Entries = state->get<CStringLength>();
2765    if (Entries.isEmpty())
2766      return state;
2767  
2768    llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2769    llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2770  
2771    // First build sets for the changed regions and their super-regions.
2772    for (const MemRegion *MR : Regions) {
2773      Invalidated.insert(MR);
2774  
2775      SuperRegions.insert(MR);
2776      while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2777        MR = SR->getSuperRegion();
2778        SuperRegions.insert(MR);
2779      }
2780    }
2781  
2782    CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2783  
2784    // Then loop over the entries in the current state.
2785    for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2786      // Is this entry for a super-region of a changed region?
2787      if (SuperRegions.count(MR)) {
2788        Entries = F.remove(Entries, MR);
2789        continue;
2790      }
2791  
2792      // Is this entry for a sub-region of a changed region?
2793      const MemRegion *Super = MR;
2794      while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2795        Super = SR->getSuperRegion();
2796        if (Invalidated.count(Super)) {
2797          Entries = F.remove(Entries, MR);
2798          break;
2799        }
2800      }
2801    }
2802  
2803    return state->set<CStringLength>(Entries);
2804  }
2805  
checkLiveSymbols(ProgramStateRef state,SymbolReaper & SR) const2806  void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2807      SymbolReaper &SR) const {
2808    // Mark all symbols in our string length map as valid.
2809    CStringLengthTy Entries = state->get<CStringLength>();
2810  
2811    for (SVal Len : llvm::make_second_range(Entries)) {
2812      for (SymbolRef Sym : Len.symbols())
2813        SR.markInUse(Sym);
2814    }
2815  }
2816  
checkDeadSymbols(SymbolReaper & SR,CheckerContext & C) const2817  void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2818      CheckerContext &C) const {
2819    ProgramStateRef state = C.getState();
2820    CStringLengthTy Entries = state->get<CStringLength>();
2821    if (Entries.isEmpty())
2822      return;
2823  
2824    CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2825    for (auto [Reg, Len] : Entries) {
2826      if (SymbolRef Sym = Len.getAsSymbol()) {
2827        if (SR.isDead(Sym))
2828          Entries = F.remove(Entries, Reg);
2829      }
2830    }
2831  
2832    state = state->set<CStringLength>(Entries);
2833    C.addTransition(state);
2834  }
2835  
registerCStringModeling(CheckerManager & Mgr)2836  void ento::registerCStringModeling(CheckerManager &Mgr) {
2837    Mgr.registerChecker<CStringChecker>();
2838  }
2839  
shouldRegisterCStringModeling(const CheckerManager & mgr)2840  bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2841    return true;
2842  }
2843  
2844  #define REGISTER_CHECKER(name)                                                 \
2845    void ento::register##name(CheckerManager &mgr) {                             \
2846      CStringChecker *checker = mgr.getChecker<CStringChecker>();                \
2847      checker->Filter.Check##name = true;                                        \
2848      checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
2849    }                                                                            \
2850                                                                                 \
2851    bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2852  
2853  REGISTER_CHECKER(CStringNullArg)
2854  REGISTER_CHECKER(CStringOutOfBounds)
2855  REGISTER_CHECKER(CStringBufferOverlap)
2856  REGISTER_CHECKER(CStringNotNullTerm)
2857  REGISTER_CHECKER(CStringUninitializedRead)
2858