xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This defines CStringChecker, which is an assortment of checks on calls
10 // to functions in <string.h>.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InterCheckerAPI.h"
15 #include "clang/AST/OperationKinds.h"
16 #include "clang/Basic/Builtins.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/Checker.h"
21 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
29 #include "llvm/ADT/APSInt.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <functional>
35 #include <optional>
36 
37 using namespace clang;
38 using namespace ento;
39 using namespace std::placeholders;
40 
41 namespace {
// Pairs a call argument's expression with its position in the call.
// ArgumentIndex is zero-based: diagnostics print ArgumentIndex + 1 as the
// ordinal of the argument.
42 struct AnyArgExpr {
43   const Expr *Expression;
44   unsigned ArgumentIndex;
45 };
// Distinct types for the role an argument plays (source buffer, destination
// buffer, size). They add no members; the checker's signatures use them so
// that each parameter states which kind of argument it expects.
46 struct SourceArgExpr : AnyArgExpr {};
47 struct DestinationArgExpr : AnyArgExpr {};
48 struct SizeArgExpr : AnyArgExpr {};
49 
// String type returned by createOutOfBoundErrorMsg.
50 using ErrorMessage = SmallString<128>;
// Whether a checked buffer access is a write or a read; it selects the wording
// of the out-of-bounds diagnostic.
51 enum class AccessKind { write, read };
52 
createOutOfBoundErrorMsg(StringRef FunctionDescription,AccessKind Access)53 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
54                                              AccessKind Access) {
55   ErrorMessage Message;
56   llvm::raw_svector_ostream Os(Message);
57 
58   // Function classification like: Memory copy function
59   Os << toUppercase(FunctionDescription.front())
60      << &FunctionDescription.data()[1];
61 
62   if (Access == AccessKind::write) {
63     Os << " overflows the destination buffer";
64   } else { // read access
65     Os << " accesses out-of-bound array element";
66   }
67 
68   return Message;
69 }
70 
// Selects which concatenating function, if any, is being modeled. It is the
// type of the appendK parameter of evalStrcpyCommon.
71 enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };
72 
// Character kind a modeled function operates on. getCharPtrType maps Regular to
// a pointer to the context's CharTy and Wide to a pointer to its WideCharTy.
73 enum class CharKind { Regular = 0, Wide };
// Short aliases for the two CharKind enumerators.
74 constexpr CharKind CK_Regular = CharKind::Regular;
75 constexpr CharKind CK_Wide = CharKind::Wide;
76 
getCharPtrType(ASTContext & Ctx,CharKind CK)77 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
78   return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
79                                                     : Ctx.WideCharTy);
80 }
81 
82 class CStringChecker : public Checker< eval::Call,
83                                          check::PreStmt<DeclStmt>,
84                                          check::LiveSymbols,
85                                          check::DeadSymbols,
86                                          check::RegionChanges
87                                          > {
88   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
89       BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
90 
91   mutable const char *CurrentFunctionDescription = nullptr;
92 
93 public:
94   /// The filter is used to filter out the diagnostics which are not enabled by
95   /// the user.
96   struct CStringChecksFilter {
97     bool CheckCStringNullArg = false;
98     bool CheckCStringOutOfBounds = false;
99     bool CheckCStringBufferOverlap = false;
100     bool CheckCStringNotNullTerm = false;
101     bool CheckCStringUninitializedRead = false;
102 
103     CheckerNameRef CheckNameCStringNullArg;
104     CheckerNameRef CheckNameCStringOutOfBounds;
105     CheckerNameRef CheckNameCStringBufferOverlap;
106     CheckerNameRef CheckNameCStringNotNullTerm;
107     CheckerNameRef CheckNameCStringUninitializedRead;
108   };
109 
110   CStringChecksFilter Filter;
111 
getTag()112   static void *getTag() { static int tag; return &tag; }
113 
114   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
115   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
116   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
117   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
118 
119   ProgramStateRef
120     checkRegionChanges(ProgramStateRef state,
121                        const InvalidatedSymbols *,
122                        ArrayRef<const MemRegion *> ExplicitRegions,
123                        ArrayRef<const MemRegion *> Regions,
124                        const LocationContext *LCtx,
125                        const CallEvent *Call) const;
126 
127   using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
128                                      const CallEvent &)>;
129 
130   CallDescriptionMap<FnCheck> Callbacks = {
131       {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
132        std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
133       {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
134        std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
135       {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
136        std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
137       {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
138        std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
139       {{CDM::CLibrary, {"memcmp"}, 3},
140        std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
141       {{CDM::CLibrary, {"wmemcmp"}, 3},
142        std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
143       {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
144        std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
145       {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
146        std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
147       {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
148        &CStringChecker::evalMemset},
149       {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
150       // FIXME: C23 introduces 'memset_explicit', maybe also model that
151       {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
152        &CStringChecker::evalStrcpy},
153       {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
154        &CStringChecker::evalStrncpy},
155       {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
156        &CStringChecker::evalStpcpy},
157       {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
158        &CStringChecker::evalStrlcpy},
159       {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
160        &CStringChecker::evalStrcat},
161       {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
162        &CStringChecker::evalStrncat},
163       {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
164        &CStringChecker::evalStrlcat},
165       {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
166        &CStringChecker::evalstrLength},
167       {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
168       {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
169        &CStringChecker::evalstrnLength},
170       {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
171       {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
172       {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
173       {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
174       {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
175       {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
176       {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
177       {{CDM::CLibrary, {"bcmp"}, 3},
178        std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
179       {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
180       {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
181        &CStringChecker::evalBzero},
182 
183       // When recognizing calls to the following variadic functions, we accept
184       // any number of arguments in the call (std::nullopt = accept any
185       // number), but check that in the declaration there are 2 and 3
186       // parameters respectively. (Note that the parameter count does not
187       // include the "...". Calls where the number of arguments is too small
188       // will be discarded by the callback.)
189       {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
190        &CStringChecker::evalSprintf},
191       {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
192        &CStringChecker::evalSnprintf},
193   };
194 
195   // These require a bit of special handling.
196   CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
197       StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
198 
199   FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
200   void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201   void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
202   void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
203   void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
204   void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
205                       ProgramStateRef state, SizeArgExpr Size,
206                       DestinationArgExpr Dest, SourceArgExpr Source,
207                       bool Restricted, bool IsMempcpy, CharKind CK) const;
208 
209   void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
210 
211   void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
212   void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
213   void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
214                            bool IsStrnlen = false) const;
215 
216   void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
217   void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
218   void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
219   void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
220   void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
221                         bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
222                         bool returnPtr = true) const;
223 
224   void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
225   void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
226   void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
227 
228   void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
229   void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
230   void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
231   void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
232   void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
233                         bool IsBounded = false, bool IgnoreCase = false) const;
234 
235   void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
236 
237   void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
238   void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
239   void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
240   void evalMemset(CheckerContext &C, const CallEvent &Call) const;
241   void evalBzero(CheckerContext &C, const CallEvent &Call) const;
242 
243   void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
244   void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
245   void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
246                          bool IsBounded) const;
247 
248   // Utility methods
249   std::pair<ProgramStateRef , ProgramStateRef >
250   static assumeZero(CheckerContext &C,
251                     ProgramStateRef state, SVal V, QualType Ty);
252 
253   static ProgramStateRef setCStringLength(ProgramStateRef state,
254                                               const MemRegion *MR,
255                                               SVal strLength);
256   static SVal getCStringLengthForRegion(CheckerContext &C,
257                                         ProgramStateRef &state,
258                                         const Expr *Ex,
259                                         const MemRegion *MR,
260                                         bool hypothetical);
261   SVal getCStringLength(CheckerContext &C,
262                         ProgramStateRef &state,
263                         const Expr *Ex,
264                         SVal Buf,
265                         bool hypothetical = false) const;
266 
267   const StringLiteral *getCStringLiteral(CheckerContext &C,
268                                          ProgramStateRef &state,
269                                          const Expr *expr,
270                                          SVal val) const;
271 
272   /// Invalidate the destination buffer determined by characters copied.
273   static ProgramStateRef
274   invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
275                                     const Expr *BufE, SVal BufV, SVal SizeV,
276                                     QualType SizeTy);
277 
278   /// Operation never overflows, do not invalidate the super region.
279   static ProgramStateRef invalidateDestinationBufferNeverOverflows(
280       CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
281 
282   /// We do not know whether the operation can overflow (e.g. size is unknown),
283   /// invalidate the super region and escape related pointers.
284   static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
285       CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
286 
287   /// Invalidate the source buffer for escaping pointers.
288   static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
289                                                 ProgramStateRef S,
290                                                 const Expr *BufE, SVal BufV);
291 
292   /// @param InvalidationTraitOperations Determine how to invlidate the
293   /// MemRegion by setting the invalidation traits. Return true to cause pointer
294   /// escape, or false otherwise.
295   static ProgramStateRef invalidateBufferAux(
296       CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
297       llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
298                               const MemRegion *)>
299           InvalidationTraitOperations);
300 
301   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
302                               const MemRegion *MR);
303 
304   static bool memsetAux(const Expr *DstBuffer, SVal CharE,
305                         const Expr *Size, CheckerContext &C,
306                         ProgramStateRef &State);
307 
308   // Re-usable checks
309   ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
310                                AnyArgExpr Arg, SVal l) const;
311   // Check whether the origin region behind \p Element (like the actual array
312   // region \p Element is from) is initialized.
313   ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
314                             AnyArgExpr Buffer, SVal Element, SVal Size) const;
315   ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
316                                 AnyArgExpr Buffer, SVal Element,
317                                 AccessKind Access,
318                                 CharKind CK = CharKind::Regular) const;
319   ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
320                                     AnyArgExpr Buffer, SizeArgExpr Size,
321                                     AccessKind Access,
322                                     CharKind CK = CharKind::Regular) const;
323   ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
324                                SizeArgExpr Size, AnyArgExpr First,
325                                AnyArgExpr Second,
326                                CharKind CK = CharKind::Regular) const;
327   void emitOverlapBug(CheckerContext &C,
328                       ProgramStateRef state,
329                       const Stmt *First,
330                       const Stmt *Second) const;
331 
332   void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
333                       StringRef WarningMsg) const;
334   void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
335                           const Stmt *S, StringRef WarningMsg) const;
336   void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
337                          const Stmt *S, StringRef WarningMsg) const;
338   void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
339   void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
340                                 const Expr *E, StringRef Msg) const;
341   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
342                                             ProgramStateRef state,
343                                             NonLoc left,
344                                             NonLoc right) const;
345 
346   // Return true if the destination buffer of the copy function may be in bound.
347   // Expects SVal of Size to be positive and unsigned.
348   // Expects SVal of FirstBuf to be a FieldRegion.
349   static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
350                                 SVal BufVal, QualType BufTy, SVal LengthVal,
351                                 QualType LengthTy);
352 };
353 
354 } //end anonymous namespace
355 
// Registers a program-state map named CStringLength, keyed by memory region
// with an SVal as value. Going by its name and by the setCStringLength /
// getCStringLength helpers, it presumably holds the tracked length of the
// string stored in each region -- confirm against those helpers.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength,const MemRegion *,SVal)356 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
357 
358 //===----------------------------------------------------------------------===//
359 // Individual checks and utility methods.
360 //===----------------------------------------------------------------------===//
361 
362 std::pair<ProgramStateRef, ProgramStateRef>
363 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
364                            QualType Ty) {
365   std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
366   if (!val)
367     return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
368 
369   SValBuilder &svalBuilder = C.getSValBuilder();
370   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
371   return State->assume(svalBuilder.evalEQ(State, *val, zero));
372 }
373 
checkNonNull(CheckerContext & C,ProgramStateRef State,AnyArgExpr Arg,SVal l) const374 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
375                                              ProgramStateRef State,
376                                              AnyArgExpr Arg, SVal l) const {
377   // If a previous check has failed, propagate the failure.
378   if (!State)
379     return nullptr;
380 
381   ProgramStateRef stateNull, stateNonNull;
382   std::tie(stateNull, stateNonNull) =
383       assumeZero(C, State, l, Arg.Expression->getType());
384 
385   if (stateNull && !stateNonNull) {
386     if (Filter.CheckCStringNullArg) {
387       SmallString<80> buf;
388       llvm::raw_svector_ostream OS(buf);
389       assert(CurrentFunctionDescription);
390       OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
391          << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
392          << CurrentFunctionDescription;
393 
394       emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
395     }
396     return nullptr;
397   }
398 
399   // From here on, assume that the value is non-null.
400   assert(stateNonNull);
401   return stateNonNull;
402 }
403 
getIndex(ProgramStateRef State,const ElementRegion * ER,CharKind CK)404 static std::optional<NonLoc> getIndex(ProgramStateRef State,
405                                       const ElementRegion *ER, CharKind CK) {
406   SValBuilder &SVB = State->getStateManager().getSValBuilder();
407   ASTContext &Ctx = SVB.getContext();
408 
409   if (CK == CharKind::Regular) {
410     if (ER->getValueType() != Ctx.CharTy)
411       return {};
412     return ER->getIndex();
413   }
414 
415   if (ER->getValueType() != Ctx.WideCharTy)
416     return {};
417 
418   QualType SizeTy = Ctx.getSizeType();
419   NonLoc WideSize =
420       SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
421                      SizeTy)
422           .castAs<NonLoc>();
423   SVal Offset =
424       SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
425   if (Offset.isUnknown())
426     return {};
427   return Offset.castAs<NonLoc>();
428 }
429 
430 // Basically 1 -> 1st, 12 -> 12th, etc.
printIdxWithOrdinalSuffix(llvm::raw_ostream & Os,unsigned Idx)431 static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
432   Os << Idx << llvm::getOrdinalSuffix(Idx);
433 }
434 
checkInit(CheckerContext & C,ProgramStateRef State,AnyArgExpr Buffer,SVal Element,SVal Size) const435 ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
436                                           ProgramStateRef State,
437                                           AnyArgExpr Buffer, SVal Element,
438                                           SVal Size) const {
439 
440   // If a previous check has failed, propagate the failure.
441   if (!State)
442     return nullptr;
443 
444   const MemRegion *R = Element.getAsRegion();
445   const auto *ER = dyn_cast_or_null<ElementRegion>(R);
446   if (!ER)
447     return State;
448 
449   const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
450   if (!SuperR)
451     return State;
452 
453   // FIXME: We ought to able to check objects as well. Maybe
454   // UninitializedObjectChecker could help?
455   if (!SuperR->getValueType()->isArrayType())
456     return State;
457 
458   SValBuilder &SVB = C.getSValBuilder();
459   ASTContext &Ctx = SVB.getContext();
460 
461   const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
462   const NonLoc Zero = SVB.makeZeroArrayIndex();
463 
464   std::optional<Loc> FirstElementVal =
465       State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
466   if (!FirstElementVal)
467     return State;
468 
469   // Ensure that we wouldn't read uninitialized value.
470   if (Filter.CheckCStringUninitializedRead &&
471       State->getSVal(*FirstElementVal).isUndef()) {
472     llvm::SmallString<258> Buf;
473     llvm::raw_svector_ostream OS(Buf);
474     OS << "The first element of the ";
475     printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
476     OS << " argument is undefined";
477     emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
478     return nullptr;
479   }
480 
481   // We won't check whether the entire region is fully initialized -- lets just
482   // check that the first and the last element is. So, onto checking the last
483   // element:
484   const QualType IdxTy = SVB.getArrayIndexType();
485 
486   NonLoc ElemSize =
487       SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
488           .castAs<NonLoc>();
489 
490   // FIXME: Check that the size arg to the cstring function is divisible by
491   // size of the actual element type?
492 
493   // The type of the argument to the cstring function is either char or wchar,
494   // but thats not the type of the original array (or memory region).
495   // Suppose the following:
496   //   int t[5];
497   //   memcpy(dst, t, sizeof(t) / sizeof(t[0]));
498   // When checking whether t is fully initialized, we see it as char array of
499   // size sizeof(int)*5. If we check the last element as a character, we read
500   // the last byte of an integer, which will be undefined. But just because
501   // that value is undefined, it doesn't mean that the element is uninitialized!
502   // For this reason, we need to retrieve the actual last element with the
503   // correct type.
504 
505   // Divide the size argument to the cstring function by the actual element
506   // type. This value will be size of the array, or the index to the
507   // past-the-end element.
508   std::optional<NonLoc> Offset =
509       SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
510                       IdxTy)
511           .getAs<NonLoc>();
512 
513   // Retrieve the index of the last element.
514   const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
515   SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
516 
517   if (!Offset)
518     return State;
519 
520   SVal LastElementVal =
521       State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
522   if (!isa<Loc>(LastElementVal))
523     return State;
524 
525   if (Filter.CheckCStringUninitializedRead &&
526       State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
527     const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
528     // If we can't get emit a sensible last element index, just bail out --
529     // prefer to emit nothing in favour of emitting garbage quality reports.
530     if (!IdxInt) {
531       C.addSink();
532       return nullptr;
533     }
534     llvm::SmallString<258> Buf;
535     llvm::raw_svector_ostream OS(Buf);
536     OS << "The last accessed element (at index ";
537     OS << IdxInt->getExtValue();
538     OS << ") in the ";
539     printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
540     OS << " argument is undefined";
541     emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
542     return nullptr;
543   }
544   return State;
545 }
546 
547 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
CheckLocation(CheckerContext & C,ProgramStateRef state,AnyArgExpr Buffer,SVal Element,AccessKind Access,CharKind CK) const548 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
549                                               ProgramStateRef state,
550                                               AnyArgExpr Buffer, SVal Element,
551                                               AccessKind Access,
552                                               CharKind CK) const {
553 
554   // If a previous check has failed, propagate the failure.
555   if (!state)
556     return nullptr;
557 
558   // Check for out of bound array element access.
559   const MemRegion *R = Element.getAsRegion();
560   if (!R)
561     return state;
562 
563   const auto *ER = dyn_cast<ElementRegion>(R);
564   if (!ER)
565     return state;
566 
567   // Get the index of the accessed element.
568   std::optional<NonLoc> Idx = getIndex(state, ER, CK);
569   if (!Idx)
570     return state;
571 
572   // Get the size of the array.
573   const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
574   DefinedOrUnknownSVal Size =
575       getDynamicExtent(state, superReg, C.getSValBuilder());
576 
577   auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
578   if (StOutBound && !StInBound) {
579     // These checks are either enabled by the CString out-of-bounds checker
580     // explicitly or implicitly by the Malloc checker.
581     // In the latter case we only do modeling but do not emit warning.
582     if (!Filter.CheckCStringOutOfBounds)
583       return nullptr;
584 
585     // Emit a bug report.
586     ErrorMessage Message =
587         createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
588     emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
589     return nullptr;
590   }
591 
592   // Array bound check succeeded.  From this point forward the array bound
593   // should always succeed.
594   return StInBound;
595 }
596 
597 ProgramStateRef
CheckBufferAccess(CheckerContext & C,ProgramStateRef State,AnyArgExpr Buffer,SizeArgExpr Size,AccessKind Access,CharKind CK) const598 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
599                                   AnyArgExpr Buffer, SizeArgExpr Size,
600                                   AccessKind Access, CharKind CK) const {
601   // If a previous check has failed, propagate the failure.
602   if (!State)
603     return nullptr;
604 
605   SValBuilder &svalBuilder = C.getSValBuilder();
606   ASTContext &Ctx = svalBuilder.getContext();
607 
608   QualType SizeTy = Size.Expression->getType();
609   QualType PtrTy = getCharPtrType(Ctx, CK);
610 
611   // Check that the first buffer is non-null.
612   SVal BufVal = C.getSVal(Buffer.Expression);
613   State = checkNonNull(C, State, Buffer, BufVal);
614   if (!State)
615     return nullptr;
616 
617   // If out-of-bounds checking is turned off, skip the rest.
618   if (!Filter.CheckCStringOutOfBounds)
619     return State;
620 
621   SVal BufStart =
622       svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
623 
624   // Check if the first byte of the buffer is accessible.
625   State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
626 
627   if (!State)
628     return nullptr;
629 
630   // Get the access length and make sure it is known.
631   // FIXME: This assumes the caller has already checked that the access length
632   // is positive. And that it's unsigned.
633   SVal LengthVal = C.getSVal(Size.Expression);
634   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
635   if (!Length)
636     return State;
637 
638   // Compute the offset of the last element to be accessed: size-1.
639   NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
640   SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
641   if (Offset.isUnknown())
642     return nullptr;
643   NonLoc LastOffset = Offset.castAs<NonLoc>();
644 
645   // Check that the first buffer is sufficiently long.
646   if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
647 
648     SVal BufEnd =
649         svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
650     State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
651     if (Access == AccessKind::read)
652       State = checkInit(C, State, Buffer, BufEnd, *Length);
653 
654     // If the buffer isn't large enough, abort.
655     if (!State)
656       return nullptr;
657   }
658 
659   // Large enough or not, return this state!
660   return State;
661 }
662 
CheckOverlap(CheckerContext & C,ProgramStateRef state,SizeArgExpr Size,AnyArgExpr First,AnyArgExpr Second,CharKind CK) const663 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
664                                              ProgramStateRef state,
665                                              SizeArgExpr Size, AnyArgExpr First,
666                                              AnyArgExpr Second,
667                                              CharKind CK) const {
668   if (!Filter.CheckCStringBufferOverlap)
669     return state;
670 
671   // Do a simple check for overlap: if the two arguments are from the same
672   // buffer, see if the end of the first is greater than the start of the second
673   // or vice versa.
674 
675   // If a previous check has failed, propagate the failure.
676   if (!state)
677     return nullptr;
678 
679   ProgramStateRef stateTrue, stateFalse;
680 
681   // Assume different address spaces cannot overlap.
682   if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
683       Second.Expression->getType()->getPointeeType().getAddressSpace())
684     return state;
685 
686   // Get the buffer values and make sure they're known locations.
687   const LocationContext *LCtx = C.getLocationContext();
688   SVal firstVal = state->getSVal(First.Expression, LCtx);
689   SVal secondVal = state->getSVal(Second.Expression, LCtx);
690 
691   std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
692   if (!firstLoc)
693     return state;
694 
695   std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
696   if (!secondLoc)
697     return state;
698 
699   // Are the two values the same?
700   SValBuilder &svalBuilder = C.getSValBuilder();
701   std::tie(stateTrue, stateFalse) =
702       state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
703 
704   if (stateTrue && !stateFalse) {
705     // If the values are known to be equal, that's automatically an overlap.
706     emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
707     return nullptr;
708   }
709 
710   // assume the two expressions are not equal.
711   assert(stateFalse);
712   state = stateFalse;
713 
714   // Which value comes first?
715   QualType cmpTy = svalBuilder.getConditionType();
716   SVal reverse =
717       svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
718   std::optional<DefinedOrUnknownSVal> reverseTest =
719       reverse.getAs<DefinedOrUnknownSVal>();
720   if (!reverseTest)
721     return state;
722 
723   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
724   if (stateTrue) {
725     if (stateFalse) {
726       // If we don't know which one comes first, we can't perform this test.
727       return state;
728     } else {
729       // Switch the values so that firstVal is before secondVal.
730       std::swap(firstLoc, secondLoc);
731 
732       // Switch the Exprs as well, so that they still correspond.
733       std::swap(First, Second);
734     }
735   }
736 
737   // Get the length, and make sure it too is known.
738   SVal LengthVal = state->getSVal(Size.Expression, LCtx);
739   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
740   if (!Length)
741     return state;
742 
743   // Convert the first buffer's start address to char*.
744   // Bail out if the cast fails.
745   ASTContext &Ctx = svalBuilder.getContext();
746   QualType CharPtrTy = getCharPtrType(Ctx, CK);
747   SVal FirstStart =
748       svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
749   std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
750   if (!FirstStartLoc)
751     return state;
752 
753   // Compute the end of the first buffer. Bail out if THAT fails.
754   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
755                                           *Length, CharPtrTy);
756   std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
757   if (!FirstEndLoc)
758     return state;
759 
760   // Is the end of the first buffer past the start of the second buffer?
761   SVal Overlap =
762       svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
763   std::optional<DefinedOrUnknownSVal> OverlapTest =
764       Overlap.getAs<DefinedOrUnknownSVal>();
765   if (!OverlapTest)
766     return state;
767 
768   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
769 
770   if (stateTrue && !stateFalse) {
771     // Overlap!
772     emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
773     return nullptr;
774   }
775 
776   // assume the two expressions don't overlap.
777   assert(stateFalse);
778   return stateFalse;
779 }
780 
emitOverlapBug(CheckerContext & C,ProgramStateRef state,const Stmt * First,const Stmt * Second) const781 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
782                                   const Stmt *First, const Stmt *Second) const {
783   ExplodedNode *N = C.generateErrorNode(state);
784   if (!N)
785     return;
786 
787   if (!BT_Overlap)
788     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
789                                  categories::UnixAPI, "Improper arguments"));
790 
791   // Generate a report for this bug.
792   auto report = std::make_unique<PathSensitiveBugReport>(
793       *BT_Overlap, "Arguments must not be overlapping buffers", N);
794   report->addRange(First->getSourceRange());
795   report->addRange(Second->getSourceRange());
796 
797   C.emitReport(std::move(report));
798 }
799 
emitNullArgBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const800 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
801                                     const Stmt *S, StringRef WarningMsg) const {
802   if (ExplodedNode *N = C.generateErrorNode(State)) {
803     if (!BT_Null) {
804       // FIXME: This call uses the string constant 'categories::UnixAPI' as the
805       // description of the bug; it should be replaced by a real description.
806       BT_Null.reset(
807           new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
808     }
809 
810     auto Report =
811         std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N);
812     Report->addRange(S->getSourceRange());
813     if (const auto *Ex = dyn_cast<Expr>(S))
814       bugreporter::trackExpressionValue(N, Ex, *Report);
815     C.emitReport(std::move(Report));
816   }
817 }
818 
emitUninitializedReadBug(CheckerContext & C,ProgramStateRef State,const Expr * E,StringRef Msg) const819 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
820                                               ProgramStateRef State,
821                                               const Expr *E,
822                                               StringRef Msg) const {
823   if (ExplodedNode *N = C.generateErrorNode(State)) {
824     if (!BT_UninitRead)
825       BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
826                                       "Accessing unitialized/garbage values"));
827 
828     auto Report =
829         std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
830     Report->addNote("Other elements might also be undefined",
831                     Report->getLocation());
832     Report->addRange(E->getSourceRange());
833     bugreporter::trackExpressionValue(N, E, *Report);
834     C.emitReport(std::move(Report));
835   }
836 }
837 
emitOutOfBoundsBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const838 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
839                                         ProgramStateRef State, const Stmt *S,
840                                         StringRef WarningMsg) const {
841   if (ExplodedNode *N = C.generateErrorNode(State)) {
842     if (!BT_Bounds)
843       BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds
844                                       ? Filter.CheckNameCStringOutOfBounds
845                                       : Filter.CheckNameCStringNullArg,
846                                   "Out-of-bound array access"));
847 
848     // FIXME: It would be nice to eventually make this diagnostic more clear,
849     // e.g., by referencing the original declaration or by saying *why* this
850     // reference is outside the range.
851     auto Report =
852         std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
853     Report->addRange(S->getSourceRange());
854     C.emitReport(std::move(Report));
855   }
856 }
857 
emitNotCStringBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const858 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
859                                        const Stmt *S,
860                                        StringRef WarningMsg) const {
861   if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
862     if (!BT_NotCString) {
863       // FIXME: This call uses the string constant 'categories::UnixAPI' as the
864       // description of the bug; it should be replaced by a real description.
865       BT_NotCString.reset(
866           new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
867     }
868 
869     auto Report =
870         std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
871 
872     Report->addRange(S->getSourceRange());
873     C.emitReport(std::move(Report));
874   }
875 }
876 
emitAdditionOverflowBug(CheckerContext & C,ProgramStateRef State) const877 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
878                                              ProgramStateRef State) const {
879   if (ExplodedNode *N = C.generateErrorNode(State)) {
880     if (!BT_AdditionOverflow) {
881       // FIXME: This call uses the word "API" as the description of the bug;
882       // it should be replaced by a better error message (if this unlikely
883       // situation continues to exist as a separate bug type).
884       BT_AdditionOverflow.reset(
885           new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
886     }
887 
888     // This isn't a great error message, but this should never occur in real
889     // code anyway -- you'd have to create a buffer longer than a size_t can
890     // represent, which is sort of a contradiction.
891     const char *WarningMsg =
892         "This expression will create a string whose length is too big to "
893         "be represented as a size_t";
894 
895     auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
896                                                            WarningMsg, N);
897     C.emitReport(std::move(Report));
898   }
899 }
900 
checkAdditionOverflow(CheckerContext & C,ProgramStateRef state,NonLoc left,NonLoc right) const901 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
902                                                      ProgramStateRef state,
903                                                      NonLoc left,
904                                                      NonLoc right) const {
905   // If out-of-bounds checking is turned off, skip the rest.
906   if (!Filter.CheckCStringOutOfBounds)
907     return state;
908 
909   // If a previous check has failed, propagate the failure.
910   if (!state)
911     return nullptr;
912 
913   SValBuilder &svalBuilder = C.getSValBuilder();
914   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
915 
916   QualType sizeTy = svalBuilder.getContext().getSizeType();
917   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
918   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
919 
920   SVal maxMinusRight;
921   if (isa<nonloc::ConcreteInt>(right)) {
922     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
923                                                  sizeTy);
924   } else {
925     // Try switching the operands. (The order of these two assignments is
926     // important!)
927     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
928                                             sizeTy);
929     left = right;
930   }
931 
932   if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
933     QualType cmpTy = svalBuilder.getConditionType();
934     // If left > max - right, we have an overflow.
935     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
936                                                 *maxMinusRightNL, cmpTy);
937 
938     ProgramStateRef stateOverflow, stateOkay;
939     std::tie(stateOverflow, stateOkay) =
940       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
941 
942     if (stateOverflow && !stateOkay) {
943       // We have an overflow. Emit a bug report.
944       emitAdditionOverflowBug(C, stateOverflow);
945       return nullptr;
946     }
947 
948     // From now on, assume an overflow didn't occur.
949     assert(stateOkay);
950     state = stateOkay;
951   }
952 
953   return state;
954 }
955 
setCStringLength(ProgramStateRef state,const MemRegion * MR,SVal strLength)956 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
957                                                 const MemRegion *MR,
958                                                 SVal strLength) {
959   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
960 
961   MR = MR->StripCasts();
962 
963   switch (MR->getKind()) {
964   case MemRegion::StringRegionKind:
965     // FIXME: This can happen if we strcpy() into a string region. This is
966     // undefined [C99 6.4.5p6], but we should still warn about it.
967     return state;
968 
969   case MemRegion::SymbolicRegionKind:
970   case MemRegion::AllocaRegionKind:
971   case MemRegion::NonParamVarRegionKind:
972   case MemRegion::ParamVarRegionKind:
973   case MemRegion::FieldRegionKind:
974   case MemRegion::ObjCIvarRegionKind:
975     // These are the types we can currently track string lengths for.
976     break;
977 
978   case MemRegion::ElementRegionKind:
979     // FIXME: Handle element regions by upper-bounding the parent region's
980     // string length.
981     return state;
982 
983   default:
984     // Other regions (mostly non-data) can't have a reliable C string length.
985     // For now, just ignore the change.
986     // FIXME: These are rare but not impossible. We should output some kind of
987     // warning for things like strcpy((char[]){'a', 0}, "b");
988     return state;
989   }
990 
991   if (strLength.isUnknown())
992     return state->remove<CStringLength>(MR);
993 
994   return state->set<CStringLength>(MR, strLength);
995 }
996 
getCStringLengthForRegion(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,const MemRegion * MR,bool hypothetical)997 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
998                                                ProgramStateRef &state,
999                                                const Expr *Ex,
1000                                                const MemRegion *MR,
1001                                                bool hypothetical) {
1002   if (!hypothetical) {
1003     // If there's a recorded length, go ahead and return it.
1004     const SVal *Recorded = state->get<CStringLength>(MR);
1005     if (Recorded)
1006       return *Recorded;
1007   }
1008 
1009   // Otherwise, get a new symbol and update the state.
1010   SValBuilder &svalBuilder = C.getSValBuilder();
1011   QualType sizeTy = svalBuilder.getContext().getSizeType();
1012   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
1013                                                     MR, Ex, sizeTy,
1014                                                     C.getLocationContext(),
1015                                                     C.blockCount());
1016 
1017   if (!hypothetical) {
1018     if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1019       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1020       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1021       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
1022       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
1023       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
1024                                                         fourInt);
1025       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
1026       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
1027                                                 svalBuilder.getConditionType());
1028       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
1029     }
1030     state = state->set<CStringLength>(MR, strLength);
1031   }
1032 
1033   return strLength;
1034 }
1035 
getCStringLength(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,SVal Buf,bool hypothetical) const1036 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1037                                       const Expr *Ex, SVal Buf,
1038                                       bool hypothetical) const {
1039   const MemRegion *MR = Buf.getAsRegion();
1040   if (!MR) {
1041     // If we can't get a region, see if it's something we /know/ isn't a
1042     // C string. In the context of locations, the only time we can issue such
1043     // a warning is for labels.
1044     if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1045       if (Filter.CheckCStringNotNullTerm) {
1046         SmallString<120> buf;
1047         llvm::raw_svector_ostream os(buf);
1048         assert(CurrentFunctionDescription);
1049         os << "Argument to " << CurrentFunctionDescription
1050            << " is the address of the label '" << Label->getLabel()->getName()
1051            << "', which is not a null-terminated string";
1052 
1053         emitNotCStringBug(C, state, Ex, os.str());
1054       }
1055       return UndefinedVal();
1056     }
1057 
1058     // If it's not a region and not a label, give up.
1059     return UnknownVal();
1060   }
1061 
1062   // If we have a region, strip casts from it and see if we can figure out
1063   // its length. For anything we can't figure out, just return UnknownVal.
1064   MR = MR->StripCasts();
1065 
1066   switch (MR->getKind()) {
1067   case MemRegion::StringRegionKind: {
1068     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1069     // so we can assume that the byte length is the correct C string length.
1070     SValBuilder &svalBuilder = C.getSValBuilder();
1071     QualType sizeTy = svalBuilder.getContext().getSizeType();
1072     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
1073     return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
1074   }
1075   case MemRegion::NonParamVarRegionKind: {
1076     // If we have a global constant with a string literal initializer,
1077     // compute the initializer's length.
1078     const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1079     if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1080       if (const Expr *Init = Decl->getInit()) {
1081         if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
1082           SValBuilder &SvalBuilder = C.getSValBuilder();
1083           QualType SizeTy = SvalBuilder.getContext().getSizeType();
1084           return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
1085         }
1086       }
1087     }
1088     [[fallthrough]];
1089   }
1090   case MemRegion::SymbolicRegionKind:
1091   case MemRegion::AllocaRegionKind:
1092   case MemRegion::ParamVarRegionKind:
1093   case MemRegion::FieldRegionKind:
1094   case MemRegion::ObjCIvarRegionKind:
1095     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1096   case MemRegion::CompoundLiteralRegionKind:
1097     // FIXME: Can we track this? Is it necessary?
1098     return UnknownVal();
1099   case MemRegion::ElementRegionKind:
1100     // FIXME: How can we handle this? It's not good enough to subtract the
1101     // offset from the base string length; consider "123\x00567" and &a[5].
1102     return UnknownVal();
1103   default:
1104     // Other regions (mostly non-data) can't have a reliable C string length.
1105     // In this case, an error is emitted and UndefinedVal is returned.
1106     // The caller should always be prepared to handle this case.
1107     if (Filter.CheckCStringNotNullTerm) {
1108       SmallString<120> buf;
1109       llvm::raw_svector_ostream os(buf);
1110 
1111       assert(CurrentFunctionDescription);
1112       os << "Argument to " << CurrentFunctionDescription << " is ";
1113 
1114       if (SummarizeRegion(os, C.getASTContext(), MR))
1115         os << ", which is not a null-terminated string";
1116       else
1117         os << "not a null-terminated string";
1118 
1119       emitNotCStringBug(C, state, Ex, os.str());
1120     }
1121     return UndefinedVal();
1122   }
1123 }
1124 
getCStringLiteral(CheckerContext & C,ProgramStateRef & state,const Expr * expr,SVal val) const1125 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1126   ProgramStateRef &state, const Expr *expr, SVal val) const {
1127 
1128   // Get the memory region pointed to by the val.
1129   const MemRegion *bufRegion = val.getAsRegion();
1130   if (!bufRegion)
1131     return nullptr;
1132 
1133   // Strip casts off the memory region.
1134   bufRegion = bufRegion->StripCasts();
1135 
1136   // Cast the memory region to a string region.
1137   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1138   if (!strRegion)
1139     return nullptr;
1140 
1141   // Return the actual string in the string region.
1142   return strRegion->getStringLiteral();
1143 }
1144 
isFirstBufInBound(CheckerContext & C,ProgramStateRef State,SVal BufVal,QualType BufTy,SVal LengthVal,QualType LengthTy)1145 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1146                                        SVal BufVal, QualType BufTy,
1147                                        SVal LengthVal, QualType LengthTy) {
1148   // If we do not know that the buffer is long enough we return 'true'.
1149   // Otherwise the parent region of this field region would also get
1150   // invalidated, which would lead to warnings based on an unknown state.
1151 
1152   if (LengthVal.isUnknown())
1153     return false;
1154 
1155   // Originally copied from CheckBufferAccess and CheckLocation.
1156   SValBuilder &SB = C.getSValBuilder();
1157   ASTContext &Ctx = C.getASTContext();
1158 
1159   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1160 
1161   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1162   if (!Length)
1163     return true; // cf top comment.
1164 
1165   // Compute the offset of the last element to be accessed: size-1.
1166   NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1167   SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1168   if (Offset.isUnknown())
1169     return true; // cf top comment
1170   NonLoc LastOffset = Offset.castAs<NonLoc>();
1171 
1172   // Check that the first buffer is sufficiently long.
1173   SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1174   std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1175   if (!BufLoc)
1176     return true; // cf top comment.
1177 
1178   SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1179 
1180   // Check for out of bound array element access.
1181   const MemRegion *R = BufEnd.getAsRegion();
1182   if (!R)
1183     return true; // cf top comment.
1184 
1185   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1186   if (!ER)
1187     return true; // cf top comment.
1188 
1189   // FIXME: Does this crash when a non-standard definition
1190   // of a library function is encountered?
1191   assert(ER->getValueType() == C.getASTContext().CharTy &&
1192          "isFirstBufInBound should only be called with char* ElementRegions");
1193 
1194   // Get the size of the array.
1195   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1196   DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1197 
1198   // Get the index of the accessed element.
1199   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1200 
1201   ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1202 
1203   return static_cast<bool>(StInBound);
1204 }
1205 
invalidateDestinationBufferBySize(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV,SVal SizeV,QualType SizeTy)1206 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1207     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1208     SVal SizeV, QualType SizeTy) {
1209   auto InvalidationTraitOperations =
1210       [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1211        SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1212         // If destination buffer is a field region and access is in bound, do
1213         // not invalidate its super region.
1214         if (MemRegion::FieldRegionKind == R->getKind() &&
1215             isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1216           ITraits.setTrait(
1217               R,
1218               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1219         }
1220         return false;
1221       };
1222 
1223   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1224 }
1225 
1226 ProgramStateRef
invalidateDestinationBufferAlwaysEscapeSuperRegion(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1227 CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1228     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1229   auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1230                                         const MemRegion *R) {
1231     return isa<FieldRegion>(R);
1232   };
1233 
1234   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1235 }
1236 
invalidateDestinationBufferNeverOverflows(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1237 ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1238     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1239   auto InvalidationTraitOperations =
1240       [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1241         if (MemRegion::FieldRegionKind == R->getKind())
1242           ITraits.setTrait(
1243               R,
1244               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1245         return false;
1246       };
1247 
1248   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1249 }
1250 
invalidateSourceBuffer(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1251 ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1252                                                        ProgramStateRef S,
1253                                                        const Expr *BufE,
1254                                                        SVal BufV) {
1255   auto InvalidationTraitOperations =
1256       [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1257         ITraits.setTrait(
1258             R->getBaseRegion(),
1259             RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1260         ITraits.setTrait(R,
1261                          RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1262         return true;
1263       };
1264 
1265   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1266 }
1267 
invalidateBufferAux(CheckerContext & C,ProgramStateRef State,const Expr * E,SVal V,llvm::function_ref<bool (RegionAndSymbolInvalidationTraits &,const MemRegion *)> InvalidationTraitOperations)1268 ProgramStateRef CStringChecker::invalidateBufferAux(
1269     CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1270     llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1271                             const MemRegion *)>
1272         InvalidationTraitOperations) {
1273   std::optional<Loc> L = V.getAs<Loc>();
1274   if (!L)
1275     return State;
1276 
1277   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1278   // some assumptions about the value that CFRefCount can't. Even so, it should
1279   // probably be refactored.
1280   if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1281     const MemRegion *R = MR->getRegion()->StripCasts();
1282 
1283     // Are we dealing with an ElementRegion?  If so, we should be invalidating
1284     // the super-region.
1285     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1286       R = ER->getSuperRegion();
1287       // FIXME: What about layers of ElementRegions?
1288     }
1289 
1290     // Invalidate this region.
1291     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1292     RegionAndSymbolInvalidationTraits ITraits;
1293     bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1294 
1295     return State->invalidateRegions(R, E, C.blockCount(), LCtx,
1296                                     CausesPointerEscape, nullptr, nullptr,
1297                                     &ITraits);
1298   }
1299 
1300   // If we have a non-region value by chance, just remove the binding.
1301   // FIXME: is this necessary or correct? This handles the non-Region
1302   //  cases.  Is it ever valid to store to these?
1303   return State->killBinding(*L);
1304 }
1305 
SummarizeRegion(raw_ostream & os,ASTContext & Ctx,const MemRegion * MR)1306 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1307                                      const MemRegion *MR) {
1308   switch (MR->getKind()) {
1309   case MemRegion::FunctionCodeRegionKind: {
1310     if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1311       os << "the address of the function '" << *FD << '\'';
1312     else
1313       os << "the address of a function";
1314     return true;
1315   }
1316   case MemRegion::BlockCodeRegionKind:
1317     os << "block text";
1318     return true;
1319   case MemRegion::BlockDataRegionKind:
1320     os << "a block";
1321     return true;
1322   case MemRegion::CXXThisRegionKind:
1323   case MemRegion::CXXTempObjectRegionKind:
1324     os << "a C++ temp object of type "
1325        << cast<TypedValueRegion>(MR)->getValueType();
1326     return true;
1327   case MemRegion::NonParamVarRegionKind:
1328     os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1329     return true;
1330   case MemRegion::ParamVarRegionKind:
1331     os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1332     return true;
1333   case MemRegion::FieldRegionKind:
1334     os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1335     return true;
1336   case MemRegion::ObjCIvarRegionKind:
1337     os << "an instance variable of type "
1338        << cast<TypedValueRegion>(MR)->getValueType();
1339     return true;
1340   default:
1341     return false;
1342   }
1343 }
1344 
memsetAux(const Expr * DstBuffer,SVal CharVal,const Expr * Size,CheckerContext & C,ProgramStateRef & State)1345 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1346                                const Expr *Size, CheckerContext &C,
1347                                ProgramStateRef &State) {
1348   SVal MemVal = C.getSVal(DstBuffer);
1349   SVal SizeVal = C.getSVal(Size);
1350   const MemRegion *MR = MemVal.getAsRegion();
1351   if (!MR)
1352     return false;
1353 
1354   // We're about to model memset by producing a "default binding" in the Store.
1355   // Our current implementation - RegionStore - doesn't support default bindings
1356   // that don't cover the whole base region. So we should first get the offset
1357   // and the base region to figure out whether the offset of buffer is 0.
1358   RegionOffset Offset = MR->getAsOffset();
1359   const MemRegion *BR = Offset.getRegion();
1360 
1361   std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1362   if (!SizeNL)
1363     return false;
1364 
1365   SValBuilder &svalBuilder = C.getSValBuilder();
1366   ASTContext &Ctx = C.getASTContext();
1367 
1368   // void *memset(void *dest, int ch, size_t count);
1369   // For now we can only handle the case of offset is 0 and concrete char value.
1370   if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1371       Offset.getOffset() == 0) {
1372     // Get the base region's size.
1373     DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1374 
1375     ProgramStateRef StateWholeReg, StateNotWholeReg;
1376     std::tie(StateWholeReg, StateNotWholeReg) =
1377         State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1378 
1379     // With the semantic of 'memset()', we should convert the CharVal to
1380     // unsigned char.
1381     CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1382 
1383     ProgramStateRef StateNullChar, StateNonNullChar;
1384     std::tie(StateNullChar, StateNonNullChar) =
1385         assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1386 
1387     if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1388         !StateNonNullChar) {
1389       // If the 'memset()' acts on the whole region of destination buffer and
1390       // the value of the second argument of 'memset()' is zero, bind the second
1391       // argument's value to the destination buffer with 'default binding'.
1392       // FIXME: Since there is no perfect way to bind the non-zero character, we
1393       // can only deal with zero value here. In the future, we need to deal with
1394       // the binding of non-zero value in the case of whole region.
1395       State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1396                                      C.getLocationContext());
1397     } else {
1398       // If the destination buffer's extent is not equal to the value of
1399       // third argument, just invalidate buffer.
1400       State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1401                                                 SizeVal, Size->getType());
1402     }
1403 
1404     if (StateNullChar && !StateNonNullChar) {
1405       // If the value of the second argument of 'memset()' is zero, set the
1406       // string length of destination buffer to 0 directly.
1407       State = setCStringLength(State, MR,
1408                                svalBuilder.makeZeroVal(Ctx.getSizeType()));
1409     } else if (!StateNullChar && StateNonNullChar) {
1410       SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1411           CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1412           C.getLocationContext(), C.blockCount());
1413 
1414       // If the value of second argument is not zero, then the string length
1415       // is at least the size argument.
1416       SVal NewStrLenGESize = svalBuilder.evalBinOp(
1417           State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1418 
1419       State = setCStringLength(
1420           State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1421           MR, NewStrLen);
1422     }
1423   } else {
1424     // If the offset is not zero and char value is not concrete, we can do
1425     // nothing but invalidate the buffer.
1426     State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1427                                               SizeVal, Size->getType());
1428   }
1429   return true;
1430 }
1431 
1432 //===----------------------------------------------------------------------===//
1433 // evaluation of individual function calls.
1434 //===----------------------------------------------------------------------===//
1435 
evalCopyCommon(CheckerContext & C,const CallEvent & Call,ProgramStateRef state,SizeArgExpr Size,DestinationArgExpr Dest,SourceArgExpr Source,bool Restricted,bool IsMempcpy,CharKind CK) const1436 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1437                                     ProgramStateRef state, SizeArgExpr Size,
1438                                     DestinationArgExpr Dest,
1439                                     SourceArgExpr Source, bool Restricted,
1440                                     bool IsMempcpy, CharKind CK) const {
1441   CurrentFunctionDescription = "memory copy function";
1442 
1443   // See if the size argument is zero.
1444   const LocationContext *LCtx = C.getLocationContext();
1445   SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1446   QualType sizeTy = Size.Expression->getType();
1447 
1448   ProgramStateRef stateZeroSize, stateNonZeroSize;
1449   std::tie(stateZeroSize, stateNonZeroSize) =
1450       assumeZero(C, state, sizeVal, sizeTy);
1451 
1452   // Get the value of the Dest.
1453   SVal destVal = state->getSVal(Dest.Expression, LCtx);
1454 
1455   // If the size is zero, there won't be any actual memory access, so
1456   // just bind the return value to the destination buffer and return.
1457   if (stateZeroSize && !stateNonZeroSize) {
1458     stateZeroSize =
1459         stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1460     C.addTransition(stateZeroSize);
1461     return;
1462   }
1463 
1464   // If the size can be nonzero, we have to check the other arguments.
1465   if (stateNonZeroSize) {
1466     // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1467     // to the size of the destination buffer, then emit a warning
1468     // that an attacker may provoke a buffer overflow error.
1469     state = stateNonZeroSize;
1470 
1471     // Ensure the destination is not null. If it is NULL there will be a
1472     // NULL pointer dereference.
1473     state = checkNonNull(C, state, Dest, destVal);
1474     if (!state)
1475       return;
1476 
1477     // Get the value of the Src.
1478     SVal srcVal = state->getSVal(Source.Expression, LCtx);
1479 
1480     // Ensure the source is not null. If it is NULL there will be a
1481     // NULL pointer dereference.
1482     state = checkNonNull(C, state, Source, srcVal);
1483     if (!state)
1484       return;
1485 
1486     // Ensure the accesses are valid and that the buffers do not overlap.
1487     state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1488     state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1489 
1490     if (Restricted)
1491       state = CheckOverlap(C, state, Size, Dest, Source, CK);
1492 
1493     if (!state)
1494       return;
1495 
1496     // If this is mempcpy, get the byte after the last byte copied and
1497     // bind the expr.
1498     if (IsMempcpy) {
1499       // Get the byte after the last byte copied.
1500       SValBuilder &SvalBuilder = C.getSValBuilder();
1501       ASTContext &Ctx = SvalBuilder.getContext();
1502       QualType CharPtrTy = getCharPtrType(Ctx, CK);
1503       SVal DestRegCharVal =
1504           SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1505       SVal lastElement = C.getSValBuilder().evalBinOp(
1506           state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1507       // If we don't know how much we copied, we can at least
1508       // conjure a return value for later.
1509       if (lastElement.isUnknown())
1510         lastElement = C.getSValBuilder().conjureSymbolVal(
1511             nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1512 
1513       // The byte after the last byte copied is the return value.
1514       state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1515     } else {
1516       // All other copies return the destination buffer.
1517       // (Well, bcopy() has a void return type, but this won't hurt.)
1518       state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1519     }
1520 
1521     // Invalidate the destination (regular invalidation without pointer-escaping
1522     // the address of the top-level region).
1523     // FIXME: Even if we can't perfectly model the copy, we should see if we
1524     // can use LazyCompoundVals to copy the source values into the destination.
1525     // This would probably remove any existing bindings past the end of the
1526     // copied region, but that's still an improvement over blank invalidation.
1527     state = invalidateDestinationBufferBySize(
1528         C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1529         Size.Expression->getType());
1530 
1531     // Invalidate the source (const-invalidation without const-pointer-escaping
1532     // the address of the top-level region).
1533     state = invalidateSourceBuffer(C, state, Source.Expression,
1534                                    C.getSVal(Source.Expression));
1535 
1536     C.addTransition(state);
1537   }
1538 }
1539 
evalMemcpy(CheckerContext & C,const CallEvent & Call,CharKind CK) const1540 void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1541                                 CharKind CK) const {
1542   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1543   // The return value is the address of the destination buffer.
1544   DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1545   SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1546   SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1547 
1548   ProgramStateRef State = C.getState();
1549 
1550   constexpr bool IsRestricted = true;
1551   constexpr bool IsMempcpy = false;
1552   evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1553 }
1554 
evalMempcpy(CheckerContext & C,const CallEvent & Call,CharKind CK) const1555 void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1556                                  CharKind CK) const {
1557   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1558   // The return value is a pointer to the byte following the last written byte.
1559   DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1560   SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1561   SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1562 
1563   constexpr bool IsRestricted = true;
1564   constexpr bool IsMempcpy = true;
1565   evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1566                  IsMempcpy, CK);
1567 }
1568 
evalMemmove(CheckerContext & C,const CallEvent & Call,CharKind CK) const1569 void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1570                                  CharKind CK) const {
1571   // void *memmove(void *dst, const void *src, size_t n);
1572   // The return value is the address of the destination buffer.
1573   DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1574   SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1575   SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1576 
1577   constexpr bool IsRestricted = false;
1578   constexpr bool IsMempcpy = false;
1579   evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1580                  IsMempcpy, CK);
1581 }
1582 
evalBcopy(CheckerContext & C,const CallEvent & Call) const1583 void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1584   // void bcopy(const void *src, void *dst, size_t n);
1585   SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1586   DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1587   SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1588 
1589   constexpr bool IsRestricted = false;
1590   constexpr bool IsMempcpy = false;
1591   evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1592                  IsMempcpy, CharKind::Regular);
1593 }
1594 
evalMemcmp(CheckerContext & C,const CallEvent & Call,CharKind CK) const1595 void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1596                                 CharKind CK) const {
1597   // int memcmp(const void *s1, const void *s2, size_t n);
1598   CurrentFunctionDescription = "memory comparison function";
1599 
1600   AnyArgExpr Left = {Call.getArgExpr(0), 0};
1601   AnyArgExpr Right = {Call.getArgExpr(1), 1};
1602   SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1603 
1604   ProgramStateRef State = C.getState();
1605   SValBuilder &Builder = C.getSValBuilder();
1606   const LocationContext *LCtx = C.getLocationContext();
1607 
1608   // See if the size argument is zero.
1609   SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1610   QualType sizeTy = Size.Expression->getType();
1611 
1612   ProgramStateRef stateZeroSize, stateNonZeroSize;
1613   std::tie(stateZeroSize, stateNonZeroSize) =
1614       assumeZero(C, State, sizeVal, sizeTy);
1615 
1616   // If the size can be zero, the result will be 0 in that case, and we don't
1617   // have to check either of the buffers.
1618   if (stateZeroSize) {
1619     State = stateZeroSize;
1620     State = State->BindExpr(Call.getOriginExpr(), LCtx,
1621                             Builder.makeZeroVal(Call.getResultType()));
1622     C.addTransition(State);
1623   }
1624 
1625   // If the size can be nonzero, we have to check the other arguments.
1626   if (stateNonZeroSize) {
1627     State = stateNonZeroSize;
1628     // If we know the two buffers are the same, we know the result is 0.
1629     // First, get the two buffers' addresses. Another checker will have already
1630     // made sure they're not undefined.
1631     DefinedOrUnknownSVal LV =
1632         State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1633     DefinedOrUnknownSVal RV =
1634         State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1635 
1636     // See if they are the same.
1637     ProgramStateRef SameBuffer, NotSameBuffer;
1638     std::tie(SameBuffer, NotSameBuffer) =
1639         State->assume(Builder.evalEQ(State, LV, RV));
1640 
1641     // If the two arguments are the same buffer, we know the result is 0,
1642     // and we only need to check one size.
1643     if (SameBuffer && !NotSameBuffer) {
1644       State = SameBuffer;
1645       State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1646       if (State) {
1647         State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1648                                      Builder.makeZeroVal(Call.getResultType()));
1649         C.addTransition(State);
1650       }
1651       return;
1652     }
1653 
1654     // If the two arguments might be different buffers, we have to check
1655     // the size of both of them.
1656     assert(NotSameBuffer);
1657     State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1658     State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1659     if (State) {
1660       // The return value is the comparison result, which we don't know.
1661       SVal CmpV = Builder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
1662                                            C.blockCount());
1663       State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1664       C.addTransition(State);
1665     }
1666   }
1667 }
1668 
evalstrLength(CheckerContext & C,const CallEvent & Call) const1669 void CStringChecker::evalstrLength(CheckerContext &C,
1670                                    const CallEvent &Call) const {
1671   // size_t strlen(const char *s);
1672   evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1673 }
1674 
evalstrnLength(CheckerContext & C,const CallEvent & Call) const1675 void CStringChecker::evalstrnLength(CheckerContext &C,
1676                                     const CallEvent &Call) const {
1677   // size_t strnlen(const char *s, size_t maxlen);
1678   evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1679 }
1680 
evalstrLengthCommon(CheckerContext & C,const CallEvent & Call,bool IsStrnlen) const1681 void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1682                                          const CallEvent &Call,
1683                                          bool IsStrnlen) const {
1684   CurrentFunctionDescription = "string length function";
1685   ProgramStateRef state = C.getState();
1686   const LocationContext *LCtx = C.getLocationContext();
1687 
1688   if (IsStrnlen) {
1689     const Expr *maxlenExpr = Call.getArgExpr(1);
1690     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1691 
1692     ProgramStateRef stateZeroSize, stateNonZeroSize;
1693     std::tie(stateZeroSize, stateNonZeroSize) =
1694       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1695 
1696     // If the size can be zero, the result will be 0 in that case, and we don't
1697     // have to check the string itself.
1698     if (stateZeroSize) {
1699       SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1700       stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1701       C.addTransition(stateZeroSize);
1702     }
1703 
1704     // If the size is GUARANTEED to be zero, we're done!
1705     if (!stateNonZeroSize)
1706       return;
1707 
1708     // Otherwise, record the assumption that the size is nonzero.
1709     state = stateNonZeroSize;
1710   }
1711 
1712   // Check that the string argument is non-null.
1713   AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1714   SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1715   state = checkNonNull(C, state, Arg, ArgVal);
1716 
1717   if (!state)
1718     return;
1719 
1720   SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1721 
1722   // If the argument isn't a valid C string, there's no valid state to
1723   // transition to.
1724   if (strLength.isUndef())
1725     return;
1726 
1727   DefinedOrUnknownSVal result = UnknownVal();
1728 
1729   // If the check is for strnlen() then bind the return value to no more than
1730   // the maxlen value.
1731   if (IsStrnlen) {
1732     QualType cmpTy = C.getSValBuilder().getConditionType();
1733 
1734     // It's a little unfortunate to be getting this again,
1735     // but it's not that expensive...
1736     const Expr *maxlenExpr = Call.getArgExpr(1);
1737     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1738 
1739     std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1740     std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1741 
1742     if (strLengthNL && maxlenValNL) {
1743       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1744 
1745       // Check if the strLength is greater than the maxlen.
1746       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1747           C.getSValBuilder()
1748               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1749               .castAs<DefinedOrUnknownSVal>());
1750 
1751       if (stateStringTooLong && !stateStringNotTooLong) {
1752         // If the string is longer than maxlen, return maxlen.
1753         result = *maxlenValNL;
1754       } else if (stateStringNotTooLong && !stateStringTooLong) {
1755         // If the string is shorter than maxlen, return its length.
1756         result = *strLengthNL;
1757       }
1758     }
1759 
1760     if (result.isUnknown()) {
1761       // If we don't have enough information for a comparison, there's
1762       // no guarantee the full string length will actually be returned.
1763       // All we know is the return value is the min of the string length
1764       // and the limit. This is better than nothing.
1765       result = C.getSValBuilder().conjureSymbolVal(
1766           nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1767       NonLoc resultNL = result.castAs<NonLoc>();
1768 
1769       if (strLengthNL) {
1770         state = state->assume(C.getSValBuilder().evalBinOpNN(
1771                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1772                                   .castAs<DefinedOrUnknownSVal>(), true);
1773       }
1774 
1775       if (maxlenValNL) {
1776         state = state->assume(C.getSValBuilder().evalBinOpNN(
1777                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1778                                   .castAs<DefinedOrUnknownSVal>(), true);
1779       }
1780     }
1781 
1782   } else {
1783     // This is a plain strlen(), not strnlen().
1784     result = strLength.castAs<DefinedOrUnknownSVal>();
1785 
1786     // If we don't know the length of the string, conjure a return
1787     // value, so it can be used in constraints, at least.
1788     if (result.isUnknown()) {
1789       result = C.getSValBuilder().conjureSymbolVal(
1790           nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1791     }
1792   }
1793 
1794   // Bind the return value.
1795   assert(!result.isUnknown() && "Should have conjured a value by now");
1796   state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1797   C.addTransition(state);
1798 }
1799 
evalStrcpy(CheckerContext & C,const CallEvent & Call) const1800 void CStringChecker::evalStrcpy(CheckerContext &C,
1801                                 const CallEvent &Call) const {
1802   // char *strcpy(char *restrict dst, const char *restrict src);
1803   evalStrcpyCommon(C, Call,
1804                    /* ReturnEnd = */ false,
1805                    /* IsBounded = */ false,
1806                    /* appendK = */ ConcatFnKind::none);
1807 }
1808 
evalStrncpy(CheckerContext & C,const CallEvent & Call) const1809 void CStringChecker::evalStrncpy(CheckerContext &C,
1810                                  const CallEvent &Call) const {
1811   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1812   evalStrcpyCommon(C, Call,
1813                    /* ReturnEnd = */ false,
1814                    /* IsBounded = */ true,
1815                    /* appendK = */ ConcatFnKind::none);
1816 }
1817 
evalStpcpy(CheckerContext & C,const CallEvent & Call) const1818 void CStringChecker::evalStpcpy(CheckerContext &C,
1819                                 const CallEvent &Call) const {
1820   // char *stpcpy(char *restrict dst, const char *restrict src);
1821   evalStrcpyCommon(C, Call,
1822                    /* ReturnEnd = */ true,
1823                    /* IsBounded = */ false,
1824                    /* appendK = */ ConcatFnKind::none);
1825 }
1826 
evalStrlcpy(CheckerContext & C,const CallEvent & Call) const1827 void CStringChecker::evalStrlcpy(CheckerContext &C,
1828                                  const CallEvent &Call) const {
1829   // size_t strlcpy(char *dest, const char *src, size_t size);
1830   evalStrcpyCommon(C, Call,
1831                    /* ReturnEnd = */ true,
1832                    /* IsBounded = */ true,
1833                    /* appendK = */ ConcatFnKind::none,
1834                    /* returnPtr = */ false);
1835 }
1836 
evalStrcat(CheckerContext & C,const CallEvent & Call) const1837 void CStringChecker::evalStrcat(CheckerContext &C,
1838                                 const CallEvent &Call) const {
1839   // char *strcat(char *restrict s1, const char *restrict s2);
1840   evalStrcpyCommon(C, Call,
1841                    /* ReturnEnd = */ false,
1842                    /* IsBounded = */ false,
1843                    /* appendK = */ ConcatFnKind::strcat);
1844 }
1845 
evalStrncat(CheckerContext & C,const CallEvent & Call) const1846 void CStringChecker::evalStrncat(CheckerContext &C,
1847                                  const CallEvent &Call) const {
1848   // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1849   evalStrcpyCommon(C, Call,
1850                    /* ReturnEnd = */ false,
1851                    /* IsBounded = */ true,
1852                    /* appendK = */ ConcatFnKind::strcat);
1853 }
1854 
evalStrlcat(CheckerContext & C,const CallEvent & Call) const1855 void CStringChecker::evalStrlcat(CheckerContext &C,
1856                                  const CallEvent &Call) const {
1857   // size_t strlcat(char *dst, const char *src, size_t size);
1858   // It will append at most size - strlen(dst) - 1 bytes,
1859   // NULL-terminating the result.
1860   evalStrcpyCommon(C, Call,
1861                    /* ReturnEnd = */ false,
1862                    /* IsBounded = */ true,
1863                    /* appendK = */ ConcatFnKind::strlcat,
1864                    /* returnPtr = */ false);
1865 }
1866 
evalStrcpyCommon(CheckerContext & C,const CallEvent & Call,bool ReturnEnd,bool IsBounded,ConcatFnKind appendK,bool returnPtr) const1867 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1868                                       bool ReturnEnd, bool IsBounded,
1869                                       ConcatFnKind appendK,
1870                                       bool returnPtr) const {
1871   if (appendK == ConcatFnKind::none)
1872     CurrentFunctionDescription = "string copy function";
1873   else
1874     CurrentFunctionDescription = "string concatenation function";
1875 
1876   ProgramStateRef state = C.getState();
1877   const LocationContext *LCtx = C.getLocationContext();
1878 
1879   // Check that the destination is non-null.
1880   DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1881   SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1882   state = checkNonNull(C, state, Dst, DstVal);
1883   if (!state)
1884     return;
1885 
1886   // Check that the source is non-null.
1887   SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1888   SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1889   state = checkNonNull(C, state, srcExpr, srcVal);
1890   if (!state)
1891     return;
1892 
1893   // Get the string length of the source.
1894   SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1895   std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1896 
1897   // Get the string length of the destination buffer.
1898   SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1899   std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1900 
1901   // If the source isn't a valid C string, give up.
1902   if (strLength.isUndef())
1903     return;
1904 
1905   SValBuilder &svalBuilder = C.getSValBuilder();
1906   QualType cmpTy = svalBuilder.getConditionType();
1907   QualType sizeTy = svalBuilder.getContext().getSizeType();
1908 
1909   // These two values allow checking two kinds of errors:
1910   // - actual overflows caused by a source that doesn't fit in the destination
1911   // - potential overflows caused by a bound that could exceed the destination
1912   SVal amountCopied = UnknownVal();
1913   SVal maxLastElementIndex = UnknownVal();
1914   const char *boundWarning = nullptr;
1915 
1916   // FIXME: Why do we choose the srcExpr if the access has no size?
1917   //  Note that the 3rd argument of the call would be the size parameter.
1918   SizeArgExpr SrcExprAsSizeDummy = {
1919       {srcExpr.Expression, srcExpr.ArgumentIndex}};
1920   state = CheckOverlap(
1921       C, state,
1922       (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1923       Dst, srcExpr);
1924 
1925   if (!state)
1926     return;
1927 
1928   // If the function is strncpy, strncat, etc... it is bounded.
1929   if (IsBounded) {
1930     // Get the max number of characters to copy.
1931     SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1932     SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1933 
1934     // Protect against misdeclared strncpy().
1935     lenVal =
1936         svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1937 
1938     std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1939 
1940     // If we know both values, we might be able to figure out how much
1941     // we're copying.
1942     if (strLengthNL && lenValNL) {
1943       switch (appendK) {
1944       case ConcatFnKind::none:
1945       case ConcatFnKind::strcat: {
1946         ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1947         // Check if the max number to copy is less than the length of the src.
1948         // If the bound is equal to the source length, strncpy won't null-
1949         // terminate the result!
1950         std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1951             svalBuilder
1952                 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1953                 .castAs<DefinedOrUnknownSVal>());
1954 
1955         if (stateSourceTooLong && !stateSourceNotTooLong) {
1956           // Max number to copy is less than the length of the src, so the
1957           // actual strLength copied is the max number arg.
1958           state = stateSourceTooLong;
1959           amountCopied = lenVal;
1960 
1961         } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1962           // The source buffer entirely fits in the bound.
1963           state = stateSourceNotTooLong;
1964           amountCopied = strLength;
1965         }
1966         break;
1967       }
1968       case ConcatFnKind::strlcat:
1969         if (!dstStrLengthNL)
1970           return;
1971 
1972         // amountCopied = min (size - dstLen - 1 , srcLen)
1973         SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1974                                                  *dstStrLengthNL, sizeTy);
1975         if (!isa<NonLoc>(freeSpace))
1976           return;
1977         freeSpace =
1978             svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1979                                   svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1980         std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1981 
1982         // While unlikely, it is possible that the subtraction is
1983         // too complex to compute, let's check whether it succeeded.
1984         if (!freeSpaceNL)
1985           return;
1986         SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1987             state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1988 
1989         ProgramStateRef TrueState, FalseState;
1990         std::tie(TrueState, FalseState) =
1991             state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1992 
1993         // srcStrLength <= size - dstStrLength -1
1994         if (TrueState && !FalseState) {
1995           amountCopied = strLength;
1996         }
1997 
1998         // srcStrLength > size - dstStrLength -1
1999         if (!TrueState && FalseState) {
2000           amountCopied = freeSpace;
2001         }
2002 
2003         if (TrueState && FalseState)
2004           amountCopied = UnknownVal();
2005         break;
2006       }
2007     }
2008     // We still want to know if the bound is known to be too large.
2009     if (lenValNL) {
2010       switch (appendK) {
2011       case ConcatFnKind::strcat:
2012         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
2013 
2014         // Get the string length of the destination. If the destination is
2015         // memory that can't have a string length, we shouldn't be copying
2016         // into it anyway.
2017         if (dstStrLength.isUndef())
2018           return;
2019 
2020         if (dstStrLengthNL) {
2021           maxLastElementIndex = svalBuilder.evalBinOpNN(
2022               state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
2023 
2024           boundWarning = "Size argument is greater than the free space in the "
2025                          "destination buffer";
2026         }
2027         break;
2028       case ConcatFnKind::none:
2029       case ConcatFnKind::strlcat:
2030         // For strncpy and strlcat, this is just checking
2031         //  that lenVal <= sizeof(dst).
2032         // (Yes, strncpy and strncat differ in how they treat termination.
2033         // strncat ALWAYS terminates, but strncpy doesn't.)
2034 
2035         // We need a special case for when the copy size is zero, in which
2036         // case strncpy will do no work at all. Our bounds check uses n-1
2037         // as the last element accessed, so n == 0 is problematic.
2038         ProgramStateRef StateZeroSize, StateNonZeroSize;
2039         std::tie(StateZeroSize, StateNonZeroSize) =
2040             assumeZero(C, state, *lenValNL, sizeTy);
2041 
2042         // If the size is known to be zero, we're done.
2043         if (StateZeroSize && !StateNonZeroSize) {
2044           if (returnPtr) {
2045             StateZeroSize =
2046                 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
2047           } else {
2048             if (appendK == ConcatFnKind::none) {
2049               // strlcpy returns strlen(src)
2050               StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
2051                                                       LCtx, strLength);
2052             } else {
2053               // strlcat returns strlen(src) + strlen(dst)
2054               SVal retSize = svalBuilder.evalBinOp(
2055                   state, BO_Add, strLength, dstStrLength, sizeTy);
2056               StateZeroSize =
2057                   StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2058             }
2059           }
2060           C.addTransition(StateZeroSize);
2061           return;
2062         }
2063 
2064         // Otherwise, go ahead and figure out the last element we'll touch.
2065         // We don't record the non-zero assumption here because we can't
2066         // be sure. We won't warn on a possible zero.
2067         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2068         maxLastElementIndex =
2069             svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2070         boundWarning = "Size argument is greater than the length of the "
2071                        "destination buffer";
2072         break;
2073       }
2074     }
2075   } else {
2076     // The function isn't bounded. The amount copied should match the length
2077     // of the source buffer.
2078     amountCopied = strLength;
2079   }
2080 
2081   assert(state);
2082 
2083   // This represents the number of characters copied into the destination
2084   // buffer. (It may not actually be the strlen if the destination buffer
2085   // is not terminated.)
2086   SVal finalStrLength = UnknownVal();
2087   SVal strlRetVal = UnknownVal();
2088 
2089   if (appendK == ConcatFnKind::none && !returnPtr) {
2090     // strlcpy returns the sizeof(src)
2091     strlRetVal = strLength;
2092   }
2093 
2094   // If this is an appending function (strcat, strncat...) then set the
2095   // string length to strlen(src) + strlen(dst) since the buffer will
2096   // ultimately contain both.
2097   if (appendK != ConcatFnKind::none) {
2098     // Get the string length of the destination. If the destination is memory
2099     // that can't have a string length, we shouldn't be copying into it anyway.
2100     if (dstStrLength.isUndef())
2101       return;
2102 
2103     if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2104       strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2105                                            *dstStrLengthNL, sizeTy);
2106     }
2107 
2108     std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2109 
2110     // If we know both string lengths, we might know the final string length.
2111     if (amountCopiedNL && dstStrLengthNL) {
2112       // Make sure the two lengths together don't overflow a size_t.
2113       state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2114       if (!state)
2115         return;
2116 
2117       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2118                                                *dstStrLengthNL, sizeTy);
2119     }
2120 
2121     // If we couldn't get a single value for the final string length,
2122     // we can at least bound it by the individual lengths.
2123     if (finalStrLength.isUnknown()) {
2124       // Try to get a "hypothetical" string length symbol, which we can later
2125       // set as a real value if that turns out to be the case.
2126       finalStrLength =
2127           getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2128       assert(!finalStrLength.isUndef());
2129 
2130       if (std::optional<NonLoc> finalStrLengthNL =
2131               finalStrLength.getAs<NonLoc>()) {
2132         if (amountCopiedNL && appendK == ConcatFnKind::none) {
2133           // we overwrite dst string with the src
2134           // finalStrLength >= srcStrLength
2135           SVal sourceInResult = svalBuilder.evalBinOpNN(
2136               state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2137           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2138                                 true);
2139           if (!state)
2140             return;
2141         }
2142 
2143         if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2144           // we extend the dst string with the src
2145           // finalStrLength >= dstStrLength
2146           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2147                                                       *finalStrLengthNL,
2148                                                       *dstStrLengthNL,
2149                                                       cmpTy);
2150           state =
2151               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2152           if (!state)
2153             return;
2154         }
2155       }
2156     }
2157 
2158   } else {
2159     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2160     // the final string length will match the input string length.
2161     finalStrLength = amountCopied;
2162   }
2163 
2164   SVal Result;
2165 
2166   if (returnPtr) {
2167     // The final result of the function will either be a pointer past the last
2168     // copied element, or a pointer to the start of the destination buffer.
2169     Result = (ReturnEnd ? UnknownVal() : DstVal);
2170   } else {
2171     if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2172       //strlcpy, strlcat
2173       Result = strlRetVal;
2174     else
2175       Result = finalStrLength;
2176   }
2177 
2178   assert(state);
2179 
2180   // If the destination is a MemRegion, try to check for a buffer overflow and
2181   // record the new string length.
2182   if (std::optional<loc::MemRegionVal> dstRegVal =
2183           DstVal.getAs<loc::MemRegionVal>()) {
2184     QualType ptrTy = Dst.Expression->getType();
2185 
2186     // If we have an exact value on a bounded copy, use that to check for
2187     // overflows, rather than our estimate about how much is actually copied.
2188     if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2189       SVal maxLastElement =
2190           svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2191 
2192       // Check if the first byte of the destination is writable.
2193       state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2194       if (!state)
2195         return;
2196       // Check if the last byte of the destination is writable.
2197       state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2198       if (!state)
2199         return;
2200     }
2201 
2202     // Then, if the final length is known...
2203     if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2204       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2205           *knownStrLength, ptrTy);
2206 
2207       // ...and we haven't checked the bound, we'll check the actual copy.
2208       if (!boundWarning) {
2209         // Check if the first byte of the destination is writable.
2210         state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2211         if (!state)
2212           return;
2213         // Check if the last byte of the destination is writable.
2214         state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2215         if (!state)
2216           return;
2217       }
2218 
2219       // If this is a stpcpy-style copy, the last element is the return value.
2220       if (returnPtr && ReturnEnd)
2221         Result = lastElement;
2222     }
2223 
2224     // Invalidate the destination (regular invalidation without pointer-escaping
2225     // the address of the top-level region). This must happen before we set the
2226     // C string length because invalidation will clear the length.
2227     // FIXME: Even if we can't perfectly model the copy, we should see if we
2228     // can use LazyCompoundVals to copy the source values into the destination.
2229     // This would probably remove any existing bindings past the end of the
2230     // string, but that's still an improvement over blank invalidation.
2231     state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
2232                                               *dstRegVal, amountCopied,
2233                                               C.getASTContext().getSizeType());
2234 
2235     // Invalidate the source (const-invalidation without const-pointer-escaping
2236     // the address of the top-level region).
2237     state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);
2238 
2239     // Set the C string length of the destination, if we know it.
2240     if (IsBounded && (appendK == ConcatFnKind::none)) {
2241       // strncpy is annoying in that it doesn't guarantee to null-terminate
2242       // the result string. If the original string didn't fit entirely inside
2243       // the bound (including the null-terminator), we don't know how long the
2244       // result is.
2245       if (amountCopied != strLength)
2246         finalStrLength = UnknownVal();
2247     }
2248     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2249   }
2250 
2251   assert(state);
2252 
2253   if (returnPtr) {
2254     // If this is a stpcpy-style copy, but we were unable to check for a buffer
2255     // overflow, we still need a result. Conjure a return value.
2256     if (ReturnEnd && Result.isUnknown()) {
2257       Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
2258                                             C.blockCount());
2259     }
2260   }
2261   // Set the return value.
2262   state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2263   C.addTransition(state);
2264 }
2265 
evalStrcmp(CheckerContext & C,const CallEvent & Call) const2266 void CStringChecker::evalStrcmp(CheckerContext &C,
2267                                 const CallEvent &Call) const {
2268   //int strcmp(const char *s1, const char *s2);
2269   evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2270 }
2271 
evalStrncmp(CheckerContext & C,const CallEvent & Call) const2272 void CStringChecker::evalStrncmp(CheckerContext &C,
2273                                  const CallEvent &Call) const {
2274   //int strncmp(const char *s1, const char *s2, size_t n);
2275   evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2276 }
2277 
evalStrcasecmp(CheckerContext & C,const CallEvent & Call) const2278 void CStringChecker::evalStrcasecmp(CheckerContext &C,
2279                                     const CallEvent &Call) const {
2280   //int strcasecmp(const char *s1, const char *s2);
2281   evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2282 }
2283 
evalStrncasecmp(CheckerContext & C,const CallEvent & Call) const2284 void CStringChecker::evalStrncasecmp(CheckerContext &C,
2285                                      const CallEvent &Call) const {
2286   //int strncasecmp(const char *s1, const char *s2, size_t n);
2287   evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2288 }
2289 
evalStrcmpCommon(CheckerContext & C,const CallEvent & Call,bool IsBounded,bool IgnoreCase) const2290 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2291                                       bool IsBounded, bool IgnoreCase) const {
2292   CurrentFunctionDescription = "string comparison function";
2293   ProgramStateRef state = C.getState();
2294   const LocationContext *LCtx = C.getLocationContext();
2295 
2296   // Check that the first string is non-null
2297   AnyArgExpr Left = {Call.getArgExpr(0), 0};
2298   SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2299   state = checkNonNull(C, state, Left, LeftVal);
2300   if (!state)
2301     return;
2302 
2303   // Check that the second string is non-null.
2304   AnyArgExpr Right = {Call.getArgExpr(1), 1};
2305   SVal RightVal = state->getSVal(Right.Expression, LCtx);
2306   state = checkNonNull(C, state, Right, RightVal);
2307   if (!state)
2308     return;
2309 
2310   // Get the string length of the first string or give up.
2311   SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2312   if (LeftLength.isUndef())
2313     return;
2314 
2315   // Get the string length of the second string or give up.
2316   SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2317   if (RightLength.isUndef())
2318     return;
2319 
2320   // If we know the two buffers are the same, we know the result is 0.
2321   // First, get the two buffers' addresses. Another checker will have already
2322   // made sure they're not undefined.
2323   DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2324   DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2325 
2326   // See if they are the same.
2327   SValBuilder &svalBuilder = C.getSValBuilder();
2328   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2329   ProgramStateRef StSameBuf, StNotSameBuf;
2330   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2331 
2332   // If the two arguments might be the same buffer, we know the result is 0,
2333   // and we only need to check one size.
2334   if (StSameBuf) {
2335     StSameBuf =
2336         StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2337                             svalBuilder.makeZeroVal(Call.getResultType()));
2338     C.addTransition(StSameBuf);
2339 
2340     // If the two arguments are GUARANTEED to be the same, we're done!
2341     if (!StNotSameBuf)
2342       return;
2343   }
2344 
2345   assert(StNotSameBuf);
2346   state = StNotSameBuf;
2347 
2348   // At this point we can go about comparing the two buffers.
2349   // For now, we only do this if they're both known string literals.
2350 
2351   // Attempt to extract string literals from both expressions.
2352   const StringLiteral *LeftStrLiteral =
2353       getCStringLiteral(C, state, Left.Expression, LeftVal);
2354   const StringLiteral *RightStrLiteral =
2355       getCStringLiteral(C, state, Right.Expression, RightVal);
2356   bool canComputeResult = false;
2357   SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(),
2358                                                 LCtx, C.blockCount());
2359 
2360   if (LeftStrLiteral && RightStrLiteral) {
2361     StringRef LeftStrRef = LeftStrLiteral->getString();
2362     StringRef RightStrRef = RightStrLiteral->getString();
2363 
2364     if (IsBounded) {
2365       // Get the max number of characters to compare.
2366       const Expr *lenExpr = Call.getArgExpr(2);
2367       SVal lenVal = state->getSVal(lenExpr, LCtx);
2368 
2369       // If the length is known, we can get the right substrings.
2370       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2371         // Create substrings of each to compare the prefix.
2372         LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2373         RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2374         canComputeResult = true;
2375       }
2376     } else {
2377       // This is a normal, unbounded strcmp.
2378       canComputeResult = true;
2379     }
2380 
2381     if (canComputeResult) {
2382       // Real strcmp stops at null characters.
2383       size_t s1Term = LeftStrRef.find('\0');
2384       if (s1Term != StringRef::npos)
2385         LeftStrRef = LeftStrRef.substr(0, s1Term);
2386 
2387       size_t s2Term = RightStrRef.find('\0');
2388       if (s2Term != StringRef::npos)
2389         RightStrRef = RightStrRef.substr(0, s2Term);
2390 
2391       // Use StringRef's comparison methods to compute the actual result.
2392       int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2393                                   : LeftStrRef.compare(RightStrRef);
2394 
2395       // The strcmp function returns an integer greater than, equal to, or less
2396       // than zero, [c11, p7.24.4.2].
2397       if (compareRes == 0) {
2398         resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2399       }
2400       else {
2401         DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2402         // Constrain strcmp's result range based on the result of StringRef's
2403         // comparison methods.
2404         BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2405         SVal compareWithZero =
2406           svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2407               svalBuilder.getConditionType());
2408         DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2409         state = state->assume(compareWithZeroVal, true);
2410       }
2411     }
2412   }
2413 
2414   state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2415 
2416   // Record this as a possible path.
2417   C.addTransition(state);
2418 }
2419 
evalStrsep(CheckerContext & C,const CallEvent & Call) const2420 void CStringChecker::evalStrsep(CheckerContext &C,
2421                                 const CallEvent &Call) const {
2422   // char *strsep(char **stringp, const char *delim);
2423   // Verify whether the search string parameter matches the return type.
2424   SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2425 
2426   QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2427   if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2428                                 CharPtrTy.getUnqualifiedType())
2429     return;
2430 
2431   CurrentFunctionDescription = "strsep()";
2432   ProgramStateRef State = C.getState();
2433   const LocationContext *LCtx = C.getLocationContext();
2434 
2435   // Check that the search string pointer is non-null (though it may point to
2436   // a null string).
2437   SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2438   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2439   if (!State)
2440     return;
2441 
2442   // Check that the delimiter string is non-null.
2443   AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2444   SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2445   State = checkNonNull(C, State, DelimStr, DelimStrVal);
2446   if (!State)
2447     return;
2448 
2449   SValBuilder &SVB = C.getSValBuilder();
2450   SVal Result;
2451   if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2452     // Get the current value of the search string pointer, as a char*.
2453     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2454 
2455     // Invalidate the search string, representing the change of one delimiter
2456     // character to NUL.
2457     // As the replacement never overflows, do not invalidate its super region.
2458     State = invalidateDestinationBufferNeverOverflows(
2459         C, State, SearchStrPtr.Expression, Result);
2460 
2461     // Overwrite the search string pointer. The new value is either an address
2462     // further along in the same string, or NULL if there are no more tokens.
2463     State =
2464         State->bindLoc(*SearchStrLoc,
2465                        SVB.conjureSymbolVal(getTag(), Call.getOriginExpr(),
2466                                             LCtx, CharPtrTy, C.blockCount()),
2467                        LCtx);
2468   } else {
2469     assert(SearchStrVal.isUnknown());
2470     // Conjure a symbolic value. It's the best we can do.
2471     Result = SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
2472                                   C.blockCount());
2473   }
2474 
2475   // Set the return value, and finish.
2476   State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2477   C.addTransition(State);
2478 }
2479 
2480 // These should probably be moved into a C++ standard library checker.
evalStdCopy(CheckerContext & C,const CallEvent & Call) const2481 void CStringChecker::evalStdCopy(CheckerContext &C,
2482                                  const CallEvent &Call) const {
2483   evalStdCopyCommon(C, Call);
2484 }
2485 
evalStdCopyBackward(CheckerContext & C,const CallEvent & Call) const2486 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2487                                          const CallEvent &Call) const {
2488   evalStdCopyCommon(C, Call);
2489 }
2490 
evalStdCopyCommon(CheckerContext & C,const CallEvent & Call) const2491 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2492                                        const CallEvent &Call) const {
2493   if (!Call.getArgExpr(2)->getType()->isPointerType())
2494     return;
2495 
2496   ProgramStateRef State = C.getState();
2497 
2498   const LocationContext *LCtx = C.getLocationContext();
2499 
2500   // template <class _InputIterator, class _OutputIterator>
2501   // _OutputIterator
2502   // copy(_InputIterator __first, _InputIterator __last,
2503   //        _OutputIterator __result)
2504 
2505   // Invalidate the destination buffer
2506   const Expr *Dst = Call.getArgExpr(2);
2507   SVal DstVal = State->getSVal(Dst, LCtx);
2508   // FIXME: As we do not know how many items are copied, we also invalidate the
2509   // super region containing the target location.
2510   State =
2511       invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
2512 
2513   SValBuilder &SVB = C.getSValBuilder();
2514 
2515   SVal ResultVal =
2516       SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
2517   State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2518 
2519   C.addTransition(State);
2520 }
2521 
evalMemset(CheckerContext & C,const CallEvent & Call) const2522 void CStringChecker::evalMemset(CheckerContext &C,
2523                                 const CallEvent &Call) const {
2524   // void *memset(void *s, int c, size_t n);
2525   CurrentFunctionDescription = "memory set function";
2526 
2527   DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2528   AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2529   SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2530 
2531   ProgramStateRef State = C.getState();
2532 
2533   // See if the size argument is zero.
2534   const LocationContext *LCtx = C.getLocationContext();
2535   SVal SizeVal = C.getSVal(Size.Expression);
2536   QualType SizeTy = Size.Expression->getType();
2537 
2538   ProgramStateRef ZeroSize, NonZeroSize;
2539   std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2540 
2541   // Get the value of the memory area.
2542   SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2543 
2544   // If the size is zero, there won't be any actual memory access, so
2545   // just bind the return value to the buffer and return.
2546   if (ZeroSize && !NonZeroSize) {
2547     ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2548     C.addTransition(ZeroSize);
2549     return;
2550   }
2551 
2552   // Ensure the memory area is not null.
2553   // If it is NULL there will be a NULL pointer dereference.
2554   State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2555   if (!State)
2556     return;
2557 
2558   State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2559   if (!State)
2560     return;
2561 
2562   // According to the values of the arguments, bind the value of the second
2563   // argument to the destination buffer and set string length, or just
2564   // invalidate the destination buffer.
2565   if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2566                  Size.Expression, C, State))
2567     return;
2568 
2569   State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2570   C.addTransition(State);
2571 }
2572 
evalBzero(CheckerContext & C,const CallEvent & Call) const2573 void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2574   CurrentFunctionDescription = "memory clearance function";
2575 
2576   DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2577   SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2578   SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2579 
2580   ProgramStateRef State = C.getState();
2581 
2582   // See if the size argument is zero.
2583   SVal SizeVal = C.getSVal(Size.Expression);
2584   QualType SizeTy = Size.Expression->getType();
2585 
2586   ProgramStateRef StateZeroSize, StateNonZeroSize;
2587   std::tie(StateZeroSize, StateNonZeroSize) =
2588     assumeZero(C, State, SizeVal, SizeTy);
2589 
2590   // If the size is zero, there won't be any actual memory access,
2591   // In this case we just return.
2592   if (StateZeroSize && !StateNonZeroSize) {
2593     C.addTransition(StateZeroSize);
2594     return;
2595   }
2596 
2597   // Get the value of the memory area.
2598   SVal MemVal = C.getSVal(Buffer.Expression);
2599 
2600   // Ensure the memory area is not null.
2601   // If it is NULL there will be a NULL pointer dereference.
2602   State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2603   if (!State)
2604     return;
2605 
2606   State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2607   if (!State)
2608     return;
2609 
2610   if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2611     return;
2612 
2613   C.addTransition(State);
2614 }
2615 
evalSprintf(CheckerContext & C,const CallEvent & Call) const2616 void CStringChecker::evalSprintf(CheckerContext &C,
2617                                  const CallEvent &Call) const {
2618   CurrentFunctionDescription = "'sprintf'";
2619   evalSprintfCommon(C, Call, /* IsBounded = */ false);
2620 }
2621 
evalSnprintf(CheckerContext & C,const CallEvent & Call) const2622 void CStringChecker::evalSnprintf(CheckerContext &C,
2623                                   const CallEvent &Call) const {
2624   CurrentFunctionDescription = "'snprintf'";
2625   evalSprintfCommon(C, Call, /* IsBounded = */ true);
2626 }
2627 
evalSprintfCommon(CheckerContext & C,const CallEvent & Call,bool IsBounded) const2628 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2629                                        bool IsBounded) const {
2630   ProgramStateRef State = C.getState();
2631   const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2632   DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2633 
2634   const auto NumParams = Call.parameters().size();
2635   if (CE->getNumArgs() < NumParams) {
2636     // This is an invalid call, let's just ignore it.
2637     return;
2638   }
2639 
2640   const auto AllArguments =
2641       llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2642   const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2643 
2644   for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2645     // We consider only string buffers
2646     if (const QualType type = ArgExpr->getType();
2647         !type->isAnyPointerType() ||
2648         !type->getPointeeType()->isAnyCharacterType())
2649       continue;
2650     SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2651 
2652     // Ensure the buffers do not overlap.
2653     SizeArgExpr SrcExprAsSizeDummy = {
2654         {Source.Expression, Source.ArgumentIndex}};
2655     State = CheckOverlap(
2656         C, State,
2657         (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2658         Dest, Source);
2659     if (!State)
2660       return;
2661   }
2662 
2663   C.addTransition(State);
2664 }
2665 
2666 //===----------------------------------------------------------------------===//
2667 // The driver method, and other Checker callbacks.
2668 //===----------------------------------------------------------------------===//
2669 
identifyCall(const CallEvent & Call,CheckerContext & C) const2670 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2671                                                      CheckerContext &C) const {
2672   const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2673   if (!CE)
2674     return nullptr;
2675 
2676   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2677   if (!FD)
2678     return nullptr;
2679 
2680   if (StdCopy.matches(Call))
2681     return &CStringChecker::evalStdCopy;
2682   if (StdCopyBackward.matches(Call))
2683     return &CStringChecker::evalStdCopyBackward;
2684 
2685   // Pro-actively check that argument types are safe to do arithmetic upon.
2686   // We do not want to crash if someone accidentally passes a structure
2687   // into, say, a C++ overload of any of these functions. We could not check
2688   // that for std::copy because they may have arguments of other types.
2689   for (auto I : CE->arguments()) {
2690     QualType T = I->getType();
2691     if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2692       return nullptr;
2693   }
2694 
2695   const FnCheck *Callback = Callbacks.lookup(Call);
2696   if (Callback)
2697     return *Callback;
2698 
2699   return nullptr;
2700 }
2701 
evalCall(const CallEvent & Call,CheckerContext & C) const2702 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2703   FnCheck Callback = identifyCall(Call, C);
2704 
2705   // If the callee isn't a string function, let another checker handle it.
2706   if (!Callback)
2707     return false;
2708 
2709   // Check and evaluate the call.
2710   assert(isa<CallExpr>(Call.getOriginExpr()));
2711   Callback(this, C, Call);
2712 
2713   // If the evaluate call resulted in no change, chain to the next eval call
2714   // handler.
2715   // Note, the custom CString evaluation calls assume that basic safety
2716   // properties are held. However, if the user chooses to turn off some of these
2717   // checks, we ignore the issues and leave the call evaluation to a generic
2718   // handler.
2719   return C.isDifferent();
2720 }
2721 
checkPreStmt(const DeclStmt * DS,CheckerContext & C) const2722 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2723   // Record string length for char a[] = "abc";
2724   ProgramStateRef state = C.getState();
2725 
2726   for (const auto *I : DS->decls()) {
2727     const VarDecl *D = dyn_cast<VarDecl>(I);
2728     if (!D)
2729       continue;
2730 
2731     // FIXME: Handle array fields of structs.
2732     if (!D->getType()->isArrayType())
2733       continue;
2734 
2735     const Expr *Init = D->getInit();
2736     if (!Init)
2737       continue;
2738     if (!isa<StringLiteral>(Init))
2739       continue;
2740 
2741     Loc VarLoc = state->getLValue(D, C.getLocationContext());
2742     const MemRegion *MR = VarLoc.getAsRegion();
2743     if (!MR)
2744       continue;
2745 
2746     SVal StrVal = C.getSVal(Init);
2747     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2748     DefinedOrUnknownSVal strLength =
2749       getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2750 
2751     state = state->set<CStringLength>(MR, strLength);
2752   }
2753 
2754   C.addTransition(state);
2755 }
2756 
2757 ProgramStateRef
checkRegionChanges(ProgramStateRef state,const InvalidatedSymbols *,ArrayRef<const MemRegion * > ExplicitRegions,ArrayRef<const MemRegion * > Regions,const LocationContext * LCtx,const CallEvent * Call) const2758 CStringChecker::checkRegionChanges(ProgramStateRef state,
2759     const InvalidatedSymbols *,
2760     ArrayRef<const MemRegion *> ExplicitRegions,
2761     ArrayRef<const MemRegion *> Regions,
2762     const LocationContext *LCtx,
2763     const CallEvent *Call) const {
2764   CStringLengthTy Entries = state->get<CStringLength>();
2765   if (Entries.isEmpty())
2766     return state;
2767 
2768   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2769   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2770 
2771   // First build sets for the changed regions and their super-regions.
2772   for (const MemRegion *MR : Regions) {
2773     Invalidated.insert(MR);
2774 
2775     SuperRegions.insert(MR);
2776     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2777       MR = SR->getSuperRegion();
2778       SuperRegions.insert(MR);
2779     }
2780   }
2781 
2782   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2783 
2784   // Then loop over the entries in the current state.
2785   for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2786     // Is this entry for a super-region of a changed region?
2787     if (SuperRegions.count(MR)) {
2788       Entries = F.remove(Entries, MR);
2789       continue;
2790     }
2791 
2792     // Is this entry for a sub-region of a changed region?
2793     const MemRegion *Super = MR;
2794     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2795       Super = SR->getSuperRegion();
2796       if (Invalidated.count(Super)) {
2797         Entries = F.remove(Entries, MR);
2798         break;
2799       }
2800     }
2801   }
2802 
2803   return state->set<CStringLength>(Entries);
2804 }
2805 
checkLiveSymbols(ProgramStateRef state,SymbolReaper & SR) const2806 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2807     SymbolReaper &SR) const {
2808   // Mark all symbols in our string length map as valid.
2809   CStringLengthTy Entries = state->get<CStringLength>();
2810 
2811   for (SVal Len : llvm::make_second_range(Entries)) {
2812     for (SymbolRef Sym : Len.symbols())
2813       SR.markInUse(Sym);
2814   }
2815 }
2816 
checkDeadSymbols(SymbolReaper & SR,CheckerContext & C) const2817 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2818     CheckerContext &C) const {
2819   ProgramStateRef state = C.getState();
2820   CStringLengthTy Entries = state->get<CStringLength>();
2821   if (Entries.isEmpty())
2822     return;
2823 
2824   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2825   for (auto [Reg, Len] : Entries) {
2826     if (SymbolRef Sym = Len.getAsSymbol()) {
2827       if (SR.isDead(Sym))
2828         Entries = F.remove(Entries, Reg);
2829     }
2830   }
2831 
2832   state = state->set<CStringLength>(Entries);
2833   C.addTransition(state);
2834 }
2835 
registerCStringModeling(CheckerManager & Mgr)2836 void ento::registerCStringModeling(CheckerManager &Mgr) {
2837   Mgr.registerChecker<CStringChecker>();
2838 }
2839 
shouldRegisterCStringModeling(const CheckerManager & mgr)2840 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2841   return true;
2842 }
2843 
// Generates the registration entry points for one CStringChecker sub-check.
//
// For a given `name` this defines:
//   - ento::register<name>: looks up the CStringChecker instance, records the
//     current checker name in Filter.CheckName<name> and sets
//     Filter.Check<name> to true.
//   - ento::shouldRegister<name>: always returns true.
#define REGISTER_CHECKER(name)                                                 \
  void ento::register##name(CheckerManager &mgr) {                             \
    CStringChecker *Checker = mgr.getChecker<CStringChecker>();                \
    Checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
    Checker->Filter.Check##name = true;                                        \
  }                                                                            \
                                                                               \
  bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2852 
2853 REGISTER_CHECKER(CStringNullArg)
2854 REGISTER_CHECKER(CStringOutOfBounds)
2855 REGISTER_CHECKER(CStringBufferOverlap)
2856 REGISTER_CHECKER(CStringNotNullTerm)
2857 REGISTER_CHECKER(CStringUninitializedRead)
2858