xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp (revision 7ef62cebc2f965b0f640263e179276928885e33d)
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This defines CStringChecker, which is an assortment of checks on calls
10 // to functions in <string.h>.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InterCheckerAPI.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/StringExtras.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <functional>
30 #include <optional>
31 
32 using namespace clang;
33 using namespace ento;
34 using namespace std::placeholders;
35 
36 namespace {
37 struct AnyArgExpr {
38   // FIXME: Remove constructor in C++17 to turn it into an aggregate.
39   AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex)
40       : Expression{Expression}, ArgumentIndex{ArgumentIndex} {}
41   const Expr *Expression;
42   unsigned ArgumentIndex;
43 };
44 
/// Strongly typed AnyArgExpr for the source-buffer argument of a call, so a
/// source cannot be passed where a destination or size is expected.
struct SourceArgExpr : AnyArgExpr {
  using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17.
};
48 
/// Strongly typed AnyArgExpr for the destination-buffer argument of a call.
struct DestinationArgExpr : AnyArgExpr {
  using AnyArgExpr::AnyArgExpr; // FIXME: Same.
};
52 
/// Strongly typed AnyArgExpr for the size/length argument of a call.
struct SizeArgExpr : AnyArgExpr {
  using AnyArgExpr::AnyArgExpr; // FIXME: Same.
};
56 
/// Buffer type used to build diagnostic messages (128 bytes of inline
/// storage).
using ErrorMessage = SmallString<128>;
/// Whether a checked buffer access writes to or reads from the buffer; this
/// selects the wording of out-of-bounds diagnostics.
enum class AccessKind { write, read };
59 
60 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
61                                              AccessKind Access) {
62   ErrorMessage Message;
63   llvm::raw_svector_ostream Os(Message);
64 
65   // Function classification like: Memory copy function
66   Os << toUppercase(FunctionDescription.front())
67      << &FunctionDescription.data()[1];
68 
69   if (Access == AccessKind::write) {
70     Os << " overflows the destination buffer";
71   } else { // read access
72     Os << " accesses out-of-bound array element";
73   }
74 
75   return Message;
76 }
77 
/// Which concatenation flavour, if any, a string-copy evaluation performs.
/// The enumerators keep the values 0, 1 and 2 respectively.
enum class ConcatFnKind { none, strcat, strlcat };

/// Character width a modelled function operates on: plain char (Regular) or
/// wchar_t (Wide).
enum class CharKind { Regular, Wide };

/// Short aliases for the CharKind enumerators.
constexpr auto CK_Regular = CharKind::Regular;
constexpr auto CK_Wide = CharKind::Wide;
83 
84 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
85   return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
86                                                     : Ctx.WideCharTy);
87 }
88 
89 class CStringChecker : public Checker< eval::Call,
90                                          check::PreStmt<DeclStmt>,
91                                          check::LiveSymbols,
92                                          check::DeadSymbols,
93                                          check::RegionChanges
94                                          > {
95   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
96       BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
97 
98   mutable const char *CurrentFunctionDescription;
99 
100 public:
101   /// The filter is used to filter out the diagnostics which are not enabled by
102   /// the user.
103   struct CStringChecksFilter {
104     bool CheckCStringNullArg = false;
105     bool CheckCStringOutOfBounds = false;
106     bool CheckCStringBufferOverlap = false;
107     bool CheckCStringNotNullTerm = false;
108     bool CheckCStringUninitializedRead = false;
109 
110     CheckerNameRef CheckNameCStringNullArg;
111     CheckerNameRef CheckNameCStringOutOfBounds;
112     CheckerNameRef CheckNameCStringBufferOverlap;
113     CheckerNameRef CheckNameCStringNotNullTerm;
114     CheckerNameRef CheckNameCStringUninitializedRead;
115   };
116 
117   CStringChecksFilter Filter;
118 
119   static void *getTag() { static int tag; return &tag; }
120 
121   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
122   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
123   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
124   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
125 
126   ProgramStateRef
127     checkRegionChanges(ProgramStateRef state,
128                        const InvalidatedSymbols *,
129                        ArrayRef<const MemRegion *> ExplicitRegions,
130                        ArrayRef<const MemRegion *> Regions,
131                        const LocationContext *LCtx,
132                        const CallEvent *Call) const;
133 
134   using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
135                                      const CallExpr *)>;
136 
137   CallDescriptionMap<FnCheck> Callbacks = {
138       {{CDF_MaybeBuiltin, {"memcpy"}, 3},
139        std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
140       {{CDF_MaybeBuiltin, {"wmemcpy"}, 3},
141        std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
142       {{CDF_MaybeBuiltin, {"mempcpy"}, 3},
143        std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
144       {{CDF_None, {"wmempcpy"}, 3},
145        std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
146       {{CDF_MaybeBuiltin, {"memcmp"}, 3},
147        std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
148       {{CDF_MaybeBuiltin, {"wmemcmp"}, 3},
149        std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
150       {{CDF_MaybeBuiltin, {"memmove"}, 3},
151        std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
152       {{CDF_MaybeBuiltin, {"wmemmove"}, 3},
153        std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
154       {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset},
155       {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
156       {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy},
157       {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy},
158       {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy},
159       {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy},
160       {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat},
161       {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat},
162       {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat},
163       {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength},
164       {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
165       {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength},
166       {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
167       {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
168       {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
169       {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
170       {{CDF_MaybeBuiltin, {"strncasecmp"}, 3},
171        &CStringChecker::evalStrncasecmp},
172       {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep},
173       {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
174       {{CDF_MaybeBuiltin, {"bcmp"}, 3},
175        std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
176       {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero},
177       {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero},
178   };
179 
180   // These require a bit of special handling.
181   CallDescription StdCopy{{"std", "copy"}, 3},
182       StdCopyBackward{{"std", "copy_backward"}, 3};
183 
184   FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
185   void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
186   void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
187   void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
188   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
189   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
190                       ProgramStateRef state, SizeArgExpr Size,
191                       DestinationArgExpr Dest, SourceArgExpr Source,
192                       bool Restricted, bool IsMempcpy, CharKind CK) const;
193 
194   void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
195 
196   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
197   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
198   void evalstrLengthCommon(CheckerContext &C,
199                            const CallExpr *CE,
200                            bool IsStrnlen = false) const;
201 
202   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
203   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
204   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
205   void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
206   void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd,
207                         bool IsBounded, ConcatFnKind appendK,
208                         bool returnPtr = true) const;
209 
210   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
211   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
212   void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;
213 
214   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
215   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
216   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
217   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
218   void evalStrcmpCommon(CheckerContext &C,
219                         const CallExpr *CE,
220                         bool IsBounded = false,
221                         bool IgnoreCase = false) const;
222 
223   void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
224 
225   void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
226   void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
227   void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
228   void evalMemset(CheckerContext &C, const CallExpr *CE) const;
229   void evalBzero(CheckerContext &C, const CallExpr *CE) const;
230 
231   // Utility methods
232   std::pair<ProgramStateRef , ProgramStateRef >
233   static assumeZero(CheckerContext &C,
234                     ProgramStateRef state, SVal V, QualType Ty);
235 
236   static ProgramStateRef setCStringLength(ProgramStateRef state,
237                                               const MemRegion *MR,
238                                               SVal strLength);
239   static SVal getCStringLengthForRegion(CheckerContext &C,
240                                         ProgramStateRef &state,
241                                         const Expr *Ex,
242                                         const MemRegion *MR,
243                                         bool hypothetical);
244   SVal getCStringLength(CheckerContext &C,
245                         ProgramStateRef &state,
246                         const Expr *Ex,
247                         SVal Buf,
248                         bool hypothetical = false) const;
249 
250   const StringLiteral *getCStringLiteral(CheckerContext &C,
251                                          ProgramStateRef &state,
252                                          const Expr *expr,
253                                          SVal val) const;
254 
255   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
256                                           ProgramStateRef state,
257                                           const Expr *Ex, SVal V,
258                                           bool IsSourceBuffer,
259                                           const Expr *Size);
260 
261   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
262                               const MemRegion *MR);
263 
264   static bool memsetAux(const Expr *DstBuffer, SVal CharE,
265                         const Expr *Size, CheckerContext &C,
266                         ProgramStateRef &State);
267 
268   // Re-usable checks
269   ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
270                                AnyArgExpr Arg, SVal l) const;
271   ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
272                                 AnyArgExpr Buffer, SVal Element,
273                                 AccessKind Access,
274                                 CharKind CK = CharKind::Regular) const;
275   ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
276                                     AnyArgExpr Buffer, SizeArgExpr Size,
277                                     AccessKind Access,
278                                     CharKind CK = CharKind::Regular) const;
279   ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
280                                SizeArgExpr Size, AnyArgExpr First,
281                                AnyArgExpr Second,
282                                CharKind CK = CharKind::Regular) const;
283   void emitOverlapBug(CheckerContext &C,
284                       ProgramStateRef state,
285                       const Stmt *First,
286                       const Stmt *Second) const;
287 
288   void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
289                       StringRef WarningMsg) const;
290   void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
291                           const Stmt *S, StringRef WarningMsg) const;
292   void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
293                          const Stmt *S, StringRef WarningMsg) const;
294   void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
295   void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
296                              const Expr *E) const;
297   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
298                                             ProgramStateRef state,
299                                             NonLoc left,
300                                             NonLoc right) const;
301 
302   // Return true if the destination buffer of the copy function may be in bound.
303   // Expects SVal of Size to be positive and unsigned.
304   // Expects SVal of FirstBuf to be a FieldRegion.
305   static bool IsFirstBufInBound(CheckerContext &C,
306                                 ProgramStateRef state,
307                                 const Expr *FirstBuf,
308                                 const Expr *Size);
309 };
310 
311 } //end anonymous namespace
312 
// Program-state map keyed by memory region with an SVal per entry.
// NOTE(review): presumably the value is the C string length recorded for the
// region via setCStringLength -- confirm against its definition.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
314 
315 //===----------------------------------------------------------------------===//
316 // Individual checks and utility methods.
317 //===----------------------------------------------------------------------===//
318 
319 std::pair<ProgramStateRef , ProgramStateRef >
320 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
321                            QualType Ty) {
322   std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
323   if (!val)
324     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
325 
326   SValBuilder &svalBuilder = C.getSValBuilder();
327   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
328   return state->assume(svalBuilder.evalEQ(state, *val, zero));
329 }
330 
331 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
332                                              ProgramStateRef State,
333                                              AnyArgExpr Arg, SVal l) const {
334   // If a previous check has failed, propagate the failure.
335   if (!State)
336     return nullptr;
337 
338   ProgramStateRef stateNull, stateNonNull;
339   std::tie(stateNull, stateNonNull) =
340       assumeZero(C, State, l, Arg.Expression->getType());
341 
342   if (stateNull && !stateNonNull) {
343     if (Filter.CheckCStringNullArg) {
344       SmallString<80> buf;
345       llvm::raw_svector_ostream OS(buf);
346       assert(CurrentFunctionDescription);
347       OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
348          << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
349          << CurrentFunctionDescription;
350 
351       emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
352     }
353     return nullptr;
354   }
355 
356   // From here on, assume that the value is non-null.
357   assert(stateNonNull);
358   return stateNonNull;
359 }
360 
361 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
362 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
363                                               ProgramStateRef state,
364                                               AnyArgExpr Buffer, SVal Element,
365                                               AccessKind Access,
366                                               CharKind CK) const {
367 
368   // If a previous check has failed, propagate the failure.
369   if (!state)
370     return nullptr;
371 
372   // Check for out of bound array element access.
373   const MemRegion *R = Element.getAsRegion();
374   if (!R)
375     return state;
376 
377   const auto *ER = dyn_cast<ElementRegion>(R);
378   if (!ER)
379     return state;
380 
381   SValBuilder &svalBuilder = C.getSValBuilder();
382   ASTContext &Ctx = svalBuilder.getContext();
383 
384   // Get the index of the accessed element.
385   NonLoc Idx = ER->getIndex();
386 
387   if (CK == CharKind::Regular) {
388     if (ER->getValueType() != Ctx.CharTy)
389       return state;
390   } else {
391     if (ER->getValueType() != Ctx.WideCharTy)
392       return state;
393 
394     QualType SizeTy = Ctx.getSizeType();
395     NonLoc WideSize =
396         svalBuilder
397             .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
398                         SizeTy)
399             .castAs<NonLoc>();
400     SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
401     if (Offset.isUnknown())
402       return state;
403     Idx = Offset.castAs<NonLoc>();
404   }
405 
406   // Get the size of the array.
407   const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
408   DefinedOrUnknownSVal Size =
409       getDynamicExtent(state, superReg, C.getSValBuilder());
410 
411   ProgramStateRef StInBound, StOutBound;
412   std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
413   if (StOutBound && !StInBound) {
414     // These checks are either enabled by the CString out-of-bounds checker
415     // explicitly or implicitly by the Malloc checker.
416     // In the latter case we only do modeling but do not emit warning.
417     if (!Filter.CheckCStringOutOfBounds)
418       return nullptr;
419 
420     // Emit a bug report.
421     ErrorMessage Message =
422         createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
423     emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
424     return nullptr;
425   }
426 
427   // Ensure that we wouldn't read uninitialized value.
428   if (Access == AccessKind::read) {
429     if (Filter.CheckCStringUninitializedRead &&
430         StInBound->getSVal(ER).isUndef()) {
431       emitUninitializedReadBug(C, StInBound, Buffer.Expression);
432       return nullptr;
433     }
434   }
435 
436   // Array bound check succeeded.  From this point forward the array bound
437   // should always succeed.
438   return StInBound;
439 }
440 
441 ProgramStateRef
442 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
443                                   AnyArgExpr Buffer, SizeArgExpr Size,
444                                   AccessKind Access, CharKind CK) const {
445   // If a previous check has failed, propagate the failure.
446   if (!State)
447     return nullptr;
448 
449   SValBuilder &svalBuilder = C.getSValBuilder();
450   ASTContext &Ctx = svalBuilder.getContext();
451 
452   QualType SizeTy = Size.Expression->getType();
453   QualType PtrTy = getCharPtrType(Ctx, CK);
454 
455   // Check that the first buffer is non-null.
456   SVal BufVal = C.getSVal(Buffer.Expression);
457   State = checkNonNull(C, State, Buffer, BufVal);
458   if (!State)
459     return nullptr;
460 
461   // If out-of-bounds checking is turned off, skip the rest.
462   if (!Filter.CheckCStringOutOfBounds)
463     return State;
464 
465   // Get the access length and make sure it is known.
466   // FIXME: This assumes the caller has already checked that the access length
467   // is positive. And that it's unsigned.
468   SVal LengthVal = C.getSVal(Size.Expression);
469   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
470   if (!Length)
471     return State;
472 
473   // Compute the offset of the last element to be accessed: size-1.
474   NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
475   SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
476   if (Offset.isUnknown())
477     return nullptr;
478   NonLoc LastOffset = Offset.castAs<NonLoc>();
479 
480   // Check that the first buffer is sufficiently long.
481   SVal BufStart =
482       svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
483   if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
484 
485     SVal BufEnd =
486         svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
487     State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
488 
489     // If the buffer isn't large enough, abort.
490     if (!State)
491       return nullptr;
492   }
493 
494   // Large enough or not, return this state!
495   return State;
496 }
497 
498 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
499                                              ProgramStateRef state,
500                                              SizeArgExpr Size, AnyArgExpr First,
501                                              AnyArgExpr Second,
502                                              CharKind CK) const {
503   if (!Filter.CheckCStringBufferOverlap)
504     return state;
505 
506   // Do a simple check for overlap: if the two arguments are from the same
507   // buffer, see if the end of the first is greater than the start of the second
508   // or vice versa.
509 
510   // If a previous check has failed, propagate the failure.
511   if (!state)
512     return nullptr;
513 
514   ProgramStateRef stateTrue, stateFalse;
515 
516   // Assume different address spaces cannot overlap.
517   if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
518       Second.Expression->getType()->getPointeeType().getAddressSpace())
519     return state;
520 
521   // Get the buffer values and make sure they're known locations.
522   const LocationContext *LCtx = C.getLocationContext();
523   SVal firstVal = state->getSVal(First.Expression, LCtx);
524   SVal secondVal = state->getSVal(Second.Expression, LCtx);
525 
526   std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
527   if (!firstLoc)
528     return state;
529 
530   std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
531   if (!secondLoc)
532     return state;
533 
534   // Are the two values the same?
535   SValBuilder &svalBuilder = C.getSValBuilder();
536   std::tie(stateTrue, stateFalse) =
537       state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
538 
539   if (stateTrue && !stateFalse) {
540     // If the values are known to be equal, that's automatically an overlap.
541     emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
542     return nullptr;
543   }
544 
545   // assume the two expressions are not equal.
546   assert(stateFalse);
547   state = stateFalse;
548 
549   // Which value comes first?
550   QualType cmpTy = svalBuilder.getConditionType();
551   SVal reverse =
552       svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
553   std::optional<DefinedOrUnknownSVal> reverseTest =
554       reverse.getAs<DefinedOrUnknownSVal>();
555   if (!reverseTest)
556     return state;
557 
558   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
559   if (stateTrue) {
560     if (stateFalse) {
561       // If we don't know which one comes first, we can't perform this test.
562       return state;
563     } else {
564       // Switch the values so that firstVal is before secondVal.
565       std::swap(firstLoc, secondLoc);
566 
567       // Switch the Exprs as well, so that they still correspond.
568       std::swap(First, Second);
569     }
570   }
571 
572   // Get the length, and make sure it too is known.
573   SVal LengthVal = state->getSVal(Size.Expression, LCtx);
574   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
575   if (!Length)
576     return state;
577 
578   // Convert the first buffer's start address to char*.
579   // Bail out if the cast fails.
580   ASTContext &Ctx = svalBuilder.getContext();
581   QualType CharPtrTy = getCharPtrType(Ctx, CK);
582   SVal FirstStart =
583       svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
584   std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
585   if (!FirstStartLoc)
586     return state;
587 
588   // Compute the end of the first buffer. Bail out if THAT fails.
589   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
590                                           *Length, CharPtrTy);
591   std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
592   if (!FirstEndLoc)
593     return state;
594 
595   // Is the end of the first buffer past the start of the second buffer?
596   SVal Overlap =
597       svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
598   std::optional<DefinedOrUnknownSVal> OverlapTest =
599       Overlap.getAs<DefinedOrUnknownSVal>();
600   if (!OverlapTest)
601     return state;
602 
603   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
604 
605   if (stateTrue && !stateFalse) {
606     // Overlap!
607     emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
608     return nullptr;
609   }
610 
611   // assume the two expressions don't overlap.
612   assert(stateFalse);
613   return stateFalse;
614 }
615 
616 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
617                                   const Stmt *First, const Stmt *Second) const {
618   ExplodedNode *N = C.generateErrorNode(state);
619   if (!N)
620     return;
621 
622   if (!BT_Overlap)
623     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
624                                  categories::UnixAPI, "Improper arguments"));
625 
626   // Generate a report for this bug.
627   auto report = std::make_unique<PathSensitiveBugReport>(
628       *BT_Overlap, "Arguments must not be overlapping buffers", N);
629   report->addRange(First->getSourceRange());
630   report->addRange(Second->getSourceRange());
631 
632   C.emitReport(std::move(report));
633 }
634 
635 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
636                                     const Stmt *S, StringRef WarningMsg) const {
637   if (ExplodedNode *N = C.generateErrorNode(State)) {
638     if (!BT_Null)
639       BT_Null.reset(new BuiltinBug(
640           Filter.CheckNameCStringNullArg, categories::UnixAPI,
641           "Null pointer argument in call to byte string function"));
642 
643     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get());
644     auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N);
645     Report->addRange(S->getSourceRange());
646     if (const auto *Ex = dyn_cast<Expr>(S))
647       bugreporter::trackExpressionValue(N, Ex, *Report);
648     C.emitReport(std::move(Report));
649   }
650 }
651 
652 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
653                                               ProgramStateRef State,
654                                               const Expr *E) const {
655   if (ExplodedNode *N = C.generateErrorNode(State)) {
656     const char *Msg =
657         "Bytes string function accesses uninitialized/garbage values";
658     if (!BT_UninitRead)
659       BT_UninitRead.reset(
660           new BuiltinBug(Filter.CheckNameCStringUninitializedRead,
661                          "Accessing unitialized/garbage values", Msg));
662 
663     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get());
664 
665     auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
666     Report->addRange(E->getSourceRange());
667     bugreporter::trackExpressionValue(N, E, *Report);
668     C.emitReport(std::move(Report));
669   }
670 }
671 
672 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
673                                         ProgramStateRef State, const Stmt *S,
674                                         StringRef WarningMsg) const {
675   if (ExplodedNode *N = C.generateErrorNode(State)) {
676     if (!BT_Bounds)
677       BT_Bounds.reset(new BuiltinBug(
678           Filter.CheckCStringOutOfBounds ? Filter.CheckNameCStringOutOfBounds
679                                          : Filter.CheckNameCStringNullArg,
680           "Out-of-bound array access",
681           "Byte string function accesses out-of-bound array element"));
682 
683     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get());
684 
685     // FIXME: It would be nice to eventually make this diagnostic more clear,
686     // e.g., by referencing the original declaration or by saying *why* this
687     // reference is outside the range.
688     auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N);
689     Report->addRange(S->getSourceRange());
690     C.emitReport(std::move(Report));
691   }
692 }
693 
694 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
695                                        const Stmt *S,
696                                        StringRef WarningMsg) const {
697   if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
698     if (!BT_NotCString)
699       BT_NotCString.reset(new BuiltinBug(
700           Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
701           "Argument is not a null-terminated string."));
702 
703     auto Report =
704         std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
705 
706     Report->addRange(S->getSourceRange());
707     C.emitReport(std::move(Report));
708   }
709 }
710 
711 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
712                                              ProgramStateRef State) const {
713   if (ExplodedNode *N = C.generateErrorNode(State)) {
714     if (!BT_AdditionOverflow)
715       BT_AdditionOverflow.reset(
716           new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
717                          "Sum of expressions causes overflow."));
718 
719     // This isn't a great error message, but this should never occur in real
720     // code anyway -- you'd have to create a buffer longer than a size_t can
721     // represent, which is sort of a contradiction.
722     const char *WarningMsg =
723         "This expression will create a string whose length is too big to "
724         "be represented as a size_t";
725 
726     auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
727                                                            WarningMsg, N);
728     C.emitReport(std::move(Report));
729   }
730 }
731 
732 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
733                                                      ProgramStateRef state,
734                                                      NonLoc left,
735                                                      NonLoc right) const {
736   // If out-of-bounds checking is turned off, skip the rest.
737   if (!Filter.CheckCStringOutOfBounds)
738     return state;
739 
740   // If a previous check has failed, propagate the failure.
741   if (!state)
742     return nullptr;
743 
744   SValBuilder &svalBuilder = C.getSValBuilder();
745   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
746 
747   QualType sizeTy = svalBuilder.getContext().getSizeType();
748   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
749   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
750 
751   SVal maxMinusRight;
752   if (isa<nonloc::ConcreteInt>(right)) {
753     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
754                                                  sizeTy);
755   } else {
756     // Try switching the operands. (The order of these two assignments is
757     // important!)
758     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
759                                             sizeTy);
760     left = right;
761   }
762 
763   if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
764     QualType cmpTy = svalBuilder.getConditionType();
765     // If left > max - right, we have an overflow.
766     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
767                                                 *maxMinusRightNL, cmpTy);
768 
769     ProgramStateRef stateOverflow, stateOkay;
770     std::tie(stateOverflow, stateOkay) =
771       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
772 
773     if (stateOverflow && !stateOkay) {
774       // We have an overflow. Emit a bug report.
775       emitAdditionOverflowBug(C, stateOverflow);
776       return nullptr;
777     }
778 
779     // From now on, assume an overflow didn't occur.
780     assert(stateOkay);
781     state = stateOkay;
782   }
783 
784   return state;
785 }
786 
787 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
788                                                 const MemRegion *MR,
789                                                 SVal strLength) {
790   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
791 
792   MR = MR->StripCasts();
793 
794   switch (MR->getKind()) {
795   case MemRegion::StringRegionKind:
796     // FIXME: This can happen if we strcpy() into a string region. This is
797     // undefined [C99 6.4.5p6], but we should still warn about it.
798     return state;
799 
800   case MemRegion::SymbolicRegionKind:
801   case MemRegion::AllocaRegionKind:
802   case MemRegion::NonParamVarRegionKind:
803   case MemRegion::ParamVarRegionKind:
804   case MemRegion::FieldRegionKind:
805   case MemRegion::ObjCIvarRegionKind:
806     // These are the types we can currently track string lengths for.
807     break;
808 
809   case MemRegion::ElementRegionKind:
810     // FIXME: Handle element regions by upper-bounding the parent region's
811     // string length.
812     return state;
813 
814   default:
815     // Other regions (mostly non-data) can't have a reliable C string length.
816     // For now, just ignore the change.
817     // FIXME: These are rare but not impossible. We should output some kind of
818     // warning for things like strcpy((char[]){'a', 0}, "b");
819     return state;
820   }
821 
822   if (strLength.isUnknown())
823     return state->remove<CStringLength>(MR);
824 
825   return state->set<CStringLength>(MR, strLength);
826 }
827 
828 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
829                                                ProgramStateRef &state,
830                                                const Expr *Ex,
831                                                const MemRegion *MR,
832                                                bool hypothetical) {
833   if (!hypothetical) {
834     // If there's a recorded length, go ahead and return it.
835     const SVal *Recorded = state->get<CStringLength>(MR);
836     if (Recorded)
837       return *Recorded;
838   }
839 
840   // Otherwise, get a new symbol and update the state.
841   SValBuilder &svalBuilder = C.getSValBuilder();
842   QualType sizeTy = svalBuilder.getContext().getSizeType();
843   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
844                                                     MR, Ex, sizeTy,
845                                                     C.getLocationContext(),
846                                                     C.blockCount());
847 
848   if (!hypothetical) {
849     if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
850       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
851       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
852       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
853       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
854       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
855                                                         fourInt);
856       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
857       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
858                                                 maxLength, sizeTy);
859       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
860     }
861     state = state->set<CStringLength>(MR, strLength);
862   }
863 
864   return strLength;
865 }
866 
867 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
868                                       const Expr *Ex, SVal Buf,
869                                       bool hypothetical) const {
870   const MemRegion *MR = Buf.getAsRegion();
871   if (!MR) {
872     // If we can't get a region, see if it's something we /know/ isn't a
873     // C string. In the context of locations, the only time we can issue such
874     // a warning is for labels.
875     if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
876       if (Filter.CheckCStringNotNullTerm) {
877         SmallString<120> buf;
878         llvm::raw_svector_ostream os(buf);
879         assert(CurrentFunctionDescription);
880         os << "Argument to " << CurrentFunctionDescription
881            << " is the address of the label '" << Label->getLabel()->getName()
882            << "', which is not a null-terminated string";
883 
884         emitNotCStringBug(C, state, Ex, os.str());
885       }
886       return UndefinedVal();
887     }
888 
889     // If it's not a region and not a label, give up.
890     return UnknownVal();
891   }
892 
893   // If we have a region, strip casts from it and see if we can figure out
894   // its length. For anything we can't figure out, just return UnknownVal.
895   MR = MR->StripCasts();
896 
897   switch (MR->getKind()) {
898   case MemRegion::StringRegionKind: {
899     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
900     // so we can assume that the byte length is the correct C string length.
901     SValBuilder &svalBuilder = C.getSValBuilder();
902     QualType sizeTy = svalBuilder.getContext().getSizeType();
903     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
904     return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
905   }
906   case MemRegion::SymbolicRegionKind:
907   case MemRegion::AllocaRegionKind:
908   case MemRegion::NonParamVarRegionKind:
909   case MemRegion::ParamVarRegionKind:
910   case MemRegion::FieldRegionKind:
911   case MemRegion::ObjCIvarRegionKind:
912     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
913   case MemRegion::CompoundLiteralRegionKind:
914     // FIXME: Can we track this? Is it necessary?
915     return UnknownVal();
916   case MemRegion::ElementRegionKind:
917     // FIXME: How can we handle this? It's not good enough to subtract the
918     // offset from the base string length; consider "123\x00567" and &a[5].
919     return UnknownVal();
920   default:
921     // Other regions (mostly non-data) can't have a reliable C string length.
922     // In this case, an error is emitted and UndefinedVal is returned.
923     // The caller should always be prepared to handle this case.
924     if (Filter.CheckCStringNotNullTerm) {
925       SmallString<120> buf;
926       llvm::raw_svector_ostream os(buf);
927 
928       assert(CurrentFunctionDescription);
929       os << "Argument to " << CurrentFunctionDescription << " is ";
930 
931       if (SummarizeRegion(os, C.getASTContext(), MR))
932         os << ", which is not a null-terminated string";
933       else
934         os << "not a null-terminated string";
935 
936       emitNotCStringBug(C, state, Ex, os.str());
937     }
938     return UndefinedVal();
939   }
940 }
941 
942 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
943   ProgramStateRef &state, const Expr *expr, SVal val) const {
944 
945   // Get the memory region pointed to by the val.
946   const MemRegion *bufRegion = val.getAsRegion();
947   if (!bufRegion)
948     return nullptr;
949 
950   // Strip casts off the memory region.
951   bufRegion = bufRegion->StripCasts();
952 
953   // Cast the memory region to a string region.
954   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
955   if (!strRegion)
956     return nullptr;
957 
958   // Return the actual string in the string region.
959   return strRegion->getStringLiteral();
960 }
961 
962 bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
963                                        ProgramStateRef state,
964                                        const Expr *FirstBuf,
965                                        const Expr *Size) {
966   // If we do not know that the buffer is long enough we return 'true'.
967   // Otherwise the parent region of this field region would also get
968   // invalidated, which would lead to warnings based on an unknown state.
969 
970   // Originally copied from CheckBufferAccess and CheckLocation.
971   SValBuilder &svalBuilder = C.getSValBuilder();
972   ASTContext &Ctx = svalBuilder.getContext();
973   const LocationContext *LCtx = C.getLocationContext();
974 
975   QualType sizeTy = Size->getType();
976   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
977   SVal BufVal = state->getSVal(FirstBuf, LCtx);
978 
979   SVal LengthVal = state->getSVal(Size, LCtx);
980   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
981   if (!Length)
982     return true; // cf top comment.
983 
984   // Compute the offset of the last element to be accessed: size-1.
985   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
986   SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
987   if (Offset.isUnknown())
988     return true; // cf top comment
989   NonLoc LastOffset = Offset.castAs<NonLoc>();
990 
991   // Check that the first buffer is sufficiently long.
992   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
993   std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
994   if (!BufLoc)
995     return true; // cf top comment.
996 
997   SVal BufEnd =
998       svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
999 
1000   // Check for out of bound array element access.
1001   const MemRegion *R = BufEnd.getAsRegion();
1002   if (!R)
1003     return true; // cf top comment.
1004 
1005   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1006   if (!ER)
1007     return true; // cf top comment.
1008 
1009   // FIXME: Does this crash when a non-standard definition
1010   // of a library function is encountered?
1011   assert(ER->getValueType() == C.getASTContext().CharTy &&
1012          "IsFirstBufInBound should only be called with char* ElementRegions");
1013 
1014   // Get the size of the array.
1015   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1016   DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder);
1017 
1018   // Get the index of the accessed element.
1019   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1020 
1021   ProgramStateRef StInBound = state->assumeInBound(Idx, SizeDV, true);
1022 
1023   return static_cast<bool>(StInBound);
1024 }
1025 
1026 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
1027                                                  ProgramStateRef state,
1028                                                  const Expr *E, SVal V,
1029                                                  bool IsSourceBuffer,
1030                                                  const Expr *Size) {
1031   std::optional<Loc> L = V.getAs<Loc>();
1032   if (!L)
1033     return state;
1034 
1035   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1036   // some assumptions about the value that CFRefCount can't. Even so, it should
1037   // probably be refactored.
1038   if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1039     const MemRegion *R = MR->getRegion()->StripCasts();
1040 
1041     // Are we dealing with an ElementRegion?  If so, we should be invalidating
1042     // the super-region.
1043     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1044       R = ER->getSuperRegion();
1045       // FIXME: What about layers of ElementRegions?
1046     }
1047 
1048     // Invalidate this region.
1049     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1050 
1051     bool CausesPointerEscape = false;
1052     RegionAndSymbolInvalidationTraits ITraits;
1053     // Invalidate and escape only indirect regions accessible through the source
1054     // buffer.
1055     if (IsSourceBuffer) {
1056       ITraits.setTrait(R->getBaseRegion(),
1057                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1058       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1059       CausesPointerEscape = true;
1060     } else {
1061       const MemRegion::Kind& K = R->getKind();
1062       if (K == MemRegion::FieldRegionKind)
1063         if (Size && IsFirstBufInBound(C, state, E, Size)) {
1064           // If destination buffer is a field region and access is in bound,
1065           // do not invalidate its super region.
1066           ITraits.setTrait(
1067               R,
1068               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1069         }
1070     }
1071 
1072     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
1073                                     CausesPointerEscape, nullptr, nullptr,
1074                                     &ITraits);
1075   }
1076 
1077   // If we have a non-region value by chance, just remove the binding.
1078   // FIXME: is this necessary or correct? This handles the non-Region
1079   //  cases.  Is it ever valid to store to these?
1080   return state->killBinding(*L);
1081 }
1082 
1083 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1084                                      const MemRegion *MR) {
1085   switch (MR->getKind()) {
1086   case MemRegion::FunctionCodeRegionKind: {
1087     if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1088       os << "the address of the function '" << *FD << '\'';
1089     else
1090       os << "the address of a function";
1091     return true;
1092   }
1093   case MemRegion::BlockCodeRegionKind:
1094     os << "block text";
1095     return true;
1096   case MemRegion::BlockDataRegionKind:
1097     os << "a block";
1098     return true;
1099   case MemRegion::CXXThisRegionKind:
1100   case MemRegion::CXXTempObjectRegionKind:
1101     os << "a C++ temp object of type "
1102        << cast<TypedValueRegion>(MR)->getValueType();
1103     return true;
1104   case MemRegion::NonParamVarRegionKind:
1105     os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1106     return true;
1107   case MemRegion::ParamVarRegionKind:
1108     os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1109     return true;
1110   case MemRegion::FieldRegionKind:
1111     os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1112     return true;
1113   case MemRegion::ObjCIvarRegionKind:
1114     os << "an instance variable of type "
1115        << cast<TypedValueRegion>(MR)->getValueType();
1116     return true;
1117   default:
1118     return false;
1119   }
1120 }
1121 
1122 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1123                                const Expr *Size, CheckerContext &C,
1124                                ProgramStateRef &State) {
1125   SVal MemVal = C.getSVal(DstBuffer);
1126   SVal SizeVal = C.getSVal(Size);
1127   const MemRegion *MR = MemVal.getAsRegion();
1128   if (!MR)
1129     return false;
1130 
1131   // We're about to model memset by producing a "default binding" in the Store.
1132   // Our current implementation - RegionStore - doesn't support default bindings
1133   // that don't cover the whole base region. So we should first get the offset
1134   // and the base region to figure out whether the offset of buffer is 0.
1135   RegionOffset Offset = MR->getAsOffset();
1136   const MemRegion *BR = Offset.getRegion();
1137 
1138   std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1139   if (!SizeNL)
1140     return false;
1141 
1142   SValBuilder &svalBuilder = C.getSValBuilder();
1143   ASTContext &Ctx = C.getASTContext();
1144 
1145   // void *memset(void *dest, int ch, size_t count);
1146   // For now we can only handle the case of offset is 0 and concrete char value.
1147   if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1148       Offset.getOffset() == 0) {
1149     // Get the base region's size.
1150     DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1151 
1152     ProgramStateRef StateWholeReg, StateNotWholeReg;
1153     std::tie(StateWholeReg, StateNotWholeReg) =
1154         State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1155 
1156     // With the semantic of 'memset()', we should convert the CharVal to
1157     // unsigned char.
1158     CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1159 
1160     ProgramStateRef StateNullChar, StateNonNullChar;
1161     std::tie(StateNullChar, StateNonNullChar) =
1162         assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1163 
1164     if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1165         !StateNonNullChar) {
1166       // If the 'memset()' acts on the whole region of destination buffer and
1167       // the value of the second argument of 'memset()' is zero, bind the second
1168       // argument's value to the destination buffer with 'default binding'.
1169       // FIXME: Since there is no perfect way to bind the non-zero character, we
1170       // can only deal with zero value here. In the future, we need to deal with
1171       // the binding of non-zero value in the case of whole region.
1172       State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1173                                      C.getLocationContext());
1174     } else {
1175       // If the destination buffer's extent is not equal to the value of
1176       // third argument, just invalidate buffer.
1177       State = InvalidateBuffer(C, State, DstBuffer, MemVal,
1178                                /*IsSourceBuffer*/ false, Size);
1179     }
1180 
1181     if (StateNullChar && !StateNonNullChar) {
1182       // If the value of the second argument of 'memset()' is zero, set the
1183       // string length of destination buffer to 0 directly.
1184       State = setCStringLength(State, MR,
1185                                svalBuilder.makeZeroVal(Ctx.getSizeType()));
1186     } else if (!StateNullChar && StateNonNullChar) {
1187       SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1188           CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1189           C.getLocationContext(), C.blockCount());
1190 
1191       // If the value of second argument is not zero, then the string length
1192       // is at least the size argument.
1193       SVal NewStrLenGESize = svalBuilder.evalBinOp(
1194           State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1195 
1196       State = setCStringLength(
1197           State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1198           MR, NewStrLen);
1199     }
1200   } else {
1201     // If the offset is not zero and char value is not concrete, we can do
1202     // nothing but invalidate the buffer.
1203     State = InvalidateBuffer(C, State, DstBuffer, MemVal,
1204                              /*IsSourceBuffer*/ false, Size);
1205   }
1206   return true;
1207 }
1208 
1209 //===----------------------------------------------------------------------===//
1210 // evaluation of individual function calls.
1211 //===----------------------------------------------------------------------===//
1212 
1213 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
1214                                     ProgramStateRef state, SizeArgExpr Size,
1215                                     DestinationArgExpr Dest,
1216                                     SourceArgExpr Source, bool Restricted,
1217                                     bool IsMempcpy, CharKind CK) const {
1218   CurrentFunctionDescription = "memory copy function";
1219 
1220   // See if the size argument is zero.
1221   const LocationContext *LCtx = C.getLocationContext();
1222   SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1223   QualType sizeTy = Size.Expression->getType();
1224 
1225   ProgramStateRef stateZeroSize, stateNonZeroSize;
1226   std::tie(stateZeroSize, stateNonZeroSize) =
1227       assumeZero(C, state, sizeVal, sizeTy);
1228 
1229   // Get the value of the Dest.
1230   SVal destVal = state->getSVal(Dest.Expression, LCtx);
1231 
1232   // If the size is zero, there won't be any actual memory access, so
1233   // just bind the return value to the destination buffer and return.
1234   if (stateZeroSize && !stateNonZeroSize) {
1235     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1236     C.addTransition(stateZeroSize);
1237     return;
1238   }
1239 
1240   // If the size can be nonzero, we have to check the other arguments.
1241   if (stateNonZeroSize) {
1242     state = stateNonZeroSize;
1243 
1244     // Ensure the destination is not null. If it is NULL there will be a
1245     // NULL pointer dereference.
1246     state = checkNonNull(C, state, Dest, destVal);
1247     if (!state)
1248       return;
1249 
1250     // Get the value of the Src.
1251     SVal srcVal = state->getSVal(Source.Expression, LCtx);
1252 
1253     // Ensure the source is not null. If it is NULL there will be a
1254     // NULL pointer dereference.
1255     state = checkNonNull(C, state, Source, srcVal);
1256     if (!state)
1257       return;
1258 
1259     // Ensure the accesses are valid and that the buffers do not overlap.
1260     state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1261     state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1262 
1263     if (Restricted)
1264       state = CheckOverlap(C, state, Size, Dest, Source, CK);
1265 
1266     if (!state)
1267       return;
1268 
1269     // If this is mempcpy, get the byte after the last byte copied and
1270     // bind the expr.
1271     if (IsMempcpy) {
1272       // Get the byte after the last byte copied.
1273       SValBuilder &SvalBuilder = C.getSValBuilder();
1274       ASTContext &Ctx = SvalBuilder.getContext();
1275       QualType CharPtrTy = getCharPtrType(Ctx, CK);
1276       SVal DestRegCharVal =
1277           SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1278       SVal lastElement = C.getSValBuilder().evalBinOp(
1279           state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1280       // If we don't know how much we copied, we can at least
1281       // conjure a return value for later.
1282       if (lastElement.isUnknown())
1283         lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1284                                                           C.blockCount());
1285 
1286       // The byte after the last byte copied is the return value.
1287       state = state->BindExpr(CE, LCtx, lastElement);
1288     } else {
1289       // All other copies return the destination buffer.
1290       // (Well, bcopy() has a void return type, but this won't hurt.)
1291       state = state->BindExpr(CE, LCtx, destVal);
1292     }
1293 
1294     // Invalidate the destination (regular invalidation without pointer-escaping
1295     // the address of the top-level region).
1296     // FIXME: Even if we can't perfectly model the copy, we should see if we
1297     // can use LazyCompoundVals to copy the source values into the destination.
1298     // This would probably remove any existing bindings past the end of the
1299     // copied region, but that's still an improvement over blank invalidation.
1300     state =
1301         InvalidateBuffer(C, state, Dest.Expression, C.getSVal(Dest.Expression),
1302                          /*IsSourceBuffer*/ false, Size.Expression);
1303 
1304     // Invalidate the source (const-invalidation without const-pointer-escaping
1305     // the address of the top-level region).
1306     state = InvalidateBuffer(C, state, Source.Expression,
1307                              C.getSVal(Source.Expression),
1308                              /*IsSourceBuffer*/ true, nullptr);
1309 
1310     C.addTransition(state);
1311   }
1312 }
1313 
1314 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE,
1315                                 CharKind CK) const {
1316   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1317   // The return value is the address of the destination buffer.
1318   DestinationArgExpr Dest = {CE->getArg(0), 0};
1319   SourceArgExpr Src = {CE->getArg(1), 1};
1320   SizeArgExpr Size = {CE->getArg(2), 2};
1321 
1322   ProgramStateRef State = C.getState();
1323 
1324   constexpr bool IsRestricted = true;
1325   constexpr bool IsMempcpy = false;
1326   evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1327 }
1328 
1329 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE,
1330                                  CharKind CK) const {
1331   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1332   // The return value is a pointer to the byte following the last written byte.
1333   DestinationArgExpr Dest = {CE->getArg(0), 0};
1334   SourceArgExpr Src = {CE->getArg(1), 1};
1335   SizeArgExpr Size = {CE->getArg(2), 2};
1336 
1337   constexpr bool IsRestricted = true;
1338   constexpr bool IsMempcpy = true;
1339   evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1340                  CK);
1341 }
1342 
1343 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE,
1344                                  CharKind CK) const {
1345   // void *memmove(void *dst, const void *src, size_t n);
1346   // The return value is the address of the destination buffer.
1347   DestinationArgExpr Dest = {CE->getArg(0), 0};
1348   SourceArgExpr Src = {CE->getArg(1), 1};
1349   SizeArgExpr Size = {CE->getArg(2), 2};
1350 
1351   constexpr bool IsRestricted = false;
1352   constexpr bool IsMempcpy = false;
1353   evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1354                  CK);
1355 }
1356 
1357 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1358   // void bcopy(const void *src, void *dst, size_t n);
1359   SourceArgExpr Src(CE->getArg(0), 0);
1360   DestinationArgExpr Dest = {CE->getArg(1), 1};
1361   SizeArgExpr Size = {CE->getArg(2), 2};
1362 
1363   constexpr bool IsRestricted = false;
1364   constexpr bool IsMempcpy = false;
1365   evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1366                  CharKind::Regular);
1367 }
1368 
1369 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
1370                                 CharKind CK) const {
1371   // int memcmp(const void *s1, const void *s2, size_t n);
1372   CurrentFunctionDescription = "memory comparison function";
1373 
1374   AnyArgExpr Left = {CE->getArg(0), 0};
1375   AnyArgExpr Right = {CE->getArg(1), 1};
1376   SizeArgExpr Size = {CE->getArg(2), 2};
1377 
1378   ProgramStateRef State = C.getState();
1379   SValBuilder &Builder = C.getSValBuilder();
1380   const LocationContext *LCtx = C.getLocationContext();
1381 
1382   // See if the size argument is zero.
1383   SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1384   QualType sizeTy = Size.Expression->getType();
1385 
1386   ProgramStateRef stateZeroSize, stateNonZeroSize;
1387   std::tie(stateZeroSize, stateNonZeroSize) =
1388       assumeZero(C, State, sizeVal, sizeTy);
1389 
1390   // If the size can be zero, the result will be 0 in that case, and we don't
1391   // have to check either of the buffers.
1392   if (stateZeroSize) {
1393     State = stateZeroSize;
1394     State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
1395     C.addTransition(State);
1396   }
1397 
1398   // If the size can be nonzero, we have to check the other arguments.
1399   if (stateNonZeroSize) {
1400     State = stateNonZeroSize;
1401     // If we know the two buffers are the same, we know the result is 0.
1402     // First, get the two buffers' addresses. Another checker will have already
1403     // made sure they're not undefined.
1404     DefinedOrUnknownSVal LV =
1405         State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1406     DefinedOrUnknownSVal RV =
1407         State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1408 
1409     // See if they are the same.
1410     ProgramStateRef SameBuffer, NotSameBuffer;
1411     std::tie(SameBuffer, NotSameBuffer) =
1412         State->assume(Builder.evalEQ(State, LV, RV));
1413 
1414     // If the two arguments are the same buffer, we know the result is 0,
1415     // and we only need to check one size.
1416     if (SameBuffer && !NotSameBuffer) {
1417       State = SameBuffer;
1418       State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1419       if (State) {
1420         State =
1421             SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
1422         C.addTransition(State);
1423       }
1424       return;
1425     }
1426 
1427     // If the two arguments might be different buffers, we have to check
1428     // the size of both of them.
1429     assert(NotSameBuffer);
1430     State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1431     State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1432     if (State) {
1433       // The return value is the comparison result, which we don't know.
1434       SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1435       State = State->BindExpr(CE, LCtx, CmpV);
1436       C.addTransition(State);
1437     }
1438   }
1439 }
1440 
1441 void CStringChecker::evalstrLength(CheckerContext &C,
1442                                    const CallExpr *CE) const {
1443   // size_t strlen(const char *s);
1444   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1445 }
1446 
1447 void CStringChecker::evalstrnLength(CheckerContext &C,
1448                                     const CallExpr *CE) const {
1449   // size_t strnlen(const char *s, size_t maxlen);
1450   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1451 }
1452 
1453 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1454                                          bool IsStrnlen) const {
1455   CurrentFunctionDescription = "string length function";
1456   ProgramStateRef state = C.getState();
1457   const LocationContext *LCtx = C.getLocationContext();
1458 
1459   if (IsStrnlen) {
1460     const Expr *maxlenExpr = CE->getArg(1);
1461     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1462 
1463     ProgramStateRef stateZeroSize, stateNonZeroSize;
1464     std::tie(stateZeroSize, stateNonZeroSize) =
1465       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1466 
1467     // If the size can be zero, the result will be 0 in that case, and we don't
1468     // have to check the string itself.
1469     if (stateZeroSize) {
1470       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1471       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1472       C.addTransition(stateZeroSize);
1473     }
1474 
1475     // If the size is GUARANTEED to be zero, we're done!
1476     if (!stateNonZeroSize)
1477       return;
1478 
1479     // Otherwise, record the assumption that the size is nonzero.
1480     state = stateNonZeroSize;
1481   }
1482 
1483   // Check that the string argument is non-null.
1484   AnyArgExpr Arg = {CE->getArg(0), 0};
1485   SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1486   state = checkNonNull(C, state, Arg, ArgVal);
1487 
1488   if (!state)
1489     return;
1490 
1491   SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1492 
1493   // If the argument isn't a valid C string, there's no valid state to
1494   // transition to.
1495   if (strLength.isUndef())
1496     return;
1497 
1498   DefinedOrUnknownSVal result = UnknownVal();
1499 
1500   // If the check is for strnlen() then bind the return value to no more than
1501   // the maxlen value.
1502   if (IsStrnlen) {
1503     QualType cmpTy = C.getSValBuilder().getConditionType();
1504 
1505     // It's a little unfortunate to be getting this again,
1506     // but it's not that expensive...
1507     const Expr *maxlenExpr = CE->getArg(1);
1508     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1509 
1510     std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1511     std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1512 
1513     if (strLengthNL && maxlenValNL) {
1514       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1515 
1516       // Check if the strLength is greater than the maxlen.
1517       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1518           C.getSValBuilder()
1519               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1520               .castAs<DefinedOrUnknownSVal>());
1521 
1522       if (stateStringTooLong && !stateStringNotTooLong) {
1523         // If the string is longer than maxlen, return maxlen.
1524         result = *maxlenValNL;
1525       } else if (stateStringNotTooLong && !stateStringTooLong) {
1526         // If the string is shorter than maxlen, return its length.
1527         result = *strLengthNL;
1528       }
1529     }
1530 
1531     if (result.isUnknown()) {
1532       // If we don't have enough information for a comparison, there's
1533       // no guarantee the full string length will actually be returned.
1534       // All we know is the return value is the min of the string length
1535       // and the limit. This is better than nothing.
1536       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1537                                                    C.blockCount());
1538       NonLoc resultNL = result.castAs<NonLoc>();
1539 
1540       if (strLengthNL) {
1541         state = state->assume(C.getSValBuilder().evalBinOpNN(
1542                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1543                                   .castAs<DefinedOrUnknownSVal>(), true);
1544       }
1545 
1546       if (maxlenValNL) {
1547         state = state->assume(C.getSValBuilder().evalBinOpNN(
1548                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1549                                   .castAs<DefinedOrUnknownSVal>(), true);
1550       }
1551     }
1552 
1553   } else {
1554     // This is a plain strlen(), not strnlen().
1555     result = strLength.castAs<DefinedOrUnknownSVal>();
1556 
1557     // If we don't know the length of the string, conjure a return
1558     // value, so it can be used in constraints, at least.
1559     if (result.isUnknown()) {
1560       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1561                                                    C.blockCount());
1562     }
1563   }
1564 
1565   // Bind the return value.
1566   assert(!result.isUnknown() && "Should have conjured a value by now");
1567   state = state->BindExpr(CE, LCtx, result);
1568   C.addTransition(state);
1569 }
1570 
1571 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1572   // char *strcpy(char *restrict dst, const char *restrict src);
1573   evalStrcpyCommon(C, CE,
1574                    /* ReturnEnd = */ false,
1575                    /* IsBounded = */ false,
1576                    /* appendK = */ ConcatFnKind::none);
1577 }
1578 
1579 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1580   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1581   evalStrcpyCommon(C, CE,
1582                    /* ReturnEnd = */ false,
1583                    /* IsBounded = */ true,
1584                    /* appendK = */ ConcatFnKind::none);
1585 }
1586 
1587 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1588   // char *stpcpy(char *restrict dst, const char *restrict src);
1589   evalStrcpyCommon(C, CE,
1590                    /* ReturnEnd = */ true,
1591                    /* IsBounded = */ false,
1592                    /* appendK = */ ConcatFnKind::none);
1593 }
1594 
1595 void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
1596   // size_t strlcpy(char *dest, const char *src, size_t size);
1597   evalStrcpyCommon(C, CE,
1598                    /* ReturnEnd = */ true,
1599                    /* IsBounded = */ true,
1600                    /* appendK = */ ConcatFnKind::none,
1601                    /* returnPtr = */ false);
1602 }
1603 
1604 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1605   // char *strcat(char *restrict s1, const char *restrict s2);
1606   evalStrcpyCommon(C, CE,
1607                    /* ReturnEnd = */ false,
1608                    /* IsBounded = */ false,
1609                    /* appendK = */ ConcatFnKind::strcat);
1610 }
1611 
1612 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1613   // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1614   evalStrcpyCommon(C, CE,
1615                    /* ReturnEnd = */ false,
1616                    /* IsBounded = */ true,
1617                    /* appendK = */ ConcatFnKind::strcat);
1618 }
1619 
1620 void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
1621   // size_t strlcat(char *dst, const char *src, size_t size);
1622   // It will append at most size - strlen(dst) - 1 bytes,
1623   // NULL-terminating the result.
1624   evalStrcpyCommon(C, CE,
1625                    /* ReturnEnd = */ false,
1626                    /* IsBounded = */ true,
1627                    /* appendK = */ ConcatFnKind::strlcat,
1628                    /* returnPtr = */ false);
1629 }
1630 
1631 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1632                                       bool ReturnEnd, bool IsBounded,
1633                                       ConcatFnKind appendK,
1634                                       bool returnPtr) const {
1635   if (appendK == ConcatFnKind::none)
1636     CurrentFunctionDescription = "string copy function";
1637   else
1638     CurrentFunctionDescription = "string concatenation function";
1639 
1640   ProgramStateRef state = C.getState();
1641   const LocationContext *LCtx = C.getLocationContext();
1642 
1643   // Check that the destination is non-null.
1644   DestinationArgExpr Dst = {CE->getArg(0), 0};
1645   SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1646   state = checkNonNull(C, state, Dst, DstVal);
1647   if (!state)
1648     return;
1649 
1650   // Check that the source is non-null.
1651   SourceArgExpr srcExpr = {CE->getArg(1), 1};
1652   SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1653   state = checkNonNull(C, state, srcExpr, srcVal);
1654   if (!state)
1655     return;
1656 
1657   // Get the string length of the source.
1658   SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1659   std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1660 
1661   // Get the string length of the destination buffer.
1662   SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1663   std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1664 
1665   // If the source isn't a valid C string, give up.
1666   if (strLength.isUndef())
1667     return;
1668 
1669   SValBuilder &svalBuilder = C.getSValBuilder();
1670   QualType cmpTy = svalBuilder.getConditionType();
1671   QualType sizeTy = svalBuilder.getContext().getSizeType();
1672 
1673   // These two values allow checking two kinds of errors:
1674   // - actual overflows caused by a source that doesn't fit in the destination
1675   // - potential overflows caused by a bound that could exceed the destination
1676   SVal amountCopied = UnknownVal();
1677   SVal maxLastElementIndex = UnknownVal();
1678   const char *boundWarning = nullptr;
1679 
1680   // FIXME: Why do we choose the srcExpr if the access has no size?
1681   //  Note that the 3rd argument of the call would be the size parameter.
1682   SizeArgExpr SrcExprAsSizeDummy = {srcExpr.Expression, srcExpr.ArgumentIndex};
1683   state = CheckOverlap(
1684       C, state,
1685       (IsBounded ? SizeArgExpr{CE->getArg(2), 2} : SrcExprAsSizeDummy), Dst,
1686       srcExpr);
1687 
1688   if (!state)
1689     return;
1690 
1691   // If the function is strncpy, strncat, etc... it is bounded.
1692   if (IsBounded) {
1693     // Get the max number of characters to copy.
1694     SizeArgExpr lenExpr = {CE->getArg(2), 2};
1695     SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1696 
1697     // Protect against misdeclared strncpy().
1698     lenVal =
1699         svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1700 
1701     std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1702 
1703     // If we know both values, we might be able to figure out how much
1704     // we're copying.
1705     if (strLengthNL && lenValNL) {
1706       switch (appendK) {
1707       case ConcatFnKind::none:
1708       case ConcatFnKind::strcat: {
1709         ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1710         // Check if the max number to copy is less than the length of the src.
1711         // If the bound is equal to the source length, strncpy won't null-
1712         // terminate the result!
1713         std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1714             svalBuilder
1715                 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1716                 .castAs<DefinedOrUnknownSVal>());
1717 
1718         if (stateSourceTooLong && !stateSourceNotTooLong) {
1719           // Max number to copy is less than the length of the src, so the
1720           // actual strLength copied is the max number arg.
1721           state = stateSourceTooLong;
1722           amountCopied = lenVal;
1723 
1724         } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1725           // The source buffer entirely fits in the bound.
1726           state = stateSourceNotTooLong;
1727           amountCopied = strLength;
1728         }
1729         break;
1730       }
1731       case ConcatFnKind::strlcat:
1732         if (!dstStrLengthNL)
1733           return;
1734 
1735         // amountCopied = min (size - dstLen - 1 , srcLen)
1736         SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1737                                                  *dstStrLengthNL, sizeTy);
1738         if (!isa<NonLoc>(freeSpace))
1739           return;
1740         freeSpace =
1741             svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1742                                   svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1743         std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1744 
1745         // While unlikely, it is possible that the subtraction is
1746         // too complex to compute, let's check whether it succeeded.
1747         if (!freeSpaceNL)
1748           return;
1749         SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1750             state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1751 
1752         ProgramStateRef TrueState, FalseState;
1753         std::tie(TrueState, FalseState) =
1754             state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1755 
1756         // srcStrLength <= size - dstStrLength -1
1757         if (TrueState && !FalseState) {
1758           amountCopied = strLength;
1759         }
1760 
1761         // srcStrLength > size - dstStrLength -1
1762         if (!TrueState && FalseState) {
1763           amountCopied = freeSpace;
1764         }
1765 
1766         if (TrueState && FalseState)
1767           amountCopied = UnknownVal();
1768         break;
1769       }
1770     }
1771     // We still want to know if the bound is known to be too large.
1772     if (lenValNL) {
1773       switch (appendK) {
1774       case ConcatFnKind::strcat:
1775         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1776 
1777         // Get the string length of the destination. If the destination is
1778         // memory that can't have a string length, we shouldn't be copying
1779         // into it anyway.
1780         if (dstStrLength.isUndef())
1781           return;
1782 
1783         if (dstStrLengthNL) {
1784           maxLastElementIndex = svalBuilder.evalBinOpNN(
1785               state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
1786 
1787           boundWarning = "Size argument is greater than the free space in the "
1788                          "destination buffer";
1789         }
1790         break;
1791       case ConcatFnKind::none:
1792       case ConcatFnKind::strlcat:
1793         // For strncpy and strlcat, this is just checking
1794         //  that lenVal <= sizeof(dst).
1795         // (Yes, strncpy and strncat differ in how they treat termination.
1796         // strncat ALWAYS terminates, but strncpy doesn't.)
1797 
1798         // We need a special case for when the copy size is zero, in which
1799         // case strncpy will do no work at all. Our bounds check uses n-1
1800         // as the last element accessed, so n == 0 is problematic.
1801         ProgramStateRef StateZeroSize, StateNonZeroSize;
1802         std::tie(StateZeroSize, StateNonZeroSize) =
1803             assumeZero(C, state, *lenValNL, sizeTy);
1804 
1805         // If the size is known to be zero, we're done.
1806         if (StateZeroSize && !StateNonZeroSize) {
1807           if (returnPtr) {
1808             StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1809           } else {
1810             if (appendK == ConcatFnKind::none) {
1811               // strlcpy returns strlen(src)
1812               StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
1813             } else {
1814               // strlcat returns strlen(src) + strlen(dst)
1815               SVal retSize = svalBuilder.evalBinOp(
1816                   state, BO_Add, strLength, dstStrLength, sizeTy);
1817               StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
1818             }
1819           }
1820           C.addTransition(StateZeroSize);
1821           return;
1822         }
1823 
1824         // Otherwise, go ahead and figure out the last element we'll touch.
1825         // We don't record the non-zero assumption here because we can't
1826         // be sure. We won't warn on a possible zero.
1827         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1828         maxLastElementIndex =
1829             svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
1830         boundWarning = "Size argument is greater than the length of the "
1831                        "destination buffer";
1832         break;
1833       }
1834     }
1835   } else {
1836     // The function isn't bounded. The amount copied should match the length
1837     // of the source buffer.
1838     amountCopied = strLength;
1839   }
1840 
1841   assert(state);
1842 
1843   // This represents the number of characters copied into the destination
1844   // buffer. (It may not actually be the strlen if the destination buffer
1845   // is not terminated.)
1846   SVal finalStrLength = UnknownVal();
1847   SVal strlRetVal = UnknownVal();
1848 
1849   if (appendK == ConcatFnKind::none && !returnPtr) {
1850     // strlcpy returns the sizeof(src)
1851     strlRetVal = strLength;
1852   }
1853 
1854   // If this is an appending function (strcat, strncat...) then set the
1855   // string length to strlen(src) + strlen(dst) since the buffer will
1856   // ultimately contain both.
1857   if (appendK != ConcatFnKind::none) {
1858     // Get the string length of the destination. If the destination is memory
1859     // that can't have a string length, we shouldn't be copying into it anyway.
1860     if (dstStrLength.isUndef())
1861       return;
1862 
1863     if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
1864       strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
1865                                            *dstStrLengthNL, sizeTy);
1866     }
1867 
1868     std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
1869 
1870     // If we know both string lengths, we might know the final string length.
1871     if (amountCopiedNL && dstStrLengthNL) {
1872       // Make sure the two lengths together don't overflow a size_t.
1873       state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
1874       if (!state)
1875         return;
1876 
1877       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
1878                                                *dstStrLengthNL, sizeTy);
1879     }
1880 
1881     // If we couldn't get a single value for the final string length,
1882     // we can at least bound it by the individual lengths.
1883     if (finalStrLength.isUnknown()) {
1884       // Try to get a "hypothetical" string length symbol, which we can later
1885       // set as a real value if that turns out to be the case.
1886       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1887       assert(!finalStrLength.isUndef());
1888 
1889       if (std::optional<NonLoc> finalStrLengthNL =
1890               finalStrLength.getAs<NonLoc>()) {
1891         if (amountCopiedNL && appendK == ConcatFnKind::none) {
1892           // we overwrite dst string with the src
1893           // finalStrLength >= srcStrLength
1894           SVal sourceInResult = svalBuilder.evalBinOpNN(
1895               state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
1896           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1897                                 true);
1898           if (!state)
1899             return;
1900         }
1901 
1902         if (dstStrLengthNL && appendK != ConcatFnKind::none) {
1903           // we extend the dst string with the src
1904           // finalStrLength >= dstStrLength
1905           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1906                                                       *finalStrLengthNL,
1907                                                       *dstStrLengthNL,
1908                                                       cmpTy);
1909           state =
1910               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1911           if (!state)
1912             return;
1913         }
1914       }
1915     }
1916 
1917   } else {
1918     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1919     // the final string length will match the input string length.
1920     finalStrLength = amountCopied;
1921   }
1922 
1923   SVal Result;
1924 
1925   if (returnPtr) {
1926     // The final result of the function will either be a pointer past the last
1927     // copied element, or a pointer to the start of the destination buffer.
1928     Result = (ReturnEnd ? UnknownVal() : DstVal);
1929   } else {
1930     if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
1931       //strlcpy, strlcat
1932       Result = strlRetVal;
1933     else
1934       Result = finalStrLength;
1935   }
1936 
1937   assert(state);
1938 
1939   // If the destination is a MemRegion, try to check for a buffer overflow and
1940   // record the new string length.
1941   if (std::optional<loc::MemRegionVal> dstRegVal =
1942           DstVal.getAs<loc::MemRegionVal>()) {
1943     QualType ptrTy = Dst.Expression->getType();
1944 
1945     // If we have an exact value on a bounded copy, use that to check for
1946     // overflows, rather than our estimate about how much is actually copied.
1947     if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1948       SVal maxLastElement =
1949           svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
1950 
1951       state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
1952       if (!state)
1953         return;
1954     }
1955 
1956     // Then, if the final length is known...
1957     if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1958       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1959           *knownStrLength, ptrTy);
1960 
1961       // ...and we haven't checked the bound, we'll check the actual copy.
1962       if (!boundWarning) {
1963         state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
1964         if (!state)
1965           return;
1966       }
1967 
1968       // If this is a stpcpy-style copy, the last element is the return value.
1969       if (returnPtr && ReturnEnd)
1970         Result = lastElement;
1971     }
1972 
1973     // Invalidate the destination (regular invalidation without pointer-escaping
1974     // the address of the top-level region). This must happen before we set the
1975     // C string length because invalidation will clear the length.
1976     // FIXME: Even if we can't perfectly model the copy, we should see if we
1977     // can use LazyCompoundVals to copy the source values into the destination.
1978     // This would probably remove any existing bindings past the end of the
1979     // string, but that's still an improvement over blank invalidation.
1980     state = InvalidateBuffer(C, state, Dst.Expression, *dstRegVal,
1981                              /*IsSourceBuffer*/ false, nullptr);
1982 
1983     // Invalidate the source (const-invalidation without const-pointer-escaping
1984     // the address of the top-level region).
1985     state = InvalidateBuffer(C, state, srcExpr.Expression, srcVal,
1986                              /*IsSourceBuffer*/ true, nullptr);
1987 
1988     // Set the C string length of the destination, if we know it.
1989     if (IsBounded && (appendK == ConcatFnKind::none)) {
1990       // strncpy is annoying in that it doesn't guarantee to null-terminate
1991       // the result string. If the original string didn't fit entirely inside
1992       // the bound (including the null-terminator), we don't know how long the
1993       // result is.
1994       if (amountCopied != strLength)
1995         finalStrLength = UnknownVal();
1996     }
1997     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1998   }
1999 
2000   assert(state);
2001 
2002   if (returnPtr) {
2003     // If this is a stpcpy-style copy, but we were unable to check for a buffer
2004     // overflow, we still need a result. Conjure a return value.
2005     if (ReturnEnd && Result.isUnknown()) {
2006       Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2007     }
2008   }
2009   // Set the return value.
2010   state = state->BindExpr(CE, LCtx, Result);
2011   C.addTransition(state);
2012 }
2013 
2014 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
2015   //int strcmp(const char *s1, const char *s2);
2016   evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
2017 }
2018 
2019 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
2020   //int strncmp(const char *s1, const char *s2, size_t n);
2021   evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
2022 }
2023 
2024 void CStringChecker::evalStrcasecmp(CheckerContext &C,
2025     const CallExpr *CE) const {
2026   //int strcasecmp(const char *s1, const char *s2);
2027   evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
2028 }
2029 
2030 void CStringChecker::evalStrncasecmp(CheckerContext &C,
2031     const CallExpr *CE) const {
2032   //int strncasecmp(const char *s1, const char *s2, size_t n);
2033   evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
2034 }
2035 
2036 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
2037     bool IsBounded, bool IgnoreCase) const {
2038   CurrentFunctionDescription = "string comparison function";
2039   ProgramStateRef state = C.getState();
2040   const LocationContext *LCtx = C.getLocationContext();
2041 
2042   // Check that the first string is non-null
2043   AnyArgExpr Left = {CE->getArg(0), 0};
2044   SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2045   state = checkNonNull(C, state, Left, LeftVal);
2046   if (!state)
2047     return;
2048 
2049   // Check that the second string is non-null.
2050   AnyArgExpr Right = {CE->getArg(1), 1};
2051   SVal RightVal = state->getSVal(Right.Expression, LCtx);
2052   state = checkNonNull(C, state, Right, RightVal);
2053   if (!state)
2054     return;
2055 
2056   // Get the string length of the first string or give up.
2057   SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2058   if (LeftLength.isUndef())
2059     return;
2060 
2061   // Get the string length of the second string or give up.
2062   SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2063   if (RightLength.isUndef())
2064     return;
2065 
2066   // If we know the two buffers are the same, we know the result is 0.
2067   // First, get the two buffers' addresses. Another checker will have already
2068   // made sure they're not undefined.
2069   DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2070   DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2071 
2072   // See if they are the same.
2073   SValBuilder &svalBuilder = C.getSValBuilder();
2074   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2075   ProgramStateRef StSameBuf, StNotSameBuf;
2076   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2077 
2078   // If the two arguments might be the same buffer, we know the result is 0,
2079   // and we only need to check one size.
2080   if (StSameBuf) {
2081     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
2082         svalBuilder.makeZeroVal(CE->getType()));
2083     C.addTransition(StSameBuf);
2084 
2085     // If the two arguments are GUARANTEED to be the same, we're done!
2086     if (!StNotSameBuf)
2087       return;
2088   }
2089 
2090   assert(StNotSameBuf);
2091   state = StNotSameBuf;
2092 
2093   // At this point we can go about comparing the two buffers.
2094   // For now, we only do this if they're both known string literals.
2095 
2096   // Attempt to extract string literals from both expressions.
2097   const StringLiteral *LeftStrLiteral =
2098       getCStringLiteral(C, state, Left.Expression, LeftVal);
2099   const StringLiteral *RightStrLiteral =
2100       getCStringLiteral(C, state, Right.Expression, RightVal);
2101   bool canComputeResult = false;
2102   SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
2103       C.blockCount());
2104 
2105   if (LeftStrLiteral && RightStrLiteral) {
2106     StringRef LeftStrRef = LeftStrLiteral->getString();
2107     StringRef RightStrRef = RightStrLiteral->getString();
2108 
2109     if (IsBounded) {
2110       // Get the max number of characters to compare.
2111       const Expr *lenExpr = CE->getArg(2);
2112       SVal lenVal = state->getSVal(lenExpr, LCtx);
2113 
2114       // If the length is known, we can get the right substrings.
2115       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2116         // Create substrings of each to compare the prefix.
2117         LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2118         RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2119         canComputeResult = true;
2120       }
2121     } else {
2122       // This is a normal, unbounded strcmp.
2123       canComputeResult = true;
2124     }
2125 
2126     if (canComputeResult) {
2127       // Real strcmp stops at null characters.
2128       size_t s1Term = LeftStrRef.find('\0');
2129       if (s1Term != StringRef::npos)
2130         LeftStrRef = LeftStrRef.substr(0, s1Term);
2131 
2132       size_t s2Term = RightStrRef.find('\0');
2133       if (s2Term != StringRef::npos)
2134         RightStrRef = RightStrRef.substr(0, s2Term);
2135 
2136       // Use StringRef's comparison methods to compute the actual result.
2137       int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2138                                   : LeftStrRef.compare(RightStrRef);
2139 
2140       // The strcmp function returns an integer greater than, equal to, or less
2141       // than zero, [c11, p7.24.4.2].
2142       if (compareRes == 0) {
2143         resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
2144       }
2145       else {
2146         DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
2147         // Constrain strcmp's result range based on the result of StringRef's
2148         // comparison methods.
2149         BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2150         SVal compareWithZero =
2151           svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2152               svalBuilder.getConditionType());
2153         DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2154         state = state->assume(compareWithZeroVal, true);
2155       }
2156     }
2157   }
2158 
2159   state = state->BindExpr(CE, LCtx, resultVal);
2160 
2161   // Record this as a possible path.
2162   C.addTransition(state);
2163 }
2164 
2165 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
2166   // char *strsep(char **stringp, const char *delim);
2167   // Verify whether the search string parameter matches the return type.
2168   SourceArgExpr SearchStrPtr = {CE->getArg(0), 0};
2169 
2170   QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2171   if (CharPtrTy.isNull() ||
2172       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
2173     return;
2174 
2175   CurrentFunctionDescription = "strsep()";
2176   ProgramStateRef State = C.getState();
2177   const LocationContext *LCtx = C.getLocationContext();
2178 
2179   // Check that the search string pointer is non-null (though it may point to
2180   // a null string).
2181   SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2182   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2183   if (!State)
2184     return;
2185 
2186   // Check that the delimiter string is non-null.
2187   AnyArgExpr DelimStr = {CE->getArg(1), 1};
2188   SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2189   State = checkNonNull(C, State, DelimStr, DelimStrVal);
2190   if (!State)
2191     return;
2192 
2193   SValBuilder &SVB = C.getSValBuilder();
2194   SVal Result;
2195   if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2196     // Get the current value of the search string pointer, as a char*.
2197     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2198 
2199     // Invalidate the search string, representing the change of one delimiter
2200     // character to NUL.
2201     State = InvalidateBuffer(C, State, SearchStrPtr.Expression, Result,
2202                              /*IsSourceBuffer*/ false, nullptr);
2203 
2204     // Overwrite the search string pointer. The new value is either an address
2205     // further along in the same string, or NULL if there are no more tokens.
2206     State = State->bindLoc(*SearchStrLoc,
2207         SVB.conjureSymbolVal(getTag(),
2208           CE,
2209           LCtx,
2210           CharPtrTy,
2211           C.blockCount()),
2212         LCtx);
2213   } else {
2214     assert(SearchStrVal.isUnknown());
2215     // Conjure a symbolic value. It's the best we can do.
2216     Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2217   }
2218 
2219   // Set the return value, and finish.
2220   State = State->BindExpr(CE, LCtx, Result);
2221   C.addTransition(State);
2222 }
2223 
2224 // These should probably be moved into a C++ standard library checker.
2225 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
2226   evalStdCopyCommon(C, CE);
2227 }
2228 
2229 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2230     const CallExpr *CE) const {
2231   evalStdCopyCommon(C, CE);
2232 }
2233 
2234 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2235     const CallExpr *CE) const {
2236   if (!CE->getArg(2)->getType()->isPointerType())
2237     return;
2238 
2239   ProgramStateRef State = C.getState();
2240 
2241   const LocationContext *LCtx = C.getLocationContext();
2242 
2243   // template <class _InputIterator, class _OutputIterator>
2244   // _OutputIterator
2245   // copy(_InputIterator __first, _InputIterator __last,
2246   //        _OutputIterator __result)
2247 
2248   // Invalidate the destination buffer
2249   const Expr *Dst = CE->getArg(2);
2250   SVal DstVal = State->getSVal(Dst, LCtx);
2251   State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
2252       /*Size=*/nullptr);
2253 
2254   SValBuilder &SVB = C.getSValBuilder();
2255 
2256   SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2257   State = State->BindExpr(CE, LCtx, ResultVal);
2258 
2259   C.addTransition(State);
2260 }
2261 
2262 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
2263   // void *memset(void *s, int c, size_t n);
2264   CurrentFunctionDescription = "memory set function";
2265 
2266   DestinationArgExpr Buffer = {CE->getArg(0), 0};
2267   AnyArgExpr CharE = {CE->getArg(1), 1};
2268   SizeArgExpr Size = {CE->getArg(2), 2};
2269 
2270   ProgramStateRef State = C.getState();
2271 
2272   // See if the size argument is zero.
2273   const LocationContext *LCtx = C.getLocationContext();
2274   SVal SizeVal = C.getSVal(Size.Expression);
2275   QualType SizeTy = Size.Expression->getType();
2276 
2277   ProgramStateRef ZeroSize, NonZeroSize;
2278   std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2279 
2280   // Get the value of the memory area.
2281   SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2282 
2283   // If the size is zero, there won't be any actual memory access, so
2284   // just bind the return value to the buffer and return.
2285   if (ZeroSize && !NonZeroSize) {
2286     ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal);
2287     C.addTransition(ZeroSize);
2288     return;
2289   }
2290 
2291   // Ensure the memory area is not null.
2292   // If it is NULL there will be a NULL pointer dereference.
2293   State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2294   if (!State)
2295     return;
2296 
2297   State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2298   if (!State)
2299     return;
2300 
2301   // According to the values of the arguments, bind the value of the second
2302   // argument to the destination buffer and set string length, or just
2303   // invalidate the destination buffer.
2304   if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2305                  Size.Expression, C, State))
2306     return;
2307 
2308   State = State->BindExpr(CE, LCtx, BufferPtrVal);
2309   C.addTransition(State);
2310 }
2311 
2312 void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const {
2313   CurrentFunctionDescription = "memory clearance function";
2314 
2315   DestinationArgExpr Buffer = {CE->getArg(0), 0};
2316   SizeArgExpr Size = {CE->getArg(1), 1};
2317   SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2318 
2319   ProgramStateRef State = C.getState();
2320 
2321   // See if the size argument is zero.
2322   SVal SizeVal = C.getSVal(Size.Expression);
2323   QualType SizeTy = Size.Expression->getType();
2324 
2325   ProgramStateRef StateZeroSize, StateNonZeroSize;
2326   std::tie(StateZeroSize, StateNonZeroSize) =
2327     assumeZero(C, State, SizeVal, SizeTy);
2328 
2329   // If the size is zero, there won't be any actual memory access,
2330   // In this case we just return.
2331   if (StateZeroSize && !StateNonZeroSize) {
2332     C.addTransition(StateZeroSize);
2333     return;
2334   }
2335 
2336   // Get the value of the memory area.
2337   SVal MemVal = C.getSVal(Buffer.Expression);
2338 
2339   // Ensure the memory area is not null.
2340   // If it is NULL there will be a NULL pointer dereference.
2341   State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2342   if (!State)
2343     return;
2344 
2345   State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2346   if (!State)
2347     return;
2348 
2349   if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2350     return;
2351 
2352   C.addTransition(State);
2353 }
2354 
2355 //===----------------------------------------------------------------------===//
2356 // The driver method, and other Checker callbacks.
2357 //===----------------------------------------------------------------------===//
2358 
2359 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2360                                                      CheckerContext &C) const {
2361   const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2362   if (!CE)
2363     return nullptr;
2364 
2365   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2366   if (!FD)
2367     return nullptr;
2368 
2369   if (StdCopy.matches(Call))
2370     return &CStringChecker::evalStdCopy;
2371   if (StdCopyBackward.matches(Call))
2372     return &CStringChecker::evalStdCopyBackward;
2373 
2374   // Pro-actively check that argument types are safe to do arithmetic upon.
2375   // We do not want to crash if someone accidentally passes a structure
2376   // into, say, a C++ overload of any of these functions. We could not check
2377   // that for std::copy because they may have arguments of other types.
2378   for (auto I : CE->arguments()) {
2379     QualType T = I->getType();
2380     if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2381       return nullptr;
2382   }
2383 
2384   const FnCheck *Callback = Callbacks.lookup(Call);
2385   if (Callback)
2386     return *Callback;
2387 
2388   return nullptr;
2389 }
2390 
2391 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2392   FnCheck Callback = identifyCall(Call, C);
2393 
2394   // If the callee isn't a string function, let another checker handle it.
2395   if (!Callback)
2396     return false;
2397 
2398   // Check and evaluate the call.
2399   const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2400   Callback(this, C, CE);
2401 
2402   // If the evaluate call resulted in no change, chain to the next eval call
2403   // handler.
2404   // Note, the custom CString evaluation calls assume that basic safety
2405   // properties are held. However, if the user chooses to turn off some of these
2406   // checks, we ignore the issues and leave the call evaluation to a generic
2407   // handler.
2408   return C.isDifferent();
2409 }
2410 
2411 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2412   // Record string length for char a[] = "abc";
2413   ProgramStateRef state = C.getState();
2414 
2415   for (const auto *I : DS->decls()) {
2416     const VarDecl *D = dyn_cast<VarDecl>(I);
2417     if (!D)
2418       continue;
2419 
2420     // FIXME: Handle array fields of structs.
2421     if (!D->getType()->isArrayType())
2422       continue;
2423 
2424     const Expr *Init = D->getInit();
2425     if (!Init)
2426       continue;
2427     if (!isa<StringLiteral>(Init))
2428       continue;
2429 
2430     Loc VarLoc = state->getLValue(D, C.getLocationContext());
2431     const MemRegion *MR = VarLoc.getAsRegion();
2432     if (!MR)
2433       continue;
2434 
2435     SVal StrVal = C.getSVal(Init);
2436     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2437     DefinedOrUnknownSVal strLength =
2438       getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2439 
2440     state = state->set<CStringLength>(MR, strLength);
2441   }
2442 
2443   C.addTransition(state);
2444 }
2445 
2446 ProgramStateRef
2447 CStringChecker::checkRegionChanges(ProgramStateRef state,
2448     const InvalidatedSymbols *,
2449     ArrayRef<const MemRegion *> ExplicitRegions,
2450     ArrayRef<const MemRegion *> Regions,
2451     const LocationContext *LCtx,
2452     const CallEvent *Call) const {
2453   CStringLengthTy Entries = state->get<CStringLength>();
2454   if (Entries.isEmpty())
2455     return state;
2456 
2457   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2458   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2459 
2460   // First build sets for the changed regions and their super-regions.
2461   for (ArrayRef<const MemRegion *>::iterator
2462       I = Regions.begin(), E = Regions.end(); I != E; ++I) {
2463     const MemRegion *MR = *I;
2464     Invalidated.insert(MR);
2465 
2466     SuperRegions.insert(MR);
2467     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2468       MR = SR->getSuperRegion();
2469       SuperRegions.insert(MR);
2470     }
2471   }
2472 
2473   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2474 
2475   // Then loop over the entries in the current state.
2476   for (CStringLengthTy::iterator I = Entries.begin(),
2477       E = Entries.end(); I != E; ++I) {
2478     const MemRegion *MR = I.getKey();
2479 
2480     // Is this entry for a super-region of a changed region?
2481     if (SuperRegions.count(MR)) {
2482       Entries = F.remove(Entries, MR);
2483       continue;
2484     }
2485 
2486     // Is this entry for a sub-region of a changed region?
2487     const MemRegion *Super = MR;
2488     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2489       Super = SR->getSuperRegion();
2490       if (Invalidated.count(Super)) {
2491         Entries = F.remove(Entries, MR);
2492         break;
2493       }
2494     }
2495   }
2496 
2497   return state->set<CStringLength>(Entries);
2498 }
2499 
2500 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2501     SymbolReaper &SR) const {
2502   // Mark all symbols in our string length map as valid.
2503   CStringLengthTy Entries = state->get<CStringLength>();
2504 
2505   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2506       I != E; ++I) {
2507     SVal Len = I.getData();
2508 
2509     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2510         se = Len.symbol_end(); si != se; ++si)
2511       SR.markInUse(*si);
2512   }
2513 }
2514 
2515 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2516     CheckerContext &C) const {
2517   ProgramStateRef state = C.getState();
2518   CStringLengthTy Entries = state->get<CStringLength>();
2519   if (Entries.isEmpty())
2520     return;
2521 
2522   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2523   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2524       I != E; ++I) {
2525     SVal Len = I.getData();
2526     if (SymbolRef Sym = Len.getAsSymbol()) {
2527       if (SR.isDead(Sym))
2528         Entries = F.remove(Entries, I.getKey());
2529     }
2530   }
2531 
2532   state = state->set<CStringLength>(Entries);
2533   C.addTransition(state);
2534 }
2535 
2536 void ento::registerCStringModeling(CheckerManager &Mgr) {
2537   Mgr.registerChecker<CStringChecker>();
2538 }
2539 
2540 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2541   return true;
2542 }
2543 
2544 #define REGISTER_CHECKER(name)                                                 \
2545   void ento::register##name(CheckerManager &mgr) {                             \
2546     CStringChecker *checker = mgr.getChecker<CStringChecker>();                \
2547     checker->Filter.Check##name = true;                                        \
2548     checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
2549   }                                                                            \
2550                                                                                \
2551   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2552 
2553 REGISTER_CHECKER(CStringNullArg)
2554 REGISTER_CHECKER(CStringOutOfBounds)
2555 REGISTER_CHECKER(CStringBufferOverlap)
2556 REGISTER_CHECKER(CStringNotNullTerm)
2557 REGISTER_CHECKER(CStringUninitializedRead)
2558