1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This defines CStringChecker, which is an assortment of checks on calls
10 // to functions in <string.h>.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "InterCheckerAPI.h"
15 #include "clang/AST/OperationKinds.h"
16 #include "clang/Basic/Builtins.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20 #include "clang/StaticAnalyzer/Core/Checker.h"
21 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
29 #include "llvm/ADT/APSInt.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <functional>
35 #include <optional>
36
37 using namespace clang;
38 using namespace ento;
39 using namespace std::placeholders;
40
41 namespace {
42 struct AnyArgExpr {
43 const Expr *Expression;
44 unsigned ArgumentIndex;
45 };
46 struct SourceArgExpr : AnyArgExpr {};
47 struct DestinationArgExpr : AnyArgExpr {};
48 struct SizeArgExpr : AnyArgExpr {};
49
50 using ErrorMessage = SmallString<128>;
51 enum class AccessKind { write, read };
52
createOutOfBoundErrorMsg(StringRef FunctionDescription,AccessKind Access)53 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
54 AccessKind Access) {
55 ErrorMessage Message;
56 llvm::raw_svector_ostream Os(Message);
57
58 // Function classification like: Memory copy function
59 Os << toUppercase(FunctionDescription.front())
60 << &FunctionDescription.data()[1];
61
62 if (Access == AccessKind::write) {
63 Os << " overflows the destination buffer";
64 } else { // read access
65 Os << " accesses out-of-bound array element";
66 }
67
68 return Message;
69 }
70
/// Concatenation flavour handed to the shared string-copy evaluation; `none`
/// is the value for "not a concatenation".
enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };

/// Character flavour a modeled function operates on: regular `char` or wide
/// `wchar_t` (see getCharPtrType for the mapping to AST types).
enum class CharKind { Regular = 0, Wide };

// Short aliases used when binding callbacks in the call description table.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
76
getCharPtrType(ASTContext & Ctx,CharKind CK)77 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
78 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
79 : Ctx.WideCharTy);
80 }
81
82 class CStringChecker : public Checker< eval::Call,
83 check::PreStmt<DeclStmt>,
84 check::LiveSymbols,
85 check::DeadSymbols,
86 check::RegionChanges
87 > {
88 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
89 BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
90
91 mutable const char *CurrentFunctionDescription = nullptr;
92
93 public:
94 /// The filter is used to filter out the diagnostics which are not enabled by
95 /// the user.
96 struct CStringChecksFilter {
97 bool CheckCStringNullArg = false;
98 bool CheckCStringOutOfBounds = false;
99 bool CheckCStringBufferOverlap = false;
100 bool CheckCStringNotNullTerm = false;
101 bool CheckCStringUninitializedRead = false;
102
103 CheckerNameRef CheckNameCStringNullArg;
104 CheckerNameRef CheckNameCStringOutOfBounds;
105 CheckerNameRef CheckNameCStringBufferOverlap;
106 CheckerNameRef CheckNameCStringNotNullTerm;
107 CheckerNameRef CheckNameCStringUninitializedRead;
108 };
109
110 CStringChecksFilter Filter;
111
getTag()112 static void *getTag() { static int tag; return &tag; }
113
114 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
115 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
116 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
117 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
118
119 ProgramStateRef
120 checkRegionChanges(ProgramStateRef state,
121 const InvalidatedSymbols *,
122 ArrayRef<const MemRegion *> ExplicitRegions,
123 ArrayRef<const MemRegion *> Regions,
124 const LocationContext *LCtx,
125 const CallEvent *Call) const;
126
127 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
128 const CallEvent &)>;
129
130 CallDescriptionMap<FnCheck> Callbacks = {
131 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
132 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
133 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
134 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
135 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
136 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
137 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
138 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
139 {{CDM::CLibrary, {"memcmp"}, 3},
140 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
141 {{CDM::CLibrary, {"wmemcmp"}, 3},
142 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
143 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
144 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
145 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
146 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
147 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
148 &CStringChecker::evalMemset},
149 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
150 // FIXME: C23 introduces 'memset_explicit', maybe also model that
151 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
152 &CStringChecker::evalStrcpy},
153 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
154 &CStringChecker::evalStrncpy},
155 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
156 &CStringChecker::evalStpcpy},
157 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
158 &CStringChecker::evalStrlcpy},
159 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
160 &CStringChecker::evalStrcat},
161 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
162 &CStringChecker::evalStrncat},
163 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
164 &CStringChecker::evalStrlcat},
165 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
166 &CStringChecker::evalstrLength},
167 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
168 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
169 &CStringChecker::evalstrnLength},
170 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
171 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
172 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
173 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
174 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
175 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
176 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
177 {{CDM::CLibrary, {"bcmp"}, 3},
178 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
179 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
180 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
181 &CStringChecker::evalBzero},
182
183 // When recognizing calls to the following variadic functions, we accept
184 // any number of arguments in the call (std::nullopt = accept any
185 // number), but check that in the declaration there are 2 and 3
186 // parameters respectively. (Note that the parameter count does not
187 // include the "...". Calls where the number of arguments is too small
188 // will be discarded by the callback.)
189 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
190 &CStringChecker::evalSprintf},
191 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
192 &CStringChecker::evalSnprintf},
193 };
194
195 // These require a bit of special handling.
196 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
197 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
198
199 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
200 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
202 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
203 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
204 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
205 ProgramStateRef state, SizeArgExpr Size,
206 DestinationArgExpr Dest, SourceArgExpr Source,
207 bool Restricted, bool IsMempcpy, CharKind CK) const;
208
209 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
210
211 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
212 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
213 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
214 bool IsStrnlen = false) const;
215
216 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
217 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
218 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
220 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
221 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
222 bool returnPtr = true) const;
223
224 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
226 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
227
228 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
229 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
230 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
231 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
232 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
233 bool IsBounded = false, bool IgnoreCase = false) const;
234
235 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
236
237 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
238 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
239 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
240 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
241 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
242
243 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
244 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
245 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
246 bool IsBounded) const;
247
248 // Utility methods
249 std::pair<ProgramStateRef , ProgramStateRef >
250 static assumeZero(CheckerContext &C,
251 ProgramStateRef state, SVal V, QualType Ty);
252
253 static ProgramStateRef setCStringLength(ProgramStateRef state,
254 const MemRegion *MR,
255 SVal strLength);
256 static SVal getCStringLengthForRegion(CheckerContext &C,
257 ProgramStateRef &state,
258 const Expr *Ex,
259 const MemRegion *MR,
260 bool hypothetical);
261 SVal getCStringLength(CheckerContext &C,
262 ProgramStateRef &state,
263 const Expr *Ex,
264 SVal Buf,
265 bool hypothetical = false) const;
266
267 const StringLiteral *getCStringLiteral(CheckerContext &C,
268 ProgramStateRef &state,
269 const Expr *expr,
270 SVal val) const;
271
272 /// Invalidate the destination buffer determined by characters copied.
273 static ProgramStateRef
274 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
275 const Expr *BufE, SVal BufV, SVal SizeV,
276 QualType SizeTy);
277
278 /// Operation never overflows, do not invalidate the super region.
279 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
280 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
281
282 /// We do not know whether the operation can overflow (e.g. size is unknown),
283 /// invalidate the super region and escape related pointers.
284 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
285 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
286
287 /// Invalidate the source buffer for escaping pointers.
288 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
289 ProgramStateRef S,
290 const Expr *BufE, SVal BufV);
291
292 /// @param InvalidationTraitOperations Determine how to invlidate the
293 /// MemRegion by setting the invalidation traits. Return true to cause pointer
294 /// escape, or false otherwise.
295 static ProgramStateRef invalidateBufferAux(
296 CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
297 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
298 const MemRegion *)>
299 InvalidationTraitOperations);
300
301 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
302 const MemRegion *MR);
303
304 static bool memsetAux(const Expr *DstBuffer, SVal CharE,
305 const Expr *Size, CheckerContext &C,
306 ProgramStateRef &State);
307
308 // Re-usable checks
309 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
310 AnyArgExpr Arg, SVal l) const;
311 // Check whether the origin region behind \p Element (like the actual array
312 // region \p Element is from) is initialized.
313 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
314 AnyArgExpr Buffer, SVal Element, SVal Size) const;
315 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
316 AnyArgExpr Buffer, SVal Element,
317 AccessKind Access,
318 CharKind CK = CharKind::Regular) const;
319 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
320 AnyArgExpr Buffer, SizeArgExpr Size,
321 AccessKind Access,
322 CharKind CK = CharKind::Regular) const;
323 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
324 SizeArgExpr Size, AnyArgExpr First,
325 AnyArgExpr Second,
326 CharKind CK = CharKind::Regular) const;
327 void emitOverlapBug(CheckerContext &C,
328 ProgramStateRef state,
329 const Stmt *First,
330 const Stmt *Second) const;
331
332 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
333 StringRef WarningMsg) const;
334 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
335 const Stmt *S, StringRef WarningMsg) const;
336 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
337 const Stmt *S, StringRef WarningMsg) const;
338 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
339 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
340 const Expr *E, StringRef Msg) const;
341 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
342 ProgramStateRef state,
343 NonLoc left,
344 NonLoc right) const;
345
346 // Return true if the destination buffer of the copy function may be in bound.
347 // Expects SVal of Size to be positive and unsigned.
348 // Expects SVal of FirstBuf to be a FieldRegion.
349 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
350 SVal BufVal, QualType BufTy, SVal LengthVal,
351 QualType LengthTy);
352 };
353
354 } //end anonymous namespace
355
// Program-state map named CStringLength, keyed by memory region and holding
// an SVal per region.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
357
358 //===----------------------------------------------------------------------===//
359 // Individual checks and utility methods.
360 //===----------------------------------------------------------------------===//
361
362 std::pair<ProgramStateRef, ProgramStateRef>
363 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
364 QualType Ty) {
365 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
366 if (!val)
367 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
368
369 SValBuilder &svalBuilder = C.getSValBuilder();
370 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
371 return State->assume(svalBuilder.evalEQ(State, *val, zero));
372 }
373
checkNonNull(CheckerContext & C,ProgramStateRef State,AnyArgExpr Arg,SVal l) const374 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
375 ProgramStateRef State,
376 AnyArgExpr Arg, SVal l) const {
377 // If a previous check has failed, propagate the failure.
378 if (!State)
379 return nullptr;
380
381 ProgramStateRef stateNull, stateNonNull;
382 std::tie(stateNull, stateNonNull) =
383 assumeZero(C, State, l, Arg.Expression->getType());
384
385 if (stateNull && !stateNonNull) {
386 if (Filter.CheckCStringNullArg) {
387 SmallString<80> buf;
388 llvm::raw_svector_ostream OS(buf);
389 assert(CurrentFunctionDescription);
390 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
391 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
392 << CurrentFunctionDescription;
393
394 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
395 }
396 return nullptr;
397 }
398
399 // From here on, assume that the value is non-null.
400 assert(stateNonNull);
401 return stateNonNull;
402 }
403
getIndex(ProgramStateRef State,const ElementRegion * ER,CharKind CK)404 static std::optional<NonLoc> getIndex(ProgramStateRef State,
405 const ElementRegion *ER, CharKind CK) {
406 SValBuilder &SVB = State->getStateManager().getSValBuilder();
407 ASTContext &Ctx = SVB.getContext();
408
409 if (CK == CharKind::Regular) {
410 if (ER->getValueType() != Ctx.CharTy)
411 return {};
412 return ER->getIndex();
413 }
414
415 if (ER->getValueType() != Ctx.WideCharTy)
416 return {};
417
418 QualType SizeTy = Ctx.getSizeType();
419 NonLoc WideSize =
420 SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
421 SizeTy)
422 .castAs<NonLoc>();
423 SVal Offset =
424 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
425 if (Offset.isUnknown())
426 return {};
427 return Offset.castAs<NonLoc>();
428 }
429
430 // Basically 1 -> 1st, 12 -> 12th, etc.
printIdxWithOrdinalSuffix(llvm::raw_ostream & Os,unsigned Idx)431 static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
432 Os << Idx << llvm::getOrdinalSuffix(Idx);
433 }
434
checkInit(CheckerContext & C,ProgramStateRef State,AnyArgExpr Buffer,SVal Element,SVal Size) const435 ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
436 ProgramStateRef State,
437 AnyArgExpr Buffer, SVal Element,
438 SVal Size) const {
439
440 // If a previous check has failed, propagate the failure.
441 if (!State)
442 return nullptr;
443
444 const MemRegion *R = Element.getAsRegion();
445 const auto *ER = dyn_cast_or_null<ElementRegion>(R);
446 if (!ER)
447 return State;
448
449 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
450 if (!SuperR)
451 return State;
452
453 // FIXME: We ought to able to check objects as well. Maybe
454 // UninitializedObjectChecker could help?
455 if (!SuperR->getValueType()->isArrayType())
456 return State;
457
458 SValBuilder &SVB = C.getSValBuilder();
459 ASTContext &Ctx = SVB.getContext();
460
461 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
462 const NonLoc Zero = SVB.makeZeroArrayIndex();
463
464 std::optional<Loc> FirstElementVal =
465 State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
466 if (!FirstElementVal)
467 return State;
468
469 // Ensure that we wouldn't read uninitialized value.
470 if (Filter.CheckCStringUninitializedRead &&
471 State->getSVal(*FirstElementVal).isUndef()) {
472 llvm::SmallString<258> Buf;
473 llvm::raw_svector_ostream OS(Buf);
474 OS << "The first element of the ";
475 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
476 OS << " argument is undefined";
477 emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
478 return nullptr;
479 }
480
481 // We won't check whether the entire region is fully initialized -- lets just
482 // check that the first and the last element is. So, onto checking the last
483 // element:
484 const QualType IdxTy = SVB.getArrayIndexType();
485
486 NonLoc ElemSize =
487 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
488 .castAs<NonLoc>();
489
490 // FIXME: Check that the size arg to the cstring function is divisible by
491 // size of the actual element type?
492
493 // The type of the argument to the cstring function is either char or wchar,
494 // but thats not the type of the original array (or memory region).
495 // Suppose the following:
496 // int t[5];
497 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
498 // When checking whether t is fully initialized, we see it as char array of
499 // size sizeof(int)*5. If we check the last element as a character, we read
500 // the last byte of an integer, which will be undefined. But just because
501 // that value is undefined, it doesn't mean that the element is uninitialized!
502 // For this reason, we need to retrieve the actual last element with the
503 // correct type.
504
505 // Divide the size argument to the cstring function by the actual element
506 // type. This value will be size of the array, or the index to the
507 // past-the-end element.
508 std::optional<NonLoc> Offset =
509 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
510 IdxTy)
511 .getAs<NonLoc>();
512
513 // Retrieve the index of the last element.
514 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
515 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
516
517 if (!Offset)
518 return State;
519
520 SVal LastElementVal =
521 State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
522 if (!isa<Loc>(LastElementVal))
523 return State;
524
525 if (Filter.CheckCStringUninitializedRead &&
526 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
527 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
528 // If we can't get emit a sensible last element index, just bail out --
529 // prefer to emit nothing in favour of emitting garbage quality reports.
530 if (!IdxInt) {
531 C.addSink();
532 return nullptr;
533 }
534 llvm::SmallString<258> Buf;
535 llvm::raw_svector_ostream OS(Buf);
536 OS << "The last accessed element (at index ";
537 OS << IdxInt->getExtValue();
538 OS << ") in the ";
539 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
540 OS << " argument is undefined";
541 emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
542 return nullptr;
543 }
544 return State;
545 }
546
547 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
CheckLocation(CheckerContext & C,ProgramStateRef state,AnyArgExpr Buffer,SVal Element,AccessKind Access,CharKind CK) const548 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
549 ProgramStateRef state,
550 AnyArgExpr Buffer, SVal Element,
551 AccessKind Access,
552 CharKind CK) const {
553
554 // If a previous check has failed, propagate the failure.
555 if (!state)
556 return nullptr;
557
558 // Check for out of bound array element access.
559 const MemRegion *R = Element.getAsRegion();
560 if (!R)
561 return state;
562
563 const auto *ER = dyn_cast<ElementRegion>(R);
564 if (!ER)
565 return state;
566
567 // Get the index of the accessed element.
568 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
569 if (!Idx)
570 return state;
571
572 // Get the size of the array.
573 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
574 DefinedOrUnknownSVal Size =
575 getDynamicExtent(state, superReg, C.getSValBuilder());
576
577 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
578 if (StOutBound && !StInBound) {
579 // These checks are either enabled by the CString out-of-bounds checker
580 // explicitly or implicitly by the Malloc checker.
581 // In the latter case we only do modeling but do not emit warning.
582 if (!Filter.CheckCStringOutOfBounds)
583 return nullptr;
584
585 // Emit a bug report.
586 ErrorMessage Message =
587 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
588 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
589 return nullptr;
590 }
591
592 // Array bound check succeeded. From this point forward the array bound
593 // should always succeed.
594 return StInBound;
595 }
596
597 ProgramStateRef
CheckBufferAccess(CheckerContext & C,ProgramStateRef State,AnyArgExpr Buffer,SizeArgExpr Size,AccessKind Access,CharKind CK) const598 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
599 AnyArgExpr Buffer, SizeArgExpr Size,
600 AccessKind Access, CharKind CK) const {
601 // If a previous check has failed, propagate the failure.
602 if (!State)
603 return nullptr;
604
605 SValBuilder &svalBuilder = C.getSValBuilder();
606 ASTContext &Ctx = svalBuilder.getContext();
607
608 QualType SizeTy = Size.Expression->getType();
609 QualType PtrTy = getCharPtrType(Ctx, CK);
610
611 // Check that the first buffer is non-null.
612 SVal BufVal = C.getSVal(Buffer.Expression);
613 State = checkNonNull(C, State, Buffer, BufVal);
614 if (!State)
615 return nullptr;
616
617 // If out-of-bounds checking is turned off, skip the rest.
618 if (!Filter.CheckCStringOutOfBounds)
619 return State;
620
621 SVal BufStart =
622 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
623
624 // Check if the first byte of the buffer is accessible.
625 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
626
627 if (!State)
628 return nullptr;
629
630 // Get the access length and make sure it is known.
631 // FIXME: This assumes the caller has already checked that the access length
632 // is positive. And that it's unsigned.
633 SVal LengthVal = C.getSVal(Size.Expression);
634 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
635 if (!Length)
636 return State;
637
638 // Compute the offset of the last element to be accessed: size-1.
639 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
640 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
641 if (Offset.isUnknown())
642 return nullptr;
643 NonLoc LastOffset = Offset.castAs<NonLoc>();
644
645 // Check that the first buffer is sufficiently long.
646 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
647
648 SVal BufEnd =
649 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
650 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
651 if (Access == AccessKind::read)
652 State = checkInit(C, State, Buffer, BufEnd, *Length);
653
654 // If the buffer isn't large enough, abort.
655 if (!State)
656 return nullptr;
657 }
658
659 // Large enough or not, return this state!
660 return State;
661 }
662
CheckOverlap(CheckerContext & C,ProgramStateRef state,SizeArgExpr Size,AnyArgExpr First,AnyArgExpr Second,CharKind CK) const663 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
664 ProgramStateRef state,
665 SizeArgExpr Size, AnyArgExpr First,
666 AnyArgExpr Second,
667 CharKind CK) const {
668 if (!Filter.CheckCStringBufferOverlap)
669 return state;
670
671 // Do a simple check for overlap: if the two arguments are from the same
672 // buffer, see if the end of the first is greater than the start of the second
673 // or vice versa.
674
675 // If a previous check has failed, propagate the failure.
676 if (!state)
677 return nullptr;
678
679 ProgramStateRef stateTrue, stateFalse;
680
681 // Assume different address spaces cannot overlap.
682 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
683 Second.Expression->getType()->getPointeeType().getAddressSpace())
684 return state;
685
686 // Get the buffer values and make sure they're known locations.
687 const LocationContext *LCtx = C.getLocationContext();
688 SVal firstVal = state->getSVal(First.Expression, LCtx);
689 SVal secondVal = state->getSVal(Second.Expression, LCtx);
690
691 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
692 if (!firstLoc)
693 return state;
694
695 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
696 if (!secondLoc)
697 return state;
698
699 // Are the two values the same?
700 SValBuilder &svalBuilder = C.getSValBuilder();
701 std::tie(stateTrue, stateFalse) =
702 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
703
704 if (stateTrue && !stateFalse) {
705 // If the values are known to be equal, that's automatically an overlap.
706 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
707 return nullptr;
708 }
709
710 // assume the two expressions are not equal.
711 assert(stateFalse);
712 state = stateFalse;
713
714 // Which value comes first?
715 QualType cmpTy = svalBuilder.getConditionType();
716 SVal reverse =
717 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
718 std::optional<DefinedOrUnknownSVal> reverseTest =
719 reverse.getAs<DefinedOrUnknownSVal>();
720 if (!reverseTest)
721 return state;
722
723 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
724 if (stateTrue) {
725 if (stateFalse) {
726 // If we don't know which one comes first, we can't perform this test.
727 return state;
728 } else {
729 // Switch the values so that firstVal is before secondVal.
730 std::swap(firstLoc, secondLoc);
731
732 // Switch the Exprs as well, so that they still correspond.
733 std::swap(First, Second);
734 }
735 }
736
737 // Get the length, and make sure it too is known.
738 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
739 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
740 if (!Length)
741 return state;
742
743 // Convert the first buffer's start address to char*.
744 // Bail out if the cast fails.
745 ASTContext &Ctx = svalBuilder.getContext();
746 QualType CharPtrTy = getCharPtrType(Ctx, CK);
747 SVal FirstStart =
748 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
749 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
750 if (!FirstStartLoc)
751 return state;
752
753 // Compute the end of the first buffer. Bail out if THAT fails.
754 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
755 *Length, CharPtrTy);
756 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
757 if (!FirstEndLoc)
758 return state;
759
760 // Is the end of the first buffer past the start of the second buffer?
761 SVal Overlap =
762 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
763 std::optional<DefinedOrUnknownSVal> OverlapTest =
764 Overlap.getAs<DefinedOrUnknownSVal>();
765 if (!OverlapTest)
766 return state;
767
768 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
769
770 if (stateTrue && !stateFalse) {
771 // Overlap!
772 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
773 return nullptr;
774 }
775
776 // assume the two expressions don't overlap.
777 assert(stateFalse);
778 return stateFalse;
779 }
780
emitOverlapBug(CheckerContext & C,ProgramStateRef state,const Stmt * First,const Stmt * Second) const781 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
782 const Stmt *First, const Stmt *Second) const {
783 ExplodedNode *N = C.generateErrorNode(state);
784 if (!N)
785 return;
786
787 if (!BT_Overlap)
788 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
789 categories::UnixAPI, "Improper arguments"));
790
791 // Generate a report for this bug.
792 auto report = std::make_unique<PathSensitiveBugReport>(
793 *BT_Overlap, "Arguments must not be overlapping buffers", N);
794 report->addRange(First->getSourceRange());
795 report->addRange(Second->getSourceRange());
796
797 C.emitReport(std::move(report));
798 }
799
emitNullArgBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const800 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
801 const Stmt *S, StringRef WarningMsg) const {
802 if (ExplodedNode *N = C.generateErrorNode(State)) {
803 if (!BT_Null) {
804 // FIXME: This call uses the string constant 'categories::UnixAPI' as the
805 // description of the bug; it should be replaced by a real description.
806 BT_Null.reset(
807 new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
808 }
809
810 auto Report =
811 std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N);
812 Report->addRange(S->getSourceRange());
813 if (const auto *Ex = dyn_cast<Expr>(S))
814 bugreporter::trackExpressionValue(N, Ex, *Report);
815 C.emitReport(std::move(Report));
816 }
817 }
818
emitUninitializedReadBug(CheckerContext & C,ProgramStateRef State,const Expr * E,StringRef Msg) const819 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
820 ProgramStateRef State,
821 const Expr *E,
822 StringRef Msg) const {
823 if (ExplodedNode *N = C.generateErrorNode(State)) {
824 if (!BT_UninitRead)
825 BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
826 "Accessing unitialized/garbage values"));
827
828 auto Report =
829 std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
830 Report->addNote("Other elements might also be undefined",
831 Report->getLocation());
832 Report->addRange(E->getSourceRange());
833 bugreporter::trackExpressionValue(N, E, *Report);
834 C.emitReport(std::move(Report));
835 }
836 }
837
emitOutOfBoundsBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const838 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
839 ProgramStateRef State, const Stmt *S,
840 StringRef WarningMsg) const {
841 if (ExplodedNode *N = C.generateErrorNode(State)) {
842 if (!BT_Bounds)
843 BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds
844 ? Filter.CheckNameCStringOutOfBounds
845 : Filter.CheckNameCStringNullArg,
846 "Out-of-bound array access"));
847
848 // FIXME: It would be nice to eventually make this diagnostic more clear,
849 // e.g., by referencing the original declaration or by saying *why* this
850 // reference is outside the range.
851 auto Report =
852 std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
853 Report->addRange(S->getSourceRange());
854 C.emitReport(std::move(Report));
855 }
856 }
857
emitNotCStringBug(CheckerContext & C,ProgramStateRef State,const Stmt * S,StringRef WarningMsg) const858 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
859 const Stmt *S,
860 StringRef WarningMsg) const {
861 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
862 if (!BT_NotCString) {
863 // FIXME: This call uses the string constant 'categories::UnixAPI' as the
864 // description of the bug; it should be replaced by a real description.
865 BT_NotCString.reset(
866 new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
867 }
868
869 auto Report =
870 std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
871
872 Report->addRange(S->getSourceRange());
873 C.emitReport(std::move(Report));
874 }
875 }
876
emitAdditionOverflowBug(CheckerContext & C,ProgramStateRef State) const877 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
878 ProgramStateRef State) const {
879 if (ExplodedNode *N = C.generateErrorNode(State)) {
880 if (!BT_AdditionOverflow) {
881 // FIXME: This call uses the word "API" as the description of the bug;
882 // it should be replaced by a better error message (if this unlikely
883 // situation continues to exist as a separate bug type).
884 BT_AdditionOverflow.reset(
885 new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
886 }
887
888 // This isn't a great error message, but this should never occur in real
889 // code anyway -- you'd have to create a buffer longer than a size_t can
890 // represent, which is sort of a contradiction.
891 const char *WarningMsg =
892 "This expression will create a string whose length is too big to "
893 "be represented as a size_t";
894
895 auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
896 WarningMsg, N);
897 C.emitReport(std::move(Report));
898 }
899 }
900
checkAdditionOverflow(CheckerContext & C,ProgramStateRef state,NonLoc left,NonLoc right) const901 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
902 ProgramStateRef state,
903 NonLoc left,
904 NonLoc right) const {
905 // If out-of-bounds checking is turned off, skip the rest.
906 if (!Filter.CheckCStringOutOfBounds)
907 return state;
908
909 // If a previous check has failed, propagate the failure.
910 if (!state)
911 return nullptr;
912
913 SValBuilder &svalBuilder = C.getSValBuilder();
914 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
915
916 QualType sizeTy = svalBuilder.getContext().getSizeType();
917 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
918 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
919
920 SVal maxMinusRight;
921 if (isa<nonloc::ConcreteInt>(right)) {
922 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
923 sizeTy);
924 } else {
925 // Try switching the operands. (The order of these two assignments is
926 // important!)
927 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
928 sizeTy);
929 left = right;
930 }
931
932 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
933 QualType cmpTy = svalBuilder.getConditionType();
934 // If left > max - right, we have an overflow.
935 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
936 *maxMinusRightNL, cmpTy);
937
938 ProgramStateRef stateOverflow, stateOkay;
939 std::tie(stateOverflow, stateOkay) =
940 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
941
942 if (stateOverflow && !stateOkay) {
943 // We have an overflow. Emit a bug report.
944 emitAdditionOverflowBug(C, stateOverflow);
945 return nullptr;
946 }
947
948 // From now on, assume an overflow didn't occur.
949 assert(stateOkay);
950 state = stateOkay;
951 }
952
953 return state;
954 }
955
setCStringLength(ProgramStateRef state,const MemRegion * MR,SVal strLength)956 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
957 const MemRegion *MR,
958 SVal strLength) {
959 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
960
961 MR = MR->StripCasts();
962
963 switch (MR->getKind()) {
964 case MemRegion::StringRegionKind:
965 // FIXME: This can happen if we strcpy() into a string region. This is
966 // undefined [C99 6.4.5p6], but we should still warn about it.
967 return state;
968
969 case MemRegion::SymbolicRegionKind:
970 case MemRegion::AllocaRegionKind:
971 case MemRegion::NonParamVarRegionKind:
972 case MemRegion::ParamVarRegionKind:
973 case MemRegion::FieldRegionKind:
974 case MemRegion::ObjCIvarRegionKind:
975 // These are the types we can currently track string lengths for.
976 break;
977
978 case MemRegion::ElementRegionKind:
979 // FIXME: Handle element regions by upper-bounding the parent region's
980 // string length.
981 return state;
982
983 default:
984 // Other regions (mostly non-data) can't have a reliable C string length.
985 // For now, just ignore the change.
986 // FIXME: These are rare but not impossible. We should output some kind of
987 // warning for things like strcpy((char[]){'a', 0}, "b");
988 return state;
989 }
990
991 if (strLength.isUnknown())
992 return state->remove<CStringLength>(MR);
993
994 return state->set<CStringLength>(MR, strLength);
995 }
996
getCStringLengthForRegion(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,const MemRegion * MR,bool hypothetical)997 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
998 ProgramStateRef &state,
999 const Expr *Ex,
1000 const MemRegion *MR,
1001 bool hypothetical) {
1002 if (!hypothetical) {
1003 // If there's a recorded length, go ahead and return it.
1004 const SVal *Recorded = state->get<CStringLength>(MR);
1005 if (Recorded)
1006 return *Recorded;
1007 }
1008
1009 // Otherwise, get a new symbol and update the state.
1010 SValBuilder &svalBuilder = C.getSValBuilder();
1011 QualType sizeTy = svalBuilder.getContext().getSizeType();
1012 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
1013 MR, Ex, sizeTy,
1014 C.getLocationContext(),
1015 C.blockCount());
1016
1017 if (!hypothetical) {
1018 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
1019 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
1020 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
1021 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
1022 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
1023 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
1024 fourInt);
1025 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
1026 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
1027 svalBuilder.getConditionType());
1028 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
1029 }
1030 state = state->set<CStringLength>(MR, strLength);
1031 }
1032
1033 return strLength;
1034 }
1035
getCStringLength(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,SVal Buf,bool hypothetical) const1036 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
1037 const Expr *Ex, SVal Buf,
1038 bool hypothetical) const {
1039 const MemRegion *MR = Buf.getAsRegion();
1040 if (!MR) {
1041 // If we can't get a region, see if it's something we /know/ isn't a
1042 // C string. In the context of locations, the only time we can issue such
1043 // a warning is for labels.
1044 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
1045 if (Filter.CheckCStringNotNullTerm) {
1046 SmallString<120> buf;
1047 llvm::raw_svector_ostream os(buf);
1048 assert(CurrentFunctionDescription);
1049 os << "Argument to " << CurrentFunctionDescription
1050 << " is the address of the label '" << Label->getLabel()->getName()
1051 << "', which is not a null-terminated string";
1052
1053 emitNotCStringBug(C, state, Ex, os.str());
1054 }
1055 return UndefinedVal();
1056 }
1057
1058 // If it's not a region and not a label, give up.
1059 return UnknownVal();
1060 }
1061
1062 // If we have a region, strip casts from it and see if we can figure out
1063 // its length. For anything we can't figure out, just return UnknownVal.
1064 MR = MR->StripCasts();
1065
1066 switch (MR->getKind()) {
1067 case MemRegion::StringRegionKind: {
1068 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1069 // so we can assume that the byte length is the correct C string length.
1070 SValBuilder &svalBuilder = C.getSValBuilder();
1071 QualType sizeTy = svalBuilder.getContext().getSizeType();
1072 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
1073 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
1074 }
1075 case MemRegion::NonParamVarRegionKind: {
1076 // If we have a global constant with a string literal initializer,
1077 // compute the initializer's length.
1078 const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1079 if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1080 if (const Expr *Init = Decl->getInit()) {
1081 if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
1082 SValBuilder &SvalBuilder = C.getSValBuilder();
1083 QualType SizeTy = SvalBuilder.getContext().getSizeType();
1084 return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
1085 }
1086 }
1087 }
1088 [[fallthrough]];
1089 }
1090 case MemRegion::SymbolicRegionKind:
1091 case MemRegion::AllocaRegionKind:
1092 case MemRegion::ParamVarRegionKind:
1093 case MemRegion::FieldRegionKind:
1094 case MemRegion::ObjCIvarRegionKind:
1095 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1096 case MemRegion::CompoundLiteralRegionKind:
1097 // FIXME: Can we track this? Is it necessary?
1098 return UnknownVal();
1099 case MemRegion::ElementRegionKind:
1100 // FIXME: How can we handle this? It's not good enough to subtract the
1101 // offset from the base string length; consider "123\x00567" and &a[5].
1102 return UnknownVal();
1103 default:
1104 // Other regions (mostly non-data) can't have a reliable C string length.
1105 // In this case, an error is emitted and UndefinedVal is returned.
1106 // The caller should always be prepared to handle this case.
1107 if (Filter.CheckCStringNotNullTerm) {
1108 SmallString<120> buf;
1109 llvm::raw_svector_ostream os(buf);
1110
1111 assert(CurrentFunctionDescription);
1112 os << "Argument to " << CurrentFunctionDescription << " is ";
1113
1114 if (SummarizeRegion(os, C.getASTContext(), MR))
1115 os << ", which is not a null-terminated string";
1116 else
1117 os << "not a null-terminated string";
1118
1119 emitNotCStringBug(C, state, Ex, os.str());
1120 }
1121 return UndefinedVal();
1122 }
1123 }
1124
getCStringLiteral(CheckerContext & C,ProgramStateRef & state,const Expr * expr,SVal val) const1125 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1126 ProgramStateRef &state, const Expr *expr, SVal val) const {
1127
1128 // Get the memory region pointed to by the val.
1129 const MemRegion *bufRegion = val.getAsRegion();
1130 if (!bufRegion)
1131 return nullptr;
1132
1133 // Strip casts off the memory region.
1134 bufRegion = bufRegion->StripCasts();
1135
1136 // Cast the memory region to a string region.
1137 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1138 if (!strRegion)
1139 return nullptr;
1140
1141 // Return the actual string in the string region.
1142 return strRegion->getStringLiteral();
1143 }
1144
isFirstBufInBound(CheckerContext & C,ProgramStateRef State,SVal BufVal,QualType BufTy,SVal LengthVal,QualType LengthTy)1145 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1146 SVal BufVal, QualType BufTy,
1147 SVal LengthVal, QualType LengthTy) {
1148 // If we do not know that the buffer is long enough we return 'true'.
1149 // Otherwise the parent region of this field region would also get
1150 // invalidated, which would lead to warnings based on an unknown state.
1151
1152 if (LengthVal.isUnknown())
1153 return false;
1154
1155 // Originally copied from CheckBufferAccess and CheckLocation.
1156 SValBuilder &SB = C.getSValBuilder();
1157 ASTContext &Ctx = C.getASTContext();
1158
1159 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1160
1161 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1162 if (!Length)
1163 return true; // cf top comment.
1164
1165 // Compute the offset of the last element to be accessed: size-1.
1166 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1167 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1168 if (Offset.isUnknown())
1169 return true; // cf top comment
1170 NonLoc LastOffset = Offset.castAs<NonLoc>();
1171
1172 // Check that the first buffer is sufficiently long.
1173 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1174 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1175 if (!BufLoc)
1176 return true; // cf top comment.
1177
1178 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1179
1180 // Check for out of bound array element access.
1181 const MemRegion *R = BufEnd.getAsRegion();
1182 if (!R)
1183 return true; // cf top comment.
1184
1185 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1186 if (!ER)
1187 return true; // cf top comment.
1188
1189 // FIXME: Does this crash when a non-standard definition
1190 // of a library function is encountered?
1191 assert(ER->getValueType() == C.getASTContext().CharTy &&
1192 "isFirstBufInBound should only be called with char* ElementRegions");
1193
1194 // Get the size of the array.
1195 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1196 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1197
1198 // Get the index of the accessed element.
1199 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1200
1201 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1202
1203 return static_cast<bool>(StInBound);
1204 }
1205
invalidateDestinationBufferBySize(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV,SVal SizeV,QualType SizeTy)1206 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1207 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1208 SVal SizeV, QualType SizeTy) {
1209 auto InvalidationTraitOperations =
1210 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1211 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1212 // If destination buffer is a field region and access is in bound, do
1213 // not invalidate its super region.
1214 if (MemRegion::FieldRegionKind == R->getKind() &&
1215 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1216 ITraits.setTrait(
1217 R,
1218 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1219 }
1220 return false;
1221 };
1222
1223 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1224 }
1225
1226 ProgramStateRef
invalidateDestinationBufferAlwaysEscapeSuperRegion(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1227 CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1228 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1229 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1230 const MemRegion *R) {
1231 return isa<FieldRegion>(R);
1232 };
1233
1234 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1235 }
1236
invalidateDestinationBufferNeverOverflows(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1237 ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1238 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1239 auto InvalidationTraitOperations =
1240 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1241 if (MemRegion::FieldRegionKind == R->getKind())
1242 ITraits.setTrait(
1243 R,
1244 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1245 return false;
1246 };
1247
1248 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1249 }
1250
invalidateSourceBuffer(CheckerContext & C,ProgramStateRef S,const Expr * BufE,SVal BufV)1251 ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1252 ProgramStateRef S,
1253 const Expr *BufE,
1254 SVal BufV) {
1255 auto InvalidationTraitOperations =
1256 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1257 ITraits.setTrait(
1258 R->getBaseRegion(),
1259 RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1260 ITraits.setTrait(R,
1261 RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1262 return true;
1263 };
1264
1265 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1266 }
1267
invalidateBufferAux(CheckerContext & C,ProgramStateRef State,const Expr * E,SVal V,llvm::function_ref<bool (RegionAndSymbolInvalidationTraits &,const MemRegion *)> InvalidationTraitOperations)1268 ProgramStateRef CStringChecker::invalidateBufferAux(
1269 CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1270 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1271 const MemRegion *)>
1272 InvalidationTraitOperations) {
1273 std::optional<Loc> L = V.getAs<Loc>();
1274 if (!L)
1275 return State;
1276
1277 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1278 // some assumptions about the value that CFRefCount can't. Even so, it should
1279 // probably be refactored.
1280 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1281 const MemRegion *R = MR->getRegion()->StripCasts();
1282
1283 // Are we dealing with an ElementRegion? If so, we should be invalidating
1284 // the super-region.
1285 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1286 R = ER->getSuperRegion();
1287 // FIXME: What about layers of ElementRegions?
1288 }
1289
1290 // Invalidate this region.
1291 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1292 RegionAndSymbolInvalidationTraits ITraits;
1293 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1294
1295 return State->invalidateRegions(R, E, C.blockCount(), LCtx,
1296 CausesPointerEscape, nullptr, nullptr,
1297 &ITraits);
1298 }
1299
1300 // If we have a non-region value by chance, just remove the binding.
1301 // FIXME: is this necessary or correct? This handles the non-Region
1302 // cases. Is it ever valid to store to these?
1303 return State->killBinding(*L);
1304 }
1305
SummarizeRegion(raw_ostream & os,ASTContext & Ctx,const MemRegion * MR)1306 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1307 const MemRegion *MR) {
1308 switch (MR->getKind()) {
1309 case MemRegion::FunctionCodeRegionKind: {
1310 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1311 os << "the address of the function '" << *FD << '\'';
1312 else
1313 os << "the address of a function";
1314 return true;
1315 }
1316 case MemRegion::BlockCodeRegionKind:
1317 os << "block text";
1318 return true;
1319 case MemRegion::BlockDataRegionKind:
1320 os << "a block";
1321 return true;
1322 case MemRegion::CXXThisRegionKind:
1323 case MemRegion::CXXTempObjectRegionKind:
1324 os << "a C++ temp object of type "
1325 << cast<TypedValueRegion>(MR)->getValueType();
1326 return true;
1327 case MemRegion::NonParamVarRegionKind:
1328 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1329 return true;
1330 case MemRegion::ParamVarRegionKind:
1331 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1332 return true;
1333 case MemRegion::FieldRegionKind:
1334 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1335 return true;
1336 case MemRegion::ObjCIvarRegionKind:
1337 os << "an instance variable of type "
1338 << cast<TypedValueRegion>(MR)->getValueType();
1339 return true;
1340 default:
1341 return false;
1342 }
1343 }
1344
memsetAux(const Expr * DstBuffer,SVal CharVal,const Expr * Size,CheckerContext & C,ProgramStateRef & State)1345 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1346 const Expr *Size, CheckerContext &C,
1347 ProgramStateRef &State) {
1348 SVal MemVal = C.getSVal(DstBuffer);
1349 SVal SizeVal = C.getSVal(Size);
1350 const MemRegion *MR = MemVal.getAsRegion();
1351 if (!MR)
1352 return false;
1353
1354 // We're about to model memset by producing a "default binding" in the Store.
1355 // Our current implementation - RegionStore - doesn't support default bindings
1356 // that don't cover the whole base region. So we should first get the offset
1357 // and the base region to figure out whether the offset of buffer is 0.
1358 RegionOffset Offset = MR->getAsOffset();
1359 const MemRegion *BR = Offset.getRegion();
1360
1361 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1362 if (!SizeNL)
1363 return false;
1364
1365 SValBuilder &svalBuilder = C.getSValBuilder();
1366 ASTContext &Ctx = C.getASTContext();
1367
1368 // void *memset(void *dest, int ch, size_t count);
1369 // For now we can only handle the case of offset is 0 and concrete char value.
1370 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1371 Offset.getOffset() == 0) {
1372 // Get the base region's size.
1373 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1374
1375 ProgramStateRef StateWholeReg, StateNotWholeReg;
1376 std::tie(StateWholeReg, StateNotWholeReg) =
1377 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1378
1379 // With the semantic of 'memset()', we should convert the CharVal to
1380 // unsigned char.
1381 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1382
1383 ProgramStateRef StateNullChar, StateNonNullChar;
1384 std::tie(StateNullChar, StateNonNullChar) =
1385 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1386
1387 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1388 !StateNonNullChar) {
1389 // If the 'memset()' acts on the whole region of destination buffer and
1390 // the value of the second argument of 'memset()' is zero, bind the second
1391 // argument's value to the destination buffer with 'default binding'.
1392 // FIXME: Since there is no perfect way to bind the non-zero character, we
1393 // can only deal with zero value here. In the future, we need to deal with
1394 // the binding of non-zero value in the case of whole region.
1395 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1396 C.getLocationContext());
1397 } else {
1398 // If the destination buffer's extent is not equal to the value of
1399 // third argument, just invalidate buffer.
1400 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1401 SizeVal, Size->getType());
1402 }
1403
1404 if (StateNullChar && !StateNonNullChar) {
1405 // If the value of the second argument of 'memset()' is zero, set the
1406 // string length of destination buffer to 0 directly.
1407 State = setCStringLength(State, MR,
1408 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1409 } else if (!StateNullChar && StateNonNullChar) {
1410 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1411 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1412 C.getLocationContext(), C.blockCount());
1413
1414 // If the value of second argument is not zero, then the string length
1415 // is at least the size argument.
1416 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1417 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1418
1419 State = setCStringLength(
1420 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1421 MR, NewStrLen);
1422 }
1423 } else {
1424 // If the offset is not zero and char value is not concrete, we can do
1425 // nothing but invalidate the buffer.
1426 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1427 SizeVal, Size->getType());
1428 }
1429 return true;
1430 }
1431
1432 //===----------------------------------------------------------------------===//
1433 // evaluation of individual function calls.
1434 //===----------------------------------------------------------------------===//
1435
evalCopyCommon(CheckerContext & C,const CallEvent & Call,ProgramStateRef state,SizeArgExpr Size,DestinationArgExpr Dest,SourceArgExpr Source,bool Restricted,bool IsMempcpy,CharKind CK) const1436 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1437 ProgramStateRef state, SizeArgExpr Size,
1438 DestinationArgExpr Dest,
1439 SourceArgExpr Source, bool Restricted,
1440 bool IsMempcpy, CharKind CK) const {
1441 CurrentFunctionDescription = "memory copy function";
1442
1443 // See if the size argument is zero.
1444 const LocationContext *LCtx = C.getLocationContext();
1445 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1446 QualType sizeTy = Size.Expression->getType();
1447
1448 ProgramStateRef stateZeroSize, stateNonZeroSize;
1449 std::tie(stateZeroSize, stateNonZeroSize) =
1450 assumeZero(C, state, sizeVal, sizeTy);
1451
1452 // Get the value of the Dest.
1453 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1454
1455 // If the size is zero, there won't be any actual memory access, so
1456 // just bind the return value to the destination buffer and return.
1457 if (stateZeroSize && !stateNonZeroSize) {
1458 stateZeroSize =
1459 stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1460 C.addTransition(stateZeroSize);
1461 return;
1462 }
1463
1464 // If the size can be nonzero, we have to check the other arguments.
1465 if (stateNonZeroSize) {
1466 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1467 // to the size of the destination buffer, then emit a warning
1468 // that an attacker may provoke a buffer overflow error.
1469 state = stateNonZeroSize;
1470
1471 // Ensure the destination is not null. If it is NULL there will be a
1472 // NULL pointer dereference.
1473 state = checkNonNull(C, state, Dest, destVal);
1474 if (!state)
1475 return;
1476
1477 // Get the value of the Src.
1478 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1479
1480 // Ensure the source is not null. If it is NULL there will be a
1481 // NULL pointer dereference.
1482 state = checkNonNull(C, state, Source, srcVal);
1483 if (!state)
1484 return;
1485
1486 // Ensure the accesses are valid and that the buffers do not overlap.
1487 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1488 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1489
1490 if (Restricted)
1491 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1492
1493 if (!state)
1494 return;
1495
1496 // If this is mempcpy, get the byte after the last byte copied and
1497 // bind the expr.
1498 if (IsMempcpy) {
1499 // Get the byte after the last byte copied.
1500 SValBuilder &SvalBuilder = C.getSValBuilder();
1501 ASTContext &Ctx = SvalBuilder.getContext();
1502 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1503 SVal DestRegCharVal =
1504 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1505 SVal lastElement = C.getSValBuilder().evalBinOp(
1506 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1507 // If we don't know how much we copied, we can at least
1508 // conjure a return value for later.
1509 if (lastElement.isUnknown())
1510 lastElement = C.getSValBuilder().conjureSymbolVal(
1511 nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1512
1513 // The byte after the last byte copied is the return value.
1514 state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1515 } else {
1516 // All other copies return the destination buffer.
1517 // (Well, bcopy() has a void return type, but this won't hurt.)
1518 state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1519 }
1520
1521 // Invalidate the destination (regular invalidation without pointer-escaping
1522 // the address of the top-level region).
1523 // FIXME: Even if we can't perfectly model the copy, we should see if we
1524 // can use LazyCompoundVals to copy the source values into the destination.
1525 // This would probably remove any existing bindings past the end of the
1526 // copied region, but that's still an improvement over blank invalidation.
1527 state = invalidateDestinationBufferBySize(
1528 C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1529 Size.Expression->getType());
1530
1531 // Invalidate the source (const-invalidation without const-pointer-escaping
1532 // the address of the top-level region).
1533 state = invalidateSourceBuffer(C, state, Source.Expression,
1534 C.getSVal(Source.Expression));
1535
1536 C.addTransition(state);
1537 }
1538 }
1539
evalMemcpy(CheckerContext & C,const CallEvent & Call,CharKind CK) const1540 void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1541 CharKind CK) const {
1542 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1543 // The return value is the address of the destination buffer.
1544 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1545 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1546 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1547
1548 ProgramStateRef State = C.getState();
1549
1550 constexpr bool IsRestricted = true;
1551 constexpr bool IsMempcpy = false;
1552 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1553 }
1554
evalMempcpy(CheckerContext & C,const CallEvent & Call,CharKind CK) const1555 void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1556 CharKind CK) const {
1557 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1558 // The return value is a pointer to the byte following the last written byte.
1559 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1560 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1561 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1562
1563 constexpr bool IsRestricted = true;
1564 constexpr bool IsMempcpy = true;
1565 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1566 IsMempcpy, CK);
1567 }
1568
evalMemmove(CheckerContext & C,const CallEvent & Call,CharKind CK) const1569 void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1570 CharKind CK) const {
1571 // void *memmove(void *dst, const void *src, size_t n);
1572 // The return value is the address of the destination buffer.
1573 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1574 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1575 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1576
1577 constexpr bool IsRestricted = false;
1578 constexpr bool IsMempcpy = false;
1579 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1580 IsMempcpy, CK);
1581 }
1582
evalBcopy(CheckerContext & C,const CallEvent & Call) const1583 void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1584 // void bcopy(const void *src, void *dst, size_t n);
1585 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1586 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1587 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1588
1589 constexpr bool IsRestricted = false;
1590 constexpr bool IsMempcpy = false;
1591 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1592 IsMempcpy, CharKind::Regular);
1593 }
1594
evalMemcmp(CheckerContext & C,const CallEvent & Call,CharKind CK) const1595 void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1596 CharKind CK) const {
1597 // int memcmp(const void *s1, const void *s2, size_t n);
1598 CurrentFunctionDescription = "memory comparison function";
1599
1600 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1601 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1602 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1603
1604 ProgramStateRef State = C.getState();
1605 SValBuilder &Builder = C.getSValBuilder();
1606 const LocationContext *LCtx = C.getLocationContext();
1607
1608 // See if the size argument is zero.
1609 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1610 QualType sizeTy = Size.Expression->getType();
1611
1612 ProgramStateRef stateZeroSize, stateNonZeroSize;
1613 std::tie(stateZeroSize, stateNonZeroSize) =
1614 assumeZero(C, State, sizeVal, sizeTy);
1615
1616 // If the size can be zero, the result will be 0 in that case, and we don't
1617 // have to check either of the buffers.
1618 if (stateZeroSize) {
1619 State = stateZeroSize;
1620 State = State->BindExpr(Call.getOriginExpr(), LCtx,
1621 Builder.makeZeroVal(Call.getResultType()));
1622 C.addTransition(State);
1623 }
1624
1625 // If the size can be nonzero, we have to check the other arguments.
1626 if (stateNonZeroSize) {
1627 State = stateNonZeroSize;
1628 // If we know the two buffers are the same, we know the result is 0.
1629 // First, get the two buffers' addresses. Another checker will have already
1630 // made sure they're not undefined.
1631 DefinedOrUnknownSVal LV =
1632 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1633 DefinedOrUnknownSVal RV =
1634 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1635
1636 // See if they are the same.
1637 ProgramStateRef SameBuffer, NotSameBuffer;
1638 std::tie(SameBuffer, NotSameBuffer) =
1639 State->assume(Builder.evalEQ(State, LV, RV));
1640
1641 // If the two arguments are the same buffer, we know the result is 0,
1642 // and we only need to check one size.
1643 if (SameBuffer && !NotSameBuffer) {
1644 State = SameBuffer;
1645 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1646 if (State) {
1647 State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1648 Builder.makeZeroVal(Call.getResultType()));
1649 C.addTransition(State);
1650 }
1651 return;
1652 }
1653
1654 // If the two arguments might be different buffers, we have to check
1655 // the size of both of them.
1656 assert(NotSameBuffer);
1657 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1658 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1659 if (State) {
1660 // The return value is the comparison result, which we don't know.
1661 SVal CmpV = Builder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
1662 C.blockCount());
1663 State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1664 C.addTransition(State);
1665 }
1666 }
1667 }
1668
evalstrLength(CheckerContext & C,const CallEvent & Call) const1669 void CStringChecker::evalstrLength(CheckerContext &C,
1670 const CallEvent &Call) const {
1671 // size_t strlen(const char *s);
1672 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1673 }
1674
evalstrnLength(CheckerContext & C,const CallEvent & Call) const1675 void CStringChecker::evalstrnLength(CheckerContext &C,
1676 const CallEvent &Call) const {
1677 // size_t strnlen(const char *s, size_t maxlen);
1678 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1679 }
1680
evalstrLengthCommon(CheckerContext & C,const CallEvent & Call,bool IsStrnlen) const1681 void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1682 const CallEvent &Call,
1683 bool IsStrnlen) const {
1684 CurrentFunctionDescription = "string length function";
1685 ProgramStateRef state = C.getState();
1686 const LocationContext *LCtx = C.getLocationContext();
1687
1688 if (IsStrnlen) {
1689 const Expr *maxlenExpr = Call.getArgExpr(1);
1690 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1691
1692 ProgramStateRef stateZeroSize, stateNonZeroSize;
1693 std::tie(stateZeroSize, stateNonZeroSize) =
1694 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1695
1696 // If the size can be zero, the result will be 0 in that case, and we don't
1697 // have to check the string itself.
1698 if (stateZeroSize) {
1699 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1700 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1701 C.addTransition(stateZeroSize);
1702 }
1703
1704 // If the size is GUARANTEED to be zero, we're done!
1705 if (!stateNonZeroSize)
1706 return;
1707
1708 // Otherwise, record the assumption that the size is nonzero.
1709 state = stateNonZeroSize;
1710 }
1711
1712 // Check that the string argument is non-null.
1713 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1714 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1715 state = checkNonNull(C, state, Arg, ArgVal);
1716
1717 if (!state)
1718 return;
1719
1720 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1721
1722 // If the argument isn't a valid C string, there's no valid state to
1723 // transition to.
1724 if (strLength.isUndef())
1725 return;
1726
1727 DefinedOrUnknownSVal result = UnknownVal();
1728
1729 // If the check is for strnlen() then bind the return value to no more than
1730 // the maxlen value.
1731 if (IsStrnlen) {
1732 QualType cmpTy = C.getSValBuilder().getConditionType();
1733
1734 // It's a little unfortunate to be getting this again,
1735 // but it's not that expensive...
1736 const Expr *maxlenExpr = Call.getArgExpr(1);
1737 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1738
1739 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1740 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1741
1742 if (strLengthNL && maxlenValNL) {
1743 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1744
1745 // Check if the strLength is greater than the maxlen.
1746 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1747 C.getSValBuilder()
1748 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1749 .castAs<DefinedOrUnknownSVal>());
1750
1751 if (stateStringTooLong && !stateStringNotTooLong) {
1752 // If the string is longer than maxlen, return maxlen.
1753 result = *maxlenValNL;
1754 } else if (stateStringNotTooLong && !stateStringTooLong) {
1755 // If the string is shorter than maxlen, return its length.
1756 result = *strLengthNL;
1757 }
1758 }
1759
1760 if (result.isUnknown()) {
1761 // If we don't have enough information for a comparison, there's
1762 // no guarantee the full string length will actually be returned.
1763 // All we know is the return value is the min of the string length
1764 // and the limit. This is better than nothing.
1765 result = C.getSValBuilder().conjureSymbolVal(
1766 nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1767 NonLoc resultNL = result.castAs<NonLoc>();
1768
1769 if (strLengthNL) {
1770 state = state->assume(C.getSValBuilder().evalBinOpNN(
1771 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1772 .castAs<DefinedOrUnknownSVal>(), true);
1773 }
1774
1775 if (maxlenValNL) {
1776 state = state->assume(C.getSValBuilder().evalBinOpNN(
1777 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1778 .castAs<DefinedOrUnknownSVal>(), true);
1779 }
1780 }
1781
1782 } else {
1783 // This is a plain strlen(), not strnlen().
1784 result = strLength.castAs<DefinedOrUnknownSVal>();
1785
1786 // If we don't know the length of the string, conjure a return
1787 // value, so it can be used in constraints, at least.
1788 if (result.isUnknown()) {
1789 result = C.getSValBuilder().conjureSymbolVal(
1790 nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
1791 }
1792 }
1793
1794 // Bind the return value.
1795 assert(!result.isUnknown() && "Should have conjured a value by now");
1796 state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1797 C.addTransition(state);
1798 }
1799
evalStrcpy(CheckerContext & C,const CallEvent & Call) const1800 void CStringChecker::evalStrcpy(CheckerContext &C,
1801 const CallEvent &Call) const {
1802 // char *strcpy(char *restrict dst, const char *restrict src);
1803 evalStrcpyCommon(C, Call,
1804 /* ReturnEnd = */ false,
1805 /* IsBounded = */ false,
1806 /* appendK = */ ConcatFnKind::none);
1807 }
1808
evalStrncpy(CheckerContext & C,const CallEvent & Call) const1809 void CStringChecker::evalStrncpy(CheckerContext &C,
1810 const CallEvent &Call) const {
1811 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1812 evalStrcpyCommon(C, Call,
1813 /* ReturnEnd = */ false,
1814 /* IsBounded = */ true,
1815 /* appendK = */ ConcatFnKind::none);
1816 }
1817
evalStpcpy(CheckerContext & C,const CallEvent & Call) const1818 void CStringChecker::evalStpcpy(CheckerContext &C,
1819 const CallEvent &Call) const {
1820 // char *stpcpy(char *restrict dst, const char *restrict src);
1821 evalStrcpyCommon(C, Call,
1822 /* ReturnEnd = */ true,
1823 /* IsBounded = */ false,
1824 /* appendK = */ ConcatFnKind::none);
1825 }
1826
evalStrlcpy(CheckerContext & C,const CallEvent & Call) const1827 void CStringChecker::evalStrlcpy(CheckerContext &C,
1828 const CallEvent &Call) const {
1829 // size_t strlcpy(char *dest, const char *src, size_t size);
1830 evalStrcpyCommon(C, Call,
1831 /* ReturnEnd = */ true,
1832 /* IsBounded = */ true,
1833 /* appendK = */ ConcatFnKind::none,
1834 /* returnPtr = */ false);
1835 }
1836
evalStrcat(CheckerContext & C,const CallEvent & Call) const1837 void CStringChecker::evalStrcat(CheckerContext &C,
1838 const CallEvent &Call) const {
1839 // char *strcat(char *restrict s1, const char *restrict s2);
1840 evalStrcpyCommon(C, Call,
1841 /* ReturnEnd = */ false,
1842 /* IsBounded = */ false,
1843 /* appendK = */ ConcatFnKind::strcat);
1844 }
1845
evalStrncat(CheckerContext & C,const CallEvent & Call) const1846 void CStringChecker::evalStrncat(CheckerContext &C,
1847 const CallEvent &Call) const {
1848 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1849 evalStrcpyCommon(C, Call,
1850 /* ReturnEnd = */ false,
1851 /* IsBounded = */ true,
1852 /* appendK = */ ConcatFnKind::strcat);
1853 }
1854
evalStrlcat(CheckerContext & C,const CallEvent & Call) const1855 void CStringChecker::evalStrlcat(CheckerContext &C,
1856 const CallEvent &Call) const {
1857 // size_t strlcat(char *dst, const char *src, size_t size);
1858 // It will append at most size - strlen(dst) - 1 bytes,
1859 // NULL-terminating the result.
1860 evalStrcpyCommon(C, Call,
1861 /* ReturnEnd = */ false,
1862 /* IsBounded = */ true,
1863 /* appendK = */ ConcatFnKind::strlcat,
1864 /* returnPtr = */ false);
1865 }
1866
/// Shared model for the string copy and string concatenation functions.
///
/// \param ReturnEnd only consulted when \p returnPtr is true: if set, the
///        call evaluates to the destination advanced by the final string
///        length (or a conjured symbol when that is not known); otherwise it
///        evaluates to the destination pointer itself.
/// \param IsBounded argument 2 of the call is a size bound.
/// \param appendK ConcatFnKind::none for a copy-over function, otherwise the
///        flavour of concatenation (strcat or strlcat).
/// \param returnPtr if true the call evaluates to a pointer; if false it
///        evaluates to a size (the strlcpy / strlcat return value).
///
/// Outline: both pointer arguments are null-checked, the source and
/// destination string lengths are looked up, overlap is checked, the amount
/// copied and (for bounded calls) the highest index the bound allows are
/// computed, the destination is checked for writability at its first and last
/// touched element, the destination/source buffers are invalidated, the new
/// destination string length is recorded and the return value is bound.
/// Any failed check returns early without adding a transition.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Within this function boundWarning is only tested for null below, to tell
  // whether a bound-based check has been set up.
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  //  Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {
      {srcExpr.Expression, srcExpr.ArgumentIndex}};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
      Dst, srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check whether the source length is greater than or equal to the
        // max number to copy.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // The bound does not exceed the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        // If both outcomes are feasible, amountCopied stays unknown.
        break;
      }
      case ConcatFnKind::strlcat:
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        // Both outcomes feasible: the amount copied is not known.
        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For strncpy and strlcat, this is just checking
        //  that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            StateZeroSize =
                StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
                                                      LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize =
                  StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Value bound to the call for the size-returning variants (strlcpy/strlcat).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src)
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      // strlcat returns strlen(src) + strlen(dst)
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength =
          getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this branch is nested under appendK != none, so the
        // appendK == none condition below looks unreachable here - confirm.
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    // For the ReturnEnd case the value is filled in further below.
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      // strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      // Check if the first byte of the destination is writable.
      state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
      if (!state)
        return;
      // Check if the last byte of the destination is writable.
      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        // Check if the first byte of the destination is writable.
        state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
        if (!state)
          return;
        // Check if the last byte of the destination is writable.
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
                                              *dstRegVal, amountCopied,
                                              C.getASTContext().getSizeType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
                                            C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
  C.addTransition(state);
}
2265
evalStrcmp(CheckerContext & C,const CallEvent & Call) const2266 void CStringChecker::evalStrcmp(CheckerContext &C,
2267 const CallEvent &Call) const {
2268 //int strcmp(const char *s1, const char *s2);
2269 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2270 }
2271
evalStrncmp(CheckerContext & C,const CallEvent & Call) const2272 void CStringChecker::evalStrncmp(CheckerContext &C,
2273 const CallEvent &Call) const {
2274 //int strncmp(const char *s1, const char *s2, size_t n);
2275 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2276 }
2277
evalStrcasecmp(CheckerContext & C,const CallEvent & Call) const2278 void CStringChecker::evalStrcasecmp(CheckerContext &C,
2279 const CallEvent &Call) const {
2280 //int strcasecmp(const char *s1, const char *s2);
2281 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2282 }
2283
evalStrncasecmp(CheckerContext & C,const CallEvent & Call) const2284 void CStringChecker::evalStrncasecmp(CheckerContext &C,
2285 const CallEvent &Call) const {
2286 //int strncasecmp(const char *s1, const char *s2, size_t n);
2287 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2288 }
2289
evalStrcmpCommon(CheckerContext & C,const CallEvent & Call,bool IsBounded,bool IgnoreCase) const2290 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2291 bool IsBounded, bool IgnoreCase) const {
2292 CurrentFunctionDescription = "string comparison function";
2293 ProgramStateRef state = C.getState();
2294 const LocationContext *LCtx = C.getLocationContext();
2295
2296 // Check that the first string is non-null
2297 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2298 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2299 state = checkNonNull(C, state, Left, LeftVal);
2300 if (!state)
2301 return;
2302
2303 // Check that the second string is non-null.
2304 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2305 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2306 state = checkNonNull(C, state, Right, RightVal);
2307 if (!state)
2308 return;
2309
2310 // Get the string length of the first string or give up.
2311 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2312 if (LeftLength.isUndef())
2313 return;
2314
2315 // Get the string length of the second string or give up.
2316 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2317 if (RightLength.isUndef())
2318 return;
2319
2320 // If we know the two buffers are the same, we know the result is 0.
2321 // First, get the two buffers' addresses. Another checker will have already
2322 // made sure they're not undefined.
2323 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2324 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2325
2326 // See if they are the same.
2327 SValBuilder &svalBuilder = C.getSValBuilder();
2328 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2329 ProgramStateRef StSameBuf, StNotSameBuf;
2330 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2331
2332 // If the two arguments might be the same buffer, we know the result is 0,
2333 // and we only need to check one size.
2334 if (StSameBuf) {
2335 StSameBuf =
2336 StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2337 svalBuilder.makeZeroVal(Call.getResultType()));
2338 C.addTransition(StSameBuf);
2339
2340 // If the two arguments are GUARANTEED to be the same, we're done!
2341 if (!StNotSameBuf)
2342 return;
2343 }
2344
2345 assert(StNotSameBuf);
2346 state = StNotSameBuf;
2347
2348 // At this point we can go about comparing the two buffers.
2349 // For now, we only do this if they're both known string literals.
2350
2351 // Attempt to extract string literals from both expressions.
2352 const StringLiteral *LeftStrLiteral =
2353 getCStringLiteral(C, state, Left.Expression, LeftVal);
2354 const StringLiteral *RightStrLiteral =
2355 getCStringLiteral(C, state, Right.Expression, RightVal);
2356 bool canComputeResult = false;
2357 SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(),
2358 LCtx, C.blockCount());
2359
2360 if (LeftStrLiteral && RightStrLiteral) {
2361 StringRef LeftStrRef = LeftStrLiteral->getString();
2362 StringRef RightStrRef = RightStrLiteral->getString();
2363
2364 if (IsBounded) {
2365 // Get the max number of characters to compare.
2366 const Expr *lenExpr = Call.getArgExpr(2);
2367 SVal lenVal = state->getSVal(lenExpr, LCtx);
2368
2369 // If the length is known, we can get the right substrings.
2370 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2371 // Create substrings of each to compare the prefix.
2372 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2373 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2374 canComputeResult = true;
2375 }
2376 } else {
2377 // This is a normal, unbounded strcmp.
2378 canComputeResult = true;
2379 }
2380
2381 if (canComputeResult) {
2382 // Real strcmp stops at null characters.
2383 size_t s1Term = LeftStrRef.find('\0');
2384 if (s1Term != StringRef::npos)
2385 LeftStrRef = LeftStrRef.substr(0, s1Term);
2386
2387 size_t s2Term = RightStrRef.find('\0');
2388 if (s2Term != StringRef::npos)
2389 RightStrRef = RightStrRef.substr(0, s2Term);
2390
2391 // Use StringRef's comparison methods to compute the actual result.
2392 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2393 : LeftStrRef.compare(RightStrRef);
2394
2395 // The strcmp function returns an integer greater than, equal to, or less
2396 // than zero, [c11, p7.24.4.2].
2397 if (compareRes == 0) {
2398 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2399 }
2400 else {
2401 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2402 // Constrain strcmp's result range based on the result of StringRef's
2403 // comparison methods.
2404 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2405 SVal compareWithZero =
2406 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2407 svalBuilder.getConditionType());
2408 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2409 state = state->assume(compareWithZeroVal, true);
2410 }
2411 }
2412 }
2413
2414 state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2415
2416 // Record this as a possible path.
2417 C.addTransition(state);
2418 }
2419
evalStrsep(CheckerContext & C,const CallEvent & Call) const2420 void CStringChecker::evalStrsep(CheckerContext &C,
2421 const CallEvent &Call) const {
2422 // char *strsep(char **stringp, const char *delim);
2423 // Verify whether the search string parameter matches the return type.
2424 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2425
2426 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2427 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2428 CharPtrTy.getUnqualifiedType())
2429 return;
2430
2431 CurrentFunctionDescription = "strsep()";
2432 ProgramStateRef State = C.getState();
2433 const LocationContext *LCtx = C.getLocationContext();
2434
2435 // Check that the search string pointer is non-null (though it may point to
2436 // a null string).
2437 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2438 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2439 if (!State)
2440 return;
2441
2442 // Check that the delimiter string is non-null.
2443 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2444 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2445 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2446 if (!State)
2447 return;
2448
2449 SValBuilder &SVB = C.getSValBuilder();
2450 SVal Result;
2451 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2452 // Get the current value of the search string pointer, as a char*.
2453 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2454
2455 // Invalidate the search string, representing the change of one delimiter
2456 // character to NUL.
2457 // As the replacement never overflows, do not invalidate its super region.
2458 State = invalidateDestinationBufferNeverOverflows(
2459 C, State, SearchStrPtr.Expression, Result);
2460
2461 // Overwrite the search string pointer. The new value is either an address
2462 // further along in the same string, or NULL if there are no more tokens.
2463 State =
2464 State->bindLoc(*SearchStrLoc,
2465 SVB.conjureSymbolVal(getTag(), Call.getOriginExpr(),
2466 LCtx, CharPtrTy, C.blockCount()),
2467 LCtx);
2468 } else {
2469 assert(SearchStrVal.isUnknown());
2470 // Conjure a symbolic value. It's the best we can do.
2471 Result = SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
2472 C.blockCount());
2473 }
2474
2475 // Set the return value, and finish.
2476 State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2477 C.addTransition(State);
2478 }
2479
2480 // These should probably be moved into a C++ standard library checker.
evalStdCopy(CheckerContext & C,const CallEvent & Call) const2481 void CStringChecker::evalStdCopy(CheckerContext &C,
2482 const CallEvent &Call) const {
2483 evalStdCopyCommon(C, Call);
2484 }
2485
evalStdCopyBackward(CheckerContext & C,const CallEvent & Call) const2486 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2487 const CallEvent &Call) const {
2488 evalStdCopyCommon(C, Call);
2489 }
2490
evalStdCopyCommon(CheckerContext & C,const CallEvent & Call) const2491 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2492 const CallEvent &Call) const {
2493 if (!Call.getArgExpr(2)->getType()->isPointerType())
2494 return;
2495
2496 ProgramStateRef State = C.getState();
2497
2498 const LocationContext *LCtx = C.getLocationContext();
2499
2500 // template <class _InputIterator, class _OutputIterator>
2501 // _OutputIterator
2502 // copy(_InputIterator __first, _InputIterator __last,
2503 // _OutputIterator __result)
2504
2505 // Invalidate the destination buffer
2506 const Expr *Dst = Call.getArgExpr(2);
2507 SVal DstVal = State->getSVal(Dst, LCtx);
2508 // FIXME: As we do not know how many items are copied, we also invalidate the
2509 // super region containing the target location.
2510 State =
2511 invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
2512
2513 SValBuilder &SVB = C.getSValBuilder();
2514
2515 SVal ResultVal =
2516 SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
2517 State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2518
2519 C.addTransition(State);
2520 }
2521
evalMemset(CheckerContext & C,const CallEvent & Call) const2522 void CStringChecker::evalMemset(CheckerContext &C,
2523 const CallEvent &Call) const {
2524 // void *memset(void *s, int c, size_t n);
2525 CurrentFunctionDescription = "memory set function";
2526
2527 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2528 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2529 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2530
2531 ProgramStateRef State = C.getState();
2532
2533 // See if the size argument is zero.
2534 const LocationContext *LCtx = C.getLocationContext();
2535 SVal SizeVal = C.getSVal(Size.Expression);
2536 QualType SizeTy = Size.Expression->getType();
2537
2538 ProgramStateRef ZeroSize, NonZeroSize;
2539 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2540
2541 // Get the value of the memory area.
2542 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2543
2544 // If the size is zero, there won't be any actual memory access, so
2545 // just bind the return value to the buffer and return.
2546 if (ZeroSize && !NonZeroSize) {
2547 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2548 C.addTransition(ZeroSize);
2549 return;
2550 }
2551
2552 // Ensure the memory area is not null.
2553 // If it is NULL there will be a NULL pointer dereference.
2554 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2555 if (!State)
2556 return;
2557
2558 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2559 if (!State)
2560 return;
2561
2562 // According to the values of the arguments, bind the value of the second
2563 // argument to the destination buffer and set string length, or just
2564 // invalidate the destination buffer.
2565 if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2566 Size.Expression, C, State))
2567 return;
2568
2569 State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2570 C.addTransition(State);
2571 }
2572
evalBzero(CheckerContext & C,const CallEvent & Call) const2573 void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2574 CurrentFunctionDescription = "memory clearance function";
2575
2576 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2577 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2578 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2579
2580 ProgramStateRef State = C.getState();
2581
2582 // See if the size argument is zero.
2583 SVal SizeVal = C.getSVal(Size.Expression);
2584 QualType SizeTy = Size.Expression->getType();
2585
2586 ProgramStateRef StateZeroSize, StateNonZeroSize;
2587 std::tie(StateZeroSize, StateNonZeroSize) =
2588 assumeZero(C, State, SizeVal, SizeTy);
2589
2590 // If the size is zero, there won't be any actual memory access,
2591 // In this case we just return.
2592 if (StateZeroSize && !StateNonZeroSize) {
2593 C.addTransition(StateZeroSize);
2594 return;
2595 }
2596
2597 // Get the value of the memory area.
2598 SVal MemVal = C.getSVal(Buffer.Expression);
2599
2600 // Ensure the memory area is not null.
2601 // If it is NULL there will be a NULL pointer dereference.
2602 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2603 if (!State)
2604 return;
2605
2606 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2607 if (!State)
2608 return;
2609
2610 if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2611 return;
2612
2613 C.addTransition(State);
2614 }
2615
evalSprintf(CheckerContext & C,const CallEvent & Call) const2616 void CStringChecker::evalSprintf(CheckerContext &C,
2617 const CallEvent &Call) const {
2618 CurrentFunctionDescription = "'sprintf'";
2619 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2620 }
2621
evalSnprintf(CheckerContext & C,const CallEvent & Call) const2622 void CStringChecker::evalSnprintf(CheckerContext &C,
2623 const CallEvent &Call) const {
2624 CurrentFunctionDescription = "'snprintf'";
2625 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2626 }
2627
evalSprintfCommon(CheckerContext & C,const CallEvent & Call,bool IsBounded) const2628 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2629 bool IsBounded) const {
2630 ProgramStateRef State = C.getState();
2631 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2632 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2633
2634 const auto NumParams = Call.parameters().size();
2635 if (CE->getNumArgs() < NumParams) {
2636 // This is an invalid call, let's just ignore it.
2637 return;
2638 }
2639
2640 const auto AllArguments =
2641 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2642 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2643
2644 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2645 // We consider only string buffers
2646 if (const QualType type = ArgExpr->getType();
2647 !type->isAnyPointerType() ||
2648 !type->getPointeeType()->isAnyCharacterType())
2649 continue;
2650 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2651
2652 // Ensure the buffers do not overlap.
2653 SizeArgExpr SrcExprAsSizeDummy = {
2654 {Source.Expression, Source.ArgumentIndex}};
2655 State = CheckOverlap(
2656 C, State,
2657 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2658 Dest, Source);
2659 if (!State)
2660 return;
2661 }
2662
2663 C.addTransition(State);
2664 }
2665
2666 //===----------------------------------------------------------------------===//
2667 // The driver method, and other Checker callbacks.
2668 //===----------------------------------------------------------------------===//
2669
identifyCall(const CallEvent & Call,CheckerContext & C) const2670 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2671 CheckerContext &C) const {
2672 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2673 if (!CE)
2674 return nullptr;
2675
2676 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2677 if (!FD)
2678 return nullptr;
2679
2680 if (StdCopy.matches(Call))
2681 return &CStringChecker::evalStdCopy;
2682 if (StdCopyBackward.matches(Call))
2683 return &CStringChecker::evalStdCopyBackward;
2684
2685 // Pro-actively check that argument types are safe to do arithmetic upon.
2686 // We do not want to crash if someone accidentally passes a structure
2687 // into, say, a C++ overload of any of these functions. We could not check
2688 // that for std::copy because they may have arguments of other types.
2689 for (auto I : CE->arguments()) {
2690 QualType T = I->getType();
2691 if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2692 return nullptr;
2693 }
2694
2695 const FnCheck *Callback = Callbacks.lookup(Call);
2696 if (Callback)
2697 return *Callback;
2698
2699 return nullptr;
2700 }
2701
evalCall(const CallEvent & Call,CheckerContext & C) const2702 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2703 FnCheck Callback = identifyCall(Call, C);
2704
2705 // If the callee isn't a string function, let another checker handle it.
2706 if (!Callback)
2707 return false;
2708
2709 // Check and evaluate the call.
2710 assert(isa<CallExpr>(Call.getOriginExpr()));
2711 Callback(this, C, Call);
2712
2713 // If the evaluate call resulted in no change, chain to the next eval call
2714 // handler.
2715 // Note, the custom CString evaluation calls assume that basic safety
2716 // properties are held. However, if the user chooses to turn off some of these
2717 // checks, we ignore the issues and leave the call evaluation to a generic
2718 // handler.
2719 return C.isDifferent();
2720 }
2721
checkPreStmt(const DeclStmt * DS,CheckerContext & C) const2722 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2723 // Record string length for char a[] = "abc";
2724 ProgramStateRef state = C.getState();
2725
2726 for (const auto *I : DS->decls()) {
2727 const VarDecl *D = dyn_cast<VarDecl>(I);
2728 if (!D)
2729 continue;
2730
2731 // FIXME: Handle array fields of structs.
2732 if (!D->getType()->isArrayType())
2733 continue;
2734
2735 const Expr *Init = D->getInit();
2736 if (!Init)
2737 continue;
2738 if (!isa<StringLiteral>(Init))
2739 continue;
2740
2741 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2742 const MemRegion *MR = VarLoc.getAsRegion();
2743 if (!MR)
2744 continue;
2745
2746 SVal StrVal = C.getSVal(Init);
2747 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2748 DefinedOrUnknownSVal strLength =
2749 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2750
2751 state = state->set<CStringLength>(MR, strLength);
2752 }
2753
2754 C.addTransition(state);
2755 }
2756
2757 ProgramStateRef
checkRegionChanges(ProgramStateRef state,const InvalidatedSymbols *,ArrayRef<const MemRegion * > ExplicitRegions,ArrayRef<const MemRegion * > Regions,const LocationContext * LCtx,const CallEvent * Call) const2758 CStringChecker::checkRegionChanges(ProgramStateRef state,
2759 const InvalidatedSymbols *,
2760 ArrayRef<const MemRegion *> ExplicitRegions,
2761 ArrayRef<const MemRegion *> Regions,
2762 const LocationContext *LCtx,
2763 const CallEvent *Call) const {
2764 CStringLengthTy Entries = state->get<CStringLength>();
2765 if (Entries.isEmpty())
2766 return state;
2767
2768 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2769 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2770
2771 // First build sets for the changed regions and their super-regions.
2772 for (const MemRegion *MR : Regions) {
2773 Invalidated.insert(MR);
2774
2775 SuperRegions.insert(MR);
2776 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2777 MR = SR->getSuperRegion();
2778 SuperRegions.insert(MR);
2779 }
2780 }
2781
2782 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2783
2784 // Then loop over the entries in the current state.
2785 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2786 // Is this entry for a super-region of a changed region?
2787 if (SuperRegions.count(MR)) {
2788 Entries = F.remove(Entries, MR);
2789 continue;
2790 }
2791
2792 // Is this entry for a sub-region of a changed region?
2793 const MemRegion *Super = MR;
2794 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2795 Super = SR->getSuperRegion();
2796 if (Invalidated.count(Super)) {
2797 Entries = F.remove(Entries, MR);
2798 break;
2799 }
2800 }
2801 }
2802
2803 return state->set<CStringLength>(Entries);
2804 }
2805
checkLiveSymbols(ProgramStateRef state,SymbolReaper & SR) const2806 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2807 SymbolReaper &SR) const {
2808 // Mark all symbols in our string length map as valid.
2809 CStringLengthTy Entries = state->get<CStringLength>();
2810
2811 for (SVal Len : llvm::make_second_range(Entries)) {
2812 for (SymbolRef Sym : Len.symbols())
2813 SR.markInUse(Sym);
2814 }
2815 }
2816
checkDeadSymbols(SymbolReaper & SR,CheckerContext & C) const2817 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2818 CheckerContext &C) const {
2819 ProgramStateRef state = C.getState();
2820 CStringLengthTy Entries = state->get<CStringLength>();
2821 if (Entries.isEmpty())
2822 return;
2823
2824 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2825 for (auto [Reg, Len] : Entries) {
2826 if (SymbolRef Sym = Len.getAsSymbol()) {
2827 if (SR.isDead(Sym))
2828 Entries = F.remove(Entries, Reg);
2829 }
2830 }
2831
2832 state = state->set<CStringLength>(Entries);
2833 C.addTransition(state);
2834 }
2835
registerCStringModeling(CheckerManager & Mgr)2836 void ento::registerCStringModeling(CheckerManager &Mgr) {
2837 Mgr.registerChecker<CStringChecker>();
2838 }
2839
shouldRegisterCStringModeling(const CheckerManager & mgr)2840 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2841 return true;
2842 }
2843
2844 #define REGISTER_CHECKER(name) \
2845 void ento::register##name(CheckerManager &mgr) { \
2846 CStringChecker *checker = mgr.getChecker<CStringChecker>(); \
2847 checker->Filter.Check##name = true; \
2848 checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \
2849 } \
2850 \
2851 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2852
2853 REGISTER_CHECKER(CStringNullArg)
2854 REGISTER_CHECKER(CStringOutOfBounds)
2855 REGISTER_CHECKER(CStringBufferOverlap)
2856 REGISTER_CHECKER(CStringNotNullTerm)
2857 REGISTER_CHECKER(CStringUninitializedRead)
2858