1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This defines CStringChecker, which is an assortment of checks on calls 10 // to functions in <string.h>. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InterCheckerAPI.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/CharInfo.h" 17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SmallString.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <functional> 31 #include <optional> 32 33 using namespace clang; 34 using namespace ento; 35 using namespace std::placeholders; 36 37 namespace { 38 struct AnyArgExpr { 39 const Expr *Expression; 40 unsigned ArgumentIndex; 41 }; 42 struct SourceArgExpr : AnyArgExpr {}; 43 struct DestinationArgExpr : AnyArgExpr {}; 44 struct SizeArgExpr : AnyArgExpr {}; 45 46 using ErrorMessage = SmallString<128>; 47 enum class AccessKind { write, read }; 48 49 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription, 50 AccessKind 
Access) { 51 ErrorMessage Message; 52 llvm::raw_svector_ostream Os(Message); 53 54 // Function classification like: Memory copy function 55 Os << toUppercase(FunctionDescription.front()) 56 << &FunctionDescription.data()[1]; 57 58 if (Access == AccessKind::write) { 59 Os << " overflows the destination buffer"; 60 } else { // read access 61 Os << " accesses out-of-bound array element"; 62 } 63 64 return Message; 65 } 66 67 enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; 68 69 enum class CharKind { Regular = 0, Wide }; 70 constexpr CharKind CK_Regular = CharKind::Regular; 71 constexpr CharKind CK_Wide = CharKind::Wide; 72 73 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) { 74 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy 75 : Ctx.WideCharTy); 76 } 77 78 class CStringChecker : public Checker< eval::Call, 79 check::PreStmt<DeclStmt>, 80 check::LiveSymbols, 81 check::DeadSymbols, 82 check::RegionChanges 83 > { 84 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 85 BT_NotCString, BT_AdditionOverflow, BT_UninitRead; 86 87 mutable const char *CurrentFunctionDescription = nullptr; 88 89 public: 90 /// The filter is used to filter out the diagnostics which are not enabled by 91 /// the user. 
92 struct CStringChecksFilter { 93 bool CheckCStringNullArg = false; 94 bool CheckCStringOutOfBounds = false; 95 bool CheckCStringBufferOverlap = false; 96 bool CheckCStringNotNullTerm = false; 97 bool CheckCStringUninitializedRead = false; 98 99 CheckerNameRef CheckNameCStringNullArg; 100 CheckerNameRef CheckNameCStringOutOfBounds; 101 CheckerNameRef CheckNameCStringBufferOverlap; 102 CheckerNameRef CheckNameCStringNotNullTerm; 103 CheckerNameRef CheckNameCStringUninitializedRead; 104 }; 105 106 CStringChecksFilter Filter; 107 108 static void *getTag() { static int tag; return &tag; } 109 110 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 111 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 112 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 113 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 114 115 ProgramStateRef 116 checkRegionChanges(ProgramStateRef state, 117 const InvalidatedSymbols *, 118 ArrayRef<const MemRegion *> ExplicitRegions, 119 ArrayRef<const MemRegion *> Regions, 120 const LocationContext *LCtx, 121 const CallEvent *Call) const; 122 123 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &, 124 const CallExpr *)>; 125 126 CallDescriptionMap<FnCheck> Callbacks = { 127 {{CDF_MaybeBuiltin, {"memcpy"}, 3}, 128 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)}, 129 {{CDF_MaybeBuiltin, {"wmemcpy"}, 3}, 130 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)}, 131 {{CDF_MaybeBuiltin, {"mempcpy"}, 3}, 132 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)}, 133 {{CDF_None, {"wmempcpy"}, 3}, 134 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)}, 135 {{CDF_MaybeBuiltin, {"memcmp"}, 3}, 136 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 137 {{CDF_MaybeBuiltin, {"wmemcmp"}, 3}, 138 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)}, 139 {{CDF_MaybeBuiltin, {"memmove"}, 3}, 140 
std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)}, 141 {{CDF_MaybeBuiltin, {"wmemmove"}, 3}, 142 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)}, 143 {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset}, 144 {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset}, 145 {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy}, 146 {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy}, 147 {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy}, 148 {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy}, 149 {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat}, 150 {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat}, 151 {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat}, 152 {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength}, 153 {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength}, 154 {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength}, 155 {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength}, 156 {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp}, 157 {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp}, 158 {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp}, 159 {{CDF_MaybeBuiltin, {"strncasecmp"}, 3}, 160 &CStringChecker::evalStrncasecmp}, 161 {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep}, 162 {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy}, 163 {{CDF_MaybeBuiltin, {"bcmp"}, 3}, 164 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 165 {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero}, 166 {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero}, 167 {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf}, 168 {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf}, 169 }; 170 171 // These require a bit of 
special handling. 172 CallDescription StdCopy{{"std", "copy"}, 3}, 173 StdCopyBackward{{"std", "copy_backward"}, 3}; 174 175 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const; 176 void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 177 void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 178 void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 179 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 180 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 181 ProgramStateRef state, SizeArgExpr Size, 182 DestinationArgExpr Dest, SourceArgExpr Source, 183 bool Restricted, bool IsMempcpy, CharKind CK) const; 184 185 void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 186 187 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 188 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 189 void evalstrLengthCommon(CheckerContext &C, 190 const CallExpr *CE, 191 bool IsStrnlen = false) const; 192 193 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 194 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 195 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 196 void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const; 197 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd, 198 bool IsBounded, ConcatFnKind appendK, 199 bool returnPtr = true) const; 200 201 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 202 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 203 void evalStrlcat(CheckerContext &C, const CallExpr *CE) const; 204 205 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 206 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 207 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 208 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 209 void evalStrcmpCommon(CheckerContext 
&C, 210 const CallExpr *CE, 211 bool IsBounded = false, 212 bool IgnoreCase = false) const; 213 214 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 215 216 void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; 217 void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; 218 void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; 219 void evalMemset(CheckerContext &C, const CallExpr *CE) const; 220 void evalBzero(CheckerContext &C, const CallExpr *CE) const; 221 222 void evalSprintf(CheckerContext &C, const CallExpr *CE) const; 223 void evalSnprintf(CheckerContext &C, const CallExpr *CE) const; 224 void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded, 225 bool IsBuiltin) const; 226 227 // Utility methods 228 std::pair<ProgramStateRef , ProgramStateRef > 229 static assumeZero(CheckerContext &C, 230 ProgramStateRef state, SVal V, QualType Ty); 231 232 static ProgramStateRef setCStringLength(ProgramStateRef state, 233 const MemRegion *MR, 234 SVal strLength); 235 static SVal getCStringLengthForRegion(CheckerContext &C, 236 ProgramStateRef &state, 237 const Expr *Ex, 238 const MemRegion *MR, 239 bool hypothetical); 240 SVal getCStringLength(CheckerContext &C, 241 ProgramStateRef &state, 242 const Expr *Ex, 243 SVal Buf, 244 bool hypothetical = false) const; 245 246 const StringLiteral *getCStringLiteral(CheckerContext &C, 247 ProgramStateRef &state, 248 const Expr *expr, 249 SVal val) const; 250 251 /// Invalidate the destination buffer determined by characters copied. 252 static ProgramStateRef 253 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, 254 const Expr *BufE, SVal BufV, SVal SizeV, 255 QualType SizeTy); 256 257 /// Operation never overflows, do not invalidate the super region. 
258 static ProgramStateRef invalidateDestinationBufferNeverOverflows( 259 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); 260 261 /// We do not know whether the operation can overflow (e.g. size is unknown), 262 /// invalidate the super region and escape related pointers. 263 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion( 264 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); 265 266 /// Invalidate the source buffer for escaping pointers. 267 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C, 268 ProgramStateRef S, 269 const Expr *BufE, SVal BufV); 270 271 /// @param InvalidationTraitOperations Determine how to invlidate the 272 /// MemRegion by setting the invalidation traits. Return true to cause pointer 273 /// escape, or false otherwise. 274 static ProgramStateRef invalidateBufferAux( 275 CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V, 276 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, 277 const MemRegion *)> 278 InvalidationTraitOperations); 279 280 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 281 const MemRegion *MR); 282 283 static bool memsetAux(const Expr *DstBuffer, SVal CharE, 284 const Expr *Size, CheckerContext &C, 285 ProgramStateRef &State); 286 287 // Re-usable checks 288 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, 289 AnyArgExpr Arg, SVal l) const; 290 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, 291 AnyArgExpr Buffer, SVal Element, 292 AccessKind Access, 293 CharKind CK = CharKind::Regular) const; 294 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 295 AnyArgExpr Buffer, SizeArgExpr Size, 296 AccessKind Access, 297 CharKind CK = CharKind::Regular) const; 298 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state, 299 SizeArgExpr Size, AnyArgExpr First, 300 AnyArgExpr Second, 301 CharKind CK = CharKind::Regular) const; 302 void 
emitOverlapBug(CheckerContext &C, 303 ProgramStateRef state, 304 const Stmt *First, 305 const Stmt *Second) const; 306 307 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, 308 StringRef WarningMsg) const; 309 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, 310 const Stmt *S, StringRef WarningMsg) const; 311 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 312 const Stmt *S, StringRef WarningMsg) const; 313 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; 314 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, 315 const Expr *E) const; 316 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 317 ProgramStateRef state, 318 NonLoc left, 319 NonLoc right) const; 320 321 // Return true if the destination buffer of the copy function may be in bound. 322 // Expects SVal of Size to be positive and unsigned. 323 // Expects SVal of FirstBuf to be a FieldRegion. 324 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State, 325 SVal BufVal, QualType BufTy, SVal LengthVal, 326 QualType LengthTy); 327 }; 328 329 } //end anonymous namespace 330 331 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 332 333 //===----------------------------------------------------------------------===// 334 // Individual checks and utility methods. 
335 //===----------------------------------------------------------------------===// 336 337 std::pair<ProgramStateRef , ProgramStateRef > 338 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 339 QualType Ty) { 340 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 341 if (!val) 342 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 343 344 SValBuilder &svalBuilder = C.getSValBuilder(); 345 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 346 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 347 } 348 349 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 350 ProgramStateRef State, 351 AnyArgExpr Arg, SVal l) const { 352 // If a previous check has failed, propagate the failure. 353 if (!State) 354 return nullptr; 355 356 ProgramStateRef stateNull, stateNonNull; 357 std::tie(stateNull, stateNonNull) = 358 assumeZero(C, State, l, Arg.Expression->getType()); 359 360 if (stateNull && !stateNonNull) { 361 if (Filter.CheckCStringNullArg) { 362 SmallString<80> buf; 363 llvm::raw_svector_ostream OS(buf); 364 assert(CurrentFunctionDescription); 365 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1) 366 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to " 367 << CurrentFunctionDescription; 368 369 emitNullArgBug(C, stateNull, Arg.Expression, OS.str()); 370 } 371 return nullptr; 372 } 373 374 // From here on, assume that the value is non-null. 375 assert(stateNonNull); 376 return stateNonNull; 377 } 378 379 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 380 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 381 ProgramStateRef state, 382 AnyArgExpr Buffer, SVal Element, 383 AccessKind Access, 384 CharKind CK) const { 385 386 // If a previous check has failed, propagate the failure. 387 if (!state) 388 return nullptr; 389 390 // Check for out of bound array element access. 
391 const MemRegion *R = Element.getAsRegion(); 392 if (!R) 393 return state; 394 395 const auto *ER = dyn_cast<ElementRegion>(R); 396 if (!ER) 397 return state; 398 399 SValBuilder &svalBuilder = C.getSValBuilder(); 400 ASTContext &Ctx = svalBuilder.getContext(); 401 402 // Get the index of the accessed element. 403 NonLoc Idx = ER->getIndex(); 404 405 if (CK == CharKind::Regular) { 406 if (ER->getValueType() != Ctx.CharTy) 407 return state; 408 } else { 409 if (ER->getValueType() != Ctx.WideCharTy) 410 return state; 411 412 QualType SizeTy = Ctx.getSizeType(); 413 NonLoc WideSize = 414 svalBuilder 415 .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), 416 SizeTy) 417 .castAs<NonLoc>(); 418 SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy); 419 if (Offset.isUnknown()) 420 return state; 421 Idx = Offset.castAs<NonLoc>(); 422 } 423 424 // Get the size of the array. 425 const auto *superReg = cast<SubRegion>(ER->getSuperRegion()); 426 DefinedOrUnknownSVal Size = 427 getDynamicExtent(state, superReg, C.getSValBuilder()); 428 429 ProgramStateRef StInBound, StOutBound; 430 std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size); 431 if (StOutBound && !StInBound) { 432 // These checks are either enabled by the CString out-of-bounds checker 433 // explicitly or implicitly by the Malloc checker. 434 // In the latter case we only do modeling but do not emit warning. 435 if (!Filter.CheckCStringOutOfBounds) 436 return nullptr; 437 438 // Emit a bug report. 439 ErrorMessage Message = 440 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access); 441 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message); 442 return nullptr; 443 } 444 445 // Ensure that we wouldn't read uninitialized value. 
446 if (Access == AccessKind::read) { 447 if (Filter.CheckCStringUninitializedRead && 448 StInBound->getSVal(ER).isUndef()) { 449 emitUninitializedReadBug(C, StInBound, Buffer.Expression); 450 return nullptr; 451 } 452 } 453 454 // Array bound check succeeded. From this point forward the array bound 455 // should always succeed. 456 return StInBound; 457 } 458 459 ProgramStateRef 460 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 461 AnyArgExpr Buffer, SizeArgExpr Size, 462 AccessKind Access, CharKind CK) const { 463 // If a previous check has failed, propagate the failure. 464 if (!State) 465 return nullptr; 466 467 SValBuilder &svalBuilder = C.getSValBuilder(); 468 ASTContext &Ctx = svalBuilder.getContext(); 469 470 QualType SizeTy = Size.Expression->getType(); 471 QualType PtrTy = getCharPtrType(Ctx, CK); 472 473 // Check that the first buffer is non-null. 474 SVal BufVal = C.getSVal(Buffer.Expression); 475 State = checkNonNull(C, State, Buffer, BufVal); 476 if (!State) 477 return nullptr; 478 479 // If out-of-bounds checking is turned off, skip the rest. 480 if (!Filter.CheckCStringOutOfBounds) 481 return State; 482 483 // Get the access length and make sure it is known. 484 // FIXME: This assumes the caller has already checked that the access length 485 // is positive. And that it's unsigned. 486 SVal LengthVal = C.getSVal(Size.Expression); 487 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 488 if (!Length) 489 return State; 490 491 // Compute the offset of the last element to be accessed: size-1. 492 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>(); 493 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy); 494 if (Offset.isUnknown()) 495 return nullptr; 496 NonLoc LastOffset = Offset.castAs<NonLoc>(); 497 498 // Check that the first buffer is sufficiently long. 
499 SVal BufStart = 500 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType()); 501 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 502 503 SVal BufEnd = 504 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); 505 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK); 506 507 // If the buffer isn't large enough, abort. 508 if (!State) 509 return nullptr; 510 } 511 512 // Large enough or not, return this state! 513 return State; 514 } 515 516 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 517 ProgramStateRef state, 518 SizeArgExpr Size, AnyArgExpr First, 519 AnyArgExpr Second, 520 CharKind CK) const { 521 if (!Filter.CheckCStringBufferOverlap) 522 return state; 523 524 // Do a simple check for overlap: if the two arguments are from the same 525 // buffer, see if the end of the first is greater than the start of the second 526 // or vice versa. 527 528 // If a previous check has failed, propagate the failure. 529 if (!state) 530 return nullptr; 531 532 ProgramStateRef stateTrue, stateFalse; 533 534 // Assume different address spaces cannot overlap. 535 if (First.Expression->getType()->getPointeeType().getAddressSpace() != 536 Second.Expression->getType()->getPointeeType().getAddressSpace()) 537 return state; 538 539 // Get the buffer values and make sure they're known locations. 540 const LocationContext *LCtx = C.getLocationContext(); 541 SVal firstVal = state->getSVal(First.Expression, LCtx); 542 SVal secondVal = state->getSVal(Second.Expression, LCtx); 543 544 std::optional<Loc> firstLoc = firstVal.getAs<Loc>(); 545 if (!firstLoc) 546 return state; 547 548 std::optional<Loc> secondLoc = secondVal.getAs<Loc>(); 549 if (!secondLoc) 550 return state; 551 552 // Are the two values the same? 
553 SValBuilder &svalBuilder = C.getSValBuilder(); 554 std::tie(stateTrue, stateFalse) = 555 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 556 557 if (stateTrue && !stateFalse) { 558 // If the values are known to be equal, that's automatically an overlap. 559 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 560 return nullptr; 561 } 562 563 // assume the two expressions are not equal. 564 assert(stateFalse); 565 state = stateFalse; 566 567 // Which value comes first? 568 QualType cmpTy = svalBuilder.getConditionType(); 569 SVal reverse = 570 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy); 571 std::optional<DefinedOrUnknownSVal> reverseTest = 572 reverse.getAs<DefinedOrUnknownSVal>(); 573 if (!reverseTest) 574 return state; 575 576 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 577 if (stateTrue) { 578 if (stateFalse) { 579 // If we don't know which one comes first, we can't perform this test. 580 return state; 581 } else { 582 // Switch the values so that firstVal is before secondVal. 583 std::swap(firstLoc, secondLoc); 584 585 // Switch the Exprs as well, so that they still correspond. 586 std::swap(First, Second); 587 } 588 } 589 590 // Get the length, and make sure it too is known. 591 SVal LengthVal = state->getSVal(Size.Expression, LCtx); 592 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 593 if (!Length) 594 return state; 595 596 // Convert the first buffer's start address to char*. 597 // Bail out if the cast fails. 598 ASTContext &Ctx = svalBuilder.getContext(); 599 QualType CharPtrTy = getCharPtrType(Ctx, CK); 600 SVal FirstStart = 601 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType()); 602 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 603 if (!FirstStartLoc) 604 return state; 605 606 // Compute the end of the first buffer. Bail out if THAT fails. 
607 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc, 608 *Length, CharPtrTy); 609 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 610 if (!FirstEndLoc) 611 return state; 612 613 // Is the end of the first buffer past the start of the second buffer? 614 SVal Overlap = 615 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy); 616 std::optional<DefinedOrUnknownSVal> OverlapTest = 617 Overlap.getAs<DefinedOrUnknownSVal>(); 618 if (!OverlapTest) 619 return state; 620 621 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 622 623 if (stateTrue && !stateFalse) { 624 // Overlap! 625 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 626 return nullptr; 627 } 628 629 // assume the two expressions don't overlap. 630 assert(stateFalse); 631 return stateFalse; 632 } 633 634 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 635 const Stmt *First, const Stmt *Second) const { 636 ExplodedNode *N = C.generateErrorNode(state); 637 if (!N) 638 return; 639 640 if (!BT_Overlap) 641 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 642 categories::UnixAPI, "Improper arguments")); 643 644 // Generate a report for this bug. 
645 auto report = std::make_unique<PathSensitiveBugReport>( 646 *BT_Overlap, "Arguments must not be overlapping buffers", N); 647 report->addRange(First->getSourceRange()); 648 report->addRange(Second->getSourceRange()); 649 650 C.emitReport(std::move(report)); 651 } 652 653 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, 654 const Stmt *S, StringRef WarningMsg) const { 655 if (ExplodedNode *N = C.generateErrorNode(State)) { 656 if (!BT_Null) 657 BT_Null.reset(new BuiltinBug( 658 Filter.CheckNameCStringNullArg, categories::UnixAPI, 659 "Null pointer argument in call to byte string function")); 660 661 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get()); 662 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); 663 Report->addRange(S->getSourceRange()); 664 if (const auto *Ex = dyn_cast<Expr>(S)) 665 bugreporter::trackExpressionValue(N, Ex, *Report); 666 C.emitReport(std::move(Report)); 667 } 668 } 669 670 void CStringChecker::emitUninitializedReadBug(CheckerContext &C, 671 ProgramStateRef State, 672 const Expr *E) const { 673 if (ExplodedNode *N = C.generateErrorNode(State)) { 674 const char *Msg = 675 "Bytes string function accesses uninitialized/garbage values"; 676 if (!BT_UninitRead) 677 BT_UninitRead.reset( 678 new BuiltinBug(Filter.CheckNameCStringUninitializedRead, 679 "Accessing unitialized/garbage values", Msg)); 680 681 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get()); 682 683 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 684 Report->addRange(E->getSourceRange()); 685 bugreporter::trackExpressionValue(N, E, *Report); 686 C.emitReport(std::move(Report)); 687 } 688 } 689 690 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, 691 ProgramStateRef State, const Stmt *S, 692 StringRef WarningMsg) const { 693 if (ExplodedNode *N = C.generateErrorNode(State)) { 694 if (!BT_Bounds) 695 BT_Bounds.reset(new BuiltinBug( 696 Filter.CheckCStringOutOfBounds ? 
Filter.CheckNameCStringOutOfBounds 697 : Filter.CheckNameCStringNullArg, 698 "Out-of-bound array access", 699 "Byte string function accesses out-of-bound array element")); 700 701 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get()); 702 703 // FIXME: It would be nice to eventually make this diagnostic more clear, 704 // e.g., by referencing the original declaration or by saying *why* this 705 // reference is outside the range. 706 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); 707 Report->addRange(S->getSourceRange()); 708 C.emitReport(std::move(Report)); 709 } 710 } 711 712 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 713 const Stmt *S, 714 StringRef WarningMsg) const { 715 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { 716 if (!BT_NotCString) 717 BT_NotCString.reset(new BuiltinBug( 718 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 719 "Argument is not a null-terminated string.")); 720 721 auto Report = 722 std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N); 723 724 Report->addRange(S->getSourceRange()); 725 C.emitReport(std::move(Report)); 726 } 727 } 728 729 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C, 730 ProgramStateRef State) const { 731 if (ExplodedNode *N = C.generateErrorNode(State)) { 732 if (!BT_AdditionOverflow) 733 BT_AdditionOverflow.reset( 734 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 735 "Sum of expressions causes overflow.")); 736 737 // This isn't a great error message, but this should never occur in real 738 // code anyway -- you'd have to create a buffer longer than a size_t can 739 // represent, which is sort of a contradiction. 
740 const char *WarningMsg = 741 "This expression will create a string whose length is too big to " 742 "be represented as a size_t"; 743 744 auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow, 745 WarningMsg, N); 746 C.emitReport(std::move(Report)); 747 } 748 } 749 750 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 751 ProgramStateRef state, 752 NonLoc left, 753 NonLoc right) const { 754 // If out-of-bounds checking is turned off, skip the rest. 755 if (!Filter.CheckCStringOutOfBounds) 756 return state; 757 758 // If a previous check has failed, propagate the failure. 759 if (!state) 760 return nullptr; 761 762 SValBuilder &svalBuilder = C.getSValBuilder(); 763 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 764 765 QualType sizeTy = svalBuilder.getContext().getSizeType(); 766 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 767 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 768 769 SVal maxMinusRight; 770 if (isa<nonloc::ConcreteInt>(right)) { 771 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 772 sizeTy); 773 } else { 774 // Try switching the operands. (The order of these two assignments is 775 // important!) 776 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 777 sizeTy); 778 left = right; 779 } 780 781 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 782 QualType cmpTy = svalBuilder.getConditionType(); 783 // If left > max - right, we have an overflow. 784 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 785 *maxMinusRightNL, cmpTy); 786 787 ProgramStateRef stateOverflow, stateOkay; 788 std::tie(stateOverflow, stateOkay) = 789 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 790 791 if (stateOverflow && !stateOkay) { 792 // We have an overflow. Emit a bug report. 
793 emitAdditionOverflowBug(C, stateOverflow); 794 return nullptr; 795 } 796 797 // From now on, assume an overflow didn't occur. 798 assert(stateOkay); 799 state = stateOkay; 800 } 801 802 return state; 803 } 804 805 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 806 const MemRegion *MR, 807 SVal strLength) { 808 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 809 810 MR = MR->StripCasts(); 811 812 switch (MR->getKind()) { 813 case MemRegion::StringRegionKind: 814 // FIXME: This can happen if we strcpy() into a string region. This is 815 // undefined [C99 6.4.5p6], but we should still warn about it. 816 return state; 817 818 case MemRegion::SymbolicRegionKind: 819 case MemRegion::AllocaRegionKind: 820 case MemRegion::NonParamVarRegionKind: 821 case MemRegion::ParamVarRegionKind: 822 case MemRegion::FieldRegionKind: 823 case MemRegion::ObjCIvarRegionKind: 824 // These are the types we can currently track string lengths for. 825 break; 826 827 case MemRegion::ElementRegionKind: 828 // FIXME: Handle element regions by upper-bounding the parent region's 829 // string length. 830 return state; 831 832 default: 833 // Other regions (mostly non-data) can't have a reliable C string length. 834 // For now, just ignore the change. 835 // FIXME: These are rare but not impossible. We should output some kind of 836 // warning for things like strcpy((char[]){'a', 0}, "b"); 837 return state; 838 } 839 840 if (strLength.isUnknown()) 841 return state->remove<CStringLength>(MR); 842 843 return state->set<CStringLength>(MR, strLength); 844 } 845 846 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 847 ProgramStateRef &state, 848 const Expr *Ex, 849 const MemRegion *MR, 850 bool hypothetical) { 851 if (!hypothetical) { 852 // If there's a recorded length, go ahead and return it. 
853 const SVal *Recorded = state->get<CStringLength>(MR); 854 if (Recorded) 855 return *Recorded; 856 } 857 858 // Otherwise, get a new symbol and update the state. 859 SValBuilder &svalBuilder = C.getSValBuilder(); 860 QualType sizeTy = svalBuilder.getContext().getSizeType(); 861 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 862 MR, Ex, sizeTy, 863 C.getLocationContext(), 864 C.blockCount()); 865 866 if (!hypothetical) { 867 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 868 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 869 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 870 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 871 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 872 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 873 fourInt); 874 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 875 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, 876 maxLength, sizeTy); 877 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 878 } 879 state = state->set<CStringLength>(MR, strLength); 880 } 881 882 return strLength; 883 } 884 885 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 886 const Expr *Ex, SVal Buf, 887 bool hypothetical) const { 888 const MemRegion *MR = Buf.getAsRegion(); 889 if (!MR) { 890 // If we can't get a region, see if it's something we /know/ isn't a 891 // C string. In the context of locations, the only time we can issue such 892 // a warning is for labels. 
893 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 894 if (Filter.CheckCStringNotNullTerm) { 895 SmallString<120> buf; 896 llvm::raw_svector_ostream os(buf); 897 assert(CurrentFunctionDescription); 898 os << "Argument to " << CurrentFunctionDescription 899 << " is the address of the label '" << Label->getLabel()->getName() 900 << "', which is not a null-terminated string"; 901 902 emitNotCStringBug(C, state, Ex, os.str()); 903 } 904 return UndefinedVal(); 905 } 906 907 // If it's not a region and not a label, give up. 908 return UnknownVal(); 909 } 910 911 // If we have a region, strip casts from it and see if we can figure out 912 // its length. For anything we can't figure out, just return UnknownVal. 913 MR = MR->StripCasts(); 914 915 switch (MR->getKind()) { 916 case MemRegion::StringRegionKind: { 917 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 918 // so we can assume that the byte length is the correct C string length. 919 SValBuilder &svalBuilder = C.getSValBuilder(); 920 QualType sizeTy = svalBuilder.getContext().getSizeType(); 921 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 922 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy); 923 } 924 case MemRegion::SymbolicRegionKind: 925 case MemRegion::AllocaRegionKind: 926 case MemRegion::NonParamVarRegionKind: 927 case MemRegion::ParamVarRegionKind: 928 case MemRegion::FieldRegionKind: 929 case MemRegion::ObjCIvarRegionKind: 930 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 931 case MemRegion::CompoundLiteralRegionKind: 932 // FIXME: Can we track this? Is it necessary? 933 return UnknownVal(); 934 case MemRegion::ElementRegionKind: 935 // FIXME: How can we handle this? It's not good enough to subtract the 936 // offset from the base string length; consider "123\x00567" and &a[5]. 937 return UnknownVal(); 938 default: 939 // Other regions (mostly non-data) can't have a reliable C string length. 
940 // In this case, an error is emitted and UndefinedVal is returned. 941 // The caller should always be prepared to handle this case. 942 if (Filter.CheckCStringNotNullTerm) { 943 SmallString<120> buf; 944 llvm::raw_svector_ostream os(buf); 945 946 assert(CurrentFunctionDescription); 947 os << "Argument to " << CurrentFunctionDescription << " is "; 948 949 if (SummarizeRegion(os, C.getASTContext(), MR)) 950 os << ", which is not a null-terminated string"; 951 else 952 os << "not a null-terminated string"; 953 954 emitNotCStringBug(C, state, Ex, os.str()); 955 } 956 return UndefinedVal(); 957 } 958 } 959 960 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 961 ProgramStateRef &state, const Expr *expr, SVal val) const { 962 963 // Get the memory region pointed to by the val. 964 const MemRegion *bufRegion = val.getAsRegion(); 965 if (!bufRegion) 966 return nullptr; 967 968 // Strip casts off the memory region. 969 bufRegion = bufRegion->StripCasts(); 970 971 // Cast the memory region to a string region. 972 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 973 if (!strRegion) 974 return nullptr; 975 976 // Return the actual string in the string region. 977 return strRegion->getStringLiteral(); 978 } 979 980 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, 981 SVal BufVal, QualType BufTy, 982 SVal LengthVal, QualType LengthTy) { 983 // If we do not know that the buffer is long enough we return 'true'. 984 // Otherwise the parent region of this field region would also get 985 // invalidated, which would lead to warnings based on an unknown state. 986 987 if (LengthVal.isUnknown()) 988 return false; 989 990 // Originally copied from CheckBufferAccess and CheckLocation. 
991 SValBuilder &SB = C.getSValBuilder(); 992 ASTContext &Ctx = C.getASTContext(); 993 994 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 995 996 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 997 if (!Length) 998 return true; // cf top comment. 999 1000 // Compute the offset of the last element to be accessed: size-1. 1001 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>(); 1002 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy); 1003 if (Offset.isUnknown()) 1004 return true; // cf top comment 1005 NonLoc LastOffset = Offset.castAs<NonLoc>(); 1006 1007 // Check that the first buffer is sufficiently long. 1008 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy); 1009 std::optional<Loc> BufLoc = BufStart.getAs<Loc>(); 1010 if (!BufLoc) 1011 return true; // cf top comment. 1012 1013 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); 1014 1015 // Check for out of bound array element access. 1016 const MemRegion *R = BufEnd.getAsRegion(); 1017 if (!R) 1018 return true; // cf top comment. 1019 1020 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 1021 if (!ER) 1022 return true; // cf top comment. 1023 1024 // FIXME: Does this crash when a non-standard definition 1025 // of a library function is encountered? 1026 assert(ER->getValueType() == C.getASTContext().CharTy && 1027 "isFirstBufInBound should only be called with char* ElementRegions"); 1028 1029 // Get the size of the array. 1030 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 1031 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB); 1032 1033 // Get the index of the accessed element. 
1034 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 1035 1036 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true); 1037 1038 return static_cast<bool>(StInBound); 1039 } 1040 1041 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( 1042 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV, 1043 SVal SizeV, QualType SizeTy) { 1044 auto InvalidationTraitOperations = 1045 [&C, S, BufTy = BufE->getType(), BufV, SizeV, 1046 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1047 // If destination buffer is a field region and access is in bound, do 1048 // not invalidate its super region. 1049 if (MemRegion::FieldRegionKind == R->getKind() && 1050 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { 1051 ITraits.setTrait( 1052 R, 1053 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 1054 } 1055 return false; 1056 }; 1057 1058 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1059 } 1060 1061 ProgramStateRef 1062 CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion( 1063 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { 1064 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &, 1065 const MemRegion *R) { 1066 return isa<FieldRegion>(R); 1067 }; 1068 1069 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1070 } 1071 1072 ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows( 1073 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { 1074 auto InvalidationTraitOperations = 1075 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1076 if (MemRegion::FieldRegionKind == R->getKind()) 1077 ITraits.setTrait( 1078 R, 1079 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 1080 return false; 1081 }; 1082 1083 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1084 } 1085 1086 
ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C, 1087 ProgramStateRef S, 1088 const Expr *BufE, 1089 SVal BufV) { 1090 auto InvalidationTraitOperations = 1091 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1092 ITraits.setTrait( 1093 R->getBaseRegion(), 1094 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 1095 ITraits.setTrait(R, 1096 RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 1097 return true; 1098 }; 1099 1100 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1101 } 1102 1103 ProgramStateRef CStringChecker::invalidateBufferAux( 1104 CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V, 1105 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, 1106 const MemRegion *)> 1107 InvalidationTraitOperations) { 1108 std::optional<Loc> L = V.getAs<Loc>(); 1109 if (!L) 1110 return State; 1111 1112 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 1113 // some assumptions about the value that CFRefCount can't. Even so, it should 1114 // probably be refactored. 1115 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 1116 const MemRegion *R = MR->getRegion()->StripCasts(); 1117 1118 // Are we dealing with an ElementRegion? If so, we should be invalidating 1119 // the super-region. 1120 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 1121 R = ER->getSuperRegion(); 1122 // FIXME: What about layers of ElementRegions? 1123 } 1124 1125 // Invalidate this region. 1126 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 1127 RegionAndSymbolInvalidationTraits ITraits; 1128 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R); 1129 1130 return State->invalidateRegions(R, E, C.blockCount(), LCtx, 1131 CausesPointerEscape, nullptr, nullptr, 1132 &ITraits); 1133 } 1134 1135 // If we have a non-region value by chance, just remove the binding. 
1136 // FIXME: is this necessary or correct? This handles the non-Region 1137 // cases. Is it ever valid to store to these? 1138 return State->killBinding(*L); 1139 } 1140 1141 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 1142 const MemRegion *MR) { 1143 switch (MR->getKind()) { 1144 case MemRegion::FunctionCodeRegionKind: { 1145 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl()) 1146 os << "the address of the function '" << *FD << '\''; 1147 else 1148 os << "the address of a function"; 1149 return true; 1150 } 1151 case MemRegion::BlockCodeRegionKind: 1152 os << "block text"; 1153 return true; 1154 case MemRegion::BlockDataRegionKind: 1155 os << "a block"; 1156 return true; 1157 case MemRegion::CXXThisRegionKind: 1158 case MemRegion::CXXTempObjectRegionKind: 1159 os << "a C++ temp object of type " 1160 << cast<TypedValueRegion>(MR)->getValueType(); 1161 return true; 1162 case MemRegion::NonParamVarRegionKind: 1163 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType(); 1164 return true; 1165 case MemRegion::ParamVarRegionKind: 1166 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType(); 1167 return true; 1168 case MemRegion::FieldRegionKind: 1169 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType(); 1170 return true; 1171 case MemRegion::ObjCIvarRegionKind: 1172 os << "an instance variable of type " 1173 << cast<TypedValueRegion>(MR)->getValueType(); 1174 return true; 1175 default: 1176 return false; 1177 } 1178 } 1179 1180 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, 1181 const Expr *Size, CheckerContext &C, 1182 ProgramStateRef &State) { 1183 SVal MemVal = C.getSVal(DstBuffer); 1184 SVal SizeVal = C.getSVal(Size); 1185 const MemRegion *MR = MemVal.getAsRegion(); 1186 if (!MR) 1187 return false; 1188 1189 // We're about to model memset by producing a "default binding" in the Store. 
1190 // Our current implementation - RegionStore - doesn't support default bindings 1191 // that don't cover the whole base region. So we should first get the offset 1192 // and the base region to figure out whether the offset of buffer is 0. 1193 RegionOffset Offset = MR->getAsOffset(); 1194 const MemRegion *BR = Offset.getRegion(); 1195 1196 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>(); 1197 if (!SizeNL) 1198 return false; 1199 1200 SValBuilder &svalBuilder = C.getSValBuilder(); 1201 ASTContext &Ctx = C.getASTContext(); 1202 1203 // void *memset(void *dest, int ch, size_t count); 1204 // For now we can only handle the case of offset is 0 and concrete char value. 1205 if (Offset.isValid() && !Offset.hasSymbolicOffset() && 1206 Offset.getOffset() == 0) { 1207 // Get the base region's size. 1208 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder); 1209 1210 ProgramStateRef StateWholeReg, StateNotWholeReg; 1211 std::tie(StateWholeReg, StateNotWholeReg) = 1212 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL)); 1213 1214 // With the semantic of 'memset()', we should convert the CharVal to 1215 // unsigned char. 1216 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); 1217 1218 ProgramStateRef StateNullChar, StateNonNullChar; 1219 std::tie(StateNullChar, StateNonNullChar) = 1220 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy); 1221 1222 if (StateWholeReg && !StateNotWholeReg && StateNullChar && 1223 !StateNonNullChar) { 1224 // If the 'memset()' acts on the whole region of destination buffer and 1225 // the value of the second argument of 'memset()' is zero, bind the second 1226 // argument's value to the destination buffer with 'default binding'. 1227 // FIXME: Since there is no perfect way to bind the non-zero character, we 1228 // can only deal with zero value here. In the future, we need to deal with 1229 // the binding of non-zero value in the case of whole region. 
1230 State = State->bindDefaultZero(svalBuilder.makeLoc(BR), 1231 C.getLocationContext()); 1232 } else { 1233 // If the destination buffer's extent is not equal to the value of 1234 // third argument, just invalidate buffer. 1235 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, 1236 SizeVal, Size->getType()); 1237 } 1238 1239 if (StateNullChar && !StateNonNullChar) { 1240 // If the value of the second argument of 'memset()' is zero, set the 1241 // string length of destination buffer to 0 directly. 1242 State = setCStringLength(State, MR, 1243 svalBuilder.makeZeroVal(Ctx.getSizeType())); 1244 } else if (!StateNullChar && StateNonNullChar) { 1245 SVal NewStrLen = svalBuilder.getMetadataSymbolVal( 1246 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), 1247 C.getLocationContext(), C.blockCount()); 1248 1249 // If the value of second argument is not zero, then the string length 1250 // is at least the size argument. 1251 SVal NewStrLenGESize = svalBuilder.evalBinOp( 1252 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); 1253 1254 State = setCStringLength( 1255 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true), 1256 MR, NewStrLen); 1257 } 1258 } else { 1259 // If the offset is not zero and char value is not concrete, we can do 1260 // nothing but invalidate the buffer. 1261 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, 1262 SizeVal, Size->getType()); 1263 } 1264 return true; 1265 } 1266 1267 //===----------------------------------------------------------------------===// 1268 // evaluation of individual function calls. 
1269 //===----------------------------------------------------------------------===// 1270 1271 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE, 1272 ProgramStateRef state, SizeArgExpr Size, 1273 DestinationArgExpr Dest, 1274 SourceArgExpr Source, bool Restricted, 1275 bool IsMempcpy, CharKind CK) const { 1276 CurrentFunctionDescription = "memory copy function"; 1277 1278 // See if the size argument is zero. 1279 const LocationContext *LCtx = C.getLocationContext(); 1280 SVal sizeVal = state->getSVal(Size.Expression, LCtx); 1281 QualType sizeTy = Size.Expression->getType(); 1282 1283 ProgramStateRef stateZeroSize, stateNonZeroSize; 1284 std::tie(stateZeroSize, stateNonZeroSize) = 1285 assumeZero(C, state, sizeVal, sizeTy); 1286 1287 // Get the value of the Dest. 1288 SVal destVal = state->getSVal(Dest.Expression, LCtx); 1289 1290 // If the size is zero, there won't be any actual memory access, so 1291 // just bind the return value to the destination buffer and return. 1292 if (stateZeroSize && !stateNonZeroSize) { 1293 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 1294 C.addTransition(stateZeroSize); 1295 return; 1296 } 1297 1298 // If the size can be nonzero, we have to check the other arguments. 1299 if (stateNonZeroSize) { 1300 state = stateNonZeroSize; 1301 1302 // Ensure the destination is not null. If it is NULL there will be a 1303 // NULL pointer dereference. 1304 state = checkNonNull(C, state, Dest, destVal); 1305 if (!state) 1306 return; 1307 1308 // Get the value of the Src. 1309 SVal srcVal = state->getSVal(Source.Expression, LCtx); 1310 1311 // Ensure the source is not null. If it is NULL there will be a 1312 // NULL pointer dereference. 1313 state = checkNonNull(C, state, Source, srcVal); 1314 if (!state) 1315 return; 1316 1317 // Ensure the accesses are valid and that the buffers do not overlap. 
1318 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK); 1319 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK); 1320 1321 if (Restricted) 1322 state = CheckOverlap(C, state, Size, Dest, Source, CK); 1323 1324 if (!state) 1325 return; 1326 1327 // If this is mempcpy, get the byte after the last byte copied and 1328 // bind the expr. 1329 if (IsMempcpy) { 1330 // Get the byte after the last byte copied. 1331 SValBuilder &SvalBuilder = C.getSValBuilder(); 1332 ASTContext &Ctx = SvalBuilder.getContext(); 1333 QualType CharPtrTy = getCharPtrType(Ctx, CK); 1334 SVal DestRegCharVal = 1335 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType()); 1336 SVal lastElement = C.getSValBuilder().evalBinOp( 1337 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType()); 1338 // If we don't know how much we copied, we can at least 1339 // conjure a return value for later. 1340 if (lastElement.isUnknown()) 1341 lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1342 C.blockCount()); 1343 1344 // The byte after the last byte copied is the return value. 1345 state = state->BindExpr(CE, LCtx, lastElement); 1346 } else { 1347 // All other copies return the destination buffer. 1348 // (Well, bcopy() has a void return type, but this won't hurt.) 1349 state = state->BindExpr(CE, LCtx, destVal); 1350 } 1351 1352 // Invalidate the destination (regular invalidation without pointer-escaping 1353 // the address of the top-level region). 1354 // FIXME: Even if we can't perfectly model the copy, we should see if we 1355 // can use LazyCompoundVals to copy the source values into the destination. 1356 // This would probably remove any existing bindings past the end of the 1357 // copied region, but that's still an improvement over blank invalidation. 
1358 state = invalidateDestinationBufferBySize( 1359 C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal, 1360 Size.Expression->getType()); 1361 1362 // Invalidate the source (const-invalidation without const-pointer-escaping 1363 // the address of the top-level region). 1364 state = invalidateSourceBuffer(C, state, Source.Expression, 1365 C.getSVal(Source.Expression)); 1366 1367 C.addTransition(state); 1368 } 1369 } 1370 1371 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE, 1372 CharKind CK) const { 1373 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1374 // The return value is the address of the destination buffer. 1375 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 1376 SourceArgExpr Src = {{CE->getArg(1), 1}}; 1377 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1378 1379 ProgramStateRef State = C.getState(); 1380 1381 constexpr bool IsRestricted = true; 1382 constexpr bool IsMempcpy = false; 1383 evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK); 1384 } 1385 1386 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE, 1387 CharKind CK) const { 1388 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1389 // The return value is a pointer to the byte following the last written byte. 1390 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 1391 SourceArgExpr Src = {{CE->getArg(1), 1}}; 1392 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1393 1394 constexpr bool IsRestricted = true; 1395 constexpr bool IsMempcpy = true; 1396 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1397 CK); 1398 } 1399 1400 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE, 1401 CharKind CK) const { 1402 // void *memmove(void *dst, const void *src, size_t n); 1403 // The return value is the address of the destination buffer. 
1404 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 1405 SourceArgExpr Src = {{CE->getArg(1), 1}}; 1406 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1407 1408 constexpr bool IsRestricted = false; 1409 constexpr bool IsMempcpy = false; 1410 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1411 CK); 1412 } 1413 1414 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1415 // void bcopy(const void *src, void *dst, size_t n); 1416 SourceArgExpr Src{{CE->getArg(0), 0}}; 1417 DestinationArgExpr Dest = {{CE->getArg(1), 1}}; 1418 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1419 1420 constexpr bool IsRestricted = false; 1421 constexpr bool IsMempcpy = false; 1422 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1423 CharKind::Regular); 1424 } 1425 1426 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE, 1427 CharKind CK) const { 1428 // int memcmp(const void *s1, const void *s2, size_t n); 1429 CurrentFunctionDescription = "memory comparison function"; 1430 1431 AnyArgExpr Left = {CE->getArg(0), 0}; 1432 AnyArgExpr Right = {CE->getArg(1), 1}; 1433 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1434 1435 ProgramStateRef State = C.getState(); 1436 SValBuilder &Builder = C.getSValBuilder(); 1437 const LocationContext *LCtx = C.getLocationContext(); 1438 1439 // See if the size argument is zero. 1440 SVal sizeVal = State->getSVal(Size.Expression, LCtx); 1441 QualType sizeTy = Size.Expression->getType(); 1442 1443 ProgramStateRef stateZeroSize, stateNonZeroSize; 1444 std::tie(stateZeroSize, stateNonZeroSize) = 1445 assumeZero(C, State, sizeVal, sizeTy); 1446 1447 // If the size can be zero, the result will be 0 in that case, and we don't 1448 // have to check either of the buffers. 
1449 if (stateZeroSize) { 1450 State = stateZeroSize; 1451 State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1452 C.addTransition(State); 1453 } 1454 1455 // If the size can be nonzero, we have to check the other arguments. 1456 if (stateNonZeroSize) { 1457 State = stateNonZeroSize; 1458 // If we know the two buffers are the same, we know the result is 0. 1459 // First, get the two buffers' addresses. Another checker will have already 1460 // made sure they're not undefined. 1461 DefinedOrUnknownSVal LV = 1462 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1463 DefinedOrUnknownSVal RV = 1464 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1465 1466 // See if they are the same. 1467 ProgramStateRef SameBuffer, NotSameBuffer; 1468 std::tie(SameBuffer, NotSameBuffer) = 1469 State->assume(Builder.evalEQ(State, LV, RV)); 1470 1471 // If the two arguments are the same buffer, we know the result is 0, 1472 // and we only need to check one size. 1473 if (SameBuffer && !NotSameBuffer) { 1474 State = SameBuffer; 1475 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read); 1476 if (State) { 1477 State = 1478 SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1479 C.addTransition(State); 1480 } 1481 return; 1482 } 1483 1484 // If the two arguments might be different buffers, we have to check 1485 // the size of both of them. 1486 assert(NotSameBuffer); 1487 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK); 1488 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK); 1489 if (State) { 1490 // The return value is the comparison result, which we don't know. 
1491 SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1492 State = State->BindExpr(CE, LCtx, CmpV); 1493 C.addTransition(State); 1494 } 1495 } 1496 } 1497 1498 void CStringChecker::evalstrLength(CheckerContext &C, 1499 const CallExpr *CE) const { 1500 // size_t strlen(const char *s); 1501 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1502 } 1503 1504 void CStringChecker::evalstrnLength(CheckerContext &C, 1505 const CallExpr *CE) const { 1506 // size_t strnlen(const char *s, size_t maxlen); 1507 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1508 } 1509 1510 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1511 bool IsStrnlen) const { 1512 CurrentFunctionDescription = "string length function"; 1513 ProgramStateRef state = C.getState(); 1514 const LocationContext *LCtx = C.getLocationContext(); 1515 1516 if (IsStrnlen) { 1517 const Expr *maxlenExpr = CE->getArg(1); 1518 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1519 1520 ProgramStateRef stateZeroSize, stateNonZeroSize; 1521 std::tie(stateZeroSize, stateNonZeroSize) = 1522 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1523 1524 // If the size can be zero, the result will be 0 in that case, and we don't 1525 // have to check the string itself. 1526 if (stateZeroSize) { 1527 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1528 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1529 C.addTransition(stateZeroSize); 1530 } 1531 1532 // If the size is GUARANTEED to be zero, we're done! 1533 if (!stateNonZeroSize) 1534 return; 1535 1536 // Otherwise, record the assumption that the size is nonzero. 1537 state = stateNonZeroSize; 1538 } 1539 1540 // Check that the string argument is non-null. 
1541 AnyArgExpr Arg = {CE->getArg(0), 0}; 1542 SVal ArgVal = state->getSVal(Arg.Expression, LCtx); 1543 state = checkNonNull(C, state, Arg, ArgVal); 1544 1545 if (!state) 1546 return; 1547 1548 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal); 1549 1550 // If the argument isn't a valid C string, there's no valid state to 1551 // transition to. 1552 if (strLength.isUndef()) 1553 return; 1554 1555 DefinedOrUnknownSVal result = UnknownVal(); 1556 1557 // If the check is for strnlen() then bind the return value to no more than 1558 // the maxlen value. 1559 if (IsStrnlen) { 1560 QualType cmpTy = C.getSValBuilder().getConditionType(); 1561 1562 // It's a little unfortunate to be getting this again, 1563 // but it's not that expensive... 1564 const Expr *maxlenExpr = CE->getArg(1); 1565 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1566 1567 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 1568 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); 1569 1570 if (strLengthNL && maxlenValNL) { 1571 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 1572 1573 // Check if the strLength is greater than the maxlen. 1574 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( 1575 C.getSValBuilder() 1576 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) 1577 .castAs<DefinedOrUnknownSVal>()); 1578 1579 if (stateStringTooLong && !stateStringNotTooLong) { 1580 // If the string is longer than maxlen, return maxlen. 1581 result = *maxlenValNL; 1582 } else if (stateStringNotTooLong && !stateStringTooLong) { 1583 // If the string is shorter than maxlen, return its length. 1584 result = *strLengthNL; 1585 } 1586 } 1587 1588 if (result.isUnknown()) { 1589 // If we don't have enough information for a comparison, there's 1590 // no guarantee the full string length will actually be returned. 1591 // All we know is the return value is the min of the string length 1592 // and the limit. 
// This is better than nothing.
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
      NonLoc resultNL = result.castAs<NonLoc>();

      if (strLengthNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }

      if (maxlenValNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }
    }

  } else {
    // This is a plain strlen(), not strnlen().
    result = strLength.castAs<DefinedOrUnknownSVal>();

    // If we don't know the length of the string, conjure a return
    // value, so it can be used in constraints, at least.
    if (result.isUnknown()) {
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
    }
  }

  // Bind the return value.
  assert(!result.isUnknown() && "Should have conjured a value by now");
  state = state->BindExpr(CE, LCtx, result);
  C.addTransition(state);
}

/// Models strcpy: an unbounded copy whose value is the destination pointer.
void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
  // char *strcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strncpy: a copy bounded by the third argument whose value is the
/// destination pointer.
void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models stpcpy: an unbounded copy whose value points at the end of the
/// copied string rather than at the start of the destination.
void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
  // char *stpcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strlcpy: a bounded copy whose value is a length, not a pointer.
void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcpy(char *dest, const char *src, size_t size);
  // Note: evalStrcpyCommon only reads ReturnEnd when returnPtr is true, so the
  // ReturnEnd value passed here does not influence the modeled result.
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none,
                   /* returnPtr = */ false);
}

/// Models strcat: an unbounded append whose value is the destination pointer.
void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
  // char *strcat(char *restrict s1, const char *restrict s2);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::strcat);
}

/// Models strncat: an append bounded by the third argument whose value is the
/// destination pointer.
void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
  // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strcat);
}

/// Models strlcat: a bounded append whose value is a length, not a pointer.
void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcat(char *dst, const char *src, size_t size);
  // It will append at most size - strlen(dst) - 1 bytes,
  // NULL-terminating the result.
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strlcat,
                   /* returnPtr = */ false);
}

/// Shared model for the string copy/concatenation family (strcpy, strncpy,
/// stpcpy, strlcpy, strcat, strncat, strlcat).
///
/// The function checks both pointer arguments for null, checks the buffers
/// for overlap, works out how many characters are copied when that is
/// knowable, checks the destination for overflow, invalidates the destination
/// and source buffers, records the destination's new string length and
/// finally binds the value of the call expression.
///
/// \param ReturnEnd  Only read when \p returnPtr is true. If set, the call
///                   evaluates to the destination advanced by the final string
///                   length (stpcpy-style), or to a conjured symbol when that
///                   location could not be computed; otherwise it evaluates to
///                   the destination pointer.
/// \param IsBounded  The call has a size argument at index 2 limiting the
///                   copy.
/// \param appendK    ConcatFnKind::none for copying functions, strcat or
///                   strlcat for appending ones.
/// \param returnPtr  True when the call evaluates to a pointer, false when it
///                   evaluates to a length (the strlcpy/strlcat wrappers pass
///                   false). Wrappers that omit it rely on a default declared
///                   outside this part of the file -- presumably true; confirm
///                   against the class declaration.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {{CE->getArg(0), 0}};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {{CE->getArg(1), 1}};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  // Note: this is queried for every function in the family, including the
  // pure copy functions; it is only consumed on the appending paths below.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Non-null once a bound-based overflow check has been set up. In this
  // function it is only tested for null-ness, to skip the length-based check.
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  //        Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {
      {srcExpr.Expression, srcExpr.ArgumentIndex}};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{{CE->getArg(2), 2}} : SrcExprAsSizeDummy), Dst,
      srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {{CE->getArg(2), 2}};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check if the max number to copy is less than the length of the src.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        // amountCopied is only set when exactly one of the two outcomes is
        // feasible; if both are, it stays unknown.
        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // Max number to copy is less than the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        break;
      }
      case ConcatFnKind::strlcat:
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        // Both outcomes are feasible: the amount copied is not known.
        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For the bounded copy functions (strncpy, strlcpy) and strlcat, this
        // is just checking that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Value of the call for the length-returning functions (strlcpy/strlcat).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src), i.e. the length of the source string (not
    // the size of its buffer).
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this branch sits inside `appendK != none`, so the
        // condition below cannot hold here -- confirm whether it is dead code.
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    // For the ReturnEnd case the value is filled in further below.
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      //strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
                                              *dstRegVal, amountCopied,
                                              C.getASTContext().getSizeType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(CE, LCtx, Result);
  C.addTransition(state);
}

/// Models strcmp: unbounded, case-sensitive comparison.
void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
  //int strcmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
}

/// Models strncmp: case-sensitive comparison bounded by the third argument.
void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
  //int strncmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
}

/// Models strcasecmp: unbounded, case-insensitive comparison.
void CStringChecker::evalStrcasecmp(CheckerContext &C,
                                    const CallExpr *CE) const {
  //int strcasecmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
}

/// Models strncasecmp: case-insensitive comparison bounded by the third
/// argument.
void CStringChecker::evalStrncasecmp(CheckerContext &C,
                                     const CallExpr *CE) const {
  //int strncasecmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
}

/// Shared model for strcmp, strncmp, strcasecmp and strncasecmp.
///
/// Both arguments are checked for null and for being valid C strings. If the
/// two pointers may be equal, a transition with result 0 is added. On the
/// path where they differ, the result is a conjured symbol; when both
/// arguments are known string literals (and, for the bounded variants, the
/// length is a known constant) the result is instead the constant 0 or the
/// symbol constrained to be greater or less than zero.
///
/// \param IsBounded  The call has a length argument at index 2.
/// \param IgnoreCase Compare with StringRef::compare_insensitive instead of
///                   StringRef::compare.
void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
                                      bool IsBounded, bool IgnoreCase) const {
  CurrentFunctionDescription = "string comparison function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the first string is non-null
  AnyArgExpr Left = {CE->getArg(0), 0};
  SVal LeftVal = state->getSVal(Left.Expression, LCtx);
  state = checkNonNull(C, state, Left, LeftVal);
  if (!state)
    return;

  // Check that the second string is non-null.
  AnyArgExpr Right = {CE->getArg(1), 1};
  SVal RightVal = state->getSVal(Right.Expression, LCtx);
  state = checkNonNull(C, state, Right, RightVal);
  if (!state)
    return;

  // Get the string length of the first string or give up.
  // (The length itself is not used below; only its definedness is tested.)
  SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
  if (LeftLength.isUndef())
    return;

  // Get the string length of the second string or give up.
  // (As above, only the definedness of the length is tested.)
  SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
  if (RightLength.isUndef())
    return;

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();

  // See if they are the same.
  SValBuilder &svalBuilder = C.getSValBuilder();
  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
  ProgramStateRef StSameBuf, StNotSameBuf;
  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);

  // If the two arguments might be the same buffer, we know the result is 0,
  // and we only need to check one size.
  // NOTE(review): both lengths were already queried above, so the "one size"
  // remark looks stale -- confirm.
  if (StSameBuf) {
    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
                                    svalBuilder.makeZeroVal(CE->getType()));
    C.addTransition(StSameBuf);

    // If the two arguments are GUARANTEED to be the same, we're done!
    if (!StNotSameBuf)
      return;
  }

  assert(StNotSameBuf);
  state = StNotSameBuf;

  // At this point we can go about comparing the two buffers.
  // For now, we only do this if they're both known string literals.

  // Attempt to extract string literals from both expressions.
  const StringLiteral *LeftStrLiteral =
      getCStringLiteral(C, state, Left.Expression, LeftVal);
  const StringLiteral *RightStrLiteral =
      getCStringLiteral(C, state, Right.Expression, RightVal);
  bool canComputeResult = false;
  // Default result: a fresh symbol, possibly constrained or replaced below.
  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
                                                C.blockCount());

  if (LeftStrLiteral && RightStrLiteral) {
    StringRef LeftStrRef = LeftStrLiteral->getString();
    StringRef RightStrRef = RightStrLiteral->getString();

    if (IsBounded) {
      // Get the max number of characters to compare.
      const Expr *lenExpr = CE->getArg(2);
      SVal lenVal = state->getSVal(lenExpr, LCtx);

      // If the length is known, we can get the right substrings.
      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
        // Create substrings of each to compare the prefix.
        LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
        RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
        canComputeResult = true;
      }
    } else {
      // This is a normal, unbounded strcmp.
      canComputeResult = true;
    }

    if (canComputeResult) {
      // Real strcmp stops at null characters.
      size_t s1Term = LeftStrRef.find('\0');
      if (s1Term != StringRef::npos)
        LeftStrRef = LeftStrRef.substr(0, s1Term);

      size_t s2Term = RightStrRef.find('\0');
      if (s2Term != StringRef::npos)
        RightStrRef = RightStrRef.substr(0, s2Term);

      // Use StringRef's comparison methods to compute the actual result.
      int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
                                  : LeftStrRef.compare(RightStrRef);

      // The strcmp function returns an integer greater than, equal to, or less
      // than zero, [c11, p7.24.4.2].
      if (compareRes == 0) {
        resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
      }
      else {
        DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
        // Constrain strcmp's result range based on the result of StringRef's
        // comparison methods.
        BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
        SVal compareWithZero =
          svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
              svalBuilder.getConditionType());
        DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
        state = state->assume(compareWithZeroVal, true);
      }
    }
  }

  state = state->BindExpr(CE, LCtx, resultVal);

  // Record this as a possible path.
  C.addTransition(state);
}

/// Models strsep: checks both arguments for null, invalidates the string
/// currently referenced through the first argument, rebinds that pointer to a
/// conjured value and binds the call to the pointer's previous value (or to a
/// conjured symbol when the first argument is not a location).
void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
  // char *strsep(char **stringp, const char *delim);
  // Verify whether the search string parameter matches the return type.
  SourceArgExpr SearchStrPtr = {{CE->getArg(0), 0}};

  // Bail out (without modeling the call) when the first argument is not a
  // pointer to the call's return type.
  QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
  if (CharPtrTy.isNull() ||
      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
    return;

  CurrentFunctionDescription = "strsep()";
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the search string pointer is non-null (though it may point to
  // a null string).
  SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
  if (!State)
    return;

  // Check that the delimiter string is non-null.
  AnyArgExpr DelimStr = {CE->getArg(1), 1};
  SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
  State = checkNonNull(C, State, DelimStr, DelimStrVal);
  if (!State)
    return;

  SValBuilder &SVB = C.getSValBuilder();
  SVal Result;
  if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
    // Get the current value of the search string pointer, as a char*.
    Result = State->getSVal(*SearchStrLoc, CharPtrTy);

    // Invalidate the search string, representing the change of one delimiter
    // character to NUL.
    // As the replacement never overflows, do not invalidate its super region.
    State = invalidateDestinationBufferNeverOverflows(
        C, State, SearchStrPtr.Expression, Result);

    // Overwrite the search string pointer. The new value is either an address
    // further along in the same string, or NULL if there are no more tokens.
    State = State->bindLoc(*SearchStrLoc,
                           SVB.conjureSymbolVal(getTag(),
                                                CE,
                                                LCtx,
                                                CharPtrTy,
                                                C.blockCount()),
                           LCtx);
  } else {
    assert(SearchStrVal.isUnknown());
    // Conjure a symbolic value. It's the best we can do.
    Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
  }

  // Set the return value, and finish.
  State = State->BindExpr(CE, LCtx, Result);
  C.addTransition(State);
}

// These should probably be moved into a C++ standard library checker.
2284 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const { 2285 evalStdCopyCommon(C, CE); 2286 } 2287 2288 void CStringChecker::evalStdCopyBackward(CheckerContext &C, 2289 const CallExpr *CE) const { 2290 evalStdCopyCommon(C, CE); 2291 } 2292 2293 void CStringChecker::evalStdCopyCommon(CheckerContext &C, 2294 const CallExpr *CE) const { 2295 if (!CE->getArg(2)->getType()->isPointerType()) 2296 return; 2297 2298 ProgramStateRef State = C.getState(); 2299 2300 const LocationContext *LCtx = C.getLocationContext(); 2301 2302 // template <class _InputIterator, class _OutputIterator> 2303 // _OutputIterator 2304 // copy(_InputIterator __first, _InputIterator __last, 2305 // _OutputIterator __result) 2306 2307 // Invalidate the destination buffer 2308 const Expr *Dst = CE->getArg(2); 2309 SVal DstVal = State->getSVal(Dst, LCtx); 2310 // FIXME: As we do not know how many items are copied, we also invalidate the 2311 // super region containing the target location. 2312 State = 2313 invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal); 2314 2315 SValBuilder &SVB = C.getSValBuilder(); 2316 2317 SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 2318 State = State->BindExpr(CE, LCtx, ResultVal); 2319 2320 C.addTransition(State); 2321 } 2322 2323 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { 2324 // void *memset(void *s, int c, size_t n); 2325 CurrentFunctionDescription = "memory set function"; 2326 2327 DestinationArgExpr Buffer = {{CE->getArg(0), 0}}; 2328 AnyArgExpr CharE = {CE->getArg(1), 1}; 2329 SizeArgExpr Size = {{CE->getArg(2), 2}}; 2330 2331 ProgramStateRef State = C.getState(); 2332 2333 // See if the size argument is zero. 
2334 const LocationContext *LCtx = C.getLocationContext(); 2335 SVal SizeVal = C.getSVal(Size.Expression); 2336 QualType SizeTy = Size.Expression->getType(); 2337 2338 ProgramStateRef ZeroSize, NonZeroSize; 2339 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy); 2340 2341 // Get the value of the memory area. 2342 SVal BufferPtrVal = C.getSVal(Buffer.Expression); 2343 2344 // If the size is zero, there won't be any actual memory access, so 2345 // just bind the return value to the buffer and return. 2346 if (ZeroSize && !NonZeroSize) { 2347 ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal); 2348 C.addTransition(ZeroSize); 2349 return; 2350 } 2351 2352 // Ensure the memory area is not null. 2353 // If it is NULL there will be a NULL pointer dereference. 2354 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal); 2355 if (!State) 2356 return; 2357 2358 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write); 2359 if (!State) 2360 return; 2361 2362 // According to the values of the arguments, bind the value of the second 2363 // argument to the destination buffer and set string length, or just 2364 // invalidate the destination buffer. 2365 if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression), 2366 Size.Expression, C, State)) 2367 return; 2368 2369 State = State->BindExpr(CE, LCtx, BufferPtrVal); 2370 C.addTransition(State); 2371 } 2372 2373 void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const { 2374 CurrentFunctionDescription = "memory clearance function"; 2375 2376 DestinationArgExpr Buffer = {{CE->getArg(0), 0}}; 2377 SizeArgExpr Size = {{CE->getArg(1), 1}}; 2378 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy); 2379 2380 ProgramStateRef State = C.getState(); 2381 2382 // See if the size argument is zero. 
2383 SVal SizeVal = C.getSVal(Size.Expression); 2384 QualType SizeTy = Size.Expression->getType(); 2385 2386 ProgramStateRef StateZeroSize, StateNonZeroSize; 2387 std::tie(StateZeroSize, StateNonZeroSize) = 2388 assumeZero(C, State, SizeVal, SizeTy); 2389 2390 // If the size is zero, there won't be any actual memory access, 2391 // In this case we just return. 2392 if (StateZeroSize && !StateNonZeroSize) { 2393 C.addTransition(StateZeroSize); 2394 return; 2395 } 2396 2397 // Get the value of the memory area. 2398 SVal MemVal = C.getSVal(Buffer.Expression); 2399 2400 // Ensure the memory area is not null. 2401 // If it is NULL there will be a NULL pointer dereference. 2402 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal); 2403 if (!State) 2404 return; 2405 2406 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write); 2407 if (!State) 2408 return; 2409 2410 if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State)) 2411 return; 2412 2413 C.addTransition(State); 2414 } 2415 2416 void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const { 2417 CurrentFunctionDescription = "'sprintf'"; 2418 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk; 2419 evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI); 2420 } 2421 2422 void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const { 2423 CurrentFunctionDescription = "'snprintf'"; 2424 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk; 2425 evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI); 2426 } 2427 2428 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE, 2429 bool IsBounded, bool IsBuiltin) const { 2430 ProgramStateRef State = C.getState(); 2431 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 2432 2433 const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams(); 2434 assert(CE->getNumArgs() >= NumParams); 2435 2436 const auto AllArguments = 2437 
llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs()); 2438 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams); 2439 2440 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) { 2441 // We consider only string buffers 2442 if (const QualType type = ArgExpr->getType(); 2443 !type->isAnyPointerType() || 2444 !type->getPointeeType()->isAnyCharacterType()) 2445 continue; 2446 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}}; 2447 2448 // Ensure the buffers do not overlap. 2449 SizeArgExpr SrcExprAsSizeDummy = { 2450 {Source.Expression, Source.ArgumentIndex}}; 2451 State = CheckOverlap( 2452 C, State, 2453 (IsBounded ? SizeArgExpr{{CE->getArg(1), 1}} : SrcExprAsSizeDummy), 2454 Dest, Source); 2455 if (!State) 2456 return; 2457 } 2458 2459 C.addTransition(State); 2460 } 2461 2462 //===----------------------------------------------------------------------===// 2463 // The driver method, and other Checker callbacks. 2464 //===----------------------------------------------------------------------===// 2465 2466 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call, 2467 CheckerContext &C) const { 2468 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); 2469 if (!CE) 2470 return nullptr; 2471 2472 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 2473 if (!FD) 2474 return nullptr; 2475 2476 if (StdCopy.matches(Call)) 2477 return &CStringChecker::evalStdCopy; 2478 if (StdCopyBackward.matches(Call)) 2479 return &CStringChecker::evalStdCopyBackward; 2480 2481 // Pro-actively check that argument types are safe to do arithmetic upon. 2482 // We do not want to crash if someone accidentally passes a structure 2483 // into, say, a C++ overload of any of these functions. We could not check 2484 // that for std::copy because they may have arguments of other types. 
2485 for (auto I : CE->arguments()) { 2486 QualType T = I->getType(); 2487 if (!T->isIntegralOrEnumerationType() && !T->isPointerType()) 2488 return nullptr; 2489 } 2490 2491 const FnCheck *Callback = Callbacks.lookup(Call); 2492 if (Callback) 2493 return *Callback; 2494 2495 return nullptr; 2496 } 2497 2498 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { 2499 FnCheck Callback = identifyCall(Call, C); 2500 2501 // If the callee isn't a string function, let another checker handle it. 2502 if (!Callback) 2503 return false; 2504 2505 // Check and evaluate the call. 2506 const auto *CE = cast<CallExpr>(Call.getOriginExpr()); 2507 Callback(this, C, CE); 2508 2509 // If the evaluate call resulted in no change, chain to the next eval call 2510 // handler. 2511 // Note, the custom CString evaluation calls assume that basic safety 2512 // properties are held. However, if the user chooses to turn off some of these 2513 // checks, we ignore the issues and leave the call evaluation to a generic 2514 // handler. 2515 return C.isDifferent(); 2516 } 2517 2518 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 2519 // Record string length for char a[] = "abc"; 2520 ProgramStateRef state = C.getState(); 2521 2522 for (const auto *I : DS->decls()) { 2523 const VarDecl *D = dyn_cast<VarDecl>(I); 2524 if (!D) 2525 continue; 2526 2527 // FIXME: Handle array fields of structs. 
2528 if (!D->getType()->isArrayType()) 2529 continue; 2530 2531 const Expr *Init = D->getInit(); 2532 if (!Init) 2533 continue; 2534 if (!isa<StringLiteral>(Init)) 2535 continue; 2536 2537 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 2538 const MemRegion *MR = VarLoc.getAsRegion(); 2539 if (!MR) 2540 continue; 2541 2542 SVal StrVal = C.getSVal(Init); 2543 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 2544 DefinedOrUnknownSVal strLength = 2545 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 2546 2547 state = state->set<CStringLength>(MR, strLength); 2548 } 2549 2550 C.addTransition(state); 2551 } 2552 2553 ProgramStateRef 2554 CStringChecker::checkRegionChanges(ProgramStateRef state, 2555 const InvalidatedSymbols *, 2556 ArrayRef<const MemRegion *> ExplicitRegions, 2557 ArrayRef<const MemRegion *> Regions, 2558 const LocationContext *LCtx, 2559 const CallEvent *Call) const { 2560 CStringLengthTy Entries = state->get<CStringLength>(); 2561 if (Entries.isEmpty()) 2562 return state; 2563 2564 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 2565 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 2566 2567 // First build sets for the changed regions and their super-regions. 2568 for (const MemRegion *MR : Regions) { 2569 Invalidated.insert(MR); 2570 2571 SuperRegions.insert(MR); 2572 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 2573 MR = SR->getSuperRegion(); 2574 SuperRegions.insert(MR); 2575 } 2576 } 2577 2578 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2579 2580 // Then loop over the entries in the current state. 2581 for (const MemRegion *MR : llvm::make_first_range(Entries)) { 2582 // Is this entry for a super-region of a changed region? 2583 if (SuperRegions.count(MR)) { 2584 Entries = F.remove(Entries, MR); 2585 continue; 2586 } 2587 2588 // Is this entry for a sub-region of a changed region? 
2589 const MemRegion *Super = MR; 2590 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 2591 Super = SR->getSuperRegion(); 2592 if (Invalidated.count(Super)) { 2593 Entries = F.remove(Entries, MR); 2594 break; 2595 } 2596 } 2597 } 2598 2599 return state->set<CStringLength>(Entries); 2600 } 2601 2602 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 2603 SymbolReaper &SR) const { 2604 // Mark all symbols in our string length map as valid. 2605 CStringLengthTy Entries = state->get<CStringLength>(); 2606 2607 for (SVal Len : llvm::make_second_range(Entries)) { 2608 for (SymbolRef Sym : Len.symbols()) 2609 SR.markInUse(Sym); 2610 } 2611 } 2612 2613 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 2614 CheckerContext &C) const { 2615 ProgramStateRef state = C.getState(); 2616 CStringLengthTy Entries = state->get<CStringLength>(); 2617 if (Entries.isEmpty()) 2618 return; 2619 2620 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2621 for (auto [Reg, Len] : Entries) { 2622 if (SymbolRef Sym = Len.getAsSymbol()) { 2623 if (SR.isDead(Sym)) 2624 Entries = F.remove(Entries, Reg); 2625 } 2626 } 2627 2628 state = state->set<CStringLength>(Entries); 2629 C.addTransition(state); 2630 } 2631 2632 void ento::registerCStringModeling(CheckerManager &Mgr) { 2633 Mgr.registerChecker<CStringChecker>(); 2634 } 2635 2636 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) { 2637 return true; 2638 } 2639 2640 #define REGISTER_CHECKER(name) \ 2641 void ento::register##name(CheckerManager &mgr) { \ 2642 CStringChecker *checker = mgr.getChecker<CStringChecker>(); \ 2643 checker->Filter.Check##name = true; \ 2644 checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \ 2645 } \ 2646 \ 2647 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } 2648 2649 REGISTER_CHECKER(CStringNullArg) 2650 REGISTER_CHECKER(CStringOutOfBounds) 2651 REGISTER_CHECKER(CStringBufferOverlap) 2652 
REGISTER_CHECKER(CStringNotNullTerm) 2653 REGISTER_CHECKER(CStringUninitializedRead) 2654