1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a checker that reports uninitialized fields in objects
10 // created after a constructor call.
11 //
12 // To read about command line options and how the checker works, refer to the
13 // top of the file and inline comments in UninitializedObject.h.
14 //
15 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
16 // complexity of this file.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21 #include "UninitializedObject.h"
22 #include "clang/ASTMatchers/ASTMatchFinder.h"
23 #include "clang/Driver/DriverDiagnostic.h"
24 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
25 #include "clang/StaticAnalyzer/Core/Checker.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
28 
29 using namespace clang;
30 using namespace clang::ento;
31 using namespace clang::ast_matchers;
32 
33 /// We'll mark fields (and pointee of fields) that are confirmed to be
34 /// uninitialized as already analyzed.
35 REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
36 
37 namespace {
38 
39 class UninitializedObjectChecker
40     : public Checker<check::EndFunction, check::DeadSymbols> {
41   std::unique_ptr<BuiltinBug> BT_uninitField;
42 
43 public:
44   // The fields of this struct will be initialized when registering the checker.
45   UninitObjCheckerOptions Opts;
46 
47   UninitializedObjectChecker()
48       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
49 
50   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
51   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
52 };
53 
54 /// A basic field type, that is not a pointer or a reference, it's dynamic and
55 /// static type is the same.
56 class RegularField final : public FieldNode {
57 public:
58   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
59 
60   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
61     Out << "uninitialized field ";
62   }
63 
64   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
65 
66   virtual void printNode(llvm::raw_ostream &Out) const override {
67     Out << getVariableName(getDecl());
68   }
69 
70   virtual void printSeparator(llvm::raw_ostream &Out) const override {
71     Out << '.';
72   }
73 };
74 
75 /// Represents that the FieldNode that comes after this is declared in a base
76 /// of the previous FieldNode. As such, this descendant doesn't wrap a
77 /// FieldRegion, and is purely a tool to describe a relation between two other
78 /// FieldRegion wrapping descendants.
79 class BaseClass final : public FieldNode {
80   const QualType BaseClassT;
81 
82 public:
83   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
84     assert(!T.isNull());
85     assert(T->getAsCXXRecordDecl());
86   }
87 
88   virtual void printNoteMsg(llvm::raw_ostream &Out) const override {
89     llvm_unreachable("This node can never be the final node in the "
90                      "fieldchain!");
91   }
92 
93   virtual void printPrefix(llvm::raw_ostream &Out) const override {}
94 
95   virtual void printNode(llvm::raw_ostream &Out) const override {
96     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
97   }
98 
99   virtual void printSeparator(llvm::raw_ostream &Out) const override {}
100 
101   virtual bool isBase() const override { return true; }
102 };
103 
104 } // end of anonymous namespace
105 
106 // Utility function declarations.
107 
108 /// Returns the region that was constructed by CtorDecl, or nullptr if that
109 /// isn't possible.
110 static const TypedValueRegion *
111 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
112                      CheckerContext &Context);
113 
114 /// Checks whether the object constructed by \p Ctor will be analyzed later
115 /// (e.g. if the object is a field of another object, in which case we'd check
116 /// it multiple times).
117 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
118                                       CheckerContext &Context);
119 
120 /// Checks whether RD contains a field with a name or type name that matches
121 /// \p Pattern.
122 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
123 
124 /// Checks _syntactically_ whether it is possible to access FD from the record
125 /// that contains it without a preceding assert (even if that access happens
126 /// inside a method). This is mainly used for records that act like unions, like
127 /// having multiple bit fields, with only a fraction being properly initialized.
128 /// If these fields are properly guarded with asserts, this method returns
129 /// false.
130 ///
131 /// Since this check is done syntactically, this method could be inaccurate.
132 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
133 
134 //===----------------------------------------------------------------------===//
135 //                  Methods for UninitializedObjectChecker.
136 //===----------------------------------------------------------------------===//
137 
138 void UninitializedObjectChecker::checkEndFunction(
139     const ReturnStmt *RS, CheckerContext &Context) const {
140 
141   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
142       Context.getLocationContext()->getDecl());
143   if (!CtorDecl)
144     return;
145 
146   if (!CtorDecl->isUserProvided())
147     return;
148 
149   if (CtorDecl->getParent()->isUnion())
150     return;
151 
152   // This avoids essentially the same error being reported multiple times.
153   if (willObjectBeAnalyzedLater(CtorDecl, Context))
154     return;
155 
156   const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
157   if (!R)
158     return;
159 
160   FindUninitializedFields F(Context.getState(), R, Opts);
161 
162   std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
163       F.getResults();
164 
165   ProgramStateRef UpdatedState = UninitInfo.first;
166   const UninitFieldMap &UninitFields = UninitInfo.second;
167 
168   if (UninitFields.empty()) {
169     Context.addTransition(UpdatedState);
170     return;
171   }
172 
173   // There are uninitialized fields in the record.
174 
175   ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
176   if (!Node)
177     return;
178 
179   PathDiagnosticLocation LocUsedForUniqueing;
180   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
181   if (CallSite)
182     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
183         CallSite, Context.getSourceManager(), Node->getLocationContext());
184 
185   // For Plist consumers that don't support notes just yet, we'll convert notes
186   // to warnings.
187   if (Opts.ShouldConvertNotesToWarnings) {
188     for (const auto &Pair : UninitFields) {
189 
190       auto Report = llvm::make_unique<BugReport>(
191           *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
192           Node->getLocationContext()->getDecl());
193       Context.emitReport(std::move(Report));
194     }
195     return;
196   }
197 
198   SmallString<100> WarningBuf;
199   llvm::raw_svector_ostream WarningOS(WarningBuf);
200   WarningOS << UninitFields.size() << " uninitialized field"
201             << (UninitFields.size() == 1 ? "" : "s")
202             << " at the end of the constructor call";
203 
204   auto Report = llvm::make_unique<BugReport>(
205       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
206       Node->getLocationContext()->getDecl());
207 
208   for (const auto &Pair : UninitFields) {
209     Report->addNote(Pair.second,
210                     PathDiagnosticLocation::create(Pair.first->getDecl(),
211                                                    Context.getSourceManager()));
212   }
213   Context.emitReport(std::move(Report));
214 }
215 
216 void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
217                                                   CheckerContext &C) const {
218   ProgramStateRef State = C.getState();
219   for (const MemRegion *R : State->get<AnalyzedRegions>()) {
220     if (!SR.isLiveRegion(R))
221       State = State->remove<AnalyzedRegions>(R);
222   }
223 }
224 
225 //===----------------------------------------------------------------------===//
226 //                   Methods for FindUninitializedFields.
227 //===----------------------------------------------------------------------===//
228 
229 FindUninitializedFields::FindUninitializedFields(
230     ProgramStateRef State, const TypedValueRegion *const R,
231     const UninitObjCheckerOptions &Opts)
232     : State(State), ObjectR(R), Opts(Opts) {
233 
234   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
235 
236   // In non-pedantic mode, if ObjectR doesn't contain a single initialized
237   // field, we'll assume that Object was intentionally left uninitialized.
238   if (!Opts.IsPedantic && !isAnyFieldInitialized())
239     UninitFields.clear();
240 }
241 
242 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
243                                                 const MemRegion *PointeeR) {
244   const FieldRegion *FR = Chain.getUninitRegion();
245 
246   assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
247          "One must also pass the pointee region as a parameter for "
248          "dereferenceable fields!");
249 
250   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
251           FR->getDecl()->getLocation()))
252     return false;
253 
254   if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FR->getDecl(), State))
255     return false;
256 
257   if (State->contains<AnalyzedRegions>(FR))
258     return false;
259 
260   if (PointeeR) {
261     if (State->contains<AnalyzedRegions>(PointeeR)) {
262       return false;
263     }
264     State = State->add<AnalyzedRegions>(PointeeR);
265   }
266 
267   State = State->add<AnalyzedRegions>(FR);
268 
269   UninitFieldMap::mapped_type NoteMsgBuf;
270   llvm::raw_svector_ostream OS(NoteMsgBuf);
271   Chain.printNoteMsg(OS);
272 
273   return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
274 }
275 
276 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
277                                                FieldChainInfo LocalChain) {
278   assert(R->getValueType()->isRecordType() &&
279          !R->getValueType()->isUnionType() &&
280          "This method only checks non-union record objects!");
281 
282   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
283 
284   if (!RD) {
285     IsAnyFieldInitialized = true;
286     return true;
287   }
288 
289   if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
290       shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
291     IsAnyFieldInitialized = true;
292     return false;
293   }
294 
295   bool ContainsUninitField = false;
296 
297   // Are all of this non-union's fields initialized?
298   for (const FieldDecl *I : RD->fields()) {
299 
300     const auto FieldVal =
301         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
302     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
303     QualType T = I->getType();
304 
305     // If LocalChain already contains FR, then we encountered a cyclic
306     // reference. In this case, region FR is already under checking at an
307     // earlier node in the directed tree.
308     if (LocalChain.contains(FR))
309       return false;
310 
311     if (T->isStructureOrClassType()) {
312       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
313         ContainsUninitField = true;
314       continue;
315     }
316 
317     if (T->isUnionType()) {
318       if (isUnionUninit(FR)) {
319         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
320           ContainsUninitField = true;
321       } else
322         IsAnyFieldInitialized = true;
323       continue;
324     }
325 
326     if (T->isArrayType()) {
327       IsAnyFieldInitialized = true;
328       continue;
329     }
330 
331     SVal V = State->getSVal(FieldVal);
332 
333     if (isDereferencableType(T) || V.getAs<nonloc::LocAsInteger>()) {
334       if (isDereferencableUninit(FR, LocalChain))
335         ContainsUninitField = true;
336       continue;
337     }
338 
339     if (isPrimitiveType(T)) {
340       if (isPrimitiveUninit(V)) {
341         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
342           ContainsUninitField = true;
343       }
344       continue;
345     }
346 
347     llvm_unreachable("All cases are handled!");
348   }
349 
350   // Checking bases. The checker will regard inherited data members as direct
351   // fields.
352   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
353   if (!CXXRD)
354     return ContainsUninitField;
355 
356   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
357     const auto *BaseRegion = State->getLValue(BaseSpec, R)
358                                  .castAs<loc::MemRegionVal>()
359                                  .getRegionAs<TypedValueRegion>();
360 
361     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
362     // note messages like 'this->A::B::x'.
363     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
364       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
365                                            BaseClass(BaseSpec.getType()))))
366         ContainsUninitField = true;
367     } else {
368       if (isNonUnionUninit(BaseRegion,
369                            LocalChain.add(BaseClass(BaseSpec.getType()))))
370         ContainsUninitField = true;
371     }
372   }
373 
374   return ContainsUninitField;
375 }
376 
377 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
378   assert(R->getValueType()->isUnionType() &&
379          "This method only checks union objects!");
380   // TODO: Implement support for union fields.
381   return false;
382 }
383 
384 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
385   if (V.isUndef())
386     return true;
387 
388   IsAnyFieldInitialized = true;
389   return false;
390 }
391 
392 //===----------------------------------------------------------------------===//
393 //                       Methods for FieldChainInfo.
394 //===----------------------------------------------------------------------===//
395 
396 bool FieldChainInfo::contains(const FieldRegion *FR) const {
397   for (const FieldNode &Node : Chain) {
398     if (Node.isSameRegion(FR))
399       return true;
400   }
401   return false;
402 }
403 
/// Prints every element except the last to `Out`. Since ImmutableLists store
/// elements in reverse order, and have no reverse iterators, we use a
/// recursive function to print the fieldchain correctly. The last element in
/// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
408 static void printTail(llvm::raw_ostream &Out,
409                       const FieldChainInfo::FieldChain L);
410 
411 // FIXME: This function constructs an incorrect string in the following case:
412 //
413 //   struct Base { int x; };
414 //   struct D1 : Base {}; struct D2 : Base {};
415 //
416 //   struct MostDerived : D1, D2 {
417 //     MostDerived() {}
418 //   }
419 //
420 // A call to MostDerived::MostDerived() will cause two notes that say
421 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
422 // we need an explicit namespace resolution whether the uninit field was
423 // 'D1::x' or 'D2::x'.
424 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
425   if (Chain.isEmpty())
426     return;
427 
428   const FieldNode &LastField = getHead();
429 
430   LastField.printNoteMsg(Out);
431   Out << '\'';
432 
433   for (const FieldNode &Node : Chain)
434     Node.printPrefix(Out);
435 
436   Out << "this->";
437   printTail(Out, Chain.getTail());
438   LastField.printNode(Out);
439   Out << '\'';
440 }
441 
442 static void printTail(llvm::raw_ostream &Out,
443                       const FieldChainInfo::FieldChain L) {
444   if (L.isEmpty())
445     return;
446 
447   printTail(Out, L.getTail());
448 
449   L.getHead().printNode(Out);
450   L.getHead().printSeparator(Out);
451 }
452 
453 //===----------------------------------------------------------------------===//
454 //                           Utility functions.
455 //===----------------------------------------------------------------------===//
456 
457 static const TypedValueRegion *
458 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
459                      CheckerContext &Context) {
460 
461   Loc ThisLoc =
462       Context.getSValBuilder().getCXXThis(CtorDecl, Context.getStackFrame());
463 
464   SVal ObjectV = Context.getState()->getSVal(ThisLoc);
465 
466   auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
467   if (R && !R->getValueType()->getAsCXXRecordDecl())
468     return nullptr;
469 
470   return R;
471 }
472 
473 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
474                                       CheckerContext &Context) {
475 
476   const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context);
477   if (!CurrRegion)
478     return false;
479 
480   const LocationContext *LC = Context.getLocationContext();
481   while ((LC = LC->getParent())) {
482 
483     // If \p Ctor was called by another constructor.
484     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
485     if (!OtherCtor)
486       continue;
487 
488     const TypedValueRegion *OtherRegion =
489         getConstructedRegion(OtherCtor, Context);
490     if (!OtherRegion)
491       continue;
492 
493     // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
494     // during the analysis of OtherRegion.
495     if (CurrRegion->isSubRegionOf(OtherRegion))
496       return true;
497   }
498 
499   return false;
500 }
501 
502 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
503   llvm::Regex R(Pattern);
504 
505   for (const FieldDecl *FD : RD->fields()) {
506     if (R.match(FD->getType().getAsString()))
507       return true;
508     if (R.match(FD->getName()))
509       return true;
510   }
511 
512   return false;
513 }
514 
515 static const Stmt *getMethodBody(const CXXMethodDecl *M) {
516   if (isa<CXXConstructorDecl>(M))
517     return nullptr;
518 
519   if (!M->isDefined())
520     return nullptr;
521 
522   return M->getDefinition()->getBody();
523 }
524 
525 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) {
526 
527   if (FD->getAccess() == AccessSpecifier::AS_public)
528     return true;
529 
530   const auto *Parent = dyn_cast<CXXRecordDecl>(FD->getParent());
531 
532   if (!Parent)
533     return true;
534 
535   Parent = Parent->getDefinition();
536   assert(Parent && "The record's definition must be avaible if an uninitialized"
537                    " field of it was found!");
538 
539   ASTContext &AC = State->getStateManager().getContext();
540 
541   auto FieldAccessM = memberExpr(hasDeclaration(equalsNode(FD))).bind("access");
542 
543   auto AssertLikeM = callExpr(callee(functionDecl(
544       anyOf(hasName("exit"), hasName("panic"), hasName("error"),
545             hasName("Assert"), hasName("assert"), hasName("ziperr"),
546             hasName("assfail"), hasName("db_error"), hasName("__assert"),
547             hasName("__assert2"), hasName("_wassert"), hasName("__assert_rtn"),
548             hasName("__assert_fail"), hasName("dtrace_assfail"),
549             hasName("yy_fatal_error"), hasName("_XCAssertionFailureHandler"),
550             hasName("_DTAssertionFailureHandler"),
551             hasName("_TSAssertionFailureHandler")))));
552 
553   auto NoReturnFuncM = callExpr(callee(functionDecl(isNoReturn())));
554 
555   auto GuardM =
556       stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM,
557             NoReturnFuncM))
558           .bind("guard");
559 
560   for (const CXXMethodDecl *M : Parent->methods()) {
561     const Stmt *MethodBody = getMethodBody(M);
562     if (!MethodBody)
563       continue;
564 
565     auto Accesses = match(stmt(hasDescendant(FieldAccessM)), *MethodBody, AC);
566     if (Accesses.empty())
567       continue;
568     const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>("access");
569     assert(FirstAccess);
570 
571     auto Guards = match(stmt(hasDescendant(GuardM)), *MethodBody, AC);
572     if (Guards.empty())
573       return true;
574     const auto *FirstGuard = Guards[0].getNodeAs<Stmt>("guard");
575     assert(FirstGuard);
576 
577     if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc())
578       return true;
579   }
580 
581   return false;
582 }
583 
584 std::string clang::ento::getVariableName(const FieldDecl *Field) {
585   // If Field is a captured lambda variable, Field->getName() will return with
586   // an empty string. We can however acquire it's name from the lambda's
587   // captures.
588   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
589 
590   if (CXXParent && CXXParent->isLambda()) {
591     assert(CXXParent->captures_begin());
592     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
593 
594     if (It->capturesVariable())
595       return llvm::Twine("/*captured variable*/" +
596                          It->getCapturedVar()->getName())
597           .str();
598 
599     if (It->capturesThis())
600       return "/*'this' capture*/";
601 
602     llvm_unreachable("No other capture type is expected!");
603   }
604 
605   return Field->getName();
606 }
607 
608 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
609   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
610 
611   AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
612   UninitObjCheckerOptions &ChOpts = Chk->Opts;
613 
614   ChOpts.IsPedantic = AnOpts.getCheckerBooleanOption(Chk, "Pedantic");
615   ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption(
616       Chk, "NotesAsWarnings");
617   ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
618       Chk, "CheckPointeeInitialization");
619   ChOpts.IgnoredRecordsWithFieldPattern =
620       AnOpts.getCheckerStringOption(Chk, "IgnoreRecordsWithField");
621   ChOpts.IgnoreGuardedFields =
622       AnOpts.getCheckerBooleanOption(Chk, "IgnoreGuardedFields");
623 
624   std::string ErrorMsg;
625   if (!llvm::Regex(ChOpts.IgnoredRecordsWithFieldPattern).isValid(ErrorMsg))
626     Mgr.reportInvalidCheckerOptionValue(Chk, "IgnoreRecordsWithField",
627         "a valid regex, building failed with error message "
628         "\"" + ErrorMsg + "\"");
629 }
630 
631 bool ento::shouldRegisterUninitializedObjectChecker(const LangOptions &LO) {
632   return true;
633 }
634