1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a checker that reports uninitialized fields in objects
10 // created after a constructor call.
11 //
12 // To read about command line options and how the checker works, refer to the
13 // top of the file and inline comments in UninitializedObject.h.
14 //
15 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
16 // complexity of this file.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21 #include "UninitializedObject.h"
22 #include "clang/ASTMatchers/ASTMatchFinder.h"
23 #include "clang/Driver/DriverDiagnostic.h"
24 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
25 #include "clang/StaticAnalyzer/Core/Checker.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
28 
29 using namespace clang;
30 using namespace clang::ento;
31 using namespace clang::ast_matchers;
32 
/// Fields (and the pointees of fields) that are confirmed to be uninitialized
/// are recorded in this set, marking them as already analyzed.
35 REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
36 
37 namespace {
38 
39 class UninitializedObjectChecker
40     : public Checker<check::EndFunction, check::DeadSymbols> {
41   std::unique_ptr<BuiltinBug> BT_uninitField;
42 
43 public:
44   // The fields of this struct will be initialized when registering the checker.
45   UninitObjCheckerOptions Opts;
46 
47   UninitializedObjectChecker()
48       : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
49 
50   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
51   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
52 };
53 
54 /// A basic field type, that is not a pointer or a reference, it's dynamic and
55 /// static type is the same.
56 class RegularField final : public FieldNode {
57 public:
58   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
59 
60   void printNoteMsg(llvm::raw_ostream &Out) const override {
61     Out << "uninitialized field ";
62   }
63 
64   void printPrefix(llvm::raw_ostream &Out) const override {}
65 
66   void printNode(llvm::raw_ostream &Out) const override {
67     Out << getVariableName(getDecl());
68   }
69 
70   void printSeparator(llvm::raw_ostream &Out) const override { Out << '.'; }
71 };
72 
73 /// Represents that the FieldNode that comes after this is declared in a base
74 /// of the previous FieldNode. As such, this descendant doesn't wrap a
75 /// FieldRegion, and is purely a tool to describe a relation between two other
76 /// FieldRegion wrapping descendants.
77 class BaseClass final : public FieldNode {
78   const QualType BaseClassT;
79 
80 public:
81   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
82     assert(!T.isNull());
83     assert(T->getAsCXXRecordDecl());
84   }
85 
86   void printNoteMsg(llvm::raw_ostream &Out) const override {
87     llvm_unreachable("This node can never be the final node in the "
88                      "fieldchain!");
89   }
90 
91   void printPrefix(llvm::raw_ostream &Out) const override {}
92 
93   void printNode(llvm::raw_ostream &Out) const override {
94     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
95   }
96 
97   void printSeparator(llvm::raw_ostream &Out) const override {}
98 
99   bool isBase() const override { return true; }
100 };
101 
102 } // end of anonymous namespace
103 
104 // Utility function declarations.
105 
106 /// Returns the region that was constructed by CtorDecl, or nullptr if that
107 /// isn't possible.
108 static const TypedValueRegion *
109 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
110                      CheckerContext &Context);
111 
112 /// Checks whether the object constructed by \p Ctor will be analyzed later
113 /// (e.g. if the object is a field of another object, in which case we'd check
114 /// it multiple times).
115 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
116                                       CheckerContext &Context);
117 
118 /// Checks whether RD contains a field with a name or type name that matches
119 /// \p Pattern.
120 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
121 
122 /// Checks _syntactically_ whether it is possible to access FD from the record
123 /// that contains it without a preceding assert (even if that access happens
124 /// inside a method). This is mainly used for records that act like unions, like
125 /// having multiple bit fields, with only a fraction being properly initialized.
126 /// If these fields are properly guarded with asserts, this method returns
127 /// false.
128 ///
129 /// Since this check is done syntactically, this method could be inaccurate.
130 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
131 
132 //===----------------------------------------------------------------------===//
133 //                  Methods for UninitializedObjectChecker.
134 //===----------------------------------------------------------------------===//
135 
136 void UninitializedObjectChecker::checkEndFunction(
137     const ReturnStmt *RS, CheckerContext &Context) const {
138 
139   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
140       Context.getLocationContext()->getDecl());
141   if (!CtorDecl)
142     return;
143 
144   if (!CtorDecl->isUserProvided())
145     return;
146 
147   if (CtorDecl->getParent()->isUnion())
148     return;
149 
150   // This avoids essentially the same error being reported multiple times.
151   if (willObjectBeAnalyzedLater(CtorDecl, Context))
152     return;
153 
154   const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
155   if (!R)
156     return;
157 
158   FindUninitializedFields F(Context.getState(), R, Opts);
159 
160   std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
161       F.getResults();
162 
163   ProgramStateRef UpdatedState = UninitInfo.first;
164   const UninitFieldMap &UninitFields = UninitInfo.second;
165 
166   if (UninitFields.empty()) {
167     Context.addTransition(UpdatedState);
168     return;
169   }
170 
171   // There are uninitialized fields in the record.
172 
173   ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
174   if (!Node)
175     return;
176 
177   PathDiagnosticLocation LocUsedForUniqueing;
178   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
179   if (CallSite)
180     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
181         CallSite, Context.getSourceManager(), Node->getLocationContext());
182 
183   // For Plist consumers that don't support notes just yet, we'll convert notes
184   // to warnings.
185   if (Opts.ShouldConvertNotesToWarnings) {
186     for (const auto &Pair : UninitFields) {
187 
188       auto Report = std::make_unique<PathSensitiveBugReport>(
189           *BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
190           Node->getLocationContext()->getDecl());
191       Context.emitReport(std::move(Report));
192     }
193     return;
194   }
195 
196   SmallString<100> WarningBuf;
197   llvm::raw_svector_ostream WarningOS(WarningBuf);
198   WarningOS << UninitFields.size() << " uninitialized field"
199             << (UninitFields.size() == 1 ? "" : "s")
200             << " at the end of the constructor call";
201 
202   auto Report = std::make_unique<PathSensitiveBugReport>(
203       *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
204       Node->getLocationContext()->getDecl());
205 
206   for (const auto &Pair : UninitFields) {
207     Report->addNote(Pair.second,
208                     PathDiagnosticLocation::create(Pair.first->getDecl(),
209                                                    Context.getSourceManager()));
210   }
211   Context.emitReport(std::move(Report));
212 }
213 
214 void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
215                                                   CheckerContext &C) const {
216   ProgramStateRef State = C.getState();
217   for (const MemRegion *R : State->get<AnalyzedRegions>()) {
218     if (!SR.isLiveRegion(R))
219       State = State->remove<AnalyzedRegions>(R);
220   }
221 }
222 
223 //===----------------------------------------------------------------------===//
224 //                   Methods for FindUninitializedFields.
225 //===----------------------------------------------------------------------===//
226 
227 FindUninitializedFields::FindUninitializedFields(
228     ProgramStateRef State, const TypedValueRegion *const R,
229     const UninitObjCheckerOptions &Opts)
230     : State(State), ObjectR(R), Opts(Opts) {
231 
232   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
233 
234   // In non-pedantic mode, if ObjectR doesn't contain a single initialized
235   // field, we'll assume that Object was intentionally left uninitialized.
236   if (!Opts.IsPedantic && !isAnyFieldInitialized())
237     UninitFields.clear();
238 }
239 
240 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
241                                                 const MemRegion *PointeeR) {
242   const FieldRegion *FR = Chain.getUninitRegion();
243 
244   assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
245          "One must also pass the pointee region as a parameter for "
246          "dereferenceable fields!");
247 
248   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
249           FR->getDecl()->getLocation()))
250     return false;
251 
252   if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FR->getDecl(), State))
253     return false;
254 
255   if (State->contains<AnalyzedRegions>(FR))
256     return false;
257 
258   if (PointeeR) {
259     if (State->contains<AnalyzedRegions>(PointeeR)) {
260       return false;
261     }
262     State = State->add<AnalyzedRegions>(PointeeR);
263   }
264 
265   State = State->add<AnalyzedRegions>(FR);
266 
267   UninitFieldMap::mapped_type NoteMsgBuf;
268   llvm::raw_svector_ostream OS(NoteMsgBuf);
269   Chain.printNoteMsg(OS);
270 
271   return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
272 }
273 
274 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
275                                                FieldChainInfo LocalChain) {
276   assert(R->getValueType()->isRecordType() &&
277          !R->getValueType()->isUnionType() &&
278          "This method only checks non-union record objects!");
279 
280   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
281 
282   if (!RD) {
283     IsAnyFieldInitialized = true;
284     return true;
285   }
286 
287   if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
288       shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
289     IsAnyFieldInitialized = true;
290     return false;
291   }
292 
293   bool ContainsUninitField = false;
294 
295   // Are all of this non-union's fields initialized?
296   for (const FieldDecl *I : RD->fields()) {
297 
298     const auto FieldVal =
299         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
300     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
301     QualType T = I->getType();
302 
303     // If LocalChain already contains FR, then we encountered a cyclic
304     // reference. In this case, region FR is already under checking at an
305     // earlier node in the directed tree.
306     if (LocalChain.contains(FR))
307       return false;
308 
309     if (T->isStructureOrClassType()) {
310       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
311         ContainsUninitField = true;
312       continue;
313     }
314 
315     if (T->isUnionType()) {
316       if (isUnionUninit(FR)) {
317         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
318           ContainsUninitField = true;
319       } else
320         IsAnyFieldInitialized = true;
321       continue;
322     }
323 
324     if (T->isArrayType()) {
325       IsAnyFieldInitialized = true;
326       continue;
327     }
328 
329     SVal V = State->getSVal(FieldVal);
330 
331     if (isDereferencableType(T) || isa<nonloc::LocAsInteger>(V)) {
332       if (isDereferencableUninit(FR, LocalChain))
333         ContainsUninitField = true;
334       continue;
335     }
336 
337     if (isPrimitiveType(T)) {
338       if (isPrimitiveUninit(V)) {
339         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
340           ContainsUninitField = true;
341       }
342       continue;
343     }
344 
345     llvm_unreachable("All cases are handled!");
346   }
347 
348   // Checking bases. The checker will regard inherited data members as direct
349   // fields.
350   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
351   if (!CXXRD)
352     return ContainsUninitField;
353 
354   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
355     const auto *BaseRegion = State->getLValue(BaseSpec, R)
356                                  .castAs<loc::MemRegionVal>()
357                                  .getRegionAs<TypedValueRegion>();
358 
359     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
360     // note messages like 'this->A::B::x'.
361     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
362       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
363                                            BaseClass(BaseSpec.getType()))))
364         ContainsUninitField = true;
365     } else {
366       if (isNonUnionUninit(BaseRegion,
367                            LocalChain.add(BaseClass(BaseSpec.getType()))))
368         ContainsUninitField = true;
369     }
370   }
371 
372   return ContainsUninitField;
373 }
374 
375 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
376   assert(R->getValueType()->isUnionType() &&
377          "This method only checks union objects!");
378   // TODO: Implement support for union fields.
379   return false;
380 }
381 
382 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
383   if (V.isUndef())
384     return true;
385 
386   IsAnyFieldInitialized = true;
387   return false;
388 }
389 
390 //===----------------------------------------------------------------------===//
391 //                       Methods for FieldChainInfo.
392 //===----------------------------------------------------------------------===//
393 
394 bool FieldChainInfo::contains(const FieldRegion *FR) const {
395   for (const FieldNode &Node : Chain) {
396     if (Node.isSameRegion(FR))
397       return true;
398   }
399   return false;
400 }
401 
/// Prints every element except the last to `Out`. Since ImmutableLists store
/// elements in reverse order, and have no reverse iterators, we use a
/// recursive function to print the fieldchain correctly. The last element in
/// the chain is printed by `FieldChainInfo::printNoteMsg`.
406 static void printTail(llvm::raw_ostream &Out,
407                       const FieldChainInfo::FieldChain L);
408 
409 // FIXME: This function constructs an incorrect string in the following case:
410 //
411 //   struct Base { int x; };
412 //   struct D1 : Base {}; struct D2 : Base {};
413 //
414 //   struct MostDerived : D1, D2 {
415 //     MostDerived() {}
416 //   }
417 //
418 // A call to MostDerived::MostDerived() will cause two notes that say
419 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
420 // we need an explicit namespace resolution whether the uninit field was
421 // 'D1::x' or 'D2::x'.
422 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
423   if (Chain.isEmpty())
424     return;
425 
426   const FieldNode &LastField = getHead();
427 
428   LastField.printNoteMsg(Out);
429   Out << '\'';
430 
431   for (const FieldNode &Node : Chain)
432     Node.printPrefix(Out);
433 
434   Out << "this->";
435   printTail(Out, Chain.getTail());
436   LastField.printNode(Out);
437   Out << '\'';
438 }
439 
440 static void printTail(llvm::raw_ostream &Out,
441                       const FieldChainInfo::FieldChain L) {
442   if (L.isEmpty())
443     return;
444 
445   printTail(Out, L.getTail());
446 
447   L.getHead().printNode(Out);
448   L.getHead().printSeparator(Out);
449 }
450 
451 //===----------------------------------------------------------------------===//
452 //                           Utility functions.
453 //===----------------------------------------------------------------------===//
454 
455 static const TypedValueRegion *
456 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
457                      CheckerContext &Context) {
458 
459   Loc ThisLoc =
460       Context.getSValBuilder().getCXXThis(CtorDecl, Context.getStackFrame());
461 
462   SVal ObjectV = Context.getState()->getSVal(ThisLoc);
463 
464   auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
465   if (R && !R->getValueType()->getAsCXXRecordDecl())
466     return nullptr;
467 
468   return R;
469 }
470 
471 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
472                                       CheckerContext &Context) {
473 
474   const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context);
475   if (!CurrRegion)
476     return false;
477 
478   const LocationContext *LC = Context.getLocationContext();
479   while ((LC = LC->getParent())) {
480 
481     // If \p Ctor was called by another constructor.
482     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
483     if (!OtherCtor)
484       continue;
485 
486     const TypedValueRegion *OtherRegion =
487         getConstructedRegion(OtherCtor, Context);
488     if (!OtherRegion)
489       continue;
490 
491     // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
492     // during the analysis of OtherRegion.
493     if (CurrRegion->isSubRegionOf(OtherRegion))
494       return true;
495   }
496 
497   return false;
498 }
499 
500 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
501   llvm::Regex R(Pattern);
502 
503   for (const FieldDecl *FD : RD->fields()) {
504     if (R.match(FD->getType().getAsString()))
505       return true;
506     if (R.match(FD->getName()))
507       return true;
508   }
509 
510   return false;
511 }
512 
513 static const Stmt *getMethodBody(const CXXMethodDecl *M) {
514   if (isa<CXXConstructorDecl>(M))
515     return nullptr;
516 
517   if (!M->isDefined())
518     return nullptr;
519 
520   return M->getDefinition()->getBody();
521 }
522 
523 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) {
524 
525   if (FD->getAccess() == AccessSpecifier::AS_public)
526     return true;
527 
528   const auto *Parent = dyn_cast<CXXRecordDecl>(FD->getParent());
529 
530   if (!Parent)
531     return true;
532 
533   Parent = Parent->getDefinition();
534   assert(Parent && "The record's definition must be avaible if an uninitialized"
535                    " field of it was found!");
536 
537   ASTContext &AC = State->getStateManager().getContext();
538 
539   auto FieldAccessM = memberExpr(hasDeclaration(equalsNode(FD))).bind("access");
540 
541   auto AssertLikeM = callExpr(callee(functionDecl(
542       hasAnyName("exit", "panic", "error", "Assert", "assert", "ziperr",
543                  "assfail", "db_error", "__assert", "__assert2", "_wassert",
544                  "__assert_rtn", "__assert_fail", "dtrace_assfail",
545                  "yy_fatal_error", "_XCAssertionFailureHandler",
546                  "_DTAssertionFailureHandler", "_TSAssertionFailureHandler"))));
547 
548   auto NoReturnFuncM = callExpr(callee(functionDecl(isNoReturn())));
549 
550   auto GuardM =
551       stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM,
552             NoReturnFuncM))
553           .bind("guard");
554 
555   for (const CXXMethodDecl *M : Parent->methods()) {
556     const Stmt *MethodBody = getMethodBody(M);
557     if (!MethodBody)
558       continue;
559 
560     auto Accesses = match(stmt(hasDescendant(FieldAccessM)), *MethodBody, AC);
561     if (Accesses.empty())
562       continue;
563     const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>("access");
564     assert(FirstAccess);
565 
566     auto Guards = match(stmt(hasDescendant(GuardM)), *MethodBody, AC);
567     if (Guards.empty())
568       return true;
569     const auto *FirstGuard = Guards[0].getNodeAs<Stmt>("guard");
570     assert(FirstGuard);
571 
572     if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc())
573       return true;
574   }
575 
576   return false;
577 }
578 
579 std::string clang::ento::getVariableName(const FieldDecl *Field) {
580   // If Field is a captured lambda variable, Field->getName() will return with
581   // an empty string. We can however acquire it's name from the lambda's
582   // captures.
583   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
584 
585   if (CXXParent && CXXParent->isLambda()) {
586     assert(CXXParent->captures_begin());
587     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
588 
589     if (It->capturesVariable())
590       return llvm::Twine("/*captured variable*/" +
591                          It->getCapturedVar()->getName())
592           .str();
593 
594     if (It->capturesThis())
595       return "/*'this' capture*/";
596 
597     llvm_unreachable("No other capture type is expected!");
598   }
599 
600   return std::string(Field->getName());
601 }
602 
603 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
604   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
605 
606   const AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
607   UninitObjCheckerOptions &ChOpts = Chk->Opts;
608 
609   ChOpts.IsPedantic = AnOpts.getCheckerBooleanOption(Chk, "Pedantic");
610   ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption(
611       Chk, "NotesAsWarnings");
612   ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
613       Chk, "CheckPointeeInitialization");
614   ChOpts.IgnoredRecordsWithFieldPattern =
615       std::string(AnOpts.getCheckerStringOption(Chk, "IgnoreRecordsWithField"));
616   ChOpts.IgnoreGuardedFields =
617       AnOpts.getCheckerBooleanOption(Chk, "IgnoreGuardedFields");
618 
619   std::string ErrorMsg;
620   if (!llvm::Regex(ChOpts.IgnoredRecordsWithFieldPattern).isValid(ErrorMsg))
621     Mgr.reportInvalidCheckerOptionValue(Chk, "IgnoreRecordsWithField",
622         "a valid regex, building failed with error message "
623         "\"" + ErrorMsg + "\"");
624 }
625 
626 bool ento::shouldRegisterUninitializedObjectChecker(const CheckerManager &mgr) {
627   return true;
628 }
629