1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a checker that reports uninitialized fields in objects
10 // created after a constructor call.
11 //
12 // To read about command line options and how the checker works, refer to the
13 // top of the file and inline comments in UninitializedObject.h.
14 //
15 // Some of the logic is implemented in UninitializedPointee.cpp, to reduce the
16 // complexity of this file.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21 #include "UninitializedObject.h"
22 #include "clang/ASTMatchers/ASTMatchFinder.h"
23 #include "clang/Driver/DriverDiagnostic.h"
24 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
25 #include "clang/StaticAnalyzer/Core/Checker.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h"
28 
29 using namespace clang;
30 using namespace clang::ento;
31 using namespace clang::ast_matchers;
32 
33 /// We'll mark fields (and pointee of fields) that are confirmed to be
34 /// uninitialized as already analyzed.
35 REGISTER_SET_WITH_PROGRAMSTATE(AnalyzedRegions, const MemRegion *)
36 
37 namespace {
38 
39 class UninitializedObjectChecker
40     : public Checker<check::EndFunction, check::DeadSymbols> {
41   const BugType BT_uninitField{this, "Uninitialized fields"};
42 
43 public:
44   // The fields of this struct will be initialized when registering the checker.
45   UninitObjCheckerOptions Opts;
46 
47   void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
48   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
49 };
50 
51 /// A basic field type, that is not a pointer or a reference, it's dynamic and
52 /// static type is the same.
53 class RegularField final : public FieldNode {
54 public:
55   RegularField(const FieldRegion *FR) : FieldNode(FR) {}
56 
57   void printNoteMsg(llvm::raw_ostream &Out) const override {
58     Out << "uninitialized field ";
59   }
60 
61   void printPrefix(llvm::raw_ostream &Out) const override {}
62 
63   void printNode(llvm::raw_ostream &Out) const override {
64     Out << getVariableName(getDecl());
65   }
66 
67   void printSeparator(llvm::raw_ostream &Out) const override { Out << '.'; }
68 };
69 
70 /// Represents that the FieldNode that comes after this is declared in a base
71 /// of the previous FieldNode. As such, this descendant doesn't wrap a
72 /// FieldRegion, and is purely a tool to describe a relation between two other
73 /// FieldRegion wrapping descendants.
74 class BaseClass final : public FieldNode {
75   const QualType BaseClassT;
76 
77 public:
78   BaseClass(const QualType &T) : FieldNode(nullptr), BaseClassT(T) {
79     assert(!T.isNull());
80     assert(T->getAsCXXRecordDecl());
81   }
82 
83   void printNoteMsg(llvm::raw_ostream &Out) const override {
84     llvm_unreachable("This node can never be the final node in the "
85                      "fieldchain!");
86   }
87 
88   void printPrefix(llvm::raw_ostream &Out) const override {}
89 
90   void printNode(llvm::raw_ostream &Out) const override {
91     Out << BaseClassT->getAsCXXRecordDecl()->getName() << "::";
92   }
93 
94   void printSeparator(llvm::raw_ostream &Out) const override {}
95 
96   bool isBase() const override { return true; }
97 };
98 
99 } // end of anonymous namespace
100 
101 // Utility function declarations.
102 
103 /// Returns the region that was constructed by CtorDecl, or nullptr if that
104 /// isn't possible.
105 static const TypedValueRegion *
106 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
107                      CheckerContext &Context);
108 
109 /// Checks whether the object constructed by \p Ctor will be analyzed later
110 /// (e.g. if the object is a field of another object, in which case we'd check
111 /// it multiple times).
112 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
113                                       CheckerContext &Context);
114 
115 /// Checks whether RD contains a field with a name or type name that matches
116 /// \p Pattern.
117 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern);
118 
119 /// Checks _syntactically_ whether it is possible to access FD from the record
120 /// that contains it without a preceding assert (even if that access happens
121 /// inside a method). This is mainly used for records that act like unions, like
122 /// having multiple bit fields, with only a fraction being properly initialized.
123 /// If these fields are properly guarded with asserts, this method returns
124 /// false.
125 ///
126 /// Since this check is done syntactically, this method could be inaccurate.
127 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State);
128 
129 //===----------------------------------------------------------------------===//
130 //                  Methods for UninitializedObjectChecker.
131 //===----------------------------------------------------------------------===//
132 
133 void UninitializedObjectChecker::checkEndFunction(
134     const ReturnStmt *RS, CheckerContext &Context) const {
135 
136   const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
137       Context.getLocationContext()->getDecl());
138   if (!CtorDecl)
139     return;
140 
141   if (!CtorDecl->isUserProvided())
142     return;
143 
144   if (CtorDecl->getParent()->isUnion())
145     return;
146 
147   // This avoids essentially the same error being reported multiple times.
148   if (willObjectBeAnalyzedLater(CtorDecl, Context))
149     return;
150 
151   const TypedValueRegion *R = getConstructedRegion(CtorDecl, Context);
152   if (!R)
153     return;
154 
155   FindUninitializedFields F(Context.getState(), R, Opts);
156 
157   std::pair<ProgramStateRef, const UninitFieldMap &> UninitInfo =
158       F.getResults();
159 
160   ProgramStateRef UpdatedState = UninitInfo.first;
161   const UninitFieldMap &UninitFields = UninitInfo.second;
162 
163   if (UninitFields.empty()) {
164     Context.addTransition(UpdatedState);
165     return;
166   }
167 
168   // There are uninitialized fields in the record.
169 
170   ExplodedNode *Node = Context.generateNonFatalErrorNode(UpdatedState);
171   if (!Node)
172     return;
173 
174   PathDiagnosticLocation LocUsedForUniqueing;
175   const Stmt *CallSite = Context.getStackFrame()->getCallSite();
176   if (CallSite)
177     LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
178         CallSite, Context.getSourceManager(), Node->getLocationContext());
179 
180   // For Plist consumers that don't support notes just yet, we'll convert notes
181   // to warnings.
182   if (Opts.ShouldConvertNotesToWarnings) {
183     for (const auto &Pair : UninitFields) {
184 
185       auto Report = std::make_unique<PathSensitiveBugReport>(
186           BT_uninitField, Pair.second, Node, LocUsedForUniqueing,
187           Node->getLocationContext()->getDecl());
188       Context.emitReport(std::move(Report));
189     }
190     return;
191   }
192 
193   SmallString<100> WarningBuf;
194   llvm::raw_svector_ostream WarningOS(WarningBuf);
195   WarningOS << UninitFields.size() << " uninitialized field"
196             << (UninitFields.size() == 1 ? "" : "s")
197             << " at the end of the constructor call";
198 
199   auto Report = std::make_unique<PathSensitiveBugReport>(
200       BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
201       Node->getLocationContext()->getDecl());
202 
203   for (const auto &Pair : UninitFields) {
204     Report->addNote(Pair.second,
205                     PathDiagnosticLocation::create(Pair.first->getDecl(),
206                                                    Context.getSourceManager()));
207   }
208   Context.emitReport(std::move(Report));
209 }
210 
211 void UninitializedObjectChecker::checkDeadSymbols(SymbolReaper &SR,
212                                                   CheckerContext &C) const {
213   ProgramStateRef State = C.getState();
214   for (const MemRegion *R : State->get<AnalyzedRegions>()) {
215     if (!SR.isLiveRegion(R))
216       State = State->remove<AnalyzedRegions>(R);
217   }
218 }
219 
220 //===----------------------------------------------------------------------===//
221 //                   Methods for FindUninitializedFields.
222 //===----------------------------------------------------------------------===//
223 
224 FindUninitializedFields::FindUninitializedFields(
225     ProgramStateRef State, const TypedValueRegion *const R,
226     const UninitObjCheckerOptions &Opts)
227     : State(State), ObjectR(R), Opts(Opts) {
228 
229   isNonUnionUninit(ObjectR, FieldChainInfo(ChainFactory));
230 
231   // In non-pedantic mode, if ObjectR doesn't contain a single initialized
232   // field, we'll assume that Object was intentionally left uninitialized.
233   if (!Opts.IsPedantic && !isAnyFieldInitialized())
234     UninitFields.clear();
235 }
236 
237 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain,
238                                                 const MemRegion *PointeeR) {
239   const FieldRegion *FR = Chain.getUninitRegion();
240 
241   assert((PointeeR || !isDereferencableType(FR->getDecl()->getType())) &&
242          "One must also pass the pointee region as a parameter for "
243          "dereferenceable fields!");
244 
245   if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
246           FR->getDecl()->getLocation()))
247     return false;
248 
249   if (Opts.IgnoreGuardedFields && !hasUnguardedAccess(FR->getDecl(), State))
250     return false;
251 
252   if (State->contains<AnalyzedRegions>(FR))
253     return false;
254 
255   if (PointeeR) {
256     if (State->contains<AnalyzedRegions>(PointeeR)) {
257       return false;
258     }
259     State = State->add<AnalyzedRegions>(PointeeR);
260   }
261 
262   State = State->add<AnalyzedRegions>(FR);
263 
264   UninitFieldMap::mapped_type NoteMsgBuf;
265   llvm::raw_svector_ostream OS(NoteMsgBuf);
266   Chain.printNoteMsg(OS);
267 
268   return UninitFields.insert({FR, std::move(NoteMsgBuf)}).second;
269 }
270 
271 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
272                                                FieldChainInfo LocalChain) {
273   assert(R->getValueType()->isRecordType() &&
274          !R->getValueType()->isUnionType() &&
275          "This method only checks non-union record objects!");
276 
277   const RecordDecl *RD = R->getValueType()->getAsRecordDecl()->getDefinition();
278 
279   if (!RD) {
280     IsAnyFieldInitialized = true;
281     return true;
282   }
283 
284   if (!Opts.IgnoredRecordsWithFieldPattern.empty() &&
285       shouldIgnoreRecord(RD, Opts.IgnoredRecordsWithFieldPattern)) {
286     IsAnyFieldInitialized = true;
287     return false;
288   }
289 
290   bool ContainsUninitField = false;
291 
292   // Are all of this non-union's fields initialized?
293   for (const FieldDecl *I : RD->fields()) {
294     if (I->isUnnamedBitField()) {
295       continue;
296     }
297     const auto FieldVal =
298         State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
299     const auto *FR = FieldVal.getRegionAs<FieldRegion>();
300     QualType T = I->getType();
301 
302     // If LocalChain already contains FR, then we encountered a cyclic
303     // reference. In this case, region FR is already under checking at an
304     // earlier node in the directed tree.
305     if (LocalChain.contains(FR))
306       return false;
307 
308     if (T->isStructureOrClassType()) {
309       if (isNonUnionUninit(FR, LocalChain.add(RegularField(FR))))
310         ContainsUninitField = true;
311       continue;
312     }
313 
314     if (T->isUnionType()) {
315       if (isUnionUninit(FR)) {
316         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
317           ContainsUninitField = true;
318       } else
319         IsAnyFieldInitialized = true;
320       continue;
321     }
322 
323     if (T->isArrayType()) {
324       IsAnyFieldInitialized = true;
325       continue;
326     }
327 
328     SVal V = State->getSVal(FieldVal);
329 
330     if (isDereferencableType(T) || isa<nonloc::LocAsInteger>(V)) {
331       if (isDereferencableUninit(FR, LocalChain))
332         ContainsUninitField = true;
333       continue;
334     }
335 
336     if (isPrimitiveType(T)) {
337       if (isPrimitiveUninit(V)) {
338         if (addFieldToUninits(LocalChain.add(RegularField(FR))))
339           ContainsUninitField = true;
340       }
341       continue;
342     }
343 
344     llvm_unreachable("All cases are handled!");
345   }
346 
347   // Checking bases. The checker will regard inherited data members as direct
348   // fields.
349   const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
350   if (!CXXRD)
351     return ContainsUninitField;
352 
353   for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
354     const auto *BaseRegion = State->getLValue(BaseSpec, R)
355                                  .castAs<loc::MemRegionVal>()
356                                  .getRegionAs<TypedValueRegion>();
357 
358     // If the head of the list is also a BaseClass, we'll overwrite it to avoid
359     // note messages like 'this->A::B::x'.
360     if (!LocalChain.isEmpty() && LocalChain.getHead().isBase()) {
361       if (isNonUnionUninit(BaseRegion, LocalChain.replaceHead(
362                                            BaseClass(BaseSpec.getType()))))
363         ContainsUninitField = true;
364     } else {
365       if (isNonUnionUninit(BaseRegion,
366                            LocalChain.add(BaseClass(BaseSpec.getType()))))
367         ContainsUninitField = true;
368     }
369   }
370 
371   return ContainsUninitField;
372 }
373 
374 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
375   assert(R->getValueType()->isUnionType() &&
376          "This method only checks union objects!");
377   // TODO: Implement support for union fields.
378   return false;
379 }
380 
381 bool FindUninitializedFields::isPrimitiveUninit(SVal V) {
382   if (V.isUndef())
383     return true;
384 
385   IsAnyFieldInitialized = true;
386   return false;
387 }
388 
389 //===----------------------------------------------------------------------===//
390 //                       Methods for FieldChainInfo.
391 //===----------------------------------------------------------------------===//
392 
393 bool FieldChainInfo::contains(const FieldRegion *FR) const {
394   for (const FieldNode &Node : Chain) {
395     if (Node.isSameRegion(FR))
396       return true;
397   }
398   return false;
399 }
400 
/// Prints every element except the last to `Out`. Since ImmutableLists store
/// elements in reverse order, and have no reverse iterators, we use a
/// recursive function to print the fieldchain correctly. The last element in
/// the chain is to be printed by `FieldChainInfo::printNoteMsg`.
405 static void printTail(llvm::raw_ostream &Out,
406                       const FieldChainInfo::FieldChain L);
407 
408 // FIXME: This function constructs an incorrect string in the following case:
409 //
410 //   struct Base { int x; };
411 //   struct D1 : Base {}; struct D2 : Base {};
412 //
413 //   struct MostDerived : D1, D2 {
414 //     MostDerived() {}
415 //   }
416 //
417 // A call to MostDerived::MostDerived() will cause two notes that say
418 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
419 // we need an explicit namespace resolution whether the uninit field was
420 // 'D1::x' or 'D2::x'.
421 void FieldChainInfo::printNoteMsg(llvm::raw_ostream &Out) const {
422   if (Chain.isEmpty())
423     return;
424 
425   const FieldNode &LastField = getHead();
426 
427   LastField.printNoteMsg(Out);
428   Out << '\'';
429 
430   for (const FieldNode &Node : Chain)
431     Node.printPrefix(Out);
432 
433   Out << "this->";
434   printTail(Out, Chain.getTail());
435   LastField.printNode(Out);
436   Out << '\'';
437 }
438 
439 static void printTail(llvm::raw_ostream &Out,
440                       const FieldChainInfo::FieldChain L) {
441   if (L.isEmpty())
442     return;
443 
444   printTail(Out, L.getTail());
445 
446   L.getHead().printNode(Out);
447   L.getHead().printSeparator(Out);
448 }
449 
450 //===----------------------------------------------------------------------===//
451 //                           Utility functions.
452 //===----------------------------------------------------------------------===//
453 
454 static const TypedValueRegion *
455 getConstructedRegion(const CXXConstructorDecl *CtorDecl,
456                      CheckerContext &Context) {
457 
458   Loc ThisLoc =
459       Context.getSValBuilder().getCXXThis(CtorDecl, Context.getStackFrame());
460 
461   SVal ObjectV = Context.getState()->getSVal(ThisLoc);
462 
463   auto *R = ObjectV.getAsRegion()->getAs<TypedValueRegion>();
464   if (R && !R->getValueType()->getAsCXXRecordDecl())
465     return nullptr;
466 
467   return R;
468 }
469 
470 static bool willObjectBeAnalyzedLater(const CXXConstructorDecl *Ctor,
471                                       CheckerContext &Context) {
472 
473   const TypedValueRegion *CurrRegion = getConstructedRegion(Ctor, Context);
474   if (!CurrRegion)
475     return false;
476 
477   const LocationContext *LC = Context.getLocationContext();
478   while ((LC = LC->getParent())) {
479 
480     // If \p Ctor was called by another constructor.
481     const auto *OtherCtor = dyn_cast<CXXConstructorDecl>(LC->getDecl());
482     if (!OtherCtor)
483       continue;
484 
485     const TypedValueRegion *OtherRegion =
486         getConstructedRegion(OtherCtor, Context);
487     if (!OtherRegion)
488       continue;
489 
490     // If the CurrRegion is a subregion of OtherRegion, it will be analyzed
491     // during the analysis of OtherRegion.
492     if (CurrRegion->isSubRegionOf(OtherRegion))
493       return true;
494   }
495 
496   return false;
497 }
498 
499 static bool shouldIgnoreRecord(const RecordDecl *RD, StringRef Pattern) {
500   llvm::Regex R(Pattern);
501 
502   for (const FieldDecl *FD : RD->fields()) {
503     if (R.match(FD->getType().getAsString()))
504       return true;
505     if (R.match(FD->getName()))
506       return true;
507   }
508 
509   return false;
510 }
511 
512 static const Stmt *getMethodBody(const CXXMethodDecl *M) {
513   if (isa<CXXConstructorDecl>(M))
514     return nullptr;
515 
516   if (!M->isDefined())
517     return nullptr;
518 
519   return M->getDefinition()->getBody();
520 }
521 
522 static bool hasUnguardedAccess(const FieldDecl *FD, ProgramStateRef State) {
523 
524   if (FD->getAccess() == AccessSpecifier::AS_public)
525     return true;
526 
527   const auto *Parent = dyn_cast<CXXRecordDecl>(FD->getParent());
528 
529   if (!Parent)
530     return true;
531 
532   Parent = Parent->getDefinition();
533   assert(Parent && "The record's definition must be avaible if an uninitialized"
534                    " field of it was found!");
535 
536   ASTContext &AC = State->getStateManager().getContext();
537 
538   auto FieldAccessM = memberExpr(hasDeclaration(equalsNode(FD))).bind("access");
539 
540   auto AssertLikeM = callExpr(callee(functionDecl(
541       hasAnyName("exit", "panic", "error", "Assert", "assert", "ziperr",
542                  "assfail", "db_error", "__assert", "__assert2", "_wassert",
543                  "__assert_rtn", "__assert_fail", "dtrace_assfail",
544                  "yy_fatal_error", "_XCAssertionFailureHandler",
545                  "_DTAssertionFailureHandler", "_TSAssertionFailureHandler"))));
546 
547   auto NoReturnFuncM = callExpr(callee(functionDecl(isNoReturn())));
548 
549   auto GuardM =
550       stmt(anyOf(ifStmt(), switchStmt(), conditionalOperator(), AssertLikeM,
551             NoReturnFuncM))
552           .bind("guard");
553 
554   for (const CXXMethodDecl *M : Parent->methods()) {
555     const Stmt *MethodBody = getMethodBody(M);
556     if (!MethodBody)
557       continue;
558 
559     auto Accesses = match(stmt(hasDescendant(FieldAccessM)), *MethodBody, AC);
560     if (Accesses.empty())
561       continue;
562     const auto *FirstAccess = Accesses[0].getNodeAs<MemberExpr>("access");
563     assert(FirstAccess);
564 
565     auto Guards = match(stmt(hasDescendant(GuardM)), *MethodBody, AC);
566     if (Guards.empty())
567       return true;
568     const auto *FirstGuard = Guards[0].getNodeAs<Stmt>("guard");
569     assert(FirstGuard);
570 
571     if (FirstAccess->getBeginLoc() < FirstGuard->getBeginLoc())
572       return true;
573   }
574 
575   return false;
576 }
577 
578 std::string clang::ento::getVariableName(const FieldDecl *Field) {
579   // If Field is a captured lambda variable, Field->getName() will return with
580   // an empty string. We can however acquire it's name from the lambda's
581   // captures.
582   const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
583 
584   if (CXXParent && CXXParent->isLambda()) {
585     assert(CXXParent->captures_begin());
586     auto It = CXXParent->captures_begin() + Field->getFieldIndex();
587 
588     if (It->capturesVariable())
589       return llvm::Twine("/*captured variable*/" +
590                          It->getCapturedVar()->getName())
591           .str();
592 
593     if (It->capturesThis())
594       return "/*'this' capture*/";
595 
596     llvm_unreachable("No other capture type is expected!");
597   }
598 
599   return std::string(Field->getName());
600 }
601 
602 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
603   auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
604 
605   const AnalyzerOptions &AnOpts = Mgr.getAnalyzerOptions();
606   UninitObjCheckerOptions &ChOpts = Chk->Opts;
607 
608   ChOpts.IsPedantic = AnOpts.getCheckerBooleanOption(Chk, "Pedantic");
609   ChOpts.ShouldConvertNotesToWarnings = AnOpts.getCheckerBooleanOption(
610       Chk, "NotesAsWarnings");
611   ChOpts.CheckPointeeInitialization = AnOpts.getCheckerBooleanOption(
612       Chk, "CheckPointeeInitialization");
613   ChOpts.IgnoredRecordsWithFieldPattern =
614       std::string(AnOpts.getCheckerStringOption(Chk, "IgnoreRecordsWithField"));
615   ChOpts.IgnoreGuardedFields =
616       AnOpts.getCheckerBooleanOption(Chk, "IgnoreGuardedFields");
617 
618   std::string ErrorMsg;
619   if (!llvm::Regex(ChOpts.IgnoredRecordsWithFieldPattern).isValid(ErrorMsg))
620     Mgr.reportInvalidCheckerOptionValue(Chk, "IgnoreRecordsWithField",
621         "a valid regex, building failed with error message "
622         "\"" + ErrorMsg + "\"");
623 }
624 
625 bool ento::shouldRegisterUninitializedObjectChecker(const CheckerManager &mgr) {
626   return true;
627 }
628