xref: /freebsd/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp (revision b7daab8be1d4555f23a297e60e4128c01caabf82)
1 //=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a checker that checks for padding that could be
10 //  removed by re-ordering members.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/CharUnits.h"
15 #include "clang/AST/DeclTemplate.h"
16 #include "clang/AST/DynamicRecursiveASTVisitor.h"
17 #include "clang/AST/RecordLayout.h"
18 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21 #include "clang/StaticAnalyzer/Core/Checker.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace clang;
27 using namespace ento;
28 
29 namespace {
30 class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
31 private:
32   const BugType PaddingBug{this, "Excessive Padding", "Performance"};
33   mutable BugReporter *BR;
34 
35 public:
36   int64_t AllowedPad;
37 
38   void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
39                     BugReporter &BRArg) const {
40     BR = &BRArg;
41 
42     // The calls to checkAST* from AnalysisConsumer don't
43     // visit template instantiations or lambda classes. We
44     // want to visit those, so we make our own RecursiveASTVisitor.
45     struct LocalVisitor : DynamicRecursiveASTVisitor {
46       const PaddingChecker *Checker;
47       explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {
48         ShouldVisitTemplateInstantiations = true;
49         ShouldVisitImplicitCode = true;
50       }
51       bool VisitRecordDecl(RecordDecl *RD) override {
52         Checker->visitRecord(RD);
53         return true;
54       }
55       bool VisitVarDecl(VarDecl *VD) override {
56         Checker->visitVariable(VD);
57         return true;
58       }
59       // TODO: Visit array new and mallocs for arrays.
60     };
61 
62     LocalVisitor visitor(this);
63     visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
64   }
65 
66   /// Look for records of overly padded types. If padding *
67   /// PadMultiplier exceeds AllowedPad, then generate a report.
68   /// PadMultiplier is used to share code with the array padding
69   /// checker.
70   void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
71     if (shouldSkipDecl(RD))
72       return;
73 
74     // TODO: Figure out why we are going through declarations and not only
75     // definitions.
76     if (!(RD = RD->getDefinition()))
77       return;
78 
79     // This is the simplest correct case: a class with no fields and one base
80     // class. Other cases are more complicated because of how the base classes
81     // & fields might interact, so we don't bother dealing with them.
82     // TODO: Support other combinations of base classes and fields.
83     if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
84       if (CXXRD->field_empty() && CXXRD->getNumBases() == 1)
85         return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(),
86                            PadMultiplier);
87 
88     auto &ASTContext = RD->getASTContext();
89     const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
90     assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));
91 
92     CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
93     if (BaselinePad.isZero())
94       return;
95 
96     CharUnits OptimalPad;
97     SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
98     std::tie(OptimalPad, OptimalFieldsOrder) =
99         calculateOptimalPad(RD, ASTContext, RL);
100 
101     CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
102     if (DiffPad.getQuantity() <= AllowedPad) {
103       assert(!DiffPad.isNegative() && "DiffPad should not be negative");
104       // There is not enough excess padding to trigger a warning.
105       return;
106     }
107     reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
108   }
109 
110   /// Look for arrays of overly padded types. If the padding of the
111   /// array type exceeds AllowedPad, then generate a report.
112   void visitVariable(const VarDecl *VD) const {
113     const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
114     if (ArrTy == nullptr)
115       return;
116     uint64_t Elts = 0;
117     if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
118       Elts = CArrTy->getZExtSize();
119     if (Elts == 0)
120       return;
121     const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
122     if (RT == nullptr)
123       return;
124 
125     // TODO: Recurse into the fields to see if they have excess padding.
126     visitRecord(RT->getDecl(), Elts);
127   }
128 
129   bool shouldSkipDecl(const RecordDecl *RD) const {
130     // TODO: Figure out why we are going through declarations and not only
131     // definitions.
132     if (!(RD = RD->getDefinition()))
133       return true;
134     auto Location = RD->getLocation();
135     // If the construct doesn't have a source file, then it's not something
136     // we want to diagnose.
137     if (!Location.isValid())
138       return true;
139     SrcMgr::CharacteristicKind Kind =
140         BR->getSourceManager().getFileCharacteristic(Location);
141     // Throw out all records that come from system headers.
142     if (Kind != SrcMgr::C_User)
143       return true;
144 
145     // Not going to attempt to optimize unions.
146     if (RD->isUnion())
147       return true;
148     if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
149       // Tail padding with base classes ends up being very complicated.
150       // We will skip objects with base classes for now, unless they do not
151       // have fields.
152       // TODO: Handle more base class scenarios.
153       if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0)
154         return true;
155       if (CXXRD->field_empty() && CXXRD->getNumBases() != 1)
156         return true;
157       // Virtual bases are complicated, skipping those for now.
158       if (CXXRD->getNumVBases() != 0)
159         return true;
160       // Can't layout a template, so skip it. We do still layout the
161       // instantiations though.
162       if (CXXRD->getTypeForDecl()->isDependentType())
163         return true;
164       if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
165         return true;
166     }
167     // How do you reorder fields if you haven't got any?
168     else if (RD->field_empty())
169       return true;
170 
171     auto IsTrickyField = [](const FieldDecl *FD) -> bool {
172       // Bitfield layout is hard.
173       if (FD->isBitField())
174         return true;
175 
176       // Variable length arrays are tricky too.
177       QualType Ty = FD->getType();
178       if (Ty->isIncompleteArrayType())
179         return true;
180       return false;
181     };
182 
183     if (llvm::any_of(RD->fields(), IsTrickyField))
184       return true;
185     return false;
186   }
187 
188   static CharUnits calculateBaselinePad(const RecordDecl *RD,
189                                         const ASTContext &ASTContext,
190                                         const ASTRecordLayout &RL) {
191     CharUnits PaddingSum;
192     CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
193     for (const FieldDecl *FD : RD->fields()) {
194       // Skip field that is a subobject of zero size, marked with
195       // [[no_unique_address]] or an empty bitfield, because its address can be
196       // set the same as the other fields addresses.
197       if (FD->isZeroSize(ASTContext))
198         continue;
199       // This checker only cares about the padded size of the
200       // field, and not the data size. If the field is a record
201       // with tail padding, then we won't put that number in our
202       // total because reordering fields won't fix that problem.
203       CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
204       auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
205       CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
206       PaddingSum += (FieldOffset - Offset);
207       Offset = FieldOffset + FieldSize;
208     }
209     PaddingSum += RL.getSize() - Offset;
210     return PaddingSum;
211   }
212 
213   /// Optimal padding overview:
214   /// 1.  Find a close approximation to where we can place our first field.
215   ///     This will usually be at offset 0.
216   /// 2.  Try to find the best field that can legally be placed at the current
217   ///     offset.
218   ///   a.  "Best" is the largest alignment that is legal, but smallest size.
219   ///       This is to account for overly aligned types.
220   /// 3.  If no fields can fit, pad by rounding the current offset up to the
221   ///     smallest alignment requirement of our fields. Measure and track the
222   //      amount of padding added. Go back to 2.
223   /// 4.  Increment the current offset by the size of the chosen field.
224   /// 5.  Remove the chosen field from the set of future possibilities.
225   /// 6.  Go back to 2 if there are still unplaced fields.
226   /// 7.  Add tail padding by rounding the current offset up to the structure
227   ///     alignment. Track the amount of padding added.
228 
229   static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
230   calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
231                       const ASTRecordLayout &RL) {
232     struct FieldInfo {
233       CharUnits Align;
234       CharUnits Size;
235       const FieldDecl *Field;
236       bool operator<(const FieldInfo &RHS) const {
237         // Order from small alignments to large alignments,
238         // then large sizes to small sizes.
239         // then large field indices to small field indices
240         return std::make_tuple(Align, -Size,
241                                Field ? -static_cast<int>(Field->getFieldIndex())
242                                      : 0) <
243                std::make_tuple(
244                    RHS.Align, -RHS.Size,
245                    RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
246                              : 0);
247       }
248     };
249     SmallVector<FieldInfo, 20> Fields;
250     auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
251       FieldInfo RetVal;
252       RetVal.Field = FD;
253       auto &Ctx = FD->getASTContext();
254       auto Info = Ctx.getTypeInfoInChars(FD->getType());
255       RetVal.Size = FD->isZeroSize(Ctx) ? CharUnits::Zero() : Info.Width;
256       RetVal.Align = Info.Align;
257       assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
258       if (auto Max = FD->getMaxAlignment())
259         RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
260       return RetVal;
261     };
262     std::transform(RD->field_begin(), RD->field_end(),
263                    std::back_inserter(Fields), GatherSizesAndAlignments);
264     llvm::sort(Fields);
265     // This lets us skip over vptrs and non-virtual bases,
266     // so that we can just worry about the fields in our object.
267     // Note that this does cause us to miss some cases where we
268     // could pack more bytes in to a base class's tail padding.
269     CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
270     CharUnits NewPad;
271     SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
272     while (!Fields.empty()) {
273       unsigned TrailingZeros =
274           llvm::countr_zero((unsigned long long)NewOffset.getQuantity());
275       // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
276       // 64 will overflow our unsigned long long. Shifting 63 will turn
277       // our long long (and CharUnits internal type) negative. So shift 62.
278       long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
279       CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
280       FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};
281 
282       // In the typical case, this will find the last element
283       // of the vector. We won't find a middle element unless
284       // we started on a poorly aligned address or have an overly
285       // aligned field.
286       auto Iter = llvm::upper_bound(Fields, InsertPoint);
287       if (Iter != Fields.begin()) {
288         // We found a field that we can layout with the current alignment.
289         --Iter;
290         NewOffset += Iter->Size;
291         OptimalFieldsOrder.push_back(Iter->Field);
292         Fields.erase(Iter);
293       } else {
294         // We are poorly aligned, and we need to pad in order to layout another
295         // field. Round up to at least the smallest field alignment that we
296         // currently have.
297         CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
298         NewPad += NextOffset - NewOffset;
299         NewOffset = NextOffset;
300       }
301     }
302     // Calculate tail padding.
303     CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
304     NewPad += NewSize - NewOffset;
305     return {NewPad, std::move(OptimalFieldsOrder)};
306   }
307 
308   void reportRecord(
309       const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
310       const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
311     SmallString<100> Buf;
312     llvm::raw_svector_ostream Os(Buf);
313     Os << "Excessive padding in '";
314     Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
315                                 LangOptions())
316        << "'";
317 
318     if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
319       // TODO: make this show up better in the console output and in
320       // the HTML. Maybe just make it show up in HTML like the path
321       // diagnostics show.
322       SourceLocation ILoc = TSD->getPointOfInstantiation();
323       if (ILoc.isValid())
324         Os << " instantiated here: "
325            << ILoc.printToString(BR->getSourceManager());
326     }
327 
328     Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
329        << OptimalPad.getQuantity() << " is optimal). "
330        << "Optimal fields order: ";
331     for (const auto *FD : OptimalFieldsOrder)
332       Os << FD->getName() << ", ";
333     Os << "consider reordering the fields or adding explicit padding "
334           "members.";
335 
336     PathDiagnosticLocation CELoc =
337         PathDiagnosticLocation::create(RD, BR->getSourceManager());
338     auto Report = std::make_unique<BasicBugReport>(PaddingBug, Os.str(), CELoc);
339     Report->setDeclWithIssue(RD);
340     Report->addRange(RD->getSourceRange());
341     BR->emitReport(std::move(Report));
342   }
343 };
344 } // namespace
345 
346 void ento::registerPaddingChecker(CheckerManager &Mgr) {
347   auto *Checker = Mgr.registerChecker<PaddingChecker>();
348   Checker->AllowedPad = Mgr.getAnalyzerOptions()
349           .getCheckerIntegerOption(Checker, "AllowedPad");
350   if (Checker->AllowedPad < 0)
351     Mgr.reportInvalidCheckerOptionValue(
352         Checker, "AllowedPad", "a non-negative value");
353 }
354 
355 bool ento::shouldRegisterPaddingChecker(const CheckerManager &mgr) {
356   return true;
357 }
358