//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "llvm/ADT/SmallVector.h" #include #include using namespace llvm; using namespace clang; using namespace ast_matchers; namespace clang::ast_matchers { // A `RecursiveASTVisitor` that traverses all descendants of a given node "n" // except for those belonging to a different callable of "n". class MatchDescendantVisitor : public RecursiveASTVisitor { public: typedef RecursiveASTVisitor VisitorBase; // Creates an AST visitor that matches `Matcher` on all // descendants of a given node "n" except for the ones // belonging to a different callable of "n". MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, internal::ASTMatchFinder *Finder, internal::BoundNodesTreeBuilder *Builder, internal::ASTMatchFinder::BindKind Bind) : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), Matches(false) {} // Returns true if a match is found in a subtree of `DynNode`, which belongs // to the same callable of `DynNode`. bool findMatch(const DynTypedNode &DynNode) { Matches = false; if (const Stmt *StmtNode = DynNode.get()) { TraverseStmt(const_cast(StmtNode)); *Builder = ResultBindings; return Matches; } return false; } // The following are overriding methods from the base visitor class. // They are public only to allow CRTP to work. They are *not *part // of the public API of this class. // For the matchers so far used in safe buffers, we only need to match // `Stmt`s. To override more as needed. bool TraverseDecl(Decl *Node) { if (!Node) return true; if (!match(*Node)) return false; // To skip callables: if (isa(Node)) return true; // Traverse descendants return VisitorBase::TraverseDecl(Node); } bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { if (!Node) return true; if (!match(*Node)) return false; // To skip callables: if (isa(Node)) return true; return VisitorBase::TraverseStmt(Node); } bool shouldVisitTemplateInstantiations() const { return true; } bool shouldVisitImplicitCode() const { // TODO: let's ignore implicit code for now return false; } private: // Sets 'Matched' to true if 'Matcher' matches 'Node' // // Returns 'true' if traversal should continue after this function // returns, i.e. if no match is found or 'Bind' is 'BK_All'. template bool match(const T &Node) { internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); if (Matcher->matches(DynTypedNode::create(Node), Finder, &RecursiveBuilder)) { ResultBindings.addMatch(RecursiveBuilder); Matches = true; if (Bind != internal::ASTMatchFinder::BK_All) return false; // Abort as soon as a match is found. } return true; } const internal::DynTypedMatcher *const Matcher; internal::ASTMatchFinder *const Finder; internal::BoundNodesTreeBuilder *const Builder; internal::BoundNodesTreeBuilder ResultBindings; const internal::ASTMatchFinder::BindKind Bind; bool Matches; }; AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher, innerMatcher) { const DynTypedMatcher &DTM = static_cast(innerMatcher); MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All); return Visitor.findMatch(DynTypedNode::create(Node)); } } // namespace clang::ast_matchers namespace { // Because the analysis revolves around variables and their types, we'll need to // track uses of variables (aka DeclRefExprs). using DeclUseList = SmallVector; // Convenience typedef. using FixItList = SmallVector; // Defined below. class Strategy; } // namespace // Because we're dealing with raw pointers, let's define what we mean by that. static auto hasPointerType() { return hasType(hasCanonicalType(pointerType())); } static auto hasArrayType() { return hasType(hasCanonicalType(arrayType())); } namespace { /// Gadget is an individual operation in the code that may be of interest to /// this analysis. Each (non-abstract) subclass corresponds to a specific /// rigid AST structure that constitutes an operation on a pointer-type object. /// Discovery of a gadget in the code corresponds to claiming that we understand /// what this part of code is doing well enough to potentially improve it. /// Gadgets can be warning (immediately deserving a warning) or fixable (not /// always deserving a warning per se, but requires our attention to identify /// it warrants a fixit). class Gadget { public: enum class Kind { #define GADGET(x) x, #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" }; /// Common type of ASTMatchers used for discovering gadgets. /// Useful for implementing the static matcher() methods /// that are expected from all non-abstract subclasses. using Matcher = decltype(stmt()); Gadget(Kind K) : K(K) {} Kind getKind() const { return K; } virtual bool isWarningGadget() const = 0; virtual const Stmt *getBaseStmt() const = 0; /// Returns the list of pointer-type variables on which this gadget performs /// its operation. Typically, there's only one variable. This isn't a list /// of all DeclRefExprs in the gadget's AST! virtual DeclUseList getClaimedVarUseSites() const = 0; virtual ~Gadget() = default; private: Kind K; }; /// Warning gadgets correspond to unsafe code patterns that warrants /// an immediate warning. class WarningGadget : public Gadget { public: WarningGadget(Kind K) : Gadget(K) {} static bool classof(const Gadget *G) { return G->isWarningGadget(); } bool isWarningGadget() const final { return true; } }; /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be /// properly recognized in order to emit fixes. For example, if a raw pointer-type /// variable is replaced by a safe C++ container, every use of such variable must be /// carefully considered and possibly updated. class FixableGadget : public Gadget { public: FixableGadget(Kind K) : Gadget(K) {} static bool classof(const Gadget *G) { return !G->isWarningGadget(); } bool isWarningGadget() const final { return false; } /// Returns a fixit that would fix the current gadget according to /// the current strategy. Returns None if the fix cannot be produced; /// returns an empty list if no fixes are necessary. virtual std::optional getFixits(const Strategy &) const { return std::nullopt; } }; using FixableGadgetList = std::vector>; using WarningGadgetList = std::vector>; /// An increment of a pointer-type value is unsafe as it may run the pointer /// out of bounds. class IncrementGadget : public WarningGadget { static constexpr const char *const OpTag = "op"; const UnaryOperator *Op; public: IncrementGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::Increment), Op(Result.Nodes.getNodeAs(OpTag)) {} static bool classof(const Gadget *G) { return G->getKind() == Kind::Increment; } static Matcher matcher() { return stmt(unaryOperator( hasOperatorName("++"), hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) ).bind(OpTag)); } const UnaryOperator *getBaseStmt() const override { return Op; } DeclUseList getClaimedVarUseSites() const override { SmallVector Uses; if (const auto *DRE = dyn_cast(Op->getSubExpr()->IgnoreParenImpCasts())) { Uses.push_back(DRE); } return std::move(Uses); } }; /// A decrement of a pointer-type value is unsafe as it may run the pointer /// out of bounds. class DecrementGadget : public WarningGadget { static constexpr const char *const OpTag = "op"; const UnaryOperator *Op; public: DecrementGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::Decrement), Op(Result.Nodes.getNodeAs(OpTag)) {} static bool classof(const Gadget *G) { return G->getKind() == Kind::Decrement; } static Matcher matcher() { return stmt(unaryOperator( hasOperatorName("--"), hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) ).bind(OpTag)); } const UnaryOperator *getBaseStmt() const override { return Op; } DeclUseList getClaimedVarUseSites() const override { if (const auto *DRE = dyn_cast(Op->getSubExpr()->IgnoreParenImpCasts())) { return {DRE}; } return {}; } }; /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as /// it doesn't have any bounds checks for the array. class ArraySubscriptGadget : public WarningGadget { static constexpr const char *const ArraySubscrTag = "arraySubscr"; const ArraySubscriptExpr *ASE; public: ArraySubscriptGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::ArraySubscript), ASE(Result.Nodes.getNodeAs(ArraySubscrTag)) {} static bool classof(const Gadget *G) { return G->getKind() == Kind::ArraySubscript; } static Matcher matcher() { // FIXME: What if the index is integer literal 0? Should this be // a safe gadget in this case? // clang-format off return stmt(arraySubscriptExpr( hasBase(ignoringParenImpCasts( anyOf(hasPointerType(), hasArrayType()))), unless(hasIndex(integerLiteral(equals(0))))) .bind(ArraySubscrTag)); // clang-format on } const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } DeclUseList getClaimedVarUseSites() const override { if (const auto *DRE = dyn_cast(ASE->getBase()->IgnoreParenImpCasts())) { return {DRE}; } return {}; } }; /// A pointer arithmetic expression of one of the forms: /// \code /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n /// \endcode class PointerArithmeticGadget : public WarningGadget { static constexpr const char *const PointerArithmeticTag = "ptrAdd"; static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; const BinaryOperator *PA; // pointer arithmetic expression const Expr * Ptr; // the pointer expression in `PA` public: PointerArithmeticGadget(const MatchFinder::MatchResult &Result) : WarningGadget(Kind::PointerArithmetic), PA(Result.Nodes.getNodeAs(PointerArithmeticTag)), Ptr(Result.Nodes.getNodeAs(PointerArithmeticPointerTag)) {} static bool classof(const Gadget *G) { return G->getKind() == Kind::PointerArithmetic; } static Matcher matcher() { auto HasIntegerType = anyOf( hasType(isInteger()), hasType(enumType())); auto PtrAtRight = allOf(hasOperatorName("+"), hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), hasLHS(HasIntegerType)); auto PtrAtLeft = allOf( anyOf(hasOperatorName("+"), hasOperatorName("-"), hasOperatorName("+="), hasOperatorName("-=")), hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), hasRHS(HasIntegerType)); return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)).bind(PointerArithmeticTag)); } const Stmt *getBaseStmt() const override { return PA; } DeclUseList getClaimedVarUseSites() const override { if (const auto *DRE = dyn_cast(Ptr->IgnoreParenImpCasts())) { return {DRE}; } return {}; } // FIXME: pointer adding zero should be fine //FIXME: this gadge will need a fix-it }; } // namespace namespace { // An auxiliary tracking facility for the fixit analysis. It helps connect // declarations to its and make sure we've covered all uses with our analysis // before we try to fix the declaration. class DeclUseTracker { using UseSetTy = SmallSet; using DefMapTy = DenseMap; // Allocate on the heap for easier move. std::unique_ptr Uses{std::make_unique()}; DefMapTy Defs{}; public: DeclUseTracker() = default; DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. DeclUseTracker(DeclUseTracker &&) = default; DeclUseTracker &operator=(DeclUseTracker &&) = default; // Start tracking a freshly discovered DRE. void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } // Stop tracking the DRE as it's been fully figured out. void claimUse(const DeclRefExpr *DRE) { assert(Uses->count(DRE) && "DRE not found or claimed by multiple matchers!"); Uses->erase(DRE); } // A variable is unclaimed if at least one use is unclaimed. bool hasUnclaimedUses(const VarDecl *VD) const { // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? return any_of(*Uses, [VD](const DeclRefExpr *DRE) { return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); }); } void discoverDecl(const DeclStmt *DS) { for (const Decl *D : DS->decls()) { if (const auto *VD = dyn_cast(D)) { // FIXME: Assertion temporarily disabled due to a bug in // ASTMatcher internal behavior in presence of GNU // statement-expressions. We need to properly investigate this // because it can screw up our algorithm in other ways. // assert(Defs.count(VD) == 0 && "Definition already discovered!"); Defs[VD] = DS; } } } const DeclStmt *lookupDecl(const VarDecl *VD) const { auto It = Defs.find(VD); assert(It != Defs.end() && "Definition never discovered!"); return It->second; } }; } // namespace namespace { // Strategy is a map from variables to the way we plan to emit fixes for // these variables. It is figured out gradually by trying different fixes // for different variables depending on gadgets in which these variables // participate. class Strategy { public: enum class Kind { Wontfix, // We don't plan to emit a fixit for this variable. Span, // We recommend replacing the variable with std::span. Iterator, // We recommend replacing the variable with std::span::iterator. Array, // We recommend replacing the variable with std::array. Vector // We recommend replacing the variable with std::vector. }; private: using MapTy = llvm::DenseMap; MapTy Map; public: Strategy() = default; Strategy(const Strategy &) = delete; // Let's avoid copies. Strategy(Strategy &&) = default; void set(const VarDecl *VD, Kind K) { Map[VD] = K; } Kind lookup(const VarDecl *VD) const { auto I = Map.find(VD); if (I == Map.end()) return Kind::Wontfix; return I->second; } }; } // namespace /// Scan the function and return a list of gadgets found with provided kits. static std::tuple findGadgets(const Decl *D) { struct GadgetFinderCallback : MatchFinder::MatchCallback { FixableGadgetList FixableGadgets; WarningGadgetList WarningGadgets; DeclUseTracker Tracker; void run(const MatchFinder::MatchResult &Result) override { // In debug mode, assert that we've found exactly one gadget. // This helps us avoid conflicts in .bind() tags. #if NDEBUG #define NEXT return #else [[maybe_unused]] int numFound = 0; #define NEXT ++numFound #endif if (const auto *DRE = Result.Nodes.getNodeAs("any_dre")) { Tracker.discoverUse(DRE); NEXT; } if (const auto *DS = Result.Nodes.getNodeAs("any_ds")) { Tracker.discoverDecl(DS); NEXT; } // Figure out which matcher we've found, and call the appropriate // subclass constructor. // FIXME: Can we do this more logarithmically? #define FIXABLE_GADGET(name) \ if (Result.Nodes.getNodeAs(#name)) { \ FixableGadgets.push_back(std::make_unique(Result)); \ NEXT; \ } #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" #define WARNING_GADGET(name) \ if (Result.Nodes.getNodeAs(#name)) { \ WarningGadgets.push_back(std::make_unique(Result)); \ NEXT; \ } #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" assert(numFound >= 1 && "Gadgets not found in match result!"); assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); } }; MatchFinder M; GadgetFinderCallback CB; // clang-format off M.addMatcher( stmt(forEveryDescendant( stmt(anyOf( // Add Gadget::matcher() for every gadget in the registry. #define GADGET(x) \ x ## Gadget::matcher().bind(#x), #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" // In parallel, match all DeclRefExprs so that to find out // whether there are any uncovered by gadgets. declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre"), // Also match DeclStmts because we'll need them when fixing // their underlying VarDecls that otherwise don't have // any backreferences to DeclStmts. declStmt().bind("any_ds") )) // FIXME: Idiomatically there should be a forCallable(equalsNode(D)) // here, to make sure that the statement actually belongs to the // function and not to a nested function. However, forCallable uses // ParentMap which can't be used before the AST is fully constructed. // The original problem doesn't sound like it needs ParentMap though, // maybe there's a more direct solution? )), &CB ); // clang-format on M.match(*D->getBody(), D->getASTContext()); // Gadgets "claim" variables they're responsible for. Once this loop finishes, // the tracker will only track DREs that weren't claimed by any gadgets, // i.e. not understood by the analysis. for (const auto &G : CB.FixableGadgets) { for (const auto *DRE : G->getClaimedVarUseSites()) { CB.Tracker.claimUse(DRE); } } return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), std::move(CB.Tracker)}; } struct WarningGadgetSets { std::map>> byVar; // These Gadgets are not related to pointer variables (e. g. temporaries). llvm::SmallVector, 16> noVar; }; static WarningGadgetSets groupWarningGadgetsByVar(WarningGadgetList &&AllUnsafeOperations) { WarningGadgetSets result; // If some gadgets cover more than one // variable, they'll appear more than once in the map. for (auto &G : AllUnsafeOperations) { DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); bool AssociatedWithVarDecl = false; for (const DeclRefExpr *DRE : ClaimedVarUseSites) { if (const auto *VD = dyn_cast(DRE->getDecl())) { result.byVar[VD].emplace(std::move(G)); AssociatedWithVarDecl = true; } } if (!AssociatedWithVarDecl) { result.noVar.emplace_back(std::move(G)); continue; } } return result; } struct FixableGadgetSets { std::map>> byVar; }; static FixableGadgetSets groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { FixableGadgetSets FixablesForUnsafeVars; for (auto &F : AllFixableOperations) { DeclUseList DREs = F->getClaimedVarUseSites(); for (const DeclRefExpr *DRE : DREs) { if (const auto *VD = dyn_cast(DRE->getDecl())) { FixablesForUnsafeVars.byVar[VD].emplace(std::move(F)); } } } return FixablesForUnsafeVars; } static std::map getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S) { std::map FixItsForVariable; for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) { // TODO fixVariable - fixit for the variable itself bool ImpossibleToFix = false; llvm::SmallVector FixItsForVD; for (const auto &F : Fixables) { llvm::Optional Fixits = F->getFixits(S); if (!Fixits) { ImpossibleToFix = true; break; } else { const FixItList CorrectFixes = Fixits.value(); FixItsForVD.insert(FixItsForVD.end(), CorrectFixes.begin(), CorrectFixes.end()); } } if (ImpossibleToFix) FixItsForVariable.erase(VD); else FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), FixItsForVD.begin(), FixItsForVD.end()); } return FixItsForVariable; } static Strategy getNaiveStrategy(const llvm::SmallVectorImpl &UnsafeVars) { Strategy S; for (const VarDecl *VD : UnsafeVars) { S.set(VD, Strategy::Kind::Span); } return S; } void clang::checkUnsafeBufferUsage(const Decl *D, UnsafeBufferUsageHandler &Handler) { assert(D && D->getBody()); WarningGadgetSets UnsafeOps; FixableGadgetSets FixablesForUnsafeVars; DeclUseTracker Tracker; { auto [FixableGadgets, WarningGadgets, TrackerRes] = findGadgets(D); UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); FixablesForUnsafeVars = groupFixablesByVar(std::move(FixableGadgets)); Tracker = std::move(TrackerRes); } // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. for (auto it = FixablesForUnsafeVars.byVar.cbegin(); it != FixablesForUnsafeVars.byVar.cend();) { // FIXME: Support ParmVarDecl as well. if (!it->first->isLocalVarDecl() || Tracker.hasUnclaimedUses(it->first)) { it = FixablesForUnsafeVars.byVar.erase(it); } else { ++it; } } llvm::SmallVector UnsafeVars; for (const auto &[VD, ignore] : FixablesForUnsafeVars.byVar) UnsafeVars.push_back(VD); Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars); std::map FixItsForVariable = getFixIts(FixablesForUnsafeVars, NaiveStrategy); // FIXME Detect overlapping FixIts. for (const auto &G : UnsafeOps.noVar) { Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false); } for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { auto FixItsIt = FixItsForVariable.find(VD); Handler.handleFixableVariable(VD, FixItsIt != FixItsForVariable.end() ? std::move(FixItsIt->second) : FixItList{}); for (const auto &G : WarningGadgets) { Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true); } } }