1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/AST/RecursiveASTVisitor.h" 13 #include "clang/AST/StmtVisitor.h" 14 #include "clang/ASTMatchers/ASTMatchFinder.h" 15 #include "clang/Lex/Lexer.h" 16 #include "clang/Lex/Preprocessor.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include <memory> 19 #include <optional> 20 #include <sstream> 21 #include <queue> 22 23 using namespace llvm; 24 using namespace clang; 25 using namespace ast_matchers; 26 27 #ifndef NDEBUG 28 namespace { 29 class StmtDebugPrinter 30 : public ConstStmtVisitor<StmtDebugPrinter, std::string> { 31 public: 32 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); } 33 34 std::string VisitBinaryOperator(const BinaryOperator *BO) { 35 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")"; 36 } 37 38 std::string VisitUnaryOperator(const UnaryOperator *UO) { 39 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")"; 40 } 41 42 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { 43 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")"; 44 } 45 }; 46 47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form: 48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...". 49 static std::string getDREAncestorString(const DeclRefExpr *DRE, 50 ASTContext &Ctx) { 51 std::stringstream SS; 52 const Stmt *St = DRE; 53 StmtDebugPrinter StmtPriner; 54 55 do { 56 SS << StmtPriner.Visit(St); 57 58 DynTypedNodeList StParents = Ctx.getParents(*St); 59 60 if (StParents.size() > 1) 61 return "unavailable due to multiple parents"; 62 if (StParents.size() == 0) 63 break; 64 St = StParents.begin()->get<Stmt>(); 65 if (St) 66 SS << " ==> "; 67 } while (St); 68 return SS.str(); 69 } 70 } // namespace 71 #endif /* NDEBUG */ 72 73 namespace clang::ast_matchers { 74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n" 75 // except for those belonging to a different callable of "n". 76 class MatchDescendantVisitor 77 : public RecursiveASTVisitor<MatchDescendantVisitor> { 78 public: 79 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase; 80 81 // Creates an AST visitor that matches `Matcher` on all 82 // descendants of a given node "n" except for the ones 83 // belonging to a different callable of "n". 84 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, 85 internal::ASTMatchFinder *Finder, 86 internal::BoundNodesTreeBuilder *Builder, 87 internal::ASTMatchFinder::BindKind Bind, 88 const bool ignoreUnevaluatedContext) 89 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), 90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {} 91 92 // Returns true if a match is found in a subtree of `DynNode`, which belongs 93 // to the same callable of `DynNode`. 94 bool findMatch(const DynTypedNode &DynNode) { 95 Matches = false; 96 if (const Stmt *StmtNode = DynNode.get<Stmt>()) { 97 TraverseStmt(const_cast<Stmt *>(StmtNode)); 98 *Builder = ResultBindings; 99 return Matches; 100 } 101 return false; 102 } 103 104 // The following are overriding methods from the base visitor class. 105 // They are public only to allow CRTP to work. They are *not *part 106 // of the public API of this class. 107 108 // For the matchers so far used in safe buffers, we only need to match 109 // `Stmt`s. To override more as needed. 110 111 bool TraverseDecl(Decl *Node) { 112 if (!Node) 113 return true; 114 if (!match(*Node)) 115 return false; 116 // To skip callables: 117 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node)) 118 return true; 119 // Traverse descendants 120 return VisitorBase::TraverseDecl(Node); 121 } 122 123 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) { 124 // These are unevaluated, except the result expression. 125 if(ignoreUnevaluatedContext) 126 return TraverseStmt(Node->getResultExpr()); 127 return VisitorBase::TraverseGenericSelectionExpr(Node); 128 } 129 130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) { 131 // Unevaluated context. 132 if(ignoreUnevaluatedContext) 133 return true; 134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node); 135 } 136 137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) { 138 // Unevaluated context. 139 if(ignoreUnevaluatedContext) 140 return true; 141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node); 142 } 143 144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) { 145 // Unevaluated context. 146 if(ignoreUnevaluatedContext) 147 return true; 148 return VisitorBase::TraverseDecltypeTypeLoc(Node); 149 } 150 151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) { 152 // Unevaluated context. 153 if(ignoreUnevaluatedContext) 154 return true; 155 return VisitorBase::TraverseCXXNoexceptExpr(Node); 156 } 157 158 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) { 159 // Unevaluated context. 160 if(ignoreUnevaluatedContext) 161 return true; 162 return VisitorBase::TraverseCXXTypeidExpr(Node); 163 } 164 165 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { 166 if (!Node) 167 return true; 168 if (!match(*Node)) 169 return false; 170 return VisitorBase::TraverseStmt(Node); 171 } 172 173 bool shouldVisitTemplateInstantiations() const { return true; } 174 bool shouldVisitImplicitCode() const { 175 // TODO: let's ignore implicit code for now 176 return false; 177 } 178 179 private: 180 // Sets 'Matched' to true if 'Matcher' matches 'Node' 181 // 182 // Returns 'true' if traversal should continue after this function 183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'. 184 template <typename T> bool match(const T &Node) { 185 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); 186 187 if (Matcher->matches(DynTypedNode::create(Node), Finder, 188 &RecursiveBuilder)) { 189 ResultBindings.addMatch(RecursiveBuilder); 190 Matches = true; 191 if (Bind != internal::ASTMatchFinder::BK_All) 192 return false; // Abort as soon as a match is found. 193 } 194 return true; 195 } 196 197 const internal::DynTypedMatcher *const Matcher; 198 internal::ASTMatchFinder *const Finder; 199 internal::BoundNodesTreeBuilder *const Builder; 200 internal::BoundNodesTreeBuilder ResultBindings; 201 const internal::ASTMatchFinder::BindKind Bind; 202 bool Matches; 203 bool ignoreUnevaluatedContext; 204 }; 205 206 // Because we're dealing with raw pointers, let's define what we mean by that. 207 static auto hasPointerType() { 208 return hasType(hasCanonicalType(pointerType())); 209 } 210 211 static auto hasArrayType() { 212 return hasType(hasCanonicalType(arrayType())); 213 } 214 215 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) { 216 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 217 218 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true); 219 return Visitor.findMatch(DynTypedNode::create(Node)); 220 } 221 222 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) { 223 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 224 225 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false); 226 return Visitor.findMatch(DynTypedNode::create(Node)); 227 } 228 229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region 230 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, 231 Handler) { 232 return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); 233 } 234 235 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { 236 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); 237 } 238 239 // Matches a `UnaryOperator` whose operator is pre-increment: 240 AST_MATCHER(UnaryOperator, isPreInc) { 241 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc; 242 } 243 244 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 245 // matches 'e' and 'e' is in an Unspecified Lvalue Context. 246 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) { 247 // clang-format off 248 return 249 expr(anyOf( 250 implicitCastExpr( 251 hasCastKind(CastKind::CK_LValueToRValue), 252 castSubExpr(innerMatcher)), 253 binaryOperator( 254 hasAnyOperatorName("="), 255 hasLHS(innerMatcher) 256 ) 257 )); 258 // clang-format on 259 } 260 261 262 // Returns a matcher that matches any expression `e` such that `InnerMatcher` 263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC). 264 static internal::Matcher<Stmt> 265 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) { 266 // A UPC can be 267 // 1. an argument of a function call (except the callee has [[unsafe_...]] 268 // attribute), or 269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or 270 // 3. the operand of a comparator operation; or 271 // 4. the operand of a pointer subtraction operation 272 // (i.e., computing the distance between two pointers); or ... 273 274 auto CallArgMatcher = 275 callExpr(forEachArgumentWithParam(InnerMatcher, 276 hasPointerType() /* array also decays to pointer type*/), 277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); 278 279 auto CastOperandMatcher = 280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), 281 hasCastKind(CastKind::CK_PointerToBoolean)), 282 castSubExpr(allOf(hasPointerType(), InnerMatcher))); 283 284 auto CompOperandMatcher = 285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="), 286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)), 287 hasRHS(allOf(hasPointerType(), InnerMatcher)))); 288 289 // A matcher that matches pointer subtractions: 290 auto PtrSubtractionMatcher = 291 binaryOperator(hasOperatorName("-"), 292 // Note that here we need both LHS and RHS to be 293 // pointer. Then the inner matcher can match any of 294 // them: 295 allOf(hasLHS(hasPointerType()), 296 hasRHS(hasPointerType())), 297 eachOf(hasLHS(InnerMatcher), 298 hasRHS(InnerMatcher))); 299 300 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, 301 PtrSubtractionMatcher)); 302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we 303 // don't have to check that.) 304 } 305 306 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression 308 // 'e' isn't evaluated to an RValue). For example, consider the following code: 309 // int *p = new int[4]; 310 // int *q = new int[4]; 311 // if ((p = q)) {} 312 // p = q; 313 // The expression `p = q` in the conditional of the `if` statement 314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;` 315 // in the assignment statement is in an untyped context. 316 static internal::Matcher<Stmt> 317 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) { 318 // An unspecified context can be 319 // 1. A compound statement, 320 // 2. The body of an if statement 321 // 3. Body of a loop 322 auto CompStmt = compoundStmt(forEach(InnerMatcher)); 323 auto IfStmtThen = ifStmt(hasThen(InnerMatcher)); 324 auto IfStmtElse = ifStmt(hasElse(InnerMatcher)); 325 // FIXME: Handle loop bodies. 326 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse)); 327 } 328 } // namespace clang::ast_matchers 329 330 namespace { 331 // Because the analysis revolves around variables and their types, we'll need to 332 // track uses of variables (aka DeclRefExprs). 333 using DeclUseList = SmallVector<const DeclRefExpr *, 1>; 334 335 // Convenience typedef. 336 using FixItList = SmallVector<FixItHint, 4>; 337 338 // Defined below. 339 class Strategy; 340 } // namespace 341 342 namespace { 343 /// Gadget is an individual operation in the code that may be of interest to 344 /// this analysis. Each (non-abstract) subclass corresponds to a specific 345 /// rigid AST structure that constitutes an operation on a pointer-type object. 346 /// Discovery of a gadget in the code corresponds to claiming that we understand 347 /// what this part of code is doing well enough to potentially improve it. 348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not 349 /// always deserving a warning per se, but requires our attention to identify 350 /// it warrants a fixit). 351 class Gadget { 352 public: 353 enum class Kind { 354 #define GADGET(x) x, 355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 356 }; 357 358 /// Common type of ASTMatchers used for discovering gadgets. 359 /// Useful for implementing the static matcher() methods 360 /// that are expected from all non-abstract subclasses. 361 using Matcher = decltype(stmt()); 362 363 Gadget(Kind K) : K(K) {} 364 365 Kind getKind() const { return K; } 366 367 #ifndef NDEBUG 368 StringRef getDebugName() const { 369 switch (K) { 370 #define GADGET(x) case Kind::x: return #x; 371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 372 } 373 llvm_unreachable("Unhandled Gadget::Kind enum"); 374 } 375 #endif 376 377 virtual bool isWarningGadget() const = 0; 378 virtual const Stmt *getBaseStmt() const = 0; 379 380 /// Returns the list of pointer-type variables on which this gadget performs 381 /// its operation. Typically, there's only one variable. This isn't a list 382 /// of all DeclRefExprs in the gadget's AST! 383 virtual DeclUseList getClaimedVarUseSites() const = 0; 384 385 virtual ~Gadget() = default; 386 387 private: 388 Kind K; 389 }; 390 391 392 /// Warning gadgets correspond to unsafe code patterns that warrants 393 /// an immediate warning. 394 class WarningGadget : public Gadget { 395 public: 396 WarningGadget(Kind K) : Gadget(K) {} 397 398 static bool classof(const Gadget *G) { return G->isWarningGadget(); } 399 bool isWarningGadget() const final { return true; } 400 }; 401 402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be 403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type 404 /// variable is replaced by a safe C++ container, every use of such variable must be 405 /// carefully considered and possibly updated. 406 class FixableGadget : public Gadget { 407 public: 408 FixableGadget(Kind K) : Gadget(K) {} 409 410 static bool classof(const Gadget *G) { return !G->isWarningGadget(); } 411 bool isWarningGadget() const final { return false; } 412 413 /// Returns a fixit that would fix the current gadget according to 414 /// the current strategy. Returns std::nullopt if the fix cannot be produced; 415 /// returns an empty list if no fixes are necessary. 416 virtual std::optional<FixItList> getFixits(const Strategy &) const { 417 return std::nullopt; 418 } 419 420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment 421 /// statement and the second element is the RHS. This two-element list represents the fact that 422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used 423 /// later to group all those variables whose types must be modified together to prevent type 424 /// mismatches. 425 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 426 getStrategyImplications() const { 427 return std::nullopt; 428 } 429 }; 430 431 static auto toSupportedVariable() { 432 return to(varDecl()); 433 } 434 435 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; 436 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; 437 438 /// An increment of a pointer-type value is unsafe as it may run the pointer 439 /// out of bounds. 440 class IncrementGadget : public WarningGadget { 441 static constexpr const char *const OpTag = "op"; 442 const UnaryOperator *Op; 443 444 public: 445 IncrementGadget(const MatchFinder::MatchResult &Result) 446 : WarningGadget(Kind::Increment), 447 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 448 449 static bool classof(const Gadget *G) { 450 return G->getKind() == Kind::Increment; 451 } 452 453 static Matcher matcher() { 454 return stmt(unaryOperator( 455 hasOperatorName("++"), 456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 457 ).bind(OpTag)); 458 } 459 460 const UnaryOperator *getBaseStmt() const override { return Op; } 461 462 DeclUseList getClaimedVarUseSites() const override { 463 SmallVector<const DeclRefExpr *, 2> Uses; 464 if (const auto *DRE = 465 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 466 Uses.push_back(DRE); 467 } 468 469 return std::move(Uses); 470 } 471 }; 472 473 /// A decrement of a pointer-type value is unsafe as it may run the pointer 474 /// out of bounds. 475 class DecrementGadget : public WarningGadget { 476 static constexpr const char *const OpTag = "op"; 477 const UnaryOperator *Op; 478 479 public: 480 DecrementGadget(const MatchFinder::MatchResult &Result) 481 : WarningGadget(Kind::Decrement), 482 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 483 484 static bool classof(const Gadget *G) { 485 return G->getKind() == Kind::Decrement; 486 } 487 488 static Matcher matcher() { 489 return stmt(unaryOperator( 490 hasOperatorName("--"), 491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 492 ).bind(OpTag)); 493 } 494 495 const UnaryOperator *getBaseStmt() const override { return Op; } 496 497 DeclUseList getClaimedVarUseSites() const override { 498 if (const auto *DRE = 499 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 500 return {DRE}; 501 } 502 503 return {}; 504 } 505 }; 506 507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as 508 /// it doesn't have any bounds checks for the array. 509 class ArraySubscriptGadget : public WarningGadget { 510 static constexpr const char *const ArraySubscrTag = "ArraySubscript"; 511 const ArraySubscriptExpr *ASE; 512 513 public: 514 ArraySubscriptGadget(const MatchFinder::MatchResult &Result) 515 : WarningGadget(Kind::ArraySubscript), 516 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} 517 518 static bool classof(const Gadget *G) { 519 return G->getKind() == Kind::ArraySubscript; 520 } 521 522 static Matcher matcher() { 523 // FIXME: What if the index is integer literal 0? Should this be 524 // a safe gadget in this case? 525 // clang-format off 526 return stmt(arraySubscriptExpr( 527 hasBase(ignoringParenImpCasts( 528 anyOf(hasPointerType(), hasArrayType()))), 529 unless(hasIndex( 530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) 531 ))) 532 .bind(ArraySubscrTag)); 533 // clang-format on 534 } 535 536 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } 537 538 DeclUseList getClaimedVarUseSites() const override { 539 if (const auto *DRE = 540 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { 541 return {DRE}; 542 } 543 544 return {}; 545 } 546 }; 547 548 /// A pointer arithmetic expression of one of the forms: 549 /// \code 550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n 551 /// \endcode 552 class PointerArithmeticGadget : public WarningGadget { 553 static constexpr const char *const PointerArithmeticTag = "ptrAdd"; 554 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; 555 const BinaryOperator *PA; // pointer arithmetic expression 556 const Expr *Ptr; // the pointer expression in `PA` 557 558 public: 559 PointerArithmeticGadget(const MatchFinder::MatchResult &Result) 560 : WarningGadget(Kind::PointerArithmetic), 561 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), 562 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} 563 564 static bool classof(const Gadget *G) { 565 return G->getKind() == Kind::PointerArithmetic; 566 } 567 568 static Matcher matcher() { 569 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType())); 570 auto PtrAtRight = 571 allOf(hasOperatorName("+"), 572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 573 hasLHS(HasIntegerType)); 574 auto PtrAtLeft = 575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"), 576 hasOperatorName("+="), hasOperatorName("-=")), 577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 578 hasRHS(HasIntegerType)); 579 580 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)) 581 .bind(PointerArithmeticTag)); 582 } 583 584 const Stmt *getBaseStmt() const override { return PA; } 585 586 DeclUseList getClaimedVarUseSites() const override { 587 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { 588 return {DRE}; 589 } 590 591 return {}; 592 } 593 // FIXME: pointer adding zero should be fine 594 // FIXME: this gadge will need a fix-it 595 }; 596 597 /// A pointer initialization expression of the form: 598 /// \code 599 /// int *p = q; 600 /// \endcode 601 class PointerInitGadget : public FixableGadget { 602 private: 603 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS"; 604 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS"; 605 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI` 606 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI` 607 608 public: 609 PointerInitGadget(const MatchFinder::MatchResult &Result) 610 : FixableGadget(Kind::PointerInit), 611 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)), 612 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {} 613 614 static bool classof(const Gadget *G) { 615 return G->getKind() == Kind::PointerInit; 616 } 617 618 static Matcher matcher() { 619 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl( 620 hasInitializer(ignoringImpCasts(declRefExpr( 621 hasPointerType(), 622 toSupportedVariable()). 623 bind(PointerInitRHSTag)))). 624 bind(PointerInitLHSTag))); 625 626 return stmt(PtrInitStmt); 627 } 628 629 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 630 631 virtual const Stmt *getBaseStmt() const override { 632 // FIXME: This needs to be the entire DeclStmt, assuming that this method 633 // makes sense at all on a FixableGadget. 634 return PtrInitRHS; 635 } 636 637 virtual DeclUseList getClaimedVarUseSites() const override { 638 return DeclUseList{PtrInitRHS}; 639 } 640 641 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 642 getStrategyImplications() const override { 643 return std::make_pair(PtrInitLHS, 644 cast<VarDecl>(PtrInitRHS->getDecl())); 645 } 646 }; 647 648 /// A pointer assignment expression of the form: 649 /// \code 650 /// p = q; 651 /// \endcode 652 class PointerAssignmentGadget : public FixableGadget { 653 private: 654 static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; 655 static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; 656 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA` 657 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA` 658 659 public: 660 PointerAssignmentGadget(const MatchFinder::MatchResult &Result) 661 : FixableGadget(Kind::PointerAssignment), 662 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), 663 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} 664 665 static bool classof(const Gadget *G) { 666 return G->getKind() == Kind::PointerAssignment; 667 } 668 669 static Matcher matcher() { 670 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="), 671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(), 672 toSupportedVariable()). 673 bind(PointerAssignRHSTag))), 674 hasLHS(declRefExpr(hasPointerType(), 675 toSupportedVariable()). 676 bind(PointerAssignLHSTag)))); 677 678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); 679 } 680 681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 682 683 virtual const Stmt *getBaseStmt() const override { 684 // FIXME: This should be the binary operator, assuming that this method 685 // makes sense at all on a FixableGadget. 686 return PtrLHS; 687 } 688 689 virtual DeclUseList getClaimedVarUseSites() const override { 690 return DeclUseList{PtrLHS, PtrRHS}; 691 } 692 693 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 694 getStrategyImplications() const override { 695 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()), 696 cast<VarDecl>(PtrRHS->getDecl())); 697 } 698 }; 699 700 /// A call of a function or method that performs unchecked buffer operations 701 /// over one of its pointer parameters. 702 class UnsafeBufferUsageAttrGadget : public WarningGadget { 703 constexpr static const char *const OpTag = "call_expr"; 704 const CallExpr *Op; 705 706 public: 707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result) 708 : WarningGadget(Kind::UnsafeBufferUsageAttr), 709 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {} 710 711 static bool classof(const Gadget *G) { 712 return G->getKind() == Kind::UnsafeBufferUsageAttr; 713 } 714 715 static Matcher matcher() { 716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))) 717 .bind(OpTag)); 718 } 719 const Stmt *getBaseStmt() const override { return Op; } 720 721 DeclUseList getClaimedVarUseSites() const override { return {}; } 722 }; 723 724 // Warning gadget for unsafe invocation of span::data method. 725 // Triggers when the pointer returned by the invocation is immediately 726 // cast to a larger type. 727 728 class DataInvocationGadget : public WarningGadget { 729 constexpr static const char *const OpTag = "data_invocation_expr"; 730 const ExplicitCastExpr *Op; 731 732 public: 733 DataInvocationGadget(const MatchFinder::MatchResult &Result) 734 : WarningGadget(Kind::DataInvocation), 735 Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} 736 737 static bool classof(const Gadget *G) { 738 return G->getKind() == Kind::DataInvocation; 739 } 740 741 static Matcher matcher() { 742 return stmt( 743 explicitCastExpr(has(cxxMemberCallExpr(callee(cxxMethodDecl( 744 hasName("data"), ofClass(hasName("std::span"))))))) 745 .bind(OpTag)); 746 } 747 const Stmt *getBaseStmt() const override { return Op; } 748 749 DeclUseList getClaimedVarUseSites() const override { return {}; } 750 }; 751 752 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue 753 // Context (see `isInUnspecifiedLvalueContext`). 754 // Note here `[]` is the built-in subscript operator. 755 class ULCArraySubscriptGadget : public FixableGadget { 756 private: 757 static constexpr const char *const ULCArraySubscriptTag = 758 "ArraySubscriptUnderULC"; 759 const ArraySubscriptExpr *Node; 760 761 public: 762 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) 763 : FixableGadget(Kind::ULCArraySubscript), 764 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) { 765 assert(Node != nullptr && "Expecting a non-null matching result"); 766 } 767 768 static bool classof(const Gadget *G) { 769 return G->getKind() == Kind::ULCArraySubscript; 770 } 771 772 static Matcher matcher() { 773 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 774 auto BaseIsArrayOrPtrDRE = 775 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr, 776 toSupportedVariable()))); 777 auto Target = 778 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag); 779 780 return expr(isInUnspecifiedLvalueContext(Target)); 781 } 782 783 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 784 785 virtual const Stmt *getBaseStmt() const override { return Node; } 786 787 virtual DeclUseList getClaimedVarUseSites() const override { 788 if (const auto *DRE = 789 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) { 790 return {DRE}; 791 } 792 return {}; 793 } 794 }; 795 796 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the 797 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits 798 // fixit of the form `UPC(DRE.data())`. 799 class UPCStandalonePointerGadget : public FixableGadget { 800 private: 801 static constexpr const char *const DeclRefExprTag = "StandalonePointer"; 802 const DeclRefExpr *Node; 803 804 public: 805 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result) 806 : FixableGadget(Kind::UPCStandalonePointer), 807 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) { 808 assert(Node != nullptr && "Expecting a non-null matching result"); 809 } 810 811 static bool classof(const Gadget *G) { 812 return G->getKind() == Kind::UPCStandalonePointer; 813 } 814 815 static Matcher matcher() { 816 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 817 auto target = expr( 818 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr, 819 toSupportedVariable())).bind(DeclRefExprTag))); 820 return stmt(isInUnspecifiedPointerContext(target)); 821 } 822 823 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 824 825 virtual const Stmt *getBaseStmt() const override { return Node; } 826 827 virtual DeclUseList getClaimedVarUseSites() const override { 828 return {Node}; 829 } 830 }; 831 832 class PointerDereferenceGadget : public FixableGadget { 833 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 834 static constexpr const char *const OperatorTag = "op"; 835 836 const DeclRefExpr *BaseDeclRefExpr = nullptr; 837 const UnaryOperator *Op = nullptr; 838 839 public: 840 PointerDereferenceGadget(const MatchFinder::MatchResult &Result) 841 : FixableGadget(Kind::PointerDereference), 842 BaseDeclRefExpr( 843 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 844 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {} 845 846 static bool classof(const Gadget *G) { 847 return G->getKind() == Kind::PointerDereference; 848 } 849 850 static Matcher matcher() { 851 auto Target = 852 unaryOperator( 853 hasOperatorName("*"), 854 has(expr(ignoringParenImpCasts( 855 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag))))) 856 .bind(OperatorTag); 857 858 return expr(isInUnspecifiedLvalueContext(Target)); 859 } 860 861 DeclUseList getClaimedVarUseSites() const override { 862 return {BaseDeclRefExpr}; 863 } 864 865 virtual const Stmt *getBaseStmt() const final { return Op; } 866 867 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 868 }; 869 870 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer 871 // Context (see `isInUnspecifiedPointerContext`). 872 // Note here `[]` is the built-in subscript operator. 873 class UPCAddressofArraySubscriptGadget : public FixableGadget { 874 private: 875 static constexpr const char *const UPCAddressofArraySubscriptTag = 876 "AddressofArraySubscriptUnderUPC"; 877 const UnaryOperator *Node; // the `&DRE[any]` node 878 879 public: 880 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) 881 : FixableGadget(Kind::ULCArraySubscript), 882 Node(Result.Nodes.getNodeAs<UnaryOperator>( 883 UPCAddressofArraySubscriptTag)) { 884 assert(Node != nullptr && "Expecting a non-null matching result"); 885 } 886 887 static bool classof(const Gadget *G) { 888 return G->getKind() == Kind::UPCAddressofArraySubscript; 889 } 890 891 static Matcher matcher() { 892 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 893 unaryOperator(hasOperatorName("&"), 894 hasUnaryOperand(arraySubscriptExpr( 895 hasBase(ignoringParenImpCasts(declRefExpr( 896 toSupportedVariable())))))) 897 .bind(UPCAddressofArraySubscriptTag))))); 898 } 899 900 virtual std::optional<FixItList> getFixits(const Strategy &) const override; 901 902 virtual const Stmt *getBaseStmt() const override { return Node; } 903 904 virtual DeclUseList getClaimedVarUseSites() const override { 905 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr()); 906 const auto *DRE = 907 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts()); 908 return {DRE}; 909 } 910 }; 911 } // namespace 912 913 namespace { 914 // An auxiliary tracking facility for the fixit analysis. It helps connect 915 // declarations to its uses and make sure we've covered all uses with our 916 // analysis before we try to fix the declaration. 917 class DeclUseTracker { 918 using UseSetTy = SmallSet<const DeclRefExpr *, 16>; 919 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>; 920 921 // Allocate on the heap for easier move. 922 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()}; 923 DefMapTy Defs{}; 924 925 public: 926 DeclUseTracker() = default; 927 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. 928 DeclUseTracker &operator=(const DeclUseTracker &) = delete; 929 DeclUseTracker(DeclUseTracker &&) = default; 930 DeclUseTracker &operator=(DeclUseTracker &&) = default; 931 932 // Start tracking a freshly discovered DRE. 933 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } 934 935 // Stop tracking the DRE as it's been fully figured out. 936 void claimUse(const DeclRefExpr *DRE) { 937 assert(Uses->count(DRE) && 938 "DRE not found or claimed by multiple matchers!"); 939 Uses->erase(DRE); 940 } 941 942 // A variable is unclaimed if at least one use is unclaimed. 943 bool hasUnclaimedUses(const VarDecl *VD) const { 944 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? 945 return any_of(*Uses, [VD](const DeclRefExpr *DRE) { 946 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); 947 }); 948 } 949 950 UseSetTy getUnclaimedUses(const VarDecl *VD) const { 951 UseSetTy ReturnSet; 952 for (auto use : *Uses) { 953 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) { 954 ReturnSet.insert(use); 955 } 956 } 957 return ReturnSet; 958 } 959 960 void discoverDecl(const DeclStmt *DS) { 961 for (const Decl *D : DS->decls()) { 962 if (const auto *VD = dyn_cast<VarDecl>(D)) { 963 // FIXME: Assertion temporarily disabled due to a bug in 964 // ASTMatcher internal behavior in presence of GNU 965 // statement-expressions. We need to properly investigate this 966 // because it can screw up our algorithm in other ways. 967 // assert(Defs.count(VD) == 0 && "Definition already discovered!"); 968 Defs[VD] = DS; 969 } 970 } 971 } 972 973 const DeclStmt *lookupDecl(const VarDecl *VD) const { 974 return Defs.lookup(VD); 975 } 976 }; 977 } // namespace 978 979 namespace { 980 // Strategy is a map from variables to the way we plan to emit fixes for 981 // these variables. It is figured out gradually by trying different fixes 982 // for different variables depending on gadgets in which these variables 983 // participate. 984 class Strategy { 985 public: 986 enum class Kind { 987 Wontfix, // We don't plan to emit a fixit for this variable. 988 Span, // We recommend replacing the variable with std::span. 989 Iterator, // We recommend replacing the variable with std::span::iterator. 990 Array, // We recommend replacing the variable with std::array. 991 Vector // We recommend replacing the variable with std::vector. 992 }; 993 994 private: 995 using MapTy = llvm::DenseMap<const VarDecl *, Kind>; 996 997 MapTy Map; 998 999 public: 1000 Strategy() = default; 1001 Strategy(const Strategy &) = delete; // Let's avoid copies. 1002 Strategy &operator=(const Strategy &) = delete; 1003 Strategy(Strategy &&) = default; 1004 Strategy &operator=(Strategy &&) = default; 1005 1006 void set(const VarDecl *VD, Kind K) { Map[VD] = K; } 1007 1008 Kind lookup(const VarDecl *VD) const { 1009 auto I = Map.find(VD); 1010 if (I == Map.end()) 1011 return Kind::Wontfix; 1012 1013 return I->second; 1014 } 1015 }; 1016 } // namespace 1017 1018 1019 // Representing a pointer type expression of the form `++Ptr` in an Unspecified 1020 // Pointer Context (UPC): 1021 class UPCPreIncrementGadget : public FixableGadget { 1022 private: 1023 static constexpr const char *const UPCPreIncrementTag = 1024 "PointerPreIncrementUnderUPC"; 1025 const UnaryOperator *Node; // the `++Ptr` node 1026 1027 public: 1028 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result) 1029 : FixableGadget(Kind::UPCPreIncrement), 1030 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) { 1031 assert(Node != nullptr && "Expecting a non-null matching result"); 1032 } 1033 1034 static bool classof(const Gadget *G) { 1035 return G->getKind() == Kind::UPCPreIncrement; 1036 } 1037 1038 static Matcher matcher() { 1039 // Note here we match `++Ptr` for any expression `Ptr` of pointer type. 1040 // Although currently we can only provide fix-its when `Ptr` is a DRE, we 1041 // can have the matcher be general, so long as `getClaimedVarUseSites` does 1042 // things right. 1043 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 1044 unaryOperator(isPreInc(), 1045 hasUnaryOperand(declRefExpr( 1046 toSupportedVariable())) 1047 ).bind(UPCPreIncrementTag))))); 1048 } 1049 1050 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 1051 1052 virtual const Stmt *getBaseStmt() const override { return Node; } 1053 1054 virtual DeclUseList getClaimedVarUseSites() const override { 1055 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())}; 1056 } 1057 }; 1058 1059 // Representing a pointer type expression of the form `Ptr += n` in an 1060 // Unspecified Untyped Context (UUC): 1061 class UUCAddAssignGadget : public FixableGadget { 1062 private: 1063 static constexpr const char *const UUCAddAssignTag = 1064 "PointerAddAssignUnderUUC"; 1065 static constexpr const char *const OffsetTag = "Offset"; 1066 1067 const BinaryOperator *Node; // the `Ptr += n` node 1068 const Expr *Offset = nullptr; 1069 1070 public: 1071 UUCAddAssignGadget(const MatchFinder::MatchResult &Result) 1072 : FixableGadget(Kind::UUCAddAssign), 1073 Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)), 1074 Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) { 1075 assert(Node != nullptr && "Expecting a non-null matching result"); 1076 } 1077 1078 static bool classof(const Gadget *G) { 1079 return G->getKind() == Kind::UUCAddAssign; 1080 } 1081 1082 static Matcher matcher() { 1083 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts( 1084 binaryOperator(hasOperatorName("+="), 1085 hasLHS(declRefExpr(toSupportedVariable())), 1086 hasRHS(expr().bind(OffsetTag))) 1087 .bind(UUCAddAssignTag))))); 1088 } 1089 1090 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 1091 1092 virtual const Stmt *getBaseStmt() const override { return Node; } 1093 1094 virtual DeclUseList getClaimedVarUseSites() const override { 1095 return {dyn_cast<DeclRefExpr>(Node->getLHS())}; 1096 } 1097 }; 1098 1099 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 + 1100 // ptr)`: 1101 class DerefSimplePtrArithFixableGadget : public FixableGadget { 1102 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 1103 static constexpr const char *const DerefOpTag = "DerefOp"; 1104 static constexpr const char *const AddOpTag = "AddOp"; 1105 static constexpr const char *const OffsetTag = "Offset"; 1106 1107 const DeclRefExpr *BaseDeclRefExpr = nullptr; 1108 const UnaryOperator *DerefOp = nullptr; 1109 const BinaryOperator *AddOp = nullptr; 1110 const IntegerLiteral *Offset = nullptr; 1111 1112 public: 1113 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result) 1114 : FixableGadget(Kind::DerefSimplePtrArithFixable), 1115 BaseDeclRefExpr( 1116 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 1117 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)), 1118 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)), 1119 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {} 1120 1121 static Matcher matcher() { 1122 // clang-format off 1123 auto ThePtr = expr(hasPointerType(), 1124 ignoringImpCasts(declRefExpr(toSupportedVariable()). 1125 bind(BaseDeclRefExprTag))); 1126 auto PlusOverPtrAndInteger = expr(anyOf( 1127 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr), 1128 hasRHS(integerLiteral().bind(OffsetTag))) 1129 .bind(AddOpTag), 1130 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr), 1131 hasLHS(integerLiteral().bind(OffsetTag))) 1132 .bind(AddOpTag))); 1133 return isInUnspecifiedLvalueContext(unaryOperator( 1134 hasOperatorName("*"), 1135 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger))) 1136 .bind(DerefOpTag)); 1137 // clang-format on 1138 } 1139 1140 virtual std::optional<FixItList> getFixits(const Strategy &s) const final; 1141 1142 // TODO remove this method from FixableGadget interface 1143 virtual const Stmt *getBaseStmt() const final { return nullptr; } 1144 1145 virtual DeclUseList getClaimedVarUseSites() const final { 1146 return {BaseDeclRefExpr}; 1147 } 1148 }; 1149 1150 /// Scan the function and return a list of gadgets found with provided kits. 1151 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker> 1152 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, 1153 bool EmitSuggestions) { 1154 1155 struct GadgetFinderCallback : MatchFinder::MatchCallback { 1156 FixableGadgetList FixableGadgets; 1157 WarningGadgetList WarningGadgets; 1158 DeclUseTracker Tracker; 1159 1160 void run(const MatchFinder::MatchResult &Result) override { 1161 // In debug mode, assert that we've found exactly one gadget. 1162 // This helps us avoid conflicts in .bind() tags. 1163 #if NDEBUG 1164 #define NEXT return 1165 #else 1166 [[maybe_unused]] int numFound = 0; 1167 #define NEXT ++numFound 1168 #endif 1169 1170 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) { 1171 Tracker.discoverUse(DRE); 1172 NEXT; 1173 } 1174 1175 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) { 1176 Tracker.discoverDecl(DS); 1177 NEXT; 1178 } 1179 1180 // Figure out which matcher we've found, and call the appropriate 1181 // subclass constructor. 1182 // FIXME: Can we do this more logarithmically? 1183 #define FIXABLE_GADGET(name) \ 1184 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1185 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1186 NEXT; \ 1187 } 1188 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1189 #define WARNING_GADGET(name) \ 1190 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1191 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1192 NEXT; \ 1193 } 1194 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1195 1196 assert(numFound >= 1 && "Gadgets not found in match result!"); 1197 assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); 1198 } 1199 }; 1200 1201 MatchFinder M; 1202 GadgetFinderCallback CB; 1203 1204 // clang-format off 1205 M.addMatcher( 1206 stmt( 1207 forEachDescendantEvaluatedStmt(stmt(anyOf( 1208 // Add Gadget::matcher() for every gadget in the registry. 1209 #define WARNING_GADGET(x) \ 1210 allOf(x ## Gadget::matcher().bind(#x), \ 1211 notInSafeBufferOptOut(&Handler)), 1212 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1213 // Avoid a hanging comma. 1214 unless(stmt()) 1215 ))) 1216 ), 1217 &CB 1218 ); 1219 // clang-format on 1220 1221 if (EmitSuggestions) { 1222 // clang-format off 1223 M.addMatcher( 1224 stmt( 1225 forEachDescendantStmt(stmt(eachOf( 1226 #define FIXABLE_GADGET(x) \ 1227 x ## Gadget::matcher().bind(#x), 1228 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1229 // In parallel, match all DeclRefExprs so that to find out 1230 // whether there are any uncovered by gadgets. 1231 declRefExpr(anyOf(hasPointerType(), hasArrayType()), 1232 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"), 1233 // Also match DeclStmts because we'll need them when fixing 1234 // their underlying VarDecls that otherwise don't have 1235 // any backreferences to DeclStmts. 1236 declStmt().bind("any_ds") 1237 ))) 1238 ), 1239 &CB 1240 ); 1241 // clang-format on 1242 } 1243 1244 M.match(*D->getBody(), D->getASTContext()); 1245 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), 1246 std::move(CB.Tracker)}; 1247 } 1248 1249 // Compares AST nodes by source locations. 1250 template <typename NodeTy> struct CompareNode { 1251 bool operator()(const NodeTy *N1, const NodeTy *N2) const { 1252 return N1->getBeginLoc().getRawEncoding() < 1253 N2->getBeginLoc().getRawEncoding(); 1254 } 1255 }; 1256 1257 struct WarningGadgetSets { 1258 std::map<const VarDecl *, std::set<const WarningGadget *>, 1259 // To keep keys sorted by their locations in the map so that the 1260 // order is deterministic: 1261 CompareNode<VarDecl>> 1262 byVar; 1263 // These Gadgets are not related to pointer variables (e. g. temporaries). 1264 llvm::SmallVector<const WarningGadget *, 16> noVar; 1265 }; 1266 1267 static WarningGadgetSets 1268 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) { 1269 WarningGadgetSets result; 1270 // If some gadgets cover more than one 1271 // variable, they'll appear more than once in the map. 1272 for (auto &G : AllUnsafeOperations) { 1273 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); 1274 1275 bool AssociatedWithVarDecl = false; 1276 for (const DeclRefExpr *DRE : ClaimedVarUseSites) { 1277 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1278 result.byVar[VD].insert(G.get()); 1279 AssociatedWithVarDecl = true; 1280 } 1281 } 1282 1283 if (!AssociatedWithVarDecl) { 1284 result.noVar.push_back(G.get()); 1285 continue; 1286 } 1287 } 1288 return result; 1289 } 1290 1291 struct FixableGadgetSets { 1292 std::map<const VarDecl *, std::set<const FixableGadget *>, 1293 // To keep keys sorted by their locations in the map so that the 1294 // order is deterministic: 1295 CompareNode<VarDecl>> 1296 byVar; 1297 }; 1298 1299 static FixableGadgetSets 1300 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { 1301 FixableGadgetSets FixablesForUnsafeVars; 1302 for (auto &F : AllFixableOperations) { 1303 DeclUseList DREs = F->getClaimedVarUseSites(); 1304 1305 for (const DeclRefExpr *DRE : DREs) { 1306 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1307 FixablesForUnsafeVars.byVar[VD].insert(F.get()); 1308 } 1309 } 1310 } 1311 return FixablesForUnsafeVars; 1312 } 1313 1314 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts, 1315 const SourceManager &SM) { 1316 // A simple interval overlap detection algorithm. Sorts all ranges by their 1317 // begin location then finds the first overlap in one pass. 1318 std::vector<const FixItHint *> All; // a copy of `FixIts` 1319 1320 for (const FixItHint &H : FixIts) 1321 All.push_back(&H); 1322 std::sort(All.begin(), All.end(), 1323 [&SM](const FixItHint *H1, const FixItHint *H2) { 1324 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(), 1325 H2->RemoveRange.getBegin()); 1326 }); 1327 1328 const FixItHint *CurrHint = nullptr; 1329 1330 for (const FixItHint *Hint : All) { 1331 if (!CurrHint || 1332 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(), 1333 Hint->RemoveRange.getBegin())) { 1334 // Either to initialize `CurrHint` or `CurrHint` does not 1335 // overlap with `Hint`: 1336 CurrHint = Hint; 1337 } else 1338 // In case `Hint` overlaps the `CurrHint`, we found at least one 1339 // conflict: 1340 return true; 1341 } 1342 return false; 1343 } 1344 1345 std::optional<FixItList> 1346 PointerAssignmentGadget::getFixits(const Strategy &S) const { 1347 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); 1348 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); 1349 switch (S.lookup(LeftVD)) { 1350 case Strategy::Kind::Span: 1351 if (S.lookup(RightVD) == Strategy::Kind::Span) 1352 return FixItList{}; 1353 return std::nullopt; 1354 case Strategy::Kind::Wontfix: 1355 return std::nullopt; 1356 case Strategy::Kind::Iterator: 1357 case Strategy::Kind::Array: 1358 case Strategy::Kind::Vector: 1359 llvm_unreachable("unsupported strategies for FixableGadgets"); 1360 } 1361 return std::nullopt; 1362 } 1363 1364 std::optional<FixItList> 1365 PointerInitGadget::getFixits(const Strategy &S) const { 1366 const auto *LeftVD = PtrInitLHS; 1367 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl()); 1368 switch (S.lookup(LeftVD)) { 1369 case Strategy::Kind::Span: 1370 if (S.lookup(RightVD) == Strategy::Kind::Span) 1371 return FixItList{}; 1372 return std::nullopt; 1373 case Strategy::Kind::Wontfix: 1374 return std::nullopt; 1375 case Strategy::Kind::Iterator: 1376 case Strategy::Kind::Array: 1377 case Strategy::Kind::Vector: 1378 llvm_unreachable("unsupported strategies for FixableGadgets"); 1379 } 1380 return std::nullopt; 1381 } 1382 1383 static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD, 1384 const ASTContext &Ctx) { 1385 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) { 1386 if (ConstVal->isNegative()) 1387 return false; 1388 } else if (!Expr->getType()->isUnsignedIntegerType()) 1389 return false; 1390 return true; 1391 } 1392 1393 std::optional<FixItList> 1394 ULCArraySubscriptGadget::getFixits(const Strategy &S) const { 1395 if (const auto *DRE = 1396 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) 1397 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1398 switch (S.lookup(VD)) { 1399 case Strategy::Kind::Span: { 1400 1401 // If the index has a negative constant value, we give up as no valid 1402 // fix-it can be generated: 1403 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in! 1404 VD->getASTContext(); 1405 if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx)) 1406 return std::nullopt; 1407 // no-op is a good fix-it, otherwise 1408 return FixItList{}; 1409 } 1410 case Strategy::Kind::Wontfix: 1411 case Strategy::Kind::Iterator: 1412 case Strategy::Kind::Array: 1413 case Strategy::Kind::Vector: 1414 llvm_unreachable("unsupported strategies for FixableGadgets"); 1415 } 1416 } 1417 return std::nullopt; 1418 } 1419 1420 static std::optional<FixItList> // forward declaration 1421 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); 1422 1423 std::optional<FixItList> 1424 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { 1425 auto DREs = getClaimedVarUseSites(); 1426 const auto *VD = cast<VarDecl>(DREs.front()->getDecl()); 1427 1428 switch (S.lookup(VD)) { 1429 case Strategy::Kind::Span: 1430 return fixUPCAddressofArraySubscriptWithSpan(Node); 1431 case Strategy::Kind::Wontfix: 1432 case Strategy::Kind::Iterator: 1433 case Strategy::Kind::Array: 1434 case Strategy::Kind::Vector: 1435 llvm_unreachable("unsupported strategies for FixableGadgets"); 1436 } 1437 return std::nullopt; // something went wrong, no fix-it 1438 } 1439 1440 // FIXME: this function should be customizable through format 1441 static StringRef getEndOfLine() { 1442 static const char *const EOL = "\n"; 1443 return EOL; 1444 } 1445 1446 // Returns the text indicating that the user needs to provide input there: 1447 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { 1448 std::string s = std::string("<# "); 1449 s += HintTextToUser; 1450 s += " #>"; 1451 return s; 1452 } 1453 1454 // Return the text representation of the given `APInt Val`: 1455 static std::string getAPIntText(APInt Val) { 1456 SmallVector<char> Txt; 1457 Val.toString(Txt, 10, true); 1458 // APInt::toString does not add '\0' to the end of the string for us: 1459 Txt.push_back('\0'); 1460 return Txt.data(); 1461 } 1462 1463 // Return the source location of the last character of the AST `Node`. 1464 template <typename NodeTy> 1465 static std::optional<SourceLocation> 1466 getEndCharLoc(const NodeTy *Node, const SourceManager &SM, 1467 const LangOptions &LangOpts) { 1468 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); 1469 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); 1470 1471 if (Loc.isValid()) 1472 return Loc; 1473 1474 return std::nullopt; 1475 } 1476 1477 // Return the source location just past the last character of the AST `Node`. 1478 template <typename NodeTy> 1479 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node, 1480 const SourceManager &SM, 1481 const LangOptions &LangOpts) { 1482 SourceLocation Loc = 1483 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts); 1484 if (Loc.isValid()) 1485 return Loc; 1486 return std::nullopt; 1487 } 1488 1489 // Return text representation of an `Expr`. 1490 static std::optional<StringRef> getExprText(const Expr *E, 1491 const SourceManager &SM, 1492 const LangOptions &LangOpts) { 1493 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts); 1494 1495 if (LastCharLoc) 1496 return Lexer::getSourceText( 1497 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM, 1498 LangOpts); 1499 1500 return std::nullopt; 1501 } 1502 1503 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range. 1504 static std::optional<StringRef> getRangeText(SourceRange SR, 1505 const SourceManager &SM, 1506 const LangOptions &LangOpts) { 1507 bool Invalid = false; 1508 CharSourceRange CSR = CharSourceRange::getCharRange(SR); 1509 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid); 1510 1511 if (!Invalid) 1512 return Text; 1513 return std::nullopt; 1514 } 1515 1516 // Returns the begin location of the identifier of the given variable 1517 // declaration. 1518 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) { 1519 // According to the implementation of `VarDecl`, `VD->getLocation()` actually 1520 // returns the begin location of the identifier of the declaration: 1521 return VD->getLocation(); 1522 } 1523 1524 // Returns the literal text of the identifier of the given variable declaration. 1525 static std::optional<StringRef> 1526 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM, 1527 const LangOptions &LangOpts) { 1528 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD); 1529 SourceLocation ParmIdentEndLoc = 1530 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts); 1531 1532 if (ParmIdentEndLoc.isMacroID() && 1533 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts)) 1534 return std::nullopt; 1535 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts); 1536 } 1537 1538 // We cannot fix a variable declaration if it has some other specifiers than the 1539 // type specifier. Because the source ranges of those specifiers could overlap 1540 // with the source range that is being replaced using fix-its. Especially when 1541 // we often cannot obtain accurate source ranges of cv-qualified type 1542 // specifiers. 1543 // FIXME: also deal with type attributes 1544 static bool hasUnsupportedSpecifiers(const VarDecl *VD, 1545 const SourceManager &SM) { 1546 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the 1547 // source range of `VD`: 1548 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool { 1549 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(), 1550 VD->getBeginLoc())) && 1551 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(), 1552 At->getRange().getBegin())); 1553 }); 1554 return VD->isInlineSpecified() || VD->isConstexpr() || 1555 VD->hasConstantInitialization() || !VD->hasLocalStorage() || 1556 AttrRangeOverlapping; 1557 } 1558 1559 // Returns the `SourceRange` of `D`. The reason why this function exists is 1560 // that `D->getSourceRange()` may return a range where the end location is the 1561 // starting location of the last token. The end location of the source range 1562 // returned by this function is the last location of the last token. 1563 static SourceRange getSourceRangeToTokenEnd(const Decl *D, 1564 const SourceManager &SM, 1565 const LangOptions &LangOpts) { 1566 SourceLocation Begin = D->getBeginLoc(); 1567 SourceLocation 1568 End = // `D->getEndLoc` should always return the starting location of the 1569 // last token, so we should get the end of the token 1570 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts); 1571 1572 return SourceRange(Begin, End); 1573 } 1574 1575 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer 1576 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not 1577 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me 1578 // :( ), `Qualifiers` of the pointee type is returned separately through the 1579 // output parameter `QualifiersToAppend`. 1580 static std::optional<std::string> 1581 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM, 1582 const LangOptions &LangOpts, 1583 std::optional<Qualifiers> *QualifiersToAppend) { 1584 QualType Ty = VD->getType(); 1585 QualType PteTy; 1586 1587 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() && 1588 "Expecting a VarDecl of type of pointer to object type"); 1589 PteTy = Ty->getPointeeType(); 1590 1591 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc(); 1592 TypeLoc PteTyLoc; 1593 1594 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns 1595 // the `TypeLoc` of the pointee type: 1596 switch (TyLoc.getTypeLocClass()) { 1597 case TypeLoc::ConstantArray: 1598 case TypeLoc::IncompleteArray: 1599 case TypeLoc::VariableArray: 1600 case TypeLoc::DependentSizedArray: 1601 case TypeLoc::Decayed: 1602 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a " 1603 "pointer type unless it decays."); 1604 PteTyLoc = TyLoc.getNextTypeLoc(); 1605 break; 1606 case TypeLoc::Pointer: 1607 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc(); 1608 break; 1609 default: 1610 return std::nullopt; 1611 } 1612 if (PteTyLoc.isNull()) 1613 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g., 1614 // when the pointer type is `auto`. 1615 return std::nullopt; 1616 1617 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD); 1618 1619 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) { 1620 // We are expecting these locations to be valid. But in some cases, they are 1621 // not all valid. It is a Clang bug to me and we are not responsible for 1622 // fixing it. So we will just give up for now when it happens. 1623 return std::nullopt; 1624 } 1625 1626 // Note that TypeLoc.getEndLoc() returns the begin location of the last token: 1627 SourceLocation PteEndOfTokenLoc = 1628 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts); 1629 1630 if (!PteEndOfTokenLoc.isValid()) 1631 // Sometimes we cannot get the end location of the pointee type, e.g., when 1632 // there are macros involved. 1633 return std::nullopt; 1634 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) { 1635 // We only deal with the cases where the source text of the pointee type 1636 // appears on the left-hand side of the variable identifier completely, 1637 // including the following forms: 1638 // `T ident`, 1639 // `T ident[]`, where `T` is any type. 1640 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`. 1641 return std::nullopt; 1642 } 1643 if (PteTy.hasQualifiers()) { 1644 // TypeLoc does not provide source ranges for qualifiers (it says it's 1645 // intentional but seems fishy to me), so we cannot get the full text 1646 // `PteTy` via source ranges. 1647 *QualifiersToAppend = PteTy.getQualifiers(); 1648 } 1649 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts) 1650 ->str(); 1651 } 1652 1653 // Returns the text of the name (with qualifiers) of a `FunctionDecl`. 1654 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD, 1655 const SourceManager &SM, 1656 const LangOptions &LangOpts) { 1657 SourceLocation BeginLoc = FD->getQualifier() 1658 ? FD->getQualifierLoc().getBeginLoc() 1659 : FD->getNameInfo().getBeginLoc(); 1660 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the 1661 // last token: 1662 SourceLocation EndLoc = Lexer::getLocForEndOfToken( 1663 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts); 1664 SourceRange NameRange{BeginLoc, EndLoc}; 1665 1666 return getRangeText(NameRange, SM, LangOpts); 1667 } 1668 1669 // Returns the text representing a `std::span` type where the element type is 1670 // represented by `EltTyText`. 1671 // 1672 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers 1673 // explicitly if the element type needs to be qualified. 1674 static std::string 1675 getSpanTypeText(StringRef EltTyText, 1676 std::optional<Qualifiers> Quals = std::nullopt) { 1677 const char *const SpanOpen = "std::span<"; 1678 1679 if (Quals) 1680 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>'; 1681 return SpanOpen + EltTyText.str() + '>'; 1682 } 1683 1684 std::optional<FixItList> 1685 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const { 1686 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1687 1688 if (VD && s.lookup(VD) == Strategy::Kind::Span) { 1689 ASTContext &Ctx = VD->getASTContext(); 1690 // std::span can't represent elements before its begin() 1691 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx)) 1692 if (ConstVal->isNegative()) 1693 return std::nullopt; 1694 1695 // note that the expr may (oddly) has multiple layers of parens 1696 // example: 1697 // *((..(pointer + 123)..)) 1698 // goal: 1699 // pointer[123] 1700 // Fix-It: 1701 // remove '*(' 1702 // replace ' + ' with '[' 1703 // replace ')' with ']' 1704 1705 // example: 1706 // *((..(123 + pointer)..)) 1707 // goal: 1708 // 123[pointer] 1709 // Fix-It: 1710 // remove '*(' 1711 // replace ' + ' with '[' 1712 // replace ')' with ']' 1713 1714 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS(); 1715 const SourceManager &SM = Ctx.getSourceManager(); 1716 const LangOptions &LangOpts = Ctx.getLangOpts(); 1717 CharSourceRange StarWithTrailWhitespace = 1718 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(), 1719 LHS->getBeginLoc()); 1720 1721 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts); 1722 if (!LHSLocation) 1723 return std::nullopt; 1724 1725 CharSourceRange PlusWithSurroundingWhitespace = 1726 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc()); 1727 1728 std::optional<SourceLocation> AddOpLocation = 1729 getPastLoc(AddOp, SM, LangOpts); 1730 std::optional<SourceLocation> DerefOpLocation = 1731 getPastLoc(DerefOp, SM, LangOpts); 1732 1733 if (!AddOpLocation || !DerefOpLocation) 1734 return std::nullopt; 1735 1736 CharSourceRange ClosingParenWithPrecWhitespace = 1737 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation); 1738 1739 return FixItList{ 1740 {FixItHint::CreateRemoval(StarWithTrailWhitespace), 1741 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["), 1742 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}}; 1743 } 1744 return std::nullopt; // something wrong or unsupported, give up 1745 } 1746 1747 std::optional<FixItList> 1748 PointerDereferenceGadget::getFixits(const Strategy &S) const { 1749 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1750 switch (S.lookup(VD)) { 1751 case Strategy::Kind::Span: { 1752 ASTContext &Ctx = VD->getASTContext(); 1753 SourceManager &SM = Ctx.getSourceManager(); 1754 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] 1755 // Deletes the *operand 1756 CharSourceRange derefRange = clang::CharSourceRange::getCharRange( 1757 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); 1758 // Inserts the [0] 1759 if (auto LocPastOperand = 1760 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) { 1761 return FixItList{{FixItHint::CreateRemoval(derefRange), 1762 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}}; 1763 } 1764 break; 1765 } 1766 case Strategy::Kind::Iterator: 1767 case Strategy::Kind::Array: 1768 case Strategy::Kind::Vector: 1769 llvm_unreachable("Strategy not implemented yet!"); 1770 case Strategy::Kind::Wontfix: 1771 llvm_unreachable("Invalid strategy!"); 1772 } 1773 1774 return std::nullopt; 1775 } 1776 1777 // Generates fix-its replacing an expression of the form UPC(DRE) with 1778 // `DRE.data()` 1779 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S) 1780 const { 1781 const auto VD = cast<VarDecl>(Node->getDecl()); 1782 switch (S.lookup(VD)) { 1783 case Strategy::Kind::Span: { 1784 ASTContext &Ctx = VD->getASTContext(); 1785 SourceManager &SM = Ctx.getSourceManager(); 1786 // Inserts the .data() after the DRE 1787 std::optional<SourceLocation> EndOfOperand = 1788 getPastLoc(Node, SM, Ctx.getLangOpts()); 1789 1790 if (EndOfOperand) 1791 return FixItList{{FixItHint::CreateInsertion( 1792 *EndOfOperand, ".data()")}}; 1793 // FIXME: Points inside a macro expansion. 1794 break; 1795 } 1796 case Strategy::Kind::Wontfix: 1797 case Strategy::Kind::Iterator: 1798 case Strategy::Kind::Array: 1799 case Strategy::Kind::Vector: 1800 llvm_unreachable("unsupported strategies for FixableGadgets"); 1801 } 1802 1803 return std::nullopt; 1804 } 1805 1806 // Generates fix-its replacing an expression of the form `&DRE[e]` with 1807 // `&DRE.data()[e]`: 1808 static std::optional<FixItList> 1809 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { 1810 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr()); 1811 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts()); 1812 // FIXME: this `getASTContext` call is costly, we should pass the 1813 // ASTContext in: 1814 const ASTContext &Ctx = DRE->getDecl()->getASTContext(); 1815 const Expr *Idx = ArraySub->getIdx(); 1816 const SourceManager &SM = Ctx.getSourceManager(); 1817 const LangOptions &LangOpts = Ctx.getLangOpts(); 1818 std::stringstream SS; 1819 bool IdxIsLitZero = false; 1820 1821 if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) 1822 if ((*ICE).isZero()) 1823 IdxIsLitZero = true; 1824 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts); 1825 if (!DreString) 1826 return std::nullopt; 1827 1828 if (IdxIsLitZero) { 1829 // If the index is literal zero, we produce the most concise fix-it: 1830 SS << (*DreString).str() << ".data()"; 1831 } else { 1832 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts); 1833 if (!IndexString) 1834 return std::nullopt; 1835 1836 SS << "&" << (*DreString).str() << ".data()" 1837 << "[" << (*IndexString).str() << "]"; 1838 } 1839 return FixItList{ 1840 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; 1841 } 1842 1843 std::optional<FixItList> 1844 UUCAddAssignGadget::getFixits(const Strategy &S) const { 1845 DeclUseList DREs = getClaimedVarUseSites(); 1846 1847 if (DREs.size() != 1) 1848 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we 1849 // give up 1850 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1851 if (S.lookup(VD) == Strategy::Kind::Span) { 1852 FixItList Fixes; 1853 1854 const Stmt *AddAssignNode = getBaseStmt(); 1855 StringRef varName = VD->getName(); 1856 const ASTContext &Ctx = VD->getASTContext(); 1857 1858 if (!isNonNegativeIntegerExpr(Offset, VD, Ctx)) 1859 return std::nullopt; 1860 1861 // To transform UUC(p += n) to UUC(p = p.subspan(..)): 1862 bool NotParenExpr = 1863 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc()); 1864 std::string SS = varName.str() + " = " + varName.str() + ".subspan"; 1865 if (NotParenExpr) 1866 SS += "("; 1867 1868 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc( 1869 AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1870 if (!AddAssignLocation) 1871 return std::nullopt; 1872 1873 Fixes.push_back(FixItHint::CreateReplacement( 1874 SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()), 1875 SS)); 1876 if (NotParenExpr) 1877 Fixes.push_back(FixItHint::CreateInsertion( 1878 Offset->getEndLoc().getLocWithOffset(1), ")")); 1879 return Fixes; 1880 } 1881 } 1882 return std::nullopt; // Not in the cases that we can handle for now, give up. 1883 } 1884 1885 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const { 1886 DeclUseList DREs = getClaimedVarUseSites(); 1887 1888 if (DREs.size() != 1) 1889 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we 1890 // give up 1891 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1892 if (S.lookup(VD) == Strategy::Kind::Span) { 1893 FixItList Fixes; 1894 std::stringstream SS; 1895 const Stmt *PreIncNode = getBaseStmt(); 1896 StringRef varName = VD->getName(); 1897 const ASTContext &Ctx = VD->getASTContext(); 1898 1899 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()): 1900 SS << "(" << varName.data() << " = " << varName.data() 1901 << ".subspan(1)).data()"; 1902 std::optional<SourceLocation> PreIncLocation = 1903 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1904 if (!PreIncLocation) 1905 return std::nullopt; 1906 1907 Fixes.push_back(FixItHint::CreateReplacement( 1908 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str())); 1909 return Fixes; 1910 } 1911 } 1912 return std::nullopt; // Not in the cases that we can handle for now, give up. 1913 } 1914 1915 1916 // For a non-null initializer `Init` of `T *` type, this function returns 1917 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it 1918 // to output stream. 1919 // In many cases, this function cannot figure out the actual extent `S`. It 1920 // then will use a place holder to replace `S` to ask users to fill `S` in. The 1921 // initializer shall be used to initialize a variable of type `std::span<T>`. 1922 // 1923 // FIXME: Support multi-level pointers 1924 // 1925 // Parameters: 1926 // `Init` a pointer to the initializer expression 1927 // `Ctx` a reference to the ASTContext 1928 static FixItList 1929 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx, 1930 const StringRef UserFillPlaceHolder) { 1931 const SourceManager &SM = Ctx.getSourceManager(); 1932 const LangOptions &LangOpts = Ctx.getLangOpts(); 1933 1934 // If `Init` has a constant value that is (or equivalent to) a 1935 // NULL pointer, we use the default constructor to initialize the span 1936 // object, i.e., a `std:span` variable declaration with no initializer. 1937 // So the fix-it is just to remove the initializer. 1938 if (Init->isNullPointerConstant(Ctx, 1939 // FIXME: Why does this function not ask for `const ASTContext 1940 // &`? It should. Maybe worth an NFC patch later. 1941 Expr::NullPointerConstantValueDependence:: 1942 NPC_ValueDependentIsNotNull)) { 1943 std::optional<SourceLocation> InitLocation = 1944 getEndCharLoc(Init, SM, LangOpts); 1945 if (!InitLocation) 1946 return {}; 1947 1948 SourceRange SR(Init->getBeginLoc(), *InitLocation); 1949 1950 return {FixItHint::CreateRemoval(SR)}; 1951 } 1952 1953 FixItList FixIts{}; 1954 std::string ExtentText = UserFillPlaceHolder.data(); 1955 StringRef One = "1"; 1956 1957 // Insert `{` before `Init`: 1958 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{")); 1959 // Try to get the data extent. Break into different cases: 1960 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) { 1961 // In cases `Init` is `new T[n]` and there is no explicit cast over 1962 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects 1963 // of `T`. So the extent is `n` unless `n` has side effects. Similar but 1964 // simpler for the case where `Init` is `new T`. 1965 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { 1966 if (!Ext->HasSideEffects(Ctx)) { 1967 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts); 1968 if (!ExtentString) 1969 return {}; 1970 ExtentText = *ExtentString; 1971 } 1972 } else if (!CxxNew->isArray()) 1973 // Although the initializer is not allocating a buffer, the pointer 1974 // variable could still be used in buffer access operations. 1975 ExtentText = One; 1976 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType( 1977 Init->IgnoreImpCasts()->getType())) { 1978 // In cases `Init` is of an array type after stripping off implicit casts, 1979 // the extent is the array size. Note that if the array size is not a 1980 // constant, we cannot use it as the extent. 1981 ExtentText = getAPIntText(CArrTy->getSize()); 1982 } else { 1983 // In cases `Init` is of the form `&Var` after stripping of implicit 1984 // casts, where `&` is the built-in operator, the extent is 1. 1985 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts())) 1986 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && 1987 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr())) 1988 ExtentText = One; 1989 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, 1990 // and explicit casting, etc. etc. 1991 } 1992 1993 SmallString<32> StrBuffer{}; 1994 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts); 1995 1996 if (!LocPassInit) 1997 return {}; 1998 1999 StrBuffer.append(", "); 2000 StrBuffer.append(ExtentText); 2001 StrBuffer.append("}"); 2002 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str())); 2003 return FixIts; 2004 } 2005 2006 #ifndef NDEBUG 2007 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \ 2008 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg)) 2009 #else 2010 #define DEBUG_NOTE_DECL_FAIL(D, Msg) 2011 #endif 2012 2013 // For the given variable declaration with a pointer-to-T type, returns the text 2014 // `std::span<T>`. If it is unable to generate the text, returns 2015 // `std::nullopt`. 2016 static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD, 2017 const ASTContext &Ctx) { 2018 assert(VD->getType()->isPointerType()); 2019 2020 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 2021 std::optional<std::string> PteTyText = getPointeeTypeText( 2022 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 2023 2024 if (!PteTyText) 2025 return std::nullopt; 2026 2027 std::string SpanTyText = "std::span<"; 2028 2029 SpanTyText.append(*PteTyText); 2030 // Append qualifiers to span element type if any: 2031 if (PteTyQualifiers) { 2032 SpanTyText.append(" "); 2033 SpanTyText.append(PteTyQualifiers->getAsString()); 2034 } 2035 SpanTyText.append(">"); 2036 return SpanTyText; 2037 } 2038 2039 // For a `VarDecl` of the form `T * var (= Init)?`, this 2040 // function generates fix-its that 2041 // 1) replace `T * var` with `std::span<T> var`; and 2042 // 2) change `Init` accordingly to a span constructor, if it exists. 2043 // 2044 // FIXME: support Multi-level pointers 2045 // 2046 // Parameters: 2047 // `D` a pointer the variable declaration node 2048 // `Ctx` a reference to the ASTContext 2049 // `UserFillPlaceHolder` the user-input placeholder text 2050 // Returns: 2051 // the non-empty fix-it list, if fix-its are successfuly generated; empty 2052 // list otherwise. 2053 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, 2054 const StringRef UserFillPlaceHolder, 2055 UnsafeBufferUsageHandler &Handler) { 2056 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager())) 2057 return {}; 2058 2059 FixItList FixIts{}; 2060 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx); 2061 2062 if (!SpanTyText) { 2063 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type"); 2064 return {}; 2065 } 2066 2067 // Will hold the text for `std::span<T> Ident`: 2068 std::stringstream SS; 2069 2070 SS << *SpanTyText; 2071 // Append qualifiers to the type of `D`, if any: 2072 if (D->getType().hasQualifiers()) 2073 SS << " " << D->getType().getQualifiers().getAsString(); 2074 2075 // The end of the range of the original source that will be replaced 2076 // by `std::span<T> ident`: 2077 SourceLocation EndLocForReplacement = D->getEndLoc(); 2078 std::optional<StringRef> IdentText = 2079 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); 2080 2081 if (!IdentText) { 2082 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); 2083 return {}; 2084 } 2085 // Fix the initializer if it exists: 2086 if (const Expr *Init = D->getInit()) { 2087 FixItList InitFixIts = 2088 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder); 2089 if (InitFixIts.empty()) 2090 return {}; 2091 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()), 2092 std::make_move_iterator(InitFixIts.end())); 2093 // If the declaration has the form `T *ident = init`, we want to replace 2094 // `T *ident = ` with `std::span<T> ident`: 2095 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1); 2096 } 2097 SS << " " << IdentText->str(); 2098 if (!EndLocForReplacement.isValid()) { 2099 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration"); 2100 return {}; 2101 } 2102 FixIts.push_back(FixItHint::CreateReplacement( 2103 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str())); 2104 return FixIts; 2105 } 2106 2107 static bool hasConflictingOverload(const FunctionDecl *FD) { 2108 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult(); 2109 } 2110 2111 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new 2112 // types, this function produces fix-its to make the change self-contained. Let 2113 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the 2114 // entity defined by the `FunctionDecl` after the change to the parameters. 2115 // Fix-its produced by this function are 2116 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration 2117 // of 'F'; 2118 // 2. Create a declaration of "NewF" next to each declaration of `F`; 2119 // 3. Create a definition of "F" (as its' original definition is now belongs 2120 // to "NewF") next to its original definition. The body of the creating 2121 // definition calls to "NewF". 2122 // 2123 // Example: 2124 // 2125 // void f(int *p); // original declaration 2126 // void f(int *p) { // original definition 2127 // p[5]; 2128 // } 2129 // 2130 // To change the parameter `p` to be of `std::span<int>` type, we 2131 // also add overloads: 2132 // 2133 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl 2134 // void f(std::span<int> p); // added overload decl 2135 // void f(std::span<int> p) { // original def where param is changed 2136 // p[5]; 2137 // } 2138 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def 2139 // return f(std::span(p, <# size #>)); 2140 // } 2141 // 2142 static std::optional<FixItList> 2143 createOverloadsForFixedParams(const Strategy &S, const FunctionDecl *FD, 2144 const ASTContext &Ctx, 2145 UnsafeBufferUsageHandler &Handler) { 2146 // FIXME: need to make this conflict checking better: 2147 if (hasConflictingOverload(FD)) 2148 return std::nullopt; 2149 2150 const SourceManager &SM = Ctx.getSourceManager(); 2151 const LangOptions &LangOpts = Ctx.getLangOpts(); 2152 const unsigned NumParms = FD->getNumParams(); 2153 std::vector<std::string> NewTysTexts(NumParms); 2154 std::vector<bool> ParmsMask(NumParms, false); 2155 bool AtLeastOneParmToFix = false; 2156 2157 for (unsigned i = 0; i < NumParms; i++) { 2158 const ParmVarDecl *PVD = FD->getParamDecl(i); 2159 2160 if (S.lookup(PVD) == Strategy::Kind::Wontfix) 2161 continue; 2162 if (S.lookup(PVD) != Strategy::Kind::Span) 2163 // Not supported, not suppose to happen: 2164 return std::nullopt; 2165 2166 std::optional<Qualifiers> PteTyQuals = std::nullopt; 2167 std::optional<std::string> PteTyText = 2168 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals); 2169 2170 if (!PteTyText) 2171 // something wrong in obtaining the text of the pointee type, give up 2172 return std::nullopt; 2173 // FIXME: whether we should create std::span type depends on the Strategy. 2174 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals); 2175 ParmsMask[i] = true; 2176 AtLeastOneParmToFix = true; 2177 } 2178 if (!AtLeastOneParmToFix) 2179 // No need to create function overloads: 2180 return {}; 2181 // FIXME Respect indentation of the original code. 2182 2183 // A lambda that creates the text representation of a function declaration 2184 // with the new type signatures: 2185 const auto NewOverloadSignatureCreator = 2186 [&SM, &LangOpts, &NewTysTexts, 2187 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 2188 std::stringstream SS; 2189 2190 SS << ";"; 2191 SS << getEndOfLine().str(); 2192 // Append: ret-type func-name "(" 2193 if (auto Prefix = getRangeText( 2194 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()), 2195 SM, LangOpts)) 2196 SS << Prefix->str(); 2197 else 2198 return std::nullopt; // give up 2199 // Append: parameter-type-list 2200 const unsigned NumParms = FD->getNumParams(); 2201 2202 for (unsigned i = 0; i < NumParms; i++) { 2203 const ParmVarDecl *Parm = FD->getParamDecl(i); 2204 2205 if (Parm->isImplicit()) 2206 continue; 2207 if (ParmsMask[i]) { 2208 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its 2209 // new type: 2210 SS << NewTysTexts[i]; 2211 // print parameter name if provided: 2212 if (IdentifierInfo *II = Parm->getIdentifier()) 2213 SS << ' ' << II->getName().str(); 2214 } else if (auto ParmTypeText = getRangeText( 2215 getSourceRangeToTokenEnd(Parm, SM, LangOpts), 2216 SM, LangOpts)) { 2217 // print the whole `Parm` without modification: 2218 SS << ParmTypeText->str(); 2219 } else 2220 return std::nullopt; // something wrong, give up 2221 if (i != NumParms - 1) 2222 SS << ", "; 2223 } 2224 SS << ")"; 2225 return SS.str(); 2226 }; 2227 2228 // A lambda that creates the text representation of a function definition with 2229 // the original signature: 2230 const auto OldOverloadDefCreator = 2231 [&Handler, &SM, &LangOpts, &NewTysTexts, 2232 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 2233 std::stringstream SS; 2234 2235 SS << getEndOfLine().str(); 2236 // Append: attr-name ret-type func-name "(" param-list ")" "{" 2237 if (auto FDPrefix = getRangeText( 2238 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM, 2239 LangOpts)) 2240 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ") 2241 << FDPrefix->str() << "{"; 2242 else 2243 return std::nullopt; 2244 // Append: "return" func-name "(" 2245 if (auto FunQualName = getFunNameText(FD, SM, LangOpts)) 2246 SS << "return " << FunQualName->str() << "("; 2247 else 2248 return std::nullopt; 2249 2250 // Append: arg-list 2251 const unsigned NumParms = FD->getNumParams(); 2252 for (unsigned i = 0; i < NumParms; i++) { 2253 const ParmVarDecl *Parm = FD->getParamDecl(i); 2254 2255 if (Parm->isImplicit()) 2256 continue; 2257 // FIXME: If a parameter has no name, it is unused in the 2258 // definition. So we could just leave it as it is. 2259 if (!Parm->getIdentifier()) 2260 // If a parameter of a function definition has no name: 2261 return std::nullopt; 2262 if (ParmsMask[i]) 2263 // This is our spanified paramter! 2264 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str() 2265 << ", " << getUserFillPlaceHolder("size") << ")"; 2266 else 2267 SS << Parm->getIdentifier()->getName().str(); 2268 if (i != NumParms - 1) 2269 SS << ", "; 2270 } 2271 // finish call and the body 2272 SS << ");}" << getEndOfLine().str(); 2273 // FIXME: 80-char line formatting? 2274 return SS.str(); 2275 }; 2276 2277 FixItList FixIts{}; 2278 for (FunctionDecl *FReDecl : FD->redecls()) { 2279 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts); 2280 2281 if (!Loc) 2282 return {}; 2283 if (FReDecl->isThisDeclarationADefinition()) { 2284 assert(FReDecl == FD && "inconsistent function definition"); 2285 // Inserts a definition with the old signature to the end of 2286 // `FReDecl`: 2287 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl)) 2288 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef)); 2289 else 2290 return {}; // give up 2291 } else { 2292 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`: 2293 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) { 2294 FixIts.emplace_back(FixItHint::CreateInsertion( 2295 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt( 2296 FReDecl->getBeginLoc(), " "))); 2297 } 2298 // Inserts a declaration with the new signature to the end of `FReDecl`: 2299 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl)) 2300 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl)); 2301 else 2302 return {}; 2303 } 2304 } 2305 return FixIts; 2306 } 2307 2308 // To fix a `ParmVarDecl` to be of `std::span` type. 2309 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx, 2310 UnsafeBufferUsageHandler &Handler) { 2311 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) { 2312 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)"); 2313 return {}; 2314 } 2315 if (PVD->hasDefaultArg()) { 2316 // FIXME: generate fix-its for default values: 2317 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg"); 2318 return {}; 2319 } 2320 2321 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 2322 std::optional<std::string> PteTyText = getPointeeTypeText( 2323 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 2324 2325 if (!PteTyText) { 2326 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type"); 2327 return {}; 2328 } 2329 2330 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName(); 2331 2332 if (!PVDNameText) { 2333 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name"); 2334 return {}; 2335 } 2336 2337 std::stringstream SS; 2338 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx); 2339 2340 if (PteTyQualifiers) 2341 // Append qualifiers if they exist: 2342 SS << getSpanTypeText(*PteTyText, PteTyQualifiers); 2343 else 2344 SS << getSpanTypeText(*PteTyText); 2345 // Append qualifiers to the type of the parameter: 2346 if (PVD->getType().hasQualifiers()) 2347 SS << ' ' << PVD->getType().getQualifiers().getAsString(); 2348 // Append parameter's name: 2349 SS << ' ' << PVDNameText->str(); 2350 // Add replacement fix-it: 2351 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())}; 2352 } 2353 2354 static FixItList fixVariableWithSpan(const VarDecl *VD, 2355 const DeclUseTracker &Tracker, 2356 ASTContext &Ctx, 2357 UnsafeBufferUsageHandler &Handler) { 2358 const DeclStmt *DS = Tracker.lookupDecl(VD); 2359 if (!DS) { 2360 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet"); 2361 return {}; 2362 } 2363 if (!DS->isSingleDecl()) { 2364 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` 2365 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls"); 2366 return {}; 2367 } 2368 // Currently DS is an unused variable but we'll need it when 2369 // non-single decls are implemented, where the pointee type name 2370 // and the '*' are spread around the place. 2371 (void)DS; 2372 2373 // FIXME: handle cases where DS has multiple declarations 2374 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); 2375 } 2376 2377 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due 2378 // to any unexpected problem. 2379 static FixItList 2380 fixVariable(const VarDecl *VD, Strategy::Kind K, 2381 /* The function decl under analysis */ const Decl *D, 2382 const DeclUseTracker &Tracker, ASTContext &Ctx, 2383 UnsafeBufferUsageHandler &Handler) { 2384 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) { 2385 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext()); 2386 if (!FD || FD != D) { 2387 // `FD != D` means that `PVD` belongs to a function that is not being 2388 // analyzed currently. Thus `FD` may not be complete. 2389 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed"); 2390 return {}; 2391 } 2392 2393 // TODO If function has a try block we can't change params unless we check 2394 // also its catch block for their use. 2395 // FIXME We might support static class methods, some select methods, 2396 // operators and possibly lamdas. 2397 if (FD->isMain() || FD->isConstexpr() || 2398 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate || 2399 FD->isVariadic() || 2400 // also covers call-operator of lamdas 2401 isa<CXXMethodDecl>(FD) || 2402 // skip when the function body is a try-block 2403 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) || 2404 FD->isOverloadedOperator()) { 2405 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl"); 2406 return {}; // TODO test all these cases 2407 } 2408 } 2409 2410 switch (K) { 2411 case Strategy::Kind::Span: { 2412 if (VD->getType()->isPointerType()) { 2413 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) 2414 return fixParamWithSpan(PVD, Ctx, Handler); 2415 2416 if (VD->isLocalVarDecl()) 2417 return fixVariableWithSpan(VD, Tracker, Ctx, Handler); 2418 } 2419 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer"); 2420 return {}; 2421 } 2422 case Strategy::Kind::Iterator: 2423 case Strategy::Kind::Array: 2424 case Strategy::Kind::Vector: 2425 llvm_unreachable("Strategy not implemented yet!"); 2426 case Strategy::Kind::Wontfix: 2427 llvm_unreachable("Invalid strategy!"); 2428 } 2429 llvm_unreachable("Unknown strategy!"); 2430 } 2431 2432 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the 2433 // `RemoveRange` of 'h' overlaps with a macro use. 2434 static bool overlapWithMacro(const FixItList &FixIts) { 2435 // FIXME: For now we only check if the range (or the first token) is (part of) 2436 // a macro expansion. Ideally, we want to check for all tokens in the range. 2437 return llvm::any_of(FixIts, [](const FixItHint &Hint) { 2438 auto Range = Hint.RemoveRange; 2439 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) 2440 // If the range (or the first token) is (part of) a macro expansion: 2441 return true; 2442 return false; 2443 }); 2444 } 2445 2446 // Returns true iff `VD` is a parameter of the declaration `D`: 2447 static bool isParameterOf(const VarDecl *VD, const Decl *D) { 2448 return isa<ParmVarDecl>(VD) && 2449 VD->getDeclContext() == dyn_cast<DeclContext>(D); 2450 } 2451 2452 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable 2453 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not 2454 // contain `v`. 2455 static void eraseVarsForUnfixableGroupMates( 2456 std::map<const VarDecl *, FixItList> &FixItsForVariable, 2457 const VariableGroupsManager &VarGrpMgr) { 2458 // Variables will be removed from `FixItsForVariable`: 2459 SmallVector<const VarDecl *, 8> ToErase; 2460 2461 for (const auto &[VD, Ignore] : FixItsForVariable) { 2462 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD); 2463 if (llvm::any_of(Grp, 2464 [&FixItsForVariable](const VarDecl *GrpMember) -> bool { 2465 return !FixItsForVariable.count(GrpMember); 2466 })) { 2467 // At least one group member cannot be fixed, so we have to erase the 2468 // whole group: 2469 for (const VarDecl *Member : Grp) 2470 ToErase.push_back(Member); 2471 } 2472 } 2473 for (auto *VarToErase : ToErase) 2474 FixItsForVariable.erase(VarToErase); 2475 } 2476 2477 // Returns the fix-its that create bounds-safe function overloads for the 2478 // function `D`, if `D`'s parameters will be changed to safe-types through 2479 // fix-its in `FixItsForVariable`. 2480 // 2481 // NOTE: In case `D`'s parameters will be changed but bounds-safe function 2482 // overloads cannot created, the whole group that contains the parameters will 2483 // be erased from `FixItsForVariable`. 2484 static FixItList createFunctionOverloadsForParms( 2485 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */, 2486 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD, 2487 const Strategy &S, ASTContext &Ctx, UnsafeBufferUsageHandler &Handler) { 2488 FixItList FixItsSharedByParms{}; 2489 2490 std::optional<FixItList> OverloadFixes = 2491 createOverloadsForFixedParams(S, FD, Ctx, Handler); 2492 2493 if (OverloadFixes) { 2494 FixItsSharedByParms.append(*OverloadFixes); 2495 } else { 2496 // Something wrong in generating `OverloadFixes`, need to remove the 2497 // whole group, where parameters are in, from `FixItsForVariable` (Note 2498 // that all parameters should be in the same group): 2499 for (auto *Member : VarGrpMgr.getGroupOfParms()) 2500 FixItsForVariable.erase(Member); 2501 } 2502 return FixItsSharedByParms; 2503 } 2504 2505 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`. 2506 static std::map<const VarDecl *, FixItList> 2507 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S, 2508 ASTContext &Ctx, 2509 /* The function decl under analysis */ const Decl *D, 2510 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler, 2511 const VariableGroupsManager &VarGrpMgr) { 2512 // `FixItsForVariable` will map each variable to a set of fix-its directly 2513 // associated to the variable itself. Fix-its of distinct variables in 2514 // `FixItsForVariable` are disjoint. 2515 std::map<const VarDecl *, FixItList> FixItsForVariable; 2516 2517 // Populate `FixItsForVariable` with fix-its directly associated with each 2518 // variable. Fix-its directly associated to a variable 'v' are the ones 2519 // produced by the `FixableGadget`s whose claimed variable is 'v'. 2520 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) { 2521 FixItsForVariable[VD] = 2522 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler); 2523 // If we fail to produce Fix-It for the declaration we have to skip the 2524 // variable entirely. 2525 if (FixItsForVariable[VD].empty()) { 2526 FixItsForVariable.erase(VD); 2527 continue; 2528 } 2529 for (const auto &F : Fixables) { 2530 std::optional<FixItList> Fixits = F->getFixits(S); 2531 2532 if (Fixits) { 2533 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), 2534 Fixits->begin(), Fixits->end()); 2535 continue; 2536 } 2537 #ifndef NDEBUG 2538 Handler.addDebugNoteForVar( 2539 VD, F->getBaseStmt()->getBeginLoc(), 2540 ("gadget '" + F->getDebugName() + "' refused to produce a fix") 2541 .str()); 2542 #endif 2543 FixItsForVariable.erase(VD); 2544 break; 2545 } 2546 } 2547 2548 // `FixItsForVariable` now contains only variables that can be 2549 // fixed. A variable can be fixed if its' declaration and all Fixables 2550 // associated to it can all be fixed. 2551 2552 // To further remove from `FixItsForVariable` variables whose group mates 2553 // cannot be fixed... 2554 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr); 2555 // Now `FixItsForVariable` gets further reduced: a variable is in 2556 // `FixItsForVariable` iff it can be fixed and all its group mates can be 2557 // fixed. 2558 2559 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`. 2560 // That is, when fixing multiple parameters in one step, these fix-its will 2561 // be applied only once (instead of being applied per parameter). 2562 FixItList FixItsSharedByParms{}; 2563 2564 if (auto *FD = dyn_cast<FunctionDecl>(D)) 2565 FixItsSharedByParms = createFunctionOverloadsForParms( 2566 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler); 2567 2568 // The map that maps each variable `v` to fix-its for the whole group where 2569 // `v` is in: 2570 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{ 2571 FixItsForVariable}; 2572 2573 for (auto &[Var, Ignore] : FixItsForVariable) { 2574 bool AnyParm = false; 2575 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm); 2576 2577 for (const VarDecl *GrpMate : VarGroupForVD) { 2578 if (Var == GrpMate) 2579 continue; 2580 if (FixItsForVariable.count(GrpMate)) 2581 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]); 2582 } 2583 if (AnyParm) { 2584 // This assertion should never fail. Otherwise we have a bug. 2585 assert(!FixItsSharedByParms.empty() && 2586 "Should not try to fix a parameter that does not belong to a " 2587 "FunctionDecl"); 2588 FinalFixItsForVariable[Var].append(FixItsSharedByParms); 2589 } 2590 } 2591 // Fix-its that will be applied in one step shall NOT: 2592 // 1. overlap with macros or/and templates; or 2593 // 2. conflict with each other. 2594 // Otherwise, the fix-its will be dropped. 2595 for (auto Iter = FinalFixItsForVariable.begin(); 2596 Iter != FinalFixItsForVariable.end();) 2597 if (overlapWithMacro(Iter->second) || 2598 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) { 2599 Iter = FinalFixItsForVariable.erase(Iter); 2600 } else 2601 Iter++; 2602 return FinalFixItsForVariable; 2603 } 2604 2605 template <typename VarDeclIterTy> 2606 static Strategy 2607 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) { 2608 Strategy S; 2609 for (const VarDecl *VD : UnsafeVars) { 2610 S.set(VD, Strategy::Kind::Span); 2611 } 2612 return S; 2613 } 2614 2615 // Manages variable groups: 2616 class VariableGroupsManagerImpl : public VariableGroupsManager { 2617 const std::vector<VarGrpTy> Groups; 2618 const std::map<const VarDecl *, unsigned> &VarGrpMap; 2619 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms; 2620 2621 public: 2622 VariableGroupsManagerImpl( 2623 const std::vector<VarGrpTy> &Groups, 2624 const std::map<const VarDecl *, unsigned> &VarGrpMap, 2625 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms) 2626 : Groups(Groups), VarGrpMap(VarGrpMap), 2627 GrpsUnionForParms(GrpsUnionForParms) {} 2628 2629 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override { 2630 if (GrpsUnionForParms.contains(Var)) { 2631 if (HasParm) 2632 *HasParm = true; 2633 return GrpsUnionForParms.getArrayRef(); 2634 } 2635 if (HasParm) 2636 *HasParm = false; 2637 2638 auto It = VarGrpMap.find(Var); 2639 2640 if (It == VarGrpMap.end()) 2641 return std::nullopt; 2642 return Groups[It->second]; 2643 } 2644 2645 VarGrpRef getGroupOfParms() const override { 2646 return GrpsUnionForParms.getArrayRef(); 2647 } 2648 }; 2649 2650 void clang::checkUnsafeBufferUsage(const Decl *D, 2651 UnsafeBufferUsageHandler &Handler, 2652 bool EmitSuggestions) { 2653 #ifndef NDEBUG 2654 Handler.clearDebugNotes(); 2655 #endif 2656 2657 assert(D && D->getBody()); 2658 // We do not want to visit a Lambda expression defined inside a method independently. 2659 // Instead, it should be visited along with the outer method. 2660 // FIXME: do we want to do the same thing for `BlockDecl`s? 2661 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) { 2662 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass()) 2663 return; 2664 } 2665 2666 // Do not emit fixit suggestions for functions declared in an 2667 // extern "C" block. 2668 if (const auto *FD = dyn_cast<FunctionDecl>(D)) { 2669 for (FunctionDecl *FReDecl : FD->redecls()) { 2670 if (FReDecl->isExternC()) { 2671 EmitSuggestions = false; 2672 break; 2673 } 2674 } 2675 } 2676 2677 WarningGadgetSets UnsafeOps; 2678 FixableGadgetSets FixablesForAllVars; 2679 2680 auto [FixableGadgets, WarningGadgets, Tracker] = 2681 findGadgets(D, Handler, EmitSuggestions); 2682 2683 if (!EmitSuggestions) { 2684 // Our job is very easy without suggestions. Just warn about 2685 // every problematic operation and consider it done. No need to deal 2686 // with fixable gadgets, no need to group operations by variable. 2687 for (const auto &G : WarningGadgets) { 2688 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, 2689 D->getASTContext()); 2690 } 2691 2692 // This return guarantees that most of the machine doesn't run when 2693 // suggestions aren't requested. 2694 assert(FixableGadgets.size() == 0 && 2695 "Fixable gadgets found but suggestions not requested!"); 2696 return; 2697 } 2698 2699 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 2700 // function under the analysis. No need to fix any Fixables. 2701 if (!WarningGadgets.empty()) { 2702 // Gadgets "claim" variables they're responsible for. Once this loop 2703 // finishes, the tracker will only track DREs that weren't claimed by any 2704 // gadgets, i.e. not understood by the analysis. 2705 for (const auto &G : FixableGadgets) { 2706 for (const auto *DRE : G->getClaimedVarUseSites()) { 2707 Tracker.claimUse(DRE); 2708 } 2709 } 2710 } 2711 2712 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 2713 // function under the analysis. Thus, it early returns here as there is 2714 // nothing needs to be fixed. 2715 // 2716 // Note this claim is based on the assumption that there is no unsafe 2717 // variable whose declaration is invisible from the analyzing function. 2718 // Otherwise, we need to consider if the uses of those unsafe varuables needs 2719 // fix. 2720 // So far, we are not fixing any global variables or class members. And, 2721 // lambdas will be analyzed along with the enclosing function. So this early 2722 // return is correct for now. 2723 if (WarningGadgets.empty()) 2724 return; 2725 2726 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); 2727 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets)); 2728 2729 std::map<const VarDecl *, FixItList> FixItsForVariableGroup; 2730 2731 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. 2732 for (auto it = FixablesForAllVars.byVar.cbegin(); 2733 it != FixablesForAllVars.byVar.cend();) { 2734 // FIXME: need to deal with global variables later 2735 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) { 2736 #ifndef NDEBUG 2737 Handler.addDebugNoteForVar( 2738 it->first, it->first->getBeginLoc(), 2739 ("failed to produce fixit for '" + it->first->getNameAsString() + 2740 "' : neither local nor a parameter")); 2741 #endif 2742 it = FixablesForAllVars.byVar.erase(it); 2743 } else if (it->first->getType().getCanonicalType()->isReferenceType()) { 2744 #ifndef NDEBUG 2745 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(), 2746 ("failed to produce fixit for '" + 2747 it->first->getNameAsString() + 2748 "' : has a reference type")); 2749 #endif 2750 it = FixablesForAllVars.byVar.erase(it); 2751 } else if (Tracker.hasUnclaimedUses(it->first)) { 2752 #ifndef NDEBUG 2753 auto AllUnclaimed = Tracker.getUnclaimedUses(it->first); 2754 for (auto UnclaimedDRE : AllUnclaimed) { 2755 std::string UnclaimedUseTrace = 2756 getDREAncestorString(UnclaimedDRE, D->getASTContext()); 2757 2758 Handler.addDebugNoteForVar( 2759 it->first, UnclaimedDRE->getBeginLoc(), 2760 ("failed to produce fixit for '" + it->first->getNameAsString() + 2761 "' : has an unclaimed use\nThe unclaimed DRE trace: " + 2762 UnclaimedUseTrace)); 2763 } 2764 #endif 2765 it = FixablesForAllVars.byVar.erase(it); 2766 } else if (it->first->isInitCapture()) { 2767 #ifndef NDEBUG 2768 Handler.addDebugNoteForVar( 2769 it->first, it->first->getBeginLoc(), 2770 ("failed to produce fixit for '" + it->first->getNameAsString() + 2771 "' : init capture")); 2772 #endif 2773 it = FixablesForAllVars.byVar.erase(it); 2774 }else { 2775 ++it; 2776 } 2777 } 2778 2779 // Fixpoint iteration for pointer assignments 2780 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>; 2781 DepMapTy DependenciesMap{}; 2782 DepMapTy PtrAssignmentGraph{}; 2783 2784 for (auto it : FixablesForAllVars.byVar) { 2785 for (const FixableGadget *fixable : it.second) { 2786 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair = 2787 fixable->getStrategyImplications(); 2788 if (ImplPair) { 2789 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair); 2790 PtrAssignmentGraph[Impl.first].insert(Impl.second); 2791 } 2792 } 2793 } 2794 2795 /* 2796 The following code does a BFS traversal of the `PtrAssignmentGraph` 2797 considering all unsafe vars as starting nodes and constructs an undirected 2798 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner 2799 elimiates all variables that are unreachable from any unsafe var. In other 2800 words, this removes all dependencies that don't include any unsafe variable 2801 and consequently don't need any fixit generation. 2802 Note: A careful reader would observe that the code traverses 2803 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and 2804 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would 2805 achieve the same result but the one used here dramatically cuts the 2806 amount of hoops the second part of the algorithm needs to jump, given that 2807 a lot of these connections become "direct". The reader is advised not to 2808 imagine how the graph is transformed because of using `Var` instead of 2809 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used, 2810 and think about why it's equivalent later. 2811 */ 2812 std::set<const VarDecl *> VisitedVarsDirected{}; 2813 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2814 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) { 2815 2816 std::queue<const VarDecl*> QueueDirected{}; 2817 QueueDirected.push(Var); 2818 while(!QueueDirected.empty()) { 2819 const VarDecl* CurrentVar = QueueDirected.front(); 2820 QueueDirected.pop(); 2821 VisitedVarsDirected.insert(CurrentVar); 2822 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar]; 2823 for (const VarDecl *Adj : AdjacentNodes) { 2824 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) { 2825 QueueDirected.push(Adj); 2826 } 2827 DependenciesMap[Var].insert(Adj); 2828 DependenciesMap[Adj].insert(Var); 2829 } 2830 } 2831 } 2832 } 2833 2834 // `Groups` stores the set of Connected Components in the graph. 2835 std::vector<VarGrpTy> Groups; 2836 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the 2837 // variables belong to. Group indexes refer to the elements in `Groups`. 2838 // `VarGrpMap` is complete in that every variable that needs fix is in it. 2839 std::map<const VarDecl *, unsigned> VarGrpMap; 2840 // The union group over the ones in "Groups" that contain parameters of `D`: 2841 llvm::SetVector<const VarDecl *> 2842 GrpsUnionForParms; // these variables need to be fixed in one step 2843 2844 // Group Connected Components for Unsafe Vars 2845 // (Dependencies based on pointer assignments) 2846 std::set<const VarDecl *> VisitedVars{}; 2847 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2848 if (VisitedVars.find(Var) == VisitedVars.end()) { 2849 VarGrpTy &VarGroup = Groups.emplace_back(); 2850 std::queue<const VarDecl*> Queue{}; 2851 2852 Queue.push(Var); 2853 while(!Queue.empty()) { 2854 const VarDecl* CurrentVar = Queue.front(); 2855 Queue.pop(); 2856 VisitedVars.insert(CurrentVar); 2857 VarGroup.push_back(CurrentVar); 2858 auto AdjacentNodes = DependenciesMap[CurrentVar]; 2859 for (const VarDecl *Adj : AdjacentNodes) { 2860 if (VisitedVars.find(Adj) == VisitedVars.end()) { 2861 Queue.push(Adj); 2862 } 2863 } 2864 } 2865 2866 bool HasParm = false; 2867 unsigned GrpIdx = Groups.size() - 1; 2868 2869 for (const VarDecl *V : VarGroup) { 2870 VarGrpMap[V] = GrpIdx; 2871 if (!HasParm && isParameterOf(V, D)) 2872 HasParm = true; 2873 } 2874 if (HasParm) 2875 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end()); 2876 } 2877 } 2878 2879 // Remove a `FixableGadget` if the associated variable is not in the graph 2880 // computed above. We do not want to generate fix-its for such variables, 2881 // since they are neither warned nor reachable from a warned one. 2882 // 2883 // Note a variable is not warned if it is not directly used in any unsafe 2884 // operation. A variable `v` is NOT reachable from an unsafe variable, if it 2885 // does not exist another variable `u` such that `u` is warned and fixing `u` 2886 // (transitively) implicates fixing `v`. 2887 // 2888 // For example, 2889 // ``` 2890 // void f(int * p) { 2891 // int * a = p; *p = 0; 2892 // } 2893 // ``` 2894 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither 2895 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of 2896 // the function above, `p` becomes reachable from a warned variable. 2897 for (auto I = FixablesForAllVars.byVar.begin(); 2898 I != FixablesForAllVars.byVar.end();) { 2899 // Note `VisitedVars` contain all the variables in the graph: 2900 if (!VisitedVars.count((*I).first)) { 2901 // no such var in graph: 2902 I = FixablesForAllVars.byVar.erase(I); 2903 } else 2904 ++I; 2905 } 2906 2907 // We assign strategies to variables that are 1) in the graph and 2) can be 2908 // fixed. Other variables have the default "Won't fix" strategy. 2909 Strategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range( 2910 VisitedVars, [&FixablesForAllVars](const VarDecl *V) { 2911 // If a warned variable has no "Fixable", it is considered unfixable: 2912 return FixablesForAllVars.byVar.count(V); 2913 })); 2914 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms); 2915 2916 if (isa<NamedDecl>(D)) 2917 // The only case where `D` is not a `NamedDecl` is when `D` is a 2918 // `BlockDecl`. Let's not fix variables in blocks for now 2919 FixItsForVariableGroup = 2920 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D, 2921 Tracker, Handler, VarGrpMgr); 2922 2923 for (const auto &G : UnsafeOps.noVar) { 2924 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, 2925 D->getASTContext()); 2926 } 2927 2928 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { 2929 auto FixItsIt = FixItsForVariableGroup.find(VD); 2930 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr, 2931 FixItsIt != FixItsForVariableGroup.end() 2932 ? std::move(FixItsIt->second) 2933 : FixItList{}, 2934 D); 2935 for (const auto &G : WarningGadgets) { 2936 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true, 2937 D->getASTContext()); 2938 } 2939 } 2940 } 2941