1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/AST/RecursiveASTVisitor.h" 13 #include "clang/AST/StmtVisitor.h" 14 #include "clang/ASTMatchers/ASTMatchFinder.h" 15 #include "clang/Lex/Lexer.h" 16 #include "clang/Lex/Preprocessor.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include <memory> 19 #include <optional> 20 #include <sstream> 21 #include <queue> 22 23 using namespace llvm; 24 using namespace clang; 25 using namespace ast_matchers; 26 27 #ifndef NDEBUG 28 namespace { 29 class StmtDebugPrinter 30 : public ConstStmtVisitor<StmtDebugPrinter, std::string> { 31 public: 32 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); } 33 34 std::string VisitBinaryOperator(const BinaryOperator *BO) { 35 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")"; 36 } 37 38 std::string VisitUnaryOperator(const UnaryOperator *UO) { 39 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")"; 40 } 41 42 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { 43 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")"; 44 } 45 }; 46 47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form: 48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...". 49 static std::string getDREAncestorString(const DeclRefExpr *DRE, 50 ASTContext &Ctx) { 51 std::stringstream SS; 52 const Stmt *St = DRE; 53 StmtDebugPrinter StmtPriner; 54 55 do { 56 SS << StmtPriner.Visit(St); 57 58 DynTypedNodeList StParents = Ctx.getParents(*St); 59 60 if (StParents.size() > 1) 61 return "unavailable due to multiple parents"; 62 if (StParents.size() == 0) 63 break; 64 St = StParents.begin()->get<Stmt>(); 65 if (St) 66 SS << " ==> "; 67 } while (St); 68 return SS.str(); 69 } 70 } // namespace 71 #endif /* NDEBUG */ 72 73 namespace clang::ast_matchers { 74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n" 75 // except for those belonging to a different callable of "n". 76 class MatchDescendantVisitor 77 : public RecursiveASTVisitor<MatchDescendantVisitor> { 78 public: 79 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase; 80 81 // Creates an AST visitor that matches `Matcher` on all 82 // descendants of a given node "n" except for the ones 83 // belonging to a different callable of "n". 84 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, 85 internal::ASTMatchFinder *Finder, 86 internal::BoundNodesTreeBuilder *Builder, 87 internal::ASTMatchFinder::BindKind Bind, 88 const bool ignoreUnevaluatedContext) 89 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), 90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {} 91 92 // Returns true if a match is found in a subtree of `DynNode`, which belongs 93 // to the same callable of `DynNode`. 94 bool findMatch(const DynTypedNode &DynNode) { 95 Matches = false; 96 if (const Stmt *StmtNode = DynNode.get<Stmt>()) { 97 TraverseStmt(const_cast<Stmt *>(StmtNode)); 98 *Builder = ResultBindings; 99 return Matches; 100 } 101 return false; 102 } 103 104 // The following are overriding methods from the base visitor class. 105 // They are public only to allow CRTP to work. They are *not *part 106 // of the public API of this class. 107 108 // For the matchers so far used in safe buffers, we only need to match 109 // `Stmt`s. To override more as needed. 110 111 bool TraverseDecl(Decl *Node) { 112 if (!Node) 113 return true; 114 if (!match(*Node)) 115 return false; 116 // To skip callables: 117 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node)) 118 return true; 119 // Traverse descendants 120 return VisitorBase::TraverseDecl(Node); 121 } 122 123 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) { 124 // These are unevaluated, except the result expression. 125 if(ignoreUnevaluatedContext) 126 return TraverseStmt(Node->getResultExpr()); 127 return VisitorBase::TraverseGenericSelectionExpr(Node); 128 } 129 130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) { 131 // Unevaluated context. 132 if(ignoreUnevaluatedContext) 133 return true; 134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node); 135 } 136 137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) { 138 // Unevaluated context. 139 if(ignoreUnevaluatedContext) 140 return true; 141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node); 142 } 143 144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) { 145 // Unevaluated context. 146 if(ignoreUnevaluatedContext) 147 return true; 148 return VisitorBase::TraverseDecltypeTypeLoc(Node); 149 } 150 151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) { 152 // Unevaluated context. 153 if(ignoreUnevaluatedContext) 154 return true; 155 return VisitorBase::TraverseCXXNoexceptExpr(Node); 156 } 157 158 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) { 159 // Unevaluated context. 160 if(ignoreUnevaluatedContext) 161 return true; 162 return VisitorBase::TraverseCXXTypeidExpr(Node); 163 } 164 165 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { 166 if (!Node) 167 return true; 168 if (!match(*Node)) 169 return false; 170 return VisitorBase::TraverseStmt(Node); 171 } 172 173 bool shouldVisitTemplateInstantiations() const { return true; } 174 bool shouldVisitImplicitCode() const { 175 // TODO: let's ignore implicit code for now 176 return false; 177 } 178 179 private: 180 // Sets 'Matched' to true if 'Matcher' matches 'Node' 181 // 182 // Returns 'true' if traversal should continue after this function 183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'. 184 template <typename T> bool match(const T &Node) { 185 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); 186 187 if (Matcher->matches(DynTypedNode::create(Node), Finder, 188 &RecursiveBuilder)) { 189 ResultBindings.addMatch(RecursiveBuilder); 190 Matches = true; 191 if (Bind != internal::ASTMatchFinder::BK_All) 192 return false; // Abort as soon as a match is found. 193 } 194 return true; 195 } 196 197 const internal::DynTypedMatcher *const Matcher; 198 internal::ASTMatchFinder *const Finder; 199 internal::BoundNodesTreeBuilder *const Builder; 200 internal::BoundNodesTreeBuilder ResultBindings; 201 const internal::ASTMatchFinder::BindKind Bind; 202 bool Matches; 203 bool ignoreUnevaluatedContext; 204 }; 205 206 // Because we're dealing with raw pointers, let's define what we mean by that. 207 static auto hasPointerType() { 208 return hasType(hasCanonicalType(pointerType())); 209 } 210 211 static auto hasArrayType() { 212 return hasType(hasCanonicalType(arrayType())); 213 } 214 215 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) { 216 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 217 218 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true); 219 return Visitor.findMatch(DynTypedNode::create(Node)); 220 } 221 222 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) { 223 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 224 225 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false); 226 return Visitor.findMatch(DynTypedNode::create(Node)); 227 } 228 229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region 230 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, 231 Handler) { 232 return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); 233 } 234 235 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { 236 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); 237 } 238 239 // Matches a `UnaryOperator` whose operator is pre-increment: 240 AST_MATCHER(UnaryOperator, isPreInc) { 241 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc; 242 } 243 244 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 245 // matches 'e' and 'e' is in an Unspecified Lvalue Context. 246 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) { 247 // clang-format off 248 return 249 expr(anyOf( 250 implicitCastExpr( 251 hasCastKind(CastKind::CK_LValueToRValue), 252 castSubExpr(innerMatcher)), 253 binaryOperator( 254 hasAnyOperatorName("="), 255 hasLHS(innerMatcher) 256 ) 257 )); 258 // clang-format on 259 } 260 261 262 // Returns a matcher that matches any expression `e` such that `InnerMatcher` 263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC). 264 static internal::Matcher<Stmt> 265 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) { 266 // A UPC can be 267 // 1. an argument of a function call (except the callee has [[unsafe_...]] 268 // attribute), or 269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or 270 // 3. the operand of a comparator operation; or 271 // 4. the operand of a pointer subtraction operation 272 // (i.e., computing the distance between two pointers); or ... 273 274 auto CallArgMatcher = 275 callExpr(forEachArgumentWithParam(InnerMatcher, 276 hasPointerType() /* array also decays to pointer type*/), 277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); 278 279 auto CastOperandMatcher = 280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), 281 hasCastKind(CastKind::CK_PointerToBoolean)), 282 castSubExpr(allOf(hasPointerType(), InnerMatcher))); 283 284 auto CompOperandMatcher = 285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="), 286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)), 287 hasRHS(allOf(hasPointerType(), InnerMatcher)))); 288 289 // A matcher that matches pointer subtractions: 290 auto PtrSubtractionMatcher = 291 binaryOperator(hasOperatorName("-"), 292 // Note that here we need both LHS and RHS to be 293 // pointer. Then the inner matcher can match any of 294 // them: 295 allOf(hasLHS(hasPointerType()), 296 hasRHS(hasPointerType())), 297 eachOf(hasLHS(InnerMatcher), 298 hasRHS(InnerMatcher))); 299 300 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, 301 PtrSubtractionMatcher)); 302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we 303 // don't have to check that.) 304 } 305 306 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression 308 // 'e' isn't evaluated to an RValue). For example, consider the following code: 309 // int *p = new int[4]; 310 // int *q = new int[4]; 311 // if ((p = q)) {} 312 // p = q; 313 // The expression `p = q` in the conditional of the `if` statement 314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;` 315 // in the assignment statement is in an untyped context. 316 static internal::Matcher<Stmt> 317 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) { 318 // An unspecified context can be 319 // 1. A compound statement, 320 // 2. The body of an if statement 321 // 3. Body of a loop 322 auto CompStmt = compoundStmt(forEach(InnerMatcher)); 323 auto IfStmtThen = ifStmt(hasThen(InnerMatcher)); 324 auto IfStmtElse = ifStmt(hasElse(InnerMatcher)); 325 // FIXME: Handle loop bodies. 326 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse)); 327 } 328 } // namespace clang::ast_matchers 329 330 namespace { 331 // Because the analysis revolves around variables and their types, we'll need to 332 // track uses of variables (aka DeclRefExprs). 333 using DeclUseList = SmallVector<const DeclRefExpr *, 1>; 334 335 // Convenience typedef. 336 using FixItList = SmallVector<FixItHint, 4>; 337 338 // Defined below. 339 class Strategy; 340 } // namespace 341 342 namespace { 343 /// Gadget is an individual operation in the code that may be of interest to 344 /// this analysis. Each (non-abstract) subclass corresponds to a specific 345 /// rigid AST structure that constitutes an operation on a pointer-type object. 346 /// Discovery of a gadget in the code corresponds to claiming that we understand 347 /// what this part of code is doing well enough to potentially improve it. 348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not 349 /// always deserving a warning per se, but requires our attention to identify 350 /// it warrants a fixit). 351 class Gadget { 352 public: 353 enum class Kind { 354 #define GADGET(x) x, 355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 356 }; 357 358 /// Common type of ASTMatchers used for discovering gadgets. 359 /// Useful for implementing the static matcher() methods 360 /// that are expected from all non-abstract subclasses. 361 using Matcher = decltype(stmt()); 362 363 Gadget(Kind K) : K(K) {} 364 365 Kind getKind() const { return K; } 366 367 #ifndef NDEBUG 368 StringRef getDebugName() const { 369 switch (K) { 370 #define GADGET(x) case Kind::x: return #x; 371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 372 } 373 llvm_unreachable("Unhandled Gadget::Kind enum"); 374 } 375 #endif 376 377 virtual bool isWarningGadget() const = 0; 378 virtual const Stmt *getBaseStmt() const = 0; 379 380 /// Returns the list of pointer-type variables on which this gadget performs 381 /// its operation. Typically, there's only one variable. This isn't a list 382 /// of all DeclRefExprs in the gadget's AST! 383 virtual DeclUseList getClaimedVarUseSites() const = 0; 384 385 virtual ~Gadget() = default; 386 387 private: 388 Kind K; 389 }; 390 391 392 /// Warning gadgets correspond to unsafe code patterns that warrants 393 /// an immediate warning. 394 class WarningGadget : public Gadget { 395 public: 396 WarningGadget(Kind K) : Gadget(K) {} 397 398 static bool classof(const Gadget *G) { return G->isWarningGadget(); } 399 bool isWarningGadget() const final { return true; } 400 }; 401 402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be 403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type 404 /// variable is replaced by a safe C++ container, every use of such variable must be 405 /// carefully considered and possibly updated. 406 class FixableGadget : public Gadget { 407 public: 408 FixableGadget(Kind K) : Gadget(K) {} 409 410 static bool classof(const Gadget *G) { return !G->isWarningGadget(); } 411 bool isWarningGadget() const final { return false; } 412 413 /// Returns a fixit that would fix the current gadget according to 414 /// the current strategy. Returns std::nullopt if the fix cannot be produced; 415 /// returns an empty list if no fixes are necessary. 416 virtual std::optional<FixItList> getFixits(const Strategy &) const { 417 return std::nullopt; 418 } 419 420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment 421 /// statement and the second element is the RHS. This two-element list represents the fact that 422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used 423 /// later to group all those variables whose types must be modified together to prevent type 424 /// mismatches. 425 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 426 getStrategyImplications() const { 427 return std::nullopt; 428 } 429 }; 430 431 static auto toSupportedVariable() { 432 return to(varDecl()); 433 } 434 435 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; 436 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; 437 438 /// An increment of a pointer-type value is unsafe as it may run the pointer 439 /// out of bounds. 440 class IncrementGadget : public WarningGadget { 441 static constexpr const char *const OpTag = "op"; 442 const UnaryOperator *Op; 443 444 public: 445 IncrementGadget(const MatchFinder::MatchResult &Result) 446 : WarningGadget(Kind::Increment), 447 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 448 449 static bool classof(const Gadget *G) { 450 return G->getKind() == Kind::Increment; 451 } 452 453 static Matcher matcher() { 454 return stmt(unaryOperator( 455 hasOperatorName("++"), 456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 457 ).bind(OpTag)); 458 } 459 460 const UnaryOperator *getBaseStmt() const override { return Op; } 461 462 DeclUseList getClaimedVarUseSites() const override { 463 SmallVector<const DeclRefExpr *, 2> Uses; 464 if (const auto *DRE = 465 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 466 Uses.push_back(DRE); 467 } 468 469 return std::move(Uses); 470 } 471 }; 472 473 /// A decrement of a pointer-type value is unsafe as it may run the pointer 474 /// out of bounds. 475 class DecrementGadget : public WarningGadget { 476 static constexpr const char *const OpTag = "op"; 477 const UnaryOperator *Op; 478 479 public: 480 DecrementGadget(const MatchFinder::MatchResult &Result) 481 : WarningGadget(Kind::Decrement), 482 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 483 484 static bool classof(const Gadget *G) { 485 return G->getKind() == Kind::Decrement; 486 } 487 488 static Matcher matcher() { 489 return stmt(unaryOperator( 490 hasOperatorName("--"), 491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 492 ).bind(OpTag)); 493 } 494 495 const UnaryOperator *getBaseStmt() const override { return Op; } 496 497 DeclUseList getClaimedVarUseSites() const override { 498 if (const auto *DRE = 499 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 500 return {DRE}; 501 } 502 503 return {}; 504 } 505 }; 506 507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as 508 /// it doesn't have any bounds checks for the array. 509 class ArraySubscriptGadget : public WarningGadget { 510 static constexpr const char *const ArraySubscrTag = "ArraySubscript"; 511 const ArraySubscriptExpr *ASE; 512 513 public: 514 ArraySubscriptGadget(const MatchFinder::MatchResult &Result) 515 : WarningGadget(Kind::ArraySubscript), 516 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} 517 518 static bool classof(const Gadget *G) { 519 return G->getKind() == Kind::ArraySubscript; 520 } 521 522 static Matcher matcher() { 523 // FIXME: What if the index is integer literal 0? Should this be 524 // a safe gadget in this case? 525 // clang-format off 526 return stmt(arraySubscriptExpr( 527 hasBase(ignoringParenImpCasts( 528 anyOf(hasPointerType(), hasArrayType()))), 529 unless(hasIndex( 530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) 531 ))) 532 .bind(ArraySubscrTag)); 533 // clang-format on 534 } 535 536 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } 537 538 DeclUseList getClaimedVarUseSites() const override { 539 if (const auto *DRE = 540 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { 541 return {DRE}; 542 } 543 544 return {}; 545 } 546 }; 547 548 /// A pointer arithmetic expression of one of the forms: 549 /// \code 550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n 551 /// \endcode 552 class PointerArithmeticGadget : public WarningGadget { 553 static constexpr const char *const PointerArithmeticTag = "ptrAdd"; 554 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; 555 const BinaryOperator *PA; // pointer arithmetic expression 556 const Expr *Ptr; // the pointer expression in `PA` 557 558 public: 559 PointerArithmeticGadget(const MatchFinder::MatchResult &Result) 560 : WarningGadget(Kind::PointerArithmetic), 561 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), 562 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} 563 564 static bool classof(const Gadget *G) { 565 return G->getKind() == Kind::PointerArithmetic; 566 } 567 568 static Matcher matcher() { 569 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType())); 570 auto PtrAtRight = 571 allOf(hasOperatorName("+"), 572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 573 hasLHS(HasIntegerType)); 574 auto PtrAtLeft = 575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"), 576 hasOperatorName("+="), hasOperatorName("-=")), 577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 578 hasRHS(HasIntegerType)); 579 580 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)) 581 .bind(PointerArithmeticTag)); 582 } 583 584 const Stmt *getBaseStmt() const override { return PA; } 585 586 DeclUseList getClaimedVarUseSites() const override { 587 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { 588 return {DRE}; 589 } 590 591 return {}; 592 } 593 // FIXME: pointer adding zero should be fine 594 // FIXME: this gadge will need a fix-it 595 }; 596 597 /// A pointer initialization expression of the form: 598 /// \code 599 /// int *p = q; 600 /// \endcode 601 class PointerInitGadget : public FixableGadget { 602 private: 603 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS"; 604 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS"; 605 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI` 606 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI` 607 608 public: 609 PointerInitGadget(const MatchFinder::MatchResult &Result) 610 : FixableGadget(Kind::PointerInit), 611 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)), 612 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {} 613 614 static bool classof(const Gadget *G) { 615 return G->getKind() == Kind::PointerInit; 616 } 617 618 static Matcher matcher() { 619 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl( 620 hasInitializer(ignoringImpCasts(declRefExpr( 621 hasPointerType(), 622 toSupportedVariable()). 623 bind(PointerInitRHSTag)))). 624 bind(PointerInitLHSTag))); 625 626 return stmt(PtrInitStmt); 627 } 628 629 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 630 631 virtual const Stmt *getBaseStmt() const override { 632 // FIXME: This needs to be the entire DeclStmt, assuming that this method 633 // makes sense at all on a FixableGadget. 634 return PtrInitRHS; 635 } 636 637 virtual DeclUseList getClaimedVarUseSites() const override { 638 return DeclUseList{PtrInitRHS}; 639 } 640 641 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 642 getStrategyImplications() const override { 643 return std::make_pair(PtrInitLHS, 644 cast<VarDecl>(PtrInitRHS->getDecl())); 645 } 646 }; 647 648 /// A pointer assignment expression of the form: 649 /// \code 650 /// p = q; 651 /// \endcode 652 class PointerAssignmentGadget : public FixableGadget { 653 private: 654 static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; 655 static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; 656 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA` 657 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA` 658 659 public: 660 PointerAssignmentGadget(const MatchFinder::MatchResult &Result) 661 : FixableGadget(Kind::PointerAssignment), 662 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), 663 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} 664 665 static bool classof(const Gadget *G) { 666 return G->getKind() == Kind::PointerAssignment; 667 } 668 669 static Matcher matcher() { 670 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="), 671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(), 672 toSupportedVariable()). 673 bind(PointerAssignRHSTag))), 674 hasLHS(declRefExpr(hasPointerType(), 675 toSupportedVariable()). 676 bind(PointerAssignLHSTag)))); 677 678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); 679 } 680 681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 682 683 virtual const Stmt *getBaseStmt() const override { 684 // FIXME: This should be the binary operator, assuming that this method 685 // makes sense at all on a FixableGadget. 686 return PtrLHS; 687 } 688 689 virtual DeclUseList getClaimedVarUseSites() const override { 690 return DeclUseList{PtrLHS, PtrRHS}; 691 } 692 693 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 694 getStrategyImplications() const override { 695 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()), 696 cast<VarDecl>(PtrRHS->getDecl())); 697 } 698 }; 699 700 /// A call of a function or method that performs unchecked buffer operations 701 /// over one of its pointer parameters. 702 class UnsafeBufferUsageAttrGadget : public WarningGadget { 703 constexpr static const char *const OpTag = "call_expr"; 704 const CallExpr *Op; 705 706 public: 707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result) 708 : WarningGadget(Kind::UnsafeBufferUsageAttr), 709 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {} 710 711 static bool classof(const Gadget *G) { 712 return G->getKind() == Kind::UnsafeBufferUsageAttr; 713 } 714 715 static Matcher matcher() { 716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))) 717 .bind(OpTag)); 718 } 719 const Stmt *getBaseStmt() const override { return Op; } 720 721 DeclUseList getClaimedVarUseSites() const override { return {}; } 722 }; 723 724 // Warning gadget for unsafe invocation of span::data method. 725 // Triggers when the pointer returned by the invocation is immediately 726 // cast to a larger type. 727 728 class DataInvocationGadget : public WarningGadget { 729 constexpr static const char *const OpTag = "data_invocation_expr"; 730 const ExplicitCastExpr *Op; 731 732 public: 733 DataInvocationGadget(const MatchFinder::MatchResult &Result) 734 : WarningGadget(Kind::DataInvocation), 735 Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} 736 737 static bool classof(const Gadget *G) { 738 return G->getKind() == Kind::DataInvocation; 739 } 740 741 static Matcher matcher() { 742 Matcher callExpr = cxxMemberCallExpr( 743 callee(cxxMethodDecl(hasName("data"), ofClass(hasName("std::span"))))); 744 return stmt( 745 explicitCastExpr(anyOf(has(callExpr), has(parenExpr(has(callExpr))))) 746 .bind(OpTag)); 747 } 748 const Stmt *getBaseStmt() const override { return Op; } 749 750 DeclUseList getClaimedVarUseSites() const override { return {}; } 751 }; 752 753 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue 754 // Context (see `isInUnspecifiedLvalueContext`). 755 // Note here `[]` is the built-in subscript operator. 756 class ULCArraySubscriptGadget : public FixableGadget { 757 private: 758 static constexpr const char *const ULCArraySubscriptTag = 759 "ArraySubscriptUnderULC"; 760 const ArraySubscriptExpr *Node; 761 762 public: 763 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) 764 : FixableGadget(Kind::ULCArraySubscript), 765 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) { 766 assert(Node != nullptr && "Expecting a non-null matching result"); 767 } 768 769 static bool classof(const Gadget *G) { 770 return G->getKind() == Kind::ULCArraySubscript; 771 } 772 773 static Matcher matcher() { 774 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 775 auto BaseIsArrayOrPtrDRE = 776 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr, 777 toSupportedVariable()))); 778 auto Target = 779 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag); 780 781 return expr(isInUnspecifiedLvalueContext(Target)); 782 } 783 784 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 785 786 virtual const Stmt *getBaseStmt() const override { return Node; } 787 788 virtual DeclUseList getClaimedVarUseSites() const override { 789 if (const auto *DRE = 790 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) { 791 return {DRE}; 792 } 793 return {}; 794 } 795 }; 796 797 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the 798 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits 799 // fixit of the form `UPC(DRE.data())`. 800 class UPCStandalonePointerGadget : public FixableGadget { 801 private: 802 static constexpr const char *const DeclRefExprTag = "StandalonePointer"; 803 const DeclRefExpr *Node; 804 805 public: 806 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result) 807 : FixableGadget(Kind::UPCStandalonePointer), 808 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) { 809 assert(Node != nullptr && "Expecting a non-null matching result"); 810 } 811 812 static bool classof(const Gadget *G) { 813 return G->getKind() == Kind::UPCStandalonePointer; 814 } 815 816 static Matcher matcher() { 817 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 818 auto target = expr( 819 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr, 820 toSupportedVariable())).bind(DeclRefExprTag))); 821 return stmt(isInUnspecifiedPointerContext(target)); 822 } 823 824 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 825 826 virtual const Stmt *getBaseStmt() const override { return Node; } 827 828 virtual DeclUseList getClaimedVarUseSites() const override { 829 return {Node}; 830 } 831 }; 832 833 class PointerDereferenceGadget : public FixableGadget { 834 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 835 static constexpr const char *const OperatorTag = "op"; 836 837 const DeclRefExpr *BaseDeclRefExpr = nullptr; 838 const UnaryOperator *Op = nullptr; 839 840 public: 841 PointerDereferenceGadget(const MatchFinder::MatchResult &Result) 842 : FixableGadget(Kind::PointerDereference), 843 BaseDeclRefExpr( 844 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 845 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {} 846 847 static bool classof(const Gadget *G) { 848 return G->getKind() == Kind::PointerDereference; 849 } 850 851 static Matcher matcher() { 852 auto Target = 853 unaryOperator( 854 hasOperatorName("*"), 855 has(expr(ignoringParenImpCasts( 856 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag))))) 857 .bind(OperatorTag); 858 859 return expr(isInUnspecifiedLvalueContext(Target)); 860 } 861 862 DeclUseList getClaimedVarUseSites() const override { 863 return {BaseDeclRefExpr}; 864 } 865 866 virtual const Stmt *getBaseStmt() const final { return Op; } 867 868 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 869 }; 870 871 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer 872 // Context (see `isInUnspecifiedPointerContext`). 873 // Note here `[]` is the built-in subscript operator. 874 class UPCAddressofArraySubscriptGadget : public FixableGadget { 875 private: 876 static constexpr const char *const UPCAddressofArraySubscriptTag = 877 "AddressofArraySubscriptUnderUPC"; 878 const UnaryOperator *Node; // the `&DRE[any]` node 879 880 public: 881 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) 882 : FixableGadget(Kind::ULCArraySubscript), 883 Node(Result.Nodes.getNodeAs<UnaryOperator>( 884 UPCAddressofArraySubscriptTag)) { 885 assert(Node != nullptr && "Expecting a non-null matching result"); 886 } 887 888 static bool classof(const Gadget *G) { 889 return G->getKind() == Kind::UPCAddressofArraySubscript; 890 } 891 892 static Matcher matcher() { 893 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 894 unaryOperator(hasOperatorName("&"), 895 hasUnaryOperand(arraySubscriptExpr( 896 hasBase(ignoringParenImpCasts(declRefExpr( 897 toSupportedVariable())))))) 898 .bind(UPCAddressofArraySubscriptTag))))); 899 } 900 901 virtual std::optional<FixItList> getFixits(const Strategy &) const override; 902 903 virtual const Stmt *getBaseStmt() const override { return Node; } 904 905 virtual DeclUseList getClaimedVarUseSites() const override { 906 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr()); 907 const auto *DRE = 908 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts()); 909 return {DRE}; 910 } 911 }; 912 } // namespace 913 914 namespace { 915 // An auxiliary tracking facility for the fixit analysis. It helps connect 916 // declarations to its uses and make sure we've covered all uses with our 917 // analysis before we try to fix the declaration. 918 class DeclUseTracker { 919 using UseSetTy = SmallSet<const DeclRefExpr *, 16>; 920 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>; 921 922 // Allocate on the heap for easier move. 923 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()}; 924 DefMapTy Defs{}; 925 926 public: 927 DeclUseTracker() = default; 928 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. 929 DeclUseTracker &operator=(const DeclUseTracker &) = delete; 930 DeclUseTracker(DeclUseTracker &&) = default; 931 DeclUseTracker &operator=(DeclUseTracker &&) = default; 932 933 // Start tracking a freshly discovered DRE. 934 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } 935 936 // Stop tracking the DRE as it's been fully figured out. 937 void claimUse(const DeclRefExpr *DRE) { 938 assert(Uses->count(DRE) && 939 "DRE not found or claimed by multiple matchers!"); 940 Uses->erase(DRE); 941 } 942 943 // A variable is unclaimed if at least one use is unclaimed. 944 bool hasUnclaimedUses(const VarDecl *VD) const { 945 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? 946 return any_of(*Uses, [VD](const DeclRefExpr *DRE) { 947 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); 948 }); 949 } 950 951 UseSetTy getUnclaimedUses(const VarDecl *VD) const { 952 UseSetTy ReturnSet; 953 for (auto use : *Uses) { 954 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) { 955 ReturnSet.insert(use); 956 } 957 } 958 return ReturnSet; 959 } 960 961 void discoverDecl(const DeclStmt *DS) { 962 for (const Decl *D : DS->decls()) { 963 if (const auto *VD = dyn_cast<VarDecl>(D)) { 964 // FIXME: Assertion temporarily disabled due to a bug in 965 // ASTMatcher internal behavior in presence of GNU 966 // statement-expressions. We need to properly investigate this 967 // because it can screw up our algorithm in other ways. 968 // assert(Defs.count(VD) == 0 && "Definition already discovered!"); 969 Defs[VD] = DS; 970 } 971 } 972 } 973 974 const DeclStmt *lookupDecl(const VarDecl *VD) const { 975 return Defs.lookup(VD); 976 } 977 }; 978 } // namespace 979 980 namespace { 981 // Strategy is a map from variables to the way we plan to emit fixes for 982 // these variables. It is figured out gradually by trying different fixes 983 // for different variables depending on gadgets in which these variables 984 // participate. 985 class Strategy { 986 public: 987 enum class Kind { 988 Wontfix, // We don't plan to emit a fixit for this variable. 989 Span, // We recommend replacing the variable with std::span. 990 Iterator, // We recommend replacing the variable with std::span::iterator. 991 Array, // We recommend replacing the variable with std::array. 992 Vector // We recommend replacing the variable with std::vector. 993 }; 994 995 private: 996 using MapTy = llvm::DenseMap<const VarDecl *, Kind>; 997 998 MapTy Map; 999 1000 public: 1001 Strategy() = default; 1002 Strategy(const Strategy &) = delete; // Let's avoid copies. 1003 Strategy &operator=(const Strategy &) = delete; 1004 Strategy(Strategy &&) = default; 1005 Strategy &operator=(Strategy &&) = default; 1006 1007 void set(const VarDecl *VD, Kind K) { Map[VD] = K; } 1008 1009 Kind lookup(const VarDecl *VD) const { 1010 auto I = Map.find(VD); 1011 if (I == Map.end()) 1012 return Kind::Wontfix; 1013 1014 return I->second; 1015 } 1016 }; 1017 } // namespace 1018 1019 1020 // Representing a pointer type expression of the form `++Ptr` in an Unspecified 1021 // Pointer Context (UPC): 1022 class UPCPreIncrementGadget : public FixableGadget { 1023 private: 1024 static constexpr const char *const UPCPreIncrementTag = 1025 "PointerPreIncrementUnderUPC"; 1026 const UnaryOperator *Node; // the `++Ptr` node 1027 1028 public: 1029 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result) 1030 : FixableGadget(Kind::UPCPreIncrement), 1031 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) { 1032 assert(Node != nullptr && "Expecting a non-null matching result"); 1033 } 1034 1035 static bool classof(const Gadget *G) { 1036 return G->getKind() == Kind::UPCPreIncrement; 1037 } 1038 1039 static Matcher matcher() { 1040 // Note here we match `++Ptr` for any expression `Ptr` of pointer type. 1041 // Although currently we can only provide fix-its when `Ptr` is a DRE, we 1042 // can have the matcher be general, so long as `getClaimedVarUseSites` does 1043 // things right. 1044 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 1045 unaryOperator(isPreInc(), 1046 hasUnaryOperand(declRefExpr( 1047 toSupportedVariable())) 1048 ).bind(UPCPreIncrementTag))))); 1049 } 1050 1051 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 1052 1053 virtual const Stmt *getBaseStmt() const override { return Node; } 1054 1055 virtual DeclUseList getClaimedVarUseSites() const override { 1056 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())}; 1057 } 1058 }; 1059 1060 // Representing a pointer type expression of the form `Ptr += n` in an 1061 // Unspecified Untyped Context (UUC): 1062 class UUCAddAssignGadget : public FixableGadget { 1063 private: 1064 static constexpr const char *const UUCAddAssignTag = 1065 "PointerAddAssignUnderUUC"; 1066 static constexpr const char *const OffsetTag = "Offset"; 1067 1068 const BinaryOperator *Node; // the `Ptr += n` node 1069 const Expr *Offset = nullptr; 1070 1071 public: 1072 UUCAddAssignGadget(const MatchFinder::MatchResult &Result) 1073 : FixableGadget(Kind::UUCAddAssign), 1074 Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)), 1075 Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) { 1076 assert(Node != nullptr && "Expecting a non-null matching result"); 1077 } 1078 1079 static bool classof(const Gadget *G) { 1080 return G->getKind() == Kind::UUCAddAssign; 1081 } 1082 1083 static Matcher matcher() { 1084 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts( 1085 binaryOperator(hasOperatorName("+="), 1086 hasLHS(declRefExpr(toSupportedVariable())), 1087 hasRHS(expr().bind(OffsetTag))) 1088 .bind(UUCAddAssignTag))))); 1089 } 1090 1091 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 1092 1093 virtual const Stmt *getBaseStmt() const override { return Node; } 1094 1095 virtual DeclUseList getClaimedVarUseSites() const override { 1096 return {dyn_cast<DeclRefExpr>(Node->getLHS())}; 1097 } 1098 }; 1099 1100 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 + 1101 // ptr)`: 1102 class DerefSimplePtrArithFixableGadget : public FixableGadget { 1103 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 1104 static constexpr const char *const DerefOpTag = "DerefOp"; 1105 static constexpr const char *const AddOpTag = "AddOp"; 1106 static constexpr const char *const OffsetTag = "Offset"; 1107 1108 const DeclRefExpr *BaseDeclRefExpr = nullptr; 1109 const UnaryOperator *DerefOp = nullptr; 1110 const BinaryOperator *AddOp = nullptr; 1111 const IntegerLiteral *Offset = nullptr; 1112 1113 public: 1114 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result) 1115 : FixableGadget(Kind::DerefSimplePtrArithFixable), 1116 BaseDeclRefExpr( 1117 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 1118 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)), 1119 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)), 1120 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {} 1121 1122 static Matcher matcher() { 1123 // clang-format off 1124 auto ThePtr = expr(hasPointerType(), 1125 ignoringImpCasts(declRefExpr(toSupportedVariable()). 1126 bind(BaseDeclRefExprTag))); 1127 auto PlusOverPtrAndInteger = expr(anyOf( 1128 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr), 1129 hasRHS(integerLiteral().bind(OffsetTag))) 1130 .bind(AddOpTag), 1131 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr), 1132 hasLHS(integerLiteral().bind(OffsetTag))) 1133 .bind(AddOpTag))); 1134 return isInUnspecifiedLvalueContext(unaryOperator( 1135 hasOperatorName("*"), 1136 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger))) 1137 .bind(DerefOpTag)); 1138 // clang-format on 1139 } 1140 1141 virtual std::optional<FixItList> getFixits(const Strategy &s) const final; 1142 1143 // TODO remove this method from FixableGadget interface 1144 virtual const Stmt *getBaseStmt() const final { return nullptr; } 1145 1146 virtual DeclUseList getClaimedVarUseSites() const final { 1147 return {BaseDeclRefExpr}; 1148 } 1149 }; 1150 1151 /// Scan the function and return a list of gadgets found with provided kits. 1152 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker> 1153 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, 1154 bool EmitSuggestions) { 1155 1156 struct GadgetFinderCallback : MatchFinder::MatchCallback { 1157 FixableGadgetList FixableGadgets; 1158 WarningGadgetList WarningGadgets; 1159 DeclUseTracker Tracker; 1160 1161 void run(const MatchFinder::MatchResult &Result) override { 1162 // In debug mode, assert that we've found exactly one gadget. 1163 // This helps us avoid conflicts in .bind() tags. 1164 #if NDEBUG 1165 #define NEXT return 1166 #else 1167 [[maybe_unused]] int numFound = 0; 1168 #define NEXT ++numFound 1169 #endif 1170 1171 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) { 1172 Tracker.discoverUse(DRE); 1173 NEXT; 1174 } 1175 1176 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) { 1177 Tracker.discoverDecl(DS); 1178 NEXT; 1179 } 1180 1181 // Figure out which matcher we've found, and call the appropriate 1182 // subclass constructor. 1183 // FIXME: Can we do this more logarithmically? 1184 #define FIXABLE_GADGET(name) \ 1185 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1186 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1187 NEXT; \ 1188 } 1189 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1190 #define WARNING_GADGET(name) \ 1191 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1192 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1193 NEXT; \ 1194 } 1195 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1196 1197 assert(numFound >= 1 && "Gadgets not found in match result!"); 1198 assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); 1199 } 1200 }; 1201 1202 MatchFinder M; 1203 GadgetFinderCallback CB; 1204 1205 // clang-format off 1206 M.addMatcher( 1207 stmt( 1208 forEachDescendantEvaluatedStmt(stmt(anyOf( 1209 // Add Gadget::matcher() for every gadget in the registry. 1210 #define WARNING_GADGET(x) \ 1211 allOf(x ## Gadget::matcher().bind(#x), \ 1212 notInSafeBufferOptOut(&Handler)), 1213 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1214 // Avoid a hanging comma. 1215 unless(stmt()) 1216 ))) 1217 ), 1218 &CB 1219 ); 1220 // clang-format on 1221 1222 if (EmitSuggestions) { 1223 // clang-format off 1224 M.addMatcher( 1225 stmt( 1226 forEachDescendantStmt(stmt(eachOf( 1227 #define FIXABLE_GADGET(x) \ 1228 x ## Gadget::matcher().bind(#x), 1229 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1230 // In parallel, match all DeclRefExprs so that to find out 1231 // whether there are any uncovered by gadgets. 1232 declRefExpr(anyOf(hasPointerType(), hasArrayType()), 1233 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"), 1234 // Also match DeclStmts because we'll need them when fixing 1235 // their underlying VarDecls that otherwise don't have 1236 // any backreferences to DeclStmts. 1237 declStmt().bind("any_ds") 1238 ))) 1239 ), 1240 &CB 1241 ); 1242 // clang-format on 1243 } 1244 1245 M.match(*D->getBody(), D->getASTContext()); 1246 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), 1247 std::move(CB.Tracker)}; 1248 } 1249 1250 // Compares AST nodes by source locations. 1251 template <typename NodeTy> struct CompareNode { 1252 bool operator()(const NodeTy *N1, const NodeTy *N2) const { 1253 return N1->getBeginLoc().getRawEncoding() < 1254 N2->getBeginLoc().getRawEncoding(); 1255 } 1256 }; 1257 1258 struct WarningGadgetSets { 1259 std::map<const VarDecl *, std::set<const WarningGadget *>, 1260 // To keep keys sorted by their locations in the map so that the 1261 // order is deterministic: 1262 CompareNode<VarDecl>> 1263 byVar; 1264 // These Gadgets are not related to pointer variables (e. g. temporaries). 1265 llvm::SmallVector<const WarningGadget *, 16> noVar; 1266 }; 1267 1268 static WarningGadgetSets 1269 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) { 1270 WarningGadgetSets result; 1271 // If some gadgets cover more than one 1272 // variable, they'll appear more than once in the map. 1273 for (auto &G : AllUnsafeOperations) { 1274 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); 1275 1276 bool AssociatedWithVarDecl = false; 1277 for (const DeclRefExpr *DRE : ClaimedVarUseSites) { 1278 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1279 result.byVar[VD].insert(G.get()); 1280 AssociatedWithVarDecl = true; 1281 } 1282 } 1283 1284 if (!AssociatedWithVarDecl) { 1285 result.noVar.push_back(G.get()); 1286 continue; 1287 } 1288 } 1289 return result; 1290 } 1291 1292 struct FixableGadgetSets { 1293 std::map<const VarDecl *, std::set<const FixableGadget *>, 1294 // To keep keys sorted by their locations in the map so that the 1295 // order is deterministic: 1296 CompareNode<VarDecl>> 1297 byVar; 1298 }; 1299 1300 static FixableGadgetSets 1301 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { 1302 FixableGadgetSets FixablesForUnsafeVars; 1303 for (auto &F : AllFixableOperations) { 1304 DeclUseList DREs = F->getClaimedVarUseSites(); 1305 1306 for (const DeclRefExpr *DRE : DREs) { 1307 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1308 FixablesForUnsafeVars.byVar[VD].insert(F.get()); 1309 } 1310 } 1311 } 1312 return FixablesForUnsafeVars; 1313 } 1314 1315 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts, 1316 const SourceManager &SM) { 1317 // A simple interval overlap detection algorithm. Sorts all ranges by their 1318 // begin location then finds the first overlap in one pass. 1319 std::vector<const FixItHint *> All; // a copy of `FixIts` 1320 1321 for (const FixItHint &H : FixIts) 1322 All.push_back(&H); 1323 std::sort(All.begin(), All.end(), 1324 [&SM](const FixItHint *H1, const FixItHint *H2) { 1325 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(), 1326 H2->RemoveRange.getBegin()); 1327 }); 1328 1329 const FixItHint *CurrHint = nullptr; 1330 1331 for (const FixItHint *Hint : All) { 1332 if (!CurrHint || 1333 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(), 1334 Hint->RemoveRange.getBegin())) { 1335 // Either to initialize `CurrHint` or `CurrHint` does not 1336 // overlap with `Hint`: 1337 CurrHint = Hint; 1338 } else 1339 // In case `Hint` overlaps the `CurrHint`, we found at least one 1340 // conflict: 1341 return true; 1342 } 1343 return false; 1344 } 1345 1346 std::optional<FixItList> 1347 PointerAssignmentGadget::getFixits(const Strategy &S) const { 1348 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); 1349 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); 1350 switch (S.lookup(LeftVD)) { 1351 case Strategy::Kind::Span: 1352 if (S.lookup(RightVD) == Strategy::Kind::Span) 1353 return FixItList{}; 1354 return std::nullopt; 1355 case Strategy::Kind::Wontfix: 1356 return std::nullopt; 1357 case Strategy::Kind::Iterator: 1358 case Strategy::Kind::Array: 1359 case Strategy::Kind::Vector: 1360 llvm_unreachable("unsupported strategies for FixableGadgets"); 1361 } 1362 return std::nullopt; 1363 } 1364 1365 std::optional<FixItList> 1366 PointerInitGadget::getFixits(const Strategy &S) const { 1367 const auto *LeftVD = PtrInitLHS; 1368 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl()); 1369 switch (S.lookup(LeftVD)) { 1370 case Strategy::Kind::Span: 1371 if (S.lookup(RightVD) == Strategy::Kind::Span) 1372 return FixItList{}; 1373 return std::nullopt; 1374 case Strategy::Kind::Wontfix: 1375 return std::nullopt; 1376 case Strategy::Kind::Iterator: 1377 case Strategy::Kind::Array: 1378 case Strategy::Kind::Vector: 1379 llvm_unreachable("unsupported strategies for FixableGadgets"); 1380 } 1381 return std::nullopt; 1382 } 1383 1384 static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD, 1385 const ASTContext &Ctx) { 1386 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) { 1387 if (ConstVal->isNegative()) 1388 return false; 1389 } else if (!Expr->getType()->isUnsignedIntegerType()) 1390 return false; 1391 return true; 1392 } 1393 1394 std::optional<FixItList> 1395 ULCArraySubscriptGadget::getFixits(const Strategy &S) const { 1396 if (const auto *DRE = 1397 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) 1398 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1399 switch (S.lookup(VD)) { 1400 case Strategy::Kind::Span: { 1401 1402 // If the index has a negative constant value, we give up as no valid 1403 // fix-it can be generated: 1404 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in! 1405 VD->getASTContext(); 1406 if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx)) 1407 return std::nullopt; 1408 // no-op is a good fix-it, otherwise 1409 return FixItList{}; 1410 } 1411 case Strategy::Kind::Wontfix: 1412 case Strategy::Kind::Iterator: 1413 case Strategy::Kind::Array: 1414 case Strategy::Kind::Vector: 1415 llvm_unreachable("unsupported strategies for FixableGadgets"); 1416 } 1417 } 1418 return std::nullopt; 1419 } 1420 1421 static std::optional<FixItList> // forward declaration 1422 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); 1423 1424 std::optional<FixItList> 1425 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { 1426 auto DREs = getClaimedVarUseSites(); 1427 const auto *VD = cast<VarDecl>(DREs.front()->getDecl()); 1428 1429 switch (S.lookup(VD)) { 1430 case Strategy::Kind::Span: 1431 return fixUPCAddressofArraySubscriptWithSpan(Node); 1432 case Strategy::Kind::Wontfix: 1433 case Strategy::Kind::Iterator: 1434 case Strategy::Kind::Array: 1435 case Strategy::Kind::Vector: 1436 llvm_unreachable("unsupported strategies for FixableGadgets"); 1437 } 1438 return std::nullopt; // something went wrong, no fix-it 1439 } 1440 1441 // FIXME: this function should be customizable through format 1442 static StringRef getEndOfLine() { 1443 static const char *const EOL = "\n"; 1444 return EOL; 1445 } 1446 1447 // Returns the text indicating that the user needs to provide input there: 1448 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { 1449 std::string s = std::string("<# "); 1450 s += HintTextToUser; 1451 s += " #>"; 1452 return s; 1453 } 1454 1455 // Return the text representation of the given `APInt Val`: 1456 static std::string getAPIntText(APInt Val) { 1457 SmallVector<char> Txt; 1458 Val.toString(Txt, 10, true); 1459 // APInt::toString does not add '\0' to the end of the string for us: 1460 Txt.push_back('\0'); 1461 return Txt.data(); 1462 } 1463 1464 // Return the source location of the last character of the AST `Node`. 1465 template <typename NodeTy> 1466 static std::optional<SourceLocation> 1467 getEndCharLoc(const NodeTy *Node, const SourceManager &SM, 1468 const LangOptions &LangOpts) { 1469 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); 1470 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); 1471 1472 if (Loc.isValid()) 1473 return Loc; 1474 1475 return std::nullopt; 1476 } 1477 1478 // Return the source location just past the last character of the AST `Node`. 1479 template <typename NodeTy> 1480 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node, 1481 const SourceManager &SM, 1482 const LangOptions &LangOpts) { 1483 SourceLocation Loc = 1484 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts); 1485 if (Loc.isValid()) 1486 return Loc; 1487 return std::nullopt; 1488 } 1489 1490 // Return text representation of an `Expr`. 1491 static std::optional<StringRef> getExprText(const Expr *E, 1492 const SourceManager &SM, 1493 const LangOptions &LangOpts) { 1494 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts); 1495 1496 if (LastCharLoc) 1497 return Lexer::getSourceText( 1498 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM, 1499 LangOpts); 1500 1501 return std::nullopt; 1502 } 1503 1504 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range. 1505 static std::optional<StringRef> getRangeText(SourceRange SR, 1506 const SourceManager &SM, 1507 const LangOptions &LangOpts) { 1508 bool Invalid = false; 1509 CharSourceRange CSR = CharSourceRange::getCharRange(SR); 1510 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid); 1511 1512 if (!Invalid) 1513 return Text; 1514 return std::nullopt; 1515 } 1516 1517 // Returns the begin location of the identifier of the given variable 1518 // declaration. 1519 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) { 1520 // According to the implementation of `VarDecl`, `VD->getLocation()` actually 1521 // returns the begin location of the identifier of the declaration: 1522 return VD->getLocation(); 1523 } 1524 1525 // Returns the literal text of the identifier of the given variable declaration. 1526 static std::optional<StringRef> 1527 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM, 1528 const LangOptions &LangOpts) { 1529 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD); 1530 SourceLocation ParmIdentEndLoc = 1531 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts); 1532 1533 if (ParmIdentEndLoc.isMacroID() && 1534 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts)) 1535 return std::nullopt; 1536 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts); 1537 } 1538 1539 // We cannot fix a variable declaration if it has some other specifiers than the 1540 // type specifier. Because the source ranges of those specifiers could overlap 1541 // with the source range that is being replaced using fix-its. Especially when 1542 // we often cannot obtain accurate source ranges of cv-qualified type 1543 // specifiers. 1544 // FIXME: also deal with type attributes 1545 static bool hasUnsupportedSpecifiers(const VarDecl *VD, 1546 const SourceManager &SM) { 1547 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the 1548 // source range of `VD`: 1549 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool { 1550 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(), 1551 VD->getBeginLoc())) && 1552 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(), 1553 At->getRange().getBegin())); 1554 }); 1555 return VD->isInlineSpecified() || VD->isConstexpr() || 1556 VD->hasConstantInitialization() || !VD->hasLocalStorage() || 1557 AttrRangeOverlapping; 1558 } 1559 1560 // Returns the `SourceRange` of `D`. The reason why this function exists is 1561 // that `D->getSourceRange()` may return a range where the end location is the 1562 // starting location of the last token. The end location of the source range 1563 // returned by this function is the last location of the last token. 1564 static SourceRange getSourceRangeToTokenEnd(const Decl *D, 1565 const SourceManager &SM, 1566 const LangOptions &LangOpts) { 1567 SourceLocation Begin = D->getBeginLoc(); 1568 SourceLocation 1569 End = // `D->getEndLoc` should always return the starting location of the 1570 // last token, so we should get the end of the token 1571 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts); 1572 1573 return SourceRange(Begin, End); 1574 } 1575 1576 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer 1577 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not 1578 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me 1579 // :( ), `Qualifiers` of the pointee type is returned separately through the 1580 // output parameter `QualifiersToAppend`. 1581 static std::optional<std::string> 1582 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM, 1583 const LangOptions &LangOpts, 1584 std::optional<Qualifiers> *QualifiersToAppend) { 1585 QualType Ty = VD->getType(); 1586 QualType PteTy; 1587 1588 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() && 1589 "Expecting a VarDecl of type of pointer to object type"); 1590 PteTy = Ty->getPointeeType(); 1591 1592 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc(); 1593 TypeLoc PteTyLoc; 1594 1595 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns 1596 // the `TypeLoc` of the pointee type: 1597 switch (TyLoc.getTypeLocClass()) { 1598 case TypeLoc::ConstantArray: 1599 case TypeLoc::IncompleteArray: 1600 case TypeLoc::VariableArray: 1601 case TypeLoc::DependentSizedArray: 1602 case TypeLoc::Decayed: 1603 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a " 1604 "pointer type unless it decays."); 1605 PteTyLoc = TyLoc.getNextTypeLoc(); 1606 break; 1607 case TypeLoc::Pointer: 1608 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc(); 1609 break; 1610 default: 1611 return std::nullopt; 1612 } 1613 if (PteTyLoc.isNull()) 1614 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g., 1615 // when the pointer type is `auto`. 1616 return std::nullopt; 1617 1618 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD); 1619 1620 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) { 1621 // We are expecting these locations to be valid. But in some cases, they are 1622 // not all valid. It is a Clang bug to me and we are not responsible for 1623 // fixing it. So we will just give up for now when it happens. 1624 return std::nullopt; 1625 } 1626 1627 // Note that TypeLoc.getEndLoc() returns the begin location of the last token: 1628 SourceLocation PteEndOfTokenLoc = 1629 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts); 1630 1631 if (!PteEndOfTokenLoc.isValid()) 1632 // Sometimes we cannot get the end location of the pointee type, e.g., when 1633 // there are macros involved. 1634 return std::nullopt; 1635 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) { 1636 // We only deal with the cases where the source text of the pointee type 1637 // appears on the left-hand side of the variable identifier completely, 1638 // including the following forms: 1639 // `T ident`, 1640 // `T ident[]`, where `T` is any type. 1641 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`. 1642 return std::nullopt; 1643 } 1644 if (PteTy.hasQualifiers()) { 1645 // TypeLoc does not provide source ranges for qualifiers (it says it's 1646 // intentional but seems fishy to me), so we cannot get the full text 1647 // `PteTy` via source ranges. 1648 *QualifiersToAppend = PteTy.getQualifiers(); 1649 } 1650 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts) 1651 ->str(); 1652 } 1653 1654 // Returns the text of the name (with qualifiers) of a `FunctionDecl`. 1655 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD, 1656 const SourceManager &SM, 1657 const LangOptions &LangOpts) { 1658 SourceLocation BeginLoc = FD->getQualifier() 1659 ? FD->getQualifierLoc().getBeginLoc() 1660 : FD->getNameInfo().getBeginLoc(); 1661 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the 1662 // last token: 1663 SourceLocation EndLoc = Lexer::getLocForEndOfToken( 1664 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts); 1665 SourceRange NameRange{BeginLoc, EndLoc}; 1666 1667 return getRangeText(NameRange, SM, LangOpts); 1668 } 1669 1670 // Returns the text representing a `std::span` type where the element type is 1671 // represented by `EltTyText`. 1672 // 1673 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers 1674 // explicitly if the element type needs to be qualified. 1675 static std::string 1676 getSpanTypeText(StringRef EltTyText, 1677 std::optional<Qualifiers> Quals = std::nullopt) { 1678 const char *const SpanOpen = "std::span<"; 1679 1680 if (Quals) 1681 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>'; 1682 return SpanOpen + EltTyText.str() + '>'; 1683 } 1684 1685 std::optional<FixItList> 1686 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const { 1687 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1688 1689 if (VD && s.lookup(VD) == Strategy::Kind::Span) { 1690 ASTContext &Ctx = VD->getASTContext(); 1691 // std::span can't represent elements before its begin() 1692 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx)) 1693 if (ConstVal->isNegative()) 1694 return std::nullopt; 1695 1696 // note that the expr may (oddly) has multiple layers of parens 1697 // example: 1698 // *((..(pointer + 123)..)) 1699 // goal: 1700 // pointer[123] 1701 // Fix-It: 1702 // remove '*(' 1703 // replace ' + ' with '[' 1704 // replace ')' with ']' 1705 1706 // example: 1707 // *((..(123 + pointer)..)) 1708 // goal: 1709 // 123[pointer] 1710 // Fix-It: 1711 // remove '*(' 1712 // replace ' + ' with '[' 1713 // replace ')' with ']' 1714 1715 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS(); 1716 const SourceManager &SM = Ctx.getSourceManager(); 1717 const LangOptions &LangOpts = Ctx.getLangOpts(); 1718 CharSourceRange StarWithTrailWhitespace = 1719 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(), 1720 LHS->getBeginLoc()); 1721 1722 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts); 1723 if (!LHSLocation) 1724 return std::nullopt; 1725 1726 CharSourceRange PlusWithSurroundingWhitespace = 1727 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc()); 1728 1729 std::optional<SourceLocation> AddOpLocation = 1730 getPastLoc(AddOp, SM, LangOpts); 1731 std::optional<SourceLocation> DerefOpLocation = 1732 getPastLoc(DerefOp, SM, LangOpts); 1733 1734 if (!AddOpLocation || !DerefOpLocation) 1735 return std::nullopt; 1736 1737 CharSourceRange ClosingParenWithPrecWhitespace = 1738 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation); 1739 1740 return FixItList{ 1741 {FixItHint::CreateRemoval(StarWithTrailWhitespace), 1742 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["), 1743 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}}; 1744 } 1745 return std::nullopt; // something wrong or unsupported, give up 1746 } 1747 1748 std::optional<FixItList> 1749 PointerDereferenceGadget::getFixits(const Strategy &S) const { 1750 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1751 switch (S.lookup(VD)) { 1752 case Strategy::Kind::Span: { 1753 ASTContext &Ctx = VD->getASTContext(); 1754 SourceManager &SM = Ctx.getSourceManager(); 1755 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] 1756 // Deletes the *operand 1757 CharSourceRange derefRange = clang::CharSourceRange::getCharRange( 1758 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); 1759 // Inserts the [0] 1760 if (auto LocPastOperand = 1761 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) { 1762 return FixItList{{FixItHint::CreateRemoval(derefRange), 1763 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}}; 1764 } 1765 break; 1766 } 1767 case Strategy::Kind::Iterator: 1768 case Strategy::Kind::Array: 1769 case Strategy::Kind::Vector: 1770 llvm_unreachable("Strategy not implemented yet!"); 1771 case Strategy::Kind::Wontfix: 1772 llvm_unreachable("Invalid strategy!"); 1773 } 1774 1775 return std::nullopt; 1776 } 1777 1778 // Generates fix-its replacing an expression of the form UPC(DRE) with 1779 // `DRE.data()` 1780 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S) 1781 const { 1782 const auto VD = cast<VarDecl>(Node->getDecl()); 1783 switch (S.lookup(VD)) { 1784 case Strategy::Kind::Span: { 1785 ASTContext &Ctx = VD->getASTContext(); 1786 SourceManager &SM = Ctx.getSourceManager(); 1787 // Inserts the .data() after the DRE 1788 std::optional<SourceLocation> EndOfOperand = 1789 getPastLoc(Node, SM, Ctx.getLangOpts()); 1790 1791 if (EndOfOperand) 1792 return FixItList{{FixItHint::CreateInsertion( 1793 *EndOfOperand, ".data()")}}; 1794 // FIXME: Points inside a macro expansion. 1795 break; 1796 } 1797 case Strategy::Kind::Wontfix: 1798 case Strategy::Kind::Iterator: 1799 case Strategy::Kind::Array: 1800 case Strategy::Kind::Vector: 1801 llvm_unreachable("unsupported strategies for FixableGadgets"); 1802 } 1803 1804 return std::nullopt; 1805 } 1806 1807 // Generates fix-its replacing an expression of the form `&DRE[e]` with 1808 // `&DRE.data()[e]`: 1809 static std::optional<FixItList> 1810 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { 1811 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr()); 1812 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts()); 1813 // FIXME: this `getASTContext` call is costly, we should pass the 1814 // ASTContext in: 1815 const ASTContext &Ctx = DRE->getDecl()->getASTContext(); 1816 const Expr *Idx = ArraySub->getIdx(); 1817 const SourceManager &SM = Ctx.getSourceManager(); 1818 const LangOptions &LangOpts = Ctx.getLangOpts(); 1819 std::stringstream SS; 1820 bool IdxIsLitZero = false; 1821 1822 if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) 1823 if ((*ICE).isZero()) 1824 IdxIsLitZero = true; 1825 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts); 1826 if (!DreString) 1827 return std::nullopt; 1828 1829 if (IdxIsLitZero) { 1830 // If the index is literal zero, we produce the most concise fix-it: 1831 SS << (*DreString).str() << ".data()"; 1832 } else { 1833 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts); 1834 if (!IndexString) 1835 return std::nullopt; 1836 1837 SS << "&" << (*DreString).str() << ".data()" 1838 << "[" << (*IndexString).str() << "]"; 1839 } 1840 return FixItList{ 1841 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; 1842 } 1843 1844 std::optional<FixItList> 1845 UUCAddAssignGadget::getFixits(const Strategy &S) const { 1846 DeclUseList DREs = getClaimedVarUseSites(); 1847 1848 if (DREs.size() != 1) 1849 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we 1850 // give up 1851 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1852 if (S.lookup(VD) == Strategy::Kind::Span) { 1853 FixItList Fixes; 1854 1855 const Stmt *AddAssignNode = getBaseStmt(); 1856 StringRef varName = VD->getName(); 1857 const ASTContext &Ctx = VD->getASTContext(); 1858 1859 if (!isNonNegativeIntegerExpr(Offset, VD, Ctx)) 1860 return std::nullopt; 1861 1862 // To transform UUC(p += n) to UUC(p = p.subspan(..)): 1863 bool NotParenExpr = 1864 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc()); 1865 std::string SS = varName.str() + " = " + varName.str() + ".subspan"; 1866 if (NotParenExpr) 1867 SS += "("; 1868 1869 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc( 1870 AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1871 if (!AddAssignLocation) 1872 return std::nullopt; 1873 1874 Fixes.push_back(FixItHint::CreateReplacement( 1875 SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()), 1876 SS)); 1877 if (NotParenExpr) 1878 Fixes.push_back(FixItHint::CreateInsertion( 1879 Offset->getEndLoc().getLocWithOffset(1), ")")); 1880 return Fixes; 1881 } 1882 } 1883 return std::nullopt; // Not in the cases that we can handle for now, give up. 1884 } 1885 1886 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const { 1887 DeclUseList DREs = getClaimedVarUseSites(); 1888 1889 if (DREs.size() != 1) 1890 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we 1891 // give up 1892 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1893 if (S.lookup(VD) == Strategy::Kind::Span) { 1894 FixItList Fixes; 1895 std::stringstream SS; 1896 const Stmt *PreIncNode = getBaseStmt(); 1897 StringRef varName = VD->getName(); 1898 const ASTContext &Ctx = VD->getASTContext(); 1899 1900 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()): 1901 SS << "(" << varName.data() << " = " << varName.data() 1902 << ".subspan(1)).data()"; 1903 std::optional<SourceLocation> PreIncLocation = 1904 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1905 if (!PreIncLocation) 1906 return std::nullopt; 1907 1908 Fixes.push_back(FixItHint::CreateReplacement( 1909 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str())); 1910 return Fixes; 1911 } 1912 } 1913 return std::nullopt; // Not in the cases that we can handle for now, give up. 1914 } 1915 1916 1917 // For a non-null initializer `Init` of `T *` type, this function returns 1918 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it 1919 // to output stream. 1920 // In many cases, this function cannot figure out the actual extent `S`. It 1921 // then will use a place holder to replace `S` to ask users to fill `S` in. The 1922 // initializer shall be used to initialize a variable of type `std::span<T>`. 1923 // 1924 // FIXME: Support multi-level pointers 1925 // 1926 // Parameters: 1927 // `Init` a pointer to the initializer expression 1928 // `Ctx` a reference to the ASTContext 1929 static FixItList 1930 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx, 1931 const StringRef UserFillPlaceHolder) { 1932 const SourceManager &SM = Ctx.getSourceManager(); 1933 const LangOptions &LangOpts = Ctx.getLangOpts(); 1934 1935 // If `Init` has a constant value that is (or equivalent to) a 1936 // NULL pointer, we use the default constructor to initialize the span 1937 // object, i.e., a `std:span` variable declaration with no initializer. 1938 // So the fix-it is just to remove the initializer. 1939 if (Init->isNullPointerConstant(Ctx, 1940 // FIXME: Why does this function not ask for `const ASTContext 1941 // &`? It should. Maybe worth an NFC patch later. 1942 Expr::NullPointerConstantValueDependence:: 1943 NPC_ValueDependentIsNotNull)) { 1944 std::optional<SourceLocation> InitLocation = 1945 getEndCharLoc(Init, SM, LangOpts); 1946 if (!InitLocation) 1947 return {}; 1948 1949 SourceRange SR(Init->getBeginLoc(), *InitLocation); 1950 1951 return {FixItHint::CreateRemoval(SR)}; 1952 } 1953 1954 FixItList FixIts{}; 1955 std::string ExtentText = UserFillPlaceHolder.data(); 1956 StringRef One = "1"; 1957 1958 // Insert `{` before `Init`: 1959 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{")); 1960 // Try to get the data extent. Break into different cases: 1961 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) { 1962 // In cases `Init` is `new T[n]` and there is no explicit cast over 1963 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects 1964 // of `T`. So the extent is `n` unless `n` has side effects. Similar but 1965 // simpler for the case where `Init` is `new T`. 1966 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { 1967 if (!Ext->HasSideEffects(Ctx)) { 1968 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts); 1969 if (!ExtentString) 1970 return {}; 1971 ExtentText = *ExtentString; 1972 } 1973 } else if (!CxxNew->isArray()) 1974 // Although the initializer is not allocating a buffer, the pointer 1975 // variable could still be used in buffer access operations. 1976 ExtentText = One; 1977 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType( 1978 Init->IgnoreImpCasts()->getType())) { 1979 // In cases `Init` is of an array type after stripping off implicit casts, 1980 // the extent is the array size. Note that if the array size is not a 1981 // constant, we cannot use it as the extent. 1982 ExtentText = getAPIntText(CArrTy->getSize()); 1983 } else { 1984 // In cases `Init` is of the form `&Var` after stripping of implicit 1985 // casts, where `&` is the built-in operator, the extent is 1. 1986 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts())) 1987 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && 1988 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr())) 1989 ExtentText = One; 1990 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, 1991 // and explicit casting, etc. etc. 1992 } 1993 1994 SmallString<32> StrBuffer{}; 1995 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts); 1996 1997 if (!LocPassInit) 1998 return {}; 1999 2000 StrBuffer.append(", "); 2001 StrBuffer.append(ExtentText); 2002 StrBuffer.append("}"); 2003 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str())); 2004 return FixIts; 2005 } 2006 2007 #ifndef NDEBUG 2008 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \ 2009 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg)) 2010 #else 2011 #define DEBUG_NOTE_DECL_FAIL(D, Msg) 2012 #endif 2013 2014 // For the given variable declaration with a pointer-to-T type, returns the text 2015 // `std::span<T>`. If it is unable to generate the text, returns 2016 // `std::nullopt`. 2017 static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD, 2018 const ASTContext &Ctx) { 2019 assert(VD->getType()->isPointerType()); 2020 2021 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 2022 std::optional<std::string> PteTyText = getPointeeTypeText( 2023 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 2024 2025 if (!PteTyText) 2026 return std::nullopt; 2027 2028 std::string SpanTyText = "std::span<"; 2029 2030 SpanTyText.append(*PteTyText); 2031 // Append qualifiers to span element type if any: 2032 if (PteTyQualifiers) { 2033 SpanTyText.append(" "); 2034 SpanTyText.append(PteTyQualifiers->getAsString()); 2035 } 2036 SpanTyText.append(">"); 2037 return SpanTyText; 2038 } 2039 2040 // For a `VarDecl` of the form `T * var (= Init)?`, this 2041 // function generates fix-its that 2042 // 1) replace `T * var` with `std::span<T> var`; and 2043 // 2) change `Init` accordingly to a span constructor, if it exists. 2044 // 2045 // FIXME: support Multi-level pointers 2046 // 2047 // Parameters: 2048 // `D` a pointer the variable declaration node 2049 // `Ctx` a reference to the ASTContext 2050 // `UserFillPlaceHolder` the user-input placeholder text 2051 // Returns: 2052 // the non-empty fix-it list, if fix-its are successfuly generated; empty 2053 // list otherwise. 2054 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, 2055 const StringRef UserFillPlaceHolder, 2056 UnsafeBufferUsageHandler &Handler) { 2057 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager())) 2058 return {}; 2059 2060 FixItList FixIts{}; 2061 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx); 2062 2063 if (!SpanTyText) { 2064 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type"); 2065 return {}; 2066 } 2067 2068 // Will hold the text for `std::span<T> Ident`: 2069 std::stringstream SS; 2070 2071 SS << *SpanTyText; 2072 // Append qualifiers to the type of `D`, if any: 2073 if (D->getType().hasQualifiers()) 2074 SS << " " << D->getType().getQualifiers().getAsString(); 2075 2076 // The end of the range of the original source that will be replaced 2077 // by `std::span<T> ident`: 2078 SourceLocation EndLocForReplacement = D->getEndLoc(); 2079 std::optional<StringRef> IdentText = 2080 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); 2081 2082 if (!IdentText) { 2083 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); 2084 return {}; 2085 } 2086 // Fix the initializer if it exists: 2087 if (const Expr *Init = D->getInit()) { 2088 FixItList InitFixIts = 2089 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder); 2090 if (InitFixIts.empty()) 2091 return {}; 2092 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()), 2093 std::make_move_iterator(InitFixIts.end())); 2094 // If the declaration has the form `T *ident = init`, we want to replace 2095 // `T *ident = ` with `std::span<T> ident`: 2096 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1); 2097 } 2098 SS << " " << IdentText->str(); 2099 if (!EndLocForReplacement.isValid()) { 2100 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration"); 2101 return {}; 2102 } 2103 FixIts.push_back(FixItHint::CreateReplacement( 2104 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str())); 2105 return FixIts; 2106 } 2107 2108 static bool hasConflictingOverload(const FunctionDecl *FD) { 2109 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult(); 2110 } 2111 2112 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new 2113 // types, this function produces fix-its to make the change self-contained. Let 2114 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the 2115 // entity defined by the `FunctionDecl` after the change to the parameters. 2116 // Fix-its produced by this function are 2117 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration 2118 // of 'F'; 2119 // 2. Create a declaration of "NewF" next to each declaration of `F`; 2120 // 3. Create a definition of "F" (as its' original definition is now belongs 2121 // to "NewF") next to its original definition. The body of the creating 2122 // definition calls to "NewF". 2123 // 2124 // Example: 2125 // 2126 // void f(int *p); // original declaration 2127 // void f(int *p) { // original definition 2128 // p[5]; 2129 // } 2130 // 2131 // To change the parameter `p` to be of `std::span<int>` type, we 2132 // also add overloads: 2133 // 2134 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl 2135 // void f(std::span<int> p); // added overload decl 2136 // void f(std::span<int> p) { // original def where param is changed 2137 // p[5]; 2138 // } 2139 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def 2140 // return f(std::span(p, <# size #>)); 2141 // } 2142 // 2143 static std::optional<FixItList> 2144 createOverloadsForFixedParams(const Strategy &S, const FunctionDecl *FD, 2145 const ASTContext &Ctx, 2146 UnsafeBufferUsageHandler &Handler) { 2147 // FIXME: need to make this conflict checking better: 2148 if (hasConflictingOverload(FD)) 2149 return std::nullopt; 2150 2151 const SourceManager &SM = Ctx.getSourceManager(); 2152 const LangOptions &LangOpts = Ctx.getLangOpts(); 2153 const unsigned NumParms = FD->getNumParams(); 2154 std::vector<std::string> NewTysTexts(NumParms); 2155 std::vector<bool> ParmsMask(NumParms, false); 2156 bool AtLeastOneParmToFix = false; 2157 2158 for (unsigned i = 0; i < NumParms; i++) { 2159 const ParmVarDecl *PVD = FD->getParamDecl(i); 2160 2161 if (S.lookup(PVD) == Strategy::Kind::Wontfix) 2162 continue; 2163 if (S.lookup(PVD) != Strategy::Kind::Span) 2164 // Not supported, not suppose to happen: 2165 return std::nullopt; 2166 2167 std::optional<Qualifiers> PteTyQuals = std::nullopt; 2168 std::optional<std::string> PteTyText = 2169 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals); 2170 2171 if (!PteTyText) 2172 // something wrong in obtaining the text of the pointee type, give up 2173 return std::nullopt; 2174 // FIXME: whether we should create std::span type depends on the Strategy. 2175 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals); 2176 ParmsMask[i] = true; 2177 AtLeastOneParmToFix = true; 2178 } 2179 if (!AtLeastOneParmToFix) 2180 // No need to create function overloads: 2181 return {}; 2182 // FIXME Respect indentation of the original code. 2183 2184 // A lambda that creates the text representation of a function declaration 2185 // with the new type signatures: 2186 const auto NewOverloadSignatureCreator = 2187 [&SM, &LangOpts, &NewTysTexts, 2188 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 2189 std::stringstream SS; 2190 2191 SS << ";"; 2192 SS << getEndOfLine().str(); 2193 // Append: ret-type func-name "(" 2194 if (auto Prefix = getRangeText( 2195 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()), 2196 SM, LangOpts)) 2197 SS << Prefix->str(); 2198 else 2199 return std::nullopt; // give up 2200 // Append: parameter-type-list 2201 const unsigned NumParms = FD->getNumParams(); 2202 2203 for (unsigned i = 0; i < NumParms; i++) { 2204 const ParmVarDecl *Parm = FD->getParamDecl(i); 2205 2206 if (Parm->isImplicit()) 2207 continue; 2208 if (ParmsMask[i]) { 2209 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its 2210 // new type: 2211 SS << NewTysTexts[i]; 2212 // print parameter name if provided: 2213 if (IdentifierInfo *II = Parm->getIdentifier()) 2214 SS << ' ' << II->getName().str(); 2215 } else if (auto ParmTypeText = getRangeText( 2216 getSourceRangeToTokenEnd(Parm, SM, LangOpts), 2217 SM, LangOpts)) { 2218 // print the whole `Parm` without modification: 2219 SS << ParmTypeText->str(); 2220 } else 2221 return std::nullopt; // something wrong, give up 2222 if (i != NumParms - 1) 2223 SS << ", "; 2224 } 2225 SS << ")"; 2226 return SS.str(); 2227 }; 2228 2229 // A lambda that creates the text representation of a function definition with 2230 // the original signature: 2231 const auto OldOverloadDefCreator = 2232 [&Handler, &SM, &LangOpts, &NewTysTexts, 2233 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 2234 std::stringstream SS; 2235 2236 SS << getEndOfLine().str(); 2237 // Append: attr-name ret-type func-name "(" param-list ")" "{" 2238 if (auto FDPrefix = getRangeText( 2239 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM, 2240 LangOpts)) 2241 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ") 2242 << FDPrefix->str() << "{"; 2243 else 2244 return std::nullopt; 2245 // Append: "return" func-name "(" 2246 if (auto FunQualName = getFunNameText(FD, SM, LangOpts)) 2247 SS << "return " << FunQualName->str() << "("; 2248 else 2249 return std::nullopt; 2250 2251 // Append: arg-list 2252 const unsigned NumParms = FD->getNumParams(); 2253 for (unsigned i = 0; i < NumParms; i++) { 2254 const ParmVarDecl *Parm = FD->getParamDecl(i); 2255 2256 if (Parm->isImplicit()) 2257 continue; 2258 // FIXME: If a parameter has no name, it is unused in the 2259 // definition. So we could just leave it as it is. 2260 if (!Parm->getIdentifier()) 2261 // If a parameter of a function definition has no name: 2262 return std::nullopt; 2263 if (ParmsMask[i]) 2264 // This is our spanified paramter! 2265 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str() 2266 << ", " << getUserFillPlaceHolder("size") << ")"; 2267 else 2268 SS << Parm->getIdentifier()->getName().str(); 2269 if (i != NumParms - 1) 2270 SS << ", "; 2271 } 2272 // finish call and the body 2273 SS << ");}" << getEndOfLine().str(); 2274 // FIXME: 80-char line formatting? 2275 return SS.str(); 2276 }; 2277 2278 FixItList FixIts{}; 2279 for (FunctionDecl *FReDecl : FD->redecls()) { 2280 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts); 2281 2282 if (!Loc) 2283 return {}; 2284 if (FReDecl->isThisDeclarationADefinition()) { 2285 assert(FReDecl == FD && "inconsistent function definition"); 2286 // Inserts a definition with the old signature to the end of 2287 // `FReDecl`: 2288 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl)) 2289 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef)); 2290 else 2291 return {}; // give up 2292 } else { 2293 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`: 2294 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) { 2295 FixIts.emplace_back(FixItHint::CreateInsertion( 2296 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt( 2297 FReDecl->getBeginLoc(), " "))); 2298 } 2299 // Inserts a declaration with the new signature to the end of `FReDecl`: 2300 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl)) 2301 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl)); 2302 else 2303 return {}; 2304 } 2305 } 2306 return FixIts; 2307 } 2308 2309 // To fix a `ParmVarDecl` to be of `std::span` type. 2310 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx, 2311 UnsafeBufferUsageHandler &Handler) { 2312 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) { 2313 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)"); 2314 return {}; 2315 } 2316 if (PVD->hasDefaultArg()) { 2317 // FIXME: generate fix-its for default values: 2318 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg"); 2319 return {}; 2320 } 2321 2322 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 2323 std::optional<std::string> PteTyText = getPointeeTypeText( 2324 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 2325 2326 if (!PteTyText) { 2327 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type"); 2328 return {}; 2329 } 2330 2331 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName(); 2332 2333 if (!PVDNameText) { 2334 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name"); 2335 return {}; 2336 } 2337 2338 std::stringstream SS; 2339 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx); 2340 2341 if (PteTyQualifiers) 2342 // Append qualifiers if they exist: 2343 SS << getSpanTypeText(*PteTyText, PteTyQualifiers); 2344 else 2345 SS << getSpanTypeText(*PteTyText); 2346 // Append qualifiers to the type of the parameter: 2347 if (PVD->getType().hasQualifiers()) 2348 SS << ' ' << PVD->getType().getQualifiers().getAsString(); 2349 // Append parameter's name: 2350 SS << ' ' << PVDNameText->str(); 2351 // Add replacement fix-it: 2352 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())}; 2353 } 2354 2355 static FixItList fixVariableWithSpan(const VarDecl *VD, 2356 const DeclUseTracker &Tracker, 2357 ASTContext &Ctx, 2358 UnsafeBufferUsageHandler &Handler) { 2359 const DeclStmt *DS = Tracker.lookupDecl(VD); 2360 if (!DS) { 2361 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet"); 2362 return {}; 2363 } 2364 if (!DS->isSingleDecl()) { 2365 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` 2366 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls"); 2367 return {}; 2368 } 2369 // Currently DS is an unused variable but we'll need it when 2370 // non-single decls are implemented, where the pointee type name 2371 // and the '*' are spread around the place. 2372 (void)DS; 2373 2374 // FIXME: handle cases where DS has multiple declarations 2375 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); 2376 } 2377 2378 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due 2379 // to any unexpected problem. 2380 static FixItList 2381 fixVariable(const VarDecl *VD, Strategy::Kind K, 2382 /* The function decl under analysis */ const Decl *D, 2383 const DeclUseTracker &Tracker, ASTContext &Ctx, 2384 UnsafeBufferUsageHandler &Handler) { 2385 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) { 2386 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext()); 2387 if (!FD || FD != D) { 2388 // `FD != D` means that `PVD` belongs to a function that is not being 2389 // analyzed currently. Thus `FD` may not be complete. 2390 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed"); 2391 return {}; 2392 } 2393 2394 // TODO If function has a try block we can't change params unless we check 2395 // also its catch block for their use. 2396 // FIXME We might support static class methods, some select methods, 2397 // operators and possibly lamdas. 2398 if (FD->isMain() || FD->isConstexpr() || 2399 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate || 2400 FD->isVariadic() || 2401 // also covers call-operator of lamdas 2402 isa<CXXMethodDecl>(FD) || 2403 // skip when the function body is a try-block 2404 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) || 2405 FD->isOverloadedOperator()) { 2406 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl"); 2407 return {}; // TODO test all these cases 2408 } 2409 } 2410 2411 switch (K) { 2412 case Strategy::Kind::Span: { 2413 if (VD->getType()->isPointerType()) { 2414 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) 2415 return fixParamWithSpan(PVD, Ctx, Handler); 2416 2417 if (VD->isLocalVarDecl()) 2418 return fixVariableWithSpan(VD, Tracker, Ctx, Handler); 2419 } 2420 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer"); 2421 return {}; 2422 } 2423 case Strategy::Kind::Iterator: 2424 case Strategy::Kind::Array: 2425 case Strategy::Kind::Vector: 2426 llvm_unreachable("Strategy not implemented yet!"); 2427 case Strategy::Kind::Wontfix: 2428 llvm_unreachable("Invalid strategy!"); 2429 } 2430 llvm_unreachable("Unknown strategy!"); 2431 } 2432 2433 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the 2434 // `RemoveRange` of 'h' overlaps with a macro use. 2435 static bool overlapWithMacro(const FixItList &FixIts) { 2436 // FIXME: For now we only check if the range (or the first token) is (part of) 2437 // a macro expansion. Ideally, we want to check for all tokens in the range. 2438 return llvm::any_of(FixIts, [](const FixItHint &Hint) { 2439 auto Range = Hint.RemoveRange; 2440 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) 2441 // If the range (or the first token) is (part of) a macro expansion: 2442 return true; 2443 return false; 2444 }); 2445 } 2446 2447 // Returns true iff `VD` is a parameter of the declaration `D`: 2448 static bool isParameterOf(const VarDecl *VD, const Decl *D) { 2449 return isa<ParmVarDecl>(VD) && 2450 VD->getDeclContext() == dyn_cast<DeclContext>(D); 2451 } 2452 2453 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable 2454 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not 2455 // contain `v`. 2456 static void eraseVarsForUnfixableGroupMates( 2457 std::map<const VarDecl *, FixItList> &FixItsForVariable, 2458 const VariableGroupsManager &VarGrpMgr) { 2459 // Variables will be removed from `FixItsForVariable`: 2460 SmallVector<const VarDecl *, 8> ToErase; 2461 2462 for (const auto &[VD, Ignore] : FixItsForVariable) { 2463 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD); 2464 if (llvm::any_of(Grp, 2465 [&FixItsForVariable](const VarDecl *GrpMember) -> bool { 2466 return !FixItsForVariable.count(GrpMember); 2467 })) { 2468 // At least one group member cannot be fixed, so we have to erase the 2469 // whole group: 2470 for (const VarDecl *Member : Grp) 2471 ToErase.push_back(Member); 2472 } 2473 } 2474 for (auto *VarToErase : ToErase) 2475 FixItsForVariable.erase(VarToErase); 2476 } 2477 2478 // Returns the fix-its that create bounds-safe function overloads for the 2479 // function `D`, if `D`'s parameters will be changed to safe-types through 2480 // fix-its in `FixItsForVariable`. 2481 // 2482 // NOTE: In case `D`'s parameters will be changed but bounds-safe function 2483 // overloads cannot created, the whole group that contains the parameters will 2484 // be erased from `FixItsForVariable`. 2485 static FixItList createFunctionOverloadsForParms( 2486 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */, 2487 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD, 2488 const Strategy &S, ASTContext &Ctx, UnsafeBufferUsageHandler &Handler) { 2489 FixItList FixItsSharedByParms{}; 2490 2491 std::optional<FixItList> OverloadFixes = 2492 createOverloadsForFixedParams(S, FD, Ctx, Handler); 2493 2494 if (OverloadFixes) { 2495 FixItsSharedByParms.append(*OverloadFixes); 2496 } else { 2497 // Something wrong in generating `OverloadFixes`, need to remove the 2498 // whole group, where parameters are in, from `FixItsForVariable` (Note 2499 // that all parameters should be in the same group): 2500 for (auto *Member : VarGrpMgr.getGroupOfParms()) 2501 FixItsForVariable.erase(Member); 2502 } 2503 return FixItsSharedByParms; 2504 } 2505 2506 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`. 2507 static std::map<const VarDecl *, FixItList> 2508 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S, 2509 ASTContext &Ctx, 2510 /* The function decl under analysis */ const Decl *D, 2511 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler, 2512 const VariableGroupsManager &VarGrpMgr) { 2513 // `FixItsForVariable` will map each variable to a set of fix-its directly 2514 // associated to the variable itself. Fix-its of distinct variables in 2515 // `FixItsForVariable` are disjoint. 2516 std::map<const VarDecl *, FixItList> FixItsForVariable; 2517 2518 // Populate `FixItsForVariable` with fix-its directly associated with each 2519 // variable. Fix-its directly associated to a variable 'v' are the ones 2520 // produced by the `FixableGadget`s whose claimed variable is 'v'. 2521 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) { 2522 FixItsForVariable[VD] = 2523 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler); 2524 // If we fail to produce Fix-It for the declaration we have to skip the 2525 // variable entirely. 2526 if (FixItsForVariable[VD].empty()) { 2527 FixItsForVariable.erase(VD); 2528 continue; 2529 } 2530 for (const auto &F : Fixables) { 2531 std::optional<FixItList> Fixits = F->getFixits(S); 2532 2533 if (Fixits) { 2534 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), 2535 Fixits->begin(), Fixits->end()); 2536 continue; 2537 } 2538 #ifndef NDEBUG 2539 Handler.addDebugNoteForVar( 2540 VD, F->getBaseStmt()->getBeginLoc(), 2541 ("gadget '" + F->getDebugName() + "' refused to produce a fix") 2542 .str()); 2543 #endif 2544 FixItsForVariable.erase(VD); 2545 break; 2546 } 2547 } 2548 2549 // `FixItsForVariable` now contains only variables that can be 2550 // fixed. A variable can be fixed if its' declaration and all Fixables 2551 // associated to it can all be fixed. 2552 2553 // To further remove from `FixItsForVariable` variables whose group mates 2554 // cannot be fixed... 2555 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr); 2556 // Now `FixItsForVariable` gets further reduced: a variable is in 2557 // `FixItsForVariable` iff it can be fixed and all its group mates can be 2558 // fixed. 2559 2560 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`. 2561 // That is, when fixing multiple parameters in one step, these fix-its will 2562 // be applied only once (instead of being applied per parameter). 2563 FixItList FixItsSharedByParms{}; 2564 2565 if (auto *FD = dyn_cast<FunctionDecl>(D)) 2566 FixItsSharedByParms = createFunctionOverloadsForParms( 2567 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler); 2568 2569 // The map that maps each variable `v` to fix-its for the whole group where 2570 // `v` is in: 2571 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{ 2572 FixItsForVariable}; 2573 2574 for (auto &[Var, Ignore] : FixItsForVariable) { 2575 bool AnyParm = false; 2576 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm); 2577 2578 for (const VarDecl *GrpMate : VarGroupForVD) { 2579 if (Var == GrpMate) 2580 continue; 2581 if (FixItsForVariable.count(GrpMate)) 2582 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]); 2583 } 2584 if (AnyParm) { 2585 // This assertion should never fail. Otherwise we have a bug. 2586 assert(!FixItsSharedByParms.empty() && 2587 "Should not try to fix a parameter that does not belong to a " 2588 "FunctionDecl"); 2589 FinalFixItsForVariable[Var].append(FixItsSharedByParms); 2590 } 2591 } 2592 // Fix-its that will be applied in one step shall NOT: 2593 // 1. overlap with macros or/and templates; or 2594 // 2. conflict with each other. 2595 // Otherwise, the fix-its will be dropped. 2596 for (auto Iter = FinalFixItsForVariable.begin(); 2597 Iter != FinalFixItsForVariable.end();) 2598 if (overlapWithMacro(Iter->second) || 2599 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) { 2600 Iter = FinalFixItsForVariable.erase(Iter); 2601 } else 2602 Iter++; 2603 return FinalFixItsForVariable; 2604 } 2605 2606 template <typename VarDeclIterTy> 2607 static Strategy 2608 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) { 2609 Strategy S; 2610 for (const VarDecl *VD : UnsafeVars) { 2611 S.set(VD, Strategy::Kind::Span); 2612 } 2613 return S; 2614 } 2615 2616 // Manages variable groups: 2617 class VariableGroupsManagerImpl : public VariableGroupsManager { 2618 const std::vector<VarGrpTy> Groups; 2619 const std::map<const VarDecl *, unsigned> &VarGrpMap; 2620 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms; 2621 2622 public: 2623 VariableGroupsManagerImpl( 2624 const std::vector<VarGrpTy> &Groups, 2625 const std::map<const VarDecl *, unsigned> &VarGrpMap, 2626 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms) 2627 : Groups(Groups), VarGrpMap(VarGrpMap), 2628 GrpsUnionForParms(GrpsUnionForParms) {} 2629 2630 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override { 2631 if (GrpsUnionForParms.contains(Var)) { 2632 if (HasParm) 2633 *HasParm = true; 2634 return GrpsUnionForParms.getArrayRef(); 2635 } 2636 if (HasParm) 2637 *HasParm = false; 2638 2639 auto It = VarGrpMap.find(Var); 2640 2641 if (It == VarGrpMap.end()) 2642 return std::nullopt; 2643 return Groups[It->second]; 2644 } 2645 2646 VarGrpRef getGroupOfParms() const override { 2647 return GrpsUnionForParms.getArrayRef(); 2648 } 2649 }; 2650 2651 void clang::checkUnsafeBufferUsage(const Decl *D, 2652 UnsafeBufferUsageHandler &Handler, 2653 bool EmitSuggestions) { 2654 #ifndef NDEBUG 2655 Handler.clearDebugNotes(); 2656 #endif 2657 2658 assert(D && D->getBody()); 2659 // We do not want to visit a Lambda expression defined inside a method independently. 2660 // Instead, it should be visited along with the outer method. 2661 // FIXME: do we want to do the same thing for `BlockDecl`s? 2662 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) { 2663 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass()) 2664 return; 2665 } 2666 2667 // Do not emit fixit suggestions for functions declared in an 2668 // extern "C" block. 2669 if (const auto *FD = dyn_cast<FunctionDecl>(D)) { 2670 for (FunctionDecl *FReDecl : FD->redecls()) { 2671 if (FReDecl->isExternC()) { 2672 EmitSuggestions = false; 2673 break; 2674 } 2675 } 2676 } 2677 2678 WarningGadgetSets UnsafeOps; 2679 FixableGadgetSets FixablesForAllVars; 2680 2681 auto [FixableGadgets, WarningGadgets, Tracker] = 2682 findGadgets(D, Handler, EmitSuggestions); 2683 2684 if (!EmitSuggestions) { 2685 // Our job is very easy without suggestions. Just warn about 2686 // every problematic operation and consider it done. No need to deal 2687 // with fixable gadgets, no need to group operations by variable. 2688 for (const auto &G : WarningGadgets) { 2689 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, 2690 D->getASTContext()); 2691 } 2692 2693 // This return guarantees that most of the machine doesn't run when 2694 // suggestions aren't requested. 2695 assert(FixableGadgets.size() == 0 && 2696 "Fixable gadgets found but suggestions not requested!"); 2697 return; 2698 } 2699 2700 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 2701 // function under the analysis. No need to fix any Fixables. 2702 if (!WarningGadgets.empty()) { 2703 // Gadgets "claim" variables they're responsible for. Once this loop 2704 // finishes, the tracker will only track DREs that weren't claimed by any 2705 // gadgets, i.e. not understood by the analysis. 2706 for (const auto &G : FixableGadgets) { 2707 for (const auto *DRE : G->getClaimedVarUseSites()) { 2708 Tracker.claimUse(DRE); 2709 } 2710 } 2711 } 2712 2713 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 2714 // function under the analysis. Thus, it early returns here as there is 2715 // nothing needs to be fixed. 2716 // 2717 // Note this claim is based on the assumption that there is no unsafe 2718 // variable whose declaration is invisible from the analyzing function. 2719 // Otherwise, we need to consider if the uses of those unsafe varuables needs 2720 // fix. 2721 // So far, we are not fixing any global variables or class members. And, 2722 // lambdas will be analyzed along with the enclosing function. So this early 2723 // return is correct for now. 2724 if (WarningGadgets.empty()) 2725 return; 2726 2727 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); 2728 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets)); 2729 2730 std::map<const VarDecl *, FixItList> FixItsForVariableGroup; 2731 2732 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. 2733 for (auto it = FixablesForAllVars.byVar.cbegin(); 2734 it != FixablesForAllVars.byVar.cend();) { 2735 // FIXME: need to deal with global variables later 2736 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) { 2737 #ifndef NDEBUG 2738 Handler.addDebugNoteForVar( 2739 it->first, it->first->getBeginLoc(), 2740 ("failed to produce fixit for '" + it->first->getNameAsString() + 2741 "' : neither local nor a parameter")); 2742 #endif 2743 it = FixablesForAllVars.byVar.erase(it); 2744 } else if (it->first->getType().getCanonicalType()->isReferenceType()) { 2745 #ifndef NDEBUG 2746 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(), 2747 ("failed to produce fixit for '" + 2748 it->first->getNameAsString() + 2749 "' : has a reference type")); 2750 #endif 2751 it = FixablesForAllVars.byVar.erase(it); 2752 } else if (Tracker.hasUnclaimedUses(it->first)) { 2753 #ifndef NDEBUG 2754 auto AllUnclaimed = Tracker.getUnclaimedUses(it->first); 2755 for (auto UnclaimedDRE : AllUnclaimed) { 2756 std::string UnclaimedUseTrace = 2757 getDREAncestorString(UnclaimedDRE, D->getASTContext()); 2758 2759 Handler.addDebugNoteForVar( 2760 it->first, UnclaimedDRE->getBeginLoc(), 2761 ("failed to produce fixit for '" + it->first->getNameAsString() + 2762 "' : has an unclaimed use\nThe unclaimed DRE trace: " + 2763 UnclaimedUseTrace)); 2764 } 2765 #endif 2766 it = FixablesForAllVars.byVar.erase(it); 2767 } else if (it->first->isInitCapture()) { 2768 #ifndef NDEBUG 2769 Handler.addDebugNoteForVar( 2770 it->first, it->first->getBeginLoc(), 2771 ("failed to produce fixit for '" + it->first->getNameAsString() + 2772 "' : init capture")); 2773 #endif 2774 it = FixablesForAllVars.byVar.erase(it); 2775 }else { 2776 ++it; 2777 } 2778 } 2779 2780 // Fixpoint iteration for pointer assignments 2781 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>; 2782 DepMapTy DependenciesMap{}; 2783 DepMapTy PtrAssignmentGraph{}; 2784 2785 for (auto it : FixablesForAllVars.byVar) { 2786 for (const FixableGadget *fixable : it.second) { 2787 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair = 2788 fixable->getStrategyImplications(); 2789 if (ImplPair) { 2790 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair); 2791 PtrAssignmentGraph[Impl.first].insert(Impl.second); 2792 } 2793 } 2794 } 2795 2796 /* 2797 The following code does a BFS traversal of the `PtrAssignmentGraph` 2798 considering all unsafe vars as starting nodes and constructs an undirected 2799 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner 2800 elimiates all variables that are unreachable from any unsafe var. In other 2801 words, this removes all dependencies that don't include any unsafe variable 2802 and consequently don't need any fixit generation. 2803 Note: A careful reader would observe that the code traverses 2804 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and 2805 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would 2806 achieve the same result but the one used here dramatically cuts the 2807 amount of hoops the second part of the algorithm needs to jump, given that 2808 a lot of these connections become "direct". The reader is advised not to 2809 imagine how the graph is transformed because of using `Var` instead of 2810 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used, 2811 and think about why it's equivalent later. 2812 */ 2813 std::set<const VarDecl *> VisitedVarsDirected{}; 2814 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2815 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) { 2816 2817 std::queue<const VarDecl*> QueueDirected{}; 2818 QueueDirected.push(Var); 2819 while(!QueueDirected.empty()) { 2820 const VarDecl* CurrentVar = QueueDirected.front(); 2821 QueueDirected.pop(); 2822 VisitedVarsDirected.insert(CurrentVar); 2823 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar]; 2824 for (const VarDecl *Adj : AdjacentNodes) { 2825 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) { 2826 QueueDirected.push(Adj); 2827 } 2828 DependenciesMap[Var].insert(Adj); 2829 DependenciesMap[Adj].insert(Var); 2830 } 2831 } 2832 } 2833 } 2834 2835 // `Groups` stores the set of Connected Components in the graph. 2836 std::vector<VarGrpTy> Groups; 2837 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the 2838 // variables belong to. Group indexes refer to the elements in `Groups`. 2839 // `VarGrpMap` is complete in that every variable that needs fix is in it. 2840 std::map<const VarDecl *, unsigned> VarGrpMap; 2841 // The union group over the ones in "Groups" that contain parameters of `D`: 2842 llvm::SetVector<const VarDecl *> 2843 GrpsUnionForParms; // these variables need to be fixed in one step 2844 2845 // Group Connected Components for Unsafe Vars 2846 // (Dependencies based on pointer assignments) 2847 std::set<const VarDecl *> VisitedVars{}; 2848 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2849 if (VisitedVars.find(Var) == VisitedVars.end()) { 2850 VarGrpTy &VarGroup = Groups.emplace_back(); 2851 std::queue<const VarDecl*> Queue{}; 2852 2853 Queue.push(Var); 2854 while(!Queue.empty()) { 2855 const VarDecl* CurrentVar = Queue.front(); 2856 Queue.pop(); 2857 VisitedVars.insert(CurrentVar); 2858 VarGroup.push_back(CurrentVar); 2859 auto AdjacentNodes = DependenciesMap[CurrentVar]; 2860 for (const VarDecl *Adj : AdjacentNodes) { 2861 if (VisitedVars.find(Adj) == VisitedVars.end()) { 2862 Queue.push(Adj); 2863 } 2864 } 2865 } 2866 2867 bool HasParm = false; 2868 unsigned GrpIdx = Groups.size() - 1; 2869 2870 for (const VarDecl *V : VarGroup) { 2871 VarGrpMap[V] = GrpIdx; 2872 if (!HasParm && isParameterOf(V, D)) 2873 HasParm = true; 2874 } 2875 if (HasParm) 2876 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end()); 2877 } 2878 } 2879 2880 // Remove a `FixableGadget` if the associated variable is not in the graph 2881 // computed above. We do not want to generate fix-its for such variables, 2882 // since they are neither warned nor reachable from a warned one. 2883 // 2884 // Note a variable is not warned if it is not directly used in any unsafe 2885 // operation. A variable `v` is NOT reachable from an unsafe variable, if it 2886 // does not exist another variable `u` such that `u` is warned and fixing `u` 2887 // (transitively) implicates fixing `v`. 2888 // 2889 // For example, 2890 // ``` 2891 // void f(int * p) { 2892 // int * a = p; *p = 0; 2893 // } 2894 // ``` 2895 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither 2896 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of 2897 // the function above, `p` becomes reachable from a warned variable. 2898 for (auto I = FixablesForAllVars.byVar.begin(); 2899 I != FixablesForAllVars.byVar.end();) { 2900 // Note `VisitedVars` contain all the variables in the graph: 2901 if (!VisitedVars.count((*I).first)) { 2902 // no such var in graph: 2903 I = FixablesForAllVars.byVar.erase(I); 2904 } else 2905 ++I; 2906 } 2907 2908 // We assign strategies to variables that are 1) in the graph and 2) can be 2909 // fixed. Other variables have the default "Won't fix" strategy. 2910 Strategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range( 2911 VisitedVars, [&FixablesForAllVars](const VarDecl *V) { 2912 // If a warned variable has no "Fixable", it is considered unfixable: 2913 return FixablesForAllVars.byVar.count(V); 2914 })); 2915 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms); 2916 2917 if (isa<NamedDecl>(D)) 2918 // The only case where `D` is not a `NamedDecl` is when `D` is a 2919 // `BlockDecl`. Let's not fix variables in blocks for now 2920 FixItsForVariableGroup = 2921 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D, 2922 Tracker, Handler, VarGrpMgr); 2923 2924 for (const auto &G : UnsafeOps.noVar) { 2925 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, 2926 D->getASTContext()); 2927 } 2928 2929 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { 2930 auto FixItsIt = FixItsForVariableGroup.find(VD); 2931 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr, 2932 FixItsIt != FixItsForVariableGroup.end() 2933 ? std::move(FixItsIt->second) 2934 : FixItList{}, 2935 D); 2936 for (const auto &G : WarningGadgets) { 2937 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true, 2938 D->getASTContext()); 2939 } 2940 } 2941 } 2942