1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/RecursiveASTVisitor.h" 12 #include "clang/ASTMatchers/ASTMatchFinder.h" 13 #include "clang/Lex/Lexer.h" 14 #include "clang/Lex/Preprocessor.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include <memory> 17 #include <optional> 18 #include <sstream> 19 #include <queue> 20 21 using namespace llvm; 22 using namespace clang; 23 using namespace ast_matchers; 24 25 namespace clang::ast_matchers { 26 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n" 27 // except for those belonging to a different callable of "n". 28 class MatchDescendantVisitor 29 : public RecursiveASTVisitor<MatchDescendantVisitor> { 30 public: 31 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase; 32 33 // Creates an AST visitor that matches `Matcher` on all 34 // descendants of a given node "n" except for the ones 35 // belonging to a different callable of "n". 36 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, 37 internal::ASTMatchFinder *Finder, 38 internal::BoundNodesTreeBuilder *Builder, 39 internal::ASTMatchFinder::BindKind Bind, 40 const bool ignoreUnevaluatedContext) 41 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), 42 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {} 43 44 // Returns true if a match is found in a subtree of `DynNode`, which belongs 45 // to the same callable of `DynNode`. 46 bool findMatch(const DynTypedNode &DynNode) { 47 Matches = false; 48 if (const Stmt *StmtNode = DynNode.get<Stmt>()) { 49 TraverseStmt(const_cast<Stmt *>(StmtNode)); 50 *Builder = ResultBindings; 51 return Matches; 52 } 53 return false; 54 } 55 56 // The following are overriding methods from the base visitor class. 57 // They are public only to allow CRTP to work. They are *not *part 58 // of the public API of this class. 59 60 // For the matchers so far used in safe buffers, we only need to match 61 // `Stmt`s. To override more as needed. 62 63 bool TraverseDecl(Decl *Node) { 64 if (!Node) 65 return true; 66 if (!match(*Node)) 67 return false; 68 // To skip callables: 69 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node)) 70 return true; 71 // Traverse descendants 72 return VisitorBase::TraverseDecl(Node); 73 } 74 75 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) { 76 // These are unevaluated, except the result expression. 77 if(ignoreUnevaluatedContext) 78 return TraverseStmt(Node->getResultExpr()); 79 return VisitorBase::TraverseGenericSelectionExpr(Node); 80 } 81 82 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) { 83 // Unevaluated context. 84 if(ignoreUnevaluatedContext) 85 return true; 86 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node); 87 } 88 89 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) { 90 // Unevaluated context. 91 if(ignoreUnevaluatedContext) 92 return true; 93 return VisitorBase::TraverseTypeOfExprTypeLoc(Node); 94 } 95 96 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) { 97 // Unevaluated context. 98 if(ignoreUnevaluatedContext) 99 return true; 100 return VisitorBase::TraverseDecltypeTypeLoc(Node); 101 } 102 103 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) { 104 // Unevaluated context. 105 if(ignoreUnevaluatedContext) 106 return true; 107 return VisitorBase::TraverseCXXNoexceptExpr(Node); 108 } 109 110 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) { 111 // Unevaluated context. 112 if(ignoreUnevaluatedContext) 113 return true; 114 return VisitorBase::TraverseCXXTypeidExpr(Node); 115 } 116 117 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { 118 if (!Node) 119 return true; 120 if (!match(*Node)) 121 return false; 122 return VisitorBase::TraverseStmt(Node); 123 } 124 125 bool shouldVisitTemplateInstantiations() const { return true; } 126 bool shouldVisitImplicitCode() const { 127 // TODO: let's ignore implicit code for now 128 return false; 129 } 130 131 private: 132 // Sets 'Matched' to true if 'Matcher' matches 'Node' 133 // 134 // Returns 'true' if traversal should continue after this function 135 // returns, i.e. if no match is found or 'Bind' is 'BK_All'. 136 template <typename T> bool match(const T &Node) { 137 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); 138 139 if (Matcher->matches(DynTypedNode::create(Node), Finder, 140 &RecursiveBuilder)) { 141 ResultBindings.addMatch(RecursiveBuilder); 142 Matches = true; 143 if (Bind != internal::ASTMatchFinder::BK_All) 144 return false; // Abort as soon as a match is found. 145 } 146 return true; 147 } 148 149 const internal::DynTypedMatcher *const Matcher; 150 internal::ASTMatchFinder *const Finder; 151 internal::BoundNodesTreeBuilder *const Builder; 152 internal::BoundNodesTreeBuilder ResultBindings; 153 const internal::ASTMatchFinder::BindKind Bind; 154 bool Matches; 155 bool ignoreUnevaluatedContext; 156 }; 157 158 // Because we're dealing with raw pointers, let's define what we mean by that. 159 static auto hasPointerType() { 160 return hasType(hasCanonicalType(pointerType())); 161 } 162 163 static auto hasArrayType() { 164 return hasType(hasCanonicalType(arrayType())); 165 } 166 167 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) { 168 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 169 170 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true); 171 return Visitor.findMatch(DynTypedNode::create(Node)); 172 } 173 174 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) { 175 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 176 177 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false); 178 return Visitor.findMatch(DynTypedNode::create(Node)); 179 } 180 181 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region 182 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, 183 Handler) { 184 return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); 185 } 186 187 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { 188 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); 189 } 190 191 // Matches a `UnaryOperator` whose operator is pre-increment: 192 AST_MATCHER(UnaryOperator, isPreInc) { 193 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc; 194 } 195 196 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 197 // matches 'e' and 'e' is in an Unspecified Lvalue Context. 198 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) { 199 // clang-format off 200 return 201 expr(anyOf( 202 implicitCastExpr( 203 hasCastKind(CastKind::CK_LValueToRValue), 204 castSubExpr(innerMatcher)), 205 binaryOperator( 206 hasAnyOperatorName("="), 207 hasLHS(innerMatcher) 208 ) 209 )); 210 // clang-format on 211 } 212 213 214 // Returns a matcher that matches any expression `e` such that `InnerMatcher` 215 // matches `e` and `e` is in an Unspecified Pointer Context (UPC). 216 static internal::Matcher<Stmt> 217 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) { 218 // A UPC can be 219 // 1. an argument of a function call (except the callee has [[unsafe_...]] 220 // attribute), or 221 // 2. the operand of a pointer-to-(integer or bool) cast operation; or 222 // 3. the operand of a comparator operation; or 223 // 4. the operand of a pointer subtraction operation 224 // (i.e., computing the distance between two pointers); or ... 225 226 auto CallArgMatcher = 227 callExpr(forEachArgumentWithParam(InnerMatcher, 228 hasPointerType() /* array also decays to pointer type*/), 229 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); 230 231 auto CastOperandMatcher = 232 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), 233 hasCastKind(CastKind::CK_PointerToBoolean)), 234 castSubExpr(allOf(hasPointerType(), InnerMatcher))); 235 236 auto CompOperandMatcher = 237 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="), 238 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)), 239 hasRHS(allOf(hasPointerType(), InnerMatcher)))); 240 241 // A matcher that matches pointer subtractions: 242 auto PtrSubtractionMatcher = 243 binaryOperator(hasOperatorName("-"), 244 // Note that here we need both LHS and RHS to be 245 // pointer. Then the inner matcher can match any of 246 // them: 247 allOf(hasLHS(hasPointerType()), 248 hasRHS(hasPointerType())), 249 eachOf(hasLHS(InnerMatcher), 250 hasRHS(InnerMatcher))); 251 252 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, 253 PtrSubtractionMatcher)); 254 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we 255 // don't have to check that.) 256 } 257 258 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 259 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression 260 // 'e' isn't evaluated to an RValue). For example, consider the following code: 261 // int *p = new int[4]; 262 // int *q = new int[4]; 263 // if ((p = q)) {} 264 // p = q; 265 // The expression `p = q` in the conditional of the `if` statement 266 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;` 267 // in the assignment statement is in an untyped context. 268 static internal::Matcher<Stmt> 269 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) { 270 // An unspecified context can be 271 // 1. A compound statement, 272 // 2. The body of an if statement 273 // 3. Body of a loop 274 auto CompStmt = compoundStmt(forEach(InnerMatcher)); 275 auto IfStmtThen = ifStmt(hasThen(InnerMatcher)); 276 auto IfStmtElse = ifStmt(hasElse(InnerMatcher)); 277 // FIXME: Handle loop bodies. 278 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse)); 279 } 280 } // namespace clang::ast_matchers 281 282 namespace { 283 // Because the analysis revolves around variables and their types, we'll need to 284 // track uses of variables (aka DeclRefExprs). 285 using DeclUseList = SmallVector<const DeclRefExpr *, 1>; 286 287 // Convenience typedef. 288 using FixItList = SmallVector<FixItHint, 4>; 289 290 // Defined below. 291 class Strategy; 292 } // namespace 293 294 namespace { 295 /// Gadget is an individual operation in the code that may be of interest to 296 /// this analysis. Each (non-abstract) subclass corresponds to a specific 297 /// rigid AST structure that constitutes an operation on a pointer-type object. 298 /// Discovery of a gadget in the code corresponds to claiming that we understand 299 /// what this part of code is doing well enough to potentially improve it. 300 /// Gadgets can be warning (immediately deserving a warning) or fixable (not 301 /// always deserving a warning per se, but requires our attention to identify 302 /// it warrants a fixit). 303 class Gadget { 304 public: 305 enum class Kind { 306 #define GADGET(x) x, 307 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 308 }; 309 310 /// Common type of ASTMatchers used for discovering gadgets. 311 /// Useful for implementing the static matcher() methods 312 /// that are expected from all non-abstract subclasses. 313 using Matcher = decltype(stmt()); 314 315 Gadget(Kind K) : K(K) {} 316 317 Kind getKind() const { return K; } 318 319 virtual bool isWarningGadget() const = 0; 320 virtual const Stmt *getBaseStmt() const = 0; 321 322 /// Returns the list of pointer-type variables on which this gadget performs 323 /// its operation. Typically, there's only one variable. This isn't a list 324 /// of all DeclRefExprs in the gadget's AST! 325 virtual DeclUseList getClaimedVarUseSites() const = 0; 326 327 virtual ~Gadget() = default; 328 329 private: 330 Kind K; 331 }; 332 333 334 /// Warning gadgets correspond to unsafe code patterns that warrants 335 /// an immediate warning. 336 class WarningGadget : public Gadget { 337 public: 338 WarningGadget(Kind K) : Gadget(K) {} 339 340 static bool classof(const Gadget *G) { return G->isWarningGadget(); } 341 bool isWarningGadget() const final { return true; } 342 }; 343 344 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be 345 /// properly recognized in order to emit fixes. For example, if a raw pointer-type 346 /// variable is replaced by a safe C++ container, every use of such variable must be 347 /// carefully considered and possibly updated. 348 class FixableGadget : public Gadget { 349 public: 350 FixableGadget(Kind K) : Gadget(K) {} 351 352 static bool classof(const Gadget *G) { return !G->isWarningGadget(); } 353 bool isWarningGadget() const final { return false; } 354 355 /// Returns a fixit that would fix the current gadget according to 356 /// the current strategy. Returns std::nullopt if the fix cannot be produced; 357 /// returns an empty list if no fixes are necessary. 358 virtual std::optional<FixItList> getFixits(const Strategy &) const { 359 return std::nullopt; 360 } 361 362 /// Returns a list of two elements where the first element is the LHS of a pointer assignment 363 /// statement and the second element is the RHS. This two-element list represents the fact that 364 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used 365 /// later to group all those variables whose types must be modified together to prevent type 366 /// mismatches. 367 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 368 getStrategyImplications() const { 369 return std::nullopt; 370 } 371 }; 372 373 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; 374 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; 375 376 /// An increment of a pointer-type value is unsafe as it may run the pointer 377 /// out of bounds. 378 class IncrementGadget : public WarningGadget { 379 static constexpr const char *const OpTag = "op"; 380 const UnaryOperator *Op; 381 382 public: 383 IncrementGadget(const MatchFinder::MatchResult &Result) 384 : WarningGadget(Kind::Increment), 385 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 386 387 static bool classof(const Gadget *G) { 388 return G->getKind() == Kind::Increment; 389 } 390 391 static Matcher matcher() { 392 return stmt(unaryOperator( 393 hasOperatorName("++"), 394 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 395 ).bind(OpTag)); 396 } 397 398 const UnaryOperator *getBaseStmt() const override { return Op; } 399 400 DeclUseList getClaimedVarUseSites() const override { 401 SmallVector<const DeclRefExpr *, 2> Uses; 402 if (const auto *DRE = 403 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 404 Uses.push_back(DRE); 405 } 406 407 return std::move(Uses); 408 } 409 }; 410 411 /// A decrement of a pointer-type value is unsafe as it may run the pointer 412 /// out of bounds. 413 class DecrementGadget : public WarningGadget { 414 static constexpr const char *const OpTag = "op"; 415 const UnaryOperator *Op; 416 417 public: 418 DecrementGadget(const MatchFinder::MatchResult &Result) 419 : WarningGadget(Kind::Decrement), 420 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 421 422 static bool classof(const Gadget *G) { 423 return G->getKind() == Kind::Decrement; 424 } 425 426 static Matcher matcher() { 427 return stmt(unaryOperator( 428 hasOperatorName("--"), 429 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 430 ).bind(OpTag)); 431 } 432 433 const UnaryOperator *getBaseStmt() const override { return Op; } 434 435 DeclUseList getClaimedVarUseSites() const override { 436 if (const auto *DRE = 437 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 438 return {DRE}; 439 } 440 441 return {}; 442 } 443 }; 444 445 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as 446 /// it doesn't have any bounds checks for the array. 447 class ArraySubscriptGadget : public WarningGadget { 448 static constexpr const char *const ArraySubscrTag = "ArraySubscript"; 449 const ArraySubscriptExpr *ASE; 450 451 public: 452 ArraySubscriptGadget(const MatchFinder::MatchResult &Result) 453 : WarningGadget(Kind::ArraySubscript), 454 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} 455 456 static bool classof(const Gadget *G) { 457 return G->getKind() == Kind::ArraySubscript; 458 } 459 460 static Matcher matcher() { 461 // FIXME: What if the index is integer literal 0? Should this be 462 // a safe gadget in this case? 463 // clang-format off 464 return stmt(arraySubscriptExpr( 465 hasBase(ignoringParenImpCasts( 466 anyOf(hasPointerType(), hasArrayType()))), 467 unless(hasIndex( 468 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) 469 ))) 470 .bind(ArraySubscrTag)); 471 // clang-format on 472 } 473 474 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } 475 476 DeclUseList getClaimedVarUseSites() const override { 477 if (const auto *DRE = 478 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { 479 return {DRE}; 480 } 481 482 return {}; 483 } 484 }; 485 486 /// A pointer arithmetic expression of one of the forms: 487 /// \code 488 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n 489 /// \endcode 490 class PointerArithmeticGadget : public WarningGadget { 491 static constexpr const char *const PointerArithmeticTag = "ptrAdd"; 492 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; 493 const BinaryOperator *PA; // pointer arithmetic expression 494 const Expr *Ptr; // the pointer expression in `PA` 495 496 public: 497 PointerArithmeticGadget(const MatchFinder::MatchResult &Result) 498 : WarningGadget(Kind::PointerArithmetic), 499 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), 500 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} 501 502 static bool classof(const Gadget *G) { 503 return G->getKind() == Kind::PointerArithmetic; 504 } 505 506 static Matcher matcher() { 507 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType())); 508 auto PtrAtRight = 509 allOf(hasOperatorName("+"), 510 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 511 hasLHS(HasIntegerType)); 512 auto PtrAtLeft = 513 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"), 514 hasOperatorName("+="), hasOperatorName("-=")), 515 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 516 hasRHS(HasIntegerType)); 517 518 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)) 519 .bind(PointerArithmeticTag)); 520 } 521 522 const Stmt *getBaseStmt() const override { return PA; } 523 524 DeclUseList getClaimedVarUseSites() const override { 525 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { 526 return {DRE}; 527 } 528 529 return {}; 530 } 531 // FIXME: pointer adding zero should be fine 532 // FIXME: this gadge will need a fix-it 533 }; 534 535 /// A pointer initialization expression of the form: 536 /// \code 537 /// int *p = q; 538 /// \endcode 539 class PointerInitGadget : public FixableGadget { 540 private: 541 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS"; 542 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS"; 543 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI` 544 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI` 545 546 public: 547 PointerInitGadget(const MatchFinder::MatchResult &Result) 548 : FixableGadget(Kind::PointerInit), 549 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)), 550 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {} 551 552 static bool classof(const Gadget *G) { 553 return G->getKind() == Kind::PointerInit; 554 } 555 556 static Matcher matcher() { 557 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl( 558 hasInitializer(ignoringImpCasts(declRefExpr( 559 hasPointerType()). 560 bind(PointerInitRHSTag)))). 561 bind(PointerInitLHSTag))); 562 563 return stmt(PtrInitStmt); 564 } 565 566 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 567 568 virtual const Stmt *getBaseStmt() const override { return nullptr; } 569 570 virtual DeclUseList getClaimedVarUseSites() const override { 571 return DeclUseList{PtrInitRHS}; 572 } 573 574 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 575 getStrategyImplications() const override { 576 return std::make_pair(PtrInitLHS, 577 cast<VarDecl>(PtrInitRHS->getDecl())); 578 } 579 }; 580 581 /// A pointer assignment expression of the form: 582 /// \code 583 /// p = q; 584 /// \endcode 585 class PointerAssignmentGadget : public FixableGadget { 586 private: 587 static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; 588 static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; 589 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA` 590 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA` 591 592 public: 593 PointerAssignmentGadget(const MatchFinder::MatchResult &Result) 594 : FixableGadget(Kind::PointerAssignment), 595 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), 596 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} 597 598 static bool classof(const Gadget *G) { 599 return G->getKind() == Kind::PointerAssignment; 600 } 601 602 static Matcher matcher() { 603 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="), 604 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(), 605 to(varDecl())). 606 bind(PointerAssignRHSTag))), 607 hasLHS(declRefExpr(hasPointerType(), 608 to(varDecl())). 609 bind(PointerAssignLHSTag)))); 610 611 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); 612 } 613 614 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 615 616 virtual const Stmt *getBaseStmt() const override { return nullptr; } 617 618 virtual DeclUseList getClaimedVarUseSites() const override { 619 return DeclUseList{PtrLHS, PtrRHS}; 620 } 621 622 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 623 getStrategyImplications() const override { 624 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()), 625 cast<VarDecl>(PtrRHS->getDecl())); 626 } 627 }; 628 629 /// A call of a function or method that performs unchecked buffer operations 630 /// over one of its pointer parameters. 631 class UnsafeBufferUsageAttrGadget : public WarningGadget { 632 constexpr static const char *const OpTag = "call_expr"; 633 const CallExpr *Op; 634 635 public: 636 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result) 637 : WarningGadget(Kind::UnsafeBufferUsageAttr), 638 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {} 639 640 static bool classof(const Gadget *G) { 641 return G->getKind() == Kind::UnsafeBufferUsageAttr; 642 } 643 644 static Matcher matcher() { 645 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))) 646 .bind(OpTag)); 647 } 648 const Stmt *getBaseStmt() const override { return Op; } 649 650 DeclUseList getClaimedVarUseSites() const override { return {}; } 651 }; 652 653 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue 654 // Context (see `isInUnspecifiedLvalueContext`). 655 // Note here `[]` is the built-in subscript operator. 656 class ULCArraySubscriptGadget : public FixableGadget { 657 private: 658 static constexpr const char *const ULCArraySubscriptTag = 659 "ArraySubscriptUnderULC"; 660 const ArraySubscriptExpr *Node; 661 662 public: 663 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) 664 : FixableGadget(Kind::ULCArraySubscript), 665 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) { 666 assert(Node != nullptr && "Expecting a non-null matching result"); 667 } 668 669 static bool classof(const Gadget *G) { 670 return G->getKind() == Kind::ULCArraySubscript; 671 } 672 673 static Matcher matcher() { 674 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 675 auto BaseIsArrayOrPtrDRE = 676 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr))); 677 auto Target = 678 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag); 679 680 return expr(isInUnspecifiedLvalueContext(Target)); 681 } 682 683 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 684 685 virtual const Stmt *getBaseStmt() const override { return Node; } 686 687 virtual DeclUseList getClaimedVarUseSites() const override { 688 if (const auto *DRE = 689 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) { 690 return {DRE}; 691 } 692 return {}; 693 } 694 }; 695 696 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the 697 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits 698 // fixit of the form `UPC(DRE.data())`. 699 class UPCStandalonePointerGadget : public FixableGadget { 700 private: 701 static constexpr const char *const DeclRefExprTag = "StandalonePointer"; 702 const DeclRefExpr *Node; 703 704 public: 705 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result) 706 : FixableGadget(Kind::UPCStandalonePointer), 707 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) { 708 assert(Node != nullptr && "Expecting a non-null matching result"); 709 } 710 711 static bool classof(const Gadget *G) { 712 return G->getKind() == Kind::UPCStandalonePointer; 713 } 714 715 static Matcher matcher() { 716 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 717 auto target = expr( 718 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr, to(varDecl()))).bind(DeclRefExprTag))); 719 return stmt(isInUnspecifiedPointerContext(target)); 720 } 721 722 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 723 724 virtual const Stmt *getBaseStmt() const override { return Node; } 725 726 virtual DeclUseList getClaimedVarUseSites() const override { 727 return {Node}; 728 } 729 }; 730 731 class PointerDereferenceGadget : public FixableGadget { 732 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 733 static constexpr const char *const OperatorTag = "op"; 734 735 const DeclRefExpr *BaseDeclRefExpr = nullptr; 736 const UnaryOperator *Op = nullptr; 737 738 public: 739 PointerDereferenceGadget(const MatchFinder::MatchResult &Result) 740 : FixableGadget(Kind::PointerDereference), 741 BaseDeclRefExpr( 742 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 743 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {} 744 745 static bool classof(const Gadget *G) { 746 return G->getKind() == Kind::PointerDereference; 747 } 748 749 static Matcher matcher() { 750 auto Target = 751 unaryOperator( 752 hasOperatorName("*"), 753 has(expr(ignoringParenImpCasts( 754 declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag))))) 755 .bind(OperatorTag); 756 757 return expr(isInUnspecifiedLvalueContext(Target)); 758 } 759 760 DeclUseList getClaimedVarUseSites() const override { 761 return {BaseDeclRefExpr}; 762 } 763 764 virtual const Stmt *getBaseStmt() const final { return Op; } 765 766 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 767 }; 768 769 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer 770 // Context (see `isInUnspecifiedPointerContext`). 771 // Note here `[]` is the built-in subscript operator. 772 class UPCAddressofArraySubscriptGadget : public FixableGadget { 773 private: 774 static constexpr const char *const UPCAddressofArraySubscriptTag = 775 "AddressofArraySubscriptUnderUPC"; 776 const UnaryOperator *Node; // the `&DRE[any]` node 777 778 public: 779 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) 780 : FixableGadget(Kind::ULCArraySubscript), 781 Node(Result.Nodes.getNodeAs<UnaryOperator>( 782 UPCAddressofArraySubscriptTag)) { 783 assert(Node != nullptr && "Expecting a non-null matching result"); 784 } 785 786 static bool classof(const Gadget *G) { 787 return G->getKind() == Kind::UPCAddressofArraySubscript; 788 } 789 790 static Matcher matcher() { 791 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 792 unaryOperator(hasOperatorName("&"), 793 hasUnaryOperand(arraySubscriptExpr( 794 hasBase(ignoringParenImpCasts(declRefExpr()))))) 795 .bind(UPCAddressofArraySubscriptTag))))); 796 } 797 798 virtual std::optional<FixItList> getFixits(const Strategy &) const override; 799 800 virtual const Stmt *getBaseStmt() const override { return Node; } 801 802 virtual DeclUseList getClaimedVarUseSites() const override { 803 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr()); 804 const auto *DRE = 805 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts()); 806 return {DRE}; 807 } 808 }; 809 } // namespace 810 811 namespace { 812 // An auxiliary tracking facility for the fixit analysis. It helps connect 813 // declarations to its uses and make sure we've covered all uses with our 814 // analysis before we try to fix the declaration. 815 class DeclUseTracker { 816 using UseSetTy = SmallSet<const DeclRefExpr *, 16>; 817 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>; 818 819 // Allocate on the heap for easier move. 820 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()}; 821 DefMapTy Defs{}; 822 823 public: 824 DeclUseTracker() = default; 825 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. 826 DeclUseTracker &operator=(const DeclUseTracker &) = delete; 827 DeclUseTracker(DeclUseTracker &&) = default; 828 DeclUseTracker &operator=(DeclUseTracker &&) = default; 829 830 // Start tracking a freshly discovered DRE. 831 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } 832 833 // Stop tracking the DRE as it's been fully figured out. 834 void claimUse(const DeclRefExpr *DRE) { 835 assert(Uses->count(DRE) && 836 "DRE not found or claimed by multiple matchers!"); 837 Uses->erase(DRE); 838 } 839 840 // A variable is unclaimed if at least one use is unclaimed. 841 bool hasUnclaimedUses(const VarDecl *VD) const { 842 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? 843 return any_of(*Uses, [VD](const DeclRefExpr *DRE) { 844 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); 845 }); 846 } 847 848 void discoverDecl(const DeclStmt *DS) { 849 for (const Decl *D : DS->decls()) { 850 if (const auto *VD = dyn_cast<VarDecl>(D)) { 851 // FIXME: Assertion temporarily disabled due to a bug in 852 // ASTMatcher internal behavior in presence of GNU 853 // statement-expressions. We need to properly investigate this 854 // because it can screw up our algorithm in other ways. 855 // assert(Defs.count(VD) == 0 && "Definition already discovered!"); 856 Defs[VD] = DS; 857 } 858 } 859 } 860 861 const DeclStmt *lookupDecl(const VarDecl *VD) const { 862 auto It = Defs.find(VD); 863 assert(It != Defs.end() && "Definition never discovered!"); 864 return It->second; 865 } 866 }; 867 } // namespace 868 869 namespace { 870 // Strategy is a map from variables to the way we plan to emit fixes for 871 // these variables. It is figured out gradually by trying different fixes 872 // for different variables depending on gadgets in which these variables 873 // participate. 874 class Strategy { 875 public: 876 enum class Kind { 877 Wontfix, // We don't plan to emit a fixit for this variable. 878 Span, // We recommend replacing the variable with std::span. 879 Iterator, // We recommend replacing the variable with std::span::iterator. 880 Array, // We recommend replacing the variable with std::array. 881 Vector // We recommend replacing the variable with std::vector. 882 }; 883 884 private: 885 using MapTy = llvm::DenseMap<const VarDecl *, Kind>; 886 887 MapTy Map; 888 889 public: 890 Strategy() = default; 891 Strategy(const Strategy &) = delete; // Let's avoid copies. 892 Strategy &operator=(const Strategy &) = delete; 893 Strategy(Strategy &&) = default; 894 Strategy &operator=(Strategy &&) = default; 895 896 void set(const VarDecl *VD, Kind K) { Map[VD] = K; } 897 898 Kind lookup(const VarDecl *VD) const { 899 auto I = Map.find(VD); 900 if (I == Map.end()) 901 return Kind::Wontfix; 902 903 return I->second; 904 } 905 }; 906 } // namespace 907 908 909 // Representing a pointer type expression of the form `++Ptr` in an Unspecified 910 // Pointer Context (UPC): 911 class UPCPreIncrementGadget : public FixableGadget { 912 private: 913 static constexpr const char *const UPCPreIncrementTag = 914 "PointerPreIncrementUnderUPC"; 915 const UnaryOperator *Node; // the `++Ptr` node 916 917 public: 918 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result) 919 : FixableGadget(Kind::UPCPreIncrement), 920 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) { 921 assert(Node != nullptr && "Expecting a non-null matching result"); 922 } 923 924 static bool classof(const Gadget *G) { 925 return G->getKind() == Kind::UPCPreIncrement; 926 } 927 928 static Matcher matcher() { 929 // Note here we match `++Ptr` for any expression `Ptr` of pointer type. 930 // Although currently we can only provide fix-its when `Ptr` is a DRE, we 931 // can have the matcher be general, so long as `getClaimedVarUseSites` does 932 // things right. 933 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 934 unaryOperator(isPreInc(), 935 hasUnaryOperand(declRefExpr()) 936 ).bind(UPCPreIncrementTag))))); 937 } 938 939 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 940 941 virtual const Stmt *getBaseStmt() const override { return Node; } 942 943 virtual DeclUseList getClaimedVarUseSites() const override { 944 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())}; 945 } 946 }; 947 948 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 + 949 // ptr)`: 950 class DerefSimplePtrArithFixableGadget : public FixableGadget { 951 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 952 static constexpr const char *const DerefOpTag = "DerefOp"; 953 static constexpr const char *const AddOpTag = "AddOp"; 954 static constexpr const char *const OffsetTag = "Offset"; 955 956 const DeclRefExpr *BaseDeclRefExpr = nullptr; 957 const UnaryOperator *DerefOp = nullptr; 958 const BinaryOperator *AddOp = nullptr; 959 const IntegerLiteral *Offset = nullptr; 960 961 public: 962 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result) 963 : FixableGadget(Kind::DerefSimplePtrArithFixable), 964 BaseDeclRefExpr( 965 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 966 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)), 967 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)), 968 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {} 969 970 static Matcher matcher() { 971 // clang-format off 972 auto ThePtr = expr(hasPointerType(), 973 ignoringImpCasts(declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag))); 974 auto PlusOverPtrAndInteger = expr(anyOf( 975 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr), 976 hasRHS(integerLiteral().bind(OffsetTag))) 977 .bind(AddOpTag), 978 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr), 979 hasLHS(integerLiteral().bind(OffsetTag))) 980 .bind(AddOpTag))); 981 return isInUnspecifiedLvalueContext(unaryOperator( 982 hasOperatorName("*"), 983 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger))) 984 .bind(DerefOpTag)); 985 // clang-format on 986 } 987 988 virtual std::optional<FixItList> getFixits(const Strategy &s) const final; 989 990 // TODO remove this method from FixableGadget interface 991 virtual const Stmt *getBaseStmt() const final { return nullptr; } 992 993 virtual DeclUseList getClaimedVarUseSites() const final { 994 return {BaseDeclRefExpr}; 995 } 996 }; 997 998 /// Scan the function and return a list of gadgets found with provided kits. 999 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker> 1000 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, 1001 bool EmitSuggestions) { 1002 1003 struct GadgetFinderCallback : MatchFinder::MatchCallback { 1004 FixableGadgetList FixableGadgets; 1005 WarningGadgetList WarningGadgets; 1006 DeclUseTracker Tracker; 1007 1008 void run(const MatchFinder::MatchResult &Result) override { 1009 // In debug mode, assert that we've found exactly one gadget. 1010 // This helps us avoid conflicts in .bind() tags. 1011 #if NDEBUG 1012 #define NEXT return 1013 #else 1014 [[maybe_unused]] int numFound = 0; 1015 #define NEXT ++numFound 1016 #endif 1017 1018 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) { 1019 Tracker.discoverUse(DRE); 1020 NEXT; 1021 } 1022 1023 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) { 1024 Tracker.discoverDecl(DS); 1025 NEXT; 1026 } 1027 1028 // Figure out which matcher we've found, and call the appropriate 1029 // subclass constructor. 1030 // FIXME: Can we do this more logarithmically? 1031 #define FIXABLE_GADGET(name) \ 1032 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1033 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1034 NEXT; \ 1035 } 1036 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1037 #define WARNING_GADGET(name) \ 1038 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1039 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1040 NEXT; \ 1041 } 1042 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1043 1044 assert(numFound >= 1 && "Gadgets not found in match result!"); 1045 assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); 1046 } 1047 }; 1048 1049 MatchFinder M; 1050 GadgetFinderCallback CB; 1051 1052 // clang-format off 1053 M.addMatcher( 1054 stmt( 1055 forEachDescendantEvaluatedStmt(stmt(anyOf( 1056 // Add Gadget::matcher() for every gadget in the registry. 1057 #define WARNING_GADGET(x) \ 1058 allOf(x ## Gadget::matcher().bind(#x), \ 1059 notInSafeBufferOptOut(&Handler)), 1060 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1061 // Avoid a hanging comma. 1062 unless(stmt()) 1063 ))) 1064 ), 1065 &CB 1066 ); 1067 // clang-format on 1068 1069 if (EmitSuggestions) { 1070 // clang-format off 1071 M.addMatcher( 1072 stmt( 1073 forEachDescendantStmt(stmt(eachOf( 1074 #define FIXABLE_GADGET(x) \ 1075 x ## Gadget::matcher().bind(#x), 1076 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1077 // In parallel, match all DeclRefExprs so that to find out 1078 // whether there are any uncovered by gadgets. 1079 declRefExpr(anyOf(hasPointerType(), hasArrayType()), 1080 to(varDecl())).bind("any_dre"), 1081 // Also match DeclStmts because we'll need them when fixing 1082 // their underlying VarDecls that otherwise don't have 1083 // any backreferences to DeclStmts. 1084 declStmt().bind("any_ds") 1085 ))) 1086 ), 1087 &CB 1088 ); 1089 // clang-format on 1090 } 1091 1092 M.match(*D->getBody(), D->getASTContext()); 1093 1094 // Gadgets "claim" variables they're responsible for. Once this loop finishes, 1095 // the tracker will only track DREs that weren't claimed by any gadgets, 1096 // i.e. not understood by the analysis. 1097 for (const auto &G : CB.FixableGadgets) { 1098 for (const auto *DRE : G->getClaimedVarUseSites()) { 1099 CB.Tracker.claimUse(DRE); 1100 } 1101 } 1102 1103 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), 1104 std::move(CB.Tracker)}; 1105 } 1106 1107 // Compares AST nodes by source locations. 1108 template <typename NodeTy> struct CompareNode { 1109 bool operator()(const NodeTy *N1, const NodeTy *N2) const { 1110 return N1->getBeginLoc().getRawEncoding() < 1111 N2->getBeginLoc().getRawEncoding(); 1112 } 1113 }; 1114 1115 struct WarningGadgetSets { 1116 std::map<const VarDecl *, std::set<const WarningGadget *>, 1117 // To keep keys sorted by their locations in the map so that the 1118 // order is deterministic: 1119 CompareNode<VarDecl>> 1120 byVar; 1121 // These Gadgets are not related to pointer variables (e. g. temporaries). 1122 llvm::SmallVector<const WarningGadget *, 16> noVar; 1123 }; 1124 1125 static WarningGadgetSets 1126 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) { 1127 WarningGadgetSets result; 1128 // If some gadgets cover more than one 1129 // variable, they'll appear more than once in the map. 1130 for (auto &G : AllUnsafeOperations) { 1131 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); 1132 1133 bool AssociatedWithVarDecl = false; 1134 for (const DeclRefExpr *DRE : ClaimedVarUseSites) { 1135 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1136 result.byVar[VD].insert(G.get()); 1137 AssociatedWithVarDecl = true; 1138 } 1139 } 1140 1141 if (!AssociatedWithVarDecl) { 1142 result.noVar.push_back(G.get()); 1143 continue; 1144 } 1145 } 1146 return result; 1147 } 1148 1149 struct FixableGadgetSets { 1150 std::map<const VarDecl *, std::set<const FixableGadget *>> byVar; 1151 }; 1152 1153 static FixableGadgetSets 1154 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { 1155 FixableGadgetSets FixablesForUnsafeVars; 1156 for (auto &F : AllFixableOperations) { 1157 DeclUseList DREs = F->getClaimedVarUseSites(); 1158 1159 for (const DeclRefExpr *DRE : DREs) { 1160 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1161 FixablesForUnsafeVars.byVar[VD].insert(F.get()); 1162 } 1163 } 1164 } 1165 return FixablesForUnsafeVars; 1166 } 1167 1168 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts, 1169 const SourceManager &SM) { 1170 // A simple interval overlap detection algorithm. Sorts all ranges by their 1171 // begin location then finds the first overlap in one pass. 1172 std::vector<const FixItHint *> All; // a copy of `FixIts` 1173 1174 for (const FixItHint &H : FixIts) 1175 All.push_back(&H); 1176 std::sort(All.begin(), All.end(), 1177 [&SM](const FixItHint *H1, const FixItHint *H2) { 1178 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(), 1179 H2->RemoveRange.getBegin()); 1180 }); 1181 1182 const FixItHint *CurrHint = nullptr; 1183 1184 for (const FixItHint *Hint : All) { 1185 if (!CurrHint || 1186 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(), 1187 Hint->RemoveRange.getBegin())) { 1188 // Either to initialize `CurrHint` or `CurrHint` does not 1189 // overlap with `Hint`: 1190 CurrHint = Hint; 1191 } else 1192 // In case `Hint` overlaps the `CurrHint`, we found at least one 1193 // conflict: 1194 return true; 1195 } 1196 return false; 1197 } 1198 1199 std::optional<FixItList> 1200 PointerAssignmentGadget::getFixits(const Strategy &S) const { 1201 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); 1202 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); 1203 switch (S.lookup(LeftVD)) { 1204 case Strategy::Kind::Span: 1205 if (S.lookup(RightVD) == Strategy::Kind::Span) 1206 return FixItList{}; 1207 return std::nullopt; 1208 case Strategy::Kind::Wontfix: 1209 return std::nullopt; 1210 case Strategy::Kind::Iterator: 1211 case Strategy::Kind::Array: 1212 case Strategy::Kind::Vector: 1213 llvm_unreachable("unsupported strategies for FixableGadgets"); 1214 } 1215 return std::nullopt; 1216 } 1217 1218 std::optional<FixItList> 1219 PointerInitGadget::getFixits(const Strategy &S) const { 1220 const auto *LeftVD = PtrInitLHS; 1221 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl()); 1222 switch (S.lookup(LeftVD)) { 1223 case Strategy::Kind::Span: 1224 if (S.lookup(RightVD) == Strategy::Kind::Span) 1225 return FixItList{}; 1226 return std::nullopt; 1227 case Strategy::Kind::Wontfix: 1228 return std::nullopt; 1229 case Strategy::Kind::Iterator: 1230 case Strategy::Kind::Array: 1231 case Strategy::Kind::Vector: 1232 llvm_unreachable("unsupported strategies for FixableGadgets"); 1233 } 1234 return std::nullopt; 1235 } 1236 1237 std::optional<FixItList> 1238 ULCArraySubscriptGadget::getFixits(const Strategy &S) const { 1239 if (const auto *DRE = 1240 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) 1241 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1242 switch (S.lookup(VD)) { 1243 case Strategy::Kind::Span: { 1244 // If the index has a negative constant value, we give up as no valid 1245 // fix-it can be generated: 1246 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in! 1247 VD->getASTContext(); 1248 if (auto ConstVal = Node->getIdx()->getIntegerConstantExpr(Ctx)) { 1249 if (ConstVal->isNegative()) 1250 return std::nullopt; 1251 } else if (!Node->getIdx()->getType()->isUnsignedIntegerType()) 1252 return std::nullopt; 1253 // no-op is a good fix-it, otherwise 1254 return FixItList{}; 1255 } 1256 case Strategy::Kind::Wontfix: 1257 case Strategy::Kind::Iterator: 1258 case Strategy::Kind::Array: 1259 case Strategy::Kind::Vector: 1260 llvm_unreachable("unsupported strategies for FixableGadgets"); 1261 } 1262 } 1263 return std::nullopt; 1264 } 1265 1266 static std::optional<FixItList> // forward declaration 1267 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); 1268 1269 std::optional<FixItList> 1270 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { 1271 auto DREs = getClaimedVarUseSites(); 1272 const auto *VD = cast<VarDecl>(DREs.front()->getDecl()); 1273 1274 switch (S.lookup(VD)) { 1275 case Strategy::Kind::Span: 1276 return fixUPCAddressofArraySubscriptWithSpan(Node); 1277 case Strategy::Kind::Wontfix: 1278 case Strategy::Kind::Iterator: 1279 case Strategy::Kind::Array: 1280 case Strategy::Kind::Vector: 1281 llvm_unreachable("unsupported strategies for FixableGadgets"); 1282 } 1283 return std::nullopt; // something went wrong, no fix-it 1284 } 1285 1286 // FIXME: this function should be customizable through format 1287 static StringRef getEndOfLine() { 1288 static const char *const EOL = "\n"; 1289 return EOL; 1290 } 1291 1292 // Returns the text indicating that the user needs to provide input there: 1293 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { 1294 std::string s = std::string("<# "); 1295 s += HintTextToUser; 1296 s += " #>"; 1297 return s; 1298 } 1299 1300 // Return the text representation of the given `APInt Val`: 1301 static std::string getAPIntText(APInt Val) { 1302 SmallVector<char> Txt; 1303 Val.toString(Txt, 10, true); 1304 // APInt::toString does not add '\0' to the end of the string for us: 1305 Txt.push_back('\0'); 1306 return Txt.data(); 1307 } 1308 1309 // Return the source location of the last character of the AST `Node`. 1310 template <typename NodeTy> 1311 static std::optional<SourceLocation> 1312 getEndCharLoc(const NodeTy *Node, const SourceManager &SM, 1313 const LangOptions &LangOpts) { 1314 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); 1315 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); 1316 1317 if (Loc.isValid()) 1318 return Loc; 1319 1320 return std::nullopt; 1321 } 1322 1323 // Return the source location just past the last character of the AST `Node`. 1324 template <typename NodeTy> 1325 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node, 1326 const SourceManager &SM, 1327 const LangOptions &LangOpts) { 1328 SourceLocation Loc = 1329 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts); 1330 1331 if (Loc.isValid()) 1332 return Loc; 1333 1334 return std::nullopt; 1335 } 1336 1337 // Return text representation of an `Expr`. 1338 static std::optional<StringRef> getExprText(const Expr *E, 1339 const SourceManager &SM, 1340 const LangOptions &LangOpts) { 1341 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts); 1342 1343 if (LastCharLoc) 1344 return Lexer::getSourceText( 1345 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM, 1346 LangOpts); 1347 1348 return std::nullopt; 1349 } 1350 1351 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range. 1352 static std::optional<StringRef> getRangeText(SourceRange SR, 1353 const SourceManager &SM, 1354 const LangOptions &LangOpts) { 1355 bool Invalid = false; 1356 CharSourceRange CSR = CharSourceRange::getCharRange(SR.getBegin(), SR.getEnd()); 1357 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid); 1358 1359 if (!Invalid) 1360 return Text; 1361 return std::nullopt; 1362 } 1363 1364 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer 1365 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not 1366 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me 1367 // :( ), `Qualifiers` of the pointee type is returned separately through the 1368 // output parameter `QualifiersToAppend`. 1369 static std::optional<std::string> 1370 getPointeeTypeText(const ParmVarDecl *VD, const SourceManager &SM, 1371 const LangOptions &LangOpts, 1372 std::optional<Qualifiers> *QualifiersToAppend) { 1373 QualType Ty = VD->getType(); 1374 QualType PteTy; 1375 1376 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() && 1377 "Expecting a VarDecl of type of pointer to object type"); 1378 PteTy = Ty->getPointeeType(); 1379 if (PteTy->hasUnnamedOrLocalType()) 1380 // If the pointee type is unnamed, we can't refer to it 1381 return std::nullopt; 1382 1383 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc(); 1384 TypeLoc PteTyLoc = TyLoc.getUnqualifiedLoc().getNextTypeLoc(); 1385 SourceLocation VDNameStartLoc = VD->getLocation(); 1386 1387 if (!(VDNameStartLoc.isValid() && PteTyLoc.getSourceRange().isValid())) { 1388 // We are expecting these locations to be valid. But in some cases, they are 1389 // not all valid. It is a Clang bug to me and we are not responsible for 1390 // fixing it. So we will just give up for now when it happens. 1391 return std::nullopt; 1392 } 1393 1394 // Note that TypeLoc.getEndLoc() returns the begin location of the last token: 1395 SourceLocation PteEndOfTokenLoc = 1396 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts); 1397 1398 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, VDNameStartLoc)) { 1399 // We only deal with the cases where the source text of the pointee type 1400 // appears on the left-hand side of the variable identifier completely, 1401 // including the following forms: 1402 // `T ident`, 1403 // `T ident[]`, where `T` is any type. 1404 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`. 1405 return std::nullopt; 1406 } 1407 if (PteTy.hasQualifiers()) { 1408 // TypeLoc does not provide source ranges for qualifiers (it says it's 1409 // intentional but seems fishy to me), so we cannot get the full text 1410 // `PteTy` via source ranges. 1411 *QualifiersToAppend = PteTy.getQualifiers(); 1412 } 1413 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts) 1414 ->str(); 1415 } 1416 1417 // Returns the text of the name (with qualifiers) of a `FunctionDecl`. 1418 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD, 1419 const SourceManager &SM, 1420 const LangOptions &LangOpts) { 1421 SourceLocation BeginLoc = FD->getQualifier() 1422 ? FD->getQualifierLoc().getBeginLoc() 1423 : FD->getNameInfo().getBeginLoc(); 1424 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the 1425 // last token: 1426 SourceLocation EndLoc = Lexer::getLocForEndOfToken( 1427 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts); 1428 SourceRange NameRange{BeginLoc, EndLoc}; 1429 1430 return getRangeText(NameRange, SM, LangOpts); 1431 } 1432 1433 std::optional<FixItList> 1434 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const { 1435 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1436 1437 if (VD && s.lookup(VD) == Strategy::Kind::Span) { 1438 ASTContext &Ctx = VD->getASTContext(); 1439 // std::span can't represent elements before its begin() 1440 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx)) 1441 if (ConstVal->isNegative()) 1442 return std::nullopt; 1443 1444 // note that the expr may (oddly) has multiple layers of parens 1445 // example: 1446 // *((..(pointer + 123)..)) 1447 // goal: 1448 // pointer[123] 1449 // Fix-It: 1450 // remove '*(' 1451 // replace ' + ' with '[' 1452 // replace ')' with ']' 1453 1454 // example: 1455 // *((..(123 + pointer)..)) 1456 // goal: 1457 // 123[pointer] 1458 // Fix-It: 1459 // remove '*(' 1460 // replace ' + ' with '[' 1461 // replace ')' with ']' 1462 1463 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS(); 1464 const SourceManager &SM = Ctx.getSourceManager(); 1465 const LangOptions &LangOpts = Ctx.getLangOpts(); 1466 CharSourceRange StarWithTrailWhitespace = 1467 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(), 1468 LHS->getBeginLoc()); 1469 1470 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts); 1471 if (!LHSLocation) 1472 return std::nullopt; 1473 1474 CharSourceRange PlusWithSurroundingWhitespace = 1475 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc()); 1476 1477 std::optional<SourceLocation> AddOpLocation = 1478 getPastLoc(AddOp, SM, LangOpts); 1479 std::optional<SourceLocation> DerefOpLocation = 1480 getPastLoc(DerefOp, SM, LangOpts); 1481 1482 if (!AddOpLocation || !DerefOpLocation) 1483 return std::nullopt; 1484 1485 CharSourceRange ClosingParenWithPrecWhitespace = 1486 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation); 1487 1488 return FixItList{ 1489 {FixItHint::CreateRemoval(StarWithTrailWhitespace), 1490 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["), 1491 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}}; 1492 } 1493 return std::nullopt; // something wrong or unsupported, give up 1494 } 1495 1496 std::optional<FixItList> 1497 PointerDereferenceGadget::getFixits(const Strategy &S) const { 1498 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1499 switch (S.lookup(VD)) { 1500 case Strategy::Kind::Span: { 1501 ASTContext &Ctx = VD->getASTContext(); 1502 SourceManager &SM = Ctx.getSourceManager(); 1503 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] 1504 // Deletes the *operand 1505 CharSourceRange derefRange = clang::CharSourceRange::getCharRange( 1506 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); 1507 // Inserts the [0] 1508 std::optional<SourceLocation> EndOfOperand = 1509 getEndCharLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts()); 1510 if (EndOfOperand) { 1511 return FixItList{{FixItHint::CreateRemoval(derefRange), 1512 FixItHint::CreateInsertion( 1513 (*EndOfOperand).getLocWithOffset(1), "[0]")}}; 1514 } 1515 } 1516 [[fallthrough]]; 1517 case Strategy::Kind::Iterator: 1518 case Strategy::Kind::Array: 1519 case Strategy::Kind::Vector: 1520 llvm_unreachable("Strategy not implemented yet!"); 1521 case Strategy::Kind::Wontfix: 1522 llvm_unreachable("Invalid strategy!"); 1523 } 1524 1525 return std::nullopt; 1526 } 1527 1528 // Generates fix-its replacing an expression of the form UPC(DRE) with 1529 // `DRE.data()` 1530 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S) 1531 const { 1532 const auto VD = cast<VarDecl>(Node->getDecl()); 1533 switch (S.lookup(VD)) { 1534 case Strategy::Kind::Span: { 1535 ASTContext &Ctx = VD->getASTContext(); 1536 SourceManager &SM = Ctx.getSourceManager(); 1537 // Inserts the .data() after the DRE 1538 std::optional<SourceLocation> EndOfOperand = 1539 getPastLoc(Node, SM, Ctx.getLangOpts()); 1540 1541 if (EndOfOperand) 1542 return FixItList{{FixItHint::CreateInsertion( 1543 *EndOfOperand, ".data()")}}; 1544 } 1545 [[fallthrough]]; 1546 case Strategy::Kind::Wontfix: 1547 case Strategy::Kind::Iterator: 1548 case Strategy::Kind::Array: 1549 case Strategy::Kind::Vector: 1550 llvm_unreachable("unsupported strategies for FixableGadgets"); 1551 } 1552 1553 return std::nullopt; 1554 } 1555 1556 // Generates fix-its replacing an expression of the form `&DRE[e]` with 1557 // `&DRE.data()[e]`: 1558 static std::optional<FixItList> 1559 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { 1560 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr()); 1561 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts()); 1562 // FIXME: this `getASTContext` call is costly, we should pass the 1563 // ASTContext in: 1564 const ASTContext &Ctx = DRE->getDecl()->getASTContext(); 1565 const Expr *Idx = ArraySub->getIdx(); 1566 const SourceManager &SM = Ctx.getSourceManager(); 1567 const LangOptions &LangOpts = Ctx.getLangOpts(); 1568 std::stringstream SS; 1569 bool IdxIsLitZero = false; 1570 1571 if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) 1572 if ((*ICE).isZero()) 1573 IdxIsLitZero = true; 1574 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts); 1575 if (!DreString) 1576 return std::nullopt; 1577 1578 if (IdxIsLitZero) { 1579 // If the index is literal zero, we produce the most concise fix-it: 1580 SS << (*DreString).str() << ".data()"; 1581 } else { 1582 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts); 1583 if (!IndexString) 1584 return std::nullopt; 1585 1586 SS << "&" << (*DreString).str() << ".data()" 1587 << "[" << (*IndexString).str() << "]"; 1588 } 1589 return FixItList{ 1590 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; 1591 } 1592 1593 1594 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const { 1595 DeclUseList DREs = getClaimedVarUseSites(); 1596 1597 if (DREs.size() != 1) 1598 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we 1599 // give up 1600 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1601 if (S.lookup(VD) == Strategy::Kind::Span) { 1602 FixItList Fixes; 1603 std::stringstream SS; 1604 const Stmt *PreIncNode = getBaseStmt(); 1605 StringRef varName = VD->getName(); 1606 const ASTContext &Ctx = VD->getASTContext(); 1607 1608 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()): 1609 SS << "(" << varName.data() << " = " << varName.data() 1610 << ".subspan(1)).data()"; 1611 std::optional<SourceLocation> PreIncLocation = 1612 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1613 if (!PreIncLocation) 1614 return std::nullopt; 1615 1616 Fixes.push_back(FixItHint::CreateReplacement( 1617 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str())); 1618 return Fixes; 1619 } 1620 } 1621 return std::nullopt; // Not in the cases that we can handle for now, give up. 1622 } 1623 1624 1625 // For a non-null initializer `Init` of `T *` type, this function returns 1626 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it 1627 // to output stream. 1628 // In many cases, this function cannot figure out the actual extent `S`. It 1629 // then will use a place holder to replace `S` to ask users to fill `S` in. The 1630 // initializer shall be used to initialize a variable of type `std::span<T>`. 1631 // 1632 // FIXME: Support multi-level pointers 1633 // 1634 // Parameters: 1635 // `Init` a pointer to the initializer expression 1636 // `Ctx` a reference to the ASTContext 1637 static FixItList 1638 populateInitializerFixItWithSpan(const Expr *Init, ASTContext &Ctx, 1639 const StringRef UserFillPlaceHolder) { 1640 const SourceManager &SM = Ctx.getSourceManager(); 1641 const LangOptions &LangOpts = Ctx.getLangOpts(); 1642 1643 // If `Init` has a constant value that is (or equivalent to) a 1644 // NULL pointer, we use the default constructor to initialize the span 1645 // object, i.e., a `std:span` variable declaration with no initializer. 1646 // So the fix-it is just to remove the initializer. 1647 if (Init->isNullPointerConstant(Ctx, 1648 // FIXME: Why does this function not ask for `const ASTContext 1649 // &`? It should. Maybe worth an NFC patch later. 1650 Expr::NullPointerConstantValueDependence:: 1651 NPC_ValueDependentIsNotNull)) { 1652 std::optional<SourceLocation> InitLocation = 1653 getEndCharLoc(Init, SM, LangOpts); 1654 if (!InitLocation) 1655 return {}; 1656 1657 SourceRange SR(Init->getBeginLoc(), *InitLocation); 1658 1659 return {FixItHint::CreateRemoval(SR)}; 1660 } 1661 1662 FixItList FixIts{}; 1663 std::string ExtentText = UserFillPlaceHolder.data(); 1664 StringRef One = "1"; 1665 1666 // Insert `{` before `Init`: 1667 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{")); 1668 // Try to get the data extent. Break into different cases: 1669 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) { 1670 // In cases `Init` is `new T[n]` and there is no explicit cast over 1671 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects 1672 // of `T`. So the extent is `n` unless `n` has side effects. Similar but 1673 // simpler for the case where `Init` is `new T`. 1674 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { 1675 if (!Ext->HasSideEffects(Ctx)) { 1676 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts); 1677 if (!ExtentString) 1678 return {}; 1679 ExtentText = *ExtentString; 1680 } 1681 } else if (!CxxNew->isArray()) 1682 // Although the initializer is not allocating a buffer, the pointer 1683 // variable could still be used in buffer access operations. 1684 ExtentText = One; 1685 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType( 1686 Init->IgnoreImpCasts()->getType())) { 1687 // In cases `Init` is of an array type after stripping off implicit casts, 1688 // the extent is the array size. Note that if the array size is not a 1689 // constant, we cannot use it as the extent. 1690 ExtentText = getAPIntText(CArrTy->getSize()); 1691 } else { 1692 // In cases `Init` is of the form `&Var` after stripping of implicit 1693 // casts, where `&` is the built-in operator, the extent is 1. 1694 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts())) 1695 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && 1696 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr())) 1697 ExtentText = One; 1698 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, 1699 // and explicit casting, etc. etc. 1700 } 1701 1702 SmallString<32> StrBuffer{}; 1703 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts); 1704 1705 if (!LocPassInit) 1706 return {}; 1707 1708 StrBuffer.append(", "); 1709 StrBuffer.append(ExtentText); 1710 StrBuffer.append("}"); 1711 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str())); 1712 return FixIts; 1713 } 1714 1715 // For a `VarDecl` of the form `T * var (= Init)?`, this 1716 // function generates a fix-it for the declaration, which re-declares `var` to 1717 // be of `span<T>` type and transforms the initializer, if present, to a span 1718 // constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user 1719 // to fill in. 1720 // 1721 // FIXME: support Multi-level pointers 1722 // 1723 // Parameters: 1724 // `D` a pointer the variable declaration node 1725 // `Ctx` a reference to the ASTContext 1726 // Returns: 1727 // the generated fix-it 1728 static FixItList fixVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, 1729 const StringRef UserFillPlaceHolder) { 1730 const QualType &SpanEltT = D->getType()->getPointeeType(); 1731 assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!"); 1732 1733 FixItList FixIts{}; 1734 std::optional<SourceLocation> 1735 ReplacementLastLoc; // the inclusive end location of the replacement 1736 const SourceManager &SM = Ctx.getSourceManager(); 1737 1738 if (const Expr *Init = D->getInit()) { 1739 FixItList InitFixIts = 1740 populateInitializerFixItWithSpan(Init, Ctx, UserFillPlaceHolder); 1741 1742 if (InitFixIts.empty()) 1743 return {}; 1744 1745 // The loc right before the initializer: 1746 ReplacementLastLoc = Init->getBeginLoc().getLocWithOffset(-1); 1747 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()), 1748 std::make_move_iterator(InitFixIts.end())); 1749 } else 1750 ReplacementLastLoc = getEndCharLoc(D, SM, Ctx.getLangOpts()); 1751 1752 // Producing fix-it for variable declaration---make `D` to be of span type: 1753 SmallString<32> Replacement; 1754 raw_svector_ostream OS(Replacement); 1755 1756 OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName(); 1757 1758 if (!ReplacementLastLoc) 1759 return {}; 1760 1761 FixIts.push_back(FixItHint::CreateReplacement( 1762 SourceRange{D->getBeginLoc(), *ReplacementLastLoc}, OS.str())); 1763 return FixIts; 1764 } 1765 1766 static bool hasConflictingOverload(const FunctionDecl *FD) { 1767 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult(); 1768 } 1769 1770 // For a `FunDecl`, one of whose `ParmVarDecl`s is being changed to have a new 1771 // type, this function produces fix-its to make the change self-contained. Let 1772 // 'F' be the entity defined by the original `FunDecl` and "NewF" be the entity 1773 // defined by the `FunDecl` after the change to the parameter. Fix-its produced 1774 // by this function are 1775 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration 1776 // of 'F'; 1777 // 2. Create a declaration of "NewF" next to each declaration of `F`; 1778 // 3. Create a definition of "F" (as its' original definition is now belongs 1779 // to "NewF") next to its original definition. The body of the creating 1780 // definition calls to "NewF". 1781 // 1782 // Example: 1783 // 1784 // void f(int *p); // original declaration 1785 // void f(int *p) { // original definition 1786 // p[5]; 1787 // } 1788 // 1789 // To change the parameter `p` to be of `std::span<int>` type, we 1790 // also add overloads: 1791 // 1792 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl 1793 // void f(std::span<int> p); // added overload decl 1794 // void f(std::span<int> p) { // original def where param is changed 1795 // p[5]; 1796 // } 1797 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def 1798 // return f(std::span(p, <# size #>)); 1799 // } 1800 // 1801 // The actual fix-its may contain more details, e.g., the attribute may be guard 1802 // by a macro 1803 // #if __has_cpp_attribute(clang::unsafe_buffer_usage) 1804 // [[clang::unsafe_buffer_usage]] 1805 // #endif 1806 // 1807 // `NewTypeText` is the string representation of the new type, to which the 1808 // parameter indexed by `ParmIdx` is being changed. 1809 static std::optional<FixItList> 1810 createOverloadsForFixedParams(unsigned ParmIdx, StringRef NewTyText, 1811 const FunctionDecl *FD, const ASTContext &Ctx, 1812 UnsafeBufferUsageHandler &Handler) { 1813 // FIXME: need to make this conflict checking better: 1814 if (hasConflictingOverload(FD)) 1815 return std::nullopt; 1816 1817 const SourceManager &SM = Ctx.getSourceManager(); 1818 const LangOptions &LangOpts = Ctx.getLangOpts(); 1819 // FIXME Respect indentation of the original code. 1820 1821 // A lambda that creates the text representation of a function declaration 1822 // with the new type signature: 1823 const auto NewOverloadSignatureCreator = 1824 [&SM, &LangOpts](const FunctionDecl *FD, unsigned ParmIdx, 1825 StringRef NewTypeText) -> std::optional<std::string> { 1826 std::stringstream SS; 1827 1828 SS << ";"; 1829 SS << getEndOfLine().str(); 1830 // Append: ret-type func-name "(" 1831 if (auto Prefix = getRangeText( 1832 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()), 1833 SM, LangOpts)) 1834 SS << Prefix->str(); 1835 else 1836 return std::nullopt; // give up 1837 // Append: parameter-type-list 1838 const unsigned NumParms = FD->getNumParams(); 1839 1840 for (unsigned i = 0; i < NumParms; i++) { 1841 const ParmVarDecl *Parm = FD->getParamDecl(i); 1842 1843 if (Parm->isImplicit()) 1844 continue; 1845 if (i == ParmIdx) { 1846 SS << NewTypeText.str(); 1847 // print parameter name if provided: 1848 if (IdentifierInfo * II = Parm->getIdentifier()) 1849 SS << " " << II->getName().str(); 1850 } else if (auto ParmTypeText = 1851 getRangeText(Parm->getSourceRange(), SM, LangOpts)) { 1852 // print the whole `Parm` without modification: 1853 SS << ParmTypeText->str(); 1854 } else 1855 return std::nullopt; // something wrong, give up 1856 if (i != NumParms - 1) 1857 SS << ", "; 1858 } 1859 SS << ")"; 1860 return SS.str(); 1861 }; 1862 1863 // A lambda that creates the text representation of a function definition with 1864 // the original signature: 1865 const auto OldOverloadDefCreator = 1866 [&SM, &Handler, 1867 &LangOpts](const FunctionDecl *FD, unsigned ParmIdx, 1868 StringRef NewTypeText) -> std::optional<std::string> { 1869 std::stringstream SS; 1870 1871 SS << getEndOfLine().str(); 1872 // Append: attr-name ret-type func-name "(" param-list ")" "{" 1873 if (auto FDPrefix = getRangeText( 1874 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM, 1875 LangOpts)) 1876 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ") 1877 << FDPrefix->str() << "{"; 1878 else 1879 return std::nullopt; 1880 // Append: "return" func-name "(" 1881 if (auto FunQualName = getFunNameText(FD, SM, LangOpts)) 1882 SS << "return " << FunQualName->str() << "("; 1883 else 1884 return std::nullopt; 1885 1886 // Append: arg-list 1887 const unsigned NumParms = FD->getNumParams(); 1888 for (unsigned i = 0; i < NumParms; i++) { 1889 const ParmVarDecl *Parm = FD->getParamDecl(i); 1890 1891 if (Parm->isImplicit()) 1892 continue; 1893 // FIXME: If a parameter has no name, it is unused in the 1894 // definition. So we could just leave it as it is. 1895 if (!Parm->getIdentifier()) 1896 // If a parameter of a function definition has no name: 1897 return std::nullopt; 1898 if (i == ParmIdx) 1899 // This is our spanified paramter! 1900 SS << NewTypeText.str() << "(" << Parm->getIdentifier()->getName().str() << ", " 1901 << getUserFillPlaceHolder("size") << ")"; 1902 else 1903 SS << Parm->getIdentifier()->getName().str(); 1904 if (i != NumParms - 1) 1905 SS << ", "; 1906 } 1907 // finish call and the body 1908 SS << ");}" << getEndOfLine().str(); 1909 // FIXME: 80-char line formatting? 1910 return SS.str(); 1911 }; 1912 1913 FixItList FixIts{}; 1914 for (FunctionDecl *FReDecl : FD->redecls()) { 1915 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts); 1916 1917 if (!Loc) 1918 return {}; 1919 if (FReDecl->isThisDeclarationADefinition()) { 1920 assert(FReDecl == FD && "inconsistent function definition"); 1921 // Inserts a definition with the old signature to the end of 1922 // `FReDecl`: 1923 if (auto OldOverloadDef = 1924 OldOverloadDefCreator(FReDecl, ParmIdx, NewTyText)) 1925 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef)); 1926 else 1927 return {}; // give up 1928 } else { 1929 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`: 1930 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) { 1931 FixIts.emplace_back(FixItHint::CreateInsertion( 1932 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt( 1933 FReDecl->getBeginLoc(), " "))); 1934 } 1935 // Inserts a declaration with the new signature to the end of `FReDecl`: 1936 if (auto NewOverloadDecl = 1937 NewOverloadSignatureCreator(FReDecl, ParmIdx, NewTyText)) 1938 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl)); 1939 else 1940 return {}; 1941 } 1942 } 1943 return FixIts; 1944 } 1945 1946 // To fix a `ParmVarDecl` to be of `std::span` type. In addition, creating a 1947 // new overload of the function so that the change is self-contained (see 1948 // `createOverloadsForFixedParams`). 1949 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx, 1950 UnsafeBufferUsageHandler &Handler) { 1951 if (PVD->hasDefaultArg()) 1952 // FIXME: generate fix-its for default values: 1953 return {}; 1954 assert(PVD->getType()->isPointerType()); 1955 auto *FD = dyn_cast<FunctionDecl>(PVD->getDeclContext()); 1956 1957 if (!FD) 1958 return {}; 1959 1960 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 1961 std::optional<std::string> PteTyText = getPointeeTypeText( 1962 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 1963 1964 if (!PteTyText) 1965 return {}; 1966 1967 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName(); 1968 1969 if (!PVDNameText) 1970 return {}; 1971 1972 std::string SpanOpen = "std::span<"; 1973 std::string SpanClose = ">"; 1974 std::string SpanTyText; 1975 std::stringstream SS; 1976 1977 SS << SpanOpen << *PteTyText; 1978 // Append qualifiers to span element type: 1979 if (PteTyQualifiers) 1980 SS << " " << PteTyQualifiers->getAsString(); 1981 SS << SpanClose; 1982 // Save the Span type text: 1983 SpanTyText = SS.str(); 1984 // Append qualifiers to the type of the parameter: 1985 if (PVD->getType().hasQualifiers()) 1986 SS << " " << PVD->getType().getQualifiers().getAsString(); 1987 // Append parameter's name: 1988 SS << " " << PVDNameText->str(); 1989 1990 FixItList Fixes; 1991 unsigned ParmIdx = 0; 1992 1993 Fixes.push_back( 1994 FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())); 1995 for (auto *ParmIter : FD->parameters()) { 1996 if (PVD == ParmIter) 1997 break; 1998 ParmIdx++; 1999 } 2000 if (ParmIdx < FD->getNumParams()) 2001 if (auto OverloadFix = createOverloadsForFixedParams(ParmIdx, SpanTyText, 2002 FD, Ctx, Handler)) { 2003 Fixes.append(*OverloadFix); 2004 return Fixes; 2005 } 2006 return {}; 2007 } 2008 2009 static FixItList fixVariableWithSpan(const VarDecl *VD, 2010 const DeclUseTracker &Tracker, 2011 ASTContext &Ctx, 2012 UnsafeBufferUsageHandler &Handler) { 2013 const DeclStmt *DS = Tracker.lookupDecl(VD); 2014 assert(DS && "Fixing non-local variables not implemented yet!"); 2015 if (!DS->isSingleDecl()) { 2016 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` 2017 return {}; 2018 } 2019 // Currently DS is an unused variable but we'll need it when 2020 // non-single decls are implemented, where the pointee type name 2021 // and the '*' are spread around the place. 2022 (void)DS; 2023 2024 // FIXME: handle cases where DS has multiple declarations 2025 return fixVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder()); 2026 } 2027 2028 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due 2029 // to any unexpected problem. 2030 static FixItList 2031 fixVariable(const VarDecl *VD, Strategy::Kind K, 2032 /* The function decl under analysis */ const Decl *D, 2033 const DeclUseTracker &Tracker, ASTContext &Ctx, 2034 UnsafeBufferUsageHandler &Handler) { 2035 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) { 2036 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext()); 2037 if (!FD || FD != D) 2038 // `FD != D` means that `PVD` belongs to a function that is not being 2039 // analyzed currently. Thus `FD` may not be complete. 2040 return {}; 2041 2042 // TODO If function has a try block we can't change params unless we check 2043 // also its catch block for their use. 2044 // FIXME We might support static class methods, some select methods, 2045 // operators and possibly lamdas. 2046 if (FD->isMain() || FD->isConstexpr() || 2047 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate || 2048 FD->isVariadic() || 2049 // also covers call-operator of lamdas 2050 isa<CXXMethodDecl>(FD) || 2051 // skip when the function body is a try-block 2052 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) || 2053 FD->isOverloadedOperator()) 2054 return {}; // TODO test all these cases 2055 } 2056 2057 switch (K) { 2058 case Strategy::Kind::Span: { 2059 if (VD->getType()->isPointerType()) { 2060 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) 2061 return fixParamWithSpan(PVD, Ctx, Handler); 2062 2063 if (VD->isLocalVarDecl()) 2064 return fixVariableWithSpan(VD, Tracker, Ctx, Handler); 2065 } 2066 return {}; 2067 } 2068 case Strategy::Kind::Iterator: 2069 case Strategy::Kind::Array: 2070 case Strategy::Kind::Vector: 2071 llvm_unreachable("Strategy not implemented yet!"); 2072 case Strategy::Kind::Wontfix: 2073 llvm_unreachable("Invalid strategy!"); 2074 } 2075 llvm_unreachable("Unknown strategy!"); 2076 } 2077 2078 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the 2079 // `RemoveRange` of 'h' overlaps with a macro use. 2080 static bool overlapWithMacro(const FixItList &FixIts) { 2081 // FIXME: For now we only check if the range (or the first token) is (part of) 2082 // a macro expansion. Ideally, we want to check for all tokens in the range. 2083 return llvm::any_of(FixIts, [](const FixItHint &Hint) { 2084 auto Range = Hint.RemoveRange; 2085 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) 2086 // If the range (or the first token) is (part of) a macro expansion: 2087 return true; 2088 return false; 2089 }); 2090 } 2091 2092 static bool impossibleToFixForVar(const FixableGadgetSets &FixablesForAllVars, 2093 const Strategy &S, 2094 const VarDecl * Var) { 2095 for (const auto &F : FixablesForAllVars.byVar.find(Var)->second) { 2096 std::optional<FixItList> Fixits = F->getFixits(S); 2097 if (!Fixits) { 2098 return true; 2099 } 2100 } 2101 return false; 2102 } 2103 2104 static std::map<const VarDecl *, FixItList> 2105 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S, 2106 ASTContext &Ctx, 2107 /* The function decl under analysis */ const Decl *D, 2108 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler, 2109 const DefMapTy &VarGrpMap) { 2110 std::map<const VarDecl *, FixItList> FixItsForVariable; 2111 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) { 2112 FixItsForVariable[VD] = 2113 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler); 2114 // If we fail to produce Fix-It for the declaration we have to skip the 2115 // variable entirely. 2116 if (FixItsForVariable[VD].empty()) { 2117 FixItsForVariable.erase(VD); 2118 continue; 2119 } 2120 bool ImpossibleToFix = false; 2121 llvm::SmallVector<FixItHint, 16> FixItsForVD; 2122 for (const auto &F : Fixables) { 2123 std::optional<FixItList> Fixits = F->getFixits(S); 2124 if (!Fixits) { 2125 ImpossibleToFix = true; 2126 break; 2127 } else { 2128 const FixItList CorrectFixes = Fixits.value(); 2129 FixItsForVD.insert(FixItsForVD.end(), CorrectFixes.begin(), 2130 CorrectFixes.end()); 2131 } 2132 } 2133 2134 if (ImpossibleToFix) { 2135 FixItsForVariable.erase(VD); 2136 continue; 2137 } 2138 2139 const auto VarGroupForVD = VarGrpMap.find(VD); 2140 if (VarGroupForVD != VarGrpMap.end()) { 2141 for (const VarDecl * V : VarGroupForVD->second) { 2142 if (V == VD) { 2143 continue; 2144 } 2145 if (impossibleToFixForVar(FixablesForAllVars, S, V)) { 2146 ImpossibleToFix = true; 2147 break; 2148 } 2149 } 2150 2151 if (ImpossibleToFix) { 2152 FixItsForVariable.erase(VD); 2153 for (const VarDecl * V : VarGroupForVD->second) { 2154 FixItsForVariable.erase(V); 2155 } 2156 continue; 2157 } 2158 } 2159 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), 2160 FixItsForVD.begin(), FixItsForVD.end()); 2161 2162 // Fix-it shall not overlap with macros or/and templates: 2163 if (overlapWithMacro(FixItsForVariable[VD]) || 2164 clang::internal::anyConflict(FixItsForVariable[VD], 2165 Ctx.getSourceManager())) { 2166 FixItsForVariable.erase(VD); 2167 continue; 2168 } 2169 } 2170 2171 for (auto VD : FixItsForVariable) { 2172 const auto VarGroupForVD = VarGrpMap.find(VD.first); 2173 const Strategy::Kind ReplacementTypeForVD = S.lookup(VD.first); 2174 if (VarGroupForVD != VarGrpMap.end()) { 2175 for (const VarDecl * Var : VarGroupForVD->second) { 2176 if (Var == VD.first) { 2177 continue; 2178 } 2179 2180 FixItList GroupFix; 2181 if (FixItsForVariable.find(Var) == FixItsForVariable.end()) { 2182 GroupFix = fixVariable(Var, ReplacementTypeForVD, D, Tracker, 2183 Var->getASTContext(), Handler); 2184 } else { 2185 GroupFix = FixItsForVariable[Var]; 2186 } 2187 2188 for (auto Fix : GroupFix) { 2189 FixItsForVariable[VD.first].push_back(Fix); 2190 } 2191 } 2192 } 2193 } 2194 return FixItsForVariable; 2195 } 2196 2197 2198 static Strategy 2199 getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) { 2200 Strategy S; 2201 for (const VarDecl *VD : UnsafeVars) { 2202 S.set(VD, Strategy::Kind::Span); 2203 } 2204 return S; 2205 } 2206 2207 void clang::checkUnsafeBufferUsage(const Decl *D, 2208 UnsafeBufferUsageHandler &Handler, 2209 bool EmitSuggestions) { 2210 assert(D && D->getBody()); 2211 2212 // We do not want to visit a Lambda expression defined inside a method independently. 2213 // Instead, it should be visited along with the outer method. 2214 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) { 2215 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass()) 2216 return; 2217 } 2218 2219 // Do not emit fixit suggestions for functions declared in an 2220 // extern "C" block. 2221 if (const auto *FD = dyn_cast<FunctionDecl>(D)) { 2222 for (FunctionDecl *FReDecl : FD->redecls()) { 2223 if (FReDecl->isExternC()) { 2224 EmitSuggestions = false; 2225 break; 2226 } 2227 } 2228 } 2229 2230 WarningGadgetSets UnsafeOps; 2231 FixableGadgetSets FixablesForAllVars; 2232 2233 auto [FixableGadgets, WarningGadgets, Tracker] = 2234 findGadgets(D, Handler, EmitSuggestions); 2235 2236 if (!EmitSuggestions) { 2237 // Our job is very easy without suggestions. Just warn about 2238 // every problematic operation and consider it done. No need to deal 2239 // with fixable gadgets, no need to group operations by variable. 2240 for (const auto &G : WarningGadgets) { 2241 Handler.handleUnsafeOperation(G->getBaseStmt(), 2242 /*IsRelatedToDecl=*/false); 2243 } 2244 2245 // This return guarantees that most of the machine doesn't run when 2246 // suggestions aren't requested. 2247 assert(FixableGadgets.size() == 0 && 2248 "Fixable gadgets found but suggestions not requested!"); 2249 return; 2250 } 2251 2252 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); 2253 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets)); 2254 2255 std::map<const VarDecl *, FixItList> FixItsForVariableGroup; 2256 DefMapTy VariableGroupsMap{}; 2257 2258 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. 2259 for (auto it = FixablesForAllVars.byVar.cbegin(); 2260 it != FixablesForAllVars.byVar.cend();) { 2261 // FIXME: need to deal with global variables later 2262 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first)) || 2263 Tracker.hasUnclaimedUses(it->first) || it->first->isInitCapture()) { 2264 it = FixablesForAllVars.byVar.erase(it); 2265 } else { 2266 ++it; 2267 } 2268 } 2269 2270 llvm::SmallVector<const VarDecl *, 16> UnsafeVars; 2271 for (const auto &[VD, ignore] : FixablesForAllVars.byVar) 2272 UnsafeVars.push_back(VD); 2273 2274 // Fixpoint iteration for pointer assignments 2275 using DepMapTy = DenseMap<const VarDecl *, std::set<const VarDecl *>>; 2276 DepMapTy DependenciesMap{}; 2277 DepMapTy PtrAssignmentGraph{}; 2278 2279 for (auto it : FixablesForAllVars.byVar) { 2280 for (const FixableGadget *fixable : it.second) { 2281 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair = 2282 fixable->getStrategyImplications(); 2283 if (ImplPair) { 2284 std::pair<const VarDecl *, const VarDecl *> Impl = ImplPair.value(); 2285 PtrAssignmentGraph[Impl.first].insert(Impl.second); 2286 } 2287 } 2288 } 2289 2290 /* 2291 The following code does a BFS traversal of the `PtrAssignmentGraph` 2292 considering all unsafe vars as starting nodes and constructs an undirected 2293 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner 2294 elimiates all variables that are unreachable from any unsafe var. In other 2295 words, this removes all dependencies that don't include any unsafe variable 2296 and consequently don't need any fixit generation. 2297 Note: A careful reader would observe that the code traverses 2298 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and 2299 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would 2300 achieve the same result but the one used here dramatically cuts the 2301 amount of hoops the second part of the algorithm needs to jump, given that 2302 a lot of these connections become "direct". The reader is advised not to 2303 imagine how the graph is transformed because of using `Var` instead of 2304 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used, 2305 and think about why it's equivalent later. 2306 */ 2307 std::set<const VarDecl *> VisitedVarsDirected{}; 2308 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2309 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) { 2310 2311 std::queue<const VarDecl*> QueueDirected{}; 2312 QueueDirected.push(Var); 2313 while(!QueueDirected.empty()) { 2314 const VarDecl* CurrentVar = QueueDirected.front(); 2315 QueueDirected.pop(); 2316 VisitedVarsDirected.insert(CurrentVar); 2317 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar]; 2318 for (const VarDecl *Adj : AdjacentNodes) { 2319 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) { 2320 QueueDirected.push(Adj); 2321 } 2322 DependenciesMap[Var].insert(Adj); 2323 DependenciesMap[Adj].insert(Var); 2324 } 2325 } 2326 } 2327 } 2328 2329 // Group Connected Components for Unsafe Vars 2330 // (Dependencies based on pointer assignments) 2331 std::set<const VarDecl *> VisitedVars{}; 2332 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2333 if (VisitedVars.find(Var) == VisitedVars.end()) { 2334 std::vector<const VarDecl *> VarGroup{}; 2335 2336 std::queue<const VarDecl*> Queue{}; 2337 Queue.push(Var); 2338 while(!Queue.empty()) { 2339 const VarDecl* CurrentVar = Queue.front(); 2340 Queue.pop(); 2341 VisitedVars.insert(CurrentVar); 2342 VarGroup.push_back(CurrentVar); 2343 auto AdjacentNodes = DependenciesMap[CurrentVar]; 2344 for (const VarDecl *Adj : AdjacentNodes) { 2345 if (VisitedVars.find(Adj) == VisitedVars.end()) { 2346 Queue.push(Adj); 2347 } 2348 } 2349 } 2350 for (const VarDecl * V : VarGroup) { 2351 if (UnsafeOps.byVar.find(V) != UnsafeOps.byVar.end()) { 2352 VariableGroupsMap[V] = VarGroup; 2353 } 2354 } 2355 } 2356 } 2357 2358 Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars); 2359 2360 FixItsForVariableGroup = 2361 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D, 2362 Tracker, Handler, VariableGroupsMap); 2363 2364 // FIXME Detect overlapping FixIts. 2365 2366 for (const auto &G : UnsafeOps.noVar) { 2367 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false); 2368 } 2369 2370 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { 2371 auto FixItsIt = FixItsForVariableGroup.find(VD); 2372 Handler.handleUnsafeVariableGroup(VD, VariableGroupsMap, 2373 FixItsIt != FixItsForVariableGroup.end() 2374 ? std::move(FixItsIt->second) 2375 : FixItList{}); 2376 for (const auto &G : WarningGadgets) { 2377 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true); 2378 } 2379 } 2380 } 2381