1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Transformer/RangeSelector.h" 10 #include "clang/AST/Expr.h" 11 #include "clang/AST/TypeLoc.h" 12 #include "clang/ASTMatchers/ASTMatchFinder.h" 13 #include "clang/Basic/SourceLocation.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Tooling/Transformer/SourceCode.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/Errc.h" 18 #include "llvm/Support/Error.h" 19 #include <string> 20 #include <utility> 21 #include <vector> 22 23 using namespace clang; 24 using namespace transformer; 25 26 using ast_matchers::MatchFinder; 27 using llvm::Error; 28 using llvm::StringError; 29 30 using MatchResult = MatchFinder::MatchResult; 31 32 static Error invalidArgumentError(Twine Message) { 33 return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message); 34 } 35 36 static Error typeError(StringRef ID, const ASTNodeKind &Kind) { 37 return invalidArgumentError("mismatched type (node id=" + ID + 38 " kind=" + Kind.asStringRef() + ")"); 39 } 40 41 static Error typeError(StringRef ID, const ASTNodeKind &Kind, 42 Twine ExpectedType) { 43 return invalidArgumentError("mismatched type: expected one of " + 44 ExpectedType + " (node id=" + ID + 45 " kind=" + Kind.asStringRef() + ")"); 46 } 47 48 static Error missingPropertyError(StringRef ID, Twine Description, 49 StringRef Property) { 50 return invalidArgumentError(Description + " requires property '" + Property + 51 "' (node id=" + ID + ")"); 52 } 53 54 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes, 55 StringRef ID) { 56 auto &NodesMap = Nodes.getMap(); 57 auto It = NodesMap.find(ID); 58 if (It == NodesMap.end()) 59 return invalidArgumentError("ID not bound: " + ID); 60 return It->second; 61 } 62 63 // FIXME: handling of macros should be configurable. 64 static SourceLocation findPreviousTokenStart(SourceLocation Start, 65 const SourceManager &SM, 66 const LangOptions &LangOpts) { 67 if (Start.isInvalid() || Start.isMacroID()) 68 return SourceLocation(); 69 70 SourceLocation BeforeStart = Start.getLocWithOffset(-1); 71 if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) 72 return SourceLocation(); 73 74 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts); 75 } 76 77 // Finds the start location of the previous token of kind \p TK. 78 // FIXME: handling of macros should be configurable. 79 static SourceLocation findPreviousTokenKind(SourceLocation Start, 80 const SourceManager &SM, 81 const LangOptions &LangOpts, 82 tok::TokenKind TK) { 83 while (true) { 84 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); 85 if (L.isInvalid() || L.isMacroID()) 86 return SourceLocation(); 87 88 Token T; 89 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) 90 return SourceLocation(); 91 92 if (T.is(TK)) 93 return T.getLocation(); 94 95 Start = L; 96 } 97 } 98 99 RangeSelector transformer::before(RangeSelector Selector) { 100 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { 101 Expected<CharSourceRange> SelectedRange = Selector(Result); 102 if (!SelectedRange) 103 return SelectedRange.takeError(); 104 return CharSourceRange::getCharRange(SelectedRange->getBegin()); 105 }; 106 } 107 108 RangeSelector transformer::after(RangeSelector Selector) { 109 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { 110 Expected<CharSourceRange> SelectedRange = Selector(Result); 111 if (!SelectedRange) 112 return SelectedRange.takeError(); 113 SourceLocation End = SelectedRange->getEnd(); 114 if (SelectedRange->isTokenRange()) { 115 // We need to find the actual (exclusive) end location from which to 116 // create a new source range. However, that's not guaranteed to be valid, 117 // even if the token location itself is valid. So, we create a token range 118 // consisting only of the last token, then map that range back to the 119 // source file. If that succeeds, we have a valid location for the end of 120 // the generated range. 121 CharSourceRange Range = Lexer::makeFileCharRange( 122 CharSourceRange::getTokenRange(SelectedRange->getEnd()), 123 *Result.SourceManager, Result.Context->getLangOpts()); 124 if (Range.isInvalid()) 125 return invalidArgumentError( 126 "after: can't resolve sub-range to valid source range"); 127 End = Range.getEnd(); 128 } 129 130 return CharSourceRange::getCharRange(End); 131 }; 132 } 133 134 RangeSelector transformer::node(std::string ID) { 135 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 136 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); 137 if (!Node) 138 return Node.takeError(); 139 return (Node->get<Decl>() != nullptr || 140 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr)) 141 ? tooling::getExtendedRange(*Node, tok::TokenKind::semi, 142 *Result.Context) 143 : CharSourceRange::getTokenRange(Node->getSourceRange()); 144 }; 145 } 146 147 RangeSelector transformer::statement(std::string ID) { 148 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 149 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); 150 if (!Node) 151 return Node.takeError(); 152 return tooling::getExtendedRange(*Node, tok::TokenKind::semi, 153 *Result.Context); 154 }; 155 } 156 157 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { 158 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { 159 Expected<CharSourceRange> BeginRange = Begin(Result); 160 if (!BeginRange) 161 return BeginRange.takeError(); 162 Expected<CharSourceRange> EndRange = End(Result); 163 if (!EndRange) 164 return EndRange.takeError(); 165 SourceLocation B = BeginRange->getBegin(); 166 SourceLocation E = EndRange->getEnd(); 167 // Note: we are precluding the possibility of sub-token ranges in the case 168 // that EndRange is a token range. 169 if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) { 170 return invalidArgumentError("Bad range: out of order"); 171 } 172 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange()); 173 }; 174 } 175 176 RangeSelector transformer::encloseNodes(std::string BeginID, 177 std::string EndID) { 178 return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID))); 179 } 180 181 RangeSelector transformer::member(std::string ID) { 182 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 183 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); 184 if (!Node) 185 return Node.takeError(); 186 if (auto *M = Node->get<clang::MemberExpr>()) 187 return CharSourceRange::getTokenRange( 188 M->getMemberNameInfo().getSourceRange()); 189 return typeError(ID, Node->getNodeKind(), "MemberExpr"); 190 }; 191 } 192 193 RangeSelector transformer::name(std::string ID) { 194 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 195 Expected<DynTypedNode> N = getNode(Result.Nodes, ID); 196 if (!N) 197 return N.takeError(); 198 auto &Node = *N; 199 if (const auto *D = Node.get<NamedDecl>()) { 200 if (!D->getDeclName().isIdentifier()) 201 return missingPropertyError(ID, "name", "identifier"); 202 SourceLocation L = D->getLocation(); 203 auto R = CharSourceRange::getTokenRange(L, L); 204 // Verify that the range covers exactly the name. 205 // FIXME: extend this code to support cases like `operator +` or 206 // `foo<int>` for which this range will be too short. Doing so will 207 // require subcasing `NamedDecl`, because it doesn't provide virtual 208 // access to the \c DeclarationNameInfo. 209 if (tooling::getText(R, *Result.Context) != D->getName()) 210 return CharSourceRange(); 211 return R; 212 } 213 if (const auto *E = Node.get<DeclRefExpr>()) { 214 if (!E->getNameInfo().getName().isIdentifier()) 215 return missingPropertyError(ID, "name", "identifier"); 216 SourceLocation L = E->getLocation(); 217 return CharSourceRange::getTokenRange(L, L); 218 } 219 if (const auto *I = Node.get<CXXCtorInitializer>()) { 220 if (!I->isMemberInitializer() && I->isWritten()) 221 return missingPropertyError(ID, "name", "explicit member initializer"); 222 SourceLocation L = I->getMemberLocation(); 223 return CharSourceRange::getTokenRange(L, L); 224 } 225 if (const auto *T = Node.get<TypeLoc>()) { 226 TypeLoc Loc = *T; 227 auto ET = Loc.getAs<ElaboratedTypeLoc>(); 228 if (!ET.isNull()) 229 Loc = ET.getNamedTypeLoc(); 230 if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>(); 231 !SpecLoc.isNull()) 232 return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc()); 233 return CharSourceRange::getTokenRange(Loc.getSourceRange()); 234 } 235 return typeError(ID, Node.getNodeKind(), 236 "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc"); 237 }; 238 } 239 240 namespace { 241 // FIXME: make this available in the public API for users to easily create their 242 // own selectors. 243 244 // Creates a selector from a range-selection function \p Func, which selects a 245 // range that is relative to a bound node id. \c T is the node type expected by 246 // \p Func. 247 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)> 248 class RelativeSelector { 249 std::string ID; 250 251 public: 252 RelativeSelector(std::string ID) : ID(std::move(ID)) {} 253 254 Expected<CharSourceRange> operator()(const MatchResult &Result) { 255 Expected<DynTypedNode> N = getNode(Result.Nodes, ID); 256 if (!N) 257 return N.takeError(); 258 if (const auto *Arg = N->get<T>()) 259 return Func(Result, *Arg); 260 return typeError(ID, N->getNodeKind()); 261 } 262 }; 263 } // namespace 264 265 // FIXME: Change the following functions from being in an anonymous namespace 266 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915 267 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous 268 // namespace works around a bug in earlier versions. 269 namespace { 270 // Returns the range of the statements (all source between the braces). 271 CharSourceRange getStatementsRange(const MatchResult &, 272 const CompoundStmt &CS) { 273 return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1), 274 CS.getRBracLoc()); 275 } 276 } // namespace 277 278 RangeSelector transformer::statements(std::string ID) { 279 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID)); 280 } 281 282 namespace { 283 284 SourceLocation getRLoc(const CallExpr &E) { return E.getRParenLoc(); } 285 286 SourceLocation getRLoc(const CXXConstructExpr &E) { 287 return E.getParenOrBraceRange().getEnd(); 288 } 289 290 tok::TokenKind getStartToken(const CallExpr &E) { 291 return tok::TokenKind::l_paren; 292 } 293 294 tok::TokenKind getStartToken(const CXXConstructExpr &E) { 295 return isa<CXXTemporaryObjectExpr>(E) ? tok::TokenKind::l_paren 296 : tok::TokenKind::l_brace; 297 } 298 299 template <typename ExprWithArgs> 300 SourceLocation findArgStartDelimiter(const ExprWithArgs &E, SourceLocation RLoc, 301 const SourceManager &SM, 302 const LangOptions &LangOpts) { 303 SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc(); 304 return findPreviousTokenKind(Loc, SM, LangOpts, getStartToken(E)); 305 } 306 // Returns the range of the source between the call's or construct expr's 307 // parentheses/braces. 308 template <typename ExprWithArgs> 309 CharSourceRange getArgumentsRange(const MatchResult &Result, 310 const ExprWithArgs &CE) { 311 const SourceLocation RLoc = getRLoc(CE); 312 return CharSourceRange::getCharRange( 313 findArgStartDelimiter(CE, RLoc, *Result.SourceManager, 314 Result.Context->getLangOpts()) 315 .getLocWithOffset(1), 316 RLoc); 317 } 318 } // namespace 319 320 RangeSelector transformer::callArgs(std::string ID) { 321 return RelativeSelector<CallExpr, getArgumentsRange<CallExpr>>(std::move(ID)); 322 } 323 324 RangeSelector transformer::constructExprArgs(std::string ID) { 325 return RelativeSelector<CXXConstructExpr, 326 getArgumentsRange<CXXConstructExpr>>(std::move(ID)); 327 } 328 329 namespace { 330 // Returns the range of the elements of the initializer list. Includes all 331 // source between the braces. 332 CharSourceRange getElementsRange(const MatchResult &, 333 const InitListExpr &E) { 334 return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1), 335 E.getRBraceLoc()); 336 } 337 } // namespace 338 339 RangeSelector transformer::initListElements(std::string ID) { 340 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID)); 341 } 342 343 namespace { 344 // Returns the range of the else branch, including the `else` keyword. 345 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) { 346 return tooling::maybeExtendRange( 347 CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()), 348 tok::TokenKind::semi, *Result.Context); 349 } 350 } // namespace 351 352 RangeSelector transformer::elseBranch(std::string ID) { 353 return RelativeSelector<IfStmt, getElseRange>(std::move(ID)); 354 } 355 356 RangeSelector transformer::expansion(RangeSelector S) { 357 return [S](const MatchResult &Result) -> Expected<CharSourceRange> { 358 Expected<CharSourceRange> SRange = S(Result); 359 if (!SRange) 360 return SRange.takeError(); 361 return Result.SourceManager->getExpansionRange(*SRange); 362 }; 363 } 364