1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Transformer/RangeSelector.h" 10 #include "clang/AST/Expr.h" 11 #include "clang/AST/TypeLoc.h" 12 #include "clang/ASTMatchers/ASTMatchFinder.h" 13 #include "clang/Basic/SourceLocation.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Tooling/Transformer/SourceCode.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/Errc.h" 18 #include "llvm/Support/Error.h" 19 #include <string> 20 #include <utility> 21 22 using namespace clang; 23 using namespace transformer; 24 25 using ast_matchers::MatchFinder; 26 using llvm::Error; 27 using llvm::StringError; 28 29 using MatchResult = MatchFinder::MatchResult; 30 31 static Error invalidArgumentError(Twine Message) { 32 return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message); 33 } 34 35 static Error typeError(StringRef ID, const ASTNodeKind &Kind) { 36 return invalidArgumentError("mismatched type (node id=" + ID + 37 " kind=" + Kind.asStringRef() + ")"); 38 } 39 40 static Error typeError(StringRef ID, const ASTNodeKind &Kind, 41 Twine ExpectedType) { 42 return invalidArgumentError("mismatched type: expected one of " + 43 ExpectedType + " (node id=" + ID + 44 " kind=" + Kind.asStringRef() + ")"); 45 } 46 47 static Error missingPropertyError(StringRef ID, Twine Description, 48 StringRef Property) { 49 return invalidArgumentError(Description + " requires property '" + Property + 50 "' (node id=" + ID + ")"); 51 } 52 53 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes, 54 StringRef ID) { 55 auto &NodesMap = Nodes.getMap(); 56 auto It = NodesMap.find(ID); 57 if (It == NodesMap.end()) 58 return invalidArgumentError("ID not bound: " + ID); 59 return It->second; 60 } 61 62 // FIXME: handling of macros should be configurable. 63 static SourceLocation findPreviousTokenStart(SourceLocation Start, 64 const SourceManager &SM, 65 const LangOptions &LangOpts) { 66 if (Start.isInvalid() || Start.isMacroID()) 67 return SourceLocation(); 68 69 SourceLocation BeforeStart = Start.getLocWithOffset(-1); 70 if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) 71 return SourceLocation(); 72 73 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts); 74 } 75 76 // Finds the start location of the previous token of kind \p TK. 77 // FIXME: handling of macros should be configurable. 78 static SourceLocation findPreviousTokenKind(SourceLocation Start, 79 const SourceManager &SM, 80 const LangOptions &LangOpts, 81 tok::TokenKind TK) { 82 while (true) { 83 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); 84 if (L.isInvalid() || L.isMacroID()) 85 return SourceLocation(); 86 87 Token T; 88 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) 89 return SourceLocation(); 90 91 if (T.is(TK)) 92 return T.getLocation(); 93 94 Start = L; 95 } 96 } 97 98 RangeSelector transformer::before(RangeSelector Selector) { 99 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { 100 Expected<CharSourceRange> SelectedRange = Selector(Result); 101 if (!SelectedRange) 102 return SelectedRange.takeError(); 103 return CharSourceRange::getCharRange(SelectedRange->getBegin()); 104 }; 105 } 106 107 RangeSelector transformer::after(RangeSelector Selector) { 108 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> { 109 Expected<CharSourceRange> SelectedRange = Selector(Result); 110 if (!SelectedRange) 111 return SelectedRange.takeError(); 112 SourceLocation End = SelectedRange->getEnd(); 113 if (SelectedRange->isTokenRange()) { 114 // We need to find the actual (exclusive) end location from which to 115 // create a new source range. However, that's not guaranteed to be valid, 116 // even if the token location itself is valid. So, we create a token range 117 // consisting only of the last token, then map that range back to the 118 // source file. If that succeeds, we have a valid location for the end of 119 // the generated range. 120 CharSourceRange Range = Lexer::makeFileCharRange( 121 CharSourceRange::getTokenRange(SelectedRange->getEnd()), 122 *Result.SourceManager, Result.Context->getLangOpts()); 123 if (Range.isInvalid()) 124 return invalidArgumentError( 125 "after: can't resolve sub-range to valid source range"); 126 End = Range.getEnd(); 127 } 128 129 return CharSourceRange::getCharRange(End); 130 }; 131 } 132 133 RangeSelector transformer::node(std::string ID) { 134 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 135 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); 136 if (!Node) 137 return Node.takeError(); 138 return (Node->get<Decl>() != nullptr || 139 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr)) 140 ? tooling::getExtendedRange(*Node, tok::TokenKind::semi, 141 *Result.Context) 142 : CharSourceRange::getTokenRange(Node->getSourceRange()); 143 }; 144 } 145 146 RangeSelector transformer::statement(std::string ID) { 147 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 148 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); 149 if (!Node) 150 return Node.takeError(); 151 return tooling::getExtendedRange(*Node, tok::TokenKind::semi, 152 *Result.Context); 153 }; 154 } 155 156 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) { 157 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> { 158 Expected<CharSourceRange> BeginRange = Begin(Result); 159 if (!BeginRange) 160 return BeginRange.takeError(); 161 Expected<CharSourceRange> EndRange = End(Result); 162 if (!EndRange) 163 return EndRange.takeError(); 164 SourceLocation B = BeginRange->getBegin(); 165 SourceLocation E = EndRange->getEnd(); 166 // Note: we are precluding the possibility of sub-token ranges in the case 167 // that EndRange is a token range. 168 if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) { 169 return invalidArgumentError("Bad range: out of order"); 170 } 171 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange()); 172 }; 173 } 174 175 RangeSelector transformer::encloseNodes(std::string BeginID, 176 std::string EndID) { 177 return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID))); 178 } 179 180 RangeSelector transformer::member(std::string ID) { 181 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 182 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID); 183 if (!Node) 184 return Node.takeError(); 185 if (auto *M = Node->get<clang::MemberExpr>()) 186 return CharSourceRange::getTokenRange( 187 M->getMemberNameInfo().getSourceRange()); 188 return typeError(ID, Node->getNodeKind(), "MemberExpr"); 189 }; 190 } 191 192 RangeSelector transformer::name(std::string ID) { 193 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> { 194 Expected<DynTypedNode> N = getNode(Result.Nodes, ID); 195 if (!N) 196 return N.takeError(); 197 auto &Node = *N; 198 if (const auto *D = Node.get<NamedDecl>()) { 199 if (!D->getDeclName().isIdentifier()) 200 return missingPropertyError(ID, "name", "identifier"); 201 SourceLocation L = D->getLocation(); 202 auto R = CharSourceRange::getTokenRange(L, L); 203 // Verify that the range covers exactly the name. 204 // FIXME: extend this code to support cases like `operator +` or 205 // `foo<int>` for which this range will be too short. Doing so will 206 // require subcasing `NamedDecl`, because it doesn't provide virtual 207 // access to the \c DeclarationNameInfo. 208 if (tooling::getText(R, *Result.Context) != D->getName()) 209 return CharSourceRange(); 210 return R; 211 } 212 if (const auto *E = Node.get<DeclRefExpr>()) { 213 if (!E->getNameInfo().getName().isIdentifier()) 214 return missingPropertyError(ID, "name", "identifier"); 215 SourceLocation L = E->getLocation(); 216 return CharSourceRange::getTokenRange(L, L); 217 } 218 if (const auto *I = Node.get<CXXCtorInitializer>()) { 219 if (!I->isMemberInitializer() && I->isWritten()) 220 return missingPropertyError(ID, "name", "explicit member initializer"); 221 SourceLocation L = I->getMemberLocation(); 222 return CharSourceRange::getTokenRange(L, L); 223 } 224 if (const auto *T = Node.get<TypeLoc>()) { 225 TypeLoc Loc = *T; 226 auto ET = Loc.getAs<ElaboratedTypeLoc>(); 227 if (!ET.isNull()) 228 Loc = ET.getNamedTypeLoc(); 229 if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>(); 230 !SpecLoc.isNull()) 231 return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc()); 232 return CharSourceRange::getTokenRange(Loc.getSourceRange()); 233 } 234 return typeError(ID, Node.getNodeKind(), 235 "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc"); 236 }; 237 } 238 239 namespace { 240 // FIXME: make this available in the public API for users to easily create their 241 // own selectors. 242 243 // Creates a selector from a range-selection function \p Func, which selects a 244 // range that is relative to a bound node id. \c T is the node type expected by 245 // \p Func. 246 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)> 247 class RelativeSelector { 248 std::string ID; 249 250 public: 251 RelativeSelector(std::string ID) : ID(std::move(ID)) {} 252 253 Expected<CharSourceRange> operator()(const MatchResult &Result) { 254 Expected<DynTypedNode> N = getNode(Result.Nodes, ID); 255 if (!N) 256 return N.takeError(); 257 if (const auto *Arg = N->get<T>()) 258 return Func(Result, *Arg); 259 return typeError(ID, N->getNodeKind()); 260 } 261 }; 262 } // namespace 263 264 // FIXME: Change the following functions from being in an anonymous namespace 265 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915 266 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous 267 // namespace works around a bug in earlier versions. 268 namespace { 269 // Returns the range of the statements (all source between the braces). 270 CharSourceRange getStatementsRange(const MatchResult &, 271 const CompoundStmt &CS) { 272 return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1), 273 CS.getRBracLoc()); 274 } 275 } // namespace 276 277 RangeSelector transformer::statements(std::string ID) { 278 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID)); 279 } 280 281 namespace { 282 283 SourceLocation findArgStartDelimiter(const CallExpr &E, SourceLocation RLoc, 284 const SourceManager &SM, 285 const LangOptions &LangOpts) { 286 SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc(); 287 return findPreviousTokenKind(Loc, SM, LangOpts, tok::TokenKind::l_paren); 288 } 289 290 // Returns the location after the last argument of the construct expr. Returns 291 // an invalid location if there are no arguments. 292 SourceLocation findLastArgEnd(const CXXConstructExpr &CE, 293 const SourceManager &SM, 294 const LangOptions &LangOpts) { 295 for (int i = CE.getNumArgs() - 1; i >= 0; --i) { 296 const Expr *Arg = CE.getArg(i); 297 if (isa<CXXDefaultArgExpr>(Arg)) 298 continue; 299 return Lexer::getLocForEndOfToken(Arg->getEndLoc(), 0, SM, LangOpts); 300 } 301 return {}; 302 } 303 304 // Returns the range of the source between the call's parentheses/braces. 305 CharSourceRange getCallArgumentsRange(const MatchResult &Result, 306 const CallExpr &CE) { 307 const SourceLocation RLoc = CE.getRParenLoc(); 308 return CharSourceRange::getCharRange( 309 findArgStartDelimiter(CE, RLoc, *Result.SourceManager, 310 Result.Context->getLangOpts()) 311 .getLocWithOffset(1), 312 RLoc); 313 } 314 315 // Returns the range of the source between the construct expr's 316 // parentheses/braces. 317 CharSourceRange getConstructArgumentsRange(const MatchResult &Result, 318 const CXXConstructExpr &CE) { 319 if (SourceRange R = CE.getParenOrBraceRange(); R.isValid()) { 320 return CharSourceRange::getCharRange( 321 Lexer::getLocForEndOfToken(R.getBegin(), 0, *Result.SourceManager, 322 Result.Context->getLangOpts()), 323 R.getEnd()); 324 } 325 326 if (CE.getNumArgs() > 0) { 327 return CharSourceRange::getCharRange( 328 CE.getArg(0)->getBeginLoc(), 329 findLastArgEnd(CE, *Result.SourceManager, 330 Result.Context->getLangOpts())); 331 } 332 333 return {}; 334 } 335 336 } // namespace 337 338 RangeSelector transformer::callArgs(std::string ID) { 339 return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID)); 340 } 341 342 RangeSelector transformer::constructExprArgs(std::string ID) { 343 return RelativeSelector<CXXConstructExpr, getConstructArgumentsRange>( 344 std::move(ID)); 345 } 346 347 namespace { 348 // Returns the range of the elements of the initializer list. Includes all 349 // source between the braces. 350 CharSourceRange getElementsRange(const MatchResult &, 351 const InitListExpr &E) { 352 return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1), 353 E.getRBraceLoc()); 354 } 355 } // namespace 356 357 RangeSelector transformer::initListElements(std::string ID) { 358 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID)); 359 } 360 361 namespace { 362 // Returns the range of the else branch, including the `else` keyword. 363 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) { 364 return tooling::maybeExtendRange( 365 CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()), 366 tok::TokenKind::semi, *Result.Context); 367 } 368 } // namespace 369 370 RangeSelector transformer::elseBranch(std::string ID) { 371 return RelativeSelector<IfStmt, getElseRange>(std::move(ID)); 372 } 373 374 RangeSelector transformer::expansion(RangeSelector S) { 375 return [S](const MatchResult &Result) -> Expected<CharSourceRange> { 376 Expected<CharSourceRange> SRange = S(Result); 377 if (!SRange) 378 return SRange.takeError(); 379 return Result.SourceManager->getExpansionRange(*SRange); 380 }; 381 } 382