1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Tooling/Transformer/RangeSelector.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/AST/TypeLoc.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/SourceCode.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
18 #include "llvm/Support/Error.h"
19 #include <string>
20 #include <utility>
21
22 using namespace clang;
23 using namespace transformer;
24
25 using ast_matchers::MatchFinder;
26 using llvm::Error;
27 using llvm::StringError;
28
29 using MatchResult = MatchFinder::MatchResult;
30
invalidArgumentError(Twine Message)31 static Error invalidArgumentError(Twine Message) {
32 return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
33 }
34
typeError(StringRef ID,const ASTNodeKind & Kind)35 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
36 return invalidArgumentError("mismatched type (node id=" + ID +
37 " kind=" + Kind.asStringRef() + ")");
38 }
39
typeError(StringRef ID,const ASTNodeKind & Kind,Twine ExpectedType)40 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
41 Twine ExpectedType) {
42 return invalidArgumentError("mismatched type: expected one of " +
43 ExpectedType + " (node id=" + ID +
44 " kind=" + Kind.asStringRef() + ")");
45 }
46
missingPropertyError(StringRef ID,Twine Description,StringRef Property)47 static Error missingPropertyError(StringRef ID, Twine Description,
48 StringRef Property) {
49 return invalidArgumentError(Description + " requires property '" + Property +
50 "' (node id=" + ID + ")");
51 }
52
getNode(const ast_matchers::BoundNodes & Nodes,StringRef ID)53 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
54 StringRef ID) {
55 auto &NodesMap = Nodes.getMap();
56 auto It = NodesMap.find(ID);
57 if (It == NodesMap.end())
58 return invalidArgumentError("ID not bound: " + ID);
59 return It->second;
60 }
61
62 // FIXME: handling of macros should be configurable.
findPreviousTokenStart(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)63 static SourceLocation findPreviousTokenStart(SourceLocation Start,
64 const SourceManager &SM,
65 const LangOptions &LangOpts) {
66 if (Start.isInvalid() || Start.isMacroID())
67 return SourceLocation();
68
69 SourceLocation BeforeStart = Start.getLocWithOffset(-1);
70 if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
71 return SourceLocation();
72
73 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
74 }
75
76 // Finds the start location of the previous token of kind \p TK.
77 // FIXME: handling of macros should be configurable.
findPreviousTokenKind(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts,tok::TokenKind TK)78 static SourceLocation findPreviousTokenKind(SourceLocation Start,
79 const SourceManager &SM,
80 const LangOptions &LangOpts,
81 tok::TokenKind TK) {
82 while (true) {
83 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
84 if (L.isInvalid() || L.isMacroID())
85 return SourceLocation();
86
87 Token T;
88 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
89 return SourceLocation();
90
91 if (T.is(TK))
92 return T.getLocation();
93
94 Start = L;
95 }
96 }
97
before(RangeSelector Selector)98 RangeSelector transformer::before(RangeSelector Selector) {
99 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
100 Expected<CharSourceRange> SelectedRange = Selector(Result);
101 if (!SelectedRange)
102 return SelectedRange.takeError();
103 return CharSourceRange::getCharRange(SelectedRange->getBegin());
104 };
105 }
106
after(RangeSelector Selector)107 RangeSelector transformer::after(RangeSelector Selector) {
108 return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
109 Expected<CharSourceRange> SelectedRange = Selector(Result);
110 if (!SelectedRange)
111 return SelectedRange.takeError();
112 SourceLocation End = SelectedRange->getEnd();
113 if (SelectedRange->isTokenRange()) {
114 // We need to find the actual (exclusive) end location from which to
115 // create a new source range. However, that's not guaranteed to be valid,
116 // even if the token location itself is valid. So, we create a token range
117 // consisting only of the last token, then map that range back to the
118 // source file. If that succeeds, we have a valid location for the end of
119 // the generated range.
120 CharSourceRange Range = Lexer::makeFileCharRange(
121 CharSourceRange::getTokenRange(SelectedRange->getEnd()),
122 *Result.SourceManager, Result.Context->getLangOpts());
123 if (Range.isInvalid())
124 return invalidArgumentError(
125 "after: can't resolve sub-range to valid source range");
126 End = Range.getEnd();
127 }
128
129 return CharSourceRange::getCharRange(End);
130 };
131 }
132
node(std::string ID)133 RangeSelector transformer::node(std::string ID) {
134 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
135 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
136 if (!Node)
137 return Node.takeError();
138 return (Node->get<Decl>() != nullptr ||
139 (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
140 ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
141 *Result.Context)
142 : CharSourceRange::getTokenRange(Node->getSourceRange());
143 };
144 }
145
statement(std::string ID)146 RangeSelector transformer::statement(std::string ID) {
147 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
148 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
149 if (!Node)
150 return Node.takeError();
151 return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
152 *Result.Context);
153 };
154 }
155
enclose(RangeSelector Begin,RangeSelector End)156 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
157 return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
158 Expected<CharSourceRange> BeginRange = Begin(Result);
159 if (!BeginRange)
160 return BeginRange.takeError();
161 Expected<CharSourceRange> EndRange = End(Result);
162 if (!EndRange)
163 return EndRange.takeError();
164 SourceLocation B = BeginRange->getBegin();
165 SourceLocation E = EndRange->getEnd();
166 // Note: we are precluding the possibility of sub-token ranges in the case
167 // that EndRange is a token range.
168 if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
169 return invalidArgumentError("Bad range: out of order");
170 }
171 return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
172 };
173 }
174
encloseNodes(std::string BeginID,std::string EndID)175 RangeSelector transformer::encloseNodes(std::string BeginID,
176 std::string EndID) {
177 return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
178 }
179
member(std::string ID)180 RangeSelector transformer::member(std::string ID) {
181 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
182 Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
183 if (!Node)
184 return Node.takeError();
185 if (auto *M = Node->get<clang::MemberExpr>())
186 return CharSourceRange::getTokenRange(
187 M->getMemberNameInfo().getSourceRange());
188 return typeError(ID, Node->getNodeKind(), "MemberExpr");
189 };
190 }
191
name(std::string ID)192 RangeSelector transformer::name(std::string ID) {
193 return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
194 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
195 if (!N)
196 return N.takeError();
197 auto &Node = *N;
198 if (const auto *D = Node.get<NamedDecl>()) {
199 if (!D->getDeclName().isIdentifier())
200 return missingPropertyError(ID, "name", "identifier");
201 SourceLocation L = D->getLocation();
202 auto R = CharSourceRange::getTokenRange(L, L);
203 // Verify that the range covers exactly the name.
204 // FIXME: extend this code to support cases like `operator +` or
205 // `foo<int>` for which this range will be too short. Doing so will
206 // require subcasing `NamedDecl`, because it doesn't provide virtual
207 // access to the \c DeclarationNameInfo.
208 if (tooling::getText(R, *Result.Context) != D->getName())
209 return CharSourceRange();
210 return R;
211 }
212 if (const auto *E = Node.get<DeclRefExpr>()) {
213 if (!E->getNameInfo().getName().isIdentifier())
214 return missingPropertyError(ID, "name", "identifier");
215 SourceLocation L = E->getLocation();
216 return CharSourceRange::getTokenRange(L, L);
217 }
218 if (const auto *I = Node.get<CXXCtorInitializer>()) {
219 if (!I->isMemberInitializer() && I->isWritten())
220 return missingPropertyError(ID, "name", "explicit member initializer");
221 SourceLocation L = I->getMemberLocation();
222 return CharSourceRange::getTokenRange(L, L);
223 }
224 if (const auto *T = Node.get<TypeLoc>()) {
225 TypeLoc Loc = *T;
226 auto ET = Loc.getAs<ElaboratedTypeLoc>();
227 if (!ET.isNull())
228 Loc = ET.getNamedTypeLoc();
229 if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>();
230 !SpecLoc.isNull())
231 return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc());
232 return CharSourceRange::getTokenRange(Loc.getSourceRange());
233 }
234 return typeError(ID, Node.getNodeKind(),
235 "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
236 };
237 }
238
239 namespace {
240 // FIXME: make this available in the public API for users to easily create their
241 // own selectors.
242
243 // Creates a selector from a range-selection function \p Func, which selects a
244 // range that is relative to a bound node id. \c T is the node type expected by
245 // \p Func.
246 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
247 class RelativeSelector {
248 std::string ID;
249
250 public:
RelativeSelector(std::string ID)251 RelativeSelector(std::string ID) : ID(std::move(ID)) {}
252
operator ()(const MatchResult & Result)253 Expected<CharSourceRange> operator()(const MatchResult &Result) {
254 Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
255 if (!N)
256 return N.takeError();
257 if (const auto *Arg = N->get<T>())
258 return Func(Result, *Arg);
259 return typeError(ID, N->getNodeKind());
260 }
261 };
262 } // namespace
263
264 // FIXME: Change the following functions from being in an anonymous namespace
265 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
266 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
267 // namespace works around a bug in earlier versions.
268 namespace {
269 // Returns the range of the statements (all source between the braces).
getStatementsRange(const MatchResult &,const CompoundStmt & CS)270 CharSourceRange getStatementsRange(const MatchResult &,
271 const CompoundStmt &CS) {
272 return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
273 CS.getRBracLoc());
274 }
275 } // namespace
276
statements(std::string ID)277 RangeSelector transformer::statements(std::string ID) {
278 return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
279 }
280
281 namespace {
282
findArgStartDelimiter(const CallExpr & E,SourceLocation RLoc,const SourceManager & SM,const LangOptions & LangOpts)283 SourceLocation findArgStartDelimiter(const CallExpr &E, SourceLocation RLoc,
284 const SourceManager &SM,
285 const LangOptions &LangOpts) {
286 SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc();
287 return findPreviousTokenKind(Loc, SM, LangOpts, tok::TokenKind::l_paren);
288 }
289
290 // Returns the location after the last argument of the construct expr. Returns
291 // an invalid location if there are no arguments.
findLastArgEnd(const CXXConstructExpr & CE,const SourceManager & SM,const LangOptions & LangOpts)292 SourceLocation findLastArgEnd(const CXXConstructExpr &CE,
293 const SourceManager &SM,
294 const LangOptions &LangOpts) {
295 for (int i = CE.getNumArgs() - 1; i >= 0; --i) {
296 const Expr *Arg = CE.getArg(i);
297 if (isa<CXXDefaultArgExpr>(Arg))
298 continue;
299 return Lexer::getLocForEndOfToken(Arg->getEndLoc(), 0, SM, LangOpts);
300 }
301 return {};
302 }
303
304 // Returns the range of the source between the call's parentheses/braces.
getCallArgumentsRange(const MatchResult & Result,const CallExpr & CE)305 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
306 const CallExpr &CE) {
307 const SourceLocation RLoc = CE.getRParenLoc();
308 return CharSourceRange::getCharRange(
309 findArgStartDelimiter(CE, RLoc, *Result.SourceManager,
310 Result.Context->getLangOpts())
311 .getLocWithOffset(1),
312 RLoc);
313 }
314
315 // Returns the range of the source between the construct expr's
316 // parentheses/braces.
getConstructArgumentsRange(const MatchResult & Result,const CXXConstructExpr & CE)317 CharSourceRange getConstructArgumentsRange(const MatchResult &Result,
318 const CXXConstructExpr &CE) {
319 if (SourceRange R = CE.getParenOrBraceRange(); R.isValid()) {
320 return CharSourceRange::getCharRange(
321 Lexer::getLocForEndOfToken(R.getBegin(), 0, *Result.SourceManager,
322 Result.Context->getLangOpts()),
323 R.getEnd());
324 }
325
326 if (CE.getNumArgs() > 0) {
327 return CharSourceRange::getCharRange(
328 CE.getArg(0)->getBeginLoc(),
329 findLastArgEnd(CE, *Result.SourceManager,
330 Result.Context->getLangOpts()));
331 }
332
333 return {};
334 }
335
336 } // namespace
337
callArgs(std::string ID)338 RangeSelector transformer::callArgs(std::string ID) {
339 return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
340 }
341
constructExprArgs(std::string ID)342 RangeSelector transformer::constructExprArgs(std::string ID) {
343 return RelativeSelector<CXXConstructExpr, getConstructArgumentsRange>(
344 std::move(ID));
345 }
346
347 namespace {
348 // Returns the range of the elements of the initializer list. Includes all
349 // source between the braces.
getElementsRange(const MatchResult &,const InitListExpr & E)350 CharSourceRange getElementsRange(const MatchResult &,
351 const InitListExpr &E) {
352 return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
353 E.getRBraceLoc());
354 }
355 } // namespace
356
initListElements(std::string ID)357 RangeSelector transformer::initListElements(std::string ID) {
358 return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
359 }
360
361 namespace {
362 // Returns the range of the else branch, including the `else` keyword.
getElseRange(const MatchResult & Result,const IfStmt & S)363 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
364 return tooling::maybeExtendRange(
365 CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
366 tok::TokenKind::semi, *Result.Context);
367 }
368 } // namespace
369
elseBranch(std::string ID)370 RangeSelector transformer::elseBranch(std::string ID) {
371 return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
372 }
373
expansion(RangeSelector S)374 RangeSelector transformer::expansion(RangeSelector S) {
375 return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
376 Expected<CharSourceRange> SRange = S(Result);
377 if (!SRange)
378 return SRange.takeError();
379 return Result.SourceManager->getExpansionRange(*SRange);
380 };
381 }
382