xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/Transformer/RangeSelector.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Transformer/RangeSelector.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/AST/TypeLoc.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/SourceCode.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
18 #include "llvm/Support/Error.h"
19 #include <string>
20 #include <utility>
21 
22 using namespace clang;
23 using namespace transformer;
24 
25 using ast_matchers::MatchFinder;
26 using llvm::Error;
27 using llvm::StringError;
28 
29 using MatchResult = MatchFinder::MatchResult;
30 
invalidArgumentError(Twine Message)31 static Error invalidArgumentError(Twine Message) {
32   return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
33 }
34 
typeError(StringRef ID,const ASTNodeKind & Kind)35 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
36   return invalidArgumentError("mismatched type (node id=" + ID +
37                               " kind=" + Kind.asStringRef() + ")");
38 }
39 
typeError(StringRef ID,const ASTNodeKind & Kind,Twine ExpectedType)40 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
41                        Twine ExpectedType) {
42   return invalidArgumentError("mismatched type: expected one of " +
43                               ExpectedType + " (node id=" + ID +
44                               " kind=" + Kind.asStringRef() + ")");
45 }
46 
missingPropertyError(StringRef ID,Twine Description,StringRef Property)47 static Error missingPropertyError(StringRef ID, Twine Description,
48                                   StringRef Property) {
49   return invalidArgumentError(Description + " requires property '" + Property +
50                               "' (node id=" + ID + ")");
51 }
52 
getNode(const ast_matchers::BoundNodes & Nodes,StringRef ID)53 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
54                                       StringRef ID) {
55   auto &NodesMap = Nodes.getMap();
56   auto It = NodesMap.find(ID);
57   if (It == NodesMap.end())
58     return invalidArgumentError("ID not bound: " + ID);
59   return It->second;
60 }
61 
62 // FIXME: handling of macros should be configurable.
findPreviousTokenStart(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)63 static SourceLocation findPreviousTokenStart(SourceLocation Start,
64                                              const SourceManager &SM,
65                                              const LangOptions &LangOpts) {
66   if (Start.isInvalid() || Start.isMacroID())
67     return SourceLocation();
68 
69   SourceLocation BeforeStart = Start.getLocWithOffset(-1);
70   if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
71     return SourceLocation();
72 
73   return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
74 }
75 
76 // Finds the start location of the previous token of kind \p TK.
77 // FIXME: handling of macros should be configurable.
findPreviousTokenKind(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts,tok::TokenKind TK)78 static SourceLocation findPreviousTokenKind(SourceLocation Start,
79                                             const SourceManager &SM,
80                                             const LangOptions &LangOpts,
81                                             tok::TokenKind TK) {
82   while (true) {
83     SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
84     if (L.isInvalid() || L.isMacroID())
85       return SourceLocation();
86 
87     Token T;
88     if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
89       return SourceLocation();
90 
91     if (T.is(TK))
92       return T.getLocation();
93 
94     Start = L;
95   }
96 }
97 
before(RangeSelector Selector)98 RangeSelector transformer::before(RangeSelector Selector) {
99   return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
100     Expected<CharSourceRange> SelectedRange = Selector(Result);
101     if (!SelectedRange)
102       return SelectedRange.takeError();
103     return CharSourceRange::getCharRange(SelectedRange->getBegin());
104   };
105 }
106 
after(RangeSelector Selector)107 RangeSelector transformer::after(RangeSelector Selector) {
108   return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
109     Expected<CharSourceRange> SelectedRange = Selector(Result);
110     if (!SelectedRange)
111       return SelectedRange.takeError();
112     SourceLocation End = SelectedRange->getEnd();
113     if (SelectedRange->isTokenRange()) {
114       // We need to find the actual (exclusive) end location from which to
115       // create a new source range. However, that's not guaranteed to be valid,
116       // even if the token location itself is valid. So, we create a token range
117       // consisting only of the last token, then map that range back to the
118       // source file. If that succeeds, we have a valid location for the end of
119       // the generated range.
120       CharSourceRange Range = Lexer::makeFileCharRange(
121           CharSourceRange::getTokenRange(SelectedRange->getEnd()),
122           *Result.SourceManager, Result.Context->getLangOpts());
123       if (Range.isInvalid())
124         return invalidArgumentError(
125             "after: can't resolve sub-range to valid source range");
126       End = Range.getEnd();
127     }
128 
129     return CharSourceRange::getCharRange(End);
130   };
131 }
132 
node(std::string ID)133 RangeSelector transformer::node(std::string ID) {
134   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
135     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
136     if (!Node)
137       return Node.takeError();
138     return (Node->get<Decl>() != nullptr ||
139             (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
140                ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
141                                            *Result.Context)
142                : CharSourceRange::getTokenRange(Node->getSourceRange());
143   };
144 }
145 
statement(std::string ID)146 RangeSelector transformer::statement(std::string ID) {
147   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
148     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
149     if (!Node)
150       return Node.takeError();
151     return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
152                                      *Result.Context);
153   };
154 }
155 
enclose(RangeSelector Begin,RangeSelector End)156 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
157   return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
158     Expected<CharSourceRange> BeginRange = Begin(Result);
159     if (!BeginRange)
160       return BeginRange.takeError();
161     Expected<CharSourceRange> EndRange = End(Result);
162     if (!EndRange)
163       return EndRange.takeError();
164     SourceLocation B = BeginRange->getBegin();
165     SourceLocation E = EndRange->getEnd();
166     // Note: we are precluding the possibility of sub-token ranges in the case
167     // that EndRange is a token range.
168     if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
169       return invalidArgumentError("Bad range: out of order");
170     }
171     return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
172   };
173 }
174 
encloseNodes(std::string BeginID,std::string EndID)175 RangeSelector transformer::encloseNodes(std::string BeginID,
176                                         std::string EndID) {
177   return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
178 }
179 
member(std::string ID)180 RangeSelector transformer::member(std::string ID) {
181   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
182     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
183     if (!Node)
184       return Node.takeError();
185     if (auto *M = Node->get<clang::MemberExpr>())
186       return CharSourceRange::getTokenRange(
187           M->getMemberNameInfo().getSourceRange());
188     return typeError(ID, Node->getNodeKind(), "MemberExpr");
189   };
190 }
191 
name(std::string ID)192 RangeSelector transformer::name(std::string ID) {
193   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
194     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
195     if (!N)
196       return N.takeError();
197     auto &Node = *N;
198     if (const auto *D = Node.get<NamedDecl>()) {
199       if (!D->getDeclName().isIdentifier())
200         return missingPropertyError(ID, "name", "identifier");
201       SourceLocation L = D->getLocation();
202       auto R = CharSourceRange::getTokenRange(L, L);
203       // Verify that the range covers exactly the name.
204       // FIXME: extend this code to support cases like `operator +` or
205       // `foo<int>` for which this range will be too short.  Doing so will
206       // require subcasing `NamedDecl`, because it doesn't provide virtual
207       // access to the \c DeclarationNameInfo.
208       if (tooling::getText(R, *Result.Context) != D->getName())
209         return CharSourceRange();
210       return R;
211     }
212     if (const auto *E = Node.get<DeclRefExpr>()) {
213       if (!E->getNameInfo().getName().isIdentifier())
214         return missingPropertyError(ID, "name", "identifier");
215       SourceLocation L = E->getLocation();
216       return CharSourceRange::getTokenRange(L, L);
217     }
218     if (const auto *I = Node.get<CXXCtorInitializer>()) {
219       if (!I->isMemberInitializer() && I->isWritten())
220         return missingPropertyError(ID, "name", "explicit member initializer");
221       SourceLocation L = I->getMemberLocation();
222       return CharSourceRange::getTokenRange(L, L);
223     }
224     if (const auto *T = Node.get<TypeLoc>()) {
225       TypeLoc Loc = *T;
226       auto ET = Loc.getAs<ElaboratedTypeLoc>();
227       if (!ET.isNull())
228         Loc = ET.getNamedTypeLoc();
229       if (auto SpecLoc = Loc.getAs<TemplateSpecializationTypeLoc>();
230           !SpecLoc.isNull())
231         return CharSourceRange::getTokenRange(SpecLoc.getTemplateNameLoc());
232       return CharSourceRange::getTokenRange(Loc.getSourceRange());
233     }
234     return typeError(ID, Node.getNodeKind(),
235                      "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
236   };
237 }
238 
239 namespace {
240 // FIXME: make this available in the public API for users to easily create their
241 // own selectors.
242 
243 // Creates a selector from a range-selection function \p Func, which selects a
244 // range that is relative to a bound node id.  \c T is the node type expected by
245 // \p Func.
246 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
247 class RelativeSelector {
248   std::string ID;
249 
250 public:
RelativeSelector(std::string ID)251   RelativeSelector(std::string ID) : ID(std::move(ID)) {}
252 
operator ()(const MatchResult & Result)253   Expected<CharSourceRange> operator()(const MatchResult &Result) {
254     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
255     if (!N)
256       return N.takeError();
257     if (const auto *Arg = N->get<T>())
258       return Func(Result, *Arg);
259     return typeError(ID, N->getNodeKind());
260   }
261 };
262 } // namespace
263 
264 // FIXME: Change the following functions from being in an anonymous namespace
265 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
266 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
267 // namespace works around a bug in earlier versions.
268 namespace {
269 // Returns the range of the statements (all source between the braces).
getStatementsRange(const MatchResult &,const CompoundStmt & CS)270 CharSourceRange getStatementsRange(const MatchResult &,
271                                    const CompoundStmt &CS) {
272   return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
273                                        CS.getRBracLoc());
274 }
275 } // namespace
276 
statements(std::string ID)277 RangeSelector transformer::statements(std::string ID) {
278   return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
279 }
280 
281 namespace {
282 
findArgStartDelimiter(const CallExpr & E,SourceLocation RLoc,const SourceManager & SM,const LangOptions & LangOpts)283 SourceLocation findArgStartDelimiter(const CallExpr &E, SourceLocation RLoc,
284                                      const SourceManager &SM,
285                                      const LangOptions &LangOpts) {
286   SourceLocation Loc = E.getNumArgs() == 0 ? RLoc : E.getArg(0)->getBeginLoc();
287   return findPreviousTokenKind(Loc, SM, LangOpts, tok::TokenKind::l_paren);
288 }
289 
290 // Returns the location after the last argument of the construct expr. Returns
291 // an invalid location if there are no arguments.
findLastArgEnd(const CXXConstructExpr & CE,const SourceManager & SM,const LangOptions & LangOpts)292 SourceLocation findLastArgEnd(const CXXConstructExpr &CE,
293                               const SourceManager &SM,
294                               const LangOptions &LangOpts) {
295   for (int i = CE.getNumArgs() - 1; i >= 0; --i) {
296     const Expr *Arg = CE.getArg(i);
297     if (isa<CXXDefaultArgExpr>(Arg))
298       continue;
299     return Lexer::getLocForEndOfToken(Arg->getEndLoc(), 0, SM, LangOpts);
300   }
301   return {};
302 }
303 
304 // Returns the range of the source between the call's parentheses/braces.
getCallArgumentsRange(const MatchResult & Result,const CallExpr & CE)305 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
306                                       const CallExpr &CE) {
307   const SourceLocation RLoc = CE.getRParenLoc();
308   return CharSourceRange::getCharRange(
309       findArgStartDelimiter(CE, RLoc, *Result.SourceManager,
310                             Result.Context->getLangOpts())
311           .getLocWithOffset(1),
312       RLoc);
313 }
314 
315 // Returns the range of the source between the construct expr's
316 // parentheses/braces.
getConstructArgumentsRange(const MatchResult & Result,const CXXConstructExpr & CE)317 CharSourceRange getConstructArgumentsRange(const MatchResult &Result,
318                                            const CXXConstructExpr &CE) {
319   if (SourceRange R = CE.getParenOrBraceRange(); R.isValid()) {
320     return CharSourceRange::getCharRange(
321         Lexer::getLocForEndOfToken(R.getBegin(), 0, *Result.SourceManager,
322                                    Result.Context->getLangOpts()),
323         R.getEnd());
324   }
325 
326   if (CE.getNumArgs() > 0) {
327     return CharSourceRange::getCharRange(
328         CE.getArg(0)->getBeginLoc(),
329         findLastArgEnd(CE, *Result.SourceManager,
330                        Result.Context->getLangOpts()));
331   }
332 
333   return {};
334 }
335 
336 } // namespace
337 
callArgs(std::string ID)338 RangeSelector transformer::callArgs(std::string ID) {
339   return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
340 }
341 
constructExprArgs(std::string ID)342 RangeSelector transformer::constructExprArgs(std::string ID) {
343   return RelativeSelector<CXXConstructExpr, getConstructArgumentsRange>(
344       std::move(ID));
345 }
346 
347 namespace {
348 // Returns the range of the elements of the initializer list. Includes all
349 // source between the braces.
getElementsRange(const MatchResult &,const InitListExpr & E)350 CharSourceRange getElementsRange(const MatchResult &,
351                                  const InitListExpr &E) {
352   return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
353                                        E.getRBraceLoc());
354 }
355 } // namespace
356 
initListElements(std::string ID)357 RangeSelector transformer::initListElements(std::string ID) {
358   return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
359 }
360 
361 namespace {
362 // Returns the range of the else branch, including the `else` keyword.
getElseRange(const MatchResult & Result,const IfStmt & S)363 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
364   return tooling::maybeExtendRange(
365       CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
366       tok::TokenKind::semi, *Result.Context);
367 }
368 } // namespace
369 
elseBranch(std::string ID)370 RangeSelector transformer::elseBranch(std::string ID) {
371   return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
372 }
373 
expansion(RangeSelector S)374 RangeSelector transformer::expansion(RangeSelector S) {
375   return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
376     Expected<CharSourceRange> SRange = S(Result);
377     if (!SRange)
378       return SRange.takeError();
379     return Result.SourceManager->getExpansionRange(*SRange);
380   };
381 }
382