xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/Transformer/Parsing.cpp (revision 02e9120893770924227138ba49df1edb3896112a)
1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Transformer/Parsing.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/CharInfo.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/RangeSelector.h"
16 #include "clang/Tooling/Transformer/SourceCode.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/Error.h"
21 #include <optional>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 using namespace clang;
27 using namespace transformer;
28 
29 // FIXME: This implementation is entirely separate from that of the AST
30 // matchers. Given the similarity of the languages and uses of the two parsers,
31 // the two should share a common parsing infrastructure, as should other
32 // Transformer types. We intend to unify this implementation soon to share as
33 // much as possible with the AST Matchers parsing.
34 
35 namespace {
36 using llvm::Expected;
37 
38 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
39 
40 struct ParseState {
41   // The remaining input to be processed.
42   StringRef Input;
43   // The original input. Not modified during parsing; only for reference in
44   // error reporting.
45   StringRef OriginalInput;
46 };
47 
48 // Represents an intermediate result returned by a parsing function. Functions
49 // that don't generate values should use `std::nullopt`
50 template <typename ResultType> struct ParseProgress {
51   ParseState State;
52   // Intermediate result generated by the Parser.
53   ResultType Value;
54 };
55 
56 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
57 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
58 
59 class ParseError : public llvm::ErrorInfo<ParseError> {
60 public:
61   // Required field for all ErrorInfo derivatives.
62   static char ID;
63 
64   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
65       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
66         Excerpt(std::move(InputExcerpt)) {}
67 
68   void log(llvm::raw_ostream &OS) const override {
69     OS << "parse error at position (" << Pos << "): " << ErrorMsg
70        << ": " + Excerpt;
71   }
72 
73   std::error_code convertToErrorCode() const override {
74     return llvm::inconvertibleErrorCode();
75   }
76 
77   // Position of the error in the input string.
78   size_t Pos;
79   std::string ErrorMsg;
80   // Excerpt of the input starting at the error position.
81   std::string Excerpt;
82 };
83 
84 char ParseError::ID;
85 } // namespace
86 
87 static const llvm::StringMap<RangeSelectorOp<std::string>> &
88 getUnaryStringSelectors() {
89   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
90       {"name", name},
91       {"node", node},
92       {"statement", statement},
93       {"statements", statements},
94       {"member", member},
95       {"callArgs", callArgs},
96       {"elseBranch", elseBranch},
97       {"initListElements", initListElements}};
98   return M;
99 }
100 
101 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
102 getUnaryRangeSelectors() {
103   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
104       {"before", before}, {"after", after}, {"expansion", expansion}};
105   return M;
106 }
107 
108 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
109 getBinaryStringSelectors() {
110   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
111       {"encloseNodes", encloseNodes}};
112   return M;
113 }
114 
115 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
116 getBinaryRangeSelectors() {
117   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
118       M = {{"enclose", enclose}, {"between", between}};
119   return M;
120 }
121 
122 template <typename Element>
123 std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
124                                     llvm::StringRef Key) {
125   auto it = Map.find(Key);
126   if (it == Map.end())
127     return std::nullopt;
128   return it->second;
129 }
130 
131 template <typename ResultType>
132 ParseProgress<ResultType> makeParseProgress(ParseState State,
133                                             ResultType Result) {
134   return ParseProgress<ResultType>{State, std::move(Result)};
135 }
136 
137 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
138   size_t Pos = S.OriginalInput.size() - S.Input.size();
139   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
140                                       S.OriginalInput.substr(Pos, 20).str());
141 }
142 
143 // Returns a new ParseState that advances \c S by \c N characters.
144 static ParseState advance(ParseState S, size_t N) {
145   S.Input = S.Input.drop_front(N);
146   return S;
147 }
148 
149 static StringRef consumeWhitespace(StringRef S) {
150   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
151 }
152 
153 // Parses a single expected character \c c from \c State, skipping preceding
154 // whitespace.  Error if the expected character isn't found.
155 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
156   State.Input = consumeWhitespace(State.Input);
157   if (State.Input.empty() || State.Input.front() != c)
158     return makeParseError(State,
159                           ("expected char not found: " + llvm::Twine(c)).str());
160   return makeParseProgress(advance(State, 1), std::nullopt);
161 }
162 
163 // Parses an identitifer "token" -- handles preceding whitespace.
164 static ExpectedProgress<std::string> parseId(ParseState State) {
165   State.Input = consumeWhitespace(State.Input);
166   auto Id = State.Input.take_while(
167       [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
168   if (Id.empty())
169     return makeParseError(State, "failed to parse name");
170   return makeParseProgress(advance(State, Id.size()), Id.str());
171 }
172 
173 // For consistency with the AST matcher parser and C++ code, node ids are
174 // written as strings. However, we do not support escaping in the string.
175 static ExpectedProgress<std::string> parseStringId(ParseState State) {
176   State.Input = consumeWhitespace(State.Input);
177   if (State.Input.empty())
178     return makeParseError(State, "unexpected end of input");
179   if (!State.Input.consume_front("\""))
180     return makeParseError(
181         State,
182         "expecting string, but encountered other character or end of input");
183 
184   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
185   if (State.Input.size() == Id.size())
186     return makeParseError(State, "unterminated string");
187   // Advance past the trailing quote as well.
188   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
189 }
190 
191 // Parses a single element surrounded by parens. `Op` is applied to the parsed
192 // result to create the result of this function call.
193 template <typename T>
194 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
195                                             RangeSelectorOp<T> Op,
196                                             ParseState State) {
197   auto P = parseChar('(', State);
198   if (!P)
199     return P.takeError();
200 
201   auto E = ParseElement(P->State);
202   if (!E)
203     return E.takeError();
204 
205   P = parseChar(')', E->State);
206   if (!P)
207     return P.takeError();
208 
209   return makeParseProgress(P->State, Op(std::move(E->Value)));
210 }
211 
212 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
213 // is applied to the parsed results to create the result of this function call.
214 template <typename T>
215 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
216                                           RangeSelectorOp<T, T> Op,
217                                           ParseState State) {
218   auto P = parseChar('(', State);
219   if (!P)
220     return P.takeError();
221 
222   auto Left = ParseElement(P->State);
223   if (!Left)
224     return Left.takeError();
225 
226   P = parseChar(',', Left->State);
227   if (!P)
228     return P.takeError();
229 
230   auto Right = ParseElement(P->State);
231   if (!Right)
232     return Right.takeError();
233 
234   P = parseChar(')', Right->State);
235   if (!P)
236     return P.takeError();
237 
238   return makeParseProgress(P->State,
239                            Op(std::move(Left->Value), std::move(Right->Value)));
240 }
241 
242 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
243 // Id operator). Returns StencilType representing the operator on success and
244 // error if it fails to parse input for an operator.
245 static ExpectedProgress<RangeSelector>
246 parseRangeSelectorImpl(ParseState State) {
247   auto Id = parseId(State);
248   if (!Id)
249     return Id.takeError();
250 
251   std::string OpName = std::move(Id->Value);
252   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
253     return parseSingle(parseStringId, *Op, Id->State);
254 
255   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
256     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
257 
258   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
259     return parsePair(parseStringId, *Op, Id->State);
260 
261   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
262     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
263 
264   return makeParseError(State, "unknown selector name: " + OpName);
265 }
266 
267 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
268   ParseState State = {Input, Input};
269   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
270   if (!Result)
271     return Result.takeError();
272   State = Result->State;
273   // Discard any potentially trailing whitespace.
274   State.Input = consumeWhitespace(State.Input);
275   if (State.Input.empty())
276     return Result->Value;
277   return makeParseError(State, "unexpected input after selector");
278 }
279