xref: /freebsd/contrib/llvm-project/clang/lib/Tooling/Transformer/Parsing.cpp (revision 9c77fb6aaa366cbabc80ee1b834bcfe4df135491)
1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Transformer/Parsing.h"
10 #include "clang/Basic/CharInfo.h"
11 #include "clang/Tooling/Transformer/RangeSelector.h"
12 #include "llvm/ADT/StringMap.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/Error.h"
15 #include <optional>
16 #include <string>
17 #include <utility>
18 
19 using namespace clang;
20 using namespace transformer;
21 
22 // FIXME: This implementation is entirely separate from that of the AST
23 // matchers. Given the similarity of the languages and uses of the two parsers,
24 // the two should share a common parsing infrastructure, as should other
25 // Transformer types. We intend to unify this implementation soon to share as
26 // much as possible with the AST Matchers parsing.
27 
28 namespace {
29 using llvm::Expected;
30 
31 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
32 
33 struct ParseState {
34   // The remaining input to be processed.
35   StringRef Input;
36   // The original input. Not modified during parsing; only for reference in
37   // error reporting.
38   StringRef OriginalInput;
39 };
40 
41 // Represents an intermediate result returned by a parsing function. Functions
42 // that don't generate values should use `std::nullopt`
43 template <typename ResultType> struct ParseProgress {
44   ParseState State;
45   // Intermediate result generated by the Parser.
46   ResultType Value;
47 };
48 
49 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
50 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
51 
52 class ParseError : public llvm::ErrorInfo<ParseError> {
53 public:
54   // Required field for all ErrorInfo derivatives.
55   static char ID;
56 
57   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
58       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
59         Excerpt(std::move(InputExcerpt)) {}
60 
61   void log(llvm::raw_ostream &OS) const override {
62     OS << "parse error at position (" << Pos << "): " << ErrorMsg
63        << ": " + Excerpt;
64   }
65 
66   std::error_code convertToErrorCode() const override {
67     return llvm::inconvertibleErrorCode();
68   }
69 
70   // Position of the error in the input string.
71   size_t Pos;
72   std::string ErrorMsg;
73   // Excerpt of the input starting at the error position.
74   std::string Excerpt;
75 };
76 
77 char ParseError::ID;
78 } // namespace
79 
80 static const llvm::StringMap<RangeSelectorOp<std::string>> &
81 getUnaryStringSelectors() {
82   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
83       {"name", name},
84       {"node", node},
85       {"statement", statement},
86       {"statements", statements},
87       {"member", member},
88       {"callArgs", callArgs},
89       {"elseBranch", elseBranch},
90       {"initListElements", initListElements}};
91   return M;
92 }
93 
94 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
95 getUnaryRangeSelectors() {
96   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
97       {"before", before}, {"after", after}, {"expansion", expansion}};
98   return M;
99 }
100 
101 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
102 getBinaryStringSelectors() {
103   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
104       {"encloseNodes", encloseNodes}};
105   return M;
106 }
107 
108 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
109 getBinaryRangeSelectors() {
110   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
111       M = {{"enclose", enclose}, {"between", between}};
112   return M;
113 }
114 
115 template <typename Element>
116 std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
117                                     llvm::StringRef Key) {
118   auto it = Map.find(Key);
119   if (it == Map.end())
120     return std::nullopt;
121   return it->second;
122 }
123 
124 template <typename ResultType>
125 ParseProgress<ResultType> makeParseProgress(ParseState State,
126                                             ResultType Result) {
127   return ParseProgress<ResultType>{State, std::move(Result)};
128 }
129 
130 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
131   size_t Pos = S.OriginalInput.size() - S.Input.size();
132   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
133                                       S.OriginalInput.substr(Pos, 20).str());
134 }
135 
136 // Returns a new ParseState that advances \c S by \c N characters.
137 static ParseState advance(ParseState S, size_t N) {
138   S.Input = S.Input.drop_front(N);
139   return S;
140 }
141 
142 static StringRef consumeWhitespace(StringRef S) {
143   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
144 }
145 
146 // Parses a single expected character \c c from \c State, skipping preceding
147 // whitespace.  Error if the expected character isn't found.
148 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
149   State.Input = consumeWhitespace(State.Input);
150   if (State.Input.empty() || State.Input.front() != c)
151     return makeParseError(State,
152                           ("expected char not found: " + llvm::Twine(c)).str());
153   return makeParseProgress(advance(State, 1), std::nullopt);
154 }
155 
156 // Parses an identitifer "token" -- handles preceding whitespace.
157 static ExpectedProgress<std::string> parseId(ParseState State) {
158   State.Input = consumeWhitespace(State.Input);
159   auto Id = State.Input.take_while(
160       [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
161   if (Id.empty())
162     return makeParseError(State, "failed to parse name");
163   return makeParseProgress(advance(State, Id.size()), Id.str());
164 }
165 
166 // For consistency with the AST matcher parser and C++ code, node ids are
167 // written as strings. However, we do not support escaping in the string.
168 static ExpectedProgress<std::string> parseStringId(ParseState State) {
169   State.Input = consumeWhitespace(State.Input);
170   if (State.Input.empty())
171     return makeParseError(State, "unexpected end of input");
172   if (!State.Input.consume_front("\""))
173     return makeParseError(
174         State,
175         "expecting string, but encountered other character or end of input");
176 
177   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
178   if (State.Input.size() == Id.size())
179     return makeParseError(State, "unterminated string");
180   // Advance past the trailing quote as well.
181   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
182 }
183 
184 // Parses a single element surrounded by parens. `Op` is applied to the parsed
185 // result to create the result of this function call.
186 template <typename T>
187 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
188                                             RangeSelectorOp<T> Op,
189                                             ParseState State) {
190   auto P = parseChar('(', State);
191   if (!P)
192     return P.takeError();
193 
194   auto E = ParseElement(P->State);
195   if (!E)
196     return E.takeError();
197 
198   P = parseChar(')', E->State);
199   if (!P)
200     return P.takeError();
201 
202   return makeParseProgress(P->State, Op(std::move(E->Value)));
203 }
204 
205 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
206 // is applied to the parsed results to create the result of this function call.
207 template <typename T>
208 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
209                                           RangeSelectorOp<T, T> Op,
210                                           ParseState State) {
211   auto P = parseChar('(', State);
212   if (!P)
213     return P.takeError();
214 
215   auto Left = ParseElement(P->State);
216   if (!Left)
217     return Left.takeError();
218 
219   P = parseChar(',', Left->State);
220   if (!P)
221     return P.takeError();
222 
223   auto Right = ParseElement(P->State);
224   if (!Right)
225     return Right.takeError();
226 
227   P = parseChar(')', Right->State);
228   if (!P)
229     return P.takeError();
230 
231   return makeParseProgress(P->State,
232                            Op(std::move(Left->Value), std::move(Right->Value)));
233 }
234 
235 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
236 // Id operator). Returns StencilType representing the operator on success and
237 // error if it fails to parse input for an operator.
238 static ExpectedProgress<RangeSelector>
239 parseRangeSelectorImpl(ParseState State) {
240   auto Id = parseId(State);
241   if (!Id)
242     return Id.takeError();
243 
244   std::string OpName = std::move(Id->Value);
245   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
246     return parseSingle(parseStringId, *Op, Id->State);
247 
248   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
249     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
250 
251   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
252     return parsePair(parseStringId, *Op, Id->State);
253 
254   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
255     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
256 
257   return makeParseError(State, "unknown selector name: " + OpName);
258 }
259 
260 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
261   ParseState State = {Input, Input};
262   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
263   if (!Result)
264     return Result.takeError();
265   State = Result->State;
266   // Discard any potentially trailing whitespace.
267   State.Input = consumeWhitespace(State.Input);
268   if (State.Input.empty())
269     return Result->Value;
270   return makeParseError(State, "unexpected input after selector");
271 }
272