1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Transformer/Parsing.h" 10 #include "clang/Basic/CharInfo.h" 11 #include "clang/Tooling/Transformer/RangeSelector.h" 12 #include "llvm/ADT/StringMap.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/Error.h" 15 #include <optional> 16 #include <string> 17 #include <utility> 18 19 using namespace clang; 20 using namespace transformer; 21 22 // FIXME: This implementation is entirely separate from that of the AST 23 // matchers. Given the similarity of the languages and uses of the two parsers, 24 // the two should share a common parsing infrastructure, as should other 25 // Transformer types. We intend to unify this implementation soon to share as 26 // much as possible with the AST Matchers parsing. 27 28 namespace { 29 using llvm::Expected; 30 31 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); 32 33 struct ParseState { 34 // The remaining input to be processed. 35 StringRef Input; 36 // The original input. Not modified during parsing; only for reference in 37 // error reporting. 38 StringRef OriginalInput; 39 }; 40 41 // Represents an intermediate result returned by a parsing function. Functions 42 // that don't generate values should use `std::nullopt` 43 template <typename ResultType> struct ParseProgress { 44 ParseState State; 45 // Intermediate result generated by the Parser. 46 ResultType Value; 47 }; 48 49 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; 50 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); 51 52 class ParseError : public llvm::ErrorInfo<ParseError> { 53 public: 54 // Required field for all ErrorInfo derivatives. 55 static char ID; 56 57 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) 58 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), 59 Excerpt(std::move(InputExcerpt)) {} 60 61 void log(llvm::raw_ostream &OS) const override { 62 OS << "parse error at position (" << Pos << "): " << ErrorMsg 63 << ": " + Excerpt; 64 } 65 66 std::error_code convertToErrorCode() const override { 67 return llvm::inconvertibleErrorCode(); 68 } 69 70 // Position of the error in the input string. 71 size_t Pos; 72 std::string ErrorMsg; 73 // Excerpt of the input starting at the error position. 74 std::string Excerpt; 75 }; 76 77 char ParseError::ID; 78 } // namespace 79 80 static const llvm::StringMap<RangeSelectorOp<std::string>> & 81 getUnaryStringSelectors() { 82 static const llvm::StringMap<RangeSelectorOp<std::string>> M = { 83 {"name", name}, 84 {"node", node}, 85 {"statement", statement}, 86 {"statements", statements}, 87 {"member", member}, 88 {"callArgs", callArgs}, 89 {"elseBranch", elseBranch}, 90 {"initListElements", initListElements}}; 91 return M; 92 } 93 94 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & 95 getUnaryRangeSelectors() { 96 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { 97 {"before", before}, {"after", after}, {"expansion", expansion}}; 98 return M; 99 } 100 101 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & 102 getBinaryStringSelectors() { 103 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { 104 {"encloseNodes", encloseNodes}}; 105 return M; 106 } 107 108 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & 109 getBinaryRangeSelectors() { 110 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> 111 M = {{"enclose", enclose}, {"between", between}}; 112 return M; 113 } 114 115 template <typename Element> 116 std::optional<Element> findOptional(const llvm::StringMap<Element> &Map, 117 llvm::StringRef Key) { 118 auto it = Map.find(Key); 119 if (it == Map.end()) 120 return std::nullopt; 121 return it->second; 122 } 123 124 template <typename ResultType> 125 ParseProgress<ResultType> makeParseProgress(ParseState State, 126 ResultType Result) { 127 return ParseProgress<ResultType>{State, std::move(Result)}; 128 } 129 130 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { 131 size_t Pos = S.OriginalInput.size() - S.Input.size(); 132 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), 133 S.OriginalInput.substr(Pos, 20).str()); 134 } 135 136 // Returns a new ParseState that advances \c S by \c N characters. 137 static ParseState advance(ParseState S, size_t N) { 138 S.Input = S.Input.drop_front(N); 139 return S; 140 } 141 142 static StringRef consumeWhitespace(StringRef S) { 143 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); }); 144 } 145 146 // Parses a single expected character \c c from \c State, skipping preceding 147 // whitespace. Error if the expected character isn't found. 148 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) { 149 State.Input = consumeWhitespace(State.Input); 150 if (State.Input.empty() || State.Input.front() != c) 151 return makeParseError(State, 152 ("expected char not found: " + llvm::Twine(c)).str()); 153 return makeParseProgress(advance(State, 1), std::nullopt); 154 } 155 156 // Parses an identitifer "token" -- handles preceding whitespace. 157 static ExpectedProgress<std::string> parseId(ParseState State) { 158 State.Input = consumeWhitespace(State.Input); 159 auto Id = State.Input.take_while( 160 [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); }); 161 if (Id.empty()) 162 return makeParseError(State, "failed to parse name"); 163 return makeParseProgress(advance(State, Id.size()), Id.str()); 164 } 165 166 // For consistency with the AST matcher parser and C++ code, node ids are 167 // written as strings. However, we do not support escaping in the string. 168 static ExpectedProgress<std::string> parseStringId(ParseState State) { 169 State.Input = consumeWhitespace(State.Input); 170 if (State.Input.empty()) 171 return makeParseError(State, "unexpected end of input"); 172 if (!State.Input.consume_front("\"")) 173 return makeParseError( 174 State, 175 "expecting string, but encountered other character or end of input"); 176 177 StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); 178 if (State.Input.size() == Id.size()) 179 return makeParseError(State, "unterminated string"); 180 // Advance past the trailing quote as well. 181 return makeParseProgress(advance(State, Id.size() + 1), Id.str()); 182 } 183 184 // Parses a single element surrounded by parens. `Op` is applied to the parsed 185 // result to create the result of this function call. 186 template <typename T> 187 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, 188 RangeSelectorOp<T> Op, 189 ParseState State) { 190 auto P = parseChar('(', State); 191 if (!P) 192 return P.takeError(); 193 194 auto E = ParseElement(P->State); 195 if (!E) 196 return E.takeError(); 197 198 P = parseChar(')', E->State); 199 if (!P) 200 return P.takeError(); 201 202 return makeParseProgress(P->State, Op(std::move(E->Value))); 203 } 204 205 // Parses a pair of elements surrounded by parens and separated by comma. `Op` 206 // is applied to the parsed results to create the result of this function call. 207 template <typename T> 208 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, 209 RangeSelectorOp<T, T> Op, 210 ParseState State) { 211 auto P = parseChar('(', State); 212 if (!P) 213 return P.takeError(); 214 215 auto Left = ParseElement(P->State); 216 if (!Left) 217 return Left.takeError(); 218 219 P = parseChar(',', Left->State); 220 if (!P) 221 return P.takeError(); 222 223 auto Right = ParseElement(P->State); 224 if (!Right) 225 return Right.takeError(); 226 227 P = parseChar(')', Right->State); 228 if (!P) 229 return P.takeError(); 230 231 return makeParseProgress(P->State, 232 Op(std::move(Left->Value), std::move(Right->Value))); 233 } 234 235 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or 236 // Id operator). Returns StencilType representing the operator on success and 237 // error if it fails to parse input for an operator. 238 static ExpectedProgress<RangeSelector> 239 parseRangeSelectorImpl(ParseState State) { 240 auto Id = parseId(State); 241 if (!Id) 242 return Id.takeError(); 243 244 std::string OpName = std::move(Id->Value); 245 if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) 246 return parseSingle(parseStringId, *Op, Id->State); 247 248 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) 249 return parseSingle(parseRangeSelectorImpl, *Op, Id->State); 250 251 if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) 252 return parsePair(parseStringId, *Op, Id->State); 253 254 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) 255 return parsePair(parseRangeSelectorImpl, *Op, Id->State); 256 257 return makeParseError(State, "unknown selector name: " + OpName); 258 } 259 260 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { 261 ParseState State = {Input, Input}; 262 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); 263 if (!Result) 264 return Result.takeError(); 265 State = Result->State; 266 // Discard any potentially trailing whitespace. 267 State.Input = consumeWhitespace(State.Input); 268 if (State.Input.empty()) 269 return Result->Value; 270 return makeParseError(State, "unexpected input after selector"); 271 } 272