1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Transformer/Parsing.h" 10 #include "clang/AST/Expr.h" 11 #include "clang/ASTMatchers/ASTMatchFinder.h" 12 #include "clang/Basic/CharInfo.h" 13 #include "clang/Basic/SourceLocation.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Tooling/Transformer/RangeSelector.h" 16 #include "clang/Tooling/Transformer/SourceCode.h" 17 #include "llvm/ADT/StringMap.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Support/Errc.h" 20 #include "llvm/Support/Error.h" 21 #include <optional> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 using namespace clang; 27 using namespace transformer; 28 29 // FIXME: This implementation is entirely separate from that of the AST 30 // matchers. Given the similarity of the languages and uses of the two parsers, 31 // the two should share a common parsing infrastructure, as should other 32 // Transformer types. We intend to unify this implementation soon to share as 33 // much as possible with the AST Matchers parsing. 34 35 namespace { 36 using llvm::Expected; 37 38 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); 39 40 struct ParseState { 41 // The remaining input to be processed. 42 StringRef Input; 43 // The original input. Not modified during parsing; only for reference in 44 // error reporting. 45 StringRef OriginalInput; 46 }; 47 48 // Represents an intermediate result returned by a parsing function. Functions 49 // that don't generate values should use `std::nullopt` 50 template <typename ResultType> struct ParseProgress { 51 ParseState State; 52 // Intermediate result generated by the Parser. 53 ResultType Value; 54 }; 55 56 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; 57 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); 58 59 class ParseError : public llvm::ErrorInfo<ParseError> { 60 public: 61 // Required field for all ErrorInfo derivatives. 62 static char ID; 63 64 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) 65 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), 66 Excerpt(std::move(InputExcerpt)) {} 67 68 void log(llvm::raw_ostream &OS) const override { 69 OS << "parse error at position (" << Pos << "): " << ErrorMsg 70 << ": " + Excerpt; 71 } 72 73 std::error_code convertToErrorCode() const override { 74 return llvm::inconvertibleErrorCode(); 75 } 76 77 // Position of the error in the input string. 78 size_t Pos; 79 std::string ErrorMsg; 80 // Excerpt of the input starting at the error position. 81 std::string Excerpt; 82 }; 83 84 char ParseError::ID; 85 } // namespace 86 87 static const llvm::StringMap<RangeSelectorOp<std::string>> & 88 getUnaryStringSelectors() { 89 static const llvm::StringMap<RangeSelectorOp<std::string>> M = { 90 {"name", name}, 91 {"node", node}, 92 {"statement", statement}, 93 {"statements", statements}, 94 {"member", member}, 95 {"callArgs", callArgs}, 96 {"elseBranch", elseBranch}, 97 {"initListElements", initListElements}}; 98 return M; 99 } 100 101 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & 102 getUnaryRangeSelectors() { 103 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { 104 {"before", before}, {"after", after}, {"expansion", expansion}}; 105 return M; 106 } 107 108 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & 109 getBinaryStringSelectors() { 110 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { 111 {"encloseNodes", encloseNodes}}; 112 return M; 113 } 114 115 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & 116 getBinaryRangeSelectors() { 117 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> 118 M = {{"enclose", enclose}, {"between", between}}; 119 return M; 120 } 121 122 template <typename Element> 123 std::optional<Element> findOptional(const llvm::StringMap<Element> &Map, 124 llvm::StringRef Key) { 125 auto it = Map.find(Key); 126 if (it == Map.end()) 127 return std::nullopt; 128 return it->second; 129 } 130 131 template <typename ResultType> 132 ParseProgress<ResultType> makeParseProgress(ParseState State, 133 ResultType Result) { 134 return ParseProgress<ResultType>{State, std::move(Result)}; 135 } 136 137 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { 138 size_t Pos = S.OriginalInput.size() - S.Input.size(); 139 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), 140 S.OriginalInput.substr(Pos, 20).str()); 141 } 142 143 // Returns a new ParseState that advances \c S by \c N characters. 144 static ParseState advance(ParseState S, size_t N) { 145 S.Input = S.Input.drop_front(N); 146 return S; 147 } 148 149 static StringRef consumeWhitespace(StringRef S) { 150 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); }); 151 } 152 153 // Parses a single expected character \c c from \c State, skipping preceding 154 // whitespace. Error if the expected character isn't found. 155 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) { 156 State.Input = consumeWhitespace(State.Input); 157 if (State.Input.empty() || State.Input.front() != c) 158 return makeParseError(State, 159 ("expected char not found: " + llvm::Twine(c)).str()); 160 return makeParseProgress(advance(State, 1), std::nullopt); 161 } 162 163 // Parses an identitifer "token" -- handles preceding whitespace. 164 static ExpectedProgress<std::string> parseId(ParseState State) { 165 State.Input = consumeWhitespace(State.Input); 166 auto Id = State.Input.take_while( 167 [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); }); 168 if (Id.empty()) 169 return makeParseError(State, "failed to parse name"); 170 return makeParseProgress(advance(State, Id.size()), Id.str()); 171 } 172 173 // For consistency with the AST matcher parser and C++ code, node ids are 174 // written as strings. However, we do not support escaping in the string. 175 static ExpectedProgress<std::string> parseStringId(ParseState State) { 176 State.Input = consumeWhitespace(State.Input); 177 if (State.Input.empty()) 178 return makeParseError(State, "unexpected end of input"); 179 if (!State.Input.consume_front("\"")) 180 return makeParseError( 181 State, 182 "expecting string, but encountered other character or end of input"); 183 184 StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); 185 if (State.Input.size() == Id.size()) 186 return makeParseError(State, "unterminated string"); 187 // Advance past the trailing quote as well. 188 return makeParseProgress(advance(State, Id.size() + 1), Id.str()); 189 } 190 191 // Parses a single element surrounded by parens. `Op` is applied to the parsed 192 // result to create the result of this function call. 193 template <typename T> 194 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, 195 RangeSelectorOp<T> Op, 196 ParseState State) { 197 auto P = parseChar('(', State); 198 if (!P) 199 return P.takeError(); 200 201 auto E = ParseElement(P->State); 202 if (!E) 203 return E.takeError(); 204 205 P = parseChar(')', E->State); 206 if (!P) 207 return P.takeError(); 208 209 return makeParseProgress(P->State, Op(std::move(E->Value))); 210 } 211 212 // Parses a pair of elements surrounded by parens and separated by comma. `Op` 213 // is applied to the parsed results to create the result of this function call. 214 template <typename T> 215 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, 216 RangeSelectorOp<T, T> Op, 217 ParseState State) { 218 auto P = parseChar('(', State); 219 if (!P) 220 return P.takeError(); 221 222 auto Left = ParseElement(P->State); 223 if (!Left) 224 return Left.takeError(); 225 226 P = parseChar(',', Left->State); 227 if (!P) 228 return P.takeError(); 229 230 auto Right = ParseElement(P->State); 231 if (!Right) 232 return Right.takeError(); 233 234 P = parseChar(')', Right->State); 235 if (!P) 236 return P.takeError(); 237 238 return makeParseProgress(P->State, 239 Op(std::move(Left->Value), std::move(Right->Value))); 240 } 241 242 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or 243 // Id operator). Returns StencilType representing the operator on success and 244 // error if it fails to parse input for an operator. 245 static ExpectedProgress<RangeSelector> 246 parseRangeSelectorImpl(ParseState State) { 247 auto Id = parseId(State); 248 if (!Id) 249 return Id.takeError(); 250 251 std::string OpName = std::move(Id->Value); 252 if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) 253 return parseSingle(parseStringId, *Op, Id->State); 254 255 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) 256 return parseSingle(parseRangeSelectorImpl, *Op, Id->State); 257 258 if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) 259 return parsePair(parseStringId, *Op, Id->State); 260 261 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) 262 return parsePair(parseRangeSelectorImpl, *Op, Id->State); 263 264 return makeParseError(State, "unknown selector name: " + OpName); 265 } 266 267 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { 268 ParseState State = {Input, Input}; 269 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); 270 if (!Result) 271 return Result.takeError(); 272 State = Result->State; 273 // Discard any potentially trailing whitespace. 274 State.Input = consumeWhitespace(State.Input); 275 if (State.Input.empty()) 276 return Result->Value; 277 return makeParseError(State, "unexpected input after selector"); 278 } 279