1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Transformer/Parsing.h" 10 #include "clang/AST/Expr.h" 11 #include "clang/ASTMatchers/ASTMatchFinder.h" 12 #include "clang/Basic/CharInfo.h" 13 #include "clang/Basic/SourceLocation.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Tooling/Transformer/RangeSelector.h" 16 #include "clang/Tooling/Transformer/SourceCode.h" 17 #include "llvm/ADT/None.h" 18 #include "llvm/ADT/StringMap.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/Errc.h" 21 #include "llvm/Support/Error.h" 22 #include <string> 23 #include <utility> 24 #include <vector> 25 26 using namespace clang; 27 using namespace transformer; 28 29 // FIXME: This implementation is entirely separate from that of the AST 30 // matchers. Given the similarity of the languages and uses of the two parsers, 31 // the two should share a common parsing infrastructure, as should other 32 // Transformer types. We intend to unify this implementation soon to share as 33 // much as possible with the AST Matchers parsing. 34 35 namespace { 36 using llvm::Error; 37 using llvm::Expected; 38 39 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); 40 41 struct ParseState { 42 // The remaining input to be processed. 43 StringRef Input; 44 // The original input. Not modified during parsing; only for reference in 45 // error reporting. 46 StringRef OriginalInput; 47 }; 48 49 // Represents an intermediate result returned by a parsing function. Functions 50 // that don't generate values should use `llvm::None` 51 template <typename ResultType> struct ParseProgress { 52 ParseState State; 53 // Intermediate result generated by the Parser. 54 ResultType Value; 55 }; 56 57 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; 58 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); 59 60 class ParseError : public llvm::ErrorInfo<ParseError> { 61 public: 62 // Required field for all ErrorInfo derivatives. 63 static char ID; 64 65 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) 66 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), 67 Excerpt(std::move(InputExcerpt)) {} 68 69 void log(llvm::raw_ostream &OS) const override { 70 OS << "parse error at position (" << Pos << "): " << ErrorMsg 71 << ": " + Excerpt; 72 } 73 74 std::error_code convertToErrorCode() const override { 75 return llvm::inconvertibleErrorCode(); 76 } 77 78 // Position of the error in the input string. 79 size_t Pos; 80 std::string ErrorMsg; 81 // Excerpt of the input starting at the error position. 82 std::string Excerpt; 83 }; 84 85 char ParseError::ID; 86 } // namespace 87 88 static const llvm::StringMap<RangeSelectorOp<std::string>> & 89 getUnaryStringSelectors() { 90 static const llvm::StringMap<RangeSelectorOp<std::string>> M = { 91 {"name", name}, 92 {"node", node}, 93 {"statement", statement}, 94 {"statements", statements}, 95 {"member", member}, 96 {"callArgs", callArgs}, 97 {"elseBranch", elseBranch}, 98 {"initListElements", initListElements}}; 99 return M; 100 } 101 102 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & 103 getUnaryRangeSelectors() { 104 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { 105 {"before", before}, {"after", after}, {"expansion", expansion}}; 106 return M; 107 } 108 109 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & 110 getBinaryStringSelectors() { 111 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { 112 {"encloseNodes", encloseNodes}}; 113 return M; 114 } 115 116 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & 117 getBinaryRangeSelectors() { 118 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> 119 M = {{"enclose", enclose}, {"between", between}}; 120 return M; 121 } 122 123 template <typename Element> 124 llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, 125 llvm::StringRef Key) { 126 auto it = Map.find(Key); 127 if (it == Map.end()) 128 return llvm::None; 129 return it->second; 130 } 131 132 template <typename ResultType> 133 ParseProgress<ResultType> makeParseProgress(ParseState State, 134 ResultType Result) { 135 return ParseProgress<ResultType>{State, std::move(Result)}; 136 } 137 138 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { 139 size_t Pos = S.OriginalInput.size() - S.Input.size(); 140 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), 141 S.OriginalInput.substr(Pos, 20).str()); 142 } 143 144 // Returns a new ParseState that advances \c S by \c N characters. 145 static ParseState advance(ParseState S, size_t N) { 146 S.Input = S.Input.drop_front(N); 147 return S; 148 } 149 150 static StringRef consumeWhitespace(StringRef S) { 151 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); }); 152 } 153 154 // Parses a single expected character \c c from \c State, skipping preceding 155 // whitespace. Error if the expected character isn't found. 156 static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) { 157 State.Input = consumeWhitespace(State.Input); 158 if (State.Input.empty() || State.Input.front() != c) 159 return makeParseError(State, 160 ("expected char not found: " + llvm::Twine(c)).str()); 161 return makeParseProgress(advance(State, 1), llvm::None); 162 } 163 164 // Parses an identitifer "token" -- handles preceding whitespace. 165 static ExpectedProgress<std::string> parseId(ParseState State) { 166 State.Input = consumeWhitespace(State.Input); 167 auto Id = State.Input.take_while( 168 [](char c) { return isASCII(c) && isIdentifierBody(c); }); 169 if (Id.empty()) 170 return makeParseError(State, "failed to parse name"); 171 return makeParseProgress(advance(State, Id.size()), Id.str()); 172 } 173 174 // For consistency with the AST matcher parser and C++ code, node ids are 175 // written as strings. However, we do not support escaping in the string. 176 static ExpectedProgress<std::string> parseStringId(ParseState State) { 177 State.Input = consumeWhitespace(State.Input); 178 if (State.Input.empty()) 179 return makeParseError(State, "unexpected end of input"); 180 if (!State.Input.consume_front("\"")) 181 return makeParseError( 182 State, 183 "expecting string, but encountered other character or end of input"); 184 185 StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); 186 if (State.Input.size() == Id.size()) 187 return makeParseError(State, "unterminated string"); 188 // Advance past the trailing quote as well. 189 return makeParseProgress(advance(State, Id.size() + 1), Id.str()); 190 } 191 192 // Parses a single element surrounded by parens. `Op` is applied to the parsed 193 // result to create the result of this function call. 194 template <typename T> 195 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, 196 RangeSelectorOp<T> Op, 197 ParseState State) { 198 auto P = parseChar('(', State); 199 if (!P) 200 return P.takeError(); 201 202 auto E = ParseElement(P->State); 203 if (!E) 204 return E.takeError(); 205 206 P = parseChar(')', E->State); 207 if (!P) 208 return P.takeError(); 209 210 return makeParseProgress(P->State, Op(std::move(E->Value))); 211 } 212 213 // Parses a pair of elements surrounded by parens and separated by comma. `Op` 214 // is applied to the parsed results to create the result of this function call. 215 template <typename T> 216 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, 217 RangeSelectorOp<T, T> Op, 218 ParseState State) { 219 auto P = parseChar('(', State); 220 if (!P) 221 return P.takeError(); 222 223 auto Left = ParseElement(P->State); 224 if (!Left) 225 return Left.takeError(); 226 227 P = parseChar(',', Left->State); 228 if (!P) 229 return P.takeError(); 230 231 auto Right = ParseElement(P->State); 232 if (!Right) 233 return Right.takeError(); 234 235 P = parseChar(')', Right->State); 236 if (!P) 237 return P.takeError(); 238 239 return makeParseProgress(P->State, 240 Op(std::move(Left->Value), std::move(Right->Value))); 241 } 242 243 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or 244 // Id operator). Returns StencilType representing the operator on success and 245 // error if it fails to parse input for an operator. 246 static ExpectedProgress<RangeSelector> 247 parseRangeSelectorImpl(ParseState State) { 248 auto Id = parseId(State); 249 if (!Id) 250 return Id.takeError(); 251 252 std::string OpName = std::move(Id->Value); 253 if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) 254 return parseSingle(parseStringId, *Op, Id->State); 255 256 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) 257 return parseSingle(parseRangeSelectorImpl, *Op, Id->State); 258 259 if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) 260 return parsePair(parseStringId, *Op, Id->State); 261 262 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) 263 return parsePair(parseRangeSelectorImpl, *Op, Id->State); 264 265 return makeParseError(State, "unknown selector name: " + OpName); 266 } 267 268 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { 269 ParseState State = {Input, Input}; 270 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); 271 if (!Result) 272 return Result.takeError(); 273 State = Result->State; 274 // Discard any potentially trailing whitespace. 275 State.Input = consumeWhitespace(State.Input); 276 if (State.Input.empty()) 277 return Result->Value; 278 return makeParseError(State, "unexpected input after selector"); 279 } 280