//===--- TokenAnalyzer.h - Analyze Token Streams ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file declares an abstract TokenAnalyzer, and associated helper
/// classes. TokenAnalyzer can be extended to generate replacements based on
/// an annotated and pre-processed token stream.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H
#define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H

#include "AffectedRangeManager.h"
#include "Encoding.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include <memory>

namespace clang {
namespace format {

// Bundles what is needed to look at one fragment of code: a SourceManager,
// the FileID of the fragment within it, a list of character ranges, and the
// three start-column settings described on the constructor.
class Environment {
public:
  // This sets up a virtual file system with file \p FileName containing the
  // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn,
  // that the next lines of \p Code should start at \p NextStartColumn, and
  // that \p Code should end at \p LastStartColumn if it ends in newline.
  // See also the documentation of clang::format::internal::reformat.
  Environment(StringRef Code, StringRef FileName, unsigned FirstStartColumn = 0,
              unsigned NextStartColumn = 0, unsigned LastStartColumn = 0);

  // Returns the FileID stored in this environment.
  FileID getFileID() const { return ID; }

  // Returns the SourceManager this environment refers to (see SM below).
  const SourceManager &getSourceManager() const { return SM; }

  // Returns the character ranges stored in this environment.
  ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }

  // Returns the column at which the fragment of code managed by this
  // environment starts.
  unsigned getFirstStartColumn() const { return FirstStartColumn; }

  // Returns the column at which subsequent lines of the fragment of code
  // managed by this environment should start.
  unsigned getNextStartColumn() const { return NextStartColumn; }

  // Returns the column at which the fragment of code managed by this
  // environment should end if it ends in a newline.
  unsigned getLastStartColumn() const { return LastStartColumn; }

  // Returns nullptr and prints a diagnostic to stderr if the environment
  // can't be created.
  // The column parameters share names and defaults with the constructor's.
  // NOTE(review): \p Ranges is presumably what ends up in getCharRanges();
  // confirm against the out-of-line definition.
  static std::unique_ptr<Environment> make(StringRef Code, StringRef FileName,
                                           ArrayRef<tooling::Range> Ranges,
                                           unsigned FirstStartColumn = 0,
                                           unsigned NextStartColumn = 0,
                                           unsigned LastStartColumn = 0);

private:
  // This is only set if constructed from string.
  // NOTE(review): declared before SM. If the constructor binds SM to the
  // manager owned by VirtualSM, this declaration order must be preserved,
  // because members are initialized in declaration order.
  std::unique_ptr<SourceManagerForFile> VirtualSM;

  // This refers to either a SourceManager provided by users or VirtualSM
  // created for a single file.
  SourceManager &SM;
  // Identifies the file within SM; exposed through getFileID().
  FileID ID;

  // Backing storage for getCharRanges().
  SmallVector<CharSourceRange, 8> CharRanges;
  // Column settings; see the constructor comment for their meaning.
  unsigned FirstStartColumn;
  unsigned NextStartColumn;
  unsigned LastStartColumn;
};

// Abstract base for analyses over a token stream. It is itself an
// UnwrappedLineConsumer (see consumeUnwrappedLine() and finishRun()), and
// subclasses supply the actual analysis by implementing analyze().
class TokenAnalyzer : public UnwrappedLineConsumer {
public:
  // \p Env is kept by reference (see the Env member), so it must outlive this
  // analyzer. \p Style is stored by value in the Style member.
  TokenAnalyzer(const Environment &Env, const FormatStyle &Style);

  // Entry point for running the analysis. Returns a set of replacements
  // paired with an unsigned value.
  // NOTE(review): neither the meaning of the unsigned nor the exact effect of
  // \p SkipAnnotation is visible in this header. The flag presumably bypasses
  // the token annotation step; confirm both in the implementation file.
  std::pair<tooling::Replacements, unsigned>
  process(bool SkipAnnotation = false);

protected:
  // The analysis hook every concrete analyzer must implement. It receives the
  // annotator, the annotated lines and the token lexer, and returns the same
  // pair type that process() returns.
  // NOTE(review): presumably called by process(); confirm in the
  // implementation file.
  virtual std::pair<tooling::Replacements, unsigned>
  analyze(TokenAnnotator &Annotator,
          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
          FormatTokenLexer &Tokens) = 0;

  // UnwrappedLineConsumer interface.
  // NOTE(review): presumably these collect lines into UnwrappedLines, with
  // finishRun() closing off one group; confirm in the implementation file.
  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override;

  void finishRun() override;

  // Copy of the style passed to the constructor.
  FormatStyle Style;
  // Stores Style, FileID and SourceManager etc.
  // Held by reference, not copied.
  const Environment &Env;
  // AffectedRangeMgr stores ranges to be fixed.
  AffectedRangeManager AffectedRangeMgr;
  // A list of lists of unwrapped lines.
  // NOTE(review): the outer dimension is presumably one entry per parser run;
  // confirm against finishRun().
  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
  // NOTE(review): presumably the detected text encoding of the code being
  // analyzed; confirm in the constructor definition.
  encoding::Encoding Encoding;
};

} // end namespace format
} // end namespace clang

#endif