1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// WhitespaceManager class manages whitespace around tokens and their 11 /// replacements. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 17 18 #include "TokenAnnotator.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include <algorithm> 23 #include <string> 24 #include <tuple> 25 26 namespace clang { 27 namespace format { 28 29 /// Manages the whitespaces around tokens and their replacements. 30 /// 31 /// This includes special handling for certain constructs, e.g. the alignment of 32 /// trailing line comments. 33 /// 34 /// To guarantee correctness of alignment operations, the \c WhitespaceManager 35 /// must be informed about every token in the source file; for each token, there 36 /// must be exactly one call to either \c replaceWhitespace or 37 /// \c addUntouchableToken. 38 /// 39 /// There may be multiple calls to \c breakToken for a given token. 40 class WhitespaceManager { 41 public: 42 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, 43 bool UseCRLF) 44 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} 45 46 bool useCRLF() const { return UseCRLF; } 47 48 /// Replaces the whitespace in front of \p Tok. Only call once for 49 /// each \c AnnotatedToken. 50 /// 51 /// \p StartOfTokenColumn is the column at which the token will start after 52 /// this replacement. It is needed for determining how \p Spaces is turned 53 /// into tabs and spaces for some format styles. 54 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, 55 unsigned StartOfTokenColumn, bool isAligned = false, 56 bool InPPDirective = false); 57 58 /// Adds information about an unchangeable token's whitespace. 59 /// 60 /// Needs to be called for every token for which \c replaceWhitespace 61 /// was not called. 62 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); 63 64 llvm::Error addReplacement(const tooling::Replacement &Replacement); 65 66 /// Inserts or replaces whitespace in the middle of a token. 67 /// 68 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix 69 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars 70 /// characters. 71 /// 72 /// Note: \p Spaces can be negative to retain information about initial 73 /// relative column offset between a line of a block comment and the start of 74 /// the comment. This negative offset may be compensated by trailing comment 75 /// alignment here. In all other cases negative \p Spaces will be truncated to 76 /// 0. 77 /// 78 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is 79 /// used to align backslashes correctly. 80 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, 81 unsigned ReplaceChars, 82 StringRef PreviousPostfix, 83 StringRef CurrentPrefix, bool InPPDirective, 84 unsigned Newlines, int Spaces); 85 86 /// Returns all the \c Replacements created during formatting. 87 const tooling::Replacements &generateReplacements(); 88 89 /// Represents a change before a token, a break inside a token, 90 /// or the layout of an unchanged token (or whitespace within). 91 struct Change { 92 /// Functor to sort changes in original source order. 93 class IsBeforeInFile { 94 public: 95 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 96 bool operator()(const Change &C1, const Change &C2) const; 97 98 private: 99 const SourceManager &SourceMgr; 100 }; 101 102 /// Creates a \c Change. 103 /// 104 /// The generated \c Change will replace the characters at 105 /// \p OriginalWhitespaceRange with a concatenation of 106 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces 107 /// and \p CurrentLinePrefix. 108 /// 109 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out 110 /// trailing comments and escaped newlines. 111 Change(const FormatToken &Tok, bool CreateReplacement, 112 SourceRange OriginalWhitespaceRange, int Spaces, 113 unsigned StartOfTokenColumn, unsigned NewlinesBefore, 114 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, 115 bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken); 116 117 // The kind of the token whose whitespace this change replaces, or in which 118 // this change inserts whitespace. 119 // FIXME: Currently this is not set correctly for breaks inside comments, as 120 // the \c BreakableToken is still doing its own alignment. 121 const FormatToken *Tok; 122 123 bool CreateReplacement; 124 // Changes might be in the middle of a token, so we cannot just keep the 125 // FormatToken around to query its information. 126 SourceRange OriginalWhitespaceRange; 127 unsigned StartOfTokenColumn; 128 unsigned NewlinesBefore; 129 std::string PreviousLinePostfix; 130 std::string CurrentLinePrefix; 131 bool IsAligned; 132 bool ContinuesPPDirective; 133 134 // The number of spaces in front of the token or broken part of the token. 135 // This will be adapted when aligning tokens. 136 // Can be negative to retain information about the initial relative offset 137 // of the lines in a block comment. This is used when aligning trailing 138 // comments. Uncompensated negative offset is truncated to 0. 139 int Spaces; 140 141 // If this change is inside of a token but not at the start of the token or 142 // directly after a newline. 143 bool IsInsideToken; 144 145 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and 146 // \c EscapedNewlineColumn will be calculated in 147 // \c calculateLineBreakInformation. 148 bool IsTrailingComment; 149 unsigned TokenLength; 150 unsigned PreviousEndOfTokenColumn; 151 unsigned EscapedNewlineColumn; 152 153 // These fields are used to retain correct relative line indentation in a 154 // block comment when aligning trailing comments. 155 // 156 // If this Change represents a continuation of a block comment, 157 // \c StartOfBlockComment is pointer to the first Change in the block 158 // comment. \c IndentationOffset is a relative column offset to this 159 // change, so that the correct column can be reconstructed at the end of 160 // the alignment process. 161 const Change *StartOfBlockComment; 162 int IndentationOffset; 163 164 // Depth of conditionals. Computed from tracking fake parenthesis, except 165 // it does not increase the indent for "chained" conditionals. 166 int ConditionalsLevel; 167 168 // A combination of indent, nesting and conditionals levels, which are used 169 // in tandem to compute lexical scope, for the purposes of deciding 170 // when to stop consecutive alignment runs. 171 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const { 172 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel, 173 ConditionalsLevel); 174 } 175 }; 176 177 private: 178 struct CellDescription { 179 unsigned Index = 0; 180 unsigned Cell = 0; 181 unsigned EndIndex = 0; 182 bool HasSplit = false; 183 CellDescription *NextColumnElement = nullptr; 184 185 constexpr bool operator==(const CellDescription &Other) const { 186 return Index == Other.Index && Cell == Other.Cell && 187 EndIndex == Other.EndIndex; 188 } 189 constexpr bool operator!=(const CellDescription &Other) const { 190 return !(*this == Other); 191 } 192 }; 193 194 struct CellDescriptions { 195 SmallVector<CellDescription> Cells; 196 unsigned CellCount = 0; 197 unsigned InitialSpaces = 0; 198 }; 199 200 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens 201 /// or token parts in a line and \c PreviousEndOfTokenColumn and 202 /// \c EscapedNewlineColumn for the first tokens or token parts in a line. 203 void calculateLineBreakInformation(); 204 205 /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes. 206 void alignConsecutiveMacros(); 207 208 /// Align consecutive assignments over all \c Changes. 209 void alignConsecutiveAssignments(); 210 211 /// Align consecutive bitfields over all \c Changes. 212 void alignConsecutiveBitFields(); 213 214 /// Align consecutive declarations over all \c Changes. 215 void alignConsecutiveDeclarations(); 216 217 /// Align consecutive declarations over all \c Changes. 218 void alignChainedConditionals(); 219 220 /// Align trailing comments over all \c Changes. 221 void alignTrailingComments(); 222 223 /// Align trailing comments from change \p Start to change \p End at 224 /// the specified \p Column. 225 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); 226 227 /// Align escaped newlines over all \c Changes. 228 void alignEscapedNewlines(); 229 230 /// Align escaped newlines from change \p Start to change \p End at 231 /// the specified \p Column. 232 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); 233 234 /// Align Array Initializers over all \c Changes. 235 void alignArrayInitializers(); 236 237 /// Align Array Initializers from change \p Start to change \p End at 238 /// the specified \p Column. 239 void alignArrayInitializers(unsigned Start, unsigned End); 240 241 /// Align Array Initializers being careful to right justify the columns 242 /// as described by \p CellDescs. 243 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs); 244 245 /// Align Array Initializers being careful to leftt justify the columns 246 /// as described by \p CellDescs. 247 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs); 248 249 /// Calculate the cell width between two indexes. 250 unsigned calculateCellWidth(unsigned Start, unsigned End, 251 bool WithSpaces = false) const; 252 253 /// Get a set of fully specified CellDescriptions between \p Start and 254 /// \p End of the change list. 255 CellDescriptions getCells(unsigned Start, unsigned End); 256 257 /// Does this \p Cell contain a split element? 258 static bool isSplitCell(const CellDescription &Cell); 259 260 /// Get the width of the preceeding cells from \p Start to \p End. 261 template <typename I> 262 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const { 263 auto NetWidth = InitialSpaces; 264 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) { 265 // If we broke the line the initial spaces are already 266 // accounted for. 267 if (Changes[PrevIter->Index].NewlinesBefore > 0) 268 NetWidth = 0; 269 NetWidth += 270 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1; 271 } 272 return NetWidth; 273 } 274 275 /// Get the maximum width of a cell in a sequence of columns. 276 template <typename I> 277 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const { 278 unsigned CellWidth = 279 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true); 280 if (Changes[CellIter->Index].NewlinesBefore == 0) 281 CellWidth += NetWidth; 282 for (const auto *Next = CellIter->NextColumnElement; Next != nullptr; 283 Next = Next->NextColumnElement) { 284 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true); 285 if (Changes[Next->Index].NewlinesBefore == 0) 286 ThisWidth += NetWidth; 287 CellWidth = std::max(CellWidth, ThisWidth); 288 } 289 return CellWidth; 290 } 291 292 /// Get The maximum width of all columns to a given cell. 293 template <typename I> 294 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop, 295 unsigned InitialSpaces, 296 unsigned CellCount) const { 297 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces); 298 auto RowCount = 1U; 299 auto Offset = std::distance(CellStart, CellStop); 300 for (const auto *Next = CellStop->NextColumnElement; Next != nullptr; 301 Next = Next->NextColumnElement) { 302 auto Start = (CellStart + RowCount * CellCount); 303 auto End = Start + Offset; 304 MaxNetWidth = 305 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces)); 306 ++RowCount; 307 } 308 return MaxNetWidth; 309 } 310 311 /// Align a split cell with a newline to the first element in the cell. 312 void alignToStartOfCell(unsigned Start, unsigned End); 313 314 /// Link the Cell pointers in the list of Cells. 315 static CellDescriptions linkCells(CellDescriptions &&CellDesc); 316 317 /// Fill \c Replaces with the replacements for all effective changes. 318 void generateChanges(); 319 320 /// Stores \p Text as the replacement for the whitespace in \p Range. 321 void storeReplacement(SourceRange Range, StringRef Text); 322 void appendNewlineText(std::string &Text, unsigned Newlines); 323 void appendEscapedNewlineText(std::string &Text, unsigned Newlines, 324 unsigned PreviousEndOfTokenColumn, 325 unsigned EscapedNewlineColumn); 326 void appendIndentText(std::string &Text, unsigned IndentLevel, 327 unsigned Spaces, unsigned WhitespaceStartColumn, 328 bool IsAligned); 329 unsigned appendTabIndent(std::string &Text, unsigned Spaces, 330 unsigned Indentation); 331 332 SmallVector<Change, 16> Changes; 333 const SourceManager &SourceMgr; 334 tooling::Replacements Replaces; 335 const FormatStyle &Style; 336 bool UseCRLF; 337 }; 338 339 } // namespace format 340 } // namespace clang 341 342 #endif 343