1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// WhitespaceManager class manages whitespace around tokens and their 11 /// replacements. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 17 18 #include "TokenAnnotator.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include <algorithm> 23 #include <string> 24 #include <tuple> 25 26 namespace clang { 27 namespace format { 28 29 /// Manages the whitespaces around tokens and their replacements. 30 /// 31 /// This includes special handling for certain constructs, e.g. the alignment of 32 /// trailing line comments. 33 /// 34 /// To guarantee correctness of alignment operations, the \c WhitespaceManager 35 /// must be informed about every token in the source file; for each token, there 36 /// must be exactly one call to either \c replaceWhitespace or 37 /// \c addUntouchableToken. 38 /// 39 /// There may be multiple calls to \c breakToken for a given token. 40 class WhitespaceManager { 41 public: 42 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, 43 bool UseCRLF) 44 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} 45 46 bool useCRLF() const { return UseCRLF; } 47 48 /// Infers whether the input is using CRLF. 49 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF); 50 51 /// Replaces the whitespace in front of \p Tok. Only call once for 52 /// each \c AnnotatedToken. 53 /// 54 /// \p StartOfTokenColumn is the column at which the token will start after 55 /// this replacement. It is needed for determining how \p Spaces is turned 56 /// into tabs and spaces for some format styles. 57 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, 58 unsigned StartOfTokenColumn, bool isAligned = false, 59 bool InPPDirective = false); 60 61 /// Adds information about an unchangeable token's whitespace. 62 /// 63 /// Needs to be called for every token for which \c replaceWhitespace 64 /// was not called. 65 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); 66 67 llvm::Error addReplacement(const tooling::Replacement &Replacement); 68 69 /// Inserts or replaces whitespace in the middle of a token. 70 /// 71 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix 72 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars 73 /// characters. 74 /// 75 /// Note: \p Spaces can be negative to retain information about initial 76 /// relative column offset between a line of a block comment and the start of 77 /// the comment. This negative offset may be compensated by trailing comment 78 /// alignment here. In all other cases negative \p Spaces will be truncated to 79 /// 0. 80 /// 81 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is 82 /// used to align backslashes correctly. 83 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, 84 unsigned ReplaceChars, 85 StringRef PreviousPostfix, 86 StringRef CurrentPrefix, bool InPPDirective, 87 unsigned Newlines, int Spaces); 88 89 /// Returns all the \c Replacements created during formatting. 90 const tooling::Replacements &generateReplacements(); 91 92 /// Represents a change before a token, a break inside a token, 93 /// or the layout of an unchanged token (or whitespace within). 94 struct Change { 95 /// Functor to sort changes in original source order. 96 class IsBeforeInFile { 97 public: 98 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 99 bool operator()(const Change &C1, const Change &C2) const; 100 101 private: 102 const SourceManager &SourceMgr; 103 }; 104 105 /// Creates a \c Change. 106 /// 107 /// The generated \c Change will replace the characters at 108 /// \p OriginalWhitespaceRange with a concatenation of 109 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces 110 /// and \p CurrentLinePrefix. 111 /// 112 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out 113 /// trailing comments and escaped newlines. 114 Change(const FormatToken &Tok, bool CreateReplacement, 115 SourceRange OriginalWhitespaceRange, int Spaces, 116 unsigned StartOfTokenColumn, unsigned NewlinesBefore, 117 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, 118 bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken); 119 120 // The kind of the token whose whitespace this change replaces, or in which 121 // this change inserts whitespace. 122 // FIXME: Currently this is not set correctly for breaks inside comments, as 123 // the \c BreakableToken is still doing its own alignment. 124 const FormatToken *Tok; 125 126 bool CreateReplacement; 127 // Changes might be in the middle of a token, so we cannot just keep the 128 // FormatToken around to query its information. 129 SourceRange OriginalWhitespaceRange; 130 unsigned StartOfTokenColumn; 131 unsigned NewlinesBefore; 132 std::string PreviousLinePostfix; 133 std::string CurrentLinePrefix; 134 bool IsAligned; 135 bool ContinuesPPDirective; 136 137 // The number of spaces in front of the token or broken part of the token. 138 // This will be adapted when aligning tokens. 139 // Can be negative to retain information about the initial relative offset 140 // of the lines in a block comment. This is used when aligning trailing 141 // comments. Uncompensated negative offset is truncated to 0. 142 int Spaces; 143 144 // If this change is inside of a token but not at the start of the token or 145 // directly after a newline. 146 bool IsInsideToken; 147 148 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and 149 // \c EscapedNewlineColumn will be calculated in 150 // \c calculateLineBreakInformation. 151 bool IsTrailingComment; 152 unsigned TokenLength; 153 unsigned PreviousEndOfTokenColumn; 154 unsigned EscapedNewlineColumn; 155 156 // These fields are used to retain correct relative line indentation in a 157 // block comment when aligning trailing comments. 158 // 159 // If this Change represents a continuation of a block comment, 160 // \c StartOfBlockComment is pointer to the first Change in the block 161 // comment. \c IndentationOffset is a relative column offset to this 162 // change, so that the correct column can be reconstructed at the end of 163 // the alignment process. 164 const Change *StartOfBlockComment; 165 int IndentationOffset; 166 167 // Depth of conditionals. Computed from tracking fake parenthesis, except 168 // it does not increase the indent for "chained" conditionals. 169 int ConditionalsLevel; 170 171 // A combination of indent, nesting and conditionals levels, which are used 172 // in tandem to compute lexical scope, for the purposes of deciding 173 // when to stop consecutive alignment runs. 174 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const { 175 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel, 176 ConditionalsLevel); 177 } 178 }; 179 180 private: 181 struct CellDescription { 182 unsigned Index = 0; 183 unsigned Cell = 0; 184 unsigned EndIndex = 0; 185 bool HasSplit = false; 186 CellDescription *NextColumnElement = nullptr; 187 188 constexpr bool operator==(const CellDescription &Other) const { 189 return Index == Other.Index && Cell == Other.Cell && 190 EndIndex == Other.EndIndex; 191 } 192 constexpr bool operator!=(const CellDescription &Other) const { 193 return !(*this == Other); 194 } 195 }; 196 197 struct CellDescriptions { 198 SmallVector<CellDescription> Cells; 199 SmallVector<unsigned> CellCounts; 200 unsigned InitialSpaces = 0; 201 202 // Determine if every row in the the array 203 // has the same number of columns. 204 bool isRectangular() const { 205 if (CellCounts.empty()) 206 return false; 207 208 for (auto NumberOfColumns : CellCounts) 209 if (NumberOfColumns != CellCounts[0]) 210 return false; 211 return true; 212 } 213 }; 214 215 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens 216 /// or token parts in a line and \c PreviousEndOfTokenColumn and 217 /// \c EscapedNewlineColumn for the first tokens or token parts in a line. 218 void calculateLineBreakInformation(); 219 220 /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes. 221 void alignConsecutiveMacros(); 222 223 /// Align consecutive assignments over all \c Changes. 224 void alignConsecutiveAssignments(); 225 226 /// Align consecutive bitfields over all \c Changes. 227 void alignConsecutiveBitFields(); 228 229 /// Align consecutive declarations over all \c Changes. 230 void alignConsecutiveDeclarations(); 231 232 /// Align consecutive declarations over all \c Changes. 233 void alignChainedConditionals(); 234 235 /// Align trailing comments over all \c Changes. 236 void alignTrailingComments(); 237 238 /// Align trailing comments from change \p Start to change \p End at 239 /// the specified \p Column. 240 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); 241 242 /// Align escaped newlines over all \c Changes. 243 void alignEscapedNewlines(); 244 245 /// Align escaped newlines from change \p Start to change \p End at 246 /// the specified \p Column. 247 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); 248 249 /// Align Array Initializers over all \c Changes. 250 void alignArrayInitializers(); 251 252 /// Align Array Initializers from change \p Start to change \p End at 253 /// the specified \p Column. 254 void alignArrayInitializers(unsigned Start, unsigned End); 255 256 /// Align Array Initializers being careful to right justify the columns 257 /// as described by \p CellDescs. 258 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs); 259 260 /// Align Array Initializers being careful to left justify the columns 261 /// as described by \p CellDescs. 262 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs); 263 264 /// Calculate the cell width between two indexes. 265 unsigned calculateCellWidth(unsigned Start, unsigned End, 266 bool WithSpaces = false) const; 267 268 /// Get a set of fully specified CellDescriptions between \p Start and 269 /// \p End of the change list. 270 CellDescriptions getCells(unsigned Start, unsigned End); 271 272 /// Does this \p Cell contain a split element? 273 static bool isSplitCell(const CellDescription &Cell); 274 275 /// Get the width of the preceding cells from \p Start to \p End. 276 template <typename I> 277 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const { 278 auto NetWidth = InitialSpaces; 279 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) { 280 // If we broke the line the initial spaces are already 281 // accounted for. 282 if (Changes[PrevIter->Index].NewlinesBefore > 0) 283 NetWidth = 0; 284 NetWidth += 285 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1; 286 } 287 return NetWidth; 288 } 289 290 /// Get the maximum width of a cell in a sequence of columns. 291 template <typename I> 292 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const { 293 unsigned CellWidth = 294 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true); 295 if (Changes[CellIter->Index].NewlinesBefore == 0) 296 CellWidth += NetWidth; 297 for (const auto *Next = CellIter->NextColumnElement; Next != nullptr; 298 Next = Next->NextColumnElement) { 299 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true); 300 if (Changes[Next->Index].NewlinesBefore == 0) 301 ThisWidth += NetWidth; 302 CellWidth = std::max(CellWidth, ThisWidth); 303 } 304 return CellWidth; 305 } 306 307 /// Get The maximum width of all columns to a given cell. 308 template <typename I> 309 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop, 310 unsigned InitialSpaces, unsigned CellCount, 311 unsigned MaxRowCount) const { 312 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces); 313 auto RowCount = 1U; 314 auto Offset = std::distance(CellStart, CellStop); 315 for (const auto *Next = CellStop->NextColumnElement; Next != nullptr; 316 Next = Next->NextColumnElement) { 317 if (RowCount > MaxRowCount) 318 break; 319 auto Start = (CellStart + RowCount * CellCount); 320 auto End = Start + Offset; 321 MaxNetWidth = 322 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces)); 323 ++RowCount; 324 } 325 return MaxNetWidth; 326 } 327 328 /// Align a split cell with a newline to the first element in the cell. 329 void alignToStartOfCell(unsigned Start, unsigned End); 330 331 /// Link the Cell pointers in the list of Cells. 332 static CellDescriptions linkCells(CellDescriptions &&CellDesc); 333 334 /// Fill \c Replaces with the replacements for all effective changes. 335 void generateChanges(); 336 337 /// Stores \p Text as the replacement for the whitespace in \p Range. 338 void storeReplacement(SourceRange Range, StringRef Text); 339 void appendNewlineText(std::string &Text, unsigned Newlines); 340 void appendEscapedNewlineText(std::string &Text, unsigned Newlines, 341 unsigned PreviousEndOfTokenColumn, 342 unsigned EscapedNewlineColumn); 343 void appendIndentText(std::string &Text, unsigned IndentLevel, 344 unsigned Spaces, unsigned WhitespaceStartColumn, 345 bool IsAligned); 346 unsigned appendTabIndent(std::string &Text, unsigned Spaces, 347 unsigned Indentation); 348 349 SmallVector<Change, 16> Changes; 350 const SourceManager &SourceMgr; 351 tooling::Replacements Replaces; 352 const FormatStyle &Style; 353 bool UseCRLF; 354 }; 355 356 } // namespace format 357 } // namespace clang 358 359 #endif 360