1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// WhitespaceManager class manages whitespace around tokens and their 11 /// replacements. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 17 18 #include "TokenAnnotator.h" 19 #include "clang/Basic/SourceManager.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include <algorithm> 23 #include <string> 24 #include <tuple> 25 26 namespace clang { 27 namespace format { 28 29 /// Manages the whitespaces around tokens and their replacements. 30 /// 31 /// This includes special handling for certain constructs, e.g. the alignment of 32 /// trailing line comments. 33 /// 34 /// To guarantee correctness of alignment operations, the \c WhitespaceManager 35 /// must be informed about every token in the source file; for each token, there 36 /// must be exactly one call to either \c replaceWhitespace or 37 /// \c addUntouchableToken. 38 /// 39 /// There may be multiple calls to \c breakToken for a given token. 40 class WhitespaceManager { 41 public: 42 WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, 43 bool UseCRLF) 44 : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} 45 46 bool useCRLF() const { return UseCRLF; } 47 48 /// Infers whether the input is using CRLF. 49 static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF); 50 51 /// Replaces the whitespace in front of \p Tok. Only call once for 52 /// each \c AnnotatedToken. 53 /// 54 /// \p StartOfTokenColumn is the column at which the token will start after 55 /// this replacement. It is needed for determining how \p Spaces is turned 56 /// into tabs and spaces for some format styles. 57 void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, 58 unsigned StartOfTokenColumn, bool isAligned = false, 59 bool InPPDirective = false); 60 61 /// Adds information about an unchangeable token's whitespace. 62 /// 63 /// Needs to be called for every token for which \c replaceWhitespace 64 /// was not called. 65 void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); 66 67 llvm::Error addReplacement(const tooling::Replacement &Replacement); 68 69 /// Inserts or replaces whitespace in the middle of a token. 70 /// 71 /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix 72 /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars 73 /// characters. 74 /// 75 /// Note: \p Spaces can be negative to retain information about initial 76 /// relative column offset between a line of a block comment and the start of 77 /// the comment. This negative offset may be compensated by trailing comment 78 /// alignment here. In all other cases negative \p Spaces will be truncated to 79 /// 0. 80 /// 81 /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is 82 /// used to align backslashes correctly. 83 void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, 84 unsigned ReplaceChars, 85 StringRef PreviousPostfix, 86 StringRef CurrentPrefix, bool InPPDirective, 87 unsigned Newlines, int Spaces); 88 89 /// Returns all the \c Replacements created during formatting. 90 const tooling::Replacements &generateReplacements(); 91 92 /// Represents a change before a token, a break inside a token, 93 /// or the layout of an unchanged token (or whitespace within). 94 struct Change { 95 /// Functor to sort changes in original source order. 96 class IsBeforeInFile { 97 public: 98 IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 99 bool operator()(const Change &C1, const Change &C2) const; 100 101 private: 102 const SourceManager &SourceMgr; 103 }; 104 105 /// Creates a \c Change. 106 /// 107 /// The generated \c Change will replace the characters at 108 /// \p OriginalWhitespaceRange with a concatenation of 109 /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces 110 /// and \p CurrentLinePrefix. 111 /// 112 /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out 113 /// trailing comments and escaped newlines. 114 Change(const FormatToken &Tok, bool CreateReplacement, 115 SourceRange OriginalWhitespaceRange, int Spaces, 116 unsigned StartOfTokenColumn, unsigned NewlinesBefore, 117 StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, 118 bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken); 119 120 // The kind of the token whose whitespace this change replaces, or in which 121 // this change inserts whitespace. 122 // FIXME: Currently this is not set correctly for breaks inside comments, as 123 // the \c BreakableToken is still doing its own alignment. 124 const FormatToken *Tok; 125 126 bool CreateReplacement; 127 // Changes might be in the middle of a token, so we cannot just keep the 128 // FormatToken around to query its information. 129 SourceRange OriginalWhitespaceRange; 130 unsigned StartOfTokenColumn; 131 unsigned NewlinesBefore; 132 std::string PreviousLinePostfix; 133 std::string CurrentLinePrefix; 134 bool IsAligned; 135 bool ContinuesPPDirective; 136 137 // The number of spaces in front of the token or broken part of the token. 138 // This will be adapted when aligning tokens. 139 // Can be negative to retain information about the initial relative offset 140 // of the lines in a block comment. This is used when aligning trailing 141 // comments. Uncompensated negative offset is truncated to 0. 142 int Spaces; 143 144 // If this change is inside of a token but not at the start of the token or 145 // directly after a newline. 146 bool IsInsideToken; 147 148 // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and 149 // \c EscapedNewlineColumn will be calculated in 150 // \c calculateLineBreakInformation. 151 bool IsTrailingComment; 152 unsigned TokenLength; 153 unsigned PreviousEndOfTokenColumn; 154 unsigned EscapedNewlineColumn; 155 156 // These fields are used to retain correct relative line indentation in a 157 // block comment when aligning trailing comments. 158 // 159 // If this Change represents a continuation of a block comment, 160 // \c StartOfBlockComment is pointer to the first Change in the block 161 // comment. \c IndentationOffset is a relative column offset to this 162 // change, so that the correct column can be reconstructed at the end of 163 // the alignment process. 164 const Change *StartOfBlockComment; 165 int IndentationOffset; 166 167 // Depth of conditionals. Computed from tracking fake parenthesis, except 168 // it does not increase the indent for "chained" conditionals. 169 int ConditionalsLevel; 170 171 // A combination of indent, nesting and conditionals levels, which are used 172 // in tandem to compute lexical scope, for the purposes of deciding 173 // when to stop consecutive alignment runs. 174 std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const { 175 return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel, 176 ConditionalsLevel); 177 } 178 }; 179 180 private: 181 struct CellDescription { 182 unsigned Index = 0; 183 unsigned Cell = 0; 184 unsigned EndIndex = 0; 185 bool HasSplit = false; 186 CellDescription *NextColumnElement = nullptr; 187 188 constexpr bool operator==(const CellDescription &Other) const { 189 return Index == Other.Index && Cell == Other.Cell && 190 EndIndex == Other.EndIndex; 191 } 192 constexpr bool operator!=(const CellDescription &Other) const { 193 return !(*this == Other); 194 } 195 }; 196 197 struct CellDescriptions { 198 SmallVector<CellDescription> Cells; 199 unsigned CellCount = 0; 200 unsigned InitialSpaces = 0; 201 }; 202 203 /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens 204 /// or token parts in a line and \c PreviousEndOfTokenColumn and 205 /// \c EscapedNewlineColumn for the first tokens or token parts in a line. 206 void calculateLineBreakInformation(); 207 208 /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes. 209 void alignConsecutiveMacros(); 210 211 /// Align consecutive assignments over all \c Changes. 212 void alignConsecutiveAssignments(); 213 214 /// Align consecutive bitfields over all \c Changes. 215 void alignConsecutiveBitFields(); 216 217 /// Align consecutive declarations over all \c Changes. 218 void alignConsecutiveDeclarations(); 219 220 /// Align consecutive declarations over all \c Changes. 221 void alignChainedConditionals(); 222 223 /// Align trailing comments over all \c Changes. 224 void alignTrailingComments(); 225 226 /// Align trailing comments from change \p Start to change \p End at 227 /// the specified \p Column. 228 void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); 229 230 /// Align escaped newlines over all \c Changes. 231 void alignEscapedNewlines(); 232 233 /// Align escaped newlines from change \p Start to change \p End at 234 /// the specified \p Column. 235 void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); 236 237 /// Align Array Initializers over all \c Changes. 238 void alignArrayInitializers(); 239 240 /// Align Array Initializers from change \p Start to change \p End at 241 /// the specified \p Column. 242 void alignArrayInitializers(unsigned Start, unsigned End); 243 244 /// Align Array Initializers being careful to right justify the columns 245 /// as described by \p CellDescs. 246 void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs); 247 248 /// Align Array Initializers being careful to left justify the columns 249 /// as described by \p CellDescs. 250 void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs); 251 252 /// Calculate the cell width between two indexes. 253 unsigned calculateCellWidth(unsigned Start, unsigned End, 254 bool WithSpaces = false) const; 255 256 /// Get a set of fully specified CellDescriptions between \p Start and 257 /// \p End of the change list. 258 CellDescriptions getCells(unsigned Start, unsigned End); 259 260 /// Does this \p Cell contain a split element? 261 static bool isSplitCell(const CellDescription &Cell); 262 263 /// Get the width of the preceding cells from \p Start to \p End. 264 template <typename I> 265 auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const { 266 auto NetWidth = InitialSpaces; 267 for (auto PrevIter = Start; PrevIter != End; ++PrevIter) { 268 // If we broke the line the initial spaces are already 269 // accounted for. 270 if (Changes[PrevIter->Index].NewlinesBefore > 0) 271 NetWidth = 0; 272 NetWidth += 273 calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1; 274 } 275 return NetWidth; 276 } 277 278 /// Get the maximum width of a cell in a sequence of columns. 279 template <typename I> 280 unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const { 281 unsigned CellWidth = 282 calculateCellWidth(CellIter->Index, CellIter->EndIndex, true); 283 if (Changes[CellIter->Index].NewlinesBefore == 0) 284 CellWidth += NetWidth; 285 for (const auto *Next = CellIter->NextColumnElement; Next != nullptr; 286 Next = Next->NextColumnElement) { 287 auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true); 288 if (Changes[Next->Index].NewlinesBefore == 0) 289 ThisWidth += NetWidth; 290 CellWidth = std::max(CellWidth, ThisWidth); 291 } 292 return CellWidth; 293 } 294 295 /// Get The maximum width of all columns to a given cell. 296 template <typename I> 297 unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop, 298 unsigned InitialSpaces, 299 unsigned CellCount) const { 300 auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces); 301 auto RowCount = 1U; 302 auto Offset = std::distance(CellStart, CellStop); 303 for (const auto *Next = CellStop->NextColumnElement; Next != nullptr; 304 Next = Next->NextColumnElement) { 305 auto Start = (CellStart + RowCount * CellCount); 306 auto End = Start + Offset; 307 MaxNetWidth = 308 std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces)); 309 ++RowCount; 310 } 311 return MaxNetWidth; 312 } 313 314 /// Align a split cell with a newline to the first element in the cell. 315 void alignToStartOfCell(unsigned Start, unsigned End); 316 317 /// Link the Cell pointers in the list of Cells. 318 static CellDescriptions linkCells(CellDescriptions &&CellDesc); 319 320 /// Fill \c Replaces with the replacements for all effective changes. 321 void generateChanges(); 322 323 /// Stores \p Text as the replacement for the whitespace in \p Range. 324 void storeReplacement(SourceRange Range, StringRef Text); 325 void appendNewlineText(std::string &Text, unsigned Newlines); 326 void appendEscapedNewlineText(std::string &Text, unsigned Newlines, 327 unsigned PreviousEndOfTokenColumn, 328 unsigned EscapedNewlineColumn); 329 void appendIndentText(std::string &Text, unsigned IndentLevel, 330 unsigned Spaces, unsigned WhitespaceStartColumn, 331 bool IsAligned); 332 unsigned appendTabIndent(std::string &Text, unsigned Spaces, 333 unsigned Indentation); 334 335 SmallVector<Change, 16> Changes; 336 const SourceManager &SourceMgr; 337 tooling::Replacements Replaces; 338 const FormatStyle &Style; 339 bool UseCRLF; 340 }; 341 342 } // namespace format 343 } // namespace clang 344 345 #endif 346