1 //===--- TextDiagnostic.cpp - Text Diagnostic Pretty-Printing -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Frontend/TextDiagnostic.h" 10 #include "clang/Basic/CharInfo.h" 11 #include "clang/Basic/DiagnosticOptions.h" 12 #include "clang/Basic/FileManager.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Lex/Preprocessor.h" 16 #include "llvm/ADT/SmallString.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/Support/ConvertUTF.h" 19 #include "llvm/Support/ErrorHandling.h" 20 #include "llvm/Support/Locale.h" 21 #include "llvm/Support/Path.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include <algorithm> 24 #include <optional> 25 26 using namespace clang; 27 28 static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN; 29 static const enum raw_ostream::Colors remarkColor = 30 raw_ostream::BLUE; 31 static const enum raw_ostream::Colors fixitColor = 32 raw_ostream::GREEN; 33 static const enum raw_ostream::Colors caretColor = 34 raw_ostream::GREEN; 35 static const enum raw_ostream::Colors warningColor = 36 raw_ostream::MAGENTA; 37 static const enum raw_ostream::Colors templateColor = 38 raw_ostream::CYAN; 39 static const enum raw_ostream::Colors errorColor = raw_ostream::RED; 40 static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; 41 // Used for changing only the bold attribute. 42 static const enum raw_ostream::Colors savedColor = 43 raw_ostream::SAVEDCOLOR; 44 45 // Magenta is taken for 'warning'. Red is already 'error' and 'cyan' 46 // is already taken for 'note'. Green is already used to underline 47 // source ranges. White and black are bad because of the usual 48 // terminal backgrounds. Which leaves us only with TWO options. 49 static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW; 50 static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; 51 static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; 52 53 /// Add highlights to differences in template strings. 54 static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, 55 bool &Normal, bool Bold) { 56 while (true) { 57 size_t Pos = Str.find(ToggleHighlight); 58 OS << Str.slice(0, Pos); 59 if (Pos == StringRef::npos) 60 break; 61 62 Str = Str.substr(Pos + 1); 63 if (Normal) 64 OS.changeColor(templateColor, true); 65 else { 66 OS.resetColor(); 67 if (Bold) 68 OS.changeColor(savedColor, true); 69 } 70 Normal = !Normal; 71 } 72 } 73 74 /// Number of spaces to indent when word-wrapping. 75 const unsigned WordWrapIndentation = 6; 76 77 static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) { 78 int bytes = 0; 79 while (0<i) { 80 if (SourceLine[--i]=='\t') 81 break; 82 ++bytes; 83 } 84 return bytes; 85 } 86 87 /// returns a printable representation of first item from input range 88 /// 89 /// This function returns a printable representation of the next item in a line 90 /// of source. If the next byte begins a valid and printable character, that 91 /// character is returned along with 'true'. 92 /// 93 /// Otherwise, if the next byte begins a valid, but unprintable character, a 94 /// printable, escaped representation of the character is returned, along with 95 /// 'false'. Otherwise a printable, escaped representation of the next byte 96 /// is returned along with 'false'. 97 /// 98 /// \note The index is updated to be used with a subsequent call to 99 /// printableTextForNextCharacter. 100 /// 101 /// \param SourceLine The line of source 102 /// \param I Pointer to byte index, 103 /// \param TabStop used to expand tabs 104 /// \return pair(printable text, 'true' iff original text was printable) 105 /// 106 static std::pair<SmallString<16>, bool> 107 printableTextForNextCharacter(StringRef SourceLine, size_t *I, 108 unsigned TabStop) { 109 assert(I && "I must not be null"); 110 assert(*I < SourceLine.size() && "must point to a valid index"); 111 112 if (SourceLine[*I] == '\t') { 113 assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop && 114 "Invalid -ftabstop value"); 115 unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I); 116 unsigned NumSpaces = TabStop - (Col % TabStop); 117 assert(0 < NumSpaces && NumSpaces <= TabStop 118 && "Invalid computation of space amt"); 119 ++(*I); 120 121 SmallString<16> ExpandedTab; 122 ExpandedTab.assign(NumSpaces, ' '); 123 return std::make_pair(ExpandedTab, true); 124 } 125 126 const unsigned char *Begin = SourceLine.bytes_begin() + *I; 127 128 // Fast path for the common ASCII case. 129 if (*Begin < 0x80 && llvm::sys::locale::isPrint(*Begin)) { 130 ++(*I); 131 return std::make_pair(SmallString<16>(Begin, Begin + 1), true); 132 } 133 unsigned CharSize = llvm::getNumBytesForUTF8(*Begin); 134 const unsigned char *End = Begin + CharSize; 135 136 // Convert it to UTF32 and check if it's printable. 137 if (End <= SourceLine.bytes_end() && llvm::isLegalUTF8Sequence(Begin, End)) { 138 llvm::UTF32 C; 139 llvm::UTF32 *CPtr = &C; 140 141 // Begin and end before conversion. 142 unsigned char const *OriginalBegin = Begin; 143 llvm::ConversionResult Res = llvm::ConvertUTF8toUTF32( 144 &Begin, End, &CPtr, CPtr + 1, llvm::strictConversion); 145 (void)Res; 146 assert(Res == llvm::conversionOK); 147 assert(OriginalBegin < Begin); 148 assert(unsigned(Begin - OriginalBegin) == CharSize); 149 150 (*I) += (Begin - OriginalBegin); 151 152 // Valid, multi-byte, printable UTF8 character. 153 if (llvm::sys::locale::isPrint(C)) 154 return std::make_pair(SmallString<16>(OriginalBegin, End), true); 155 156 // Valid but not printable. 157 SmallString<16> Str("<U+>"); 158 while (C) { 159 Str.insert(Str.begin() + 3, llvm::hexdigit(C % 16)); 160 C /= 16; 161 } 162 while (Str.size() < 8) 163 Str.insert(Str.begin() + 3, llvm::hexdigit(0)); 164 return std::make_pair(Str, false); 165 } 166 167 // Otherwise, not printable since it's not valid UTF8. 168 SmallString<16> ExpandedByte("<XX>"); 169 unsigned char Byte = SourceLine[*I]; 170 ExpandedByte[1] = llvm::hexdigit(Byte / 16); 171 ExpandedByte[2] = llvm::hexdigit(Byte % 16); 172 ++(*I); 173 return std::make_pair(ExpandedByte, false); 174 } 175 176 static void expandTabs(std::string &SourceLine, unsigned TabStop) { 177 size_t I = SourceLine.size(); 178 while (I > 0) { 179 I--; 180 if (SourceLine[I] != '\t') 181 continue; 182 size_t TmpI = I; 183 auto [Str, Printable] = 184 printableTextForNextCharacter(SourceLine, &TmpI, TabStop); 185 SourceLine.replace(I, 1, Str.c_str()); 186 } 187 } 188 189 /// \p BytesOut: 190 /// A mapping from columns to the byte of the source line that produced the 191 /// character displaying at that column. This is the inverse of \p ColumnsOut. 192 /// 193 /// The last element in the array is the number of bytes in the source string. 194 /// 195 /// example: (given a tabstop of 8) 196 /// 197 /// "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7} 198 /// 199 /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to 200 /// display) 201 /// 202 /// \p ColumnsOut: 203 /// A mapping from the bytes 204 /// of the printable representation of the line to the columns those printable 205 /// characters will appear at (numbering the first column as 0). 206 /// 207 /// If a byte 'i' corresponds to multiple columns (e.g. the byte contains a tab 208 /// character) then the array will map that byte to the first column the 209 /// tab appears at and the next value in the map will have been incremented 210 /// more than once. 211 /// 212 /// If a byte is the first in a sequence of bytes that together map to a single 213 /// entity in the output, then the array will map that byte to the appropriate 214 /// column while the subsequent bytes will be -1. 215 /// 216 /// The last element in the array does not correspond to any byte in the input 217 /// and instead is the number of columns needed to display the source 218 /// 219 /// example: (given a tabstop of 8) 220 /// 221 /// "a \t \u3042" -> {0,1,2,8,9,-1,-1,11} 222 /// 223 /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to 224 /// display) 225 static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop, 226 SmallVectorImpl<int> &BytesOut, 227 SmallVectorImpl<int> &ColumnsOut) { 228 assert(BytesOut.empty()); 229 assert(ColumnsOut.empty()); 230 231 if (SourceLine.empty()) { 232 BytesOut.resize(1u, 0); 233 ColumnsOut.resize(1u, 0); 234 return; 235 } 236 237 ColumnsOut.resize(SourceLine.size() + 1, -1); 238 239 int Columns = 0; 240 size_t I = 0; 241 while (I < SourceLine.size()) { 242 ColumnsOut[I] = Columns; 243 BytesOut.resize(Columns + 1, -1); 244 BytesOut.back() = I; 245 auto [Str, Printable] = 246 printableTextForNextCharacter(SourceLine, &I, TabStop); 247 Columns += llvm::sys::locale::columnWidth(Str); 248 } 249 250 ColumnsOut.back() = Columns; 251 BytesOut.resize(Columns + 1, -1); 252 BytesOut.back() = I; 253 } 254 255 namespace { 256 struct SourceColumnMap { 257 SourceColumnMap(StringRef SourceLine, unsigned TabStop) 258 : m_SourceLine(SourceLine) { 259 260 genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn); 261 262 assert(m_byteToColumn.size()==SourceLine.size()+1); 263 assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size()); 264 assert(m_byteToColumn.size() 265 == static_cast<unsigned>(m_columnToByte.back()+1)); 266 assert(static_cast<unsigned>(m_byteToColumn.back()+1) 267 == m_columnToByte.size()); 268 } 269 int columns() const { return m_byteToColumn.back(); } 270 int bytes() const { return m_columnToByte.back(); } 271 272 /// Map a byte to the column which it is at the start of, or return -1 273 /// if it is not at the start of a column (for a UTF-8 trailing byte). 274 int byteToColumn(int n) const { 275 assert(0<=n && n<static_cast<int>(m_byteToColumn.size())); 276 return m_byteToColumn[n]; 277 } 278 279 /// Map a byte to the first column which contains it. 280 int byteToContainingColumn(int N) const { 281 assert(0 <= N && N < static_cast<int>(m_byteToColumn.size())); 282 while (m_byteToColumn[N] == -1) 283 --N; 284 return m_byteToColumn[N]; 285 } 286 287 /// Map a column to the byte which starts the column, or return -1 if 288 /// the column the second or subsequent column of an expanded tab or similar 289 /// multi-column entity. 290 int columnToByte(int n) const { 291 assert(0<=n && n<static_cast<int>(m_columnToByte.size())); 292 return m_columnToByte[n]; 293 } 294 295 /// Map from a byte index to the next byte which starts a column. 296 int startOfNextColumn(int N) const { 297 assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1)); 298 while (byteToColumn(++N) == -1) {} 299 return N; 300 } 301 302 /// Map from a byte index to the previous byte which starts a column. 303 int startOfPreviousColumn(int N) const { 304 assert(0 < N && N < static_cast<int>(m_byteToColumn.size())); 305 while (byteToColumn(--N) == -1) {} 306 return N; 307 } 308 309 StringRef getSourceLine() const { 310 return m_SourceLine; 311 } 312 313 private: 314 const std::string m_SourceLine; 315 SmallVector<int,200> m_byteToColumn; 316 SmallVector<int,200> m_columnToByte; 317 }; 318 } // end anonymous namespace 319 320 /// When the source code line we want to print is too long for 321 /// the terminal, select the "interesting" region. 322 static void selectInterestingSourceRegion(std::string &SourceLine, 323 std::string &CaretLine, 324 std::string &FixItInsertionLine, 325 unsigned Columns, 326 const SourceColumnMap &map) { 327 unsigned CaretColumns = CaretLine.size(); 328 unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine); 329 unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()), 330 std::max(CaretColumns, FixItColumns)); 331 // if the number of columns is less than the desired number we're done 332 if (MaxColumns <= Columns) 333 return; 334 335 // No special characters are allowed in CaretLine. 336 assert(llvm::none_of(CaretLine, [](char c) { return c < ' ' || '~' < c; })); 337 338 // Find the slice that we need to display the full caret line 339 // correctly. 340 unsigned CaretStart = 0, CaretEnd = CaretLine.size(); 341 for (; CaretStart != CaretEnd; ++CaretStart) 342 if (!isWhitespace(CaretLine[CaretStart])) 343 break; 344 345 for (; CaretEnd != CaretStart; --CaretEnd) 346 if (!isWhitespace(CaretLine[CaretEnd - 1])) 347 break; 348 349 // caret has already been inserted into CaretLine so the above whitespace 350 // check is guaranteed to include the caret 351 352 // If we have a fix-it line, make sure the slice includes all of the 353 // fix-it information. 354 if (!FixItInsertionLine.empty()) { 355 unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size(); 356 for (; FixItStart != FixItEnd; ++FixItStart) 357 if (!isWhitespace(FixItInsertionLine[FixItStart])) 358 break; 359 360 for (; FixItEnd != FixItStart; --FixItEnd) 361 if (!isWhitespace(FixItInsertionLine[FixItEnd - 1])) 362 break; 363 364 // We can safely use the byte offset FixItStart as the column offset 365 // because the characters up until FixItStart are all ASCII whitespace 366 // characters. 367 unsigned FixItStartCol = FixItStart; 368 unsigned FixItEndCol 369 = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)); 370 371 CaretStart = std::min(FixItStartCol, CaretStart); 372 CaretEnd = std::max(FixItEndCol, CaretEnd); 373 } 374 375 // CaretEnd may have been set at the middle of a character 376 // If it's not at a character's first column then advance it past the current 377 // character. 378 while (static_cast<int>(CaretEnd) < map.columns() && 379 -1 == map.columnToByte(CaretEnd)) 380 ++CaretEnd; 381 382 assert((static_cast<int>(CaretStart) > map.columns() || 383 -1!=map.columnToByte(CaretStart)) && 384 "CaretStart must not point to a column in the middle of a source" 385 " line character"); 386 assert((static_cast<int>(CaretEnd) > map.columns() || 387 -1!=map.columnToByte(CaretEnd)) && 388 "CaretEnd must not point to a column in the middle of a source line" 389 " character"); 390 391 // CaretLine[CaretStart, CaretEnd) contains all of the interesting 392 // parts of the caret line. While this slice is smaller than the 393 // number of columns we have, try to grow the slice to encompass 394 // more context. 395 396 unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart, 397 map.columns())); 398 unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd, 399 map.columns())); 400 401 unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart 402 - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart)); 403 404 char const *front_ellipse = " ..."; 405 char const *front_space = " "; 406 char const *back_ellipse = "..."; 407 unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse); 408 409 unsigned TargetColumns = Columns; 410 // Give us extra room for the ellipses 411 // and any of the caret line that extends past the source 412 if (TargetColumns > ellipses_space+CaretColumnsOutsideSource) 413 TargetColumns -= ellipses_space+CaretColumnsOutsideSource; 414 415 while (SourceStart>0 || SourceEnd<SourceLine.size()) { 416 bool ExpandedRegion = false; 417 418 if (SourceStart>0) { 419 unsigned NewStart = map.startOfPreviousColumn(SourceStart); 420 421 // Skip over any whitespace we see here; we're looking for 422 // another bit of interesting text. 423 // FIXME: Detect non-ASCII whitespace characters too. 424 while (NewStart && isWhitespace(SourceLine[NewStart])) 425 NewStart = map.startOfPreviousColumn(NewStart); 426 427 // Skip over this bit of "interesting" text. 428 while (NewStart) { 429 unsigned Prev = map.startOfPreviousColumn(NewStart); 430 if (isWhitespace(SourceLine[Prev])) 431 break; 432 NewStart = Prev; 433 } 434 435 assert(map.byteToColumn(NewStart) != -1); 436 unsigned NewColumns = map.byteToColumn(SourceEnd) - 437 map.byteToColumn(NewStart); 438 if (NewColumns <= TargetColumns) { 439 SourceStart = NewStart; 440 ExpandedRegion = true; 441 } 442 } 443 444 if (SourceEnd<SourceLine.size()) { 445 unsigned NewEnd = map.startOfNextColumn(SourceEnd); 446 447 // Skip over any whitespace we see here; we're looking for 448 // another bit of interesting text. 449 // FIXME: Detect non-ASCII whitespace characters too. 450 while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd])) 451 NewEnd = map.startOfNextColumn(NewEnd); 452 453 // Skip over this bit of "interesting" text. 454 while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd])) 455 NewEnd = map.startOfNextColumn(NewEnd); 456 457 assert(map.byteToColumn(NewEnd) != -1); 458 unsigned NewColumns = map.byteToColumn(NewEnd) - 459 map.byteToColumn(SourceStart); 460 if (NewColumns <= TargetColumns) { 461 SourceEnd = NewEnd; 462 ExpandedRegion = true; 463 } 464 } 465 466 if (!ExpandedRegion) 467 break; 468 } 469 470 CaretStart = map.byteToColumn(SourceStart); 471 CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource; 472 473 // [CaretStart, CaretEnd) is the slice we want. Update the various 474 // output lines to show only this slice. 475 assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 && 476 SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1); 477 assert(SourceStart <= SourceEnd); 478 assert(CaretStart <= CaretEnd); 479 480 unsigned BackColumnsRemoved 481 = map.byteToColumn(SourceLine.size())-map.byteToColumn(SourceEnd); 482 unsigned FrontColumnsRemoved = CaretStart; 483 unsigned ColumnsKept = CaretEnd-CaretStart; 484 485 // We checked up front that the line needed truncation 486 assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns); 487 488 // The line needs some truncation, and we'd prefer to keep the front 489 // if possible, so remove the back 490 if (BackColumnsRemoved > strlen(back_ellipse)) 491 SourceLine.replace(SourceEnd, std::string::npos, back_ellipse); 492 493 // If that's enough then we're done 494 if (FrontColumnsRemoved+ColumnsKept <= Columns) 495 return; 496 497 // Otherwise remove the front as well 498 if (FrontColumnsRemoved > strlen(front_ellipse)) { 499 SourceLine.replace(0, SourceStart, front_ellipse); 500 CaretLine.replace(0, CaretStart, front_space); 501 if (!FixItInsertionLine.empty()) 502 FixItInsertionLine.replace(0, CaretStart, front_space); 503 } 504 } 505 506 /// Skip over whitespace in the string, starting at the given 507 /// index. 508 /// 509 /// \returns The index of the first non-whitespace character that is 510 /// greater than or equal to Idx or, if no such character exists, 511 /// returns the end of the string. 512 static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length) { 513 while (Idx < Length && isWhitespace(Str[Idx])) 514 ++Idx; 515 return Idx; 516 } 517 518 /// If the given character is the start of some kind of 519 /// balanced punctuation (e.g., quotes or parentheses), return the 520 /// character that will terminate the punctuation. 521 /// 522 /// \returns The ending punctuation character, if any, or the NULL 523 /// character if the input character does not start any punctuation. 524 static inline char findMatchingPunctuation(char c) { 525 switch (c) { 526 case '\'': return '\''; 527 case '`': return '\''; 528 case '"': return '"'; 529 case '(': return ')'; 530 case '[': return ']'; 531 case '{': return '}'; 532 default: break; 533 } 534 535 return 0; 536 } 537 538 /// Find the end of the word starting at the given offset 539 /// within a string. 540 /// 541 /// \returns the index pointing one character past the end of the 542 /// word. 543 static unsigned findEndOfWord(unsigned Start, StringRef Str, 544 unsigned Length, unsigned Column, 545 unsigned Columns) { 546 assert(Start < Str.size() && "Invalid start position!"); 547 unsigned End = Start + 1; 548 549 // If we are already at the end of the string, take that as the word. 550 if (End == Str.size()) 551 return End; 552 553 // Determine if the start of the string is actually opening 554 // punctuation, e.g., a quote or parentheses. 555 char EndPunct = findMatchingPunctuation(Str[Start]); 556 if (!EndPunct) { 557 // This is a normal word. Just find the first space character. 558 while (End < Length && !isWhitespace(Str[End])) 559 ++End; 560 return End; 561 } 562 563 // We have the start of a balanced punctuation sequence (quotes, 564 // parentheses, etc.). Determine the full sequence is. 565 SmallString<16> PunctuationEndStack; 566 PunctuationEndStack.push_back(EndPunct); 567 while (End < Length && !PunctuationEndStack.empty()) { 568 if (Str[End] == PunctuationEndStack.back()) 569 PunctuationEndStack.pop_back(); 570 else if (char SubEndPunct = findMatchingPunctuation(Str[End])) 571 PunctuationEndStack.push_back(SubEndPunct); 572 573 ++End; 574 } 575 576 // Find the first space character after the punctuation ended. 577 while (End < Length && !isWhitespace(Str[End])) 578 ++End; 579 580 unsigned PunctWordLength = End - Start; 581 if (// If the word fits on this line 582 Column + PunctWordLength <= Columns || 583 // ... or the word is "short enough" to take up the next line 584 // without too much ugly white space 585 PunctWordLength < Columns/3) 586 return End; // Take the whole thing as a single "word". 587 588 // The whole quoted/parenthesized string is too long to print as a 589 // single "word". Instead, find the "word" that starts just after 590 // the punctuation and use that end-point instead. This will recurse 591 // until it finds something small enough to consider a word. 592 return findEndOfWord(Start + 1, Str, Length, Column + 1, Columns); 593 } 594 595 /// Print the given string to a stream, word-wrapping it to 596 /// some number of columns in the process. 597 /// 598 /// \param OS the stream to which the word-wrapping string will be 599 /// emitted. 600 /// \param Str the string to word-wrap and output. 601 /// \param Columns the number of columns to word-wrap to. 602 /// \param Column the column number at which the first character of \p 603 /// Str will be printed. This will be non-zero when part of the first 604 /// line has already been printed. 605 /// \param Bold if the current text should be bold 606 /// \returns true if word-wrapping was required, or false if the 607 /// string fit on the first line. 608 static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, 609 unsigned Column, bool Bold) { 610 const unsigned Length = std::min(Str.find('\n'), Str.size()); 611 bool TextNormal = true; 612 613 bool Wrapped = false; 614 for (unsigned WordStart = 0, WordEnd; WordStart < Length; 615 WordStart = WordEnd) { 616 // Find the beginning of the next word. 617 WordStart = skipWhitespace(WordStart, Str, Length); 618 if (WordStart == Length) 619 break; 620 621 // Find the end of this word. 622 WordEnd = findEndOfWord(WordStart, Str, Length, Column, Columns); 623 624 // Does this word fit on the current line? 625 unsigned WordLength = WordEnd - WordStart; 626 if (Column + WordLength < Columns) { 627 // This word fits on the current line; print it there. 628 if (WordStart) { 629 OS << ' '; 630 Column += 1; 631 } 632 applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), 633 TextNormal, Bold); 634 Column += WordLength; 635 continue; 636 } 637 638 // This word does not fit on the current line, so wrap to the next 639 // line. 640 OS << '\n'; 641 OS.indent(WordWrapIndentation); 642 applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), 643 TextNormal, Bold); 644 Column = WordWrapIndentation + WordLength; 645 Wrapped = true; 646 } 647 648 // Append any remaning text from the message with its existing formatting. 649 applyTemplateHighlighting(OS, Str.substr(Length), TextNormal, Bold); 650 651 assert(TextNormal && "Text highlighted at end of diagnostic message."); 652 653 return Wrapped; 654 } 655 656 TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, 657 DiagnosticOptions *DiagOpts, 658 const Preprocessor *PP) 659 : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} 660 661 TextDiagnostic::~TextDiagnostic() {} 662 663 void TextDiagnostic::emitDiagnosticMessage( 664 FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, 665 StringRef Message, ArrayRef<clang::CharSourceRange> Ranges, 666 DiagOrStoredDiag D) { 667 uint64_t StartOfLocationInfo = OS.tell(); 668 669 // Emit the location of this particular diagnostic. 670 if (Loc.isValid()) 671 emitDiagnosticLoc(Loc, PLoc, Level, Ranges); 672 673 if (DiagOpts->ShowColors) 674 OS.resetColor(); 675 676 if (DiagOpts->ShowLevel) 677 printDiagnosticLevel(OS, Level, DiagOpts->ShowColors); 678 printDiagnosticMessage(OS, 679 /*IsSupplemental*/ Level == DiagnosticsEngine::Note, 680 Message, OS.tell() - StartOfLocationInfo, 681 DiagOpts->MessageLength, DiagOpts->ShowColors); 682 } 683 684 /*static*/ void 685 TextDiagnostic::printDiagnosticLevel(raw_ostream &OS, 686 DiagnosticsEngine::Level Level, 687 bool ShowColors) { 688 if (ShowColors) { 689 // Print diagnostic category in bold and color 690 switch (Level) { 691 case DiagnosticsEngine::Ignored: 692 llvm_unreachable("Invalid diagnostic type"); 693 case DiagnosticsEngine::Note: OS.changeColor(noteColor, true); break; 694 case DiagnosticsEngine::Remark: OS.changeColor(remarkColor, true); break; 695 case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break; 696 case DiagnosticsEngine::Error: OS.changeColor(errorColor, true); break; 697 case DiagnosticsEngine::Fatal: OS.changeColor(fatalColor, true); break; 698 } 699 } 700 701 switch (Level) { 702 case DiagnosticsEngine::Ignored: 703 llvm_unreachable("Invalid diagnostic type"); 704 case DiagnosticsEngine::Note: OS << "note: "; break; 705 case DiagnosticsEngine::Remark: OS << "remark: "; break; 706 case DiagnosticsEngine::Warning: OS << "warning: "; break; 707 case DiagnosticsEngine::Error: OS << "error: "; break; 708 case DiagnosticsEngine::Fatal: OS << "fatal error: "; break; 709 } 710 711 if (ShowColors) 712 OS.resetColor(); 713 } 714 715 /*static*/ 716 void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, 717 bool IsSupplemental, 718 StringRef Message, 719 unsigned CurrentColumn, 720 unsigned Columns, bool ShowColors) { 721 bool Bold = false; 722 if (ShowColors && !IsSupplemental) { 723 // Print primary diagnostic messages in bold and without color, to visually 724 // indicate the transition from continuation notes and other output. 725 OS.changeColor(savedColor, true); 726 Bold = true; 727 } 728 729 if (Columns) 730 printWordWrapped(OS, Message, Columns, CurrentColumn, Bold); 731 else { 732 bool Normal = true; 733 applyTemplateHighlighting(OS, Message, Normal, Bold); 734 assert(Normal && "Formatting should have returned to normal"); 735 } 736 737 if (ShowColors) 738 OS.resetColor(); 739 OS << '\n'; 740 } 741 742 void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { 743 #ifdef _WIN32 744 SmallString<4096> TmpFilename; 745 #endif 746 if (DiagOpts->AbsolutePath) { 747 auto File = SM.getFileManager().getOptionalFileRef(Filename); 748 if (File) { 749 // We want to print a simplified absolute path, i. e. without "dots". 750 // 751 // The hardest part here are the paths like "<part1>/<link>/../<part2>". 752 // On Unix-like systems, we cannot just collapse "<link>/..", because 753 // paths are resolved sequentially, and, thereby, the path 754 // "<part1>/<part2>" may point to a different location. That is why 755 // we use FileManager::getCanonicalName(), which expands all indirections 756 // with llvm::sys::fs::real_path() and caches the result. 757 // 758 // On the other hand, it would be better to preserve as much of the 759 // original path as possible, because that helps a user to recognize it. 760 // real_path() expands all links, which sometimes too much. Luckily, 761 // on Windows we can just use llvm::sys::path::remove_dots(), because, 762 // on that system, both aforementioned paths point to the same place. 763 #ifdef _WIN32 764 TmpFilename = File->getName(); 765 llvm::sys::fs::make_absolute(TmpFilename); 766 llvm::sys::path::native(TmpFilename); 767 llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true); 768 Filename = StringRef(TmpFilename.data(), TmpFilename.size()); 769 #else 770 Filename = SM.getFileManager().getCanonicalName(*File); 771 #endif 772 } 773 } 774 775 OS << Filename; 776 } 777 778 /// Print out the file/line/column information and include trace. 779 /// 780 /// This method handles the emission of the diagnostic location information. 781 /// This includes extracting as much location information as is present for 782 /// the diagnostic and printing it, as well as any include stack or source 783 /// ranges necessary. 784 void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc, 785 DiagnosticsEngine::Level Level, 786 ArrayRef<CharSourceRange> Ranges) { 787 if (PLoc.isInvalid()) { 788 // At least print the file name if available: 789 if (FileID FID = Loc.getFileID(); FID.isValid()) { 790 if (OptionalFileEntryRef FE = Loc.getFileEntryRef()) { 791 emitFilename(FE->getName(), Loc.getManager()); 792 OS << ": "; 793 } 794 } 795 return; 796 } 797 unsigned LineNo = PLoc.getLine(); 798 799 if (!DiagOpts->ShowLocation) 800 return; 801 802 if (DiagOpts->ShowColors) 803 OS.changeColor(savedColor, true); 804 805 emitFilename(PLoc.getFilename(), Loc.getManager()); 806 switch (DiagOpts->getFormat()) { 807 case DiagnosticOptions::SARIF: 808 case DiagnosticOptions::Clang: 809 if (DiagOpts->ShowLine) 810 OS << ':' << LineNo; 811 break; 812 case DiagnosticOptions::MSVC: OS << '(' << LineNo; break; 813 case DiagnosticOptions::Vi: OS << " +" << LineNo; break; 814 } 815 816 if (DiagOpts->ShowColumn) 817 // Compute the column number. 818 if (unsigned ColNo = PLoc.getColumn()) { 819 if (DiagOpts->getFormat() == DiagnosticOptions::MSVC) { 820 OS << ','; 821 // Visual Studio 2010 or earlier expects column number to be off by one 822 if (LangOpts.MSCompatibilityVersion && 823 !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2012)) 824 ColNo--; 825 } else 826 OS << ':'; 827 OS << ColNo; 828 } 829 switch (DiagOpts->getFormat()) { 830 case DiagnosticOptions::SARIF: 831 case DiagnosticOptions::Clang: 832 case DiagnosticOptions::Vi: OS << ':'; break; 833 case DiagnosticOptions::MSVC: 834 // MSVC2013 and before print 'file(4) : error'. MSVC2015 gets rid of the 835 // space and prints 'file(4): error'. 836 OS << ')'; 837 if (LangOpts.MSCompatibilityVersion && 838 !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015)) 839 OS << ' '; 840 OS << ':'; 841 break; 842 } 843 844 if (DiagOpts->ShowSourceRanges && !Ranges.empty()) { 845 FileID CaretFileID = Loc.getExpansionLoc().getFileID(); 846 bool PrintedRange = false; 847 const SourceManager &SM = Loc.getManager(); 848 849 for (const auto &R : Ranges) { 850 // Ignore invalid ranges. 851 if (!R.isValid()) 852 continue; 853 854 SourceLocation B = SM.getExpansionLoc(R.getBegin()); 855 CharSourceRange ERange = SM.getExpansionRange(R.getEnd()); 856 SourceLocation E = ERange.getEnd(); 857 858 // If the start or end of the range is in another file, just 859 // discard it. 860 if (SM.getFileID(B) != CaretFileID || SM.getFileID(E) != CaretFileID) 861 continue; 862 863 // Add in the length of the token, so that we cover multi-char 864 // tokens. 865 unsigned TokSize = 0; 866 if (ERange.isTokenRange()) 867 TokSize = Lexer::MeasureTokenLength(E, SM, LangOpts); 868 869 FullSourceLoc BF(B, SM), EF(E, SM); 870 OS << '{' 871 << BF.getLineNumber() << ':' << BF.getColumnNumber() << '-' 872 << EF.getLineNumber() << ':' << (EF.getColumnNumber() + TokSize) 873 << '}'; 874 PrintedRange = true; 875 } 876 877 if (PrintedRange) 878 OS << ':'; 879 } 880 OS << ' '; 881 } 882 883 void TextDiagnostic::emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) { 884 if (DiagOpts->ShowLocation && PLoc.isValid()) { 885 OS << "In file included from "; 886 emitFilename(PLoc.getFilename(), Loc.getManager()); 887 OS << ':' << PLoc.getLine() << ":\n"; 888 } else 889 OS << "In included file:\n"; 890 } 891 892 void TextDiagnostic::emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc, 893 StringRef ModuleName) { 894 if (DiagOpts->ShowLocation && PLoc.isValid()) 895 OS << "In module '" << ModuleName << "' imported from " 896 << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n"; 897 else 898 OS << "In module '" << ModuleName << "':\n"; 899 } 900 901 void TextDiagnostic::emitBuildingModuleLocation(FullSourceLoc Loc, 902 PresumedLoc PLoc, 903 StringRef ModuleName) { 904 if (DiagOpts->ShowLocation && PLoc.isValid()) 905 OS << "While building module '" << ModuleName << "' imported from " 906 << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n"; 907 else 908 OS << "While building module '" << ModuleName << "':\n"; 909 } 910 911 /// Find the suitable set of lines to show to include a set of ranges. 912 static std::optional<std::pair<unsigned, unsigned>> 913 findLinesForRange(const CharSourceRange &R, FileID FID, 914 const SourceManager &SM) { 915 if (!R.isValid()) 916 return std::nullopt; 917 918 SourceLocation Begin = R.getBegin(); 919 SourceLocation End = R.getEnd(); 920 if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID) 921 return std::nullopt; 922 923 return std::make_pair(SM.getExpansionLineNumber(Begin), 924 SM.getExpansionLineNumber(End)); 925 } 926 927 /// Add as much of range B into range A as possible without exceeding a maximum 928 /// size of MaxRange. Ranges are inclusive. 929 static std::pair<unsigned, unsigned> 930 maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B, 931 unsigned MaxRange) { 932 // If A is already the maximum size, we're done. 933 unsigned Slack = MaxRange - (A.second - A.first + 1); 934 if (Slack == 0) 935 return A; 936 937 // Easy case: merge succeeds within MaxRange. 938 unsigned Min = std::min(A.first, B.first); 939 unsigned Max = std::max(A.second, B.second); 940 if (Max - Min + 1 <= MaxRange) 941 return {Min, Max}; 942 943 // If we can't reach B from A within MaxRange, there's nothing to do. 944 // Don't add lines to the range that contain nothing interesting. 945 if ((B.first > A.first && B.first - A.first + 1 > MaxRange) || 946 (B.second < A.second && A.second - B.second + 1 > MaxRange)) 947 return A; 948 949 // Otherwise, expand A towards B to produce a range of size MaxRange. We 950 // attempt to expand by the same amount in both directions if B strictly 951 // contains A. 952 953 // Expand downwards by up to half the available amount, then upwards as 954 // much as possible, then downwards as much as possible. 955 A.second = std::min(A.second + (Slack + 1) / 2, Max); 956 Slack = MaxRange - (A.second - A.first + 1); 957 A.first = std::max(Min + Slack, A.first) - Slack; 958 A.second = std::min(A.first + MaxRange - 1, Max); 959 return A; 960 } 961 962 struct LineRange { 963 unsigned LineNo; 964 unsigned StartCol; 965 unsigned EndCol; 966 }; 967 968 /// Highlight \p R (with ~'s) on the current source line. 969 static void highlightRange(const LineRange &R, const SourceColumnMap &Map, 970 std::string &CaretLine) { 971 // Pick the first non-whitespace column. 972 unsigned StartColNo = R.StartCol; 973 while (StartColNo < Map.getSourceLine().size() && 974 (Map.getSourceLine()[StartColNo] == ' ' || 975 Map.getSourceLine()[StartColNo] == '\t')) 976 StartColNo = Map.startOfNextColumn(StartColNo); 977 978 // Pick the last non-whitespace column. 979 unsigned EndColNo = 980 std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size()); 981 while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' || 982 Map.getSourceLine()[EndColNo - 1] == '\t')) 983 EndColNo = Map.startOfPreviousColumn(EndColNo); 984 985 // If the start/end passed each other, then we are trying to highlight a 986 // range that just exists in whitespace. That most likely means we have 987 // a multi-line highlighting range that covers a blank line. 988 if (StartColNo > EndColNo) 989 return; 990 991 // Fill the range with ~'s. 992 StartColNo = Map.byteToContainingColumn(StartColNo); 993 EndColNo = Map.byteToContainingColumn(EndColNo); 994 995 assert(StartColNo <= EndColNo && "Invalid range!"); 996 if (CaretLine.size() < EndColNo) 997 CaretLine.resize(EndColNo, ' '); 998 std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~'); 999 } 1000 1001 static std::string buildFixItInsertionLine(FileID FID, 1002 unsigned LineNo, 1003 const SourceColumnMap &map, 1004 ArrayRef<FixItHint> Hints, 1005 const SourceManager &SM, 1006 const DiagnosticOptions *DiagOpts) { 1007 std::string FixItInsertionLine; 1008 if (Hints.empty() || !DiagOpts->ShowFixits) 1009 return FixItInsertionLine; 1010 unsigned PrevHintEndCol = 0; 1011 1012 for (const auto &H : Hints) { 1013 if (H.CodeToInsert.empty()) 1014 continue; 1015 1016 // We have an insertion hint. Determine whether the inserted 1017 // code contains no newlines and is on the same line as the caret. 1018 std::pair<FileID, unsigned> HintLocInfo = 1019 SM.getDecomposedExpansionLoc(H.RemoveRange.getBegin()); 1020 if (FID == HintLocInfo.first && 1021 LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) && 1022 StringRef(H.CodeToInsert).find_first_of("\n\r") == StringRef::npos) { 1023 // Insert the new code into the line just below the code 1024 // that the user wrote. 1025 // Note: When modifying this function, be very careful about what is a 1026 // "column" (printed width, platform-dependent) and what is a 1027 // "byte offset" (SourceManager "column"). 1028 unsigned HintByteOffset = 1029 SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1; 1030 1031 // The hint must start inside the source or right at the end 1032 assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1); 1033 unsigned HintCol = map.byteToContainingColumn(HintByteOffset); 1034 1035 // If we inserted a long previous hint, push this one forwards, and add 1036 // an extra space to show that this is not part of the previous 1037 // completion. This is sort of the best we can do when two hints appear 1038 // to overlap. 1039 // 1040 // Note that if this hint is located immediately after the previous 1041 // hint, no space will be added, since the location is more important. 1042 if (HintCol < PrevHintEndCol) 1043 HintCol = PrevHintEndCol + 1; 1044 1045 // This should NOT use HintByteOffset, because the source might have 1046 // Unicode characters in earlier columns. 1047 unsigned NewFixItLineSize = FixItInsertionLine.size() + 1048 (HintCol - PrevHintEndCol) + 1049 H.CodeToInsert.size(); 1050 if (NewFixItLineSize > FixItInsertionLine.size()) 1051 FixItInsertionLine.resize(NewFixItLineSize, ' '); 1052 1053 std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(), 1054 FixItInsertionLine.end() - H.CodeToInsert.size()); 1055 1056 PrevHintEndCol = HintCol + llvm::sys::locale::columnWidth(H.CodeToInsert); 1057 } 1058 } 1059 1060 expandTabs(FixItInsertionLine, DiagOpts->TabStop); 1061 1062 return FixItInsertionLine; 1063 } 1064 1065 static unsigned getNumDisplayWidth(unsigned N) { 1066 unsigned L = 1u, M = 10u; 1067 while (M <= N && ++L != std::numeric_limits<unsigned>::digits10 + 1) 1068 M *= 10u; 1069 1070 return L; 1071 } 1072 1073 /// Filter out invalid ranges, ranges that don't fit into the window of 1074 /// source lines we will print, and ranges from other files. 1075 /// 1076 /// For the remaining ranges, convert them to simple LineRange structs, 1077 /// which only cover one line at a time. 1078 static SmallVector<LineRange> 1079 prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges, 1080 const SourceManager &SM, 1081 const std::pair<unsigned, unsigned> &Lines, FileID FID, 1082 const LangOptions &LangOpts) { 1083 SmallVector<LineRange> LineRanges; 1084 1085 for (const CharSourceRange &R : Ranges) { 1086 if (R.isInvalid()) 1087 continue; 1088 SourceLocation Begin = R.getBegin(); 1089 SourceLocation End = R.getEnd(); 1090 1091 unsigned StartLineNo = SM.getExpansionLineNumber(Begin); 1092 if (StartLineNo > Lines.second || SM.getFileID(Begin) != FID) 1093 continue; 1094 1095 unsigned EndLineNo = SM.getExpansionLineNumber(End); 1096 if (EndLineNo < Lines.first || SM.getFileID(End) != FID) 1097 continue; 1098 1099 unsigned StartColumn = SM.getExpansionColumnNumber(Begin); 1100 unsigned EndColumn = SM.getExpansionColumnNumber(End); 1101 if (R.isTokenRange()) 1102 EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts); 1103 1104 // Only a single line. 1105 if (StartLineNo == EndLineNo) { 1106 LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1}); 1107 continue; 1108 } 1109 1110 // Start line. 1111 LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u}); 1112 1113 // Middle lines. 1114 for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S) 1115 LineRanges.push_back({S, 0, ~0u}); 1116 1117 // End line. 1118 LineRanges.push_back({EndLineNo, 0, EndColumn - 1}); 1119 } 1120 1121 return LineRanges; 1122 } 1123 1124 /// Creates syntax highlighting information in form of StyleRanges. 1125 /// 1126 /// The returned unique ptr has always exactly size 1127 /// (\p EndLineNumber - \p StartLineNumber + 1). Each SmallVector in there 1128 /// corresponds to syntax highlighting information in one line. In each line, 1129 /// the StyleRanges are non-overlapping and sorted from start to end of the 1130 /// line. 1131 static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]> 1132 highlightLines(StringRef FileData, unsigned StartLineNumber, 1133 unsigned EndLineNumber, const Preprocessor *PP, 1134 const LangOptions &LangOpts, bool ShowColors, FileID FID, 1135 const SourceManager &SM) { 1136 assert(StartLineNumber <= EndLineNumber); 1137 auto SnippetRanges = 1138 std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>( 1139 EndLineNumber - StartLineNumber + 1); 1140 1141 if (!PP || !ShowColors) 1142 return SnippetRanges; 1143 1144 // Might cause emission of another diagnostic. 1145 if (PP->getIdentifierTable().getExternalIdentifierLookup()) 1146 return SnippetRanges; 1147 1148 auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData); 1149 Lexer L{FID, *Buff, SM, LangOpts}; 1150 L.SetKeepWhitespaceMode(true); 1151 1152 const char *FirstLineStart = 1153 FileData.data() + 1154 SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second; 1155 if (const char *CheckPoint = PP->getCheckPoint(FID, FirstLineStart)) { 1156 assert(CheckPoint >= Buff->getBufferStart() && 1157 CheckPoint <= Buff->getBufferEnd()); 1158 assert(CheckPoint <= FirstLineStart); 1159 size_t Offset = CheckPoint - Buff->getBufferStart(); 1160 L.seek(Offset, /*IsAtStartOfLine=*/false); 1161 } 1162 1163 // Classify the given token and append it to the given vector. 1164 auto appendStyle = 1165 [PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec, 1166 const Token &T, unsigned Start, unsigned Length) -> void { 1167 if (T.is(tok::raw_identifier)) { 1168 StringRef RawIdent = T.getRawIdentifier(); 1169 // Special case true/false/nullptr/... literals, since they will otherwise 1170 // be treated as keywords. 1171 // FIXME: It would be good to have a programmatic way of getting this 1172 // list. 1173 if (llvm::StringSwitch<bool>(RawIdent) 1174 .Case("true", true) 1175 .Case("false", true) 1176 .Case("nullptr", true) 1177 .Case("__func__", true) 1178 .Case("__objc_yes__", true) 1179 .Case("__objc_no__", true) 1180 .Case("__null", true) 1181 .Case("__FUNCDNAME__", true) 1182 .Case("__FUNCSIG__", true) 1183 .Case("__FUNCTION__", true) 1184 .Case("__FUNCSIG__", true) 1185 .Default(false)) { 1186 Vec.emplace_back(Start, Start + Length, LiteralColor); 1187 } else { 1188 const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); 1189 assert(II); 1190 if (II->isKeyword(LangOpts)) 1191 Vec.emplace_back(Start, Start + Length, KeywordColor); 1192 } 1193 } else if (tok::isLiteral(T.getKind())) { 1194 Vec.emplace_back(Start, Start + Length, LiteralColor); 1195 } else { 1196 assert(T.is(tok::comment)); 1197 Vec.emplace_back(Start, Start + Length, CommentColor); 1198 } 1199 }; 1200 1201 bool Stop = false; 1202 while (!Stop) { 1203 Token T; 1204 Stop = L.LexFromRawLexer(T); 1205 if (T.is(tok::unknown)) 1206 continue; 1207 1208 // We are only interested in identifiers, literals and comments. 1209 if (!T.is(tok::raw_identifier) && !T.is(tok::comment) && 1210 !tok::isLiteral(T.getKind())) 1211 continue; 1212 1213 bool Invalid = false; 1214 unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid); 1215 if (Invalid || TokenEndLine < StartLineNumber) 1216 continue; 1217 1218 assert(TokenEndLine >= StartLineNumber); 1219 1220 unsigned TokenStartLine = 1221 SM.getSpellingLineNumber(T.getLocation(), &Invalid); 1222 if (Invalid) 1223 continue; 1224 // If this happens, we're done. 1225 if (TokenStartLine > EndLineNumber) 1226 break; 1227 1228 unsigned StartCol = 1229 SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; 1230 if (Invalid) 1231 continue; 1232 1233 // Simple tokens. 1234 if (TokenStartLine == TokenEndLine) { 1235 SmallVector<TextDiagnostic::StyleRange> &LineRanges = 1236 SnippetRanges[TokenStartLine - StartLineNumber]; 1237 appendStyle(LineRanges, T, StartCol, T.getLength()); 1238 continue; 1239 } 1240 assert((TokenEndLine - TokenStartLine) >= 1); 1241 1242 // For tokens that span multiple lines (think multiline comments), we 1243 // divide them into multiple StyleRanges. 1244 unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; 1245 if (Invalid) 1246 continue; 1247 1248 std::string Spelling = Lexer::getSpelling(T, SM, LangOpts); 1249 1250 unsigned L = TokenStartLine; 1251 unsigned LineLength = 0; 1252 for (unsigned I = 0; I <= Spelling.size(); ++I) { 1253 // This line is done. 1254 if (I == Spelling.size() || isVerticalWhitespace(Spelling[I])) { 1255 SmallVector<TextDiagnostic::StyleRange> &LineRanges = 1256 SnippetRanges[L - StartLineNumber]; 1257 1258 if (L >= StartLineNumber) { 1259 if (L == TokenStartLine) // First line 1260 appendStyle(LineRanges, T, StartCol, LineLength); 1261 else if (L == TokenEndLine) // Last line 1262 appendStyle(LineRanges, T, 0, EndCol); 1263 else 1264 appendStyle(LineRanges, T, 0, LineLength); 1265 } 1266 1267 ++L; 1268 if (L > EndLineNumber) 1269 break; 1270 LineLength = 0; 1271 continue; 1272 } 1273 ++LineLength; 1274 } 1275 } 1276 1277 return SnippetRanges; 1278 } 1279 1280 /// Emit a code snippet and caret line. 1281 /// 1282 /// This routine emits a single line's code snippet and caret line.. 1283 /// 1284 /// \param Loc The location for the caret. 1285 /// \param Ranges The underlined ranges for this code snippet. 1286 /// \param Hints The FixIt hints active for this diagnostic. 1287 void TextDiagnostic::emitSnippetAndCaret( 1288 FullSourceLoc Loc, DiagnosticsEngine::Level Level, 1289 SmallVectorImpl<CharSourceRange> &Ranges, ArrayRef<FixItHint> Hints) { 1290 assert(Loc.isValid() && "must have a valid source location here"); 1291 assert(Loc.isFileID() && "must have a file location here"); 1292 1293 // If caret diagnostics are enabled and we have location, we want to 1294 // emit the caret. However, we only do this if the location moved 1295 // from the last diagnostic, if the last diagnostic was a note that 1296 // was part of a different warning or error diagnostic, or if the 1297 // diagnostic has ranges. We don't want to emit the same caret 1298 // multiple times if one loc has multiple diagnostics. 1299 if (!DiagOpts->ShowCarets) 1300 return; 1301 if (Loc == LastLoc && Ranges.empty() && Hints.empty() && 1302 (LastLevel != DiagnosticsEngine::Note || Level == LastLevel)) 1303 return; 1304 1305 FileID FID = Loc.getFileID(); 1306 const SourceManager &SM = Loc.getManager(); 1307 1308 // Get information about the buffer it points into. 1309 bool Invalid = false; 1310 StringRef BufData = Loc.getBufferData(&Invalid); 1311 if (Invalid) 1312 return; 1313 const char *BufStart = BufData.data(); 1314 const char *BufEnd = BufStart + BufData.size(); 1315 1316 unsigned CaretLineNo = Loc.getLineNumber(); 1317 unsigned CaretColNo = Loc.getColumnNumber(); 1318 1319 // Arbitrarily stop showing snippets when the line is too long. 1320 static const size_t MaxLineLengthToPrint = 4096; 1321 if (CaretColNo > MaxLineLengthToPrint) 1322 return; 1323 1324 // Find the set of lines to include. 1325 const unsigned MaxLines = DiagOpts->SnippetLineLimit; 1326 std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo}; 1327 unsigned DisplayLineNo = Loc.getPresumedLoc().getLine(); 1328 for (const auto &I : Ranges) { 1329 if (auto OptionalRange = findLinesForRange(I, FID, SM)) 1330 Lines = maybeAddRange(Lines, *OptionalRange, MaxLines); 1331 1332 DisplayLineNo = 1333 std::min(DisplayLineNo, SM.getPresumedLineNumber(I.getBegin())); 1334 } 1335 1336 // Our line numbers look like: 1337 // " [number] | " 1338 // Where [number] is MaxLineNoDisplayWidth columns 1339 // and the full thing is therefore MaxLineNoDisplayWidth + 4 columns. 1340 unsigned MaxLineNoDisplayWidth = 1341 DiagOpts->ShowLineNumbers 1342 ? std::max(4u, getNumDisplayWidth(DisplayLineNo + MaxLines)) 1343 : 0; 1344 auto indentForLineNumbers = [&] { 1345 if (MaxLineNoDisplayWidth > 0) 1346 OS.indent(MaxLineNoDisplayWidth + 2) << "| "; 1347 }; 1348 1349 // Prepare source highlighting information for the lines we're about to 1350 // emit, starting from the first line. 1351 std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles = 1352 highlightLines(BufData, Lines.first, Lines.second, PP, LangOpts, 1353 DiagOpts->ShowColors, FID, SM); 1354 1355 SmallVector<LineRange> LineRanges = 1356 prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts); 1357 1358 for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1; 1359 ++LineNo, ++DisplayLineNo) { 1360 // Rewind from the current position to the start of the line. 1361 const char *LineStart = 1362 BufStart + 1363 SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second; 1364 if (LineStart == BufEnd) 1365 break; 1366 1367 // Compute the line end. 1368 const char *LineEnd = LineStart; 1369 while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd) 1370 ++LineEnd; 1371 1372 // Arbitrarily stop showing snippets when the line is too long. 1373 // FIXME: Don't print any lines in this case. 1374 if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint) 1375 return; 1376 1377 // Copy the line of code into an std::string for ease of manipulation. 1378 std::string SourceLine(LineStart, LineEnd); 1379 // Remove trailing null bytes. 1380 while (!SourceLine.empty() && SourceLine.back() == '\0' && 1381 (LineNo != CaretLineNo || SourceLine.size() > CaretColNo)) 1382 SourceLine.pop_back(); 1383 1384 // Build the byte to column map. 1385 const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop); 1386 1387 std::string CaretLine; 1388 // Highlight all of the characters covered by Ranges with ~ characters. 1389 for (const auto &LR : LineRanges) { 1390 if (LR.LineNo == LineNo) 1391 highlightRange(LR, sourceColMap, CaretLine); 1392 } 1393 1394 // Next, insert the caret itself. 1395 if (CaretLineNo == LineNo) { 1396 size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1); 1397 CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' '); 1398 CaretLine[Col] = '^'; 1399 } 1400 1401 std::string FixItInsertionLine = buildFixItInsertionLine( 1402 FID, LineNo, sourceColMap, Hints, SM, DiagOpts.get()); 1403 1404 // If the source line is too long for our terminal, select only the 1405 // "interesting" source region within that line. 1406 unsigned Columns = DiagOpts->MessageLength; 1407 if (Columns) 1408 selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, 1409 Columns, sourceColMap); 1410 1411 // If we are in -fdiagnostics-print-source-range-info mode, we are trying 1412 // to produce easily machine parsable output. Add a space before the 1413 // source line and the caret to make it trivial to tell the main diagnostic 1414 // line from what the user is intended to see. 1415 if (DiagOpts->ShowSourceRanges && !SourceLine.empty()) { 1416 SourceLine = ' ' + SourceLine; 1417 CaretLine = ' ' + CaretLine; 1418 } 1419 1420 // Emit what we have computed. 1421 emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo, 1422 SourceStyles[LineNo - Lines.first]); 1423 1424 if (!CaretLine.empty()) { 1425 indentForLineNumbers(); 1426 if (DiagOpts->ShowColors) 1427 OS.changeColor(caretColor, true); 1428 OS << CaretLine << '\n'; 1429 if (DiagOpts->ShowColors) 1430 OS.resetColor(); 1431 } 1432 1433 if (!FixItInsertionLine.empty()) { 1434 indentForLineNumbers(); 1435 if (DiagOpts->ShowColors) 1436 // Print fixit line in color 1437 OS.changeColor(fixitColor, false); 1438 if (DiagOpts->ShowSourceRanges) 1439 OS << ' '; 1440 OS << FixItInsertionLine << '\n'; 1441 if (DiagOpts->ShowColors) 1442 OS.resetColor(); 1443 } 1444 } 1445 1446 // Print out any parseable fixit information requested by the options. 1447 emitParseableFixits(Hints, SM); 1448 } 1449 1450 void TextDiagnostic::emitSnippet(StringRef SourceLine, 1451 unsigned MaxLineNoDisplayWidth, 1452 unsigned LineNo, unsigned DisplayLineNo, 1453 ArrayRef<StyleRange> Styles) { 1454 // Emit line number. 1455 if (MaxLineNoDisplayWidth > 0) { 1456 unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo); 1457 OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1) 1458 << DisplayLineNo << " | "; 1459 } 1460 1461 // Print the source line one character at a time. 1462 bool PrintReversed = false; 1463 std::optional<llvm::raw_ostream::Colors> CurrentColor; 1464 size_t I = 0; 1465 while (I < SourceLine.size()) { 1466 auto [Str, WasPrintable] = 1467 printableTextForNextCharacter(SourceLine, &I, DiagOpts->TabStop); 1468 1469 // Toggle inverted colors on or off for this character. 1470 if (DiagOpts->ShowColors) { 1471 if (WasPrintable == PrintReversed) { 1472 PrintReversed = !PrintReversed; 1473 if (PrintReversed) 1474 OS.reverseColor(); 1475 else { 1476 OS.resetColor(); 1477 CurrentColor = std::nullopt; 1478 } 1479 } 1480 1481 // Apply syntax highlighting information. 1482 const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) { 1483 return (R.Start < I && R.End >= I); 1484 }); 1485 1486 if (CharStyle != Styles.end()) { 1487 if (!CurrentColor || 1488 (CurrentColor && *CurrentColor != CharStyle->Color)) { 1489 OS.changeColor(CharStyle->Color, false); 1490 CurrentColor = CharStyle->Color; 1491 } 1492 } else if (CurrentColor) { 1493 OS.resetColor(); 1494 CurrentColor = std::nullopt; 1495 } 1496 } 1497 1498 OS << Str; 1499 } 1500 1501 if (DiagOpts->ShowColors) 1502 OS.resetColor(); 1503 1504 OS << '\n'; 1505 } 1506 1507 void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints, 1508 const SourceManager &SM) { 1509 if (!DiagOpts->ShowParseableFixits) 1510 return; 1511 1512 // We follow FixItRewriter's example in not (yet) handling 1513 // fix-its in macros. 1514 for (const auto &H : Hints) { 1515 if (H.RemoveRange.isInvalid() || H.RemoveRange.getBegin().isMacroID() || 1516 H.RemoveRange.getEnd().isMacroID()) 1517 return; 1518 } 1519 1520 for (const auto &H : Hints) { 1521 SourceLocation BLoc = H.RemoveRange.getBegin(); 1522 SourceLocation ELoc = H.RemoveRange.getEnd(); 1523 1524 std::pair<FileID, unsigned> BInfo = SM.getDecomposedLoc(BLoc); 1525 std::pair<FileID, unsigned> EInfo = SM.getDecomposedLoc(ELoc); 1526 1527 // Adjust for token ranges. 1528 if (H.RemoveRange.isTokenRange()) 1529 EInfo.second += Lexer::MeasureTokenLength(ELoc, SM, LangOpts); 1530 1531 // We specifically do not do word-wrapping or tab-expansion here, 1532 // because this is supposed to be easy to parse. 1533 PresumedLoc PLoc = SM.getPresumedLoc(BLoc); 1534 if (PLoc.isInvalid()) 1535 break; 1536 1537 OS << "fix-it:\""; 1538 OS.write_escaped(PLoc.getFilename()); 1539 OS << "\":{" << SM.getLineNumber(BInfo.first, BInfo.second) 1540 << ':' << SM.getColumnNumber(BInfo.first, BInfo.second) 1541 << '-' << SM.getLineNumber(EInfo.first, EInfo.second) 1542 << ':' << SM.getColumnNumber(EInfo.first, EInfo.second) 1543 << "}:\""; 1544 OS.write_escaped(H.CodeToInsert); 1545 OS << "\"\n"; 1546 } 1547 } 1548