1 //===--- TextDiagnostic.cpp - Text Diagnostic Pretty-Printing -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Frontend/TextDiagnostic.h" 10 #include "clang/Basic/CharInfo.h" 11 #include "clang/Basic/DiagnosticOptions.h" 12 #include "clang/Basic/FileManager.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Lex/Preprocessor.h" 16 #include "llvm/ADT/StringExtras.h" 17 #include "llvm/Support/ConvertUTF.h" 18 #include "llvm/Support/ErrorHandling.h" 19 #include "llvm/Support/Locale.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include <algorithm> 22 #include <optional> 23 24 using namespace clang; 25 26 static const enum raw_ostream::Colors noteColor = raw_ostream::CYAN; 27 static const enum raw_ostream::Colors remarkColor = 28 raw_ostream::BLUE; 29 static const enum raw_ostream::Colors fixitColor = 30 raw_ostream::GREEN; 31 static const enum raw_ostream::Colors caretColor = 32 raw_ostream::GREEN; 33 static const enum raw_ostream::Colors warningColor = 34 raw_ostream::MAGENTA; 35 static const enum raw_ostream::Colors templateColor = 36 raw_ostream::CYAN; 37 static const enum raw_ostream::Colors errorColor = raw_ostream::RED; 38 static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; 39 // Used for changing only the bold attribute. 40 static const enum raw_ostream::Colors savedColor = 41 raw_ostream::SAVEDCOLOR; 42 43 // Magenta is taken for 'warning'. Red is already 'error' and 'cyan' 44 // is already taken for 'note'. Green is already used to underline 45 // source ranges. White and black are bad because of the usual 46 // terminal backgrounds. Which leaves us only with TWO options. 
47 static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW; 48 static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN; 49 static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE; 50 51 /// Add highlights to differences in template strings. 52 static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str, 53 bool &Normal, bool Bold) { 54 while (true) { 55 size_t Pos = Str.find(ToggleHighlight); 56 OS << Str.slice(0, Pos); 57 if (Pos == StringRef::npos) 58 break; 59 60 Str = Str.substr(Pos + 1); 61 if (Normal) 62 OS.changeColor(templateColor, true); 63 else { 64 OS.resetColor(); 65 if (Bold) 66 OS.changeColor(savedColor, true); 67 } 68 Normal = !Normal; 69 } 70 } 71 72 /// Number of spaces to indent when word-wrapping. 73 const unsigned WordWrapIndentation = 6; 74 75 static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) { 76 int bytes = 0; 77 while (0<i) { 78 if (SourceLine[--i]=='\t') 79 break; 80 ++bytes; 81 } 82 return bytes; 83 } 84 85 /// returns a printable representation of first item from input range 86 /// 87 /// This function returns a printable representation of the next item in a line 88 /// of source. If the next byte begins a valid and printable character, that 89 /// character is returned along with 'true'. 90 /// 91 /// Otherwise, if the next byte begins a valid, but unprintable character, a 92 /// printable, escaped representation of the character is returned, along with 93 /// 'false'. Otherwise a printable, escaped representation of the next byte 94 /// is returned along with 'false'. 95 /// 96 /// \note The index is updated to be used with a subsequent call to 97 /// printableTextForNextCharacter. 
98 /// 99 /// \param SourceLine The line of source 100 /// \param I Pointer to byte index, 101 /// \param TabStop used to expand tabs 102 /// \return pair(printable text, 'true' iff original text was printable) 103 /// 104 static std::pair<SmallString<16>, bool> 105 printableTextForNextCharacter(StringRef SourceLine, size_t *I, 106 unsigned TabStop) { 107 assert(I && "I must not be null"); 108 assert(*I < SourceLine.size() && "must point to a valid index"); 109 110 if (SourceLine[*I] == '\t') { 111 assert(0 < TabStop && TabStop <= DiagnosticOptions::MaxTabStop && 112 "Invalid -ftabstop value"); 113 unsigned Col = bytesSincePreviousTabOrLineBegin(SourceLine, *I); 114 unsigned NumSpaces = TabStop - (Col % TabStop); 115 assert(0 < NumSpaces && NumSpaces <= TabStop 116 && "Invalid computation of space amt"); 117 ++(*I); 118 119 SmallString<16> ExpandedTab; 120 ExpandedTab.assign(NumSpaces, ' '); 121 return std::make_pair(ExpandedTab, true); 122 } 123 124 const unsigned char *Begin = SourceLine.bytes_begin() + *I; 125 126 // Fast path for the common ASCII case. 127 if (*Begin < 0x80 && llvm::sys::locale::isPrint(*Begin)) { 128 ++(*I); 129 return std::make_pair(SmallString<16>(Begin, Begin + 1), true); 130 } 131 unsigned CharSize = llvm::getNumBytesForUTF8(*Begin); 132 const unsigned char *End = Begin + CharSize; 133 134 // Convert it to UTF32 and check if it's printable. 135 if (End <= SourceLine.bytes_end() && llvm::isLegalUTF8Sequence(Begin, End)) { 136 llvm::UTF32 C; 137 llvm::UTF32 *CPtr = &C; 138 139 // Begin and end before conversion. 140 unsigned char const *OriginalBegin = Begin; 141 llvm::ConversionResult Res = llvm::ConvertUTF8toUTF32( 142 &Begin, End, &CPtr, CPtr + 1, llvm::strictConversion); 143 (void)Res; 144 assert(Res == llvm::conversionOK); 145 assert(OriginalBegin < Begin); 146 assert(unsigned(Begin - OriginalBegin) == CharSize); 147 148 (*I) += (Begin - OriginalBegin); 149 150 // Valid, multi-byte, printable UTF8 character. 
151 if (llvm::sys::locale::isPrint(C)) 152 return std::make_pair(SmallString<16>(OriginalBegin, End), true); 153 154 // Valid but not printable. 155 SmallString<16> Str("<U+>"); 156 while (C) { 157 Str.insert(Str.begin() + 3, llvm::hexdigit(C % 16)); 158 C /= 16; 159 } 160 while (Str.size() < 8) 161 Str.insert(Str.begin() + 3, llvm::hexdigit(0)); 162 return std::make_pair(Str, false); 163 } 164 165 // Otherwise, not printable since it's not valid UTF8. 166 SmallString<16> ExpandedByte("<XX>"); 167 unsigned char Byte = SourceLine[*I]; 168 ExpandedByte[1] = llvm::hexdigit(Byte / 16); 169 ExpandedByte[2] = llvm::hexdigit(Byte % 16); 170 ++(*I); 171 return std::make_pair(ExpandedByte, false); 172 } 173 174 static void expandTabs(std::string &SourceLine, unsigned TabStop) { 175 size_t I = SourceLine.size(); 176 while (I > 0) { 177 I--; 178 if (SourceLine[I] != '\t') 179 continue; 180 size_t TmpI = I; 181 auto [Str, Printable] = 182 printableTextForNextCharacter(SourceLine, &TmpI, TabStop); 183 SourceLine.replace(I, 1, Str.c_str()); 184 } 185 } 186 187 /// \p BytesOut: 188 /// A mapping from columns to the byte of the source line that produced the 189 /// character displaying at that column. This is the inverse of \p ColumnsOut. 190 /// 191 /// The last element in the array is the number of bytes in the source string. 192 /// 193 /// example: (given a tabstop of 8) 194 /// 195 /// "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7} 196 /// 197 /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to 198 /// display) 199 /// 200 /// \p ColumnsOut: 201 /// A mapping from the bytes 202 /// of the printable representation of the line to the columns those printable 203 /// characters will appear at (numbering the first column as 0). 204 /// 205 /// If a byte 'i' corresponds to multiple columns (e.g. 
the byte contains a tab 206 /// character) then the array will map that byte to the first column the 207 /// tab appears at and the next value in the map will have been incremented 208 /// more than once. 209 /// 210 /// If a byte is the first in a sequence of bytes that together map to a single 211 /// entity in the output, then the array will map that byte to the appropriate 212 /// column while the subsequent bytes will be -1. 213 /// 214 /// The last element in the array does not correspond to any byte in the input 215 /// and instead is the number of columns needed to display the source 216 /// 217 /// example: (given a tabstop of 8) 218 /// 219 /// "a \t \u3042" -> {0,1,2,8,9,-1,-1,11} 220 /// 221 /// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to 222 /// display) 223 static void genColumnByteMapping(StringRef SourceLine, unsigned TabStop, 224 SmallVectorImpl<int> &BytesOut, 225 SmallVectorImpl<int> &ColumnsOut) { 226 assert(BytesOut.empty()); 227 assert(ColumnsOut.empty()); 228 229 if (SourceLine.empty()) { 230 BytesOut.resize(1u, 0); 231 ColumnsOut.resize(1u, 0); 232 return; 233 } 234 235 ColumnsOut.resize(SourceLine.size() + 1, -1); 236 237 int Columns = 0; 238 size_t I = 0; 239 while (I < SourceLine.size()) { 240 ColumnsOut[I] = Columns; 241 BytesOut.resize(Columns + 1, -1); 242 BytesOut.back() = I; 243 auto [Str, Printable] = 244 printableTextForNextCharacter(SourceLine, &I, TabStop); 245 Columns += llvm::sys::locale::columnWidth(Str); 246 } 247 248 ColumnsOut.back() = Columns; 249 BytesOut.resize(Columns + 1, -1); 250 BytesOut.back() = I; 251 } 252 253 namespace { 254 struct SourceColumnMap { 255 SourceColumnMap(StringRef SourceLine, unsigned TabStop) 256 : m_SourceLine(SourceLine) { 257 258 genColumnByteMapping(SourceLine, TabStop, m_columnToByte, m_byteToColumn); 259 260 assert(m_byteToColumn.size()==SourceLine.size()+1); 261 assert(0 < m_byteToColumn.size() && 0 < m_columnToByte.size()); 262 assert(m_byteToColumn.size() 263 
== static_cast<unsigned>(m_columnToByte.back()+1)); 264 assert(static_cast<unsigned>(m_byteToColumn.back()+1) 265 == m_columnToByte.size()); 266 } 267 int columns() const { return m_byteToColumn.back(); } 268 int bytes() const { return m_columnToByte.back(); } 269 270 /// Map a byte to the column which it is at the start of, or return -1 271 /// if it is not at the start of a column (for a UTF-8 trailing byte). 272 int byteToColumn(int n) const { 273 assert(0<=n && n<static_cast<int>(m_byteToColumn.size())); 274 return m_byteToColumn[n]; 275 } 276 277 /// Map a byte to the first column which contains it. 278 int byteToContainingColumn(int N) const { 279 assert(0 <= N && N < static_cast<int>(m_byteToColumn.size())); 280 while (m_byteToColumn[N] == -1) 281 --N; 282 return m_byteToColumn[N]; 283 } 284 285 /// Map a column to the byte which starts the column, or return -1 if 286 /// the column the second or subsequent column of an expanded tab or similar 287 /// multi-column entity. 288 int columnToByte(int n) const { 289 assert(0<=n && n<static_cast<int>(m_columnToByte.size())); 290 return m_columnToByte[n]; 291 } 292 293 /// Map from a byte index to the next byte which starts a column. 294 int startOfNextColumn(int N) const { 295 assert(0 <= N && N < static_cast<int>(m_byteToColumn.size() - 1)); 296 while (byteToColumn(++N) == -1) {} 297 return N; 298 } 299 300 /// Map from a byte index to the previous byte which starts a column. 301 int startOfPreviousColumn(int N) const { 302 assert(0 < N && N < static_cast<int>(m_byteToColumn.size())); 303 while (byteToColumn(--N) == -1) {} 304 return N; 305 } 306 307 StringRef getSourceLine() const { 308 return m_SourceLine; 309 } 310 311 private: 312 const std::string m_SourceLine; 313 SmallVector<int,200> m_byteToColumn; 314 SmallVector<int,200> m_columnToByte; 315 }; 316 } // end anonymous namespace 317 318 /// When the source code line we want to print is too long for 319 /// the terminal, select the "interesting" region. 
320 static void selectInterestingSourceRegion(std::string &SourceLine, 321 std::string &CaretLine, 322 std::string &FixItInsertionLine, 323 unsigned Columns, 324 const SourceColumnMap &map) { 325 unsigned CaretColumns = CaretLine.size(); 326 unsigned FixItColumns = llvm::sys::locale::columnWidth(FixItInsertionLine); 327 unsigned MaxColumns = std::max(static_cast<unsigned>(map.columns()), 328 std::max(CaretColumns, FixItColumns)); 329 // if the number of columns is less than the desired number we're done 330 if (MaxColumns <= Columns) 331 return; 332 333 // No special characters are allowed in CaretLine. 334 assert(llvm::none_of(CaretLine, [](char c) { return c < ' ' || '~' < c; })); 335 336 // Find the slice that we need to display the full caret line 337 // correctly. 338 unsigned CaretStart = 0, CaretEnd = CaretLine.size(); 339 for (; CaretStart != CaretEnd; ++CaretStart) 340 if (!isWhitespace(CaretLine[CaretStart])) 341 break; 342 343 for (; CaretEnd != CaretStart; --CaretEnd) 344 if (!isWhitespace(CaretLine[CaretEnd - 1])) 345 break; 346 347 // caret has already been inserted into CaretLine so the above whitespace 348 // check is guaranteed to include the caret 349 350 // If we have a fix-it line, make sure the slice includes all of the 351 // fix-it information. 352 if (!FixItInsertionLine.empty()) { 353 unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size(); 354 for (; FixItStart != FixItEnd; ++FixItStart) 355 if (!isWhitespace(FixItInsertionLine[FixItStart])) 356 break; 357 358 for (; FixItEnd != FixItStart; --FixItEnd) 359 if (!isWhitespace(FixItInsertionLine[FixItEnd - 1])) 360 break; 361 362 // We can safely use the byte offset FixItStart as the column offset 363 // because the characters up until FixItStart are all ASCII whitespace 364 // characters. 
365 unsigned FixItStartCol = FixItStart; 366 unsigned FixItEndCol 367 = llvm::sys::locale::columnWidth(FixItInsertionLine.substr(0, FixItEnd)); 368 369 CaretStart = std::min(FixItStartCol, CaretStart); 370 CaretEnd = std::max(FixItEndCol, CaretEnd); 371 } 372 373 // CaretEnd may have been set at the middle of a character 374 // If it's not at a character's first column then advance it past the current 375 // character. 376 while (static_cast<int>(CaretEnd) < map.columns() && 377 -1 == map.columnToByte(CaretEnd)) 378 ++CaretEnd; 379 380 assert((static_cast<int>(CaretStart) > map.columns() || 381 -1!=map.columnToByte(CaretStart)) && 382 "CaretStart must not point to a column in the middle of a source" 383 " line character"); 384 assert((static_cast<int>(CaretEnd) > map.columns() || 385 -1!=map.columnToByte(CaretEnd)) && 386 "CaretEnd must not point to a column in the middle of a source line" 387 " character"); 388 389 // CaretLine[CaretStart, CaretEnd) contains all of the interesting 390 // parts of the caret line. While this slice is smaller than the 391 // number of columns we have, try to grow the slice to encompass 392 // more context. 
393 394 unsigned SourceStart = map.columnToByte(std::min<unsigned>(CaretStart, 395 map.columns())); 396 unsigned SourceEnd = map.columnToByte(std::min<unsigned>(CaretEnd, 397 map.columns())); 398 399 unsigned CaretColumnsOutsideSource = CaretEnd-CaretStart 400 - (map.byteToColumn(SourceEnd)-map.byteToColumn(SourceStart)); 401 402 char const *front_ellipse = " ..."; 403 char const *front_space = " "; 404 char const *back_ellipse = "..."; 405 unsigned ellipses_space = strlen(front_ellipse) + strlen(back_ellipse); 406 407 unsigned TargetColumns = Columns; 408 // Give us extra room for the ellipses 409 // and any of the caret line that extends past the source 410 if (TargetColumns > ellipses_space+CaretColumnsOutsideSource) 411 TargetColumns -= ellipses_space+CaretColumnsOutsideSource; 412 413 while (SourceStart>0 || SourceEnd<SourceLine.size()) { 414 bool ExpandedRegion = false; 415 416 if (SourceStart>0) { 417 unsigned NewStart = map.startOfPreviousColumn(SourceStart); 418 419 // Skip over any whitespace we see here; we're looking for 420 // another bit of interesting text. 421 // FIXME: Detect non-ASCII whitespace characters too. 422 while (NewStart && isWhitespace(SourceLine[NewStart])) 423 NewStart = map.startOfPreviousColumn(NewStart); 424 425 // Skip over this bit of "interesting" text. 426 while (NewStart) { 427 unsigned Prev = map.startOfPreviousColumn(NewStart); 428 if (isWhitespace(SourceLine[Prev])) 429 break; 430 NewStart = Prev; 431 } 432 433 assert(map.byteToColumn(NewStart) != -1); 434 unsigned NewColumns = map.byteToColumn(SourceEnd) - 435 map.byteToColumn(NewStart); 436 if (NewColumns <= TargetColumns) { 437 SourceStart = NewStart; 438 ExpandedRegion = true; 439 } 440 } 441 442 if (SourceEnd<SourceLine.size()) { 443 unsigned NewEnd = map.startOfNextColumn(SourceEnd); 444 445 // Skip over any whitespace we see here; we're looking for 446 // another bit of interesting text. 447 // FIXME: Detect non-ASCII whitespace characters too. 
448 while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd])) 449 NewEnd = map.startOfNextColumn(NewEnd); 450 451 // Skip over this bit of "interesting" text. 452 while (NewEnd < SourceLine.size() && isWhitespace(SourceLine[NewEnd])) 453 NewEnd = map.startOfNextColumn(NewEnd); 454 455 assert(map.byteToColumn(NewEnd) != -1); 456 unsigned NewColumns = map.byteToColumn(NewEnd) - 457 map.byteToColumn(SourceStart); 458 if (NewColumns <= TargetColumns) { 459 SourceEnd = NewEnd; 460 ExpandedRegion = true; 461 } 462 } 463 464 if (!ExpandedRegion) 465 break; 466 } 467 468 CaretStart = map.byteToColumn(SourceStart); 469 CaretEnd = map.byteToColumn(SourceEnd) + CaretColumnsOutsideSource; 470 471 // [CaretStart, CaretEnd) is the slice we want. Update the various 472 // output lines to show only this slice. 473 assert(CaretStart!=(unsigned)-1 && CaretEnd!=(unsigned)-1 && 474 SourceStart!=(unsigned)-1 && SourceEnd!=(unsigned)-1); 475 assert(SourceStart <= SourceEnd); 476 assert(CaretStart <= CaretEnd); 477 478 unsigned BackColumnsRemoved 479 = map.byteToColumn(SourceLine.size())-map.byteToColumn(SourceEnd); 480 unsigned FrontColumnsRemoved = CaretStart; 481 unsigned ColumnsKept = CaretEnd-CaretStart; 482 483 // We checked up front that the line needed truncation 484 assert(FrontColumnsRemoved+ColumnsKept+BackColumnsRemoved > Columns); 485 486 // The line needs some truncation, and we'd prefer to keep the front 487 // if possible, so remove the back 488 if (BackColumnsRemoved > strlen(back_ellipse)) 489 SourceLine.replace(SourceEnd, std::string::npos, back_ellipse); 490 491 // If that's enough then we're done 492 if (FrontColumnsRemoved+ColumnsKept <= Columns) 493 return; 494 495 // Otherwise remove the front as well 496 if (FrontColumnsRemoved > strlen(front_ellipse)) { 497 SourceLine.replace(0, SourceStart, front_ellipse); 498 CaretLine.replace(0, CaretStart, front_space); 499 if (!FixItInsertionLine.empty()) 500 FixItInsertionLine.replace(0, CaretStart, 
front_space); 501 } 502 } 503 504 /// Skip over whitespace in the string, starting at the given 505 /// index. 506 /// 507 /// \returns The index of the first non-whitespace character that is 508 /// greater than or equal to Idx or, if no such character exists, 509 /// returns the end of the string. 510 static unsigned skipWhitespace(unsigned Idx, StringRef Str, unsigned Length) { 511 while (Idx < Length && isWhitespace(Str[Idx])) 512 ++Idx; 513 return Idx; 514 } 515 516 /// If the given character is the start of some kind of 517 /// balanced punctuation (e.g., quotes or parentheses), return the 518 /// character that will terminate the punctuation. 519 /// 520 /// \returns The ending punctuation character, if any, or the NULL 521 /// character if the input character does not start any punctuation. 522 static inline char findMatchingPunctuation(char c) { 523 switch (c) { 524 case '\'': return '\''; 525 case '`': return '\''; 526 case '"': return '"'; 527 case '(': return ')'; 528 case '[': return ']'; 529 case '{': return '}'; 530 default: break; 531 } 532 533 return 0; 534 } 535 536 /// Find the end of the word starting at the given offset 537 /// within a string. 538 /// 539 /// \returns the index pointing one character past the end of the 540 /// word. 541 static unsigned findEndOfWord(unsigned Start, StringRef Str, 542 unsigned Length, unsigned Column, 543 unsigned Columns) { 544 assert(Start < Str.size() && "Invalid start position!"); 545 unsigned End = Start + 1; 546 547 // If we are already at the end of the string, take that as the word. 548 if (End == Str.size()) 549 return End; 550 551 // Determine if the start of the string is actually opening 552 // punctuation, e.g., a quote or parentheses. 553 char EndPunct = findMatchingPunctuation(Str[Start]); 554 if (!EndPunct) { 555 // This is a normal word. Just find the first space character. 
556 while (End < Length && !isWhitespace(Str[End])) 557 ++End; 558 return End; 559 } 560 561 // We have the start of a balanced punctuation sequence (quotes, 562 // parentheses, etc.). Determine the full sequence is. 563 SmallString<16> PunctuationEndStack; 564 PunctuationEndStack.push_back(EndPunct); 565 while (End < Length && !PunctuationEndStack.empty()) { 566 if (Str[End] == PunctuationEndStack.back()) 567 PunctuationEndStack.pop_back(); 568 else if (char SubEndPunct = findMatchingPunctuation(Str[End])) 569 PunctuationEndStack.push_back(SubEndPunct); 570 571 ++End; 572 } 573 574 // Find the first space character after the punctuation ended. 575 while (End < Length && !isWhitespace(Str[End])) 576 ++End; 577 578 unsigned PunctWordLength = End - Start; 579 if (// If the word fits on this line 580 Column + PunctWordLength <= Columns || 581 // ... or the word is "short enough" to take up the next line 582 // without too much ugly white space 583 PunctWordLength < Columns/3) 584 return End; // Take the whole thing as a single "word". 585 586 // The whole quoted/parenthesized string is too long to print as a 587 // single "word". Instead, find the "word" that starts just after 588 // the punctuation and use that end-point instead. This will recurse 589 // until it finds something small enough to consider a word. 590 return findEndOfWord(Start + 1, Str, Length, Column + 1, Columns); 591 } 592 593 /// Print the given string to a stream, word-wrapping it to 594 /// some number of columns in the process. 595 /// 596 /// \param OS the stream to which the word-wrapping string will be 597 /// emitted. 598 /// \param Str the string to word-wrap and output. 599 /// \param Columns the number of columns to word-wrap to. 600 /// \param Column the column number at which the first character of \p 601 /// Str will be printed. This will be non-zero when part of the first 602 /// line has already been printed. 
603 /// \param Bold if the current text should be bold 604 /// \returns true if word-wrapping was required, or false if the 605 /// string fit on the first line. 606 static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns, 607 unsigned Column, bool Bold) { 608 const unsigned Length = std::min(Str.find('\n'), Str.size()); 609 bool TextNormal = true; 610 611 bool Wrapped = false; 612 for (unsigned WordStart = 0, WordEnd; WordStart < Length; 613 WordStart = WordEnd) { 614 // Find the beginning of the next word. 615 WordStart = skipWhitespace(WordStart, Str, Length); 616 if (WordStart == Length) 617 break; 618 619 // Find the end of this word. 620 WordEnd = findEndOfWord(WordStart, Str, Length, Column, Columns); 621 622 // Does this word fit on the current line? 623 unsigned WordLength = WordEnd - WordStart; 624 if (Column + WordLength < Columns) { 625 // This word fits on the current line; print it there. 626 if (WordStart) { 627 OS << ' '; 628 Column += 1; 629 } 630 applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), 631 TextNormal, Bold); 632 Column += WordLength; 633 continue; 634 } 635 636 // This word does not fit on the current line, so wrap to the next 637 // line. 638 OS << '\n'; 639 OS.indent(WordWrapIndentation); 640 applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength), 641 TextNormal, Bold); 642 Column = WordWrapIndentation + WordLength; 643 Wrapped = true; 644 } 645 646 // Append any remaning text from the message with its existing formatting. 
647 applyTemplateHighlighting(OS, Str.substr(Length), TextNormal, Bold); 648 649 assert(TextNormal && "Text highlighted at end of diagnostic message."); 650 651 return Wrapped; 652 } 653 654 TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts, 655 DiagnosticOptions &DiagOpts, 656 const Preprocessor *PP) 657 : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {} 658 659 TextDiagnostic::~TextDiagnostic() {} 660 661 void TextDiagnostic::emitDiagnosticMessage( 662 FullSourceLoc Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, 663 StringRef Message, ArrayRef<clang::CharSourceRange> Ranges, 664 DiagOrStoredDiag D) { 665 uint64_t StartOfLocationInfo = OS.tell(); 666 667 // Emit the location of this particular diagnostic. 668 if (Loc.isValid()) 669 emitDiagnosticLoc(Loc, PLoc, Level, Ranges); 670 671 if (DiagOpts.ShowColors) 672 OS.resetColor(); 673 674 if (DiagOpts.ShowLevel) 675 printDiagnosticLevel(OS, Level, DiagOpts.ShowColors); 676 printDiagnosticMessage(OS, 677 /*IsSupplemental*/ Level == DiagnosticsEngine::Note, 678 Message, OS.tell() - StartOfLocationInfo, 679 DiagOpts.MessageLength, DiagOpts.ShowColors); 680 } 681 682 /*static*/ void 683 TextDiagnostic::printDiagnosticLevel(raw_ostream &OS, 684 DiagnosticsEngine::Level Level, 685 bool ShowColors) { 686 if (ShowColors) { 687 // Print diagnostic category in bold and color 688 switch (Level) { 689 case DiagnosticsEngine::Ignored: 690 llvm_unreachable("Invalid diagnostic type"); 691 case DiagnosticsEngine::Note: OS.changeColor(noteColor, true); break; 692 case DiagnosticsEngine::Remark: OS.changeColor(remarkColor, true); break; 693 case DiagnosticsEngine::Warning: OS.changeColor(warningColor, true); break; 694 case DiagnosticsEngine::Error: OS.changeColor(errorColor, true); break; 695 case DiagnosticsEngine::Fatal: OS.changeColor(fatalColor, true); break; 696 } 697 } 698 699 switch (Level) { 700 case DiagnosticsEngine::Ignored: 701 llvm_unreachable("Invalid diagnostic type"); 702 
case DiagnosticsEngine::Note: OS << "note: "; break; 703 case DiagnosticsEngine::Remark: OS << "remark: "; break; 704 case DiagnosticsEngine::Warning: OS << "warning: "; break; 705 case DiagnosticsEngine::Error: OS << "error: "; break; 706 case DiagnosticsEngine::Fatal: OS << "fatal error: "; break; 707 } 708 709 if (ShowColors) 710 OS.resetColor(); 711 } 712 713 /*static*/ 714 void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS, 715 bool IsSupplemental, 716 StringRef Message, 717 unsigned CurrentColumn, 718 unsigned Columns, bool ShowColors) { 719 bool Bold = false; 720 if (ShowColors && !IsSupplemental) { 721 // Print primary diagnostic messages in bold and without color, to visually 722 // indicate the transition from continuation notes and other output. 723 OS.changeColor(savedColor, true); 724 Bold = true; 725 } 726 727 if (Columns) 728 printWordWrapped(OS, Message, Columns, CurrentColumn, Bold); 729 else { 730 bool Normal = true; 731 applyTemplateHighlighting(OS, Message, Normal, Bold); 732 assert(Normal && "Formatting should have returned to normal"); 733 } 734 735 if (ShowColors) 736 OS.resetColor(); 737 OS << '\n'; 738 } 739 740 void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) { 741 #ifdef _WIN32 742 SmallString<4096> TmpFilename; 743 #endif 744 if (DiagOpts.AbsolutePath) { 745 auto File = SM.getFileManager().getOptionalFileRef(Filename); 746 if (File) { 747 // We want to print a simplified absolute path, i. e. without "dots". 748 // 749 // The hardest part here are the paths like "<part1>/<link>/../<part2>". 750 // On Unix-like systems, we cannot just collapse "<link>/..", because 751 // paths are resolved sequentially, and, thereby, the path 752 // "<part1>/<part2>" may point to a different location. That is why 753 // we use FileManager::getCanonicalName(), which expands all indirections 754 // with llvm::sys::fs::real_path() and caches the result. 
755 // 756 // On the other hand, it would be better to preserve as much of the 757 // original path as possible, because that helps a user to recognize it. 758 // real_path() expands all links, which sometimes too much. Luckily, 759 // on Windows we can just use llvm::sys::path::remove_dots(), because, 760 // on that system, both aforementioned paths point to the same place. 761 #ifdef _WIN32 762 TmpFilename = File->getName(); 763 llvm::sys::fs::make_absolute(TmpFilename); 764 llvm::sys::path::native(TmpFilename); 765 llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true); 766 Filename = StringRef(TmpFilename.data(), TmpFilename.size()); 767 #else 768 Filename = SM.getFileManager().getCanonicalName(*File); 769 #endif 770 } 771 } 772 773 OS << Filename; 774 } 775 776 /// Print out the file/line/column information and include trace. 777 /// 778 /// This method handles the emission of the diagnostic location information. 779 /// This includes extracting as much location information as is present for 780 /// the diagnostic and printing it, as well as any include stack or source 781 /// ranges necessary. 
void TextDiagnostic::emitDiagnosticLoc(FullSourceLoc Loc, PresumedLoc PLoc,
                                       DiagnosticsEngine::Level Level,
                                       ArrayRef<CharSourceRange> Ranges) {
  // Writes the location prefix of a diagnostic to OS: the file name, then the
  // line and column using the punctuation of the selected diagnostic format,
  // then (optionally) the source ranges, and finally a single space.
  // \p Level is not consulted by this function.
  if (PLoc.isInvalid()) {
    // At least print the file name if available:
    if (FileID FID = Loc.getFileID(); FID.isValid()) {
      if (OptionalFileEntryRef FE = Loc.getFileEntryRef()) {
        emitFilename(FE->getName(), Loc.getManager());
        OS << ": ";
      }
    }
    return;
  }
  unsigned LineNo = PLoc.getLine();

  if (!DiagOpts.ShowLocation)
    return;

  // Only the bold attribute is changed here; the color is not reset before
  // this function returns.
  if (DiagOpts.ShowColors)
    OS.changeColor(savedColor, true);

  emitFilename(PLoc.getFilename(), Loc.getManager());
  // Line number, introduced by the separator the format expects
  // (':' for Clang/SARIF, '(' for MSVC, " +" for Vi).
  switch (DiagOpts.getFormat()) {
  case DiagnosticOptions::SARIF:
  case DiagnosticOptions::Clang:
    if (DiagOpts.ShowLine)
      OS << ':' << LineNo;
    break;
  case DiagnosticOptions::MSVC: OS << '(' << LineNo; break;
  case DiagnosticOptions::Vi: OS << " +" << LineNo; break;
  }

  if (DiagOpts.ShowColumn)
    // Compute the column number. A column of 0 prints nothing.
    if (unsigned ColNo = PLoc.getColumn()) {
      if (DiagOpts.getFormat() == DiagnosticOptions::MSVC) {
        OS << ',';
        // Visual Studio 2010 or earlier expects column number to be off by one
        if (LangOpts.MSCompatibilityVersion &&
            !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2012))
          ColNo--;
      } else
        OS << ':';
      OS << ColNo;
    }
  // Terminator that closes the line/column part for each format.
  switch (DiagOpts.getFormat()) {
  case DiagnosticOptions::SARIF:
  case DiagnosticOptions::Clang:
  case DiagnosticOptions::Vi: OS << ':'; break;
  case DiagnosticOptions::MSVC:
    // MSVC2013 and before print 'file(4) : error'. MSVC2015 gets rid of the
    // space and prints 'file(4): error'.
    OS << ')';
    if (LangOpts.MSCompatibilityVersion &&
        !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
      OS << ' ';
    OS << ':';
    break;
  }

  // Optionally print each range as "{line:col-line:col}", followed by one
  // ':' if at least one range was printed.
  if (DiagOpts.ShowSourceRanges && !Ranges.empty()) {
    FileID CaretFileID = Loc.getExpansionLoc().getFileID();
    bool PrintedRange = false;
    const SourceManager &SM = Loc.getManager();

    for (const auto &R : Ranges) {
      // Ignore invalid ranges.
      if (!R.isValid())
        continue;

      SourceLocation B = SM.getExpansionLoc(R.getBegin());
      CharSourceRange ERange = SM.getExpansionRange(R.getEnd());
      SourceLocation E = ERange.getEnd();

      // If the start or end of the range is in another file, just
      // discard it.
      if (SM.getFileID(B) != CaretFileID || SM.getFileID(E) != CaretFileID)
        continue;

      // Add in the length of the token, so that we cover multi-char
      // tokens.
      unsigned TokSize = 0;
      if (ERange.isTokenRange())
        TokSize = Lexer::MeasureTokenLength(E, SM, LangOpts);

      FullSourceLoc BF(B, SM), EF(E, SM);
      OS << '{'
         << BF.getLineNumber() << ':' << BF.getColumnNumber() << '-'
         << EF.getLineNumber() << ':' << (EF.getColumnNumber() + TokSize)
         << '}';
      PrintedRange = true;
    }

    if (PrintedRange)
      OS << ':';
  }
  OS << ' ';
}

/// Print the "In file included from <file>:<line>:" line, or the generic
/// "In included file:" line when locations are not shown or \p PLoc is
/// invalid.
void TextDiagnostic::emitIncludeLocation(FullSourceLoc Loc, PresumedLoc PLoc) {
  if (DiagOpts.ShowLocation && PLoc.isValid()) {
    OS << "In file included from ";
    emitFilename(PLoc.getFilename(), Loc.getManager());
    OS << ':' << PLoc.getLine() << ":\n";
  } else
    OS << "In included file:\n";
}

/// Print the "In module '<name>' imported from <file>:<line>:" line, or just
/// "In module '<name>':" when locations are not shown or \p PLoc is invalid.
///
/// NOTE(review): the file name is streamed directly from \p PLoc here, while
/// emitIncludeLocation passes it through emitFilename; confirm the difference
/// is intended. \p Loc is not used.
void TextDiagnostic::emitImportLocation(FullSourceLoc Loc, PresumedLoc PLoc,
                                        StringRef ModuleName) {
  if (DiagOpts.ShowLocation && PLoc.isValid())
    OS << "In module '" << ModuleName << "' imported from "
       << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
  else
    OS << "In module '" << ModuleName << "':\n";
}

/// Print the "While building module '<name>' imported from <file>:<line>:"
/// line, or just "While building module '<name>':" when locations are not
/// shown or \p PLoc is invalid. \p Loc is not used.
void TextDiagnostic::emitBuildingModuleLocation(FullSourceLoc Loc,
                                                PresumedLoc PLoc,
                                                StringRef ModuleName) {
  if (DiagOpts.ShowLocation && PLoc.isValid())
    OS << "While building module '" << ModuleName << "' imported from "
       << PLoc.getFilename() << ':' << PLoc.getLine() << ":\n";
  else
    OS << "While building module '" << ModuleName << "':\n";
}

/// Find the suitable set of lines to show to include a set of ranges.
///
/// \returns the expansion line numbers of the begin and end of \p R as a
/// (first, last) pair, or std::nullopt if \p R is invalid or either endpoint
/// is not in \p FID.
static std::optional<std::pair<unsigned, unsigned>>
findLinesForRange(const CharSourceRange &R, FileID FID,
                  const SourceManager &SM) {
  if (!R.isValid())
    return std::nullopt;

  SourceLocation Begin = R.getBegin();
  SourceLocation End = R.getEnd();
  if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID)
    return std::nullopt;

  return std::make_pair(SM.getExpansionLineNumber(Begin),
                        SM.getExpansionLineNumber(End));
}

/// Add as much of range B into range A as possible without exceeding a maximum
/// size of MaxRange. Ranges are inclusive.
///
/// NOTE(review): all arithmetic here is unsigned. The Slack computation
/// assumes A spans at most MaxRange lines; if it does not (for example
/// MaxRange == 0) the subtraction wraps around instead of going negative.
/// Confirm callers never pass such values.
static std::pair<unsigned, unsigned>
maybeAddRange(std::pair<unsigned, unsigned> A, std::pair<unsigned, unsigned> B,
              unsigned MaxRange) {
  // If A is already the maximum size, we're done.
  unsigned Slack = MaxRange - (A.second - A.first + 1);
  if (Slack == 0)
    return A;

  // Easy case: merge succeeds within MaxRange.
  unsigned Min = std::min(A.first, B.first);
  unsigned Max = std::max(A.second, B.second);
  if (Max - Min + 1 <= MaxRange)
    return {Min, Max};

  // If we can't reach B from A within MaxRange, there's nothing to do.
  // Don't add lines to the range that contain nothing interesting.
  if ((B.first > A.first && B.first - A.first + 1 > MaxRange) ||
      (B.second < A.second && A.second - B.second + 1 > MaxRange))
    return A;

  // Otherwise, expand A towards B to produce a range of size MaxRange. We
  // attempt to expand by the same amount in both directions if B strictly
  // contains A.

  // Expand downwards by up to half the available amount, then upwards as
  // much as possible, then downwards as much as possible.
  A.second = std::min(A.second + (Slack + 1) / 2, Max);
  Slack = MaxRange - (A.second - A.first + 1);
  A.first = std::max(Min + Slack, A.first) - Slack;
  A.second = std::min(A.first + MaxRange - 1, Max);
  return A;
}

/// The part of a highlighted range that falls on one source line.
struct LineRange {
  // Line this piece applies to.
  unsigned LineNo;
  // First highlighted position; used as an index into the source line.
  unsigned StartCol;
  // One past the last highlighted position; values larger than the line are
  // clamped to the line length by highlightRange.
  unsigned EndCol;
};

/// Highlight \p R (with ~'s) on the current source line.
///
/// Leading and trailing spaces/tabs inside the range are not highlighted.
/// \p CaretLine is grown with spaces as needed; nothing is written if the
/// range covers only whitespace.
static void highlightRange(const LineRange &R, const SourceColumnMap &Map,
                           std::string &CaretLine) {
  // Pick the first non-whitespace column.
  unsigned StartColNo = R.StartCol;
  while (StartColNo < Map.getSourceLine().size() &&
         (Map.getSourceLine()[StartColNo] == ' ' ||
          Map.getSourceLine()[StartColNo] == '\t'))
    StartColNo = Map.startOfNextColumn(StartColNo);

  // Pick the last non-whitespace column.
  unsigned EndColNo =
      std::min(static_cast<size_t>(R.EndCol), Map.getSourceLine().size());
  while (EndColNo && (Map.getSourceLine()[EndColNo - 1] == ' ' ||
                      Map.getSourceLine()[EndColNo - 1] == '\t'))
    EndColNo = Map.startOfPreviousColumn(EndColNo);

  // If the start/end passed each other, then we are trying to highlight a
  // range that just exists in whitespace. That most likely means we have
  // a multi-line highlighting range that covers a blank line.
  if (StartColNo > EndColNo)
    return;

  // Fill the range with ~'s. From here on the two values are printed
  // columns rather than byte offsets into the source line.
  StartColNo = Map.byteToContainingColumn(StartColNo);
  EndColNo = Map.byteToContainingColumn(EndColNo);

  assert(StartColNo <= EndColNo && "Invalid range!");
  if (CaretLine.size() < EndColNo)
    CaretLine.resize(EndColNo, ' ');
  std::fill(CaretLine.begin() + StartColNo, CaretLine.begin() + EndColNo, '~');
}

/// Build the line printed below the caret line that shows the text of
/// insertion fix-its.
///
/// Only hints with non-empty insertion text that contains no '\n' or '\r',
/// and whose location is on line \p LineNo of \p FID, contribute. The result
/// is empty if there are no hints or DiagOpts.ShowFixits is off; otherwise
/// tabs in it are expanded using DiagOpts.TabStop before it is returned.
static std::string buildFixItInsertionLine(FileID FID, unsigned LineNo,
                                           const SourceColumnMap &map,
                                           ArrayRef<FixItHint> Hints,
                                           const SourceManager &SM,
                                           const DiagnosticOptions &DiagOpts) {
  std::string FixItInsertionLine;
  if (Hints.empty() || !DiagOpts.ShowFixits)
    return FixItInsertionLine;
  // Printed column just past the end of the previously placed hint.
  unsigned PrevHintEndCol = 0;

  for (const auto &H : Hints) {
    if (H.CodeToInsert.empty())
      continue;

    // We have an insertion hint. Determine whether the inserted
    // code contains no newlines and is on the same line as the caret.
    FileIDAndOffset HintLocInfo =
        SM.getDecomposedExpansionLoc(H.RemoveRange.getBegin());
    if (FID == HintLocInfo.first &&
        LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
        StringRef(H.CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
      // Insert the new code into the line just below the code
      // that the user wrote.
      // Note: When modifying this function, be very careful about what is a
      // "column" (printed width, platform-dependent) and what is a
      // "byte offset" (SourceManager "column").
      unsigned HintByteOffset =
          SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;

      // The hint must start inside the source or right at the end
      assert(HintByteOffset < static_cast<unsigned>(map.bytes()) + 1);
      unsigned HintCol = map.byteToContainingColumn(HintByteOffset);

      // If we inserted a long previous hint, push this one forwards, and add
      // an extra space to show that this is not part of the previous
      // completion. This is sort of the best we can do when two hints appear
      // to overlap.
      //
      // Note that if this hint is located immediately after the previous
      // hint, no space will be added, since the location is more important.
      if (HintCol < PrevHintEndCol)
        HintCol = PrevHintEndCol + 1;

      // This should NOT use HintByteOffset, because the source might have
      // Unicode characters in earlier columns.
      unsigned NewFixItLineSize = FixItInsertionLine.size() +
                                  (HintCol - PrevHintEndCol) +
                                  H.CodeToInsert.size();
      if (NewFixItLineSize > FixItInsertionLine.size())
        FixItInsertionLine.resize(NewFixItLineSize, ' ');

      // The line was just grown by padding plus the hint's byte length, so
      // the hint text goes into the last CodeToInsert.size() bytes.
      std::copy(H.CodeToInsert.begin(), H.CodeToInsert.end(),
                FixItInsertionLine.end() - H.CodeToInsert.size());

      PrevHintEndCol = HintCol + llvm::sys::locale::columnWidth(H.CodeToInsert);
    }
  }

  expandTabs(FixItInsertionLine, DiagOpts.TabStop);

  return FixItInsertionLine;
}

/// Return the number of decimal digits needed to print \p N (at least 1).
///
/// The loop stops once the count reaches digits10 + 1 for unsigned, which
/// also prevents the power-of-ten accumulator M from being multiplied past
/// that point.
static unsigned getNumDisplayWidth(unsigned N) {
  unsigned L = 1u, M = 10u;
  while (M <= N && ++L != std::numeric_limits<unsigned>::digits10 + 1)
    M *= 10u;

  return L;
}

/// Filter out invalid ranges, ranges that don't fit into the window of
/// source lines we will print, and ranges from other files.
///
/// For the remaining ranges, convert them to simple LineRange structs,
/// which only cover one line at a time.
1075 static SmallVector<LineRange> 1076 prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges, 1077 const SourceManager &SM, 1078 const std::pair<unsigned, unsigned> &Lines, FileID FID, 1079 const LangOptions &LangOpts) { 1080 SmallVector<LineRange> LineRanges; 1081 1082 for (const CharSourceRange &R : Ranges) { 1083 if (R.isInvalid()) 1084 continue; 1085 SourceLocation Begin = R.getBegin(); 1086 SourceLocation End = R.getEnd(); 1087 1088 unsigned StartLineNo = SM.getExpansionLineNumber(Begin); 1089 if (StartLineNo > Lines.second || SM.getFileID(Begin) != FID) 1090 continue; 1091 1092 unsigned EndLineNo = SM.getExpansionLineNumber(End); 1093 if (EndLineNo < Lines.first || SM.getFileID(End) != FID) 1094 continue; 1095 1096 unsigned StartColumn = SM.getExpansionColumnNumber(Begin); 1097 unsigned EndColumn = SM.getExpansionColumnNumber(End); 1098 if (R.isTokenRange()) 1099 EndColumn += Lexer::MeasureTokenLength(End, SM, LangOpts); 1100 1101 // Only a single line. 1102 if (StartLineNo == EndLineNo) { 1103 LineRanges.push_back({StartLineNo, StartColumn - 1, EndColumn - 1}); 1104 continue; 1105 } 1106 1107 // Start line. 1108 LineRanges.push_back({StartLineNo, StartColumn - 1, ~0u}); 1109 1110 // Middle lines. 1111 for (unsigned S = StartLineNo + 1; S != EndLineNo; ++S) 1112 LineRanges.push_back({S, 0, ~0u}); 1113 1114 // End line. 1115 LineRanges.push_back({EndLineNo, 0, EndColumn - 1}); 1116 } 1117 1118 return LineRanges; 1119 } 1120 1121 /// Creates syntax highlighting information in form of StyleRanges. 1122 /// 1123 /// The returned unique ptr has always exactly size 1124 /// (\p EndLineNumber - \p StartLineNumber + 1). Each SmallVector in there 1125 /// corresponds to syntax highlighting information in one line. In each line, 1126 /// the StyleRanges are non-overlapping and sorted from start to end of the 1127 /// line. 
1128 static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]> 1129 highlightLines(StringRef FileData, unsigned StartLineNumber, 1130 unsigned EndLineNumber, const Preprocessor *PP, 1131 const LangOptions &LangOpts, bool ShowColors, FileID FID, 1132 const SourceManager &SM) { 1133 assert(StartLineNumber <= EndLineNumber); 1134 auto SnippetRanges = 1135 std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>( 1136 EndLineNumber - StartLineNumber + 1); 1137 1138 if (!PP || !ShowColors) 1139 return SnippetRanges; 1140 1141 // Might cause emission of another diagnostic. 1142 if (PP->getIdentifierTable().getExternalIdentifierLookup()) 1143 return SnippetRanges; 1144 1145 auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData); 1146 Lexer L{FID, *Buff, SM, LangOpts}; 1147 L.SetKeepWhitespaceMode(true); 1148 1149 const char *FirstLineStart = 1150 FileData.data() + 1151 SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second; 1152 if (const char *CheckPoint = PP->getCheckPoint(FID, FirstLineStart)) { 1153 assert(CheckPoint >= Buff->getBufferStart() && 1154 CheckPoint <= Buff->getBufferEnd()); 1155 assert(CheckPoint <= FirstLineStart); 1156 size_t Offset = CheckPoint - Buff->getBufferStart(); 1157 L.seek(Offset, /*IsAtStartOfLine=*/false); 1158 } 1159 1160 // Classify the given token and append it to the given vector. 1161 auto appendStyle = 1162 [PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec, 1163 const Token &T, unsigned Start, unsigned Length) -> void { 1164 if (T.is(tok::raw_identifier)) { 1165 StringRef RawIdent = T.getRawIdentifier(); 1166 // Special case true/false/nullptr/... literals, since they will otherwise 1167 // be treated as keywords. 1168 // FIXME: It would be good to have a programmatic way of getting this 1169 // list. 
1170 if (llvm::StringSwitch<bool>(RawIdent) 1171 .Case("true", true) 1172 .Case("false", true) 1173 .Case("nullptr", true) 1174 .Case("__func__", true) 1175 .Case("__objc_yes__", true) 1176 .Case("__objc_no__", true) 1177 .Case("__null", true) 1178 .Case("__FUNCDNAME__", true) 1179 .Case("__FUNCSIG__", true) 1180 .Case("__FUNCTION__", true) 1181 .Case("__FUNCSIG__", true) 1182 .Default(false)) { 1183 Vec.emplace_back(Start, Start + Length, LiteralColor); 1184 } else { 1185 const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent); 1186 assert(II); 1187 if (II->isKeyword(LangOpts)) 1188 Vec.emplace_back(Start, Start + Length, KeywordColor); 1189 } 1190 } else if (tok::isLiteral(T.getKind())) { 1191 Vec.emplace_back(Start, Start + Length, LiteralColor); 1192 } else { 1193 assert(T.is(tok::comment)); 1194 Vec.emplace_back(Start, Start + Length, CommentColor); 1195 } 1196 }; 1197 1198 bool Stop = false; 1199 while (!Stop) { 1200 Token T; 1201 Stop = L.LexFromRawLexer(T); 1202 if (T.is(tok::unknown)) 1203 continue; 1204 1205 // We are only interested in identifiers, literals and comments. 1206 if (!T.is(tok::raw_identifier) && !T.is(tok::comment) && 1207 !tok::isLiteral(T.getKind())) 1208 continue; 1209 1210 bool Invalid = false; 1211 unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid); 1212 if (Invalid || TokenEndLine < StartLineNumber) 1213 continue; 1214 1215 assert(TokenEndLine >= StartLineNumber); 1216 1217 unsigned TokenStartLine = 1218 SM.getSpellingLineNumber(T.getLocation(), &Invalid); 1219 if (Invalid) 1220 continue; 1221 // If this happens, we're done. 1222 if (TokenStartLine > EndLineNumber) 1223 break; 1224 1225 unsigned StartCol = 1226 SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1; 1227 if (Invalid) 1228 continue; 1229 1230 // Simple tokens. 
1231 if (TokenStartLine == TokenEndLine) { 1232 SmallVector<TextDiagnostic::StyleRange> &LineRanges = 1233 SnippetRanges[TokenStartLine - StartLineNumber]; 1234 appendStyle(LineRanges, T, StartCol, T.getLength()); 1235 continue; 1236 } 1237 assert((TokenEndLine - TokenStartLine) >= 1); 1238 1239 // For tokens that span multiple lines (think multiline comments), we 1240 // divide them into multiple StyleRanges. 1241 unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1; 1242 if (Invalid) 1243 continue; 1244 1245 std::string Spelling = Lexer::getSpelling(T, SM, LangOpts); 1246 1247 unsigned L = TokenStartLine; 1248 unsigned LineLength = 0; 1249 for (unsigned I = 0; I <= Spelling.size(); ++I) { 1250 // This line is done. 1251 if (I == Spelling.size() || isVerticalWhitespace(Spelling[I])) { 1252 if (L >= StartLineNumber) { 1253 SmallVector<TextDiagnostic::StyleRange> &LineRanges = 1254 SnippetRanges[L - StartLineNumber]; 1255 1256 if (L == TokenStartLine) // First line 1257 appendStyle(LineRanges, T, StartCol, LineLength); 1258 else if (L == TokenEndLine) // Last line 1259 appendStyle(LineRanges, T, 0, EndCol); 1260 else 1261 appendStyle(LineRanges, T, 0, LineLength); 1262 } 1263 1264 ++L; 1265 if (L > EndLineNumber) 1266 break; 1267 LineLength = 0; 1268 continue; 1269 } 1270 ++LineLength; 1271 } 1272 } 1273 1274 return SnippetRanges; 1275 } 1276 1277 /// Emit a code snippet and caret line. 1278 /// 1279 /// This routine emits a single line's code snippet and caret line.. 1280 /// 1281 /// \param Loc The location for the caret. 1282 /// \param Ranges The underlined ranges for this code snippet. 1283 /// \param Hints The FixIt hints active for this diagnostic. 
1284 void TextDiagnostic::emitSnippetAndCaret( 1285 FullSourceLoc Loc, DiagnosticsEngine::Level Level, 1286 SmallVectorImpl<CharSourceRange> &Ranges, ArrayRef<FixItHint> Hints) { 1287 assert(Loc.isValid() && "must have a valid source location here"); 1288 assert(Loc.isFileID() && "must have a file location here"); 1289 1290 // If caret diagnostics are enabled and we have location, we want to 1291 // emit the caret. However, we only do this if the location moved 1292 // from the last diagnostic, if the last diagnostic was a note that 1293 // was part of a different warning or error diagnostic, or if the 1294 // diagnostic has ranges. We don't want to emit the same caret 1295 // multiple times if one loc has multiple diagnostics. 1296 if (!DiagOpts.ShowCarets) 1297 return; 1298 if (Loc == LastLoc && Ranges.empty() && Hints.empty() && 1299 (LastLevel != DiagnosticsEngine::Note || Level == LastLevel)) 1300 return; 1301 1302 FileID FID = Loc.getFileID(); 1303 const SourceManager &SM = Loc.getManager(); 1304 1305 // Get information about the buffer it points into. 1306 bool Invalid = false; 1307 StringRef BufData = Loc.getBufferData(&Invalid); 1308 if (Invalid) 1309 return; 1310 const char *BufStart = BufData.data(); 1311 const char *BufEnd = BufStart + BufData.size(); 1312 1313 unsigned CaretLineNo = Loc.getLineNumber(); 1314 unsigned CaretColNo = Loc.getColumnNumber(); 1315 1316 // Arbitrarily stop showing snippets when the line is too long. 1317 static const size_t MaxLineLengthToPrint = 4096; 1318 if (CaretColNo > MaxLineLengthToPrint) 1319 return; 1320 1321 // Find the set of lines to include. 
1322 const unsigned MaxLines = DiagOpts.SnippetLineLimit; 1323 std::pair<unsigned, unsigned> Lines = {CaretLineNo, CaretLineNo}; 1324 unsigned DisplayLineNo = Loc.getPresumedLoc().getLine(); 1325 for (const auto &I : Ranges) { 1326 if (auto OptionalRange = findLinesForRange(I, FID, SM)) 1327 Lines = maybeAddRange(Lines, *OptionalRange, MaxLines); 1328 1329 DisplayLineNo = 1330 std::min(DisplayLineNo, SM.getPresumedLineNumber(I.getBegin())); 1331 } 1332 1333 // Our line numbers look like: 1334 // " [number] | " 1335 // Where [number] is MaxLineNoDisplayWidth columns 1336 // and the full thing is therefore MaxLineNoDisplayWidth + 4 columns. 1337 unsigned MaxLineNoDisplayWidth = 1338 DiagOpts.ShowLineNumbers 1339 ? std::max(4u, getNumDisplayWidth(DisplayLineNo + MaxLines)) 1340 : 0; 1341 auto indentForLineNumbers = [&] { 1342 if (MaxLineNoDisplayWidth > 0) 1343 OS.indent(MaxLineNoDisplayWidth + 2) << "| "; 1344 }; 1345 1346 // Prepare source highlighting information for the lines we're about to 1347 // emit, starting from the first line. 1348 std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles = 1349 highlightLines(BufData, Lines.first, Lines.second, PP, LangOpts, 1350 DiagOpts.ShowColors, FID, SM); 1351 1352 SmallVector<LineRange> LineRanges = 1353 prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts); 1354 1355 for (unsigned LineNo = Lines.first; LineNo != Lines.second + 1; 1356 ++LineNo, ++DisplayLineNo) { 1357 // Rewind from the current position to the start of the line. 1358 const char *LineStart = 1359 BufStart + 1360 SM.getDecomposedLoc(SM.translateLineCol(FID, LineNo, 1)).second; 1361 if (LineStart == BufEnd) 1362 break; 1363 1364 // Compute the line end. 1365 const char *LineEnd = LineStart; 1366 while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd) 1367 ++LineEnd; 1368 1369 // Arbitrarily stop showing snippets when the line is too long. 1370 // FIXME: Don't print any lines in this case. 
1371 if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint) 1372 return; 1373 1374 // Copy the line of code into an std::string for ease of manipulation. 1375 std::string SourceLine(LineStart, LineEnd); 1376 // Remove trailing null bytes. 1377 while (!SourceLine.empty() && SourceLine.back() == '\0' && 1378 (LineNo != CaretLineNo || SourceLine.size() > CaretColNo)) 1379 SourceLine.pop_back(); 1380 1381 // Build the byte to column map. 1382 const SourceColumnMap sourceColMap(SourceLine, DiagOpts.TabStop); 1383 1384 std::string CaretLine; 1385 // Highlight all of the characters covered by Ranges with ~ characters. 1386 for (const auto &LR : LineRanges) { 1387 if (LR.LineNo == LineNo) 1388 highlightRange(LR, sourceColMap, CaretLine); 1389 } 1390 1391 // Next, insert the caret itself. 1392 if (CaretLineNo == LineNo) { 1393 size_t Col = sourceColMap.byteToContainingColumn(CaretColNo - 1); 1394 CaretLine.resize(std::max(Col + 1, CaretLine.size()), ' '); 1395 CaretLine[Col] = '^'; 1396 } 1397 1398 std::string FixItInsertionLine = 1399 buildFixItInsertionLine(FID, LineNo, sourceColMap, Hints, SM, DiagOpts); 1400 1401 // If the source line is too long for our terminal, select only the 1402 // "interesting" source region within that line. 1403 unsigned Columns = DiagOpts.MessageLength; 1404 if (Columns) 1405 selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, 1406 Columns, sourceColMap); 1407 1408 // If we are in -fdiagnostics-print-source-range-info mode, we are trying 1409 // to produce easily machine parsable output. Add a space before the 1410 // source line and the caret to make it trivial to tell the main diagnostic 1411 // line from what the user is intended to see. 1412 if (DiagOpts.ShowSourceRanges && !SourceLine.empty()) { 1413 SourceLine = ' ' + SourceLine; 1414 CaretLine = ' ' + CaretLine; 1415 } 1416 1417 // Emit what we have computed. 
1418 emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo, 1419 SourceStyles[LineNo - Lines.first]); 1420 1421 if (!CaretLine.empty()) { 1422 indentForLineNumbers(); 1423 if (DiagOpts.ShowColors) 1424 OS.changeColor(caretColor, true); 1425 OS << CaretLine << '\n'; 1426 if (DiagOpts.ShowColors) 1427 OS.resetColor(); 1428 } 1429 1430 if (!FixItInsertionLine.empty()) { 1431 indentForLineNumbers(); 1432 if (DiagOpts.ShowColors) 1433 // Print fixit line in color 1434 OS.changeColor(fixitColor, false); 1435 if (DiagOpts.ShowSourceRanges) 1436 OS << ' '; 1437 OS << FixItInsertionLine << '\n'; 1438 if (DiagOpts.ShowColors) 1439 OS.resetColor(); 1440 } 1441 } 1442 1443 // Print out any parseable fixit information requested by the options. 1444 emitParseableFixits(Hints, SM); 1445 } 1446 1447 void TextDiagnostic::emitSnippet(StringRef SourceLine, 1448 unsigned MaxLineNoDisplayWidth, 1449 unsigned LineNo, unsigned DisplayLineNo, 1450 ArrayRef<StyleRange> Styles) { 1451 // Emit line number. 1452 if (MaxLineNoDisplayWidth > 0) { 1453 unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo); 1454 OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1) 1455 << DisplayLineNo << " | "; 1456 } 1457 1458 // Print the source line one character at a time. 1459 bool PrintReversed = false; 1460 std::optional<llvm::raw_ostream::Colors> CurrentColor; 1461 size_t I = 0; 1462 while (I < SourceLine.size()) { 1463 auto [Str, WasPrintable] = 1464 printableTextForNextCharacter(SourceLine, &I, DiagOpts.TabStop); 1465 1466 // Toggle inverted colors on or off for this character. 1467 if (DiagOpts.ShowColors) { 1468 if (WasPrintable == PrintReversed) { 1469 PrintReversed = !PrintReversed; 1470 if (PrintReversed) 1471 OS.reverseColor(); 1472 else { 1473 OS.resetColor(); 1474 CurrentColor = std::nullopt; 1475 } 1476 } 1477 1478 // Apply syntax highlighting information. 
1479 const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) { 1480 return (R.Start < I && R.End >= I); 1481 }); 1482 1483 if (CharStyle != Styles.end()) { 1484 if (!CurrentColor || 1485 (CurrentColor && *CurrentColor != CharStyle->Color)) { 1486 OS.changeColor(CharStyle->Color, false); 1487 CurrentColor = CharStyle->Color; 1488 } 1489 } else if (CurrentColor) { 1490 OS.resetColor(); 1491 CurrentColor = std::nullopt; 1492 } 1493 } 1494 1495 OS << Str; 1496 } 1497 1498 if (DiagOpts.ShowColors) 1499 OS.resetColor(); 1500 1501 OS << '\n'; 1502 } 1503 1504 void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints, 1505 const SourceManager &SM) { 1506 if (!DiagOpts.ShowParseableFixits) 1507 return; 1508 1509 // We follow FixItRewriter's example in not (yet) handling 1510 // fix-its in macros. 1511 for (const auto &H : Hints) { 1512 if (H.RemoveRange.isInvalid() || H.RemoveRange.getBegin().isMacroID() || 1513 H.RemoveRange.getEnd().isMacroID()) 1514 return; 1515 } 1516 1517 for (const auto &H : Hints) { 1518 SourceLocation BLoc = H.RemoveRange.getBegin(); 1519 SourceLocation ELoc = H.RemoveRange.getEnd(); 1520 1521 FileIDAndOffset BInfo = SM.getDecomposedLoc(BLoc); 1522 FileIDAndOffset EInfo = SM.getDecomposedLoc(ELoc); 1523 1524 // Adjust for token ranges. 1525 if (H.RemoveRange.isTokenRange()) 1526 EInfo.second += Lexer::MeasureTokenLength(ELoc, SM, LangOpts); 1527 1528 // We specifically do not do word-wrapping or tab-expansion here, 1529 // because this is supposed to be easy to parse. 
1530 PresumedLoc PLoc = SM.getPresumedLoc(BLoc); 1531 if (PLoc.isInvalid()) 1532 break; 1533 1534 OS << "fix-it:\""; 1535 OS.write_escaped(PLoc.getFilename()); 1536 OS << "\":{" << SM.getLineNumber(BInfo.first, BInfo.second) 1537 << ':' << SM.getColumnNumber(BInfo.first, BInfo.second) 1538 << '-' << SM.getLineNumber(EInfo.first, EInfo.second) 1539 << ':' << SM.getColumnNumber(EInfo.first, EInfo.second) 1540 << "}:\""; 1541 OS.write_escaped(H.CodeToInsert); 1542 OS << "\"\n"; 1543 } 1544 } 1545