1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SourceMgr class. This class is used as a simple 10 // substrate for diagnostics, #include handling, and other low level things for 11 // simple parsers. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Support/SourceMgr.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/Support/ErrorOr.h" 22 #include "llvm/Support/Locale.h" 23 #include "llvm/Support/MemoryBuffer.h" 24 #include "llvm/Support/Path.h" 25 #include "llvm/Support/SMLoc.h" 26 #include "llvm/Support/WithColor.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstddef> 31 #include <limits> 32 #include <memory> 33 #include <string> 34 #include <utility> 35 36 using namespace llvm; 37 38 static const size_t TabStop = 8; 39 40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename, 41 SMLoc IncludeLoc, 42 std::string &IncludedFile) { 43 IncludedFile = Filename; 44 ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = 45 MemoryBuffer::getFile(IncludedFile); 46 47 // If the file didn't exist directly, see if it's in an include path. 48 for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr; 49 ++i) { 50 IncludedFile = 51 IncludeDirectories[i] + sys::path::get_separator().data() + Filename; 52 NewBufOrErr = MemoryBuffer::getFile(IncludedFile); 53 } 54 55 if (!NewBufOrErr) 56 return 0; 57 58 return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc); 59 } 60 61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { 62 for (unsigned i = 0, e = Buffers.size(); i != e; ++i) 63 if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && 64 // Use <= here so that a pointer to the null at the end of the buffer 65 // is included as part of the buffer. 66 Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) 67 return i + 1; 68 return 0; 69 } 70 71 template <typename T> 72 static std::vector<T> &GetOrCreateOffsetCache(void *&OffsetCache, 73 MemoryBuffer *Buffer) { 74 if (OffsetCache) 75 return *static_cast<std::vector<T> *>(OffsetCache); 76 77 // Lazily fill in the offset cache. 78 auto *Offsets = new std::vector<T>(); 79 size_t Sz = Buffer->getBufferSize(); 80 assert(Sz <= std::numeric_limits<T>::max()); 81 StringRef S = Buffer->getBuffer(); 82 for (size_t N = 0; N < Sz; ++N) { 83 if (S[N] == '\n') 84 Offsets->push_back(static_cast<T>(N)); 85 } 86 87 OffsetCache = Offsets; 88 return *Offsets; 89 } 90 91 template <typename T> 92 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const { 93 std::vector<T> &Offsets = 94 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get()); 95 96 const char *BufStart = Buffer->getBufferStart(); 97 assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); 98 ptrdiff_t PtrDiff = Ptr - BufStart; 99 assert(PtrDiff >= 0 && 100 static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max()); 101 T PtrOffset = static_cast<T>(PtrDiff); 102 103 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get 104 // the line number. 105 return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1; 106 } 107 108 /// Look up a given \p Ptr in in the buffer, determining which line it came 109 /// from. 110 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { 111 size_t Sz = Buffer->getBufferSize(); 112 if (Sz <= std::numeric_limits<uint8_t>::max()) 113 return getLineNumberSpecialized<uint8_t>(Ptr); 114 else if (Sz <= std::numeric_limits<uint16_t>::max()) 115 return getLineNumberSpecialized<uint16_t>(Ptr); 116 else if (Sz <= std::numeric_limits<uint32_t>::max()) 117 return getLineNumberSpecialized<uint32_t>(Ptr); 118 else 119 return getLineNumberSpecialized<uint64_t>(Ptr); 120 } 121 122 template <typename T> 123 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized( 124 unsigned LineNo) const { 125 std::vector<T> &Offsets = 126 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get()); 127 128 // We start counting line and column numbers from 1. 129 if (LineNo != 0) 130 --LineNo; 131 132 const char *BufStart = Buffer->getBufferStart(); 133 134 // The offset cache contains the location of the \n for the specified line, 135 // we want the start of the line. As such, we look for the previous entry. 136 if (LineNo == 0) 137 return BufStart; 138 if (LineNo > Offsets.size()) 139 return nullptr; 140 return BufStart + Offsets[LineNo - 1] + 1; 141 } 142 143 /// Return a pointer to the first character of the specified line number or 144 /// null if the line number is invalid. 145 const char * 146 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const { 147 size_t Sz = Buffer->getBufferSize(); 148 if (Sz <= std::numeric_limits<uint8_t>::max()) 149 return getPointerForLineNumberSpecialized<uint8_t>(LineNo); 150 else if (Sz <= std::numeric_limits<uint16_t>::max()) 151 return getPointerForLineNumberSpecialized<uint16_t>(LineNo); 152 else if (Sz <= std::numeric_limits<uint32_t>::max()) 153 return getPointerForLineNumberSpecialized<uint32_t>(LineNo); 154 else 155 return getPointerForLineNumberSpecialized<uint64_t>(LineNo); 156 } 157 158 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) 159 : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache), 160 IncludeLoc(Other.IncludeLoc) { 161 Other.OffsetCache = nullptr; 162 } 163 164 SourceMgr::SrcBuffer::~SrcBuffer() { 165 if (OffsetCache) { 166 size_t Sz = Buffer->getBufferSize(); 167 if (Sz <= std::numeric_limits<uint8_t>::max()) 168 delete static_cast<std::vector<uint8_t> *>(OffsetCache); 169 else if (Sz <= std::numeric_limits<uint16_t>::max()) 170 delete static_cast<std::vector<uint16_t> *>(OffsetCache); 171 else if (Sz <= std::numeric_limits<uint32_t>::max()) 172 delete static_cast<std::vector<uint32_t> *>(OffsetCache); 173 else 174 delete static_cast<std::vector<uint64_t> *>(OffsetCache); 175 OffsetCache = nullptr; 176 } 177 } 178 179 std::pair<unsigned, unsigned> 180 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { 181 if (!BufferID) 182 BufferID = FindBufferContainingLoc(Loc); 183 assert(BufferID && "Invalid Location!"); 184 185 auto &SB = getBufferInfo(BufferID); 186 const char *Ptr = Loc.getPointer(); 187 188 unsigned LineNo = SB.getLineNumber(Ptr); 189 const char *BufStart = SB.Buffer->getBufferStart(); 190 size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r"); 191 if (NewlineOffs == StringRef::npos) 192 NewlineOffs = ~(size_t)0; 193 return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs); 194 } 195 196 /// Given a line and column number in a mapped buffer, turn it into an SMLoc. 197 /// This will return a null SMLoc if the line/column location is invalid. 198 SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, 199 unsigned ColNo) { 200 auto &SB = getBufferInfo(BufferID); 201 const char *Ptr = SB.getPointerForLineNumber(LineNo); 202 if (!Ptr) 203 return SMLoc(); 204 205 // We start counting line and column numbers from 1. 206 if (ColNo != 0) 207 --ColNo; 208 209 // If we have a column number, validate it. 210 if (ColNo) { 211 // Make sure the location is within the current line. 212 if (Ptr + ColNo > SB.Buffer->getBufferEnd()) 213 return SMLoc(); 214 215 // Make sure there is no newline in the way. 216 if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos) 217 return SMLoc(); 218 219 Ptr += ColNo; 220 } 221 222 return SMLoc::getFromPointer(Ptr); 223 } 224 225 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { 226 if (IncludeLoc == SMLoc()) 227 return; // Top of stack. 228 229 unsigned CurBuf = FindBufferContainingLoc(IncludeLoc); 230 assert(CurBuf && "Invalid or unspecified location!"); 231 232 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); 233 234 OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() 235 << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; 236 } 237 238 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, 239 const Twine &Msg, ArrayRef<SMRange> Ranges, 240 ArrayRef<SMFixIt> FixIts) const { 241 // First thing to do: find the current buffer containing the specified 242 // location to pull out the source line. 243 SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges; 244 std::pair<unsigned, unsigned> LineAndCol; 245 StringRef BufferID = "<unknown>"; 246 std::string LineStr; 247 248 if (Loc.isValid()) { 249 unsigned CurBuf = FindBufferContainingLoc(Loc); 250 assert(CurBuf && "Invalid or unspecified location!"); 251 252 const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf); 253 BufferID = CurMB->getBufferIdentifier(); 254 255 // Scan backward to find the start of the line. 256 const char *LineStart = Loc.getPointer(); 257 const char *BufStart = CurMB->getBufferStart(); 258 while (LineStart != BufStart && LineStart[-1] != '\n' && 259 LineStart[-1] != '\r') 260 --LineStart; 261 262 // Get the end of the line. 263 const char *LineEnd = Loc.getPointer(); 264 const char *BufEnd = CurMB->getBufferEnd(); 265 while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r') 266 ++LineEnd; 267 LineStr = std::string(LineStart, LineEnd); 268 269 // Convert any ranges to column ranges that only intersect the line of the 270 // location. 271 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 272 SMRange R = Ranges[i]; 273 if (!R.isValid()) 274 continue; 275 276 // If the line doesn't contain any part of the range, then ignore it. 277 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) 278 continue; 279 280 // Ignore pieces of the range that go onto other lines. 281 if (R.Start.getPointer() < LineStart) 282 R.Start = SMLoc::getFromPointer(LineStart); 283 if (R.End.getPointer() > LineEnd) 284 R.End = SMLoc::getFromPointer(LineEnd); 285 286 // Translate from SMLoc ranges to column ranges. 287 // FIXME: Handle multibyte characters. 288 ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart, 289 R.End.getPointer() - LineStart)); 290 } 291 292 LineAndCol = getLineAndColumn(Loc, CurBuf); 293 } 294 295 return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, 296 LineAndCol.second - 1, Kind, Msg.str(), LineStr, 297 ColRanges, FixIts); 298 } 299 300 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, 301 bool ShowColors) const { 302 // Report the message with the diagnostic handler if present. 303 if (DiagHandler) { 304 DiagHandler(Diagnostic, DiagContext); 305 return; 306 } 307 308 if (Diagnostic.getLoc().isValid()) { 309 unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc()); 310 assert(CurBuf && "Invalid or unspecified location!"); 311 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); 312 } 313 314 Diagnostic.print(nullptr, OS, ShowColors); 315 } 316 317 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, 318 SourceMgr::DiagKind Kind, const Twine &Msg, 319 ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts, 320 bool ShowColors) const { 321 PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors); 322 } 323 324 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, 325 const Twine &Msg, ArrayRef<SMRange> Ranges, 326 ArrayRef<SMFixIt> FixIts, bool ShowColors) const { 327 PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors); 328 } 329 330 //===----------------------------------------------------------------------===// 331 // SMFixIt Implementation 332 //===----------------------------------------------------------------------===// 333 334 SMFixIt::SMFixIt(SMRange R, const Twine &Replacement) 335 : Range(R), Text(Replacement.str()) { 336 assert(R.isValid()); 337 } 338 339 //===----------------------------------------------------------------------===// 340 // SMDiagnostic Implementation 341 //===----------------------------------------------------------------------===// 342 343 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, 344 int Col, SourceMgr::DiagKind Kind, StringRef Msg, 345 StringRef LineStr, 346 ArrayRef<std::pair<unsigned, unsigned>> Ranges, 347 ArrayRef<SMFixIt> Hints) 348 : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col), 349 Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)), 350 Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) { 351 llvm::sort(FixIts); 352 } 353 354 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine, 355 ArrayRef<SMFixIt> FixIts, 356 ArrayRef<char> SourceLine) { 357 if (FixIts.empty()) 358 return; 359 360 const char *LineStart = SourceLine.begin(); 361 const char *LineEnd = SourceLine.end(); 362 363 size_t PrevHintEndCol = 0; 364 365 for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E; 366 ++I) { 367 // If the fixit contains a newline or tab, ignore it. 368 if (I->getText().find_first_of("\n\r\t") != StringRef::npos) 369 continue; 370 371 SMRange R = I->getRange(); 372 373 // If the line doesn't contain any part of the range, then ignore it. 374 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) 375 continue; 376 377 // Translate from SMLoc to column. 378 // Ignore pieces of the range that go onto other lines. 379 // FIXME: Handle multibyte characters in the source line. 380 unsigned FirstCol; 381 if (R.Start.getPointer() < LineStart) 382 FirstCol = 0; 383 else 384 FirstCol = R.Start.getPointer() - LineStart; 385 386 // If we inserted a long previous hint, push this one forwards, and add 387 // an extra space to show that this is not part of the previous 388 // completion. This is sort of the best we can do when two hints appear 389 // to overlap. 390 // 391 // Note that if this hint is located immediately after the previous 392 // hint, no space will be added, since the location is more important. 393 unsigned HintCol = FirstCol; 394 if (HintCol < PrevHintEndCol) 395 HintCol = PrevHintEndCol + 1; 396 397 // FIXME: This assertion is intended to catch unintended use of multibyte 398 // characters in fixits. If we decide to do this, we'll have to track 399 // separate byte widths for the source and fixit lines. 400 assert((size_t)sys::locale::columnWidth(I->getText()) == 401 I->getText().size()); 402 403 // This relies on one byte per column in our fixit hints. 404 unsigned LastColumnModified = HintCol + I->getText().size(); 405 if (LastColumnModified > FixItLine.size()) 406 FixItLine.resize(LastColumnModified, ' '); 407 408 std::copy(I->getText().begin(), I->getText().end(), 409 FixItLine.begin() + HintCol); 410 411 PrevHintEndCol = LastColumnModified; 412 413 // For replacements, mark the removal range with '~'. 414 // FIXME: Handle multibyte characters in the source line. 415 unsigned LastCol; 416 if (R.End.getPointer() >= LineEnd) 417 LastCol = LineEnd - LineStart; 418 else 419 LastCol = R.End.getPointer() - LineStart; 420 421 std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~'); 422 } 423 } 424 425 static void printSourceLine(raw_ostream &S, StringRef LineContents) { 426 // Print out the source line one character at a time, so we can expand tabs. 427 for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) { 428 size_t NextTab = LineContents.find('\t', i); 429 // If there were no tabs left, print the rest, we are done. 430 if (NextTab == StringRef::npos) { 431 S << LineContents.drop_front(i); 432 break; 433 } 434 435 // Otherwise, print from i to NextTab. 436 S << LineContents.slice(i, NextTab); 437 OutCol += NextTab - i; 438 i = NextTab; 439 440 // If we have a tab, emit at least one space, then round up to 8 columns. 441 do { 442 S << ' '; 443 ++OutCol; 444 } while ((OutCol % TabStop) != 0); 445 } 446 S << '\n'; 447 } 448 449 static bool isNonASCII(char c) { return c & 0x80; } 450 451 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors, 452 bool ShowKindLabel) const { 453 ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable; 454 455 { 456 WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, Mode); 457 458 if (ProgName && ProgName[0]) 459 S << ProgName << ": "; 460 461 if (!Filename.empty()) { 462 if (Filename == "-") 463 S << "<stdin>"; 464 else 465 S << Filename; 466 467 if (LineNo != -1) { 468 S << ':' << LineNo; 469 if (ColumnNo != -1) 470 S << ':' << (ColumnNo + 1); 471 } 472 S << ": "; 473 } 474 } 475 476 if (ShowKindLabel) { 477 switch (Kind) { 478 case SourceMgr::DK_Error: 479 WithColor::error(OS, "", !ShowColors); 480 break; 481 case SourceMgr::DK_Warning: 482 WithColor::warning(OS, "", !ShowColors); 483 break; 484 case SourceMgr::DK_Note: 485 WithColor::note(OS, "", !ShowColors); 486 break; 487 case SourceMgr::DK_Remark: 488 WithColor::remark(OS, "", !ShowColors); 489 break; 490 } 491 } 492 493 WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, Mode) << Message << '\n'; 494 495 if (LineNo == -1 || ColumnNo == -1) 496 return; 497 498 // FIXME: If there are multibyte or multi-column characters in the source, all 499 // our ranges will be wrong. To do this properly, we'll need a byte-to-column 500 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by 501 // expanding them later, and bail out rather than show incorrect ranges and 502 // misaligned fixits for any other odd characters. 503 if (find_if(LineContents, isNonASCII) != LineContents.end()) { 504 printSourceLine(OS, LineContents); 505 return; 506 } 507 size_t NumColumns = LineContents.size(); 508 509 // Build the line with the caret and ranges. 510 std::string CaretLine(NumColumns + 1, ' '); 511 512 // Expand any ranges. 513 for (unsigned r = 0, e = Ranges.size(); r != e; ++r) { 514 std::pair<unsigned, unsigned> R = Ranges[r]; 515 std::fill(&CaretLine[R.first], 516 &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~'); 517 } 518 519 // Add any fix-its. 520 // FIXME: Find the beginning of the line properly for multibyte characters. 521 std::string FixItInsertionLine; 522 buildFixItLine( 523 CaretLine, FixItInsertionLine, FixIts, 524 makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size())); 525 526 // Finally, plop on the caret. 527 if (unsigned(ColumnNo) <= NumColumns) 528 CaretLine[ColumnNo] = '^'; 529 else 530 CaretLine[NumColumns] = '^'; 531 532 // ... and remove trailing whitespace so the output doesn't wrap for it. We 533 // know that the line isn't completely empty because it has the caret in it at 534 // least. 535 CaretLine.erase(CaretLine.find_last_not_of(' ') + 1); 536 537 printSourceLine(OS, LineContents); 538 539 { 540 ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable; 541 WithColor S(OS, raw_ostream::GREEN, true, false, Mode); 542 543 // Print out the caret line, matching tabs in the source line. 544 for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) { 545 if (i >= LineContents.size() || LineContents[i] != '\t') { 546 S << CaretLine[i]; 547 ++OutCol; 548 continue; 549 } 550 551 // Okay, we have a tab. Insert the appropriate number of characters. 552 do { 553 S << CaretLine[i]; 554 ++OutCol; 555 } while ((OutCol % TabStop) != 0); 556 } 557 S << '\n'; 558 } 559 560 // Print out the replacement line, matching tabs in the source line. 561 if (FixItInsertionLine.empty()) 562 return; 563 564 for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { 565 if (i >= LineContents.size() || LineContents[i] != '\t') { 566 OS << FixItInsertionLine[i]; 567 ++OutCol; 568 continue; 569 } 570 571 // Okay, we have a tab. Insert the appropriate number of characters. 572 do { 573 OS << FixItInsertionLine[i]; 574 // FIXME: This is trying not to break up replacements, but then to re-sync 575 // with the tabs between replacements. This will fail, though, if two 576 // fix-it replacements are exactly adjacent, or if a fix-it contains a 577 // space. Really we should be precomputing column widths, which we'll 578 // need anyway for multibyte chars. 579 if (FixItInsertionLine[i] != ' ') 580 ++i; 581 ++OutCol; 582 } while (((OutCol % TabStop) != 0) && i != e); 583 } 584 OS << '\n'; 585 } 586