xref: /freebsd/contrib/llvm-project/llvm/lib/Support/SourceMgr.cpp (revision 48daf251932bd09f2dc1356aa1cf72b043f1f892)
1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SourceMgr class.  This class is used as a simple
10 // substrate for diagnostics, #include handling, and other low level things for
11 // simple parsers.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Support/SourceMgr.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include "llvm/Support/Locale.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SMLoc.h"
26 #include "llvm/Support/WithColor.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <limits>
32 #include <memory>
33 #include <string>
34 #include <utility>
35 
36 using namespace llvm;
37 
/// Number of columns between tab stops; used when expanding tab characters in
/// echoed source lines and in the caret / fix-it lines printed beneath them.
static constexpr size_t TabStop = 8;
39 
40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
41                                    SMLoc IncludeLoc,
42                                    std::string &IncludedFile) {
43   IncludedFile = Filename;
44   ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
45     MemoryBuffer::getFile(IncludedFile);
46 
47   // If the file didn't exist directly, see if it's in an include path.
48   for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
49        ++i) {
50     IncludedFile =
51         IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
52     NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
53   }
54 
55   if (!NewBufOrErr)
56     return 0;
57 
58   return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
59 }
60 
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
62   for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
63     if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
64         // Use <= here so that a pointer to the null at the end of the buffer
65         // is included as part of the buffer.
66         Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
67       return i + 1;
68   return 0;
69 }
70 
71 template <typename T>
72 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
73 
74   // Ensure OffsetCache is allocated and populated with offsets of all the
75   // '\n' bytes.
76   std::vector<T> *Offsets = nullptr;
77   if (OffsetCache.isNull()) {
78     Offsets = new std::vector<T>();
79     OffsetCache = Offsets;
80     size_t Sz = Buffer->getBufferSize();
81     assert(Sz <= std::numeric_limits<T>::max());
82     StringRef S = Buffer->getBuffer();
83     for (size_t N = 0; N < Sz; ++N) {
84       if (S[N] == '\n') {
85         Offsets->push_back(static_cast<T>(N));
86       }
87     }
88   } else {
89     Offsets = OffsetCache.get<std::vector<T> *>();
90   }
91 
92   const char *BufStart = Buffer->getBufferStart();
93   assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
94   ptrdiff_t PtrDiff = Ptr - BufStart;
95   assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
96   T PtrOffset = static_cast<T>(PtrDiff);
97 
98   // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
99   // the line number.
100   return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1;
101 }
102 
103 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
104   : Buffer(std::move(Other.Buffer)),
105     OffsetCache(Other.OffsetCache),
106     IncludeLoc(Other.IncludeLoc) {
107   Other.OffsetCache = nullptr;
108 }
109 
110 SourceMgr::SrcBuffer::~SrcBuffer() {
111   if (!OffsetCache.isNull()) {
112     if (OffsetCache.is<std::vector<uint8_t>*>())
113       delete OffsetCache.get<std::vector<uint8_t>*>();
114     else if (OffsetCache.is<std::vector<uint16_t>*>())
115       delete OffsetCache.get<std::vector<uint16_t>*>();
116     else if (OffsetCache.is<std::vector<uint32_t>*>())
117       delete OffsetCache.get<std::vector<uint32_t>*>();
118     else
119       delete OffsetCache.get<std::vector<uint64_t>*>();
120     OffsetCache = nullptr;
121   }
122 }
123 
124 std::pair<unsigned, unsigned>
125 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
126   if (!BufferID)
127     BufferID = FindBufferContainingLoc(Loc);
128   assert(BufferID && "Invalid Location!");
129 
130   auto &SB = getBufferInfo(BufferID);
131   const char *Ptr = Loc.getPointer();
132 
133   size_t Sz = SB.Buffer->getBufferSize();
134   unsigned LineNo;
135   if (Sz <= std::numeric_limits<uint8_t>::max())
136     LineNo = SB.getLineNumber<uint8_t>(Ptr);
137   else if (Sz <= std::numeric_limits<uint16_t>::max())
138     LineNo = SB.getLineNumber<uint16_t>(Ptr);
139   else if (Sz <= std::numeric_limits<uint32_t>::max())
140     LineNo = SB.getLineNumber<uint32_t>(Ptr);
141   else
142     LineNo = SB.getLineNumber<uint64_t>(Ptr);
143 
144   const char *BufStart = SB.Buffer->getBufferStart();
145   size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
146   if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
147   return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
148 }
149 
150 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
151   if (IncludeLoc == SMLoc()) return;  // Top of stack.
152 
153   unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
154   assert(CurBuf && "Invalid or unspecified location!");
155 
156   PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
157 
158   OS << "Included from "
159      << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
160      << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
161 }
162 
163 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
164                                    const Twine &Msg,
165                                    ArrayRef<SMRange> Ranges,
166                                    ArrayRef<SMFixIt> FixIts) const {
167   // First thing to do: find the current buffer containing the specified
168   // location to pull out the source line.
169   SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
170   std::pair<unsigned, unsigned> LineAndCol;
171   StringRef BufferID = "<unknown>";
172   std::string LineStr;
173 
174   if (Loc.isValid()) {
175     unsigned CurBuf = FindBufferContainingLoc(Loc);
176     assert(CurBuf && "Invalid or unspecified location!");
177 
178     const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
179     BufferID = CurMB->getBufferIdentifier();
180 
181     // Scan backward to find the start of the line.
182     const char *LineStart = Loc.getPointer();
183     const char *BufStart = CurMB->getBufferStart();
184     while (LineStart != BufStart && LineStart[-1] != '\n' &&
185            LineStart[-1] != '\r')
186       --LineStart;
187 
188     // Get the end of the line.
189     const char *LineEnd = Loc.getPointer();
190     const char *BufEnd = CurMB->getBufferEnd();
191     while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
192       ++LineEnd;
193     LineStr = std::string(LineStart, LineEnd);
194 
195     // Convert any ranges to column ranges that only intersect the line of the
196     // location.
197     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
198       SMRange R = Ranges[i];
199       if (!R.isValid()) continue;
200 
201       // If the line doesn't contain any part of the range, then ignore it.
202       if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
203         continue;
204 
205       // Ignore pieces of the range that go onto other lines.
206       if (R.Start.getPointer() < LineStart)
207         R.Start = SMLoc::getFromPointer(LineStart);
208       if (R.End.getPointer() > LineEnd)
209         R.End = SMLoc::getFromPointer(LineEnd);
210 
211       // Translate from SMLoc ranges to column ranges.
212       // FIXME: Handle multibyte characters.
213       ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
214                                          R.End.getPointer()-LineStart));
215     }
216 
217     LineAndCol = getLineAndColumn(Loc, CurBuf);
218   }
219 
220   return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
221                       LineAndCol.second-1, Kind, Msg.str(),
222                       LineStr, ColRanges, FixIts);
223 }
224 
225 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
226                              bool ShowColors) const {
227   // Report the message with the diagnostic handler if present.
228   if (DiagHandler) {
229     DiagHandler(Diagnostic, DiagContext);
230     return;
231   }
232 
233   if (Diagnostic.getLoc().isValid()) {
234     unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
235     assert(CurBuf && "Invalid or unspecified location!");
236     PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
237   }
238 
239   Diagnostic.print(nullptr, OS, ShowColors);
240 }
241 
242 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
243                              SourceMgr::DiagKind Kind,
244                              const Twine &Msg, ArrayRef<SMRange> Ranges,
245                              ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
246   PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
247 }
248 
249 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
250                              const Twine &Msg, ArrayRef<SMRange> Ranges,
251                              ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
252   PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
253 }
254 
255 //===----------------------------------------------------------------------===//
256 // SMDiagnostic Implementation
257 //===----------------------------------------------------------------------===//
258 
259 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
260                            int Line, int Col, SourceMgr::DiagKind Kind,
261                            StringRef Msg, StringRef LineStr,
262                            ArrayRef<std::pair<unsigned,unsigned>> Ranges,
263                            ArrayRef<SMFixIt> Hints)
264   : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
265     Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
266     FixIts(Hints.begin(), Hints.end()) {
267   llvm::sort(FixIts);
268 }
269 
270 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
271                            ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
272   if (FixIts.empty())
273     return;
274 
275   const char *LineStart = SourceLine.begin();
276   const char *LineEnd = SourceLine.end();
277 
278   size_t PrevHintEndCol = 0;
279 
280   for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
281        I != E; ++I) {
282     // If the fixit contains a newline or tab, ignore it.
283     if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
284       continue;
285 
286     SMRange R = I->getRange();
287 
288     // If the line doesn't contain any part of the range, then ignore it.
289     if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
290       continue;
291 
292     // Translate from SMLoc to column.
293     // Ignore pieces of the range that go onto other lines.
294     // FIXME: Handle multibyte characters in the source line.
295     unsigned FirstCol;
296     if (R.Start.getPointer() < LineStart)
297       FirstCol = 0;
298     else
299       FirstCol = R.Start.getPointer() - LineStart;
300 
301     // If we inserted a long previous hint, push this one forwards, and add
302     // an extra space to show that this is not part of the previous
303     // completion. This is sort of the best we can do when two hints appear
304     // to overlap.
305     //
306     // Note that if this hint is located immediately after the previous
307     // hint, no space will be added, since the location is more important.
308     unsigned HintCol = FirstCol;
309     if (HintCol < PrevHintEndCol)
310       HintCol = PrevHintEndCol + 1;
311 
312     // FIXME: This assertion is intended to catch unintended use of multibyte
313     // characters in fixits. If we decide to do this, we'll have to track
314     // separate byte widths for the source and fixit lines.
315     assert((size_t)sys::locale::columnWidth(I->getText()) ==
316            I->getText().size());
317 
318     // This relies on one byte per column in our fixit hints.
319     unsigned LastColumnModified = HintCol + I->getText().size();
320     if (LastColumnModified > FixItLine.size())
321       FixItLine.resize(LastColumnModified, ' ');
322 
323     std::copy(I->getText().begin(), I->getText().end(),
324               FixItLine.begin() + HintCol);
325 
326     PrevHintEndCol = LastColumnModified;
327 
328     // For replacements, mark the removal range with '~'.
329     // FIXME: Handle multibyte characters in the source line.
330     unsigned LastCol;
331     if (R.End.getPointer() >= LineEnd)
332       LastCol = LineEnd - LineStart;
333     else
334       LastCol = R.End.getPointer() - LineStart;
335 
336     std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
337   }
338 }
339 
340 static void printSourceLine(raw_ostream &S, StringRef LineContents) {
341   // Print out the source line one character at a time, so we can expand tabs.
342   for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
343     size_t NextTab = LineContents.find('\t', i);
344     // If there were no tabs left, print the rest, we are done.
345     if (NextTab == StringRef::npos) {
346       S << LineContents.drop_front(i);
347       break;
348     }
349 
350     // Otherwise, print from i to NextTab.
351     S << LineContents.slice(i, NextTab);
352     OutCol += NextTab - i;
353     i = NextTab;
354 
355     // If we have a tab, emit at least one space, then round up to 8 columns.
356     do {
357       S << ' ';
358       ++OutCol;
359     } while ((OutCol % TabStop) != 0);
360   }
361   S << '\n';
362 }
363 
/// Return true if the high bit (0x80) of \p c is set, i.e. the byte lies
/// outside the 7-bit ASCII range.
static bool isNonASCII(char c) {
  return (static_cast<unsigned char>(c) & 0x80u) != 0;
}
367 
368 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
369                          bool ShowColors, bool ShowKindLabel) const {
370   {
371     WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
372 
373     if (ProgName && ProgName[0])
374       S << ProgName << ": ";
375 
376     if (!Filename.empty()) {
377       if (Filename == "-")
378         S << "<stdin>";
379       else
380         S << Filename;
381 
382       if (LineNo != -1) {
383         S << ':' << LineNo;
384         if (ColumnNo != -1)
385           S << ':' << (ColumnNo + 1);
386       }
387       S << ": ";
388     }
389   }
390 
391   if (ShowKindLabel) {
392     switch (Kind) {
393     case SourceMgr::DK_Error:
394       WithColor::error(OS, "", !ShowColors);
395       break;
396     case SourceMgr::DK_Warning:
397       WithColor::warning(OS, "", !ShowColors);
398       break;
399     case SourceMgr::DK_Note:
400       WithColor::note(OS, "", !ShowColors);
401       break;
402     case SourceMgr::DK_Remark:
403       WithColor::remark(OS, "", !ShowColors);
404       break;
405     }
406   }
407 
408   WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors)
409       << Message << '\n';
410 
411   if (LineNo == -1 || ColumnNo == -1)
412     return;
413 
414   // FIXME: If there are multibyte or multi-column characters in the source, all
415   // our ranges will be wrong. To do this properly, we'll need a byte-to-column
416   // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
417   // expanding them later, and bail out rather than show incorrect ranges and
418   // misaligned fixits for any other odd characters.
419   if (find_if(LineContents, isNonASCII) != LineContents.end()) {
420     printSourceLine(OS, LineContents);
421     return;
422   }
423   size_t NumColumns = LineContents.size();
424 
425   // Build the line with the caret and ranges.
426   std::string CaretLine(NumColumns+1, ' ');
427 
428   // Expand any ranges.
429   for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
430     std::pair<unsigned, unsigned> R = Ranges[r];
431     std::fill(&CaretLine[R.first],
432               &CaretLine[std::min((size_t)R.second, CaretLine.size())],
433               '~');
434   }
435 
436   // Add any fix-its.
437   // FIXME: Find the beginning of the line properly for multibyte characters.
438   std::string FixItInsertionLine;
439   buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
440                  makeArrayRef(Loc.getPointer() - ColumnNo,
441                               LineContents.size()));
442 
443   // Finally, plop on the caret.
444   if (unsigned(ColumnNo) <= NumColumns)
445     CaretLine[ColumnNo] = '^';
446   else
447     CaretLine[NumColumns] = '^';
448 
449   // ... and remove trailing whitespace so the output doesn't wrap for it.  We
450   // know that the line isn't completely empty because it has the caret in it at
451   // least.
452   CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
453 
454   printSourceLine(OS, LineContents);
455 
456   {
457     WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors);
458 
459     // Print out the caret line, matching tabs in the source line.
460     for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
461       if (i >= LineContents.size() || LineContents[i] != '\t') {
462         S << CaretLine[i];
463         ++OutCol;
464         continue;
465       }
466 
467       // Okay, we have a tab.  Insert the appropriate number of characters.
468       do {
469         S << CaretLine[i];
470         ++OutCol;
471       } while ((OutCol % TabStop) != 0);
472     }
473     S << '\n';
474   }
475 
476   // Print out the replacement line, matching tabs in the source line.
477   if (FixItInsertionLine.empty())
478     return;
479 
480   for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
481     if (i >= LineContents.size() || LineContents[i] != '\t') {
482       OS << FixItInsertionLine[i];
483       ++OutCol;
484       continue;
485     }
486 
487     // Okay, we have a tab.  Insert the appropriate number of characters.
488     do {
489       OS << FixItInsertionLine[i];
490       // FIXME: This is trying not to break up replacements, but then to re-sync
491       // with the tabs between replacements. This will fail, though, if two
492       // fix-it replacements are exactly adjacent, or if a fix-it contains a
493       // space. Really we should be precomputing column widths, which we'll
494       // need anyway for multibyte chars.
495       if (FixItInsertionLine[i] != ' ')
496         ++i;
497       ++OutCol;
498     } while (((OutCol % TabStop) != 0) && i != e);
499   }
500   OS << '\n';
501 }
502