xref: /freebsd/contrib/llvm-project/clang/lib/Rewrite/Rewriter.cpp (revision 5fb307d29b364982acbde82cbf77db3cae486f8c)
1 //===- Rewriter.cpp - Code rewriting interface ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the Rewriter class, which is used for code
10 //  transformations.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Rewrite/Core/Rewriter.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticIDs.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Lex/Lexer.h"
20 #include "clang/Rewrite/Core/RewriteBuffer.h"
21 #include "clang/Rewrite/Core/RewriteRope.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cassert>
27 #include <iterator>
28 #include <map>
29 #include <utility>
30 
31 using namespace clang;
32 
33 raw_ostream &RewriteBuffer::write(raw_ostream &os) const {
34   // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the
35   // character iterator.
36   for (RopePieceBTreeIterator I = begin(), E = end(); I != E;
37        I.MoveToNextPiece())
38     os << I.piece();
39   return os;
40 }
41 
/// Return true if \p c is a whitespace character other than new-line, i.e.
/// one of ' ', '\t', '\f', '\v' or '\r'.
static inline bool isWhitespaceExceptNL(unsigned char c) {
  return c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r';
}
56 
57 void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size,
58                                bool removeLineIfEmpty) {
59   // Nothing to remove, exit early.
60   if (Size == 0) return;
61 
62   unsigned RealOffset = getMappedOffset(OrigOffset, true);
63   assert(RealOffset+Size <= Buffer.size() && "Invalid location");
64 
65   // Remove the dead characters.
66   Buffer.erase(RealOffset, Size);
67 
68   // Add a delta so that future changes are offset correctly.
69   AddReplaceDelta(OrigOffset, -Size);
70 
71   if (removeLineIfEmpty) {
72     // Find the line that the remove occurred and if it is completely empty
73     // remove the line as well.
74 
75     iterator curLineStart = begin();
76     unsigned curLineStartOffs = 0;
77     iterator posI = begin();
78     for (unsigned i = 0; i != RealOffset; ++i) {
79       if (*posI == '\n') {
80         curLineStart = posI;
81         ++curLineStart;
82         curLineStartOffs = i + 1;
83       }
84       ++posI;
85     }
86 
87     unsigned lineSize = 0;
88     posI = curLineStart;
89     while (posI != end() && isWhitespaceExceptNL(*posI)) {
90       ++posI;
91       ++lineSize;
92     }
93     if (posI != end() && *posI == '\n') {
94       Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/);
95       // FIXME: Here, the offset of the start of the line is supposed to be
96       // expressed in terms of the original input not the "real" rewrite
97       // buffer.  How do we compute that reliably?  It might be tempting to use
98       // curLineStartOffs + OrigOffset - RealOffset, but that assumes the
99       // difference between the original and real offset is the same at the
100       // removed text and at the start of the line, but that's not true if
101       // edits were previously made earlier on the line.  This bug is also
102       // documented by a FIXME on the definition of
103       // clang::Rewriter::RewriteOptions::RemoveLineIfEmpty.  A reproducer for
104       // the implementation below is the test RemoveLineIfEmpty in
105       // clang/unittests/Rewrite/RewriteBufferTest.cpp.
106       AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/));
107     }
108   }
109 }
110 
111 void RewriteBuffer::InsertText(unsigned OrigOffset, StringRef Str,
112                                bool InsertAfter) {
113   // Nothing to insert, exit early.
114   if (Str.empty()) return;
115 
116   unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter);
117   Buffer.insert(RealOffset, Str.begin(), Str.end());
118 
119   // Add a delta so that future changes are offset correctly.
120   AddInsertDelta(OrigOffset, Str.size());
121 }
122 
123 /// ReplaceText - This method replaces a range of characters in the input
124 /// buffer with a new string.  This is effectively a combined "remove+insert"
125 /// operation.
126 void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength,
127                                 StringRef NewStr) {
128   unsigned RealOffset = getMappedOffset(OrigOffset, true);
129   Buffer.erase(RealOffset, OrigLength);
130   Buffer.insert(RealOffset, NewStr.begin(), NewStr.end());
131   if (OrigLength != NewStr.size())
132     AddReplaceDelta(OrigOffset, NewStr.size() - OrigLength);
133 }
134 
135 //===----------------------------------------------------------------------===//
136 // Rewriter class
137 //===----------------------------------------------------------------------===//
138 
139 /// getRangeSize - Return the size in bytes of the specified range if they
140 /// are in the same file.  If not, this returns -1.
141 int Rewriter::getRangeSize(const CharSourceRange &Range,
142                            RewriteOptions opts) const {
143   if (!isRewritable(Range.getBegin()) ||
144       !isRewritable(Range.getEnd())) return -1;
145 
146   FileID StartFileID, EndFileID;
147   unsigned StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
148   unsigned EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
149 
150   if (StartFileID != EndFileID)
151     return -1;
152 
153   // If edits have been made to this buffer, the delta between the range may
154   // have changed.
155   std::map<FileID, RewriteBuffer>::const_iterator I =
156     RewriteBuffers.find(StartFileID);
157   if (I != RewriteBuffers.end()) {
158     const RewriteBuffer &RB = I->second;
159     EndOff = RB.getMappedOffset(EndOff, opts.IncludeInsertsAtEndOfRange);
160     StartOff = RB.getMappedOffset(StartOff, !opts.IncludeInsertsAtBeginOfRange);
161   }
162 
163   // Adjust the end offset to the end of the last token, instead of being the
164   // start of the last token if this is a token range.
165   if (Range.isTokenRange())
166     EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
167 
168   return EndOff-StartOff;
169 }
170 
171 int Rewriter::getRangeSize(SourceRange Range, RewriteOptions opts) const {
172   return getRangeSize(CharSourceRange::getTokenRange(Range), opts);
173 }
174 
175 /// getRewrittenText - Return the rewritten form of the text in the specified
176 /// range.  If the start or end of the range was unrewritable or if they are
177 /// in different buffers, this returns an empty string.
178 ///
179 /// Note that this method is not particularly efficient.
180 std::string Rewriter::getRewrittenText(CharSourceRange Range) const {
181   if (!isRewritable(Range.getBegin()) ||
182       !isRewritable(Range.getEnd()))
183     return {};
184 
185   FileID StartFileID, EndFileID;
186   unsigned StartOff, EndOff;
187   StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
188   EndOff   = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
189 
190   if (StartFileID != EndFileID)
191     return {}; // Start and end in different buffers.
192 
193   // If edits have been made to this buffer, the delta between the range may
194   // have changed.
195   std::map<FileID, RewriteBuffer>::const_iterator I =
196     RewriteBuffers.find(StartFileID);
197   if (I == RewriteBuffers.end()) {
198     // If the buffer hasn't been rewritten, just return the text from the input.
199     const char *Ptr = SourceMgr->getCharacterData(Range.getBegin());
200 
201     // Adjust the end offset to the end of the last token, instead of being the
202     // start of the last token.
203     if (Range.isTokenRange())
204       EndOff +=
205           Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
206     return std::string(Ptr, Ptr+EndOff-StartOff);
207   }
208 
209   const RewriteBuffer &RB = I->second;
210   EndOff = RB.getMappedOffset(EndOff, true);
211   StartOff = RB.getMappedOffset(StartOff);
212 
213   // Adjust the end offset to the end of the last token, instead of being the
214   // start of the last token.
215   if (Range.isTokenRange())
216     EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
217 
218   // Advance the iterators to the right spot, yay for linear time algorithms.
219   RewriteBuffer::iterator Start = RB.begin();
220   std::advance(Start, StartOff);
221   RewriteBuffer::iterator End = Start;
222   assert(EndOff >= StartOff && "Invalid iteration distance");
223   std::advance(End, EndOff-StartOff);
224 
225   return std::string(Start, End);
226 }
227 
228 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc,
229                                               FileID &FID) const {
230   assert(Loc.isValid() && "Invalid location");
231   std::pair<FileID, unsigned> V = SourceMgr->getDecomposedLoc(Loc);
232   FID = V.first;
233   return V.second;
234 }
235 
236 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
237 RewriteBuffer &Rewriter::getEditBuffer(FileID FID) {
238   std::map<FileID, RewriteBuffer>::iterator I =
239     RewriteBuffers.lower_bound(FID);
240   if (I != RewriteBuffers.end() && I->first == FID)
241     return I->second;
242   I = RewriteBuffers.insert(I, std::make_pair(FID, RewriteBuffer()));
243 
244   StringRef MB = SourceMgr->getBufferData(FID);
245   I->second.Initialize(MB.begin(), MB.end());
246 
247   return I->second;
248 }
249 
250 /// InsertText - Insert the specified string at the specified location in the
251 /// original buffer.
252 bool Rewriter::InsertText(SourceLocation Loc, StringRef Str,
253                           bool InsertAfter, bool indentNewLines) {
254   if (!isRewritable(Loc)) return true;
255   FileID FID;
256   unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
257 
258   SmallString<128> indentedStr;
259   if (indentNewLines && Str.contains('\n')) {
260     StringRef MB = SourceMgr->getBufferData(FID);
261 
262     unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1;
263     const SrcMgr::ContentCache *Content =
264         &SourceMgr->getSLocEntry(FID).getFile().getContentCache();
265     unsigned lineOffs = Content->SourceLineCache[lineNo];
266 
267     // Find the whitespace at the start of the line.
268     StringRef indentSpace;
269     {
270       unsigned i = lineOffs;
271       while (isWhitespaceExceptNL(MB[i]))
272         ++i;
273       indentSpace = MB.substr(lineOffs, i-lineOffs);
274     }
275 
276     SmallVector<StringRef, 4> lines;
277     Str.split(lines, "\n");
278 
279     for (unsigned i = 0, e = lines.size(); i != e; ++i) {
280       indentedStr += lines[i];
281       if (i < e-1) {
282         indentedStr += '\n';
283         indentedStr += indentSpace;
284       }
285     }
286     Str = indentedStr.str();
287   }
288 
289   getEditBuffer(FID).InsertText(StartOffs, Str, InsertAfter);
290   return false;
291 }
292 
293 bool Rewriter::InsertTextAfterToken(SourceLocation Loc, StringRef Str) {
294   if (!isRewritable(Loc)) return true;
295   FileID FID;
296   unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
297   RewriteOptions rangeOpts;
298   rangeOpts.IncludeInsertsAtBeginOfRange = false;
299   StartOffs += getRangeSize(SourceRange(Loc, Loc), rangeOpts);
300   getEditBuffer(FID).InsertText(StartOffs, Str, /*InsertAfter*/true);
301   return false;
302 }
303 
304 /// RemoveText - Remove the specified text region.
305 bool Rewriter::RemoveText(SourceLocation Start, unsigned Length,
306                           RewriteOptions opts) {
307   if (!isRewritable(Start)) return true;
308   FileID FID;
309   unsigned StartOffs = getLocationOffsetAndFileID(Start, FID);
310   getEditBuffer(FID).RemoveText(StartOffs, Length, opts.RemoveLineIfEmpty);
311   return false;
312 }
313 
314 /// ReplaceText - This method replaces a range of characters in the input
315 /// buffer with a new string.  This is effectively a combined "remove/insert"
316 /// operation.
317 bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength,
318                            StringRef NewStr) {
319   if (!isRewritable(Start)) return true;
320   FileID StartFileID;
321   unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID);
322 
323   getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength, NewStr);
324   return false;
325 }
326 
327 bool Rewriter::ReplaceText(SourceRange range, SourceRange replacementRange) {
328   if (!isRewritable(range.getBegin())) return true;
329   if (!isRewritable(range.getEnd())) return true;
330   if (replacementRange.isInvalid()) return true;
331   SourceLocation start = range.getBegin();
332   unsigned origLength = getRangeSize(range);
333   unsigned newLength = getRangeSize(replacementRange);
334   FileID FID;
335   unsigned newOffs = getLocationOffsetAndFileID(replacementRange.getBegin(),
336                                                 FID);
337   StringRef MB = SourceMgr->getBufferData(FID);
338   return ReplaceText(start, origLength, MB.substr(newOffs, newLength));
339 }
340 
341 bool Rewriter::IncreaseIndentation(CharSourceRange range,
342                                    SourceLocation parentIndent) {
343   if (range.isInvalid()) return true;
344   if (!isRewritable(range.getBegin())) return true;
345   if (!isRewritable(range.getEnd())) return true;
346   if (!isRewritable(parentIndent)) return true;
347 
348   FileID StartFileID, EndFileID, parentFileID;
349   unsigned StartOff, EndOff, parentOff;
350 
351   StartOff = getLocationOffsetAndFileID(range.getBegin(), StartFileID);
352   EndOff   = getLocationOffsetAndFileID(range.getEnd(), EndFileID);
353   parentOff = getLocationOffsetAndFileID(parentIndent, parentFileID);
354 
355   if (StartFileID != EndFileID || StartFileID != parentFileID)
356     return true;
357   if (StartOff > EndOff)
358     return true;
359 
360   FileID FID = StartFileID;
361   StringRef MB = SourceMgr->getBufferData(FID);
362 
363   unsigned parentLineNo = SourceMgr->getLineNumber(FID, parentOff) - 1;
364   unsigned startLineNo = SourceMgr->getLineNumber(FID, StartOff) - 1;
365   unsigned endLineNo = SourceMgr->getLineNumber(FID, EndOff) - 1;
366 
367   const SrcMgr::ContentCache *Content =
368       &SourceMgr->getSLocEntry(FID).getFile().getContentCache();
369 
370   // Find where the lines start.
371   unsigned parentLineOffs = Content->SourceLineCache[parentLineNo];
372   unsigned startLineOffs = Content->SourceLineCache[startLineNo];
373 
374   // Find the whitespace at the start of each line.
375   StringRef parentSpace, startSpace;
376   {
377     unsigned i = parentLineOffs;
378     while (isWhitespaceExceptNL(MB[i]))
379       ++i;
380     parentSpace = MB.substr(parentLineOffs, i-parentLineOffs);
381 
382     i = startLineOffs;
383     while (isWhitespaceExceptNL(MB[i]))
384       ++i;
385     startSpace = MB.substr(startLineOffs, i-startLineOffs);
386   }
387   if (parentSpace.size() >= startSpace.size())
388     return true;
389   if (!startSpace.startswith(parentSpace))
390     return true;
391 
392   StringRef indent = startSpace.substr(parentSpace.size());
393 
394   // Indent the lines between start/end offsets.
395   RewriteBuffer &RB = getEditBuffer(FID);
396   for (unsigned lineNo = startLineNo; lineNo <= endLineNo; ++lineNo) {
397     unsigned offs = Content->SourceLineCache[lineNo];
398     unsigned i = offs;
399     while (isWhitespaceExceptNL(MB[i]))
400       ++i;
401     StringRef origIndent = MB.substr(offs, i-offs);
402     if (origIndent.startswith(startSpace))
403       RB.InsertText(offs, indent, /*InsertAfter=*/false);
404   }
405 
406   return false;
407 }
408 
409 bool Rewriter::overwriteChangedFiles() {
410   bool AllWritten = true;
411   auto& Diag = getSourceMgr().getDiagnostics();
412   unsigned OverwriteFailure = Diag.getCustomDiagID(
413       DiagnosticsEngine::Error, "unable to overwrite file %0: %1");
414   for (buffer_iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) {
415     const FileEntry *Entry = getSourceMgr().getFileEntryForID(I->first);
416     if (auto Error =
417             llvm::writeToOutput(Entry->getName(), [&](llvm::raw_ostream &OS) {
418               I->second.write(OS);
419               return llvm::Error::success();
420             })) {
421       Diag.Report(OverwriteFailure)
422           << Entry->getName() << llvm::toString(std::move(Error));
423       AllWritten = false;
424     }
425   }
426   return !AllWritten;
427 }
428