1 //===- Rewriter.cpp - Code rewriting interface ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the Rewriter class, which is used for code 10 // transformations. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Rewrite/Core/Rewriter.h" 15 #include "clang/Basic/Diagnostic.h" 16 #include "clang/Basic/DiagnosticIDs.h" 17 #include "clang/Basic/SourceLocation.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Lex/Lexer.h" 20 #include "clang/Rewrite/Core/RewriteBuffer.h" 21 #include "clang/Rewrite/Core/RewriteRope.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <cassert> 27 #include <iterator> 28 #include <map> 29 #include <utility> 30 31 using namespace clang; 32 33 raw_ostream &RewriteBuffer::write(raw_ostream &os) const { 34 // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the 35 // character iterator. 36 for (RopePieceBTreeIterator I = begin(), E = end(); I != E; 37 I.MoveToNextPiece()) 38 os << I.piece(); 39 return os; 40 } 41 42 /// Return true if this character is non-new-line whitespace: 43 /// ' ', '\\t', '\\f', '\\v', '\\r'. 44 static inline bool isWhitespaceExceptNL(unsigned char c) { 45 switch (c) { 46 case ' ': 47 case '\t': 48 case '\f': 49 case '\v': 50 case '\r': 51 return true; 52 default: 53 return false; 54 } 55 } 56 57 void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size, 58 bool removeLineIfEmpty) { 59 // Nothing to remove, exit early. 60 if (Size == 0) return; 61 62 unsigned RealOffset = getMappedOffset(OrigOffset, true); 63 assert(RealOffset+Size <= Buffer.size() && "Invalid location"); 64 65 // Remove the dead characters. 66 Buffer.erase(RealOffset, Size); 67 68 // Add a delta so that future changes are offset correctly. 69 AddReplaceDelta(OrigOffset, -Size); 70 71 if (removeLineIfEmpty) { 72 // Find the line that the remove occurred and if it is completely empty 73 // remove the line as well. 74 75 iterator curLineStart = begin(); 76 unsigned curLineStartOffs = 0; 77 iterator posI = begin(); 78 for (unsigned i = 0; i != RealOffset; ++i) { 79 if (*posI == '\n') { 80 curLineStart = posI; 81 ++curLineStart; 82 curLineStartOffs = i + 1; 83 } 84 ++posI; 85 } 86 87 unsigned lineSize = 0; 88 posI = curLineStart; 89 while (posI != end() && isWhitespaceExceptNL(*posI)) { 90 ++posI; 91 ++lineSize; 92 } 93 if (posI != end() && *posI == '\n') { 94 Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/); 95 // FIXME: Here, the offset of the start of the line is supposed to be 96 // expressed in terms of the original input not the "real" rewrite 97 // buffer. How do we compute that reliably? It might be tempting to use 98 // curLineStartOffs + OrigOffset - RealOffset, but that assumes the 99 // difference between the original and real offset is the same at the 100 // removed text and at the start of the line, but that's not true if 101 // edits were previously made earlier on the line. This bug is also 102 // documented by a FIXME on the definition of 103 // clang::Rewriter::RewriteOptions::RemoveLineIfEmpty. A reproducer for 104 // the implementation below is the test RemoveLineIfEmpty in 105 // clang/unittests/Rewrite/RewriteBufferTest.cpp. 106 AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/)); 107 } 108 } 109 } 110 111 void RewriteBuffer::InsertText(unsigned OrigOffset, StringRef Str, 112 bool InsertAfter) { 113 // Nothing to insert, exit early. 114 if (Str.empty()) return; 115 116 unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter); 117 Buffer.insert(RealOffset, Str.begin(), Str.end()); 118 119 // Add a delta so that future changes are offset correctly. 120 AddInsertDelta(OrigOffset, Str.size()); 121 } 122 123 /// ReplaceText - This method replaces a range of characters in the input 124 /// buffer with a new string. This is effectively a combined "remove+insert" 125 /// operation. 126 void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength, 127 StringRef NewStr) { 128 unsigned RealOffset = getMappedOffset(OrigOffset, true); 129 Buffer.erase(RealOffset, OrigLength); 130 Buffer.insert(RealOffset, NewStr.begin(), NewStr.end()); 131 if (OrigLength != NewStr.size()) 132 AddReplaceDelta(OrigOffset, NewStr.size() - OrigLength); 133 } 134 135 //===----------------------------------------------------------------------===// 136 // Rewriter class 137 //===----------------------------------------------------------------------===// 138 139 /// getRangeSize - Return the size in bytes of the specified range if they 140 /// are in the same file. If not, this returns -1. 141 int Rewriter::getRangeSize(const CharSourceRange &Range, 142 RewriteOptions opts) const { 143 if (!isRewritable(Range.getBegin()) || 144 !isRewritable(Range.getEnd())) return -1; 145 146 FileID StartFileID, EndFileID; 147 unsigned StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID); 148 unsigned EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID); 149 150 if (StartFileID != EndFileID) 151 return -1; 152 153 // If edits have been made to this buffer, the delta between the range may 154 // have changed. 155 std::map<FileID, RewriteBuffer>::const_iterator I = 156 RewriteBuffers.find(StartFileID); 157 if (I != RewriteBuffers.end()) { 158 const RewriteBuffer &RB = I->second; 159 EndOff = RB.getMappedOffset(EndOff, opts.IncludeInsertsAtEndOfRange); 160 StartOff = RB.getMappedOffset(StartOff, !opts.IncludeInsertsAtBeginOfRange); 161 } 162 163 // Adjust the end offset to the end of the last token, instead of being the 164 // start of the last token if this is a token range. 165 if (Range.isTokenRange()) 166 EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts); 167 168 return EndOff-StartOff; 169 } 170 171 int Rewriter::getRangeSize(SourceRange Range, RewriteOptions opts) const { 172 return getRangeSize(CharSourceRange::getTokenRange(Range), opts); 173 } 174 175 /// getRewrittenText - Return the rewritten form of the text in the specified 176 /// range. If the start or end of the range was unrewritable or if they are 177 /// in different buffers, this returns an empty string. 178 /// 179 /// Note that this method is not particularly efficient. 180 std::string Rewriter::getRewrittenText(CharSourceRange Range) const { 181 if (!isRewritable(Range.getBegin()) || 182 !isRewritable(Range.getEnd())) 183 return {}; 184 185 FileID StartFileID, EndFileID; 186 unsigned StartOff, EndOff; 187 StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID); 188 EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID); 189 190 if (StartFileID != EndFileID) 191 return {}; // Start and end in different buffers. 192 193 // If edits have been made to this buffer, the delta between the range may 194 // have changed. 195 std::map<FileID, RewriteBuffer>::const_iterator I = 196 RewriteBuffers.find(StartFileID); 197 if (I == RewriteBuffers.end()) { 198 // If the buffer hasn't been rewritten, just return the text from the input. 199 const char *Ptr = SourceMgr->getCharacterData(Range.getBegin()); 200 201 // Adjust the end offset to the end of the last token, instead of being the 202 // start of the last token. 203 if (Range.isTokenRange()) 204 EndOff += 205 Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts); 206 return std::string(Ptr, Ptr+EndOff-StartOff); 207 } 208 209 const RewriteBuffer &RB = I->second; 210 EndOff = RB.getMappedOffset(EndOff, true); 211 StartOff = RB.getMappedOffset(StartOff); 212 213 // Adjust the end offset to the end of the last token, instead of being the 214 // start of the last token. 215 if (Range.isTokenRange()) 216 EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts); 217 218 // Advance the iterators to the right spot, yay for linear time algorithms. 219 RewriteBuffer::iterator Start = RB.begin(); 220 std::advance(Start, StartOff); 221 RewriteBuffer::iterator End = Start; 222 assert(EndOff >= StartOff && "Invalid iteration distance"); 223 std::advance(End, EndOff-StartOff); 224 225 return std::string(Start, End); 226 } 227 228 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc, 229 FileID &FID) const { 230 assert(Loc.isValid() && "Invalid location"); 231 std::pair<FileID, unsigned> V = SourceMgr->getDecomposedLoc(Loc); 232 FID = V.first; 233 return V.second; 234 } 235 236 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID. 237 RewriteBuffer &Rewriter::getEditBuffer(FileID FID) { 238 std::map<FileID, RewriteBuffer>::iterator I = 239 RewriteBuffers.lower_bound(FID); 240 if (I != RewriteBuffers.end() && I->first == FID) 241 return I->second; 242 I = RewriteBuffers.insert(I, std::make_pair(FID, RewriteBuffer())); 243 244 StringRef MB = SourceMgr->getBufferData(FID); 245 I->second.Initialize(MB.begin(), MB.end()); 246 247 return I->second; 248 } 249 250 /// InsertText - Insert the specified string at the specified location in the 251 /// original buffer. 252 bool Rewriter::InsertText(SourceLocation Loc, StringRef Str, 253 bool InsertAfter, bool indentNewLines) { 254 if (!isRewritable(Loc)) return true; 255 FileID FID; 256 unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID); 257 258 SmallString<128> indentedStr; 259 if (indentNewLines && Str.contains('\n')) { 260 StringRef MB = SourceMgr->getBufferData(FID); 261 262 unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1; 263 const SrcMgr::ContentCache *Content = 264 &SourceMgr->getSLocEntry(FID).getFile().getContentCache(); 265 unsigned lineOffs = Content->SourceLineCache[lineNo]; 266 267 // Find the whitespace at the start of the line. 268 StringRef indentSpace; 269 { 270 unsigned i = lineOffs; 271 while (isWhitespaceExceptNL(MB[i])) 272 ++i; 273 indentSpace = MB.substr(lineOffs, i-lineOffs); 274 } 275 276 SmallVector<StringRef, 4> lines; 277 Str.split(lines, "\n"); 278 279 for (unsigned i = 0, e = lines.size(); i != e; ++i) { 280 indentedStr += lines[i]; 281 if (i < e-1) { 282 indentedStr += '\n'; 283 indentedStr += indentSpace; 284 } 285 } 286 Str = indentedStr.str(); 287 } 288 289 getEditBuffer(FID).InsertText(StartOffs, Str, InsertAfter); 290 return false; 291 } 292 293 bool Rewriter::InsertTextAfterToken(SourceLocation Loc, StringRef Str) { 294 if (!isRewritable(Loc)) return true; 295 FileID FID; 296 unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID); 297 RewriteOptions rangeOpts; 298 rangeOpts.IncludeInsertsAtBeginOfRange = false; 299 StartOffs += getRangeSize(SourceRange(Loc, Loc), rangeOpts); 300 getEditBuffer(FID).InsertText(StartOffs, Str, /*InsertAfter*/true); 301 return false; 302 } 303 304 /// RemoveText - Remove the specified text region. 305 bool Rewriter::RemoveText(SourceLocation Start, unsigned Length, 306 RewriteOptions opts) { 307 if (!isRewritable(Start)) return true; 308 FileID FID; 309 unsigned StartOffs = getLocationOffsetAndFileID(Start, FID); 310 getEditBuffer(FID).RemoveText(StartOffs, Length, opts.RemoveLineIfEmpty); 311 return false; 312 } 313 314 /// ReplaceText - This method replaces a range of characters in the input 315 /// buffer with a new string. This is effectively a combined "remove/insert" 316 /// operation. 317 bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength, 318 StringRef NewStr) { 319 if (!isRewritable(Start)) return true; 320 FileID StartFileID; 321 unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID); 322 323 getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength, NewStr); 324 return false; 325 } 326 327 bool Rewriter::ReplaceText(SourceRange range, SourceRange replacementRange) { 328 if (!isRewritable(range.getBegin())) return true; 329 if (!isRewritable(range.getEnd())) return true; 330 if (replacementRange.isInvalid()) return true; 331 SourceLocation start = range.getBegin(); 332 unsigned origLength = getRangeSize(range); 333 unsigned newLength = getRangeSize(replacementRange); 334 FileID FID; 335 unsigned newOffs = getLocationOffsetAndFileID(replacementRange.getBegin(), 336 FID); 337 StringRef MB = SourceMgr->getBufferData(FID); 338 return ReplaceText(start, origLength, MB.substr(newOffs, newLength)); 339 } 340 341 bool Rewriter::IncreaseIndentation(CharSourceRange range, 342 SourceLocation parentIndent) { 343 if (range.isInvalid()) return true; 344 if (!isRewritable(range.getBegin())) return true; 345 if (!isRewritable(range.getEnd())) return true; 346 if (!isRewritable(parentIndent)) return true; 347 348 FileID StartFileID, EndFileID, parentFileID; 349 unsigned StartOff, EndOff, parentOff; 350 351 StartOff = getLocationOffsetAndFileID(range.getBegin(), StartFileID); 352 EndOff = getLocationOffsetAndFileID(range.getEnd(), EndFileID); 353 parentOff = getLocationOffsetAndFileID(parentIndent, parentFileID); 354 355 if (StartFileID != EndFileID || StartFileID != parentFileID) 356 return true; 357 if (StartOff > EndOff) 358 return true; 359 360 FileID FID = StartFileID; 361 StringRef MB = SourceMgr->getBufferData(FID); 362 363 unsigned parentLineNo = SourceMgr->getLineNumber(FID, parentOff) - 1; 364 unsigned startLineNo = SourceMgr->getLineNumber(FID, StartOff) - 1; 365 unsigned endLineNo = SourceMgr->getLineNumber(FID, EndOff) - 1; 366 367 const SrcMgr::ContentCache *Content = 368 &SourceMgr->getSLocEntry(FID).getFile().getContentCache(); 369 370 // Find where the lines start. 371 unsigned parentLineOffs = Content->SourceLineCache[parentLineNo]; 372 unsigned startLineOffs = Content->SourceLineCache[startLineNo]; 373 374 // Find the whitespace at the start of each line. 375 StringRef parentSpace, startSpace; 376 { 377 unsigned i = parentLineOffs; 378 while (isWhitespaceExceptNL(MB[i])) 379 ++i; 380 parentSpace = MB.substr(parentLineOffs, i-parentLineOffs); 381 382 i = startLineOffs; 383 while (isWhitespaceExceptNL(MB[i])) 384 ++i; 385 startSpace = MB.substr(startLineOffs, i-startLineOffs); 386 } 387 if (parentSpace.size() >= startSpace.size()) 388 return true; 389 if (!startSpace.startswith(parentSpace)) 390 return true; 391 392 StringRef indent = startSpace.substr(parentSpace.size()); 393 394 // Indent the lines between start/end offsets. 395 RewriteBuffer &RB = getEditBuffer(FID); 396 for (unsigned lineNo = startLineNo; lineNo <= endLineNo; ++lineNo) { 397 unsigned offs = Content->SourceLineCache[lineNo]; 398 unsigned i = offs; 399 while (isWhitespaceExceptNL(MB[i])) 400 ++i; 401 StringRef origIndent = MB.substr(offs, i-offs); 402 if (origIndent.startswith(startSpace)) 403 RB.InsertText(offs, indent, /*InsertAfter=*/false); 404 } 405 406 return false; 407 } 408 409 bool Rewriter::overwriteChangedFiles() { 410 bool AllWritten = true; 411 auto& Diag = getSourceMgr().getDiagnostics(); 412 unsigned OverwriteFailure = Diag.getCustomDiagID( 413 DiagnosticsEngine::Error, "unable to overwrite file %0: %1"); 414 for (buffer_iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) { 415 const FileEntry *Entry = getSourceMgr().getFileEntryForID(I->first); 416 if (auto Error = 417 llvm::writeToOutput(Entry->getName(), [&](llvm::raw_ostream &OS) { 418 I->second.write(OS); 419 return llvm::Error::success(); 420 })) { 421 Diag.Report(OverwriteFailure) 422 << Entry->getName() << llvm::toString(std::move(Error)); 423 AllWritten = false; 424 } 425 } 426 return !AllWritten; 427 } 428