Home | History | Annotate | Line # | Download | only in Rewrite
      1 //===- Rewriter.cpp - Code rewriting interface ----------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 //  This file defines the Rewriter class, which is used for code
     10 //  transformations.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "clang/Rewrite/Core/Rewriter.h"
     15 #include "clang/Basic/Diagnostic.h"
     16 #include "clang/Basic/DiagnosticIDs.h"
     17 #include "clang/Basic/FileManager.h"
     18 #include "clang/Basic/SourceLocation.h"
     19 #include "clang/Basic/SourceManager.h"
     20 #include "clang/Lex/Lexer.h"
     21 #include "clang/Rewrite/Core/RewriteBuffer.h"
     22 #include "clang/Rewrite/Core/RewriteRope.h"
     23 #include "llvm/ADT/SmallString.h"
     24 #include "llvm/ADT/SmallVector.h"
     25 #include "llvm/ADT/StringRef.h"
     26 #include "llvm/Support/FileSystem.h"
     27 #include "llvm/Support/raw_ostream.h"
     28 #include <cassert>
     29 #include <iterator>
     30 #include <map>
     31 #include <memory>
     32 #include <system_error>
     33 #include <utility>
     34 
     35 using namespace clang;
     36 
     37 raw_ostream &RewriteBuffer::write(raw_ostream &os) const {
     38   // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the
     39   // character iterator.
     40   for (RopePieceBTreeIterator I = begin(), E = end(); I != E;
     41        I.MoveToNextPiece())
     42     os << I.piece();
     43   return os;
     44 }
     45 
/// Tell whether \p c is a whitespace character other than the line feed,
/// i.e. one of ' ', '\\t', '\\f', '\\v', '\\r'.
static inline bool isWhitespaceExceptNL(unsigned char c) {
  return c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r';
}
     60 
     61 void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size,
     62                                bool removeLineIfEmpty) {
     63   // Nothing to remove, exit early.
     64   if (Size == 0) return;
     65 
     66   unsigned RealOffset = getMappedOffset(OrigOffset, true);
     67   assert(RealOffset+Size <= Buffer.size() && "Invalid location");
     68 
     69   // Remove the dead characters.
     70   Buffer.erase(RealOffset, Size);
     71 
     72   // Add a delta so that future changes are offset correctly.
     73   AddReplaceDelta(OrigOffset, -Size);
     74 
     75   if (removeLineIfEmpty) {
     76     // Find the line that the remove occurred and if it is completely empty
     77     // remove the line as well.
     78 
     79     iterator curLineStart = begin();
     80     unsigned curLineStartOffs = 0;
     81     iterator posI = begin();
     82     for (unsigned i = 0; i != RealOffset; ++i) {
     83       if (*posI == '\n') {
     84         curLineStart = posI;
     85         ++curLineStart;
     86         curLineStartOffs = i + 1;
     87       }
     88       ++posI;
     89     }
     90 
     91     unsigned lineSize = 0;
     92     posI = curLineStart;
     93     while (posI != end() && isWhitespaceExceptNL(*posI)) {
     94       ++posI;
     95       ++lineSize;
     96     }
     97     if (posI != end() && *posI == '\n') {
     98       Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/);
     99       // FIXME: Here, the offset of the start of the line is supposed to be
    100       // expressed in terms of the original input not the "real" rewrite
    101       // buffer.  How do we compute that reliably?  It might be tempting to use
    102       // curLineStartOffs + OrigOffset - RealOffset, but that assumes the
    103       // difference between the original and real offset is the same at the
    104       // removed text and at the start of the line, but that's not true if
    105       // edits were previously made earlier on the line.  This bug is also
    106       // documented by a FIXME on the definition of
    107       // clang::Rewriter::RewriteOptions::RemoveLineIfEmpty.  A reproducer for
    108       // the implementation below is the test RemoveLineIfEmpty in
    109       // clang/unittests/Rewrite/RewriteBufferTest.cpp.
    110       AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/));
    111     }
    112   }
    113 }
    114 
    115 void RewriteBuffer::InsertText(unsigned OrigOffset, StringRef Str,
    116                                bool InsertAfter) {
    117   // Nothing to insert, exit early.
    118   if (Str.empty()) return;
    119 
    120   unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter);
    121   Buffer.insert(RealOffset, Str.begin(), Str.end());
    122 
    123   // Add a delta so that future changes are offset correctly.
    124   AddInsertDelta(OrigOffset, Str.size());
    125 }
    126 
    127 /// ReplaceText - This method replaces a range of characters in the input
    128 /// buffer with a new string.  This is effectively a combined "remove+insert"
    129 /// operation.
    130 void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength,
    131                                 StringRef NewStr) {
    132   unsigned RealOffset = getMappedOffset(OrigOffset, true);
    133   Buffer.erase(RealOffset, OrigLength);
    134   Buffer.insert(RealOffset, NewStr.begin(), NewStr.end());
    135   if (OrigLength != NewStr.size())
    136     AddReplaceDelta(OrigOffset, NewStr.size() - OrigLength);
    137 }
    138 
    139 //===----------------------------------------------------------------------===//
    140 // Rewriter class
    141 //===----------------------------------------------------------------------===//
    142 
    143 /// getRangeSize - Return the size in bytes of the specified range if they
    144 /// are in the same file.  If not, this returns -1.
    145 int Rewriter::getRangeSize(const CharSourceRange &Range,
    146                            RewriteOptions opts) const {
    147   if (!isRewritable(Range.getBegin()) ||
    148       !isRewritable(Range.getEnd())) return -1;
    149 
    150   FileID StartFileID, EndFileID;
    151   unsigned StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
    152   unsigned EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
    153 
    154   if (StartFileID != EndFileID)
    155     return -1;
    156 
    157   // If edits have been made to this buffer, the delta between the range may
    158   // have changed.
    159   std::map<FileID, RewriteBuffer>::const_iterator I =
    160     RewriteBuffers.find(StartFileID);
    161   if (I != RewriteBuffers.end()) {
    162     const RewriteBuffer &RB = I->second;
    163     EndOff = RB.getMappedOffset(EndOff, opts.IncludeInsertsAtEndOfRange);
    164     StartOff = RB.getMappedOffset(StartOff, !opts.IncludeInsertsAtBeginOfRange);
    165   }
    166 
    167   // Adjust the end offset to the end of the last token, instead of being the
    168   // start of the last token if this is a token range.
    169   if (Range.isTokenRange())
    170     EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
    171 
    172   return EndOff-StartOff;
    173 }
    174 
    175 int Rewriter::getRangeSize(SourceRange Range, RewriteOptions opts) const {
    176   return getRangeSize(CharSourceRange::getTokenRange(Range), opts);
    177 }
    178 
    179 /// getRewrittenText - Return the rewritten form of the text in the specified
    180 /// range.  If the start or end of the range was unrewritable or if they are
    181 /// in different buffers, this returns an empty string.
    182 ///
    183 /// Note that this method is not particularly efficient.
    184 std::string Rewriter::getRewrittenText(CharSourceRange Range) const {
    185   if (!isRewritable(Range.getBegin()) ||
    186       !isRewritable(Range.getEnd()))
    187     return {};
    188 
    189   FileID StartFileID, EndFileID;
    190   unsigned StartOff, EndOff;
    191   StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID);
    192   EndOff   = getLocationOffsetAndFileID(Range.getEnd(), EndFileID);
    193 
    194   if (StartFileID != EndFileID)
    195     return {}; // Start and end in different buffers.
    196 
    197   // If edits have been made to this buffer, the delta between the range may
    198   // have changed.
    199   std::map<FileID, RewriteBuffer>::const_iterator I =
    200     RewriteBuffers.find(StartFileID);
    201   if (I == RewriteBuffers.end()) {
    202     // If the buffer hasn't been rewritten, just return the text from the input.
    203     const char *Ptr = SourceMgr->getCharacterData(Range.getBegin());
    204 
    205     // Adjust the end offset to the end of the last token, instead of being the
    206     // start of the last token.
    207     if (Range.isTokenRange())
    208       EndOff +=
    209           Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
    210     return std::string(Ptr, Ptr+EndOff-StartOff);
    211   }
    212 
    213   const RewriteBuffer &RB = I->second;
    214   EndOff = RB.getMappedOffset(EndOff, true);
    215   StartOff = RB.getMappedOffset(StartOff);
    216 
    217   // Adjust the end offset to the end of the last token, instead of being the
    218   // start of the last token.
    219   if (Range.isTokenRange())
    220     EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts);
    221 
    222   // Advance the iterators to the right spot, yay for linear time algorithms.
    223   RewriteBuffer::iterator Start = RB.begin();
    224   std::advance(Start, StartOff);
    225   RewriteBuffer::iterator End = Start;
    226   std::advance(End, EndOff-StartOff);
    227 
    228   return std::string(Start, End);
    229 }
    230 
    231 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc,
    232                                               FileID &FID) const {
    233   assert(Loc.isValid() && "Invalid location");
    234   std::pair<FileID, unsigned> V = SourceMgr->getDecomposedLoc(Loc);
    235   FID = V.first;
    236   return V.second;
    237 }
    238 
    239 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
    240 RewriteBuffer &Rewriter::getEditBuffer(FileID FID) {
    241   std::map<FileID, RewriteBuffer>::iterator I =
    242     RewriteBuffers.lower_bound(FID);
    243   if (I != RewriteBuffers.end() && I->first == FID)
    244     return I->second;
    245   I = RewriteBuffers.insert(I, std::make_pair(FID, RewriteBuffer()));
    246 
    247   StringRef MB = SourceMgr->getBufferData(FID);
    248   I->second.Initialize(MB.begin(), MB.end());
    249 
    250   return I->second;
    251 }
    252 
    253 /// InsertText - Insert the specified string at the specified location in the
    254 /// original buffer.
    255 bool Rewriter::InsertText(SourceLocation Loc, StringRef Str,
    256                           bool InsertAfter, bool indentNewLines) {
    257   if (!isRewritable(Loc)) return true;
    258   FileID FID;
    259   unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
    260 
    261   SmallString<128> indentedStr;
    262   if (indentNewLines && Str.find('\n') != StringRef::npos) {
    263     StringRef MB = SourceMgr->getBufferData(FID);
    264 
    265     unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1;
    266     const SrcMgr::ContentCache *Content =
    267         &SourceMgr->getSLocEntry(FID).getFile().getContentCache();
    268     unsigned lineOffs = Content->SourceLineCache[lineNo];
    269 
    270     // Find the whitespace at the start of the line.
    271     StringRef indentSpace;
    272     {
    273       unsigned i = lineOffs;
    274       while (isWhitespaceExceptNL(MB[i]))
    275         ++i;
    276       indentSpace = MB.substr(lineOffs, i-lineOffs);
    277     }
    278 
    279     SmallVector<StringRef, 4> lines;
    280     Str.split(lines, "\n");
    281 
    282     for (unsigned i = 0, e = lines.size(); i != e; ++i) {
    283       indentedStr += lines[i];
    284       if (i < e-1) {
    285         indentedStr += '\n';
    286         indentedStr += indentSpace;
    287       }
    288     }
    289     Str = indentedStr.str();
    290   }
    291 
    292   getEditBuffer(FID).InsertText(StartOffs, Str, InsertAfter);
    293   return false;
    294 }
    295 
    296 bool Rewriter::InsertTextAfterToken(SourceLocation Loc, StringRef Str) {
    297   if (!isRewritable(Loc)) return true;
    298   FileID FID;
    299   unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID);
    300   RewriteOptions rangeOpts;
    301   rangeOpts.IncludeInsertsAtBeginOfRange = false;
    302   StartOffs += getRangeSize(SourceRange(Loc, Loc), rangeOpts);
    303   getEditBuffer(FID).InsertText(StartOffs, Str, /*InsertAfter*/true);
    304   return false;
    305 }
    306 
    307 /// RemoveText - Remove the specified text region.
    308 bool Rewriter::RemoveText(SourceLocation Start, unsigned Length,
    309                           RewriteOptions opts) {
    310   if (!isRewritable(Start)) return true;
    311   FileID FID;
    312   unsigned StartOffs = getLocationOffsetAndFileID(Start, FID);
    313   getEditBuffer(FID).RemoveText(StartOffs, Length, opts.RemoveLineIfEmpty);
    314   return false;
    315 }
    316 
    317 /// ReplaceText - This method replaces a range of characters in the input
    318 /// buffer with a new string.  This is effectively a combined "remove/insert"
    319 /// operation.
    320 bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength,
    321                            StringRef NewStr) {
    322   if (!isRewritable(Start)) return true;
    323   FileID StartFileID;
    324   unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID);
    325 
    326   getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength, NewStr);
    327   return false;
    328 }
    329 
    330 bool Rewriter::ReplaceText(SourceRange range, SourceRange replacementRange) {
    331   if (!isRewritable(range.getBegin())) return true;
    332   if (!isRewritable(range.getEnd())) return true;
    333   if (replacementRange.isInvalid()) return true;
    334   SourceLocation start = range.getBegin();
    335   unsigned origLength = getRangeSize(range);
    336   unsigned newLength = getRangeSize(replacementRange);
    337   FileID FID;
    338   unsigned newOffs = getLocationOffsetAndFileID(replacementRange.getBegin(),
    339                                                 FID);
    340   StringRef MB = SourceMgr->getBufferData(FID);
    341   return ReplaceText(start, origLength, MB.substr(newOffs, newLength));
    342 }
    343 
    344 bool Rewriter::IncreaseIndentation(CharSourceRange range,
    345                                    SourceLocation parentIndent) {
    346   if (range.isInvalid()) return true;
    347   if (!isRewritable(range.getBegin())) return true;
    348   if (!isRewritable(range.getEnd())) return true;
    349   if (!isRewritable(parentIndent)) return true;
    350 
    351   FileID StartFileID, EndFileID, parentFileID;
    352   unsigned StartOff, EndOff, parentOff;
    353 
    354   StartOff = getLocationOffsetAndFileID(range.getBegin(), StartFileID);
    355   EndOff   = getLocationOffsetAndFileID(range.getEnd(), EndFileID);
    356   parentOff = getLocationOffsetAndFileID(parentIndent, parentFileID);
    357 
    358   if (StartFileID != EndFileID || StartFileID != parentFileID)
    359     return true;
    360   if (StartOff > EndOff)
    361     return true;
    362 
    363   FileID FID = StartFileID;
    364   StringRef MB = SourceMgr->getBufferData(FID);
    365 
    366   unsigned parentLineNo = SourceMgr->getLineNumber(FID, parentOff) - 1;
    367   unsigned startLineNo = SourceMgr->getLineNumber(FID, StartOff) - 1;
    368   unsigned endLineNo = SourceMgr->getLineNumber(FID, EndOff) - 1;
    369 
    370   const SrcMgr::ContentCache *Content =
    371       &SourceMgr->getSLocEntry(FID).getFile().getContentCache();
    372 
    373   // Find where the lines start.
    374   unsigned parentLineOffs = Content->SourceLineCache[parentLineNo];
    375   unsigned startLineOffs = Content->SourceLineCache[startLineNo];
    376 
    377   // Find the whitespace at the start of each line.
    378   StringRef parentSpace, startSpace;
    379   {
    380     unsigned i = parentLineOffs;
    381     while (isWhitespaceExceptNL(MB[i]))
    382       ++i;
    383     parentSpace = MB.substr(parentLineOffs, i-parentLineOffs);
    384 
    385     i = startLineOffs;
    386     while (isWhitespaceExceptNL(MB[i]))
    387       ++i;
    388     startSpace = MB.substr(startLineOffs, i-startLineOffs);
    389   }
    390   if (parentSpace.size() >= startSpace.size())
    391     return true;
    392   if (!startSpace.startswith(parentSpace))
    393     return true;
    394 
    395   StringRef indent = startSpace.substr(parentSpace.size());
    396 
    397   // Indent the lines between start/end offsets.
    398   RewriteBuffer &RB = getEditBuffer(FID);
    399   for (unsigned lineNo = startLineNo; lineNo <= endLineNo; ++lineNo) {
    400     unsigned offs = Content->SourceLineCache[lineNo];
    401     unsigned i = offs;
    402     while (isWhitespaceExceptNL(MB[i]))
    403       ++i;
    404     StringRef origIndent = MB.substr(offs, i-offs);
    405     if (origIndent.startswith(startSpace))
    406       RB.InsertText(offs, indent, /*InsertAfter=*/false);
    407   }
    408 
    409   return false;
    410 }
    411 
    412 namespace {
    413 
    414 // A wrapper for a file stream that atomically overwrites the target.
    415 //
    416 // Creates a file output stream for a temporary file in the constructor,
    417 // which is later accessible via getStream() if ok() return true.
    418 // Flushes the stream and moves the temporary file to the target location
    419 // in the destructor.
    420 class AtomicallyMovedFile {
    421 public:
    422   AtomicallyMovedFile(DiagnosticsEngine &Diagnostics, StringRef Filename,
    423                       bool &AllWritten)
    424       : Diagnostics(Diagnostics), Filename(Filename), AllWritten(AllWritten) {
    425     TempFilename = Filename;
    426     TempFilename += "-%%%%%%%%";
    427     int FD;
    428     if (llvm::sys::fs::createUniqueFile(TempFilename, FD, TempFilename)) {
    429       AllWritten = false;
    430       Diagnostics.Report(clang::diag::err_unable_to_make_temp)
    431         << TempFilename;
    432     } else {
    433       FileStream.reset(new llvm::raw_fd_ostream(FD, /*shouldClose=*/true));
    434     }
    435   }
    436 
    437   ~AtomicallyMovedFile() {
    438     if (!ok()) return;
    439 
    440     // Close (will also flush) theFileStream.
    441     FileStream->close();
    442     if (std::error_code ec = llvm::sys::fs::rename(TempFilename, Filename)) {
    443       AllWritten = false;
    444       Diagnostics.Report(clang::diag::err_unable_to_rename_temp)
    445         << TempFilename << Filename << ec.message();
    446       // If the remove fails, there's not a lot we can do - this is already an
    447       // error.
    448       llvm::sys::fs::remove(TempFilename);
    449     }
    450   }
    451 
    452   bool ok() { return (bool)FileStream; }
    453   raw_ostream &getStream() { return *FileStream; }
    454 
    455 private:
    456   DiagnosticsEngine &Diagnostics;
    457   StringRef Filename;
    458   SmallString<128> TempFilename;
    459   std::unique_ptr<llvm::raw_fd_ostream> FileStream;
    460   bool &AllWritten;
    461 };
    462 
    463 } // namespace
    464 
    465 bool Rewriter::overwriteChangedFiles() {
    466   bool AllWritten = true;
    467   for (buffer_iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) {
    468     const FileEntry *Entry =
    469         getSourceMgr().getFileEntryForID(I->first);
    470     AtomicallyMovedFile File(getSourceMgr().getDiagnostics(), Entry->getName(),
    471                              AllWritten);
    472     if (File.ok()) {
    473       I->second.write(File.getStream());
    474     }
    475   }
    476   return !AllWritten;
    477 }
    478