/src/llvm-project/clang/lib/Rewrite/Rewriter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- Rewriter.cpp - Code rewriting interface ----------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines the Rewriter class, which is used for code |
10 | | // transformations. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "clang/Rewrite/Core/Rewriter.h" |
15 | | #include "clang/Basic/Diagnostic.h" |
16 | | #include "clang/Basic/DiagnosticIDs.h" |
17 | | #include "clang/Basic/SourceLocation.h" |
18 | | #include "clang/Basic/SourceManager.h" |
19 | | #include "clang/Lex/Lexer.h" |
20 | | #include "clang/Rewrite/Core/RewriteBuffer.h" |
21 | | #include "clang/Rewrite/Core/RewriteRope.h" |
22 | | #include "llvm/ADT/SmallVector.h" |
23 | | #include "llvm/ADT/StringRef.h" |
24 | | #include "llvm/Support/Error.h" |
25 | | #include "llvm/Support/raw_ostream.h" |
26 | | #include <cassert> |
27 | | #include <iterator> |
28 | | #include <map> |
29 | | #include <utility> |
30 | | |
31 | | using namespace clang; |
32 | | |
33 | 544 | raw_ostream &RewriteBuffer::write(raw_ostream &os) const { |
34 | | // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the |
35 | | // character iterator. |
36 | 3.57M | for (RopePieceBTreeIterator I = begin(), E = end(); I != E; |
37 | 3.57M | I.MoveToNextPiece()) |
38 | 3.57M | os << I.piece(); |
39 | 544 | return os; |
40 | 544 | } |
41 | | |
/// Return true if this character is non-new-line whitespace:
/// ' ', '\\t', '\\f', '\\v', '\\r'.
///
/// The new-line character itself ('\\n') is deliberately excluded.
static inline bool isWhitespaceExceptNL(unsigned char c) {
  return c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r';
}
56 | | |
57 | | void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size, |
58 | 0 | bool removeLineIfEmpty) { |
59 | | // Nothing to remove, exit early. |
60 | 0 | if (Size == 0) return; |
61 | | |
62 | 0 | unsigned RealOffset = getMappedOffset(OrigOffset, true); |
63 | 0 | assert(RealOffset+Size <= Buffer.size() && "Invalid location"); |
64 | | |
65 | | // Remove the dead characters. |
66 | 0 | Buffer.erase(RealOffset, Size); |
67 | | |
68 | | // Add a delta so that future changes are offset correctly. |
69 | 0 | AddReplaceDelta(OrigOffset, -Size); |
70 | |
|
71 | 0 | if (removeLineIfEmpty) { |
72 | | // Find the line that the remove occurred and if it is completely empty |
73 | | // remove the line as well. |
74 | |
|
75 | 0 | iterator curLineStart = begin(); |
76 | 0 | unsigned curLineStartOffs = 0; |
77 | 0 | iterator posI = begin(); |
78 | 0 | for (unsigned i = 0; i != RealOffset; ++i) { |
79 | 0 | if (*posI == '\n') { |
80 | 0 | curLineStart = posI; |
81 | 0 | ++curLineStart; |
82 | 0 | curLineStartOffs = i + 1; |
83 | 0 | } |
84 | 0 | ++posI; |
85 | 0 | } |
86 | |
|
87 | 0 | unsigned lineSize = 0; |
88 | 0 | posI = curLineStart; |
89 | 0 | while (posI != end() && isWhitespaceExceptNL(*posI)) { |
90 | 0 | ++posI; |
91 | 0 | ++lineSize; |
92 | 0 | } |
93 | 0 | if (posI != end() && *posI == '\n') { |
94 | 0 | Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/); |
95 | | // FIXME: Here, the offset of the start of the line is supposed to be |
96 | | // expressed in terms of the original input not the "real" rewrite |
97 | | // buffer. How do we compute that reliably? It might be tempting to use |
98 | | // curLineStartOffs + OrigOffset - RealOffset, but that assumes the |
99 | | // difference between the original and real offset is the same at the |
100 | | // removed text and at the start of the line, but that's not true if |
101 | | // edits were previously made earlier on the line. This bug is also |
102 | | // documented by a FIXME on the definition of |
103 | | // clang::Rewriter::RewriteOptions::RemoveLineIfEmpty. A reproducer for |
104 | | // the implementation below is the test RemoveLineIfEmpty in |
105 | | // clang/unittests/Rewrite/RewriteBufferTest.cpp. |
106 | 0 | AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/)); |
107 | 0 | } |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | void RewriteBuffer::InsertText(unsigned OrigOffset, StringRef Str, |
112 | 0 | bool InsertAfter) { |
113 | | // Nothing to insert, exit early. |
114 | 0 | if (Str.empty()) return; |
115 | | |
116 | 0 | unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter); |
117 | 0 | Buffer.insert(RealOffset, Str.begin(), Str.end()); |
118 | | |
119 | | // Add a delta so that future changes are offset correctly. |
120 | 0 | AddInsertDelta(OrigOffset, Str.size()); |
121 | 0 | } |
122 | | |
123 | | /// ReplaceText - This method replaces a range of characters in the input |
124 | | /// buffer with a new string. This is effectively a combined "remove+insert" |
125 | | /// operation. |
126 | | void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength, |
127 | 1.85M | StringRef NewStr) { |
128 | 1.85M | unsigned RealOffset = getMappedOffset(OrigOffset, true); |
129 | 1.85M | Buffer.erase(RealOffset, OrigLength); |
130 | 1.85M | Buffer.insert(RealOffset, NewStr.begin(), NewStr.end()); |
131 | 1.85M | if (OrigLength != NewStr.size()) |
132 | 1.76M | AddReplaceDelta(OrigOffset, NewStr.size() - OrigLength); |
133 | 1.85M | } |
134 | | |
135 | | //===----------------------------------------------------------------------===// |
136 | | // Rewriter class |
137 | | //===----------------------------------------------------------------------===// |
138 | | |
139 | | /// getRangeSize - Return the size in bytes of the specified range if they |
140 | | /// are in the same file. If not, this returns -1. |
141 | | int Rewriter::getRangeSize(const CharSourceRange &Range, |
142 | 0 | RewriteOptions opts) const { |
143 | 0 | if (!isRewritable(Range.getBegin()) || |
144 | 0 | !isRewritable(Range.getEnd())) return -1; |
145 | | |
146 | 0 | FileID StartFileID, EndFileID; |
147 | 0 | unsigned StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID); |
148 | 0 | unsigned EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID); |
149 | |
|
150 | 0 | if (StartFileID != EndFileID) |
151 | 0 | return -1; |
152 | | |
153 | | // If edits have been made to this buffer, the delta between the range may |
154 | | // have changed. |
155 | 0 | std::map<FileID, RewriteBuffer>::const_iterator I = |
156 | 0 | RewriteBuffers.find(StartFileID); |
157 | 0 | if (I != RewriteBuffers.end()) { |
158 | 0 | const RewriteBuffer &RB = I->second; |
159 | 0 | EndOff = RB.getMappedOffset(EndOff, opts.IncludeInsertsAtEndOfRange); |
160 | 0 | StartOff = RB.getMappedOffset(StartOff, !opts.IncludeInsertsAtBeginOfRange); |
161 | 0 | } |
162 | | |
163 | | // Adjust the end offset to the end of the last token, instead of being the |
164 | | // start of the last token if this is a token range. |
165 | 0 | if (Range.isTokenRange()) |
166 | 0 | EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts); |
167 | |
|
168 | 0 | return EndOff-StartOff; |
169 | 0 | } |
170 | | |
171 | 0 | int Rewriter::getRangeSize(SourceRange Range, RewriteOptions opts) const { |
172 | 0 | return getRangeSize(CharSourceRange::getTokenRange(Range), opts); |
173 | 0 | } |
174 | | |
175 | | /// getRewrittenText - Return the rewritten form of the text in the specified |
176 | | /// range. If the start or end of the range was unrewritable or if they are |
177 | | /// in different buffers, this returns an empty string. |
178 | | /// |
179 | | /// Note that this method is not particularly efficient. |
180 | 0 | std::string Rewriter::getRewrittenText(CharSourceRange Range) const { |
181 | 0 | if (!isRewritable(Range.getBegin()) || |
182 | 0 | !isRewritable(Range.getEnd())) |
183 | 0 | return {}; |
184 | | |
185 | 0 | FileID StartFileID, EndFileID; |
186 | 0 | unsigned StartOff, EndOff; |
187 | 0 | StartOff = getLocationOffsetAndFileID(Range.getBegin(), StartFileID); |
188 | 0 | EndOff = getLocationOffsetAndFileID(Range.getEnd(), EndFileID); |
189 | |
|
190 | 0 | if (StartFileID != EndFileID) |
191 | 0 | return {}; // Start and end in different buffers. |
192 | | |
193 | | // If edits have been made to this buffer, the delta between the range may |
194 | | // have changed. |
195 | 0 | std::map<FileID, RewriteBuffer>::const_iterator I = |
196 | 0 | RewriteBuffers.find(StartFileID); |
197 | 0 | if (I == RewriteBuffers.end()) { |
198 | | // If the buffer hasn't been rewritten, just return the text from the input. |
199 | 0 | const char *Ptr = SourceMgr->getCharacterData(Range.getBegin()); |
200 | | |
201 | | // Adjust the end offset to the end of the last token, instead of being the |
202 | | // start of the last token. |
203 | 0 | if (Range.isTokenRange()) |
204 | 0 | EndOff += |
205 | 0 | Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts); |
206 | 0 | return std::string(Ptr, Ptr+EndOff-StartOff); |
207 | 0 | } |
208 | | |
209 | 0 | const RewriteBuffer &RB = I->second; |
210 | 0 | EndOff = RB.getMappedOffset(EndOff, true); |
211 | 0 | StartOff = RB.getMappedOffset(StartOff); |
212 | | |
213 | | // Adjust the end offset to the end of the last token, instead of being the |
214 | | // start of the last token. |
215 | 0 | if (Range.isTokenRange()) |
216 | 0 | EndOff += Lexer::MeasureTokenLength(Range.getEnd(), *SourceMgr, *LangOpts); |
217 | | |
218 | | // Advance the iterators to the right spot, yay for linear time algorithms. |
219 | 0 | RewriteBuffer::iterator Start = RB.begin(); |
220 | 0 | std::advance(Start, StartOff); |
221 | 0 | RewriteBuffer::iterator End = Start; |
222 | 0 | assert(EndOff >= StartOff && "Invalid iteration distance"); |
223 | 0 | std::advance(End, EndOff-StartOff); |
224 | |
|
225 | 0 | return std::string(Start, End); |
226 | 0 | } |
227 | | |
228 | | unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc, |
229 | 1.85M | FileID &FID) const { |
230 | 1.85M | assert(Loc.isValid() && "Invalid location"); |
231 | 0 | std::pair<FileID, unsigned> V = SourceMgr->getDecomposedLoc(Loc); |
232 | 1.85M | FID = V.first; |
233 | 1.85M | return V.second; |
234 | 1.85M | } |
235 | | |
236 | | /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID. |
237 | 1.85M | RewriteBuffer &Rewriter::getEditBuffer(FileID FID) { |
238 | 1.85M | std::map<FileID, RewriteBuffer>::iterator I = |
239 | 1.85M | RewriteBuffers.lower_bound(FID); |
240 | 1.85M | if (I != RewriteBuffers.end() && I->first == FID) |
241 | 1.85M | return I->second; |
242 | 544 | I = RewriteBuffers.insert(I, std::make_pair(FID, RewriteBuffer())); |
243 | | |
244 | 544 | StringRef MB = SourceMgr->getBufferData(FID); |
245 | 544 | I->second.Initialize(MB.begin(), MB.end()); |
246 | | |
247 | 544 | return I->second; |
248 | 1.85M | } |
249 | | |
250 | | /// InsertText - Insert the specified string at the specified location in the |
251 | | /// original buffer. |
252 | | bool Rewriter::InsertText(SourceLocation Loc, StringRef Str, |
253 | 0 | bool InsertAfter, bool indentNewLines) { |
254 | 0 | if (!isRewritable(Loc)) return true; |
255 | 0 | FileID FID; |
256 | 0 | unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID); |
257 | |
|
258 | 0 | SmallString<128> indentedStr; |
259 | 0 | if (indentNewLines && Str.contains('\n')) { |
260 | 0 | StringRef MB = SourceMgr->getBufferData(FID); |
261 | |
|
262 | 0 | unsigned lineNo = SourceMgr->getLineNumber(FID, StartOffs) - 1; |
263 | 0 | const SrcMgr::ContentCache *Content = |
264 | 0 | &SourceMgr->getSLocEntry(FID).getFile().getContentCache(); |
265 | 0 | unsigned lineOffs = Content->SourceLineCache[lineNo]; |
266 | | |
267 | | // Find the whitespace at the start of the line. |
268 | 0 | StringRef indentSpace; |
269 | 0 | { |
270 | 0 | unsigned i = lineOffs; |
271 | 0 | while (isWhitespaceExceptNL(MB[i])) |
272 | 0 | ++i; |
273 | 0 | indentSpace = MB.substr(lineOffs, i-lineOffs); |
274 | 0 | } |
275 | |
|
276 | 0 | SmallVector<StringRef, 4> lines; |
277 | 0 | Str.split(lines, "\n"); |
278 | |
|
279 | 0 | for (unsigned i = 0, e = lines.size(); i != e; ++i) { |
280 | 0 | indentedStr += lines[i]; |
281 | 0 | if (i < e-1) { |
282 | 0 | indentedStr += '\n'; |
283 | 0 | indentedStr += indentSpace; |
284 | 0 | } |
285 | 0 | } |
286 | 0 | Str = indentedStr.str(); |
287 | 0 | } |
288 | |
|
289 | 0 | getEditBuffer(FID).InsertText(StartOffs, Str, InsertAfter); |
290 | 0 | return false; |
291 | 0 | } |
292 | | |
293 | 0 | bool Rewriter::InsertTextAfterToken(SourceLocation Loc, StringRef Str) { |
294 | 0 | if (!isRewritable(Loc)) return true; |
295 | 0 | FileID FID; |
296 | 0 | unsigned StartOffs = getLocationOffsetAndFileID(Loc, FID); |
297 | 0 | RewriteOptions rangeOpts; |
298 | 0 | rangeOpts.IncludeInsertsAtBeginOfRange = false; |
299 | 0 | StartOffs += getRangeSize(SourceRange(Loc, Loc), rangeOpts); |
300 | 0 | getEditBuffer(FID).InsertText(StartOffs, Str, /*InsertAfter*/true); |
301 | 0 | return false; |
302 | 0 | } |
303 | | |
304 | | /// RemoveText - Remove the specified text region. |
305 | | bool Rewriter::RemoveText(SourceLocation Start, unsigned Length, |
306 | 0 | RewriteOptions opts) { |
307 | 0 | if (!isRewritable(Start)) return true; |
308 | 0 | FileID FID; |
309 | 0 | unsigned StartOffs = getLocationOffsetAndFileID(Start, FID); |
310 | 0 | getEditBuffer(FID).RemoveText(StartOffs, Length, opts.RemoveLineIfEmpty); |
311 | 0 | return false; |
312 | 0 | } |
313 | | |
314 | | /// ReplaceText - This method replaces a range of characters in the input |
315 | | /// buffer with a new string. This is effectively a combined "remove/insert" |
316 | | /// operation. |
317 | | bool Rewriter::ReplaceText(SourceLocation Start, unsigned OrigLength, |
318 | 1.85M | StringRef NewStr) { |
319 | 1.85M | if (!isRewritable(Start)) return true; |
320 | 1.85M | FileID StartFileID; |
321 | 1.85M | unsigned StartOffs = getLocationOffsetAndFileID(Start, StartFileID); |
322 | | |
323 | 1.85M | getEditBuffer(StartFileID).ReplaceText(StartOffs, OrigLength, NewStr); |
324 | 1.85M | return false; |
325 | 1.85M | } |
326 | | |
327 | 0 | bool Rewriter::ReplaceText(SourceRange range, SourceRange replacementRange) { |
328 | 0 | if (!isRewritable(range.getBegin())) return true; |
329 | 0 | if (!isRewritable(range.getEnd())) return true; |
330 | 0 | if (replacementRange.isInvalid()) return true; |
331 | 0 | SourceLocation start = range.getBegin(); |
332 | 0 | unsigned origLength = getRangeSize(range); |
333 | 0 | unsigned newLength = getRangeSize(replacementRange); |
334 | 0 | FileID FID; |
335 | 0 | unsigned newOffs = getLocationOffsetAndFileID(replacementRange.getBegin(), |
336 | 0 | FID); |
337 | 0 | StringRef MB = SourceMgr->getBufferData(FID); |
338 | 0 | return ReplaceText(start, origLength, MB.substr(newOffs, newLength)); |
339 | 0 | } |
340 | | |
341 | | bool Rewriter::IncreaseIndentation(CharSourceRange range, |
342 | 0 | SourceLocation parentIndent) { |
343 | 0 | if (range.isInvalid()) return true; |
344 | 0 | if (!isRewritable(range.getBegin())) return true; |
345 | 0 | if (!isRewritable(range.getEnd())) return true; |
346 | 0 | if (!isRewritable(parentIndent)) return true; |
347 | | |
348 | 0 | FileID StartFileID, EndFileID, parentFileID; |
349 | 0 | unsigned StartOff, EndOff, parentOff; |
350 | |
|
351 | 0 | StartOff = getLocationOffsetAndFileID(range.getBegin(), StartFileID); |
352 | 0 | EndOff = getLocationOffsetAndFileID(range.getEnd(), EndFileID); |
353 | 0 | parentOff = getLocationOffsetAndFileID(parentIndent, parentFileID); |
354 | |
|
355 | 0 | if (StartFileID != EndFileID || StartFileID != parentFileID) |
356 | 0 | return true; |
357 | 0 | if (StartOff > EndOff) |
358 | 0 | return true; |
359 | | |
360 | 0 | FileID FID = StartFileID; |
361 | 0 | StringRef MB = SourceMgr->getBufferData(FID); |
362 | |
|
363 | 0 | unsigned parentLineNo = SourceMgr->getLineNumber(FID, parentOff) - 1; |
364 | 0 | unsigned startLineNo = SourceMgr->getLineNumber(FID, StartOff) - 1; |
365 | 0 | unsigned endLineNo = SourceMgr->getLineNumber(FID, EndOff) - 1; |
366 | |
|
367 | 0 | const SrcMgr::ContentCache *Content = |
368 | 0 | &SourceMgr->getSLocEntry(FID).getFile().getContentCache(); |
369 | | |
370 | | // Find where the lines start. |
371 | 0 | unsigned parentLineOffs = Content->SourceLineCache[parentLineNo]; |
372 | 0 | unsigned startLineOffs = Content->SourceLineCache[startLineNo]; |
373 | | |
374 | | // Find the whitespace at the start of each line. |
375 | 0 | StringRef parentSpace, startSpace; |
376 | 0 | { |
377 | 0 | unsigned i = parentLineOffs; |
378 | 0 | while (isWhitespaceExceptNL(MB[i])) |
379 | 0 | ++i; |
380 | 0 | parentSpace = MB.substr(parentLineOffs, i-parentLineOffs); |
381 | |
|
382 | 0 | i = startLineOffs; |
383 | 0 | while (isWhitespaceExceptNL(MB[i])) |
384 | 0 | ++i; |
385 | 0 | startSpace = MB.substr(startLineOffs, i-startLineOffs); |
386 | 0 | } |
387 | 0 | if (parentSpace.size() >= startSpace.size()) |
388 | 0 | return true; |
389 | 0 | if (!startSpace.starts_with(parentSpace)) |
390 | 0 | return true; |
391 | | |
392 | 0 | StringRef indent = startSpace.substr(parentSpace.size()); |
393 | | |
394 | | // Indent the lines between start/end offsets. |
395 | 0 | RewriteBuffer &RB = getEditBuffer(FID); |
396 | 0 | for (unsigned lineNo = startLineNo; lineNo <= endLineNo; ++lineNo) { |
397 | 0 | unsigned offs = Content->SourceLineCache[lineNo]; |
398 | 0 | unsigned i = offs; |
399 | 0 | while (isWhitespaceExceptNL(MB[i])) |
400 | 0 | ++i; |
401 | 0 | StringRef origIndent = MB.substr(offs, i-offs); |
402 | 0 | if (origIndent.starts_with(startSpace)) |
403 | 0 | RB.InsertText(offs, indent, /*InsertAfter=*/false); |
404 | 0 | } |
405 | |
|
406 | 0 | return false; |
407 | 0 | } |
408 | | |
409 | 0 | bool Rewriter::overwriteChangedFiles() { |
410 | 0 | bool AllWritten = true; |
411 | 0 | auto& Diag = getSourceMgr().getDiagnostics(); |
412 | 0 | unsigned OverwriteFailure = Diag.getCustomDiagID( |
413 | 0 | DiagnosticsEngine::Error, "unable to overwrite file %0: %1"); |
414 | 0 | for (buffer_iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) { |
415 | 0 | OptionalFileEntryRef Entry = getSourceMgr().getFileEntryRefForID(I->first); |
416 | 0 | llvm::SmallString<128> Path(Entry->getName()); |
417 | 0 | getSourceMgr().getFileManager().makeAbsolutePath(Path); |
418 | 0 | if (auto Error = llvm::writeToOutput(Path, [&](llvm::raw_ostream &OS) { |
419 | 0 | I->second.write(OS); |
420 | 0 | return llvm::Error::success(); |
421 | 0 | })) { |
422 | 0 | Diag.Report(OverwriteFailure) |
423 | 0 | << Entry->getName() << llvm::toString(std::move(Error)); |
424 | 0 | AllWritten = false; |
425 | 0 | } |
426 | 0 | } |
427 | 0 | return !AllWritten; |
428 | 0 | } |