/src/llvm-project/clang/lib/Format/NamespaceEndCommentsFixer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that |
11 | | /// fixes namespace end comments. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "NamespaceEndCommentsFixer.h" |
16 | | #include "clang/Basic/TokenKinds.h" |
17 | | #include "llvm/Support/Debug.h" |
18 | | #include "llvm/Support/Regex.h" |
19 | | |
20 | | #define DEBUG_TYPE "namespace-end-comments-fixer" |
21 | | |
22 | | namespace clang { |
23 | | namespace format { |
24 | | |
25 | | namespace { |
26 | | // Iterates all tokens starting from StartTok to EndTok and apply Fn to all |
27 | | // tokens between them including StartTok and EndTok. Returns the token after |
28 | | // EndTok. |
29 | | const FormatToken * |
30 | | processTokens(const FormatToken *Tok, tok::TokenKind StartTok, |
31 | | tok::TokenKind EndTok, |
32 | 0 | llvm::function_ref<void(const FormatToken *)> Fn) { |
33 | 0 | if (!Tok || Tok->isNot(StartTok)) |
34 | 0 | return Tok; |
35 | 0 | int NestLevel = 0; |
36 | 0 | do { |
37 | 0 | if (Tok->is(StartTok)) |
38 | 0 | ++NestLevel; |
39 | 0 | else if (Tok->is(EndTok)) |
40 | 0 | --NestLevel; |
41 | 0 | if (Fn) |
42 | 0 | Fn(Tok); |
43 | 0 | Tok = Tok->getNextNonComment(); |
44 | 0 | } while (Tok && NestLevel > 0); |
45 | 0 | return Tok; |
46 | 0 | } |
47 | | |
48 | 0 | const FormatToken *skipAttribute(const FormatToken *Tok) { |
49 | 0 | if (!Tok) |
50 | 0 | return nullptr; |
51 | 0 | if (Tok->isAttribute()) { |
52 | 0 | Tok = Tok->getNextNonComment(); |
53 | 0 | Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr); |
54 | 0 | } else if (Tok->is(tok::l_square)) { |
55 | 0 | Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr); |
56 | 0 | } |
57 | 0 | return Tok; |
58 | 0 | } |
59 | | |
60 | | // Computes the name of a namespace given the namespace token. |
61 | | // Returns "" for anonymous namespace. |
62 | 0 | std::string computeName(const FormatToken *NamespaceTok) { |
63 | 0 | assert(NamespaceTok && |
64 | 0 | NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && |
65 | 0 | "expecting a namespace token"); |
66 | 0 | std::string name; |
67 | 0 | const FormatToken *Tok = NamespaceTok->getNextNonComment(); |
68 | 0 | if (NamespaceTok->is(TT_NamespaceMacro)) { |
69 | | // Collects all the non-comment tokens between opening parenthesis |
70 | | // and closing parenthesis or comma. |
71 | 0 | assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis"); |
72 | 0 | Tok = Tok->getNextNonComment(); |
73 | 0 | while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) { |
74 | 0 | name += Tok->TokenText; |
75 | 0 | Tok = Tok->getNextNonComment(); |
76 | 0 | } |
77 | 0 | return name; |
78 | 0 | } |
79 | 0 | Tok = skipAttribute(Tok); |
80 | |
|
81 | 0 | std::string FirstNSName; |
82 | | // For `namespace [[foo]] A::B::inline C {` or |
83 | | // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C". |
84 | | // Peek for the first '::' (or '{' or '(')) and then return all tokens from |
85 | | // one token before that up until the '{'. A '(' might be a macro with |
86 | | // arguments. |
87 | 0 | const FormatToken *FirstNSTok = nullptr; |
88 | 0 | while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) { |
89 | 0 | if (FirstNSTok) |
90 | 0 | FirstNSName += FirstNSTok->TokenText; |
91 | 0 | FirstNSTok = Tok; |
92 | 0 | Tok = Tok->getNextNonComment(); |
93 | 0 | } |
94 | |
|
95 | 0 | if (FirstNSTok) |
96 | 0 | Tok = FirstNSTok; |
97 | 0 | Tok = skipAttribute(Tok); |
98 | |
|
99 | 0 | FirstNSTok = nullptr; |
100 | | // Add everything from '(' to ')'. |
101 | 0 | auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; }; |
102 | 0 | bool IsPrevColoncolon = false; |
103 | 0 | bool HasColoncolon = false; |
104 | 0 | bool IsPrevInline = false; |
105 | 0 | bool NameFinished = false; |
106 | | // If we found '::' in name, then it's the name. Otherwise, we can't tell |
107 | | // which one is name. For example, `namespace A B {`. |
108 | 0 | while (Tok && Tok->isNot(tok::l_brace)) { |
109 | 0 | if (FirstNSTok) { |
110 | 0 | if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) { |
111 | 0 | if (FirstNSTok->is(tok::l_paren)) { |
112 | 0 | FirstNSTok = Tok = |
113 | 0 | processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken); |
114 | 0 | continue; |
115 | 0 | } |
116 | 0 | if (FirstNSTok->isNot(tok::coloncolon)) { |
117 | 0 | NameFinished = true; |
118 | 0 | break; |
119 | 0 | } |
120 | 0 | } |
121 | 0 | name += FirstNSTok->TokenText; |
122 | 0 | IsPrevColoncolon = FirstNSTok->is(tok::coloncolon); |
123 | 0 | HasColoncolon = HasColoncolon || IsPrevColoncolon; |
124 | 0 | if (FirstNSTok->is(tok::kw_inline)) { |
125 | 0 | name += " "; |
126 | 0 | IsPrevInline = true; |
127 | 0 | } |
128 | 0 | } |
129 | 0 | FirstNSTok = Tok; |
130 | 0 | Tok = Tok->getNextNonComment(); |
131 | 0 | const FormatToken *TokAfterAttr = skipAttribute(Tok); |
132 | 0 | if (TokAfterAttr != Tok) |
133 | 0 | FirstNSTok = Tok = TokAfterAttr; |
134 | 0 | } |
135 | 0 | if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace)) |
136 | 0 | name += FirstNSTok->TokenText; |
137 | 0 | if (FirstNSName.empty() || HasColoncolon) |
138 | 0 | return name; |
139 | 0 | return name.empty() ? FirstNSName : FirstNSName + " " + name; |
140 | 0 | } |
141 | | |
142 | | std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline, |
143 | | const FormatToken *NamespaceTok, |
144 | 0 | unsigned SpacesToAdd) { |
145 | 0 | std::string text = "//"; |
146 | 0 | text.append(SpacesToAdd, ' '); |
147 | 0 | text += NamespaceTok->TokenText; |
148 | 0 | if (NamespaceTok->is(TT_NamespaceMacro)) |
149 | 0 | text += "("; |
150 | 0 | else if (!NamespaceName.empty()) |
151 | 0 | text += ' '; |
152 | 0 | text += NamespaceName; |
153 | 0 | if (NamespaceTok->is(TT_NamespaceMacro)) |
154 | 0 | text += ")"; |
155 | 0 | if (AddNewline) |
156 | 0 | text += '\n'; |
157 | 0 | return text; |
158 | 0 | } |
159 | | |
160 | 0 | bool hasEndComment(const FormatToken *RBraceTok) { |
161 | 0 | return RBraceTok->Next && RBraceTok->Next->is(tok::comment); |
162 | 0 | } |
163 | | |
164 | | bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName, |
165 | 0 | const FormatToken *NamespaceTok) { |
166 | 0 | assert(hasEndComment(RBraceTok)); |
167 | 0 | const FormatToken *Comment = RBraceTok->Next; |
168 | | |
169 | | // Matches a valid namespace end comment. |
170 | | // Valid namespace end comments don't need to be edited. |
171 | 0 | static const llvm::Regex NamespaceCommentPattern = |
172 | 0 | llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" |
173 | 0 | "namespace( +([a-zA-Z0-9:_ ]+))?\\.? *(\\*/)?$", |
174 | 0 | llvm::Regex::IgnoreCase); |
175 | 0 | static const llvm::Regex NamespaceMacroCommentPattern = |
176 | 0 | llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" |
177 | 0 | "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*|\".+\")\\)\\.? *(\\*/)?$", |
178 | 0 | llvm::Regex::IgnoreCase); |
179 | |
|
180 | 0 | SmallVector<StringRef, 8> Groups; |
181 | 0 | if (NamespaceTok->is(TT_NamespaceMacro) && |
182 | 0 | NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) { |
183 | 0 | StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : ""; |
184 | | // The name of the macro must be used. |
185 | 0 | if (NamespaceTokenText != NamespaceTok->TokenText) |
186 | 0 | return false; |
187 | 0 | } else if (NamespaceTok->isNot(tok::kw_namespace) || |
188 | 0 | !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) { |
189 | | // Comment does not match regex. |
190 | 0 | return false; |
191 | 0 | } |
192 | 0 | StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5].rtrim() : ""; |
193 | | // Anonymous namespace comments must not mention a namespace name. |
194 | 0 | if (NamespaceName.empty() && !NamespaceNameInComment.empty()) |
195 | 0 | return false; |
196 | 0 | StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; |
197 | | // Named namespace comments must not mention anonymous namespace. |
198 | 0 | if (!NamespaceName.empty() && !AnonymousInComment.empty()) |
199 | 0 | return false; |
200 | 0 | if (NamespaceNameInComment == NamespaceName) |
201 | 0 | return true; |
202 | | |
203 | | // Has namespace comment flowed onto the next line. |
204 | | // } // namespace |
205 | | // // verylongnamespacenamethatdidnotfitonthepreviouscommentline |
206 | 0 | if (!(Comment->Next && Comment->Next->is(TT_LineComment))) |
207 | 0 | return false; |
208 | | |
209 | 0 | static const llvm::Regex CommentPattern = llvm::Regex( |
210 | 0 | "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase); |
211 | | |
212 | | // Pull out just the comment text. |
213 | 0 | if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) |
214 | 0 | return false; |
215 | 0 | NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : ""; |
216 | |
|
217 | 0 | return NamespaceNameInComment == NamespaceName; |
218 | 0 | } |
219 | | |
220 | | void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, |
221 | | const SourceManager &SourceMgr, |
222 | 0 | tooling::Replacements *Fixes) { |
223 | 0 | auto EndLoc = RBraceTok->Tok.getEndLoc(); |
224 | 0 | auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); |
225 | 0 | auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); |
226 | 0 | if (Err) { |
227 | 0 | llvm::errs() << "Error while adding namespace end comment: " |
228 | 0 | << llvm::toString(std::move(Err)) << "\n"; |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, |
233 | | const SourceManager &SourceMgr, |
234 | 0 | tooling::Replacements *Fixes) { |
235 | 0 | assert(hasEndComment(RBraceTok)); |
236 | 0 | const FormatToken *Comment = RBraceTok->Next; |
237 | 0 | auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), |
238 | 0 | Comment->Tok.getEndLoc()); |
239 | 0 | auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); |
240 | 0 | if (Err) { |
241 | 0 | llvm::errs() << "Error while updating namespace end comment: " |
242 | 0 | << llvm::toString(std::move(Err)) << "\n"; |
243 | 0 | } |
244 | 0 | } |
245 | | } // namespace |
246 | | |
247 | | const FormatToken * |
248 | | getNamespaceToken(const AnnotatedLine *Line, |
249 | 210k | const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { |
250 | 210k | if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace)) |
251 | 90.4k | return nullptr; |
252 | 119k | size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; |
253 | 119k | if (StartLineIndex == UnwrappedLine::kInvalidIndex) |
254 | 63.7k | return nullptr; |
255 | 56.1k | assert(StartLineIndex < AnnotatedLines.size()); |
256 | 0 | const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; |
257 | 56.1k | if (NamespaceTok->is(tok::l_brace)) { |
258 | | // "namespace" keyword can be on the line preceding '{', e.g. in styles |
259 | | // where BraceWrapping.AfterNamespace is true. |
260 | 632 | if (StartLineIndex > 0) { |
261 | 607 | NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First; |
262 | 607 | if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi)) |
263 | 0 | return nullptr; |
264 | 607 | } |
265 | 632 | } |
266 | | |
267 | 56.1k | return NamespaceTok->getNamespaceToken(); |
268 | 56.1k | } |
269 | | |
270 | | StringRef |
271 | | getNamespaceTokenText(const AnnotatedLine *Line, |
272 | 0 | const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { |
273 | 0 | const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines); |
274 | 0 | return NamespaceTok ? NamespaceTok->TokenText : StringRef(); |
275 | 0 | } |
276 | | |
277 | | NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, |
278 | | const FormatStyle &Style) |
279 | 337 | : TokenAnalyzer(Env, Style) {} |
280 | | |
281 | | std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( |
282 | | TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, |
283 | 351 | FormatTokenLexer &Tokens) { |
284 | 351 | const SourceManager &SourceMgr = Env.getSourceManager(); |
285 | 351 | AffectedRangeMgr.computeAffectedLines(AnnotatedLines); |
286 | 351 | tooling::Replacements Fixes; |
287 | | |
288 | | // Spin through the lines and ensure we have balanced braces. |
289 | 351 | int Braces = 0; |
290 | 441k | for (AnnotatedLine *Line : AnnotatedLines) { |
291 | 441k | FormatToken *Tok = Line->First; |
292 | 13.7M | while (Tok) { |
293 | 13.3M | Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0; |
294 | 13.3M | Tok = Tok->Next; |
295 | 13.3M | } |
296 | 441k | } |
297 | | // Don't attempt to comment unbalanced braces or this can |
298 | | // lead to comments being placed on the closing brace which isn't |
299 | | // the matching brace of the namespace. (occurs during incomplete editing). |
300 | 351 | if (Braces != 0) |
301 | 175 | return {Fixes, 0}; |
302 | | |
303 | 176 | std::string AllNamespaceNames; |
304 | 176 | size_t StartLineIndex = SIZE_MAX; |
305 | 176 | StringRef NamespaceTokenText; |
306 | 176 | unsigned int CompactedNamespacesCount = 0; |
307 | 123k | for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { |
308 | 123k | const AnnotatedLine *EndLine = AnnotatedLines[I]; |
309 | 123k | const FormatToken *NamespaceTok = |
310 | 123k | getNamespaceToken(EndLine, AnnotatedLines); |
311 | 123k | if (!NamespaceTok) |
312 | 123k | continue; |
313 | 0 | FormatToken *RBraceTok = EndLine->First; |
314 | 0 | if (RBraceTok->Finalized) |
315 | 0 | continue; |
316 | 0 | RBraceTok->Finalized = true; |
317 | 0 | const FormatToken *EndCommentPrevTok = RBraceTok; |
318 | | // Namespaces often end with '};'. In that case, attach namespace end |
319 | | // comments to the semicolon tokens. |
320 | 0 | if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) |
321 | 0 | EndCommentPrevTok = RBraceTok->Next; |
322 | 0 | if (StartLineIndex == SIZE_MAX) |
323 | 0 | StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; |
324 | 0 | std::string NamespaceName = computeName(NamespaceTok); |
325 | 0 | if (Style.CompactNamespaces) { |
326 | 0 | if (CompactedNamespacesCount == 0) |
327 | 0 | NamespaceTokenText = NamespaceTok->TokenText; |
328 | 0 | if ((I + 1 < E) && |
329 | 0 | NamespaceTokenText == |
330 | 0 | getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) && |
331 | 0 | StartLineIndex - CompactedNamespacesCount - 1 == |
332 | 0 | AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex && |
333 | 0 | !AnnotatedLines[I + 1]->First->Finalized) { |
334 | 0 | if (hasEndComment(EndCommentPrevTok)) { |
335 | | // remove end comment, it will be merged in next one |
336 | 0 | updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes); |
337 | 0 | } |
338 | 0 | ++CompactedNamespacesCount; |
339 | 0 | if (!NamespaceName.empty()) |
340 | 0 | AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames; |
341 | 0 | continue; |
342 | 0 | } |
343 | 0 | NamespaceName += AllNamespaceNames; |
344 | 0 | CompactedNamespacesCount = 0; |
345 | 0 | AllNamespaceNames = std::string(); |
346 | 0 | } |
347 | | // The next token in the token stream after the place where the end comment |
348 | | // token must be. This is either the next token on the current line or the |
349 | | // first token on the next line. |
350 | 0 | const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; |
351 | 0 | if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) |
352 | 0 | EndCommentNextTok = EndCommentNextTok->Next; |
353 | 0 | if (!EndCommentNextTok && I + 1 < E) |
354 | 0 | EndCommentNextTok = AnnotatedLines[I + 1]->First; |
355 | 0 | bool AddNewline = EndCommentNextTok && |
356 | 0 | EndCommentNextTok->NewlinesBefore == 0 && |
357 | 0 | EndCommentNextTok->isNot(tok::eof); |
358 | 0 | const std::string EndCommentText = |
359 | 0 | computeEndCommentText(NamespaceName, AddNewline, NamespaceTok, |
360 | 0 | Style.SpacesInLineCommentPrefix.Minimum); |
361 | 0 | if (!hasEndComment(EndCommentPrevTok)) { |
362 | 0 | unsigned LineCount = 0; |
363 | 0 | for (auto J = StartLineIndex + 1; J < I; ++J) |
364 | 0 | LineCount += AnnotatedLines[J]->size(); |
365 | 0 | if (LineCount > Style.ShortNamespaceLines) { |
366 | 0 | addEndComment(EndCommentPrevTok, |
367 | 0 | std::string(Style.SpacesBeforeTrailingComments, ' ') + |
368 | 0 | EndCommentText, |
369 | 0 | SourceMgr, &Fixes); |
370 | 0 | } |
371 | 0 | } else if (!validEndComment(EndCommentPrevTok, NamespaceName, |
372 | 0 | NamespaceTok)) { |
373 | 0 | updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); |
374 | 0 | } |
375 | 0 | StartLineIndex = SIZE_MAX; |
376 | 0 | } |
377 | 176 | return {Fixes, 0}; |
378 | 351 | } |
379 | | |
380 | | } // namespace format |
381 | | } // namespace clang |