/src/llvm-project/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "clang/Tooling/Inclusions/HeaderIncludes.h" |
10 | | #include "clang/Basic/FileManager.h" |
11 | | #include "clang/Basic/SourceManager.h" |
12 | | #include "clang/Lex/Lexer.h" |
13 | | #include "llvm/Support/FormatVariadic.h" |
14 | | #include "llvm/Support/Path.h" |
15 | | #include <optional> |
16 | | |
17 | | namespace clang { |
18 | | namespace tooling { |
19 | | namespace { |
20 | | |
21 | 0 | LangOptions createLangOpts() { |
22 | 0 | LangOptions LangOpts; |
23 | 0 | LangOpts.CPlusPlus = 1; |
24 | 0 | LangOpts.CPlusPlus11 = 1; |
25 | 0 | LangOpts.CPlusPlus14 = 1; |
26 | 0 | LangOpts.LineComment = 1; |
27 | 0 | LangOpts.CXXOperatorNames = 1; |
28 | 0 | LangOpts.Bool = 1; |
29 | 0 | LangOpts.ObjC = 1; |
30 | 0 | LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. |
31 | 0 | LangOpts.DeclSpecKeyword = 1; // To get __declspec. |
32 | 0 | LangOpts.WChar = 1; // To get wchar_t |
33 | 0 | return LangOpts; |
34 | 0 | } |
35 | | |
36 | | // Returns the offset after skipping a sequence of tokens, matched by \p |
37 | | // GetOffsetAfterSequence, from the start of the code. |
38 | | // \p GetOffsetAfterSequence should be a function that matches a sequence of |
39 | | // tokens and returns an offset after the sequence. |
40 | | unsigned getOffsetAfterTokenSequence( |
41 | | StringRef FileName, StringRef Code, const IncludeStyle &Style, |
42 | | llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> |
43 | 0 | GetOffsetAfterSequence) { |
44 | 0 | SourceManagerForFile VirtualSM(FileName, Code); |
45 | 0 | SourceManager &SM = VirtualSM.get(); |
46 | 0 | LangOptions LangOpts = createLangOpts(); |
47 | 0 | Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM, |
48 | 0 | LangOpts); |
49 | 0 | Token Tok; |
50 | | // Get the first token. |
51 | 0 | Lex.LexFromRawLexer(Tok); |
52 | 0 | return GetOffsetAfterSequence(SM, Lex, Tok); |
53 | 0 | } |
54 | | |
55 | | // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, |
56 | | // \p Tok will be the token after this directive; otherwise, it can be any token |
57 | | // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the |
58 | | // (second) raw_identifier name is checked. |
59 | | bool checkAndConsumeDirectiveWithName( |
60 | | Lexer &Lex, StringRef Name, Token &Tok, |
61 | 0 | std::optional<StringRef> RawIDName = std::nullopt) { |
62 | 0 | bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && |
63 | 0 | Tok.is(tok::raw_identifier) && |
64 | 0 | Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && |
65 | 0 | Tok.is(tok::raw_identifier) && |
66 | 0 | (!RawIDName || Tok.getRawIdentifier() == *RawIDName); |
67 | 0 | if (Matched) |
68 | 0 | Lex.LexFromRawLexer(Tok); |
69 | 0 | return Matched; |
70 | 0 | } |
71 | | |
72 | 0 | void skipComments(Lexer &Lex, Token &Tok) { |
73 | 0 | while (Tok.is(tok::comment)) |
74 | 0 | if (Lex.LexFromRawLexer(Tok)) |
75 | 0 | return; |
76 | 0 | } |
77 | | |
78 | | // Returns the offset after header guard directives and any comments |
79 | | // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no |
80 | | // header guard is present in the code, this will return the offset after |
81 | | // skipping all comments from the start of the code. |
82 | | unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, |
83 | | StringRef Code, |
84 | 0 | const IncludeStyle &Style) { |
85 | | // \p Consume returns location after header guard or 0 if no header guard is |
86 | | // found. |
87 | 0 | auto ConsumeHeaderGuardAndComment = |
88 | 0 | [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, |
89 | 0 | Token Tok)> |
90 | 0 | Consume) { |
91 | 0 | return getOffsetAfterTokenSequence( |
92 | 0 | FileName, Code, Style, |
93 | 0 | [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { |
94 | 0 | skipComments(Lex, Tok); |
95 | 0 | unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); |
96 | 0 | return std::max(InitialOffset, Consume(SM, Lex, Tok)); |
97 | 0 | }); |
98 | 0 | }; |
99 | 0 | return std::max( |
100 | | // #ifndef/#define |
101 | 0 | ConsumeHeaderGuardAndComment( |
102 | 0 | [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { |
103 | 0 | if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { |
104 | 0 | skipComments(Lex, Tok); |
105 | 0 | if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) && |
106 | 0 | Tok.isAtStartOfLine()) |
107 | 0 | return SM.getFileOffset(Tok.getLocation()); |
108 | 0 | } |
109 | 0 | return 0; |
110 | 0 | }), |
111 | | // #pragma once |
112 | 0 | ConsumeHeaderGuardAndComment( |
113 | 0 | [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { |
114 | 0 | if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok, |
115 | 0 | StringRef("once"))) |
116 | 0 | return SM.getFileOffset(Tok.getLocation()); |
117 | 0 | return 0; |
118 | 0 | })); |
119 | 0 | } |
120 | | |
121 | | // Check if a sequence of tokens is like |
122 | | // "#include ("header.h" | <header.h>)". |
123 | | // If it is, \p Tok will be the token after this directive; otherwise, it can be |
124 | | // any token after the given \p Tok (including \p Tok). |
125 | 0 | bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { |
126 | 0 | auto Matched = [&]() { |
127 | 0 | Lex.LexFromRawLexer(Tok); |
128 | 0 | return true; |
129 | 0 | }; |
130 | 0 | if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && |
131 | 0 | Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { |
132 | 0 | if (Lex.LexFromRawLexer(Tok)) |
133 | 0 | return false; |
134 | 0 | if (Tok.is(tok::string_literal)) |
135 | 0 | return Matched(); |
136 | 0 | if (Tok.is(tok::less)) { |
137 | 0 | while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { |
138 | 0 | } |
139 | 0 | if (Tok.is(tok::greater)) |
140 | 0 | return Matched(); |
141 | 0 | } |
142 | 0 | } |
143 | 0 | return false; |
144 | 0 | } |
145 | | |
146 | | // Returns the offset of the last #include directive after which a new |
147 | | // #include can be inserted. This ignores #include's after the #include block(s) |
148 | | // in the beginning of a file to avoid inserting headers into code sections |
149 | | // where new #include's should not be added by default. |
150 | | // These code sections include: |
151 | | // - raw string literals (containing #include). |
152 | | // - #if blocks. |
153 | | // - Special #include's among declarations (e.g. functions). |
154 | | // |
155 | | // If no #include after which a new #include can be inserted, this returns the |
156 | | // offset after skipping all comments from the start of the code. |
157 | | // Inserting after an #include is not allowed if it comes after code that is not |
158 | | // #include (e.g. pre-processing directive that is not #include, declarations). |
159 | | unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, |
160 | 0 | const IncludeStyle &Style) { |
161 | 0 | return getOffsetAfterTokenSequence( |
162 | 0 | FileName, Code, Style, |
163 | 0 | [](const SourceManager &SM, Lexer &Lex, Token Tok) { |
164 | 0 | skipComments(Lex, Tok); |
165 | 0 | unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); |
166 | 0 | while (checkAndConsumeInclusiveDirective(Lex, Tok)) |
167 | 0 | MaxOffset = SM.getFileOffset(Tok.getLocation()); |
168 | 0 | return MaxOffset; |
169 | 0 | }); |
170 | 0 | } |
171 | | |
172 | 0 | inline StringRef trimInclude(StringRef IncludeName) { |
173 | 0 | return IncludeName.trim("\"<>"); |
174 | 0 | } |
175 | | |
176 | | const char IncludeRegexPattern[] = |
177 | | R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; |
178 | | |
179 | | // The filename of Path excluding extension. |
180 | | // Used to match implementation with headers, this differs from sys::path::stem: |
181 | | // - in names with multiple dots (foo.cu.cc) it terminates at the *first* |
182 | | // - an empty stem is never returned: /foo/.bar.x => .bar |
183 | | // - we don't bother to handle . and .. specially |
184 | 0 | StringRef matchingStem(llvm::StringRef Path) { |
185 | 0 | StringRef Name = llvm::sys::path::filename(Path); |
186 | 0 | return Name.substr(0, Name.find('.', 1)); |
187 | 0 | } |
188 | | |
189 | | } // anonymous namespace |
190 | | |
191 | | IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style, |
192 | | StringRef FileName) |
193 | 0 | : Style(Style), FileName(FileName) { |
194 | 0 | for (const auto &Category : Style.IncludeCategories) { |
195 | 0 | CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive |
196 | 0 | ? llvm::Regex::NoFlags |
197 | 0 | : llvm::Regex::IgnoreCase); |
198 | 0 | } |
199 | 0 | IsMainFile = FileName.ends_with(".c") || FileName.ends_with(".cc") || |
200 | 0 | FileName.ends_with(".cpp") || FileName.ends_with(".c++") || |
201 | 0 | FileName.ends_with(".cxx") || FileName.ends_with(".m") || |
202 | 0 | FileName.ends_with(".mm"); |
203 | 0 | if (!Style.IncludeIsMainSourceRegex.empty()) { |
204 | 0 | llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex); |
205 | 0 | IsMainFile |= MainFileRegex.match(FileName); |
206 | 0 | } |
207 | 0 | } |
208 | | |
209 | | int IncludeCategoryManager::getIncludePriority(StringRef IncludeName, |
210 | 0 | bool CheckMainHeader) const { |
211 | 0 | int Ret = INT_MAX; |
212 | 0 | for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) |
213 | 0 | if (CategoryRegexs[i].match(IncludeName)) { |
214 | 0 | Ret = Style.IncludeCategories[i].Priority; |
215 | 0 | break; |
216 | 0 | } |
217 | 0 | if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) |
218 | 0 | Ret = 0; |
219 | 0 | return Ret; |
220 | 0 | } |
221 | | |
222 | | int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName, |
223 | 0 | bool CheckMainHeader) const { |
224 | 0 | int Ret = INT_MAX; |
225 | 0 | for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) |
226 | 0 | if (CategoryRegexs[i].match(IncludeName)) { |
227 | 0 | Ret = Style.IncludeCategories[i].SortPriority; |
228 | 0 | if (Ret == 0) |
229 | 0 | Ret = Style.IncludeCategories[i].Priority; |
230 | 0 | break; |
231 | 0 | } |
232 | 0 | if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) |
233 | 0 | Ret = 0; |
234 | 0 | return Ret; |
235 | 0 | } |
236 | 0 | bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { |
237 | 0 | if (!IncludeName.starts_with("\"")) |
238 | 0 | return false; |
239 | | |
240 | 0 | IncludeName = |
241 | 0 | IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <> |
242 | | // Not matchingStem: implementation files may have compound extensions but |
243 | | // headers may not. |
244 | 0 | StringRef HeaderStem = llvm::sys::path::stem(IncludeName); |
245 | 0 | StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc |
246 | 0 | StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc |
247 | | // main-header examples: |
248 | | // 1) foo.h => foo.cc |
249 | | // 2) foo.h => foo.cu.cc |
250 | | // 3) foo.proto.h => foo.proto.cc |
251 | | // |
252 | | // non-main-header examples: |
253 | | // 1) foo.h => bar.cc |
254 | | // 2) foo.proto.h => foo.cc |
255 | 0 | StringRef Matching; |
256 | 0 | if (MatchingFileStem.starts_with_insensitive(HeaderStem)) |
257 | 0 | Matching = MatchingFileStem; // example 1), 2) |
258 | 0 | else if (FileStem.equals_insensitive(HeaderStem)) |
259 | 0 | Matching = FileStem; // example 3) |
260 | 0 | if (!Matching.empty()) { |
261 | 0 | llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, |
262 | 0 | llvm::Regex::IgnoreCase); |
263 | 0 | if (MainIncludeRegex.match(Matching)) |
264 | 0 | return true; |
265 | 0 | } |
266 | 0 | return false; |
267 | 0 | } |
268 | | |
269 | | const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern); |
270 | | |
271 | | HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, |
272 | | const IncludeStyle &Style) |
273 | | : FileName(FileName), Code(Code), FirstIncludeOffset(-1), |
274 | | MinInsertOffset( |
275 | | getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), |
276 | | MaxInsertOffset(MinInsertOffset + |
277 | | getMaxHeaderInsertionOffset( |
278 | | FileName, Code.drop_front(MinInsertOffset), Style)), |
279 | | MainIncludeFound(false), |
280 | 0 | Categories(Style, FileName) { |
281 | | // Add 0 for main header and INT_MAX for headers that are not in any |
282 | | // category. |
283 | 0 | Priorities = {0, INT_MAX}; |
284 | 0 | for (const auto &Category : Style.IncludeCategories) |
285 | 0 | Priorities.insert(Category.Priority); |
286 | 0 | SmallVector<StringRef, 32> Lines; |
287 | 0 | Code.drop_front(MinInsertOffset).split(Lines, "\n"); |
288 | |
|
289 | 0 | unsigned Offset = MinInsertOffset; |
290 | 0 | unsigned NextLineOffset; |
291 | 0 | SmallVector<StringRef, 4> Matches; |
292 | 0 | for (auto Line : Lines) { |
293 | 0 | NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); |
294 | 0 | if (IncludeRegex.match(Line, &Matches)) { |
295 | | // If this is the last line without trailing newline, we need to make |
296 | | // sure we don't delete across the file boundary. |
297 | 0 | addExistingInclude( |
298 | 0 | Include(Matches[2], |
299 | 0 | tooling::Range( |
300 | 0 | Offset, std::min(Line.size() + 1, Code.size() - Offset)), |
301 | 0 | Matches[1] == "import" ? tooling::IncludeDirective::Import |
302 | 0 | : tooling::IncludeDirective::Include), |
303 | 0 | NextLineOffset); |
304 | 0 | } |
305 | 0 | Offset = NextLineOffset; |
306 | 0 | } |
307 | | |
308 | | // Populate CategoryEndOfssets: |
309 | | // - Ensure that CategoryEndOffset[Highest] is always populated. |
310 | | // - If CategoryEndOffset[Priority] isn't set, use the next higher value |
311 | | // that is set, up to CategoryEndOffset[Highest]. |
312 | 0 | auto Highest = Priorities.begin(); |
313 | 0 | if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) { |
314 | 0 | if (FirstIncludeOffset >= 0) |
315 | 0 | CategoryEndOffsets[*Highest] = FirstIncludeOffset; |
316 | 0 | else |
317 | 0 | CategoryEndOffsets[*Highest] = MinInsertOffset; |
318 | 0 | } |
319 | | // By this point, CategoryEndOffset[Highest] is always set appropriately: |
320 | | // - to an appropriate location before/after existing #includes, or |
321 | | // - to right after the header guard, or |
322 | | // - to the beginning of the file. |
323 | 0 | for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) |
324 | 0 | if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) |
325 | 0 | CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; |
326 | 0 | } |
327 | | |
328 | | // \p Offset: the start of the line following this include directive. |
329 | | void HeaderIncludes::addExistingInclude(Include IncludeToAdd, |
330 | 0 | unsigned NextLineOffset) { |
331 | 0 | auto Iter = |
332 | 0 | ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first; |
333 | 0 | Iter->second.push_back(std::move(IncludeToAdd)); |
334 | 0 | auto &CurInclude = Iter->second.back(); |
335 | | // The header name with quotes or angle brackets. |
336 | | // Only record the offset of current #include if we can insert after it. |
337 | 0 | if (CurInclude.R.getOffset() <= MaxInsertOffset) { |
338 | 0 | int Priority = Categories.getIncludePriority( |
339 | 0 | CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound); |
340 | 0 | if (Priority == 0) |
341 | 0 | MainIncludeFound = true; |
342 | 0 | CategoryEndOffsets[Priority] = NextLineOffset; |
343 | 0 | IncludesByPriority[Priority].push_back(&CurInclude); |
344 | 0 | if (FirstIncludeOffset < 0) |
345 | 0 | FirstIncludeOffset = CurInclude.R.getOffset(); |
346 | 0 | } |
347 | 0 | } |
348 | | |
349 | | std::optional<tooling::Replacement> |
350 | | HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled, |
351 | 0 | IncludeDirective Directive) const { |
352 | 0 | assert(IncludeName == trimInclude(IncludeName)); |
353 | | // If a <header> ("header") already exists in code, "header" (<header>) with |
354 | | // different quotation and/or directive will still be inserted. |
355 | | // FIXME: figure out if this is the best behavior. |
356 | 0 | auto It = ExistingIncludes.find(IncludeName); |
357 | 0 | if (It != ExistingIncludes.end()) { |
358 | 0 | for (const auto &Inc : It->second) |
359 | 0 | if (Inc.Directive == Directive && |
360 | 0 | ((IsAngled && StringRef(Inc.Name).starts_with("<")) || |
361 | 0 | (!IsAngled && StringRef(Inc.Name).starts_with("\"")))) |
362 | 0 | return std::nullopt; |
363 | 0 | } |
364 | 0 | std::string Quoted = |
365 | 0 | std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName)); |
366 | 0 | StringRef QuotedName = Quoted; |
367 | 0 | int Priority = Categories.getIncludePriority( |
368 | 0 | QuotedName, /*CheckMainHeader=*/!MainIncludeFound); |
369 | 0 | auto CatOffset = CategoryEndOffsets.find(Priority); |
370 | 0 | assert(CatOffset != CategoryEndOffsets.end()); |
371 | 0 | unsigned InsertOffset = CatOffset->second; // Fall back offset |
372 | 0 | auto Iter = IncludesByPriority.find(Priority); |
373 | 0 | if (Iter != IncludesByPriority.end()) { |
374 | 0 | for (const auto *Inc : Iter->second) { |
375 | 0 | if (QuotedName < Inc->Name) { |
376 | 0 | InsertOffset = Inc->R.getOffset(); |
377 | 0 | break; |
378 | 0 | } |
379 | 0 | } |
380 | 0 | } |
381 | 0 | assert(InsertOffset <= Code.size()); |
382 | 0 | llvm::StringRef DirectiveSpelling = |
383 | 0 | Directive == IncludeDirective::Include ? "include" : "import"; |
384 | 0 | std::string NewInclude = |
385 | 0 | llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName); |
386 | | // When inserting headers at end of the code, also append '\n' to the code |
387 | | // if it does not end with '\n'. |
388 | | // FIXME: when inserting multiple #includes at the end of code, only one |
389 | | // newline should be added. |
390 | 0 | if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) |
391 | 0 | NewInclude = "\n" + NewInclude; |
392 | 0 | return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); |
393 | 0 | } |
394 | | |
395 | | tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName, |
396 | 0 | bool IsAngled) const { |
397 | 0 | assert(IncludeName == trimInclude(IncludeName)); |
398 | 0 | tooling::Replacements Result; |
399 | 0 | auto Iter = ExistingIncludes.find(IncludeName); |
400 | 0 | if (Iter == ExistingIncludes.end()) |
401 | 0 | return Result; |
402 | 0 | for (const auto &Inc : Iter->second) { |
403 | 0 | if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) || |
404 | 0 | (!IsAngled && StringRef(Inc.Name).starts_with("<"))) |
405 | 0 | continue; |
406 | 0 | llvm::Error Err = Result.add(tooling::Replacement( |
407 | 0 | FileName, Inc.R.getOffset(), Inc.R.getLength(), "")); |
408 | 0 | if (Err) { |
409 | 0 | auto ErrMsg = "Unexpected conflicts in #include deletions: " + |
410 | 0 | llvm::toString(std::move(Err)); |
411 | 0 | llvm_unreachable(ErrMsg.c_str()); |
412 | 0 | } |
413 | 0 | } |
414 | 0 | return Result; |
415 | 0 | } |
416 | | |
417 | | } // namespace tooling |
418 | | } // namespace clang |