/src/llvm-project/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "clang-pseudo/DirectiveTree.h" |
10 | | #include "clang/Basic/IdentifierTable.h" |
11 | | #include "clang/Basic/TokenKinds.h" |
12 | | #include "llvm/Support/FormatVariadic.h" |
13 | | #include <optional> |
14 | | #include <variant> |
15 | | |
16 | | namespace clang { |
17 | | namespace pseudo { |
18 | | namespace { |
19 | | |
20 | | class DirectiveParser { |
21 | | public: |
22 | | explicit DirectiveParser(const TokenStream &Code) |
23 | 9.56k | : Code(Code), Tok(&Code.front()) {} |
24 | 9.56k | void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); } |
25 | | |
26 | | private: |
27 | | // Roles that a directive might take within a conditional block. |
28 | | enum class Cond { None, If, Else, End }; |
29 | 175k | static Cond classifyDirective(tok::PPKeywordKind K) { |
30 | 175k | switch (K) { |
31 | 15.9k | case clang::tok::pp_if: |
32 | 16.2k | case clang::tok::pp_ifdef: |
33 | 16.6k | case clang::tok::pp_ifndef: |
34 | 16.6k | return Cond::If; |
35 | 18.9k | case clang::tok::pp_elif: |
36 | 22.0k | case clang::tok::pp_elifdef: |
37 | 23.2k | case clang::tok::pp_elifndef: |
38 | 25.6k | case clang::tok::pp_else: |
39 | 25.6k | return Cond::Else; |
40 | 784 | case clang::tok::pp_endif: |
41 | 784 | return Cond::End; |
42 | 132k | default: |
43 | 132k | return Cond::None; |
44 | 175k | } |
45 | 175k | } |
46 | | |
47 | | // Parses tokens starting at Tok into Tree. |
48 | | // If we reach an End or Else directive that ends Tree, returns it. |
49 | | // If TopLevel is true, then we do not expect End and always return |
50 | | // std::nullopt. |
51 | | std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree, |
52 | 34.0k | bool TopLevel) { |
53 | 34.0k | auto StartsDirective = |
54 | 50.7M | [&, AllowDirectiveAt((const Token *)nullptr)]() mutable { |
55 | 50.7M | if (Tok->flag(LexFlags::StartsPPLine)) { |
56 | | // If we considered a comment at the start of a PP-line, it doesn't |
57 | | // start a directive but the directive can still start after it. |
58 | 1.27M | if (Tok->Kind == tok::comment) |
59 | 17.1k | AllowDirectiveAt = Tok + 1; |
60 | 1.27M | return Tok->Kind == tok::hash; |
61 | 1.27M | } |
62 | 49.4M | return Tok->Kind == tok::hash && AllowDirectiveAt == Tok; |
63 | 50.7M | }; |
64 | | // Each iteration adds one chunk (or returns, if we see #endif). |
65 | 293k | while (Tok->Kind != tok::eof) { |
66 | | // If there's no directive here, we have a code chunk. |
67 | 268k | if (!StartsDirective()) { |
68 | 109k | const Token *Start = Tok; |
69 | 109k | do |
70 | 50.4M | ++Tok; |
71 | 50.4M | while (Tok->Kind != tok::eof && !StartsDirective()); |
72 | 109k | Tree->Chunks.push_back(DirectiveTree::Code{ |
73 | 109k | Token::Range{Code.index(*Start), Code.index(*Tok)}}); |
74 | 109k | continue; |
75 | 109k | } |
76 | | |
77 | | // We have some kind of directive. |
78 | 158k | DirectiveTree::Directive Directive; |
79 | 158k | parseDirective(&Directive); |
80 | 158k | Cond Kind = classifyDirective(Directive.Kind); |
81 | 158k | if (Kind == Cond::If) { |
82 | | // #if or similar, starting a nested conditional block. |
83 | 16.6k | DirectiveTree::Conditional Conditional; |
84 | 16.6k | Conditional.Branches.emplace_back(); |
85 | 16.6k | Conditional.Branches.back().first = std::move(Directive); |
86 | 16.6k | parseConditional(&Conditional); |
87 | 16.6k | Tree->Chunks.push_back(std::move(Conditional)); |
88 | 142k | } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) { |
89 | | // #endif or similar, ending this PStructure scope. |
90 | | // (#endif is unexpected at the top level, treat as simple directive). |
91 | 8.36k | return std::move(Directive); |
92 | 133k | } else { |
93 | | // #define or similar, a simple directive at the current scope. |
94 | 133k | Tree->Chunks.push_back(std::move(Directive)); |
95 | 133k | } |
96 | 158k | } |
97 | 25.6k | return std::nullopt; |
98 | 34.0k | } |
99 | | |
100 | | // Parse the rest of a conditional section, after seeing the If directive. |
101 | | // Returns after consuming the End directive. |
102 | 16.6k | void parseConditional(DirectiveTree::Conditional *C) { |
103 | 16.6k | assert(C->Branches.size() == 1 && |
104 | 16.6k | C->Branches.front().second.Chunks.empty() && |
105 | 16.6k | "Should be ready to parse first branch body"); |
106 | 24.6k | while (Tok->Kind != tok::eof) { |
107 | 24.4k | auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false); |
108 | 24.4k | if (!Terminator) { |
109 | 16.1k | assert(Tok->Kind == tok::eof && "gave up parsing before eof?"); |
110 | 0 | C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok)); |
111 | 16.1k | return; |
112 | 16.1k | } |
113 | 8.36k | if (classifyDirective(Terminator->Kind) == Cond::End) { |
114 | 358 | C->End = std::move(*Terminator); |
115 | 358 | return; |
116 | 358 | } |
117 | 8.01k | assert(classifyDirective(Terminator->Kind) == Cond::Else && |
118 | 8.01k | "ended branch unexpectedly"); |
119 | 0 | C->Branches.emplace_back(); |
120 | 8.01k | C->Branches.back().first = std::move(*Terminator); |
121 | 8.01k | } |
122 | 16.6k | } |
123 | | |
124 | | // Parse a directive. Tok is the hash. |
125 | 158k | void parseDirective(DirectiveTree::Directive *D) { |
126 | 158k | assert(Tok->Kind == tok::hash); |
127 | | |
128 | | // Directive spans from the hash until the end of line or file. |
129 | 0 | const Token *Begin = Tok++; |
130 | 2.44M | while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine)) |
131 | 2.28M | ++Tok; |
132 | 158k | ArrayRef<Token> Tokens{Begin, Tok}; |
133 | 158k | D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())}; |
134 | | |
135 | | // Directive name is the first non-comment token after the hash. |
136 | 158k | Tokens = Tokens.drop_front().drop_while( |
137 | 158k | [](const Token &T) { return T.Kind == tok::comment; }); |
138 | 158k | if (!Tokens.empty()) |
139 | 80.6k | D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID(); |
140 | 158k | } |
141 | | |
142 | | const TokenStream &Code; |
143 | | const Token *Tok; |
144 | | clang::IdentifierTable PPKeywords; |
145 | | }; |
146 | | |
147 | | struct Dumper { |
148 | | llvm::raw_ostream &OS; |
149 | | unsigned Indent = 0; |
150 | | |
151 | 0 | Dumper(llvm::raw_ostream& OS) : OS(OS) {} |
152 | 0 | void operator()(const DirectiveTree& Tree) { |
153 | 0 | for (const auto& Chunk : Tree.Chunks) |
154 | 0 | std::visit(*this, Chunk); |
155 | 0 | } |
156 | 0 | void operator()(const DirectiveTree::Conditional &Conditional) { |
157 | 0 | for (unsigned I = 0; I < Conditional.Branches.size(); ++I) { |
158 | 0 | const auto &Branch = Conditional.Branches[I]; |
159 | 0 | (*this)(Branch.first, Conditional.Taken == I); |
160 | 0 | Indent += 2; |
161 | 0 | (*this)(Branch.second); |
162 | 0 | Indent -= 2; |
163 | 0 | } |
164 | 0 | (*this)(Conditional.End); |
165 | 0 | } |
166 | | void operator()(const DirectiveTree::Directive &Directive, |
167 | 0 | bool Taken = false) { |
168 | 0 | OS.indent(Indent) << llvm::formatv( |
169 | 0 | "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind), |
170 | 0 | Directive.Tokens.size(), Taken ? " TAKEN" : ""); |
171 | 0 | } |
172 | 0 | void operator()(const DirectiveTree::Code &Code) { |
173 | 0 | OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n", |
174 | 0 | Code.Tokens.size()); |
175 | 0 | } |
176 | | }; |
177 | | } // namespace |
178 | | |
179 | 9.56k | DirectiveTree DirectiveTree::parse(const TokenStream &Code) { |
180 | 9.56k | DirectiveTree Result; |
181 | 9.56k | DirectiveParser(Code).parse(&Result); |
182 | 9.56k | return Result; |
183 | 9.56k | } |
184 | | |
185 | | // Define operator<< in terms of dump() functions above. |
186 | | #define OSTREAM_DUMP(Type) \ |
187 | 0 | llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \ |
188 | 0 | Dumper{OS}(T); \ |
189 | 0 | return OS; \ |
190 | 0 | } Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree const&) Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree::Directive const&) Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree::Conditional const&) Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree::Code const&) |
191 | | OSTREAM_DUMP(DirectiveTree) |
192 | | OSTREAM_DUMP(DirectiveTree::Directive) |
193 | | OSTREAM_DUMP(DirectiveTree::Conditional) |
194 | | OSTREAM_DUMP(DirectiveTree::Code) |
195 | | #undef OSTREAM_DUMP |
196 | | |
197 | | namespace { |
198 | | // Makes choices about conditional branches. |
199 | | // |
200 | | // Generally it tries to maximize the amount of useful code we see. |
201 | | // |
202 | | // Caveat: each conditional is evaluated independently. Consider this code: |
203 | | // #ifdef WINDOWS |
204 | | // bool isWindows = true; |
205 | | // #endif |
206 | | // #ifndef WINDOWS |
207 | | // bool isWindows = false; |
208 | | // #endif |
209 | | // We take both branches and define isWindows twice. We could track more state |
210 | | // in order to produce a consistent view, but this is complex. |
211 | | class BranchChooser { |
212 | | public: |
213 | 9.56k | BranchChooser(const TokenStream &Code) : Code(Code) {} |
214 | | |
215 | | // Describes code seen by making particular branch choices. Higher is better. |
216 | | struct Score { |
217 | | int Tokens = 0; // excluding comments and directives |
218 | | int Directives = 0; |
219 | | int Errors = 0; // #error directives |
220 | | |
221 | 7.48k | bool operator>(const Score &Other) const { |
222 | | // Seeing errors is bad, other things are good. |
223 | 7.48k | return std::make_tuple(-Errors, Tokens, Directives) > |
224 | 7.48k | std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives); |
225 | 7.48k | } |
226 | | |
227 | 259k | Score &operator+=(const Score &Other) { |
228 | 259k | Tokens += Other.Tokens; |
229 | 259k | Directives += Other.Directives; |
230 | 259k | Errors += Other.Errors; |
231 | 259k | return *this; |
232 | 259k | } |
233 | | }; |
234 | | |
235 | 109k | Score operator()(DirectiveTree::Code &C) { |
236 | 109k | Score S; |
237 | 109k | for (const Token &T : Code.tokens(C.Tokens)) |
238 | 50.4M | if (T.Kind != tok::comment) |
239 | 50.4M | ++S.Tokens; |
240 | 109k | return S; |
241 | 109k | } |
242 | | |
243 | 133k | Score operator()(DirectiveTree::Directive &D) { |
244 | 133k | Score S; |
245 | 133k | S.Directives = 1; |
246 | 133k | S.Errors = D.Kind == tok::pp_error; |
247 | 133k | return S; |
248 | 133k | } |
249 | | |
250 | 16.6k | Score operator()(DirectiveTree::Conditional &C) { |
251 | 16.6k | Score Best; |
252 | 16.6k | bool MayTakeTrivial = true; |
253 | 16.6k | bool TookTrivial = false; |
254 | | |
255 | 41.2k | for (unsigned I = 0; I < C.Branches.size(); ++I) { |
256 | | // Walk the branch to make its nested choices in any case. |
257 | 24.6k | Score BranchScore = walk(C.Branches[I].second); |
258 | | // If we already took an #if 1, don't consider any other branches. |
259 | 24.6k | if (TookTrivial) |
260 | 427 | continue; |
261 | | // Is this a trivial #if 0 or #if 1? |
262 | 24.2k | if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) { |
263 | 3.17k | if (!*TriviallyTaken) |
264 | 580 | continue; // Don't consider #if 0 even if it scores well. |
265 | 2.59k | if (MayTakeTrivial) |
266 | 2.21k | TookTrivial = true; |
267 | 21.0k | } else { |
268 | | // After a nontrivial condition, #elif 1 isn't guaranteed taken. |
269 | 21.0k | MayTakeTrivial = false; |
270 | 21.0k | } |
271 | | // Is this the best branch so far? (Including if it's #if 1). |
272 | 23.6k | if (TookTrivial || !C.Taken || BranchScore > Best) { |
273 | 17.8k | Best = BranchScore; |
274 | 17.8k | C.Taken = I; |
275 | 17.8k | } |
276 | 23.6k | } |
277 | 16.6k | return Best; |
278 | 16.6k | } |
279 | 34.2k | Score walk(DirectiveTree &M) { |
280 | 34.2k | Score S; |
281 | 34.2k | for (auto &C : M.Chunks) |
282 | 259k | S += std::visit(*this, C); |
283 | 34.2k | return S; |
284 | 34.2k | } |
285 | | |
286 | | private: |
287 | | // Return true if the directive starts an always-taken conditional branch, |
288 | | // false if the branch is never taken, and std::nullopt otherwise. |
289 | 24.2k | std::optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) { |
290 | 24.2k | switch (Dir.Kind) { |
291 | 15.9k | case clang::tok::pp_if: |
292 | 21.8k | case clang::tok::pp_elif: |
293 | 21.8k | break; // handled below |
294 | 382 | case clang::tok::pp_else: |
295 | 382 | return true; |
296 | 1.94k | default: // #ifdef etc |
297 | 1.94k | return std::nullopt; |
298 | 24.2k | } |
299 | | |
300 | 21.8k | const auto &Tokens = Code.tokens(Dir.Tokens); |
301 | 21.8k | assert(!Tokens.empty() && Tokens.front().Kind == tok::hash); |
302 | 0 | const Token &Name = Tokens.front().nextNC(); |
303 | 21.8k | const Token &Value = Name.nextNC(); |
304 | | // Does the condition consist of exactly one token? |
305 | 21.8k | if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end()) |
306 | 17.2k | return std::nullopt; |
307 | 4.60k | return llvm::StringSwitch<std::optional<bool>>(Value.text()) |
308 | 4.60k | .Cases("true", "1", true) |
309 | 4.60k | .Cases("false", "0", false) |
310 | 4.60k | .Default(std::nullopt); |
311 | 21.8k | } |
312 | | |
313 | | const TokenStream &Code; |
314 | | }; |
315 | | |
316 | | } // namespace |
317 | | |
318 | 9.56k | void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) { |
319 | 9.56k | BranchChooser{Code}.walk(Tree); |
320 | 9.56k | } |
321 | | |
322 | | namespace { |
323 | | class Preprocessor { |
324 | | const TokenStream &In; |
325 | | TokenStream &Out; |
326 | | |
327 | | public: |
328 | 0 | Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {} |
329 | 0 | ~Preprocessor() { Out.finalize(); } |
330 | | |
331 | 0 | void walk(const DirectiveTree &T) { |
332 | 0 | for (const auto &C : T.Chunks) |
333 | 0 | std::visit(*this, C); |
334 | 0 | } |
335 | | |
336 | 0 | void operator()(const DirectiveTree::Code &C) { |
337 | 0 | for (const auto &Tok : In.tokens(C.Tokens)) |
338 | 0 | Out.push(Tok); |
339 | 0 | } |
340 | | |
341 | 0 | void operator()(const DirectiveTree::Directive &) {} |
342 | | |
343 | 0 | void operator()(const DirectiveTree::Conditional &C) { |
344 | 0 | if (C.Taken) |
345 | 0 | walk(C.Branches[*C.Taken].second); |
346 | 0 | } |
347 | | }; |
348 | | } // namespace |
349 | | |
350 | 0 | TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const { |
351 | 0 | TokenStream Out; |
352 | 0 | Preprocessor(In, Out).walk(*this); |
353 | 0 | return Out; |
354 | 0 | } |
355 | | |
356 | | } // namespace pseudo |
357 | | } // namespace clang |