Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang-tools-extra/pseudo/lib/DirectiveTree.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang-pseudo/DirectiveTree.h"
10
#include "clang/Basic/IdentifierTable.h"
11
#include "clang/Basic/TokenKinds.h"
12
#include "llvm/Support/FormatVariadic.h"
13
#include <optional>
14
#include <variant>
15
16
namespace clang {
17
namespace pseudo {
18
namespace {
19
20
class DirectiveParser {
21
public:
22
  explicit DirectiveParser(const TokenStream &Code)
23
9.56k
      : Code(Code), Tok(&Code.front()) {}
24
9.56k
  void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); }
25
26
private:
27
  // Roles that a directive might take within a conditional block.
28
  enum class Cond { None, If, Else, End };
29
175k
  static Cond classifyDirective(tok::PPKeywordKind K) {
30
175k
    switch (K) {
31
15.9k
    case clang::tok::pp_if:
32
16.2k
    case clang::tok::pp_ifdef:
33
16.6k
    case clang::tok::pp_ifndef:
34
16.6k
      return Cond::If;
35
18.9k
    case clang::tok::pp_elif:
36
22.0k
    case clang::tok::pp_elifdef:
37
23.2k
    case clang::tok::pp_elifndef:
38
25.6k
    case clang::tok::pp_else:
39
25.6k
      return Cond::Else;
40
784
    case clang::tok::pp_endif:
41
784
      return Cond::End;
42
132k
    default:
43
132k
      return Cond::None;
44
175k
    }
45
175k
  }
46
47
  // Parses tokens starting at Tok into Tree.
48
  // If we reach an End or Else directive that ends Tree, returns it.
49
  // If TopLevel is true, then we do not expect End and always return
50
  // std::nullopt.
51
  std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree,
52
34.0k
                                                bool TopLevel) {
53
34.0k
    auto StartsDirective =
54
50.7M
        [&, AllowDirectiveAt((const Token *)nullptr)]() mutable {
55
50.7M
          if (Tok->flag(LexFlags::StartsPPLine)) {
56
            // If we considered a comment at the start of a PP-line, it doesn't
57
            // start a directive but the directive can still start after it.
58
1.27M
            if (Tok->Kind == tok::comment)
59
17.1k
              AllowDirectiveAt = Tok + 1;
60
1.27M
            return Tok->Kind == tok::hash;
61
1.27M
          }
62
49.4M
          return Tok->Kind == tok::hash && AllowDirectiveAt == Tok;
63
50.7M
        };
64
    // Each iteration adds one chunk (or returns, if we see #endif).
65
293k
    while (Tok->Kind != tok::eof) {
66
      // If there's no directive here, we have a code chunk.
67
268k
      if (!StartsDirective()) {
68
109k
        const Token *Start = Tok;
69
109k
        do
70
50.4M
          ++Tok;
71
50.4M
        while (Tok->Kind != tok::eof && !StartsDirective());
72
109k
        Tree->Chunks.push_back(DirectiveTree::Code{
73
109k
            Token::Range{Code.index(*Start), Code.index(*Tok)}});
74
109k
        continue;
75
109k
      }
76
77
      // We have some kind of directive.
78
158k
      DirectiveTree::Directive Directive;
79
158k
      parseDirective(&Directive);
80
158k
      Cond Kind = classifyDirective(Directive.Kind);
81
158k
      if (Kind == Cond::If) {
82
        // #if or similar, starting a nested conditional block.
83
16.6k
        DirectiveTree::Conditional Conditional;
84
16.6k
        Conditional.Branches.emplace_back();
85
16.6k
        Conditional.Branches.back().first = std::move(Directive);
86
16.6k
        parseConditional(&Conditional);
87
16.6k
        Tree->Chunks.push_back(std::move(Conditional));
88
142k
      } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) {
89
        // #endif or similar, ending this PStructure scope.
90
        // (#endif is unexpected at the top level, treat as simple directive).
91
8.36k
        return std::move(Directive);
92
133k
      } else {
93
        // #define or similar, a simple directive at the current scope.
94
133k
        Tree->Chunks.push_back(std::move(Directive));
95
133k
      }
96
158k
    }
97
25.6k
    return std::nullopt;
98
34.0k
  }
99
100
  // Parse the rest of a conditional section, after seeing the If directive.
101
  // Returns after consuming the End directive.
102
16.6k
  void parseConditional(DirectiveTree::Conditional *C) {
103
16.6k
    assert(C->Branches.size() == 1 &&
104
16.6k
           C->Branches.front().second.Chunks.empty() &&
105
16.6k
           "Should be ready to parse first branch body");
106
24.6k
    while (Tok->Kind != tok::eof) {
107
24.4k
      auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false);
108
24.4k
      if (!Terminator) {
109
16.1k
        assert(Tok->Kind == tok::eof && "gave up parsing before eof?");
110
0
        C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok));
111
16.1k
        return;
112
16.1k
      }
113
8.36k
      if (classifyDirective(Terminator->Kind) == Cond::End) {
114
358
        C->End = std::move(*Terminator);
115
358
        return;
116
358
      }
117
8.01k
      assert(classifyDirective(Terminator->Kind) == Cond::Else &&
118
8.01k
             "ended branch unexpectedly");
119
0
      C->Branches.emplace_back();
120
8.01k
      C->Branches.back().first = std::move(*Terminator);
121
8.01k
    }
122
16.6k
  }
123
124
  // Parse a directive. Tok is the hash.
125
158k
  void parseDirective(DirectiveTree::Directive *D) {
126
158k
    assert(Tok->Kind == tok::hash);
127
128
    // Directive spans from the hash until the end of line or file.
129
0
    const Token *Begin = Tok++;
130
2.44M
    while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine))
131
2.28M
      ++Tok;
132
158k
    ArrayRef<Token> Tokens{Begin, Tok};
133
158k
    D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())};
134
135
    // Directive name is the first non-comment token after the hash.
136
158k
    Tokens = Tokens.drop_front().drop_while(
137
158k
        [](const Token &T) { return T.Kind == tok::comment; });
138
158k
    if (!Tokens.empty())
139
80.6k
      D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID();
140
158k
  }
141
142
  const TokenStream &Code;
143
  const Token *Tok;
144
  clang::IdentifierTable PPKeywords;
145
};
146
147
struct Dumper {
148
  llvm::raw_ostream &OS;
149
  unsigned Indent = 0;
150
151
0
  Dumper(llvm::raw_ostream& OS) : OS(OS) {}
152
0
  void operator()(const DirectiveTree& Tree) {
153
0
    for (const auto& Chunk : Tree.Chunks)
154
0
      std::visit(*this, Chunk);
155
0
  }
156
0
  void operator()(const DirectiveTree::Conditional &Conditional) {
157
0
    for (unsigned I = 0; I < Conditional.Branches.size(); ++I) {
158
0
      const auto &Branch = Conditional.Branches[I];
159
0
      (*this)(Branch.first, Conditional.Taken == I);
160
0
      Indent += 2;
161
0
      (*this)(Branch.second);
162
0
      Indent -= 2;
163
0
    }
164
0
    (*this)(Conditional.End);
165
0
  }
166
  void operator()(const DirectiveTree::Directive &Directive,
167
0
                  bool Taken = false) {
168
0
    OS.indent(Indent) << llvm::formatv(
169
0
        "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind),
170
0
        Directive.Tokens.size(), Taken ? " TAKEN" : "");
171
0
  }
172
0
  void operator()(const DirectiveTree::Code &Code) {
173
0
    OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n",
174
0
                                       Code.Tokens.size());
175
0
  }
176
};
177
} // namespace
178
179
9.56k
DirectiveTree DirectiveTree::parse(const TokenStream &Code) {
180
9.56k
  DirectiveTree Result;
181
9.56k
  DirectiveParser(Code).parse(&Result);
182
9.56k
  return Result;
183
9.56k
}
184
185
// Define operator<< in terms of dump() functions above.
186
#define OSTREAM_DUMP(Type)                                                     \
187
0
  llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) {        \
188
0
    Dumper{OS}(T);                                                         \
189
0
    return OS;                                                                 \
190
0
  }
Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree const&)
Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree::Directive const&)
Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree::Conditional const&)
Unexecuted instantiation: clang::pseudo::operator<<(llvm::raw_ostream&, clang::pseudo::DirectiveTree::Code const&)
191
OSTREAM_DUMP(DirectiveTree)
192
OSTREAM_DUMP(DirectiveTree::Directive)
193
OSTREAM_DUMP(DirectiveTree::Conditional)
194
OSTREAM_DUMP(DirectiveTree::Code)
195
#undef OSTREAM_DUMP
196
197
namespace {
198
// Makes choices about conditional branches.
199
//
200
// Generally it tries to maximize the amount of useful code we see.
201
//
202
// Caveat: each conditional is evaluated independently. Consider this code:
203
//   #ifdef WINDOWS
204
//     bool isWindows = true;
205
//   #endif
206
//   #ifndef WINDOWS
207
//     bool isWindows = false;
208
//   #endif
209
// We take both branches and define isWindows twice. We could track more state
210
// in order to produce a consistent view, but this is complex.
211
class BranchChooser {
212
public:
213
9.56k
  BranchChooser(const TokenStream &Code) : Code(Code) {}
214
215
  // Describes code seen by making particular branch choices. Higher is better.
216
  struct Score {
217
    int Tokens = 0; // excluding comments and directives
218
    int Directives = 0;
219
    int Errors = 0; // #error directives
220
221
7.48k
    bool operator>(const Score &Other) const {
222
      // Seeing errors is bad, other things are good.
223
7.48k
      return std::make_tuple(-Errors, Tokens, Directives) >
224
7.48k
             std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives);
225
7.48k
    }
226
227
259k
    Score &operator+=(const Score &Other) {
228
259k
      Tokens += Other.Tokens;
229
259k
      Directives += Other.Directives;
230
259k
      Errors += Other.Errors;
231
259k
      return *this;
232
259k
    }
233
  };
234
235
109k
  Score operator()(DirectiveTree::Code &C) {
236
109k
    Score S;
237
109k
    for (const Token &T : Code.tokens(C.Tokens))
238
50.4M
      if (T.Kind != tok::comment)
239
50.4M
        ++S.Tokens;
240
109k
    return S;
241
109k
  }
242
243
133k
  Score operator()(DirectiveTree::Directive &D) {
244
133k
    Score S;
245
133k
    S.Directives = 1;
246
133k
    S.Errors = D.Kind == tok::pp_error;
247
133k
    return S;
248
133k
  }
249
250
16.6k
  Score operator()(DirectiveTree::Conditional &C) {
251
16.6k
    Score Best;
252
16.6k
    bool MayTakeTrivial = true;
253
16.6k
    bool TookTrivial = false;
254
255
41.2k
    for (unsigned I = 0; I < C.Branches.size(); ++I) {
256
      // Walk the branch to make its nested choices in any case.
257
24.6k
      Score BranchScore = walk(C.Branches[I].second);
258
      // If we already took an #if 1, don't consider any other branches.
259
24.6k
      if (TookTrivial)
260
427
        continue;
261
      // Is this a trivial #if 0 or #if 1?
262
24.2k
      if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) {
263
3.17k
        if (!*TriviallyTaken)
264
580
          continue; // Don't consider #if 0 even if it scores well.
265
2.59k
        if (MayTakeTrivial)
266
2.21k
          TookTrivial = true;
267
21.0k
      } else {
268
        // After a nontrivial condition, #elif 1 isn't guaranteed taken.
269
21.0k
        MayTakeTrivial = false;
270
21.0k
      }
271
      // Is this the best branch so far? (Including if it's #if 1).
272
23.6k
      if (TookTrivial || !C.Taken || BranchScore > Best) {
273
17.8k
        Best = BranchScore;
274
17.8k
        C.Taken = I;
275
17.8k
      }
276
23.6k
    }
277
16.6k
    return Best;
278
16.6k
  }
279
34.2k
  Score walk(DirectiveTree &M) {
280
34.2k
    Score S;
281
34.2k
    for (auto &C : M.Chunks)
282
259k
      S += std::visit(*this, C);
283
34.2k
    return S;
284
34.2k
  }
285
286
private:
287
  // Return true if the directive starts an always-taken conditional branch,
288
  // false if the branch is never taken, and std::nullopt otherwise.
289
24.2k
  std::optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) {
290
24.2k
    switch (Dir.Kind) {
291
15.9k
    case clang::tok::pp_if:
292
21.8k
    case clang::tok::pp_elif:
293
21.8k
      break; // handled below
294
382
    case clang::tok::pp_else:
295
382
      return true;
296
1.94k
    default: // #ifdef etc
297
1.94k
      return std::nullopt;
298
24.2k
    }
299
300
21.8k
    const auto &Tokens = Code.tokens(Dir.Tokens);
301
21.8k
    assert(!Tokens.empty() && Tokens.front().Kind == tok::hash);
302
0
    const Token &Name = Tokens.front().nextNC();
303
21.8k
    const Token &Value = Name.nextNC();
304
    // Does the condition consist of exactly one token?
305
21.8k
    if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end())
306
17.2k
      return std::nullopt;
307
4.60k
    return llvm::StringSwitch<std::optional<bool>>(Value.text())
308
4.60k
        .Cases("true", "1", true)
309
4.60k
        .Cases("false", "0", false)
310
4.60k
        .Default(std::nullopt);
311
21.8k
  }
312
313
  const TokenStream &Code;
314
};
315
316
} // namespace
317
318
9.56k
void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) {
319
9.56k
  BranchChooser{Code}.walk(Tree);
320
9.56k
}
321
322
namespace {
323
class Preprocessor {
324
  const TokenStream &In;
325
  TokenStream &Out;
326
327
public:
328
0
  Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
329
0
  ~Preprocessor() { Out.finalize(); }
330
331
0
  void walk(const DirectiveTree &T) {
332
0
    for (const auto &C : T.Chunks)
333
0
      std::visit(*this, C);
334
0
  }
335
336
0
  void operator()(const DirectiveTree::Code &C) {
337
0
    for (const auto &Tok : In.tokens(C.Tokens))
338
0
      Out.push(Tok);
339
0
  }
340
341
0
  void operator()(const DirectiveTree::Directive &) {}
342
343
0
  void operator()(const DirectiveTree::Conditional &C) {
344
0
    if (C.Taken)
345
0
      walk(C.Branches[*C.Taken].second);
346
0
  }
347
};
348
} // namespace
349
350
0
TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
351
0
  TokenStream Out;
352
0
  Preprocessor(In, Out).walk(*this);
353
0
  return Out;
354
0
}
355
356
} // namespace pseudo
357
} // namespace clang