Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/clang/lib/Format/FormatTokenSource.h
Line
Count
Source (jump to first uncovered line)
1
//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file defines the \c FormatTokenSource interface, which provides a token
11
/// stream as well as the ability to manipulate the token stream.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17
18
#include "FormatToken.h"
19
#include "UnwrappedLineParser.h"
20
#include "llvm/ADT/DenseMap.h"
21
#include <cstddef>
22
23
#define DEBUG_TYPE "format-token-source"
24
25
namespace clang {
26
namespace format {
27
28
// Navigate a token stream.
29
//
30
// Enables traversal of a token stream, resetting the position in a token
31
// stream, as well as inserting new tokens.
32
class FormatTokenSource {
33
public:
34
254k
  virtual ~FormatTokenSource() {}
35
36
  // Returns the next token in the token stream.
37
  virtual FormatToken *getNextToken() = 0;
38
39
  // Returns the token preceding the token returned by the last call to
40
  // getNextToken() in the token stream, or nullptr if no such token exists.
41
  //
42
  // Must not be called directly at the position directly after insertTokens()
43
  // is called.
44
  virtual FormatToken *getPreviousToken() = 0;
45
46
  // Returns the token that would be returned by the next call to
47
  // getNextToken().
48
  virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
49
50
  // Returns whether we are at the end of the file.
51
  // This can be different from whether getNextToken() returned an eof token
52
  // when the FormatTokenSource is a view on a part of the token stream.
53
  virtual bool isEOF() = 0;
54
55
  // Gets the current position in the token stream, to be used by setPosition().
56
  //
57
  // Note that the value of the position is not meaningful, and specifically
58
  // should not be used to get relative token positions.
59
  virtual unsigned getPosition() = 0;
60
61
  // Resets the token stream to the state it was in when getPosition() returned
62
  // Position, and return the token at that position in the stream.
63
  virtual FormatToken *setPosition(unsigned Position) = 0;
64
65
  // Insert the given tokens before the current position.
66
  // Returns the first token in \c Tokens.
67
  // The next returned token will be the second token in \c Tokens.
68
  // Requires the last token in Tokens to be EOF; once the EOF token is reached,
69
  // the next token will be the last token returned by getNextToken();
70
  //
71
  // For example, given the token sequence 'a1 a2':
72
  // getNextToken() -> a1
73
  // insertTokens('b1 b2') -> b1
74
  // getNextToken() -> b2
75
  // getNextToken() -> a1
76
  // getNextToken() -> a2
77
  virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
78
};
79
80
class IndexedTokenSource : public FormatTokenSource {
81
public:
82
  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
83
1.01k
      : Tokens(Tokens), Position(-1) {}
84
85
154M
  FormatToken *getNextToken() override {
86
154M
    if (Position >= 0 && isEOF()) {
87
0
      LLVM_DEBUG({
88
0
        llvm::dbgs() << "Next ";
89
0
        dbgToken(Position);
90
0
      });
91
0
      return Tokens[Position];
92
0
    }
93
154M
    Position = successor(Position);
94
154M
    LLVM_DEBUG({
95
154M
      llvm::dbgs() << "Next ";
96
154M
      dbgToken(Position);
97
154M
    });
98
154M
    return Tokens[Position];
99
154M
  }
100
101
186
  FormatToken *getPreviousToken() override {
102
186
    assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
103
186
    return Position > 0 ? Tokens[Position - 1] : nullptr;
104
186
  }
105
106
1.40M
  FormatToken *peekNextToken(bool SkipComment = false) override {
107
1.40M
    if (isEOF())
108
0
      return Tokens[Position];
109
1.40M
    int Next = successor(Position);
110
1.40M
    if (SkipComment)
111
25.6k
      while (Tokens[Next]->is(tok::comment))
112
279
        Next = successor(Next);
113
1.40M
    LLVM_DEBUG({
114
1.40M
      llvm::dbgs() << "Peeking ";
115
1.40M
      dbgToken(Next);
116
1.40M
    });
117
1.40M
    return Tokens[Next];
118
1.40M
  }
119
120
155M
  bool isEOF() override {
121
155M
    return Position == -1 ? false : Tokens[Position]->is(tok::eof);
122
155M
  }
123
124
257k
  unsigned getPosition() override {
125
257k
    LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
126
257k
    assert(Position >= 0);
127
0
    return Position;
128
257k
  }
129
130
257k
  FormatToken *setPosition(unsigned P) override {
131
257k
    LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
132
257k
    Position = P;
133
257k
    return Tokens[Position];
134
257k
  }
135
136
0
  FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
137
0
    assert(Position != -1);
138
0
    assert((*New.rbegin())->Tok.is(tok::eof));
139
0
    int Next = Tokens.size();
140
0
    Tokens.append(New.begin(), New.end());
141
0
    LLVM_DEBUG({
142
0
      llvm::dbgs() << "Inserting:\n";
143
0
      for (int I = Next, E = Tokens.size(); I != E; ++I)
144
0
        dbgToken(I, "  ");
145
0
      llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
146
0
                   << Position << "\n";
147
0
    });
148
0
    Jumps[Tokens.size() - 1] = Position;
149
0
    Position = Next;
150
0
    LLVM_DEBUG({
151
0
      llvm::dbgs() << "At inserted token ";
152
0
      dbgToken(Position);
153
0
    });
154
0
    return Tokens[Position];
155
0
  }
156
157
1.05k
  void reset() { Position = -1; }
158
159
private:
160
155M
  int successor(int Current) const {
161
155M
    int Next = Current + 1;
162
155M
    auto it = Jumps.find(Next);
163
155M
    if (it != Jumps.end()) {
164
0
      Next = it->second;
165
0
      assert(!Jumps.contains(Next));
166
0
    }
167
0
    return Next;
168
155M
  }
169
170
0
  void dbgToken(int Position, llvm::StringRef Indent = "") {
171
0
    FormatToken *Tok = Tokens[Position];
172
0
    llvm::dbgs() << Indent << "[" << Position
173
0
                 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
174
0
                 << ", Macro: " << !!Tok->MacroCtx << "\n";
175
0
  }
176
177
  SmallVector<FormatToken *> Tokens;
178
  int Position;
179
180
  // Maps from position a to position b, so that when we reach a, the token
181
  // stream continues at position b instead.
182
  llvm::DenseMap<int, int> Jumps;
183
};
184
185
class ScopedMacroState : public FormatTokenSource {
186
public:
187
  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
188
                   FormatToken *&ResetToken)
189
      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
190
        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
191
253k
        Token(nullptr), PreviousToken(nullptr) {
192
253k
    FakeEOF.Tok.startToken();
193
253k
    FakeEOF.Tok.setKind(tok::eof);
194
253k
    TokenSource = this;
195
253k
    Line.Level = 0;
196
253k
    Line.InPPDirective = true;
197
    // InMacroBody gets set after the `#define x` part.
198
253k
  }
199
200
253k
  ~ScopedMacroState() override {
201
253k
    TokenSource = PreviousTokenSource;
202
253k
    ResetToken = Token;
203
253k
    Line.InPPDirective = false;
204
253k
    Line.InMacroBody = false;
205
253k
    Line.Level = PreviousLineLevel;
206
253k
  }
207
208
871k
  FormatToken *getNextToken() override {
209
    // The \c UnwrappedLineParser guards against this by never calling
210
    // \c getNextToken() after it has encountered the first eof token.
211
871k
    assert(!eof());
212
0
    PreviousToken = Token;
213
871k
    Token = PreviousTokenSource->getNextToken();
214
871k
    if (eof())
215
253k
      return &FakeEOF;
216
618k
    return Token;
217
871k
  }
218
219
0
  FormatToken *getPreviousToken() override {
220
0
    return PreviousTokenSource->getPreviousToken();
221
0
  }
222
223
4.15k
  FormatToken *peekNextToken(bool SkipComment) override {
224
4.15k
    if (eof())
225
0
      return &FakeEOF;
226
4.15k
    return PreviousTokenSource->peekNextToken(SkipComment);
227
4.15k
  }
228
229
0
  bool isEOF() override { return PreviousTokenSource->isEOF(); }
230
231
0
  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
232
233
0
  FormatToken *setPosition(unsigned Position) override {
234
0
    PreviousToken = nullptr;
235
0
    Token = PreviousTokenSource->setPosition(Position);
236
0
    return Token;
237
0
  }
238
239
0
  FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
240
0
    llvm_unreachable("Cannot insert tokens while parsing a macro.");
241
0
    return nullptr;
242
0
  }
243
244
private:
245
1.74M
  bool eof() {
246
1.74M
    return Token && Token->HasUnescapedNewline &&
247
1.74M
           !continuesLineComment(*Token, PreviousToken,
248
253k
                                 /*MinColumnToken=*/PreviousToken);
249
1.74M
  }
250
251
  FormatToken FakeEOF;
252
  UnwrappedLine &Line;
253
  FormatTokenSource *&TokenSource;
254
  FormatToken *&ResetToken;
255
  unsigned PreviousLineLevel;
256
  FormatTokenSource *PreviousTokenSource;
257
258
  FormatToken *Token;
259
  FormatToken *PreviousToken;
260
};
261
262
} // namespace format
263
} // namespace clang
264
265
#undef DEBUG_TYPE
266
267
#endif