/src/llvm-project/clang/lib/Format/FormatTokenSource.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file defines the \c FormatTokenSource interface, which provides a token |
11 | | /// stream as well as the ability to manipulate the token stream. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H |
16 | | #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H |
17 | | |
18 | | #include "FormatToken.h" |
19 | | #include "UnwrappedLineParser.h" |
20 | | #include "llvm/ADT/DenseMap.h" |
21 | | #include <cstddef> |
22 | | |
23 | | #define DEBUG_TYPE "format-token-source" |
24 | | |
25 | | namespace clang { |
26 | | namespace format { |
27 | | |
28 | | // Abstract interface for navigating a token stream. |
29 | | // |
30 | | // Enables traversal of a token stream, resetting the position in the token |
31 | | // stream, as well as inserting new tokens. |
32 | | class FormatTokenSource { |
33 | | public: |
34 | 254k | virtual ~FormatTokenSource() {} |
35 | | |
36 | | // Returns the next token in the token stream. |
37 | | virtual FormatToken *getNextToken() = 0; |
38 | | |
39 | | // Returns the token preceding the token returned by the last call to |
40 | | // getNextToken() in the token stream, or nullptr if no such token exists. |
41 | | // |
42 | | // Must not be called at the position immediately after a call to |
43 | | // insertTokens(). |
44 | | virtual FormatToken *getPreviousToken() = 0; |
45 | | |
46 | | // Returns the token that would be returned by the next call to |
47 | | // getNextToken(). If SkipComment is true, comment tokens are skipped. |
48 | | virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; |
49 | | |
50 | | // Returns whether we are at the end of the file. |
51 | | // This can be different from whether getNextToken() returned an eof token |
52 | | // when the FormatTokenSource is a view on a part of the token stream. |
53 | | virtual bool isEOF() = 0; |
54 | | |
55 | | // Gets the current position in the token stream, to be used by setPosition(). |
56 | | // |
57 | | // Note that the value of the position is not meaningful, and specifically |
58 | | // should not be used to get relative token positions. |
59 | | virtual unsigned getPosition() = 0; |
60 | | |
61 | | // Resets the token stream to the state it was in when getPosition() returned |
62 | | // Position, and returns the token at that position in the stream. |
63 | | virtual FormatToken *setPosition(unsigned Position) = 0; |
64 | | |
65 | | // Inserts the given tokens before the current position. |
66 | | // Returns the first token in \c Tokens. |
67 | | // The next returned token will be the second token in \c Tokens. |
68 | | // Requires the last token in Tokens to be EOF; once the EOF token is reached, |
69 | | // the next token will be the last token returned by getNextToken(). |
70 | | // |
71 | | // For example, given the token sequence 'a1 a2': |
72 | | // getNextToken() -> a1 |
73 | | // insertTokens('b1 b2 eof') -> b1 |
74 | | // getNextToken() -> b2 |
75 | | // getNextToken() -> a1 |
76 | | // getNextToken() -> a2 |
77 | | virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; |
78 | | }; |
79 | | |
80 | | class IndexedTokenSource : public FormatTokenSource { |
81 | | public: |
82 | | IndexedTokenSource(ArrayRef<FormatToken *> Tokens) |
83 | 1.01k | : Tokens(Tokens), Position(-1) {} |
84 | | |
85 | 154M | FormatToken *getNextToken() override { |
86 | 154M | if (Position >= 0 && isEOF()) { |
87 | 0 | LLVM_DEBUG({ |
88 | 0 | llvm::dbgs() << "Next "; |
89 | 0 | dbgToken(Position); |
90 | 0 | }); |
91 | 0 | return Tokens[Position]; |
92 | 0 | } |
93 | 154M | Position = successor(Position); |
94 | 154M | LLVM_DEBUG({ |
95 | 154M | llvm::dbgs() << "Next "; |
96 | 154M | dbgToken(Position); |
97 | 154M | }); |
98 | 154M | return Tokens[Position]; |
99 | 154M | } |
100 | | |
101 | 186 | FormatToken *getPreviousToken() override { |
102 | 186 | assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof)); |
103 | 186 | return Position > 0 ? Tokens[Position - 1] : nullptr; |
104 | 186 | } |
105 | | |
106 | 1.40M | FormatToken *peekNextToken(bool SkipComment = false) override { |
107 | 1.40M | if (isEOF()) |
108 | 0 | return Tokens[Position]; |
109 | 1.40M | int Next = successor(Position); |
110 | 1.40M | if (SkipComment) |
111 | 25.6k | while (Tokens[Next]->is(tok::comment)) |
112 | 279 | Next = successor(Next); |
113 | 1.40M | LLVM_DEBUG({ |
114 | 1.40M | llvm::dbgs() << "Peeking "; |
115 | 1.40M | dbgToken(Next); |
116 | 1.40M | }); |
117 | 1.40M | return Tokens[Next]; |
118 | 1.40M | } |
119 | | |
120 | 155M | bool isEOF() override { |
121 | 155M | return Position == -1 ? false : Tokens[Position]->is(tok::eof); |
122 | 155M | } |
123 | | |
124 | 257k | unsigned getPosition() override { |
125 | 257k | LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); |
126 | 257k | assert(Position >= 0); |
127 | 0 | return Position; |
128 | 257k | } |
129 | | |
130 | 257k | FormatToken *setPosition(unsigned P) override { |
131 | 257k | LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); |
132 | 257k | Position = P; |
133 | 257k | return Tokens[Position]; |
134 | 257k | } |
135 | | |
136 | 0 | FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { |
137 | 0 | assert(Position != -1); |
138 | 0 | assert((*New.rbegin())->Tok.is(tok::eof)); |
139 | 0 | int Next = Tokens.size(); |
140 | 0 | Tokens.append(New.begin(), New.end()); |
141 | 0 | LLVM_DEBUG({ |
142 | 0 | llvm::dbgs() << "Inserting:\n"; |
143 | 0 | for (int I = Next, E = Tokens.size(); I != E; ++I) |
144 | 0 | dbgToken(I, " "); |
145 | 0 | llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " |
146 | 0 | << Position << "\n"; |
147 | 0 | }); |
148 | 0 | Jumps[Tokens.size() - 1] = Position; |
149 | 0 | Position = Next; |
150 | 0 | LLVM_DEBUG({ |
151 | 0 | llvm::dbgs() << "At inserted token "; |
152 | 0 | dbgToken(Position); |
153 | 0 | }); |
154 | 0 | return Tokens[Position]; |
155 | 0 | } |
156 | | |
157 | 1.05k | void reset() { Position = -1; } |
158 | | |
159 | | private: |
160 | 155M | int successor(int Current) const { |
161 | 155M | int Next = Current + 1; |
162 | 155M | auto it = Jumps.find(Next); |
163 | 155M | if (it != Jumps.end()) { |
164 | 0 | Next = it->second; |
165 | 0 | assert(!Jumps.contains(Next)); |
166 | 0 | } |
167 | 0 | return Next; |
168 | 155M | } |
169 | | |
170 | 0 | void dbgToken(int Position, llvm::StringRef Indent = "") { |
171 | 0 | FormatToken *Tok = Tokens[Position]; |
172 | 0 | llvm::dbgs() << Indent << "[" << Position |
173 | 0 | << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText |
174 | 0 | << ", Macro: " << !!Tok->MacroCtx << "\n"; |
175 | 0 | } |
176 | | |
177 | | SmallVector<FormatToken *> Tokens; |
178 | | int Position; |
179 | | |
180 | | // Maps position a to position b: when the successor of a position would be |
181 | | // a, the stream continues at b instead. Entries are added by insertTokens(). |
182 | | llvm::DenseMap<int, int> Jumps; |
183 | | }; |
184 | | |
185 | | class ScopedMacroState : public FormatTokenSource { |
186 | | public: |
187 | | ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, |
188 | | FormatToken *&ResetToken) |
189 | | : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), |
190 | | PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), |
191 | 253k | Token(nullptr), PreviousToken(nullptr) { |
192 | 253k | FakeEOF.Tok.startToken(); |
193 | 253k | FakeEOF.Tok.setKind(tok::eof); |
194 | 253k | TokenSource = this; |
195 | 253k | Line.Level = 0; |
196 | 253k | Line.InPPDirective = true; |
197 | | // Line.InMacroBody is not set here; it gets set after the `#define x` part. |
198 | 253k | } |
199 | | |
200 | 253k | ~ScopedMacroState() override { |
201 | 253k | TokenSource = PreviousTokenSource; |
202 | 253k | ResetToken = Token; |
203 | 253k | Line.InPPDirective = false; |
204 | 253k | Line.InMacroBody = false; |
205 | 253k | Line.Level = PreviousLineLevel; |
206 | 253k | } |
207 | | |
208 | 871k | FormatToken *getNextToken() override { |
209 | | // Must not be called once eof() holds. The \c UnwrappedLineParser guards |
210 | | // against this by never calling \c getNextToken() after the first eof token. |
211 | 871k | assert(!eof()); |
212 | 0 | PreviousToken = Token; |
213 | 871k | Token = PreviousTokenSource->getNextToken(); |
214 | 871k | if (eof()) |
215 | 253k | return &FakeEOF; |
216 | 618k | return Token; |
217 | 871k | } |
218 | | |
219 | 0 | FormatToken *getPreviousToken() override { |
220 | 0 | return PreviousTokenSource->getPreviousToken(); |
221 | 0 | } |
222 | | |
223 | 4.15k | FormatToken *peekNextToken(bool SkipComment) override { |
224 | 4.15k | if (eof()) |
225 | 0 | return &FakeEOF; |
226 | 4.15k | return PreviousTokenSource->peekNextToken(SkipComment); |
227 | 4.15k | } |
228 | | |
229 | 0 | bool isEOF() override { return PreviousTokenSource->isEOF(); } |
230 | | |
231 | 0 | unsigned getPosition() override { return PreviousTokenSource->getPosition(); } |
232 | | |
233 | 0 | FormatToken *setPosition(unsigned Position) override { |
234 | 0 | PreviousToken = nullptr; |
235 | 0 | Token = PreviousTokenSource->setPosition(Position); |
236 | 0 | return Token; |
237 | 0 | } |
238 | | |
239 | 0 | FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { |
240 | 0 | llvm_unreachable("Cannot insert tokens while parsing a macro."); |
241 | 0 | return nullptr; |
242 | 0 | } |
243 | | |
244 | | private: |
245 | 1.74M | bool eof() { |
246 | 1.74M | return Token && Token->HasUnescapedNewline && |
247 | 1.74M | !continuesLineComment(*Token, PreviousToken, |
248 | 253k | /*MinColumnToken=*/PreviousToken); |
249 | 1.74M | } |
250 | | |
251 | | FormatToken FakeEOF; |
252 | | UnwrappedLine &Line; |
253 | | FormatTokenSource *&TokenSource; |
254 | | FormatToken *&ResetToken; |
255 | | unsigned PreviousLineLevel; |
256 | | FormatTokenSource *PreviousTokenSource; |
257 | | |
258 | | FormatToken *Token; |
259 | | FormatToken *PreviousToken; |
260 | | }; |
261 | | |
262 | | } // namespace format |
263 | | } // namespace clang |
264 | | |
265 | | #undef DEBUG_TYPE |
266 | | |
267 | | #endif |