/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/BufferedTokenStream.cpp
Line | Count | Source |
1 | | /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. |
2 | | * Use of this file is governed by the BSD 3-clause license that |
3 | | * can be found in the LICENSE.txt file in the project root. |
4 | | */ |
5 | | |
6 | | #include "WritableToken.h" |
7 | | #include "Lexer.h" |
8 | | #include "RuleContext.h" |
9 | | #include "misc/Interval.h" |
10 | | #include "Exceptions.h" |
11 | | #include "support/CPPUtils.h" |
12 | | |
13 | | #include "BufferedTokenStream.h" |
14 | | |
15 | | using namespace antlr4; |
16 | | using namespace antlrcpp; |
17 | | |
18 | 4.20k | BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource){ |
19 | 4.20k | InitializeInstanceFields(); |
20 | 4.20k | } |
21 | | |
/// Returns the token source this stream pulls from (non-owning pointer).
TokenSource* BufferedTokenStream::getTokenSource() const {
  return _tokenSource;
}
25 | | |
/// Current position in the buffered token list (index of the next token to consume).
size_t BufferedTokenStream::index() {
  return _p;
}
29 | | |
/// Marks are unnecessary for a fully buffering stream, so a dummy marker (0)
/// is returned; release() correspondingly does nothing.
ssize_t BufferedTokenStream::mark() {
  return 0;
}
33 | | |
/// Counterpart of mark(); a no-op because mark() acquires no resources.
void BufferedTokenStream::release(ssize_t /*marker*/) {
  // no resources to release
}
37 | | |
/// Rewinds the stream to the beginning. Buffered tokens are kept; only the
/// read position is reset (via seek, which also triggers lazy initialization).
void BufferedTokenStream::reset() {
  seek(0);
}
41 | | |
/// Moves the read position to `index`, first ensuring the stream is
/// initialized. The index is passed through adjustSeekIndex() so subclasses
/// can remap it (the base implementation is the identity).
void BufferedTokenStream::seek(size_t index) {
  lazyInit();
  _p = adjustSeekIndex(index);
}
46 | | |
/// Number of tokens buffered so far (not the total in the source; more may
/// still be fetched lazily).
size_t BufferedTokenStream::size() {
  return _tokens.size();
}
50 | | |
51 | 10.3M | void BufferedTokenStream::consume() { |
52 | 10.3M | bool skipEofCheck = false; |
53 | 10.3M | if (!_needSetup) { |
54 | 10.3M | if (_fetchedEOF) { |
55 | | // the last token in tokens is EOF. skip check if p indexes any |
56 | | // fetched token except the last. |
57 | 83.7k | skipEofCheck = _p < _tokens.size() - 1; |
58 | 10.2M | } else { |
59 | | // no EOF token in tokens. skip check if p indexes a fetched token. |
60 | 10.2M | skipEofCheck = _p < _tokens.size(); |
61 | 10.2M | } |
62 | 10.3M | } else { |
63 | | // not yet initialized |
64 | 0 | skipEofCheck = false; |
65 | 0 | } |
66 | | |
67 | 10.3M | if (!skipEofCheck && LA(1) == Token::EOF) { |
68 | 0 | throw IllegalStateException("cannot consume EOF"); |
69 | 0 | } |
70 | | |
71 | 10.3M | if (sync(_p + 1)) { |
72 | 10.3M | _p = adjustSeekIndex(_p + 1); |
73 | 10.3M | } |
74 | 10.3M | } |
75 | | |
76 | 65.2M | bool BufferedTokenStream::sync(size_t i) { |
77 | 65.2M | if (i + 1 < _tokens.size()) |
78 | 29.9M | return true; |
79 | 35.2M | size_t n = i - _tokens.size() + 1; // how many more elements we need? |
80 | | |
81 | 35.2M | if (n > 0) { |
82 | 6.54M | size_t fetched = fetch(n); |
83 | 6.54M | return fetched >= n; |
84 | 6.54M | } |
85 | | |
86 | 28.7M | return true; |
87 | 35.2M | } |
88 | | |
89 | 6.54M | size_t BufferedTokenStream::fetch(size_t n) { |
90 | 6.54M | if (_fetchedEOF) { |
91 | 4.21k | return 0; |
92 | 4.21k | } |
93 | | |
94 | 6.53M | size_t i = 0; |
95 | 13.0M | while (i < n) { |
96 | 6.53M | std::unique_ptr<Token> t(_tokenSource->nextToken()); |
97 | | |
98 | 6.53M | if (is<WritableToken *>(t.get())) { |
99 | 6.53M | (static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size()); |
100 | 6.53M | } |
101 | | |
102 | 6.53M | _tokens.push_back(std::move(t)); |
103 | 6.53M | ++i; |
104 | | |
105 | 6.53M | if (_tokens.back()->getType() == Token::EOF) { |
106 | 4.02k | _fetchedEOF = true; |
107 | 4.02k | break; |
108 | 4.02k | } |
109 | 6.53M | } |
110 | | |
111 | 6.53M | return i; |
112 | 6.54M | } |
113 | | |
114 | 15.9k | Token* BufferedTokenStream::get(size_t i) const { |
115 | 15.9k | if (i >= _tokens.size()) { |
116 | 0 | throw IndexOutOfBoundsException(std::string("token index ") + |
117 | 0 | std::to_string(i) + |
118 | 0 | std::string(" out of range 0..") + |
119 | 0 | std::to_string(_tokens.size() - 1)); |
120 | 0 | } |
121 | 15.9k | return _tokens[i].get(); |
122 | 15.9k | } |
123 | | |
124 | 0 | std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) { |
125 | 0 | std::vector<Token *> subset; |
126 | |
|
127 | 0 | lazyInit(); |
128 | |
|
129 | 0 | if (_tokens.empty()) { |
130 | 0 | return subset; |
131 | 0 | } |
132 | | |
133 | 0 | if (stop >= _tokens.size()) { |
134 | 0 | stop = _tokens.size() - 1; |
135 | 0 | } |
136 | 0 | for (size_t i = start; i <= stop; i++) { |
137 | 0 | Token *t = _tokens[i].get(); |
138 | 0 | if (t->getType() == Token::EOF) { |
139 | 0 | break; |
140 | 0 | } |
141 | 0 | subset.push_back(t); |
142 | 0 | } |
143 | 0 | return subset; |
144 | 0 | } |
145 | | |
/// Lookahead by token type: the type of the token LT(i) would return.
size_t BufferedTokenStream::LA(ssize_t i) {
  return LT(i)->getType();
}
149 | | |
150 | 0 | Token* BufferedTokenStream::LB(size_t k) { |
151 | 0 | if (k > _p) { |
152 | 0 | return nullptr; |
153 | 0 | } |
154 | 0 | return _tokens[_p - k].get(); |
155 | 0 | } |
156 | | |
157 | 0 | Token* BufferedTokenStream::LT(ssize_t k) { |
158 | 0 | lazyInit(); |
159 | 0 | if (k == 0) { |
160 | 0 | return nullptr; |
161 | 0 | } |
162 | 0 | if (k < 0) { |
163 | 0 | return LB(-k); |
164 | 0 | } |
165 | | |
166 | 0 | size_t i = _p + k - 1; |
167 | 0 | sync(i); |
168 | 0 | if (i >= _tokens.size()) { // return EOF token |
169 | | // EOF must be last token |
170 | 0 | return _tokens.back().get(); |
171 | 0 | } |
172 | | |
173 | 0 | return _tokens[i].get(); |
174 | 0 | } |
175 | | |
/// Hook applied to every seek/consume target index. The base implementation
/// is the identity; subclasses may remap the index.
ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) {
  return i;
}
179 | | |
/// Runs setup() exactly once, on first use of the stream.
void BufferedTokenStream::lazyInit() {
  if (_needSetup) {
    setup();
  }
}
185 | | |
/// One-time initialization: buffers the first token and positions _p at the
/// (possibly adjusted) initial index.
void BufferedTokenStream::setup() {
  _needSetup = false;
  sync(0);
  _p = adjustSeekIndex(0);
}
191 | | |
192 | 0 | void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) { |
193 | 0 | _tokenSource = tokenSource; |
194 | 0 | _tokens.clear(); |
195 | 0 | _fetchedEOF = false; |
196 | 0 | _needSetup = true; |
197 | 0 | } |
198 | | |
199 | 0 | std::vector<Token *> BufferedTokenStream::getTokens() { |
200 | 0 | std::vector<Token *> result; |
201 | 0 | for (auto &t : _tokens) |
202 | 0 | result.push_back(t.get()); |
203 | 0 | return result; |
204 | 0 | } |
205 | | |
/// Convenience overload: tokens in [start, stop] with no type filter.
std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) {
  return getTokens(start, stop, std::vector<size_t>());
}
209 | | |
210 | 0 | std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) { |
211 | 0 | lazyInit(); |
212 | 0 | if (stop >= _tokens.size() || start >= _tokens.size()) { |
213 | 0 | throw IndexOutOfBoundsException(std::string("start ") + |
214 | 0 | std::to_string(start) + |
215 | 0 | std::string(" or stop ") + |
216 | 0 | std::to_string(stop) + |
217 | 0 | std::string(" not in 0..") + |
218 | 0 | std::to_string(_tokens.size() - 1)); |
219 | 0 | } |
220 | | |
221 | 0 | std::vector<Token *> filteredTokens; |
222 | |
|
223 | 0 | if (start > stop) { |
224 | 0 | return filteredTokens; |
225 | 0 | } |
226 | | |
227 | 0 | for (size_t i = start; i <= stop; i++) { |
228 | 0 | Token *tok = _tokens[i].get(); |
229 | |
|
230 | 0 | if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) { |
231 | 0 | filteredTokens.push_back(tok); |
232 | 0 | } |
233 | 0 | } |
234 | 0 | return filteredTokens; |
235 | 0 | } |
236 | | |
237 | 0 | std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) { |
238 | 0 | std::vector<size_t> s; |
239 | 0 | s.push_back(ttype); |
240 | 0 | return getTokens(start, stop, s); |
241 | 0 | } |
242 | | |
243 | 29.5M | ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) { |
244 | 29.5M | sync(i); |
245 | 29.5M | if (i >= size()) { |
246 | 0 | return size() - 1; |
247 | 0 | } |
248 | | |
249 | 29.5M | Token *token = _tokens[i].get(); |
250 | 29.5M | while (token->getChannel() != channel) { |
251 | 13.7k | if (token->getType() == Token::EOF) { |
252 | 0 | return i; |
253 | 0 | } |
254 | 13.7k | i++; |
255 | 13.7k | sync(i); |
256 | 13.7k | token = _tokens[i].get(); |
257 | 13.7k | } |
258 | 29.5M | return i; |
259 | 29.5M | } |
260 | | |
261 | 25.3M | ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) { |
262 | 25.3M | sync(i); |
263 | 25.3M | if (i >= size()) { |
264 | | // the EOF token is on every channel |
265 | 0 | return size() - 1; |
266 | 0 | } |
267 | | |
268 | 25.3M | while (true) { |
269 | 25.3M | Token *token = _tokens[i].get(); |
270 | 25.3M | if (token->getType() == Token::EOF || token->getChannel() == channel) { |
271 | 25.3M | return i; |
272 | 25.3M | } |
273 | | |
274 | 29.4k | if (i == 0) |
275 | 970 | return -1; |
276 | 28.5k | i--; |
277 | 28.5k | } |
278 | 0 | return i; |
279 | 25.3M | } |
280 | | |
281 | 0 | std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) { |
282 | 0 | lazyInit(); |
283 | 0 | if (tokenIndex >= _tokens.size()) { |
284 | 0 | throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); |
285 | 0 | } |
286 | | |
287 | 0 | ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL); |
288 | 0 | size_t to; |
289 | 0 | size_t from = tokenIndex + 1; |
290 | | // if none onchannel to right, nextOnChannel=-1 so set to = last token |
291 | 0 | if (nextOnChannel == -1) { |
292 | 0 | to = static_cast<ssize_t>(size() - 1); |
293 | 0 | } else { |
294 | 0 | to = nextOnChannel; |
295 | 0 | } |
296 | |
|
297 | 0 | return filterForChannel(from, to, channel); |
298 | 0 | } |
299 | | |
/// Convenience overload: channel -1 selects every non-default channel.
std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) {
  return getHiddenTokensToRight(tokenIndex, -1);
}
303 | | |
304 | 0 | std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) { |
305 | 0 | lazyInit(); |
306 | 0 | if (tokenIndex >= _tokens.size()) { |
307 | 0 | throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1)); |
308 | 0 | } |
309 | | |
310 | 0 | if (tokenIndex == 0) { |
311 | | // Obviously no tokens can appear before the first token. |
312 | 0 | return { }; |
313 | 0 | } |
314 | | |
315 | 0 | ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL); |
316 | 0 | if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) { |
317 | 0 | return { }; |
318 | 0 | } |
319 | | // if none onchannel to left, prevOnChannel=-1 then from=0 |
320 | 0 | size_t from = static_cast<size_t>(prevOnChannel + 1); |
321 | 0 | size_t to = tokenIndex - 1; |
322 | |
|
323 | 0 | return filterForChannel(from, to, channel); |
324 | 0 | } |
325 | | |
/// Convenience overload: channel -1 selects every non-default channel.
std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) {
  return getHiddenTokensToLeft(tokenIndex, -1);
}
329 | | |
330 | 0 | std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) { |
331 | 0 | std::vector<Token *> hidden; |
332 | 0 | for (size_t i = from; i <= to; i++) { |
333 | 0 | Token *t = _tokens[i].get(); |
334 | 0 | if (channel == -1) { |
335 | 0 | if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) { |
336 | 0 | hidden.push_back(t); |
337 | 0 | } |
338 | 0 | } else { |
339 | 0 | if (t->getChannel() == static_cast<size_t>(channel)) { |
340 | 0 | hidden.push_back(t); |
341 | 0 | } |
342 | 0 | } |
343 | 0 | } |
344 | |
|
345 | 0 | return hidden; |
346 | 0 | } |
347 | | |
/// True once setup() has run (i.e. the stream no longer needs lazy init).
bool BufferedTokenStream::isInitialized() const {
  return !_needSetup;
}
351 | | |
/**
 * The name of the underlying token source (delegates to the TokenSource).
 * NOTE(review): the previous comment here ("Get the text of all tokens in
 * this buffer") described getText(), not this function.
 */
std::string BufferedTokenStream::getSourceName() const
{
  return _tokenSource->getSourceName();
}
359 | | |
/// Gets the text of all tokens in this buffer, fetching the entire token
/// stream from the source first.
std::string BufferedTokenStream::getText() {
  fill();
  return getText(misc::Interval(0U, size() - 1));
}
364 | | |
365 | 1.35k | std::string BufferedTokenStream::getText(const misc::Interval &interval) { |
366 | 1.35k | lazyInit(); |
367 | 1.35k | size_t start = interval.a; |
368 | 1.35k | size_t stop = interval.b; |
369 | 1.35k | if (start == INVALID_INDEX || stop == INVALID_INDEX) { |
370 | 0 | return ""; |
371 | 0 | } |
372 | 1.35k | sync(stop); |
373 | 1.35k | if (stop >= _tokens.size()) { |
374 | 0 | stop = _tokens.size() - 1; |
375 | 0 | } |
376 | | |
377 | 1.35k | std::stringstream ss; |
378 | 4.71k | for (size_t i = start; i <= stop; i++) { |
379 | 3.64k | Token *t = _tokens[i].get(); |
380 | 3.64k | if (t->getType() == Token::EOF) { |
381 | 282 | break; |
382 | 282 | } |
383 | 3.35k | ss << t->getText(); |
384 | 3.35k | } |
385 | 1.35k | return ss.str(); |
386 | 1.35k | } |
387 | | |
/// Text covered by a rule context's source interval.
std::string BufferedTokenStream::getText(RuleContext *ctx) {
  return getText(ctx->getSourceInterval());
}
391 | | |
392 | 1.35k | std::string BufferedTokenStream::getText(Token *start, Token *stop) { |
393 | 1.35k | if (start != nullptr && stop != nullptr) { |
394 | 1.35k | return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex())); |
395 | 1.35k | } |
396 | | |
397 | 0 | return ""; |
398 | 1.35k | } |
399 | | |
400 | 0 | void BufferedTokenStream::fill() { |
401 | 0 | lazyInit(); |
402 | 0 | const size_t blockSize = 1000; |
403 | 0 | while (true) { |
404 | 0 | size_t fetched = fetch(blockSize); |
405 | 0 | if (fetched < blockSize) { |
406 | 0 | return; |
407 | 0 | } |
408 | 0 | } |
409 | 0 | } |
410 | | |
/// Puts the stream in its pristine state: setup() must run before any token
/// access, and no EOF token has been buffered yet.
void BufferedTokenStream::InitializeInstanceFields() {
  _needSetup = true;
  _fetchedEOF = false;
}