Coverage Report

Created: 2026-02-11 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/Lexer.cpp
Line
Count
Source
1
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2
 * Use of this file is governed by the BSD 3-clause license that
3
 * can be found in the LICENSE.txt file in the project root.
4
 */
5
6
#include "atn/LexerATNSimulator.h"
7
#include "Exceptions.h"
8
#include "misc/Interval.h"
9
#include "CommonTokenFactory.h"
10
#include "LexerNoViableAltException.h"
11
#include "ANTLRErrorListener.h"
12
#include "support/CPPUtils.h"
13
#include "CommonToken.h"
14
15
#include "Lexer.h"
16
17
#define DEBUG_LEXER 0
18
19
using namespace antlrcpp;
20
using namespace antlr4;
21
22
0
// Default constructor: sets up the lexer's bookkeeping fields and leaves it
// without a character stream (the input pointer is null).
Lexer::Lexer() : Recognizer(), _input(nullptr) {
  InitializeInstanceFields();
}
26
27
6.96k
// Constructs a lexer that reads from `input`. Only the pointer is stored;
// this constructor neither copies the stream nor reads from it.
Lexer::Lexer(CharStream *input)
  : Recognizer()
  , _input(input)
{
  InitializeInstanceFields();
}
30
31
0
void Lexer::reset() {
32
  // wack Lexer state variables
33
0
  _input->seek(0); // rewind the input
34
35
0
  _syntaxErrors = 0;
36
0
  token.reset();
37
0
  type = Token::INVALID_TYPE;
38
0
  channel = Token::DEFAULT_CHANNEL;
39
0
  tokenStartCharIndex = INVALID_INDEX;
40
0
  tokenStartCharPositionInLine = 0;
41
0
  tokenStartLine = 0;
42
0
  type = 0;
43
0
  _text = "";
44
45
0
  hitEOF = false;
46
0
  mode = Lexer::DEFAULT_MODE;
47
0
  modeStack.clear();
48
49
0
  getInterpreter<atn::LexerATNSimulator>()->reset();
50
0
}
51
52
7.91M
std::unique_ptr<Token> Lexer::nextToken() {
53
  // Mark start location in char stream so unbuffered streams are
54
  // guaranteed at least have text of current token
55
7.91M
  ssize_t tokenStartMarker = _input->mark();
56
57
7.91M
  auto onExit = finally([this, tokenStartMarker]{
58
    // make sure we release marker after match or
59
    // unbuffered char stream will keep buffering
60
7.91M
    _input->release(tokenStartMarker);
61
7.91M
  });
62
63
7.91M
  while (true) {
64
18.0M
  outerContinue:
65
18.0M
    if (hitEOF) {
66
6.84k
      emitEOF();
67
6.84k
      return std::move(token);
68
6.84k
    }
69
70
17.9M
    token.reset();
71
17.9M
    channel = Token::DEFAULT_CHANNEL;
72
17.9M
    tokenStartCharIndex = _input->index();
73
17.9M
    tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine();
74
17.9M
    tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine();
75
17.9M
    _text = "";
76
17.9M
    do {
77
17.9M
      type = Token::INVALID_TYPE;
78
17.9M
      size_t ttype;
79
17.9M
      try {
80
17.9M
        ttype = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode);
81
17.9M
      } catch (LexerNoViableAltException &e) {
82
10.0M
        notifyListeners(e); // report error
83
10.0M
        recover(e);
84
10.0M
        ttype = SKIP;
85
10.0M
      }
86
17.9M
      if (_input->LA(1) == EOF) {
87
6.85k
        hitEOF = true;
88
6.85k
      }
89
17.9M
      if (type == Token::INVALID_TYPE) {
90
17.9M
        type = ttype;
91
17.9M
      }
92
17.9M
      if (type == SKIP) {
93
10.0M
        goto outerContinue;
94
10.0M
      }
95
17.9M
    } while (type == MORE);
96
7.90M
    if (token == nullptr) {
97
7.90M
      emit();
98
7.90M
    }
99
7.90M
    return std::move(token);
100
17.9M
  }
101
7.91M
}
102
103
0
void Lexer::skip() {
104
0
  type = SKIP;
105
0
}
106
107
0
void Lexer::more() {
108
0
  type = MORE;
109
0
}
110
111
0
void Lexer::setMode(size_t m) {
112
0
  mode = m;
113
0
}
114
115
0
void Lexer::pushMode(size_t m) {
116
#if DEBUG_LEXER == 1
117
    std::cout << "pushMode " << m << std::endl;
118
#endif
119
120
0
  modeStack.push_back(mode);
121
0
  setMode(m);
122
0
}
123
124
0
size_t Lexer::popMode() {
125
0
  if (modeStack.empty()) {
126
0
    throw EmptyStackException();
127
0
  }
128
#if DEBUG_LEXER == 1
129
    std::cout << std::string("popMode back to ") << modeStack.back() << std::endl;
130
#endif
131
132
0
  setMode(modeStack.back());
133
0
  modeStack.pop_back();
134
0
  return mode;
135
0
}
136
137
138
3.72k
// Returns the token factory this lexer uses to create tokens (non-owning).
TokenFactory<CommonToken>* Lexer::getTokenFactory() {
  return _factory;
}
141
142
0
// Resets the lexer and then attaches it to `input`.
//
// Note the order: reset() runs while _input still refers to the previous
// stream; only afterwards is the pointer replaced. If `input` is not a
// CharStream, the dynamic_cast yields nullptr and _input becomes null.
void Lexer::setInputStream(IntStream *input) {
  reset();
  CharStream *const charStream = dynamic_cast<CharStream*>(input);
  _input = charStream;
}
146
147
0
std::string Lexer::getSourceName() {
148
0
  return _input->getSourceName();
149
0
}
150
151
3.72k
// Returns the character stream this lexer reads from (non-owning; may be
// null for a default-constructed lexer).
CharStream* Lexer::getInputStream() {
  return _input;
}
154
155
7.91M
void Lexer::emit(std::unique_ptr<Token> newToken) {
156
7.91M
  token = std::move(newToken);
157
7.91M
}
158
159
7.90M
// Builds a token from the current lexer state (type, text, channel, start
// index/line/column) via the token factory and stores it as the pending token.
// The token's stop index is the character just before the current input
// position.
//
// Returns a non-owning pointer to the stored token.
Token* Lexer::emit() {
  const auto tokenStopCharIndex = getCharIndex() - 1;
  emit(_factory->create({ this, _input }, type, _text, channel,
    tokenStartCharIndex, tokenStopCharIndex, tokenStartLine, tokenStartCharPositionInLine));
  return token.get();
}
164
165
6.84k
// Creates an EOF token with empty text on the default channel, positioned at
// the current line/column, and stores it as the pending token. Its start index
// is the current input index and its stop index is one less than that.
//
// Returns a non-owning pointer to the stored token.
Token* Lexer::emitEOF() {
  const size_t cpos = getCharPositionInLine();
  const size_t line = getLine();
  const auto startIndex = _input->index();
  const auto stopIndex = _input->index() - 1;
  emit(_factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, startIndex, stopIndex, line, cpos));
  return token.get();
}
171
172
7.92M
size_t Lexer::getLine() const {
173
7.92M
  return getInterpreter<atn::LexerATNSimulator>()->getLine();
174
7.92M
}
175
176
7.92M
size_t Lexer::getCharPositionInLine() {
177
7.92M
  return getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine();
178
7.92M
}
179
180
0
void Lexer::setLine(size_t line) {
181
0
  getInterpreter<atn::LexerATNSimulator>()->setLine(line);
182
0
}
183
184
0
void Lexer::setCharPositionInLine(size_t charPositionInLine) {
185
0
  getInterpreter<atn::LexerATNSimulator>()->setCharPositionInLine(charPositionInLine);
186
0
}
187
188
7.90M
size_t Lexer::getCharIndex() {
189
7.90M
  return _input->index();
190
7.90M
}
191
192
0
std::string Lexer::getText() {
193
0
  if (!_text.empty()) {
194
0
    return _text;
195
0
  }
196
0
  return getInterpreter<atn::LexerATNSimulator>()->getText(_input);
197
0
}
198
199
0
void Lexer::setText(const std::string &text) {
200
0
  _text = text;
201
0
}
202
203
0
std::unique_ptr<Token> Lexer::getToken() {
204
0
  return std::move(token);
205
0
}
206
207
0
void Lexer::setToken(std::unique_ptr<Token> newToken) {
208
0
  token = std::move(newToken);
209
0
}
210
211
0
void Lexer::setType(size_t ttype) {
212
0
  type = ttype;
213
0
}
214
215
0
size_t Lexer::getType() {
216
0
  return type;
217
0
}
218
219
15.9k
void Lexer::setChannel(size_t newChannel) {
220
15.9k
  channel = newChannel;
221
15.9k
}
222
223
0
size_t Lexer::getChannel() {
224
0
  return channel;
225
0
}
226
227
0
std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() {
228
0
  std::vector<std::unique_ptr<Token>> tokens;
229
0
  std::unique_ptr<Token> t = nextToken();
230
0
  while (t->getType() != EOF) {
231
0
    tokens.push_back(std::move(t));
232
0
    t = nextToken();
233
0
  }
234
0
  return tokens;
235
0
}
236
237
10.0M
void Lexer::recover(const LexerNoViableAltException &/*e*/) {
238
10.0M
  if (_input->LA(1) != EOF) {
239
    // skip a char and try again
240
10.0M
    getInterpreter<atn::LexerATNSimulator>()->consume(_input);
241
10.0M
  }
242
10.0M
}
243
244
10.0M
void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) {
245
10.0M
  ++_syntaxErrors;
246
10.0M
  std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index()));
247
10.0M
  std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'");
248
249
10.0M
  ProxyErrorListener &listener = getErrorListenerDispatch();
250
10.0M
  listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception());
251
10.0M
}
252
253
10.0M
// Returns a copy of `s` made safe for single-line error messages: newline,
// tab and carriage return are replaced by the two-character escapes "\n",
// "\t" and "\r"; every other character is copied unchanged.
//
// This runs once per reported lexer error, so the result is built directly in
// a std::string instead of going through a std::stringstream.
std::string Lexer::getErrorDisplay(const std::string &s) {
  std::string display;
  display.reserve(s.size()); // lower bound; grows only if something is escaped

  for (const char c : s) {
    switch (c) {
    case '\n':
      display += "\\n";
      break;
    case '\t':
      display += "\\t";
      break;
    case '\r':
      display += "\\r";
      break;
    default:
      display += c;
      break;
    }
  }
  return display;
}
273
274
0
// Recovers from a recognition error by consuming one symbol directly from the
// input stream (not through the interpreter).
void Lexer::recover(RecognitionException * /*re*/) {
  // TODO: Do we lose character or line position information?
  _input->consume();
}
278
279
0
size_t Lexer::getNumberOfSyntaxErrors() {
280
0
  return _syntaxErrors;
281
0
}
282
283
6.96k
void Lexer::InitializeInstanceFields() {
284
6.96k
  _syntaxErrors = 0;
285
6.96k
  token = nullptr;
286
6.96k
  _factory = CommonTokenFactory::DEFAULT.get();
287
  tokenStartCharIndex = INVALID_INDEX;
288
6.96k
  tokenStartLine = 0;
289
6.96k
  tokenStartCharPositionInLine = 0;
290
6.96k
  hitEOF = false;
291
6.96k
  channel = 0;
292
6.96k
  type = 0;
293
6.96k
  mode = Lexer::DEFAULT_MODE;
294
6.96k
}