/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/Lexer.cpp
Line | Count | Source |
1 | | /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. |
2 | | * Use of this file is governed by the BSD 3-clause license that |
3 | | * can be found in the LICENSE.txt file in the project root. |
4 | | */ |
5 | | |
6 | | #include "atn/LexerATNSimulator.h" |
7 | | #include "Exceptions.h" |
8 | | #include "misc/Interval.h" |
9 | | #include "CommonTokenFactory.h" |
10 | | #include "LexerNoViableAltException.h" |
11 | | #include "ANTLRErrorListener.h" |
12 | | #include "support/CPPUtils.h" |
13 | | #include "CommonToken.h" |
14 | | |
15 | | #include "Lexer.h" |
16 | | |
17 | | #define DEBUG_LEXER 0 |
18 | | |
19 | | using namespace antlrcpp; |
20 | | using namespace antlr4; |
21 | | |
22 | 0 | Lexer::Lexer() : Recognizer() { |
23 | 0 | InitializeInstanceFields(); |
24 | 0 | _input = nullptr; |
25 | 0 | } |
26 | | |
27 | 4.20k | Lexer::Lexer(CharStream *input) : Recognizer(), _input(input) { |
28 | 4.20k | InitializeInstanceFields(); |
29 | 4.20k | } |
30 | | |
31 | 0 | void Lexer::reset() { |
32 | | // wack Lexer state variables |
33 | 0 | _input->seek(0); // rewind the input |
34 | |
|
35 | 0 | _syntaxErrors = 0; |
36 | 0 | token.reset(); |
37 | 0 | type = Token::INVALID_TYPE; |
38 | 0 | channel = Token::DEFAULT_CHANNEL; |
39 | 0 | tokenStartCharIndex = INVALID_INDEX; |
40 | 0 | tokenStartCharPositionInLine = 0; |
41 | 0 | tokenStartLine = 0; |
42 | 0 | type = 0; |
43 | 0 | _text = ""; |
44 | |
|
45 | 0 | hitEOF = false; |
46 | 0 | mode = Lexer::DEFAULT_MODE; |
47 | 0 | modeStack.clear(); |
48 | |
|
49 | 0 | getInterpreter<atn::LexerATNSimulator>()->reset(); |
50 | 0 | } |
51 | | |
52 | 6.53M | std::unique_ptr<Token> Lexer::nextToken() { |
53 | | // Mark start location in char stream so unbuffered streams are |
54 | | // guaranteed at least have text of current token |
55 | 6.53M | ssize_t tokenStartMarker = _input->mark(); |
56 | | |
57 | 6.53M | auto onExit = finally([this, tokenStartMarker]{ |
58 | | // make sure we release marker after match or |
59 | | // unbuffered char stream will keep buffering |
60 | 6.53M | _input->release(tokenStartMarker); |
61 | 6.53M | }); |
62 | | |
63 | 6.53M | while (true) { |
64 | 8.30M | outerContinue: |
65 | 8.30M | if (hitEOF) { |
66 | 4.02k | emitEOF(); |
67 | 4.02k | return std::move(token); |
68 | 4.02k | } |
69 | | |
70 | 8.30M | token.reset(); |
71 | 8.30M | channel = Token::DEFAULT_CHANNEL; |
72 | 8.30M | tokenStartCharIndex = _input->index(); |
73 | 8.30M | tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine(); |
74 | 8.30M | tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine(); |
75 | 8.30M | _text = ""; |
76 | 8.30M | do { |
77 | 8.30M | type = Token::INVALID_TYPE; |
78 | 8.30M | size_t ttype; |
79 | 8.30M | try { |
80 | 8.30M | ttype = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode); |
81 | 8.30M | } catch (LexerNoViableAltException &e) { |
82 | 1.76M | notifyListeners(e); // report error |
83 | 1.76M | recover(e); |
84 | 1.76M | ttype = SKIP; |
85 | 1.76M | } |
86 | 8.30M | if (_input->LA(1) == EOF) { |
87 | 4.03k | hitEOF = true; |
88 | 4.03k | } |
89 | 8.30M | if (type == Token::INVALID_TYPE) { |
90 | 8.30M | type = ttype; |
91 | 8.30M | } |
92 | 8.30M | if (type == SKIP) { |
93 | 1.76M | goto outerContinue; |
94 | 1.76M | } |
95 | 8.30M | } while (type == MORE); |
96 | 6.53M | if (token == nullptr) { |
97 | 6.53M | emit(); |
98 | 6.53M | } |
99 | 6.53M | return std::move(token); |
100 | 8.30M | } |
101 | 6.53M | } |
102 | | |
103 | 0 | void Lexer::skip() { |
104 | 0 | type = SKIP; |
105 | 0 | } |
106 | | |
107 | 0 | void Lexer::more() { |
108 | 0 | type = MORE; |
109 | 0 | } |
110 | | |
111 | 0 | void Lexer::setMode(size_t m) { |
112 | 0 | mode = m; |
113 | 0 | } |
114 | | |
115 | 0 | void Lexer::pushMode(size_t m) { |
116 | | #if DEBUG_LEXER == 1 |
117 | | std::cout << "pushMode " << m << std::endl; |
118 | | #endif |
119 | |
|
120 | 0 | modeStack.push_back(mode); |
121 | 0 | setMode(m); |
122 | 0 | } |
123 | | |
124 | 0 | size_t Lexer::popMode() { |
125 | 0 | if (modeStack.empty()) { |
126 | 0 | throw EmptyStackException(); |
127 | 0 | } |
128 | | #if DEBUG_LEXER == 1 |
129 | | std::cout << std::string("popMode back to ") << modeStack.back() << std::endl; |
130 | | #endif |
131 | | |
132 | 0 | setMode(modeStack.back()); |
133 | 0 | modeStack.pop_back(); |
134 | 0 | return mode; |
135 | 0 | } |
136 | | |
137 | | |
138 | 2.16k | TokenFactory<CommonToken>* Lexer::getTokenFactory() { |
139 | 2.16k | return _factory; |
140 | 2.16k | } |
141 | | |
142 | 0 | void Lexer::setInputStream(IntStream *input) { |
143 | 0 | reset(); |
144 | 0 | _input = dynamic_cast<CharStream*>(input); |
145 | 0 | } |
146 | | |
147 | 0 | std::string Lexer::getSourceName() { |
148 | 0 | return _input->getSourceName(); |
149 | 0 | } |
150 | | |
151 | 2.16k | CharStream* Lexer::getInputStream() { |
152 | 2.16k | return _input; |
153 | 2.16k | } |
154 | | |
155 | 6.53M | void Lexer::emit(std::unique_ptr<Token> newToken) { |
156 | 6.53M | token = std::move(newToken); |
157 | 6.53M | } |
158 | | |
159 | 6.53M | Token* Lexer::emit() { |
160 | 6.53M | emit(_factory->create({ this, _input }, type, _text, channel, |
161 | 6.53M | tokenStartCharIndex, getCharIndex() - 1, tokenStartLine, tokenStartCharPositionInLine)); |
162 | 6.53M | return token.get(); |
163 | 6.53M | } |
164 | | |
165 | 4.02k | Token* Lexer::emitEOF() { |
166 | 4.02k | size_t cpos = getCharPositionInLine(); |
167 | 4.02k | size_t line = getLine(); |
168 | 4.02k | emit(_factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, _input->index(), _input->index() - 1, line, cpos)); |
169 | 4.02k | return token.get(); |
170 | 4.02k | } |
171 | | |
172 | 6.54M | size_t Lexer::getLine() const { |
173 | 6.54M | return getInterpreter<atn::LexerATNSimulator>()->getLine(); |
174 | 6.54M | } |
175 | | |
176 | 6.54M | size_t Lexer::getCharPositionInLine() { |
177 | 6.54M | return getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine(); |
178 | 6.54M | } |
179 | | |
180 | 0 | void Lexer::setLine(size_t line) { |
181 | 0 | getInterpreter<atn::LexerATNSimulator>()->setLine(line); |
182 | 0 | } |
183 | | |
184 | 0 | void Lexer::setCharPositionInLine(size_t charPositionInLine) { |
185 | 0 | getInterpreter<atn::LexerATNSimulator>()->setCharPositionInLine(charPositionInLine); |
186 | 0 | } |
187 | | |
188 | 6.53M | size_t Lexer::getCharIndex() { |
189 | 6.53M | return _input->index(); |
190 | 6.53M | } |
191 | | |
192 | 0 | std::string Lexer::getText() { |
193 | 0 | if (!_text.empty()) { |
194 | 0 | return _text; |
195 | 0 | } |
196 | 0 | return getInterpreter<atn::LexerATNSimulator>()->getText(_input); |
197 | 0 | } |
198 | | |
199 | 0 | void Lexer::setText(const std::string &text) { |
200 | 0 | _text = text; |
201 | 0 | } |
202 | | |
203 | 0 | std::unique_ptr<Token> Lexer::getToken() { |
204 | 0 | return std::move(token); |
205 | 0 | } |
206 | | |
207 | 0 | void Lexer::setToken(std::unique_ptr<Token> newToken) { |
208 | 0 | token = std::move(newToken); |
209 | 0 | } |
210 | | |
211 | 0 | void Lexer::setType(size_t ttype) { |
212 | 0 | type = ttype; |
213 | 0 | } |
214 | | |
215 | 0 | size_t Lexer::getType() { |
216 | 0 | return type; |
217 | 0 | } |
218 | | |
219 | 10.4k | void Lexer::setChannel(size_t newChannel) { |
220 | 10.4k | channel = newChannel; |
221 | 10.4k | } |
222 | | |
223 | 0 | size_t Lexer::getChannel() { |
224 | 0 | return channel; |
225 | 0 | } |
226 | | |
227 | 0 | std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() { |
228 | 0 | std::vector<std::unique_ptr<Token>> tokens; |
229 | 0 | std::unique_ptr<Token> t = nextToken(); |
230 | 0 | while (t->getType() != EOF) { |
231 | 0 | tokens.push_back(std::move(t)); |
232 | 0 | t = nextToken(); |
233 | 0 | } |
234 | 0 | return tokens; |
235 | 0 | } |
236 | | |
237 | 1.76M | void Lexer::recover(const LexerNoViableAltException &/*e*/) { |
238 | 1.76M | if (_input->LA(1) != EOF) { |
239 | | // skip a char and try again |
240 | 1.76M | getInterpreter<atn::LexerATNSimulator>()->consume(_input); |
241 | 1.76M | } |
242 | 1.76M | } |
243 | | |
244 | 1.76M | void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) { |
245 | 1.76M | ++_syntaxErrors; |
246 | 1.76M | std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index())); |
247 | 1.76M | std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'"); |
248 | | |
249 | 1.76M | ProxyErrorListener &listener = getErrorListenerDispatch(); |
250 | 1.76M | listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception()); |
251 | 1.76M | } |
252 | | |
253 | 1.76M | std::string Lexer::getErrorDisplay(const std::string &s) { |
254 | 1.76M | std::stringstream ss; |
255 | 3.01M | for (auto c : s) { |
256 | 3.01M | switch (c) { |
257 | 840 | case '\n': |
258 | 840 | ss << "\\n"; |
259 | 840 | break; |
260 | 1.27k | case '\t': |
261 | 1.27k | ss << "\\t"; |
262 | 1.27k | break; |
263 | 665 | case '\r': |
264 | 665 | ss << "\\r"; |
265 | 665 | break; |
266 | 3.00M | default: |
267 | 3.00M | ss << c; |
268 | 3.00M | break; |
269 | 3.01M | } |
270 | 3.01M | } |
271 | 1.76M | return ss.str(); |
272 | 1.76M | } |
273 | | |
274 | 0 | void Lexer::recover(RecognitionException * /*re*/) { |
275 | | // TODO: Do we lose character or line position information? |
276 | 0 | _input->consume(); |
277 | 0 | } |
278 | | |
279 | 0 | size_t Lexer::getNumberOfSyntaxErrors() { |
280 | 0 | return _syntaxErrors; |
281 | 0 | } |
282 | | |
283 | 4.20k | void Lexer::InitializeInstanceFields() { |
284 | 4.20k | _syntaxErrors = 0; |
285 | 4.20k | token = nullptr; |
286 | 4.20k | _factory = CommonTokenFactory::DEFAULT.get(); |
287 | | tokenStartCharIndex = INVALID_INDEX; |
288 | 4.20k | tokenStartLine = 0; |
289 | 4.20k | tokenStartCharPositionInLine = 0; |
290 | 4.20k | hitEOF = false; |
291 | 4.20k | channel = 0; |
292 | 4.20k | type = 0; |
293 | 4.20k | mode = Lexer::DEFAULT_MODE; |
294 | 4.20k | } |