/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/Lexer.h
Line | Count | Source |
1 | | /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. |
2 | | * Use of this file is governed by the BSD 3-clause license that |
3 | | * can be found in the LICENSE.txt file in the project root. |
4 | | */ |
5 | | |
6 | | #pragma once |
7 | | |
8 | | #include "Recognizer.h" |
9 | | #include "TokenSource.h" |
10 | | #include "CharStream.h" |
11 | | #include "Token.h" |
12 | | |
13 | | namespace antlr4 { |
14 | | |
15 | | /// A lexer is a recognizer that draws input symbols from a character stream. |
16 | | /// Lexer grammars result in a subclass of this object. A Lexer object |
17 | | /// uses simplified match() and error recovery mechanisms in the interest |
18 | | /// of speed. |
19 | | class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource { |
20 | | public: |
21 | | static constexpr size_t DEFAULT_MODE = 0; |
22 | | static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1; |
23 | | static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2; |
24 | | |
25 | | static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL; |
26 | | static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL; |
27 | | static constexpr size_t MIN_CHAR_VALUE = 0; |
28 | | static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF; |
29 | | |
30 | | CharStream *_input; // Pure reference, usually from statically allocated instance. |
31 | | |
32 | | protected: |
33 | | /// How to create token objects. |
34 | | TokenFactory<CommonToken> *_factory; |
35 | | |
36 | | public: |
37 | | /// The goal of all lexer rules/methods is to create a token object. |
38 | | /// This is an instance variable as multiple rules may collaborate to |
39 | | /// create a single token. nextToken will return this object after |
40 | | /// matching lexer rule(s). If you subclass to allow multiple token |
41 | | /// emissions, then set this to the last token to be matched or |
42 | | /// something nonnull so that the auto token emit mechanism will not |
43 | | /// emit another token. |
44 | | |
45 | | // Life cycle of a token is this: |
46 | | // Created by emit() (via the token factory) or by action code, holding ownership of it. |
47 | | // Ownership is handed over to the token stream when calling nextToken(). |
48 | | std::unique_ptr<Token> token; |
49 | | |
50 | | /// <summary> |
51 | | /// What character index in the stream did the current token start at? |
52 | | /// Needed, for example, to get the text for current token. Set at |
53 | | /// the start of nextToken. |
54 | | /// </summary> |
55 | | size_t tokenStartCharIndex; |
56 | | |
57 | | /// <summary> |
58 | | /// The line on which the first character of the token resides </summary> |
59 | | size_t tokenStartLine; |
60 | | |
61 | | /// The character position of first character within the line. |
62 | | size_t tokenStartCharPositionInLine; |
63 | | |
64 | | /// Once we see EOF on char stream, next token will be EOF. |
65 | | /// If you have DONE : EOF ; then you see DONE EOF. |
66 | | bool hitEOF; |
67 | | |
68 | | /// The channel number for the current token. |
69 | | size_t channel; |
70 | | |
71 | | /// The token type for the current token. |
72 | | size_t type; |
73 | | |
74 | | // Use the vector as a stack. |
75 | | std::vector<size_t> modeStack; |
76 | | size_t mode; |
77 | | |
78 | | Lexer(); |
79 | | Lexer(CharStream *input); |
80 | 0 | virtual ~Lexer() {} |
81 | | |
82 | | virtual void reset(); |
83 | | |
84 | | /// Return a token from this source; i.e., match a token on the char stream. |
85 | | virtual std::unique_ptr<Token> nextToken() override; |
86 | | |
87 | | /// Instruct the lexer to skip creating a token for the current lexer rule |
88 | | /// and look for another token. nextToken() knows to keep looking when |
89 | | /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that |
90 | | /// if token == null at end of any token rule, it creates one for you |
91 | | /// and emits it. |
92 | | virtual void skip(); |
93 | | virtual void more(); |
94 | | virtual void setMode(size_t m); |
95 | | virtual void pushMode(size_t m); |
96 | | virtual size_t popMode(); |
97 | | |
98 | | template<typename T1> |
99 | | void setTokenFactory(TokenFactory<T1> *factory) { |
100 | | this->_factory = factory; |
101 | | } |
102 | | |
103 | | virtual TokenFactory<CommonToken>* getTokenFactory() override; |
104 | | |
105 | | /// Set the char stream and reset the lexer. |
106 | | virtual void setInputStream(IntStream *input) override; |
107 | | |
108 | | virtual std::string getSourceName() override; |
109 | | |
110 | | virtual CharStream* getInputStream() override; |
111 | | |
112 | | /// By default does not support multiple emits per nextToken invocation |
113 | | /// for efficiency reasons. Subclasses can override this method, nextToken, |
114 | | /// and getToken (to push tokens into a list and pull from that list |
115 | | /// rather than a single variable as this implementation does). |
116 | | virtual void emit(std::unique_ptr<Token> newToken); |
117 | | |
118 | | /// The standard method called to automatically emit a token at the |
119 | | /// outermost lexical rule. The token object should point into the |
120 | | /// char buffer start..stop. If there is a text override in 'text', |
121 | | /// use that to set the token's text. Override this method to emit |
122 | | /// custom Token objects or provide a new factory. |
123 | | virtual Token* emit(); |
124 | | |
125 | | virtual Token* emitEOF(); |
126 | | |
127 | | virtual size_t getLine() const override; |
128 | | |
129 | | virtual size_t getCharPositionInLine() override; |
130 | | |
131 | | virtual void setLine(size_t line); |
132 | | |
133 | | virtual void setCharPositionInLine(size_t charPositionInLine); |
134 | | |
135 | | /// What is the index of the current character of lookahead? |
136 | | virtual size_t getCharIndex(); |
137 | | |
138 | | /// Return the text matched so far for the current token or any |
139 | | /// text override. |
140 | | virtual std::string getText(); |
141 | | |
142 | | /// Set the complete text of this token; it wipes any previous |
143 | | /// changes to the text. |
144 | | virtual void setText(const std::string &text); |
145 | | |
146 | | /// Override if emitting multiple tokens. |
147 | | virtual std::unique_ptr<Token> getToken(); |
148 | | |
149 | | virtual void setToken(std::unique_ptr<Token> newToken); |
150 | | |
151 | | virtual void setType(size_t ttype); |
152 | | |
153 | | virtual size_t getType(); |
154 | | |
155 | | virtual void setChannel(size_t newChannel); |
156 | | |
157 | | virtual size_t getChannel(); |
158 | | |
159 | | virtual const std::vector<std::string>& getChannelNames() const = 0; |
160 | | |
161 | | virtual const std::vector<std::string>& getModeNames() const = 0; |
162 | | |
163 | | /// Return a list of all Token objects in input char stream. |
164 | | /// Forces load of all tokens. Does not include EOF token. |
165 | | virtual std::vector<std::unique_ptr<Token>> getAllTokens(); |
166 | | |
167 | | virtual void recover(const LexerNoViableAltException &e); |
168 | | |
169 | | virtual void notifyListeners(const LexerNoViableAltException &e); |
170 | | |
171 | | virtual std::string getErrorDisplay(const std::string &s); |
172 | | |
173 | | /// Lexers can normally match any char in its vocabulary after matching |
174 | | /// a token, so do the easy thing and just kill a character and hope |
175 | | /// it all works out. You can instead use the rule invocation stack |
176 | | /// to do sophisticated error recovery if you are in a fragment rule. |
177 | | virtual void recover(RecognitionException *re); |
178 | | |
179 | | /// <summary> |
180 | | /// Gets the number of syntax errors reported during parsing. This value is |
181 | | /// incremented each time <seealso cref="#notifyErrorListeners"/> is called. |
182 | | /// </summary> |
183 | | /// <seealso cref="#notifyListeners"/> |
184 | | virtual size_t getNumberOfSyntaxErrors(); |
185 | | |
186 | | protected: |
187 | | /// You can set the text for the current token to override what is in |
188 | | /// the input char buffer (via setText()). |
189 | | std::string _text; |
190 | | |
191 | | private: |
192 | | size_t _syntaxErrors; |
193 | | void InitializeInstanceFields(); |
194 | | }; |
195 | | |
196 | | } // namespace antlr4 |