/proc/self/cwd/external/antlr4-cpp-runtime~/runtime/src/Lexer.h

Source
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "Recognizer.h"
#include "TokenSource.h"
#include "CharStream.h"
#include "Token.h"

namespace antlr4 {

  /// A lexer is a recognizer that draws input symbols from a character stream.
  /// Lexer grammars result in a subclass of this object. A Lexer object
  /// uses simplified match() and error recovery mechanisms in the interest
  /// of speed.
  ///
  /// This class derives from both Recognizer and TokenSource and declares two
  /// pure virtual functions (getChannelNames() and getModeNames()), so it can
  /// only be used through a concrete subclass.
  class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
  public:
    /// Mode number of the default lexer mode.
    static constexpr size_t DEFAULT_MODE = 0;
    /// Sentinel values taken from the top of the size_t range.
    /// NOTE(review): presumably these are the token-type markers set by
    /// more() and skip() respectively -- confirm in the implementation file.
    static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
    static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;

    /// Aliases for the channel constants declared on Token.
    static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
    static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
    /// Inclusive bounds of the character values; 0x10FFFF is the highest
    /// Unicode code point.
    static constexpr size_t MIN_CHAR_VALUE = 0;
    static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;

    // Non-owning: the inline destructor below is empty, so the stream is not
    // released by this class.
    CharStream *_input; // Pure reference, usually from statically allocated instance.

  protected:
    /// How to create token objects.
    TokenFactory<CommonToken> *_factory;

  public:
    /// The goal of all lexer rules/methods is to create a token object.
    ///  This is an instance variable as multiple rules may collaborate to
    ///  create a single token.  nextToken will return this object after
    ///  matching lexer rule(s).  If you subclass to allow multiple token
    ///  emissions, then set this to the last token to be matched or
    ///  something nonnull so that the auto token emit mechanism will not
    ///  emit another token.

    // Life cycle of a token is this:
    // Created by emit() (via the token factory) or by action code, holding ownership of it.
    // Ownership is handed over to the token stream when calling nextToken().
    std::unique_ptr<Token> token;

    /// What character index in the stream did the current token start at?
    ///  Needed, for example, to get the text for the current token.  Set at
    ///  the start of nextToken.
    size_t tokenStartCharIndex;

    /// The line on which the first character of the token resides.
    size_t tokenStartLine;

    /// The character position of the first character within the line.
    size_t tokenStartCharPositionInLine;

    /// Once we see EOF on char stream, next token will be EOF.
    /// If you have DONE : EOF ; then you see DONE EOF.
    bool hitEOF;

    /// The channel number for the current token.
    size_t channel;

    /// The token type for the current token.
    size_t type;

    // Use the vector as a stack.
    std::vector<size_t> modeStack;
    /// The current lexer mode.
    /// NOTE(review): presumably initialised to DEFAULT_MODE by the
    /// constructors / reset() -- confirm in the implementation file.
    size_t mode;

    /// Constructs a lexer without an input stream argument.
    Lexer();
    /// Constructs a lexer for the given character stream. The pointer is
    /// stored as a plain reference (see _input); it is not owned.
    Lexer(CharStream *input);
    /// Virtual so that subclasses can be destroyed through a Lexer pointer.
    /// The body is empty: the unique_ptr and container members clean up
    /// after themselves and the raw pointer members are not deleted.
    virtual ~Lexer() {}

    /// Resets the lexer state. Defined out of line.
    virtual void reset();

    /// Return a token from this source; i.e., match a token on the char stream.
    /// The caller receives ownership of the returned token.
    virtual std::unique_ptr<Token> nextToken() override;

    /// Instruct the lexer to skip creating a token for current lexer rule
    /// and look for another token.  nextToken() knows to keep looking when
    /// a lexer rule finishes with the skip marker set.  Recall that
    /// if token == null at end of any token rule, it creates one for you
    /// and emits it.
    /// NOTE(review): older wording referred to "SKIP_TOKEN"; the only skip
    /// marker declared in this class is the SKIP constant -- confirm that
    /// skip() sets the current type to SKIP.
    virtual void skip();
    /// NOTE(review): presumably marks the current match with the MORE
    /// constant so that matching continues into the same token -- confirm.
    virtual void more();
    /// Mode handling. modeStack is used as a stack of mode numbers.
    /// NOTE(review): presumably setMode() replaces the current mode,
    /// pushMode() saves the current mode on modeStack before switching, and
    /// popMode() restores the saved mode and returns it -- confirm, including
    /// the behaviour of popMode() on an empty stack.
    virtual void setMode(size_t m);
    virtual void pushMode(size_t m);
    virtual size_t popMode();

    /// Stores the given factory in _factory. Because _factory is declared as
    /// TokenFactory<CommonToken>*, this only compiles for a T1 whose
    /// TokenFactory<T1>* converts implicitly to that pointer type.
    /// The factory is not owned by the lexer.
    template<typename T1>
    void setTokenFactory(TokenFactory<T1> *factory)  {
      this->_factory = factory;
    }

    /// Returns the token factory in use (see _factory).
    virtual TokenFactory<CommonToken>* getTokenFactory() override;

    /// Set the char stream and reset the lexer
    virtual void setInputStream(IntStream *input) override;

    /// Returns the name of the input source.
    /// NOTE(review): presumably forwarded from the input stream -- confirm.
    virtual std::string getSourceName() override;

    /// Returns the character stream this lexer reads from (see _input).
    virtual CharStream* getInputStream() override;

    /// By default does not support multiple emits per nextToken invocation
    /// for efficiency reasons. Subclasses can override this method, nextToken,
    /// and getToken (to push tokens into a list and pull from that list
    /// rather than a single variable as this implementation does).
    /// Takes ownership of newToken.
    virtual void emit(std::unique_ptr<Token> newToken);

    /// The standard method called to automatically emit a token at the
    /// outermost lexical rule.  The token object should point into the
    /// char buffer start..stop.  If there is a text override in '_text',
    /// use that to set the token's text.  Override this method to emit
    /// custom Token objects or provide a new factory.
    /// The returned raw pointer is non-owning; per the life-cycle note on
    /// the 'token' member, the lexer holds ownership until nextToken().
    virtual Token* emit();

    /// Emits the end-of-file token.
    /// NOTE(review): same non-owning return convention as emit() is
    /// assumed -- confirm.
    virtual Token* emitEOF();

    /// Current line of the lexer.
    /// NOTE(review): presumably delegated to the interpreter -- confirm.
    virtual size_t getLine() const override;

    /// Current character position within the line. Unlike getLine(), this
    /// override is not const-qualified.
    virtual size_t getCharPositionInLine() override;

    virtual void setLine(size_t line);

    virtual void setCharPositionInLine(size_t charPositionInLine);

    /// What is the index of the current character of lookahead?
    virtual size_t getCharIndex();

    /// Return the text matched so far for the current token or any
    /// text override.
    virtual std::string getText();

    /// Set the complete text of this token; it wipes any previous
    /// changes to the text.
    virtual void setText(const std::string &text);

    /// Override if emitting multiple tokens.
    /// Returns a unique_ptr, so ownership of the token goes to the caller.
    virtual std::unique_ptr<Token> getToken();

    /// Takes ownership of newToken.
    virtual void setToken(std::unique_ptr<Token> newToken);

    /// Accessors for the type and channel of the current token
    /// (see the 'type' and 'channel' members).
    virtual void setType(size_t ttype);

    virtual size_t getType();

    virtual void setChannel(size_t newChannel);

    virtual size_t getChannel();

    /// Pure virtual: concrete lexers must supply their channel names.
    virtual const std::vector<std::string>& getChannelNames() const = 0;

    /// Pure virtual: concrete lexers must supply their mode names.
    virtual const std::vector<std::string>& getModeNames() const = 0;

    /// Return a list of all Token objects in input char stream.
    /// Forces load of all tokens. Does not include EOF token.
    virtual std::vector<std::unique_ptr<Token>> getAllTokens();

    /// Recovery hook for the case where no lexer alternative is viable.
    virtual void recover(const LexerNoViableAltException &e);

    /// Reports the given error.
    /// NOTE(review): presumably forwarded to the registered error
    /// listeners -- confirm in the implementation file.
    virtual void notifyListeners(const LexerNoViableAltException &e);

    /// Returns a printable form of s for use in error messages.
    /// NOTE(review): the exact escaping rules are defined out of line.
    virtual std::string getErrorDisplay(const std::string &s);

    /// Lexers can normally match any char in its vocabulary after matching
    /// a token, so do the easy thing and just kill a character and hope
    /// it all works out.  You can instead use the rule invocation stack
    /// to do sophisticated error recovery if you are in a fragment rule.
    virtual void recover(RecognitionException *re);

    /// Gets the number of syntax errors reported so far (see _syntaxErrors).
    /// NOTE(review): earlier wording spoke of "parsing" and of
    /// notifyErrorListeners; for a lexer the counter is presumably
    /// incremented each time notifyListeners() is called -- confirm.
    virtual size_t getNumberOfSyntaxErrors();

  protected:
    /// You can set the text for the current token to override what is in
    /// the input char buffer (via setText()).
    std::string _text;

  private:
    /// Error counter returned by getNumberOfSyntaxErrors().
    size_t _syntaxErrors;
    /// Private helper, defined out of line.
    /// NOTE(review): presumably called by the constructors to give the data
    /// members above their initial values, since none of them has an
    /// in-class initializer -- confirm.
    void InitializeInstanceFields();
  };

} // namespace antlr4

Line	Count	Source
1		/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2		* Use of this file is governed by the BSD 3-clause license that
3		* can be found in the LICENSE.txt file in the project root.
4		*/
5
6		#pragma once
7
8		#include "Recognizer.h"
9		#include "TokenSource.h"
10		#include "CharStream.h"
11		#include "Token.h"
12
13		namespace antlr4 {
14
15		/// A lexer is recognizer that draws input symbols from a character stream.
16		/// lexer grammars result in a subclass of this object. A Lexer object
17		/// uses simplified match() and error recovery mechanisms in the interest
18		/// of speed.
19		class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
20		public:
21		static constexpr size_t DEFAULT_MODE = 0;
22		static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
23		static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;
24
25		static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
26		static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
27		static constexpr size_t MIN_CHAR_VALUE = 0;
28		static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;
29
30		CharStream *_input; // Pure reference, usually from statically allocated instance.
31
32		protected:
33		/// How to create token objects.
34		TokenFactory<CommonToken> *_factory;
35
36		public:
37		/// The goal of all lexer rules/methods is to create a token object.
38		/// This is an instance variable as multiple rules may collaborate to
39		/// create a single token. nextToken will return this object after
40		/// matching lexer rule(s). If you subclass to allow multiple token
41		/// emissions, then set this to the last token to be matched or
42		/// something nonnull so that the auto token emit mechanism will not
43		/// emit another token.
44
45		// Life cycle of a token is this:
46		// Created by emit() (via the token factory) or by action code, holding ownership of it.
47		// Ownership is handed over to the token stream when calling nextToken().
48		std::unique_ptr<Token> token;
49
50		/// <summary>
51		/// What character index in the stream did the current token start at?
52		/// Needed, for example, to get the text for current token. Set at
53		/// the start of nextToken.
54		/// </summary>
55		size_t tokenStartCharIndex;
56
57		/// <summary>
58		/// The line on which the first character of the token resides </summary>
59		size_t tokenStartLine;
60
61		/// The character position of first character within the line.
62		size_t tokenStartCharPositionInLine;
63
64		/// Once we see EOF on char stream, next token will be EOF.
65		/// If you have DONE : EOF ; then you see DONE EOF.
66		bool hitEOF;
67
68		/// The channel number for the current token.
69		size_t channel;
70
71		/// The token type for the current token.
72		size_t type;
73
74		// Use the vector as a stack.
75		std::vector<size_t> modeStack;
76		size_t mode;
77
78		Lexer();
79		Lexer(CharStream *input);
80	0	virtual ~Lexer() {}
81
82		virtual void reset();
83
84		/// Return a token from this source; i.e., match a token on the char stream.
85		virtual std::unique_ptr<Token> nextToken() override;
86
87		/// Instruct the lexer to skip creating a token for current lexer rule
88		/// and look for another token. nextToken() knows to keep looking when
89		/// a lexer rule finishes with token set to SKIP_TOKEN. Recall that
90		/// if token == null at end of any token rule, it creates one for you
91		/// and emits it.
92		virtual void skip();
93		virtual void more();
94		virtual void setMode(size_t m);
95		virtual void pushMode(size_t m);
96		virtual size_t popMode();
97
98		template<typename T1>
99		void setTokenFactory(TokenFactory<T1> *factory) {
100		this->_factory = factory;
101		}
102
103		virtual TokenFactory<CommonToken>* getTokenFactory() override;
104
105		/// Set the char stream and reset the lexer
106		virtual void setInputStream(IntStream *input) override;
107
108		virtual std::string getSourceName() override;
109
110		virtual CharStream* getInputStream() override;
111
112		/// By default does not support multiple emits per nextToken invocation
113		/// for efficiency reasons. Subclasses can override this method, nextToken,
114		/// and getToken (to push tokens into a list and pull from that list
115		/// rather than a single variable as this implementation does).
116		virtual void emit(std::unique_ptr<Token> newToken);
117
118		/// The standard method called to automatically emit a token at the
119		/// outermost lexical rule. The token object should point into the
120		/// char buffer start..stop. If there is a text override in 'text',
121		/// use that to set the token's text. Override this method to emit
122		/// custom Token objects or provide a new factory.
123		virtual Token* emit();
124
125		virtual Token* emitEOF();
126
127		virtual size_t getLine() const override;
128
129		virtual size_t getCharPositionInLine() override;
130
131		virtual void setLine(size_t line);
132
133		virtual void setCharPositionInLine(size_t charPositionInLine);
134
135		/// What is the index of the current character of lookahead?
136		virtual size_t getCharIndex();
137
138		/// Return the text matched so far for the current token or any
139		/// text override.
140		virtual std::string getText();
141
142		/// Set the complete text of this token; it wipes any previous
143		/// changes to the text.
144		virtual void setText(const std::string &text);
145
146		/// Override if emitting multiple tokens.
147		virtual std::unique_ptr<Token> getToken();
148
149		virtual void setToken(std::unique_ptr<Token> newToken);
150
151		virtual void setType(size_t ttype);
152
153		virtual size_t getType();
154
155		virtual void setChannel(size_t newChannel);
156
157		virtual size_t getChannel();
158
159		virtual const std::vector<std::string>& getChannelNames() const = 0;
160
161		virtual const std::vector<std::string>& getModeNames() const = 0;
162
163		/// Return a list of all Token objects in input char stream.
164		/// Forces load of all tokens. Does not include EOF token.
165		virtual std::vector<std::unique_ptr<Token>> getAllTokens();
166
167		virtual void recover(const LexerNoViableAltException &e);
168
169		virtual void notifyListeners(const LexerNoViableAltException &e);
170
171		virtual std::string getErrorDisplay(const std::string &s);
172
173		/// Lexers can normally match any char in it's vocabulary after matching
174		/// a token, so do the easy thing and just kill a character and hope
175		/// it all works out. You can instead use the rule invocation stack
176		/// to do sophisticated error recovery if you are in a fragment rule.
177		virtual void recover(RecognitionException *re);
178
179		/// <summary>
180		/// Gets the number of syntax errors reported during parsing. This value is
181		/// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
182		/// </summary>
183		/// <seealso cref= #notifyListeners </seealso>
184		virtual size_t getNumberOfSyntaxErrors();
185
186		protected:
187		/// You can set the text for the current token to override what is in
188		/// the input char buffer (via setText()).
189		std::string _text;
190
191		private:
192		size_t _syntaxErrors;
193		void InitializeInstanceFields();
194		};
195
196		} // namespace antlr4

Coverage Report

Created: 2025-11-29 07:01