Coverage Report

Created: 2025-08-29 06:54

/src/qpdf/libqpdf/qpdf/QPDFTokenizer_private.hh
Line
Count
Source (jump to first uncovered line)
1
#ifndef QPDFTOKENIZER_PRIVATE_HH
2
#define QPDFTOKENIZER_PRIVATE_HH
3
4
#include <qpdf/QPDFTokenizer.hh>
5
6
namespace qpdf
7
{
8
9
    class Tokenizer
10
    {
11
      public:
12
        Tokenizer();
13
        Tokenizer(Tokenizer const&) = delete;
14
        Tokenizer& operator=(Tokenizer const&) = delete;
15
16
        // Methods to support QPDFTokenizer. See QPDFTokenizer.hh for detail. Some of these are used
17
        // by Tokenizer internally but are not accessed directly by the rest of qpdf.
18
19
        void allowEOF();
20
        void includeIgnorable();
21
        void presentCharacter(char ch);
22
        void presentEOF();
23
        bool betweenTokens();
24
25
        // If a token is available, return true and initialize token with the token, unread_char
26
        // with whether or not we have to unread the last character, and if unread_char, ch with the
27
        // character to unread.
28
        bool getToken(QPDFTokenizer::Token& token, bool& unread_char, char& ch);
29
30
        // Read a token from an input source. Context describes the context in which the token is
31
        // being read and is used in the exception thrown if there is an error. After a token is
32
        // read, the position of the input source returned by input->tell() points to just after the
33
        // token, and the input source's "last offset" as returned by input->getLastOffset() points
34
        // to the beginning of the token.
35
        QPDFTokenizer::Token readToken(
36
            InputSource& input,
37
            std::string const& context,
38
            bool allow_bad = false,
39
            size_t max_len = 0);
40
41
        // Calling this method puts the tokenizer in a state for reading inline images. You should
42
        // call this method after reading the character following the ID operator. In that state, it
43
        // will return all data up to BUT NOT INCLUDING the next EI token. After you call this
44
        // method, the next call to readToken (or the token created next time getToken returns true)
45
        // will either be tt_inline_image or tt_bad. This is the only way readToken returns a
46
        // tt_inline_image token.
47
        void expectInlineImage(InputSource& input);
48
49
        // Read a token from an input source. Context describes the context in which the token is
50
        // being read and is used in the exception thrown if there is an error. After a token is
51
        // read, the position of the input source returned by input->tell() points to just after the
52
        // token, and the input source's "last offset" as returned by input->getLastOffset() points
53
        // to the beginning of the token. Returns false if the token is bad or if scanning produced
54
        // an error message for any reason.
55
        bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0);
56
57
        // The following methods are only valid after nextToken has been called and until another
58
        // QPDFTokenizer method is called. They allow the results of calling nextToken to be
59
        // accessed without creating a Token, thus avoiding copying information that may not be
60
        // needed.
61
62
        inline QPDFTokenizer::token_type_e
63
        getType() const
64
14.3M
        {
65
14.3M
            return this->type;
66
14.3M
        }
67
        inline std::string const&
68
        getValue() const
69
12.5M
        {
70
12.5M
            return (this->type == QPDFTokenizer::tt_name || this->type == QPDFTokenizer::tt_string)
71
12.5M
                ? this->val
72
12.5M
                : this->raw_val;
73
12.5M
        }
74
        inline std::string const&
75
        getRawValue() const
76
0
        {
77
0
            return this->raw_val;
78
0
        }
79
        inline std::string const&
80
        getErrorMessage() const
81
35.1k
        {
82
35.1k
            return this->error_message;
83
35.1k
        }
84
85
      private:
86
        bool isSpace(char);
87
        bool isDelimiter(char);
88
        void findEI(InputSource& input);
89
90
        enum state_e {
91
            st_top,
92
            st_in_hexstring,
93
            st_in_string,
94
            st_in_hexstring_2nd,
95
            st_name,
96
            st_literal,
97
            st_in_space,
98
            st_in_comment,
99
            st_string_escape,
100
            st_char_code,
101
            st_string_after_cr,
102
            st_lt,
103
            st_gt,
104
            st_inline_image,
105
            st_sign,
106
            st_number,
107
            st_real,
108
            st_decimal,
109
            st_name_hex1,
110
            st_name_hex2,
111
            st_before_token,
112
            st_token_ready
113
        };
114
115
        void handleCharacter(char);
116
        void inBeforeToken(char);
117
        void inTop(char);
118
        void inSpace(char);
119
        void inComment(char);
120
        void inString(char);
121
        void inName(char);
122
        void inLt(char);
123
        void inGt(char);
124
        void inStringAfterCR(char);
125
        void inStringEscape(char);
126
        void inLiteral(char);
127
        void inCharCode(char);
128
        void inHexstring(char);
129
        void inHexstring2nd(char);
130
        void inInlineImage(char);
131
        void inTokenReady(char);
132
        void inNameHex1(char);
133
        void inNameHex2(char);
134
        void inSign(char);
135
        void inDecimal(char);
136
        void inNumber(char);
137
        void inReal(char);
138
        void reset();
139
140
        // Lexer state
141
        state_e state;
142
143
        bool allow_eof{false};
144
        bool include_ignorable{false};
145
146
        // Current token accumulation
147
        QPDFTokenizer::token_type_e type;
148
        std::string val;
149
        std::string raw_val;
150
        std::string error_message;
151
        bool before_token;
152
        bool in_token;
153
        char char_to_unread;
154
        size_t inline_image_bytes;
155
        bool bad;
156
157
        // State for strings
158
        int string_depth;
159
        int char_code;
160
        char hex_char;
161
        int digit_count;
162
    };
163
164
} // namespace qpdf
165
166
#endif // QPDFTOKENIZER_PRIVATE_HH