/src/qpdf/libqpdf/qpdf/QPDFTokenizer_private.hh
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef QPDFTOKENIZER_PRIVATE_HH |
2 | | #define QPDFTOKENIZER_PRIVATE_HH |
3 | | |
4 | | #include <qpdf/QPDFTokenizer.hh> |
5 | | |
6 | | namespace qpdf |
7 | | { |
8 | | |
9 | | class Tokenizer |
10 | | { |
11 | | public: |
12 | | Tokenizer(); |
13 | | Tokenizer(Tokenizer const&) = delete; |
14 | | Tokenizer& operator=(Tokenizer const&) = delete; |
15 | | |
16 | | // Methods to support QPDFTokenizer. See QPDFTokenizer.hh for detail. Some of these are used |
17 | | // by Tokenizer internally but are not accessed directly by the rest of qpdf. |
18 | | |
19 | | void allowEOF(); |
20 | | void includeIgnorable(); |
21 | | void presentCharacter(char ch); |
22 | | void presentEOF(); |
23 | | bool betweenTokens(); |
24 | | |
25 | | // If a token is available, return true and initialize token with the token, unread_char |
26 | | // with whether or not we have to unread the last character, and if unread_char, ch with the |
27 | | // character to unread. |
28 | | bool getToken(QPDFTokenizer::Token& token, bool& unread_char, char& ch); |
29 | | |
30 | | // Read a token from an input source. Context describes the context in which the token is |
31 | | // being read and is used in the exception thrown if there is an error. After a token is |
32 | | // read, the position of the input source returned by input->tell() points to just after the |
33 | | // token, and the input source's "last offset" as returned by input->getLastOffset() points |
34 | | // to the beginning of the token. |
35 | | QPDFTokenizer::Token readToken( |
36 | | InputSource& input, |
37 | | std::string const& context, |
38 | | bool allow_bad = false, |
39 | | size_t max_len = 0); |
40 | | |
41 | | // Calling this method puts the tokenizer in a state for reading inline images. You should |
42 | | // call this method after reading the character following the ID operator. In that state, it |
43 | | // will return all data up to BUT NOT INCLUDING the next EI token. After you call this |
44 | | // method, the next call to readToken (or the token created next time getToken returns true) |
45 | | // will either be tt_inline_image or tt_bad. This is the only way readToken returns a |
46 | | // tt_inline_image token. |
47 | | void expectInlineImage(InputSource& input); |
48 | | |
49 | | // Read a token from an input source. Context describes the context in which the token is |
50 | | // being read and is used in the exception thrown if there is an error. After a token is |
51 | | // read, the position of the input source returned by input->tell() points to just after the |
52 | | // token, and the input source's "last offset" as returned by input->getLastOffset() points |
53 | | // to the beginning of the token. Returns false if the token is bad or if scanning produced |
54 | | // an error message for any reason. |
55 | | bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); |
56 | | |
57 | | // The following methods are only valid after nextToken has been called and until another |
58 | | // QPDFTokenizer method is called. They allow the results of calling nextToken to be |
59 | | // accessed without creating a Token, thus avoiding copying information that may not be |
60 | | // needed. |
61 | | |
62 | | inline QPDFTokenizer::token_type_e |
63 | | getType() const |
64 | 14.3M | { |
65 | 14.3M | return this->type; |
66 | 14.3M | } |
67 | | inline std::string const& |
68 | | getValue() const |
69 | 12.5M | { |
70 | 12.5M | return (this->type == QPDFTokenizer::tt_name || this->type == QPDFTokenizer::tt_string) |
71 | 12.5M | ? this->val |
72 | 12.5M | : this->raw_val; |
73 | 12.5M | } |
74 | | inline std::string const& |
75 | | getRawValue() const |
76 | 0 | { |
77 | 0 | return this->raw_val; |
78 | 0 | } |
79 | | inline std::string const& |
80 | | getErrorMessage() const |
81 | 35.1k | { |
82 | 35.1k | return this->error_message; |
83 | 35.1k | } |
84 | | |
85 | | private: |
86 | | bool isSpace(char); |
87 | | bool isDelimiter(char); |
88 | | void findEI(InputSource& input); |
89 | | |
90 | | enum state_e { |
91 | | st_top, |
92 | | st_in_hexstring, |
93 | | st_in_string, |
94 | | st_in_hexstring_2nd, |
95 | | st_name, |
96 | | st_literal, |
97 | | st_in_space, |
98 | | st_in_comment, |
99 | | st_string_escape, |
100 | | st_char_code, |
101 | | st_string_after_cr, |
102 | | st_lt, |
103 | | st_gt, |
104 | | st_inline_image, |
105 | | st_sign, |
106 | | st_number, |
107 | | st_real, |
108 | | st_decimal, |
109 | | st_name_hex1, |
110 | | st_name_hex2, |
111 | | st_before_token, |
112 | | st_token_ready |
113 | | }; |
114 | | |
115 | | void handleCharacter(char); |
116 | | void inBeforeToken(char); |
117 | | void inTop(char); |
118 | | void inSpace(char); |
119 | | void inComment(char); |
120 | | void inString(char); |
121 | | void inName(char); |
122 | | void inLt(char); |
123 | | void inGt(char); |
124 | | void inStringAfterCR(char); |
125 | | void inStringEscape(char); |
126 | | void inLiteral(char); |
127 | | void inCharCode(char); |
128 | | void inHexstring(char); |
129 | | void inHexstring2nd(char); |
130 | | void inInlineImage(char); |
131 | | void inTokenReady(char); |
132 | | void inNameHex1(char); |
133 | | void inNameHex2(char); |
134 | | void inSign(char); |
135 | | void inDecimal(char); |
136 | | void inNumber(char); |
137 | | void inReal(char); |
138 | | void reset(); |
139 | | |
140 | | // Lexer state |
141 | | state_e state; |
142 | | |
143 | | bool allow_eof{false}; |
144 | | bool include_ignorable{false}; |
145 | | |
146 | | // Current token accumulation |
147 | | QPDFTokenizer::token_type_e type; |
148 | | std::string val; |
149 | | std::string raw_val; |
150 | | std::string error_message; |
151 | | bool before_token; |
152 | | bool in_token; |
153 | | char char_to_unread; |
154 | | size_t inline_image_bytes; |
155 | | bool bad; |
156 | | |
157 | | // State for strings |
158 | | int string_depth; |
159 | | int char_code; |
160 | | char hex_char; |
161 | | int digit_count; |
162 | | }; |
163 | | |
164 | | } // namespace qpdf |
165 | | |
166 | | #endif // QPDFTOKENIZER_PRIVATE_HH |