/src/expat/expat/lib/xmlrole.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | __ __ _ |
3 | | ___\ \/ /_ __ __ _| |_ |
4 | | / _ \\ /| '_ \ / _` | __| |
5 | | | __// \| |_) | (_| | |_ |
6 | | \___/_/\_\ .__/ \__,_|\__| |
7 | | |_| XML parser |
8 | | |
9 | | Copyright (c) 1997-2000 Thai Open Source Software Center Ltd |
10 | | Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> |
11 | | Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> |
12 | | Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> |
13 | | Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> |
14 | | Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> |
15 | | Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org> |
16 | | Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> |
17 | | Copyright (c) 2019 David Loffredo <loffredo@steptools.com> |
18 | | Copyright (c) 2021 Donghee Na <donghee.na@python.org> |
19 | | Licensed under the MIT license: |
20 | | |
21 | | Permission is hereby granted, free of charge, to any person obtaining |
22 | | a copy of this software and associated documentation files (the |
23 | | "Software"), to deal in the Software without restriction, including |
24 | | without limitation the rights to use, copy, modify, merge, publish, |
25 | | distribute, sublicense, and/or sell copies of the Software, and to permit |
26 | | persons to whom the Software is furnished to do so, subject to the |
27 | | following conditions: |
28 | | |
29 | | The above copyright notice and this permission notice shall be included |
30 | | in all copies or substantial portions of the Software. |
31 | | |
32 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
33 | | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
34 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN |
35 | | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, |
36 | | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
37 | | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
38 | | USE OR OTHER DEALINGS IN THE SOFTWARE. |
39 | | */ |
40 | | |
41 | | #include "expat_config.h" |
42 | | |
43 | | #include <stddef.h> |
44 | | |
45 | | #ifdef _WIN32 |
46 | | # include "winconfig.h" |
47 | | #endif |
48 | | |
49 | | #include "expat_external.h" |
50 | | #include "internal.h" |
51 | | #include "xmlrole.h" |
52 | | #include "ascii.h" |
53 | | |
54 | | /* Doesn't check: |
55 | | |
56 | | that ,| are not mixed in a model group |
57 | | content of literals |
58 | | |
59 | | */ |
60 | | |
61 | | static const char KW_ANY[] = {ASCII_A, ASCII_N, ASCII_Y, '\0'}; |
62 | | static const char KW_ATTLIST[] |
63 | | = {ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0'}; |
64 | | static const char KW_CDATA[] |
65 | | = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; |
66 | | static const char KW_DOCTYPE[] |
67 | | = {ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0'}; |
68 | | static const char KW_ELEMENT[] |
69 | | = {ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0'}; |
70 | | static const char KW_EMPTY[] |
71 | | = {ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0'}; |
72 | | static const char KW_ENTITIES[] = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, |
73 | | ASCII_I, ASCII_E, ASCII_S, '\0'}; |
74 | | static const char KW_ENTITY[] |
75 | | = {ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0'}; |
76 | | static const char KW_FIXED[] |
77 | | = {ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0'}; |
78 | | static const char KW_ID[] = {ASCII_I, ASCII_D, '\0'}; |
79 | | static const char KW_IDREF[] |
80 | | = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0'}; |
81 | | static const char KW_IDREFS[] |
82 | | = {ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0'}; |
83 | | #ifdef XML_DTD |
84 | | static const char KW_IGNORE[] |
85 | | = {ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0'}; |
86 | | #endif |
87 | | static const char KW_IMPLIED[] |
88 | | = {ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0'}; |
89 | | #ifdef XML_DTD |
90 | | static const char KW_INCLUDE[] |
91 | | = {ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0'}; |
92 | | #endif |
93 | | static const char KW_NDATA[] |
94 | | = {ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; |
95 | | static const char KW_NMTOKEN[] |
96 | | = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0'}; |
97 | | static const char KW_NMTOKENS[] = {ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, |
98 | | ASCII_E, ASCII_N, ASCII_S, '\0'}; |
99 | | static const char KW_NOTATION[] = {ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, |
100 | | ASCII_I, ASCII_O, ASCII_N, '\0'}; |
101 | | static const char KW_PCDATA[] |
102 | | = {ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0'}; |
103 | | static const char KW_PUBLIC[] |
104 | | = {ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0'}; |
105 | | static const char KW_REQUIRED[] = {ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, |
106 | | ASCII_R, ASCII_E, ASCII_D, '\0'}; |
107 | | static const char KW_SYSTEM[] |
108 | | = {ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0'}; |
109 | | |
/* Fallback definition, used only when MIN_BYTES_PER_CHAR has not been
   defined already: read the value from the encoding itself. */
#if ! defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* setTopLevel(state) installs the handler to use once a declaration
   has been completed.  Without XML_DTD that is always internalSubset;
   with XML_DTD it is internalSubset when state->documentEntity is
   non-zero and externalSubset1 otherwise. */
#ifndef XML_DTD
#  define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#  define setTopLevel(state) \
    ((state)->handler \
     = ((state)->documentEntity ? internalSubset : externalSubset1))
#endif /* XML_DTD */
121 | | |
122 | | typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state, int tok, |
123 | | const char *ptr, const char *end, |
124 | | const ENCODING *enc); |
125 | | |
126 | | static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, |
127 | | doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, |
128 | | entity3, entity4, entity5, entity6, entity7, entity8, entity9, entity10, |
129 | | notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, |
130 | | attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, |
131 | | attlist9, element0, element1, element2, element3, element4, element5, |
132 | | element6, element7, |
133 | | #ifdef XML_DTD |
134 | | externalSubset0, externalSubset1, condSect0, condSect1, condSect2, |
135 | | #endif /* XML_DTD */ |
136 | | declClose, error; |
137 | | |
138 | | static int FASTCALL common(PROLOG_STATE *state, int tok); |
139 | | |
140 | | static int PTRCALL |
141 | | prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
142 | 34.7k | const ENCODING *enc) { |
143 | 34.7k | switch (tok) { |
144 | 3.23k | case XML_TOK_PROLOG_S: |
145 | 3.23k | state->handler = prolog1; |
146 | 3.23k | return XML_ROLE_NONE; |
147 | 576 | case XML_TOK_XML_DECL: |
148 | 576 | state->handler = prolog1; |
149 | 576 | return XML_ROLE_XML_DECL; |
150 | 387 | case XML_TOK_PI: |
151 | 387 | state->handler = prolog1; |
152 | 387 | return XML_ROLE_PI; |
153 | 69 | case XML_TOK_COMMENT: |
154 | 69 | state->handler = prolog1; |
155 | 69 | return XML_ROLE_COMMENT; |
156 | 7.18k | case XML_TOK_BOM: |
157 | 7.18k | return XML_ROLE_NONE; |
158 | 8.37k | case XML_TOK_DECL_OPEN: |
159 | 8.37k | if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
160 | 8.37k | KW_DOCTYPE)) |
161 | 1.38k | break; |
162 | 6.98k | state->handler = doctype0; |
163 | 6.98k | return XML_ROLE_DOCTYPE_NONE; |
164 | 10.4k | case XML_TOK_INSTANCE_START: |
165 | 10.4k | state->handler = error; |
166 | 10.4k | return XML_ROLE_INSTANCE_START; |
167 | 34.7k | } |
168 | 5.82k | return common(state, tok); |
169 | 34.7k | } |
170 | | |
171 | | static int PTRCALL |
172 | | prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
173 | 7.88k | const ENCODING *enc) { |
174 | 7.88k | switch (tok) { |
175 | 1.03k | case XML_TOK_PROLOG_S: |
176 | 1.03k | return XML_ROLE_NONE; |
177 | 3.58k | case XML_TOK_PI: |
178 | 3.58k | return XML_ROLE_PI; |
179 | 815 | case XML_TOK_COMMENT: |
180 | 815 | return XML_ROLE_COMMENT; |
181 | 0 | case XML_TOK_BOM: |
182 | | /* This case can never arise. To reach this role function, the |
183 | | * parse must have passed through prolog0 and therefore have had |
184 | | * some form of input, even if only a space. At that point, a |
185 | | * byte order mark is no longer a valid character (though |
186 | | * technically it should be interpreted as a non-breaking space), |
187 | | * so will be rejected by the tokenizing stages. |
188 | | */ |
189 | 0 | return XML_ROLE_NONE; /* LCOV_EXCL_LINE */ |
190 | 1.01k | case XML_TOK_DECL_OPEN: |
191 | 1.01k | if (! XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
192 | 1.01k | KW_DOCTYPE)) |
193 | 108 | break; |
194 | 908 | state->handler = doctype0; |
195 | 908 | return XML_ROLE_DOCTYPE_NONE; |
196 | 1.15k | case XML_TOK_INSTANCE_START: |
197 | 1.15k | state->handler = error; |
198 | 1.15k | return XML_ROLE_INSTANCE_START; |
199 | 7.88k | } |
200 | 381 | return common(state, tok); |
201 | 7.88k | } |
202 | | |
203 | | static int PTRCALL |
204 | | prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
205 | 3.78k | const ENCODING *enc) { |
206 | 3.78k | UNUSED_P(ptr); |
207 | 3.78k | UNUSED_P(end); |
208 | 3.78k | UNUSED_P(enc); |
209 | 3.78k | switch (tok) { |
210 | 364 | case XML_TOK_PROLOG_S: |
211 | 364 | return XML_ROLE_NONE; |
212 | 270 | case XML_TOK_PI: |
213 | 270 | return XML_ROLE_PI; |
214 | 76 | case XML_TOK_COMMENT: |
215 | 76 | return XML_ROLE_COMMENT; |
216 | 3.05k | case XML_TOK_INSTANCE_START: |
217 | 3.05k | state->handler = error; |
218 | 3.05k | return XML_ROLE_INSTANCE_START; |
219 | 3.78k | } |
220 | 20 | return common(state, tok); |
221 | 3.78k | } |
222 | | |
223 | | static int PTRCALL |
224 | | doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
225 | 15.7k | const ENCODING *enc) { |
226 | 15.7k | UNUSED_P(ptr); |
227 | 15.7k | UNUSED_P(end); |
228 | 15.7k | UNUSED_P(enc); |
229 | 15.7k | switch (tok) { |
230 | 7.88k | case XML_TOK_PROLOG_S: |
231 | 7.88k | return XML_ROLE_DOCTYPE_NONE; |
232 | 7.70k | case XML_TOK_NAME: |
233 | 7.71k | case XML_TOK_PREFIXED_NAME: |
234 | 7.71k | state->handler = doctype1; |
235 | 7.71k | return XML_ROLE_DOCTYPE_NAME; |
236 | 15.7k | } |
237 | 131 | return common(state, tok); |
238 | 15.7k | } |
239 | | |
240 | | static int PTRCALL |
241 | | doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
242 | 9.56k | const ENCODING *enc) { |
243 | 9.56k | switch (tok) { |
244 | 1.86k | case XML_TOK_PROLOG_S: |
245 | 1.86k | return XML_ROLE_DOCTYPE_NONE; |
246 | 6.52k | case XML_TOK_OPEN_BRACKET: |
247 | 6.52k | state->handler = internalSubset; |
248 | 6.52k | return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; |
249 | 61 | case XML_TOK_DECL_CLOSE: |
250 | 61 | state->handler = prolog2; |
251 | 61 | return XML_ROLE_DOCTYPE_CLOSE; |
252 | 1.10k | case XML_TOK_NAME: |
253 | 1.10k | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
254 | 26 | state->handler = doctype3; |
255 | 26 | return XML_ROLE_DOCTYPE_NONE; |
256 | 26 | } |
257 | 1.07k | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
258 | 1.06k | state->handler = doctype2; |
259 | 1.06k | return XML_ROLE_DOCTYPE_NONE; |
260 | 1.06k | } |
261 | 17 | break; |
262 | 9.56k | } |
263 | 24 | return common(state, tok); |
264 | 9.56k | } |
265 | | |
266 | | static int PTRCALL |
267 | | doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
268 | 2.11k | const ENCODING *enc) { |
269 | 2.11k | UNUSED_P(ptr); |
270 | 2.11k | UNUSED_P(end); |
271 | 2.11k | UNUSED_P(enc); |
272 | 2.11k | switch (tok) { |
273 | 1.06k | case XML_TOK_PROLOG_S: |
274 | 1.06k | return XML_ROLE_DOCTYPE_NONE; |
275 | 1.04k | case XML_TOK_LITERAL: |
276 | 1.04k | state->handler = doctype3; |
277 | 1.04k | return XML_ROLE_DOCTYPE_PUBLIC_ID; |
278 | 2.11k | } |
279 | 13 | return common(state, tok); |
280 | 2.11k | } |
281 | | |
282 | | static int PTRCALL |
283 | | doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
284 | 340 | const ENCODING *enc) { |
285 | 340 | UNUSED_P(ptr); |
286 | 340 | UNUSED_P(end); |
287 | 340 | UNUSED_P(enc); |
288 | 340 | switch (tok) { |
289 | 184 | case XML_TOK_PROLOG_S: |
290 | 184 | return XML_ROLE_DOCTYPE_NONE; |
291 | 97 | case XML_TOK_LITERAL: |
292 | 97 | state->handler = doctype4; |
293 | 97 | return XML_ROLE_DOCTYPE_SYSTEM_ID; |
294 | 340 | } |
295 | 59 | return common(state, tok); |
296 | 340 | } |
297 | | |
298 | | static int PTRCALL |
299 | | doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
300 | 109 | const ENCODING *enc) { |
301 | 109 | UNUSED_P(ptr); |
302 | 109 | UNUSED_P(end); |
303 | 109 | UNUSED_P(enc); |
304 | 109 | switch (tok) { |
305 | 62 | case XML_TOK_PROLOG_S: |
306 | 62 | return XML_ROLE_DOCTYPE_NONE; |
307 | 13 | case XML_TOK_OPEN_BRACKET: |
308 | 13 | state->handler = internalSubset; |
309 | 13 | return XML_ROLE_DOCTYPE_INTERNAL_SUBSET; |
310 | 7 | case XML_TOK_DECL_CLOSE: |
311 | 7 | state->handler = prolog2; |
312 | 7 | return XML_ROLE_DOCTYPE_CLOSE; |
313 | 109 | } |
314 | 27 | return common(state, tok); |
315 | 109 | } |
316 | | |
317 | | static int PTRCALL |
318 | | doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
319 | 3.20k | const ENCODING *enc) { |
320 | 3.20k | UNUSED_P(ptr); |
321 | 3.20k | UNUSED_P(end); |
322 | 3.20k | UNUSED_P(enc); |
323 | 3.20k | switch (tok) { |
324 | 146 | case XML_TOK_PROLOG_S: |
325 | 146 | return XML_ROLE_DOCTYPE_NONE; |
326 | 3.04k | case XML_TOK_DECL_CLOSE: |
327 | 3.04k | state->handler = prolog2; |
328 | 3.04k | return XML_ROLE_DOCTYPE_CLOSE; |
329 | 3.20k | } |
330 | 10 | return common(state, tok); |
331 | 3.20k | } |
332 | | |
333 | | static int PTRCALL |
334 | | internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
335 | 62.7k | const ENCODING *enc) { |
336 | 62.7k | switch (tok) { |
337 | 5.77k | case XML_TOK_PROLOG_S: |
338 | 5.77k | return XML_ROLE_NONE; |
339 | 37.3k | case XML_TOK_DECL_OPEN: |
340 | 37.3k | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
341 | 37.3k | KW_ENTITY)) { |
342 | 11.5k | state->handler = entity0; |
343 | 11.5k | return XML_ROLE_ENTITY_NONE; |
344 | 11.5k | } |
345 | 25.8k | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
346 | 25.8k | KW_ATTLIST)) { |
347 | 12.9k | state->handler = attlist0; |
348 | 12.9k | return XML_ROLE_ATTLIST_NONE; |
349 | 12.9k | } |
350 | 12.8k | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
351 | 12.8k | KW_ELEMENT)) { |
352 | 6.27k | state->handler = element0; |
353 | 6.27k | return XML_ROLE_ELEMENT_NONE; |
354 | 6.27k | } |
355 | 6.60k | if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, |
356 | 6.60k | KW_NOTATION)) { |
357 | 2.33k | state->handler = notation0; |
358 | 2.33k | return XML_ROLE_NOTATION_NONE; |
359 | 2.33k | } |
360 | 4.26k | break; |
361 | 5.53k | case XML_TOK_PI: |
362 | 5.53k | return XML_ROLE_PI; |
363 | 570 | case XML_TOK_COMMENT: |
364 | 570 | return XML_ROLE_COMMENT; |
365 | 2.77k | case XML_TOK_PARAM_ENTITY_REF: |
366 | 2.77k | return XML_ROLE_PARAM_ENTITY_REF; |
367 | 3.06k | case XML_TOK_CLOSE_BRACKET: |
368 | 3.06k | state->handler = doctype5; |
369 | 3.06k | return XML_ROLE_DOCTYPE_NONE; |
370 | 0 | case XML_TOK_NONE: |
371 | 0 | return XML_ROLE_NONE; |
372 | 62.7k | } |
373 | 11.9k | return common(state, tok); |
374 | 62.7k | } |
375 | | |
376 | | #ifdef XML_DTD |
377 | | |
378 | | static int PTRCALL |
379 | | externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
380 | 15.8k | const ENCODING *enc) { |
381 | 15.8k | state->handler = externalSubset1; |
382 | 15.8k | if (tok == XML_TOK_XML_DECL) |
383 | 443 | return XML_ROLE_TEXT_DECL; |
384 | 15.4k | return externalSubset1(state, tok, ptr, end, enc); |
385 | 15.8k | } |
386 | | |
387 | | static int PTRCALL |
388 | | externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
389 | 22.8k | const ENCODING *enc) { |
390 | 22.8k | switch (tok) { |
391 | 2.03k | case XML_TOK_COND_SECT_OPEN: |
392 | 2.03k | state->handler = condSect0; |
393 | 2.03k | return XML_ROLE_NONE; |
394 | 57 | case XML_TOK_COND_SECT_CLOSE: |
395 | 57 | if (state->includeLevel == 0) |
396 | 8 | break; |
397 | 49 | state->includeLevel -= 1; |
398 | 49 | return XML_ROLE_NONE; |
399 | 2.99k | case XML_TOK_PROLOG_S: |
400 | 2.99k | return XML_ROLE_NONE; |
401 | 247 | case XML_TOK_CLOSE_BRACKET: |
402 | 247 | break; |
403 | 355 | case XML_TOK_NONE: |
404 | 355 | if (state->includeLevel) |
405 | 13 | break; |
406 | 342 | return XML_ROLE_NONE; |
407 | 17.1k | default: |
408 | 17.1k | return internalSubset(state, tok, ptr, end, enc); |
409 | 22.8k | } |
410 | 268 | return common(state, tok); |
411 | 22.8k | } |
412 | | |
413 | | #endif /* XML_DTD */ |
414 | | |
415 | | static int PTRCALL |
416 | | entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
417 | 23.4k | const ENCODING *enc) { |
418 | 23.4k | UNUSED_P(ptr); |
419 | 23.4k | UNUSED_P(end); |
420 | 23.4k | UNUSED_P(enc); |
421 | 23.4k | switch (tok) { |
422 | 11.6k | case XML_TOK_PROLOG_S: |
423 | 11.6k | return XML_ROLE_ENTITY_NONE; |
424 | 2.73k | case XML_TOK_PERCENT: |
425 | 2.73k | state->handler = entity1; |
426 | 2.73k | return XML_ROLE_ENTITY_NONE; |
427 | 8.47k | case XML_TOK_NAME: |
428 | 8.47k | state->handler = entity2; |
429 | 8.47k | return XML_ROLE_GENERAL_ENTITY_NAME; |
430 | 23.4k | } |
431 | 578 | return common(state, tok); |
432 | 23.4k | } |
433 | | |
434 | | static int PTRCALL |
435 | | entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
436 | 5.69k | const ENCODING *enc) { |
437 | 5.69k | UNUSED_P(ptr); |
438 | 5.69k | UNUSED_P(end); |
439 | 5.69k | UNUSED_P(enc); |
440 | 5.69k | switch (tok) { |
441 | 2.71k | case XML_TOK_PROLOG_S: |
442 | 2.71k | return XML_ROLE_ENTITY_NONE; |
443 | 2.68k | case XML_TOK_NAME: |
444 | 2.68k | state->handler = entity7; |
445 | 2.68k | return XML_ROLE_PARAM_ENTITY_NAME; |
446 | 5.69k | } |
447 | 293 | return common(state, tok); |
448 | 5.69k | } |
449 | | |
450 | | static int PTRCALL |
451 | | entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
452 | 17.1k | const ENCODING *enc) { |
453 | 17.1k | switch (tok) { |
454 | 8.54k | case XML_TOK_PROLOG_S: |
455 | 8.54k | return XML_ROLE_ENTITY_NONE; |
456 | 1.33k | case XML_TOK_NAME: |
457 | 1.33k | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
458 | 270 | state->handler = entity4; |
459 | 270 | return XML_ROLE_ENTITY_NONE; |
460 | 270 | } |
461 | 1.06k | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
462 | 1.05k | state->handler = entity3; |
463 | 1.05k | return XML_ROLE_ENTITY_NONE; |
464 | 1.05k | } |
465 | 6 | break; |
466 | 7.01k | case XML_TOK_LITERAL: |
467 | 7.01k | state->handler = declClose; |
468 | 7.01k | state->role_none = XML_ROLE_ENTITY_NONE; |
469 | 7.01k | return XML_ROLE_ENTITY_VALUE; |
470 | 17.1k | } |
471 | 248 | return common(state, tok); |
472 | 17.1k | } |
473 | | |
474 | | static int PTRCALL |
475 | | entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
476 | 2.56k | const ENCODING *enc) { |
477 | 2.56k | UNUSED_P(ptr); |
478 | 2.56k | UNUSED_P(end); |
479 | 2.56k | UNUSED_P(enc); |
480 | 2.56k | switch (tok) { |
481 | 1.20k | case XML_TOK_PROLOG_S: |
482 | 1.20k | return XML_ROLE_ENTITY_NONE; |
483 | 1.02k | case XML_TOK_LITERAL: |
484 | 1.02k | state->handler = entity4; |
485 | 1.02k | return XML_ROLE_ENTITY_PUBLIC_ID; |
486 | 2.56k | } |
487 | 330 | return common(state, tok); |
488 | 2.56k | } |
489 | | |
490 | | static int PTRCALL |
491 | | entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
492 | 2.76k | const ENCODING *enc) { |
493 | 2.76k | UNUSED_P(ptr); |
494 | 2.76k | UNUSED_P(end); |
495 | 2.76k | UNUSED_P(enc); |
496 | 2.76k | switch (tok) { |
497 | 1.29k | case XML_TOK_PROLOG_S: |
498 | 1.29k | return XML_ROLE_ENTITY_NONE; |
499 | 1.22k | case XML_TOK_LITERAL: |
500 | 1.22k | state->handler = entity5; |
501 | 1.22k | return XML_ROLE_ENTITY_SYSTEM_ID; |
502 | 2.76k | } |
503 | 254 | return common(state, tok); |
504 | 2.76k | } |
505 | | |
506 | | static int PTRCALL |
507 | | entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
508 | 2.34k | const ENCODING *enc) { |
509 | 2.34k | switch (tok) { |
510 | 841 | case XML_TOK_PROLOG_S: |
511 | 841 | return XML_ROLE_ENTITY_NONE; |
512 | 1.01k | case XML_TOK_DECL_CLOSE: |
513 | 1.01k | setTopLevel(state); |
514 | 1.01k | return XML_ROLE_ENTITY_COMPLETE; |
515 | 190 | case XML_TOK_NAME: |
516 | 190 | if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { |
517 | 181 | state->handler = entity6; |
518 | 181 | return XML_ROLE_ENTITY_NONE; |
519 | 181 | } |
520 | 9 | break; |
521 | 2.34k | } |
522 | 309 | return common(state, tok); |
523 | 2.34k | } |
524 | | |
525 | | static int PTRCALL |
526 | | entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
527 | 1.29k | const ENCODING *enc) { |
528 | 1.29k | UNUSED_P(ptr); |
529 | 1.29k | UNUSED_P(end); |
530 | 1.29k | UNUSED_P(enc); |
531 | 1.29k | switch (tok) { |
532 | 522 | case XML_TOK_PROLOG_S: |
533 | 522 | return XML_ROLE_ENTITY_NONE; |
534 | 160 | case XML_TOK_NAME: |
535 | 160 | state->handler = declClose; |
536 | 160 | state->role_none = XML_ROLE_ENTITY_NONE; |
537 | 160 | return XML_ROLE_ENTITY_NOTATION_NAME; |
538 | 1.29k | } |
539 | 617 | return common(state, tok); |
540 | 1.29k | } |
541 | | |
542 | | static int PTRCALL |
543 | | entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
544 | 5.59k | const ENCODING *enc) { |
545 | 5.59k | switch (tok) { |
546 | 2.69k | case XML_TOK_PROLOG_S: |
547 | 2.69k | return XML_ROLE_ENTITY_NONE; |
548 | 627 | case XML_TOK_NAME: |
549 | 627 | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
550 | 93 | state->handler = entity9; |
551 | 93 | return XML_ROLE_ENTITY_NONE; |
552 | 93 | } |
553 | 534 | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
554 | 528 | state->handler = entity8; |
555 | 528 | return XML_ROLE_ENTITY_NONE; |
556 | 528 | } |
557 | 6 | break; |
558 | 2.01k | case XML_TOK_LITERAL: |
559 | 2.01k | state->handler = declClose; |
560 | 2.01k | state->role_none = XML_ROLE_ENTITY_NONE; |
561 | 2.01k | return XML_ROLE_ENTITY_VALUE; |
562 | 5.59k | } |
563 | 270 | return common(state, tok); |
564 | 5.59k | } |
565 | | |
566 | | static int PTRCALL |
567 | | entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
568 | 1.53k | const ENCODING *enc) { |
569 | 1.53k | UNUSED_P(ptr); |
570 | 1.53k | UNUSED_P(end); |
571 | 1.53k | UNUSED_P(enc); |
572 | 1.53k | switch (tok) { |
573 | 677 | case XML_TOK_PROLOG_S: |
574 | 677 | return XML_ROLE_ENTITY_NONE; |
575 | 503 | case XML_TOK_LITERAL: |
576 | 503 | state->handler = entity9; |
577 | 503 | return XML_ROLE_ENTITY_PUBLIC_ID; |
578 | 1.53k | } |
579 | 352 | return common(state, tok); |
580 | 1.53k | } |
581 | | |
582 | | static int PTRCALL |
583 | | entity9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
584 | 1.85k | const ENCODING *enc) { |
585 | 1.85k | UNUSED_P(ptr); |
586 | 1.85k | UNUSED_P(end); |
587 | 1.85k | UNUSED_P(enc); |
588 | 1.85k | switch (tok) { |
589 | 833 | case XML_TOK_PROLOG_S: |
590 | 833 | return XML_ROLE_ENTITY_NONE; |
591 | 566 | case XML_TOK_LITERAL: |
592 | 566 | state->handler = entity10; |
593 | 566 | return XML_ROLE_ENTITY_SYSTEM_ID; |
594 | 1.85k | } |
595 | 452 | return common(state, tok); |
596 | 1.85k | } |
597 | | |
598 | | static int PTRCALL |
599 | | entity10(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
600 | 1.60k | const ENCODING *enc) { |
601 | 1.60k | UNUSED_P(ptr); |
602 | 1.60k | UNUSED_P(end); |
603 | 1.60k | UNUSED_P(enc); |
604 | 1.60k | switch (tok) { |
605 | 736 | case XML_TOK_PROLOG_S: |
606 | 736 | return XML_ROLE_ENTITY_NONE; |
607 | 543 | case XML_TOK_DECL_CLOSE: |
608 | 543 | setTopLevel(state); |
609 | 543 | return XML_ROLE_ENTITY_COMPLETE; |
610 | 1.60k | } |
611 | 329 | return common(state, tok); |
612 | 1.60k | } |
613 | | |
614 | | static int PTRCALL |
615 | | notation0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
616 | 5.43k | const ENCODING *enc) { |
617 | 5.43k | UNUSED_P(ptr); |
618 | 5.43k | UNUSED_P(end); |
619 | 5.43k | UNUSED_P(enc); |
620 | 5.43k | switch (tok) { |
621 | 2.60k | case XML_TOK_PROLOG_S: |
622 | 2.60k | return XML_ROLE_NOTATION_NONE; |
623 | 2.30k | case XML_TOK_NAME: |
624 | 2.30k | state->handler = notation1; |
625 | 2.30k | return XML_ROLE_NOTATION_NAME; |
626 | 5.43k | } |
627 | 515 | return common(state, tok); |
628 | 5.43k | } |
629 | | |
630 | | static int PTRCALL |
631 | | notation1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
632 | 5.34k | const ENCODING *enc) { |
633 | 5.34k | switch (tok) { |
634 | 2.57k | case XML_TOK_PROLOG_S: |
635 | 2.57k | return XML_ROLE_NOTATION_NONE; |
636 | 2.28k | case XML_TOK_NAME: |
637 | 2.28k | if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { |
638 | 354 | state->handler = notation3; |
639 | 354 | return XML_ROLE_NOTATION_NONE; |
640 | 354 | } |
641 | 1.92k | if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { |
642 | 1.92k | state->handler = notation2; |
643 | 1.92k | return XML_ROLE_NOTATION_NONE; |
644 | 1.92k | } |
645 | 7 | break; |
646 | 5.34k | } |
647 | 496 | return common(state, tok); |
648 | 5.34k | } |
649 | | |
650 | | static int PTRCALL |
651 | | notation2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
652 | 3.88k | const ENCODING *enc) { |
653 | 3.88k | UNUSED_P(ptr); |
654 | 3.88k | UNUSED_P(end); |
655 | 3.88k | UNUSED_P(enc); |
656 | 3.88k | switch (tok) { |
657 | 1.92k | case XML_TOK_PROLOG_S: |
658 | 1.92k | return XML_ROLE_NOTATION_NONE; |
659 | 1.90k | case XML_TOK_LITERAL: |
660 | 1.90k | state->handler = notation4; |
661 | 1.90k | return XML_ROLE_NOTATION_PUBLIC_ID; |
662 | 3.88k | } |
663 | 53 | return common(state, tok); |
664 | 3.88k | } |
665 | | |
666 | | static int PTRCALL |
667 | | notation3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
668 | 1.48k | const ENCODING *enc) { |
669 | 1.48k | UNUSED_P(ptr); |
670 | 1.48k | UNUSED_P(end); |
671 | 1.48k | UNUSED_P(enc); |
672 | 1.48k | switch (tok) { |
673 | 660 | case XML_TOK_PROLOG_S: |
674 | 660 | return XML_ROLE_NOTATION_NONE; |
675 | 338 | case XML_TOK_LITERAL: |
676 | 338 | state->handler = declClose; |
677 | 338 | state->role_none = XML_ROLE_NOTATION_NONE; |
678 | 338 | return XML_ROLE_NOTATION_SYSTEM_ID; |
679 | 1.48k | } |
680 | 482 | return common(state, tok); |
681 | 1.48k | } |
682 | | |
683 | | static int PTRCALL |
684 | | notation4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
685 | 3.01k | const ENCODING *enc) { |
686 | 3.01k | UNUSED_P(ptr); |
687 | 3.01k | UNUSED_P(end); |
688 | 3.01k | UNUSED_P(enc); |
689 | 3.01k | switch (tok) { |
690 | 1.03k | case XML_TOK_PROLOG_S: |
691 | 1.03k | return XML_ROLE_NOTATION_NONE; |
692 | 413 | case XML_TOK_LITERAL: |
693 | 413 | state->handler = declClose; |
694 | 413 | state->role_none = XML_ROLE_NOTATION_NONE; |
695 | 413 | return XML_ROLE_NOTATION_SYSTEM_ID; |
696 | 1.44k | case XML_TOK_DECL_CLOSE: |
697 | 1.44k | setTopLevel(state); |
698 | 1.44k | return XML_ROLE_NOTATION_NO_SYSTEM_ID; |
699 | 3.01k | } |
700 | 117 | return common(state, tok); |
701 | 3.01k | } |
702 | | |
703 | | static int PTRCALL |
704 | | attlist0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
705 | 26.3k | const ENCODING *enc) { |
706 | 26.3k | UNUSED_P(ptr); |
707 | 26.3k | UNUSED_P(end); |
708 | 26.3k | UNUSED_P(enc); |
709 | 26.3k | switch (tok) { |
710 | 13.0k | case XML_TOK_PROLOG_S: |
711 | 13.0k | return XML_ROLE_ATTLIST_NONE; |
712 | 11.5k | case XML_TOK_NAME: |
713 | 12.8k | case XML_TOK_PREFIXED_NAME: |
714 | 12.8k | state->handler = attlist1; |
715 | 12.8k | return XML_ROLE_ATTLIST_ELEMENT_NAME; |
716 | 26.3k | } |
717 | 410 | return common(state, tok); |
718 | 26.3k | } |
719 | | |
720 | | static int PTRCALL |
721 | | attlist1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
722 | 131k | const ENCODING *enc) { |
723 | 131k | UNUSED_P(ptr); |
724 | 131k | UNUSED_P(end); |
725 | 131k | UNUSED_P(enc); |
726 | 131k | switch (tok) { |
727 | 60.0k | case XML_TOK_PROLOG_S: |
728 | 60.0k | return XML_ROLE_ATTLIST_NONE; |
729 | 11.7k | case XML_TOK_DECL_CLOSE: |
730 | 11.7k | setTopLevel(state); |
731 | 11.7k | return XML_ROLE_ATTLIST_NONE; |
732 | 59.2k | case XML_TOK_NAME: |
733 | 59.6k | case XML_TOK_PREFIXED_NAME: |
734 | 59.6k | state->handler = attlist2; |
735 | 59.6k | return XML_ROLE_ATTRIBUTE_NAME; |
736 | 131k | } |
737 | 307 | return common(state, tok); |
738 | 131k | } |
739 | | |
740 | | static int PTRCALL |
741 | | attlist2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
742 | 119k | const ENCODING *enc) { |
743 | 119k | switch (tok) { |
744 | 59.6k | case XML_TOK_PROLOG_S: |
745 | 59.6k | return XML_ROLE_ATTLIST_NONE; |
746 | 8.99k | case XML_TOK_NAME: { |
747 | 8.99k | static const char *const types[] = { |
748 | 8.99k | KW_CDATA, KW_ID, KW_IDREF, KW_IDREFS, |
749 | 8.99k | KW_ENTITY, KW_ENTITIES, KW_NMTOKEN, KW_NMTOKENS, |
750 | 8.99k | }; |
751 | 8.99k | int i; |
752 | 40.2k | for (i = 0; i < (int)(sizeof(types) / sizeof(types[0])); i++) |
753 | 39.4k | if (XmlNameMatchesAscii(enc, ptr, end, types[i])) { |
754 | 8.26k | state->handler = attlist8; |
755 | 8.26k | return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i; |
756 | 8.26k | } |
757 | 8.99k | } |
758 | 731 | if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) { |
759 | 677 | state->handler = attlist5; |
760 | 677 | return XML_ROLE_ATTLIST_NONE; |
761 | 677 | } |
762 | 54 | break; |
763 | 50.5k | case XML_TOK_OPEN_PAREN: |
764 | 50.5k | state->handler = attlist3; |
765 | 50.5k | return XML_ROLE_ATTLIST_NONE; |
766 | 119k | } |
767 | 312 | return common(state, tok); |
768 | 119k | } |
769 | | |
770 | | static int PTRCALL |
771 | | attlist3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
772 | 79.6k | const ENCODING *enc) { |
773 | 79.6k | UNUSED_P(ptr); |
774 | 79.6k | UNUSED_P(end); |
775 | 79.6k | UNUSED_P(enc); |
776 | 79.6k | switch (tok) { |
777 | 654 | case XML_TOK_PROLOG_S: |
778 | 654 | return XML_ROLE_ATTLIST_NONE; |
779 | 17.6k | case XML_TOK_NMTOKEN: |
780 | 76.9k | case XML_TOK_NAME: |
781 | 78.7k | case XML_TOK_PREFIXED_NAME: |
782 | 78.7k | state->handler = attlist4; |
783 | 78.7k | return XML_ROLE_ATTRIBUTE_ENUM_VALUE; |
784 | 79.6k | } |
785 | 233 | return common(state, tok); |
786 | 79.6k | } |
787 | | |
788 | | static int PTRCALL |
789 | | attlist4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
790 | 79.2k | const ENCODING *enc) { |
791 | 79.2k | UNUSED_P(ptr); |
792 | 79.2k | UNUSED_P(end); |
793 | 79.2k | UNUSED_P(enc); |
794 | 79.2k | switch (tok) { |
795 | 420 | case XML_TOK_PROLOG_S: |
796 | 420 | return XML_ROLE_ATTLIST_NONE; |
797 | 50.1k | case XML_TOK_CLOSE_PAREN: |
798 | 50.1k | state->handler = attlist8; |
799 | 50.1k | return XML_ROLE_ATTLIST_NONE; |
800 | 28.4k | case XML_TOK_OR: |
801 | 28.4k | state->handler = attlist3; |
802 | 28.4k | return XML_ROLE_ATTLIST_NONE; |
803 | 79.2k | } |
804 | 223 | return common(state, tok); |
805 | 79.2k | } |
806 | | |
807 | | static int PTRCALL |
808 | | attlist5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
809 | 1.67k | const ENCODING *enc) { |
810 | 1.67k | UNUSED_P(ptr); |
811 | 1.67k | UNUSED_P(end); |
812 | 1.67k | UNUSED_P(enc); |
813 | 1.67k | switch (tok) { |
814 | 777 | case XML_TOK_PROLOG_S: |
815 | 777 | return XML_ROLE_ATTLIST_NONE; |
816 | 663 | case XML_TOK_OPEN_PAREN: |
817 | 663 | state->handler = attlist6; |
818 | 663 | return XML_ROLE_ATTLIST_NONE; |
819 | 1.67k | } |
820 | 233 | return common(state, tok); |
821 | 1.67k | } |
822 | | |
823 | | static int PTRCALL |
824 | | attlist6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
825 | 2.01k | const ENCODING *enc) { |
826 | 2.01k | UNUSED_P(ptr); |
827 | 2.01k | UNUSED_P(end); |
828 | 2.01k | UNUSED_P(enc); |
829 | 2.01k | switch (tok) { |
830 | 577 | case XML_TOK_PROLOG_S: |
831 | 577 | return XML_ROLE_ATTLIST_NONE; |
832 | 1.05k | case XML_TOK_NAME: |
833 | 1.05k | state->handler = attlist7; |
834 | 1.05k | return XML_ROLE_ATTRIBUTE_NOTATION_VALUE; |
835 | 2.01k | } |
836 | 377 | return common(state, tok); |
837 | 2.01k | } |
838 | | |
839 | | static int PTRCALL |
840 | | attlist7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
841 | 1.79k | const ENCODING *enc) { |
842 | 1.79k | UNUSED_P(ptr); |
843 | 1.79k | UNUSED_P(end); |
844 | 1.79k | UNUSED_P(enc); |
845 | 1.79k | switch (tok) { |
846 | 323 | case XML_TOK_PROLOG_S: |
847 | 323 | return XML_ROLE_ATTLIST_NONE; |
848 | 599 | case XML_TOK_CLOSE_PAREN: |
849 | 599 | state->handler = attlist8; |
850 | 599 | return XML_ROLE_ATTLIST_NONE; |
851 | 436 | case XML_TOK_OR: |
852 | 436 | state->handler = attlist6; |
853 | 436 | return XML_ROLE_ATTLIST_NONE; |
854 | 1.79k | } |
855 | 438 | return common(state, tok); |
856 | 1.79k | } |
857 | | |
858 | | /* default value */ |
859 | | static int PTRCALL |
860 | | attlist8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
861 | 118k | const ENCODING *enc) { |
862 | 118k | switch (tok) { |
863 | 59.0k | case XML_TOK_PROLOG_S: |
864 | 59.0k | return XML_ROLE_ATTLIST_NONE; |
865 | 51.8k | case XML_TOK_POUND_NAME: |
866 | 51.8k | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
867 | 51.8k | KW_IMPLIED)) { |
868 | 49.0k | state->handler = attlist1; |
869 | 49.0k | return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE; |
870 | 49.0k | } |
871 | 2.87k | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
872 | 2.87k | KW_REQUIRED)) { |
873 | 1.80k | state->handler = attlist1; |
874 | 1.80k | return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE; |
875 | 1.80k | } |
876 | 1.07k | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
877 | 1.07k | KW_FIXED)) { |
878 | 1.05k | state->handler = attlist9; |
879 | 1.05k | return XML_ROLE_ATTLIST_NONE; |
880 | 1.05k | } |
881 | 18 | break; |
882 | 7.01k | case XML_TOK_LITERAL: |
883 | 7.01k | state->handler = attlist1; |
884 | 7.01k | return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE; |
885 | 118k | } |
886 | 265 | return common(state, tok); |
887 | 118k | } |
888 | | |
889 | | static int PTRCALL |
890 | | attlist9(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
891 | 2.20k | const ENCODING *enc) { |
892 | 2.20k | UNUSED_P(ptr); |
893 | 2.20k | UNUSED_P(end); |
894 | 2.20k | UNUSED_P(enc); |
895 | 2.20k | switch (tok) { |
896 | 1.08k | case XML_TOK_PROLOG_S: |
897 | 1.08k | return XML_ROLE_ATTLIST_NONE; |
898 | 1.01k | case XML_TOK_LITERAL: |
899 | 1.01k | state->handler = attlist1; |
900 | 1.01k | return XML_ROLE_FIXED_ATTRIBUTE_VALUE; |
901 | 2.20k | } |
902 | 102 | return common(state, tok); |
903 | 2.20k | } |
904 | | |
905 | | static int PTRCALL |
906 | | element0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
907 | 12.8k | const ENCODING *enc) { |
908 | 12.8k | UNUSED_P(ptr); |
909 | 12.8k | UNUSED_P(end); |
910 | 12.8k | UNUSED_P(enc); |
911 | 12.8k | switch (tok) { |
912 | 6.37k | case XML_TOK_PROLOG_S: |
913 | 6.37k | return XML_ROLE_ELEMENT_NONE; |
914 | 5.98k | case XML_TOK_NAME: |
915 | 6.24k | case XML_TOK_PREFIXED_NAME: |
916 | 6.24k | state->handler = element1; |
917 | 6.24k | return XML_ROLE_ELEMENT_NAME; |
918 | 12.8k | } |
919 | 231 | return common(state, tok); |
920 | 12.8k | } |
921 | | |
922 | | static int PTRCALL |
923 | | element1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
924 | 12.6k | const ENCODING *enc) { |
925 | 12.6k | switch (tok) { |
926 | 6.23k | case XML_TOK_PROLOG_S: |
927 | 6.23k | return XML_ROLE_ELEMENT_NONE; |
928 | 249 | case XML_TOK_NAME: |
929 | 249 | if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) { |
930 | 46 | state->handler = declClose; |
931 | 46 | state->role_none = XML_ROLE_ELEMENT_NONE; |
932 | 46 | return XML_ROLE_CONTENT_EMPTY; |
933 | 46 | } |
934 | 203 | if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) { |
935 | 195 | state->handler = declClose; |
936 | 195 | state->role_none = XML_ROLE_ELEMENT_NONE; |
937 | 195 | return XML_ROLE_CONTENT_ANY; |
938 | 195 | } |
939 | 8 | break; |
940 | 5.97k | case XML_TOK_OPEN_PAREN: |
941 | 5.97k | state->handler = element2; |
942 | 5.97k | state->level = 1; |
943 | 5.97k | return XML_ROLE_GROUP_OPEN; |
944 | 12.6k | } |
945 | 244 | return common(state, tok); |
946 | 12.6k | } |
947 | | |
948 | | static int PTRCALL |
949 | | element2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
950 | 7.36k | const ENCODING *enc) { |
951 | 7.36k | switch (tok) { |
952 | 742 | case XML_TOK_PROLOG_S: |
953 | 742 | return XML_ROLE_ELEMENT_NONE; |
954 | 2.15k | case XML_TOK_POUND_NAME: |
955 | 2.15k | if (XmlNameMatchesAscii(enc, ptr + MIN_BYTES_PER_CHAR(enc), end, |
956 | 2.15k | KW_PCDATA)) { |
957 | 2.15k | state->handler = element3; |
958 | 2.15k | return XML_ROLE_CONTENT_PCDATA; |
959 | 2.15k | } |
960 | 4 | break; |
961 | 539 | case XML_TOK_OPEN_PAREN: |
962 | 539 | state->level = 2; |
963 | 539 | state->handler = element6; |
964 | 539 | return XML_ROLE_GROUP_OPEN; |
965 | 2.00k | case XML_TOK_NAME: |
966 | 2.02k | case XML_TOK_PREFIXED_NAME: |
967 | 2.02k | state->handler = element7; |
968 | 2.02k | return XML_ROLE_CONTENT_ELEMENT; |
969 | 834 | case XML_TOK_NAME_QUESTION: |
970 | 834 | state->handler = element7; |
971 | 834 | return XML_ROLE_CONTENT_ELEMENT_OPT; |
972 | 92 | case XML_TOK_NAME_ASTERISK: |
973 | 92 | state->handler = element7; |
974 | 92 | return XML_ROLE_CONTENT_ELEMENT_REP; |
975 | 299 | case XML_TOK_NAME_PLUS: |
976 | 299 | state->handler = element7; |
977 | 299 | return XML_ROLE_CONTENT_ELEMENT_PLUS; |
978 | 7.36k | } |
979 | 682 | return common(state, tok); |
980 | 7.36k | } |
981 | | |
982 | | static int PTRCALL |
983 | | element3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
984 | 3.07k | const ENCODING *enc) { |
985 | 3.07k | UNUSED_P(ptr); |
986 | 3.07k | UNUSED_P(end); |
987 | 3.07k | UNUSED_P(enc); |
988 | 3.07k | switch (tok) { |
989 | 526 | case XML_TOK_PROLOG_S: |
990 | 526 | return XML_ROLE_ELEMENT_NONE; |
991 | 1.76k | case XML_TOK_CLOSE_PAREN: |
992 | 1.76k | state->handler = declClose; |
993 | 1.76k | state->role_none = XML_ROLE_ELEMENT_NONE; |
994 | 1.76k | return XML_ROLE_GROUP_CLOSE; |
995 | 84 | case XML_TOK_CLOSE_PAREN_ASTERISK: |
996 | 84 | state->handler = declClose; |
997 | 84 | state->role_none = XML_ROLE_ELEMENT_NONE; |
998 | 84 | return XML_ROLE_GROUP_CLOSE_REP; |
999 | 277 | case XML_TOK_OR: |
1000 | 277 | state->handler = element4; |
1001 | 277 | return XML_ROLE_ELEMENT_NONE; |
1002 | 3.07k | } |
1003 | 424 | return common(state, tok); |
1004 | 3.07k | } |
1005 | | |
1006 | | static int PTRCALL |
1007 | | element4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1008 | 1.55k | const ENCODING *enc) { |
1009 | 1.55k | UNUSED_P(ptr); |
1010 | 1.55k | UNUSED_P(end); |
1011 | 1.55k | UNUSED_P(enc); |
1012 | 1.55k | switch (tok) { |
1013 | 257 | case XML_TOK_PROLOG_S: |
1014 | 257 | return XML_ROLE_ELEMENT_NONE; |
1015 | 709 | case XML_TOK_NAME: |
1016 | 974 | case XML_TOK_PREFIXED_NAME: |
1017 | 974 | state->handler = element5; |
1018 | 974 | return XML_ROLE_CONTENT_ELEMENT; |
1019 | 1.55k | } |
1020 | 325 | return common(state, tok); |
1021 | 1.55k | } |
1022 | | |
1023 | | static int PTRCALL |
1024 | | element5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1025 | 1.65k | const ENCODING *enc) { |
1026 | 1.65k | UNUSED_P(ptr); |
1027 | 1.65k | UNUSED_P(end); |
1028 | 1.65k | UNUSED_P(enc); |
1029 | 1.65k | switch (tok) { |
1030 | 489 | case XML_TOK_PROLOG_S: |
1031 | 489 | return XML_ROLE_ELEMENT_NONE; |
1032 | 214 | case XML_TOK_CLOSE_PAREN_ASTERISK: |
1033 | 214 | state->handler = declClose; |
1034 | 214 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1035 | 214 | return XML_ROLE_GROUP_CLOSE_REP; |
1036 | 738 | case XML_TOK_OR: |
1037 | 738 | state->handler = element4; |
1038 | 738 | return XML_ROLE_ELEMENT_NONE; |
1039 | 1.65k | } |
1040 | 214 | return common(state, tok); |
1041 | 1.65k | } |
1042 | | |
1043 | | static int PTRCALL |
1044 | | element6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1045 | 6.32M | const ENCODING *enc) { |
1046 | 6.32M | UNUSED_P(ptr); |
1047 | 6.32M | UNUSED_P(end); |
1048 | 6.32M | UNUSED_P(enc); |
1049 | 6.32M | switch (tok) { |
1050 | 123k | case XML_TOK_PROLOG_S: |
1051 | 123k | return XML_ROLE_ELEMENT_NONE; |
1052 | 5.84M | case XML_TOK_OPEN_PAREN: |
1053 | 5.84M | state->level += 1; |
1054 | 5.84M | return XML_ROLE_GROUP_OPEN; |
1055 | 237k | case XML_TOK_NAME: |
1056 | 239k | case XML_TOK_PREFIXED_NAME: |
1057 | 239k | state->handler = element7; |
1058 | 239k | return XML_ROLE_CONTENT_ELEMENT; |
1059 | 5.06k | case XML_TOK_NAME_QUESTION: |
1060 | 5.06k | state->handler = element7; |
1061 | 5.06k | return XML_ROLE_CONTENT_ELEMENT_OPT; |
1062 | 11.1k | case XML_TOK_NAME_ASTERISK: |
1063 | 11.1k | state->handler = element7; |
1064 | 11.1k | return XML_ROLE_CONTENT_ELEMENT_REP; |
1065 | 98.2k | case XML_TOK_NAME_PLUS: |
1066 | 98.2k | state->handler = element7; |
1067 | 98.2k | return XML_ROLE_CONTENT_ELEMENT_PLUS; |
1068 | 6.32M | } |
1069 | 325 | return common(state, tok); |
1070 | 6.32M | } |
1071 | | |
1072 | | static int PTRCALL |
1073 | | element7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1074 | 746k | const ENCODING *enc) { |
1075 | 746k | UNUSED_P(ptr); |
1076 | 746k | UNUSED_P(end); |
1077 | 746k | UNUSED_P(enc); |
1078 | 746k | switch (tok) { |
1079 | 6.11k | case XML_TOK_PROLOG_S: |
1080 | 6.11k | return XML_ROLE_ELEMENT_NONE; |
1081 | 162k | case XML_TOK_CLOSE_PAREN: |
1082 | 162k | state->level -= 1; |
1083 | 162k | if (state->level == 0) { |
1084 | 1.54k | state->handler = declClose; |
1085 | 1.54k | state->role_none = XML_ROLE_ELEMENT_NONE; |
1086 | 1.54k | } |
1087 | 162k | return XML_ROLE_GROUP_CLOSE; |
1088 | 5.73k | case XML_TOK_CLOSE_PAREN_ASTERISK: |
1089 | 5.73k | state->level -= 1; |
1090 | 5.73k | if (state->level == 0) { |
1091 | 1.08k | state->handler = declClose; |
1092 | 1.08k | state->role_none = XML_ROLE_ELEMENT_NONE; |
1093 | 1.08k | } |
1094 | 5.73k | return XML_ROLE_GROUP_CLOSE_REP; |
1095 | 81.2k | case XML_TOK_CLOSE_PAREN_QUESTION: |
1096 | 81.2k | state->level -= 1; |
1097 | 81.2k | if (state->level == 0) { |
1098 | 386 | state->handler = declClose; |
1099 | 386 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1100 | 386 | } |
1101 | 81.2k | return XML_ROLE_GROUP_CLOSE_OPT; |
1102 | 137k | case XML_TOK_CLOSE_PAREN_PLUS: |
1103 | 137k | state->level -= 1; |
1104 | 137k | if (state->level == 0) { |
1105 | 226 | state->handler = declClose; |
1106 | 226 | state->role_none = XML_ROLE_ELEMENT_NONE; |
1107 | 226 | } |
1108 | 137k | return XML_ROLE_GROUP_CLOSE_PLUS; |
1109 | 235k | case XML_TOK_COMMA: |
1110 | 235k | state->handler = element6; |
1111 | 235k | return XML_ROLE_GROUP_SEQUENCE; |
1112 | 117k | case XML_TOK_OR: |
1113 | 117k | state->handler = element6; |
1114 | 117k | return XML_ROLE_GROUP_CHOICE; |
1115 | 746k | } |
1116 | 312 | return common(state, tok); |
1117 | 746k | } |
1118 | | |
1119 | | #ifdef XML_DTD |
1120 | | |
1121 | | static int PTRCALL |
1122 | | condSect0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1123 | 2.38k | const ENCODING *enc) { |
1124 | 2.38k | switch (tok) { |
1125 | 328 | case XML_TOK_PROLOG_S: |
1126 | 328 | return XML_ROLE_NONE; |
1127 | 1.46k | case XML_TOK_NAME: |
1128 | 1.46k | if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) { |
1129 | 119 | state->handler = condSect1; |
1130 | 119 | return XML_ROLE_NONE; |
1131 | 119 | } |
1132 | 1.35k | if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) { |
1133 | 1.07k | state->handler = condSect2; |
1134 | 1.07k | return XML_ROLE_NONE; |
1135 | 1.07k | } |
1136 | 273 | break; |
1137 | 2.38k | } |
1138 | 863 | return common(state, tok); |
1139 | 2.38k | } |
1140 | | |
1141 | | static int PTRCALL |
1142 | | condSect1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1143 | 980 | const ENCODING *enc) { |
1144 | 980 | UNUSED_P(ptr); |
1145 | 980 | UNUSED_P(end); |
1146 | 980 | UNUSED_P(enc); |
1147 | 980 | switch (tok) { |
1148 | 370 | case XML_TOK_PROLOG_S: |
1149 | 370 | return XML_ROLE_NONE; |
1150 | 109 | case XML_TOK_OPEN_BRACKET: |
1151 | 109 | state->handler = externalSubset1; |
1152 | 109 | state->includeLevel += 1; |
1153 | 109 | return XML_ROLE_NONE; |
1154 | 980 | } |
1155 | 501 | return common(state, tok); |
1156 | 980 | } |
1157 | | |
1158 | | static int PTRCALL |
1159 | | condSect2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1160 | 1.76k | const ENCODING *enc) { |
1161 | 1.76k | UNUSED_P(ptr); |
1162 | 1.76k | UNUSED_P(end); |
1163 | 1.76k | UNUSED_P(enc); |
1164 | 1.76k | switch (tok) { |
1165 | 275 | case XML_TOK_PROLOG_S: |
1166 | 275 | return XML_ROLE_NONE; |
1167 | 1.05k | case XML_TOK_OPEN_BRACKET: |
1168 | 1.05k | state->handler = externalSubset1; |
1169 | 1.05k | return XML_ROLE_IGNORE_SECT; |
1170 | 1.76k | } |
1171 | 435 | return common(state, tok); |
1172 | 1.76k | } |
1173 | | |
1174 | | #endif /* XML_DTD */ |
1175 | | |
1176 | | static int PTRCALL |
1177 | | declClose(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1178 | 16.5k | const ENCODING *enc) { |
1179 | 16.5k | UNUSED_P(ptr); |
1180 | 16.5k | UNUSED_P(end); |
1181 | 16.5k | UNUSED_P(enc); |
1182 | 16.5k | switch (tok) { |
1183 | 1.08k | case XML_TOK_PROLOG_S: |
1184 | 1.08k | return state->role_none; |
1185 | 14.8k | case XML_TOK_DECL_CLOSE: |
1186 | 14.8k | setTopLevel(state); |
1187 | 14.8k | return state->role_none; |
1188 | 16.5k | } |
1189 | 577 | return common(state, tok); |
1190 | 16.5k | } |
1191 | | |
1192 | | /* This function will only be invoked if the internal logic of the |
1193 | | * parser has broken down. It is used in two cases: |
1194 | | * |
1195 | | * 1: When the XML prolog has been finished. At this point the |
1196 | | * processor (the parser level above these role handlers) should |
1197 | | * switch from prologProcessor to contentProcessor and reinitialise |
1198 | | * the handler function. |
1199 | | * |
1200 | | * 2: When an error has been detected (via common() below). At this |
1201 | | * point again the processor should be switched to errorProcessor, |
1202 | | * which will never call a handler. |
1203 | | * |
1204 | | * The result of this is that error() can only be called if the |
1205 | | * processor switch failed to happen, which is an internal error and |
1206 | | * therefore we shouldn't be able to provoke it simply by using the |
1207 | | * library. It is a necessary backstop, however, so we merely exclude |
1208 | | * it from the coverage statistics. |
1209 | | * |
1210 | | * LCOV_EXCL_START |
1211 | | */ |
1212 | | static int PTRCALL |
1213 | | error(PROLOG_STATE *state, int tok, const char *ptr, const char *end, |
1214 | 0 | const ENCODING *enc) { |
1215 | 0 | UNUSED_P(state); |
1216 | 0 | UNUSED_P(tok); |
1217 | 0 | UNUSED_P(ptr); |
1218 | 0 | UNUSED_P(end); |
1219 | 0 | UNUSED_P(enc); |
1220 | 0 | return XML_ROLE_NONE; |
1221 | 0 | } |
1222 | | /* LCOV_EXCL_STOP */ |
1223 | | |
1224 | | static int FASTCALL |
1225 | 32.4k | common(PROLOG_STATE *state, int tok) { |
1226 | 32.4k | #ifdef XML_DTD |
1227 | 32.4k | if (! state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF) |
1228 | 11.6k | return XML_ROLE_INNER_PARAM_ENTITY_REF; |
1229 | | #else |
1230 | | UNUSED_P(tok); |
1231 | | #endif |
1232 | 20.7k | state->handler = error; |
1233 | 20.7k | return XML_ROLE_ERROR; |
1234 | 32.4k | } |
1235 | | |
1236 | | void |
1237 | 103k | XmlPrologStateInit(PROLOG_STATE *state) { |
1238 | 103k | state->handler = prolog0; |
1239 | 103k | #ifdef XML_DTD |
1240 | 103k | state->documentEntity = 1; |
1241 | 103k | state->includeLevel = 0; |
1242 | 103k | state->inEntityValue = 0; |
1243 | 103k | #endif /* XML_DTD */ |
1244 | 103k | } |
1245 | | |
1246 | | #ifdef XML_DTD |
1247 | | |
1248 | | void |
1249 | 21.3k | XmlPrologStateInitExternalEntity(PROLOG_STATE *state) { |
1250 | 21.3k | state->handler = externalSubset0; |
1251 | 21.3k | state->documentEntity = 0; |
1252 | 21.3k | state->includeLevel = 0; |
1253 | 21.3k | } |
1254 | | |
1255 | | #endif /* XML_DTD */ |