/src/mozilla-central/parser/html/nsHtml5Tokenizer.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2005-2007 Henri Sivonen |
3 | | * Copyright (c) 2007-2015 Mozilla Foundation |
4 | | * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla |
5 | | * Foundation, and Opera Software ASA. |
6 | | * |
7 | | * Permission is hereby granted, free of charge, to any person obtaining a |
8 | | * copy of this software and associated documentation files (the "Software"), |
9 | | * to deal in the Software without restriction, including without limitation |
10 | | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
11 | | * and/or sell copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following conditions: |
13 | | * |
14 | | * The above copyright notice and this permission notice shall be included in |
15 | | * all copies or substantial portions of the Software. |
16 | | * |
17 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | * DEALINGS IN THE SOFTWARE. |
24 | | */ |
25 | | |
26 | | /* |
27 | | * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. |
28 | | * Please edit Tokenizer.java instead and regenerate. |
29 | | */ |
30 | | |
31 | | #ifndef nsHtml5Tokenizer_h |
32 | | #define nsHtml5Tokenizer_h |
33 | | |
34 | | #include "nsAtom.h" |
35 | | #include "nsHtml5AtomTable.h" |
36 | | #include "nsHtml5String.h" |
37 | | #include "nsIContent.h" |
38 | | #include "nsTraceRefcnt.h" |
39 | | #include "jArray.h" |
40 | | #include "nsHtml5DocumentMode.h" |
41 | | #include "nsHtml5ArrayCopy.h" |
42 | | #include "nsHtml5NamedCharacters.h" |
43 | | #include "nsHtml5NamedCharactersAccel.h" |
44 | | #include "nsGkAtoms.h" |
45 | | #include "nsAHtml5TreeBuilderState.h" |
46 | | #include "nsHtml5Macros.h" |
47 | | #include "nsHtml5Highlighter.h" |
48 | | #include "nsHtml5TokenizerLoopPolicies.h" |
49 | | |
50 | | class nsHtml5StreamParser; /* Forward declarations (incomplete types) for classes that the tokenizer class below names in pointer members and parameters. */ |
51 | | /* NOTE(review): nsHtml5MetaScanner, nsHtml5StateSnapshot and nsHtml5Portability are not referenced in the visible class body; presumably needed by the supplement included inside the class or kept by the generator - confirm. */ |
52 | | class nsHtml5AttributeName; |
53 | | class nsHtml5ElementName; |
54 | | class nsHtml5TreeBuilder; |
55 | | class nsHtml5MetaScanner; |
56 | | class nsHtml5UTF16Buffer; |
57 | | class nsHtml5StateSnapshot; |
58 | | class nsHtml5Portability; |
59 | | |
60 | | class nsHtml5Tokenizer /* Tokenizer class declaration: state-id constants, working buffers, small inline helpers and entry points; most members are only declared here and defined out of line. */ |
61 | | { |
62 | | private: |
63 | | static const int32_t DATA_AND_RCDATA_MASK = ~1; /* Every bit set except bit 0: (s & mask) == 0 holds only for s == DATA (0) or s == RCDATA (1). */ |
64 | | /* Integer state identifiers, numbered 0 through 74 without gaps. NOTE(review): presumably the values passed to stateLoop() as state/returnState - confirm in the .cpp. */ |
65 | | public: |
66 | | static const int32_t DATA = 0; |
67 | | |
68 | | static const int32_t RCDATA = 1; |
69 | | |
70 | | static const int32_t SCRIPT_DATA = 2; |
71 | | |
72 | | static const int32_t RAWTEXT = 3; |
73 | | |
74 | | static const int32_t SCRIPT_DATA_ESCAPED = 4; |
75 | | |
76 | | static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5; |
77 | | |
78 | | static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6; |
79 | | |
80 | | static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7; |
81 | | |
82 | | static const int32_t PLAINTEXT = 8; |
83 | | |
84 | | static const int32_t TAG_OPEN = 9; |
85 | | |
86 | | static const int32_t CLOSE_TAG_OPEN = 10; |
87 | | |
88 | | static const int32_t TAG_NAME = 11; |
89 | | |
90 | | static const int32_t BEFORE_ATTRIBUTE_NAME = 12; |
91 | | |
92 | | static const int32_t ATTRIBUTE_NAME = 13; |
93 | | |
94 | | static const int32_t AFTER_ATTRIBUTE_NAME = 14; |
95 | | |
96 | | static const int32_t BEFORE_ATTRIBUTE_VALUE = 15; |
97 | | |
98 | | static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16; |
99 | | |
100 | | static const int32_t BOGUS_COMMENT = 17; |
101 | | |
102 | | static const int32_t MARKUP_DECLARATION_OPEN = 18; |
103 | | |
104 | | static const int32_t DOCTYPE = 19; |
105 | | |
106 | | static const int32_t BEFORE_DOCTYPE_NAME = 20; |
107 | | |
108 | | static const int32_t DOCTYPE_NAME = 21; |
109 | | |
110 | | static const int32_t AFTER_DOCTYPE_NAME = 22; |
111 | | |
112 | | static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23; |
113 | | |
114 | | static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24; |
115 | | |
116 | | static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25; |
117 | | |
118 | | static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26; |
119 | | |
120 | | static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27; |
121 | | |
122 | | static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28; |
123 | | |
124 | | static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29; |
125 | | |
126 | | static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30; |
127 | | |
128 | | static const int32_t BOGUS_DOCTYPE = 31; |
129 | | |
130 | | static const int32_t COMMENT_START = 32; |
131 | | |
132 | | static const int32_t COMMENT_START_DASH = 33; |
133 | | |
134 | | static const int32_t COMMENT = 34; |
135 | | |
136 | | static const int32_t COMMENT_END_DASH = 35; |
137 | | |
138 | | static const int32_t COMMENT_END = 36; |
139 | | |
140 | | static const int32_t COMMENT_END_BANG = 37; |
141 | | |
142 | | static const int32_t NON_DATA_END_TAG_NAME = 38; |
143 | | |
144 | | static const int32_t MARKUP_DECLARATION_HYPHEN = 39; |
145 | | /* NOTE(review): the names of states 40-42 omit a leading letter (OCTYPE/UBLIC/YSTEM, matching the static arrays declared further down); presumably the first letter is matched before these states are entered - confirm. */ |
146 | | static const int32_t MARKUP_DECLARATION_OCTYPE = 40; |
147 | | |
148 | | static const int32_t DOCTYPE_UBLIC = 41; |
149 | | |
150 | | static const int32_t DOCTYPE_YSTEM = 42; |
151 | | |
152 | | static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43; |
153 | | |
154 | | static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44; |
155 | | |
156 | | static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45; |
157 | | |
158 | | static const int32_t CONSUME_CHARACTER_REFERENCE = 46; |
159 | | |
160 | | static const int32_t CONSUME_NCR = 47; |
161 | | |
162 | | static const int32_t CHARACTER_REFERENCE_TAIL = 48; |
163 | | |
164 | | static const int32_t HEX_NCR_LOOP = 49; |
165 | | /* NOTE(review): "NRC" in the next name looks like a transposition of the "NCR" spelling used by the neighbouring constants; the name is public, so it is left unchanged. */ |
166 | | static const int32_t DECIMAL_NRC_LOOP = 50; |
167 | | |
168 | | static const int32_t HANDLE_NCR_VALUE = 51; |
169 | | |
170 | | static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52; |
171 | | |
172 | | static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53; |
173 | | |
174 | | static const int32_t SELF_CLOSING_START_TAG = 54; |
175 | | |
176 | | static const int32_t CDATA_START = 55; |
177 | | |
178 | | static const int32_t CDATA_SECTION = 56; |
179 | | |
180 | | static const int32_t CDATA_RSQB = 57; |
181 | | |
182 | | static const int32_t CDATA_RSQB_RSQB = 58; |
183 | | |
184 | | static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59; |
185 | | |
186 | | static const int32_t SCRIPT_DATA_ESCAPE_START = 60; |
187 | | |
188 | | static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61; |
189 | | |
190 | | static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62; |
191 | | |
192 | | static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63; |
193 | | |
194 | | static const int32_t BOGUS_COMMENT_HYPHEN = 64; |
195 | | |
196 | | static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65; |
197 | | |
198 | | static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66; |
199 | | |
200 | | static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67; |
201 | | |
202 | | static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68; |
203 | | |
204 | | static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69; |
205 | | |
206 | | static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70; |
207 | | |
208 | | static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71; |
209 | | |
210 | | static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72; |
211 | | |
212 | | static const int32_t PROCESSING_INSTRUCTION = 73; |
213 | | |
214 | | static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74; |
215 | | |
216 | | private: |
217 | | static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10)); /* Evaluates to 0xD7C0 (0xD800 - 0x40). NOTE(review): presumably the UTF-16 lead-surrogate offset used when splitting astral code points - confirm at the use site. */ |
218 | | /* Static char16_t tables; only declared in this header, their contents are defined elsewhere. */ |
219 | | static char16_t LT_GT[]; |
220 | | static char16_t LT_SOLIDUS[]; |
221 | | static char16_t RSQB_RSQB[]; |
222 | | static char16_t REPLACEMENT_CHARACTER[]; |
223 | | static char16_t LF[]; |
224 | | static char16_t CDATA_LSQB[]; |
225 | | static char16_t OCTYPE[]; |
226 | | static char16_t UBLIC[]; |
227 | | static char16_t YSTEM[]; |
228 | | static staticJArray<char16_t, int32_t> TITLE_ARR; /* The *_ARR members are static arrays named after element names. NOTE(review): presumably set up by initializeStatics() - confirm. */ |
229 | | static staticJArray<char16_t, int32_t> SCRIPT_ARR; |
230 | | static staticJArray<char16_t, int32_t> STYLE_ARR; |
231 | | static staticJArray<char16_t, int32_t> PLAINTEXT_ARR; |
232 | | static staticJArray<char16_t, int32_t> XMP_ARR; |
233 | | static staticJArray<char16_t, int32_t> TEXTAREA_ARR; |
234 | | static staticJArray<char16_t, int32_t> IFRAME_ARR; |
235 | | static staticJArray<char16_t, int32_t> NOEMBED_ARR; |
236 | | static staticJArray<char16_t, int32_t> NOSCRIPT_ARR; |
237 | | static staticJArray<char16_t, int32_t> NOFRAMES_ARR; |
238 | | /* Per-instance state follows; the access level alternates between protected and private from member to member, so declaration order must be kept as is. */ |
239 | | protected: |
240 | | nsHtml5TreeBuilder* tokenHandler; |
241 | | nsHtml5StreamParser* encodingDeclarationHandler; |
242 | | bool lastCR; /* Set to true by silentCarriageReturn(). */ |
243 | | int32_t stateSave; |
244 | | |
245 | | private: |
246 | | int32_t returnStateSave; |
247 | | |
248 | | protected: |
249 | | int32_t index; |
250 | | |
251 | | private: |
252 | | bool forceQuirks; |
253 | | char16_t additional; |
254 | | int32_t entCol; |
255 | | int32_t firstCharKey; |
256 | | int32_t lo; |
257 | | int32_t hi; |
258 | | int32_t candidate; |
259 | | int32_t charRefBufMark; |
260 | | |
261 | | protected: |
262 | | int32_t value; |
263 | | |
264 | | private: |
265 | | bool seenDigits; |
266 | | |
267 | | protected: |
268 | | int32_t cstart; |
269 | | |
270 | | private: |
271 | | nsHtml5String publicId; |
272 | | nsHtml5String systemId; |
273 | | autoJArray<char16_t, int32_t> strBuf; /* Buffer written by appendStrBuf(); strBufLen below is the number of slots currently in use. */ |
274 | | int32_t strBufLen; |
275 | | autoJArray<char16_t, int32_t> charRefBuf; /* Buffer written by appendCharRefBuf(); charRefBufLen below is the number of slots currently in use. */ |
276 | | int32_t charRefBufLen; |
277 | | autoJArray<char16_t, int32_t> bmpChar; |
278 | | autoJArray<char16_t, int32_t> astralChar; |
279 | | |
280 | | protected: |
281 | | nsHtml5ElementName* endTagExpectation; |
282 | | |
283 | | private: |
284 | | jArray<char16_t, int32_t> endTagExpectationAsArray; |
285 | | |
286 | | protected: |
287 | | bool endTag; |
288 | | |
289 | | private: |
290 | | bool containsHyphen; |
291 | | nsHtml5ElementName* tagName; |
292 | | nsHtml5ElementName* nonInternedTagName; |
293 | | |
294 | | protected: |
295 | | nsHtml5AttributeName* attributeName; |
296 | | |
297 | | private: |
298 | | nsHtml5AttributeName* nonInternedAttributeName; |
299 | | nsAtom* doctypeName; |
300 | | nsHtml5String publicIdentifier; |
301 | | nsHtml5String systemIdentifier; |
302 | | nsHtml5HtmlAttributes* attributes; |
303 | | bool newAttributesEachTime; |
304 | | bool shouldSuspend; |
305 | | |
306 | | protected: |
307 | | bool confident; |
308 | | |
309 | | private: |
310 | | int32_t line; /* Line counter: returned by getLineNumber(), incremented by silentLineFeed() and silentCarriageReturn(). */ |
311 | | int32_t attributeLine; |
312 | | nsHtml5AtomTable* interner; |
313 | | bool viewingXmlSource; |
314 | | /* Construction and configuration entry points; declared here and defined out of line. */ |
315 | | public: |
316 | | nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource); |
317 | | void setInterner(nsHtml5AtomTable* interner); |
318 | | void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId); |
319 | | bool isViewingXmlSource(); |
320 | | void setStateAndEndTagExpectation(int32_t specialTokenizerState, |
321 | | nsAtom* endTagExpectation); |
322 | | void setStateAndEndTagExpectation(int32_t specialTokenizerState, |
323 | | nsHtml5ElementName* endTagExpectation); |
324 | | |
325 | | private: |
326 | | void endTagExpectationToArray(); |
327 | | |
328 | | public: |
329 | | void setLineNumber(int32_t line); |
330 | 0 | inline int32_t getLineNumber() { return line; } /* Returns the current value of the line counter. */ |
331 | | |
332 | | nsHtml5HtmlAttributes* emptyAttributes(); |
333 | | |
334 | | private: |
335 | | inline void appendCharRefBuf(char16_t c) /* Stores c at charRefBuf[charRefBufLen] and advances charRefBufLen, after MOZ_RELEASE_ASSERT has checked that a free slot exists. */ |
336 | 0 | { |
337 | 0 | MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, |
338 | 0 | "Attempted to overrun charRefBuf!"); |
339 | 0 | charRefBuf[charRefBufLen++] = c; |
340 | 0 | } |
341 | | |
342 | | void emitOrAppendCharRefBuf(int32_t returnState); |
343 | 0 | inline void clearStrBufAfterUse() { strBufLen = 0; } /* Marks strBuf as empty by resetting its length. */ |
344 | | /* Asserts (MOZ_ASSERT) that strBuf is already empty, then resets the length to 0 regardless. */ |
345 | | inline void clearStrBufBeforeUse() |
346 | 0 | { |
347 | 0 | MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!"); |
348 | 0 | strBufLen = 0; |
349 | 0 | } |
350 | | /* Empties strBuf after asserting that it holds exactly one character and that the character is '-'. */ |
351 | | inline void clearStrBufAfterOneHyphen() |
352 | 0 | { |
353 | 0 | MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!"); |
354 | 0 | MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!"); |
355 | 0 | strBufLen = 0; |
356 | 0 | } |
357 | | /* Appends c to strBuf. The MOZ_ASSERT expects a free slot; if the buffer is nevertheless full, EnsureBufferSpace(1) is called and its failure is fatal (MOZ_CRASH). EnsureBufferSpace is not declared in the visible part of this header - presumably it comes from the supplement included at the end of the class. */ |
358 | | inline void appendStrBuf(char16_t c) |
359 | 0 | { |
360 | 0 | MOZ_ASSERT(strBufLen < strBuf.length, |
361 | 0 | "Previous buffer length insufficient."); |
362 | 0 | if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { |
363 | 0 | if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { |
364 | 0 | MOZ_CRASH("Unable to recover from buffer reallocation failure"); |
365 | 0 | } |
366 | 0 | } |
367 | 0 | strBuf[strBufLen++] = c; |
368 | 0 | } |
369 | | |
370 | | protected: |
371 | | nsHtml5String strBufToString(); |
372 | | |
373 | | private: |
374 | | void strBufToDoctypeName(); |
375 | | void emitStrBuf(); |
376 | 0 | inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); } /* Appends a single '-' to strBuf. */ |
377 | | /* Calls errConsecutiveHyphens() (not declared in the visible part of this header) and then appends c to strBuf. */ |
378 | | inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c) |
379 | 0 | { |
380 | 0 | errConsecutiveHyphens(); |
381 | 0 | appendStrBuf(c); |
382 | 0 | } |
383 | | |
384 | | void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length); |
385 | | inline void appendCharRefBufToStrBuf() /* Passes the first charRefBufLen entries of charRefBuf to the appendStrBuf overload above, then resets charRefBufLen to 0. */ |
386 | 0 | { |
387 | 0 | appendStrBuf(charRefBuf, 0, charRefBufLen); |
388 | 0 | charRefBufLen = 0; |
389 | 0 | } |
390 | | |
391 | | void emitComment(int32_t provisionalHyphens, int32_t pos); |
392 | | |
393 | | protected: |
394 | | void flushChars(char16_t* buf, int32_t pos); |
395 | | |
396 | | private: |
397 | | void strBufToElementNameString(); |
398 | | int32_t emitCurrentTagToken(bool selfClosing, int32_t pos); |
399 | | void attributeNameComplete(); |
400 | | void addAttributeWithoutValue(); |
401 | | void addAttributeWithValue(); |
402 | | |
403 | | public: |
404 | | void start(); |
405 | | bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer); |
406 | | |
407 | | private: |
408 | | template<class P> /* NOTE(review): P is presumably one of the policy classes from nsHtml5TokenizerLoopPolicies.h - confirm in the .cpp. */ |
409 | | int32_t stateLoop(int32_t state, |
410 | | char16_t c, |
411 | | int32_t pos, |
412 | | char16_t* buf, |
413 | | bool reconsume, |
414 | | int32_t returnState, |
415 | | int32_t endPos); |
416 | | void initDoctypeFields(); |
417 | | inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() /* Records a carriage return via silentCarriageReturn(), then appends '\n' (not '\r') through adjustDoubleHyphenAndAppendToStrBufAndErr(). */ |
418 | 0 | { |
419 | 0 | silentCarriageReturn(); |
420 | 0 | adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); |
421 | 0 | } |
422 | | /* Records a line feed via silentLineFeed(), then appends '\n' through adjustDoubleHyphenAndAppendToStrBufAndErr(). */ |
423 | | inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() |
424 | 0 | { |
425 | 0 | silentLineFeed(); |
426 | 0 | adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); |
427 | 0 | } |
428 | | /* Records a line feed via silentLineFeed() and appends '\n' to strBuf. */ |
429 | | inline void appendStrBufLineFeed() |
430 | 0 | { |
431 | 0 | silentLineFeed(); |
432 | 0 | appendStrBuf('\n'); |
433 | 0 | } |
434 | | /* Records a carriage return via silentCarriageReturn() and appends '\n' (not '\r') to strBuf. */ |
435 | | inline void appendStrBufCarriageReturn() |
436 | 0 | { |
437 | 0 | silentCarriageReturn(); |
438 | 0 | appendStrBuf('\n'); |
439 | 0 | } |
440 | | |
441 | | protected: |
442 | | inline void silentCarriageReturn() /* Increments the line counter and sets lastCR. */ |
443 | 0 | { |
444 | 0 | ++line; |
445 | 0 | lastCR = true; |
446 | 0 | } |
447 | | |
448 | 0 | inline void silentLineFeed() { ++line; } /* Increments the line counter; lastCR is not touched. */ |
449 | | |
450 | | private: |
451 | | void emitCarriageReturn(char16_t* buf, int32_t pos); |
452 | | void emitReplacementCharacter(char16_t* buf, int32_t pos); |
453 | | void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); |
454 | | void setAdditionalAndRememberAmpersandLocation(char16_t add); |
455 | | void bogusDoctype(); |
456 | | void bogusDoctypeWithoutQuirks(); |
457 | | void handleNcrValue(int32_t returnState); |
458 | | |
459 | | public: |
460 | | void eof(); |
461 | | |
462 | | private: |
463 | | void emitDoctypeToken(int32_t pos); |
464 | | |
465 | | protected: |
466 | 0 | inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; } /* Returns buf[pos]; no bounds check is performed here. */ |
467 | | |
468 | | public: |
469 | | bool internalEncodingDeclaration(nsHtml5String internalCharset); |
470 | | |
471 | | private: |
472 | | void emitOrAppendTwo(const char16_t* val, int32_t returnState); |
473 | | void emitOrAppendOne(const char16_t* val, int32_t returnState); |
474 | | |
475 | | public: |
476 | | void end(); |
477 | | void requestSuspension(); |
478 | | bool isInDataState(); |
479 | | void resetToDataState(); |
480 | | void loadState(nsHtml5Tokenizer* other); |
481 | | void initializeWithoutStarting(); |
482 | | void setEncodingDeclarationHandler( |
483 | | nsHtml5StreamParser* encodingDeclarationHandler); |
484 | | ~nsHtml5Tokenizer(); |
485 | | static void initializeStatics(); |
486 | | static void releaseStatics(); |
487 | | |
488 | | #include "nsHtml5TokenizerHSupplement.h" /* Textually included inside the class body, so whatever it declares becomes a member of this class. */ |
489 | | }; |
490 | | |
491 | | #endif |