Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/parser/html/nsHtml5Tokenizer.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2005-2007 Henri Sivonen
3
 * Copyright (c) 2007-2015 Mozilla Foundation
4
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
5
 * Foundation, and Opera Software ASA.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the "Software"),
9
 * to deal in the Software without restriction, including without limitation
10
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11
 * and/or sell copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included in
15
 * all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
 * DEALINGS IN THE SOFTWARE.
24
 */
25
26
/*
27
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
28
 * Please edit Tokenizer.java instead and regenerate.
29
 */
30
31
#ifndef nsHtml5Tokenizer_h
32
#define nsHtml5Tokenizer_h
33
34
#include "nsAtom.h"
35
#include "nsHtml5AtomTable.h"
36
#include "nsHtml5String.h"
37
#include "nsIContent.h"
38
#include "nsTraceRefcnt.h"
39
#include "jArray.h"
40
#include "nsHtml5DocumentMode.h"
41
#include "nsHtml5ArrayCopy.h"
42
#include "nsHtml5NamedCharacters.h"
43
#include "nsHtml5NamedCharactersAccel.h"
44
#include "nsGkAtoms.h"
45
#include "nsAHtml5TreeBuilderState.h"
46
#include "nsHtml5Macros.h"
47
#include "nsHtml5Highlighter.h"
48
#include "nsHtml5TokenizerLoopPolicies.h"
49
50
class nsHtml5StreamParser;
51
52
class nsHtml5AttributeName;
53
class nsHtml5ElementName;
54
class nsHtml5TreeBuilder;
55
class nsHtml5MetaScanner;
56
class nsHtml5UTF16Buffer;
57
class nsHtml5StateSnapshot;
58
class nsHtml5Portability;
59
60
class nsHtml5Tokenizer
61
{
62
private:
63
  // Bit mask with every bit set except bit 0 (~1). Of the state constants
  // declared in this class, only DATA (0) and RCDATA (1) yield 0 when ANDed
  // with it. The use site is not visible in this header.
  static const int32_t DATA_AND_RCDATA_MASK = ~1;
64
65
public:
66
  static const int32_t DATA = 0;
67
68
  static const int32_t RCDATA = 1;
69
70
  static const int32_t SCRIPT_DATA = 2;
71
72
  static const int32_t RAWTEXT = 3;
73
74
  static const int32_t SCRIPT_DATA_ESCAPED = 4;
75
76
  static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
77
78
  static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
79
80
  static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;
81
82
  static const int32_t PLAINTEXT = 8;
83
84
  static const int32_t TAG_OPEN = 9;
85
86
  static const int32_t CLOSE_TAG_OPEN = 10;
87
88
  static const int32_t TAG_NAME = 11;
89
90
  static const int32_t BEFORE_ATTRIBUTE_NAME = 12;
91
92
  static const int32_t ATTRIBUTE_NAME = 13;
93
94
  static const int32_t AFTER_ATTRIBUTE_NAME = 14;
95
96
  static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;
97
98
  static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
99
100
  static const int32_t BOGUS_COMMENT = 17;
101
102
  static const int32_t MARKUP_DECLARATION_OPEN = 18;
103
104
  static const int32_t DOCTYPE = 19;
105
106
  static const int32_t BEFORE_DOCTYPE_NAME = 20;
107
108
  static const int32_t DOCTYPE_NAME = 21;
109
110
  static const int32_t AFTER_DOCTYPE_NAME = 22;
111
112
  static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
113
114
  static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
115
116
  static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
117
118
  static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
119
120
  static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
121
122
  static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
123
124
  static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
125
126
  static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
127
128
  static const int32_t BOGUS_DOCTYPE = 31;
129
130
  static const int32_t COMMENT_START = 32;
131
132
  static const int32_t COMMENT_START_DASH = 33;
133
134
  static const int32_t COMMENT = 34;
135
136
  static const int32_t COMMENT_END_DASH = 35;
137
138
  static const int32_t COMMENT_END = 36;
139
140
  static const int32_t COMMENT_END_BANG = 37;
141
142
  static const int32_t NON_DATA_END_TAG_NAME = 38;
143
144
  static const int32_t MARKUP_DECLARATION_HYPHEN = 39;
145
146
  static const int32_t MARKUP_DECLARATION_OCTYPE = 40;
147
148
  static const int32_t DOCTYPE_UBLIC = 41;
149
150
  static const int32_t DOCTYPE_YSTEM = 42;
151
152
  static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
153
154
  static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
155
156
  static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
157
158
  static const int32_t CONSUME_CHARACTER_REFERENCE = 46;
159
160
  static const int32_t CONSUME_NCR = 47;
161
162
  static const int32_t CHARACTER_REFERENCE_TAIL = 48;
163
164
  static const int32_t HEX_NCR_LOOP = 49;
165
166
  // NOTE(review): "NRC" looks like a transposition of "NCR" (compare
  // CONSUME_NCR, HEX_NCR_LOOP and HANDLE_NCR_VALUE nearby). The name is public
  // and this file is generated from Tokenizer.java, so any rename would have
  // to happen there.
  static const int32_t DECIMAL_NRC_LOOP = 50;
167
168
  static const int32_t HANDLE_NCR_VALUE = 51;
169
170
  static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;
171
172
  static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;
173
174
  static const int32_t SELF_CLOSING_START_TAG = 54;
175
176
  static const int32_t CDATA_START = 55;
177
178
  static const int32_t CDATA_SECTION = 56;
179
180
  static const int32_t CDATA_RSQB = 57;
181
182
  static const int32_t CDATA_RSQB_RSQB = 58;
183
184
  static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;
185
186
  static const int32_t SCRIPT_DATA_ESCAPE_START = 60;
187
188
  static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;
189
190
  static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;
191
192
  static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
193
194
  static const int32_t BOGUS_COMMENT_HYPHEN = 64;
195
196
  static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
197
198
  static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
199
200
  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
201
202
  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;
203
204
  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
205
206
  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
207
208
  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
209
210
  static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
211
212
  static const int32_t PROCESSING_INSTRUCTION = 73;
213
214
  static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
215
216
private:
217
  // Evaluates to 0xD7C0 (0xD800 - 0x40). This has the form of the usual
  // constant for deriving a UTF-16 lead surrogate as
  // LEAD_OFFSET + (codePoint >> 10); presumably used that way for astral
  // characters -- the use site is not in this header, confirm in the .cpp.
  static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
218
219
  static char16_t LT_GT[];
220
  static char16_t LT_SOLIDUS[];
221
  static char16_t RSQB_RSQB[];
222
  static char16_t REPLACEMENT_CHARACTER[];
223
  static char16_t LF[];
224
  static char16_t CDATA_LSQB[];
225
  static char16_t OCTYPE[];
226
  static char16_t UBLIC[];
227
  static char16_t YSTEM[];
228
  static staticJArray<char16_t, int32_t> TITLE_ARR;
229
  static staticJArray<char16_t, int32_t> SCRIPT_ARR;
230
  static staticJArray<char16_t, int32_t> STYLE_ARR;
231
  static staticJArray<char16_t, int32_t> PLAINTEXT_ARR;
232
  static staticJArray<char16_t, int32_t> XMP_ARR;
233
  static staticJArray<char16_t, int32_t> TEXTAREA_ARR;
234
  static staticJArray<char16_t, int32_t> IFRAME_ARR;
235
  static staticJArray<char16_t, int32_t> NOEMBED_ARR;
236
  static staticJArray<char16_t, int32_t> NOSCRIPT_ARR;
237
  static staticJArray<char16_t, int32_t> NOFRAMES_ARR;
238
239
protected:
240
  nsHtml5TreeBuilder* tokenHandler;
241
  nsHtml5StreamParser* encodingDeclarationHandler;
242
  bool lastCR;
243
  int32_t stateSave;
244
245
private:
246
  int32_t returnStateSave;
247
248
protected:
249
  int32_t index;
250
251
private:
252
  bool forceQuirks;
253
  char16_t additional;
254
  int32_t entCol;
255
  int32_t firstCharKey;
256
  int32_t lo;
257
  int32_t hi;
258
  int32_t candidate;
259
  int32_t charRefBufMark;
260
261
protected:
262
  int32_t value;
263
264
private:
265
  bool seenDigits;
266
267
protected:
268
  int32_t cstart;
269
270
private:
271
  nsHtml5String publicId;
272
  nsHtml5String systemId;
273
  autoJArray<char16_t, int32_t> strBuf;
274
  int32_t strBufLen;
275
  autoJArray<char16_t, int32_t> charRefBuf;
276
  int32_t charRefBufLen;
277
  autoJArray<char16_t, int32_t> bmpChar;
278
  autoJArray<char16_t, int32_t> astralChar;
279
280
protected:
281
  nsHtml5ElementName* endTagExpectation;
282
283
private:
284
  jArray<char16_t, int32_t> endTagExpectationAsArray;
285
286
protected:
287
  bool endTag;
288
289
private:
290
  bool containsHyphen;
291
  nsHtml5ElementName* tagName;
292
  nsHtml5ElementName* nonInternedTagName;
293
294
protected:
295
  nsHtml5AttributeName* attributeName;
296
297
private:
298
  nsHtml5AttributeName* nonInternedAttributeName;
299
  nsAtom* doctypeName;
300
  nsHtml5String publicIdentifier;
301
  nsHtml5String systemIdentifier;
302
  nsHtml5HtmlAttributes* attributes;
303
  bool newAttributesEachTime;
304
  bool shouldSuspend;
305
306
protected:
307
  bool confident;
308
309
private:
310
  int32_t line;
311
  int32_t attributeLine;
312
  nsHtml5AtomTable* interner;
313
  bool viewingXmlSource;
314
315
public:
316
  nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
317
  void setInterner(nsHtml5AtomTable* interner);
318
  void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
319
  bool isViewingXmlSource();
320
  void setStateAndEndTagExpectation(int32_t specialTokenizerState,
321
                                    nsAtom* endTagExpectation);
322
  void setStateAndEndTagExpectation(int32_t specialTokenizerState,
323
                                    nsHtml5ElementName* endTagExpectation);
324
325
private:
326
  void endTagExpectationToArray();
327
328
public:
329
  void setLineNumber(int32_t line);
330
0
  // Returns the current value of the private `line` member.
  inline int32_t getLineNumber() { return line; }
331
332
  nsHtml5HtmlAttributes* emptyAttributes();
333
334
private:
335
  // Stores `c` at charRefBuf[charRefBufLen] and advances charRefBufLen by one.
  // The write is guarded by MOZ_RELEASE_ASSERT(charRefBufLen <
  // charRefBuf.length); unlike the single-character appendStrBuf, no attempt
  // is made here to grow the buffer.
  inline void appendCharRefBuf(char16_t c)
336
0
  {
337
0
    MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
338
0
                       "Attempted to overrun charRefBuf!");
339
0
    charRefBuf[charRefBufLen++] = c;
340
0
  }
341
342
  void emitOrAppendCharRefBuf(int32_t returnState);
343
0
  // Marks strBuf as empty by resetting strBufLen to 0; the buffer contents
  // themselves are not touched.
  inline void clearStrBufAfterUse() { strBufLen = 0; }
344
345
  // Resets strBufLen to 0 before strBuf is reused. The MOZ_ASSERT expects the
  // length to be zero already, so the assignment only changes anything when
  // that expectation is violated (presumably in builds where MOZ_ASSERT is
  // compiled out -- confirm).
  inline void clearStrBufBeforeUse()
346
0
  {
347
0
    MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!");
348
0
    strBufLen = 0;
349
0
  }
350
351
  // Resets strBufLen to 0. The two MOZ_ASSERTs state the expected
  // precondition: strBuf currently holds exactly one character and that
  // character is '-'.
  inline void clearStrBufAfterOneHyphen()
352
0
  {
353
0
    MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!");
354
0
    MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!");
355
0
    strBufLen = 0;
356
0
  }
357
358
  // Stores `c` at strBuf[strBufLen] and advances strBufLen by one.
  //
  // The MOZ_ASSERT expects that room was already reserved
  // (strBufLen < strBuf.length). The following branch handles the case the
  // assertion rules out: if the buffer is exactly full it calls
  // EnsureBufferSpace(1) and MOZ_CRASHes when that returns false.
  // NOTE(review): the branch is presumably a fallback for builds where
  // MOZ_ASSERT is compiled out -- confirm. EnsureBufferSpace is not declared
  // in the visible part of this header (presumably it comes from
  // nsHtml5TokenizerHSupplement.h -- confirm).
  inline void appendStrBuf(char16_t c)
359
0
  {
360
0
    MOZ_ASSERT(strBufLen < strBuf.length,
361
0
               "Previous buffer length insufficient.");
362
0
    if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
363
0
      if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
364
0
        MOZ_CRASH("Unable to recover from buffer reallocation failure");
365
0
      }
366
0
    }
367
0
    strBuf[strBufLen++] = c;
368
0
  }
369
370
protected:
371
  nsHtml5String strBufToString();
372
373
private:
374
  void strBufToDoctypeName();
375
  void emitStrBuf();
376
0
  // Appends a single '-' to strBuf via appendStrBuf(char16_t).
  inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); }
377
378
  // Calls errConsecutiveHyphens() and then appends `c` to strBuf.
  // errConsecutiveHyphens is not declared in the visible part of this header
  // (presumably an error-reporting hook from the supplement header --
  // confirm). Despite the "adjust" in the name, nothing besides the error
  // call and the append happens here.
  inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
379
0
  {
380
0
    errConsecutiveHyphens();
381
0
    appendStrBuf(c);
382
0
  }
383
384
  void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
385
  // Hands the first charRefBufLen elements of charRefBuf (offset 0) to the
  // (buffer, offset, length) overload of appendStrBuf, then marks charRefBuf
  // as empty by resetting charRefBufLen to 0.
  inline void appendCharRefBufToStrBuf()
386
0
  {
387
0
    appendStrBuf(charRefBuf, 0, charRefBufLen);
388
0
    charRefBufLen = 0;
389
0
  }
390
391
  void emitComment(int32_t provisionalHyphens, int32_t pos);
392
393
protected:
394
  void flushChars(char16_t* buf, int32_t pos);
395
396
private:
397
  void strBufToElementNameString();
398
  int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
399
  void attributeNameComplete();
400
  void addAttributeWithoutValue();
401
  void addAttributeWithValue();
402
403
public:
404
  void start();
405
  bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
406
407
private:
408
  template<class P>
409
  int32_t stateLoop(int32_t state,
410
                    char16_t c,
411
                    int32_t pos,
412
                    char16_t* buf,
413
                    bool reconsume,
414
                    int32_t returnState,
415
                    int32_t endPos);
416
  void initDoctypeFields();
417
  // Carriage-return variant: runs silentCarriageReturn() (bumps `line`, sets
  // lastCR) and then forwards '\n' -- not '\r' -- to
  // adjustDoubleHyphenAndAppendToStrBufAndErr.
  inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
418
0
  {
419
0
    silentCarriageReturn();
420
0
    adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
421
0
  }
422
423
  // Line-feed variant: runs silentLineFeed() (bumps `line`) and then forwards
  // '\n' to adjustDoubleHyphenAndAppendToStrBufAndErr.
  inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
424
0
  {
425
0
    silentLineFeed();
426
0
    adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
427
0
  }
428
429
  // Runs silentLineFeed() (bumps `line`) and appends '\n' to strBuf.
  inline void appendStrBufLineFeed()
430
0
  {
431
0
    silentLineFeed();
432
0
    appendStrBuf('\n');
433
0
  }
434
435
  // Runs silentCarriageReturn() (bumps `line`, sets lastCR) and appends
  // '\n' -- not '\r' -- to strBuf.
  inline void appendStrBufCarriageReturn()
436
0
  {
437
0
    silentCarriageReturn();
438
0
    appendStrBuf('\n');
439
0
  }
440
441
protected:
442
  // Records a carriage return without emitting anything: increments `line`
  // and sets lastCR to true.
  inline void silentCarriageReturn()
443
0
  {
444
0
    ++line;
445
0
    lastCR = true;
446
0
  }
447
448
0
  // Records a line feed without emitting anything: increments `line`.
  // lastCR is left unchanged.
  inline void silentLineFeed() { ++line; }
449
450
private:
451
  void emitCarriageReturn(char16_t* buf, int32_t pos);
452
  void emitReplacementCharacter(char16_t* buf, int32_t pos);
453
  void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
454
  void setAdditionalAndRememberAmpersandLocation(char16_t add);
455
  void bogusDoctype();
456
  void bogusDoctypeWithoutQuirks();
457
  void handleNcrValue(int32_t returnState);
458
459
public:
460
  void eof();
461
462
private:
463
  void emitDoctypeToken(int32_t pos);
464
465
protected:
466
0
  // Returns buf[pos] unchanged. No bounds or validity checking is performed
  // here; the caller must pass an in-range `pos`.
  inline char16_t checkChar(char16_t* buf, int32_t pos) { return buf[pos]; }
467
468
public:
469
  bool internalEncodingDeclaration(nsHtml5String internalCharset);
470
471
private:
472
  void emitOrAppendTwo(const char16_t* val, int32_t returnState);
473
  void emitOrAppendOne(const char16_t* val, int32_t returnState);
474
475
public:
476
  void end();
477
  void requestSuspension();
478
  bool isInDataState();
479
  void resetToDataState();
480
  void loadState(nsHtml5Tokenizer* other);
481
  void initializeWithoutStarting();
482
  void setEncodingDeclarationHandler(
483
    nsHtml5StreamParser* encodingDeclarationHandler);
484
  ~nsHtml5Tokenizer();
485
  static void initializeStatics();
486
  static void releaseStatics();
487
488
#include "nsHtml5TokenizerHSupplement.h"
489
};
490
491
#endif