/src/mozilla-central/parser/html/nsHtml5Parser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #ifndef NS_HTML5_PARSER |
7 | | #define NS_HTML5_PARSER |
8 | | |
9 | | #include "nsAutoPtr.h" |
10 | | #include "nsIParser.h" |
11 | | #include "nsDeque.h" |
12 | | #include "nsIURL.h" |
13 | | #include "nsParserCIID.h" |
14 | | #include "nsITokenizer.h" |
15 | | #include "nsIContentSink.h" |
16 | | #include "nsIRequest.h" |
17 | | #include "nsIChannel.h" |
18 | | #include "nsCOMArray.h" |
19 | | #include "nsContentSink.h" |
20 | | #include "nsCycleCollectionParticipant.h" |
21 | | #include "nsIInputStream.h" |
22 | | #include "nsDetectionConfident.h" |
23 | | #include "nsHtml5OwningUTF16Buffer.h" |
24 | | #include "nsHtml5TreeOpExecutor.h" |
25 | | #include "nsHtml5StreamParser.h" |
26 | | #include "nsHtml5AtomTable.h" |
27 | | #include "nsWeakReference.h" |
28 | | #include "nsHtml5StreamListener.h" |
29 | | |
30 | | class nsHtml5Parser final |
31 | | : public nsIParser |
32 | | , public nsSupportsWeakReference |
33 | | { |
34 | | public: |
35 | | NS_DECL_CYCLE_COLLECTING_ISUPPORTS |
36 | | |
37 | | NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser) |
38 | | |
39 | | nsHtml5Parser(); |
40 | | |
41 | | /* Start nsIParser */ |
42 | | /** |
43 | | * No-op for backwards compat. |
44 | | */ |
45 | | NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override; |
46 | | |
47 | | /** |
48 | | * Returns the tree op executor for backwards compat. |
49 | | */ |
50 | | NS_IMETHOD_(nsIContentSink*) GetContentSink() override; |
51 | | |
52 | | /** |
53 | | * Always returns "view" for backwards compat. |
54 | | */ |
55 | | NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override; |
56 | | |
57 | | /** |
58 | | * No-op for backwards compat. |
59 | | */ |
60 | | NS_IMETHOD_(void) SetCommand(const char* aCommand) override; |
61 | | |
62 | | /** |
63 | | * No-op for backwards compat. |
64 | | */ |
65 | | NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override; |
66 | | |
67 | | /** |
68 | | * Call this method once you've created a parser, and want to instruct it |
69 | | * about what charset to load |
70 | | * |
71 | | * @param aEncoding the charset of a document |
72 | | * @param aSource the source of the charset |
73 | | */ |
74 | | virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding, |
75 | | int32_t aSource) override; |
76 | | |
77 | | /** |
78 | | * Get the channel associated with this parser |
79 | | * @param aChannel out param that will contain the result |
80 | | * @return NS_OK if successful or NS_NOT_AVAILABLE if not |
81 | | */ |
82 | | NS_IMETHOD GetChannel(nsIChannel** aChannel) override; |
83 | | |
84 | | /** |
85 | | * Return |this| for backwards compat. |
86 | | */ |
87 | | NS_IMETHOD GetDTD(nsIDTD** aDTD) override; |
88 | | |
89 | | /** |
90 | | * Get the stream listener for this parser |
91 | | */ |
92 | | virtual nsIStreamListener* GetStreamListener() override; |
93 | | |
94 | | /** |
95 | | * Don't call. For interface compat only. |
96 | | */ |
97 | | NS_IMETHOD ContinueInterruptedParsing() override; |
98 | | |
99 | | /** |
100 | | * Blocks the parser. |
101 | | */ |
102 | | NS_IMETHOD_(void) BlockParser() override; |
103 | | |
104 | | /** |
105 | | * Unblocks the parser. |
106 | | */ |
107 | | NS_IMETHOD_(void) UnblockParser() override; |
108 | | |
109 | | /** |
110 | | * Asynchronously continues parsing. |
111 | | */ |
112 | | NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override; |
113 | | |
114 | | /** |
115 | | * Query whether the parser is enabled (i.e. not blocked) or not. |
116 | | */ |
117 | | NS_IMETHOD_(bool) IsParserEnabled() override; |
118 | | |
119 | | /** |
120 | | * Query whether the parser thinks it's done with parsing. |
121 | | */ |
122 | | NS_IMETHOD_(bool) IsComplete() override; |
123 | | |
124 | | /** |
125 | | * Set up request observer. |
126 | | * |
127 | | * @param aURL used for View Source title |
128 | | * @param aListener a listener to forward notifications to |
129 | | * @param aKey the root context key (used for document.write) |
130 | | * @param aMode ignored (for interface compat only) |
131 | | */ |
132 | | NS_IMETHOD Parse(nsIURI* aURL, |
133 | | nsIRequestObserver* aListener = nullptr, |
134 | | void* aKey = 0, |
135 | | nsDTDMode aMode = eDTDMode_autodetect) override; |
136 | | |
137 | | /** |
138 | | * document.write and document.close |
139 | | * |
140 | | * @param aSourceBuffer the argument of document.write (empty for .close()) |
141 | | * @param aKey a key unique to the script element that caused this call |
142 | | * @param aContentType "text/html" for HTML mode, else text/plain mode |
143 | | * @param aLastCall true if .close() false if .write() |
144 | | * @param aMode ignored (for interface compat only) |
145 | | */ |
146 | | nsresult Parse(const nsAString& aSourceBuffer, |
147 | | void* aKey, |
148 | | const nsACString& aContentType, |
149 | | bool aLastCall, |
150 | | nsDTDMode aMode = eDTDMode_autodetect); |
151 | | |
152 | | /** |
153 | | * Stops the parser prematurely |
154 | | */ |
155 | | NS_IMETHOD Terminate() override; |
156 | | |
157 | | /** |
158 | | * Don't call. For interface backwards compat only. |
159 | | */ |
160 | | NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer, |
161 | | nsTArray<nsString>& aTagStack) override; |
162 | | |
163 | | /** |
164 | | * Don't call. For interface compat only. |
165 | | */ |
166 | | NS_IMETHOD BuildModel() override; |
167 | | |
168 | | /** |
169 | | * Don't call. For interface compat only. |
170 | | */ |
171 | | NS_IMETHOD CancelParsingEvents() override; |
172 | | |
173 | | /** |
174 | | * Don't call. For interface compat only. |
175 | | */ |
176 | | virtual void Reset() override; |
177 | | |
178 | | /** |
179 | | * True if the insertion point (per HTML5) is defined. |
180 | | */ |
181 | | virtual bool IsInsertionPointDefined() override; |
182 | | |
183 | | /** |
184 | | * Call immediately before starting to evaluate a parser-inserted script or |
185 | | * in general when the spec says to define an insertion point. |
186 | | */ |
187 | | virtual void PushDefinedInsertionPoint() override; |
188 | | |
189 | | /** |
190 | | * Call immediately after having evaluated a parser-inserted script or |
191 | | * generally when you want to restore to the state before the last |
192 | | * PushDefinedInsertionPoint call. |
193 | | */ |
194 | | virtual void PopDefinedInsertionPoint() override; |
195 | | |
196 | | /** |
197 | | * Marks the HTML5 parser as not a script-created parser: Prepares the |
198 | | * parser to be able to read a stream. |
199 | | * |
200 | | * @param aCommand the parser command (Yeah, this is bad API design. Let's |
201 | | * make this better when retiring nsIParser) |
202 | | */ |
203 | | virtual void MarkAsNotScriptCreated(const char* aCommand) override; |
204 | | |
205 | | /** |
206 | | * True if this is a script-created HTML5 parser. |
207 | | */ |
208 | | virtual bool IsScriptCreated() override; |
209 | | |
210 | | /* End nsIParser */ |
211 | | |
212 | | // Not from an external interface |
213 | | // Non-inherited methods |
214 | | |
215 | | public: |
216 | | /** |
217 | | * Initializes the parser to load from a channel. |
218 | | */ |
219 | | virtual nsresult Initialize(nsIDocument* aDoc, |
220 | | nsIURI* aURI, |
221 | | nsISupports* aContainer, |
222 | | nsIChannel* aChannel); |
223 | | |
224 | 0 | inline nsHtml5Tokenizer* GetTokenizer() { return mTokenizer; } /* Returns the parser's main HTML5 tokenizer (mTokenizer) as a raw pointer. */ |
225 | | |
226 | | void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, |
227 | | int32_t aLine); |
228 | | |
229 | | void DropStreamParser() /* Severs the link to the stream parser, if there is one. */ |
230 | 0 | { |
231 | 0 | if (GetStreamParser()) { /* null when there is no listener or its GetDelegate() returns null */ |
232 | 0 | GetStreamParser()->DropTimer(); |
233 | 0 | mStreamListener->DropDelegate(); /* a non-null stream parser implies mStreamListener is set */ |
234 | 0 | mStreamListener = nullptr; /* release this parser's reference to the listener */ |
235 | 0 | } |
236 | 0 | } |
237 | | |
238 | | void StartTokenizer(bool aScriptingEnabled); |
239 | | |
240 | | void ContinueAfterFailedCharsetSwitch(); |
241 | | |
242 | | nsHtml5StreamParser* GetStreamParser() /* Returns mStreamListener's delegate, or nullptr when no listener is set. */ |
243 | 0 | { |
244 | 0 | if (!mStreamListener) { |
245 | 0 | return nullptr; |
246 | 0 | } |
247 | 0 | return mStreamListener->GetDelegate(); |
248 | 0 | } |
249 | | |
250 | | void PermanentlyUndefineInsertionPoint() /* Sets mInsertionPointPermanentlyUndefined; nothing in this header clears it again. */ |
251 | 0 | { |
252 | 0 | mInsertionPointPermanentlyUndefined = true; |
253 | 0 | } |
254 | | |
255 | | /** |
256 | | * Parse until pending data is exhausted or a script blocks the parser |
257 | | */ |
258 | | nsresult ParseUntilBlocked(); |
259 | | |
260 | | private: |
261 | | virtual ~nsHtml5Parser(); |
262 | | |
263 | | // State variables |
264 | | |
265 | | /** |
266 | | * Whether the last character tokenized was a carriage return (for CRLF) |
267 | | */ |
268 | | bool mLastWasCR; |
269 | | |
270 | | /** |
271 | | * Whether the last character tokenized was a carriage return (for CRLF) |
272 | | * when preparsing document.write. |
273 | | */ |
274 | | bool mDocWriteSpeculativeLastWasCR; |
275 | | |
276 | | /** |
277 | | * The parser is blocking on the load of an external script from a web |
278 | | * page, or any number of extension content scripts. |
279 | | */ |
280 | | uint32_t mBlocked; |
281 | | |
282 | | /** |
283 | | * Whether the document.write() speculator is already active. |
284 | | */ |
285 | | bool mDocWriteSpeculatorActive; |
286 | | |
287 | | /** |
288 | | * The number of PushDefinedInsertionPoint calls we've seen without a |
289 | | * matching PopDefinedInsertionPoint. |
290 | | */ |
291 | | int32_t mInsertionPointPushLevel; |
292 | | |
293 | | /** |
294 | | * True if document.close() has been called. |
295 | | */ |
296 | | bool mDocumentClosed; |
297 | | |
298 | | bool mInDocumentWrite; |
299 | | |
300 | | /** |
301 | | * This is set when the tokenizer has seen EOF. The purpose is to |
302 | | * keep the insertion point undefined between the time the |
303 | | * parser has reached the point where it can't accept more input |
304 | | * and the time the document's mParser is set to nullptr. |
305 | | * Scripts can run during this time period due to an update |
306 | | * batch ending and due to various end-of-parse events firing. |
307 | | * (Setting mParser on the document to nullptr at the point |
308 | | * where this flag gets set to true would break things that for |
309 | | * legacy reasons assume that mParser on the document stays |
310 | | * non-null through the end-of-parse events.) |
311 | | */ |
312 | | bool mInsertionPointPermanentlyUndefined; |
313 | | |
314 | | // Portable parser objects |
315 | | /** |
316 | | * The first buffer in the pending UTF-16 buffer queue |
317 | | */ |
318 | | RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer; |
319 | | |
320 | | /** |
321 | | * The last buffer in the pending UTF-16 buffer queue. Always points |
322 | | * to a sentinel object with nullptr as its parser key. |
323 | | */ |
324 | | nsHtml5OwningUTF16Buffer* mLastBuffer; // weak ref; |
325 | | |
326 | | /** |
327 | | * The tree operation executor |
328 | | */ |
329 | | RefPtr<nsHtml5TreeOpExecutor> mExecutor; |
330 | | |
331 | | /** |
332 | | * The HTML5 tree builder |
333 | | */ |
334 | | const nsAutoPtr<nsHtml5TreeBuilder> mTreeBuilder; |
335 | | |
336 | | /** |
337 | | * The HTML5 tokenizer |
338 | | */ |
339 | | const nsAutoPtr<nsHtml5Tokenizer> mTokenizer; |
340 | | |
341 | | /** |
342 | | * Another HTML5 tree builder for preloading document.written content. |
343 | | */ |
344 | | nsAutoPtr<nsHtml5TreeBuilder> mDocWriteSpeculativeTreeBuilder; |
345 | | |
346 | | /** |
347 | | * Another HTML5 tokenizer for preloading document.written content. |
348 | | */ |
349 | | nsAutoPtr<nsHtml5Tokenizer> mDocWriteSpeculativeTokenizer; |
350 | | |
351 | | /** |
352 | | * The stream listener holding the stream parser. |
353 | | */ |
354 | | RefPtr<nsHtml5StreamListener> mStreamListener; |
355 | | |
356 | | /** |
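357 | | * Line number associated with the root context. NOTE(review): presumably set from aLine in InitializeDocWriteParserState; confirm against the .cpp. |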
358 | | */ |
359 | | int32_t mRootContextLineNumber; |
360 | | |
361 | | /** |
362 | | * Whether it's OK to transfer parsing back to the stream parser |
363 | | */ |
364 | | bool mReturnToStreamParserPermitted; |
365 | | |
366 | | /** |
367 | | * The scoped atom table |
368 | | */ |
369 | | nsHtml5AtomTable mAtomTable; |
370 | | }; |
371 | | #endif |