/src/mozilla-central/parser/htmlparser/nsParser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | /** |
7 | | * MODULE NOTES: |
8 | | * |
9 | | * This class does two primary jobs: |
10 | | * 1) It iterates the tokens provided during the |
11 | | * tokenization process, identifying where elements |
12 | | * begin and end (doing validation and normalization). |
13 | | * 2) It controls and coordinates with an instance of |
14 | | * the IContentSink interface, to coordinate the |
15 | | * production of the content model. |
16 | | * |
17 | | * The basic operation of this class assumes that an HTML |
18 | | * document is non-normalized. Therefore, we don't process |
19 | | * the document in a normalized way. Don't bother to look |
20 | | * for methods like: doHead() or doBody(). |
21 | | * |
22 | | * Instead, in order to be backward compatible, we must |
23 | | * scan the set of tokens and perform this basic set of |
24 | | * operations: |
25 | | * 1) Determine the token type (easy, since the tokens know) |
26 | | * 2) Determine the appropriate section of the HTML document |
27 | | * each token belongs in (HTML,HEAD,BODY,FRAMESET). |
28 | | * 3) Insert content into our document (via the sink) into |
29 | | * the correct section. |
30 | | * 4) In the case of tags that belong in the BODY, we must |
31 | | * ensure that our underlying document state reflects |
32 | | * the appropriate context for our tag. |
33 | | * |
34 | | * For example, if we see a <TR>, we must ensure our |
35 | | * document contains a table into which the row can |
36 | | * be placed. This may result in "implicit containers" |
37 | | * created to ensure a well-formed document. |
38 | | * |
39 | | */ |
40 | | |
41 | | #ifndef NS_PARSER__ |
42 | | #define NS_PARSER__ |
43 | | |
44 | | #include "nsIParser.h" |
45 | | #include "nsDeque.h" |
46 | | #include "nsIURL.h" |
47 | | #include "CParserContext.h" |
48 | | #include "nsParserCIID.h" |
49 | | #include "nsITokenizer.h" |
50 | | #include "nsHTMLTags.h" |
51 | | #include "nsIContentSink.h" |
52 | | #include "nsCOMArray.h" |
53 | | #include "nsCycleCollectionParticipant.h" |
54 | | #include "nsWeakReference.h" |
55 | | |
56 | | class nsIDTD; |
57 | | class nsIRunnable; |
58 | | |
59 | | #ifdef _MSC_VER |
60 | | #pragma warning( disable : 4275 ) |
61 | | #endif |
62 | | |
63 | | |
64 | | class nsParser final : public nsIParser, |
65 | | public nsIStreamListener, |
66 | | public nsSupportsWeakReference |
67 | | { |
68 | | /** |
69 | | * Destructor. Declared ahead of the first `public:` label, so it has the class-default (private) access. |
70 | | * @update gess5/11/98 |
71 | | */ |
72 | | virtual ~nsParser(); |
73 | | |
74 | | public: |
75 | | /** |
76 | | * Called on module init |
77 | | */ |
78 | | static nsresult Init(); |
79 | | |
80 | | /** |
81 | | * Called on module shutdown |
82 | | */ |
83 | | static void Shutdown(); |
84 | | |
85 | | NS_DECL_CYCLE_COLLECTING_ISUPPORTS |
86 | | NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser) |
87 | | |
88 | | /** |
89 | | * Default constructor |
90 | | * @update gess5/11/98 |
91 | | */ |
92 | | nsParser(); |
93 | | |
94 | | /** |
95 | | * Select given content sink into parser for parser output |
96 | | * @update gess5/11/98 |
97 | | * @param aSink is the new sink to be used by parser |
98 | | * @return nothing (declared NS_IMETHOD_(void)); the previous sink is not returned |
99 | | */ |
100 | | NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override; |
101 | | |
102 | | /** |
103 | | * Retrieve the sink set into the parser |
104 | | * @update gess5/11/98 |
105 | | * (takes no parameters) |
106 | | * @return the content sink pointer; presumably nullptr when no sink is set -- TODO confirm |
107 | | */ |
108 | | NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override; |
109 | | |
110 | | /** |
111 | | * Call this method once you've created a parser, and want to instruct it |
112 | | * about the command which caused the parser to be constructed. For example, |
113 | | * this allows us to select a DTD which can do, say, view-source. |
114 | | * |
115 | | * @update gess 3/25/98 |
116 | | * @param aCommand / aParserCommand -- the command as a C string or an eParserCommands value; GetCommand takes aCommand by non-const reference, presumably as an out param -- confirm |
117 | | * @return nothing |
118 | | */ |
119 | | NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override; |
120 | | NS_IMETHOD_(void) SetCommand(const char* aCommand) override; |
121 | | NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override; |
122 | | |
123 | | /** |
124 | | * Call this method once you've created a parser, and want to instruct it |
125 | | * about what charset to load |
126 | | * |
127 | | * @update ftang 4/23/99 |
128 | | * @param aCharset- the charset of a document |
129 | | * @param aSource- the source of the charset |
130 | | * @return nothing |
131 | | */ |
132 | | virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset, |
133 | | int32_t aSource) override; |
134 | | /* Reports the current charset: stores mCharsetSource in aSource and returns mCharset. */ |
135 | | NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource) |
136 | 0 | { |
137 | 0 | aSource = mCharsetSource; |
138 | 0 | return mCharset; |
139 | 0 | } |
140 | | |
141 | | /** |
142 | | * Cause parser to parse input from given URL |
143 | | * @update gess5/11/98 |
144 | | * @param aURL is a descriptor for source document |
145 | | * @param aListener is a listener to forward notifications to (aKey and aMode are undocumented; aMode defaults to eDTDMode_autodetect) |
146 | | * @return an nsresult status code (NS_IMETHOD), not a TRUE/FALSE boolean |
147 | | */ |
148 | | NS_IMETHOD Parse(nsIURI* aURL, |
149 | | nsIRequestObserver* aListener = nullptr, |
150 | | void* aKey = 0, |
151 | | nsDTDMode aMode = eDTDMode_autodetect) override; |
152 | | |
153 | | /** |
154 | | * Fragment-parsing entry point taking the source text and a tag stack; semantics of aTagStack are not visible here -- TODO document from nsParser.cpp |
155 | | */ |
156 | | NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer, |
157 | | nsTArray<nsString>& aTagStack) override; |
158 | | |
159 | | /** |
160 | | * This method gets called when the tokens have been consumed, and it's time |
161 | | * to build the model via the content sink. |
162 | | * @update gess5/11/98 |
163 | | * @return an nsresult status code (NS_IMETHOD), not a YES/NO boolean |
164 | | */ |
165 | | NS_IMETHOD BuildModel(void) override; |
166 | | |
167 | | NS_IMETHOD ContinueInterruptedParsing() override; |
168 | | NS_IMETHOD_(void) BlockParser() override; |
169 | | NS_IMETHOD_(void) UnblockParser() override; |
170 | | NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override; |
171 | | NS_IMETHOD Terminate(void) override; |
172 | | |
173 | | /** |
174 | | * Call this to query whether the parser is enabled or not. |
175 | | * |
176 | | * @update vidur 4/12/99 |
177 | | * @return current state |
178 | | */ |
179 | | NS_IMETHOD_(bool) IsParserEnabled() override; |
180 | | |
181 | | /** |
182 | | * Call this to query whether the parser thinks it's done with parsing. |
183 | | * |
184 | | * @update rickg 5/12/01 |
185 | | * @return complete state |
186 | | */ |
187 | | NS_IMETHOD_(bool) IsComplete() override; |
188 | | |
189 | | /** |
190 | | * This rather arcane method (hack) is used as a signal between the |
191 | | * DTD and the parser. It allows the DTD to tell the parser that content |
192 | | * that comes through (parser::parser(string)) but not consumed should |
193 | | * propagate into the next string based parse call. |
194 | | * |
195 | | * @update gess 9/1/98 |
196 | | * @param aBuffer presumably the unconsumed string content to propagate (stored in mUnusedInput?) -- confirm |
197 | | * @return nothing |
198 | | */ |
199 | | void SetUnusedInput(nsString& aBuffer); |
200 | | |
201 | | /** |
202 | | * This method gets called (automatically) during incremental parsing |
203 | | * @update gess5/11/98 |
204 | | * @return an nsresult status code, not a TRUE/FALSE boolean |
205 | | */ |
206 | | virtual nsresult ResumeParse(bool allowIteration = true, |
207 | | bool aIsFinalChunk = false, |
208 | | bool aCanInterrupt = true); |
209 | | |
210 | | //********************************************* |
211 | | // These methods are callback methods used by |
212 | | // net lib to let us know about our inputstream. |
213 | | //********************************************* |
214 | | // nsIRequestObserver methods: |
215 | | NS_DECL_NSIREQUESTOBSERVER |
216 | | |
217 | | // nsIStreamListener methods: |
218 | | NS_DECL_NSISTREAMLISTENER |
219 | | /* Parser-context accessors; PeekContext returns mParserContext without modifying it. */ |
220 | | void PushContext(CParserContext& aContext); |
221 | | CParserContext* PopContext(); |
222 | 0 | CParserContext* PeekContext() {return mParserContext;} |
223 | | |
224 | | /** |
225 | | * Get the channel associated with this parser |
226 | | * @update harishd,gagan 07/17/01 |
227 | | * @param aChannel out param that will contain the result |
228 | | * @return NS_OK if successful |
229 | | */ |
230 | | NS_IMETHOD GetChannel(nsIChannel** aChannel) override; |
231 | | |
232 | | /** |
233 | | * Get the DTD associated with this parser |
234 | | * @update vidur 9/29/99 |
235 | | * @param aDTD out param that will contain the result |
236 | | * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error |
237 | | */ |
238 | | NS_IMETHOD GetDTD(nsIDTD** aDTD) override; |
239 | | |
240 | | /** |
241 | | * Get the nsIStreamListener for this parser |
242 | | */ |
243 | | virtual nsIStreamListener* GetStreamListener() override; |
244 | | |
245 | | void SetSinkCharset(NotNull<const Encoding*> aCharset); |
246 | | |
247 | | /** |
248 | | * Removes continue parsing events |
249 | | * @update kmcclusk 5/18/98 |
250 | | */ |
251 | | |
252 | | NS_IMETHOD CancelParsingEvents() override; |
253 | | |
254 | | /** |
255 | | * Return true. |
256 | | */ |
257 | | virtual bool IsInsertionPointDefined() override; |
258 | | |
259 | | /** |
260 | | * No-op. |
261 | | */ |
262 | | virtual void PushDefinedInsertionPoint() override; |
263 | | |
264 | | /** |
265 | | * No-op. |
266 | | */ |
267 | | virtual void PopDefinedInsertionPoint() override; |
268 | | |
269 | | /** |
270 | | * No-op. |
271 | | */ |
272 | | virtual void MarkAsNotScriptCreated(const char* aCommand) override; |
273 | | |
274 | | /** |
275 | | * Always false. |
276 | | */ |
277 | | virtual bool IsScriptCreated() override; |
278 | | |
279 | | /** |
280 | | * Sets the parser state that indicates whether parsing tokens can be interrupted |
281 | | * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted. |
282 | | * @update kmcclusk 5/18/98 |
283 | | */ |
284 | | void SetCanInterrupt(bool aCanInterrupt); |
285 | | |
286 | | /** |
287 | | * This is called when the final chunk has been |
288 | | * passed to the parser and the content sink has |
289 | | * interrupted token processing. It schedules |
290 | | * a ParserContinue PL_Event which will ask the parser |
291 | | * to HandleParserContinueEvent when it is handled. |
292 | | * @update kmcclusk6/1/2001 |
293 | | */ |
294 | | nsresult PostContinueEvent(); |
295 | | |
296 | | /** |
297 | | * Fired when the continue parse event is triggered. |
298 | | * @update kmcclusk 5/18/98 |
299 | | */ |
300 | | void HandleParserContinueEvent(class nsParserContinueEvent *); |
301 | | /* Resets the parser by calling Cleanup() followed by Initialize() (aConstructor left at its default of false). */ |
302 | 0 | virtual void Reset() override { |
303 | 0 | Cleanup(); |
304 | 0 | Initialize(); |
305 | 0 | } |
306 | | /* True only when a sink is set and that sink reports IsScriptExecuting(). */ |
307 | 0 | bool IsScriptExecuting() { |
308 | 0 | return mSink && mSink->IsScriptExecuting(); |
309 | 0 | } |
310 | | /* True when no script is executing and mProcessingNetworkData is false. */ |
311 | 0 | bool IsOkToProcessNetworkData() { |
312 | 0 | return !IsScriptExecuting() && !mProcessingNetworkData; |
313 | 0 | } |
314 | | |
315 | | protected: |
316 | | |
317 | | void Initialize(bool aConstructor = false); |
318 | | void Cleanup(); |
319 | | |
320 | | /** |
321 | | * Hook invoked before the model is built (judging by name; sequencing is not visible in this header -- confirm) |
322 | | * @update gess5/18/98 |
323 | | * @param aFilename presumably the name of the source being parsed -- confirm |
324 | | * @return nsresult status code |
325 | | */ |
326 | | nsresult WillBuildModel(nsString& aFilename); |
327 | | |
328 | | /** |
329 | | * Hook invoked after the model is built (judging by name; sequencing is not visible in this header -- confirm) |
330 | | * @update gess5/18/98 |
331 | | * @param anErrorCode status passed in by the caller (presumably the result of model building -- confirm) |
332 | | * @return nsresult status code |
333 | | */ |
334 | | nsresult DidBuildModel(nsresult anErrorCode); |
335 | | |
336 | | private: |
337 | | |
338 | | /******************************************* |
339 | | These are the tokenization methods... |
340 | | *******************************************/ |
341 | | |
342 | | /** |
343 | | * Part of the code sandwich, this gets called right before |
344 | | * the tokenization process begins. The main reason for |
345 | | * this call is to allow the delegate to do initialization. |
346 | | * |
347 | | * @update gess 3/25/98 |
348 | | * @param aIsFinalChunk presumably true when the last chunk of input is being tokenized -- confirm |
349 | | * @return TRUE if it's ok to proceed |
350 | | */ |
351 | | bool WillTokenize(bool aIsFinalChunk = false); |
352 | | |
353 | | |
354 | | /** |
355 | | * This is the primary control routine. It iteratively |
356 | | * consumes tokens until an error occurs or you run out |
357 | | * of data. |
358 | | * |
359 | | * @update gess 3/25/98 |
360 | | * @return error code |
361 | | */ |
362 | | nsresult Tokenize(bool aIsFinalChunk = false); |
363 | | |
364 | | /** |
365 | | * Pushes XML fragment parsing data to expat without an input stream. |
366 | | */ |
367 | | nsresult Parse(const nsAString& aSourceBuffer, |
368 | | void* aKey, |
369 | | bool aLastCall); |
370 | | |
371 | | protected: |
372 | | //********************************************* |
373 | | // And now, some data members... |
374 | | //********************************************* |
375 | | |
376 | | |
377 | | CParserContext* mParserContext; |
378 | | nsCOMPtr<nsIDTD> mDTD; |
379 | | nsCOMPtr<nsIRequestObserver> mObserver; |
380 | | nsCOMPtr<nsIContentSink> mSink; |
381 | | nsIRunnable* mContinueEvent; // weak ref |
382 | | |
383 | | eParserCommands mCommand; |
384 | | nsresult mInternalState; |
385 | | nsresult mStreamStatus; |
386 | | int32_t mCharsetSource; |
387 | | |
388 | | uint16_t mFlags; |
389 | | uint32_t mBlocked; |
390 | | |
391 | | nsString mUnusedInput; |
392 | | NotNull<const Encoding*> mCharset; |
393 | | nsCString mCommandStr; |
394 | | |
395 | | bool mProcessingNetworkData; |
396 | | bool mIsAboutBlank; |
397 | | }; |
398 | | |
399 | | #endif |
400 | | |