/src/mozilla-central/parser/htmlparser/nsParser.h

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
/**
 * MODULE NOTES:
 * 
 *  This class does two primary jobs:
 *    1) It iterates the tokens provided during the 
 *       tokenization process, identifying where elements
 *       begin and end (doing validation and normalization).
 *    2) It controls and coordinates with an instance of
 *       the IContentSink interface, to coordinate
 *       the production of the content model.
 *
 *  The basic operation of this class assumes that an HTML
 *  document is non-normalized. Therefore, we don't process
 *  the document in a normalized way. Don't bother to look
 *  for methods like: doHead() or doBody().
 *
 *  Instead, in order to be backward compatible, we must
 *  scan the set of tokens and perform this basic set of
 *  operations:
 *    1)  Determine the token type (easy, since the tokens know)
 *    2)  Determine the appropriate section of the HTML document
 *        each token belongs in (HTML,HEAD,BODY,FRAMESET).
 *    3)  Insert content into our document (via the sink) into
 *        the correct section.
 *    4)  In the case of tags that belong in the BODY, we must
 *        ensure that our underlying document state reflects
 *        the appropriate context for our tag. 
 *
 *        For example, if we see a <TR>, we must ensure our 
 *        document contains a table into which the row can
 *        be placed. This may result in "implicit containers" 
 *        created to ensure a well-formed document.
 *         
 */

#ifndef NS_PARSER__
#define NS_PARSER__

#include "nsIParser.h"
#include "nsDeque.h"
#include "nsIURL.h"
#include "CParserContext.h"
#include "nsParserCIID.h"
#include "nsITokenizer.h"
#include "nsHTMLTags.h"
#include "nsIContentSink.h"
#include "nsCOMArray.h"
#include "nsCycleCollectionParticipant.h"
#include "nsWeakReference.h"

class nsIDTD;
class nsIRunnable;

#ifdef _MSC_VER
#pragma warning( disable : 4275 )
#endif


/**
 * Parser object that implements nsIParser and nsIStreamListener and supports
 * weak references (nsSupportsWeakReference).
 *
 * This header only declares the interface plus a handful of trivial inline
 * accessors; everything else is implemented out of line.
 */
class nsParser final : public nsIParser,
                       public nsIStreamListener,
                       public nsSupportsWeakReference
{
    /**
     * Destructor.
     *
     * Declared before the first access specifier, so it is private: code
     * outside the class cannot delete an nsParser directly.
     * NOTE(review): presumably destruction happens through the reference
     * counting declared below -- confirm in the implementation.
     * @update  gess5/11/98
     */
    virtual ~nsParser();

public:
    /**
     * Called on module init
     */
    static nsresult Init();

    /**
     * Called on module shutdown
     */
    static void Shutdown();

    // nsISupports and cycle-collection declarations. nsIParser is named as the
    // base to use because nsISupports is reachable through more than one base.
    NS_DECL_CYCLE_COLLECTING_ISUPPORTS
    NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)

    /**
     * default constructor
     * @update  gess5/11/98
     */
    nsParser();

    /**
     * Select given content sink into parser for parser output.
     * Returns nothing (the previous sink is not handed back).
     * @update  gess5/11/98
     * @param   aSink is the new sink to be used by parser
     */
    NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;

    /**
     * Retrieve the sink set into the parser.
     * @update  gess5/11/98
     * @return  the content sink pointer
     */
    NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override;

    /**
     *  Call these methods once you've created a parser, and want to instruct it
     *  about the command which caused the parser to be constructed. For example,
     *  this allows us to select a DTD which can do, say, view-source.
     *
     *  GetCommand takes a string by non-const reference and returns nothing;
     *  presumably it writes the current command into aCommand -- confirm in
     *  the implementation. SetCommand accepts the command either as a C
     *  string or as an eParserCommands value.
     *
     *  @update  gess 3/25/98
     *  @param   aCommand -- string that contains (or receives) the command
     *  @param   aParserCommand -- the command as an enum value
     */
    NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
    NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
    NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;

    /**
     *  Call this method once you've created a parser, and want to instruct it
     *  about what charset to load
     *
     *  @update  ftang 4/23/99
     *  @param   aCharset - the charset (encoding) of a document; never null
     *  @param   aSource - the source of the charset
     */
    void SetDocumentCharset(NotNull<const Encoding*> aCharset,
                            int32_t aSource) override;

    /**
     * Read back the document charset.
     *
     * @param   aSource out param; receives mCharsetSource
     * @return  mCharset
     */
    NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource)
    {
         aSource = mCharsetSource;
         return mCharset;
    }

    /**
     * Cause parser to parse input from given URL
     * @update  gess5/11/98
     * @param   aURL is a descriptor for source document
     * @param   aListener is a listener to forward notifications to
     *          (optional, defaults to nullptr)
     * @param   aKey opaque pointer, defaults to nullptr
     *          NOTE(review): meaning of the key is not visible here.
     * @param   aMode DTD mode, defaults to eDTDMode_autodetect
     * @return  nsresult status code
     */
    NS_IMETHOD Parse(nsIURI* aURL,
                     nsIRequestObserver* aListener = nullptr,
                     void* aKey = nullptr,
                     nsDTDMode aMode = eDTDMode_autodetect) override;

    /**
     * This method needs documentation
     *
     * @param   aSourceBuffer fragment source text
     * @param   aTagStack array of strings
     *          NOTE(review): presumably the enclosing tag context -- confirm.
     * @return  nsresult status code
     */
    NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
                             nsTArray<nsString>& aTagStack) override;

    /**
     * This method gets called when the tokens have been consumed, and it's time
     * to build the model via the content sink.
     * @update  gess5/11/98
     * @return  nsresult status code
     */
    NS_IMETHOD BuildModel(void) override;

    // Parser flow control (nsIParser overrides). Behaviour is defined in the
    // implementation file; nothing beyond the signatures is visible here.
    NS_IMETHOD        ContinueInterruptedParsing() override;
    NS_IMETHOD_(void) BlockParser() override;
    NS_IMETHOD_(void) UnblockParser() override;
    NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
    NS_IMETHOD        Terminate(void) override;

    /**
     * Call this to query whether the parser is enabled or not.
     *
     *  @update  vidur 4/12/99
     *  @return  current state
     */
    NS_IMETHOD_(bool) IsParserEnabled() override;

    /**
     * Call this to query whether the parser thinks it's done with parsing.
     *
     *  @update  rickg 5/12/01
     *  @return  complete state
     */
    NS_IMETHOD_(bool) IsComplete() override;

    /**
     *  This rather arcane method (hack) is used as a signal between the
     *  DTD and the parser. It allows the DTD to tell the parser that content
     *  that comes through a string based parse call but is not consumed
     *  should propagate into the next string based parse call.
     *
     *  @update  gess 9/1/98
     *  @param   aBuffer the unused input
     *           NOTE(review): presumably stored in mUnusedInput -- confirm.
     */
    void SetUnusedInput(nsString& aBuffer);

    /**
     * This method gets called (automatically) during incremental parsing
     * @update  gess5/11/98
     * @param   allowIteration defaults to true
     * @param   aIsFinalChunk defaults to false
     * @param   aCanInterrupt defaults to true
     *          NOTE(review): the exact effect of each flag is not visible in
     *          this header.
     * @return  nsresult status code
     */
    virtual nsresult ResumeParse(bool allowIteration = true,
                                 bool aIsFinalChunk = false,
                                 bool aCanInterrupt = true);

    //*********************************************
    // These methods are callback methods used by
    // net lib to let us know about our inputstream.
    //*********************************************
    // nsIRequestObserver methods:
    NS_DECL_NSIREQUESTOBSERVER

    // nsIStreamListener methods:
    NS_DECL_NSISTREAMLISTENER

    // Parser context handling. Only PeekContext is defined here: it returns
    // mParserContext without modifying anything.
    void              PushContext(CParserContext& aContext);
    CParserContext*   PopContext();
    CParserContext*   PeekContext() {return mParserContext;}

    /**
     * Get the channel associated with this parser
     * @update harishd,gagan 07/17/01
     * @param aChannel out param that will contain the result
     * @return NS_OK if successful
     */
    NS_IMETHOD GetChannel(nsIChannel** aChannel) override;

    /**
     * Get the DTD associated with this parser
     * @update vidur 9/29/99
     * @param aDTD out param that will contain the result
     * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
     */
    NS_IMETHOD GetDTD(nsIDTD** aDTD) override;

    /**
     * Get the nsIStreamListener for this parser
     */
    nsIStreamListener* GetStreamListener() override;

    /**
     * Pass a charset on to the sink.
     * NOTE(review): inferred from the name only; confirm in the implementation.
     * @param aCharset the encoding; never null
     */
    void SetSinkCharset(NotNull<const Encoding*> aCharset);

    /**
     *  Removes continue parsing events
     *  @update  kmcclusk 5/18/98
     *  @return  nsresult status code
     */
    NS_IMETHOD CancelParsingEvents() override;

    /**
     * Return true.
     */
    bool IsInsertionPointDefined() override;

    /**
     * No-op.
     */
    void PushDefinedInsertionPoint() override;

    /**
     * No-op.
     */
    void PopDefinedInsertionPoint() override;

    /**
     * No-op.
     */
    void MarkAsNotScriptCreated(const char* aCommand) override;

    /**
     * Always false.
     */
    bool IsScriptCreated() override;

    /**
     *  Set the parser state to indicate whether parsing tokens can be interrupted
     *  @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
     *  @update  kmcclusk 5/18/98
     */
    void SetCanInterrupt(bool aCanInterrupt);

    /**
     * This is called when the final chunk has been
     * passed to the parser and the content sink has
     * interrupted token processing. It schedules
     * a ParserContinue PL_Event which will ask the parser
     * to HandleParserContinueEvent when it is handled.
     * @update  kmcclusk6/1/2001
     * @return  nsresult status code
     */
    nsresult PostContinueEvent();

    /**
     *  Fired when the continue parse event is triggered.
     *  Takes a pointer to the nsParserContinueEvent involved.
     *  @update  kmcclusk 5/18/98
     */
    void HandleParserContinueEvent(class nsParserContinueEvent *);

    /**
     * Reset the parser: runs Cleanup() and then Initialize() with its default
     * argument (aConstructor = false).
     */
    void Reset() override {
      Cleanup();
      Initialize();
    }

    /**
     * @return true only if a sink is set and that sink reports that a script
     *         is executing; false when there is no sink.
     */
    bool IsScriptExecuting() {
      return mSink && mSink->IsScriptExecuting();
    }

    /**
     * @return true when no script is executing and mProcessingNetworkData is
     *         not set.
     */
    bool IsOkToProcessNetworkData() {
      return !IsScriptExecuting() && !mProcessingNetworkData;
    }

protected:

    /**
     * (Re)initialise parser state. Reset() calls this after Cleanup().
     * @param aConstructor defaults to false
     *        NOTE(review): presumably true only when called from the
     *        constructor -- confirm in the implementation.
     */
    void Initialize(bool aConstructor = false);

    /**
     * Release parser state. Reset() calls this before Initialize().
     */
    void Cleanup();

    /**
     * Hook run before model building.
     * NOTE(review): undocumented in the original; description inferred from
     * the name only.
     * @update  gess5/18/98
     * @param   aFilename string passed by non-const reference
     * @return  nsresult status code
     */
    nsresult WillBuildModel(nsString& aFilename);

    /**
     * Hook run after model building.
     * NOTE(review): undocumented in the original; description inferred from
     * the name only.
     * @update  gess5/18/98
     * @param   anErrorCode a status code
     * @return  nsresult status code
     */
    nsresult DidBuildModel(nsresult anErrorCode);

private:

    /*******************************************
      These are the tokenization methods...
     *******************************************/

    /**
     *  Part of the code sandwich, this gets called right before
     *  the tokenization process begins. The main reason for
     *  this call is to allow the delegate to do initialization.
     *
     *  @update  gess 3/25/98
     *  @param   aIsFinalChunk defaults to false
     *  @return  TRUE if it's ok to proceed
     */
    bool WillTokenize(bool aIsFinalChunk = false);


    /**
     *  This is the primary control routine. It iteratively
     *  consumes tokens until an error occurs or you run out
     *  of data.
     *
     *  @update  gess 3/25/98
     *  @param   aIsFinalChunk defaults to false
     *  @return  error code
     */
    nsresult Tokenize(bool aIsFinalChunk = false);

    /**
     * Pushes XML fragment parsing data to expat without an input stream.
     *
     * @param  aSourceBuffer the source text to parse
     * @param  aKey opaque pointer
     * @param  aLastCall NOTE(review): presumably true for the final buffer --
     *         confirm in the implementation.
     * @return nsresult status code
     */
    nsresult Parse(const nsAString& aSourceBuffer,
                   void* aKey,
                   bool aLastCall);

protected:
    //*********************************************
    // And now, some data members...
    //*********************************************

    // Declaration order is kept as-is: it fixes the member initialisation
    // order.

    CParserContext*              mParserContext;  // returned by PeekContext()
    nsCOMPtr<nsIDTD>             mDTD;
    nsCOMPtr<nsIRequestObserver> mObserver;
    nsCOMPtr<nsIContentSink>     mSink;           // queried by IsScriptExecuting()
    nsIRunnable*                 mContinueEvent;  // weak ref

    eParserCommands     mCommand;
    nsresult            mInternalState;
    nsresult            mStreamStatus;
    int32_t             mCharsetSource;  // handed out by GetDocumentCharset()

    uint16_t            mFlags;
    uint32_t            mBlocked;

    nsString            mUnusedInput;
    NotNull<const Encoding*> mCharset;   // returned by GetDocumentCharset()
    nsCString           mCommandStr;

    bool                mProcessingNetworkData;  // read by IsOkToProcessNetworkData()
    bool                mIsAboutBlank;
};

#endif 


Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -- */
2		/* This Source Code Form is subject to the terms of the Mozilla Public
3		* License, v. 2.0. If a copy of the MPL was not distributed with this
4		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6		/**
7		* MODULE NOTES:
8		*
9		* This class does two primary jobs:
10		* 1) It iterates the tokens provided during the
11		* tokenization process, identifing where elements
12		* begin and end (doing validation and normalization).
13		* 2) It controls and coordinates with an instance of
14		* the IContentSink interface, to coordinate the
15		* the production of the content model.
16		*
17		* The basic operation of this class assumes that an HTML
18		* document is non-normalized. Therefore, we don't process
19		* the document in a normalized way. Don't bother to look
20		* for methods like: doHead() or doBody().
21		*
22		* Instead, in order to be backward compatible, we must
23		* scan the set of tokens and perform this basic set of
24		* operations:
25		* 1) Determine the token type (easy, since the tokens know)
26		* 2) Determine the appropriate section of the HTML document
27		* each token belongs in (HTML,HEAD,BODY,FRAMESET).
28		* 3) Insert content into our document (via the sink) into
29		* the correct section.
30		* 4) In the case of tags that belong in the BODY, we must
31		* ensure that our underlying document state reflects
32		* the appropriate context for our tag.
33		*
34		* For example,if we see a <TR>, we must ensure our
35		* document contains a table into which the row can
36		* be placed. This may result in "implicit containers"
37		* created to ensure a well-formed document.
38		*
39		*/
40
41		#ifndef NS_PARSER__
42		#define NS_PARSER__
43
44		#include "nsIParser.h"
45		#include "nsDeque.h"
46		#include "nsIURL.h"
47		#include "CParserContext.h"
48		#include "nsParserCIID.h"
49		#include "nsITokenizer.h"
50		#include "nsHTMLTags.h"
51		#include "nsIContentSink.h"
52		#include "nsCOMArray.h"
53		#include "nsCycleCollectionParticipant.h"
54		#include "nsWeakReference.h"
55
56		class nsIDTD;
57		class nsIRunnable;
58
59		#ifdef _MSC_VER
60		#pragma warning( disable : 4275 )
61		#endif
62
63
64		class nsParser final : public nsIParser,
65		public nsIStreamListener,
66		public nsSupportsWeakReference
67		{
68		/**
69		* Destructor
70		* @update gess5/11/98
71		*/
72		virtual ~nsParser();
73
74		public:
75		/**
76		* Called on module init
77		*/
78		static nsresult Init();
79
80		/**
81		* Called on module shutdown
82		*/
83		static void Shutdown();
84
85		NS_DECL_CYCLE_COLLECTING_ISUPPORTS
86		NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
87
88		/**
89		* default constructor
90		* @update gess5/11/98
91		*/
92		nsParser();
93
94		/**
95		* Select given content sink into parser for parser output
96		* @update gess5/11/98
97		* @param aSink is the new sink to be used by parser
98		* @return old sink, or nullptr
99		*/
100		NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;
101
102		/**
103		* retrive the sink set into the parser
104		* @update gess5/11/98
105		* @param aSink is the new sink to be used by parser
106		* @return old sink, or nullptr
107		*/
108		NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override;
109
110		/**
111		* Call this method once you've created a parser, and want to instruct it
112		* about the command which caused the parser to be constructed. For example,
113		* this allows us to select a DTD which can do, say, view-source.
114		*
115		* @update gess 3/25/98
116		* @param aCommand -- ptrs to string that contains command
117		* @return nada
118		*/
119		NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
120		NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
121		NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;
122
123		/**
124		* Call this method once you've created a parser, and want to instruct it
125		* about what charset to load
126		*
127		* @update ftang 4/23/99
128		* @param aCharset- the charset of a document
129		* @param aCharsetSource- the source of the charset
130		* @return nada
131		*/
132		virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset,
133		int32_t aSource) override;
134
135		NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource)
136	0	{
137	0	aSource = mCharsetSource;
138	0	return mCharset;
139	0	}
140
141		/**
142		* Cause parser to parse input from given URL
143		* @update gess5/11/98
144		* @param aURL is a descriptor for source document
145		* @param aListener is a listener to forward notifications to
146		* @return TRUE if all went well -- FALSE otherwise
147		*/
148		NS_IMETHOD Parse(nsIURI* aURL,
149		nsIRequestObserver* aListener = nullptr,
150		void* aKey = 0,
151		nsDTDMode aMode = eDTDMode_autodetect) override;
152
153		/**
154		* This method needs documentation
155		*/
156		NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
157		nsTArray<nsString>& aTagStack) override;
158
159		/**
160		* This method gets called when the tokens have been consumed, and it's time
161		* to build the model via the content sink.
162		* @update gess5/11/98
163		* @return YES if model building went well -- NO otherwise.
164		*/
165		NS_IMETHOD BuildModel(void) override;
166
167		NS_IMETHOD ContinueInterruptedParsing() override;
168		NS_IMETHOD_(void) BlockParser() override;
169		NS_IMETHOD_(void) UnblockParser() override;
170		NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
171		NS_IMETHOD Terminate(void) override;
172
173		/**
174		* Call this to query whether the parser is enabled or not.
175		*
176		* @update vidur 4/12/99
177		* @return current state
178		*/
179		NS_IMETHOD_(bool) IsParserEnabled() override;
180
181		/**
182		* Call this to query whether the parser thinks it's done with parsing.
183		*
184		* @update rickg 5/12/01
185		* @return complete state
186		*/
187		NS_IMETHOD_(bool) IsComplete() override;
188
189		/**
190		* This rather arcane method (hack) is used as a signal between the
191		* DTD and the parser. It allows the DTD to tell the parser that content
192		* that comes through (parser::parser(string)) but not consumed should
193		* propagate into the next string based parse call.
194		*
195		* @update gess 9/1/98
196		* @param aState determines whether we propagate unused string content.
197		* @return current state
198		*/
199		void SetUnusedInput(nsString& aBuffer);
200
201		/**
202		* This method gets called (automatically) during incremental parsing
203		* @update gess5/11/98
204		* @return TRUE if all went well, otherwise FALSE
205		*/
206		virtual nsresult ResumeParse(bool allowIteration = true,
207		bool aIsFinalChunk = false,
208		bool aCanInterrupt = true);
209
210		//*********************************************
211		// These methods are callback methods used by
212		// net lib to let us know about our inputstream.
213		//*********************************************
214		// nsIRequestObserver methods:
215		NS_DECL_NSIREQUESTOBSERVER
216
217		// nsIStreamListener methods:
218		NS_DECL_NSISTREAMLISTENER
219
220		void PushContext(CParserContext& aContext);
221		CParserContext* PopContext();
222	0	CParserContext* PeekContext() {return mParserContext;}
223
224		/**
225		* Get the channel associated with this parser
226		* @update harishd,gagan 07/17/01
227		* @param aChannel out param that will contain the result
228		* @return NS_OK if successful
229		*/
230		NS_IMETHOD GetChannel(nsIChannel** aChannel) override;
231
232		/**
233		* Get the DTD associated with this parser
234		* @update vidur 9/29/99
235		* @param aDTD out param that will contain the result
236		* @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
237		*/
238		NS_IMETHOD GetDTD(nsIDTD** aDTD) override;
239
240		/**
241		* Get the nsIStreamListener for this parser
242		*/
243		virtual nsIStreamListener* GetStreamListener() override;
244
245		void SetSinkCharset(NotNull<const Encoding*> aCharset);
246
247		/**
248		* Removes continue parsing events
249		* @update kmcclusk 5/18/98
250		*/
251
252		NS_IMETHOD CancelParsingEvents() override;
253
254		/**
255		* Return true.
256		*/
257		virtual bool IsInsertionPointDefined() override;
258
259		/**
260		* No-op.
261		*/
262		virtual void PushDefinedInsertionPoint() override;
263
264		/**
265		* No-op.
266		*/
267		virtual void PopDefinedInsertionPoint() override;
268
269		/**
270		* No-op.
271		*/
272		virtual void MarkAsNotScriptCreated(const char* aCommand) override;
273
274		/**
275		* Always false.
276		*/
277		virtual bool IsScriptCreated() override;
278
279		/**
280		* Set to parser state to indicate whether parsing tokens can be interrupted
281		* @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
282		* @update kmcclusk 5/18/98
283		*/
284		void SetCanInterrupt(bool aCanInterrupt);
285
286		/**
287		* This is called when the final chunk has been
288		* passed to the parser and the content sink has
289		* interrupted token processing. It schedules
290		* a ParserContinue PL_Event which will ask the parser
291		* to HandleParserContinueEvent when it is handled.
292		* @update kmcclusk6/1/2001
293		*/
294		nsresult PostContinueEvent();
295
296		/**
297		* Fired when the continue parse event is triggered.
298		* @update kmcclusk 5/18/98
299		*/
300		void HandleParserContinueEvent(class nsParserContinueEvent *);
301
302	0	virtual void Reset() override {
303	0	Cleanup();
304	0	Initialize();
305	0	}
306
307	0	bool IsScriptExecuting() {
308	0	return mSink && mSink->IsScriptExecuting();
309	0	}
310
311	0	bool IsOkToProcessNetworkData() {
312	0	return !IsScriptExecuting() && !mProcessingNetworkData;
313	0	}
314
315		protected:
316
317		void Initialize(bool aConstructor = false);
318		void Cleanup();
319
320		/**
321		*
322		* @update gess5/18/98
323		* @param
324		* @return
325		*/
326		nsresult WillBuildModel(nsString& aFilename);
327
328		/**
329		*
330		* @update gess5/18/98
331		* @param
332		* @return
333		*/
334		nsresult DidBuildModel(nsresult anErrorCode);
335
336		private:
337
338		/*******************************************
339		These are the tokenization methods...
340		*******************************************/
341
342		/**
343		* Part of the code sandwich, this gets called right before
344		* the tokenization process begins. The main reason for
345		* this call is to allow the delegate to do initialization.
346		*
347		* @update gess 3/25/98
348		* @param
349		* @return TRUE if it's ok to proceed
350		*/
351		bool WillTokenize(bool aIsFinalChunk = false);
352
353
354		/**
355		* This is the primary control routine. It iteratively
356		* consumes tokens until an error occurs or you run out
357		* of data.
358		*
359		* @update gess 3/25/98
360		* @return error code
361		*/
362		nsresult Tokenize(bool aIsFinalChunk = false);
363
364		/**
365		* Pushes XML fragment parsing data to expat without an input stream.
366		*/
367		nsresult Parse(const nsAString& aSourceBuffer,
368		void* aKey,
369		bool aLastCall);
370
371		protected:
372		//*********************************************
373		// And now, some data members...
374		//*********************************************
375
376
377		CParserContext* mParserContext;
378		nsCOMPtr<nsIDTD> mDTD;
379		nsCOMPtr<nsIRequestObserver> mObserver;
380		nsCOMPtr<nsIContentSink> mSink;
381		nsIRunnable* mContinueEvent; // weak ref
382
383		eParserCommands mCommand;
384		nsresult mInternalState;
385		nsresult mStreamStatus;
386		int32_t mCharsetSource;
387
388		uint16_t mFlags;
389		uint32_t mBlocked;
390
391		nsString mUnusedInput;
392		NotNull<const Encoding*> mCharset;
393		nsCString mCommandStr;
394
395		bool mProcessingNetworkData;
396		bool mIsAboutBlank;
397		};
398
399		#endif
400