Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : /**
7 : * MODULE NOTES:
8 : *
9 : * This class does two primary jobs:
10 : * 1) It iterates the tokens provided during the
11 : * tokenization process, identifying where elements
12 : * begin and end (doing validation and normalization).
13 : * 2) It controls and coordinates with an instance of
14 : * the IContentSink interface, to coordinate the
15 : * production of the content model.
16 : *
17 : * The basic operation of this class assumes that an HTML
18 : * document is non-normalized. Therefore, we don't process
19 : * the document in a normalized way. Don't bother to look
20 : * for methods like: doHead() or doBody().
21 : *
22 : * Instead, in order to be backward compatible, we must
23 : * scan the set of tokens and perform this basic set of
24 : * operations:
25 : * 1) Determine the token type (easy, since the tokens know)
26 : * 2) Determine the appropriate section of the HTML document
27 : * each token belongs in (HTML,HEAD,BODY,FRAMESET).
28 : * 3) Insert content into our document (via the sink) into
29 : * the correct section.
30 : * 4) In the case of tags that belong in the BODY, we must
31 : * ensure that our underlying document state reflects
32 : * the appropriate context for our tag.
33 : *
34 : * For example, if we see a <TR>, we must ensure our
35 : * document contains a table into which the row can
36 : * be placed. This may result in "implicit containers"
37 : * created to ensure a well-formed document.
38 : *
39 : */
40 :
41 : #ifndef NS_PARSER__
42 : #define NS_PARSER__
43 :
44 : #include "nsIParser.h"
45 : #include "nsDeque.h"
46 : #include "nsIURL.h"
47 : #include "CParserContext.h"
48 : #include "nsParserCIID.h"
49 : #include "nsITokenizer.h"
50 : #include "nsHTMLTags.h"
51 : #include "nsIContentSink.h"
52 : #include "nsCOMArray.h"
53 : #include "nsCycleCollectionParticipant.h"
54 : #include "nsWeakReference.h"
55 :
56 : class nsIDTD; // forward declaration: used below only via nsCOMPtr<nsIDTD> and nsIDTD**
57 : class nsIRunnable; // forward declaration: used below only as a raw pointer member
58 :
59 : #ifdef _MSC_VER
60 : #pragma warning( disable : 4275 ) // MSVC C4275: non dll-interface class used as base for dll-interface class
61 : #endif
62 :
63 :
64 : class nsParser final : public nsIParser,
65 : public nsIStreamListener,
66 : public nsSupportsWeakReference
67 : {
68 : /**
69 : * Destructor. Declared ahead of `public:`, so it has the class's default (private) access.
70 : * @update gess5/11/98
71 : */
72 : virtual ~nsParser();
73 :
74 : public:
75 : /**
76 : * Called on module init
77 : */
78 : static nsresult Init();
79 :
80 : /**
81 : * Called on module shutdown
82 : */
83 : static void Shutdown();
84 :
85 : NS_DECL_CYCLE_COLLECTING_ISUPPORTS
86 587 : NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsParser, nsIParser)
87 :
88 : /**
89 : * Default constructor
90 : * @update gess5/11/98
91 : */
92 : nsParser();
93 :
94 : /**
95 : * Select given content sink into parser for parser output
96 : * @update gess5/11/98
97 : * @param aSink is the new sink to be used by parser
98 : * @return nothing; the method is declared NS_IMETHOD_(void)
99 : */
100 : NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;
101 :
102 : /**
103 : * Retrieve the sink set into the parser
104 : * @update gess5/11/98
105 : * (takes no parameters)
106 : * @return the nsIContentSink* held by the parser -- presumably null if none was set (TODO confirm)
107 : */
108 : NS_IMETHOD_(nsIContentSink*) GetContentSink(void) override;
109 :
110 : /**
111 : * Call this method once you've created a parser, and want to instruct it
112 : * about the command which caused the parser to be constructed. For example,
113 : * this allows us to select a DTD which can do, say, view-source.
114 : *
115 : * @update gess 3/25/98
116 : * @param aCommand -- the command: a C string or eParserCommands value for SetCommand; for GetCommand presumably an out-param (TODO confirm)
117 : * @return nada
118 : */
119 : NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
120 : NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
121 : NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;
122 :
123 : /**
124 : * Call this method once you've created a parser, and want to instruct it
125 : * about what charset to load
126 : *
127 : * @update ftang 4/23/99
128 : * @param aCharset- the charset of a document
129 : * @param aSource- the source of the charset
130 : * @return nada
131 : */
132 : virtual void SetDocumentCharset(NotNull<const Encoding*> aCharset,
133 : int32_t aSource) override;
134 :
135 22 : NotNull<const Encoding*> GetDocumentCharset(int32_t& aSource)
136 : {
137 22 : aSource = mCharsetSource; // out-param: report the charset source alongside the returned charset
138 22 : return mCharset;
139 : }
140 :
141 : /**
142 : * Cause parser to parse input from given URL
143 : * @update gess5/11/98
144 : * @param aURL is a descriptor for source document
145 : * @param aListener is a listener to forward notifications to
146 : * @return nsresult (declared NS_IMETHOD) -- presumably NS_OK if all went well (TODO confirm)
147 : */
148 : NS_IMETHOD Parse(nsIURI* aURL,
149 : nsIRequestObserver* aListener = nullptr,
150 : void* aKey = 0,
151 : nsDTDMode aMode = eDTDMode_autodetect) override;
152 :
153 : /**
154 : * Parse aSourceBuffer as a fragment. NOTE(review): aTagStack presumably lists the enclosing tags -- confirm in the implementation.
155 : */
156 : NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
157 : nsTArray<nsString>& aTagStack) override;
158 :
159 : /**
160 : * This method gets called when the tokens have been consumed, and it's time
161 : * to build the model via the content sink.
162 : * @update gess5/11/98
163 : * @return nsresult (declared NS_IMETHOD) -- presumably NS_OK if model building went well (TODO confirm)
164 : */
165 : NS_IMETHOD BuildModel(void) override;
166 :
167 : NS_IMETHOD ContinueInterruptedParsing() override;
168 : NS_IMETHOD_(void) BlockParser() override;
169 : NS_IMETHOD_(void) UnblockParser() override;
170 : NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
171 : NS_IMETHOD Terminate(void) override;
172 :
173 : /**
174 : * Call this to query whether the parser is enabled or not.
175 : *
176 : * @update vidur 4/12/99
177 : * @return current state
178 : */
179 : NS_IMETHOD_(bool) IsParserEnabled() override;
180 :
181 : /**
182 : * Call this to query whether the parser thinks it's done with parsing.
183 : *
184 : * @update rickg 5/12/01
185 : * @return complete state
186 : */
187 : NS_IMETHOD_(bool) IsComplete() override;
188 :
189 : /**
190 : * This rather arcane method (hack) is used as a signal between the
191 : * DTD and the parser. It allows the DTD to tell the parser that content
192 : * that comes through (parser::parser(string)) but not consumed should
193 : * propagate into the next string based parse call.
194 : *
195 : * @update gess 9/1/98
196 : * @param aBuffer the string content concerned -- presumably the unconsumed input to carry over (TODO confirm)
197 : * @return nothing; the method is declared void
198 : */
199 : void SetUnusedInput(nsString& aBuffer);
200 :
201 : /**
202 : * This method gets called (automatically) during incremental parsing
203 : * @update gess5/11/98
204 : * @return nsresult -- presumably NS_OK if all went well (TODO confirm)
205 : */
206 : virtual nsresult ResumeParse(bool allowIteration = true,
207 : bool aIsFinalChunk = false,
208 : bool aCanInterrupt = true);
209 :
210 : //*********************************************
211 : // These methods are callback methods used by
212 : // net lib to let us know about our inputstream.
213 : //*********************************************
214 : // nsIRequestObserver methods:
215 : NS_DECL_NSIREQUESTOBSERVER
216 :
217 : // nsIStreamListener methods:
218 : NS_DECL_NSISTREAMLISTENER
219 :
220 : void PushContext(CParserContext& aContext);
221 : CParserContext* PopContext();
222 : CParserContext* PeekContext() {return mParserContext;}
223 :
224 : /**
225 : * Get the channel associated with this parser
226 : * @update harishd,gagan 07/17/01
227 : * @param aChannel out param that will contain the result
228 : * @return NS_OK if successful
229 : */
230 : NS_IMETHOD GetChannel(nsIChannel** aChannel) override;
231 :
232 : /**
233 : * Get the DTD associated with this parser
234 : * @update vidur 9/29/99
235 : * @param aDTD out param that will contain the result
236 : * @return NS_OK if successful, NS_ERROR_FAILURE for runtime error
237 : */
238 : NS_IMETHOD GetDTD(nsIDTD** aDTD) override;
239 :
240 : /**
241 : * Get the nsIStreamListener for this parser
242 : */
243 : virtual nsIStreamListener* GetStreamListener() override;
244 :
245 : void SetSinkCharset(NotNull<const Encoding*> aCharset);
246 :
247 : /**
248 : * Removes continue parsing events
249 : * @update kmcclusk 5/18/98
250 : */
251 :
252 : NS_IMETHOD CancelParsingEvents() override;
253 :
254 : /**
255 : * Return true.
256 : */
257 : virtual bool IsInsertionPointDefined() override;
258 :
259 : /**
260 : * No-op.
261 : */
262 : virtual void PushDefinedInsertionPoint() override;
263 :
264 : /**
265 : * No-op.
266 : */
267 : virtual void PopDefinedInsertionPoint() override;
268 :
269 : /**
270 : * No-op.
271 : */
272 : virtual void MarkAsNotScriptCreated(const char* aCommand) override;
273 :
274 : /**
275 : * Always false.
276 : */
277 : virtual bool IsScriptCreated() override;
278 :
279 : /**
280 : * Sets the parser state to indicate whether parsing tokens can be interrupted
281 : * @param aCanInterrupt true if parser can be interrupted, false if it can not be interrupted.
282 : * @update kmcclusk 5/18/98
283 : */
284 : void SetCanInterrupt(bool aCanInterrupt);
285 :
286 : /**
287 : * This is called when the final chunk has been
288 : * passed to the parser and the content sink has
289 : * interrupted token processing. It schedules
290 : * a ParserContinue PL_Event which will ask the parser
291 : * to HandleParserContinueEvent when it is handled.
292 : * @update kmcclusk6/1/2001
293 : */
294 : nsresult PostContinueEvent();
295 :
296 : /**
297 : * Fired when the continue parse event is triggered.
298 : * @update kmcclusk 5/18/98
299 : */
300 : void HandleParserContinueEvent(class nsParserContinueEvent *);
301 :
302 0 : virtual void Reset() override { // Cleanup() followed by Initialize() with its default aConstructor = false
303 0 : Cleanup();
304 0 : Initialize();
305 0 : }
306 :
307 45 : bool IsScriptExecuting() {
308 45 : return mSink && mSink->IsScriptExecuting(); // false when no sink is set
309 : }
310 :
311 45 : bool IsOkToProcessNetworkData() {
312 45 : return !IsScriptExecuting() && !mProcessingNetworkData; // ok only if no script is executing and mProcessingNetworkData is unset
313 : }
314 :
315 : protected:
316 :
317 : void Initialize(bool aConstructor = false);
318 : void Cleanup();
319 :
320 : /**
321 : * NOTE(review): purpose not documented here; by name, presumably runs before model building -- confirm.
322 : * @update gess5/18/98
323 : * @param aFilename NOTE(review): meaning not visible in this header -- confirm
324 : * @return nsresult
325 : */
326 : nsresult WillBuildModel(nsString& aFilename);
327 :
328 : /**
329 : * NOTE(review): purpose not documented here; by name, presumably runs after model building -- confirm.
330 : * @update gess5/18/98
331 : * @param anErrorCode an nsresult; NOTE(review): presumably the status of the build -- confirm
332 : * @return nsresult
333 : */
334 : nsresult DidBuildModel(nsresult anErrorCode);
335 :
336 : private:
337 :
338 : /*******************************************
339 : These are the tokenization methods...
340 : *******************************************/
341 :
342 : /**
343 : * Part of the code sandwich, this gets called right before
344 : * the tokenization process begins. The main reason for
345 : * this call is to allow the delegate to do initialization.
346 : *
347 : * @update gess 3/25/98
348 : * @param aIsFinalChunk defaults to false; NOTE(review): presumably marks the last chunk of input -- confirm
349 : * @return TRUE if it's ok to proceed
350 : */
351 : bool WillTokenize(bool aIsFinalChunk = false);
352 :
353 :
354 : /**
355 : * This is the primary control routine. It iteratively
356 : * consumes tokens until an error occurs or you run out
357 : * of data.
358 : *
359 : * @update gess 3/25/98
360 : * @return error code
361 : */
362 : nsresult Tokenize(bool aIsFinalChunk = false);
363 :
364 : /**
365 : * Pushes XML fragment parsing data to expat without an input stream.
366 : */
367 : nsresult Parse(const nsAString& aSourceBuffer,
368 : void* aKey,
369 : bool aLastCall);
370 :
371 : protected:
372 : //*********************************************
373 : // And now, some data members...
374 : //*********************************************
375 :
376 :
377 : CParserContext* mParserContext;
378 : nsCOMPtr<nsIDTD> mDTD;
379 : nsCOMPtr<nsIRequestObserver> mObserver;
380 : nsCOMPtr<nsIContentSink> mSink;
381 : nsIRunnable* mContinueEvent; // weak ref
382 :
383 : eParserCommands mCommand;
384 : nsresult mInternalState;
385 : nsresult mStreamStatus;
386 : int32_t mCharsetSource;
387 :
388 : uint16_t mFlags;
389 : uint32_t mBlocked;
390 :
391 : nsString mUnusedInput;
392 : NotNull<const Encoding*> mCharset;
393 : nsCString mCommandStr;
394 :
395 : bool mProcessingNetworkData;
396 : bool mIsAboutBlank;
397 : };
398 :
399 : #endif
400 :
|