Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #ifndef NS_HTML5_PARSER
7 : #define NS_HTML5_PARSER
8 :
9 : #include "nsAutoPtr.h"
10 : #include "nsIParser.h"
11 : #include "nsDeque.h"
12 : #include "nsIURL.h"
13 : #include "nsParserCIID.h"
14 : #include "nsITokenizer.h"
15 : #include "nsIContentSink.h"
16 : #include "nsIRequest.h"
17 : #include "nsIChannel.h"
18 : #include "nsCOMArray.h"
19 : #include "nsContentSink.h"
20 : #include "nsCycleCollectionParticipant.h"
21 : #include "nsIInputStream.h"
22 : #include "nsDetectionConfident.h"
23 : #include "nsHtml5OwningUTF16Buffer.h"
24 : #include "nsHtml5TreeOpExecutor.h"
25 : #include "nsHtml5StreamParser.h"
26 : #include "nsHtml5AtomTable.h"
27 : #include "nsWeakReference.h"
28 : #include "nsHtml5StreamListener.h"
29 :
30 : class nsHtml5Parser final : public nsIParser,
31 : public nsSupportsWeakReference
32 : {
33 : public:
34 : NS_DECL_CYCLE_COLLECTING_ISUPPORTS
35 :
36 149 : NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser)
37 :
38 : nsHtml5Parser();
39 :
40 : /* Start nsIParser */
41 : /**
42 : * No-op for backwards compat.
43 : */
44 : NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override;
45 :
46 : /**
47 : * Returns the tree op executor for backwards compat.
48 : */
49 : NS_IMETHOD_(nsIContentSink*) GetContentSink() override;
50 :
51 : /**
52 : * Always returns "view" for backwards compat.
53 : */
54 : NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override;
55 :
56 : /**
57 : * No-op for backwards compat.
58 : */
59 : NS_IMETHOD_(void) SetCommand(const char* aCommand) override;
60 :
61 : /**
62 : * No-op for backwards compat.
63 : */
64 : NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override;
65 :
66 : /**
67 : * Call this method once you've created a parser, and want to instruct it
68 : * about what charset to load
69 : *
70 : * @param aEncoding the charset of a document
71 : * @param aCharsetSource the source of the charset
72 : */
73 : virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding,
74 : int32_t aSource) override;
75 :
76 : /**
77 : * Get the channel associated with this parser
78 : * @param aChannel out param that will contain the result
79 : * @return NS_OK if successful or NS_NOT_AVAILABLE if not
80 : */
81 : NS_IMETHOD GetChannel(nsIChannel** aChannel) override;
82 :
83 : /**
84 : * Return |this| for backwards compat.
85 : */
86 : NS_IMETHOD GetDTD(nsIDTD** aDTD) override;
87 :
88 : /**
89 : * Get the stream parser for this parser
90 : */
91 : virtual nsIStreamListener* GetStreamListener() override;
92 :
93 : /**
94 : * Don't call. For interface compat only.
95 : */
96 : NS_IMETHOD ContinueInterruptedParsing() override;
97 :
98 : /**
99 : * Blocks the parser.
100 : */
101 : NS_IMETHOD_(void) BlockParser() override;
102 :
103 : /**
104 : * Unblocks the parser.
105 : */
106 : NS_IMETHOD_(void) UnblockParser() override;
107 :
108 : /**
109 : * Asynchronously continues parsing.
110 : */
111 : NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override;
112 :
113 : /**
114 : * Query whether the parser is enabled (i.e. not blocked) or not.
115 : */
116 : NS_IMETHOD_(bool) IsParserEnabled() override;
117 :
118 : /**
119 : * Query whether the parser thinks it's done with parsing.
120 : */
121 : NS_IMETHOD_(bool) IsComplete() override;
122 :
123 : /**
124 : * Set up request observer.
125 : *
126 : * @param aURL used for View Source title
127 : * @param aListener a listener to forward notifications to
128 : * @param aKey the root context key (used for document.write)
129 : * @param aMode ignored (for interface compat only)
130 : */
131 : NS_IMETHOD Parse(nsIURI* aURL,
132 : nsIRequestObserver* aListener = nullptr,
133 : void* aKey = 0,
134 : nsDTDMode aMode = eDTDMode_autodetect) override;
135 :
136 : /**
137 : * document.write and document.close
138 : *
139 : * @param aSourceBuffer the argument of document.write (empty for .close())
140 : * @param aKey a key unique to the script element that caused this call
141 : * @param aContentType "text/html" for HTML mode, else text/plain mode
142 : * @param aLastCall true if .close() false if .write()
143 : * @param aMode ignored (for interface compat only)
144 : */
145 : nsresult Parse(const nsAString& aSourceBuffer,
146 : void* aKey,
147 : const nsACString& aContentType,
148 : bool aLastCall,
149 : nsDTDMode aMode = eDTDMode_autodetect);
150 :
151 : /**
152 : * Stops the parser prematurely
153 : */
154 : NS_IMETHOD Terminate() override;
155 :
156 : /**
157 : * Don't call. For interface backwards compat only.
158 : */
159 : NS_IMETHOD ParseFragment(const nsAString& aSourceBuffer,
160 : nsTArray<nsString>& aTagStack) override;
161 :
162 : /**
163 : * Don't call. For interface compat only.
164 : */
165 : NS_IMETHOD BuildModel() override;
166 :
167 : /**
168 : * Don't call. For interface compat only.
169 : */
170 : NS_IMETHOD CancelParsingEvents() override;
171 :
172 : /**
173 : * Don't call. For interface compat only.
174 : */
175 : virtual void Reset() override;
176 :
177 : /**
178 : * True if the insertion point (per HTML5) is defined.
179 : */
180 : virtual bool IsInsertionPointDefined() override;
181 :
182 : /**
183 : * Call immediately before starting to evaluate a parser-inserted script or
184 : * in general when the spec says to define an insertion point.
185 : */
186 : virtual void PushDefinedInsertionPoint() override;
187 :
188 : /**
189 : * Call immediately after having evaluated a parser-inserted script or
190 : * generally want to restore to the state before the last
191 : * PushDefinedInsertionPoint call.
192 : */
193 : virtual void PopDefinedInsertionPoint() override;
194 :
195 : /**
196 : * Marks the HTML5 parser as not a script-created parser: Prepares the
197 : * parser to be able to read a stream.
198 : *
199 : * @param aCommand the parser command (Yeah, this is bad API design. Let's
200 : * make this better when retiring nsIParser)
201 : */
202 : virtual void MarkAsNotScriptCreated(const char* aCommand) override;
203 :
204 : /**
205 : * True if this is a script-created HTML5 parser.
206 : */
207 : virtual bool IsScriptCreated() override;
208 :
209 : /* End nsIParser */
210 :
211 : // Not from an external interface
212 : // Non-inherited methods
213 :
214 : public:
215 :
216 : /**
217 : * Initializes the parser to load from a channel.
218 : */
219 : virtual nsresult Initialize(nsIDocument* aDoc,
220 : nsIURI* aURI,
221 : nsISupports* aContainer,
222 : nsIChannel* aChannel);
223 :
224 : inline nsHtml5Tokenizer* GetTokenizer() {
225 : return mTokenizer;
226 : }
227 :
228 : void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, int32_t aLine);
229 :
230 2 : void DropStreamParser()
231 : {
232 2 : if (GetStreamParser()) {
233 2 : GetStreamParser()->DropTimer();
234 2 : mStreamListener->DropDelegate();
235 2 : mStreamListener = nullptr;
236 : }
237 2 : }
238 :
239 : void StartTokenizer(bool aScriptingEnabled);
240 :
241 : void ContinueAfterFailedCharsetSwitch();
242 :
243 35 : nsHtml5StreamParser* GetStreamParser()
244 : {
245 35 : if (!mStreamListener) {
246 0 : return nullptr;
247 : }
248 35 : return mStreamListener->GetDelegate();
249 : }
250 :
251 : /**
252 : * Parse until pending data is exhausted or a script blocks the parser
253 : */
254 : nsresult ParseUntilBlocked();
255 :
256 : private:
257 :
258 : virtual ~nsHtml5Parser();
259 :
260 : // State variables
261 :
262 : /**
263 : * Whether the last character tokenized was a carriage return (for CRLF)
264 : */
265 : bool mLastWasCR;
266 :
267 : /**
268 : * Whether the last character tokenized was a carriage return (for CRLF)
269 : * when preparsing document.write.
270 : */
271 : bool mDocWriteSpeculativeLastWasCR;
272 :
273 : /**
274 : * The parser is blocking on the load of an external script from a web
275 : * page, or any number of extension content scripts.
276 : */
277 : uint32_t mBlocked;
278 :
279 : /**
280 : * Whether the document.write() speculator is already active.
281 : */
282 : bool mDocWriteSpeculatorActive;
283 :
284 : /**
285 : * The number of PushDefinedInsertionPoint calls we've seen without a
286 : * matching PopDefinedInsertionPoint.
287 : */
288 : int32_t mInsertionPointPushLevel;
289 :
290 : /**
291 : * True if document.close() has been called.
292 : */
293 : bool mDocumentClosed;
294 :
295 : bool mInDocumentWrite;
296 :
297 : // Portable parser objects
298 : /**
299 : * The first buffer in the pending UTF-16 buffer queue
300 : */
301 : RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer;
302 :
303 : /**
304 : * The last buffer in the pending UTF-16 buffer queue. Always points
305 : * to a sentinel object with nullptr as its parser key.
306 : */
307 : nsHtml5OwningUTF16Buffer* mLastBuffer; // weak ref;
308 :
309 : /**
310 : * The tree operation executor
311 : */
312 : RefPtr<nsHtml5TreeOpExecutor> mExecutor;
313 :
314 : /**
315 : * The HTML5 tree builder
316 : */
317 : const nsAutoPtr<nsHtml5TreeBuilder> mTreeBuilder;
318 :
319 : /**
320 : * The HTML5 tokenizer
321 : */
322 : const nsAutoPtr<nsHtml5Tokenizer> mTokenizer;
323 :
324 : /**
325 : * Another HTML5 tree builder for preloading document.written content.
326 : */
327 : nsAutoPtr<nsHtml5TreeBuilder> mDocWriteSpeculativeTreeBuilder;
328 :
329 : /**
330 : * Another HTML5 tokenizer for preloading document.written content.
331 : */
332 : nsAutoPtr<nsHtml5Tokenizer> mDocWriteSpeculativeTokenizer;
333 :
334 : /**
335 : * The stream listener holding the stream parser.
336 : */
337 : RefPtr<nsHtml5StreamListener> mStreamListener;
338 :
339 : /**
340 : *
341 : */
342 : int32_t mRootContextLineNumber;
343 :
344 : /**
345 : * Whether it's OK to transfer parsing back to the stream parser
346 : */
347 : bool mReturnToStreamParserPermitted;
348 :
349 : /**
350 : * The scoped atom table
351 : */
352 : nsHtml5AtomTable mAtomTable;
353 :
354 : };
355 : #endif
|