Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set sw=2 ts=2 et tw=79: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "nsIAtom.h"
8 : #include "nsParser.h"
9 : #include "nsString.h"
10 : #include "nsCRT.h"
11 : #include "nsScanner.h"
12 : #include "plstr.h"
13 : #include "nsIStringStream.h"
14 : #include "nsIChannel.h"
15 : #include "nsICachingChannel.h"
16 : #include "nsIInputStream.h"
17 : #include "CNavDTD.h"
18 : #include "prenv.h"
19 : #include "prlock.h"
20 : #include "prcvar.h"
21 : #include "nsParserCIID.h"
22 : #include "nsReadableUtils.h"
23 : #include "nsCOMPtr.h"
24 : #include "nsExpatDriver.h"
25 : #include "nsIServiceManager.h"
26 : #include "nsICategoryManager.h"
27 : #include "nsISupportsPrimitives.h"
28 : #include "nsIFragmentContentSink.h"
29 : #include "nsStreamUtils.h"
30 : #include "nsHTMLTokenizer.h"
31 : #include "nsDataHashtable.h"
32 : #include "nsXPCOMCIDInternal.h"
33 : #include "nsMimeTypes.h"
34 : #include "mozilla/CondVar.h"
35 : #include "mozilla/Mutex.h"
36 : #include "nsCharsetSource.h"
37 : #include "nsThreadUtils.h"
38 : #include "nsIHTMLContentSink.h"
39 :
40 : #include "mozilla/BinarySearch.h"
41 : #include "mozilla/dom/ScriptLoader.h"
42 : #include "mozilla/Encoding.h"
43 :
44 : using namespace mozilla;
45 :
// Bit flags stored in nsParser::mFlags.

// Turned on by Initialize(); turned off by ParseFragment().
#define NS_PARSER_FLAG_OBSERVERS_ENABLED      0x00000004
// Turned on by PostContinueEvent() once a continue event has been dispatched;
// turned off when that event is handled or revoked.
#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000008
#define NS_PARSER_FLAG_FLUSH_TOKENS           0x00000020
// Turned on by Initialize().
#define NS_PARSER_FLAG_CAN_TOKENIZE           0x00000040
50 :
51 : //-------------- Begin ParseContinue Event Definition ------------------------
52 : /*
53 : The parser can be explicitly interrupted by passing a return value of
54 : NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause
55 : the parser to stop processing and allow the application to return to the event
56 : loop. The data which was left at the time of interruption will be processed
57 : the next time OnDataAvailable is called. If the parser has received its final
58 : chunk of data then OnDataAvailable will no longer be called by the networking
59 : module, so the parser will schedule a nsParserContinueEvent which will call
60 : the parser to process the remaining data after returning to the event loop.
61 : If the parser is interrupted while processing the remaining data it will
62 : schedule another ParseContinueEvent. The processing of data followed by
63 : scheduling of the continue events will proceed until either:
64 :
65 : 1) All of the remaining data can be processed without interrupting
66 : 2) The parser has been cancelled.
67 :
68 :
69 : This capability is currently used in CNavDTD and nsHTMLContentSink. The
70 : nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be
71 : processed and when each token is processed. The nsHTML content sink records
72 : the time when the chunk has started processing and will return
73 : NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a
74 : threshold called max tokenizing processing time. This allows the content sink
75 : to limit how much data is processed in a single chunk which in turn gates how
76 : much time is spent away from the event loop. Processing smaller chunks of data
77 : also reduces the time spent in subsequent reflows.
78 :
79 : This capability is most apparent when loading large documents. If the maximum
80 : token processing time is set small enough the application will remain
81 : responsive during document load.
82 :
83 : A side-effect of this capability is that document load is not complete when
84 : the last chunk of data is passed to OnDataAvailable since the parser may have
85 : been interrupted when the last chunk of data arrived. The document is complete
86 : when all of the document has been tokenized and there aren't any pending
87 : nsParserContinueEvents. This can cause problems if the application assumes
88 : that it can monitor the load requests to determine when the document load has
89 : been completed. This is what happens in Mozilla. The document is considered
90 : completely loaded when all of the load requests have been satisfied. To delay
91 : the document load until all of the parsing has been completed the
92 : nsHTMLContentSink adds a dummy parser load request which is not removed until
93 : the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call
94 : DidBuildModel until the final chunk of data has been passed to the parser
95 : through the OnDataAvailable and there aren't any pending
96 : nsParserContinueEvents.
97 :
98 : Currently the parser ignores requests to be interrupted during the
99 : processing of script. This is because a document.write followed by JavaScript
100 : calls to manipulate the DOM may fail if the parser was interrupted during the
101 : document.write.
102 :
103 : For more details @see bugzilla bug 76722
104 : */
105 :
106 :
107 0 : class nsParserContinueEvent : public Runnable
108 : {
109 : public:
110 : RefPtr<nsParser> mParser;
111 :
112 0 : explicit nsParserContinueEvent(nsParser* aParser)
113 0 : : mozilla::Runnable("nsParserContinueEvent")
114 0 : , mParser(aParser)
115 0 : {}
116 :
117 0 : NS_IMETHOD Run() override
118 : {
119 0 : mParser->HandleParserContinueEvent(this);
120 0 : return NS_OK;
121 : }
122 : };
123 :
124 : //-------------- End ParseContinue Event Definition ------------------------
125 :
126 : /**
127 : * default constructor
128 : */
129 23 : nsParser::nsParser()
130 23 : : mCharset(WINDOWS_1252_ENCODING)
131 : {
132 23 : Initialize(true);
133 23 : }
134 :
135 66 : nsParser::~nsParser()
136 : {
137 22 : Cleanup();
138 66 : }
139 :
140 : void
141 23 : nsParser::Initialize(bool aConstructor)
142 : {
143 23 : if (aConstructor) {
144 : // Raw pointer
145 23 : mParserContext = 0;
146 : }
147 : else {
148 : // nsCOMPtrs
149 0 : mObserver = nullptr;
150 0 : mUnusedInput.Truncate();
151 : }
152 :
153 23 : mContinueEvent = nullptr;
154 23 : mCharsetSource = kCharsetUninitialized;
155 23 : mCharset = WINDOWS_1252_ENCODING;
156 23 : mInternalState = NS_OK;
157 23 : mStreamStatus = NS_OK;
158 23 : mCommand = eViewNormal;
159 23 : mBlocked = 0;
160 23 : mFlags = NS_PARSER_FLAG_OBSERVERS_ENABLED |
161 : NS_PARSER_FLAG_CAN_TOKENIZE;
162 :
163 23 : mProcessingNetworkData = false;
164 23 : mIsAboutBlank = false;
165 23 : }
166 :
167 : void
168 22 : nsParser::Cleanup()
169 : {
170 : #ifdef DEBUG
171 22 : if (mParserContext && mParserContext->mPrevContext) {
172 0 : NS_WARNING("Extra parser contexts still on the parser stack");
173 : }
174 : #endif
175 :
176 66 : while (mParserContext) {
177 22 : CParserContext *pc = mParserContext->mPrevContext;
178 22 : delete mParserContext;
179 22 : mParserContext = pc;
180 : }
181 :
182 : // It should not be possible for this flag to be set when we are getting
183 : // destroyed since this flag implies a pending nsParserContinueEvent, which
184 : // has an owning reference to |this|.
185 22 : NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad");
186 22 : }
187 :
188 : NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser)
189 :
190 0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser)
191 0 : NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD)
192 0 : NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink)
193 0 : NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver)
194 0 : NS_IMPL_CYCLE_COLLECTION_UNLINK_END
195 :
196 0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser)
197 0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD)
198 0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink)
199 0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver)
200 0 : CParserContext *pc = tmp->mParserContext;
201 0 : while (pc) {
202 0 : cb.NoteXPCOMChild(pc->mTokenizer);
203 0 : pc = pc->mPrevContext;
204 : }
205 0 : NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
206 :
207 205 : NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser)
208 227 : NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser)
209 246 : NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser)
210 135 : NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
211 89 : NS_INTERFACE_MAP_ENTRY(nsIParser)
212 0 : NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
213 0 : NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
214 0 : NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser)
215 0 : NS_INTERFACE_MAP_END
216 :
217 : // The parser continue event is posted only if
218 : // all of the data to parse has been passed to ::OnDataAvailable
219 : // and the parser has been interrupted by the content sink
220 : // because the processing of tokens took too long.
221 :
222 : nsresult
223 0 : nsParser::PostContinueEvent()
224 : {
225 0 : if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) {
226 : // If this flag isn't set, then there shouldn't be a live continue event!
227 0 : NS_ASSERTION(!mContinueEvent, "bad");
228 :
229 : // This creates a reference cycle between this and the event that is
230 : // broken when the event fires.
231 0 : nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this);
232 0 : if (NS_FAILED(NS_DispatchToCurrentThread(event))) {
233 0 : NS_WARNING("failed to dispatch parser continuation event");
234 : } else {
235 0 : mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
236 0 : mContinueEvent = event;
237 : }
238 : }
239 0 : return NS_OK;
240 : }
241 :
242 : NS_IMETHODIMP_(void)
243 0 : nsParser::GetCommand(nsCString& aCommand)
244 : {
245 0 : aCommand = mCommandStr;
246 0 : }
247 :
248 : /**
249 : * Call this method once you've created a parser, and want to instruct it
250 : * about the command which caused the parser to be constructed. For example,
251 : * this allows us to select a DTD which can do, say, view-source.
252 : *
253 : * @param aCommand the command string to set
254 : */
255 : NS_IMETHODIMP_(void)
256 23 : nsParser::SetCommand(const char* aCommand)
257 : {
258 23 : mCommandStr.Assign(aCommand);
259 23 : if (mCommandStr.EqualsLiteral("view-source")) {
260 0 : mCommand = eViewSource;
261 23 : } else if (mCommandStr.EqualsLiteral("view-fragment")) {
262 0 : mCommand = eViewFragment;
263 : } else {
264 23 : mCommand = eViewNormal;
265 : }
266 23 : }
267 :
268 : /**
269 : * Call this method once you've created a parser, and want to instruct it
270 : * about the command which caused the parser to be constructed. For example,
271 : * this allows us to select a DTD which can do, say, view-source.
272 : *
273 : * @param aParserCommand the command to set
274 : */
275 : NS_IMETHODIMP_(void)
276 0 : nsParser::SetCommand(eParserCommands aParserCommand)
277 : {
278 0 : mCommand = aParserCommand;
279 0 : }
280 :
281 : /**
282 : * Call this method once you've created a parser, and want to instruct it
283 : * about what charset to load
284 : *
285 : * @param aCharset- the charset of a document
286 : * @param aCharsetSource- the source of the charset
287 : */
288 : void
289 45 : nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset,
290 : int32_t aCharsetSource)
291 : {
292 45 : mCharset = aCharset;
293 45 : mCharsetSource = aCharsetSource;
294 45 : if (mParserContext && mParserContext->mScanner) {
295 22 : mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
296 : }
297 45 : }
298 :
299 : void
300 22 : nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset)
301 : {
302 22 : if (mSink) {
303 22 : mSink->SetDocumentCharset(aCharset);
304 : }
305 22 : }
306 :
307 : /**
308 : * This method gets called in order to set the content
309 : * sink for this parser to dump nodes to.
310 : *
311 : * @param nsIContentSink interface for node receiver
312 : */
313 : NS_IMETHODIMP_(void)
314 23 : nsParser::SetContentSink(nsIContentSink* aSink)
315 : {
316 23 : NS_PRECONDITION(aSink, "sink cannot be null!");
317 23 : mSink = aSink;
318 :
319 23 : if (mSink) {
320 23 : mSink->SetParser(this);
321 46 : nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink);
322 23 : if (htmlSink) {
323 1 : mIsAboutBlank = true;
324 : }
325 : }
326 23 : }
327 :
328 : /**
329 : * retrieve the sink set into the parser
330 : * @return current sink
331 : */
332 : NS_IMETHODIMP_(nsIContentSink*)
333 44 : nsParser::GetContentSink()
334 : {
335 44 : return mSink;
336 : }
337 :
338 : static nsIDTD*
339 23 : FindSuitableDTD(CParserContext& aParserContext)
340 : {
341 : // We always find a DTD.
342 23 : aParserContext.mAutoDetectStatus = ePrimaryDetect;
343 :
344 : // Quick check for view source.
345 23 : MOZ_ASSERT(aParserContext.mParserCommand != eViewSource,
346 : "The old parser is not supposed to be used for View Source "
347 : "anymore.");
348 :
349 : // Now see if we're parsing HTML (which, as far as we're concerned, simply
350 : // means "not XML").
351 23 : if (aParserContext.mDocType != eXML) {
352 1 : return new CNavDTD();
353 : }
354 :
355 : // If we're here, then we'd better be parsing XML.
356 22 : NS_ASSERTION(aParserContext.mDocType == eXML, "What are you trying to send me, here?");
357 22 : return new nsExpatDriver();
358 : }
359 :
360 : NS_IMETHODIMP
361 0 : nsParser::CancelParsingEvents()
362 : {
363 0 : if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) {
364 0 : NS_ASSERTION(mContinueEvent, "mContinueEvent is null");
365 : // Revoke the pending continue parsing event
366 0 : mContinueEvent = nullptr;
367 0 : mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
368 : }
369 0 : return NS_OK;
370 : }
371 :
372 : ////////////////////////////////////////////////////////////////////////
373 :
/**
 * Evaluates EXPR1 and EXPR2 exactly once each, in that order. Stores the value
 * of EXPR2 in RV if EXPR2 fails, otherwise RV contains the result of EXPR1
 * (which could be success or failure).
 *
 * To understand the motivation for this construct, consider these example
 * methods:
 *
 *   nsresult nsSomething::DoThatThing(nsIWhatever* obj) {
 *     nsresult rv = NS_OK;
 *     ...
 *     rv = obj->DoThatThing();
 *     NS_ENSURE_SUCCESS(rv, rv);
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     return mSomething->DoThatThing(mWhatever);
 *   }
 *
 * Suppose, for whatever reason*, we want to shift responsibility for calling
 * mWhatever->DoThatThing() from nsSomething::DoThatThing up to
 * nsCaller::MakeThingsHappen. We might rewrite the two methods as follows:
 *
 *   nsresult nsSomething::DoThatThing() {
 *     nsresult rv = NS_OK;
 *     ...
 *     ...
 *     return rv;
 *   }
 *
 *   nsresult nsCaller::MakeThingsHappen() {
 *     nsresult rv;
 *     PREFER_LATTER_ERROR_CODE(mSomething->DoThatThing(),
 *                              mWhatever->DoThatThing(),
 *                              rv);
 *     return rv;
 *   }
 *
 * *Possible reasons include: nsCaller doesn't want to give mSomething access
 * to mWhatever, nsCaller wants to guarantee that mWhatever->DoThatThing() will
 * be called regardless of how nsSomething::DoThatThing behaves, &c.
 *
 * Both results are captured in temporaries before RV is assigned, so a
 * failure of the latter expression always wins and a failure of the former
 * expression is never lost when the latter succeeds.
 */
#define PREFER_LATTER_ERROR_CODE(EXPR1, EXPR2, RV) {                          \
  nsresult RV##_first = EXPR1;                                                \
  nsresult RV##_second = EXPR2;                                               \
  RV = NS_FAILED(RV##_second) ? RV##_second : RV##_first;                     \
}
425 :
426 : /**
427 : * This gets called just prior to the model actually
428 : * being constructed. It's important to make this the
429 : * last thing that happens right before parsing, so we
430 : * can delay until the last moment the resolution of
431 : * which DTD to use (unless of course we're assigned one).
432 : */
433 : nsresult
434 45 : nsParser::WillBuildModel(nsString& aFilename)
435 : {
436 45 : if (!mParserContext)
437 0 : return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT;
438 :
439 45 : if (eUnknownDetect != mParserContext->mAutoDetectStatus)
440 22 : return NS_OK;
441 :
442 46 : if (eDTDMode_unknown == mParserContext->mDTDMode ||
443 23 : eDTDMode_autodetect == mParserContext->mDTDMode) {
444 23 : if (mIsAboutBlank) {
445 1 : mParserContext->mDTDMode = eDTDMode_quirks;
446 1 : mParserContext->mDocType = eHTML_Quirks;
447 : } else {
448 22 : mParserContext->mDTDMode = eDTDMode_full_standards;
449 22 : mParserContext->mDocType = eXML;
450 : }
451 : } // else XML fragment with nested parser context
452 :
453 23 : NS_ASSERTION(!mDTD || !mParserContext->mPrevContext,
454 : "Clobbering DTD for non-root parser context!");
455 23 : mDTD = FindSuitableDTD(*mParserContext);
456 23 : NS_ENSURE_TRUE(mDTD, NS_ERROR_OUT_OF_MEMORY);
457 :
458 : nsITokenizer* tokenizer;
459 23 : nsresult rv = mParserContext->GetTokenizer(mDTD, mSink, tokenizer);
460 23 : NS_ENSURE_SUCCESS(rv, rv);
461 :
462 23 : rv = mDTD->WillBuildModel(*mParserContext, tokenizer, mSink);
463 23 : nsresult sinkResult = mSink->WillBuildModel(mDTD->GetMode());
464 : // nsIDTD::WillBuildModel used to be responsible for calling
465 : // nsIContentSink::WillBuildModel, but that obligation isn't expressible
466 : // in the nsIDTD interface itself, so it's sounder and simpler to give that
467 : // responsibility back to the parser. The former behavior of the DTD was to
468 : // NS_ENSURE_SUCCESS the sink WillBuildModel call, so if the sink returns
469 : // failure we should use sinkResult instead of rv, to preserve the old error
470 : // handling behavior of the DTD:
471 23 : return NS_FAILED(sinkResult) ? sinkResult : rv;
472 : }
473 :
474 : /**
475 : * This gets called when the parser is done with its input.
476 : * Note that the parser may have been called recursively, so we
477 : * have to check for a prev. context before closing out the DTD/sink.
478 : */
479 : nsresult
480 23 : nsParser::DidBuildModel(nsresult anErrorCode)
481 : {
482 23 : nsresult result = anErrorCode;
483 :
484 23 : if (IsComplete()) {
485 23 : if (mParserContext && !mParserContext->mPrevContext) {
486 : // Let sink know if we're about to end load because we've been terminated.
487 : // In that case we don't want it to run deferred scripts.
488 23 : bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING;
489 23 : if (mDTD && mSink) {
490 23 : nsresult dtdResult = mDTD->DidBuildModel(anErrorCode),
491 23 : sinkResult = mSink->DidBuildModel(terminated);
492 : // nsIDTD::DidBuildModel used to be responsible for calling
493 : // nsIContentSink::DidBuildModel, but that obligation isn't expressible
494 : // in the nsIDTD interface itself, so it's sounder and simpler to give
495 : // that responsibility back to the parser. The former behavior of the
496 : // DTD was to NS_ENSURE_SUCCESS the sink DidBuildModel call, so if the
497 : // sink returns failure we should use sinkResult instead of dtdResult,
498 : // to preserve the old error handling behavior of the DTD:
499 23 : result = NS_FAILED(sinkResult) ? sinkResult : dtdResult;
500 : }
501 :
502 : //Ref. to bug 61462.
503 23 : mParserContext->mRequest = nullptr;
504 : }
505 : }
506 :
507 23 : return result;
508 : }
509 :
510 : /**
511 : * This method adds a new parser context to the list,
512 : * pushing the current one to the next position.
513 : *
514 : * @param ptr to new context
515 : */
516 : void
517 23 : nsParser::PushContext(CParserContext& aContext)
518 : {
519 23 : NS_ASSERTION(aContext.mPrevContext == mParserContext,
520 : "Trying to push a context whose previous context differs from "
521 : "the current parser context.");
522 23 : mParserContext = &aContext;
523 23 : }
524 :
525 : /**
526 : * This method pops the topmost context off the stack,
527 : * returning it to the user. The next context (if any)
528 : * becomes the current context.
529 : * @update gess7/22/98
530 : * @return prev. context
531 : */
532 : CParserContext*
533 0 : nsParser::PopContext()
534 : {
535 0 : CParserContext* oldContext = mParserContext;
536 0 : if (oldContext) {
537 0 : mParserContext = oldContext->mPrevContext;
538 0 : if (mParserContext) {
539 : // If the old context was blocked, propagate the blocked state
540 : // back to the new one. Also, propagate the stream listener state
541 : // but don't override onStop state to guarantee the call to DidBuildModel().
542 0 : if (mParserContext->mStreamListenerState != eOnStop) {
543 0 : mParserContext->mStreamListenerState = oldContext->mStreamListenerState;
544 : }
545 : }
546 : }
547 0 : return oldContext;
548 : }
549 :
550 : /**
551 : * Call this when you want control whether or not the parser will parse
552 : * and tokenize input (TRUE), or whether it just caches input to be
553 : * parsed later (FALSE).
554 : *
555 : * @param aState determines whether we parse/tokenize or just cache.
556 : * @return current state
557 : */
558 : void
559 0 : nsParser::SetUnusedInput(nsString& aBuffer)
560 : {
561 0 : mUnusedInput = aBuffer;
562 0 : }
563 :
564 : /**
565 : * Call this when you want to *force* the parser to terminate the
566 : * parsing process altogether. This is binary -- so once you terminate
567 : * you can't resume without restarting altogether.
568 : */
569 : NS_IMETHODIMP
570 0 : nsParser::Terminate(void)
571 : {
572 : // We should only call DidBuildModel once, so don't do anything if this is
573 : // the second time that Terminate has been called.
574 0 : if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
575 0 : return NS_OK;
576 : }
577 :
578 0 : nsresult result = NS_OK;
579 : // XXX - [ until we figure out a way to break parser-sink circularity ]
580 : // Hack - Hold a reference until we are completely done...
581 0 : nsCOMPtr<nsIParser> kungFuDeathGrip(this);
582 0 : mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING;
583 :
584 : // CancelParsingEvents must be called to avoid leaking the nsParser object
585 : // @see bug 108049
586 : // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then CancelParsingEvents
587 : // will reset it so DidBuildModel will call DidBuildModel on the DTD. Note:
588 : // The IsComplete() call inside of DidBuildModel looks at the pendingContinueEvents flag.
589 0 : CancelParsingEvents();
590 :
591 : // If we got interrupted in the middle of a document.write, then we might
592 : // have more than one parser context on our parsercontext stack. This has
593 : // the effect of making DidBuildModel a no-op, meaning that we never call
594 : // our sink's DidBuildModel and break the reference cycle, causing a leak.
595 : // Since we're getting terminated, we manually clean up our context stack.
596 0 : while (mParserContext && mParserContext->mPrevContext) {
597 0 : CParserContext *prev = mParserContext->mPrevContext;
598 0 : delete mParserContext;
599 0 : mParserContext = prev;
600 : }
601 :
602 0 : if (mDTD) {
603 0 : mDTD->Terminate();
604 0 : DidBuildModel(result);
605 0 : } else if (mSink) {
606 : // We have no parser context or no DTD yet (so we got terminated before we
607 : // got any data). Manually break the reference cycle with the sink.
608 0 : result = mSink->DidBuildModel(true);
609 0 : NS_ENSURE_SUCCESS(result, result);
610 : }
611 :
612 0 : return NS_OK;
613 : }
614 :
615 : NS_IMETHODIMP
616 0 : nsParser::ContinueInterruptedParsing()
617 : {
618 : // If there are scripts executing, then the content sink is jumping the gun
619 : // (probably due to a synchronous XMLHttpRequest) and will re-enable us
620 : // later, see bug 460706.
621 0 : if (!IsOkToProcessNetworkData()) {
622 0 : return NS_OK;
623 : }
624 :
625 : // If the stream has already finished, there's a good chance
626 : // that we might start closing things down when the parser
627 : // is reenabled. To make sure that we're not deleted across
628 : // the reenabling process, hold a reference to ourselves.
629 0 : nsresult result=NS_OK;
630 0 : nsCOMPtr<nsIParser> kungFuDeathGrip(this);
631 0 : nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
632 :
633 : #ifdef DEBUG
634 0 : if (mBlocked) {
635 0 : NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser.");
636 : }
637 : #endif
638 :
639 0 : bool isFinalChunk = mParserContext &&
640 0 : mParserContext->mStreamListenerState == eOnStop;
641 :
642 0 : mProcessingNetworkData = true;
643 0 : if (sinkDeathGrip) {
644 0 : sinkDeathGrip->WillParse();
645 : }
646 0 : result = ResumeParse(true, isFinalChunk); // Ref. bug 57999
647 0 : mProcessingNetworkData = false;
648 :
649 0 : if (result != NS_OK) {
650 0 : result=mInternalState;
651 : }
652 :
653 0 : return result;
654 : }
655 :
656 : /**
657 : * Stops parsing temporarily. That is, it will prevent the
658 : * parser from building up content model while scripts
659 : * are being loaded (either an external script from a web
660 : * page, or any number of extension content scripts).
661 : */
662 : NS_IMETHODIMP_(void)
663 0 : nsParser::BlockParser()
664 : {
665 0 : mBlocked++;
666 0 : }
667 :
668 : /**
669 : * Open up the parser for tokenization, building up content
670 : * model..etc. However, this method does not resume parsing
671 : * automatically. It's the callers' responsibility to restart
672 : * the parsing engine.
673 : */
674 : NS_IMETHODIMP_(void)
675 0 : nsParser::UnblockParser()
676 : {
677 0 : MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0);
678 0 : if (MOZ_LIKELY(mBlocked > 0)) {
679 0 : mBlocked--;
680 : }
681 0 : }
682 :
683 : NS_IMETHODIMP_(void)
684 0 : nsParser::ContinueInterruptedParsingAsync()
685 : {
686 0 : MOZ_ASSERT(mSink);
687 0 : if (MOZ_LIKELY(mSink)) {
688 0 : mSink->ContinueInterruptedParsingAsync();
689 : }
690 0 : }
691 :
692 : /**
693 : * Call this to query whether the parser is enabled or not.
694 : */
695 : NS_IMETHODIMP_(bool)
696 22 : nsParser::IsParserEnabled()
697 : {
698 22 : return !mBlocked;
699 : }
700 :
701 : /**
702 : * Call this to query whether the parser thinks it's done with parsing.
703 : */
704 : NS_IMETHODIMP_(bool)
705 23 : nsParser::IsComplete()
706 : {
707 23 : return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT);
708 : }
709 :
710 :
711 0 : void nsParser::HandleParserContinueEvent(nsParserContinueEvent *ev)
712 : {
713 : // Ignore any revoked continue events...
714 0 : if (mContinueEvent != ev)
715 0 : return;
716 :
717 0 : mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT;
718 0 : mContinueEvent = nullptr;
719 :
720 0 : NS_ASSERTION(IsOkToProcessNetworkData(),
721 : "Interrupted in the middle of a script?");
722 0 : ContinueInterruptedParsing();
723 : }
724 :
725 : bool
726 0 : nsParser::IsInsertionPointDefined()
727 : {
728 0 : return false;
729 : }
730 :
731 : void
732 0 : nsParser::PushDefinedInsertionPoint()
733 : {
734 0 : }
735 :
736 : void
737 0 : nsParser::PopDefinedInsertionPoint()
738 : {
739 0 : }
740 :
741 : void
742 0 : nsParser::MarkAsNotScriptCreated(const char* aCommand)
743 : {
744 0 : }
745 :
746 : bool
747 0 : nsParser::IsScriptCreated()
748 : {
749 0 : return false;
750 : }
751 :
752 : /**
753 : * This is the main controlling routine in the parsing process.
754 : * Note that it may get called multiple times for the same scanner,
755 : * since this is a pushed based system, and all the tokens may
756 : * not have been consumed by the scanner during a given invocation
757 : * of this method.
758 : */
759 : NS_IMETHODIMP
760 23 : nsParser::Parse(nsIURI* aURL,
761 : nsIRequestObserver* aListener,
762 : void* aKey,
763 : nsDTDMode aMode)
764 : {
765 :
766 23 : NS_PRECONDITION(aURL, "Error: Null URL given");
767 :
768 23 : nsresult result = NS_ERROR_HTMLPARSER_BADURL;
769 23 : mObserver = aListener;
770 :
771 23 : if (aURL) {
772 46 : nsAutoCString spec;
773 23 : nsresult rv = aURL->GetSpec(spec);
774 23 : if (rv != NS_OK) {
775 0 : return rv;
776 : }
777 46 : NS_ConvertUTF8toUTF16 theName(spec);
778 :
779 23 : nsScanner* theScanner = new nsScanner(theName, false);
780 : CParserContext* pc = new CParserContext(mParserContext, theScanner, aKey,
781 23 : mCommand, aListener);
782 23 : if (pc && theScanner) {
783 23 : pc->mMultipart = true;
784 23 : pc->mContextType = CParserContext::eCTURL;
785 23 : pc->mDTDMode = aMode;
786 23 : PushContext(*pc);
787 :
788 23 : result = NS_OK;
789 : } else {
790 0 : result = mInternalState = NS_ERROR_HTMLPARSER_BADCONTEXT;
791 : }
792 : }
793 23 : return result;
794 : }
795 :
796 : /**
797 : * Used by XML fragment parsing below.
798 : *
799 : * @param aSourceBuffer contains a string-full of real content
800 : */
801 : nsresult
802 0 : nsParser::Parse(const nsAString& aSourceBuffer,
803 : void* aKey,
804 : bool aLastCall)
805 : {
806 0 : nsresult result = NS_OK;
807 :
808 : // Don't bother if we're never going to parse this.
809 0 : if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) {
810 0 : return result;
811 : }
812 :
813 0 : if (!aLastCall && aSourceBuffer.IsEmpty()) {
814 : // Nothing is being passed to the parser so return
815 : // immediately. mUnusedInput will get processed when
816 : // some data is actually passed in.
817 : // But if this is the last call, make sure to finish up
818 : // stuff correctly.
819 0 : return result;
820 : }
821 :
822 : // Maintain a reference to ourselves so we don't go away
823 : // till we're completely done.
824 0 : nsCOMPtr<nsIParser> kungFuDeathGrip(this);
825 :
826 0 : if (aLastCall || !aSourceBuffer.IsEmpty() || !mUnusedInput.IsEmpty()) {
827 : // Note: The following code will always find the parser context associated
828 : // with the given key, even if that context has been suspended (e.g., for
829 : // another document.write call). This doesn't appear to be exactly what IE
830 : // does in the case where this happens, but this makes more sense.
831 0 : CParserContext* pc = mParserContext;
832 0 : while (pc && pc->mKey != aKey) {
833 0 : pc = pc->mPrevContext;
834 : }
835 :
836 0 : if (!pc) {
837 : // Only make a new context if we don't have one, OR if we do, but has a
838 : // different context key.
839 0 : nsScanner* theScanner = new nsScanner(mUnusedInput);
840 0 : NS_ENSURE_TRUE(theScanner, NS_ERROR_OUT_OF_MEMORY);
841 :
842 0 : eAutoDetectResult theStatus = eUnknownDetect;
843 :
844 0 : if (mParserContext &&
845 0 : mParserContext->mMimeType.EqualsLiteral("application/xml")) {
846 : // Ref. Bug 90379
847 0 : NS_ASSERTION(mDTD, "How come the DTD is null?");
848 :
849 0 : if (mParserContext) {
850 0 : theStatus = mParserContext->mAutoDetectStatus;
851 : // Added this to fix bug 32022.
852 : }
853 : }
854 :
855 0 : pc = new CParserContext(mParserContext, theScanner, aKey, mCommand,
856 0 : 0, theStatus, aLastCall);
857 0 : NS_ENSURE_TRUE(pc, NS_ERROR_OUT_OF_MEMORY);
858 :
859 0 : PushContext(*pc);
860 :
861 0 : pc->mMultipart = !aLastCall; // By default
862 0 : if (pc->mPrevContext) {
863 0 : pc->mMultipart |= pc->mPrevContext->mMultipart;
864 : }
865 :
866 : // Start fix bug 40143
867 0 : if (pc->mMultipart) {
868 0 : pc->mStreamListenerState = eOnDataAvail;
869 0 : if (pc->mScanner) {
870 0 : pc->mScanner->SetIncremental(true);
871 : }
872 : } else {
873 0 : pc->mStreamListenerState = eOnStop;
874 0 : if (pc->mScanner) {
875 0 : pc->mScanner->SetIncremental(false);
876 : }
877 : }
878 : // end fix for 40143
879 :
880 0 : pc->mContextType=CParserContext::eCTString;
881 0 : pc->SetMimeType(NS_LITERAL_CSTRING("application/xml"));
882 0 : pc->mDTDMode = eDTDMode_full_standards;
883 :
884 0 : mUnusedInput.Truncate();
885 :
886 0 : pc->mScanner->Append(aSourceBuffer);
887 : // Do not interrupt document.write() - bug 95487
888 0 : result = ResumeParse(false, false, false);
889 : } else {
890 0 : pc->mScanner->Append(aSourceBuffer);
891 0 : if (!pc->mPrevContext) {
892 : // Set stream listener state to eOnStop, on the final context - Fix 68160,
893 : // to guarantee DidBuildModel() call - Fix 36148
894 0 : if (aLastCall) {
895 0 : pc->mStreamListenerState = eOnStop;
896 0 : pc->mScanner->SetIncremental(false);
897 : }
898 :
899 0 : if (pc == mParserContext) {
900 : // If pc is not mParserContext, then this call to ResumeParse would
901 : // do the wrong thing and try to continue parsing using
902 : // mParserContext. We need to wait to actually resume parsing on pc.
903 0 : ResumeParse(false, false, false);
904 : }
905 : }
906 : }
907 : }
908 :
909 0 : return result;
910 : }
911 :
912 : NS_IMETHODIMP
913 0 : nsParser::ParseFragment(const nsAString& aSourceBuffer,
914 : nsTArray<nsString>& aTagStack)
915 : {
916 0 : nsresult result = NS_OK;
917 0 : nsAutoString theContext;
918 0 : uint32_t theCount = aTagStack.Length();
919 0 : uint32_t theIndex = 0;
920 :
921 : // Disable observers for fragments
922 0 : mFlags &= ~NS_PARSER_FLAG_OBSERVERS_ENABLED;
923 :
924 0 : for (theIndex = 0; theIndex < theCount; theIndex++) {
925 0 : theContext.Append('<');
926 0 : theContext.Append(aTagStack[theCount - theIndex - 1]);
927 0 : theContext.Append('>');
928 : }
929 :
930 0 : if (theCount == 0) {
931 : // Ensure that the buffer is not empty. Because none of the DTDs care
932 : // about leading whitespace, this doesn't change the result.
933 0 : theContext.Assign(' ');
934 : }
935 :
936 : // First, parse the context to build up the DTD's tag stack. Note that we
937 : // pass false for the aLastCall parameter.
938 : result = Parse(theContext,
939 : (void*)&theContext,
940 0 : false);
941 0 : if (NS_FAILED(result)) {
942 0 : mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
943 0 : return result;
944 : }
945 :
946 0 : if (!mSink) {
947 : // Parse must have failed in the XML case and so the sink was killed.
948 0 : return NS_ERROR_HTMLPARSER_STOPPARSING;
949 : }
950 :
951 0 : nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink);
952 0 : NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink");
953 :
954 0 : fragSink->WillBuildContent();
955 : // Now, parse the actual content. Note that this is the last call
956 : // for HTML content, but for XML, we will want to build and parse
957 : // the end tags. However, if tagStack is empty, it's the last call
958 : // for XML as well.
959 0 : if (theCount == 0) {
960 : result = Parse(aSourceBuffer,
961 : &theContext,
962 0 : true);
963 0 : fragSink->DidBuildContent();
964 : } else {
965 : // Add an end tag chunk, so expat will read the whole source buffer,
966 : // and not worry about ']]' etc.
967 0 : result = Parse(aSourceBuffer + NS_LITERAL_STRING("</"),
968 : &theContext,
969 0 : false);
970 0 : fragSink->DidBuildContent();
971 :
972 0 : if (NS_SUCCEEDED(result)) {
973 0 : nsAutoString endContext;
974 0 : for (theIndex = 0; theIndex < theCount; theIndex++) {
975 : // we already added an end tag chunk above
976 0 : if (theIndex > 0) {
977 0 : endContext.AppendLiteral("</");
978 : }
979 :
980 0 : nsString& thisTag = aTagStack[theIndex];
981 : // was there an xmlns=?
982 0 : int32_t endOfTag = thisTag.FindChar(char16_t(' '));
983 0 : if (endOfTag == -1) {
984 0 : endContext.Append(thisTag);
985 : } else {
986 0 : endContext.Append(Substring(thisTag,0,endOfTag));
987 : }
988 :
989 0 : endContext.Append('>');
990 : }
991 :
992 : result = Parse(endContext,
993 : &theContext,
994 0 : true);
995 : }
996 : }
997 :
998 0 : mFlags |= NS_PARSER_FLAG_OBSERVERS_ENABLED;
999 :
1000 0 : return result;
1001 : }
1002 :
1003 : /**
1004 : * This routine is called to cause the parser to continue parsing its
1005 : * underlying stream. This call allows the parse process to happen in
1006 : * chunks, such as when the content is push based, and we need to parse in
1007 : * pieces.
1008 : *
1009 : * An interesting change in how the parser gets used has led us to add extra
1010 : * processing to this method. The case occurs when the parser is blocked in
1011 : * one context, and gets a parse(string) call in another context. In this
1012 : * case, the parserContexts are linked. No problem.
1013 : *
1014 : * The problem is that Parse(string) assumes that it can proceed unabated,
1015 : * but if the parser is already blocked that assumption is false. So we
1016 : * needed to add a mechanism here to allow the parser to continue to process
1017 : * (the pop and free) contexts until 1) it gets blocked again; 2) it runs
1018 : * out of contexts.
1019 : *
1020 : *
1021 : * @param allowIteration : set to true if non-script resumption is requested
1022 : * @param aIsFinalChunk : tells us when the last chunk of data is provided.
 * @param aCanInterrupt : not read anywhere in this function body.
1023 : * @return error code -- 0 if ok, non-zero if error.
 *         NS_ERROR_HTMLPARSER_BLOCK, NS_ERROR_HTMLPARSER_STOPPARSING and
 *         NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel() are all reported
 *         to the caller as NS_OK.
1024 : */
1025 : nsresult
1026 45 : nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk,
1027 : bool aCanInterrupt)
1028 : {
1029 45 : nsresult result = NS_OK;
1030 :
// Do nothing (and return NS_OK) while the parser is blocked or has already
// been put into the STOPPARSING state.
1031 45 : if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
1032 :
1033 45 : result = WillBuildModel(mParserContext->mScanner->GetFilename());
1034 45 : if (NS_FAILED(result)) {
1035 0 : mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE;
1036 0 : return result;
1037 : }
1038 :
1039 45 : if (mDTD) {
1040 45 : mSink->WillResume();
1041 45 : bool theIterationIsOk = true;
1042 :
// Each pass tokenizes (when allowed) and builds the model for the current
// mParserContext; the loop ends on any non-NS_OK result or when
// theIterationIsOk is cleared.
1043 89 : while (result == NS_OK && theIterationIsOk) {
1044 45 : if (!mUnusedInput.IsEmpty() && mParserContext->mScanner) {
1045 : // -- Ref: Bug# 22485 --
1046 : // Insert the unused input into the source buffer
1047 : // as if it was read from the input stream.
1048 : // Adding UngetReadable() per vidur!!
1049 0 : mParserContext->mScanner->UngetReadable(mUnusedInput);
1050 0 : mUnusedInput.Truncate(0);
1051 : }
1052 :
1053 : // Only allow parsing to be interrupted in the subsequent call to
1054 : // build model.
// Tokenize() is skipped (and treated as NS_OK) unless the CAN_TOKENIZE flag
// is set; BuildModel() runs either way.
1055 45 : nsresult theTokenizerResult = (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE)
1056 45 : ? Tokenize(aIsFinalChunk)
1057 45 : : NS_OK;
1058 45 : result = BuildModel();
1059 :
1060 45 : if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) {
1061 0 : PostContinueEvent();
1062 : }
1063 :
1064 45 : theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF &&
1065 0 : result != NS_ERROR_HTMLPARSER_INTERRUPTED;
1066 :
1067 : // Make sure not to stop parsing too early. Therefore, before shutting
1068 : // down the parser, it's important to check whether the input buffer
1069 : // has been scanned to completion (theTokenizerResult should be kEOF).
1070 : // kEOF -> End of buffer.
1071 :
1072 : // If we're told to block the parser, we disable all further parsing
1073 : // (and cache any data coming in) until the parser is re-enabled.
1074 45 : if (NS_ERROR_HTMLPARSER_BLOCK == result) {
1075 0 : mSink->WillInterrupt();
1076 0 : if (!mBlocked) {
1077 : // If we were blocked by a recursive invocation, don't re-block.
1078 0 : BlockParser();
1079 : }
1080 0 : return NS_OK;
1081 : }
1082 45 : if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
1083 : // Note: Parser Terminate() calls DidBuildModel.
1084 0 : if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
1085 0 : DidBuildModel(mStreamStatus);
1086 0 : mInternalState = result;
1087 : }
1088 :
1089 0 : return NS_OK;
1090 : }
// The current buffer is exhausted (tokenizer hit EOF with a clean model
// build) or the model build was interrupted.
1091 45 : if ((NS_OK == result &&
1092 0 : theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) ||
1093 0 : result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
1094 : bool theContextIsStringBased =
1095 45 : CParserContext::eCTString == mParserContext->mContextType;
1096 :
1097 67 : if (mParserContext->mStreamListenerState == eOnStop ||
1098 44 : !mParserContext->mMultipart || theContextIsStringBased) {
1099 23 : if (!mParserContext->mPrevContext) {
// No previous context is linked: finish the model only once the stream
// listener state has reached eOnStop.
1100 23 : if (mParserContext->mStreamListenerState == eOnStop) {
1101 23 : DidBuildModel(mStreamStatus);
1102 23 : return NS_OK;
1103 : }
1104 : } else {
// A previous context is linked: pop and delete the current one, carrying
// its unused scanner data over in mUnusedInput when mCopyUnused is set.
1105 0 : CParserContext* theContext = PopContext();
1106 0 : if (theContext) {
// theContextIsStringBased was computed before PopContext() was called.
1107 0 : theIterationIsOk = allowIteration && theContextIsStringBased;
1108 0 : if (theContext->mCopyUnused) {
1109 0 : if (!theContext->mScanner->CopyUnusedData(mUnusedInput)) {
1110 0 : mInternalState = NS_ERROR_OUT_OF_MEMORY;
1111 : }
1112 : }
1113 :
1114 0 : delete theContext;
1115 : }
1116 :
1117 0 : result = mInternalState;
1118 0 : aIsFinalChunk = mParserContext &&
1119 0 : mParserContext->mStreamListenerState == eOnStop;
1120 : // ...then intentionally fall through to mSink->WillInterrupt()...
1121 : }
1122 : }
1123 : }
1124 :
1125 22 : if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF ||
1126 0 : result == NS_ERROR_HTMLPARSER_INTERRUPTED) {
// INTERRUPTED is mapped to NS_OK here, so the while condition above does
// not end the loop because of it.
1127 22 : result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
1128 22 : mSink->WillInterrupt();
1129 : }
1130 : }
1131 : } else {
// WillBuildModel() succeeded but there is no DTD to drive.
1132 0 : mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD;
1133 : }
1134 : }
1135 :
1136 22 : return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result;
1137 : }
1138 :
1139 : /**
1140 : * This is where we loop over the tokens created in the
1141 : * tokenization phase, and try to make sense out of them.
1142 : */
1143 : nsresult
1144 45 : nsParser::BuildModel()
1145 : {
1146 45 : nsITokenizer* theTokenizer = nullptr;
1147 :
1148 45 : nsresult result = NS_OK;
1149 45 : if (mParserContext) {
1150 45 : result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1151 : }
1152 :
1153 45 : if (NS_SUCCEEDED(result)) {
1154 45 : if (mDTD) {
1155 45 : result = mDTD->BuildModel(theTokenizer, mSink);
1156 : }
1157 : } else {
1158 0 : mInternalState = result = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1159 : }
1160 45 : return result;
1161 : }
1162 :
1163 : /*******************************************************************
1164 : These methods are used to talk to the netlib system...
1165 : *******************************************************************/
1166 :
1167 : nsresult
1168 23 : nsParser::OnStartRequest(nsIRequest *request, nsISupports* aContext)
1169 : {
1170 23 : NS_PRECONDITION(eNone == mParserContext->mStreamListenerState,
1171 : "Parser's nsIStreamListener API was not setup "
1172 : "correctly in constructor.");
1173 23 : if (mObserver) {
1174 0 : mObserver->OnStartRequest(request, aContext);
1175 : }
1176 23 : mParserContext->mStreamListenerState = eOnStart;
1177 23 : mParserContext->mAutoDetectStatus = eUnknownDetect;
1178 23 : mParserContext->mRequest = request;
1179 :
1180 23 : NS_ASSERTION(!mParserContext->mPrevContext,
1181 : "Clobbering DTD for non-root parser context!");
1182 23 : mDTD = nullptr;
1183 :
1184 : nsresult rv;
1185 46 : nsAutoCString contentType;
1186 46 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
1187 23 : if (channel) {
1188 23 : rv = channel->GetContentType(contentType);
1189 23 : if (NS_SUCCEEDED(rv)) {
1190 23 : mParserContext->SetMimeType(contentType);
1191 : }
1192 : }
1193 :
1194 23 : rv = NS_OK;
1195 :
1196 46 : return rv;
1197 : }
1198 :
1199 : static bool
1200 0 : ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, int32_t aLen,
1201 : nsCString& oCharset)
1202 : {
1203 : // This code is rather pointless to have. Might as well reuse expat as
1204 : // seen in nsHtml5StreamParser. -- hsivonen
1205 0 : oCharset.Truncate();
1206 0 : if ((aLen >= 5) &&
1207 0 : ('<' == aBytes[0]) &&
1208 0 : ('?' == aBytes[1]) &&
1209 0 : ('x' == aBytes[2]) &&
1210 0 : ('m' == aBytes[3]) &&
1211 0 : ('l' == aBytes[4])) {
1212 : int32_t i;
1213 0 : bool versionFound = false, encodingFound = false;
1214 0 : for (i = 6; i < aLen && !encodingFound; ++i) {
1215 : // end of XML declaration?
1216 0 : if ((((char*) aBytes)[i] == '?') &&
1217 0 : ((i + 1) < aLen) &&
1218 0 : (((char*) aBytes)[i + 1] == '>')) {
1219 0 : break;
1220 : }
1221 : // Version is required.
1222 0 : if (!versionFound) {
1223 : // Want to avoid string comparisons, hence looking for 'n'
1224 : // and only if found check the string leading to it. Not
1225 : // foolproof, but fast.
1226 : // The shortest string allowed before this is (strlen==13):
1227 : // <?xml version
1228 0 : if ((((char*) aBytes)[i] == 'n') &&
1229 0 : (i >= 12) &&
1230 0 : (0 == PL_strncmp("versio", (char*) (aBytes + i - 6), 6))) {
1231 : // Fast forward through version
1232 0 : char q = 0;
1233 0 : for (++i; i < aLen; ++i) {
1234 0 : char qi = ((char*) aBytes)[i];
1235 0 : if (qi == '\'' || qi == '"') {
1236 0 : if (q && q == qi) {
1237 : // ending quote
1238 0 : versionFound = true;
1239 0 : break;
1240 : } else {
1241 : // Starting quote
1242 0 : q = qi;
1243 : }
1244 : }
1245 : }
1246 : }
1247 : } else {
1248 : // encoding must follow version
1249 : // Want to avoid string comparisons, hence looking for 'g'
1250 : // and only if found check the string leading to it. Not
1251 : // foolproof, but fast.
1252 : // The shortest allowed string before this (strlen==26):
1253 : // <?xml version="1" encoding
1254 0 : if ((((char*) aBytes)[i] == 'g') && (i >= 25) && (0 == PL_strncmp(
1255 0 : "encodin", (char*) (aBytes + i - 7), 7))) {
1256 0 : int32_t encStart = 0;
1257 0 : char q = 0;
1258 0 : for (++i; i < aLen; ++i) {
1259 0 : char qi = ((char*) aBytes)[i];
1260 0 : if (qi == '\'' || qi == '"') {
1261 0 : if (q && q == qi) {
1262 0 : int32_t count = i - encStart;
1263 : // encoding value is invalid if it is UTF-16
1264 0 : if (count > 0 && PL_strncasecmp("UTF-16",
1265 0 : (char*) (aBytes + encStart), count)) {
1266 0 : oCharset.Assign((char*) (aBytes + encStart), count);
1267 : }
1268 0 : encodingFound = true;
1269 0 : break;
1270 : } else {
1271 0 : encStart = i + 1;
1272 0 : q = qi;
1273 : }
1274 : }
1275 : }
1276 : }
1277 : } // if (!versionFound)
1278 : } // for
1279 : }
1280 0 : return !oCharset.IsEmpty();
1281 : }
1282 :
1283 : inline char
1284 : GetNextChar(nsACString::const_iterator& aStart,
1285 : nsACString::const_iterator& aEnd)
1286 : {
1287 : NS_ASSERTION(aStart != aEnd, "end of buffer");
1288 : return (++aStart != aEnd) ? *aStart : '\0';
1289 : }
1290 :
1291 : static nsresult
1292 0 : NoOpParserWriteFunc(nsIInputStream* in,
1293 : void* closure,
1294 : const char* fromRawSegment,
1295 : uint32_t toOffset,
1296 : uint32_t count,
1297 : uint32_t *writeCount)
1298 : {
1299 0 : *writeCount = count;
1300 0 : return NS_OK;
1301 : }
1302 :
1303 : typedef struct {
1304 : bool mNeedCharsetCheck;
1305 : nsParser* mParser;
1306 : nsScanner* mScanner;
1307 : nsIRequest* mRequest;
1308 : } ParserWriteStruct;
1309 :
1310 : /*
1311 : * This function is invoked as a result of a call to a stream's
1312 : * ReadSegments() method. It is called for each contiguous buffer
1313 : * of data in the underlying stream or pipe. Using ReadSegments
1314 : * allows us to avoid copying data to read out of the stream.
1315 : */
1316 : static nsresult
1317 22 : ParserWriteFunc(nsIInputStream* in,
1318 : void* closure,
1319 : const char* fromRawSegment,
1320 : uint32_t toOffset,
1321 : uint32_t count,
1322 : uint32_t *writeCount)
1323 : {
1324 : nsresult result;
1325 22 : ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure);
1326 : const unsigned char* buf =
1327 22 : reinterpret_cast<const unsigned char*> (fromRawSegment);
1328 22 : uint32_t theNumRead = count;
1329 :
1330 22 : if (!pws) {
1331 0 : return NS_ERROR_FAILURE;
1332 : }
1333 :
1334 22 : if (pws->mNeedCharsetCheck) {
1335 22 : pws->mNeedCharsetCheck = false;
1336 : int32_t source;
1337 22 : auto preferred = pws->mParser->GetDocumentCharset(source);
1338 :
1339 : // This code was bogus when I found it. It expects the BOM or the XML
1340 : // declaration to be entirely in the first network buffer. -- hsivonen
1341 : const Encoding* encoding;
1342 : size_t bomLength;
1343 22 : Tie(encoding, bomLength) = Encoding::ForBOM(MakeSpan(buf, count));
1344 : Unused << bomLength;
1345 22 : if (encoding) {
1346 : // The decoder will swallow the BOM. The UTF-16 will re-sniff for
1347 : // endianness. The value of preferred is now "UTF-8", "UTF-16LE"
1348 : // or "UTF-16BE".
1349 0 : preferred = WrapNotNull(encoding);
1350 0 : source = kCharsetFromByteOrderMark;
1351 22 : } else if (source < kCharsetFromChannel) {
1352 0 : nsAutoCString declCharset;
1353 :
1354 0 : if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) {
1355 0 : encoding = Encoding::ForLabel(declCharset);
1356 0 : if (encoding) {
1357 0 : preferred = WrapNotNull(encoding);
1358 0 : source = kCharsetFromMetaTag;
1359 : }
1360 : }
1361 : }
1362 :
1363 22 : pws->mParser->SetDocumentCharset(preferred, source);
1364 22 : pws->mParser->SetSinkCharset(preferred);
1365 :
1366 : }
1367 :
1368 22 : result = pws->mScanner->Append(fromRawSegment, theNumRead);
1369 22 : if (NS_SUCCEEDED(result)) {
1370 22 : *writeCount = count;
1371 : }
1372 :
1373 22 : return result;
1374 : }
1375 :
1376 : nsresult
1377 22 : nsParser::OnDataAvailable(nsIRequest *request, nsISupports* aContext,
1378 : nsIInputStream *pIStream, uint64_t sourceOffset,
1379 : uint32_t aLength)
1380 : {
1381 22 : NS_PRECONDITION((eOnStart == mParserContext->mStreamListenerState ||
1382 : eOnDataAvail == mParserContext->mStreamListenerState),
1383 : "Error: OnStartRequest() must be called before OnDataAvailable()");
1384 22 : NS_PRECONDITION(NS_InputStreamIsBuffered(pIStream),
1385 : "Must have a buffered input stream");
1386 :
1387 22 : nsresult rv = NS_OK;
1388 :
1389 22 : if (mIsAboutBlank) {
1390 0 : MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank");
1391 : // ... but if an extension tries to feed us data for about:blank in a
1392 : // release build, silently ignore the data.
1393 : uint32_t totalRead;
1394 : rv = pIStream->ReadSegments(NoOpParserWriteFunc,
1395 : nullptr,
1396 : aLength,
1397 : &totalRead);
1398 : return rv;
1399 : }
1400 :
1401 22 : CParserContext *theContext = mParserContext;
1402 :
1403 22 : while (theContext && theContext->mRequest != request) {
1404 0 : theContext = theContext->mPrevContext;
1405 : }
1406 :
1407 22 : if (theContext) {
1408 22 : theContext->mStreamListenerState = eOnDataAvail;
1409 :
1410 22 : if (eInvalidDetect == theContext->mAutoDetectStatus) {
1411 0 : if (theContext->mScanner) {
1412 0 : nsScannerIterator iter;
1413 0 : theContext->mScanner->EndReading(iter);
1414 0 : theContext->mScanner->SetPosition(iter, true);
1415 : }
1416 : }
1417 :
1418 : uint32_t totalRead;
1419 : ParserWriteStruct pws;
1420 22 : pws.mNeedCharsetCheck = true;
1421 22 : pws.mParser = this;
1422 22 : pws.mScanner = theContext->mScanner;
1423 22 : pws.mRequest = request;
1424 :
1425 22 : rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead);
1426 22 : if (NS_FAILED(rv)) {
1427 0 : return rv;
1428 : }
1429 :
1430 22 : if (IsOkToProcessNetworkData()) {
1431 44 : nsCOMPtr<nsIParser> kungFuDeathGrip(this);
1432 44 : nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink);
1433 22 : mProcessingNetworkData = true;
1434 22 : if (sinkDeathGrip) {
1435 22 : sinkDeathGrip->WillParse();
1436 : }
1437 22 : rv = ResumeParse();
1438 22 : mProcessingNetworkData = false;
1439 : }
1440 : } else {
1441 0 : rv = NS_ERROR_UNEXPECTED;
1442 : }
1443 :
1444 22 : return rv;
1445 : }
1446 :
1447 : /**
1448 : * This is called by the networking library once the last block of data
1449 : * has been collected from the net.
1450 : */
1451 : nsresult
1452 23 : nsParser::OnStopRequest(nsIRequest *request, nsISupports* aContext,
1453 : nsresult status)
1454 : {
1455 23 : nsresult rv = NS_OK;
1456 :
1457 23 : CParserContext *pc = mParserContext;
1458 23 : while (pc) {
1459 23 : if (pc->mRequest == request) {
1460 23 : pc->mStreamListenerState = eOnStop;
1461 23 : pc->mScanner->SetIncremental(false);
1462 23 : break;
1463 : }
1464 :
1465 0 : pc = pc->mPrevContext;
1466 : }
1467 :
1468 23 : mStreamStatus = status;
1469 :
1470 23 : if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) {
1471 23 : mProcessingNetworkData = true;
1472 23 : if (mSink) {
1473 23 : mSink->WillParse();
1474 : }
1475 23 : rv = ResumeParse(true, true);
1476 23 : mProcessingNetworkData = false;
1477 : }
1478 :
1479 : // If the parser isn't enabled, we don't finish parsing till
1480 : // it is reenabled.
1481 :
1482 :
1483 : // XXX Should we wait to notify our observers as well if the
1484 : // parser isn't yet enabled?
1485 23 : if (mObserver) {
1486 0 : mObserver->OnStopRequest(request, aContext, status);
1487 : }
1488 :
1489 23 : return rv;
1490 : }
1491 :
1492 :
1493 : /*******************************************************************
1494 : Here come the tokenization methods...
1495 : *******************************************************************/
1496 :
1497 :
1498 : /**
1499 : * Part of the code sandwich, this gets called right before
1500 : * the tokenization process begins. The main reason for
1501 : * this call is to allow the delegate to do initialization.
1502 : */
1503 : bool
1504 45 : nsParser::WillTokenize(bool aIsFinalChunk)
1505 : {
1506 45 : if (!mParserContext) {
1507 0 : return true;
1508 : }
1509 :
1510 : nsITokenizer* theTokenizer;
1511 45 : nsresult result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1512 45 : NS_ENSURE_SUCCESS(result, false);
1513 45 : return NS_SUCCEEDED(theTokenizer->WillTokenize(aIsFinalChunk));
1514 : }
1515 :
1516 :
1517 : /**
1518 : * This is the primary control routine to consume tokens.
1519 : * It iteratively consumes tokens until an error occurs or
1520 : * you run out of data.
1521 : */
1522 45 : nsresult nsParser::Tokenize(bool aIsFinalChunk)
1523 : {
1524 : nsITokenizer* theTokenizer;
1525 :
1526 45 : nsresult result = NS_ERROR_NOT_AVAILABLE;
1527 45 : if (mParserContext) {
1528 45 : result = mParserContext->GetTokenizer(mDTD, mSink, theTokenizer);
1529 : }
1530 :
1531 45 : if (NS_SUCCEEDED(result)) {
1532 45 : bool flushTokens = false;
1533 :
1534 45 : bool killSink = false;
1535 :
1536 45 : WillTokenize(aIsFinalChunk);
1537 45 : while (NS_SUCCEEDED(result)) {
1538 45 : mParserContext->mScanner->Mark();
1539 90 : result = theTokenizer->ConsumeToken(*mParserContext->mScanner,
1540 90 : flushTokens);
1541 45 : if (NS_FAILED(result)) {
1542 45 : mParserContext->mScanner->RewindToMark();
1543 45 : if (NS_ERROR_HTMLPARSER_EOF == result) {
1544 45 : break;
1545 : }
1546 0 : if (NS_ERROR_HTMLPARSER_STOPPARSING == result) {
1547 0 : killSink = true;
1548 0 : result = Terminate();
1549 0 : break;
1550 : }
1551 0 : } else if (flushTokens && (mFlags & NS_PARSER_FLAG_OBSERVERS_ENABLED)) {
1552 : // I added the extra test of NS_PARSER_FLAG_OBSERVERS_ENABLED to fix Bug# 23931.
1553 : // Flush tokens on seeing </SCRIPT> -- Ref: Bug# 22485 --
1554 : // Also remember to update the marked position.
1555 0 : mFlags |= NS_PARSER_FLAG_FLUSH_TOKENS;
1556 0 : mParserContext->mScanner->Mark();
1557 0 : break;
1558 : }
1559 : }
1560 :
1561 45 : if (killSink) {
1562 0 : mSink = nullptr;
1563 : }
1564 : } else {
1565 0 : result = mInternalState = NS_ERROR_HTMLPARSER_BADTOKENIZER;
1566 : }
1567 :
1568 45 : return result;
1569 : }
1570 :
1571 : /**
1572 : * Get the channel associated with this parser
1573 : *
1574 : * @param aChannel out param that will contain the result
1575 : * @return NS_OK if successful
1576 : */
1577 : NS_IMETHODIMP
1578 0 : nsParser::GetChannel(nsIChannel** aChannel)
1579 : {
1580 0 : nsresult result = NS_ERROR_NOT_AVAILABLE;
1581 0 : if (mParserContext && mParserContext->mRequest) {
1582 0 : result = CallQueryInterface(mParserContext->mRequest, aChannel);
1583 : }
1584 0 : return result;
1585 : }
1586 :
1587 : /**
1588 : * Get the DTD associated with this parser
1589 : */
1590 : NS_IMETHODIMP
1591 0 : nsParser::GetDTD(nsIDTD** aDTD)
1592 : {
1593 0 : if (mParserContext) {
1594 0 : NS_IF_ADDREF(*aDTD = mDTD);
1595 : }
1596 :
1597 0 : return NS_OK;
1598 : }
1599 :
1600 : /**
1601 : * Get this as nsIStreamListener
1602 : */
1603 : nsIStreamListener*
1604 1 : nsParser::GetStreamListener()
1605 : {
1606 1 : return this;
1607 : }
|