/src/mozilla-central/parser/html/nsHtml5StreamParser.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set sw=2 ts=2 et tw=79: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "nsHtml5StreamParser.h" |
8 | | |
9 | | #include "mozilla/DebugOnly.h" |
10 | | #include "mozilla/Encoding.h" |
11 | | #include "nsContentUtils.h" |
12 | | #include "nsCyrillicDetector.h" |
13 | | #include "nsHtml5Tokenizer.h" |
14 | | #include "nsIHttpChannel.h" |
15 | | #include "nsHtml5Parser.h" |
16 | | #include "nsHtml5TreeBuilder.h" |
17 | | #include "nsHtml5AtomTable.h" |
18 | | #include "nsHtml5Module.h" |
19 | | #include "nsHtml5StreamParserPtr.h" |
20 | | #include "nsIDocShell.h" |
21 | | #include "nsIScriptError.h" |
22 | | #include "mozilla/Preferences.h" |
23 | | #include "mozilla/StaticPrefs.h" |
24 | | #include "mozilla/SystemGroup.h" |
25 | | #include "mozilla/UniquePtrExtensions.h" |
26 | | #include "nsHtml5Highlighter.h" |
27 | | #include "expat_config.h" |
28 | | #include "expat.h" |
29 | | #include "nsINestedURI.h" |
30 | | #include "nsCharsetSource.h" |
31 | | #include "nsIWyciwygChannel.h" |
32 | | #include "nsIThreadRetargetableRequest.h" |
33 | | #include "nsPrintfCString.h" |
34 | | #include "nsNetUtil.h" |
35 | | #include "nsUdetXPCOMWrapper.h" |
36 | | #include "nsXULAppAPI.h" |
37 | | #include "mozilla/SchedulerGroup.h" |
38 | | #include "nsJSEnvironment.h" |
39 | | |
40 | | using namespace mozilla; |
41 | | |
42 | | /* |
43 | | * Note that nsHtml5StreamParser implements cycle collecting AddRef and |
44 | | * Release. Therefore, nsHtml5StreamParser must never be refcounted from |
45 | | * the parser thread! |
46 | | * |
47 | | * To work around this limitation, runnables posted by the main thread to the |
48 | | * parser thread hold their reference to the stream parser in an |
49 | | * nsHtml5StreamParserPtr. Upon creation, nsHtml5StreamParserPtr addrefs the |
50 | | * object it holds |
51 | | * just like a regular nsRefPtr. This is OK, since the creation of the |
52 | | * runnable and the nsHtml5StreamParserPtr happens on the main thread. |
53 | | * |
54 | | * When the runnable is done on the parser thread, the destructor of |
55 | | * nsHtml5StreamParserPtr runs there. It doesn't call Release on the held object |
56 | | * directly. Instead, it posts another runnable back to the main thread where |
57 | | * that runnable calls Release on the wrapped object. |
58 | | * |
59 | | * When posting runnables in the other direction, the runnables have to be |
60 | | * created on the main thread when nsHtml5StreamParser is instantiated and |
61 | | * held for the lifetime of the nsHtml5StreamParser. This works, because the |
62 | | * same runnable can be dispatched multiple times and currently runnables |
63 | | * posted from the parser thread to main thread don't need to wrap any |
64 | | * runnable-specific data. (In the other direction, the runnables most notably |
65 | | * wrap the byte data of the stream.) |
66 | | */ |
// Cycle-collecting AddRef/Release for nsHtml5StreamParser. Per the note above,
// this is why the object must never be refcounted from the parser thread.
67 | | NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser) |
68 | | NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser) |
69 | | |
// Interface table: nsICharsetDetectionObserver is the only interface listed
// here; remaining queries are handed to the cycle collection map segue.
70 | 0 | NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser) |
71 | 0 | NS_INTERFACE_TABLE(nsHtml5StreamParser, nsICharsetDetectionObserver) |
72 | 0 | NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser) |
73 | 0 | NS_INTERFACE_MAP_END |
74 | | |
// Cycle collection participant for nsHtml5StreamParser: the unlink and
// traverse implementations below describe the edges this object owns.
75 | | NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5StreamParser) |
76 | | |
// Unlink: drop the flush timer first, then release the observer, request and
// owner, the two flusher runnables, the executor and the charset detector.
77 | 0 | NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser) |
78 | 0 | tmp->DropTimer(); |
79 | 0 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mObserver) |
80 | 0 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mRequest) |
81 | 0 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mOwner) |
82 | 0 | tmp->mExecutorFlusher = nullptr; |
83 | 0 | tmp->mLoadFlusher = nullptr; |
84 | 0 | tmp->mExecutor = nullptr; |
85 | 0 | NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet) |
86 | 0 | NS_IMPL_CYCLE_COLLECTION_UNLINK_END |
87 | | |
// Traverse: report mObserver, mRequest and mOwner directly. The executor is
// reported once per flusher runnable that exists (each runnable holds its own
// RefPtr to the executor), and this object is reported as a child of itself
// when a charset detector exists.
88 | 0 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser) |
89 | 0 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mObserver) |
90 | 0 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mRequest) |
91 | 0 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mOwner) |
92 | 0 | // hack: count the strongly owned edge wrapped in the runnable |
93 | 0 | if (tmp->mExecutorFlusher) { |
94 | 0 | NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mExecutorFlusher->mExecutor"); |
95 | 0 | cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor)); |
96 | 0 | } |
97 | 0 | // hack: count the strongly owned edge wrapped in the runnable |
98 | 0 | if (tmp->mLoadFlusher) { |
99 | 0 | NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor"); |
100 | 0 | cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor)); |
101 | 0 | } |
102 | 0 | // hack: count self if held by mChardet |
103 | 0 | if (tmp->mChardet) { |
104 | 0 | NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver"); |
105 | 0 | cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp)); |
106 | 0 | } |
107 | 0 | NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END |
108 | | |
// Runnable that holds a strong reference to a tree op executor and, when run,
// asks it to run its flush loop.
109 | | class nsHtml5ExecutorFlusher : public Runnable |
110 | | { |
111 | | private: |
// Strong reference; this is the edge the cycle collection traverse code
// reports as "mExecutorFlusher->mExecutor".
112 | | RefPtr<nsHtml5TreeOpExecutor> mExecutor; |
113 | | |
114 | | public: |
115 | | explicit nsHtml5ExecutorFlusher(nsHtml5TreeOpExecutor* aExecutor) |
116 | | : Runnable("nsHtml5ExecutorFlusher") |
117 | | , mExecutor(aExecutor) |
118 | 0 | { |
119 | 0 | } |
// Runs the executor's flush loop unless the executor reports isInList().
// NOTE(review): presumably isInList() means the executor is already queued
// for flushing elsewhere -- confirm against nsHtml5TreeOpExecutor.
120 | | NS_IMETHOD Run() override |
121 | 0 | { |
122 | 0 | if (!mExecutor->isInList()) { |
123 | 0 | mExecutor->RunFlushLoop(); |
124 | 0 | } |
125 | 0 | return NS_OK; |
126 | 0 | } |
127 | | }; |
128 | | |
// Runnable that holds a strong reference to a tree op executor and, when run,
// asks it to flush its speculative loads.
129 | | class nsHtml5LoadFlusher : public Runnable |
130 | | { |
131 | | private: |
// Strong reference; this is the edge the cycle collection traverse code
// reports as "mLoadFlusher->mExecutor".
132 | | RefPtr<nsHtml5TreeOpExecutor> mExecutor; |
133 | | |
134 | | public: |
135 | | explicit nsHtml5LoadFlusher(nsHtml5TreeOpExecutor* aExecutor) |
136 | | : Runnable("nsHtml5LoadFlusher") |
137 | | , mExecutor(aExecutor) |
138 | 0 | { |
139 | 0 | } |
// Unconditionally calls FlushSpeculativeLoads() on the executor; always
// returns NS_OK.
140 | | NS_IMETHOD Run() override |
141 | 0 | { |
142 | 0 | mExecutor->FlushSpeculativeLoads(); |
143 | 0 | return NS_OK; |
144 | 0 | } |
145 | | }; |
146 | | |
// Constructs the stream parser. Must run on the main thread (asserted below).
//
// aExecutor: tree op executor; its stage is handed to the tree builder (and
//            to the view-source highlighter), and both flusher runnables are
//            created around it.
// aOwner:    the owning nsHtml5Parser, stored in mOwner.
// aMode:     parser mode; decides how the tree builder and tokenizer are
//            configured and whether a view-source highlighter is installed.
//
// The initial encoding is windows-1252 with an uninitialized charset source;
// the sniffing code changes both later.
147 | | nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor, |
148 | | nsHtml5Parser* aOwner, |
149 | | eParserMode aMode) |
150 | | : mSniffingLength(0) |
151 | | , mBomState(eBomState::BOM_SNIFFING_NOT_STARTED) |
152 | | , mCharsetSource(kCharsetUninitialized) |
153 | | , mEncoding(WINDOWS_1252_ENCODING) |
154 | | , mReparseForbidden(false) |
155 | | , mLastBuffer(nullptr) // Will be filled when starting |
156 | | , mExecutor(aExecutor) |
// The tree builder's first argument is null in both view-source modes and
// the executor's stage otherwise; its second argument is the stage only in
// NORMAL mode.
157 | | , mTreeBuilder(new nsHtml5TreeBuilder( |
158 | | (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) |
159 | | ? nullptr |
160 | | : mExecutor->GetStage(), |
161 | | aMode == NORMAL ? mExecutor->GetStage() : nullptr)) |
162 | | , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML)) |
163 | | , mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex") |
164 | | , mOwner(aOwner) |
165 | | , mLastWasCR(false) |
166 | | , mStreamState(eHtml5StreamState::STREAM_NOT_STARTED) |
167 | | , mSpeculating(false) |
168 | | , mAtEOF(false) |
169 | | , mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex") |
170 | | , mSpeculationFailureCount(0) |
171 | | , mTerminated(false) |
172 | | , mInterrupted(false) |
173 | | , mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex") |
174 | | , mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()) |
175 | | , mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)) |
176 | | , mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)) |
177 | | , mFeedChardet(false) |
178 | | , mInitialEncodingWasFromParentFrame(false) |
179 | | , mHasHadErrors(false) |
180 | | , mFlushTimer(NS_NewTimer()) |
181 | | , mFlushTimerMutex("nsHtml5StreamParser mFlushTimerMutex") |
182 | | , mFlushTimerArmed(false) |
183 | | , mFlushTimerEverFired(false) |
184 | | , mMode(aMode) |
185 | 0 | { |
186 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
// Deliver flush timer callbacks to the stream parser thread's event target.
187 | 0 | mFlushTimer->SetTarget(mEventTarget); |
188 | | #ifdef DEBUG |
189 | | mAtomTable.SetPermittedLookupEventTarget(mEventTarget); |
190 | | #endif |
191 | | mTokenizer->setInterner(&mAtomTable); |
192 | 0 | mTokenizer->setEncodingDeclarationHandler(this); |
193 | 0 |

// View-source modes share one highlighter between tokenizer and tree builder.
194 | 0 | if (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML) { |
195 | 0 | nsHtml5Highlighter* highlighter = |
196 | 0 | new nsHtml5Highlighter(mExecutor->GetStage()); |
197 | 0 | mTokenizer->EnableViewSource(highlighter); // takes ownership |
198 | 0 | mTreeBuilder->EnableViewSource(highlighter); // doesn't own |
199 | 0 | } |
200 | 0 |

201 | 0 | // Chardet instantiation adapted from File. |
202 | 0 | // Chardet is initialized here even if it turns out to be useless |
203 | 0 | // to make the chardet refcount its observer (nsHtml5StreamParser) |
204 | 0 | // on the main thread. |
205 | 0 | nsAutoCString detectorName; |
206 | 0 | Preferences::GetLocalizedCString("intl.charset.detector", detectorName); |
207 | 0 | if (!detectorName.IsEmpty()) { |
208 | 0 | // We recognize one of the three magic strings for the following languages. |
209 | 0 | if (detectorName.EqualsLiteral("ruprob")) { |
210 | 0 | mChardet = new nsRUProbDetector(); |
211 | 0 | } else if (detectorName.EqualsLiteral("ukprob")) { |
212 | 0 | mChardet = new nsUKProbDetector(); |
213 | 0 | } else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) { |
214 | 0 | mChardet = new nsJAPSMDetector(); |
215 | 0 | } else { |
// Unrecognized detector name: run without a detector.
216 | 0 | mChardet = nullptr; |
217 | 0 | } |
218 | 0 | if (mChardet) { |
// The result of Init() is deliberately ignored; feeding is enabled regardless.
219 | 0 | (void)mChardet->Init(this); |
220 | 0 | mFeedChardet = true; |
221 | 0 | } |
222 | 0 | } |
223 | 0 |

224 | 0 | // There's a zeroing operator new for everything else |
225 | 0 | } |
226 | | |
// Destructor. Must run on the main thread (asserted below). Ends the
// tokenizer and, when recording or replaying, closes the content parse that
// SetViewSourceTitle() opened for this object.
227 | | nsHtml5StreamParser::~nsHtml5StreamParser() |
228 | 0 | { |
229 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
230 | 0 | mTokenizer->end(); |
231 | 0 | if (recordreplay::IsRecordingOrReplaying()) { |
232 | 0 | recordreplay::EndContentParse(this); |
233 | 0 | } |
// Debug builds only: verify (under the timer mutex) that the flush timer has
// already been dropped, then null out the members explicitly.
// NOTE(review): presumably the explicit nulling is there to make stale
// accesses easier to spot in debug builds -- confirm.
234 | | #ifdef DEBUG |
235 | | { |
236 | | mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex); |
237 | | MOZ_ASSERT(!mFlushTimer, "Flush timer was not dropped before dtor!"); |
238 | | } |
239 | | mRequest = nullptr; |
240 | | mObserver = nullptr; |
241 | | mUnicodeDecoder = nullptr; |
242 | | mSniffingBuffer = nullptr; |
243 | | mMetaScanner = nullptr; |
244 | | mFirstBuffer = nullptr; |
245 | | mExecutor = nullptr; |
246 | | mTreeBuilder = nullptr; |
247 | | mTokenizer = nullptr; |
248 | | mOwner = nullptr; |
249 | | #endif |
250 | | } |
251 | | |
// Hands out the current request as an nsIChannel. Main thread only (asserted).
//
// aChannel: out; receives the result of querying mRequest for nsIChannel.
// Returns the CallQueryInterface result, or NS_ERROR_NOT_AVAILABLE when there
// is no request (in which case aChannel is not touched).
252 | | nsresult |
253 | | nsHtml5StreamParser::GetChannel(nsIChannel** aChannel) |
254 | 0 | { |
255 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
256 | 0 | return mRequest ? CallQueryInterface(mRequest, aChannel) |
257 | 0 | : NS_ERROR_NOT_AVAILABLE; |
258 | 0 | } |
259 | | |
// nsICharsetDetectionObserver callback: the charset detector reports a guess.
// Parser thread only (asserted).
//
// aCharset: encoding label reported by the detector.
// aConf:    detector confidence; only eBestAnswer and eSureAnswer are acted on.
// Always returns NS_OK, including when the guess is ignored.
260 | | NS_IMETHODIMP |
261 | | nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) |
262 | 0 | { |
263 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
264 | 0 | if (aConf == eBestAnswer || aConf == eSureAnswer) { |
265 | 0 | mFeedChardet = false; // just in case |
266 | 0 | auto encoding = |
267 | 0 | Encoding::ForLabelNoReplacement(nsDependentCString(aCharset)); |
268 | 0 | if (!encoding) { |
// The label did not resolve to an encoding: ignore the guess.
269 | 0 | return NS_OK; |
270 | 0 | } |
271 | 0 | if (HasDecoder()) { |
272 | 0 | if (mEncoding == encoding) { |
// The detector agrees with the decoder already in use: only upgrade the
// charset source and tell the tree builder.
273 | 0 | NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection, |
274 | 0 | "Why are we running chardet at all?"); |
275 | 0 | mCharsetSource = kCharsetFromAutoDetection; |
276 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
277 | 0 | } else { |
278 | 0 | // We've already committed to a decoder. Request a reload from the |
279 | 0 | // docshell. |
280 | 0 | mTreeBuilder->NeedsCharsetSwitchTo( |
281 | 0 | WrapNotNull(encoding), kCharsetFromAutoDetection, 0); |
282 | 0 | FlushTreeOpsAndDisarmTimer(); |
283 | 0 | Interrupt(); |
284 | 0 | } |
285 | 0 | } else { |
286 | 0 | // Got a confident answer from the sniffing buffer. That code will |
287 | 0 | // take care of setting up the decoder. |
288 | 0 | mEncoding = WrapNotNull(encoding); |
289 | 0 | mCharsetSource = kCharsetFromAutoDetection; |
290 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
291 | 0 | } |
292 | 0 | } |
293 | 0 | return NS_OK; |
294 | 0 | } |
295 | | |
// Computes the title shown for a view-source document and stores it in
// mViewSourceTitle. When recording or replaying, also opens the content parse
// that the destructor closes again.
//
// aURL: URL of the document; may be null, in which case the title is left
//       untouched and the content parse is opened with an empty spec.
//
// For a view-source: URL the inner URI is used. data: URLs are replaced by
// "data:" plus an ellipsis, and a failing GetSpec() yields just an ellipsis.
296 | | void |
297 | | nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) |
298 | 0 | { |
299 | 0 | if (recordreplay::IsRecordingOrReplaying()) { |
300 | 0 | nsAutoCString spec; |
// aURL is allowed to be null (see the check below), so do not dereference it
// unconditionally. Still open the content parse so that it stays paired with
// EndContentParse() in the destructor.
301 | 0 | if (aURL) { aURL->GetSpec(spec); } |
302 | 0 | recordreplay::BeginContentParse(this, spec.get(), "text/html"); |
303 | 0 | } |
304 | 0 |

305 | 0 | if (aURL) { |
306 | 0 | nsCOMPtr<nsIURI> temp; |
// Initialized because the SchemeIs() result is ignored; a failing call must
// not leave the flag indeterminate.
307 | 0 | bool isViewSource = false; |
308 | 0 | aURL->SchemeIs("view-source", &isViewSource); |
309 | 0 | if (isViewSource) { |
// NOTE(review): assumes every view-source: URI implements nsINestedURI and
// yields a non-null inner URI; neither is checked here -- confirm.
310 | 0 | nsCOMPtr<nsINestedURI> nested = do_QueryInterface(aURL); |
311 | 0 | nested->GetInnerURI(getter_AddRefs(temp)); |
312 | 0 | } else { |
313 | 0 | temp = aURL; |
314 | 0 | } |
315 | 0 | bool isData = false; |
316 | 0 | temp->SchemeIs("data", &isData); |
317 | 0 | if (isData) { |
318 | 0 | // Avoid showing potentially huge data: URLs. The three last bytes are |
319 | 0 | // UTF-8 for an ellipsis. |
320 | 0 | mViewSourceTitle.AssignLiteral("data:\xE2\x80\xA6"); |
321 | 0 | } else { |
322 | 0 | nsresult rv = temp->GetSpec(mViewSourceTitle); |
323 | 0 | if (NS_FAILED(rv)) { |
324 | 0 | mViewSourceTitle.AssignLiteral("\xE2\x80\xA6"); |
325 | 0 | } |
326 | 0 | } |
327 | 0 | } |
328 | 0 | } |
329 | | |
// Creates the decoder for the current mEncoding and pushes the bytes seen so
// far through it: first the sniffing buffer (if any), then the current
// segment (if any). Parser thread only (asserted).
//
// aFromSegment: bytes of the current segment; may be null.
// aCount:       length of aFromSegment.
// aWriteCount:  out; only passed on to WriteStreamBytes() when aFromSegment is
//               non-null, so it is not written here otherwise.
// Returns the first failing result from WriteStreamBytes(), else NS_OK.
330 | | nsresult |
331 | | nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
332 | | const uint8_t* aFromSegment, // can be null |
333 | | uint32_t aCount, |
334 | | uint32_t* aWriteCount) |
335 | 0 | { |
336 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
337 | 0 | nsresult rv = NS_OK; |
338 | 0 | mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval(); |
339 | 0 | if (mSniffingBuffer) { |
// The count written for the sniffing buffer is not reported to the caller.
340 | 0 | uint32_t writeCount; |
341 | 0 | rv = WriteStreamBytes(mSniffingBuffer.get(), mSniffingLength, &writeCount); |
// On failure this returns before the sniffing buffer is released.
342 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
343 | 0 | mSniffingBuffer = nullptr; |
344 | 0 | } |
// Sniffing is over; the meta scanner is dropped as well.
345 | 0 | mMetaScanner = nullptr; |
346 | 0 | if (aFromSegment) { |
347 | 0 | rv = WriteStreamBytes(aFromSegment, aCount, aWriteCount); |
348 | 0 | } |
349 | 0 | return rv; |
350 | 0 | } |
351 | | |
// Commits to the encoding indicated by a byte order mark. Parser thread only
// (asserted).
//
// aEncoding: the encoding the BOM identified.
// Sets mEncoding and a decoder created without BOM handling, marks the charset
// source as kCharsetFromByteOrderMark, stops feeding chardet, informs the tree
// builder, drops the sniffing buffer and meta scanner, and ends BOM sniffing.
// Always returns NS_OK.
352 | | nsresult |
353 | | nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding) |
354 | 0 | { |
355 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
356 | 0 | mEncoding = aEncoding; |
357 | 0 | mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling(); |
358 | 0 | mCharsetSource = kCharsetFromByteOrderMark; |
359 | 0 | mFeedChardet = false; |
360 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
361 | 0 | mSniffingBuffer = nullptr; |
362 | 0 | mMetaScanner = nullptr; |
363 | 0 | mBomState = BOM_SNIFFING_OVER; |
364 | 0 | return NS_OK; |
365 | 0 | } |
366 | | |
// Heuristic for BOM-less UTF-16 whose content is Basic Latin only: looks at
// the sniffing buffer followed by the first aCountToSniffingLimit bytes of
// aFromSegment and checks that every even-offset byte is zero and every
// odd-offset byte is non-zero, or the other way round.
//
// aFromSegment:          bytes of the current segment; may be null.
// aCountToSniffingLimit: number of bytes of aFromSegment to examine.
//
// On a match, sets mEncoding to UTF-16LE (non-zero bytes at even offsets) or
// UTF-16BE, sets the charset source to kCharsetFromIrreversibleAutoDetection,
// informs the tree builder, stops feeding chardet and queues the
// "EncBomlessUtf16" complaint. Otherwise returns without changing any state.
367 | | void |
368 | | nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment, |
369 | | uint32_t aCountToSniffingLimit) |
370 | 0 | { |
371 | 0 | // Avoid underspecified heuristic craziness for XHR |
372 | 0 | if (mMode == LOAD_AS_DATA) { |
373 | 0 | return; |
374 | 0 | } |
375 | 0 | // Make sure there's enough data. Require room for "<title></title>" |
376 | 0 | if (mSniffingLength + aCountToSniffingLimit < 30) { |
377 | 0 | return; |
378 | 0 | } |
379 | 0 | // even-numbered bytes tracked at 0, odd-numbered bytes tracked at 1 |
380 | 0 | bool byteZero[2] = { false, false }; |
381 | 0 | bool byteNonZero[2] = { false, false }; |
// i is declared outside the loop so that, after the sniffing buffer has been
// walked, it carries the byte offset into the scan of aFromSegment below.
382 | 0 | uint32_t i = 0; |
383 | 0 | if (mSniffingBuffer) { |
384 | 0 | for (; i < mSniffingLength; ++i) { |
385 | 0 | if (mSniffingBuffer[i]) { |
// A non-zero byte here while the other parity has also seen a non-zero byte
// breaks the alternating pattern: give up.
386 | 0 | if (byteNonZero[1 - (i % 2)]) { |
387 | 0 | return; |
388 | 0 | } |
389 | 0 | byteNonZero[i % 2] = true; |
390 | 0 | } else { |
// Likewise for zero bytes seen at both parities.
391 | 0 | if (byteZero[1 - (i % 2)]) { |
392 | 0 | return; |
393 | 0 | } |
394 | 0 | byteZero[i % 2] = true; |
395 | 0 | } |
396 | 0 | } |
397 | 0 | } |
398 | 0 | if (aFromSegment) { |
// Same check, continuing at overall offset i + j so that the parity stays
// consistent across the two buffers.
399 | 0 | for (uint32_t j = 0; j < aCountToSniffingLimit; ++j) { |
400 | 0 | if (aFromSegment[j]) { |
401 | 0 | if (byteNonZero[1 - ((i + j) % 2)]) { |
402 | 0 | return; |
403 | 0 | } |
404 | 0 | byteNonZero[(i + j) % 2] = true; |
405 | 0 | } else { |
406 | 0 | if (byteZero[1 - ((i + j) % 2)]) { |
407 | 0 | return; |
408 | 0 | } |
409 | 0 | byteZero[(i + j) % 2] = true; |
410 | 0 | } |
411 | 0 | } |
412 | 0 | } |
413 | 0 |

// Non-zero bytes at even offsets mean the low byte comes first: little endian.
414 | 0 | if (byteNonZero[0]) { |
415 | 0 | mEncoding = UTF_16LE_ENCODING; |
416 | 0 | } else { |
417 | 0 | mEncoding = UTF_16BE_ENCODING; |
418 | 0 | } |
419 | 0 | mCharsetSource = kCharsetFromIrreversibleAutoDetection; |
420 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
421 | 0 | mFeedChardet = false; |
422 | 0 | mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0); |
423 | 0 | } |
424 | | |
// Records the encoding named in an XML declaration, as reported by expat.
//
// aEncoding: null-terminated UTF-16 encoding name; may be null.
//
// The name is converted to UTF-8 and resolved through
// PreferredForInternalEncodingDecl(). If that yields an encoding it becomes
// mEncoding; in every other case (null name or unresolved label) mEncoding is
// set to UTF-8. The charset source is kCharsetFromMetaTag on both paths.
425 | | void |
426 | | nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding) |
427 | 0 | { |
428 | 0 | if (aEncoding) { |
429 | 0 | nsDependentString utf16(aEncoding); |
430 | 0 | nsAutoCString utf8; |
431 | 0 | CopyUTF16toUTF8(utf16, utf8); |
432 | 0 | auto encoding = PreferredForInternalEncodingDecl(utf8); |
433 | 0 | if (encoding) { |
434 | 0 | mEncoding = WrapNotNull(encoding); |
435 | 0 | mCharsetSource = kCharsetFromMetaTag; // closest for XML |
436 | 0 | return; |
437 | 0 | } |
438 | 0 | // else the page declared an encoding Gecko doesn't support and we'd |
439 | 0 | // end up defaulting to UTF-8 anyway. Might as well fall through here |
440 | 0 | // right away and let the encoding be set to UTF-8 which we'd default to |
441 | 0 | // anyway. |
442 | 0 | } |
443 | 0 | mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM |
444 | 0 | mCharsetSource = kCharsetFromMetaTag; // means confident |
445 | 0 | } |
446 | | |
447 | | // A separate user data struct is used instead of passing the |
448 | | // nsHtml5StreamParser instance as user data in order to avoid including |
449 | | // expat.h in nsHtml5StreamParser.h. Doing that would cause naming conflicts. |
450 | | // Using a separate user data struct also avoids bloating nsHtml5StreamParser |
451 | | // by one pointer. |
// User data handed to the expat callbacks below.
452 | | struct UserData |
453 | | { |
// The expat parser, so that callbacks can stop it.
454 | | XML_Parser mExpat; |
// The stream parser that receives the declared encoding.
455 | | nsHtml5StreamParser* mStreamParser; |
456 | | }; |
457 | | |
458 | | // Using no-namespace handler callbacks to avoid including expat.h in |
459 | | // nsHtml5StreamParser.h, since doing so would cause naming conflicts. |
// expat XML declaration callback: forwards the declared encoding name to the
// stream parser and then stops the parser.
//
// aUserData:   the UserData registered with XML_SetUserData().
// aVersion:    unused.
// aEncoding:   encoding name from the declaration; passed on unchecked, so it
//              may be null (SetEncodingFromExpat() handles null).
// aStandalone: unused.
460 | | static void |
461 | | HandleXMLDeclaration(void* aUserData, |
462 | | const XML_Char* aVersion, |
463 | | const XML_Char* aEncoding, |
464 | | int aStandalone) |
465 | 0 | { |
466 | 0 | UserData* ud = static_cast<UserData*>(aUserData); |
// NOTE(review): the cast assumes XML_Char is a 16-bit type in this build of
// expat -- confirm against expat_config.h.
467 | 0 | ud->mStreamParser->SetEncodingFromExpat( |
468 | 0 | reinterpret_cast<const char16_t*>(aEncoding)); |
// Second argument false: the stop is not resumable.
469 | 0 | XML_StopParser(ud->mExpat, false); |
470 | 0 | } |
471 | | |
// expat start-element callback: only stops the parser (non-resumably).
// aName and aAtts are unused.
472 | | static void |
473 | | HandleStartElement(void* aUserData, |
474 | | const XML_Char* aName, |
475 | | const XML_Char** aAtts) |
476 | 0 | { |
477 | 0 | UserData* ud = static_cast<UserData*>(aUserData); |
478 | 0 | XML_StopParser(ud->mExpat, false); |
479 | 0 | } |
480 | | |
// expat end-element callback: only stops the parser (non-resumably).
// aName is unused.
481 | | static void |
482 | | HandleEndElement(void* aUserData, const XML_Char* aName) |
483 | 0 | { |
484 | 0 | UserData* ud = static_cast<UserData*>(aUserData); |
485 | 0 | XML_StopParser(ud->mExpat, false); |
486 | 0 | } |
487 | | |
// expat comment callback: only stops the parser (non-resumably).
// aName is unused.
488 | | static void |
489 | | HandleComment(void* aUserData, const XML_Char* aName) |
490 | 0 | { |
491 | 0 | UserData* ud = static_cast<UserData*>(aUserData); |
492 | 0 | XML_StopParser(ud->mExpat, false); |
493 | 0 | } |
494 | | |
// expat processing-instruction callback: only stops the parser
// (non-resumably). aTarget and aData are unused.
495 | | static void |
496 | | HandleProcessingInstruction(void* aUserData, |
497 | | const XML_Char* aTarget, |
498 | | const XML_Char* aData) |
499 | 0 | { |
500 | 0 | UserData* ud = static_cast<UserData*>(aUserData); |
501 | 0 | XML_StopParser(ud->mExpat, false); |
502 | 0 | } |
503 | | |
// Makes the final encoding decision once sniffing is over, then sets up the
// decoder and writes out the sniffing buffer plus the current segment.
// Parser thread only (asserted).
//
// aFromSegment:          bytes of the current network event; may be null.
// aCount:                length of aFromSegment.
// aWriteCount:           out; forwarded to
//                        SetupDecodingAndWriteSniffingBufferAndCurrentSegment().
// aCountToSniffingLimit: number of bytes of aFromSegment that still belong to
//                        the sniffed prefix of the stream.
//
// In VIEW_SOURCE_XML mode the encoding comes from the XML declaration (parsed
// with expat), defaulting to UTF-8. In the other modes the order is: keep an
// existing charset source of at least kCharsetFromHintPrevDoc, else try the
// BOM-less UTF-16 heuristic, else chardet, else fall back.
//
// Returns a failing result from chardet, or otherwise the result of
// SetupDecodingAndWriteSniffingBufferAndCurrentSegment().
504 | | nsresult |
505 | | nsHtml5StreamParser::FinalizeSniffing( |
506 | | const uint8_t* aFromSegment, // can be null |
507 | | uint32_t aCount, |
508 | | uint32_t* aWriteCount, |
509 | | uint32_t aCountToSniffingLimit) |
510 | 0 | { |
511 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
512 | 0 | NS_ASSERTION(mCharsetSource < kCharsetFromParentForced, |
513 | 0 | "Should not finalize sniffing when using forced charset."); |
514 | 0 | if (mMode == VIEW_SOURCE_XML) { |
// Memory handling suite for expat: moz_xmalloc/moz_xrealloc and free.
515 | 0 | static const XML_Memory_Handling_Suite memsuite = { |
516 | 0 | (void* (*)(size_t))moz_xmalloc, |
517 | 0 | (void* (*)(void*, size_t))moz_xrealloc, |
518 | 0 | free |
519 | 0 | }; |
520 | 0 |

521 | 0 | static const char16_t kExpatSeparator[] = { 0xFFFF, '\0' }; |
522 | 0 |

523 | 0 | static const char16_t kISO88591[] = { 'I', 'S', 'O', '-', '8', '8', |
524 | 0 | '5', '9', '-', '1', '\0' }; |
525 | 0 |

526 | 0 | UserData ud; |
527 | 0 | ud.mStreamParser = this; |
528 | 0 |

529 | 0 | // If we got this far, the stream didn't have a BOM. UTF-16-encoded XML |
530 | 0 | // documents MUST begin with a BOM. We don't support EBCDIC and such. |
531 | 0 | // Thus, at this point, what we have is garbage or something encoded using |
532 | 0 | // a rough ASCII superset. ISO-8859-1 allows us to decode ASCII bytes |
533 | 0 | // without throwing errors when bytes have the most significant bit set |
534 | 0 | // and without triggering expat's unknown encoding code paths. This is |
535 | 0 | // enough to be able to use expat to parse the XML declaration in order |
536 | 0 | // to extract the encoding name from it. |
537 | 0 | ud.mExpat = XML_ParserCreate_MM(kISO88591, &memsuite, kExpatSeparator); |
// Every handler stops the parser; only the XML declaration handler also
// records an encoding on this object.
538 | 0 | XML_SetXmlDeclHandler(ud.mExpat, HandleXMLDeclaration); |
539 | 0 | XML_SetElementHandler(ud.mExpat, HandleStartElement, HandleEndElement); |
540 | 0 | XML_SetCommentHandler(ud.mExpat, HandleComment); |
541 | 0 | XML_SetProcessingInstructionHandler(ud.mExpat, HandleProcessingInstruction); |
542 | 0 | XML_SetUserData(ud.mExpat, static_cast<void*>(&ud)); |
543 | 0 |

544 | 0 | XML_Status status = XML_STATUS_OK; |
545 | 0 |

546 | 0 | // aFromSegment points to the data obtained from the current network |
547 | 0 | // event. mSniffingBuffer (if it exists) contains the data obtained before |
548 | 0 | // the current event. Thus, mSniffingLength bytes of mSniffingBuffer |
549 | 0 | // followed by aCountToSniffingLimit bytes from aFromSegment are the |
550 | 0 | // first 1024 bytes of the file (or the file as a whole if the file is |
551 | 0 | // 1024 bytes long or shorter). Thus, we parse both buffers, but if the |
552 | 0 | // first call succeeds already, we skip parsing the second buffer. |
553 | 0 | if (mSniffingBuffer) { |
554 | 0 | status = XML_Parse(ud.mExpat, |
555 | 0 | reinterpret_cast<const char*>(mSniffingBuffer.get()), |
556 | 0 | mSniffingLength, |
557 | 0 | false); |
558 | 0 | } |
// The second buffer is only parsed if the first parse did not stop or fail
// and no encoding has been recorded yet.
// NOTE(review): relies on XML_Parse() not returning XML_STATUS_OK after a
// handler called XML_StopParser() -- confirm against the expat reference.
559 | 0 | if (status == XML_STATUS_OK && mCharsetSource < kCharsetFromMetaTag && |
560 | 0 | aFromSegment) { |
561 | 0 | status = XML_Parse(ud.mExpat, |
562 | 0 | reinterpret_cast<const char*>(aFromSegment), |
563 | 0 | aCountToSniffingLimit, |
564 | 0 | false); |
565 | 0 | } |
566 | 0 | XML_ParserFree(ud.mExpat); |
567 | 0 |

568 | 0 | if (mCharsetSource < kCharsetFromMetaTag) { |
569 | 0 | // Failed to get an encoding from the XML declaration. XML defaults |
570 | 0 | // confidently to UTF-8 in this case. |
571 | 0 | // It is also possible that the document has an XML declaration that is |
572 | 0 | // longer than 1024 bytes, but that case is not worth worrying about. |
573 | 0 | mEncoding = UTF_8_ENCODING; |
574 | 0 | mCharsetSource = kCharsetFromMetaTag; // means confident |
575 | 0 | } |
576 | 0 |

577 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
578 | 0 | aFromSegment, aCount, aWriteCount); |
579 | 0 | } |
580 | 0 |

581 | 0 | // meta scan failed. |
582 | 0 | if (mCharsetSource >= kCharsetFromHintPrevDoc) { |
// The existing charset source is strong enough: keep mEncoding as it is.
583 | 0 | mFeedChardet = false; |
584 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
585 | 0 | aFromSegment, aCount, aWriteCount); |
586 | 0 | } |
587 | 0 | // Check for BOMless UTF-16 with Basic |
588 | 0 | // Latin content for compat with IE. See bug 631751. |
589 | 0 | SniffBOMlessUTF16BasicLatin(aFromSegment, aCountToSniffingLimit); |
590 | 0 | // the charset may have been set now |
591 | 0 | // maybe try chardet now; |
592 | 0 | if (mFeedChardet) { |
593 | 0 | bool dontFeed; |
594 | 0 | nsresult rv; |
// Feed the buffered bytes first, then the current segment; stop feeding as
// soon as the detector asks for it.
595 | 0 | if (mSniffingBuffer) { |
596 | 0 | rv = mChardet->DoIt( |
597 | 0 | (const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed); |
598 | 0 | mFeedChardet = !dontFeed; |
599 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
600 | 0 | } |
601 | 0 | if (mFeedChardet && aFromSegment) { |
602 | 0 | rv = mChardet->DoIt((const char*)aFromSegment, |
603 | 0 | // Avoid buffer boundary-dependent behavior when |
604 | 0 | // reparsing is forbidden. If reparse is forbidden, |
605 | 0 | // act as if we only saw the first 1024 bytes. |
606 | 0 | // When reparsing isn't forbidden, buffer boundaries |
607 | 0 | // can have an effect on whether the page is loaded |
608 | 0 | // once or twice. :-( |
609 | 0 | mReparseForbidden ? aCountToSniffingLimit : aCount, |
610 | 0 | &dontFeed); |
611 | 0 | mFeedChardet = !dontFeed; |
612 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
613 | 0 | } |
614 | 0 | if (mFeedChardet && (!aFromSegment || mReparseForbidden)) { |
615 | 0 | // mReparseForbidden is checked so that we get to use the sniffing |
616 | 0 | // buffer with the best guess so far if we aren't allowed to guess |
617 | 0 | // better later. |
618 | 0 | mFeedChardet = false; |
619 | 0 | rv = mChardet->Done(); |
620 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
621 | 0 | } |
622 | 0 | // fall thru; callback may have changed charset |
623 | 0 | } |
624 | 0 | if (mCharsetSource == kCharsetUninitialized) { |
625 | 0 | // Hopefully this case is never needed, but dealing with it anyway |
626 | 0 | mEncoding = WINDOWS_1252_ENCODING; |
627 | 0 | mCharsetSource = kCharsetFromFallback; |
628 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
629 | 0 | } else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) { |
630 | 0 | NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR"); |
631 | 0 | NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR"); |
632 | 0 | NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8"); |
633 | 0 | // Now mark charset source as non-weak to signal that we have a decision |
634 | 0 | mCharsetSource = kCharsetFromDocTypeDefault; |
635 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
636 | 0 | } |
637 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
638 | 0 | aFromSegment, aCount, aWriteCount); |
639 | 0 | } |
640 | | |
641 | | nsresult |
642 | | nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment, |
643 | | uint32_t aCount, |
644 | | uint32_t* aWriteCount) |
645 | 0 | { |
646 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
647 | 0 | nsresult rv = NS_OK; |
648 | 0 | uint32_t writeCount; |
649 | 0 |
|
650 | 0 | // mEncoding and mCharsetSource potentially have come from channel or higher |
651 | 0 | // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them. |
652 | 0 | // If we don't find a BOM, the previously set values of mEncoding and |
653 | 0 | // mCharsetSource are not modified by the BOM sniffing here. |
654 | 0 | for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) { |
655 | 0 | switch (mBomState) { |
656 | 0 | case BOM_SNIFFING_NOT_STARTED: |
657 | 0 | NS_ASSERTION(i == 0, "Bad BOM sniffing state."); |
658 | 0 | switch (*aFromSegment) { |
659 | 0 | case 0xEF: |
660 | 0 | mBomState = SEEN_UTF_8_FIRST_BYTE; |
661 | 0 | break; |
662 | 0 | case 0xFF: |
663 | 0 | mBomState = SEEN_UTF_16_LE_FIRST_BYTE; |
664 | 0 | break; |
665 | 0 | case 0xFE: |
666 | 0 | mBomState = SEEN_UTF_16_BE_FIRST_BYTE; |
667 | 0 | break; |
668 | 0 | default: |
669 | 0 | mBomState = BOM_SNIFFING_OVER; |
670 | 0 | break; |
671 | 0 | } |
672 | 0 | break; |
673 | 0 | case SEEN_UTF_16_LE_FIRST_BYTE: |
674 | 0 | if (aFromSegment[i] == 0xFE) { |
675 | 0 | rv = SetupDecodingFromBom( |
676 | 0 | UTF_16LE_ENCODING); // upper case is the raw form |
677 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
678 | 0 | uint32_t count = aCount - (i + 1); |
679 | 0 | rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
680 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
681 | 0 | *aWriteCount = writeCount + (i + 1); |
682 | 0 | return rv; |
683 | 0 | } |
684 | 0 | mBomState = BOM_SNIFFING_OVER; |
685 | 0 | break; |
686 | 0 | case SEEN_UTF_16_BE_FIRST_BYTE: |
687 | 0 | if (aFromSegment[i] == 0xFF) { |
688 | 0 | rv = SetupDecodingFromBom( |
689 | 0 | UTF_16BE_ENCODING); // upper case is the raw form |
690 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
691 | 0 | uint32_t count = aCount - (i + 1); |
692 | 0 | rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
693 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
694 | 0 | *aWriteCount = writeCount + (i + 1); |
695 | 0 | return rv; |
696 | 0 | } |
697 | 0 | mBomState = BOM_SNIFFING_OVER; |
698 | 0 | break; |
699 | 0 | case SEEN_UTF_8_FIRST_BYTE: |
700 | 0 | if (aFromSegment[i] == 0xBB) { |
701 | 0 | mBomState = SEEN_UTF_8_SECOND_BYTE; |
702 | 0 | } else { |
703 | 0 | mBomState = BOM_SNIFFING_OVER; |
704 | 0 | } |
705 | 0 | break; |
706 | 0 | case SEEN_UTF_8_SECOND_BYTE: |
707 | 0 | if (aFromSegment[i] == 0xBF) { |
708 | 0 | rv = |
709 | 0 | SetupDecodingFromBom(UTF_8_ENCODING); // upper case is the raw form |
710 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
711 | 0 | uint32_t count = aCount - (i + 1); |
712 | 0 | rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount); |
713 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
714 | 0 | *aWriteCount = writeCount + (i + 1); |
715 | 0 | return rv; |
716 | 0 | } |
717 | 0 | mBomState = BOM_SNIFFING_OVER; |
718 | 0 | break; |
719 | 0 | default: |
720 | 0 | mBomState = BOM_SNIFFING_OVER; |
721 | 0 | break; |
722 | 0 | } |
723 | 0 | } |
724 | 0 | // if we get here, there either was no BOM or the BOM sniffing isn't complete |
725 | 0 | // yet |
726 | 0 |
|
727 | 0 | MOZ_ASSERT(mCharsetSource != kCharsetFromByteOrderMark, |
728 | 0 | "Should not come here if BOM was found."); |
729 | 0 | MOZ_ASSERT(mCharsetSource != kCharsetFromOtherComponent, |
730 | 0 | "kCharsetFromOtherComponent is for XSLT."); |
731 | 0 |
|
732 | 0 | if (mBomState == BOM_SNIFFING_OVER && mCharsetSource == kCharsetFromChannel) { |
733 | 0 | // There was no BOM and the charset came from channel. mEncoding |
734 | 0 | // still contains the charset from the channel as set by an |
735 | 0 | // earlier call to SetDocumentCharset(), since we didn't find a BOM and |
736 | 0 | // overwrite mEncoding. (Note that if the user has overridden the charset, |
737 | 0 | // we don't come here but check <meta> for XSS-dangerous charsets first.) |
738 | 0 | mFeedChardet = false; |
739 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
740 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
741 | 0 | aFromSegment, aCount, aWriteCount); |
742 | 0 | } |
743 | 0 | |
744 | 0 | if (!mMetaScanner && |
745 | 0 | (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) { |
746 | 0 | mMetaScanner = new nsHtml5MetaScanner(mTreeBuilder); |
747 | 0 | } |
748 | 0 |
|
749 | 0 | if (mSniffingLength + aCount >= NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) { |
750 | 0 | // this is the last buffer |
751 | 0 | uint32_t countToSniffingLimit = |
752 | 0 | NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength; |
753 | 0 | if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { |
754 | 0 | nsHtml5ByteReadable readable(aFromSegment, |
755 | 0 | aFromSegment + countToSniffingLimit); |
756 | 0 | nsAutoCString charset; |
757 | 0 | auto encoding = mMetaScanner->sniff(&readable); |
758 | 0 | // Due to the way nsHtml5Portability reports OOM, ask the tree builder
759 | 0 | nsresult rv; |
760 | 0 | if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) { |
761 | 0 | MarkAsBroken(rv); |
762 | 0 | return rv; |
763 | 0 | } |
764 | 0 | if (encoding) { |
765 | 0 | // meta scan successful; honor overrides unless meta is XSS-dangerous |
766 | 0 | if ((mCharsetSource == kCharsetFromParentForced || |
767 | 0 | mCharsetSource == kCharsetFromUserForced) && |
768 | 0 | (encoding->IsAsciiCompatible() || |
769 | 0 | encoding == ISO_2022_JP_ENCODING)) { |
770 | 0 | // Honor override |
771 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
772 | 0 | aFromSegment, aCount, aWriteCount); |
773 | 0 | } |
774 | 0 | mEncoding = WrapNotNull(encoding); |
775 | 0 | mCharsetSource = kCharsetFromMetaPrescan; |
776 | 0 | mFeedChardet = false; |
777 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
778 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
779 | 0 | aFromSegment, aCount, aWriteCount); |
780 | 0 | } |
781 | 0 | } |
782 | 0 | if (mCharsetSource == kCharsetFromParentForced || |
783 | 0 | mCharsetSource == kCharsetFromUserForced) { |
784 | 0 | // meta not found, honor override |
785 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
786 | 0 | aFromSegment, aCount, aWriteCount); |
787 | 0 | } |
788 | 0 | return FinalizeSniffing( |
789 | 0 | aFromSegment, aCount, aWriteCount, countToSniffingLimit); |
790 | 0 | } |
791 | 0 | |
792 | 0 | // not the last buffer |
793 | 0 | if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) { |
794 | 0 | nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount); |
795 | 0 | auto encoding = mMetaScanner->sniff(&readable); |
796 | 0 | // Due to the way nsHtml5Portability reports OOM, ask the tree builder
797 | 0 | nsresult rv; |
798 | 0 | if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) { |
799 | 0 | MarkAsBroken(rv); |
800 | 0 | return rv; |
801 | 0 | } |
802 | 0 | if (encoding) { |
803 | 0 | // meta scan successful; honor overrides unless meta is XSS-dangerous |
804 | 0 | if ((mCharsetSource == kCharsetFromParentForced || |
805 | 0 | mCharsetSource == kCharsetFromUserForced) && |
806 | 0 | (encoding->IsAsciiCompatible() || encoding == ISO_2022_JP_ENCODING)) { |
807 | 0 | // Honor override |
808 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
809 | 0 | aFromSegment, aCount, aWriteCount); |
810 | 0 | } |
811 | 0 | mEncoding = WrapNotNull(encoding); |
812 | 0 | mCharsetSource = kCharsetFromMetaPrescan; |
813 | 0 | mFeedChardet = false; |
814 | 0 | mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource); |
815 | 0 | return SetupDecodingAndWriteSniffingBufferAndCurrentSegment( |
816 | 0 | aFromSegment, aCount, aWriteCount); |
817 | 0 | } |
818 | 0 | } |
819 | 0 |
|
820 | 0 | if (!mSniffingBuffer) { |
821 | 0 | mSniffingBuffer = MakeUniqueFallible<uint8_t[]>( |
822 | 0 | NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE); |
823 | 0 | if (!mSniffingBuffer) { |
824 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
825 | 0 | } |
826 | 0 | } |
827 | 0 | memcpy(&mSniffingBuffer[mSniffingLength], aFromSegment, aCount); |
828 | 0 | mSniffingLength += aCount; |
829 | 0 | *aWriteCount = aCount; |
830 | 0 | return NS_OK; |
831 | 0 | } |
832 | | |
833 | | nsresult |
834 | | nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment, |
835 | | uint32_t aCount, |
836 | | uint32_t* aWriteCount) |
837 | 0 | { |
838 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
839 | 0 | // mLastBuffer should always point to a buffer of the size |
840 | 0 | // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE. |
841 | 0 | if (!mLastBuffer) { |
842 | 0 | NS_WARNING("mLastBuffer should not be null!"); |
843 | 0 | MarkAsBroken(NS_ERROR_NULL_POINTER); |
844 | 0 | return NS_ERROR_NULL_POINTER; |
845 | 0 | } |
846 | 0 | size_t totalRead = 0; |
847 | 0 | auto src = MakeSpan(aFromSegment, aCount); |
848 | 0 | for (;;) { |
849 | 0 | auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
850 | 0 | uint32_t result; |
851 | 0 | size_t read; |
852 | 0 | size_t written; |
853 | 0 | bool hadErrors; |
854 | 0 | Tie(result, read, written, hadErrors) = |
855 | 0 | mUnicodeDecoder->DecodeToUTF16(src, dst, false); |
856 | 0 | if (recordreplay::IsRecordingOrReplaying()) { |
857 | 0 | recordreplay::AddContentParseData(this, dst.data(), written); |
858 | 0 | } |
859 | 0 | if (hadErrors && !mHasHadErrors) { |
860 | 0 | mHasHadErrors = true; |
861 | 0 | if (mEncoding == UTF_8_ENCODING) { |
862 | 0 | mTreeBuilder->TryToEnableEncodingMenu(); |
863 | 0 | } |
864 | 0 | } |
865 | 0 | src = src.From(read); |
866 | 0 | totalRead += read; |
867 | 0 | mLastBuffer->AdvanceEnd(written); |
868 | 0 | if (result == kOutputFull) { |
869 | 0 | RefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
870 | 0 | nsHtml5OwningUTF16Buffer::FalliblyCreate( |
871 | 0 | NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
872 | 0 | if (!newBuf) { |
873 | 0 | MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
874 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
875 | 0 | } |
876 | 0 | mLastBuffer = (mLastBuffer->next = newBuf.forget()); |
877 | 0 | } else { |
878 | 0 | MOZ_ASSERT(totalRead == aCount, |
879 | 0 | "The Unicode decoder consumed the wrong number of bytes."); |
880 | 0 | *aWriteCount = totalRead; |
881 | 0 | return NS_OK; |
882 | 0 | } |
883 | 0 | } |
884 | 0 | } |
885 | | |
886 | | class MaybeRunCollector : public Runnable |
887 | | { |
888 | | public: |
889 | | explicit MaybeRunCollector(nsIDocShell* aDocShell) |
890 | | : Runnable("MaybeRunCollector") |
891 | | , mDocShell(aDocShell) |
892 | 0 | { |
893 | 0 | } |
894 | | |
895 | | NS_IMETHOD Run() override |
896 | 0 | { |
897 | 0 | nsJSContext::MaybeRunNextCollectorSlice(mDocShell, |
898 | 0 | JS::gcreason::HTML_PARSER); |
899 | 0 | return NS_OK; |
900 | 0 | } |
901 | | |
902 | | nsCOMPtr<nsIDocShell> mDocShell; |
903 | | }; |
904 | | |
// Start-of-stream notification; asserted to run on the main thread.
//
// In order, this: forwards the notification to mObserver (if any), records
// the request, moves mStreamState to STREAM_BEING_READ, starts the
// tokenizer, tree builder and executor, calls WillBuildModel, allocates the
// first read buffer, decides whether reparsing and chardet feeding are
// allowed, tries to retarget data delivery to mEventTarget, and finally
// instantiates the decoder right away when the charset source is at least
// kCharsetFromUtf8OnlyMime or the request is a wyciwyg channel.
//
// Returns the failure code of WillBuildModel if it fails, the result of
// mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY) if the first buffer can't
// be allocated, and NS_OK otherwise. A retargeting failure only produces a
// warning.
905 | | nsresult
906 | | nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
907 | 0 | {
908 | 0 | MOZ_RELEASE_ASSERT(STREAM_NOT_STARTED == mStreamState,
909 | 0 | "Got OnStartRequest when the stream had already started.");
910 | 0 | MOZ_ASSERT(
911 | 0 | !mExecutor->HasStarted(),
912 | 0 | "Got OnStartRequest at the wrong stage in the executor life cycle.");
913 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
914 | 0 | if (mObserver) {
915 | 0 | mObserver->OnStartRequest(aRequest, aContext);
916 | 0 | }
917 | 0 | mRequest = aRequest;
918 | 0 |

919 | 0 | mStreamState = STREAM_BEING_READ;
920 | 0 |

921 | 0 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
922 | 0 | mTokenizer->StartViewSource(NS_ConvertUTF8toUTF16(mViewSourceTitle));
923 | 0 | }
924 | 0 |

925 | 0 | // For View Source, the parser should run with scripts "enabled" if a normal
926 | 0 | // load would have scripts enabled.
927 | 0 | bool scriptingEnabled =
928 | 0 | mMode == LOAD_AS_DATA ? false : mExecutor->IsScriptEnabled();
929 | 0 | mOwner->StartTokenizer(scriptingEnabled);
930 | 0 |

// isSrcdoc stays false when no channel can be obtained.
931 | 0 | bool isSrcdoc = false;
932 | 0 | nsCOMPtr<nsIChannel> channel;
933 | 0 | nsresult rv = GetChannel(getter_AddRefs(channel));
934 | 0 | if (NS_SUCCEEDED(rv)) {
935 | 0 | isSrcdoc = NS_IsSrcdocChannel(channel);
936 | 0 | }
937 | 0 | mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
938 | 0 | mTreeBuilder->setScriptingEnabled(scriptingEnabled);
// Script execution is only allowed for NORMAL mode with scripting enabled.
939 | 0 | mTreeBuilder->SetPreventScriptExecution(
940 | 0 | !((mMode == NORMAL) && scriptingEnabled));
941 | 0 | mTokenizer->start();
942 | 0 | mExecutor->Start();
943 | 0 | mExecutor->StartReadingFromStage();
944 | 0 |

945 | 0 | if (mMode == PLAIN_TEXT) {
946 | 0 | mTreeBuilder->StartPlainText();
947 | 0 | mTokenizer->StartPlainText();
948 | 0 | } else if (mMode == VIEW_SOURCE_PLAIN) {
949 | 0 | nsAutoString viewSourceTitle;
950 | 0 | CopyUTF8toUTF16(mViewSourceTitle, viewSourceTitle);
951 | 0 | mTreeBuilder->EnsureBufferSpace(viewSourceTitle.Length());
952 | 0 | mTreeBuilder->StartPlainTextViewSource(viewSourceTitle);
953 | 0 | mTokenizer->StartPlainText();
954 | 0 | }
955 | 0 |

956 | 0 | /*
957 | 0 | * If you move the following line, be very careful not to cause
958 | 0 | * WillBuildModel to be called before the document has had its
959 | 0 | * script global object set.
960 | 0 | */
961 | 0 | rv = mExecutor->WillBuildModel(eDTDMode_unknown);
962 | 0 | NS_ENSURE_SUCCESS(rv, rv);
963 | 0 |

// Allocate the single buffer that initially serves as both the head and the
// tail of the decoded-data buffer chain.
964 | 0 | RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
965 | 0 | nsHtml5OwningUTF16Buffer::FalliblyCreate(
966 | 0 | NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
967 | 0 | if (!newBuf) {
968 | 0 | // marks this stream parser as terminated,
969 | 0 | // which prevents entry to code paths that
970 | 0 | // would use mFirstBuffer or mLastBuffer.
971 | 0 | return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
972 | 0 | }
973 | 0 | NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?");
974 | 0 | NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?");
975 | 0 | mFirstBuffer = mLastBuffer = newBuf;
976 | 0 |

// NOTE(review): this assignment looks redundant; rv is overwritten by the
// do_QueryInterface call below before it is read again.
977 | 0 | rv = NS_OK;
978 | 0 |

979 | 0 | // The line below means that the encoding can end up being wrong if
980 | 0 | // a view-source URL is loaded without having the encoding hint from a
981 | 0 | // previous normal load in the history.
982 | 0 | mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT);
983 | 0 |

984 | 0 | mDocGroup = mExecutor->GetDocument()->GetDocGroup();
985 | 0 |

// rv receives the QueryInterface result here, so it holds a failure code
// for non-HTTP requests until the next QueryInterface below reassigns it.
986 | 0 | nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(mRequest, &rv));
987 | 0 | if (NS_SUCCEEDED(rv)) {
988 | 0 | // Non-HTTP channels are bogus enough that we let them work with unlabeled
989 | 0 | // runnables for now. Asserting for HTTP channels only.
990 | 0 | MOZ_ASSERT(mDocGroup || mMode == LOAD_AS_DATA,
991 | 0 | "How come the doc group is still null?");
992 | 0 |

993 | 0 | nsAutoCString method;
994 | 0 | Unused << httpChannel->GetRequestMethod(method);
995 | 0 | // XXX does Necko have a way to renavigate POST, etc. without hitting
996 | 0 | // the network?
997 | 0 | if (!method.EqualsLiteral("GET")) {
998 | 0 | // This is the old Gecko behavior but the HTML5 spec disagrees.
999 | 0 | // Don't reparse on POST.
1000 | 0 | mReparseForbidden = true;
1001 | 0 | mFeedChardet = false; // can't restart anyway
1002 | 0 | }
1003 | 0 | }
1004 | 0 |

1005 | 0 | // Attempt to retarget delivery of data (via OnDataAvailable) to the parser
1006 | 0 | // thread, rather than through the main thread.
1007 | 0 | nsCOMPtr<nsIThreadRetargetableRequest> threadRetargetableRequest =
1008 | 0 | do_QueryInterface(mRequest, &rv);
1009 | 0 | if (threadRetargetableRequest) {
1010 | 0 | rv = threadRetargetableRequest->RetargetDeliveryTo(mEventTarget);
1011 | 0 | if (NS_SUCCEEDED(rv)) {
1012 | 0 | // Parser thread should be now ready to get data from necko and parse it
1013 | 0 | // and main thread might have a chance to process a collector slice.
1014 | 0 | // We need to do this asynchronously so that necko may continue processing
1015 | 0 | // the request.
1016 | 0 | nsCOMPtr<nsIRunnable> runnable =
1017 | 0 | new MaybeRunCollector(mExecutor->GetDocument()->GetDocShell());
// NOTE(review): the result of this Dispatch call is not checked.
1018 | 0 | mozilla::SystemGroup::Dispatch(mozilla::TaskCategory::GarbageCollection,
1019 | 0 | runnable.forget());
1020 | 0 | }
1021 | 0 | }
1022 | 0 |

// At this point rv is either the QueryInterface result (when the request is
// not retargetable) or the RetargetDeliveryTo result, so this warning covers
// both ways of failing to retarget.
1023 | 0 | if (NS_FAILED(rv)) {
1024 | 0 | NS_WARNING("Failed to retarget HTML data delivery to the parser thread.");
1025 | 0 | }
1026 | 0 |

1027 | 0 | if (mCharsetSource == kCharsetFromParentFrame) {
1028 | 0 | // Remember this in case chardet overwrites mCharsetSource
1029 | 0 | mInitialEncodingWasFromParentFrame = true;
1030 | 0 | }
1031 | 0 |

1032 | 0 | if (mCharsetSource >= kCharsetFromAutoDetection) {
1033 | 0 | mFeedChardet = false;
1034 | 0 | }
1035 | 0 |

1036 | 0 | nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
1037 | 0 | if (mCharsetSource < kCharsetFromUtf8OnlyMime && !wyciwygChannel) {
1038 | 0 | // we aren't ready to commit to an encoding yet
1039 | 0 | // leave converter uninstantiated for now
1040 | 0 | return NS_OK;
1041 | 0 | }
1042 | 0 | 
1043 | 0 | // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
1044 | 0 | // a browsing context. In the latter case, there's no need to remove the
1045 | 0 | // BOM manually here, because the UTF-8 decoder removes it.
1046 | 0 | mReparseForbidden = true;
1047 | 0 | mFeedChardet = false;
1048 | 0 |

1049 | 0 | // Instantiate the converter here to avoid BOM sniffing.
1050 | 0 | mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
1051 | 0 | return NS_OK;
1052 | 0 | }
1053 | | |
1054 | | nsresult |
1055 | | nsHtml5StreamParser::CheckListenerChain() |
1056 | 0 | { |
1057 | 0 | NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!"); |
1058 | 0 | if (!mObserver) { |
1059 | 0 | return NS_OK; |
1060 | 0 | } |
1061 | 0 | nsresult rv; |
1062 | 0 | nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable = |
1063 | 0 | do_QueryInterface(mObserver, &rv); |
1064 | 0 | if (NS_SUCCEEDED(rv) && retargetable) { |
1065 | 0 | rv = retargetable->CheckListenerChain(); |
1066 | 0 | } |
1067 | 0 | return rv; |
1068 | 0 | } |
1069 | | |
1070 | | void |
1071 | | nsHtml5StreamParser::DoStopRequest() |
1072 | 0 | { |
1073 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1074 | 0 | MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState, |
1075 | 0 | "Stream ended without being open."); |
1076 | 0 | mTokenizerMutex.AssertCurrentThreadOwns(); |
1077 | 0 |
|
1078 | 0 | if (IsTerminated()) { |
1079 | 0 | return; |
1080 | 0 | } |
1081 | 0 | |
1082 | 0 | mStreamState = STREAM_ENDED; |
1083 | 0 |
|
1084 | 0 | if (!mUnicodeDecoder) { |
1085 | 0 | uint32_t writeCount; |
1086 | 0 | nsresult rv; |
1087 | 0 | if (NS_FAILED(rv = FinalizeSniffing(nullptr, 0, &writeCount, 0))) { |
1088 | 0 | MarkAsBroken(rv); |
1089 | 0 | return; |
1090 | 0 | } |
1091 | 0 | } else if (mFeedChardet) { |
1092 | 0 | mChardet->Done(); |
1093 | 0 | } |
1094 | 0 |
|
1095 | 0 | MOZ_ASSERT(mUnicodeDecoder, |
1096 | 0 | "Should have a decoder after finalizing sniffing."); |
1097 | 0 |
|
1098 | 0 | // mLastBuffer should always point to a buffer of the size |
1099 | 0 | // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE. |
1100 | 0 | if (!mLastBuffer) { |
1101 | 0 | NS_WARNING("mLastBuffer should not be null!"); |
1102 | 0 | MarkAsBroken(NS_ERROR_NULL_POINTER); |
1103 | 0 | return; |
1104 | 0 | } |
1105 | 0 |
|
1106 | 0 | Span<uint8_t> src; // empty span |
1107 | 0 | for (;;) { |
1108 | 0 | auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
1109 | 0 | uint32_t result; |
1110 | 0 | size_t read; |
1111 | 0 | size_t written; |
1112 | 0 | bool hadErrors; |
1113 | 0 | Tie(result, read, written, hadErrors) = |
1114 | 0 | mUnicodeDecoder->DecodeToUTF16(src, dst, true); |
1115 | 0 | if (recordreplay::IsRecordingOrReplaying()) { |
1116 | 0 | recordreplay::AddContentParseData(this, dst.data(), written); |
1117 | 0 | } |
1118 | 0 | if (hadErrors && !mHasHadErrors) { |
1119 | 0 | mHasHadErrors = true; |
1120 | 0 | if (mEncoding == UTF_8_ENCODING) { |
1121 | 0 | mTreeBuilder->TryToEnableEncodingMenu(); |
1122 | 0 | } |
1123 | 0 | } |
1124 | 0 | MOZ_ASSERT(read == 0, "How come an empty span was read form?"); |
1125 | 0 | mLastBuffer->AdvanceEnd(written); |
1126 | 0 | if (result == kOutputFull) { |
1127 | 0 | RefPtr<nsHtml5OwningUTF16Buffer> newBuf = |
1128 | 0 | nsHtml5OwningUTF16Buffer::FalliblyCreate( |
1129 | 0 | NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE); |
1130 | 0 | if (!newBuf) { |
1131 | 0 | MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
1132 | 0 | return; |
1133 | 0 | } |
1134 | 0 | mLastBuffer = (mLastBuffer->next = newBuf.forget()); |
1135 | 0 | } else { |
1136 | 0 | break; |
1137 | 0 | } |
1138 | 0 | } |
1139 | 0 |
|
1140 | 0 | if (IsTerminatedOrInterrupted()) { |
1141 | 0 | return; |
1142 | 0 | } |
1143 | 0 | |
1144 | 0 | ParseAvailableData(); |
1145 | 0 | } |
1146 | | |
1147 | | class nsHtml5RequestStopper : public Runnable |
1148 | | { |
1149 | | private: |
1150 | | nsHtml5StreamParserPtr mStreamParser; |
1151 | | |
1152 | | public: |
1153 | | explicit nsHtml5RequestStopper(nsHtml5StreamParser* aStreamParser) |
1154 | | : Runnable("nsHtml5RequestStopper") |
1155 | | , mStreamParser(aStreamParser) |
1156 | 0 | { |
1157 | 0 | } |
1158 | | NS_IMETHOD Run() override |
1159 | 0 | { |
1160 | 0 | mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
1161 | 0 | mStreamParser->DoStopRequest(); |
1162 | 0 | return NS_OK; |
1163 | 0 | } |
1164 | | }; |
1165 | | |
1166 | | nsresult |
1167 | | nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest, |
1168 | | nsISupports* aContext, |
1169 | | nsresult status) |
1170 | 0 | { |
1171 | 0 | NS_ASSERTION(mRequest == aRequest, "Got Stop on wrong stream."); |
1172 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
1173 | 0 | if (mObserver) { |
1174 | 0 | mObserver->OnStopRequest(aRequest, aContext, status); |
1175 | 0 | } |
1176 | 0 | nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this); |
1177 | 0 | if (NS_FAILED(mEventTarget->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) { |
1178 | 0 | NS_WARNING("Dispatching StopRequest event failed."); |
1179 | 0 | } |
1180 | 0 | return NS_OK; |
1181 | 0 | } |
1182 | | |
1183 | | void |
1184 | | nsHtml5StreamParser::DoDataAvailable(const uint8_t* aBuffer, uint32_t aLength) |
1185 | 0 | { |
1186 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1187 | 0 | MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState, |
1188 | 0 | "DoDataAvailable called when stream not open."); |
1189 | 0 | mTokenizerMutex.AssertCurrentThreadOwns(); |
1190 | 0 |
|
1191 | 0 | if (IsTerminated()) { |
1192 | 0 | return; |
1193 | 0 | } |
1194 | 0 | |
1195 | 0 | uint32_t writeCount; |
1196 | 0 | nsresult rv; |
1197 | 0 | if (HasDecoder()) { |
1198 | 0 | if (mFeedChardet) { |
1199 | 0 | bool dontFeed; |
1200 | 0 | mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed); |
1201 | 0 | mFeedChardet = !dontFeed; |
1202 | 0 | } |
1203 | 0 | rv = WriteStreamBytes(aBuffer, aLength, &writeCount); |
1204 | 0 | } else { |
1205 | 0 | rv = SniffStreamBytes(aBuffer, aLength, &writeCount); |
1206 | 0 | } |
1207 | 0 | if (NS_FAILED(rv)) { |
1208 | 0 | MarkAsBroken(rv); |
1209 | 0 | return; |
1210 | 0 | } |
1211 | 0 | NS_ASSERTION(writeCount == aLength, |
1212 | 0 | "Wrong number of stream bytes written/sniffed."); |
1213 | 0 |
|
1214 | 0 | if (IsTerminatedOrInterrupted()) { |
1215 | 0 | return; |
1216 | 0 | } |
1217 | 0 | |
1218 | 0 | ParseAvailableData(); |
1219 | 0 |
|
1220 | 0 | if (mFlushTimerArmed || mSpeculating) { |
1221 | 0 | return; |
1222 | 0 | } |
1223 | 0 | |
1224 | 0 | { |
1225 | 0 | mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex); |
1226 | 0 | mFlushTimer->InitWithNamedFuncCallback( |
1227 | 0 | nsHtml5StreamParser::TimerCallback, |
1228 | 0 | static_cast<void*>(this), |
1229 | 0 | mFlushTimerEverFired ? StaticPrefs::html5_flushtimer_initialdelay() |
1230 | 0 | : StaticPrefs::html5_flushtimer_subsequentdelay(), |
1231 | 0 | nsITimer::TYPE_ONE_SHOT, |
1232 | 0 | "nsHtml5StreamParser::DoDataAvailable"); |
1233 | 0 | } |
1234 | 0 | mFlushTimerArmed = true; |
1235 | 0 | } |
1236 | | |
1237 | | class nsHtml5DataAvailable : public Runnable |
1238 | | { |
1239 | | private: |
1240 | | nsHtml5StreamParserPtr mStreamParser; |
1241 | | UniquePtr<uint8_t[]> mData; |
1242 | | uint32_t mLength; |
1243 | | |
1244 | | public: |
1245 | | nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser, |
1246 | | UniquePtr<uint8_t[]> aData, |
1247 | | uint32_t aLength) |
1248 | | : Runnable("nsHtml5DataAvailable") |
1249 | | , mStreamParser(aStreamParser) |
1250 | | , mData(std::move(aData)) |
1251 | | , mLength(aLength) |
1252 | 0 | { |
1253 | 0 | } |
1254 | | NS_IMETHOD Run() override |
1255 | 0 | { |
1256 | 0 | mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
1257 | 0 | mStreamParser->DoDataAvailable(mData.get(), mLength); |
1258 | 0 | return NS_OK; |
1259 | 0 | } |
1260 | | }; |
1261 | | |
1262 | | nsresult |
1263 | | nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest, |
1264 | | nsISupports* aContext, |
1265 | | nsIInputStream* aInStream, |
1266 | | uint64_t aSourceOffset, |
1267 | | uint32_t aLength) |
1268 | 0 | { |
1269 | 0 | nsresult rv; |
1270 | 0 | if (NS_FAILED(rv = mExecutor->IsBroken())) { |
1271 | 0 | return rv; |
1272 | 0 | } |
1273 | 0 | |
1274 | 0 | NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream."); |
1275 | 0 | uint32_t totalRead; |
1276 | 0 | // Main thread to parser thread dispatch requires copying to buffer first. |
1277 | 0 | if (NS_IsMainThread()) { |
1278 | 0 | auto data = MakeUniqueFallible<uint8_t[]>(aLength); |
1279 | 0 | if (!data) { |
1280 | 0 | return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY); |
1281 | 0 | } |
1282 | 0 | rv = |
1283 | 0 | aInStream->Read(reinterpret_cast<char*>(data.get()), aLength, &totalRead); |
1284 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
1285 | 0 | NS_ASSERTION(totalRead <= aLength, "Read more bytes than were available?"); |
1286 | 0 |
|
1287 | 0 | nsCOMPtr<nsIRunnable> dataAvailable = |
1288 | 0 | new nsHtml5DataAvailable(this, std::move(data), totalRead); |
1289 | 0 | if (NS_FAILED( |
1290 | 0 | mEventTarget->Dispatch(dataAvailable, nsIThread::DISPATCH_NORMAL))) { |
1291 | 0 | NS_WARNING("Dispatching DataAvailable event failed."); |
1292 | 0 | } |
1293 | 0 | return rv; |
1294 | 0 | } else { |
1295 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1296 | 0 | mozilla::MutexAutoLock autoLock(mTokenizerMutex); |
1297 | 0 |
|
1298 | 0 | // Read directly from response buffer. |
1299 | 0 | rv = |
1300 | 0 | aInStream->ReadSegments(CopySegmentsToParser, this, aLength, &totalRead); |
1301 | 0 | if (NS_FAILED(rv)) { |
1302 | 0 | NS_WARNING("Failed reading response data to parser"); |
1303 | 0 | return rv; |
1304 | 0 | } |
1305 | 0 | return NS_OK; |
1306 | 0 | } |
1307 | 0 | } |
1308 | | |
1309 | | /* static */ nsresult |
1310 | | nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream* aInStream, |
1311 | | void* aClosure, |
1312 | | const char* aFromSegment, |
1313 | | uint32_t aToOffset, |
1314 | | uint32_t aCount, |
1315 | | uint32_t* aWriteCount) |
1316 | 0 | { |
1317 | 0 | nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure); |
1318 | 0 |
|
1319 | 0 | parser->DoDataAvailable((const uint8_t*)aFromSegment, aCount); |
1320 | 0 | // Assume DoDataAvailable consumed all available bytes. |
1321 | 0 | *aWriteCount = aCount; |
1322 | 0 | return NS_OK; |
1323 | 0 | } |
1324 | | |
1325 | | const Encoding* |
1326 | | nsHtml5StreamParser::PreferredForInternalEncodingDecl( |
1327 | | const nsACString& aEncoding) |
1328 | 0 | { |
1329 | 0 | const Encoding* newEncoding = Encoding::ForLabel(aEncoding); |
1330 | 0 | if (!newEncoding) { |
1331 | 0 | // the encoding name is bogus |
1332 | 0 | mTreeBuilder->MaybeComplainAboutCharset( |
1333 | 0 | "EncMetaUnsupported", true, mTokenizer->getLineNumber()); |
1334 | 0 | return nullptr; |
1335 | 0 | } |
1336 | 0 | |
1337 | 0 | if (newEncoding == UTF_16BE_ENCODING || newEncoding == UTF_16LE_ENCODING) { |
1338 | 0 | mTreeBuilder->MaybeComplainAboutCharset( |
1339 | 0 | "EncMetaUtf16", true, mTokenizer->getLineNumber()); |
1340 | 0 | newEncoding = UTF_8_ENCODING; |
1341 | 0 | } |
1342 | 0 |
|
1343 | 0 | if (newEncoding == X_USER_DEFINED_ENCODING) { |
1344 | 0 | // WebKit/Blink hack for Indian and Armenian legacy sites |
1345 | 0 | mTreeBuilder->MaybeComplainAboutCharset( |
1346 | 0 | "EncMetaUserDefined", true, mTokenizer->getLineNumber()); |
1347 | 0 | newEncoding = WINDOWS_1252_ENCODING; |
1348 | 0 | } |
1349 | 0 |
|
1350 | 0 | if (newEncoding == mEncoding) { |
1351 | 0 | if (mCharsetSource < kCharsetFromMetaPrescan) { |
1352 | 0 | if (mInitialEncodingWasFromParentFrame) { |
1353 | 0 | mTreeBuilder->MaybeComplainAboutCharset( |
1354 | 0 | "EncLateMetaFrame", false, mTokenizer->getLineNumber()); |
1355 | 0 | } else { |
1356 | 0 | mTreeBuilder->MaybeComplainAboutCharset( |
1357 | 0 | "EncLateMeta", false, mTokenizer->getLineNumber()); |
1358 | 0 | } |
1359 | 0 | } |
1360 | 0 | mCharsetSource = kCharsetFromMetaTag; // become confident |
1361 | 0 | mFeedChardet = false; // don't feed chardet when confident |
1362 | 0 | return nullptr; |
1363 | 0 | } |
1364 | 0 |
|
1365 | 0 | return newEncoding; |
1366 | 0 | } |
1367 | | |
1368 | | bool |
1369 | | nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) |
1370 | 0 | { |
1371 | 0 | // This code needs to stay in sync with |
1372 | 0 | // nsHtml5MetaScanner::tryCharset. Unfortunately, the |
1373 | 0 | // trickery with member fields there leads to some copy-paste reuse. :-( |
1374 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1375 | 0 | if (mCharsetSource >= kCharsetFromMetaTag) { // this threshold corresponds to |
1376 | 0 | // "confident" in the HTML5 spec |
1377 | 0 | return false; |
1378 | 0 | } |
1379 | 0 | |
1380 | 0 | nsString newEncoding16; // Not Auto, because using it to hold nsStringBuffer* |
1381 | 0 | aEncoding.ToString(newEncoding16); |
1382 | 0 | nsAutoCString newEncoding; |
1383 | 0 | CopyUTF16toUTF8(newEncoding16, newEncoding); |
1384 | 0 |
|
1385 | 0 | auto encoding = PreferredForInternalEncodingDecl(newEncoding); |
1386 | 0 | if (!encoding) { |
1387 | 0 | return false; |
1388 | 0 | } |
1389 | 0 | |
1390 | 0 | if (mReparseForbidden) { |
1391 | 0 | // This mReparseForbidden check happens after the call to |
1392 | 0 | // PreferredForInternalEncodingDecl so that if that method calls |
1393 | 0 | // MaybeComplainAboutCharset, its charset complaint wins over the one |
1394 | 0 | // below. |
1395 | 0 | mTreeBuilder->MaybeComplainAboutCharset( |
1396 | 0 | "EncLateMetaTooLate", true, mTokenizer->getLineNumber()); |
1397 | 0 | return false; // not reparsing even if we wanted to |
1398 | 0 | } |
1399 | 0 | |
1400 | 0 | // Avoid having the chardet ask for another restart after this restart |
1401 | 0 | // request. |
1402 | 0 | mFeedChardet = false; |
1403 | 0 | mTreeBuilder->NeedsCharsetSwitchTo( |
1404 | 0 | WrapNotNull(encoding), kCharsetFromMetaTag, mTokenizer->getLineNumber()); |
1405 | 0 | FlushTreeOpsAndDisarmTimer(); |
1406 | 0 | Interrupt(); |
1407 | 0 | // the tree op executor will cause the stream parser to terminate |
1408 | 0 | // if the charset switch request is accepted or it'll uninterrupt |
1409 | 0 | // if the request failed. Note that if the restart request fails, |
1410 | 0 | // we don't bother trying to make chardet resume. Might as well |
1411 | 0 | // assume that chardet-requested restarts would fail, too. |
1412 | 0 | return true; |
1413 | 0 | } |
1414 | | |
1415 | | void |
1416 | | nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() |
1417 | 0 | { |
1418 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1419 | 0 | if (mFlushTimerArmed) { |
1420 | 0 | // avoid calling Cancel if the flush timer isn't armed to avoid acquiring |
1421 | 0 | // a mutex |
1422 | 0 | { |
1423 | 0 | mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex); |
1424 | 0 | mFlushTimer->Cancel(); |
1425 | 0 | } |
1426 | 0 | mFlushTimerArmed = false; |
1427 | 0 | } |
1428 | 0 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
1429 | 0 | mTokenizer->FlushViewSource(); |
1430 | 0 | } |
1431 | 0 | mTreeBuilder->Flush(); |
1432 | 0 | nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher); |
1433 | 0 | if (NS_FAILED(DispatchToMain(runnable.forget()))) { |
1434 | 0 | NS_WARNING("failed to dispatch executor flush event"); |
1435 | 0 | } |
1436 | 0 | } |
1437 | | |
// Tokenizes whatever decoded data is queued in the buffer chain running from
// mFirstBuffer to mLastBuffer. Asserted to run on the parser thread with
// mTokenizerMutex held.
//
// Returns without doing anything if the parser is terminated or interrupted,
// or if it is speculating while speculation is not enabled. Otherwise it
// loops until one of these happens:
//  - the chain is exhausted while the stream is still being read (loads are
//    flushed and mLoadFlusher is dispatched to the main thread),
//  - the chain is exhausted after the stream ended (EOF is processed once,
//    guarded by mAtEOF),
//  - the tree builder or tokenizer reports a failure (parser marked broken),
//  - the parser becomes terminated or interrupted.
1438 | | void
1439 | | nsHtml5StreamParser::ParseAvailableData()
1440 | 0 | {
1441 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!");
1442 | 0 | mTokenizerMutex.AssertCurrentThreadOwns();
1443 | 0 |

1444 | 0 | if (IsTerminatedOrInterrupted()) {
1445 | 0 | return;
1446 | 0 | }
1447 | 0 | 
1448 | 0 | if (mSpeculating && !IsSpeculationEnabled()) {
1449 | 0 | return;
1450 | 0 | }
1451 | 0 | 
1452 | 0 | for (;;) {
// Current buffer is fully consumed: either advance to the next buffer in the
// chain or, if this is the last one, act on the stream state.
1453 | 0 | if (!mFirstBuffer->hasMore()) {
1454 | 0 | if (mFirstBuffer == mLastBuffer) {
1455 | 0 | switch (mStreamState) {
1456 | 0 | case STREAM_BEING_READ:
1457 | 0 | // never release the last buffer.
1458 | 0 | if (!mSpeculating) {
1459 | 0 | // reuse buffer space if not speculating
1460 | 0 | mFirstBuffer->setStart(0);
1461 | 0 | mFirstBuffer->setEnd(0);
1462 | 0 | }
1463 | 0 | mTreeBuilder->FlushLoads();
1464 | 0 | {
1465 | 0 | // Dispatch this runnable unconditionally, because the loads
1466 | 0 | // that need flushing may have been flushed earlier even if the
1467 | 0 | // flush right above here did nothing.
1468 | 0 | nsCOMPtr<nsIRunnable> runnable(mLoadFlusher);
1469 | 0 | if (NS_FAILED(DispatchToMain(runnable.forget()))) {
1470 | 0 | NS_WARNING("failed to dispatch load flush event");
1471 | 0 | }
1472 | 0 | }
1473 | 0 | return; // no more data for now but expecting more
1474 | 0 | case STREAM_ENDED:
// mAtEOF makes the end-of-file processing below run at most once.
1475 | 0 | if (mAtEOF) {
1476 | 0 | return;
1477 | 0 | }
1478 | 0 | mAtEOF = true;
1479 | 0 | if (mCharsetSource < kCharsetFromMetaTag) {
1480 | 0 | if (mInitialEncodingWasFromParentFrame) {
1481 | 0 | // Unfortunately, this check doesn't take effect for
1482 | 0 | // cross-origin frames, so cross-origin ad frames that have
1483 | 0 | // no text and only an image or a Flash embed get the more
1484 | 0 | // severe message from the next if block. The message is
1485 | 0 | // technically accurate, though.
1486 | 0 | mTreeBuilder->MaybeComplainAboutCharset(
1487 | 0 | "EncNoDeclarationFrame", false, 0);
1488 | 0 | } else if (mMode == NORMAL) {
1489 | 0 | mTreeBuilder->MaybeComplainAboutCharset(
1490 | 0 | "EncNoDeclaration", true, 0);
1491 | 0 | } else if (mMode == PLAIN_TEXT) {
1492 | 0 | mTreeBuilder->MaybeComplainAboutCharset(
1493 | 0 | "EncNoDeclarationPlain", true, 0);
1494 | 0 | }
1495 | 0 | }
// The tree builder's broken state is checked both before and after eof().
1496 | 0 | if (NS_SUCCEEDED(mTreeBuilder->IsBroken())) {
1497 | 0 | mTokenizer->eof();
1498 | 0 | nsresult rv;
1499 | 0 | if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
1500 | 0 | MarkAsBroken(rv);
1501 | 0 | } else {
1502 | 0 | mTreeBuilder->StreamEnded();
1503 | 0 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) {
1504 | 0 | mTokenizer->EndViewSource();
1505 | 0 | }
1506 | 0 | }
1507 | 0 | }
1508 | 0 | FlushTreeOpsAndDisarmTimer();
1509 | 0 | return; // no more data and not expecting more
1510 | 0 | default:
1511 | 0 | MOZ_ASSERT_UNREACHABLE("It should be impossible to reach this.");
1512 | 0 | return;
1513 | 0 | }
1514 | 0 | }
1515 | 0 | mFirstBuffer = mFirstBuffer->next;
1516 | 0 | continue;
1517 | 0 | }
1518 | 0 | 
1519 | 0 | // now we have a non-empty buffer
// adjust() receives the previous tokenizeBuffer() result (mLastWasCR), and
// hasMore() is re-checked afterwards before tokenizing.
1520 | 0 | mFirstBuffer->adjust(mLastWasCR);
1521 | 0 | mLastWasCR = false;
1522 | 0 | if (mFirstBuffer->hasMore()) {
1523 | 0 | if (!mTokenizer->EnsureBufferSpace(mFirstBuffer->getLength())) {
1524 | 0 | MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
1525 | 0 | return;
1526 | 0 | }
1527 | 0 | mLastWasCR = mTokenizer->tokenizeBuffer(mFirstBuffer);
1528 | 0 | nsresult rv;
1529 | 0 | if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
1530 | 0 | MarkAsBroken(rv);
1531 | 0 | return;
1532 | 0 | }
1533 | 0 | // At this point, internalEncodingDeclaration() may have called
1534 | 0 | // Terminate, but that never happens together with script.
1535 | 0 | // Can't assert that here, though, because it's possible that the main
1536 | 0 | // thread has called Terminate() while this thread was parsing.
1537 | 0 | if (mTreeBuilder->HasScript()) {
1538 | 0 | // HasScript() cannot return true if the tree builder is preventing
1539 | 0 | // script execution.
1540 | 0 | MOZ_ASSERT(mMode == NORMAL);
// Start a speculation: record the current buffer position, line number and a
// tree builder snapshot, then make the speculation the tree builder's op
// sink.
1541 | 0 | mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
1542 | 0 | nsHtml5Speculation* speculation =
1543 | 0 | new nsHtml5Speculation(mFirstBuffer,
1544 | 0 | mFirstBuffer->getStart(),
1545 | 0 | mTokenizer->getLineNumber(),
1546 | 0 | mTreeBuilder->newSnapshot());
1547 | 0 | mTreeBuilder->AddSnapshotToScript(speculation->GetSnapshot(),
1548 | 0 | speculation->GetStartLineNumber());
1549 | 0 | FlushTreeOpsAndDisarmTimer();
1550 | 0 | mTreeBuilder->SetOpSink(speculation);
1551 | 0 | mSpeculations.AppendElement(speculation); // adopts the pointer
1552 | 0 | mSpeculating = true;
1553 | 0 | }
1554 | 0 | if (IsTerminatedOrInterrupted()) {
1555 | 0 | return;
1556 | 0 | }
1557 | 0 | }
1558 | 0 | }
1559 | 0 | }
1560 | | |
1561 | | class nsHtml5StreamParserContinuation : public Runnable |
1562 | | { |
1563 | | private: |
1564 | | nsHtml5StreamParserPtr mStreamParser; |
1565 | | |
1566 | | public: |
1567 | | explicit nsHtml5StreamParserContinuation(nsHtml5StreamParser* aStreamParser) |
1568 | | : Runnable("nsHtml5StreamParserContinuation") |
1569 | | , mStreamParser(aStreamParser) |
1570 | 0 | { |
1571 | 0 | } |
1572 | | NS_IMETHOD Run() override |
1573 | 0 | { |
1574 | 0 | mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex); |
1575 | 0 | mStreamParser->Uninterrupt(); |
1576 | 0 | mStreamParser->ParseAvailableData(); |
1577 | 0 | return NS_OK; |
1578 | 0 | } |
1579 | | }; |
1580 | | |
/**
 * Resolves the oldest pending speculation (mSpeculations[0]) against the
 * state handed in by the caller. Must be called on the main thread and
 * never in a view source mode (both are asserted).
 *
 * The speculation is considered failed when aLastWasCR is set, when
 * aTokenizer is not in the data state, or when aTreeBuilder does not match
 * the snapshot stored in the speculation.
 *
 * - On failure the stream is rewound to the position recorded in the
 *   speculation, a "SpeculationFailed" warning is reported to the console,
 *   all speculations and pending tree ops are discarded and the state of
 *   aTokenizer / aTreeBuilder is loaded into this parser's own tokenizer and
 *   tree builder.
 * - On success the speculation's ops are flushed to mExecutor and the
 *   speculation is removed; when no speculation remains, speculating stops.
 *
 * Except for the early-return paths, the method finishes by dispatching an
 * nsHtml5StreamParserContinuation to mEventTarget.
 *
 * @param aTokenizer   tokenizer whose state is compared and, on failure,
 *                     loaded into mTokenizer
 * @param aTreeBuilder tree builder whose state is compared against the
 *                     snapshot and, on failure, loaded into mTreeBuilder
 * @param aLastWasCR   CR flag of the caller; a true value always fails the
 *                     speculation and is copied into mLastWasCR on failure
 */
void
nsHtml5StreamParser::ContinueAfterScripts(nsHtml5Tokenizer* aTokenizer,
                                          nsHtml5TreeBuilder* aTreeBuilder,
                                          bool aLastWasCR)
{
  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
  NS_ASSERTION(!(mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML),
               "ContinueAfterScripts called in view source mode!");
  // Nothing to continue if the executor has already been marked broken.
  if (NS_FAILED(mExecutor->IsBroken())) {
    return;
  }
#ifdef DEBUG
  mExecutor->AssertStageEmpty();
#endif
  bool speculationFailed = false;
  // First phase: only the speculation mutex is held. The lock is scoped so
  // that it is released before the tokenizer mutex is taken below.
  {
    mozilla::MutexAutoLock speculationAutoLock(mSpeculationMutex);
    if (mSpeculations.IsEmpty()) {
      MOZ_ASSERT_UNREACHABLE("ContinueAfterScripts called without "
                             "speculations.");
      return;
    }

    nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
    if (aLastWasCR || !aTokenizer->isInDataState() ||
        !aTreeBuilder->snapshotMatches(speculation->GetSnapshot())) {
      speculationFailed = true;
      // We've got a failed speculation :-(
      MaybeDisableFutureSpeculation();
      Interrupt(); // Make the parser thread release the tokenizer mutex sooner
      // now fall out of the speculationAutoLock into the tokenizerAutoLock
      // block
    } else {
      // We've got a successful speculation!
      if (mSpeculations.Length() > 1) {
        // the first speculation isn't the current speculation, so there's
        // no need to bother the parser thread.
        speculation->FlushToSink(mExecutor);
        NS_ASSERTION(!mExecutor->IsScriptExecuting(),
                     "ParseUntilBlocked() was supposed to ensure we don't come "
                     "here when scripts are executing.");
        NS_ASSERTION(
          mExecutor->IsInFlushLoop(),
          "How are we here if "
          "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
          "only caller of this method?");
        mSpeculations.RemoveElementAt(0);
        return;
      }
      // else
      Interrupt(); // Make the parser thread release the tokenizer mutex sooner

      // now fall through
      // the first speculation is the current speculation. Need to
      // release the speculation mutex and acquire the tokenizer
      // mutex. (Just acquiring the other mutex here would deadlock)
    }
  }
  // Second phase: the tokenizer mutex is held for the rest of the method.
  {
    mozilla::MutexAutoLock tokenizerAutoLock(mTokenizerMutex);
#ifdef DEBUG
    {
      // Debug-only: while this block runs, the main thread is the permitted
      // lookup target of the atom table; this is reverted at the end.
      mAtomTable.SetPermittedLookupEventTarget(
        GetMainThreadSerialEventTarget());
    }
#endif
    // In principle, the speculation mutex should be acquired here,
    // but there's no point, because the parser thread only acquires it
    // when it has also acquired the tokenizer mutex and we are already
    // holding the tokenizer mutex.
    if (speculationFailed) {
      // Rewind the stream
      mAtEOF = false;
      nsHtml5Speculation* speculation = mSpeculations.ElementAt(0);
      mFirstBuffer = speculation->GetBuffer();
      mFirstBuffer->setStart(speculation->GetStart());
      mTokenizer->setLineNumber(speculation->GetStartLineNumber());

      nsContentUtils::ReportToConsole(nsIScriptError::warningFlag,
                                      NS_LITERAL_CSTRING("DOM Events"),
                                      mExecutor->GetDocument(),
                                      nsContentUtils::eDOM_PROPERTIES,
                                      "SpeculationFailed",
                                      nullptr,
                                      0,
                                      nullptr,
                                      EmptyString(),
                                      speculation->GetStartLineNumber());

      // Every buffer after the rewound one is reset to its beginning.
      nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer->next;
      while (buffer) {
        buffer->setStart(0);
        buffer = buffer->next;
      }

      mSpeculations.Clear(); // potentially a huge number of destructors
                             // run here synchronously on the main thread...

      mTreeBuilder->flushCharacters(); // empty the pending buffer
      mTreeBuilder->ClearOps();        // now get rid of the failed ops

      // Leave speculation mode: ops go to the executor's stage again.
      mTreeBuilder->SetOpSink(mExecutor->GetStage());
      mExecutor->StartReadingFromStage();
      mSpeculating = false;

      // Copy state over
      mLastWasCR = aLastWasCR;
      mTokenizer->loadState(aTokenizer);
      mTreeBuilder->loadState(aTreeBuilder, &mAtomTable);
    } else {
      // We've got a successful speculation and at least a moment ago it was
      // the current speculation
      mSpeculations.ElementAt(0)->FlushToSink(mExecutor);
      NS_ASSERTION(!mExecutor->IsScriptExecuting(),
                   "ParseUntilBlocked() was supposed to ensure we don't come "
                   "here when scripts are executing.");
      NS_ASSERTION(
        mExecutor->IsInFlushLoop(),
        "How are we here if "
        "RunFlushLoop() didn't call ParseUntilBlocked() which is the "
        "only caller of this method?");
      mSpeculations.RemoveElementAt(0);
      if (mSpeculations.IsEmpty()) {
        // yes, it was still the only speculation. Now stop speculating
        // However, before telling the executor to read from stage, flush
        // any pending ops straight to the executor, because otherwise
        // they remain unflushed until we get more data from the network.
        mTreeBuilder->SetOpSink(mExecutor);
        mTreeBuilder->Flush(true);
        mTreeBuilder->SetOpSink(mExecutor->GetStage());
        mExecutor->StartReadingFromStage();
        mSpeculating = false;
      }
    }
    // Ask the event target to resume parsing; a dispatch failure is only
    // reported as a warning.
    nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this);
    if (NS_FAILED(mEventTarget->Dispatch(event, nsIThread::DISPATCH_NORMAL))) {
      NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation");
    }
// A stream event might run before this event runs, but that's harmless.
#ifdef DEBUG
    mAtomTable.SetPermittedLookupEventTarget(mEventTarget);
#endif
  }
}
1725 | | |
1726 | | void |
1727 | | nsHtml5StreamParser::ContinueAfterFailedCharsetSwitch() |
1728 | 0 | { |
1729 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
1730 | 0 | nsCOMPtr<nsIRunnable> event = new nsHtml5StreamParserContinuation(this); |
1731 | 0 | if (NS_FAILED(mEventTarget->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
1732 | 0 | NS_WARNING("Failed to dispatch nsHtml5StreamParserContinuation"); |
1733 | 0 | } |
1734 | 0 | } |
1735 | | |
1736 | | class nsHtml5TimerKungFu : public Runnable |
1737 | | { |
1738 | | private: |
1739 | | nsHtml5StreamParserPtr mStreamParser; |
1740 | | |
1741 | | public: |
1742 | | explicit nsHtml5TimerKungFu(nsHtml5StreamParser* aStreamParser) |
1743 | | : Runnable("nsHtml5TimerKungFu") |
1744 | | , mStreamParser(aStreamParser) |
1745 | 0 | { |
1746 | 0 | } |
1747 | | NS_IMETHOD Run() override |
1748 | 0 | { |
1749 | 0 | mozilla::MutexAutoLock flushTimerLock(mStreamParser->mFlushTimerMutex); |
1750 | 0 | if (mStreamParser->mFlushTimer) { |
1751 | 0 | mStreamParser->mFlushTimer->Cancel(); |
1752 | 0 | mStreamParser->mFlushTimer = nullptr; |
1753 | 0 | } |
1754 | 0 | return NS_OK; |
1755 | 0 | } |
1756 | | }; |
1757 | | |
1758 | | void |
1759 | | nsHtml5StreamParser::DropTimer() |
1760 | 0 | { |
1761 | 0 | NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); |
1762 | 0 | /* |
1763 | 0 | * Simply nulling out the timer wouldn't work, because if the timer is |
1764 | 0 | * armed, it needs to be canceled first. Simply canceling it first wouldn't |
1765 | 0 | * work, because nsTimerImpl::Cancel is not safe for calling from outside |
1766 | 0 | * the thread where nsTimerImpl::Fire would run. It's not safe to |
1767 | 0 | * dispatch a runnable to cancel the timer from the destructor of this |
1768 | 0 | * class, because the timer has a weak (void*) pointer back to this instance |
1769 | 0 | * of the stream parser and having the timer fire before the runnable |
1770 | 0 | * cancels it would make the timer access a deleted object. |
1771 | 0 | * |
1772 | 0 | * This DropTimer method addresses these issues. This method must be called |
1773 | 0 | * on the main thread before the destructor of this class is reached. |
1774 | 0 | * The nsHtml5TimerKungFu object has an nsHtml5StreamParserPtr that addrefs |
1775 | 0 | * this |
1776 | 0 | * stream parser object to keep it alive until the runnable is done. |
1777 | 0 | * The runnable cancels the timer on the parser thread, drops the timer |
1778 | 0 | * and lets nsHtml5StreamParserPtr send a runnable back to the main thread to |
1779 | 0 | * release the stream parser. |
1780 | 0 | */ |
1781 | 0 | mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex); |
1782 | 0 | if (mFlushTimer) { |
1783 | 0 | nsCOMPtr<nsIRunnable> event = new nsHtml5TimerKungFu(this); |
1784 | 0 | if (NS_FAILED(mEventTarget->Dispatch(event, nsIThread::DISPATCH_NORMAL))) { |
1785 | 0 | NS_WARNING("Failed to dispatch TimerKungFu event"); |
1786 | 0 | } |
1787 | 0 | } |
1788 | 0 | } |
1789 | | |
1790 | | // Using a static, because the method name Notify is taken by the chardet |
1791 | | // callback. |
1792 | | void |
1793 | | nsHtml5StreamParser::TimerCallback(nsITimer* aTimer, void* aClosure) |
1794 | 0 | { |
1795 | 0 | (static_cast<nsHtml5StreamParser*>(aClosure))->TimerFlush(); |
1796 | 0 | } |
1797 | | |
1798 | | void |
1799 | | nsHtml5StreamParser::TimerFlush() |
1800 | 0 | { |
1801 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1802 | 0 | mozilla::MutexAutoLock autoLock(mTokenizerMutex); |
1803 | 0 |
|
1804 | 0 | NS_ASSERTION(!mSpeculating, "Flush timer fired while speculating."); |
1805 | 0 |
|
1806 | 0 | // The timer fired if we got here. No need to cancel it. Mark it as |
1807 | 0 | // not armed, though. |
1808 | 0 | mFlushTimerArmed = false; |
1809 | 0 |
|
1810 | 0 | mFlushTimerEverFired = true; |
1811 | 0 |
|
1812 | 0 | if (IsTerminatedOrInterrupted()) { |
1813 | 0 | return; |
1814 | 0 | } |
1815 | 0 | |
1816 | 0 | if (mMode == VIEW_SOURCE_HTML || mMode == VIEW_SOURCE_XML) { |
1817 | 0 | mTreeBuilder->Flush(); // delete useless ops |
1818 | 0 | if (mTokenizer->FlushViewSource()) { |
1819 | 0 | nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher); |
1820 | 0 | if (NS_FAILED(DispatchToMain(runnable.forget()))) { |
1821 | 0 | NS_WARNING("failed to dispatch executor flush event"); |
1822 | 0 | } |
1823 | 0 | } |
1824 | 0 | } else { |
1825 | 0 | // we aren't speculating and we don't know when new data is |
1826 | 0 | // going to arrive. Send data to the main thread. |
1827 | 0 | if (mTreeBuilder->Flush(true)) { |
1828 | 0 | nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher); |
1829 | 0 | if (NS_FAILED(DispatchToMain(runnable.forget()))) { |
1830 | 0 | NS_WARNING("failed to dispatch executor flush event"); |
1831 | 0 | } |
1832 | 0 | } |
1833 | 0 | } |
1834 | 0 | } |
1835 | | |
1836 | | void |
1837 | | nsHtml5StreamParser::MarkAsBroken(nsresult aRv) |
1838 | 0 | { |
1839 | 0 | NS_ASSERTION(IsParserThread(), "Wrong thread!"); |
1840 | 0 | mTokenizerMutex.AssertCurrentThreadOwns(); |
1841 | 0 |
|
1842 | 0 | Terminate(); |
1843 | 0 | mTreeBuilder->MarkAsBroken(aRv); |
1844 | 0 | mozilla::DebugOnly<bool> hadOps = mTreeBuilder->Flush(false); |
1845 | 0 | NS_ASSERTION(hadOps, "Should have had the markAsBroken op!"); |
1846 | 0 | nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher); |
1847 | 0 | if (NS_FAILED(DispatchToMain(runnable.forget()))) { |
1848 | 0 | NS_WARNING("failed to dispatch executor flush event"); |
1849 | 0 | } |
1850 | 0 | } |
1851 | | |
1852 | | nsresult |
1853 | | nsHtml5StreamParser::DispatchToMain(already_AddRefed<nsIRunnable>&& aRunnable) |
1854 | 0 | { |
1855 | 0 | if (mDocGroup) { |
1856 | 0 | return mDocGroup->Dispatch(TaskCategory::Network, std::move(aRunnable)); |
1857 | 0 | } |
1858 | 0 | return SchedulerGroup::UnlabeledDispatch(TaskCategory::Network, |
1859 | 0 | std::move(aRunnable)); |
1860 | 0 | } |