Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /*
8 : * nsIContentSerializer implementation that can be used with an
9 : * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
10 : * string that could be parsed into more or less the original DOM.
11 : */
12 :
13 : #include "nsHTMLContentSerializer.h"
14 :
15 : #include "nsIDOMElement.h"
16 : #include "nsIContent.h"
17 : #include "nsIDocument.h"
18 : #include "nsNameSpaceManager.h"
19 : #include "nsString.h"
20 : #include "nsUnicharUtils.h"
21 : #include "nsXPIDLString.h"
22 : #include "nsIServiceManager.h"
23 : #include "nsIDocumentEncoder.h"
24 : #include "nsGkAtoms.h"
25 : #include "nsIURI.h"
26 : #include "nsNetUtil.h"
27 : #include "nsEscape.h"
28 : #include "nsCRT.h"
29 : #include "nsIParserService.h"
30 : #include "nsContentUtils.h"
31 : #include "nsLWBrkCIID.h"
32 : #include "nsIScriptElement.h"
33 : #include "nsAttrName.h"
34 : #include "nsIDocShell.h"
35 : #include "nsIEditor.h"
36 : #include "nsIHTMLEditor.h"
37 : #include "mozilla/dom/Element.h"
38 : #include "nsParserConstants.h"
39 :
40 : using namespace mozilla::dom;
41 :
42 : nsresult
43 0 : NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
44 : {
45 0 : RefPtr<nsHTMLContentSerializer> it = new nsHTMLContentSerializer();
46 0 : it.forget(aSerializer);
47 0 : return NS_OK;
48 : }
49 :
50 0 : nsHTMLContentSerializer::nsHTMLContentSerializer()
51 : {
52 0 : mIsHTMLSerializer = true;
53 0 : }
54 :
55 0 : nsHTMLContentSerializer::~nsHTMLContentSerializer()
56 : {
57 0 : }
58 :
59 :
60 : NS_IMETHODIMP
61 0 : nsHTMLContentSerializer::AppendDocumentStart(nsIDocument *aDocument,
62 : nsAString& aStr)
63 : {
64 0 : return NS_OK;
65 : }
66 :
67 : bool
68 0 : nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
69 : nsIContent *aOriginalElement,
70 : nsAString& aTagPrefix,
71 : const nsAString& aTagNamespaceURI,
72 : nsIAtom* aTagName,
73 : int32_t aNamespace,
74 : nsAString& aStr)
75 : {
76 0 : int32_t count = aContent->GetAttrCount();
77 0 : if (!count)
78 0 : return true;
79 :
80 : nsresult rv;
81 0 : nsAutoString valueStr;
82 0 : NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
83 :
84 0 : for (int32_t index = 0; index < count; index++) {
85 0 : const nsAttrName* name = aContent->GetAttrNameAt(index);
86 0 : int32_t namespaceID = name->NamespaceID();
87 0 : nsIAtom* attrName = name->LocalName();
88 :
89 : // Filter out any attribute starting with [-|_]moz
90 0 : nsDependentAtomString attrNameStr(attrName);
91 0 : if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
92 0 : StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
93 0 : continue;
94 : }
95 0 : aContent->GetAttr(namespaceID, attrName, valueStr);
96 :
97 : //
98 : // Filter out special case of <br type="_moz"> or <br _moz*>,
99 : // used by the editor. Bug 16988. Yuck.
100 : //
101 0 : if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
102 0 : attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
103 0 : StringBeginsWith(valueStr, _mozStr)) {
104 0 : continue;
105 : }
106 :
107 0 : if (mIsCopying && mIsFirstChildOfOL &&
108 0 : aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
109 0 : attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
110 : // This is handled separately in SerializeLIValueAttribute()
111 0 : continue;
112 : }
113 0 : bool isJS = IsJavaScript(aContent, attrName, namespaceID, valueStr);
114 :
115 0 : if (((attrName == nsGkAtoms::href &&
116 0 : (namespaceID == kNameSpaceID_None ||
117 0 : namespaceID == kNameSpaceID_XLink)) ||
118 0 : (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
119 : // Make all links absolute when converting only the selection:
120 0 : if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
121 : // Would be nice to handle OBJECT and APPLET tags,
122 : // but that gets more complicated since we have to
123 : // search the tag list for CODEBASE as well.
124 : // For now, just leave them relative.
125 0 : nsCOMPtr<nsIURI> uri = aContent->GetBaseURI();
126 0 : if (uri) {
127 0 : nsAutoString absURI;
128 0 : rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
129 0 : if (NS_SUCCEEDED(rv)) {
130 0 : valueStr = absURI;
131 : }
132 : }
133 : }
134 : }
135 :
136 0 : if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
137 0 : aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
138 0 : && namespaceID == kNameSpaceID_None) {
139 : // If we're serializing a <meta http-equiv="content-type">,
140 : // use the proper value, rather than what's in the document.
141 0 : nsAutoString header;
142 0 : aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
143 0 : if (header.LowerCaseEqualsLiteral("content-type")) {
144 0 : valueStr = NS_LITERAL_STRING("text/html; charset=") +
145 0 : NS_ConvertASCIItoUTF16(mCharset);
146 : }
147 : }
148 :
149 0 : nsDependentAtomString nameStr(attrName);
150 0 : nsAutoString prefix;
151 0 : if (namespaceID == kNameSpaceID_XML) {
152 0 : prefix.AssignLiteral(u"xml");
153 0 : } else if (namespaceID == kNameSpaceID_XLink) {
154 0 : prefix.AssignLiteral(u"xlink");
155 : }
156 :
157 : // Expand shorthand attribute.
158 0 : if (aNamespace == kNameSpaceID_XHTML &&
159 0 : namespaceID == kNameSpaceID_None &&
160 0 : IsShorthandAttr(attrName, aTagName) &&
161 0 : valueStr.IsEmpty()) {
162 0 : valueStr = nameStr;
163 : }
164 0 : NS_ENSURE_TRUE(SerializeAttr(prefix, nameStr, valueStr,
165 : aStr, !isJS), false);
166 : }
167 :
168 0 : return true;
169 : }
170 :
/**
 * Appends the start tag of aElement ("<name attrs...>") to aStr.
 *
 * Steps, in order:
 *  1. CheckElementStart() decides whether anything is written; if it returns
 *     false the pre-content level is still entered and its rv is returned.
 *  2. Leading whitespace is emitted (newline / indentation / pending space),
 *     depending on mDoFormat, forceFormat, mDoRaw, PreLevel(), mColPos and
 *     mAddSpace.
 *  3. "<" and the tag name are written, the pre-content level is entered and
 *     indentation is increased when formatting.
 *  4. While copying, <ol> pushes an olState and <li> may get its value
 *     attribute serialized specially.
 *  5. The attributes and the closing ">" are written.
 *  6. script/style/noscript/noframes in the XHTML namespace bump
 *     mDisableEntityEncoding (undone in AppendElementEnd).
 *
 * @param aElement          element to serialize; must not be null.
 * @param aOriginalElement  passed on to IsFirstChildOfOL(),
 *                          SerializeHTMLAttributes() and AfterElementStart().
 * @param aStr              output string.
 * @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY when an append helper
 *         reports failure, or the rv produced by CheckElementStart().
 */
171 : NS_IMETHODIMP
172 0 : nsHTMLContentSerializer::AppendElementStart(Element* aElement,
173 : Element* aOriginalElement,
174 : nsAString& aStr)
175 : {
176 0 : NS_ENSURE_ARG(aElement);
177 :
178 0 : nsIContent* content = aElement;
179 :
180 0 : bool forceFormat = false;
181 0 : nsresult rv = NS_OK;
182 0 : if (!CheckElementStart(content, forceFormat, aStr, rv)) {
183 : // When we go to AppendElementEnd for this element, we're going to
184 : // MaybeLeaveFromPreContent(). So make sure to MaybeEnterInPreContent()
185 : // now, so our PreLevel() doesn't get confused.
186 0 : MaybeEnterInPreContent(content);
187 0 : return rv;
188 : }
189 :
190 0 : NS_ENSURE_SUCCESS(rv, rv);
191 :
192 0 : nsIAtom *name = content->NodeInfo()->NameAtom();
193 0 : int32_t ns = content->GetNameSpaceID();
194 :
195 0 : bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
196 :
// Leading whitespace. When formatting outside of preformatted content, break
// the line and/or indent; otherwise only flush a pending space (mAddSpace) or
// the pending root-node newline.
197 0 : if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
198 0 : if (mColPos && lineBreakBeforeOpen) {
199 0 : NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
200 : }
201 : else {
202 0 : NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY);
203 : }
204 0 : if (!mColPos) {
205 0 : NS_ENSURE_TRUE(AppendIndentation(aStr), NS_ERROR_OUT_OF_MEMORY);
206 : }
207 0 : else if (mAddSpace) {
// mAddSpace is cleared before the result is checked, so the flag is reset
// even when the append failed.
208 0 : bool result = AppendToString(char16_t(' '), aStr);
209 0 : mAddSpace = false;
210 0 : NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
211 : }
212 : }
213 0 : else if (mAddSpace) {
214 0 : bool result = AppendToString(char16_t(' '), aStr);
215 0 : mAddSpace = false;
216 0 : NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
217 : }
218 : else {
219 0 : NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY);
220 : }
221 : // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode wasn't
222 : // called
223 0 : mAddNewlineForRootNode = false;
224 :
225 0 : NS_ENSURE_TRUE(AppendToString(kLessThan, aStr), NS_ERROR_OUT_OF_MEMORY);
226 :
227 0 : NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), aStr), NS_ERROR_OUT_OF_MEMORY);
228 :
// From here on PreLevel() may report a different value than in the
// whitespace handling above.
229 0 : MaybeEnterInPreContent(content);
230 :
231 : // for block elements, we increase the indentation
232 0 : if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel())
233 0 : NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY);
234 :
235 : // Need to keep track of OL and LI elements in order to get ordinal number
236 : // for the LI.
237 0 : if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML){
238 : // We are copying and current node is an OL;
239 : // Store its start attribute value in olState->startVal.
240 0 : nsAutoString start;
241 0 : int32_t startAttrVal = 0;
242 :
243 0 : aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
244 0 : if (!start.IsEmpty()){
// NOTE(review): this rv shadows the function-level rv declared above; the
// outer one is not used again after this point, so behaviour is unaffected.
245 0 : nsresult rv = NS_OK;
246 0 : startAttrVal = start.ToInteger(&rv);
247 : //If OL has "start" attribute, first LI element has to start with that value
248 : //Therefore subtracting 1 as all the LI elements are incrementing it before using it;
249 : //In failure of ToInteger(), default StartAttrValue to 0.
250 0 : if (NS_SUCCEEDED(rv))
251 0 : startAttrVal--;
252 : else
253 0 : startAttrVal = 0;
254 : }
// The matching pop happens in AppendElementEnd for the same <ol>.
255 0 : mOLStateStack.AppendElement(olState(startAttrVal, true));
256 : }
257 :
258 0 : if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
259 0 : mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
260 0 : if (mIsFirstChildOfOL){
261 : // If OL is parent of this LI, serialize attributes in different manner.
262 0 : NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement, aStr), NS_ERROR_OUT_OF_MEMORY);
263 : }
264 : }
265 :
266 : // Even LI passed above have to go through this
267 : // for serializing attributes other than "value".
268 0 : nsAutoString dummyPrefix;
269 0 : NS_ENSURE_TRUE(SerializeHTMLAttributes(content,
270 : aOriginalElement,
271 : dummyPrefix,
272 : EmptyString(),
273 : name,
274 : ns,
275 : aStr), NS_ERROR_OUT_OF_MEMORY);
276 :
277 0 : NS_ENSURE_TRUE(AppendToString(kGreaterThan, aStr), NS_ERROR_OUT_OF_MEMORY);
278 :
// Text inside these elements is written without entity translation; see the
// mDisableEntityEncoding check in AppendAndTranslateEntities and the matching
// decrement in AppendElementEnd.
279 0 : if (ns == kNameSpaceID_XHTML &&
280 0 : (name == nsGkAtoms::script ||
281 0 : name == nsGkAtoms::style ||
282 0 : name == nsGkAtoms::noscript ||
283 0 : name == nsGkAtoms::noframes)) {
284 0 : ++mDisableEntityEncoding;
285 : }
286 :
287 0 : if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
288 0 : LineBreakAfterOpen(ns, name)) {
289 0 : NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
290 : }
291 :
292 0 : NS_ENSURE_TRUE(AfterElementStart(content, aOriginalElement, aStr), NS_ERROR_OUT_OF_MEMORY);
293 :
294 0 : return NS_OK;
295 : }
296 :
/**
 * Appends the end tag of aElement ("</name>") to aStr, where one is needed.
 *
 * No end tag is written in two cases, both of which return NS_OK early:
 *  - a script element reported as malformed by nsIScriptElement while
 *    ShouldMaintainPreLevel() is true;
 *  - an XHTML-namespace element that the parser service reports as not being
 *    a container.
 *
 * Besides writing the tag, this undoes state set up by AppendElementStart:
 * mDisableEntityEncoding for script/style/noscript/noframes, the indentation
 * level, the olState pushed for an <ol> while copying, the pre-content level,
 * and mInBody for <body>.
 *
 * @param aElement  element whose end tag is serialized; must not be null.
 * @param aStr      output string.
 * @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY when an append helper
 *         reports failure.
 */
297 : NS_IMETHODIMP
298 0 : nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
299 : nsAString& aStr)
300 : {
301 0 : NS_ENSURE_ARG(aElement);
302 :
303 0 : nsIContent* content = aElement;
304 :
305 0 : nsIAtom *name = content->NodeInfo()->NameAtom();
306 0 : int32_t ns = content->GetNameSpaceID();
307 :
// Mirror of the increment done in AppendElementStart for the same elements.
308 0 : if (ns == kNameSpaceID_XHTML &&
309 0 : (name == nsGkAtoms::script ||
310 0 : name == nsGkAtoms::style ||
311 0 : name == nsGkAtoms::noscript ||
312 0 : name == nsGkAtoms::noframes)) {
313 0 : --mDisableEntityEncoding;
314 : }
315 :
// Formatting is forced for elements carrying the mozdirty attribute unless
// the encoder was asked to ignore it.
316 0 : bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
317 0 : content->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
318 :
319 0 : if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
320 0 : DecrIndentation(name);
321 : }
322 :
// NOTE(review): unlike the checks above, this one does not test the
// namespace, so a non-XHTML element named "script" also takes this branch.
// Confirm that is intended.
323 0 : if (name == nsGkAtoms::script) {
324 0 : nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
325 :
326 0 : if (ShouldMaintainPreLevel() && script && script->IsMalformed()) {
327 : // We're looking at a malformed script tag. This means that the end tag
328 : // was missing in the source. Imitate that here by not serializing the end
329 : // tag.
330 0 : --PreLevel();
331 0 : return NS_OK;
332 : }
333 : }
334 0 : else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
335 0 : NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
336 : /* Though at this point we must always have an state to be deleted as all
337 : the OL opening tags are supposed to push an olState object to the stack*/
338 0 : if (!mOLStateStack.IsEmpty()) {
339 0 : mOLStateStack.RemoveElementAt(mOLStateStack.Length() -1);
340 : }
341 : }
342 :
343 0 : if (ns == kNameSpaceID_XHTML) {
344 0 : nsIParserService* parserService = nsContentUtils::GetParserService();
345 :
346 0 : if (parserService) {
// NOTE(review): isContainer has no initializer and the result of
// IsContainer() is ignored; if that call can fail without writing the
// out-parameter, the test below reads an indeterminate value. Verify against
// nsIParserService.
347 : bool isContainer;
348 :
349 : parserService->
350 0 : IsContainer(parserService->HTMLCaseSensitiveAtomTagToId(name),
351 0 : isContainer);
352 0 : if (!isContainer) {
353 : // Keep this in sync with the cleanup at the end of this method.
354 0 : MOZ_ASSERT(name != nsGkAtoms::body);
355 0 : MaybeLeaveFromPreContent(content);
356 0 : return NS_OK;
357 : }
358 : }
359 : }
360 :
// Leading whitespace before the end tag; same scheme as in
// AppendElementStart, minus the root-node newline handling.
361 0 : if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
362 :
363 0 : bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
364 :
365 0 : if (mColPos && lineBreakBeforeClose) {
366 0 : NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
367 : }
368 0 : if (!mColPos) {
369 0 : NS_ENSURE_TRUE(AppendIndentation(aStr), NS_ERROR_OUT_OF_MEMORY);
370 : }
371 0 : else if (mAddSpace) {
372 0 : bool result = AppendToString(char16_t(' '), aStr);
373 0 : mAddSpace = false;
374 0 : NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
375 : }
376 : }
377 0 : else if (mAddSpace) {
378 0 : bool result = AppendToString(char16_t(' '), aStr);
379 0 : mAddSpace = false;
380 0 : NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
381 : }
382 :
383 0 : NS_ENSURE_TRUE(AppendToString(kEndTag, aStr), NS_ERROR_OUT_OF_MEMORY);
384 0 : NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), aStr), NS_ERROR_OUT_OF_MEMORY);
385 0 : NS_ENSURE_TRUE(AppendToString(kGreaterThan, aStr), NS_ERROR_OUT_OF_MEMORY);
386 :
387 : // Keep this cleanup in sync with the IsContainer() early return above.
388 0 : MaybeLeaveFromPreContent(content);
389 :
// PreLevel() is re-read here, after the pre-content level was left above.
390 0 : if ((mDoFormat || forceFormat)&& !mDoRaw && !PreLevel()
391 0 : && LineBreakAfterClose(ns, name)) {
392 0 : NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
393 : }
394 : else {
395 0 : MaybeFlagNewlineForRootNode(aElement);
396 : }
397 :
// mInBody is consulted by AppendAndTranslateEntities when mBodyOnly is set.
398 0 : if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) {
399 0 : --mInBody;
400 : }
401 :
402 0 : return NS_OK;
403 : }
404 :
// Highest code unit that has an entry in the lookup tables below (160 is
// U+00A0, NO-BREAK SPACE). Callers must check `val <= kValNBSP` before
// indexing kEntities[] / kAttrEntities[].
405 : static const uint16_t kValNBSP = 160;
406 :
// Shorthand for "no entity" so the tables stay readable; undefined again
// right after the tables.
407 : #define _ 0
408 :
409 : // This table indexes into kEntityStrings[].
// Used for text outside of attribute values. 161 entries (code units 0..160),
// ten per row. Non-zero slots: 38 '&' -> 2, 60 '<' -> 3, 62 '>' -> 4,
// 160 NBSP -> 5.
410 : static const uint8_t kEntities[] = {
411 : _, _, _, _, _, _, _, _, _, _,
412 : _, _, _, _, _, _, _, _, _, _,
413 : _, _, _, _, _, _, _, _, _, _,
414 : _, _, _, _, _, _, _, _, 2, _,
415 : _, _, _, _, _, _, _, _, _, _,
416 : _, _, _, _, _, _, _, _, _, _,
417 : 3, _, 4, _, _, _, _, _, _, _,
418 : _, _, _, _, _, _, _, _, _, _,
419 : _, _, _, _, _, _, _, _, _, _,
420 : _, _, _, _, _, _, _, _, _, _,
421 : _, _, _, _, _, _, _, _, _, _,
422 : _, _, _, _, _, _, _, _, _, _,
423 : _, _, _, _, _, _, _, _, _, _,
424 : _, _, _, _, _, _, _, _, _, _,
425 : _, _, _, _, _, _, _, _, _, _,
426 : _, _, _, _, _, _, _, _, _, _,
427 : 5
428 : };
429 :
430 : // This table indexes into kEntityStrings[].
// Used inside attribute values (selected via mInAttribute). Same layout as
// kEntities[], plus 34 '"' -> 1.
431 : static const uint8_t kAttrEntities[] = {
432 : _, _, _, _, _, _, _, _, _, _,
433 : _, _, _, _, _, _, _, _, _, _,
434 : _, _, _, _, _, _, _, _, _, _,
435 : _, _, _, _, 1, _, _, _, 2, _,
436 : _, _, _, _, _, _, _, _, _, _,
437 : _, _, _, _, _, _, _, _, _, _,
438 : 3, _, 4, _, _, _, _, _, _, _,
439 : _, _, _, _, _, _, _, _, _, _,
440 : _, _, _, _, _, _, _, _, _, _,
441 : _, _, _, _, _, _, _, _, _, _,
442 : _, _, _, _, _, _, _, _, _, _,
443 : _, _, _, _, _, _, _, _, _, _,
444 : _, _, _, _, _, _, _, _, _, _,
445 : _, _, _, _, _, _, _, _, _, _,
446 : _, _, _, _, _, _, _, _, _, _,
447 : _, _, _, _, _, _, _, _, _, _,
448 : 5
449 : };
450 :
451 : #undef _
452 :
// Replacement text for the basic entities, indexed by the values stored in
// kEntities[] and kAttrEntities[]. Slot 0 means "no replacement". Each string
// is a complete character reference (leading '&' and trailing ';' included),
// so it is appended to the output verbatim.
static const char* const kEntityStrings[] = {
  /* 0 */ nullptr,
  /* 1 */ "&quot;",
  /* 2 */ "&amp;",
  /* 3 */ "&lt;",
  /* 4 */ "&gt;",
  /* 5 */ "&nbsp;"
};
461 :
462 0 : uint32_t FindNextBasicEntity(const nsAString& aStr,
463 : const uint32_t aLen,
464 : uint32_t aIndex,
465 : const uint8_t* aEntityTable,
466 : const char** aEntity)
467 : {
468 0 : for (; aIndex < aLen; ++aIndex) {
469 : // for each character in this chunk, check if it
470 : // needs to be replaced
471 0 : char16_t val = aStr[aIndex];
472 0 : if (val <= kValNBSP && aEntityTable[val]) {
473 0 : *aEntity = kEntityStrings[aEntityTable[val]];
474 0 : return aIndex;
475 : }
476 : }
477 0 : return aIndex;
478 : }
479 :
/**
 * Appends aStr to aOutputStr, replacing characters by entity references
 * according to the encoder flags in mFlags.
 *
 * Cases, checked in this order:
 *  - mBodyOnly set and not inside <body> (mInBody is zero): nothing is
 *    appended.
 *  - mDisableEntityEncoding non-zero: aStr is appended unchanged.
 *  - only OutputEncodeBasicEntities set: characters with an entry in
 *    kEntities[] (or kAttrEntities[] when mInAttribute) are replaced.
 *  - any of OutputEncodeLatin1Entities / OutputEncodeHTMLEntities /
 *    OutputEncodeW3CEntities set: the basic table is applied as well, and
 *    characters above 127 are additionally looked up through the parser
 *    service or mEntityConverter.
 *  - otherwise: delegates to nsXMLContentSerializer.
 *
 * @param aStr        text to serialize.
 * @param aOutputStr  string the (possibly translated) text is appended to.
 * @return false when a fallible append fails, true otherwise.
 */
480 : bool
481 0 : nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
482 : nsAString& aOutputStr)
483 : {
484 0 : if (mBodyOnly && !mInBody) {
485 0 : return true;
486 : }
487 :
488 0 : if (mDisableEntityEncoding) {
489 0 : return aOutputStr.Append(aStr, mozilla::fallible);
490 : }
491 :
492 : bool nonBasicEntities =
493 0 : !!(mFlags & (nsIDocumentEncoder::OutputEncodeLatin1Entities |
494 : nsIDocumentEncoder::OutputEncodeHTMLEntities |
495 0 : nsIDocumentEncoder::OutputEncodeW3CEntities));
496 :
497 0 : if (!nonBasicEntities &&
498 0 : (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities))) {
499 0 : const uint8_t* entityTable = mInAttribute ? kAttrEntities : kEntities;
// `start` marks the beginning of the run of plain text not yet copied to the
// output; `i` is moved to the next entity (or to len) on every iteration.
500 0 : uint32_t start = 0;
501 0 : const uint32_t len = aStr.Length();
502 0 : for (uint32_t i = 0; i < len; ++i) {
503 0 : const char* entity = nullptr;
504 0 : i = FindNextBasicEntity(aStr, len, i, entityTable, &entity);
505 0 : uint32_t normalTextLen = i - start;
506 0 : if (normalTextLen) {
507 0 : NS_ENSURE_TRUE(aOutputStr.Append(Substring(aStr, start, normalTextLen),
508 : mozilla::fallible), false);
509 : }
510 0 : if (entity) {
511 0 : NS_ENSURE_TRUE(aOutputStr.AppendASCII(entity, mozilla::fallible), false);
// Skip the replaced character; the loop's ++i moves past it as well.
512 0 : start = i + 1;
513 : }
514 : }
515 0 : return true;
516 0 : } else if (nonBasicEntities) {
517 0 : nsIParserService* parserService = nsContentUtils::GetParserService();
518 :
519 0 : if (!parserService) {
// NOTE(review): this reports success although nothing from aStr has been
// appended to aOutputStr on this path.
520 0 : NS_ERROR("Can't get parser service");
521 0 : return true;
522 : }
523 :
524 0 : nsReadingIterator<char16_t> done_reading;
525 0 : aStr.EndReading(done_reading);
526 :
527 : // for each chunk of |aString|...
528 0 : uint32_t advanceLength = 0;
529 0 : nsReadingIterator<char16_t> iter;
530 :
531 0 : const uint8_t* entityTable = mInAttribute ? kAttrEntities : kEntities;
532 0 : nsAutoCString entityReplacement;
533 :
// Each iteration copies one run of plain text followed by at most one
// replaced character. advanceLength ends up as the number of input code units
// consumed (plain run + replaced character) and drives iter.advance().
534 0 : for (aStr.BeginReading(iter);
535 : iter != done_reading;
536 0 : iter.advance(int32_t(advanceLength))) {
537 0 : uint32_t fragmentLength = done_reading - iter;
538 0 : uint32_t lengthReplaced = 0; // the number of UTF-16 codepoints
539 : // replaced by a particular entity
540 0 : const char16_t* c = iter.get();
541 0 : const char16_t* fragmentStart = c;
542 0 : const char16_t* fragmentEnd = c + fragmentLength;
// At most one of the following three is set when the scan below stops:
//   entityText          - bare entity name; '&' and ';' are added on output
//   fullConstEntityText - complete reference taken from kEntityStrings[]
//   fullEntityText      - complete reference from mEntityConverter; released
//                         with free() after it has been appended
543 0 : const char* entityText = nullptr;
544 0 : const char* fullConstEntityText = nullptr;
545 0 : char* fullEntityText = nullptr;
546 :
547 0 : advanceLength = 0;
548 : // for each character in this chunk, check if it
549 : // needs to be replaced
550 0 : for (; c < fragmentEnd; c++, advanceLength++) {
551 0 : char16_t val = *c;
552 0 : if (val <= kValNBSP && entityTable[val]) {
553 0 : fullConstEntityText = kEntityStrings[entityTable[val]];
554 0 : break;
555 0 : } else if (val > 127 &&
556 0 : ((val < 256 &&
557 0 : mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities) ||
558 0 : mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities)) {
559 0 : entityReplacement.Truncate();
560 0 : parserService->HTMLConvertUnicodeToEntity(val, entityReplacement);
561 :
562 0 : if (!entityReplacement.IsEmpty()) {
563 0 : entityText = entityReplacement.get();
564 0 : break;
565 : }
// No entity found: the character stays in the plain-text run. The W3C branch
// below is not tried for it, because it is an `else if` of this one.
566 : }
567 0 : else if (val > 127 &&
568 0 : mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
569 0 : mEntityConverter) {
570 0 : if (NS_IS_HIGH_SURROGATE(val) &&
571 0 : c + 1 < fragmentEnd &&
572 0 : NS_IS_LOW_SURROGATE(*(c + 1))) {
// c is moved onto the low surrogate here. On success the pair is replaced as
// a unit (lengthReplaced = 2); on failure advanceLength gets one extra
// increment so that both code units stay in the plain-text run.
573 0 : uint32_t valUTF32 = SURROGATE_TO_UCS4(val, *(++c));
574 0 : if (NS_SUCCEEDED(mEntityConverter->ConvertUTF32ToEntity(valUTF32,
575 : nsIEntityConverter::entityW3C, &fullEntityText))) {
576 0 : lengthReplaced = 2;
577 0 : break;
578 : }
579 : else {
580 0 : advanceLength++;
581 0 : }
582 : }
583 0 : else if (NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
584 : nsIEntityConverter::entityW3C,
585 : &fullEntityText))) {
586 0 : lengthReplaced = 1;
587 0 : break;
588 : }
589 : }
590 : }
591 :
// Copy the plain-text run first. Its result is only checked at the bottom of
// the loop body so that fullEntityText is always freed beforehand.
592 0 : bool result = aOutputStr.Append(fragmentStart, advanceLength, mozilla::fallible);
593 0 : if (entityText) {
594 0 : NS_ENSURE_TRUE(aOutputStr.Append(char16_t('&'), mozilla::fallible), false);
595 0 : NS_ENSURE_TRUE(AppendASCIItoUTF16(entityText, aOutputStr, mozilla::fallible), false);
596 0 : NS_ENSURE_TRUE(aOutputStr.Append(char16_t(';'), mozilla::fallible), false);
597 0 : advanceLength++;
598 : }
599 0 : else if (fullConstEntityText) {
600 0 : NS_ENSURE_TRUE(aOutputStr.AppendASCII(fullConstEntityText, mozilla::fallible), false);
601 0 : ++advanceLength;
602 : }
603 : // if it comes from nsIEntityConverter, it already has '&' and ';'
604 0 : else if (fullEntityText) {
605 0 : bool ok = AppendASCIItoUTF16(fullEntityText, aOutputStr, mozilla::fallible);
606 0 : free(fullEntityText);
607 0 : advanceLength += lengthReplaced;
608 0 : NS_ENSURE_TRUE(ok, false);
609 : }
610 0 : NS_ENSURE_TRUE(result, false);
611 : }
612 : } else {
// No entity-encoding flag applies: use the XML serializer's implementation.
613 0 : NS_ENSURE_TRUE(nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr), false);
614 : }
615 :
616 0 : return true;
617 : }
|