Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /*
8 : * nsIContentSerializer implementation that can be used with an
9 : * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10 : * (eg for copy/paste as plaintext).
11 : */
12 :
13 : #include "nsPlainTextSerializer.h"
14 : #include "nsLWBrkCIID.h"
15 : #include "nsIServiceManager.h"
16 : #include "nsGkAtoms.h"
17 : #include "nsNameSpaceManager.h"
18 : #include "nsTextFragment.h"
19 : #include "nsContentUtils.h"
20 : #include "nsReadableUtils.h"
21 : #include "nsUnicharUtils.h"
22 : #include "nsCRT.h"
23 : #include "mozilla/dom/Element.h"
24 : #include "mozilla/Preferences.h"
25 : #include "mozilla/BinarySearch.h"
26 : #include "nsComputedDOMStyle.h"
27 :
28 : namespace mozilla {
29 : class Encoding;
30 : }
31 :
32 : using namespace mozilla;
33 : using namespace mozilla::dom;
34 :
// Names of the preferences consulted by this serializer.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
#define PREF_ALWAYS_INCLUDE_RUBY "converter.html2txt.always_include_ruby"

// Basic indentation unit; list and <dd> indentation are defined in terms
// of it.
static const int32_t kTabSize = 4;

// Indentation of h1 when mHeaderStrategy is 1 or 2. The indentation of the
// other header levels is derived from this value.
// XXX center h1?
static const int32_t kIndentSizeHeaders = 2;

// When mHeaderStrategy is 1, h(x+1) is indented this many columns more
// than h(x).
static const int32_t kIndentIncrementHeaders = 2;

// Indentation of the non-first lines of <ul> and <ol> items.
static const int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// Frequently used code units: U+00A0 (no-break space) and U+0020 (space).
static const char16_t kNBSP = 0x00A0;
static const char16_t kSPACE = u' ';
53 :
54 : static int32_t HeaderLevel(nsIAtom* aTag);
55 : static int32_t GetUnicharWidth(char16_t ucs);
56 : static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
57 :
58 : // Someday may want to make this non-const:
59 : static const uint32_t TagStackSize = 500;
60 : static const uint32_t OLStackSize = 100;
61 :
62 : static bool gPreferenceInitialized = false;
63 : static bool gAlwaysIncludeRuby = false;
64 :
// XPCOM boilerplate for nsPlainTextSerializer, generated by macros:
// cycle-collecting AddRef/Release implementations.
65 0 : NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
66 0 : NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)
67 :
// Interface map: the class exposes nsIContentSerializer and nsISupports.
68 0 : NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
69 0 : NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
70 0 : NS_INTERFACE_MAP_ENTRY(nsISupports)
71 0 : NS_INTERFACE_MAP_END
72 :
// mElement is the only member listed for cycle collection.
73 0 : NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer,
74 : mElement)
75 :
76 : nsresult
77 0 : NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
78 : {
79 0 : RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
80 0 : it.forget(aSerializer);
81 0 : return NS_OK;
82 : }
83 :
84 0 : nsPlainTextSerializer::nsPlainTextSerializer()
85 0 : : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
86 : {
87 :
88 0 : mOutputString = nullptr;
89 0 : mHeadLevel = 0;
90 0 : mAtFirstColumn = true;
91 0 : mIndent = 0;
92 0 : mCiteQuoteLevel = 0;
93 0 : mStructs = true; // will be read from prefs later
94 0 : mHeaderStrategy = 1 /*indent increasingly*/; // ditto
95 0 : mHasWrittenCiteBlockquote = false;
96 0 : mSpanLevel = 0;
97 0 : for (int32_t i = 0; i <= 6; i++) {
98 0 : mHeaderCounter[i] = 0;
99 : }
100 :
101 : // Line breaker
102 0 : mWrapColumn = 72; // XXX magic number, we expect someone to reset this
103 0 : mCurrentLineWidth = 0;
104 :
105 : // Flow
106 0 : mEmptyLines = 1; // The start of the document is an "empty line" in itself,
107 0 : mInWhitespace = false;
108 0 : mPreFormattedMail = false;
109 0 : mStartedOutput = false;
110 :
111 0 : mPreformattedBlockBoundary = false;
112 0 : mWithRubyAnnotation = false; // will be read from pref and flag later
113 :
114 : // initialize the tag stack to zero:
115 : // The stack only ever contains pointers to static atoms, so they don't
116 : // need refcounting.
117 0 : mTagStack = new nsIAtom*[TagStackSize];
118 0 : mTagStackIndex = 0;
119 0 : mIgnoreAboveIndex = (uint32_t)kNotFound;
120 :
121 : // initialize the OL stack, where numbers for ordered lists are kept
122 0 : mOLStack = new int32_t[OLStackSize];
123 0 : mOLStackIndex = 0;
124 :
125 0 : mULCount = 0;
126 :
127 0 : mIgnoredChildNodeLevel = 0;
128 :
129 0 : if (!gPreferenceInitialized) {
130 : Preferences::AddBoolVarCache(&gAlwaysIncludeRuby, PREF_ALWAYS_INCLUDE_RUBY,
131 0 : true);
132 0 : gPreferenceInitialized = true;
133 : }
134 0 : }
135 :
136 0 : nsPlainTextSerializer::~nsPlainTextSerializer()
137 : {
138 0 : delete[] mTagStack;
139 0 : delete[] mOLStack;
140 0 : NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
141 0 : }
142 :
143 : NS_IMETHODIMP
144 0 : nsPlainTextSerializer::Init(uint32_t aFlags,
145 : uint32_t aWrapColumn,
146 : const Encoding* aEncoding,
147 : bool aIsCopying,
148 : bool aIsWholeDocument,
149 : bool* aNeedsPreformatScanning)
150 : {
151 : #ifdef DEBUG
152 : // Check if the major control flags are set correctly.
153 0 : if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
154 0 : NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
155 : "If you want format=flowed, you must combine it with "
156 : "nsIDocumentEncoder::OutputFormatted");
157 : }
158 :
159 0 : if (aFlags & nsIDocumentEncoder::OutputFormatted) {
160 0 : NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
161 : "Can't do formatted and preformatted output at the same time!");
162 : }
163 : #endif
164 :
165 0 : *aNeedsPreformatScanning = true;
166 0 : mFlags = aFlags;
167 0 : mWrapColumn = aWrapColumn;
168 :
169 : // Only create a linebreaker if we will handle wrapping.
170 0 : if (MayWrap() && MayBreakLines()) {
171 0 : mLineBreaker = nsContentUtils::LineBreaker();
172 : }
173 :
174 : // Set the line break character:
175 0 : if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
176 0 : && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
177 : // Windows
178 0 : mLineBreak.AssignLiteral("\r\n");
179 : }
180 0 : else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) {
181 : // Mac
182 0 : mLineBreak.Assign(char16_t('\r'));
183 : }
184 0 : else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) {
185 : // Unix/DOM
186 0 : mLineBreak.Assign(char16_t('\n'));
187 : }
188 : else {
189 : // Platform/default
190 0 : mLineBreak.AssignLiteral(NS_LINEBREAK);
191 : }
192 :
193 0 : mLineBreakDue = false;
194 0 : mFloatingLines = -1;
195 :
196 0 : mPreformattedBlockBoundary = false;
197 :
198 0 : if (mFlags & nsIDocumentEncoder::OutputFormatted) {
199 : // Get some prefs that controls how we do formatted output
200 0 : mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
201 :
202 0 : mHeaderStrategy =
203 0 : Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
204 : }
205 :
206 : // The pref is default inited to false in libpref, but we use true
207 : // as fallback value because we don't want to affect behavior in
208 : // other places which use this serializer currently.
209 0 : mWithRubyAnnotation =
210 0 : gAlwaysIncludeRuby ||
211 0 : (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
212 :
213 : // XXX We should let the caller decide whether to do this or not
214 0 : mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
215 :
216 0 : return NS_OK;
217 : }
218 :
219 : bool
220 0 : nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
221 : {
222 0 : uint32_t size = aStack.Length();
223 0 : if (size == 0) {
224 0 : return false;
225 : }
226 0 : return aStack.ElementAt(size-1);
227 : }
228 :
229 : void
230 0 : nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
231 : {
232 0 : uint32_t size = aStack.Length();
233 0 : if (size > 0) {
234 0 : aStack.ElementAt(size-1) = aValue;
235 : }
236 : else {
237 0 : NS_ERROR("There is no \"Last\" value");
238 : }
239 0 : }
240 :
241 : void
242 0 : nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
243 : {
244 0 : aStack.AppendElement(bool(aValue));
245 0 : }
246 :
247 : bool
248 0 : nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
249 : {
250 0 : bool returnValue = false;
251 0 : uint32_t size = aStack.Length();
252 0 : if (size > 0) {
253 0 : returnValue = aStack.ElementAt(size-1);
254 0 : aStack.RemoveElementAt(size-1);
255 : }
256 0 : return returnValue;
257 : }
258 :
259 : bool
260 0 : nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag)
261 : {
262 : // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
263 : // non-textual container element should be serialized as placeholder
264 : // character and its child nodes should be ignored. See bug 895239.
265 0 : if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
266 0 : return false;
267 : }
268 :
269 : return
270 0 : (aTag == nsGkAtoms::audio) ||
271 0 : (aTag == nsGkAtoms::canvas) ||
272 0 : (aTag == nsGkAtoms::iframe) ||
273 0 : (aTag == nsGkAtoms::meter) ||
274 0 : (aTag == nsGkAtoms::progress) ||
275 0 : (aTag == nsGkAtoms::object) ||
276 0 : (aTag == nsGkAtoms::svg) ||
277 0 : (aTag == nsGkAtoms::video);
278 : }
279 :
280 : bool
281 0 : nsPlainTextSerializer::IsIgnorableRubyAnnotation(nsIAtom* aTag)
282 : {
283 0 : if (mWithRubyAnnotation) {
284 0 : return false;
285 : }
286 :
287 : return
288 0 : aTag == nsGkAtoms::rp ||
289 0 : aTag == nsGkAtoms::rt ||
290 0 : aTag == nsGkAtoms::rtc;
291 : }
292 :
293 : NS_IMETHODIMP
294 0 : nsPlainTextSerializer::AppendText(nsIContent* aText,
295 : int32_t aStartOffset,
296 : int32_t aEndOffset,
297 : nsAString& aStr)
298 : {
299 0 : if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
300 0 : return NS_OK;
301 : }
302 :
303 0 : NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
304 0 : if ( aStartOffset < 0 )
305 0 : return NS_ERROR_INVALID_ARG;
306 :
307 0 : NS_ENSURE_ARG(aText);
308 :
309 0 : nsresult rv = NS_OK;
310 :
311 0 : nsIContent* content = aText;
312 : const nsTextFragment* frag;
313 0 : if (!content || !(frag = content->GetText())) {
314 0 : return NS_ERROR_FAILURE;
315 : }
316 :
317 0 : int32_t fragLength = frag->GetLength();
318 0 : int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
319 0 : NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
320 :
321 0 : int32_t length = endoffset - aStartOffset;
322 0 : if (length <= 0) {
323 0 : return NS_OK;
324 : }
325 :
326 0 : nsAutoString textstr;
327 0 : if (frag->Is2b()) {
328 0 : textstr.Assign(frag->Get2b() + aStartOffset, length);
329 : }
330 : else {
331 : // AssignASCII is for 7-bit character only, so don't use it
332 0 : const char *data = frag->Get1b();
333 0 : CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
334 : }
335 :
336 0 : mOutputString = &aStr;
337 :
338 : // We have to split the string across newlines
339 : // to match parser behavior
340 0 : int32_t start = 0;
341 0 : int32_t offset = textstr.FindCharInSet("\n\r");
342 0 : while (offset != kNotFound) {
343 :
344 0 : if (offset>start) {
345 : // Pass in the line
346 : DoAddText(false,
347 0 : Substring(textstr, start, offset-start));
348 : }
349 :
350 : // Pass in a newline
351 0 : DoAddText(true, mLineBreak);
352 :
353 0 : start = offset+1;
354 0 : offset = textstr.FindCharInSet("\n\r", start);
355 : }
356 :
357 : // Consume the last bit of the string if there's any left
358 0 : if (start < length) {
359 0 : if (start) {
360 0 : DoAddText(false, Substring(textstr, start, length - start));
361 : }
362 : else {
363 0 : DoAddText(false, textstr);
364 : }
365 : }
366 :
367 0 : mOutputString = nullptr;
368 :
369 0 : return rv;
370 : }
371 :
372 : NS_IMETHODIMP
373 0 : nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
374 : int32_t aStartOffset,
375 : int32_t aEndOffset,
376 : nsAString& aStr)
377 : {
378 0 : return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
379 : }
380 :
381 : NS_IMETHODIMP
382 0 : nsPlainTextSerializer::ScanElementForPreformat(Element* aElement)
383 : {
384 0 : mPreformatStack.push(IsElementPreformatted(aElement));
385 0 : return NS_OK;
386 : }
387 :
388 : NS_IMETHODIMP
389 0 : nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement)
390 : {
391 0 : mPreformatStack.pop();
392 0 : return NS_OK;
393 : }
394 :
395 : NS_IMETHODIMP
396 0 : nsPlainTextSerializer::AppendElementStart(Element* aElement,
397 : Element* aOriginalElement,
398 : nsAString& aStr)
399 : {
400 0 : NS_ENSURE_ARG(aElement);
401 :
402 0 : mElement = aElement;
403 :
404 : nsresult rv;
405 0 : nsIAtom* id = GetIdForContent(mElement);
406 :
407 0 : bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
408 :
409 0 : mOutputString = &aStr;
410 :
411 0 : if (isContainer) {
412 0 : rv = DoOpenContainer(id);
413 : }
414 : else {
415 0 : rv = DoAddLeaf(id);
416 : }
417 :
418 0 : mElement = nullptr;
419 0 : mOutputString = nullptr;
420 :
421 0 : if (id == nsGkAtoms::head) {
422 0 : ++mHeadLevel;
423 : }
424 :
425 0 : return rv;
426 : }
427 :
428 : NS_IMETHODIMP
429 0 : nsPlainTextSerializer::AppendElementEnd(Element* aElement,
430 : nsAString& aStr)
431 : {
432 0 : NS_ENSURE_ARG(aElement);
433 :
434 0 : mElement = aElement;
435 :
436 : nsresult rv;
437 0 : nsIAtom* id = GetIdForContent(mElement);
438 :
439 0 : bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
440 :
441 0 : mOutputString = &aStr;
442 :
443 0 : rv = NS_OK;
444 0 : if (isContainer) {
445 0 : rv = DoCloseContainer(id);
446 : }
447 :
448 0 : mElement = nullptr;
449 0 : mOutputString = nullptr;
450 :
451 0 : if (id == nsGkAtoms::head) {
452 0 : NS_ASSERTION(mHeadLevel != 0,
453 : "mHeadLevel being decremented below 0");
454 0 : --mHeadLevel;
455 : }
456 :
457 0 : return rv;
458 : }
459 :
460 : NS_IMETHODIMP
461 0 : nsPlainTextSerializer::Flush(nsAString& aStr)
462 : {
463 0 : mOutputString = &aStr;
464 0 : FlushLine();
465 0 : mOutputString = nullptr;
466 0 : return NS_OK;
467 : }
468 :
469 : NS_IMETHODIMP
470 0 : nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
471 : nsAString& aStr)
472 : {
473 0 : return NS_OK;
474 : }
475 :
// Handles the start of the container (non-void) element whose tag is aTag.
// The element itself is read from mElement and output goes through the
// serializer's write helpers; both are set up by the caller
// (AppendElementStart). Always returns NS_OK.
//
// Processing order, as implemented below:
//  1. Elements replaced by a placeholder, and ignorable ruby annotations,
//     only bump mIgnoredChildNodeLevel and return.
//  2. With OutputForPlainTextClipboardCopy, a pending preformatted block
//     boundary schedules a line break.
//  3. With OutputRaw, nothing else is done.
//  4. aTag is pushed on mTagStack (the push is skipped once TagStackSize
//     entries are in use).
//  5. noscript/iframe/noframes content that must not be output sets
//     mIgnoreAboveIndex so that everything up to the matching end tag is
//     ignored.
//  6. <body> is inspected for a preformatted-mail style attribute.
//  7. Per-tag structural handling (vertical space, indentation, list
//     bullets/numbers, table cell separators, quotes).
//  8. Only with OutputFormatted: header indentation/numbering, link URL
//     capture and the inline markers for sup/sub/code/strong/em/u.
476 : nsresult
477 0 : nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag)
478 : {
479 : // Check if we need output current node as placeholder character and ignore
480 : // child nodes.
481 0 : if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
482 0 : if (mIgnoredChildNodeLevel == 0) {
483 : // Serialize current node as placeholder character
484 0 : Write(NS_LITERAL_STRING(u"\xFFFC"));
485 : }
486 : // Ignore child nodes.
487 0 : mIgnoredChildNodeLevel++;
488 0 : return NS_OK;
489 : }
490 0 : if (IsIgnorableRubyAnnotation(aTag)) {
491 : // Ignorable ruby annotation shouldn't be replaced by a placeholder
492 : // character, neither any of its descendants.
493 0 : mIgnoredChildNodeLevel++;
494 0 : return NS_OK;
495 : }
496 :
497 0 : if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
498 0 : if (mPreformattedBlockBoundary && DoOutput()) {
499 : // Should always end a line, but get no more whitespace
500 0 : if (mFloatingLines < 0)
501 0 : mFloatingLines = 0;
502 0 : mLineBreakDue = true;
503 : }
504 0 : mPreformattedBlockBoundary = false;
505 : }
506 :
507 0 : if (mFlags & nsIDocumentEncoder::OutputRaw) {
508 : // Raw means raw. Don't even think about doing anything fancy
509 : // here like indenting, adding line breaks or any other
510 : // characters such as list item bullets, quote characters
511 : // around <q>, etc. I mean it! Don't make me smack you!
512 :
513 0 : return NS_OK;
514 : }
515 :
// Record the tag; once the stack is full, further tags are not recorded.
516 0 : if (mTagStackIndex < TagStackSize) {
517 0 : mTagStack[mTagStackIndex++] = aTag;
518 : }
519 :
// Inside an ignored subtree: the tag is tracked above, but nothing is output.
520 0 : if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
521 0 : return NS_OK;
522 : }
523 :
524 : // Reset this so that <blockquote type=cite> doesn't affect the whitespace
525 : // above random <pre>s below it.
526 0 : mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
527 0 : aTag == nsGkAtoms::pre;
528 :
529 0 : bool isInCiteBlockquote = false;
530 :
531 : // XXX special-case <blockquote type=cite> so that we don't add additional
532 : // newlines before the text.
533 0 : if (aTag == nsGkAtoms::blockquote) {
534 0 : nsAutoString value;
535 0 : nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
536 0 : isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
537 : }
538 :
539 0 : if (mLineBreakDue && !isInCiteBlockquote)
540 0 : EnsureVerticalSpace(mFloatingLines);
541 :
542 : // Check if this tag's content that should not be output
543 0 : if ((aTag == nsGkAtoms::noscript &&
544 0 : !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
545 0 : ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
546 0 : !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
547 : // Ignore everything that follows the current tag in
548 : // question until a matching end tag is encountered.
549 0 : mIgnoreAboveIndex = mTagStackIndex - 1;
550 0 : return NS_OK;
551 : }
552 :
553 0 : if (aTag == nsGkAtoms::body) {
554 : // Try to figure out here whether we have a
555 : // preformatted style attribute set by Thunderbird.
556 : //
557 : // Trigger on the presence of a "pre-wrap" in the
558 : // style attribute. That's a very simplistic way to do
559 : // it, but better than nothing.
560 : // Also set mWrapColumn to the value given there
561 : // (which arguably we should only do if told to do so).
562 0 : nsAutoString style;
563 : int32_t whitespace;
564 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
565 : (kNotFound != (whitespace = style.Find("white-space:")))) {
566 :
567 0 : if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
568 : #ifdef DEBUG_preformatted
569 : printf("Set mPreFormattedMail based on style pre-wrap\n");
570 : #endif
571 0 : mPreFormattedMail = true;
572 0 : int32_t widthOffset = style.Find("width:");
573 0 : if (widthOffset >= 0) {
574 : // We have to search for the ch before the semicolon,
575 : // not for the semicolon itself, because nsString::ToInteger()
576 : // considers 'c' to be a valid numeric char (even if radix=10)
577 : // but then gets confused if it sees it next to the number
578 : // when the radix specified was 10, and returns an error code.
// The literal 6 below is the length of "width:".
// NOTE(review): when "ch" is not found, the fallback length is measured
// from widthOffset rather than widthOffset+6; presumably harmless if Mid()
// clamps the count to the end of the string -- confirm.
579 0 : int32_t semiOffset = style.Find("ch", false, widthOffset+6);
580 0 : int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
581 0 : : style.Length() - widthOffset);
582 0 : nsAutoString widthstr;
583 0 : style.Mid(widthstr, widthOffset+6, length);
584 : nsresult err;
585 0 : int32_t col = widthstr.ToInteger(&err);
586 :
587 0 : if (NS_SUCCEEDED(err)) {
588 0 : mWrapColumn = (uint32_t)col;
589 : #ifdef DEBUG_preformatted
590 : printf("Set wrap column to %d based on style\n", mWrapColumn);
591 : #endif
592 : }
593 : }
594 : }
595 0 : else if (kNotFound != style.Find("pre", true, whitespace)) {
596 : #ifdef DEBUG_preformatted
597 : printf("Set mPreFormattedMail based on style pre\n");
598 : #endif
599 0 : mPreFormattedMail = true;
600 0 : mWrapColumn = 0;
601 : }
602 : }
603 : else {
604 : /* See comment at end of function. */
605 0 : mInWhitespace = true;
606 0 : mPreFormattedMail = false;
607 : }
608 :
609 0 : return NS_OK;
610 : }
611 :
612 : // Keep this in sync with DoCloseContainer!
613 0 : if (!DoOutput()) {
614 0 : return NS_OK;
615 : }
616 :
// Per-tag structural handling; applies in formatted and unformatted mode
// unless a branch checks OutputFormatted itself.
617 0 : if (aTag == nsGkAtoms::p)
618 0 : EnsureVerticalSpace(1);
619 0 : else if (aTag == nsGkAtoms::pre) {
620 0 : if (GetLastBool(mIsInCiteBlockquote))
621 0 : EnsureVerticalSpace(0);
622 0 : else if (mHasWrittenCiteBlockquote) {
623 0 : EnsureVerticalSpace(0);
624 0 : mHasWrittenCiteBlockquote = false;
625 : }
626 : else
627 0 : EnsureVerticalSpace(1);
628 : }
629 0 : else if (aTag == nsGkAtoms::tr) {
630 0 : PushBool(mHasWrittenCellsForRow, false);
631 : }
632 0 : else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
633 : // We must make sure that the content of two table cells get a
634 : // space between them.
635 :
636 : // To make the separation between cells most obvious and
637 : // importable, we use a TAB.
638 0 : if (GetLastBool(mHasWrittenCellsForRow)) {
639 : // Bypass |Write| so that the TAB isn't compressed away.
640 0 : AddToLine(u"\t", 1);
641 0 : mInWhitespace = true;
642 : }
643 0 : else if (mHasWrittenCellsForRow.IsEmpty()) {
644 : // We don't always see a <tr> (nor a <table>) before the <td> if we're
645 : // copying part of a table
646 0 : PushBool(mHasWrittenCellsForRow, true); // will never be popped
647 : }
648 : else {
649 0 : SetLastBool(mHasWrittenCellsForRow, true);
650 : }
651 : }
652 0 : else if (aTag == nsGkAtoms::ul) {
653 : // Indent here to support nested lists, which aren't included in li :-(
// An outermost list gets one blank line before it; a nested list gets none.
654 0 : EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
655 : // Must end the current line before we change indentation
656 0 : mIndent += kIndentSizeList;
657 0 : mULCount++;
658 : }
659 0 : else if (aTag == nsGkAtoms::ol) {
660 0 : EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
661 0 : if (mFlags & nsIDocumentEncoder::OutputFormatted) {
662 : // Must end the current line before we change indentation
// The list's first number comes from the start attribute, defaulting to 1.
// Nothing is pushed once OLStackSize entries are in use.
663 0 : if (mOLStackIndex < OLStackSize) {
664 0 : nsAutoString startAttr;
665 0 : int32_t startVal = 1;
666 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
667 0 : nsresult rv = NS_OK;
668 0 : startVal = startAttr.ToInteger(&rv);
669 0 : if (NS_FAILED(rv))
670 0 : startVal = 1;
671 : }
672 0 : mOLStack[mOLStackIndex++] = startVal;
673 : }
674 : } else {
675 0 : mOLStackIndex++;
676 : }
677 0 : mIndent += kIndentSizeList; // see ul
678 : }
679 0 : else if (aTag == nsGkAtoms::li &&
680 0 : (mFlags & nsIDocumentEncoder::OutputFormatted)) {
// Build the item marker in mInIndentString: "<number>. " (or "#. ") for
// ordered lists, a bullet character followed by a space otherwise.
681 0 : if (mTagStackIndex > 1 && IsInOL()) {
682 0 : if (mOLStackIndex > 0) {
683 0 : nsAutoString valueAttr;
684 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
685 0 : nsresult rv = NS_OK;
686 0 : int32_t valueAttrVal = valueAttr.ToInteger(&rv);
687 0 : if (NS_SUCCEEDED(rv))
688 0 : mOLStack[mOLStackIndex-1] = valueAttrVal;
689 : }
690 : // This is what nsBulletFrame does for OLs:
691 0 : mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
692 : }
693 : else {
694 0 : mInIndentString.Append(char16_t('#'));
695 : }
696 :
697 0 : mInIndentString.Append(char16_t('.'));
698 :
699 : }
700 : else {
// The bullet cycles through "*o+#" with the <ul> nesting depth; '#' is
// used when there is no enclosing <ul> (mULCount == 0).
701 : static char bulletCharArray[] = "*o+#";
702 0 : uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
703 0 : char bulletChar = bulletCharArray[index % 4];
704 0 : mInIndentString.Append(char16_t(bulletChar));
705 : }
706 :
707 0 : mInIndentString.Append(char16_t(' '));
708 : }
709 0 : else if (aTag == nsGkAtoms::dl) {
710 0 : EnsureVerticalSpace(1);
711 : }
712 0 : else if (aTag == nsGkAtoms::dt) {
713 0 : EnsureVerticalSpace(0);
714 : }
715 0 : else if (aTag == nsGkAtoms::dd) {
716 0 : EnsureVerticalSpace(0);
717 0 : mIndent += kIndentSizeDD;
718 : }
719 0 : else if (aTag == nsGkAtoms::span) {
720 0 : ++mSpanLevel;
721 : }
722 0 : else if (aTag == nsGkAtoms::blockquote) {
723 : // Push
724 0 : PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
725 0 : if (isInCiteBlockquote) {
726 0 : EnsureVerticalSpace(0);
727 0 : mCiteQuoteLevel++;
728 : }
729 : else {
730 0 : EnsureVerticalSpace(1);
731 0 : mIndent += kTabSize; // Check for some maximum value?
732 : }
733 : }
734 0 : else if (aTag == nsGkAtoms::q) {
735 0 : Write(NS_LITERAL_STRING("\""));
736 : }
737 :
738 : // Else make sure we'll separate block level tags,
739 : // even if we're about to leave, before doing any other formatting.
740 0 : else if (IsElementBlock(mElement)) {
741 0 : EnsureVerticalSpace(0);
742 : }
743 :
744 : //////////////////////////////////////////////////////////////
745 0 : if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
746 0 : return NS_OK;
747 : }
748 : //////////////////////////////////////////////////////////////
749 : // The rest of this routine is formatted output stuff,
750 : // which we should skip if we're not formatted:
751 : //////////////////////////////////////////////////////////////
752 :
753 : // Push on stack
754 0 : bool currentNodeIsConverted = IsCurrentNodeConverted();
755 :
756 0 : if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
757 0 : aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
758 0 : aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
759 : {
760 0 : EnsureVerticalSpace(2);
761 0 : if (mHeaderStrategy == 2) { // numbered
762 0 : mIndent += kIndentSizeHeaders;
763 : // Caching
764 0 : int32_t level = HeaderLevel(aTag);
765 : // Increase counter for current level
766 0 : mHeaderCounter[level]++;
767 : // Reset all lower levels
768 : int32_t i;
769 :
770 0 : for (i = level + 1; i <= 6; i++) {
771 0 : mHeaderCounter[i] = 0;
772 : }
773 :
774 : // Construct numbers
// e.g. counters 1, 2, 3 for levels 1..3 give "1.2.3. "
775 0 : nsAutoString leadup;
776 0 : for (i = 1; i <= level; i++) {
777 0 : leadup.AppendInt(mHeaderCounter[i]);
778 0 : leadup.Append(char16_t('.'));
779 : }
780 0 : leadup.Append(char16_t(' '));
781 0 : Write(leadup);
782 : }
783 0 : else if (mHeaderStrategy == 1) { // indent increasingly
784 0 : mIndent += kIndentSizeHeaders;
785 0 : for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
786 : // for h(x), run x-1 times
787 0 : mIndent += kIndentIncrementHeaders;
788 : }
789 0 : }
790 : }
791 0 : else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
// Remember a non-empty href in mURL.
792 0 : nsAutoString url;
793 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
794 0 : && !url.IsEmpty()) {
795 0 : mURL = url;
796 0 : }
797 : }
798 0 : else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
799 0 : Write(NS_LITERAL_STRING("^"));
800 : }
801 0 : else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
802 0 : Write(NS_LITERAL_STRING("_"));
803 : }
804 0 : else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
805 0 : Write(NS_LITERAL_STRING("|"));
806 : }
807 0 : else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
808 0 : && mStructs && !currentNodeIsConverted) {
809 0 : Write(NS_LITERAL_STRING("*"));
810 : }
811 0 : else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
812 0 : && mStructs && !currentNodeIsConverted) {
813 0 : Write(NS_LITERAL_STRING("/"));
814 : }
815 0 : else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
816 0 : Write(NS_LITERAL_STRING("_"));
817 : }
818 :
819 : /* Container elements are always block elements, so we shouldn't
820 : output any whitespace immediately after the container tag even if
821 : there's extra whitespace there because the HTML is pretty-printed
822 : or something. To ensure that happens, tell the serializer we're
823 : already in whitespace so it won't output more. */
824 0 : mInWhitespace = true;
825 :
826 0 : return NS_OK;
827 : }
828 :
829 : nsresult
830 0 : nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
831 : {
832 0 : if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
833 0 : mIgnoredChildNodeLevel--;
834 0 : return NS_OK;
835 : }
836 0 : if (IsIgnorableRubyAnnotation(aTag)) {
837 0 : mIgnoredChildNodeLevel--;
838 0 : return NS_OK;
839 : }
840 :
841 0 : if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
842 0 : if (DoOutput() && IsInPre() && IsElementBlock(mElement)) {
843 : // If we're closing a preformatted block element, output a line break
844 : // when we find a new container.
845 0 : mPreformattedBlockBoundary = true;
846 : }
847 : }
848 :
849 0 : if (mFlags & nsIDocumentEncoder::OutputRaw) {
850 : // Raw means raw. Don't even think about doing anything fancy
851 : // here like indenting, adding line breaks or any other
852 : // characters such as list item bullets, quote characters
853 : // around <q>, etc. I mean it! Don't make me smack you!
854 :
855 0 : return NS_OK;
856 : }
857 :
858 0 : if (mTagStackIndex > 0) {
859 0 : --mTagStackIndex;
860 : }
861 :
862 0 : if (mTagStackIndex >= mIgnoreAboveIndex) {
863 0 : if (mTagStackIndex == mIgnoreAboveIndex) {
864 : // We're dealing with the close tag whose matching
865 : // open tag had set the mIgnoreAboveIndex value.
866 : // Reset mIgnoreAboveIndex before discarding this tag.
867 0 : mIgnoreAboveIndex = (uint32_t)kNotFound;
868 : }
869 0 : return NS_OK;
870 : }
871 :
872 : // End current line if we're ending a block level tag
873 0 : if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
874 : // We want the output to end with a new line,
875 : // but in preformatted areas like text fields,
876 : // we can't emit newlines that weren't there.
877 : // So add the newline only in the case of formatted output.
878 0 : if (mFlags & nsIDocumentEncoder::OutputFormatted) {
879 0 : EnsureVerticalSpace(0);
880 : }
881 : else {
882 0 : FlushLine();
883 : }
884 : // We won't want to do anything with these in formatted mode either,
885 : // so just return now:
886 0 : return NS_OK;
887 : }
888 :
889 : // Keep this in sync with DoOpenContainer!
890 0 : if (!DoOutput()) {
891 0 : return NS_OK;
892 : }
893 :
894 0 : if (aTag == nsGkAtoms::tr) {
895 0 : PopBool(mHasWrittenCellsForRow);
896 : // Should always end a line, but get no more whitespace
897 0 : if (mFloatingLines < 0)
898 0 : mFloatingLines = 0;
899 0 : mLineBreakDue = true;
900 : }
901 0 : else if (((aTag == nsGkAtoms::li) ||
902 0 : (aTag == nsGkAtoms::dt)) &&
903 0 : (mFlags & nsIDocumentEncoder::OutputFormatted)) {
904 : // Items that should always end a line, but get no more whitespace
905 0 : if (mFloatingLines < 0)
906 0 : mFloatingLines = 0;
907 0 : mLineBreakDue = true;
908 : }
909 0 : else if (aTag == nsGkAtoms::pre) {
910 0 : mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
911 0 : mLineBreakDue = true;
912 : }
913 0 : else if (aTag == nsGkAtoms::ul) {
914 0 : FlushLine();
915 0 : mIndent -= kIndentSizeList;
916 0 : if (--mULCount + mOLStackIndex == 0) {
917 0 : mFloatingLines = 1;
918 0 : mLineBreakDue = true;
919 : }
920 : }
921 0 : else if (aTag == nsGkAtoms::ol) {
922 0 : FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
923 0 : mIndent -= kIndentSizeList;
924 0 : NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
925 0 : mOLStackIndex--;
926 0 : if (mULCount + mOLStackIndex == 0) {
927 0 : mFloatingLines = 1;
928 0 : mLineBreakDue = true;
929 : }
930 : }
931 0 : else if (aTag == nsGkAtoms::dl) {
932 0 : mFloatingLines = 1;
933 0 : mLineBreakDue = true;
934 : }
935 0 : else if (aTag == nsGkAtoms::dd) {
936 0 : FlushLine();
937 0 : mIndent -= kIndentSizeDD;
938 : }
939 0 : else if (aTag == nsGkAtoms::span) {
940 0 : NS_ASSERTION(mSpanLevel, "Span level will be negative!");
941 0 : --mSpanLevel;
942 : }
943 0 : else if (aTag == nsGkAtoms::div) {
944 0 : if (mFloatingLines < 0)
945 0 : mFloatingLines = 0;
946 0 : mLineBreakDue = true;
947 : }
948 0 : else if (aTag == nsGkAtoms::blockquote) {
949 0 : FlushLine(); // Is this needed?
950 :
951 : // Pop
952 0 : bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
953 :
954 0 : if (isInCiteBlockquote) {
955 0 : NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
956 0 : mCiteQuoteLevel--;
957 0 : mFloatingLines = 0;
958 0 : mHasWrittenCiteBlockquote = true;
959 : }
960 : else {
961 0 : mIndent -= kTabSize;
962 0 : mFloatingLines = 1;
963 : }
964 0 : mLineBreakDue = true;
965 : }
966 0 : else if (aTag == nsGkAtoms::q) {
967 0 : Write(NS_LITERAL_STRING("\""));
968 : }
969 0 : else if (IsElementBlock(mElement) && aTag != nsGkAtoms::script) {
970 : // All other blocks get 1 vertical space after them
971 : // in formatted mode, otherwise 0.
972 : // This is hard. Sometimes 0 is a better number, but
973 : // how to know?
974 0 : if (mFlags & nsIDocumentEncoder::OutputFormatted)
975 0 : EnsureVerticalSpace(1);
976 : else {
977 0 : if (mFloatingLines < 0)
978 0 : mFloatingLines = 0;
979 0 : mLineBreakDue = true;
980 : }
981 : }
982 :
983 : //////////////////////////////////////////////////////////////
984 0 : if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
985 0 : return NS_OK;
986 : }
987 : //////////////////////////////////////////////////////////////
988 : // The rest of this routine is formatted output stuff,
989 : // which we should skip if we're not formatted:
990 : //////////////////////////////////////////////////////////////
991 :
992 : // Pop the currentConverted stack
993 0 : bool currentNodeIsConverted = IsCurrentNodeConverted();
994 :
995 0 : if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
996 0 : aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
997 0 : aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
998 :
999 0 : if (mHeaderStrategy) { /*numbered or indent increasingly*/
1000 0 : mIndent -= kIndentSizeHeaders;
1001 : }
1002 0 : if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
1003 0 : for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
1004 : // for h(x), run x-1 times
1005 0 : mIndent -= kIndentIncrementHeaders;
1006 : }
1007 : }
1008 0 : EnsureVerticalSpace(1);
1009 : }
1010 0 : else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
1011 0 : nsAutoString temp;
1012 0 : temp.AssignLiteral(" <");
1013 0 : temp += mURL;
1014 0 : temp.Append(char16_t('>'));
1015 0 : Write(temp);
1016 0 : mURL.Truncate();
1017 : }
1018 0 : else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
1019 0 : && mStructs && !currentNodeIsConverted) {
1020 0 : Write(kSpace);
1021 : }
1022 0 : else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
1023 0 : Write(NS_LITERAL_STRING("|"));
1024 : }
1025 0 : else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
1026 0 : && mStructs && !currentNodeIsConverted) {
1027 0 : Write(NS_LITERAL_STRING("*"));
1028 : }
1029 0 : else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
1030 0 : && mStructs && !currentNodeIsConverted) {
1031 0 : Write(NS_LITERAL_STRING("/"));
1032 : }
1033 0 : else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
1034 0 : Write(NS_LITERAL_STRING("_"));
1035 : }
1036 :
1037 0 : return NS_OK;
1038 : }
1039 :
1040 : bool
1041 0 : nsPlainTextSerializer::MustSuppressLeaf()
1042 : {
1043 0 : if (mIgnoredChildNodeLevel > 0) {
1044 0 : return true;
1045 : }
1046 :
1047 0 : if ((mTagStackIndex > 1 &&
1048 0 : mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
1049 0 : (mTagStackIndex > 0 &&
1050 0 : mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
1051 : // Don't output the contents of SELECT elements;
1052 : // Might be nice, eventually, to output just the selected element.
1053 : // Read more in bug 31994.
1054 0 : return true;
1055 : }
1056 :
1057 0 : if (mTagStackIndex > 0 &&
1058 0 : (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
1059 0 : mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
1060 : // Don't output the contents of <script> or <style> tags;
1061 0 : return true;
1062 : }
1063 :
1064 0 : return false;
1065 : }
1066 :
1067 : void
1068 0 : nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
1069 : {
1070 : // If we don't want any output, just return
1071 0 : if (!DoOutput()) {
1072 0 : return;
1073 : }
1074 :
1075 0 : if (!aIsLineBreak) {
1076 : // Make sure to reset this, since it's no longer true.
1077 0 : mHasWrittenCiteBlockquote = false;
1078 : }
1079 :
1080 0 : if (mLineBreakDue)
1081 0 : EnsureVerticalSpace(mFloatingLines);
1082 :
1083 0 : if (MustSuppressLeaf()) {
1084 0 : return;
1085 : }
1086 :
1087 0 : if (aIsLineBreak) {
1088 : // The only times we want to pass along whitespace from the original
1089 : // html source are if we're forced into preformatted mode via flags,
1090 : // or if we're prettyprinting and we're inside a <pre>.
1091 : // Otherwise, either we're collapsing to minimal text, or we're
1092 : // prettyprinting to mimic the html format, and in neither case
1093 : // does the formatting of the html source help us.
1094 0 : if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
1095 0 : (mPreFormattedMail && !mWrapColumn) ||
1096 0 : IsInPre()) {
1097 0 : EnsureVerticalSpace(mEmptyLines+1);
1098 : }
1099 0 : else if (!mInWhitespace) {
1100 0 : Write(kSpace);
1101 0 : mInWhitespace = true;
1102 : }
1103 0 : return;
1104 : }
1105 :
1106 : /* Check, if we are in a link (symbolized with mURL containing the URL)
1107 : and the text is equal to the URL. In that case we don't want to output
1108 : the URL twice so we scrap the text in mURL. */
1109 0 : if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1110 0 : mURL.Truncate();
1111 : }
1112 0 : Write(aText);
1113 : }
1114 :
1115 : nsresult
1116 0 : nsPlainTextSerializer::DoAddLeaf(nsIAtom* aTag)
1117 : {
1118 0 : mPreformattedBlockBoundary = false;
1119 :
1120 : // If we don't want any output, just return
1121 0 : if (!DoOutput()) {
1122 0 : return NS_OK;
1123 : }
1124 :
1125 0 : if (mLineBreakDue)
1126 0 : EnsureVerticalSpace(mFloatingLines);
1127 :
1128 0 : if (MustSuppressLeaf()) {
1129 0 : return NS_OK;
1130 : }
1131 :
1132 0 : if (aTag == nsGkAtoms::br) {
1133 : // Another egregious editor workaround, see bug 38194:
1134 : // ignore the bogus br tags that the editor sticks here and there.
1135 0 : nsAutoString tagAttr;
1136 0 : if (NS_FAILED(GetAttributeValue(nsGkAtoms::type, tagAttr))
1137 0 : || !tagAttr.EqualsLiteral("_moz")) {
1138 0 : EnsureVerticalSpace(mEmptyLines+1);
1139 : }
1140 : }
1141 0 : else if (aTag == nsGkAtoms::hr &&
1142 0 : (mFlags & nsIDocumentEncoder::OutputFormatted)) {
1143 0 : EnsureVerticalSpace(0);
1144 :
1145 : // Make a line of dashes as wide as the wrap width
1146 : // XXX honoring percentage would be nice
1147 0 : nsAutoString line;
1148 0 : uint32_t width = (mWrapColumn > 0 ? mWrapColumn : 25);
1149 0 : while (line.Length() < width) {
1150 0 : line.Append(char16_t('-'));
1151 : }
1152 0 : Write(line);
1153 :
1154 0 : EnsureVerticalSpace(0);
1155 : }
1156 0 : else if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
1157 0 : Write(NS_LITERAL_STRING(u"\xFFFC"));
1158 : }
1159 0 : else if (aTag == nsGkAtoms::img) {
1160 : /* Output (in decreasing order of preference)
1161 : alt, title or nothing */
1162 : // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1163 0 : nsAutoString imageDescription;
1164 0 : if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt,
1165 : imageDescription))) {
1166 : // If the alt attribute has an empty value (|alt=""|), output nothing
1167 : }
1168 0 : else if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title,
1169 : imageDescription))
1170 0 : && !imageDescription.IsEmpty()) {
1171 0 : imageDescription = NS_LITERAL_STRING(" [") +
1172 0 : imageDescription +
1173 0 : NS_LITERAL_STRING("] ");
1174 : }
1175 :
1176 0 : Write(imageDescription);
1177 : }
1178 :
1179 0 : return NS_OK;
1180 : }
1181 :
1182 : /**
1183 : * Adds as many newline as necessary to get |noOfRows| empty lines
1184 : *
1185 : * noOfRows = -1 : Being in the middle of some line of text
1186 : * noOfRows = 0 : Being at the start of a line
1187 : * noOfRows = n>0 : Having n empty lines before the current line.
1188 : */
1189 : void
1190 0 : nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows)
1191 : {
1192 : // If we have something in the indent we probably want to output
1193 : // it and it's not included in the count for empty lines so we don't
1194 : // realize that we should start a new line.
1195 0 : if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1196 0 : EndLine(false);
1197 0 : mInWhitespace = true;
1198 : }
1199 :
1200 0 : while(mEmptyLines < noOfRows) {
1201 0 : EndLine(false);
1202 0 : mInWhitespace = true;
1203 : }
1204 0 : mLineBreakDue = false;
1205 0 : mFloatingLines = -1;
1206 0 : }
1207 :
1208 : /**
1209 : * This empties the current line cache without adding a NEWLINE.
1210 : * Should not be used if line wrapping is of importance since
1211 : * this function destroys the cache information.
1212 : *
1213 : * It will also write indentation and quotes if we believe us to be
1214 : * at the start of the line.
1215 : */
1216 : void
1217 0 : nsPlainTextSerializer::FlushLine()
1218 : {
1219 0 : if (!mCurrentLine.IsEmpty()) {
1220 0 : if (mAtFirstColumn) {
1221 0 : OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
1222 : }
1223 :
1224 0 : Output(mCurrentLine);
1225 0 : mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1226 0 : mCurrentLine.Truncate();
1227 0 : mCurrentLineWidth = 0;
1228 : }
1229 0 : }
1230 :
1231 : /**
1232 : * Prints the text to output to our current output device (the string mOutputString).
1233 : * The only logic here is to replace non breaking spaces with a normal space since
1234 : * most (all?) receivers of the result won't understand the nbsp and even be
1235 : * confused by it.
1236 : */
1237 : void
1238 0 : nsPlainTextSerializer::Output(nsString& aString)
1239 : {
1240 0 : if (!aString.IsEmpty()) {
1241 0 : mStartedOutput = true;
1242 : }
1243 :
1244 0 : if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
1245 : // First, replace all nbsp characters with spaces,
1246 : // which the unicode encoder won't do for us.
1247 0 : aString.ReplaceChar(kNBSP, kSPACE);
1248 : }
1249 0 : mOutputString->Append(aString);
1250 0 : }
1251 :
1252 : static bool
1253 0 : IsSpaceStuffable(const char16_t *s)
1254 : {
1255 0 : if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1256 0 : nsCRT::strncmp(s, u"From ", 5) == 0)
1257 0 : return true;
1258 : else
1259 0 : return false;
1260 : }
1261 :
1262 : /**
1263 : * This function adds a piece of text to the current stored line. If we are
1264 : * wrapping text and the stored line will become too long, a suitable
1265 : * location to wrap will be found and the line that's complete will be
1266 : * output.
1267 : */
1268 : void
1269 0 : nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment,
1270 : int32_t aLineFragmentLength)
1271 : {
1272 0 : uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
1273 :
1274 0 : if (mLineBreakDue)
1275 0 : EnsureVerticalSpace(mFloatingLines);
1276 :
1277 0 : int32_t linelength = mCurrentLine.Length();
1278 0 : if (0 == linelength) {
1279 0 : if (0 == aLineFragmentLength) {
1280 : // Nothing at all. Are you kidding me?
1281 0 : return;
1282 : }
1283 :
1284 0 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1285 0 : if (IsSpaceStuffable(aLineFragment)
1286 0 : && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1287 : )
1288 : {
1289 : // Space stuffing a la RFC 2646 (format=flowed).
1290 0 : mCurrentLine.Append(char16_t(' '));
1291 :
1292 0 : if (MayWrap()) {
1293 0 : mCurrentLineWidth += GetUnicharWidth(' ');
1294 : #ifdef DEBUG_wrapping
1295 : NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1296 : mCurrentLine.Length()) ==
1297 : (int32_t)mCurrentLineWidth,
1298 : "mCurrentLineWidth and reality out of sync!");
1299 : #endif
1300 : }
1301 : }
1302 : }
1303 0 : mEmptyLines=-1;
1304 : }
1305 :
1306 0 : mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1307 0 : if (MayWrap()) {
1308 0 : mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
1309 0 : aLineFragmentLength);
1310 : #ifdef DEBUG_wrapping
1311 : NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1312 : mCurrentLine.Length()) ==
1313 : (int32_t)mCurrentLineWidth,
1314 : "mCurrentLineWidth and reality out of sync!");
1315 : #endif
1316 : }
1317 :
1318 0 : linelength = mCurrentLine.Length();
1319 :
1320 : // Wrap?
1321 0 : if (MayWrap())
1322 : {
1323 : #ifdef DEBUG_wrapping
1324 : NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1325 : mCurrentLine.Length()) ==
1326 : (int32_t)mCurrentLineWidth,
1327 : "mCurrentLineWidth and reality out of sync!");
1328 : #endif
1329 : // Yes, wrap!
1330 : // The "+4" is to avoid wrap lines that only would be a couple
1331 : // of letters too long. We give this bonus only if the
1332 : // wrapcolumn is more than 20.
1333 0 : uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1334 :
1335 : // XXX: Should calculate prefixwidth with GetUnicharStringWidth
1336 0 : while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
1337 : // We go from the end removing one letter at a time until
1338 : // we have a reasonable width
1339 0 : int32_t goodSpace = mCurrentLine.Length();
1340 0 : uint32_t width = mCurrentLineWidth;
1341 0 : while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
1342 0 : goodSpace--;
1343 0 : width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1344 : }
1345 :
1346 0 : goodSpace++;
1347 :
1348 0 : if (mLineBreaker) {
1349 0 : goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
1350 0 : mCurrentLine.Length(), goodSpace);
1351 0 : if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
1352 0 : nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
1353 0 : --goodSpace; // adjust the position since line breaker returns a position next to space
1354 : }
1355 : }
1356 : // fallback if the line breaker is unavailable or failed
1357 0 : if (!mLineBreaker) {
1358 0 : if (mCurrentLine.IsEmpty() || mWrapColumn < prefixwidth) {
1359 0 : goodSpace = NS_LINEBREAKER_NEED_MORE_TEXT;
1360 : } else {
1361 0 : goodSpace = std::min(mWrapColumn - prefixwidth, mCurrentLine.Length() - 1);
1362 0 : while (goodSpace >= 0 &&
1363 0 : !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1364 0 : goodSpace--;
1365 : }
1366 : }
1367 : }
1368 :
1369 0 : nsAutoString restOfLine;
1370 0 : if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
1371 : // If we didn't find a good place to break, accept long line and
1372 : // try to find another place to break
1373 0 : goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
1374 0 : if (mLineBreaker) {
1375 0 : if ((uint32_t)goodSpace < mCurrentLine.Length())
1376 0 : goodSpace = mLineBreaker->Next(mCurrentLine.get(),
1377 0 : mCurrentLine.Length(), goodSpace);
1378 0 : if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
1379 0 : goodSpace = mCurrentLine.Length();
1380 : }
1381 : // fallback if the line breaker is unavailable or failed
1382 0 : if (!mLineBreaker) {
1383 0 : goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
1384 0 : while (goodSpace < linelength &&
1385 0 : !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1386 0 : goodSpace++;
1387 : }
1388 : }
1389 : }
1390 :
1391 0 : if ((goodSpace < linelength) && (goodSpace > 0)) {
1392 : // Found a place to break
1393 :
1394 : // -1 (trim a char at the break position)
1395 : // only if the line break was a space.
1396 0 : if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1397 0 : mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
1398 : }
1399 : else {
1400 0 : mCurrentLine.Right(restOfLine, linelength-goodSpace);
1401 : }
1402 : // if breaker was U+0020, it has to consider for delsp=yes support
1403 0 : bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
1404 0 : mCurrentLine.Truncate(goodSpace);
1405 0 : EndLine(true, breakBySpace);
1406 0 : mCurrentLine.Truncate();
1407 : // Space stuff new line?
1408 0 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1409 0 : if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
1410 0 : && mCiteQuoteLevel == 0 // We space-stuff quoted lines anyway
1411 : )
1412 : {
1413 : // Space stuffing a la RFC 2646 (format=flowed).
1414 0 : mCurrentLine.Append(char16_t(' '));
1415 : //XXX doesn't seem to work correctly for ' '
1416 : }
1417 : }
1418 0 : mCurrentLine.Append(restOfLine);
1419 0 : mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
1420 0 : mCurrentLine.Length());
1421 0 : linelength = mCurrentLine.Length();
1422 0 : mEmptyLines = -1;
1423 : }
1424 : else {
1425 : // Nothing to do. Hopefully we get more data later
1426 : // to use for a place to break line
1427 : break;
1428 : }
1429 : }
1430 : }
1431 : else {
1432 : // No wrapping.
1433 : }
1434 : }
1435 :
1436 : /**
1437 : * Outputs the contents of mCurrentLine, and resets line specific
1438 : * variables. Also adds an indentation and prefix if there is
1439 : * one specified. Strips ending spaces from the line if it isn't
1440 : * preformatted.
1441 : */
1442 : void
1443 0 : nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
1444 : {
1445 0 : uint32_t currentlinelength = mCurrentLine.Length();
1446 :
1447 0 : if (aSoftlinebreak && 0 == currentlinelength) {
1448 : // No meaning
1449 0 : return;
1450 : }
1451 :
1452 : /* In non-preformatted mode, remove spaces from the end of the line for
1453 : * format=flowed compatibility. Don't do this for these special cases:
1454 : * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1455 : * "- -- ", the OpenPGP dash-escaped signature separator in inline
1456 : * signed messages according to the OpenPGP standard (RFC 2440).
1457 : */
1458 0 : if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1459 0 : !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
1460 0 : (aSoftlinebreak ||
1461 0 : !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
1462 : // Remove spaces from the end of the line.
1463 0 : while(currentlinelength > 0 &&
1464 0 : mCurrentLine[currentlinelength-1] == ' ') {
1465 0 : --currentlinelength;
1466 : }
1467 0 : mCurrentLine.SetLength(currentlinelength);
1468 : }
1469 :
1470 0 : if (aSoftlinebreak &&
1471 0 : (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1472 0 : (mIndent == 0)) {
1473 : // Add the soft part of the soft linebreak (RFC 2646 4.1)
1474 : // We only do this when there is no indentation since format=flowed
1475 : // lines and indentation doesn't work well together.
1476 :
1477 : // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1478 : // add twice space.
1479 0 : if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
1480 0 : mCurrentLine.AppendLiteral(" ");
1481 : else
1482 0 : mCurrentLine.Append(char16_t(' '));
1483 : }
1484 :
1485 0 : if (aSoftlinebreak) {
1486 0 : mEmptyLines=0;
1487 : }
1488 : else {
1489 : // Hard break
1490 0 : if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1491 0 : mEmptyLines=-1;
1492 : }
1493 :
1494 0 : mEmptyLines++;
1495 : }
1496 :
1497 0 : if (mAtFirstColumn) {
1498 : // If we don't have anything "real" to output we have to
1499 : // make sure the indent doesn't end in a space since that
1500 : // would trick a format=flowed-aware receiver.
1501 0 : bool stripTrailingSpaces = mCurrentLine.IsEmpty();
1502 0 : OutputQuotesAndIndent(stripTrailingSpaces);
1503 : }
1504 :
1505 0 : mCurrentLine.Append(mLineBreak);
1506 0 : Output(mCurrentLine);
1507 0 : mCurrentLine.Truncate();
1508 0 : mCurrentLineWidth = 0;
1509 0 : mAtFirstColumn=true;
1510 0 : mInWhitespace=true;
1511 0 : mLineBreakDue = false;
1512 0 : mFloatingLines = -1;
1513 : }
1514 :
1515 :
1516 : /**
1517 : * Outputs the calculated and stored indent and text in the indentation. That is
1518 : * quote chars and numbers for numbered lists and such. It will also reset any
1519 : * stored text to put in the indentation after using it.
1520 : */
1521 : void
1522 0 : nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
1523 : {
1524 0 : nsAutoString stringToOutput;
1525 :
1526 : // Put the mail quote "> " chars in, if appropriate:
1527 0 : if (mCiteQuoteLevel > 0) {
1528 0 : nsAutoString quotes;
1529 0 : for(int i=0; i < mCiteQuoteLevel; i++) {
1530 0 : quotes.Append(char16_t('>'));
1531 : }
1532 0 : if (!mCurrentLine.IsEmpty()) {
1533 : /* Better don't output a space here, if the line is empty,
1534 : in case a receiving f=f-aware UA thinks, this were a flowed line,
1535 : which it isn't - it's just empty.
1536 : (Flowed lines may be joined with the following one,
1537 : so the empty line may be lost completely.) */
1538 0 : quotes.Append(char16_t(' '));
1539 : }
1540 0 : stringToOutput = quotes;
1541 0 : mAtFirstColumn = false;
1542 : }
1543 :
1544 : // Indent if necessary
1545 0 : int32_t indentwidth = mIndent - mInIndentString.Length();
1546 0 : if (indentwidth > 0
1547 0 : && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1548 : // Don't make empty lines look flowed
1549 : ) {
1550 0 : nsAutoString spaces;
1551 0 : for (int i=0; i < indentwidth; ++i)
1552 0 : spaces.Append(char16_t(' '));
1553 0 : stringToOutput += spaces;
1554 0 : mAtFirstColumn = false;
1555 : }
1556 :
1557 0 : if (!mInIndentString.IsEmpty()) {
1558 0 : stringToOutput += mInIndentString;
1559 0 : mAtFirstColumn = false;
1560 0 : mInIndentString.Truncate();
1561 : }
1562 :
1563 0 : if (stripTrailingSpaces) {
1564 0 : int32_t lineLength = stringToOutput.Length();
1565 0 : while(lineLength > 0 &&
1566 0 : ' ' == stringToOutput[lineLength-1]) {
1567 0 : --lineLength;
1568 : }
1569 0 : stringToOutput.SetLength(lineLength);
1570 : }
1571 :
1572 0 : if (!stringToOutput.IsEmpty()) {
1573 0 : Output(stringToOutput);
1574 : }
1575 :
1576 0 : }
1577 :
1578 : /**
1579 : * Write a string. This is the highlevel function to use to get text output.
1580 : * By using AddToLine, Output, EndLine and other functions it handles quotation,
1581 : * line wrapping, indentation, whitespace compression and other things.
1582 : */
1583 : void
1584 0 : nsPlainTextSerializer::Write(const nsAString& aStr)
1585 : {
1586 : // XXX Copy necessary to use nsString methods and gain
1587 : // access to underlying buffer
1588 0 : nsAutoString str(aStr);
1589 :
1590 : #ifdef DEBUG_wrapping
1591 : printf("Write(%s): wrap col = %d\n",
1592 : NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
1593 : #endif
1594 :
1595 0 : int32_t bol = 0;
1596 : int32_t newline;
1597 :
1598 0 : int32_t totLen = str.Length();
1599 :
1600 : // If the string is empty, do nothing:
1601 0 : if (totLen <= 0) return;
1602 :
1603 : // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1604 : // to be cut off along with usual spaces if required. (bug #125928)
1605 0 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1606 0 : for (int32_t i = totLen-1; i >= 0; i--) {
1607 0 : char16_t c = str[i];
1608 0 : if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
1609 0 : continue;
1610 0 : if (kNBSP == c)
1611 0 : str.Replace(i, 1, ' ');
1612 : else
1613 0 : break;
1614 : }
1615 : }
1616 :
1617 : // We have two major codepaths here. One that does preformatted text and one
1618 : // that does normal formatted text. The one for preformatted text calls
1619 : // Output directly while the other code path goes through AddToLine.
1620 0 : if ((mPreFormattedMail && !mWrapColumn) || (IsInPre() && !mPreFormattedMail)
1621 0 : || (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1622 : // No intelligent wrapping.
1623 :
1624 : // This mustn't be mixed with intelligent wrapping without clearing
1625 : // the mCurrentLine buffer before!!!
1626 0 : NS_ASSERTION(mCurrentLine.IsEmpty() || (IsInPre() && !mPreFormattedMail),
1627 : "Mixed wrapping data and nonwrapping data on the same line");
1628 0 : if (!mCurrentLine.IsEmpty()) {
1629 0 : FlushLine();
1630 : }
1631 :
1632 : // Put the mail quote "> " chars in, if appropriate.
1633 : // Have to put it in before every line.
1634 0 : while(bol<totLen) {
1635 0 : bool outputQuotes = mAtFirstColumn;
1636 0 : bool atFirstColumn = mAtFirstColumn;
1637 0 : bool outputLineBreak = false;
1638 0 : bool spacesOnly = true;
1639 :
1640 : // Find one of '\n' or '\r' using iterators since nsAString
1641 : // doesn't have the old FindCharInSet function.
1642 0 : nsAString::const_iterator iter; str.BeginReading(iter);
1643 0 : nsAString::const_iterator done_searching; str.EndReading(done_searching);
1644 0 : iter.advance(bol);
1645 0 : int32_t new_newline = bol;
1646 0 : newline = kNotFound;
1647 0 : while(iter != done_searching) {
1648 0 : if ('\n' == *iter || '\r' == *iter) {
1649 0 : newline = new_newline;
1650 0 : break;
1651 : }
1652 0 : if (' ' != *iter)
1653 0 : spacesOnly = false;
1654 0 : ++new_newline;
1655 0 : ++iter;
1656 : }
1657 :
1658 : // Done searching
1659 0 : nsAutoString stringpart;
1660 0 : if (newline == kNotFound) {
1661 : // No new lines.
1662 0 : stringpart.Assign(Substring(str, bol, totLen - bol));
1663 0 : if (!stringpart.IsEmpty()) {
1664 0 : char16_t lastchar = stringpart[stringpart.Length()-1];
1665 0 : if ((lastchar == '\t') || (lastchar == ' ') ||
1666 0 : (lastchar == '\r') ||(lastchar == '\n')) {
1667 0 : mInWhitespace = true;
1668 : }
1669 : else {
1670 0 : mInWhitespace = false;
1671 : }
1672 : }
1673 0 : mEmptyLines=-1;
1674 0 : atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
1675 0 : bol = totLen;
1676 : }
1677 : else {
1678 : // There is a newline
1679 0 : stringpart.Assign(Substring(str, bol, newline-bol));
1680 0 : mInWhitespace = true;
1681 0 : outputLineBreak = true;
1682 0 : mEmptyLines=0;
1683 0 : atFirstColumn = true;
1684 0 : bol = newline+1;
1685 0 : if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1686 : // There was a CRLF in the input. This used to be illegal and
1687 : // stripped by the parser. Apparently not anymore. Let's skip
1688 : // over the LF.
1689 0 : bol++;
1690 : }
1691 : }
1692 :
1693 0 : mCurrentLine.Truncate();
1694 0 : if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1695 0 : if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
1696 0 : !IsQuotedLine(stringpart) &&
1697 0 : !stringpart.EqualsLiteral("-- ") &&
1698 0 : !stringpart.EqualsLiteral("- -- "))
1699 0 : stringpart.Trim(" ", false, true, true);
1700 0 : if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart))
1701 0 : mCurrentLine.Append(char16_t(' '));
1702 : }
1703 0 : mCurrentLine.Append(stringpart);
1704 :
1705 0 : if (outputQuotes) {
1706 : // Note: this call messes with mAtFirstColumn
1707 0 : OutputQuotesAndIndent();
1708 : }
1709 :
1710 0 : Output(mCurrentLine);
1711 0 : if (outputLineBreak) {
1712 0 : Output(mLineBreak);
1713 : }
1714 0 : mAtFirstColumn = atFirstColumn;
1715 : }
1716 :
1717 : // Reset mCurrentLine.
1718 0 : mCurrentLine.Truncate();
1719 :
1720 : #ifdef DEBUG_wrapping
1721 : printf("No wrapping: newline is %d, totLen is %d\n",
1722 : newline, totLen);
1723 : #endif
1724 0 : return;
1725 : }
1726 :
1727 : // Intelligent handling of text
1728 : // If needed, strip out all "end of lines"
1729 : // and multiple whitespace between words
1730 : int32_t nextpos;
1731 0 : const char16_t * offsetIntoBuffer = nullptr;
1732 :
1733 0 : while (bol < totLen) { // Loop over lines
1734 : // Find a place where we may have to do whitespace compression
1735 0 : nextpos = str.FindCharInSet(" \t\n\r", bol);
1736 : #ifdef DEBUG_wrapping
1737 : nsAutoString remaining;
1738 : str.Right(remaining, totLen - bol);
1739 : foo = ToNewCString(remaining);
1740 : // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
1741 : // bol, nextpos, totLen, foo);
1742 : free(foo);
1743 : #endif
1744 :
1745 0 : if (nextpos == kNotFound) {
1746 : // The rest of the string
1747 0 : offsetIntoBuffer = str.get() + bol;
1748 0 : AddToLine(offsetIntoBuffer, totLen-bol);
1749 0 : bol=totLen;
1750 0 : mInWhitespace=false;
1751 : }
1752 : else {
1753 : // There's still whitespace left in the string
1754 0 : if (nextpos != 0 && (nextpos + 1) < totLen) {
1755 0 : offsetIntoBuffer = str.get() + nextpos;
1756 : // skip '\n' if it is between CJ chars
1757 0 : if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
1758 0 : offsetIntoBuffer = str.get() + bol;
1759 0 : AddToLine(offsetIntoBuffer, nextpos-bol);
1760 0 : bol = nextpos + 1;
1761 0 : continue;
1762 : }
1763 : }
1764 : // If we're already in whitespace and not preformatted, just skip it:
1765 0 : if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1766 0 : !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1767 : // Skip whitespace
1768 0 : bol++;
1769 0 : continue;
1770 : }
1771 :
1772 0 : if (nextpos == bol) {
1773 : // Note that we are in whitespace.
1774 0 : mInWhitespace = true;
1775 0 : offsetIntoBuffer = str.get() + nextpos;
1776 0 : AddToLine(offsetIntoBuffer, 1);
1777 0 : bol++;
1778 0 : continue;
1779 : }
1780 :
1781 0 : mInWhitespace = true;
1782 :
1783 0 : offsetIntoBuffer = str.get() + bol;
1784 0 : if (mPreFormattedMail || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1785 : // Preserve the real whitespace character
1786 0 : nextpos++;
1787 0 : AddToLine(offsetIntoBuffer, nextpos-bol);
1788 0 : bol = nextpos;
1789 : }
1790 : else {
1791 : // Replace the whitespace with a space
1792 0 : AddToLine(offsetIntoBuffer, nextpos-bol);
1793 0 : AddToLine(kSpace.get(),1);
1794 0 : bol = nextpos + 1; // Let's eat the whitespace
1795 : }
1796 : }
1797 : } // Continue looping over the string
1798 : }
1799 :
1800 :
1801 : /**
1802 : * Gets the value of an attribute in a string. If the function returns
1803 : * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1804 : */
1805 : nsresult
1806 0 : nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
1807 : nsString& aValueRet)
1808 : {
1809 0 : if (mElement) {
1810 0 : if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1811 0 : return NS_OK;
1812 : }
1813 : }
1814 :
1815 0 : return NS_ERROR_NOT_AVAILABLE;
1816 : }
1817 :
1818 : /**
1819 : * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1820 : * In this case, we should ignore it.
1821 : */
1822 : bool
1823 0 : nsPlainTextSerializer::IsCurrentNodeConverted()
1824 : {
1825 0 : nsAutoString value;
1826 0 : nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1827 0 : return (NS_SUCCEEDED(rv) &&
1828 0 : (value.EqualsIgnoreCase("moz-txt", 7) ||
1829 0 : value.EqualsIgnoreCase("\"moz-txt", 8)));
1830 : }
1831 :
1832 :
1833 : // static
1834 : nsIAtom*
1835 0 : nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
1836 : {
1837 0 : if (!aContent->IsHTMLElement()) {
1838 0 : return nullptr;
1839 : }
1840 :
1841 0 : nsIAtom* localName = aContent->NodeInfo()->NameAtom();
1842 0 : return localName->IsStaticAtom() ? localName : nullptr;
1843 : }
1844 :
1845 : bool
1846 0 : nsPlainTextSerializer::IsInPre()
1847 : {
1848 0 : return !mPreformatStack.empty() && mPreformatStack.top();
1849 : }
1850 :
1851 : bool
1852 0 : nsPlainTextSerializer::IsElementPreformatted(Element* aElement)
1853 : {
1854 : RefPtr<nsStyleContext> styleContext =
1855 0 : nsComputedDOMStyle::GetStyleContextNoFlush(aElement, nullptr, nullptr);
1856 0 : if (styleContext) {
1857 0 : const nsStyleText* textStyle = styleContext->StyleText();
1858 0 : return textStyle->WhiteSpaceOrNewlineIsSignificant();
1859 : }
1860 : // Fall back to looking at the tag, in case there is no style information.
1861 0 : return GetIdForContent(aElement) == nsGkAtoms::pre;
1862 : }
1863 :
1864 : bool
1865 0 : nsPlainTextSerializer::IsElementBlock(Element* aElement)
1866 : {
1867 : RefPtr<nsStyleContext> styleContext =
1868 0 : nsComputedDOMStyle::GetStyleContextNoFlush(aElement, nullptr, nullptr);
1869 0 : if (styleContext) {
1870 0 : const nsStyleDisplay* displayStyle = styleContext->StyleDisplay();
1871 0 : return displayStyle->IsBlockOutsideStyle();
1872 : }
1873 : // Fall back to looking at the tag, in case there is no style information.
1874 0 : return nsContentUtils::IsHTMLBlock(aElement);
1875 : }
1876 :
1877 : /**
1878 : * This method is required only to identify LI's inside OL.
1879 : * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1880 : */
1881 : bool
1882 0 : nsPlainTextSerializer::IsInOL()
1883 : {
1884 0 : int32_t i = mTagStackIndex;
1885 0 : while(--i >= 0) {
1886 0 : if (mTagStack[i] == nsGkAtoms::ol)
1887 0 : return true;
1888 0 : if (mTagStack[i] == nsGkAtoms::ul) {
1889 : // If a UL is reached first, LI belongs the UL nested in OL.
1890 0 : return false;
1891 : }
1892 : }
1893 : // We may reach here for orphan LI's.
1894 0 : return false;
1895 : }
1896 :
1897 : /*
1898 : @return 0 = no header, 1 = h1, ..., 6 = h6
1899 : */
1900 0 : int32_t HeaderLevel(nsIAtom* aTag)
1901 : {
1902 0 : if (aTag == nsGkAtoms::h1) {
1903 0 : return 1;
1904 : }
1905 0 : if (aTag == nsGkAtoms::h2) {
1906 0 : return 2;
1907 : }
1908 0 : if (aTag == nsGkAtoms::h3) {
1909 0 : return 3;
1910 : }
1911 0 : if (aTag == nsGkAtoms::h4) {
1912 0 : return 4;
1913 : }
1914 0 : if (aTag == nsGkAtoms::h5) {
1915 0 : return 5;
1916 : }
1917 0 : if (aTag == nsGkAtoms::h6) {
1918 0 : return 6;
1919 : }
1920 0 : return 0;
1921 : }
1922 :
1923 :
1924 : /*
1925 : * This is an implementation of GetUnicharWidth() and
1926 : * GetUnicharStringWidth() as defined in
1927 : * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1928 : * <http://www.UNIX-systems.org/online.html>
1929 : *
1930 : * Markus Kuhn -- 2000-02-08 -- public domain
1931 : *
1932 : * Minor alterations to fit Mozilla's data types by Daniel Bratell
1933 : */
1934 :
1935 : /* These functions define the column width of an ISO 10646 character
1936 : * as follows:
1937 : *
1938 : * - The null character (U+0000) has a column width of 0.
1939 : *
1940 : * - Other C0/C1 control characters and DEL will lead to a return
1941 : * value of -1.
1942 : *
1943 : * - Non-spacing and enclosing combining characters (general
1944 : * category code Mn or Me in the Unicode database) have a
1945 : * column width of 0.
1946 : *
1947 : * - Spacing characters in the East Asian Wide (W) or East Asian
1948 : * FullWidth (F) category as defined in Unicode Technical
1949 : * Report #11 have a column width of 2.
1950 : *
1951 : * - All remaining characters (including all printable
1952 : * ISO 8859-1 and WGL4 characters, Unicode control characters,
1953 : * etc.) have a column width of 1.
1954 : *
1955 : * This implementation assumes that wchar_t characters are encoded
1956 : * in ISO 10646.
1957 : */
1958 :
1959 : namespace {
1960 :
1961 : struct interval
1962 : {
1963 : uint16_t first;
1964 : uint16_t last;
1965 : };
1966 :
1967 : struct CombiningComparator
1968 : {
1969 : const char16_t mUcs;
1970 0 : explicit CombiningComparator(char16_t aUcs) : mUcs(aUcs) {}
1971 0 : int operator()(const interval& combining) const {
1972 0 : if (mUcs > combining.last)
1973 0 : return 1;
1974 0 : if (mUcs < combining.first)
1975 0 : return -1;
1976 :
1977 0 : MOZ_ASSERT(combining.first <= mUcs);
1978 0 : MOZ_ASSERT(mUcs <= combining.last);
1979 0 : return 0;
1980 : }
1981 : };
1982 :
1983 : } // namespace
1984 :
1985 0 : int32_t GetUnicharWidth(char16_t ucs)
1986 : {
1987 : /* sorted list of non-overlapping intervals of non-spacing characters */
1988 : static const interval combining[] = {
1989 : { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
1990 : { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
1991 : { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1992 : { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
1993 : { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
1994 : { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
1995 : { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
1996 : { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
1997 : { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
1998 : { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
1999 : { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
2000 : { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
2001 : { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
2002 : { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
2003 : { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
2004 : { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
2005 : { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
2006 : { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
2007 : { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
2008 : { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
2009 : { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
2010 : { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
2011 : { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
2012 : { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
2013 : { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
2014 : { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
2015 : { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
2016 : { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
2017 : { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
2018 : { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
2019 : { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
2020 : };
2021 :
2022 : /* test for 8-bit control characters */
2023 0 : if (ucs == 0)
2024 0 : return 0;
2025 0 : if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
2026 0 : return -1;
2027 :
2028 : /* first quick check for Latin-1 etc. characters */
2029 0 : if (ucs < combining[0].first)
2030 0 : return 1;
2031 :
2032 : /* binary search in table of non-spacing characters */
2033 : size_t idx;
2034 0 : if (BinarySearchIf(combining, 0, ArrayLength(combining),
2035 0 : CombiningComparator(ucs), &idx)) {
2036 0 : return 0;
2037 : }
2038 :
2039 : /* if we arrive here, ucs is not a combining or C0/C1 control character */
2040 :
2041 : /* fast test for majority of non-wide scripts */
2042 0 : if (ucs < 0x1100)
2043 0 : return 1;
2044 :
2045 : return 1 +
2046 0 : ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
2047 0 : (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
2048 0 : ucs != 0x303f) || /* CJK ... Yi */
2049 0 : (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
2050 0 : (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
2051 0 : (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
2052 0 : (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
2053 0 : (ucs >= 0xffe0 && ucs <= 0xffe6));
2054 : }
2055 :
2056 :
2057 0 : int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
2058 : {
2059 0 : int32_t w, width = 0;
2060 :
2061 0 : for (;*pwcs && n-- > 0; pwcs++)
2062 0 : if ((w = GetUnicharWidth(*pwcs)) < 0)
2063 0 : ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
2064 : else
2065 0 : width += w;
2066 :
2067 0 : return width;
2068 : }
|