Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /*
8 : * nsIContentSerializer implementation that can be used with an
9 : * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10 : * (e.g. for copy/paste as plaintext).
11 : */
12 :
13 : #ifndef nsPlainTextSerializer_h__
14 : #define nsPlainTextSerializer_h__
15 :
16 : #include "mozilla/Attributes.h"
17 : #include "nsCOMPtr.h"
18 : #include "nsIAtom.h"
19 : #include "nsCycleCollectionParticipant.h"
20 : #include "nsIContentSerializer.h"
21 : #include "nsIDocumentEncoder.h"
22 : #include "nsILineBreaker.h"
23 : #include "nsString.h"
24 : #include "nsTArray.h"
25 :
26 : #include <stack>
27 :
28 : class nsIContent;
29 :
30 : namespace mozilla {
31 : namespace dom {
32 : class Element;
33 : } // namespace dom
34 : } // namespace mozilla
35 :
// nsPlainTextSerializer: a final class deriving from nsIContentSerializer.
// Only the small inline helpers are defined in this header; every other
// member function is declared here and defined elsewhere.
// NOTE(review): each original line of this listing carries a gutter (source
// line number, optional execution count, then a colon) ahead of the C++ text;
// this looks like coverage-report output. Comment lines without a gutter were
// added during review.
36 : class nsPlainTextSerializer final : public nsIContentSerializer
37 : {
38 : public:
39 : nsPlainTextSerializer();
40 :
41 : NS_DECL_CYCLE_COLLECTING_ISUPPORTS
42 0 : NS_DECL_CYCLE_COLLECTION_CLASS(nsPlainTextSerializer)
43 :
44 : // nsIContentSerializer overrides
45 : NS_IMETHOD Init(uint32_t flags,
46 : uint32_t aWrapColumn,
47 : const mozilla::Encoding* aEncoding,
48 : bool aIsCopying,
49 : bool aIsWholeDocument,
50 : bool* aNeedsPreformatScanning) override;
51 :
52 : NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
53 : int32_t aEndOffset, nsAString& aStr) override;
54 : NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
55 : int32_t aStartOffset, int32_t aEndOffset,
56 : nsAString& aStr) override;
// The next three overrides are defined inline and do nothing: each returns
// NS_OK without reading its arguments or writing to aStr.
57 0 : NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
58 : int32_t aStartOffset,
59 : int32_t aEndOffset,
60 0 : nsAString& aStr) override { return NS_OK; }
61 0 : NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset,
62 0 : int32_t aEndOffset, nsAString& aStr) override { return NS_OK; }
63 0 : NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
64 0 : nsAString& aStr) override { return NS_OK; }
65 : NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
66 : mozilla::dom::Element* aOriginalElement,
67 : nsAString& aStr) override;
68 : NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
69 : nsAString& aStr) override;
70 : NS_IMETHOD Flush(nsAString& aStr) override;
71 :
72 : NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
73 : nsAString& aStr) override;
74 :
75 : NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override;
76 : NS_IMETHOD ForgetElementForPreformat(mozilla::dom::Element* aElement) override;
77 :
78 : private:
// The destructor is private, so code outside the class cannot destroy an
// instance directly.
// NOTE(review): presumably destruction goes through the reference counting
// declared by the NS_DECL_* macros above - confirm.
79 : ~nsPlainTextSerializer();
80 :
// Internal helpers; their definitions are not part of this header.
81 : nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);
82 : void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
83 : void EndLine(bool softlinebreak, bool aBreakBySpace = false);
84 : void EnsureVerticalSpace(int32_t noOfRows);
85 : void FlushLine();
86 : void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
87 : void Output(nsString& aString);
88 : void Write(const nsAString& aString);
89 : bool IsInPre();
90 : bool IsInOL();
91 : bool IsCurrentNodeConverted();
92 : bool MustSuppressLeaf();
93 :
94 : /**
95 : * Returns the local name of the element as an atom if the element is an
96 : * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
97 : */
98 : static nsIAtom* GetIdForContent(nsIContent* aContent);
99 : nsresult DoOpenContainer(nsIAtom* aTag);
100 : nsresult DoCloseContainer(nsIAtom* aTag);
101 : nsresult DoAddLeaf(nsIAtom* aTag);
102 : void DoAddText(bool aIsWhitespace, const nsAString& aText);
103 :
104 : // Inlined functions
// Returns true when mWrapColumn is non-zero and mFlags has at least one of
// nsIDocumentEncoder::OutputFormatted or nsIDocumentEncoder::OutputWrap set.
105 0 : inline bool MayWrap()
106 : {
107 0 : return mWrapColumn &&
108 0 : ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
109 0 : (mFlags & nsIDocumentEncoder::OutputWrap));
110 : }
// Returns true unless nsIDocumentEncoder::OutputDisallowLineBreaking is set
// in mFlags.
111 0 : inline bool MayBreakLines()
112 : {
113 0 : return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking);
114 : }
115 :
// Returns true when mHeadLevel is zero.
// NOTE(review): presumably mHeadLevel counts open head elements so that
// their content is not written - confirm against the implementation.
116 0 : inline bool DoOutput()
117 : {
118 0 : return mHeadLevel == 0;
119 : }
120 :
// Returns true when aLine is non-empty and its first character is '>'.
121 0 : inline bool IsQuotedLine(const nsAString& aLine)
122 : {
123 0 : return !aLine.IsEmpty() && aLine.First() == char16_t('>');
124 : }
125 :
126 : // Stack handling functions (operate on a caller-supplied nsTArray<bool>)
127 : bool GetLastBool(const nsTArray<bool>& aStack);
128 : void SetLastBool(nsTArray<bool>& aStack, bool aValue);
129 : void PushBool(nsTArray<bool>& aStack, bool aValue);
130 : bool PopBool(nsTArray<bool>& aStack);
131 :
132 : bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag);
133 : bool IsIgnorableRubyAnnotation(nsIAtom* aTag);
134 :
135 : bool IsElementPreformatted(mozilla::dom::Element* aElement);
136 : bool IsElementBlock(mozilla::dom::Element* aElement);
137 :
// Data members. The access level is already private at this point; the
// repeated specifier only separates the data from the functions above.
138 : private:
139 : nsString mCurrentLine;
140 : uint32_t mHeadLevel;
141 : bool mAtFirstColumn;
142 :
143 : bool mStructs; // Output structs (pref)
144 :
145 : // If we've just written out a cite blockquote, we need to remember it
146 : // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
147 : // old messages).
148 : bool mHasWrittenCiteBlockquote;
149 :
150 : int32_t mIndent;
151 : // mInIndentString keeps a header that has to be written in the indent.
152 : // That could be, for instance, the bullet in a bulleted list.
153 : nsString mInIndentString;
154 : int32_t mCiteQuoteLevel;
// NOTE(review): mFlags is signed while Init() receives its flags argument as
// uint32_t; confirm the signedness mismatch is intentional.
155 : int32_t mFlags;
156 : int32_t mFloatingLines; // To store the number of lazy line breaks
157 :
158 : // The wrap column is how many standard sized chars (western languages)
159 : // should be allowed on a line. There could be fewer chars if the chars
160 : // are wider than Latin chars, or more if the chars are narrower.
161 : uint32_t mWrapColumn;
162 :
163 : // The width of the line as it will appear on the screen (approx.)
164 : uint32_t mCurrentLineWidth;
165 :
166 : // Treat quoted text as though it's preformatted -- don't wrap it.
167 : // Having it on a pref is a temporary measure, See bug 69638.
// NOTE(review): the two comment lines above do not obviously describe
// mSpanLevel; they may be left over from a member that was removed - confirm.
168 : int32_t mSpanLevel;
169 :
170 :
171 : int32_t mEmptyLines; // Will be the number of empty lines before
172 : // the current. 0 if we are starting a new
173 : // line and -1 if we are in a line.
174 :
175 : bool mInWhitespace;
176 : bool mPreFormattedMail; // we're dealing with special DOM
177 : // used by Thunderbird code.
178 : bool mStartedOutput; // we've produced at least a character
179 :
180 : // While handling a new tag, this flag records whether a line break is due
181 : // because of a closing tag. It is set to true while closing tags, so that
182 : // opening tags are guaranteed to start with the appropriate line breaks.
183 : bool mLineBreakDue;
184 :
185 : bool mPreformattedBlockBoundary;
186 :
187 : // Whether the output should include ruby annotations.
188 : bool mWithRubyAnnotation;
189 :
190 : nsString mURL;
191 : int32_t mHeaderStrategy; /* Header strategy (pref)
192 : 0 = no indentation
193 : 1 = indentation, increased with
194 : header level (default)
195 : 2 = numbering and slight indentation */
196 : int32_t mHeaderCounter[7]; /* For header-numbering:
197 : Number of previous headers of
198 : the same depth and in the same
199 : section.
200 : mHeaderCounter[1] for <h1> etc. */
201 :
202 : RefPtr<mozilla::dom::Element> mElement;
203 :
204 : // For handling table rows
205 : AutoTArray<bool, 8> mHasWrittenCellsForRow;
206 :
207 : // Values obtained in OpenContainer that are (also) needed in CloseContainer
208 : AutoTArray<bool, 8> mIsInCiteBlockquote;
209 :
210 : // The output data
211 : nsAString* mOutputString;
212 :
213 : // The tag stack: the stack of tags we're operating on, so we can nest.
214 : // The stack only ever points to static atoms, so they don't need to be
215 : // refcounted.
// NOTE(review): the array itself is held through a raw pointer; where it is
// allocated and released is not visible in this header - confirm.
216 : nsIAtom** mTagStack;
217 : uint32_t mTagStackIndex;
218 :
219 : // The stack indicating whether the elements we've been operating on are
220 : // CSS preformatted elements, so that we can tell if the text inside them
221 : // should be formatted.
222 : std::stack<bool> mPreformatStack;
223 :
224 : // Content in the stack above this index should be ignored:
225 : uint32_t mIgnoreAboveIndex;
226 :
227 : // The stack for ordered lists
// NOTE(review): raw pointer as well; allocation and release are not visible
// in this header - confirm.
228 : int32_t *mOLStack;
229 : uint32_t mOLStackIndex;
230 :
231 : uint32_t mULCount;
232 :
233 : nsString mLineBreak;
234 : nsCOMPtr<nsILineBreaker> mLineBreaker;
235 :
236 : // Convenience constant. It would be nice to have it as a const static
237 : // variable, but that causes issues with OpenBSD and module unloading.
238 : const nsString kSpace;
239 :
240 : // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
241 : // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
242 : // mIgnoredChildNodeLevel is used to tell if current node is an ignorable
243 : // child node. The initial value of mIgnoredChildNodeLevel is 0. When
244 : // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
245 : // and is greater than 0. Otherwise when serializer leaves those nodes,
246 : // mIgnoredChildNodeLevel decreases.
247 : uint32_t mIgnoredChildNodeLevel;
248 : };
249 :
// Free-function declaration: returns an nsresult and takes a pointer to an
// nsIContentSerializer pointer.
// NOTE(review): presumably a factory that creates an nsPlainTextSerializer and
// hands it back through aSerializer; the definition is not in this header -
// confirm.
250 : nsresult
251 : NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
252 :
253 : #endif
|