/src/mozilla-central/dom/base/nsPlainTextSerializer.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | /* |
8 | | * nsIContentSerializer implementation that can be used with an |
9 | | * nsIDocumentEncoder to convert a DOM into plaintext in a nice way |
10 | | * (e.g. for copy/paste as plaintext). |
11 | | */ |
12 | | |
13 | | #ifndef nsPlainTextSerializer_h__ |
14 | | #define nsPlainTextSerializer_h__ |
15 | | |
16 | | #include "mozilla/Attributes.h" |
17 | | #include "mozilla/intl/LineBreaker.h" |
18 | | #include "nsCOMPtr.h" |
19 | | #include "nsAtom.h" |
20 | | #include "nsCycleCollectionParticipant.h" |
21 | | #include "nsIContentSerializer.h" |
22 | | #include "nsIDocumentEncoder.h" |
23 | | #include "nsString.h" |
24 | | #include "nsTArray.h" |
25 | | |
26 | | #include <stack> |
27 | | |
28 | | class nsIContent; |
29 | | |
30 | | namespace mozilla { |
31 | | namespace dom { |
32 | | class DocumentType; |
33 | | class Element; |
34 | | } // namespace dom |
35 | | } // namespace mozilla |
36 | | /* nsIContentSerializer implementation that serializes DOM content to plain text (see the file header comment). */ |
37 | | class nsPlainTextSerializer final : public nsIContentSerializer |
38 | | { |
39 | | public: |
40 | | nsPlainTextSerializer(); |
41 | | |
42 | | NS_DECL_CYCLE_COLLECTING_ISUPPORTS |
43 | | NS_DECL_CYCLE_COLLECTION_CLASS(nsPlainTextSerializer) |
44 | | |
45 | | // nsIContentSerializer overrides |
46 | | NS_IMETHOD Init(uint32_t flags, |
47 | | uint32_t aWrapColumn, |
48 | | const mozilla::Encoding* aEncoding, |
49 | | bool aIsCopying, |
50 | | bool aIsWholeDocument, |
51 | | bool* aNeedsPreformatScanning) override; |
52 | | |
53 | | NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset, |
54 | | int32_t aEndOffset, nsAString& aStr) override; |
55 | | NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, |
56 | | int32_t aStartOffset, int32_t aEndOffset, |
57 | | nsAString& aStr) override; |
58 | | NS_IMETHOD AppendProcessingInstruction(mozilla::dom::ProcessingInstruction* aPI, |
59 | | int32_t aStartOffset, |
60 | | int32_t aEndOffset, |
61 | | nsAString& aStr) override |
62 | 0 | { |
63 | 0 | return NS_OK; /* no-op: nothing is appended to aStr */ |
64 | 0 | } |
65 | | NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment, |
66 | | int32_t aStartOffset, |
67 | | int32_t aEndOffset, nsAString& aStr) override |
68 | 0 | { |
69 | 0 | return NS_OK; /* no-op: nothing is appended to aStr */ |
70 | 0 | } |
71 | | NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype, |
72 | 0 | nsAString& aStr) override { return NS_OK; } /* no-op: nothing is appended to aStr */ |
73 | | NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement, |
74 | | mozilla::dom::Element* aOriginalElement, |
75 | | nsAString& aStr) override; |
76 | | NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement, |
77 | | nsAString& aStr) override; |
78 | | NS_IMETHOD Flush(nsAString& aStr) override; |
79 | | |
80 | | NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument, |
81 | | nsAString& aStr) override; |
82 | | |
83 | | NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override; |
84 | | NS_IMETHOD ForgetElementForPreformat(mozilla::dom::Element* aElement) override; |
85 | | |
86 | | private: |
87 | | ~nsPlainTextSerializer(); |
88 | | |
89 | | nsresult GetAttributeValue(nsAtom* aName, nsString& aValueRet); |
90 | | void AddToLine(const char16_t* aStringToAdd, int32_t aLength); |
91 | | void EndLine(bool softlinebreak, bool aBreakBySpace = false); |
92 | | void EnsureVerticalSpace(int32_t noOfRows); |
93 | | void FlushLine(); |
94 | | void OutputQuotesAndIndent(bool stripTrailingSpaces=false); |
95 | | void Output(nsString& aString); |
96 | | void Write(const nsAString& aString); |
97 | | bool IsInPre(); |
98 | | bool IsInOL(); |
99 | | bool IsCurrentNodeConverted(); |
100 | | bool MustSuppressLeaf(); |
101 | | |
102 | | /** |
103 | | * Returns the local name of the element as an atom if the element is an |
104 | | * HTML element and the atom is a static atom. Otherwise, nullptr is returned. |
105 | | */ |
106 | | static nsAtom* GetIdForContent(nsIContent* aContent); |
107 | | nsresult DoOpenContainer(nsAtom* aTag); |
108 | | nsresult DoCloseContainer(nsAtom* aTag); |
109 | | nsresult DoAddLeaf(nsAtom* aTag); |
110 | | void DoAddText(bool aIsWhitespace, const nsAString& aText); |
111 | | |
112 | | // Inlined boolean helpers |
113 | | inline bool MayWrap() |
114 | 0 | { /* true only with a non-zero mWrapColumn and OutputFormatted or OutputWrap set in mFlags */ |
115 | 0 | return mWrapColumn && |
116 | 0 | ((mFlags & nsIDocumentEncoder::OutputFormatted) || |
117 | 0 | (mFlags & nsIDocumentEncoder::OutputWrap)); |
118 | 0 | } |
119 | | inline bool MayBreakLines() |
120 | 0 | { /* true unless OutputDisallowLineBreaking is set in mFlags */ |
121 | 0 | return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking); |
122 | 0 | } |
123 | | |
124 | | inline bool DoOutput() |
125 | 0 | { /* true only while mHeadLevel is zero */ |
126 | 0 | return mHeadLevel == 0; |
127 | 0 | } |
128 | | |
129 | | inline bool IsQuotedLine(const nsAString& aLine) |
130 | 0 | { /* true when aLine is non-empty and starts with '>' */ |
131 | 0 | return !aLine.IsEmpty() && aLine.First() == char16_t('>'); |
132 | 0 | } |
133 | | |
134 | | // Stack handling functions |
135 | | bool GetLastBool(const nsTArray<bool>& aStack); |
136 | | void SetLastBool(nsTArray<bool>& aStack, bool aValue); |
137 | | void PushBool(nsTArray<bool>& aStack, bool aValue); |
138 | | bool PopBool(nsTArray<bool>& aStack); |
139 | | |
140 | | bool ShouldReplaceContainerWithPlaceholder(nsAtom* aTag); |
141 | | bool IsIgnorableRubyAnnotation(nsAtom* aTag); |
142 | | |
143 | | bool IsElementPreformatted(mozilla::dom::Element* aElement); |
144 | | bool IsElementBlock(mozilla::dom::Element* aElement); |
145 | | |
146 | | private: |
147 | | nsString mCurrentLine; |
148 | | uint32_t mHeadLevel; |
149 | | bool mAtFirstColumn; |
150 | | |
151 | | bool mStructs; // Output structs (pref) |
152 | | |
153 | | // If we've just written out a cite blockquote, we need to remember it |
154 | | // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote |
155 | | // old messages). |
156 | | bool mHasWrittenCiteBlockquote; |
157 | | |
158 | | int32_t mIndent; |
159 | | // mInIndentString keeps a header that has to be written in the indent. |
160 | | // That could be, for instance, the bullet in a bulleted list. |
161 | | nsString mInIndentString; |
162 | | int32_t mCiteQuoteLevel; |
163 | | int32_t mFlags; /* NOTE(review): Init() takes its flags as uint32_t; the signedness differs here -- confirm this is intended */ |
164 | | int32_t mFloatingLines; // To store the number of lazy line breaks |
165 | | |
166 | | // The wrap column is how many standard sized chars (western languages) |
167 | | // should be allowed on a line. There could be fewer chars if the chars |
168 | | // are wider than latin chars, or more if the chars are narrower. |
169 | | uint32_t mWrapColumn; |
170 | | |
171 | | // The width of the line as it will appear on the screen (approx.) |
172 | | uint32_t mCurrentLineWidth; |
173 | | |
174 | | // Treat quoted text as though it's preformatted -- don't wrap it. |
175 | | // Having it on a pref is a temporary measure, see bug 69638. NOTE(review): this text does not obviously describe mSpanLevel below -- possibly a stale leftover; confirm. |
176 | | int32_t mSpanLevel; |
177 | | |
178 | | |
179 | | int32_t mEmptyLines; // Will be the number of empty lines before |
180 | | // the current. 0 if we are starting a new |
181 | | // line and -1 if we are in a line. |
182 | | |
183 | | bool mInWhitespace; |
184 | | bool mPreFormattedMail; // we're dealing with special DOM |
185 | | // used by Thunderbird code. |
186 | | bool mStartedOutput; // we've produced at least a character |
187 | | |
188 | | // While handling a new tag, this variable records whether a line break |
189 | | // is due because of a closing tag. It is set to true while closing tags, |
190 | | // so opening tags are guaranteed to start with appropriate line breaks. |
191 | | bool mLineBreakDue; |
192 | | |
193 | | bool mPreformattedBlockBoundary; |
194 | | |
195 | | // Whether the output should include ruby annotations. |
196 | | bool mWithRubyAnnotation; |
197 | | |
198 | | nsString mURL; |
199 | | int32_t mHeaderStrategy; /* Header strategy (pref) |
200 | | 0 = no indentation |
201 | | 1 = indentation, increased with |
202 | | header level (default) |
203 | | 2 = numbering and slight indentation */ |
204 | | int32_t mHeaderCounter[7]; /* For header-numbering: |
205 | | Number of previous headers of |
206 | | the same depth and in the same |
207 | | section. |
208 | | mHeaderCounter[1] for <h1> etc. */ |
209 | | |
210 | | RefPtr<mozilla::dom::Element> mElement; |
211 | | |
212 | | // For handling table rows |
213 | | AutoTArray<bool, 8> mHasWrittenCellsForRow; |
214 | | |
215 | | // Values obtained in OpenContainer that are (also) needed in CloseContainer |
216 | | AutoTArray<bool, 8> mIsInCiteBlockquote; |
217 | | |
218 | | // The output data |
219 | | nsAString* mOutputString; |
220 | | |
221 | | // The tag stack: the stack of tags we're operating on, so we can nest. |
222 | | // The stack only ever points to static atoms, so they don't need to be |
223 | | // refcounted. |
224 | | nsAtom** mTagStack; |
225 | | uint32_t mTagStackIndex; |
226 | | |
227 | | // The stack indicating whether the elements we've been operating on are |
228 | | // CSS preformatted elements, so that we can tell if the text inside them |
229 | | // should be formatted. |
230 | | std::stack<bool> mPreformatStack; |
231 | | |
232 | | // Content in the stack above this index should be ignored: |
233 | | uint32_t mIgnoreAboveIndex; |
234 | | |
235 | | // The stack for ordered lists |
236 | | int32_t *mOLStack; |
237 | | uint32_t mOLStackIndex; |
238 | | |
239 | | uint32_t mULCount; |
240 | | |
241 | | nsString mLineBreak; |
242 | | RefPtr<mozilla::intl::LineBreaker> mLineBreaker; |
243 | | |
244 | | // Convenience constant. It would be nice to have it as a const static |
245 | | // variable, but that causes issues with OpenBSD and module unloading. |
246 | | const nsString kSpace; |
247 | | |
248 | | // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child |
249 | | // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored. |
250 | | // mIgnoredChildNodeLevel is used to tell if the current node is an ignorable |
251 | | // child node. The initial value of mIgnoredChildNodeLevel is 0. When the |
252 | | // serializer enters those specific nodes, mIgnoredChildNodeLevel increases |
253 | | // and is greater than 0. When the serializer leaves those nodes, |
254 | | // mIgnoredChildNodeLevel decreases. |
255 | | uint32_t mIgnoredChildNodeLevel; |
256 | | }; |
257 | | |
258 | | nsresult |
259 | | NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer); |
260 | | |
261 | | #endif |