Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/dom/base/nsPlainTextSerializer.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
/*
8
 * nsIContentSerializer implementation that can be used with an
9
 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10
 * (e.g. for copy/paste as plaintext).
11
 */
12
13
#ifndef nsPlainTextSerializer_h__
14
#define nsPlainTextSerializer_h__
15
16
#include "mozilla/Attributes.h"
17
#include "mozilla/intl/LineBreaker.h"
18
#include "nsCOMPtr.h"
19
#include "nsAtom.h"
20
#include "nsCycleCollectionParticipant.h"
21
#include "nsIContentSerializer.h"
22
#include "nsIDocumentEncoder.h"
23
#include "nsString.h"
24
#include "nsTArray.h"
25
26
#include <stack>
27
28
class nsIContent;
29
30
namespace mozilla {
31
namespace dom {
32
class DocumentType;
33
class Element;
34
} // namespace dom
35
} // namespace mozilla
36
37
class nsPlainTextSerializer final : public nsIContentSerializer
38
{
39
public:
40
  nsPlainTextSerializer();
41
42
  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
43
  NS_DECL_CYCLE_COLLECTION_CLASS(nsPlainTextSerializer)
44
45
  // nsIContentSerializer
46
  NS_IMETHOD Init(uint32_t flags,
47
                  uint32_t aWrapColumn,
48
                  const mozilla::Encoding* aEncoding,
49
                  bool aIsCopying,
50
                  bool aIsWholeDocument,
51
                  bool* aNeedsPreformatScanning) override;
52
53
  NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
54
                        int32_t aEndOffset, nsAString& aStr) override;
55
  NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
56
                                int32_t aStartOffset, int32_t aEndOffset,
57
                                nsAString& aStr) override;
58
  NS_IMETHOD AppendProcessingInstruction(mozilla::dom::ProcessingInstruction* aPI,
59
                                         int32_t aStartOffset,
60
                                         int32_t aEndOffset,
61
                                         nsAString& aStr) override
62
0
  {
63
0
    return NS_OK;
64
0
  }
65
  NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
66
                           int32_t aStartOffset,
67
                           int32_t aEndOffset, nsAString& aStr) override
68
0
  {
69
0
    return NS_OK;
70
0
  }
71
  NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype,
72
0
                           nsAString& aStr) override  { return NS_OK; }
73
  NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
74
                                mozilla::dom::Element* aOriginalElement,
75
                                nsAString& aStr) override;
76
  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
77
                              nsAString& aStr) override;
78
  NS_IMETHOD Flush(nsAString& aStr) override;
79
80
  NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
81
                                 nsAString& aStr) override;
82
83
  NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override;
84
  NS_IMETHOD ForgetElementForPreformat(mozilla::dom::Element* aElement) override;
85
86
private:
87
  ~nsPlainTextSerializer();
88
89
  nsresult GetAttributeValue(nsAtom* aName, nsString& aValueRet);
90
  void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
91
  void EndLine(bool softlinebreak, bool aBreakBySpace = false);
92
  void EnsureVerticalSpace(int32_t noOfRows);
93
  void FlushLine();
94
  void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
95
  void Output(nsString& aString);
96
  void Write(const nsAString& aString);
97
  bool IsInPre();
98
  bool IsInOL();
99
  bool IsCurrentNodeConverted();
100
  bool MustSuppressLeaf();
101
102
  /**
103
   * Returns the local name of the element as an atom if the element is an
104
   * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
105
   */
106
  static nsAtom* GetIdForContent(nsIContent* aContent);
107
  nsresult DoOpenContainer(nsAtom* aTag);
108
  nsresult DoCloseContainer(nsAtom* aTag);
109
  nsresult DoAddLeaf(nsAtom* aTag);
110
  void DoAddText(bool aIsWhitespace, const nsAString& aText);
111
112
  // Inlined functions
113
  inline bool MayWrap()
114
0
  {
115
0
    return mWrapColumn &&
116
0
      ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
117
0
       (mFlags & nsIDocumentEncoder::OutputWrap));
118
0
  }
119
  inline bool MayBreakLines()
120
0
  {
121
0
    return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking);
122
0
  }
123
124
  inline bool DoOutput()
125
0
  {
126
0
    return mHeadLevel == 0;
127
0
  }
128
129
  inline bool IsQuotedLine(const nsAString& aLine)
130
0
  {
131
0
    return !aLine.IsEmpty() && aLine.First() == char16_t('>');
132
0
  }
133
134
  // Stack handling functions
135
  bool GetLastBool(const nsTArray<bool>& aStack);
136
  void SetLastBool(nsTArray<bool>& aStack, bool aValue);
137
  void PushBool(nsTArray<bool>& aStack, bool aValue);
138
  bool PopBool(nsTArray<bool>& aStack);
139
140
  bool ShouldReplaceContainerWithPlaceholder(nsAtom* aTag);
141
  bool IsIgnorableRubyAnnotation(nsAtom* aTag);
142
143
  bool IsElementPreformatted(mozilla::dom::Element* aElement);
144
  bool IsElementBlock(mozilla::dom::Element* aElement);
145
146
private:
147
  nsString         mCurrentLine;
148
  uint32_t         mHeadLevel;
149
  bool             mAtFirstColumn;
150
151
  bool             mStructs;            // Output structs (pref)
152
153
  // If we've just written out a cite blockquote, we need to remember it
154
  // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
155
  // old messages).
156
  bool             mHasWrittenCiteBlockquote;
157
158
  int32_t          mIndent;
159
  // mInIndentString keeps a header that has to be written in the indent.
160
  // That could be, for instance, the bullet in a bulleted list.
161
  nsString         mInIndentString;
162
  int32_t          mCiteQuoteLevel;
163
  int32_t          mFlags;
164
  int32_t          mFloatingLines; // To store the number of lazy line breaks
165
166
  // The wrap column is how many standard sized chars (western languages)
167
  // should be allowed on a line. There could be fewer chars if the chars
168
  // are wider than Latin chars, or more if the chars are narrower.
169
  uint32_t         mWrapColumn;
170
171
  // The width of the line as it will appear on the screen (approx.)
172
  uint32_t         mCurrentLineWidth;
173
174
  // Treat quoted text as though it's preformatted -- don't wrap it.
175
  // Having it on a pref is a temporary measure, See bug 69638.
176
  int32_t          mSpanLevel;
177
178
179
  int32_t          mEmptyLines; // Will be the number of empty lines before
180
                                // the current. 0 if we are starting a new
181
                                // line and -1 if we are in a line.
182
183
  bool             mInWhitespace;
184
  bool             mPreFormattedMail; // we're dealing with special DOM
185
                                      // used by Thunderbird code.
186
  bool             mStartedOutput; // we've produced at least a character
187
188
  // While handling a new tag, this variable records whether a line break
189
  // is due because of a closing tag. It is set to true while closing tags,
190
  // so opening tags are guaranteed to start with appropriate line breaks.
191
  bool             mLineBreakDue;
192
193
  bool             mPreformattedBlockBoundary;
194
195
  // Whether the output should include ruby annotations.
196
  bool             mWithRubyAnnotation;
197
198
  nsString         mURL;
199
  int32_t          mHeaderStrategy;    /* Header strategy (pref)
200
                                          0 = no indentation
201
                                          1 = indentation, increased with
202
                                              header level (default)
203
                                          2 = numbering and slight indentation */
204
  int32_t          mHeaderCounter[7];  /* For header-numbering:
205
                                          Number of previous headers of
206
                                          the same depth and in the same
207
                                          section.
208
                                          mHeaderCounter[1] for <h1> etc. */
209
210
  RefPtr<mozilla::dom::Element> mElement;
211
212
  // For handling table rows
213
  AutoTArray<bool, 8> mHasWrittenCellsForRow;
214
215
  // Values obtained in OpenContainer that are (also) needed in CloseContainer
216
  AutoTArray<bool, 8> mIsInCiteBlockquote;
217
218
  // The output data
219
  nsAString*            mOutputString;
220
221
  // The tag stack: the stack of tags we're operating on, so we can nest.
222
  // The stack only ever points to static atoms, so they don't need to be
223
  // refcounted.
224
  nsAtom**        mTagStack;
225
  uint32_t         mTagStackIndex;
226
227
  // The stack indicating whether the elements we've been operating on are
228
  // CSS preformatted elements, so that we can tell if the text inside them
229
  // should be formatted.
230
  std::stack<bool> mPreformatStack;
231
232
  // Content in the stack above this index should be ignored:
233
  uint32_t          mIgnoreAboveIndex;
234
235
  // The stack for ordered lists
236
  int32_t         *mOLStack;
237
  uint32_t         mOLStackIndex;
238
239
  uint32_t         mULCount;
240
241
  nsString                     mLineBreak;
242
  RefPtr<mozilla::intl::LineBreaker> mLineBreaker;
243
244
  // Convenience constant. It would be nice to have it as a const static
245
  // variable, but that causes issues with OpenBSD and module unloading.
246
  const nsString          kSpace;
247
248
  // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
249
  // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
250
  // mIgnoredChildNodeLevel is used to tell if the current node is an ignorable
251
  // child node. The initial value of mIgnoredChildNodeLevel is 0. When the
252
  // serializer enters one of those nodes, mIgnoredChildNodeLevel increases
253
  // (so it is greater than 0); when the serializer leaves such a node,
254
  // mIgnoredChildNodeLevel decreases.
255
  uint32_t mIgnoredChildNodeLevel;
256
};
257
258
nsresult
259
NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
260
261
#endif