Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/dom/base/nsPlainTextSerializer.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
/*
8
 * nsIContentSerializer implementation that can be used with an
9
 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
10
 * (eg for copy/paste as plaintext).
11
 */
12
13
#include "nsPlainTextSerializer.h"
14
#include "nsIServiceManager.h"
15
#include "nsGkAtoms.h"
16
#include "nsNameSpaceManager.h"
17
#include "nsTextFragment.h"
18
#include "nsContentUtils.h"
19
#include "nsReadableUtils.h"
20
#include "nsUnicharUtils.h"
21
#include "nsCRT.h"
22
#include "mozilla/dom/Element.h"
23
#include "mozilla/Preferences.h"
24
#include "mozilla/BinarySearch.h"
25
#include "nsComputedDOMStyle.h"
26
27
namespace mozilla {
28
class Encoding;
29
}
30
31
using namespace mozilla;
32
using namespace mozilla::dom;
33
34
0
#define PREF_STRUCTS "converter.html2txt.structs"
35
0
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
36
0
#define PREF_ALWAYS_INCLUDE_RUBY "converter.html2txt.always_include_ruby"
37
38
static const  int32_t kTabSize=4;
39
static const  int32_t kIndentSizeHeaders = 2;  /* Indention of h1, if
40
                                                mHeaderStrategy = 1 or = 2.
41
                                                Indention of other headers
42
                                                is derived from that.
43
                                                XXX center h1? */
44
static const  int32_t kIndentIncrementHeaders = 2;  /* If mHeaderStrategy = 1,
45
                                                indent h(x+1) this many
46
                                                columns more than h(x) */
47
static const  int32_t kIndentSizeList = kTabSize;
48
                               // Indention of non-first lines of ul and ol
49
static const  int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
50
static const  char16_t  kNBSP = 160;
51
static const  char16_t kSPACE = ' ';
52
53
static int32_t HeaderLevel(nsAtom* aTag);
54
static int32_t GetUnicharWidth(char16_t ucs);
55
static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n);
56
57
// Someday may want to make this non-const:
58
static const uint32_t TagStackSize = 500;
59
static const uint32_t OLStackSize = 100;
60
61
static bool gPreferenceInitialized = false;
62
static bool gAlwaysIncludeRuby = false;
63
64
// Cycle-collecting AddRef/Release for the serializer.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)

// Interface map: the serializer answers for nsIContentSerializer and
// nsISupports.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END

// mElement is the only member listed for cycle collection.
NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
74
75
// Factory: creates a new nsPlainTextSerializer and hands ownership of the
// reference to the caller through aSerializer. Always returns NS_OK.
nsresult
NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
{
  RefPtr<nsPlainTextSerializer> serializer = new nsPlainTextSerializer();
  serializer.forget(aSerializer);
  return NS_OK;
}
82
83
// Puts every piece of serializer state into its pre-Init() default,
// allocates the fixed-size tag and ordered-list stacks, and registers the
// var-cache for PREF_ALWAYS_INCLUDE_RUBY the first time an instance is built.
nsPlainTextSerializer::nsPlainTextSerializer()
  : mFlags(0)
  , mFloatingLines(-1)
  , mLineBreakDue(false)
  , kSpace(NS_LITERAL_STRING(" ")) // Init of "constant"
{
  // No output target until one of the Append*/Flush entry points sets it.
  mOutputString = nullptr;

  // Structural state tracked while elements are opened and closed.
  mHeadLevel = 0;
  mSpanLevel = 0;
  mCiteQuoteLevel = 0;
  mHasWrittenCiteBlockquote = false;
  mULCount = 0;
  mIgnoredChildNodeLevel = 0;

  // Formatting defaults; Init() may overwrite these from preferences.
  mStructs = true;
  mHeaderStrategy = 1;          // indent increasingly
  mWithRubyAnnotation = false;  // decided from pref and flags in Init()
  for (int32_t level = 0; level <= 6; ++level) {
    mHeaderCounter[level] = 0;
  }

  // Line breaking.
  mWrapColumn = 72;  // XXX magic number, we expect someone to reset this
  mCurrentLineWidth = 0;
  mIndent = 0;
  mAtFirstColumn = true;

  // Flow. The start of the document counts as an "empty line" in itself.
  mEmptyLines = 1;
  mInWhitespace = false;
  mPreFormattedMail = false;
  mStartedOutput = false;
  mPreformattedBlockBoundary = false;

  // Tag stack. It only ever contains pointers to static atoms, so the
  // entries need no refcounting. The array itself is left uninitialized;
  // mTagStackIndex is the number of slots in use.
  mTagStack = new nsAtom*[TagStackSize];
  mTagStackIndex = 0;
  mIgnoreAboveIndex = static_cast<uint32_t>(kNotFound);

  // Ordered-list stack, where the running numbers for <ol> are kept.
  mOLStack = new int32_t[OLStackSize];
  mOLStackIndex = 0;

  // Register the preference cache once per process.
  if (!gPreferenceInitialized) {
    Preferences::AddBoolVarCache(&gAlwaysIncludeRuby,
                                 PREF_ALWAYS_INCLUDE_RUBY,
                                 true);
    gPreferenceInitialized = true;
  }
}
137
138
// Frees the two stacks allocated by the constructor and warns if the <head>
// nesting counter did not return to zero.
nsPlainTextSerializer::~nsPlainTextSerializer()
{
  delete[] mOLStack;
  delete[] mTagStack;
  NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
}
144
145
// Configures the serializer for one serialization run.
//
// aFlags                  nsIDocumentEncoder output flags; stored in mFlags
//                         (with OutputNoFramesContent cleared).
// aWrapColumn             column at which lines are wrapped; stored in
//                         mWrapColumn.
// aEncoding, aIsCopying,
// aIsWholeDocument        not used by this implementation.
// aNeedsPreformatScanning out-param, always set to true.
//
// Always returns NS_OK.
NS_IMETHODIMP
nsPlainTextSerializer::Init(uint32_t aFlags,
                            uint32_t aWrapColumn,
                            const Encoding* aEncoding,
                            bool aIsCopying,
                            bool aIsWholeDocument,
                            bool* aNeedsPreformatScanning)
{
#ifdef DEBUG
  // Debug-only sanity checks on the combination of major control flags.
  if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
                 "If you want format=flowed, you must combine it with "
                 "nsIDocumentEncoder::OutputFormatted");
  }

  if (aFlags & nsIDocumentEncoder::OutputFormatted) {
    NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted),
                 "Can't do formatted and preformatted output at the same time!");
  }
#endif

  *aNeedsPreformatScanning = true;
  mFlags = aFlags;
  mWrapColumn = aWrapColumn;

  // A line breaker is only fetched when this serializer will do the wrapping.
  if (MayWrap() && MayBreakLines()) {
    mLineBreaker = nsContentUtils::LineBreaker();
  }

  // Choose the line terminator from the CR/LF flags.
  const bool wantsCR = (mFlags & nsIDocumentEncoder::OutputCRLineBreak) != 0;
  const bool wantsLF = (mFlags & nsIDocumentEncoder::OutputLFLineBreak) != 0;
  if (wantsCR && wantsLF) {
    mLineBreak.AssignLiteral("\r\n");        // Windows
  } else if (wantsCR) {
    mLineBreak.Assign(char16_t('\r'));       // Mac
  } else if (wantsLF) {
    mLineBreak.Assign(char16_t('\n'));       // Unix/DOM
  } else {
    mLineBreak.AssignLiteral(NS_LINEBREAK);  // Platform/default
  }

  // Reset the pending-line-break state left over from any earlier run.
  mLineBreakDue = false;
  mFloatingLines = -1;
  mPreformattedBlockBoundary = false;

  // Formatted output is tuned by two preferences; the current member values
  // serve as the fallbacks.
  if (mFlags & nsIDocumentEncoder::OutputFormatted) {
    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
    mHeaderStrategy =
      Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy);
  }

  // Ruby annotations are kept if either the cached preference or the caller's
  // flag asks for them. The pref is default-inited to false in libpref, but
  // the var-cache uses true as its fallback so that other users of this
  // serializer keep their current behavior.
  mWithRubyAnnotation =
    gAlwaysIncludeRuby ||
    (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);

  // XXX We should let the caller decide whether to do this or not
  mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;

  return NS_OK;
}
220
221
bool
222
nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack)
223
0
{
224
0
  uint32_t size = aStack.Length();
225
0
  if (size == 0) {
226
0
    return false;
227
0
  }
228
0
  return aStack.ElementAt(size-1);
229
0
}
230
231
void
232
nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue)
233
0
{
234
0
  uint32_t size = aStack.Length();
235
0
  if (size > 0) {
236
0
    aStack.ElementAt(size-1) = aValue;
237
0
  }
238
0
  else {
239
0
    NS_ERROR("There is no \"Last\" value");
240
0
  }
241
0
}
242
243
void
244
nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue)
245
0
{
246
0
    aStack.AppendElement(bool(aValue));
247
0
}
248
249
bool
250
nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack)
251
0
{
252
0
  bool returnValue = false;
253
0
  uint32_t size = aStack.Length();
254
0
  if (size > 0) {
255
0
    returnValue = aStack.ElementAt(size-1);
256
0
    aStack.RemoveElementAt(size-1);
257
0
  }
258
0
  return returnValue;
259
0
}
260
261
bool
262
nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsAtom* aTag)
263
0
{
264
0
  // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set,
265
0
  // non-textual container element should be serialized as placeholder
266
0
  // character and its child nodes should be ignored. See bug 895239.
267
0
  if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) {
268
0
    return false;
269
0
  }
270
0
271
0
  return
272
0
    (aTag == nsGkAtoms::audio) ||
273
0
    (aTag == nsGkAtoms::canvas) ||
274
0
    (aTag == nsGkAtoms::iframe) ||
275
0
    (aTag == nsGkAtoms::meter) ||
276
0
    (aTag == nsGkAtoms::progress) ||
277
0
    (aTag == nsGkAtoms::object) ||
278
0
    (aTag == nsGkAtoms::svg) ||
279
0
    (aTag == nsGkAtoms::video);
280
0
}
281
282
bool
283
nsPlainTextSerializer::IsIgnorableRubyAnnotation(nsAtom* aTag)
284
0
{
285
0
  if (mWithRubyAnnotation) {
286
0
    return false;
287
0
  }
288
0
289
0
  return
290
0
    aTag == nsGkAtoms::rp ||
291
0
    aTag == nsGkAtoms::rt ||
292
0
    aTag == nsGkAtoms::rtc;
293
0
}
294
295
// Serializes the characters [aStartOffset, aEndOffset) of the text node
// aText into aStr. An aEndOffset of -1 means "to the end of the text"; other
// values are clamped to the fragment length.
//
// Returns NS_OK on success (including when there is nothing to write or the
// text lies inside an ignored subtree), NS_ERROR_INVALID_ARG for a negative
// start offset, the NS_ENSURE_ARG failure code for a null aText, and
// NS_ERROR_FAILURE when the node has no text fragment.
NS_IMETHODIMP
nsPlainTextSerializer::AppendText(nsIContent* aText,
                                  int32_t aStartOffset,
                                  int32_t aEndOffset,
                                  nsAString& aStr)
{
  // Text below an ignored tag is dropped.
  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    return NS_OK;
  }

  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
  if (aStartOffset < 0) {
    return NS_ERROR_INVALID_ARG;
  }

  NS_ENSURE_ARG(aText);

  const nsTextFragment* frag = aText->GetText();
  if (!frag) {
    return NS_ERROR_FAILURE;
  }

  const int32_t fragLength = frag->GetLength();
  int32_t endoffset = (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
  NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");

  const int32_t length = endoffset - aStartOffset;
  if (length <= 0) {
    return NS_OK;
  }

  // Copy the requested range into a UTF-16 buffer.
  nsAutoString textstr;
  if (frag->Is2b()) {
    textstr.Assign(frag->Get2b() + aStartOffset, length);
  } else {
    // AssignASCII is for 7-bit character only, so don't use it
    const char* narrow = frag->Get1b();
    CopyASCIItoUTF16(Substring(narrow + aStartOffset, narrow + endoffset),
                     textstr);
  }

  mOutputString = &aStr;

  // The string is split at every '\n' and '\r' to match parser behavior:
  // each run between breaks is added as text, each break as mLineBreak.
  int32_t lineStart = 0;
  for (int32_t breakPos = textstr.FindCharInSet("\n\r");
       breakPos != kNotFound;
       breakPos = textstr.FindCharInSet("\n\r", lineStart)) {
    if (breakPos > lineStart) {
      DoAddText(false, Substring(textstr, lineStart, breakPos - lineStart));
    }
    DoAddText(true, mLineBreak);
    lineStart = breakPos + 1;
  }

  // Whatever follows the last break (or the whole string if there was none).
  if (lineStart < length) {
    if (lineStart == 0) {
      DoAddText(false, textstr);
    } else {
      DoAddText(false, Substring(textstr, lineStart, length - lineStart));
    }
  }

  mOutputString = nullptr;

  return NS_OK;
}
373
374
// CDATA sections are serialized exactly like ordinary text nodes: this
// forwards all arguments to AppendText() and returns its result.
NS_IMETHODIMP
nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
                                          int32_t aStartOffset,
                                          int32_t aEndOffset,
                                          nsAString& aStr)
{
  const nsresult result =
    AppendText(aCDATASection, aStartOffset, aEndOffset, aStr);
  return result;
}
382
383
// Pushes onto mPreformatStack whether aElement is preformatted. Paired with
// ForgetElementForPreformat(), which pops the entry again.
NS_IMETHODIMP
nsPlainTextSerializer::ScanElementForPreformat(Element* aElement)
{
  const bool preformatted = IsElementPreformatted(aElement);
  mPreformatStack.push(preformatted);
  return NS_OK;
}
389
390
// Pops the entry pushed by the matching ScanElementForPreformat() call.
// Popping from an empty stack is a release-mode assertion failure.
NS_IMETHODIMP
nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement)
{
  MOZ_RELEASE_ASSERT(!mPreformatStack.empty(), "Tried to pop without previous push.");

  mPreformatStack.pop();

  return NS_OK;
}
397
398
// Serializes the start of aElement into aStr. Void HTML elements are handled
// as leaves, everything else as the opening of a container. mElement and
// mOutputString are only set for the duration of the call. Opening a <head>
// element increments mHeadLevel.
//
// Returns the result of DoAddLeaf()/DoOpenContainer(), or the NS_ENSURE_ARG
// failure code when aElement is null.
NS_IMETHODIMP
nsPlainTextSerializer::AppendElementStart(Element* aElement,
                                          Element* aOriginalElement,
                                          nsAString& aStr)
{
  NS_ENSURE_ARG(aElement);

  mElement = aElement;

  nsAtom* tag = GetIdForContent(mElement);
  const bool isVoid = FragmentOrElement::IsHTMLVoid(tag);

  mOutputString = &aStr;

  const nsresult rv = isVoid ? DoAddLeaf(tag) : DoOpenContainer(tag);

  mElement = nullptr;
  mOutputString = nullptr;

  if (tag == nsGkAtoms::head) {
    ++mHeadLevel;
  }

  return rv;
}
430
431
// Serializes the end of aElement into aStr. Only non-void elements need any
// work (DoCloseContainer()); void elements were fully handled when they were
// started. mElement and mOutputString are only set for the duration of the
// call. Closing a <head> element decrements mHeadLevel.
//
// Returns the result of DoCloseContainer(), NS_OK for void elements, or the
// NS_ENSURE_ARG failure code when aElement is null.
NS_IMETHODIMP
nsPlainTextSerializer::AppendElementEnd(Element* aElement,
                                        nsAString& aStr)
{
  NS_ENSURE_ARG(aElement);

  mElement = aElement;

  nsAtom* tag = GetIdForContent(mElement);
  const bool isVoid = FragmentOrElement::IsHTMLVoid(tag);

  mOutputString = &aStr;

  const nsresult rv = isVoid ? NS_OK : DoCloseContainer(tag);

  mElement = nullptr;
  mOutputString = nullptr;

  if (tag == nsGkAtoms::head) {
    NS_ASSERTION(mHeadLevel != 0,
                 "mHeadLevel being decremented below 0");
    --mHeadLevel;
  }

  return rv;
}
462
463
// Flushes the line currently being built into aStr. mOutputString points at
// aStr only while FlushLine() runs. Always returns NS_OK.
NS_IMETHODIMP
nsPlainTextSerializer::Flush(nsAString& aStr)
{
  mOutputString = &aStr;

  FlushLine();

  mOutputString = nullptr;

  return NS_OK;
}
471
472
// Plain-text output has no document prologue: nothing is written to aStr
// and NS_OK is returned.
NS_IMETHODIMP
nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument,
                                           nsAString& aStr)
{
  // Intentionally empty.
  return NS_OK;
}
478
479
// Handles the opening of a container (non-void) element whose tag is aTag.
// Depending on the output flags this records the tag on mTagStack, updates
// indentation and list/blockquote/table bookkeeping, and writes any leading
// text for the element (list bullets or numbers, header numbering, quote
// marks, emphasis markers). mElement must already point at the element being
// opened, because its node info and attributes are consulted.
// Every path through this function returns NS_OK.
nsresult
480
nsPlainTextSerializer::DoOpenContainer(nsAtom* aTag)
481
0
{
482
0
  // Check if we need output current node as placeholder character and ignore
483
0
  // child nodes.
484
0
  if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
485
0
    if (mIgnoredChildNodeLevel == 0) {
486
0
      // Serialize current node as placeholder character
487
0
      Write(NS_LITERAL_STRING(u"\xFFFC"));
488
0
    }
489
0
    // Ignore child nodes.
490
0
    mIgnoredChildNodeLevel++;
491
0
    return NS_OK;
492
0
  }
493
0
  if (IsIgnorableRubyAnnotation(aTag)) {
494
0
    // Ignorable ruby annotation shouldn't be replaced by a placeholder
495
0
    // character, neither any of its descendants.
496
0
    mIgnoredChildNodeLevel++;
497
0
    return NS_OK;
498
0
  }
499
0
500
0
  // For plain-text clipboard copy, a pending preformatted block boundary
  // (set by DoCloseContainer) turns into a due line break here.
  if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
501
0
    if (mPreformattedBlockBoundary && DoOutput()) {
502
0
      // Should always end a line, but get no more whitespace
503
0
      if (mFloatingLines < 0)
504
0
        mFloatingLines = 0;
505
0
      mLineBreakDue = true;
506
0
    }
507
0
    mPreformattedBlockBoundary = false;
508
0
  }
509
0
510
0
  if (mFlags & nsIDocumentEncoder::OutputRaw) {
511
0
    // Raw means raw.  Don't even think about doing anything fancy
512
0
    // here like indenting, adding line breaks or any other
513
0
    // characters such as list item bullets, quote characters
514
0
    // around <q>, etc.  I mean it!  Don't make me smack you!
515
0
516
0
    return NS_OK;
517
0
  }
518
0
519
0
  // Record the tag. Once TagStackSize entries are in use, further tags are
  // simply not recorded.
  if (mTagStackIndex < TagStackSize) {
520
0
    mTagStack[mTagStackIndex++] = aTag;
521
0
  }
522
0
523
0
  // While an ignored subtree is open, nothing below is emitted or updated.
  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
524
0
    return NS_OK;
525
0
  }
526
0
527
0
  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
528
0
  // above random <pre>s below it.
529
0
  mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
530
0
                              aTag == nsGkAtoms::pre;
531
0
532
0
  bool isInCiteBlockquote = false;
533
0
534
0
  // XXX special-case <blockquote type=cite> so that we don't add additional
535
0
  // newlines before the text.
536
0
  if (aTag == nsGkAtoms::blockquote) {
537
0
    nsAutoString value;
538
0
    nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
539
0
    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
540
0
  }
541
0
542
0
  if (mLineBreakDue && !isInCiteBlockquote)
543
0
    EnsureVerticalSpace(mFloatingLines);
544
0
545
0
  // Check if this tag's content that should not be output
546
0
  if ((aTag == nsGkAtoms::noscript &&
547
0
       !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
548
0
      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
549
0
       !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
550
0
    // Ignore everything that follows the current tag in
551
0
    // question until a matching end tag is encountered.
552
0
    mIgnoreAboveIndex = mTagStackIndex - 1;
553
0
    return NS_OK;
554
0
  }
555
0
556
0
  if (aTag == nsGkAtoms::body) {
557
0
    // Try to figure out here whether we have a
558
0
    // preformatted style attribute set by Thunderbird.
559
0
    //
560
0
    // Trigger on the presence of a "pre-wrap" in the
561
0
    // style attribute. That's a very simplistic way to do
562
0
    // it, but better than nothing.
563
0
    // Also set mWrapColumn to the value given there
564
0
    // (which arguably we should only do if told to do so).
565
0
    nsAutoString style;
566
0
    int32_t whitespace;
567
0
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
568
0
       (kNotFound != (whitespace = style.Find("white-space:")))) {
569
0
570
0
      if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
571
#ifdef DEBUG_preformatted
572
        printf("Set mPreFormattedMail based on style pre-wrap\n");
573
#endif
574
        mPreFormattedMail = true;
575
0
        int32_t widthOffset = style.Find("width:");
576
0
        if (widthOffset >= 0) {
577
0
          // We have to search for the ch before the semicolon,
578
0
          // not for the semicolon itself, because nsString::ToInteger()
579
0
          // considers 'c' to be a valid numeric char (even if radix=10)
580
0
          // but then gets confused if it sees it next to the number
581
0
          // when the radix specified was 10, and returns an error code.
582
0
          // The literal 6 used below is the length of "width:".
          int32_t semiOffset = style.Find("ch", false, widthOffset+6);
583
0
          int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
584
0
                            : style.Length() - widthOffset);
585
0
          nsAutoString widthstr;
586
0
          style.Mid(widthstr, widthOffset+6, length);
587
0
          nsresult err;
588
0
          int32_t col = widthstr.ToInteger(&err);
589
0
590
0
          if (NS_SUCCEEDED(err)) {
591
0
            mWrapColumn = (uint32_t)col;
592
#ifdef DEBUG_preformatted
593
            printf("Set wrap column to %d based on style\n", mWrapColumn);
594
#endif
595
          }
596
0
        }
597
0
      }
598
0
      else if (kNotFound != style.Find("pre", true, whitespace)) {
599
#ifdef DEBUG_preformatted
600
        printf("Set mPreFormattedMail based on style pre\n");
601
#endif
602
        mPreFormattedMail = true;
603
0
        mWrapColumn = 0;
604
0
      }
605
0
    }
606
0
    else {
607
0
      /* See comment at end of function. */
608
0
      mInWhitespace = true;
609
0
      mPreFormattedMail = false;
610
0
    }
611
0
612
0
    return NS_OK;
613
0
  }
614
0
615
0
  // Keep this in sync with DoCloseContainer!
616
0
  if (!DoOutput()) {
617
0
    return NS_OK;
618
0
  }
619
0
620
0
  // Per-tag handling that applies to both formatted and unformatted output.
  if (aTag == nsGkAtoms::p)
621
0
    EnsureVerticalSpace(1);
622
0
  else if (aTag == nsGkAtoms::pre) {
623
0
    if (GetLastBool(mIsInCiteBlockquote))
624
0
      EnsureVerticalSpace(0);
625
0
    else if (mHasWrittenCiteBlockquote) {
626
0
      EnsureVerticalSpace(0);
627
0
      mHasWrittenCiteBlockquote = false;
628
0
    }
629
0
    else
630
0
      EnsureVerticalSpace(1);
631
0
  }
632
0
  else if (aTag == nsGkAtoms::tr) {
633
0
    PushBool(mHasWrittenCellsForRow, false);
634
0
  }
635
0
  else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
636
0
    // We must make sure that the content of two table cells get a
637
0
    // space between them.
638
0
639
0
    // To make the separation between cells most obvious and
640
0
    // importable, we use a TAB.
641
0
    if (GetLastBool(mHasWrittenCellsForRow)) {
642
0
      // Bypass |Write| so that the TAB isn't compressed away.
643
0
      AddToLine(u"\t", 1);
644
0
      mInWhitespace = true;
645
0
    }
646
0
    else if (mHasWrittenCellsForRow.IsEmpty()) {
647
0
      // We don't always see a <tr> (nor a <table>) before the <td> if we're
648
0
      // copying part of a table
649
0
      PushBool(mHasWrittenCellsForRow, true); // will never be popped
650
0
    }
651
0
    else {
652
0
      SetLastBool(mHasWrittenCellsForRow, true);
653
0
    }
654
0
  }
655
0
  else if (aTag == nsGkAtoms::ul) {
656
0
    // Indent here to support nested lists, which aren't included in li :-(
657
0
    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
658
0
         // Must end the current line before we change indention
659
0
    mIndent += kIndentSizeList;
660
0
    mULCount++;
661
0
  }
662
0
  else if (aTag == nsGkAtoms::ol) {
663
0
    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
664
0
    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
665
0
      // Must end the current line before we change indention
666
0
      if (mOLStackIndex < OLStackSize) {
667
0
        nsAutoString startAttr;
668
0
        int32_t startVal = 1;
669
0
        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
670
0
          nsresult rv = NS_OK;
671
0
          startVal = startAttr.ToInteger(&rv);
672
0
          if (NS_FAILED(rv))
673
0
            startVal = 1;
674
0
        }
675
0
        mOLStack[mOLStackIndex++] = startVal;
676
0
      }
677
0
    } else {
678
0
      // Unformatted output only tracks the <ol> nesting depth; no counter is
      // stored in mOLStack.
      mOLStackIndex++;
679
0
    }
680
0
    mIndent += kIndentSizeList;  // see ul
681
0
  }
682
0
  else if (aTag == nsGkAtoms::li &&
683
0
           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
684
0
    if (mTagStackIndex > 1 && IsInOL()) {
685
0
      if (mOLStackIndex > 0) {
686
0
        nsAutoString valueAttr;
687
0
        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
688
0
          nsresult rv = NS_OK;
689
0
          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
690
0
          if (NS_SUCCEEDED(rv))
691
0
            mOLStack[mOLStackIndex-1] = valueAttrVal;
692
0
        }
693
0
        // This is what nsBulletFrame does for OLs:
694
0
        mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
695
0
      }
696
0
      else {
697
0
        mInIndentString.Append(char16_t('#'));
698
0
      }
699
0
700
0
      mInIndentString.Append(char16_t('.'));
701
0
702
0
    }
703
0
    else {
704
0
      // The bullet depends on the <ul> nesting depth, cycling through
      // "*o+#"; '#' (index 3) is used when no <ul> is open.
      static const char bulletCharArray[] = "*o+#";
705
0
      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
706
0
      char bulletChar = bulletCharArray[index % 4];
707
0
      mInIndentString.Append(char16_t(bulletChar));
708
0
    }
709
0
710
0
    mInIndentString.Append(char16_t(' '));
711
0
  }
712
0
  else if (aTag == nsGkAtoms::dl) {
713
0
    EnsureVerticalSpace(1);
714
0
  }
715
0
  else if (aTag == nsGkAtoms::dt) {
716
0
    EnsureVerticalSpace(0);
717
0
  }
718
0
  else if (aTag == nsGkAtoms::dd) {
719
0
    EnsureVerticalSpace(0);
720
0
    mIndent += kIndentSizeDD;
721
0
  }
722
0
  else if (aTag == nsGkAtoms::span) {
723
0
    ++mSpanLevel;
724
0
  }
725
0
  else if (aTag == nsGkAtoms::blockquote) {
726
0
    // Push
727
0
    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
728
0
    if (isInCiteBlockquote) {
729
0
      EnsureVerticalSpace(0);
730
0
      mCiteQuoteLevel++;
731
0
    }
732
0
    else {
733
0
      EnsureVerticalSpace(1);
734
0
      mIndent += kTabSize; // Check for some maximum value?
735
0
    }
736
0
  }
737
0
  else if (aTag == nsGkAtoms::q) {
738
0
    Write(NS_LITERAL_STRING("\""));
739
0
  }
740
0
741
0
  // Else make sure we'll separate block level tags,
742
0
  // even if we're about to leave, before doing any other formatting.
743
0
  else if (IsElementBlock(mElement)) {
744
0
    EnsureVerticalSpace(0);
745
0
  }
746
0
747
0
  //////////////////////////////////////////////////////////////
748
0
  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
749
0
    return NS_OK;
750
0
  }
751
0
  //////////////////////////////////////////////////////////////
752
0
  // The rest of this routine is formatted output stuff,
753
0
  // which we should skip if we're not formatted:
754
0
  //////////////////////////////////////////////////////////////
755
0
756
0
  // Push on stack
757
0
  bool currentNodeIsConverted = IsCurrentNodeConverted();
758
0
759
0
  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
760
0
      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
761
0
      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
762
0
  {
763
0
    EnsureVerticalSpace(2);
764
0
    if (mHeaderStrategy == 2) {  // numbered
765
0
      mIndent += kIndentSizeHeaders;
766
0
      // Caching
767
0
      int32_t level = HeaderLevel(aTag);
768
0
      // Increase counter for current level
769
0
      mHeaderCounter[level]++;
770
0
      // Reset all lower levels
771
0
      int32_t i;
772
0
773
0
      for (i = level + 1; i <= 6; i++) {
774
0
        mHeaderCounter[i] = 0;
775
0
      }
776
0
777
0
      // Construct numbers
778
0
      nsAutoString leadup;
779
0
      for (i = 1; i <= level; i++) {
780
0
        leadup.AppendInt(mHeaderCounter[i]);
781
0
        leadup.Append(char16_t('.'));
782
0
      }
783
0
      leadup.Append(char16_t(' '));
784
0
      Write(leadup);
785
0
    }
786
0
    else if (mHeaderStrategy == 1) { // indent increasingly
787
0
      mIndent += kIndentSizeHeaders;
788
0
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
789
0
           // for h(x), run x-1 times
790
0
        mIndent += kIndentIncrementHeaders;
791
0
      }
792
0
    }
793
0
  }
794
0
  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
795
0
    // Remember a non-empty href in mURL.
    nsAutoString url;
796
0
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
797
0
        && !url.IsEmpty()) {
798
0
      mURL = url;
799
0
    }
800
0
  }
801
0
  // The inline markers below are only written when mStructs is set and the
  // current node is not reported as converted.
  else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
802
0
    Write(NS_LITERAL_STRING("^"));
803
0
  }
804
0
  else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
805
0
    Write(NS_LITERAL_STRING("_"));
806
0
  }
807
0
  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
808
0
    Write(NS_LITERAL_STRING("|"));
809
0
  }
810
0
  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
811
0
           && mStructs && !currentNodeIsConverted) {
812
0
    Write(NS_LITERAL_STRING("*"));
813
0
  }
814
0
  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
815
0
           && mStructs && !currentNodeIsConverted) {
816
0
    Write(NS_LITERAL_STRING("/"));
817
0
  }
818
0
  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
819
0
    Write(NS_LITERAL_STRING("_"));
820
0
  }
821
0
822
0
  /* Container elements are always block elements, so we shouldn't
823
0
     output any whitespace immediately after the container tag even if
824
0
     there's extra whitespace there because the HTML is pretty-printed
825
0
     or something. To ensure that happens, tell the serializer we're
826
0
     already in whitespace so it won't output more. */
827
0
  mInWhitespace = true;
828
0
829
0
  return NS_OK;
830
0
}
831
832
nsresult
833
nsPlainTextSerializer::DoCloseContainer(nsAtom* aTag)
834
0
{
835
0
  if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
836
0
    mIgnoredChildNodeLevel--;
837
0
    return NS_OK;
838
0
  }
839
0
  if (IsIgnorableRubyAnnotation(aTag)) {
840
0
    mIgnoredChildNodeLevel--;
841
0
    return NS_OK;
842
0
  }
843
0
844
0
  if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
845
0
    if (DoOutput() && IsInPre() && IsElementBlock(mElement)) {
846
0
      // If we're closing a preformatted block element, output a line break
847
0
      // when we find a new container.
848
0
      mPreformattedBlockBoundary = true;
849
0
    }
850
0
  }
851
0
852
0
  if (mFlags & nsIDocumentEncoder::OutputRaw) {
853
0
    // Raw means raw.  Don't even think about doing anything fancy
854
0
    // here like indenting, adding line breaks or any other
855
0
    // characters such as list item bullets, quote characters
856
0
    // around <q>, etc.  I mean it!  Don't make me smack you!
857
0
858
0
    return NS_OK;
859
0
  }
860
0
861
0
  if (mTagStackIndex > 0) {
862
0
    --mTagStackIndex;
863
0
  }
864
0
865
0
  if (mTagStackIndex >= mIgnoreAboveIndex) {
866
0
    if (mTagStackIndex == mIgnoreAboveIndex) {
867
0
      // We're dealing with the close tag whose matching
868
0
      // open tag had set the mIgnoreAboveIndex value.
869
0
      // Reset mIgnoreAboveIndex before discarding this tag.
870
0
      mIgnoreAboveIndex = (uint32_t)kNotFound;
871
0
    }
872
0
    return NS_OK;
873
0
  }
874
0
875
0
  // End current line if we're ending a block level tag
876
0
  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
877
0
    // We want the output to end with a new line,
878
0
    // but in preformatted areas like text fields,
879
0
    // we can't emit newlines that weren't there.
880
0
    // So add the newline only in the case of formatted output.
881
0
    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
882
0
      EnsureVerticalSpace(0);
883
0
    }
884
0
    else {
885
0
      FlushLine();
886
0
    }
887
0
    // We won't want to do anything with these in formatted mode either,
888
0
    // so just return now:
889
0
    return NS_OK;
890
0
  }
891
0
892
0
  // Keep this in sync with DoOpenContainer!
893
0
  if (!DoOutput()) {
894
0
    return NS_OK;
895
0
  }
896
0
897
0
  if (aTag == nsGkAtoms::tr) {
898
0
    PopBool(mHasWrittenCellsForRow);
899
0
    // Should always end a line, but get no more whitespace
900
0
    if (mFloatingLines < 0)
901
0
      mFloatingLines = 0;
902
0
    mLineBreakDue = true;
903
0
  }
904
0
  else if (((aTag == nsGkAtoms::li) ||
905
0
            (aTag == nsGkAtoms::dt)) &&
906
0
           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
907
0
    // Items that should always end a line, but get no more whitespace
908
0
    if (mFloatingLines < 0)
909
0
      mFloatingLines = 0;
910
0
    mLineBreakDue = true;
911
0
  }
912
0
  else if (aTag == nsGkAtoms::pre) {
913
0
    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
914
0
    mLineBreakDue = true;
915
0
  }
916
0
  else if (aTag == nsGkAtoms::ul) {
917
0
    FlushLine();
918
0
    mIndent -= kIndentSizeList;
919
0
    if (--mULCount + mOLStackIndex == 0) {
920
0
      mFloatingLines = 1;
921
0
      mLineBreakDue = true;
922
0
    }
923
0
  }
924
0
  else if (aTag == nsGkAtoms::ol) {
925
0
    FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
926
0
    mIndent -= kIndentSizeList;
927
0
    NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
928
0
    mOLStackIndex--;
929
0
    if (mULCount + mOLStackIndex == 0) {
930
0
      mFloatingLines = 1;
931
0
      mLineBreakDue = true;
932
0
    }
933
0
  }
934
0
  else if (aTag == nsGkAtoms::dl) {
935
0
    mFloatingLines = 1;
936
0
    mLineBreakDue = true;
937
0
  }
938
0
  else if (aTag == nsGkAtoms::dd) {
939
0
    FlushLine();
940
0
    mIndent -= kIndentSizeDD;
941
0
  }
942
0
  else if (aTag == nsGkAtoms::span) {
943
0
    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
944
0
    --mSpanLevel;
945
0
  }
946
0
  else if (aTag == nsGkAtoms::div) {
947
0
    if (mFloatingLines < 0)
948
0
      mFloatingLines = 0;
949
0
    mLineBreakDue = true;
950
0
  }
951
0
  else if (aTag == nsGkAtoms::blockquote) {
952
0
    FlushLine();    // Is this needed?
953
0
954
0
    // Pop
955
0
    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
956
0
957
0
    if (isInCiteBlockquote) {
958
0
      NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
959
0
      mCiteQuoteLevel--;
960
0
      mFloatingLines = 0;
961
0
      mHasWrittenCiteBlockquote = true;
962
0
    }
963
0
    else {
964
0
      mIndent -= kTabSize;
965
0
      mFloatingLines = 1;
966
0
    }
967
0
    mLineBreakDue = true;
968
0
  }
969
0
  else if (aTag == nsGkAtoms::q) {
970
0
    Write(NS_LITERAL_STRING("\""));
971
0
  }
972
0
  else if (IsElementBlock(mElement) && aTag != nsGkAtoms::script) {
973
0
    // All other blocks get 1 vertical space after them
974
0
    // in formatted mode, otherwise 0.
975
0
    // This is hard. Sometimes 0 is a better number, but
976
0
    // how to know?
977
0
    if (mFlags & nsIDocumentEncoder::OutputFormatted)
978
0
      EnsureVerticalSpace(1);
979
0
    else {
980
0
      if (mFloatingLines < 0)
981
0
        mFloatingLines = 0;
982
0
      mLineBreakDue = true;
983
0
    }
984
0
  }
985
0
986
0
  //////////////////////////////////////////////////////////////
987
0
  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
988
0
    return NS_OK;
989
0
  }
990
0
  //////////////////////////////////////////////////////////////
991
0
  // The rest of this routine is formatted output stuff,
992
0
  // which we should skip if we're not formatted:
993
0
  //////////////////////////////////////////////////////////////
994
0
995
0
  // Pop the currentConverted stack
996
0
  bool currentNodeIsConverted = IsCurrentNodeConverted();
997
0
998
0
  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
999
0
      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
1000
0
      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
1001
0
1002
0
    if (mHeaderStrategy) {  /*numbered or indent increasingly*/
1003
0
      mIndent -= kIndentSizeHeaders;
1004
0
    }
1005
0
    if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
1006
0
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
1007
0
           // for h(x), run x-1 times
1008
0
        mIndent -= kIndentIncrementHeaders;
1009
0
      }
1010
0
    }
1011
0
    EnsureVerticalSpace(1);
1012
0
  }
1013
0
  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
1014
0
    nsAutoString temp;
1015
0
    temp.AssignLiteral(" <");
1016
0
    temp += mURL;
1017
0
    temp.Append(char16_t('>'));
1018
0
    Write(temp);
1019
0
    mURL.Truncate();
1020
0
  }
1021
0
  else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
1022
0
           && mStructs && !currentNodeIsConverted) {
1023
0
    Write(kSpace);
1024
0
  }
1025
0
  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
1026
0
    Write(NS_LITERAL_STRING("|"));
1027
0
  }
1028
0
  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
1029
0
           && mStructs && !currentNodeIsConverted) {
1030
0
    Write(NS_LITERAL_STRING("*"));
1031
0
  }
1032
0
  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
1033
0
           && mStructs && !currentNodeIsConverted) {
1034
0
    Write(NS_LITERAL_STRING("/"));
1035
0
  }
1036
0
  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
1037
0
    Write(NS_LITERAL_STRING("_"));
1038
0
  }
1039
0
1040
0
  return NS_OK;
1041
0
}
1042
1043
bool
1044
nsPlainTextSerializer::MustSuppressLeaf()
1045
0
{
1046
0
  if (mIgnoredChildNodeLevel > 0) {
1047
0
    return true;
1048
0
  }
1049
0
1050
0
  if ((mTagStackIndex > 1 &&
1051
0
       mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
1052
0
      (mTagStackIndex > 0 &&
1053
0
        mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
1054
0
    // Don't output the contents of SELECT elements;
1055
0
    // Might be nice, eventually, to output just the selected element.
1056
0
    // Read more in bug 31994.
1057
0
    return true;
1058
0
  }
1059
0
1060
0
  if (mTagStackIndex > 0 &&
1061
0
      (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
1062
0
       mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
1063
0
    // Don't output the contents of <script> or <style> tags;
1064
0
    return true;
1065
0
  }
1066
0
1067
0
  return false;
1068
0
}
1069
1070
void
1071
nsPlainTextSerializer::DoAddText(bool aIsLineBreak, const nsAString& aText)
1072
0
{
1073
0
  // If we don't want any output, just return
1074
0
  if (!DoOutput()) {
1075
0
    return;
1076
0
  }
1077
0
1078
0
  if (!aIsLineBreak) {
1079
0
    // Make sure to reset this, since it's no longer true.
1080
0
    mHasWrittenCiteBlockquote = false;
1081
0
  }
1082
0
1083
0
  if (mLineBreakDue)
1084
0
    EnsureVerticalSpace(mFloatingLines);
1085
0
1086
0
  if (MustSuppressLeaf()) {
1087
0
    return;
1088
0
  }
1089
0
1090
0
  if (aIsLineBreak) {
1091
0
    // The only times we want to pass along whitespace from the original
1092
0
    // html source are if we're forced into preformatted mode via flags,
1093
0
    // or if we're prettyprinting and we're inside a <pre>.
1094
0
    // Otherwise, either we're collapsing to minimal text, or we're
1095
0
    // prettyprinting to mimic the html format, and in neither case
1096
0
    // does the formatting of the html source help us.
1097
0
    if ((mFlags & nsIDocumentEncoder::OutputPreformatted) ||
1098
0
        (mPreFormattedMail && !mWrapColumn) ||
1099
0
        IsInPre()) {
1100
0
      EnsureVerticalSpace(mEmptyLines+1);
1101
0
    }
1102
0
    else if (!mInWhitespace) {
1103
0
      Write(kSpace);
1104
0
      mInWhitespace = true;
1105
0
    }
1106
0
    return;
1107
0
  }
1108
0
1109
0
  /* Check, if we are in a link (symbolized with mURL containing the URL)
1110
0
     and the text is equal to the URL. In that case we don't want to output
1111
0
     the URL twice so we scrap the text in mURL. */
1112
0
  if (!mURL.IsEmpty() && mURL.Equals(aText)) {
1113
0
    mURL.Truncate();
1114
0
  }
1115
0
  Write(aText);
1116
0
}
1117
1118
/**
 * Emits the plain text representation of a leaf element.
 *
 * Handles <br>, <hr> (formatted output only), the placeholder character
 * for OutputNonTextContentAsPlaceholder, and <img> (alt or title text).
 * Every other tag produces no output here.
 *
 * @param aTag  the tag of the leaf element.
 * @return NS_OK in all cases.
 */
nsresult
nsPlainTextSerializer::DoAddLeaf(nsAtom* aTag)
{
  mPreformattedBlockBoundary = false;

  // If we don't want any output, just return
  if (!DoOutput()) {
    return NS_OK;
  }

  if (mLineBreakDue) {
    EnsureVerticalSpace(mFloatingLines);
  }

  if (MustSuppressLeaf()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::br) {
    // Another egregious editor workaround, see bug 38194:
    // ignore the bogus br tags that the editor sticks here and there.
    nsAutoString typeValue;
    const bool isBogusEditorBr =
      NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::type, typeValue)) &&
      typeValue.EqualsLiteral("_moz");
    if (!isBogusEditorBr) {
      EnsureVerticalSpace(mEmptyLines+1);
    }
    return NS_OK;
  }

  if (aTag == nsGkAtoms::hr &&
      (mFlags & nsIDocumentEncoder::OutputFormatted)) {
    EnsureVerticalSpace(0);

    // Make a line of dashes as wide as the wrap width
    // (25 columns when no wrap column is set).
    // XXX honoring percentage would be nice
    const uint32_t ruleWidth = (mWrapColumn > 0 ? mWrapColumn : 25);
    nsAutoString rule;
    for (uint32_t column = 0; column < ruleWidth; ++column) {
      rule.Append(char16_t('-'));
    }
    Write(rule);

    EnsureVerticalSpace(0);
    return NS_OK;
  }

  if (mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder) {
    Write(NS_LITERAL_STRING(u"\xFFFC"));
    return NS_OK;
  }

  if (aTag == nsGkAtoms::img) {
    /* Output (in decreasing order of preference)
       alt, title or nothing */
    // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
    nsAutoString imageDescription;
    const bool hasAlt =
      NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription));

    // When alt is present its value is used verbatim; an empty value
    // (|alt=""|) therefore outputs nothing. Only without alt do we fall
    // back to a non-empty title, wrapped in brackets.
    if (!hasAlt &&
        NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
        !imageDescription.IsEmpty()) {
      imageDescription = NS_LITERAL_STRING(" [") +
                         imageDescription +
                         NS_LITERAL_STRING("] ");
    }

    Write(imageDescription);
  }

  return NS_OK;
}
1184
1185
/**
1186
 * Adds as many newline as necessary to get |noOfRows| empty lines
1187
 *
1188
 * noOfRows = -1    :   Being in the middle of some line of text
1189
 * noOfRows =  0    :   Being at the start of a line
1190
 * noOfRows =  n>0  :   Having n empty lines before the current line.
1191
 */
1192
void
1193
nsPlainTextSerializer::EnsureVerticalSpace(int32_t noOfRows)
1194
0
{
1195
0
  // If we have something in the indent we probably want to output
1196
0
  // it and it's not included in the count for empty lines so we don't
1197
0
  // realize that we should start a new line.
1198
0
  if (noOfRows >= 0 && !mInIndentString.IsEmpty()) {
1199
0
    EndLine(false);
1200
0
    mInWhitespace = true;
1201
0
  }
1202
0
1203
0
  while(mEmptyLines < noOfRows) {
1204
0
    EndLine(false);
1205
0
    mInWhitespace = true;
1206
0
  }
1207
0
  mLineBreakDue = false;
1208
0
  mFloatingLines = -1;
1209
0
}
1210
1211
/**
1212
 * This empties the current line cache without adding a NEWLINE.
1213
 * Should not be used if line wrapping is of importance since
1214
 * this function destroys the cache information.
1215
 *
1216
 * It will also write indentation and quotes if we believe us to be
1217
 * at the start of the line.
1218
 */
1219
void
1220
nsPlainTextSerializer::FlushLine()
1221
0
{
1222
0
  if (!mCurrentLine.IsEmpty()) {
1223
0
    if (mAtFirstColumn) {
1224
0
      OutputQuotesAndIndent(); // XXX: Should we always do this? Bug?
1225
0
    }
1226
0
1227
0
    Output(mCurrentLine);
1228
0
    mAtFirstColumn = mAtFirstColumn && mCurrentLine.IsEmpty();
1229
0
    mCurrentLine.Truncate();
1230
0
    mCurrentLineWidth = 0;
1231
0
  }
1232
0
}
1233
1234
/**
1235
 * Prints the text to output to our current output device (the string mOutputString).
1236
 * The only logic here is to replace non breaking spaces with a normal space since
1237
 * most (all?) receivers of the result won't understand the nbsp and even be
1238
 * confused by it.
1239
 */
1240
void
1241
nsPlainTextSerializer::Output(nsString& aString)
1242
0
{
1243
0
  if (!aString.IsEmpty()) {
1244
0
    mStartedOutput = true;
1245
0
  }
1246
0
1247
0
  if (!(mFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
1248
0
    // First, replace all nbsp characters with spaces,
1249
0
    // which the unicode encoder won't do for us.
1250
0
    aString.ReplaceChar(kNBSP, kSPACE);
1251
0
  }
1252
0
  mOutputString->Append(aString);
1253
0
}
1254
1255
static bool
1256
IsSpaceStuffable(const char16_t *s)
1257
0
{
1258
0
  if (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1259
0
      NS_strncmp(s, u"From ", 5) == 0)
1260
0
    return true;
1261
0
  else
1262
0
    return false;
1263
0
}
1264
1265
/**
1266
 * This function adds a piece of text to the current stored line. If we are
1267
 * wrapping text and the stored line will become too long, a suitable
1268
 * location to wrap will be found and the line that's complete will be
1269
 * output.
1270
 */
1271
void
1272
nsPlainTextSerializer::AddToLine(const char16_t * aLineFragment,
1273
                                 int32_t aLineFragmentLength)
1274
0
{
1275
0
  uint32_t prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
1276
0
1277
0
  if (mLineBreakDue)
1278
0
    EnsureVerticalSpace(mFloatingLines);
1279
0
1280
0
  int32_t linelength = mCurrentLine.Length();
1281
0
  if (0 == linelength) {
1282
0
    if (0 == aLineFragmentLength) {
1283
0
      // Nothing at all. Are you kidding me?
1284
0
      return;
1285
0
    }
1286
0
1287
0
    if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1288
0
      if (IsSpaceStuffable(aLineFragment)
1289
0
         && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
1290
0
         )
1291
0
        {
1292
0
          // Space stuffing a la RFC 2646 (format=flowed).
1293
0
          mCurrentLine.Append(char16_t(' '));
1294
0
1295
0
          if (MayWrap()) {
1296
0
            mCurrentLineWidth += GetUnicharWidth(' ');
1297
#ifdef DEBUG_wrapping
1298
            NS_ASSERTION(GetUnicharStringWidth(mCurrentLine.get(),
1299
                                               mCurrentLine.Length()) ==
1300
                         (int32_t)mCurrentLineWidth,
1301
                         "mCurrentLineWidth and reality out of sync!");
1302
#endif
1303
          }
1304
0
        }
1305
0
    }
1306
0
    mEmptyLines=-1;
1307
0
  }
1308
0
1309
0
  mCurrentLine.Append(aLineFragment, aLineFragmentLength);
1310
0
  if (MayWrap()) {
1311
0
    mCurrentLineWidth += GetUnicharStringWidth(aLineFragment,
1312
0
                                               aLineFragmentLength);
1313
#ifdef DEBUG_wrapping
1314
    NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1315
                                       mCurrentLine.Length()) ==
1316
                 (int32_t)mCurrentLineWidth,
1317
                 "mCurrentLineWidth and reality out of sync!");
1318
#endif
1319
  }
1320
0
1321
0
  linelength = mCurrentLine.Length();
1322
0
1323
0
  //  Wrap?
1324
0
  if (MayWrap())
1325
0
  {
1326
#ifdef DEBUG_wrapping
1327
    NS_ASSERTION(GetUnicharstringWidth(mCurrentLine.get(),
1328
                                  mCurrentLine.Length()) ==
1329
                 (int32_t)mCurrentLineWidth,
1330
                 "mCurrentLineWidth and reality out of sync!");
1331
#endif
1332
    // Yes, wrap!
1333
0
    // The "+4" is to avoid wrap lines that only would be a couple
1334
0
    // of letters too long. We give this bonus only if the
1335
0
    // wrapcolumn is more than 20.
1336
0
    uint32_t bonuswidth = (mWrapColumn > 20) ? 4 : 0;
1337
0
1338
0
    // XXX: Should calculate prefixwidth with GetUnicharStringWidth
1339
0
    while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
1340
0
      // We go from the end removing one letter at a time until
1341
0
      // we have a reasonable width
1342
0
      int32_t goodSpace = mCurrentLine.Length();
1343
0
      uint32_t width = mCurrentLineWidth;
1344
0
      while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
1345
0
        goodSpace--;
1346
0
        width -= GetUnicharWidth(mCurrentLine[goodSpace]);
1347
0
      }
1348
0
1349
0
      goodSpace++;
1350
0
1351
0
      if (mLineBreaker) {
1352
0
        goodSpace = mLineBreaker->Prev(mCurrentLine.get(),
1353
0
                                    mCurrentLine.Length(), goodSpace);
1354
0
        if (goodSpace != NS_LINEBREAKER_NEED_MORE_TEXT &&
1355
0
            nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
1356
0
          --goodSpace;    // adjust the position since line breaker returns a position next to space
1357
0
        }
1358
0
      }
1359
0
      // fallback if the line breaker is unavailable or failed
1360
0
      if (!mLineBreaker) {
1361
0
        if (mCurrentLine.IsEmpty() || mWrapColumn < prefixwidth) {
1362
0
          goodSpace = NS_LINEBREAKER_NEED_MORE_TEXT;
1363
0
        } else {
1364
0
          goodSpace = std::min(mWrapColumn - prefixwidth, mCurrentLine.Length() - 1);
1365
0
          while (goodSpace >= 0 &&
1366
0
                 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1367
0
            goodSpace--;
1368
0
          }
1369
0
        }
1370
0
      }
1371
0
1372
0
      nsAutoString restOfLine;
1373
0
      if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT) {
1374
0
        // If we didn't find a good place to break, accept long line and
1375
0
        // try to find another place to break
1376
0
        goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
1377
0
        if (mLineBreaker) {
1378
0
          if ((uint32_t)goodSpace < mCurrentLine.Length())
1379
0
            goodSpace = mLineBreaker->Next(mCurrentLine.get(),
1380
0
                                           mCurrentLine.Length(), goodSpace);
1381
0
          if (goodSpace == NS_LINEBREAKER_NEED_MORE_TEXT)
1382
0
            goodSpace = mCurrentLine.Length();
1383
0
        }
1384
0
        // fallback if the line breaker is unavailable or failed
1385
0
        if (!mLineBreaker) {
1386
0
          goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
1387
0
          while (goodSpace < linelength &&
1388
0
                 !nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1389
0
            goodSpace++;
1390
0
          }
1391
0
        }
1392
0
      }
1393
0
1394
0
      if ((goodSpace < linelength) && (goodSpace > 0)) {
1395
0
        // Found a place to break
1396
0
1397
0
        // -1 (trim a char at the break position)
1398
0
        // only if the line break was a space.
1399
0
        if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
1400
0
          mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
1401
0
        }
1402
0
        else {
1403
0
          mCurrentLine.Right(restOfLine, linelength-goodSpace);
1404
0
        }
1405
0
        // if breaker was U+0020, it has to consider for delsp=yes support
1406
0
        bool breakBySpace = mCurrentLine.CharAt(goodSpace) == ' ';
1407
0
        mCurrentLine.Truncate(goodSpace);
1408
0
        EndLine(true, breakBySpace);
1409
0
        mCurrentLine.Truncate();
1410
0
        // Space stuff new line?
1411
0
        if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1412
0
          if (!restOfLine.IsEmpty() && IsSpaceStuffable(restOfLine.get())
1413
0
              && mCiteQuoteLevel == 0  // We space-stuff quoted lines anyway
1414
0
            )
1415
0
          {
1416
0
            // Space stuffing a la RFC 2646 (format=flowed).
1417
0
            mCurrentLine.Append(char16_t(' '));
1418
0
            //XXX doesn't seem to work correctly for ' '
1419
0
          }
1420
0
        }
1421
0
        mCurrentLine.Append(restOfLine);
1422
0
        mCurrentLineWidth = GetUnicharStringWidth(mCurrentLine.get(),
1423
0
                                                  mCurrentLine.Length());
1424
0
        linelength = mCurrentLine.Length();
1425
0
        mEmptyLines = -1;
1426
0
      }
1427
0
      else {
1428
0
        // Nothing to do. Hopefully we get more data later
1429
0
        // to use for a place to break line
1430
0
        break;
1431
0
      }
1432
0
    }
1433
0
  }
1434
0
  else {
1435
0
    // No wrapping.
1436
0
  }
1437
0
}
1438
1439
/**
1440
 * Outputs the contents of mCurrentLine, and resets line specific
1441
 * variables. Also adds an indentation and prefix if there is
1442
 * one specified. Strips ending spaces from the line if it isn't
1443
 * preformatted.
1444
 */
1445
void
1446
nsPlainTextSerializer::EndLine(bool aSoftlinebreak, bool aBreakBySpace)
1447
0
{
1448
0
  uint32_t currentlinelength = mCurrentLine.Length();
1449
0
1450
0
  if (aSoftlinebreak && 0 == currentlinelength) {
1451
0
    // No meaning
1452
0
    return;
1453
0
  }
1454
0
1455
0
  /* In non-preformatted mode, remove spaces from the end of the line for
1456
0
   * format=flowed compatibility. Don't do this for these special cases:
1457
0
   * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1458
0
   * "- -- ", the OpenPGP dash-escaped signature separator in inline
1459
0
   * signed messages according to the OpenPGP standard (RFC 2440).
1460
0
   */
1461
0
  if (!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
1462
0
      !(mFlags & nsIDocumentEncoder::OutputDontRemoveLineEndingSpaces) &&
1463
0
     (aSoftlinebreak ||
1464
0
     !(mCurrentLine.EqualsLiteral("-- ") || mCurrentLine.EqualsLiteral("- -- ")))) {
1465
0
    // Remove spaces from the end of the line.
1466
0
    while(currentlinelength > 0 &&
1467
0
          mCurrentLine[currentlinelength-1] == ' ') {
1468
0
      --currentlinelength;
1469
0
    }
1470
0
    mCurrentLine.SetLength(currentlinelength);
1471
0
  }
1472
0
1473
0
  if (aSoftlinebreak &&
1474
0
     (mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
1475
0
     (mIndent == 0)) {
1476
0
    // Add the soft part of the soft linebreak (RFC 2646 4.1)
1477
0
    // We only do this when there is no indentation since format=flowed
1478
0
    // lines and indentation doesn't work well together.
1479
0
1480
0
    // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1481
0
    // add twice space.
1482
0
    if ((mFlags & nsIDocumentEncoder::OutputFormatDelSp) && aBreakBySpace)
1483
0
      mCurrentLine.AppendLiteral("  ");
1484
0
    else
1485
0
      mCurrentLine.Append(char16_t(' '));
1486
0
  }
1487
0
1488
0
  if (aSoftlinebreak) {
1489
0
    mEmptyLines=0;
1490
0
  }
1491
0
  else {
1492
0
    // Hard break
1493
0
    if (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty()) {
1494
0
      mEmptyLines=-1;
1495
0
    }
1496
0
1497
0
    mEmptyLines++;
1498
0
  }
1499
0
1500
0
  if (mAtFirstColumn) {
1501
0
    // If we don't have anything "real" to output we have to
1502
0
    // make sure the indent doesn't end in a space since that
1503
0
    // would trick a format=flowed-aware receiver.
1504
0
    bool stripTrailingSpaces = mCurrentLine.IsEmpty();
1505
0
    OutputQuotesAndIndent(stripTrailingSpaces);
1506
0
  }
1507
0
1508
0
  mCurrentLine.Append(mLineBreak);
1509
0
  Output(mCurrentLine);
1510
0
  mCurrentLine.Truncate();
1511
0
  mCurrentLineWidth = 0;
1512
0
  mAtFirstColumn=true;
1513
0
  mInWhitespace=true;
1514
0
  mLineBreakDue = false;
1515
0
  mFloatingLines = -1;
1516
0
}
1517
1518
1519
/**
1520
 * Outputs the calculated and stored indent and text in the indentation. That is
1521
 * quote chars and numbers for numbered lists and such. It will also reset any
1522
 * stored text to put in the indentation after using it.
1523
 */
1524
void
1525
nsPlainTextSerializer::OutputQuotesAndIndent(bool stripTrailingSpaces /* = false */)
1526
0
{
1527
0
  nsAutoString stringToOutput;
1528
0
1529
0
  // Put the mail quote "> " chars in, if appropriate:
1530
0
  if (mCiteQuoteLevel > 0) {
1531
0
    nsAutoString quotes;
1532
0
    for(int i=0; i < mCiteQuoteLevel; i++) {
1533
0
      quotes.Append(char16_t('>'));
1534
0
    }
1535
0
    if (!mCurrentLine.IsEmpty()) {
1536
0
      /* Better don't output a space here, if the line is empty,
1537
0
         in case a receiving f=f-aware UA thinks, this were a flowed line,
1538
0
         which it isn't - it's just empty.
1539
0
         (Flowed lines may be joined with the following one,
1540
0
         so the empty line may be lost completely.) */
1541
0
      quotes.Append(char16_t(' '));
1542
0
    }
1543
0
    stringToOutput = quotes;
1544
0
    mAtFirstColumn = false;
1545
0
  }
1546
0
1547
0
  // Indent if necessary
1548
0
  int32_t indentwidth = mIndent - mInIndentString.Length();
1549
0
  if (indentwidth > 0
1550
0
      && (!mCurrentLine.IsEmpty() || !mInIndentString.IsEmpty())
1551
0
      // Don't make empty lines look flowed
1552
0
      ) {
1553
0
    nsAutoString spaces;
1554
0
    for (int i=0; i < indentwidth; ++i)
1555
0
      spaces.Append(char16_t(' '));
1556
0
    stringToOutput += spaces;
1557
0
    mAtFirstColumn = false;
1558
0
  }
1559
0
1560
0
  if (!mInIndentString.IsEmpty()) {
1561
0
    stringToOutput += mInIndentString;
1562
0
    mAtFirstColumn = false;
1563
0
    mInIndentString.Truncate();
1564
0
  }
1565
0
1566
0
  if (stripTrailingSpaces) {
1567
0
    int32_t lineLength = stringToOutput.Length();
1568
0
    while(lineLength > 0 &&
1569
0
          ' ' == stringToOutput[lineLength-1]) {
1570
0
      --lineLength;
1571
0
    }
1572
0
    stringToOutput.SetLength(lineLength);
1573
0
  }
1574
0
1575
0
  if (!stringToOutput.IsEmpty()) {
1576
0
    Output(stringToOutput);
1577
0
  }
1578
0
1579
0
}
1580
1581
/**
1582
 * Write a string. This is the high-level function to use to get text output.
1583
 * By using AddToLine, Output, EndLine and other functions it handles quotation,
1584
 * line wrapping, indentation, whitespace compression and other things.
1585
 */
1586
void
1587
nsPlainTextSerializer::Write(const nsAString& aStr)
1588
0
{
1589
0
  // XXX Copy necessary to use nsString methods and gain
1590
0
  // access to underlying buffer
1591
0
  nsAutoString str(aStr);
1592
0
1593
#ifdef DEBUG_wrapping
1594
  printf("Write(%s): wrap col = %d\n",
1595
         NS_ConvertUTF16toUTF8(str).get(), mWrapColumn);
1596
#endif
1597
1598
0
  int32_t bol = 0;
1599
0
  int32_t newline;
1600
0
1601
0
  int32_t totLen = str.Length();
1602
0
1603
0
  // If the string is empty, do nothing:
1604
0
  if (totLen <= 0) return;
1605
0
1606
0
  // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1607
0
  // to be cut off along with usual spaces if required. (bug #125928)
1608
0
  if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1609
0
    for (int32_t i = totLen-1; i >= 0; i--) {
1610
0
      char16_t c = str[i];
1611
0
      if ('\n' == c || '\r' == c || ' ' == c || '\t' == c)
1612
0
        continue;
1613
0
      if (kNBSP == c)
1614
0
        str.Replace(i, 1, ' ');
1615
0
      else
1616
0
        break;
1617
0
    }
1618
0
  }
1619
0
1620
0
  // We have two major codepaths here. One that does preformatted text and one
1621
0
  // that does normal formatted text. The one for preformatted text calls
1622
0
  // Output directly while the other code path goes through AddToLine.
1623
0
  if ((mPreFormattedMail && !mWrapColumn) || (IsInPre() && !mPreFormattedMail)
1624
0
      || (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1625
0
    // No intelligent wrapping.
1626
0
1627
0
    // This mustn't be mixed with intelligent wrapping without clearing
1628
0
    // the mCurrentLine buffer before!!!
1629
0
    NS_ASSERTION(mCurrentLine.IsEmpty() || (IsInPre() && !mPreFormattedMail),
1630
0
                 "Mixed wrapping data and nonwrapping data on the same line");
1631
0
    if (!mCurrentLine.IsEmpty()) {
1632
0
      FlushLine();
1633
0
    }
1634
0
1635
0
    // Put the mail quote "> " chars in, if appropriate.
1636
0
    // Have to put it in before every line.
1637
0
    while(bol<totLen) {
1638
0
      bool outputQuotes = mAtFirstColumn;
1639
0
      bool atFirstColumn;
1640
0
      bool outputLineBreak = false;
1641
0
      bool spacesOnly = true;
1642
0
1643
0
      // Find one of '\n' or '\r' using iterators since nsAString
1644
0
      // doesn't have the old FindCharInSet function.
1645
0
      nsAString::const_iterator iter;           str.BeginReading(iter);
1646
0
      nsAString::const_iterator done_searching; str.EndReading(done_searching);
1647
0
      iter.advance(bol);
1648
0
      int32_t new_newline = bol;
1649
0
      newline = kNotFound;
1650
0
      while(iter != done_searching) {
1651
0
        if ('\n' == *iter || '\r' == *iter) {
1652
0
          newline = new_newline;
1653
0
          break;
1654
0
        }
1655
0
        if (' ' != *iter)
1656
0
          spacesOnly = false;
1657
0
        ++new_newline;
1658
0
        ++iter;
1659
0
      }
1660
0
1661
0
      // Done searching
1662
0
      nsAutoString stringpart;
1663
0
      if (newline == kNotFound) {
1664
0
        // No new lines.
1665
0
        stringpart.Assign(Substring(str, bol, totLen - bol));
1666
0
        if (!stringpart.IsEmpty()) {
1667
0
          char16_t lastchar = stringpart[stringpart.Length()-1];
1668
0
          if ((lastchar == '\t') || (lastchar == ' ') ||
1669
0
             (lastchar == '\r') ||(lastchar == '\n')) {
1670
0
            mInWhitespace = true;
1671
0
          }
1672
0
          else {
1673
0
            mInWhitespace = false;
1674
0
          }
1675
0
        }
1676
0
        mEmptyLines=-1;
1677
0
        atFirstColumn = mAtFirstColumn && (totLen-bol)==0;
1678
0
        bol = totLen;
1679
0
      }
1680
0
      else {
1681
0
        // There is a newline
1682
0
        stringpart.Assign(Substring(str, bol, newline-bol));
1683
0
        mInWhitespace = true;
1684
0
        outputLineBreak = true;
1685
0
        mEmptyLines=0;
1686
0
        atFirstColumn = true;
1687
0
        bol = newline+1;
1688
0
        if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1689
0
          // There was a CRLF in the input. This used to be illegal and
1690
0
          // stripped by the parser. Apparently not anymore. Let's skip
1691
0
          // over the LF.
1692
0
          bol++;
1693
0
        }
1694
0
      }
1695
0
1696
0
      mCurrentLine.Truncate();
1697
0
      if (mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
1698
0
        if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928
1699
0
            !IsQuotedLine(stringpart) &&
1700
0
            !stringpart.EqualsLiteral("-- ") &&
1701
0
            !stringpart.EqualsLiteral("- -- "))
1702
0
          stringpart.Trim(" ", false, true, true);
1703
0
        if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart))
1704
0
          mCurrentLine.Append(char16_t(' '));
1705
0
      }
1706
0
      mCurrentLine.Append(stringpart);
1707
0
1708
0
      if (outputQuotes) {
1709
0
        // Note: this call messes with mAtFirstColumn
1710
0
        OutputQuotesAndIndent();
1711
0
      }
1712
0
1713
0
      Output(mCurrentLine);
1714
0
      if (outputLineBreak) {
1715
0
        Output(mLineBreak);
1716
0
      }
1717
0
      mAtFirstColumn = atFirstColumn;
1718
0
    }
1719
0
1720
0
    // Reset mCurrentLine.
1721
0
    mCurrentLine.Truncate();
1722
0
1723
#ifdef DEBUG_wrapping
1724
    printf("No wrapping: newline is %d, totLen is %d\n",
1725
           newline, totLen);
1726
#endif
1727
    return;
1728
0
  }
1729
0
1730
0
  // Intelligent handling of text
1731
0
  // If needed, strip out all "end of lines"
1732
0
  // and multiple whitespace between words
1733
0
  int32_t nextpos;
1734
0
  const char16_t * offsetIntoBuffer = nullptr;
1735
0
1736
0
  while (bol < totLen) {    // Loop over lines
1737
0
    // Find a place where we may have to do whitespace compression
1738
0
    nextpos = str.FindCharInSet(" \t\n\r", bol);
1739
#ifdef DEBUG_wrapping
1740
    nsAutoString remaining;
1741
    str.Right(remaining, totLen - bol);
1742
    foo = ToNewCString(remaining);
1743
    //    printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
1744
    //           bol, nextpos, totLen, foo);
1745
    free(foo);
1746
#endif
1747
1748
0
    if (nextpos == kNotFound) {
1749
0
      // The rest of the string
1750
0
      offsetIntoBuffer = str.get() + bol;
1751
0
      AddToLine(offsetIntoBuffer, totLen-bol);
1752
0
      bol=totLen;
1753
0
      mInWhitespace=false;
1754
0
    }
1755
0
    else {
1756
0
      // There's still whitespace left in the string
1757
0
      if (nextpos != 0 && (nextpos + 1) < totLen) {
1758
0
        offsetIntoBuffer = str.get() + nextpos;
1759
0
        // skip '\n' if it is between CJ chars
1760
0
        if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && IS_CJ_CHAR(offsetIntoBuffer[1])) {
1761
0
          offsetIntoBuffer = str.get() + bol;
1762
0
          AddToLine(offsetIntoBuffer, nextpos-bol);
1763
0
          bol = nextpos + 1;
1764
0
          continue;
1765
0
        }
1766
0
      }
1767
0
      // If we're already in whitespace and not preformatted, just skip it:
1768
0
      if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1769
0
          !(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1770
0
        // Skip whitespace
1771
0
        bol++;
1772
0
        continue;
1773
0
      }
1774
0
1775
0
      if (nextpos == bol) {
1776
0
        // Note that we are in whitespace.
1777
0
        mInWhitespace = true;
1778
0
        offsetIntoBuffer = str.get() + nextpos;
1779
0
        AddToLine(offsetIntoBuffer, 1);
1780
0
        bol++;
1781
0
        continue;
1782
0
      }
1783
0
1784
0
      mInWhitespace = true;
1785
0
1786
0
      offsetIntoBuffer = str.get() + bol;
1787
0
      if (mPreFormattedMail || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
1788
0
        // Preserve the real whitespace character
1789
0
        nextpos++;
1790
0
        AddToLine(offsetIntoBuffer, nextpos-bol);
1791
0
        bol = nextpos;
1792
0
      }
1793
0
      else {
1794
0
        // Replace the whitespace with a space
1795
0
        AddToLine(offsetIntoBuffer, nextpos-bol);
1796
0
        AddToLine(kSpace.get(),1);
1797
0
        bol = nextpos + 1; // Let's eat the whitespace
1798
0
      }
1799
0
    }
1800
0
  } // Continue looping over the string
1801
0
}
1802
1803
1804
/**
1805
 * Gets the value of an attribute in a string. If the function returns
1806
 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1807
 */
1808
nsresult
1809
nsPlainTextSerializer::GetAttributeValue(nsAtom* aName,
1810
                                         nsString& aValueRet)
1811
0
{
1812
0
  if (mElement) {
1813
0
    if (mElement->GetAttr(kNameSpaceID_None, aName, aValueRet)) {
1814
0
      return NS_OK;
1815
0
    }
1816
0
  }
1817
0
1818
0
  return NS_ERROR_NOT_AVAILABLE;
1819
0
}
1820
1821
/**
1822
 * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1823
 * In this case, we should ignore it.
1824
 */
1825
bool
1826
nsPlainTextSerializer::IsCurrentNodeConverted()
1827
0
{
1828
0
  nsAutoString value;
1829
0
  nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1830
0
  return (NS_SUCCEEDED(rv) &&
1831
0
          (value.EqualsIgnoreCase("moz-txt", 7) ||
1832
0
           value.EqualsIgnoreCase("\"moz-txt", 8)));
1833
0
}
1834
1835
1836
// static
1837
nsAtom*
1838
nsPlainTextSerializer::GetIdForContent(nsIContent* aContent)
1839
0
{
1840
0
  if (!aContent->IsHTMLElement()) {
1841
0
    return nullptr;
1842
0
  }
1843
0
1844
0
  nsAtom* localName = aContent->NodeInfo()->NameAtom();
1845
0
  return localName->IsStatic() ? localName : nullptr;
1846
0
}
1847
1848
bool
1849
nsPlainTextSerializer::IsInPre()
1850
0
{
1851
0
  return !mPreformatStack.empty() && mPreformatStack.top();
1852
0
}
1853
1854
bool
1855
nsPlainTextSerializer::IsElementPreformatted(Element* aElement)
1856
0
{
1857
0
  RefPtr<ComputedStyle> computedStyle =
1858
0
    nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1859
0
  if (computedStyle) {
1860
0
    const nsStyleText* textStyle = computedStyle->StyleText();
1861
0
    return textStyle->WhiteSpaceOrNewlineIsSignificant();
1862
0
  }
1863
0
  // Fall back to looking at the tag, in case there is no style information.
1864
0
  return GetIdForContent(aElement) == nsGkAtoms::pre;
1865
0
}
1866
1867
bool
1868
nsPlainTextSerializer::IsElementBlock(Element* aElement)
1869
0
{
1870
0
  RefPtr<ComputedStyle> computedStyle =
1871
0
    nsComputedDOMStyle::GetComputedStyleNoFlush(aElement, nullptr);
1872
0
  if (computedStyle) {
1873
0
    const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
1874
0
    return displayStyle->IsBlockOutsideStyle();
1875
0
  }
1876
0
  // Fall back to looking at the tag, in case there is no style information.
1877
0
  return nsContentUtils::IsHTMLBlock(aElement);
1878
0
}
1879
1880
/**
1881
 * This method is required only to identify LI's inside OL.
1882
 * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1883
 */
1884
bool
1885
nsPlainTextSerializer::IsInOL()
1886
0
{
1887
0
  int32_t i = mTagStackIndex;
1888
0
  while(--i >= 0) {
1889
0
    if (mTagStack[i] == nsGkAtoms::ol)
1890
0
      return true;
1891
0
    if (mTagStack[i] == nsGkAtoms::ul) {
1892
0
      // If a UL is reached first, LI belongs the UL nested in OL.
1893
0
      return false;
1894
0
    }
1895
0
  }
1896
0
  // We may reach here for orphan LI's.
1897
0
  return false;
1898
0
}
1899
1900
/*
1901
  @return 0 = no header, 1 = h1, ..., 6 = h6
1902
*/
1903
int32_t HeaderLevel(nsAtom* aTag)
1904
0
{
1905
0
  if (aTag == nsGkAtoms::h1) {
1906
0
    return 1;
1907
0
  }
1908
0
  if (aTag == nsGkAtoms::h2) {
1909
0
    return 2;
1910
0
  }
1911
0
  if (aTag == nsGkAtoms::h3) {
1912
0
    return 3;
1913
0
  }
1914
0
  if (aTag == nsGkAtoms::h4) {
1915
0
    return 4;
1916
0
  }
1917
0
  if (aTag == nsGkAtoms::h5) {
1918
0
    return 5;
1919
0
  }
1920
0
  if (aTag == nsGkAtoms::h6) {
1921
0
    return 6;
1922
0
  }
1923
0
  return 0;
1924
0
}
1925
1926
1927
/*
1928
 * This is an implementation of GetUnicharWidth() and
1929
 * GetUnicharStringWidth() as defined in
1930
 * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1931
 * <http://www.UNIX-systems.org/online.html>
1932
 *
1933
 * Markus Kuhn -- 2000-02-08 -- public domain
1934
 *
1935
 * Minor alterations to fit Mozilla's data types by Daniel Bratell
1936
 */
1937
1938
/* These functions define the column width of an ISO 10646 character
1939
 * as follows:
1940
 *
1941
 *    - The null character (U+0000) has a column width of 0.
1942
 *
1943
 *    - Other C0/C1 control characters and DEL will lead to a return
1944
 *      value of -1.
1945
 *
1946
 *    - Non-spacing and enclosing combining characters (general
1947
 *      category code Mn or Me in the Unicode database) have a
1948
 *      column width of 0.
1949
 *
1950
 *    - Spacing characters in the East Asian Wide (W) or East Asian
1951
 *      FullWidth (F) category as defined in Unicode Technical
1952
 *      Report #11 have a column width of 2.
1953
 *
1954
 *    - All remaining characters (including all printable
1955
 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
1956
 *      etc.) have a column width of 1.
1957
 *
1958
 * This implementation assumes that wchar_t characters are encoded
1959
 * in ISO 10646.
1960
 */
1961
1962
namespace {
1963
1964
struct interval
1965
{
1966
  uint16_t first;
1967
  uint16_t last;
1968
};
1969
1970
struct CombiningComparator
1971
{
1972
  const char16_t mUcs;
1973
0
  explicit CombiningComparator(char16_t aUcs) : mUcs(aUcs) {}
1974
0
  int operator()(const interval& combining) const {
1975
0
    if (mUcs > combining.last)
1976
0
      return 1;
1977
0
    if (mUcs < combining.first)
1978
0
      return -1;
1979
0
1980
0
    MOZ_ASSERT(combining.first <= mUcs);
1981
0
    MOZ_ASSERT(mUcs <= combining.last);
1982
0
    return 0;
1983
0
  }
1984
};
1985
1986
} // namespace
1987
1988
int32_t GetUnicharWidth(char16_t ucs)
1989
0
{
1990
0
  /* sorted list of non-overlapping intervals of non-spacing characters */
1991
0
  static const interval combining[] = {
1992
0
    { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
1993
0
    { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
1994
0
    { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1995
0
    { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
1996
0
    { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
1997
0
    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
1998
0
    { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
1999
0
    { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
2000
0
    { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
2001
0
    { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
2002
0
    { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
2003
0
    { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
2004
0
    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
2005
0
    { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
2006
0
    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
2007
0
    { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
2008
0
    { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
2009
0
    { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
2010
0
    { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
2011
0
    { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
2012
0
    { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
2013
0
    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
2014
0
    { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
2015
0
    { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
2016
0
    { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
2017
0
    { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
2018
0
    { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
2019
0
    { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
2020
0
    { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
2021
0
    { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
2022
0
    { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
2023
0
  };
2024
0
2025
0
  /* test for 8-bit control characters */
2026
0
  if (ucs == 0)
2027
0
    return 0;
2028
0
  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
2029
0
    return -1;
2030
0
2031
0
  /* first quick check for Latin-1 etc. characters */
2032
0
  if (ucs < combining[0].first)
2033
0
    return 1;
2034
0
2035
0
  /* binary search in table of non-spacing characters */
2036
0
  size_t idx;
2037
0
  if (BinarySearchIf(combining, 0, ArrayLength(combining),
2038
0
                     CombiningComparator(ucs), &idx)) {
2039
0
    return 0;
2040
0
  }
2041
0
2042
0
  /* if we arrive here, ucs is not a combining or C0/C1 control character */
2043
0
2044
0
  /* fast test for majority of non-wide scripts */
2045
0
  if (ucs < 0x1100)
2046
0
    return 1;
2047
0
2048
0
  return 1 +
2049
0
    ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
2050
0
     (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
2051
0
      ucs != 0x303f) ||                  /* CJK ... Yi */
2052
0
     (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
2053
0
     (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
2054
0
     (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
2055
0
     (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
2056
0
     (ucs >= 0xffe0 && ucs <= 0xffe6));
2057
0
}
2058
2059
2060
int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n)
2061
0
{
2062
0
  int32_t w, width = 0;
2063
0
2064
0
  for (;*pwcs && n-- > 0; pwcs++)
2065
0
    if ((w = GetUnicharWidth(*pwcs)) < 0)
2066
0
      ++width; // Taking 1 as the width of non-printable character, for bug# 94475.
2067
0
    else
2068
0
      width += w;
2069
0
2070
0
  return width;
2071
0
}