Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/dom/base/nsHTMLContentSerializer.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
/*
8
 * nsIContentSerializer implementation that can be used with an
9
 * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
10
 * string that could be parsed into more or less the original DOM.
11
 */
12
13
#include "nsHTMLContentSerializer.h"
14
15
#include "nsIContent.h"
16
#include "nsIDocument.h"
17
#include "nsElementTable.h"
18
#include "nsNameSpaceManager.h"
19
#include "nsString.h"
20
#include "nsUnicharUtils.h"
21
#include "nsIServiceManager.h"
22
#include "nsIDocumentEncoder.h"
23
#include "nsGkAtoms.h"
24
#include "nsIURI.h"
25
#include "nsNetUtil.h"
26
#include "nsEscape.h"
27
#include "nsCRT.h"
28
#include "nsContentUtils.h"
29
#include "nsIScriptElement.h"
30
#include "nsAttrName.h"
31
#include "nsIDocShell.h"
32
#include "nsIEditor.h"
33
#include "nsIHTMLEditor.h"
34
#include "mozilla/dom/Element.h"
35
#include "nsParserConstants.h"
36
37
using namespace mozilla::dom;
38
39
/**
 * Factory function: instantiates an nsHTMLContentSerializer and returns it
 * through the out-parameter.
 *
 * @param aSerializer receives the newly created serializer.
 * @return always NS_OK.
 */
nsresult
NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
{
  RefPtr<nsHTMLContentSerializer> serializer(new nsHTMLContentSerializer());
  serializer.forget(aSerializer);
  return NS_OK;
}
46
47
// Constructor: flags this instance as an HTML serializer by setting
// mIsHTMLSerializer.
nsHTMLContentSerializer::nsHTMLContentSerializer()
{
  mIsHTMLSerializer = true;
}
51
52
// Destructor: nothing to release explicitly.
nsHTMLContentSerializer::~nsHTMLContentSerializer() = default;
55
56
57
/**
 * Nothing is written at the start of a document: both arguments are ignored
 * and NS_OK is returned unconditionally.
 */
NS_IMETHODIMP
nsHTMLContentSerializer::AppendDocumentStart(nsIDocument* aDocument,
                                             nsAString& aStr)
{
  return NS_OK;
}
63
64
bool
65
nsHTMLContentSerializer::SerializeHTMLAttributes(Element* aElement,
66
                                                 Element* aOriginalElement,
67
                                                 nsAString& aTagPrefix,
68
                                                 const nsAString& aTagNamespaceURI,
69
                                                 nsAtom* aTagName,
70
                                                 int32_t aNamespace,
71
                                                 nsAString& aStr)
72
0
{
73
0
  MaybeSerializeIsValue(aElement, aStr);
74
0
75
0
  int32_t count = aElement->GetAttrCount();
76
0
  if (!count)
77
0
    return true;
78
0
79
0
  nsresult rv;
80
0
  nsAutoString valueStr;
81
0
  NS_NAMED_LITERAL_STRING(_mozStr, "_moz");
82
0
83
0
  for (int32_t index = 0; index < count; index++) {
84
0
    const nsAttrName* name = aElement->GetAttrNameAt(index);
85
0
    int32_t namespaceID = name->NamespaceID();
86
0
    nsAtom* attrName = name->LocalName();
87
0
88
0
    // Filter out any attribute starting with [-|_]moz
89
0
    nsDependentAtomString attrNameStr(attrName);
90
0
    if (StringBeginsWith(attrNameStr, NS_LITERAL_STRING("_moz")) ||
91
0
        StringBeginsWith(attrNameStr, NS_LITERAL_STRING("-moz"))) {
92
0
      continue;
93
0
    }
94
0
    aElement->GetAttr(namespaceID, attrName, valueStr);
95
0
96
0
    //
97
0
    // Filter out special case of <br type="_moz"> or <br _moz*>,
98
0
    // used by the editor.  Bug 16988.  Yuck.
99
0
    //
100
0
    if (aTagName == nsGkAtoms::br && aNamespace == kNameSpaceID_XHTML &&
101
0
        attrName == nsGkAtoms::type && namespaceID == kNameSpaceID_None &&
102
0
        StringBeginsWith(valueStr, _mozStr)) {
103
0
      continue;
104
0
    }
105
0
106
0
    if (mIsCopying && mIsFirstChildOfOL &&
107
0
        aTagName == nsGkAtoms::li && aNamespace == kNameSpaceID_XHTML &&
108
0
        attrName == nsGkAtoms::value && namespaceID == kNameSpaceID_None){
109
0
      // This is handled separately in SerializeLIValueAttribute()
110
0
      continue;
111
0
    }
112
0
    bool isJS = IsJavaScript(aElement, attrName, namespaceID, valueStr);
113
0
114
0
    if (((attrName == nsGkAtoms::href &&
115
0
          (namespaceID == kNameSpaceID_None ||
116
0
           namespaceID == kNameSpaceID_XLink)) ||
117
0
         (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) {
118
0
      // Make all links absolute when converting only the selection:
119
0
      if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
120
0
        // Would be nice to handle OBJECT tags, but that gets more complicated
121
0
        // since we have to search the tag list for CODEBASE as well. For now,
122
0
        // just leave them relative.
123
0
        nsCOMPtr<nsIURI> uri = aElement->GetBaseURI();
124
0
        if (uri) {
125
0
          nsAutoString absURI;
126
0
          rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
127
0
          if (NS_SUCCEEDED(rv)) {
128
0
            valueStr = absURI;
129
0
          }
130
0
        }
131
0
      }
132
0
    }
133
0
134
0
    if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
135
0
        aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content
136
0
        && namespaceID == kNameSpaceID_None) {
137
0
      // If we're serializing a <meta http-equiv="content-type">,
138
0
      // use the proper value, rather than what's in the document.
139
0
      nsAutoString header;
140
0
      aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
141
0
      if (header.LowerCaseEqualsLiteral("content-type")) {
142
0
        valueStr = NS_LITERAL_STRING("text/html; charset=") +
143
0
          NS_ConvertASCIItoUTF16(mCharset);
144
0
      }
145
0
    }
146
0
147
0
    nsDependentAtomString nameStr(attrName);
148
0
    nsAutoString prefix;
149
0
    if (namespaceID == kNameSpaceID_XML) {
150
0
      prefix.AssignLiteral(u"xml");
151
0
    } else if (namespaceID == kNameSpaceID_XLink) {
152
0
      prefix.AssignLiteral(u"xlink");
153
0
    }
154
0
155
0
    // Expand shorthand attribute.
156
0
    if (aNamespace == kNameSpaceID_XHTML &&
157
0
        namespaceID == kNameSpaceID_None &&
158
0
        IsShorthandAttr(attrName, aTagName) &&
159
0
        valueStr.IsEmpty()) {
160
0
      valueStr = nameStr;
161
0
    }
162
0
    NS_ENSURE_TRUE(SerializeAttr(prefix, nameStr, valueStr,
163
0
                                 aStr, !isJS), false);
164
0
  }
165
0
166
0
  return true;
167
0
}
168
169
/**
 * Appends the start tag of aElement ("<name ...attributes...>") to aStr,
 * preceded by whatever newline, indentation or pending space is due.
 *
 * @param aElement          element to open; must be non-null
 *                          (NS_ENSURE_ARG).
 * @param aOriginalElement  forwarded to IsFirstChildOfOL(),
 *                          SerializeHTMLAttributes() and AfterElementStart().
 * @param aStr              output string that is appended to.
 * @return NS_OK on success; the status produced by CheckElementStart() when
 *         it declines or fails; NS_ERROR_OUT_OF_MEMORY when any append
 *         helper reports failure.
 */
NS_IMETHODIMP
nsHTMLContentSerializer::AppendElementStart(Element* aElement,
                                            Element* aOriginalElement,
                                            nsAString& aStr)
{
  NS_ENSURE_ARG(aElement);

  bool forceFormat = false;
  nsresult rv = NS_OK;
  if (!CheckElementStart(aElement, forceFormat, aStr, rv)) {
    // AppendElementEnd() for this element is going to
    // MaybeLeaveFromPreContent(), so MaybeEnterInPreContent() now to keep
    // PreLevel() from getting confused.
    MaybeEnterInPreContent(aElement);
    return rv;
  }
  NS_ENSURE_SUCCESS(rv, rv);

  nsAtom* name = aElement->NodeInfo()->NameAtom();
  int32_t ns = aElement->GetNameSpaceID();
  const bool isXHTML = (ns == kNameSpaceID_XHTML);

  // Deliberately a callable rather than a cached flag: it is consulted both
  // before and after MaybeEnterInPreContent(), exactly where each check has
  // to happen.
  auto formattingActive = [&]() -> bool {
    return (mDoFormat || forceFormat) && !mDoRaw && !PreLevel();
  };

  const bool breakBeforeOpen = LineBreakBeforeOpen(ns, name);
  const bool formatLeadIn = formattingActive();

  if (formatLeadIn) {
    if (mColPos && breakBeforeOpen) {
      NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
    } else {
      NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY);
    }
  }

  // mColPos is read only after the optional newline above has been written.
  if (formatLeadIn && !mColPos) {
    NS_ENSURE_TRUE(AppendIndentation(aStr), NS_ERROR_OUT_OF_MEMORY);
  } else if (mAddSpace) {
    // The pending-space flag is cleared even if the append fails.
    bool result = AppendToString(char16_t(' '), aStr);
    mAddSpace = false;
    NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
  } else if (!formatLeadIn) {
    NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(aStr), NS_ERROR_OUT_OF_MEMORY);
  }

  // Always reset, to avoid false newlines on the paths where
  // MaybeAddNewlineForRootNode() was not called.
  mAddNewlineForRootNode = false;

  NS_ENSURE_TRUE(AppendToString(kLessThan, aStr), NS_ERROR_OUT_OF_MEMORY);
  NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), aStr), NS_ERROR_OUT_OF_MEMORY);

  MaybeEnterInPreContent(aElement);

  // For block elements, we increase the indentation.
  if (formattingActive()) {
    NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY);
  }

  // OL and LI elements are tracked while copying so that the ordinal number
  // of each LI can be determined.
  if (mIsCopying && isXHTML) {
    if (name == nsGkAtoms::ol) {
      // Record the OL's "start" value on the state stack.  One is subtracted
      // because every LI increments the counter before using it; a value
      // that does not parse as an integer counts as 0.
      nsAutoString startAttr;
      aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, startAttr);

      int32_t startVal = 0;
      if (!startAttr.IsEmpty()) {
        nsresult parseRv = NS_OK;
        const int32_t parsed = startAttr.ToInteger(&parseRv);
        startVal = NS_SUCCEEDED(parseRv) ? parsed - 1 : 0;
      }
      mOLStateStack.AppendElement(olState(startVal, true));
    } else if (name == nsGkAtoms::li) {
      mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
      if (mIsFirstChildOfOL) {
        // If OL is parent of this LI, serialize attributes in different manner.
        NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement, aStr), NS_ERROR_OUT_OF_MEMORY);
      }
    }
  }

  // Even an LI handled above still goes through here, for every attribute
  // other than "value".
  nsAutoString dummyPrefix;
  NS_ENSURE_TRUE(SerializeHTMLAttributes(aElement,
                                         aOriginalElement,
                                         dummyPrefix,
                                         EmptyString(),
                                         name,
                                         ns,
                                         aStr), NS_ERROR_OUT_OF_MEMORY);

  NS_ENSURE_TRUE(AppendToString(kGreaterThan, aStr), NS_ERROR_OUT_OF_MEMORY);

  // Entity encoding is switched off (counted) inside these elements;
  // AppendElementEnd() performs the matching decrement.
  const bool disablesEntityEncoding =
    isXHTML &&
    (name == nsGkAtoms::script ||
     name == nsGkAtoms::style ||
     name == nsGkAtoms::noscript ||
     name == nsGkAtoms::noframes);
  if (disablesEntityEncoding) {
    ++mDisableEntityEncoding;
  }

  if (formattingActive() && LineBreakAfterOpen(ns, name)) {
    NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
  }

  NS_ENSURE_TRUE(AfterElementStart(aElement, aOriginalElement, aStr), NS_ERROR_OUT_OF_MEMORY);

  return NS_OK;
}
292
293
/**
 * Appends the end tag of aElement ("</name>") to aStr and unwinds the
 * per-element state set up by AppendElementStart(): the entity-encoding
 * counter, indentation, the OL state stack, pre-content tracking and the
 * in-body counter.
 *
 * No end tag is written for a malformed <script> (when
 * ShouldMaintainPreLevel() holds) or for an XHTML-namespace element that
 * nsHTMLElement::IsContainer() reports as a non-container.
 *
 * @param aElement element to close; must be non-null (NS_ENSURE_ARG).
 * @param aStr     output string that is appended to.
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY when an append helper fails.
 */
NS_IMETHODIMP
nsHTMLContentSerializer::AppendElementEnd(Element* aElement, nsAString& aStr)
{
  NS_ENSURE_ARG(aElement);

  nsAtom* name = aElement->NodeInfo()->NameAtom();
  int32_t ns = aElement->GetNameSpaceID();
  const bool isXHTML = (ns == kNameSpaceID_XHTML);

  // Mirror of the increment performed in AppendElementStart().
  const bool disablesEntityEncoding =
    isXHTML &&
    (name == nsGkAtoms::script ||
     name == nsGkAtoms::style ||
     name == nsGkAtoms::noscript ||
     name == nsGkAtoms::noframes);
  if (disablesEntityEncoding) {
    --mDisableEntityEncoding;
  }

  const bool forceFormat =
    !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
    aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);

  // A callable rather than a cached flag: it is consulted on both sides of
  // the PreLevel() / MaybeLeaveFromPreContent() updates below.
  auto formattingActive = [&]() -> bool {
    return (mDoFormat || forceFormat) && !mDoRaw && !PreLevel();
  };

  if (formattingActive()) {
    DecrIndentation(name);
  }

  if (name == nsGkAtoms::script) {
    nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);

    if (ShouldMaintainPreLevel() && script && script->IsMalformed()) {
      // A malformed script tag means the end tag was missing in the source.
      // Imitate that here by not serializing the end tag.
      --PreLevel();
      return NS_OK;
    }
  } else if (mIsCopying && name == nsGkAtoms::ol && isXHTML) {
    NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
    // Every OL start tag is supposed to have pushed an olState, so there
    // should always be one to pop; guard anyway.
    if (!mOLStateStack.IsEmpty()) {
      mOLStateStack.RemoveLastElement();
    }
  }

  if (isXHTML &&
      !nsHTMLElement::IsContainer(nsHTMLTags::CaseSensitiveAtomTagToId(name))) {
    // Keep this in sync with the cleanup at the end of this method.
    MOZ_ASSERT(name != nsGkAtoms::body);
    MaybeLeaveFromPreContent(aElement);
    return NS_OK;
  }

  const bool formatLeadIn = formattingActive();
  if (formatLeadIn) {
    const bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name);
    if (mColPos && lineBreakBeforeClose) {
      NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
    }
  }

  // mColPos is read only after the optional newline above has been written.
  if (formatLeadIn && !mColPos) {
    NS_ENSURE_TRUE(AppendIndentation(aStr), NS_ERROR_OUT_OF_MEMORY);
  } else if (mAddSpace) {
    // The pending-space flag is cleared even if the append fails.
    bool result = AppendToString(char16_t(' '), aStr);
    mAddSpace = false;
    NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
  }

  NS_ENSURE_TRUE(AppendToString(kEndTag, aStr), NS_ERROR_OUT_OF_MEMORY);
  NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), aStr), NS_ERROR_OUT_OF_MEMORY);
  NS_ENSURE_TRUE(AppendToString(kGreaterThan, aStr), NS_ERROR_OUT_OF_MEMORY);

  // Keep this cleanup in sync with the IsContainer() early return above.
  MaybeLeaveFromPreContent(aElement);

  if (formattingActive() && LineBreakAfterClose(ns, name)) {
    NS_ENSURE_TRUE(AppendNewLineToString(aStr), NS_ERROR_OUT_OF_MEMORY);
  } else {
    MaybeFlagNewlineForRootNode(aElement);
  }

  if (isXHTML && name == nsGkAtoms::body) {
    --mInBody;
  }

  return NS_OK;
}
390
391
// Highest code unit covered by the entity tables below (U+00A0).
static const uint16_t kValNBSP = 160;

// Entity lookup for text content, indexed by code unit (0..kValNBSP).
// Each entry is an index into kEntityStrings[]; 0 means "emit as is".
// The comment in front of each row is the code unit of its first entry.
static const uint8_t kEntities[] = {
  /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  30 */ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0,  // 38 '&'
  /*  40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  60 */ 3, 0, 4, 0, 0, 0, 0, 0, 0, 0,  // 60 '<', 62 '>'
  /*  70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 100 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 110 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 120 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 130 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 140 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 150 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 160 */ 5                              // 160 no-break space
};

// Entity lookup for attribute values, indexed by code unit (0..kValNBSP).
// Same as kEntities[] except that '"' (34) is translated as well.
static const uint8_t kAttrEntities[] = {
  /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  30 */ 0, 0, 0, 0, 1, 0, 0, 0, 2, 0,  // 34 '"', 38 '&'
  /*  40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  60 */ 3, 0, 4, 0, 0, 0, 0, 0, 0, 0,  // 60 '<', 62 '>'
  /*  70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 100 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 110 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 120 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 130 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 140 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 150 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 160 */ 5                              // 160 no-break space
};

// Replacement text for each non-zero value stored in the tables above.
static const char* const kEntityStrings[] = {
  /* 0 */ nullptr,
  /* 1 */ "&quot;",
  /* 2 */ "&amp;",
  /* 3 */ "&lt;",
  /* 4 */ "&gt;",
  /* 5 */ "&nbsp;"
};
447
448
uint32_t FindNextBasicEntity(const nsAString& aStr,
449
                             const uint32_t aLen,
450
                             uint32_t aIndex,
451
                             const uint8_t* aEntityTable,
452
                             const char** aEntity)
453
0
{
454
0
  for (; aIndex < aLen; ++aIndex) {
455
0
    // for each character in this chunk, check if it
456
0
    // needs to be replaced
457
0
    char16_t val = aStr[aIndex];
458
0
    if (val <= kValNBSP && aEntityTable[val]) {
459
0
      *aEntity = kEntityStrings[aEntityTable[val]];
460
0
      return aIndex;
461
0
    }
462
0
  }
463
0
  return aIndex;
464
0
}
465
466
bool
467
nsHTMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
468
                                                     nsAString& aOutputStr)
469
0
{
470
0
  if (mBodyOnly && !mInBody) {
471
0
    return true;
472
0
  }
473
0
474
0
  if (mDisableEntityEncoding) {
475
0
    return aOutputStr.Append(aStr, mozilla::fallible);
476
0
  }
477
0
478
0
  if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities)) {
479
0
    const uint8_t* entityTable = mInAttribute ? kAttrEntities : kEntities;
480
0
    uint32_t start = 0;
481
0
    const uint32_t len = aStr.Length();
482
0
    for (uint32_t i = 0; i < len; ++i) {
483
0
      const char* entity = nullptr;
484
0
      i = FindNextBasicEntity(aStr, len, i, entityTable, &entity);
485
0
      uint32_t normalTextLen = i - start;
486
0
      if (normalTextLen) {
487
0
        NS_ENSURE_TRUE(aOutputStr.Append(Substring(aStr, start, normalTextLen),
488
0
                                         mozilla::fallible), false);
489
0
      }
490
0
      if (entity) {
491
0
        NS_ENSURE_TRUE(aOutputStr.AppendASCII(entity, mozilla::fallible), false);
492
0
        start = i + 1;
493
0
      }
494
0
    }
495
0
    return true;
496
0
  } else {
497
0
    NS_ENSURE_TRUE(nsXMLContentSerializer::AppendAndTranslateEntities(aStr, aOutputStr), false);
498
0
  }
499
0
500
0
  return true;
501
0
}