Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/parser/htmlparser/nsScanner.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=2 sw=2 et tw=78: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
//#define __INCREMENTAL 1
8
9
#include "nsScanner.h"
10
11
#include "mozilla/Attributes.h"
12
#include "mozilla/DebugOnly.h"
13
#include "mozilla/Encoding.h"
14
#include "nsDebug.h"
15
#include "nsReadableUtils.h"
16
#include "nsIInputStream.h"
17
#include "nsIFile.h"
18
#include "nsUTF8Utils.h" // for LossyConvertEncoding
19
#include "nsCRT.h"
20
#include "nsParser.h"
21
#include "nsCharsetSource.h"
22
23
/**
 *  Remembers the terminator list and derives a bit mask from it.
 *
 *  mFilter starts with every bit set; each terminator then clears the bits
 *  it has set. What remains are the bits that no terminator uses, so a
 *  character that has any of those bits set cannot be a terminator. The
 *  mask is meant to be used as a cheap pre-check before comparing against
 *  the individual terminators.
 *
 *  @param   aTerminateChars zero-terminated list of terminator characters
 */
nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars)
  : mChars(aTerminateChars)
  , mFilter(char16_t(~0)) // All bits set
{
  // Walk the list up to (not including) the terminating zero.
  for (const char16_t* cursor = aTerminateChars; *cursor; ++cursor) {
    mFilter &= ~*cursor;
  }
}
41
42
/**
43
 *  Use this constructor if you want i/o to be based on 
44
 *  a single string you hand in during construction.
45
 *  This short cut was added for Javascript.
46
 *
47
 *  @update  gess 5/12/98
48
 *  @param   aMode represents the parser mode (nav, other)
49
 *  @return  
50
 */
51
nsScanner::nsScanner(const nsAString& anHTMLString)
52
0
{
53
0
  MOZ_COUNT_CTOR(nsScanner);
54
0
55
0
  mSlidingBuffer = nullptr;
56
0
  if (AppendToBuffer(anHTMLString)) {
57
0
    mSlidingBuffer->BeginReading(mCurrentPosition);
58
0
  } else {
59
0
    /* XXX see hack below, re: bug 182067 */
60
0
    memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
61
0
    mEndPosition = mCurrentPosition;
62
0
  }
63
0
  mMarkPosition = mCurrentPosition;
64
0
  mIncremental = false;
65
0
  mUnicodeDecoder = nullptr;
66
0
  mCharsetSource = kCharsetUninitialized;
67
0
}
68
69
/**
70
 *  Use this constructor if you want i/o to be based on strings 
71
 *  the scanner receives. If you pass a null filename, you
72
 *  can still provide data to the scanner via append.
73
 */
74
nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
75
  : mFilename(aFilename)
76
0
{
77
0
  MOZ_COUNT_CTOR(nsScanner);
78
0
  NS_ASSERTION(!aCreateStream, "This is always true.");
79
0
80
0
  mSlidingBuffer = nullptr;
81
0
82
0
  // XXX This is a big hack.  We need to initialize the iterators to something.
83
0
  // What matters is that mCurrentPosition == mEndPosition, so that our methods
84
0
  // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
85
0
  // so that we have some hope of catching null pointer dereferences associated
86
0
  // with this hack. --darin
87
0
  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
88
0
  mMarkPosition = mCurrentPosition;
89
0
  mEndPosition = mCurrentPosition;
90
0
91
0
  mIncremental = true;
92
0
93
0
  mUnicodeDecoder = nullptr;
94
0
  mCharsetSource = kCharsetUninitialized;
95
0
  // XML defaults to UTF-8 and about:blank is UTF-8, too.
96
0
  SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault);
97
0
}
98
99
/**
 *  Records the document encoding and, when it differs from the current one,
 *  creates a fresh decoder for it.
 *
 *  A request whose source ranks below the current mCharsetSource is ignored.
 *  Otherwise mCharsetSource is updated even when the encoding name itself
 *  turns out to be unchanged.
 *
 *  @param   aEncoding the encoding to switch to
 *  @param   aSource   priority of the caller's charset information
 *  @return  always NS_OK
 */
nsresult nsScanner::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
                                       int32_t aSource)
{
  // Priority is lower than the current one: keep what we have.
  if (aSource < mCharsetSource) {
    return NS_OK;
  }
  mCharsetSource = aSource;

  nsCString requestedName;
  aEncoding->Name(requestedName);

  const bool unchanged =
    !mCharset.IsEmpty() && requestedName.Equals(mCharset);
  if (!unchanged) {
    // Different encoding (or none set yet): remember it and replace the
    // decoder.
    mCharset.Assign(requestedName);
    mUnicodeDecoder = aEncoding->NewDecoderWithBOMRemoval();
  }

  return NS_OK;
}
120
121
122
/**
123
 *  default destructor
124
 *  
125
 *  @update  gess 3/25/98
126
 *  @param   
127
 *  @return  
128
 */
129
0
nsScanner::~nsScanner() {
130
0
131
0
  delete mSlidingBuffer;
132
0
133
0
  MOZ_COUNT_DTOR(nsScanner);
134
0
}
135
136
/**
137
 *  Resets current offset position of input stream to marked position. 
138
 *  This allows us to back up to this point if the need should arise, 
139
 *  such as when tokenization gets interrupted.
140
 *  NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
141
 *
142
 *  @update  gess 5/12/98
143
 *  @param   
144
 *  @return  
145
 */
146
0
void nsScanner::RewindToMark(void){
147
0
  if (mSlidingBuffer) {
148
0
    mCurrentPosition = mMarkPosition;
149
0
  }
150
0
}
151
152
153
/**
154
 *  Records current offset position in input stream. This allows us
155
 *  to back up to this point if the need should arise, such as when
156
 *  tokenization gets interrupted.
157
 *
158
 *  @update  gess 7/29/98
159
 *  @param   
160
 *  @return  
161
 */
162
0
int32_t nsScanner::Mark() {
163
0
  int32_t distance = 0;
164
0
  if (mSlidingBuffer) {
165
0
    nsScannerIterator oldStart;
166
0
    mSlidingBuffer->BeginReading(oldStart);
167
0
168
0
    distance = Distance(oldStart, mCurrentPosition);
169
0
170
0
    mSlidingBuffer->DiscardPrefix(mCurrentPosition);
171
0
    mSlidingBuffer->BeginReading(mCurrentPosition);
172
0
    mMarkPosition = mCurrentPosition;
173
0
  }
174
0
175
0
  return distance;
176
0
}
177
178
/** 
179
 * Insert data to our underlying input buffer as
180
 * if it were read from an input stream.
181
 *
182
 * @update  harishd 01/12/99
183
 * @return  error code 
184
 */
185
0
bool nsScanner::UngetReadable(const nsAString& aBuffer) {
186
0
  if (!mSlidingBuffer) {
187
0
    return false;
188
0
  }
189
0
190
0
  mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
191
0
  mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
192
0
  mSlidingBuffer->EndReading(mEndPosition);
193
0
 
194
0
  return true;
195
0
}
196
197
/** 
198
 * Append data to our underlying input buffer as
199
 * if it were read from an input stream.
200
 *
201
 * @update  gess4/3/98
202
 * @return  error code 
203
 */
204
0
nsresult nsScanner::Append(const nsAString& aBuffer) {
205
0
  if (!AppendToBuffer(aBuffer))
206
0
    return NS_ERROR_OUT_OF_MEMORY;
207
0
  return NS_OK;
208
0
}
209
210
/**
211
 *  
212
 *  
213
 *  @update  gess 5/21/98
214
 *  @param   
215
 *  @return  
216
 */
217
nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen)
218
0
{
219
0
  nsresult res = NS_OK;
220
0
  if (mUnicodeDecoder) {
221
0
    CheckedInt<size_t> needed = mUnicodeDecoder->MaxUTF16BufferLength(aLen);
222
0
    if (!needed.isValid()) {
223
0
      return NS_ERROR_OUT_OF_MEMORY;
224
0
    }
225
0
    CheckedInt<uint32_t> allocLen(1); // null terminator due to legacy sadness
226
0
    allocLen += needed.value();
227
0
    if (!allocLen.isValid()) {
228
0
      return NS_ERROR_OUT_OF_MEMORY;
229
0
    }
230
0
    nsScannerString::Buffer* buffer =
231
0
      nsScannerString::AllocBuffer(allocLen.value());
232
0
    NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
233
0
    char16_t *unichars = buffer->DataStart();
234
0
235
0
    uint32_t result;
236
0
    size_t read;
237
0
    size_t written;
238
0
    Tie(result, read, written) =
239
0
      mUnicodeDecoder->DecodeToUTF16WithoutReplacement(
240
0
        AsBytes(MakeSpan(aBuffer, aLen)),
241
0
        MakeSpan(unichars, needed.value()),
242
0
        false); // Retain bug about failure to handle EOF
243
0
    MOZ_ASSERT(result != kOutputFull);
244
0
    MOZ_ASSERT(read <= aLen);
245
0
    MOZ_ASSERT(written <= needed.value());
246
0
    if (result != kInputEmpty) {
247
0
      // Since about:blank is empty, this line runs only for XML. Use a
248
0
      // character that's illegal in XML instead of U+FFFD in order to make
249
0
      // expat flag the error. There is no need to loop and convert more, since
250
0
      // expat will stop here anyway.
251
0
      unichars[written++] = 0xFFFF;
252
0
    }
253
0
    buffer->SetDataLength(written);
254
0
    // Don't propagate return code of unicode decoder
255
0
    // since it doesn't reflect on our success or failure
256
0
    // - Ref. bug 87110
257
0
    res = NS_OK; 
258
0
    if (!AppendToBuffer(buffer))
259
0
      res = NS_ERROR_OUT_OF_MEMORY;
260
0
  }
261
0
  else {
262
0
    NS_WARNING("No decoder found.");
263
0
    res = NS_ERROR_FAILURE;
264
0
  }
265
0
266
0
  return res;
267
0
}
268
269
/**
270
 *  retrieve next char from scanners internal input stream
271
 *  
272
 *  @update  gess 3/25/98
273
 *  @param   
274
 *  @return  error code reflecting read status
275
 */
276
0
nsresult nsScanner::GetChar(char16_t& aChar) {
277
0
  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
278
0
    aChar = 0;
279
0
    return NS_ERROR_HTMLPARSER_EOF;
280
0
  }
281
0
282
0
  aChar = *mCurrentPosition++;
283
0
284
0
  return NS_OK;
285
0
}
286
287
/**
 *  Rebinds aSubstring to the range [aStart, aEnd) of the scanner's buffer.
 *
 *  NOTE(review): unlike most other methods here, this one dereferences
 *  mSlidingBuffer without a null check - presumably callers only use it
 *  once data has been appended; confirm against call sites.
 *
 *  @param   aSubstring the substring object to rebind
 *  @param   aStart     start of the range
 *  @param   aEnd       end of the range
 */
void nsScanner::BindSubstring(nsScannerSubstring& aSubstring,
                              const nsScannerIterator& aStart,
                              const nsScannerIterator& aEnd)
{
  auto& source = *mSlidingBuffer;
  aSubstring.Rebind(source, aStart, aEnd);
}
291
292
/**
 *  Copies the scanner's current read position into aPosition.
 *
 *  @param   aPosition receives a copy of mCurrentPosition
 */
void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
{
  aPosition = mCurrentPosition;
}
296
297
/**
 *  Copies the scanner's end-of-input position into aPosition.
 *
 *  @param   aPosition receives a copy of mEndPosition
 */
void nsScanner::EndReading(nsScannerIterator& aPosition)
{
  aPosition = mEndPosition;
}
301
 
302
/**
 *  Moves the current read position to aPosition. Does nothing when the
 *  scanner has no buffer.
 *
 *  @param   aPosition  the new current position
 *  @param   aTerminate when true and the new position equals the end
 *                      position, the mark is moved there as well and
 *                      everything before it is handed to DiscardPrefix()
 */
void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate)
{
  if (!mSlidingBuffer) {
    return;
  }

  mCurrentPosition = aPosition;

  if (!aTerminate || !(mCurrentPosition == mEndPosition)) {
    return;
  }

  mMarkPosition = mCurrentPosition;
  mSlidingBuffer->DiscardPrefix(mCurrentPosition);
}
312
313
/**
 *  Adds an already-filled buffer to the scanner's input.
 *
 *  The first call creates the sliding buffer around aBuf and positions the
 *  current, mark and end iterators on it. Later calls append aBuf to the
 *  existing buffer and refresh the end iterator; if the scanner was sitting
 *  at the end of its input, the current position is re-fetched from the
 *  start of the buffer.
 *
 *  @param   aBuf the buffer to add
 *  @return  always true; the bool return is kept for callers that map a
 *           false result to NS_ERROR_OUT_OF_MEMORY
 */
bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf)
{
  if (!mSlidingBuffer) {
    // A plain new-expression never yields null (it aborts or throws on
    // allocation failure), so the result needs no null check.
    mSlidingBuffer = new nsScannerString(aBuf);
    mSlidingBuffer->BeginReading(mCurrentPosition);
    mMarkPosition = mCurrentPosition;
    mSlidingBuffer->EndReading(mEndPosition);
    return true;
  }

  mSlidingBuffer->AppendBuffer(aBuf);
  if (mCurrentPosition == mEndPosition) {
    // We were at the end of the old input: pick the current position up
    // again from the start of the buffer.
    mSlidingBuffer->BeginReading(mCurrentPosition);
  }
  mSlidingBuffer->EndReading(mEndPosition);

  return true;
}
333
334
/**
335
 *  call this to copy bytes out of the scanner that have not yet been consumed
336
 *  by the tokenization process.
337
 *  
338
 *  @update  gess 5/12/98
339
 *  @param   aCopyBuffer is where the scanner buffer will be copied to
340
 *  @return  true if OK or false on OOM
341
 */
342
0
bool nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
343
0
  if (!mSlidingBuffer) {
344
0
    aCopyBuffer.Truncate();
345
0
    return true;
346
0
  }
347
0
348
0
  nsScannerIterator start, end;
349
0
  start = mCurrentPosition;
350
0
  end = mEndPosition;
351
0
352
0
  return CopyUnicodeTo(start, end, aCopyBuffer);
353
0
}
354
355
/**
356
 *  Retrieve the name of the file that the scanner is reading from.
357
 *  In some cases, it's just a given name, because the scanner isn't
358
 *  really reading from a file.
359
 *  
360
 *  @update  gess 5/12/98
361
 *  @return  
362
 */
363
0
nsString& nsScanner::GetFilename(void) {
364
0
  return mFilename;
365
0
}
366
367
/**
368
 *  Conduct self test. Actually, selftesting for this class
369
 *  occurs in the parser selftest.
370
 *  
371
 *  @update  gess 3/25/98
372
 *  @param   
373
 *  @return  
374
 */
375
376
0
void nsScanner::SelfTest(void) {
377
#ifdef _DEBUG
378
#endif
379
}