Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/parser/htmlparser/nsScannerString.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim:set ts=2 sw=2 sts=2 et cindent: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#ifndef nsScannerString_h___
8
#define nsScannerString_h___
9
10
#include "nsString.h"
11
#include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12
#include "mozilla/LinkedList.h"
13
#include <algorithm>
14
15
16
  /**
17
   * NOTE: nsScannerString (and the other classes defined in this file) are
18
   * not related to nsAString or any of the other xpcom/string classes.
19
   *
20
   * nsScannerString is based on the nsSlidingString implementation that used
21
   * to live in xpcom/string.  Now that nsAString is limited to representing
22
   * only single fragment strings, nsSlidingString can no longer be used.
23
   *
24
   * An advantage to this design is that it does not employ any virtual
25
   * functions.
26
   *
27
   * This file uses SCC-style indenting in deference to the nsSlidingString
28
   * code from which this code is derived ;-)
29
   */
30
31
class nsScannerIterator;
32
class nsScannerSubstring;
33
class nsScannerString;
34
35
36
  /**
37
   * nsScannerBufferList
38
   *
39
   * This class maintains a list of heap-allocated Buffer objects.  The buffers
40
   * are maintained in a circular linked list.  Each buffer has a usage count
41
   * that is decremented by the owning nsScannerSubstring.
42
   *
43
   * The buffer list itself is reference counted.  This allows the buffer list
44
   * to be shared by multiple nsScannerSubstring objects.  The reference
45
   * counting is not threadsafe, which is not at all a requirement.
46
   *
47
   * When a nsScannerSubstring releases its reference to a buffer list, it
48
   * decrements the usage count of the first buffer in the buffer list that it
49
   * was referencing.  It informs the buffer list that it can discard buffers
50
   * starting at that prefix.  The buffer list will do so if the usage count of
51
   * that buffer is 0 and if it is the first buffer in the list.  It will
52
   * continue to prune buffers starting from the front of the buffer list until
53
   * it finds a buffer that has a usage count that is non-zero.
54
   */
55
class nsScannerBufferList
56
  {
57
    public:
58
59
        /**
60
         * Buffer objects are directly followed by a data segment.  The start
61
         * of the data segment is determined by increment the |this| pointer
62
         * by 1 unit.
63
         */
64
      class Buffer : public mozilla::LinkedListElement<Buffer>
65
        {
66
          public:
67
68
0
            void IncrementUsageCount() { ++mUsageCount; }
69
0
            void DecrementUsageCount() { --mUsageCount; }
70
71
0
            bool IsInUse() const { return mUsageCount != 0; }
72
73
0
            const char16_t* DataStart() const { return (const char16_t*) (this+1); }
74
0
                  char16_t* DataStart()       { return (      char16_t*) (this+1); }
75
76
0
            const char16_t* DataEnd() const { return mDataEnd; }
77
0
                  char16_t* DataEnd()       { return mDataEnd; }
78
79
0
            const Buffer* Next() const { return getNext(); }
80
0
                  Buffer* Next()       { return getNext(); }
81
82
0
            const Buffer* Prev() const { return getPrevious(); }
83
0
                  Buffer* Prev()       { return getPrevious(); }
84
85
0
            uint32_t DataLength() const { return mDataEnd - DataStart(); }
86
0
            void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
87
88
          private:
89
90
            friend class nsScannerBufferList;
91
92
            int32_t    mUsageCount;
93
            char16_t* mDataEnd;
94
        };
95
96
        /**
97
         * Position objects serve as lightweight pointers into a buffer list.
98
         * The mPosition member must be contained with mBuffer->DataStart()
99
         * and mBuffer->DataEnd().
100
         */
101
      class Position
102
        {
103
          public:
104
            Position()
105
              : mBuffer(nullptr)
106
              , mPosition(nullptr)
107
0
            {
108
0
            }
109
110
            Position( Buffer* buffer, char16_t* position )
111
              : mBuffer(buffer)
112
              , mPosition(position)
113
0
              {}
114
115
            inline
116
            explicit Position( const nsScannerIterator& aIter );
117
118
            inline
119
            Position& operator=( const nsScannerIterator& aIter );
120
121
            static size_t Distance( const Position& p1, const Position& p2 );
122
123
            Buffer*    mBuffer;
124
            char16_t* mPosition;
125
        };
126
127
      static Buffer* AllocBufferFromString( const nsAString& );
128
      static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
129
130
      explicit nsScannerBufferList( Buffer* buf )
131
        : mRefCnt(0)
132
0
        {
133
0
          mBuffers.insertBack(buf);
134
0
        }
135
136
0
      void  AddRef()  { ++mRefCnt; }
137
0
      void  Release() { if (--mRefCnt == 0) delete this; }
138
139
0
      void  Append( Buffer* buf ) { mBuffers.insertBack(buf); }
140
0
      void  InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
141
      void  SplitBuffer( const Position& );
142
      void  DiscardUnreferencedPrefix( Buffer* );
143
144
0
            Buffer* Head()       { return mBuffers.getFirst(); }
145
0
      const Buffer* Head() const { return mBuffers.getFirst(); }
146
147
0
            Buffer* Tail()       { return mBuffers.getLast(); }
148
0
      const Buffer* Tail() const { return mBuffers.getLast(); }
149
150
    private:
151
152
      friend class nsScannerSubstring;
153
154
0
      ~nsScannerBufferList() { ReleaseAll(); }
155
      void ReleaseAll();
156
157
      int32_t mRefCnt;
158
      mozilla::LinkedList<Buffer> mBuffers;
159
  };
160
161
162
  /**
163
   * nsScannerFragment represents a "slice" of a Buffer object.
164
   */
165
struct nsScannerFragment
166
  {
167
    typedef nsScannerBufferList::Buffer Buffer;
168
169
    const Buffer*    mBuffer;
170
    const char16_t* mFragmentStart;
171
    const char16_t* mFragmentEnd;
172
  };
173
174
175
  /**
176
   * nsScannerSubstring is the base class for nsScannerString.  It provides
177
   * access to iterators and methods to bind the substring to another
178
   * substring or nsAString instance.
179
   *
180
   * This class owns the buffer list.
181
   */
182
class nsScannerSubstring
183
  {
184
    public:
185
      typedef nsScannerBufferList::Buffer      Buffer;
186
      typedef nsScannerBufferList::Position    Position;
187
      typedef uint32_t                         size_type;
188
189
      nsScannerSubstring();
190
      explicit nsScannerSubstring( const nsAString& s );
191
192
      ~nsScannerSubstring();
193
194
      nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
195
      nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
196
197
0
      size_type Length() const { return mLength; }
198
199
      int32_t CountChar( char16_t ) const;
200
201
      void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
202
      void Rebind( const nsAString& );
203
204
      const nsAString& AsString() const;
205
206
      bool GetNextFragment( nsScannerFragment& ) const;
207
      bool GetPrevFragment( nsScannerFragment& ) const;
208
209
0
      static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
210
0
      static inline Buffer* AllocBuffer( size_type aCapacity )             { return nsScannerBufferList::AllocBuffer(aCapacity); }
211
212
    protected:
213
214
      void acquire_ownership_of_buffer_list() const
215
0
        {
216
0
          mBufferList->AddRef();
217
0
          mStart.mBuffer->IncrementUsageCount();
218
0
        }
219
220
      void release_ownership_of_buffer_list()
221
0
        {
222
0
          if (mBufferList)
223
0
            {
224
0
              mStart.mBuffer->DecrementUsageCount();
225
0
              mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
226
0
              mBufferList->Release();
227
0
            }
228
0
        }
229
230
      void init_range_from_buffer_list()
231
0
        {
232
0
          mStart.mBuffer = mBufferList->Head();
233
0
          mStart.mPosition = mStart.mBuffer->DataStart();
234
0
235
0
          mEnd.mBuffer = mBufferList->Tail();
236
0
          mEnd.mPosition = mEnd.mBuffer->DataEnd();
237
0
238
0
          mLength = Position::Distance(mStart, mEnd);
239
0
        }
240
241
      Position             mStart;
242
      Position             mEnd;
243
      nsScannerBufferList *mBufferList;
244
      size_type            mLength;
245
246
      // these fields are used to implement AsString
247
      nsDependentSubstring mFlattenedRep;
248
      bool                 mIsDirty;
249
250
      friend class nsScannerSharedSubstring;
251
  };
252
253
254
  /**
255
   * nsScannerString provides methods to grow and modify a buffer list.
256
   */
257
class nsScannerString : public nsScannerSubstring
258
  {
259
    public:
260
261
      explicit nsScannerString( Buffer* );
262
263
        // you are giving ownership to the string, it takes and keeps your
264
        // buffer, deleting it when done.
265
        // Use AllocBuffer or AllocBufferFromString to create a Buffer object
266
        // for use with this function.
267
      void AppendBuffer( Buffer* );
268
269
      void DiscardPrefix( const nsScannerIterator& );
270
        // any other way you want to do this?
271
272
      void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
273
  };
274
275
276
  /**
277
   * nsScannerSharedSubstring implements copy-on-write semantics for
278
   * nsScannerSubstring.  When you call .writable(), it will copy the data
279
   * and return a mutable string object.  This class also manages releasing
280
   * the reference to the scanner buffer when it is no longer needed.
281
   */
282
283
class nsScannerSharedSubstring
284
  {
285
    public:
286
      nsScannerSharedSubstring()
287
0
        : mBuffer(nullptr), mBufferList(nullptr) { }
288
289
      ~nsScannerSharedSubstring()
290
0
        {
291
0
          if (mBufferList)
292
0
            ReleaseBuffer();
293
0
        }
294
295
        // Acquire a copy-on-write reference to the given substring.
296
      void Rebind(const nsScannerIterator& aStart,
297
                              const nsScannerIterator& aEnd);
298
299
       // Get a mutable reference to this string
300
      nsAString& writable()
301
0
        {
302
0
          if (mBufferList)
303
0
            MakeMutable();
304
0
305
0
          return mString;
306
0
        }
307
308
        // Get a const reference to this string
309
0
      const nsAString& str() const { return mString; }
310
311
    private:
312
      typedef nsScannerBufferList::Buffer Buffer;
313
314
      void ReleaseBuffer();
315
      void MakeMutable();
316
317
      nsDependentSubstring  mString;
318
      Buffer               *mBuffer;
319
      nsScannerBufferList  *mBufferList;
320
  };
321
322
  /**
323
   * nsScannerIterator works just like nsReadingIterator<CharT> except that
324
   * it knows how to iterate over a list of scanner buffers.
325
   */
326
class nsScannerIterator
327
  {
328
    public:
329
      typedef nsScannerIterator             self_type;
330
      typedef ptrdiff_t                     difference_type;
331
      typedef char16_t                     value_type;
332
      typedef const char16_t*              pointer;
333
      typedef const char16_t&              reference;
334
      typedef nsScannerSubstring::Buffer    Buffer;
335
336
    protected:
337
338
      nsScannerFragment         mFragment;
339
      const char16_t*          mPosition;
340
      const nsScannerSubstring* mOwner;
341
342
      friend class nsScannerSubstring;
343
      friend class nsScannerSharedSubstring;
344
345
    public:
346
      // nsScannerIterator();                                       // auto-generate default constructor is OK
347
      // nsScannerIterator( const nsScannerIterator& );             // auto-generated copy-constructor OK
348
      // nsScannerIterator& operator=( const nsScannerIterator& );  // auto-generated copy-assignment operator OK
349
350
      inline void normalize_forward();
351
      inline void normalize_backward();
352
353
      pointer get() const
354
0
        {
355
0
          return mPosition;
356
0
        }
357
358
      char16_t operator*() const
359
0
        {
360
0
          return *get();
361
0
        }
362
363
      const nsScannerFragment& fragment() const
364
0
        {
365
0
          return mFragment;
366
0
        }
367
368
      const Buffer* buffer() const
369
0
        {
370
0
          return mFragment.mBuffer;
371
0
        }
372
373
      self_type& operator++()
374
0
        {
375
0
          ++mPosition;
376
0
          normalize_forward();
377
0
          return *this;
378
0
        }
379
380
      self_type operator++( int )
381
0
        {
382
0
          self_type result(*this);
383
0
          ++mPosition;
384
0
          normalize_forward();
385
0
          return result;
386
0
        }
387
388
      self_type& operator--()
389
0
        {
390
0
          normalize_backward();
391
0
          --mPosition;
392
0
          return *this;
393
0
        }
394
395
      self_type operator--( int )
396
0
        {
397
0
          self_type result(*this);
398
0
          normalize_backward();
399
0
          --mPosition;
400
0
          return result;
401
0
        }
402
403
      difference_type size_forward() const
404
0
        {
405
0
          return mFragment.mFragmentEnd - mPosition;
406
0
        }
407
408
      difference_type size_backward() const
409
0
        {
410
0
          return mPosition - mFragment.mFragmentStart;
411
0
        }
412
413
      self_type& advance( difference_type n )
414
0
        {
415
0
          while ( n > 0 )
416
0
            {
417
0
              difference_type one_hop = std::min(n, size_forward());
418
0
419
0
              NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
420
0
                // perhaps I should |break| if |!one_hop|?
421
0
422
0
              mPosition += one_hop;
423
0
              normalize_forward();
424
0
              n -= one_hop;
425
0
            }
426
0
427
0
          while ( n < 0 )
428
0
            {
429
0
              normalize_backward();
430
0
              difference_type one_hop = std::max(n, -size_backward());
431
0
432
0
              NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
433
0
                // perhaps I should |break| if |!one_hop|?
434
0
435
0
              mPosition += one_hop;
436
0
              n -= one_hop;
437
0
            }
438
0
439
0
          return *this;
440
0
        }
441
  };
442
443
444
inline
445
bool
446
SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
447
0
  {
448
0
    return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
449
0
  }
450
451
452
  /**
453
   * this class is needed in order to make use of the methods in nsAlgorithm.h
454
   */
455
template <>
456
struct nsCharSourceTraits<nsScannerIterator>
457
  {
458
    typedef nsScannerIterator::difference_type difference_type;
459
460
    static
461
    uint32_t
462
    readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
463
0
      {
464
0
        return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
465
0
      }
466
467
    static
468
    const nsScannerIterator::value_type*
469
    read( const nsScannerIterator& iter )
470
0
      {
471
0
        return iter.get();
472
0
      }
473
474
    static
475
    void
476
    advance( nsScannerIterator& s, difference_type n )
477
0
      {
478
0
        s.advance(n);
479
0
      }
480
  };
481
482
483
  /**
484
   * inline methods follow
485
   */
486
487
inline
488
void
489
nsScannerIterator::normalize_forward()
490
0
  {
491
0
    while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
492
0
      mPosition = mFragment.mFragmentStart;
493
0
  }
494
495
inline
496
void
497
nsScannerIterator::normalize_backward()
498
0
  {
499
0
    while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
500
0
      mPosition = mFragment.mFragmentEnd;
501
0
  }
502
503
inline
504
bool
505
operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
506
0
  {
507
0
    return lhs.get() == rhs.get();
508
0
  }
509
510
inline
511
bool
512
operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
513
0
  {
514
0
    return lhs.get() != rhs.get();
515
0
  }
516
517
518
inline
519
nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
520
  : mBuffer(const_cast<Buffer*>(aIter.buffer()))
521
  , mPosition(const_cast<char16_t*>(aIter.get()))
522
0
  {}
523
524
inline
525
nsScannerBufferList::Position&
526
nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
527
0
  {
528
0
    mBuffer   = const_cast<Buffer*>(aIter.buffer());
529
0
    mPosition = const_cast<char16_t*>(aIter.get());
530
0
    return *this;
531
0
  }
532
533
534
  /**
535
   * scanner string utils
536
   *
537
   * These methods mimic the API provided by nsReadableUtils in xpcom/string.
538
   * Here we provide only the methods that the htmlparser module needs.
539
   */
540
541
inline
542
size_t
543
Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
544
0
  {
545
0
    typedef nsScannerBufferList::Position Position;
546
0
    return Position::Distance(Position(aStart), Position(aEnd));
547
0
  }
548
549
bool
550
CopyUnicodeTo( const nsScannerIterator& aSrcStart,
551
               const nsScannerIterator& aSrcEnd,
552
               nsAString& aDest );
553
554
inline
555
bool
556
CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
557
0
  {
558
0
    nsScannerIterator begin, end;
559
0
    return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
560
0
  }
561
562
bool
563
AppendUnicodeTo( const nsScannerIterator& aSrcStart,
564
                 const nsScannerIterator& aSrcEnd,
565
                 nsAString& aDest );
566
567
inline
568
bool
569
AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
570
0
  {
571
0
    nsScannerIterator begin, end;
572
0
    return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
573
0
  }
574
575
bool
576
AppendUnicodeTo( const nsScannerIterator& aSrcStart,
577
                 const nsScannerIterator& aSrcEnd,
578
                 nsScannerSharedSubstring& aDest );
579
580
bool
581
FindCharInReadable( char16_t aChar,
582
                    nsScannerIterator& aStart,
583
                    const nsScannerIterator& aEnd );
584
585
bool
586
FindInReadable( const nsAString& aPattern,
587
                nsScannerIterator& aStart,
588
                nsScannerIterator& aEnd,
589
                const nsStringComparator& = nsDefaultStringComparator() );
590
591
bool
592
RFindInReadable( const nsAString& aPattern,
593
                 nsScannerIterator& aStart,
594
                 nsScannerIterator& aEnd,
595
                 const nsStringComparator& = nsDefaultStringComparator() );
596
597
inline
598
bool
599
CaseInsensitiveFindInReadable( const nsAString& aPattern,
600
                               nsScannerIterator& aStart,
601
                               nsScannerIterator& aEnd )
602
0
  {
603
0
    return FindInReadable(aPattern, aStart, aEnd,
604
0
                          nsCaseInsensitiveStringComparator());
605
0
  }
606
607
#endif // !defined(nsScannerString_h___)