/src/mozilla-central/parser/htmlparser/nsScannerString.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim:set ts=2 sw=2 sts=2 et cindent: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #ifndef nsScannerString_h___ |
8 | | #define nsScannerString_h___ |
9 | | |
10 | | #include "nsString.h" |
11 | | #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator |
12 | | #include "mozilla/LinkedList.h" |
13 | | #include <algorithm> |
14 | | |
15 | | |
16 | | /** |
17 | | * NOTE: nsScannerString (and the other classes defined in this file) are |
18 | | * not related to nsAString or any of the other xpcom/string classes. |
19 | | * |
20 | | * nsScannerString is based on the nsSlidingString implementation that used |
21 | | * to live in xpcom/string. Now that nsAString is limited to representing |
22 | | * only single fragment strings, nsSlidingString can no longer be used. |
23 | | * |
24 | | * An advantage to this design is that it does not employ any virtual |
25 | | * functions. |
26 | | * |
27 | | * This file uses SCC-style indenting in deference to the nsSlidingString |
28 | | * code from which this code is derived ;-) |
29 | | */ |
30 | | |
31 | | class nsScannerIterator; |
32 | | class nsScannerSubstring; |
33 | | class nsScannerString; |
34 | | |
35 | | |
36 | | /** |
37 | | * nsScannerBufferList |
38 | | * |
39 | | * This class maintains a list of heap-allocated Buffer objects. The buffers |
40 | | * are maintained in a circular linked list. Each buffer has a usage count |
41 | | * that is decremented by the owning nsScannerSubstring. |
42 | | * |
43 | | * The buffer list itself is reference counted. This allows the buffer list |
44 | | * to be shared by multiple nsScannerSubstring objects. The reference |
45 | | * counting is not threadsafe, which is not at all a requirement. |
46 | | * |
47 | | * When a nsScannerSubstring releases its reference to a buffer list, it |
48 | | * decrements the usage count of the first buffer in the buffer list that it |
49 | | * was referencing. It informs the buffer list that it can discard buffers |
50 | | * starting at that prefix. The buffer list will do so if the usage count of |
51 | | * that buffer is 0 and if it is the first buffer in the list. It will |
52 | | * continue to prune buffers starting from the front of the buffer list until |
53 | | * it finds a buffer that has a usage count that is non-zero. |
54 | | */ |
55 | | class nsScannerBufferList |
56 | | { |
57 | | public: |
58 | | |
59 | | /** |
60 | | * Buffer objects are directly followed by a data segment. The start |
61 | | * of the data segment is determined by increment the |this| pointer |
62 | | * by 1 unit. |
63 | | */ |
64 | | class Buffer : public mozilla::LinkedListElement<Buffer> |
65 | | { |
66 | | public: |
67 | | |
68 | 0 | void IncrementUsageCount() { ++mUsageCount; } |
69 | 0 | void DecrementUsageCount() { --mUsageCount; } |
70 | | |
71 | 0 | bool IsInUse() const { return mUsageCount != 0; } |
72 | | |
73 | 0 | const char16_t* DataStart() const { return (const char16_t*) (this+1); } |
74 | 0 | char16_t* DataStart() { return ( char16_t*) (this+1); } |
75 | | |
76 | 0 | const char16_t* DataEnd() const { return mDataEnd; } |
77 | 0 | char16_t* DataEnd() { return mDataEnd; } |
78 | | |
79 | 0 | const Buffer* Next() const { return getNext(); } |
80 | 0 | Buffer* Next() { return getNext(); } |
81 | | |
82 | 0 | const Buffer* Prev() const { return getPrevious(); } |
83 | 0 | Buffer* Prev() { return getPrevious(); } |
84 | | |
85 | 0 | uint32_t DataLength() const { return mDataEnd - DataStart(); } |
86 | 0 | void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } |
87 | | |
88 | | private: |
89 | | |
90 | | friend class nsScannerBufferList; |
91 | | |
92 | | int32_t mUsageCount; |
93 | | char16_t* mDataEnd; |
94 | | }; |
95 | | |
96 | | /** |
97 | | * Position objects serve as lightweight pointers into a buffer list. |
98 | | * The mPosition member must be contained with mBuffer->DataStart() |
99 | | * and mBuffer->DataEnd(). |
100 | | */ |
101 | | class Position |
102 | | { |
103 | | public: |
104 | | Position() |
105 | | : mBuffer(nullptr) |
106 | | , mPosition(nullptr) |
107 | 0 | { |
108 | 0 | } |
109 | | |
110 | | Position( Buffer* buffer, char16_t* position ) |
111 | | : mBuffer(buffer) |
112 | | , mPosition(position) |
113 | 0 | {} |
114 | | |
115 | | inline |
116 | | explicit Position( const nsScannerIterator& aIter ); |
117 | | |
118 | | inline |
119 | | Position& operator=( const nsScannerIterator& aIter ); |
120 | | |
121 | | static size_t Distance( const Position& p1, const Position& p2 ); |
122 | | |
123 | | Buffer* mBuffer; |
124 | | char16_t* mPosition; |
125 | | }; |
126 | | |
127 | | static Buffer* AllocBufferFromString( const nsAString& ); |
128 | | static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars |
129 | | |
130 | | explicit nsScannerBufferList( Buffer* buf ) |
131 | | : mRefCnt(0) |
132 | 0 | { |
133 | 0 | mBuffers.insertBack(buf); |
134 | 0 | } |
135 | | |
136 | 0 | void AddRef() { ++mRefCnt; } |
137 | 0 | void Release() { if (--mRefCnt == 0) delete this; } |
138 | | |
139 | 0 | void Append( Buffer* buf ) { mBuffers.insertBack(buf); } |
140 | 0 | void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); } |
141 | | void SplitBuffer( const Position& ); |
142 | | void DiscardUnreferencedPrefix( Buffer* ); |
143 | | |
144 | 0 | Buffer* Head() { return mBuffers.getFirst(); } |
145 | 0 | const Buffer* Head() const { return mBuffers.getFirst(); } |
146 | | |
147 | 0 | Buffer* Tail() { return mBuffers.getLast(); } |
148 | 0 | const Buffer* Tail() const { return mBuffers.getLast(); } |
149 | | |
150 | | private: |
151 | | |
152 | | friend class nsScannerSubstring; |
153 | | |
154 | 0 | ~nsScannerBufferList() { ReleaseAll(); } |
155 | | void ReleaseAll(); |
156 | | |
157 | | int32_t mRefCnt; |
158 | | mozilla::LinkedList<Buffer> mBuffers; |
159 | | }; |
160 | | |
161 | | |
162 | | /** |
163 | | * nsScannerFragment represents a "slice" of a Buffer object. |
164 | | */ |
165 | | struct nsScannerFragment |
166 | | { |
167 | | typedef nsScannerBufferList::Buffer Buffer; |
168 | | |
169 | | const Buffer* mBuffer; |
170 | | const char16_t* mFragmentStart; |
171 | | const char16_t* mFragmentEnd; |
172 | | }; |
173 | | |
174 | | |
175 | | /** |
176 | | * nsScannerSubstring is the base class for nsScannerString. It provides |
177 | | * access to iterators and methods to bind the substring to another |
178 | | * substring or nsAString instance. |
179 | | * |
180 | | * This class owns the buffer list. |
181 | | */ |
182 | | class nsScannerSubstring |
183 | | { |
184 | | public: |
185 | | typedef nsScannerBufferList::Buffer Buffer; |
186 | | typedef nsScannerBufferList::Position Position; |
187 | | typedef uint32_t size_type; |
188 | | |
189 | | nsScannerSubstring(); |
190 | | explicit nsScannerSubstring( const nsAString& s ); |
191 | | |
192 | | ~nsScannerSubstring(); |
193 | | |
194 | | nsScannerIterator& BeginReading( nsScannerIterator& iter ) const; |
195 | | nsScannerIterator& EndReading( nsScannerIterator& iter ) const; |
196 | | |
197 | 0 | size_type Length() const { return mLength; } |
198 | | |
199 | | int32_t CountChar( char16_t ) const; |
200 | | |
201 | | void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& ); |
202 | | void Rebind( const nsAString& ); |
203 | | |
204 | | const nsAString& AsString() const; |
205 | | |
206 | | bool GetNextFragment( nsScannerFragment& ) const; |
207 | | bool GetPrevFragment( nsScannerFragment& ) const; |
208 | | |
209 | 0 | static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); } |
210 | 0 | static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); } |
211 | | |
212 | | protected: |
213 | | |
214 | | void acquire_ownership_of_buffer_list() const |
215 | 0 | { |
216 | 0 | mBufferList->AddRef(); |
217 | 0 | mStart.mBuffer->IncrementUsageCount(); |
218 | 0 | } |
219 | | |
220 | | void release_ownership_of_buffer_list() |
221 | 0 | { |
222 | 0 | if (mBufferList) |
223 | 0 | { |
224 | 0 | mStart.mBuffer->DecrementUsageCount(); |
225 | 0 | mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); |
226 | 0 | mBufferList->Release(); |
227 | 0 | } |
228 | 0 | } |
229 | | |
230 | | void init_range_from_buffer_list() |
231 | 0 | { |
232 | 0 | mStart.mBuffer = mBufferList->Head(); |
233 | 0 | mStart.mPosition = mStart.mBuffer->DataStart(); |
234 | 0 |
|
235 | 0 | mEnd.mBuffer = mBufferList->Tail(); |
236 | 0 | mEnd.mPosition = mEnd.mBuffer->DataEnd(); |
237 | 0 |
|
238 | 0 | mLength = Position::Distance(mStart, mEnd); |
239 | 0 | } |
240 | | |
241 | | Position mStart; |
242 | | Position mEnd; |
243 | | nsScannerBufferList *mBufferList; |
244 | | size_type mLength; |
245 | | |
246 | | // these fields are used to implement AsString |
247 | | nsDependentSubstring mFlattenedRep; |
248 | | bool mIsDirty; |
249 | | |
250 | | friend class nsScannerSharedSubstring; |
251 | | }; |
252 | | |
253 | | |
254 | | /** |
255 | | * nsScannerString provides methods to grow and modify a buffer list. |
256 | | */ |
257 | | class nsScannerString : public nsScannerSubstring |
258 | | { |
259 | | public: |
260 | | |
261 | | explicit nsScannerString( Buffer* ); |
262 | | |
263 | | // you are giving ownership to the string, it takes and keeps your |
264 | | // buffer, deleting it when done. |
265 | | // Use AllocBuffer or AllocBufferFromString to create a Buffer object |
266 | | // for use with this function. |
267 | | void AppendBuffer( Buffer* ); |
268 | | |
269 | | void DiscardPrefix( const nsScannerIterator& ); |
270 | | // any other way you want to do this? |
271 | | |
272 | | void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); |
273 | | }; |
274 | | |
275 | | |
276 | | /** |
277 | | * nsScannerSharedSubstring implements copy-on-write semantics for |
278 | | * nsScannerSubstring. When you call .writable(), it will copy the data |
279 | | * and return a mutable string object. This class also manages releasing |
280 | | * the reference to the scanner buffer when it is no longer needed. |
281 | | */ |
282 | | |
283 | | class nsScannerSharedSubstring |
284 | | { |
285 | | public: |
286 | | nsScannerSharedSubstring() |
287 | 0 | : mBuffer(nullptr), mBufferList(nullptr) { } |
288 | | |
289 | | ~nsScannerSharedSubstring() |
290 | 0 | { |
291 | 0 | if (mBufferList) |
292 | 0 | ReleaseBuffer(); |
293 | 0 | } |
294 | | |
295 | | // Acquire a copy-on-write reference to the given substring. |
296 | | void Rebind(const nsScannerIterator& aStart, |
297 | | const nsScannerIterator& aEnd); |
298 | | |
299 | | // Get a mutable reference to this string |
300 | | nsAString& writable() |
301 | 0 | { |
302 | 0 | if (mBufferList) |
303 | 0 | MakeMutable(); |
304 | 0 |
|
305 | 0 | return mString; |
306 | 0 | } |
307 | | |
308 | | // Get a const reference to this string |
309 | 0 | const nsAString& str() const { return mString; } |
310 | | |
311 | | private: |
312 | | typedef nsScannerBufferList::Buffer Buffer; |
313 | | |
314 | | void ReleaseBuffer(); |
315 | | void MakeMutable(); |
316 | | |
317 | | nsDependentSubstring mString; |
318 | | Buffer *mBuffer; |
319 | | nsScannerBufferList *mBufferList; |
320 | | }; |
321 | | |
322 | | /** |
323 | | * nsScannerIterator works just like nsReadingIterator<CharT> except that |
324 | | * it knows how to iterate over a list of scanner buffers. |
325 | | */ |
326 | | class nsScannerIterator |
327 | | { |
328 | | public: |
329 | | typedef nsScannerIterator self_type; |
330 | | typedef ptrdiff_t difference_type; |
331 | | typedef char16_t value_type; |
332 | | typedef const char16_t* pointer; |
333 | | typedef const char16_t& reference; |
334 | | typedef nsScannerSubstring::Buffer Buffer; |
335 | | |
336 | | protected: |
337 | | |
338 | | nsScannerFragment mFragment; |
339 | | const char16_t* mPosition; |
340 | | const nsScannerSubstring* mOwner; |
341 | | |
342 | | friend class nsScannerSubstring; |
343 | | friend class nsScannerSharedSubstring; |
344 | | |
345 | | public: |
346 | | // nsScannerIterator(); // auto-generate default constructor is OK |
347 | | // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK |
348 | | // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK |
349 | | |
350 | | inline void normalize_forward(); |
351 | | inline void normalize_backward(); |
352 | | |
353 | | pointer get() const |
354 | 0 | { |
355 | 0 | return mPosition; |
356 | 0 | } |
357 | | |
358 | | char16_t operator*() const |
359 | 0 | { |
360 | 0 | return *get(); |
361 | 0 | } |
362 | | |
363 | | const nsScannerFragment& fragment() const |
364 | 0 | { |
365 | 0 | return mFragment; |
366 | 0 | } |
367 | | |
368 | | const Buffer* buffer() const |
369 | 0 | { |
370 | 0 | return mFragment.mBuffer; |
371 | 0 | } |
372 | | |
373 | | self_type& operator++() |
374 | 0 | { |
375 | 0 | ++mPosition; |
376 | 0 | normalize_forward(); |
377 | 0 | return *this; |
378 | 0 | } |
379 | | |
380 | | self_type operator++( int ) |
381 | 0 | { |
382 | 0 | self_type result(*this); |
383 | 0 | ++mPosition; |
384 | 0 | normalize_forward(); |
385 | 0 | return result; |
386 | 0 | } |
387 | | |
388 | | self_type& operator--() |
389 | 0 | { |
390 | 0 | normalize_backward(); |
391 | 0 | --mPosition; |
392 | 0 | return *this; |
393 | 0 | } |
394 | | |
395 | | self_type operator--( int ) |
396 | 0 | { |
397 | 0 | self_type result(*this); |
398 | 0 | normalize_backward(); |
399 | 0 | --mPosition; |
400 | 0 | return result; |
401 | 0 | } |
402 | | |
403 | | difference_type size_forward() const |
404 | 0 | { |
405 | 0 | return mFragment.mFragmentEnd - mPosition; |
406 | 0 | } |
407 | | |
408 | | difference_type size_backward() const |
409 | 0 | { |
410 | 0 | return mPosition - mFragment.mFragmentStart; |
411 | 0 | } |
412 | | |
413 | | self_type& advance( difference_type n ) |
414 | 0 | { |
415 | 0 | while ( n > 0 ) |
416 | 0 | { |
417 | 0 | difference_type one_hop = std::min(n, size_forward()); |
418 | 0 |
|
419 | 0 | NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string"); |
420 | 0 | // perhaps I should |break| if |!one_hop|? |
421 | 0 |
|
422 | 0 | mPosition += one_hop; |
423 | 0 | normalize_forward(); |
424 | 0 | n -= one_hop; |
425 | 0 | } |
426 | 0 |
|
427 | 0 | while ( n < 0 ) |
428 | 0 | { |
429 | 0 | normalize_backward(); |
430 | 0 | difference_type one_hop = std::max(n, -size_backward()); |
431 | 0 |
|
432 | 0 | NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string"); |
433 | 0 | // perhaps I should |break| if |!one_hop|? |
434 | 0 |
|
435 | 0 | mPosition += one_hop; |
436 | 0 | n -= one_hop; |
437 | 0 | } |
438 | 0 |
|
439 | 0 | return *this; |
440 | 0 | } |
441 | | }; |
442 | | |
443 | | |
444 | | inline |
445 | | bool |
446 | | SameFragment( const nsScannerIterator& a, const nsScannerIterator& b ) |
447 | 0 | { |
448 | 0 | return a.fragment().mFragmentStart == b.fragment().mFragmentStart; |
449 | 0 | } |
450 | | |
451 | | |
452 | | /** |
453 | | * this class is needed in order to make use of the methods in nsAlgorithm.h |
454 | | */ |
455 | | template <> |
456 | | struct nsCharSourceTraits<nsScannerIterator> |
457 | | { |
458 | | typedef nsScannerIterator::difference_type difference_type; |
459 | | |
460 | | static |
461 | | uint32_t |
462 | | readable_distance( const nsScannerIterator& first, const nsScannerIterator& last ) |
463 | 0 | { |
464 | 0 | return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward()); |
465 | 0 | } |
466 | | |
467 | | static |
468 | | const nsScannerIterator::value_type* |
469 | | read( const nsScannerIterator& iter ) |
470 | 0 | { |
471 | 0 | return iter.get(); |
472 | 0 | } |
473 | | |
474 | | static |
475 | | void |
476 | | advance( nsScannerIterator& s, difference_type n ) |
477 | 0 | { |
478 | 0 | s.advance(n); |
479 | 0 | } |
480 | | }; |
481 | | |
482 | | |
483 | | /** |
484 | | * inline methods follow |
485 | | */ |
486 | | |
487 | | inline |
488 | | void |
489 | | nsScannerIterator::normalize_forward() |
490 | 0 | { |
491 | 0 | while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment)) |
492 | 0 | mPosition = mFragment.mFragmentStart; |
493 | 0 | } |
494 | | |
495 | | inline |
496 | | void |
497 | | nsScannerIterator::normalize_backward() |
498 | 0 | { |
499 | 0 | while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment)) |
500 | 0 | mPosition = mFragment.mFragmentEnd; |
501 | 0 | } |
502 | | |
503 | | inline |
504 | | bool |
505 | | operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) |
506 | 0 | { |
507 | 0 | return lhs.get() == rhs.get(); |
508 | 0 | } |
509 | | |
510 | | inline |
511 | | bool |
512 | | operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs ) |
513 | 0 | { |
514 | 0 | return lhs.get() != rhs.get(); |
515 | 0 | } |
516 | | |
517 | | |
518 | | inline |
519 | | nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) |
520 | | : mBuffer(const_cast<Buffer*>(aIter.buffer())) |
521 | | , mPosition(const_cast<char16_t*>(aIter.get())) |
522 | 0 | {} |
523 | | |
524 | | inline |
525 | | nsScannerBufferList::Position& |
526 | | nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter) |
527 | 0 | { |
528 | 0 | mBuffer = const_cast<Buffer*>(aIter.buffer()); |
529 | 0 | mPosition = const_cast<char16_t*>(aIter.get()); |
530 | 0 | return *this; |
531 | 0 | } |
532 | | |
533 | | |
534 | | /** |
535 | | * scanner string utils |
536 | | * |
537 | | * These methods mimic the API provided by nsReadableUtils in xpcom/string. |
538 | | * Here we provide only the methods that the htmlparser module needs. |
539 | | */ |
540 | | |
541 | | inline |
542 | | size_t |
543 | | Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd ) |
544 | 0 | { |
545 | 0 | typedef nsScannerBufferList::Position Position; |
546 | 0 | return Position::Distance(Position(aStart), Position(aEnd)); |
547 | 0 | } |
548 | | |
549 | | bool |
550 | | CopyUnicodeTo( const nsScannerIterator& aSrcStart, |
551 | | const nsScannerIterator& aSrcEnd, |
552 | | nsAString& aDest ); |
553 | | |
554 | | inline |
555 | | bool |
556 | | CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) |
557 | 0 | { |
558 | 0 | nsScannerIterator begin, end; |
559 | 0 | return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); |
560 | 0 | } |
561 | | |
562 | | bool |
563 | | AppendUnicodeTo( const nsScannerIterator& aSrcStart, |
564 | | const nsScannerIterator& aSrcEnd, |
565 | | nsAString& aDest ); |
566 | | |
567 | | inline |
568 | | bool |
569 | | AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest ) |
570 | 0 | { |
571 | 0 | nsScannerIterator begin, end; |
572 | 0 | return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); |
573 | 0 | } |
574 | | |
575 | | bool |
576 | | AppendUnicodeTo( const nsScannerIterator& aSrcStart, |
577 | | const nsScannerIterator& aSrcEnd, |
578 | | nsScannerSharedSubstring& aDest ); |
579 | | |
580 | | bool |
581 | | FindCharInReadable( char16_t aChar, |
582 | | nsScannerIterator& aStart, |
583 | | const nsScannerIterator& aEnd ); |
584 | | |
585 | | bool |
586 | | FindInReadable( const nsAString& aPattern, |
587 | | nsScannerIterator& aStart, |
588 | | nsScannerIterator& aEnd, |
589 | | const nsStringComparator& = nsDefaultStringComparator() ); |
590 | | |
591 | | bool |
592 | | RFindInReadable( const nsAString& aPattern, |
593 | | nsScannerIterator& aStart, |
594 | | nsScannerIterator& aEnd, |
595 | | const nsStringComparator& = nsDefaultStringComparator() ); |
596 | | |
597 | | inline |
598 | | bool |
599 | | CaseInsensitiveFindInReadable( const nsAString& aPattern, |
600 | | nsScannerIterator& aStart, |
601 | | nsScannerIterator& aEnd ) |
602 | 0 | { |
603 | 0 | return FindInReadable(aPattern, aStart, aEnd, |
604 | 0 | nsCaseInsensitiveStringComparator()); |
605 | 0 | } |
606 | | |
607 | | #endif // !defined(nsScannerString_h___) |