Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef nsScannerString_h___
8 : #define nsScannerString_h___
9 :
10 : #include "nsString.h"
11 : #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 : #include "mozilla/LinkedList.h"
13 : #include <algorithm>
14 :
15 :
16 : /**
17 : * NOTE: nsScannerString (and the other classes defined in this file) are
18 : * not related to nsAString or any of the other xpcom/string classes.
19 : *
20 : * nsScannerString is based on the nsSlidingString implementation that used
21 : * to live in xpcom/string. Now that nsAString is limited to representing
22 : * only single fragment strings, nsSlidingString can no longer be used.
23 : *
24 : * An advantage to this design is that it does not employ any virtual
25 : * functions.
26 : *
27 : * This file uses SCC-style indenting in deference to the nsSlidingString
28 : * code from which this code is derived ;-)
29 : */
30 :
31 : class nsScannerIterator;
32 : class nsScannerSubstring;
33 : class nsScannerString;
34 :
35 :
36 : /**
37 : * nsScannerBufferList
38 : *
39 : * This class maintains a list of heap-allocated Buffer objects. The buffers
40 : * are maintained in a circular linked list. Each buffer has a usage count
41 : * that is decremented by the owning nsScannerSubstring.
42 : *
43 : * The buffer list itself is reference counted. This allows the buffer list
44 : * to be shared by multiple nsScannerSubstring objects. The reference
45 : * counting is not threadsafe, which is not at all a requirement.
46 : *
47 : * When a nsScannerSubstring releases its reference to a buffer list, it
48 : * decrements the usage count of the first buffer in the buffer list that it
49 : * was referencing. It informs the buffer list that it can discard buffers
50 : * starting at that prefix. The buffer list will do so if the usage count of
51 : * that buffer is 0 and if it is the first buffer in the list. It will
52 : * continue to prune buffers starting from the front of the buffer list until
53 : * it finds a buffer that has a usage count that is non-zero.
54 : */
55 : class nsScannerBufferList
56 : {
57 : public:
58 :
59 : /**
60 : * Buffer objects are directly followed by a data segment. The start
61 : * of the data segment is determined by increment the |this| pointer
62 : * by 1 unit.
63 : */
64 22 : class Buffer : public mozilla::LinkedListElement<Buffer>
65 : {
66 : public:
67 :
68 154 : void IncrementUsageCount() { ++mUsageCount; }
69 153 : void DecrementUsageCount() { --mUsageCount; }
70 :
71 153 : bool IsInUse() const { return mUsageCount != 0; }
72 :
73 0 : const char16_t* DataStart() const { return (const char16_t*) (this+1); }
74 88 : char16_t* DataStart() { return ( char16_t*) (this+1); }
75 :
76 0 : const char16_t* DataEnd() const { return mDataEnd; }
77 22 : char16_t* DataEnd() { return mDataEnd; }
78 :
79 : const Buffer* Next() const { return getNext(); }
80 0 : Buffer* Next() { return getNext(); }
81 :
82 : const Buffer* Prev() const { return getPrevious(); }
83 : Buffer* Prev() { return getPrevious(); }
84 :
85 0 : uint32_t DataLength() const { return mDataEnd - DataStart(); }
86 22 : void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
87 :
88 : private:
89 :
90 : friend class nsScannerBufferList;
91 :
92 : int32_t mUsageCount;
93 : char16_t* mDataEnd;
94 : };
95 :
96 : /**
97 : * Position objects serve as lightweight pointers into a buffer list.
98 : * The mPosition member must be contained with mBuffer->DataStart()
99 : * and mBuffer->DataEnd().
100 : */
101 : class Position
102 : {
103 : public:
104 :
105 0 : Position() {}
106 :
107 44 : Position( Buffer* buffer, char16_t* position )
108 44 : : mBuffer(buffer)
109 44 : , mPosition(position)
110 44 : {}
111 :
112 : inline
113 : explicit Position( const nsScannerIterator& aIter );
114 :
115 : inline
116 : Position& operator=( const nsScannerIterator& aIter );
117 :
118 : static size_t Distance( const Position& p1, const Position& p2 );
119 :
120 : Buffer* mBuffer;
121 : char16_t* mPosition;
122 : };
123 :
124 : static Buffer* AllocBufferFromString( const nsAString& );
125 : static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
126 :
127 22 : explicit nsScannerBufferList( Buffer* buf )
128 22 : : mRefCnt(0)
129 : {
130 22 : mBuffers.insertBack(buf);
131 22 : }
132 :
133 22 : void AddRef() { ++mRefCnt; }
134 42 : void Release() { if (--mRefCnt == 0) delete this; }
135 :
136 0 : void Append( Buffer* buf ) { mBuffers.insertBack(buf); }
137 0 : void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
138 : void SplitBuffer( const Position& );
139 : void DiscardUnreferencedPrefix( Buffer* );
140 :
141 349 : Buffer* Head() { return mBuffers.getFirst(); }
142 : const Buffer* Head() const { return mBuffers.getFirst(); }
143 :
144 22 : Buffer* Tail() { return mBuffers.getLast(); }
145 : const Buffer* Tail() const { return mBuffers.getLast(); }
146 :
147 : private:
148 :
149 : friend class nsScannerSubstring;
150 :
151 21 : ~nsScannerBufferList() { ReleaseAll(); }
152 : void ReleaseAll();
153 :
154 : int32_t mRefCnt;
155 : mozilla::LinkedList<Buffer> mBuffers;
156 : };
157 :
158 :
159 : /**
160 : * nsScannerFragment represents a "slice" of a Buffer object.
161 : */
162 : struct nsScannerFragment
163 : {
164 : typedef nsScannerBufferList::Buffer Buffer;
165 :
166 : const Buffer* mBuffer;
167 : const char16_t* mFragmentStart;
168 : const char16_t* mFragmentEnd;
169 : };
170 :
171 :
172 : /**
173 : * nsScannerSubstring is the base class for nsScannerString. It provides
174 : * access to iterators and methods to bind the substring to another
175 : * substring or nsAString instance.
176 : *
177 : * This class owns the buffer list.
178 : */
179 : class nsScannerSubstring
180 : {
181 : public:
182 : typedef nsScannerBufferList::Buffer Buffer;
183 : typedef nsScannerBufferList::Position Position;
184 : typedef uint32_t size_type;
185 :
186 : nsScannerSubstring();
187 : explicit nsScannerSubstring( const nsAString& s );
188 :
189 : ~nsScannerSubstring();
190 :
191 : nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
192 : nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
193 :
194 0 : size_type Length() const { return mLength; }
195 :
196 : int32_t CountChar( char16_t ) const;
197 :
198 : void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
199 : void Rebind( const nsAString& );
200 :
201 : const nsAString& AsString() const;
202 :
203 : bool GetNextFragment( nsScannerFragment& ) const;
204 : bool GetPrevFragment( nsScannerFragment& ) const;
205 :
206 0 : static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
207 22 : static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); }
208 :
209 : protected:
210 :
211 22 : void acquire_ownership_of_buffer_list() const
212 : {
213 22 : mBufferList->AddRef();
214 22 : mStart.mBuffer->IncrementUsageCount();
215 22 : }
216 :
217 21 : void release_ownership_of_buffer_list()
218 : {
219 21 : if (mBufferList)
220 : {
221 21 : mStart.mBuffer->DecrementUsageCount();
222 21 : mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
223 21 : mBufferList->Release();
224 : }
225 21 : }
226 :
227 22 : void init_range_from_buffer_list()
228 : {
229 22 : mStart.mBuffer = mBufferList->Head();
230 22 : mStart.mPosition = mStart.mBuffer->DataStart();
231 :
232 22 : mEnd.mBuffer = mBufferList->Tail();
233 22 : mEnd.mPosition = mEnd.mBuffer->DataEnd();
234 :
235 22 : mLength = Position::Distance(mStart, mEnd);
236 22 : }
237 :
238 : Position mStart;
239 : Position mEnd;
240 : nsScannerBufferList *mBufferList;
241 : size_type mLength;
242 :
243 : // these fields are used to implement AsString
244 : nsDependentSubstring mFlattenedRep;
245 : bool mIsDirty;
246 :
247 : friend class nsScannerSharedSubstring;
248 : };
249 :
250 :
251 : /**
252 : * nsScannerString provides methods to grow and modify a buffer list.
253 : */
254 21 : class nsScannerString : public nsScannerSubstring
255 : {
256 : public:
257 :
258 : explicit nsScannerString( Buffer* );
259 :
260 : // you are giving ownership to the string, it takes and keeps your
261 : // buffer, deleting it when done.
262 : // Use AllocBuffer or AllocBufferFromString to create a Buffer object
263 : // for use with this function.
264 : void AppendBuffer( Buffer* );
265 :
266 : void DiscardPrefix( const nsScannerIterator& );
267 : // any other way you want to do this?
268 :
269 : void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
270 : };
271 :
272 :
273 : /**
274 : * nsScannerSharedSubstring implements copy-on-write semantics for
275 : * nsScannerSubstring. When you call .writable(), it will copy the data
276 : * and return a mutable string object. This class also manages releasing
277 : * the reference to the scanner buffer when it is no longer needed.
278 : */
279 :
280 : class nsScannerSharedSubstring
281 : {
282 : public:
283 : nsScannerSharedSubstring()
284 : : mBuffer(nullptr), mBufferList(nullptr) { }
285 :
286 : ~nsScannerSharedSubstring()
287 : {
288 : if (mBufferList)
289 : ReleaseBuffer();
290 : }
291 :
292 : // Acquire a copy-on-write reference to the given substring.
293 : void Rebind(const nsScannerIterator& aStart,
294 : const nsScannerIterator& aEnd);
295 :
296 : // Get a mutable reference to this string
297 0 : nsAString& writable()
298 : {
299 0 : if (mBufferList)
300 0 : MakeMutable();
301 :
302 0 : return mString;
303 : }
304 :
305 : // Get a const reference to this string
306 0 : const nsAString& str() const { return mString; }
307 :
308 : private:
309 : typedef nsScannerBufferList::Buffer Buffer;
310 :
311 : void ReleaseBuffer();
312 : void MakeMutable();
313 :
314 : nsDependentSubstring mString;
315 : Buffer *mBuffer;
316 : nsScannerBufferList *mBufferList;
317 : };
318 :
319 : /**
320 : * nsScannerIterator works just like nsReadingIterator<CharT> except that
321 : * it knows how to iterate over a list of scanner buffers.
322 : */
323 : class nsScannerIterator
324 : {
325 : public:
326 : typedef nsScannerIterator self_type;
327 : typedef ptrdiff_t difference_type;
328 : typedef char16_t value_type;
329 : typedef const char16_t* pointer;
330 : typedef const char16_t& reference;
331 : typedef nsScannerSubstring::Buffer Buffer;
332 :
333 : protected:
334 :
335 : nsScannerFragment mFragment;
336 : const char16_t* mPosition;
337 : const nsScannerSubstring* mOwner;
338 :
339 : friend class nsScannerSubstring;
340 : friend class nsScannerSharedSubstring;
341 :
342 : public:
343 245 : nsScannerIterator() {}
344 : // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK
345 : // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK
346 :
347 : inline void normalize_forward();
348 : inline void normalize_backward();
349 :
350 863 : pointer get() const
351 : {
352 863 : return mPosition;
353 : }
354 :
355 0 : char16_t operator*() const
356 : {
357 0 : return *get();
358 : }
359 :
360 2 : const nsScannerFragment& fragment() const
361 : {
362 2 : return mFragment;
363 : }
364 :
365 396 : const Buffer* buffer() const
366 : {
367 396 : return mFragment.mBuffer;
368 : }
369 :
370 0 : self_type& operator++()
371 : {
372 0 : ++mPosition;
373 0 : normalize_forward();
374 0 : return *this;
375 : }
376 :
377 0 : self_type operator++( int )
378 : {
379 0 : self_type result(*this);
380 0 : ++mPosition;
381 0 : normalize_forward();
382 0 : return result;
383 : }
384 :
385 : self_type& operator--()
386 : {
387 : normalize_backward();
388 : --mPosition;
389 : return *this;
390 : }
391 :
392 : self_type operator--( int )
393 : {
394 : self_type result(*this);
395 : normalize_backward();
396 : --mPosition;
397 : return result;
398 : }
399 :
400 67 : difference_type size_forward() const
401 : {
402 67 : return mFragment.mFragmentEnd - mPosition;
403 : }
404 :
405 1 : difference_type size_backward() const
406 : {
407 1 : return mPosition - mFragment.mFragmentStart;
408 : }
409 :
410 178 : self_type& advance( difference_type n )
411 : {
412 223 : while ( n > 0 )
413 : {
414 45 : difference_type one_hop = std::min(n, size_forward());
415 :
416 45 : NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
417 : // perhaps I should |break| if |!one_hop|?
418 :
419 45 : mPosition += one_hop;
420 45 : normalize_forward();
421 45 : n -= one_hop;
422 : }
423 :
424 135 : while ( n < 0 )
425 : {
426 1 : normalize_backward();
427 1 : difference_type one_hop = std::max(n, -size_backward());
428 :
429 1 : NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
430 : // perhaps I should |break| if |!one_hop|?
431 :
432 1 : mPosition += one_hop;
433 1 : n -= one_hop;
434 : }
435 :
436 133 : return *this;
437 : }
438 : };
439 :
440 :
441 : inline
442 : bool
443 1 : SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
444 : {
445 1 : return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
446 : }
447 :
448 :
449 : /**
450 : * this class is needed in order to make use of the methods in nsAlgorithm.h
451 : */
452 : template <>
453 : struct nsCharSourceTraits<nsScannerIterator>
454 : {
455 : typedef nsScannerIterator::difference_type difference_type;
456 :
457 : static
458 : uint32_t
459 1 : readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
460 : {
461 1 : return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
462 : }
463 :
464 : static
465 : const nsScannerIterator::value_type*
466 1 : read( const nsScannerIterator& iter )
467 : {
468 1 : return iter.get();
469 : }
470 :
471 : static
472 : void
473 1 : advance( nsScannerIterator& s, difference_type n )
474 : {
475 1 : s.advance(n);
476 1 : }
477 : };
478 :
479 :
480 : /**
481 : * inline methods follow
482 : */
483 :
484 : inline
485 : void
486 243 : nsScannerIterator::normalize_forward()
487 : {
488 243 : while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
489 0 : mPosition = mFragment.mFragmentStart;
490 243 : }
491 :
492 : inline
493 : void
494 1 : nsScannerIterator::normalize_backward()
495 : {
496 1 : while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
497 0 : mPosition = mFragment.mFragmentEnd;
498 1 : }
499 :
500 : inline
501 : bool
502 110 : operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
503 : {
504 110 : return lhs.get() == rhs.get();
505 : }
506 :
507 : inline
508 : bool
509 111 : operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
510 : {
511 111 : return lhs.get() != rhs.get();
512 : }
513 :
514 :
515 : inline
516 264 : nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
517 264 : : mBuffer(const_cast<Buffer*>(aIter.buffer()))
518 264 : , mPosition(const_cast<char16_t*>(aIter.get()))
519 264 : {}
520 :
521 : inline
522 : nsScannerBufferList::Position&
523 132 : nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
524 : {
525 132 : mBuffer = const_cast<Buffer*>(aIter.buffer());
526 132 : mPosition = const_cast<char16_t*>(aIter.get());
527 132 : return *this;
528 : }
529 :
530 :
531 : /**
532 : * scanner string utils
533 : *
534 : * These methods mimic the API provided by nsReadableUtils in xpcom/string.
535 : * Here we provide only the methods that the htmlparser module needs.
536 : */
537 :
538 : inline
539 : size_t
540 132 : Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
541 : {
542 : typedef nsScannerBufferList::Position Position;
543 132 : return Position::Distance(Position(aStart), Position(aEnd));
544 : }
545 :
546 : bool
547 : CopyUnicodeTo( const nsScannerIterator& aSrcStart,
548 : const nsScannerIterator& aSrcEnd,
549 : nsAString& aDest );
550 :
551 : inline
552 : bool
553 : CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
554 : {
555 : nsScannerIterator begin, end;
556 : return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
557 : }
558 :
559 : bool
560 : AppendUnicodeTo( const nsScannerIterator& aSrcStart,
561 : const nsScannerIterator& aSrcEnd,
562 : nsAString& aDest );
563 :
564 : inline
565 : bool
566 : AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
567 : {
568 : nsScannerIterator begin, end;
569 : return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
570 : }
571 :
572 : bool
573 : AppendUnicodeTo( const nsScannerIterator& aSrcStart,
574 : const nsScannerIterator& aSrcEnd,
575 : nsScannerSharedSubstring& aDest );
576 :
577 : bool
578 : FindCharInReadable( char16_t aChar,
579 : nsScannerIterator& aStart,
580 : const nsScannerIterator& aEnd );
581 :
582 : bool
583 : FindInReadable( const nsAString& aPattern,
584 : nsScannerIterator& aStart,
585 : nsScannerIterator& aEnd,
586 : const nsStringComparator& = nsDefaultStringComparator() );
587 :
588 : bool
589 : RFindInReadable( const nsAString& aPattern,
590 : nsScannerIterator& aStart,
591 : nsScannerIterator& aEnd,
592 : const nsStringComparator& = nsDefaultStringComparator() );
593 :
594 : inline
595 : bool
596 : CaseInsensitiveFindInReadable( const nsAString& aPattern,
597 : nsScannerIterator& aStart,
598 : nsScannerIterator& aEnd )
599 : {
600 : return FindInReadable(aPattern, aStart, aEnd,
601 : nsCaseInsensitiveStringComparator());
602 : }
603 :
604 : #endif // !defined(nsScannerString_h___)
|