Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include <stdlib.h>
8 : #include "nsScannerString.h"
9 : #include "mozilla/CheckedInt.h"
10 :
11 :
12 : /**
13 : * nsScannerBufferList
14 : */
15 :
16 : #define MAX_CAPACITY ((UINT32_MAX / sizeof(char16_t)) - \
17 : (sizeof(Buffer) + sizeof(char16_t)))
18 :
19 : nsScannerBufferList::Buffer*
20 0 : nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
21 : {
22 0 : uint32_t len = aString.Length();
23 0 : Buffer* buf = AllocBuffer(len);
24 :
25 0 : if (buf)
26 : {
27 0 : nsAString::const_iterator source;
28 0 : aString.BeginReading(source);
29 0 : nsCharTraits<char16_t>::copy(buf->DataStart(), source.get(), len);
30 : }
31 0 : return buf;
32 : }
33 :
34 : nsScannerBufferList::Buffer*
35 22 : nsScannerBufferList::AllocBuffer( uint32_t capacity )
36 : {
37 22 : if (capacity > MAX_CAPACITY)
38 0 : return nullptr;
39 :
40 22 : void* ptr = malloc(sizeof(Buffer) + (capacity + 1) * sizeof(char16_t));
41 22 : if (!ptr)
42 0 : return nullptr;
43 :
44 22 : Buffer* buf = new (ptr) Buffer();
45 :
46 22 : buf->mUsageCount = 0;
47 22 : buf->mDataEnd = buf->DataStart() + capacity;
48 :
49 : // XXX null terminate. this shouldn't be required, but we do it because
50 : // nsScanner erroneously thinks it can dereference DataEnd :-(
51 22 : *buf->mDataEnd = char16_t(0);
52 22 : return buf;
53 : }
54 :
55 : void
56 21 : nsScannerBufferList::ReleaseAll()
57 : {
58 21 : while (!mBuffers.isEmpty())
59 : {
60 0 : Buffer* node = mBuffers.popFirst();
61 : //printf(">>> freeing buffer @%p\n", node);
62 0 : free(node);
63 : }
64 21 : }
65 :
66 : void
67 0 : nsScannerBufferList::SplitBuffer( const Position& pos )
68 : {
69 : // splitting to the right keeps the work string and any extant token
70 : // pointing to and holding a reference count on the same buffer.
71 :
72 0 : Buffer* bufferToSplit = pos.mBuffer;
73 0 : NS_ASSERTION(bufferToSplit, "null pointer");
74 :
75 0 : uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
76 0 : NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
77 : splitOffset <= bufferToSplit->DataLength(),
78 : "split offset is outside buffer");
79 :
80 0 : uint32_t len = bufferToSplit->DataLength() - splitOffset;
81 0 : Buffer* new_buffer = AllocBuffer(len);
82 0 : if (new_buffer)
83 : {
84 0 : nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
85 0 : bufferToSplit->DataStart() + splitOffset,
86 0 : len);
87 0 : InsertAfter(new_buffer, bufferToSplit);
88 0 : bufferToSplit->SetDataLength(splitOffset);
89 : }
90 0 : }
91 :
92 : void
93 153 : nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
94 : {
95 153 : if (aBuf == Head())
96 : {
97 195 : while (!mBuffers.isEmpty() && !Head()->IsInUse())
98 : {
99 21 : Buffer* buffer = Head();
100 21 : buffer->remove();
101 21 : free(buffer);
102 : }
103 : }
104 153 : }
105 :
106 : size_t
107 286 : nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
108 : {
109 286 : size_t result = 0;
110 286 : if (aStart.mBuffer == aEnd.mBuffer)
111 : {
112 286 : result = aEnd.mPosition - aStart.mPosition;
113 : }
114 : else
115 : {
116 0 : result = aStart.mBuffer->DataEnd() - aStart.mPosition;
117 0 : for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
118 0 : result += b->DataLength();
119 0 : result += aEnd.mPosition - aEnd.mBuffer->DataStart();
120 : }
121 286 : return result;
122 : }
123 :
124 :
125 : /**
126 : * nsScannerSubstring
127 : */
128 :
129 22 : nsScannerSubstring::nsScannerSubstring()
130 : : mStart(nullptr, nullptr)
131 : , mEnd(nullptr, nullptr)
132 : , mBufferList(nullptr)
133 : , mLength(0)
134 22 : , mIsDirty(true)
135 : {
136 22 : }
137 :
138 0 : nsScannerSubstring::nsScannerSubstring( const nsAString& s )
139 : : mBufferList(nullptr)
140 0 : , mIsDirty(true)
141 : {
142 0 : Rebind(s);
143 0 : }
144 :
145 42 : nsScannerSubstring::~nsScannerSubstring()
146 : {
147 21 : release_ownership_of_buffer_list();
148 21 : }
149 :
150 : int32_t
151 0 : nsScannerSubstring::CountChar( char16_t c ) const
152 : {
153 : /*
154 : re-write this to use a counting sink
155 : */
156 :
157 0 : size_type result = 0;
158 0 : size_type lengthToExamine = Length();
159 :
160 0 : nsScannerIterator iter;
161 0 : for ( BeginReading(iter); ; )
162 : {
163 0 : int32_t lengthToExamineInThisFragment = iter.size_forward();
164 0 : const char16_t* fromBegin = iter.get();
165 0 : result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
166 0 : if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
167 0 : return result;
168 0 : iter.advance(lengthToExamineInThisFragment);
169 0 : }
170 : // never reached; quiets warnings
171 : return 0;
172 : }
173 :
174 : void
175 0 : nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
176 : const nsScannerIterator& aStart,
177 : const nsScannerIterator& aEnd )
178 : {
179 : // allow for the case where &aString == this
180 :
181 0 : aString.acquire_ownership_of_buffer_list();
182 0 : release_ownership_of_buffer_list();
183 :
184 0 : mStart = aStart;
185 0 : mEnd = aEnd;
186 0 : mBufferList = aString.mBufferList;
187 0 : mLength = Distance(aStart, aEnd);
188 0 : mIsDirty = true;
189 0 : }
190 :
191 : void
192 0 : nsScannerSubstring::Rebind( const nsAString& aString )
193 : {
194 0 : release_ownership_of_buffer_list();
195 :
196 0 : mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
197 0 : mIsDirty = true;
198 :
199 0 : init_range_from_buffer_list();
200 0 : acquire_ownership_of_buffer_list();
201 0 : }
202 :
203 : const nsAString&
204 0 : nsScannerSubstring::AsString() const
205 : {
206 0 : if (mIsDirty)
207 : {
208 0 : nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
209 :
210 0 : if (mStart.mBuffer == mEnd.mBuffer) {
211 : // We only have a single fragment to deal with, so just return it
212 : // as a substring.
213 0 : mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
214 : } else {
215 : // Otherwise, we need to copy the data into a flattened buffer.
216 0 : nsScannerIterator start, end;
217 0 : CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep);
218 : }
219 :
220 0 : mutable_this->mIsDirty = false;
221 : }
222 :
223 0 : return mFlattenedRep;
224 : }
225 :
226 : nsScannerIterator&
227 198 : nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
228 : {
229 198 : iter.mOwner = this;
230 :
231 198 : iter.mFragment.mBuffer = mStart.mBuffer;
232 198 : iter.mFragment.mFragmentStart = mStart.mPosition;
233 198 : if (mStart.mBuffer == mEnd.mBuffer)
234 198 : iter.mFragment.mFragmentEnd = mEnd.mPosition;
235 : else
236 0 : iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
237 :
238 198 : iter.mPosition = mStart.mPosition;
239 198 : iter.normalize_forward();
240 198 : return iter;
241 : }
242 :
243 : nsScannerIterator&
244 22 : nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
245 : {
246 22 : iter.mOwner = this;
247 :
248 22 : iter.mFragment.mBuffer = mEnd.mBuffer;
249 22 : iter.mFragment.mFragmentEnd = mEnd.mPosition;
250 22 : if (mStart.mBuffer == mEnd.mBuffer)
251 22 : iter.mFragment.mFragmentStart = mStart.mPosition;
252 : else
253 0 : iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
254 :
255 22 : iter.mPosition = mEnd.mPosition;
256 : // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
257 22 : return iter;
258 : }
259 :
260 : bool
261 177 : nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
262 : {
263 : // check to see if we are at the end of the buffer list
264 177 : if (frag.mBuffer == mEnd.mBuffer)
265 177 : return false;
266 :
267 0 : frag.mBuffer = frag.mBuffer->getNext();
268 :
269 0 : if (frag.mBuffer == mStart.mBuffer)
270 0 : frag.mFragmentStart = mStart.mPosition;
271 : else
272 0 : frag.mFragmentStart = frag.mBuffer->DataStart();
273 :
274 0 : if (frag.mBuffer == mEnd.mBuffer)
275 0 : frag.mFragmentEnd = mEnd.mPosition;
276 : else
277 0 : frag.mFragmentEnd = frag.mBuffer->DataEnd();
278 :
279 0 : return true;
280 : }
281 :
282 : bool
283 0 : nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
284 : {
285 : // check to see if we are at the beginning of the buffer list
286 0 : if (frag.mBuffer == mStart.mBuffer)
287 0 : return false;
288 :
289 0 : frag.mBuffer = frag.mBuffer->getPrevious();
290 :
291 0 : if (frag.mBuffer == mStart.mBuffer)
292 0 : frag.mFragmentStart = mStart.mPosition;
293 : else
294 0 : frag.mFragmentStart = frag.mBuffer->DataStart();
295 :
296 0 : if (frag.mBuffer == mEnd.mBuffer)
297 0 : frag.mFragmentEnd = mEnd.mPosition;
298 : else
299 0 : frag.mFragmentEnd = frag.mBuffer->DataEnd();
300 :
301 0 : return true;
302 : }
303 :
304 :
305 : /**
306 : * nsScannerString
307 : */
308 :
309 22 : nsScannerString::nsScannerString( Buffer* aBuf )
310 : {
311 22 : mBufferList = new nsScannerBufferList(aBuf);
312 :
313 22 : init_range_from_buffer_list();
314 22 : acquire_ownership_of_buffer_list();
315 22 : }
316 :
317 : void
318 0 : nsScannerString::AppendBuffer( Buffer* aBuf )
319 : {
320 0 : mBufferList->Append(aBuf);
321 0 : mLength += aBuf->DataLength();
322 :
323 0 : mEnd.mBuffer = aBuf;
324 0 : mEnd.mPosition = aBuf->DataEnd();
325 :
326 0 : mIsDirty = true;
327 0 : }
328 :
329 : void
330 132 : nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
331 : {
332 132 : Position old_start(mStart);
333 132 : mStart = aIter;
334 132 : mLength -= Position::Distance(old_start, mStart);
335 :
336 132 : mStart.mBuffer->IncrementUsageCount();
337 132 : old_start.mBuffer->DecrementUsageCount();
338 :
339 132 : mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
340 :
341 132 : mIsDirty = true;
342 132 : }
343 :
344 : void
345 0 : nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
346 : /*
347 : * Warning: this routine manipulates the shared buffer list in an unexpected way.
348 : * The original design did not really allow for insertions, but this call promises
349 : * that if called for a point after the end of all extant token strings, that no token string
350 : * or the work string will be invalidated.
351 : *
352 : * This routine is protected because it is the responsibility of the derived class to keep those promises.
353 : */
354 : {
355 0 : Position insertPos(aInsertPoint);
356 :
357 0 : mBufferList->SplitBuffer(insertPos);
358 : // splitting to the right keeps the work string and any extant token pointing to and
359 : // holding a reference count on the same buffer
360 :
361 0 : Buffer* new_buffer = AllocBufferFromString(aReadable);
362 : // make a new buffer with all the data to insert...
363 : // BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
364 : // of this and decide if we should do the work to fill it
365 :
366 0 : Buffer* buffer_to_split = insertPos.mBuffer;
367 0 : mBufferList->InsertAfter(new_buffer, buffer_to_split);
368 0 : mLength += aReadable.Length();
369 :
370 0 : mEnd.mBuffer = mBufferList->Tail();
371 0 : mEnd.mPosition = mEnd.mBuffer->DataEnd();
372 :
373 0 : mIsDirty = true;
374 0 : }
375 :
376 : /**
377 : * nsScannerSharedSubstring
378 : */
379 :
380 : void
381 0 : nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
382 : const nsScannerIterator &aEnd)
383 : {
384 : // If the start and end positions are inside the same buffer, we must
385 : // acquire ownership of the buffer. If not, we can optimize by not holding
386 : // onto it.
387 :
388 0 : Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
389 0 : bool sameBuffer = buffer == aEnd.buffer();
390 :
391 : nsScannerBufferList *bufferList;
392 :
393 0 : if (sameBuffer) {
394 0 : bufferList = aStart.mOwner->mBufferList;
395 0 : bufferList->AddRef();
396 0 : buffer->IncrementUsageCount();
397 : }
398 :
399 0 : if (mBufferList)
400 0 : ReleaseBuffer();
401 :
402 0 : if (sameBuffer) {
403 0 : mBuffer = buffer;
404 0 : mBufferList = bufferList;
405 0 : mString.Rebind(aStart.mPosition, aEnd.mPosition);
406 : } else {
407 0 : mBuffer = nullptr;
408 0 : mBufferList = nullptr;
409 0 : CopyUnicodeTo(aStart, aEnd, mString);
410 : }
411 0 : }
412 :
413 : void
414 0 : nsScannerSharedSubstring::ReleaseBuffer()
415 : {
416 0 : NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
417 0 : mBuffer->DecrementUsageCount();
418 0 : mBufferList->DiscardUnreferencedPrefix(mBuffer);
419 0 : mBufferList->Release();
420 0 : }
421 :
422 : void
423 0 : nsScannerSharedSubstring::MakeMutable()
424 : {
425 0 : nsString temp(mString); // this will force a copy of the data
426 0 : mString.Assign(temp); // mString will now share the just-allocated buffer
427 :
428 0 : ReleaseBuffer();
429 :
430 0 : mBuffer = nullptr;
431 0 : mBufferList = nullptr;
432 0 : }
433 :
434 : /**
435 : * utils -- based on code from nsReadableUtils.cpp
436 : */
437 :
438 : // private helper function
439 : static inline
440 : nsAString::iterator&
441 23 : copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
442 : {
443 : typedef nsCharSourceTraits<nsScannerIterator> source_traits;
444 : typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
445 :
446 24 : while ( first != last )
447 : {
448 1 : uint32_t distance = source_traits::readable_distance(first, last);
449 1 : sink_traits::write(result, source_traits::read(first), distance);
450 1 : NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");
451 1 : source_traits::advance(first, distance);
452 : }
453 :
454 22 : return result;
455 : }
456 :
457 : bool
458 22 : CopyUnicodeTo( const nsScannerIterator& aSrcStart,
459 : const nsScannerIterator& aSrcEnd,
460 : nsAString& aDest )
461 : {
462 22 : nsAString::iterator writer;
463 :
464 22 : mozilla::CheckedInt<nsAString::size_type> distance(Distance(aSrcStart, aSrcEnd));
465 22 : if (!distance.isValid()) {
466 0 : return false; // overflow detected
467 : }
468 :
469 22 : if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
470 0 : aDest.Truncate();
471 0 : return false; // out of memory
472 : }
473 22 : aDest.BeginWriting(writer);
474 22 : nsScannerIterator fromBegin(aSrcStart);
475 :
476 22 : copy_multifragment_string(fromBegin, aSrcEnd, writer);
477 22 : return true;
478 : }
479 :
480 : bool
481 0 : AppendUnicodeTo( const nsScannerIterator& aSrcStart,
482 : const nsScannerIterator& aSrcEnd,
483 : nsScannerSharedSubstring& aDest )
484 : {
485 : // Check whether we can just create a dependent string.
486 0 : if (aDest.str().IsEmpty()) {
487 : // We can just make |aDest| point to the buffer.
488 : // This will take care of copying if the buffer spans fragments.
489 0 : aDest.Rebind(aSrcStart, aSrcEnd);
490 0 : return true;
491 : }
492 : // The dest string is not empty, so it can't be a dependent substring.
493 0 : return AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
494 : }
495 :
496 : bool
497 0 : AppendUnicodeTo( const nsScannerIterator& aSrcStart,
498 : const nsScannerIterator& aSrcEnd,
499 : nsAString& aDest )
500 : {
501 0 : nsAString::iterator writer;
502 0 : const nsAString::size_type oldLength = aDest.Length();
503 0 : CheckedInt<nsAString::size_type> newLen(Distance(aSrcStart, aSrcEnd));
504 0 : newLen += oldLength;
505 0 : if (!newLen.isValid()) {
506 0 : return false; // overflow detected
507 : }
508 :
509 0 : if (!aDest.SetLength(newLen.value(), mozilla::fallible))
510 0 : return false; // out of memory
511 0 : aDest.BeginWriting(writer).advance(oldLength);
512 0 : nsScannerIterator fromBegin(aSrcStart);
513 :
514 0 : copy_multifragment_string(fromBegin, aSrcEnd, writer);
515 0 : return true;
516 : }
517 :
518 : bool
519 0 : FindCharInReadable( char16_t aChar,
520 : nsScannerIterator& aSearchStart,
521 : const nsScannerIterator& aSearchEnd )
522 : {
523 0 : while ( aSearchStart != aSearchEnd )
524 : {
525 : int32_t fragmentLength;
526 0 : if ( SameFragment(aSearchStart, aSearchEnd) )
527 0 : fragmentLength = aSearchEnd.get() - aSearchStart.get();
528 : else
529 0 : fragmentLength = aSearchStart.size_forward();
530 :
531 0 : const char16_t* charFoundAt = nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
532 0 : if ( charFoundAt ) {
533 0 : aSearchStart.advance( charFoundAt - aSearchStart.get() );
534 0 : return true;
535 : }
536 :
537 0 : aSearchStart.advance(fragmentLength);
538 : }
539 :
540 0 : return false;
541 : }
542 :
543 : bool
544 0 : FindInReadable( const nsAString& aPattern,
545 : nsScannerIterator& aSearchStart,
546 : nsScannerIterator& aSearchEnd,
547 : const nsStringComparator& compare )
548 : {
549 0 : bool found_it = false;
550 :
551 : // only bother searching at all if we're given a non-empty range to search
552 0 : if ( aSearchStart != aSearchEnd )
553 : {
554 0 : nsAString::const_iterator aPatternStart, aPatternEnd;
555 0 : aPattern.BeginReading(aPatternStart);
556 0 : aPattern.EndReading(aPatternEnd);
557 :
558 : // outer loop keeps searching till we find it or run out of string to search
559 0 : while ( !found_it )
560 : {
561 : // fast inner loop (that's what it's called, not what it is) looks for a potential match
562 0 : while ( aSearchStart != aSearchEnd &&
563 0 : compare(aPatternStart.get(), aSearchStart.get(), 1, 1) )
564 0 : ++aSearchStart;
565 :
566 : // if we broke out of the `fast' loop because we're out of string ... we're done: no match
567 0 : if ( aSearchStart == aSearchEnd )
568 0 : break;
569 :
570 : // otherwise, we're at a potential match, let's see if we really hit one
571 0 : nsAString::const_iterator testPattern(aPatternStart);
572 0 : nsScannerIterator testSearch(aSearchStart);
573 :
574 : // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
575 : for(;;)
576 : {
577 : // we already compared the first character in the outer loop,
578 : // so we'll advance before the next comparison
579 0 : ++testPattern;
580 0 : ++testSearch;
581 :
582 : // if we verified all the way to the end of the pattern, then we found it!
583 0 : if ( testPattern == aPatternEnd )
584 : {
585 0 : found_it = true;
586 0 : aSearchEnd = testSearch; // return the exact found range through the parameters
587 0 : break;
588 : }
589 :
590 : // if we got to end of the string we're searching before we hit the end of the
591 : // pattern, we'll never find what we're looking for
592 0 : if ( testSearch == aSearchEnd )
593 : {
594 0 : aSearchStart = aSearchEnd;
595 0 : break;
596 : }
597 :
598 : // else if we mismatched ... it's time to advance to the next search position
599 : // and get back into the `fast' loop
600 0 : if ( compare(testPattern.get(), testSearch.get(), 1, 1) )
601 : {
602 0 : ++aSearchStart;
603 0 : break;
604 : }
605 : }
606 : }
607 : }
608 :
609 0 : return found_it;
610 : }
611 :
612 : /**
613 : * This implementation is simple, but does too much work.
614 : * It searches the entire string from left to right, and returns the last match found, if any.
615 : * This implementation will be replaced when I get |reverse_iterator|s working.
616 : */
617 : bool
618 0 : RFindInReadable( const nsAString& aPattern,
619 : nsScannerIterator& aSearchStart,
620 : nsScannerIterator& aSearchEnd,
621 : const nsStringComparator& aComparator )
622 : {
623 0 : bool found_it = false;
624 :
625 0 : nsScannerIterator savedSearchEnd(aSearchEnd);
626 0 : nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
627 :
628 0 : while ( searchStart != searchEnd )
629 : {
630 0 : if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
631 : {
632 0 : found_it = true;
633 :
634 : // this is the best match so far, so remember it
635 0 : aSearchStart = searchStart;
636 0 : aSearchEnd = searchEnd;
637 :
638 : // ...and get ready to search some more
639 : // (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
640 0 : ++searchStart;
641 0 : searchEnd = savedSearchEnd;
642 : }
643 : }
644 :
645 : // if we never found it, return an empty range
646 0 : if ( !found_it )
647 0 : aSearchStart = aSearchEnd;
648 :
649 0 : return found_it;
650 : }
|