LCOV - code coverage report
Current view: top level - intl/icu/source/common - edits.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 209 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 14 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2017 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : 
       4             : // edits.cpp
       5             : // created: 2017feb08 Markus W. Scherer
       6             : 
       7             : #include "unicode/utypes.h"
       8             : #include "unicode/edits.h"
       9             : #include "cmemory.h"
      10             : #include "uassert.h"
      11             : 
      12             : U_NAMESPACE_BEGIN
      13             : 
      14             : namespace {
      15             : 
      16             : // 0000uuuuuuuuuuuu records u+1 unchanged text units.
      17             : const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
      18             : const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
      19             : 
      20             : // 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
      21             : // No length change.
      22             : const int32_t MAX_SHORT_WIDTH = 6;
      23             : const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
      24             : const int32_t MAX_SHORT_CHANGE = 0x6fff;
      25             : 
      26             : // 0111mmmmmmnnnnnn records a replacement of m text units with n.
      27             : // m or n = 61: actual length follows in the next edits array unit.
      28             : // m or n = 62..63: actual length follows in the next two edits array units.
      29             : // Bit 30 of the actual length is in the head unit.
      30             : // Trailing units have bit 15 set.
      31             : const int32_t LENGTH_IN_1TRAIL = 61;
      32             : const int32_t LENGTH_IN_2TRAIL = 62;
      33             : 
      34             : }  // namespace
      35             : 
      36           0 : Edits::~Edits() {
      37           0 :     if(array != stackArray) {
      38           0 :         uprv_free(array);
      39             :     }
      40           0 : }
      41             : 
      42           0 : void Edits::reset() {
      43           0 :     length = delta = 0;
      44           0 : }
      45             : 
      46           0 : void Edits::addUnchanged(int32_t unchangedLength) {
      47           0 :     if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
      48           0 :     if(unchangedLength < 0) {
      49           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
      50           0 :         return;
      51             :     }
      52             :     // Merge into previous unchanged-text record, if any.
      53           0 :     int32_t last = lastUnit();
      54           0 :     if(last < MAX_UNCHANGED) {
      55           0 :         int32_t remaining = MAX_UNCHANGED - last;
      56           0 :         if (remaining >= unchangedLength) {
      57           0 :             setLastUnit(last + unchangedLength);
      58           0 :             return;
      59             :         }
      60           0 :         setLastUnit(MAX_UNCHANGED);
      61           0 :         unchangedLength -= remaining;
      62             :     }
      63             :     // Split large lengths into multiple units.
      64           0 :     while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
      65           0 :         append(MAX_UNCHANGED);
      66           0 :         unchangedLength -= MAX_UNCHANGED_LENGTH;
      67             :     }
      68             :     // Write a small (remaining) length.
      69           0 :     if(unchangedLength > 0) {
      70           0 :         append(unchangedLength - 1);
      71             :     }
      72             : }
      73             : 
      74           0 : void Edits::addReplace(int32_t oldLength, int32_t newLength) {
      75           0 :     if(U_FAILURE(errorCode)) { return; }
      76           0 :     if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
      77             :         // Replacement of short oldLength text units by same-length new text.
      78             :         // Merge into previous short-replacement record, if any.
      79           0 :         int32_t last = lastUnit();
      80           0 :         if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
      81           0 :                 (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
      82           0 :             setLastUnit(last + 1);
      83           0 :             return;
      84             :         }
      85           0 :         append(oldLength << 12);
      86           0 :         return;
      87             :     }
      88             : 
      89           0 :     if(oldLength < 0 || newLength < 0) {
      90           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
      91           0 :         return;
      92             :     }
      93           0 :     if (oldLength == 0 && newLength == 0) {
      94           0 :         return;
      95             :     }
      96           0 :     int32_t newDelta = newLength - oldLength;
      97           0 :     if (newDelta != 0) {
      98           0 :         if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
      99           0 :                 (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
     100             :             // Integer overflow or underflow.
     101           0 :             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     102           0 :             return;
     103             :         }
     104           0 :         delta += newDelta;
     105             :     }
     106             : 
     107           0 :     int32_t head = 0x7000;
     108           0 :     if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
     109           0 :         head |= oldLength << 6;
     110           0 :         head |= newLength;
     111           0 :         append(head);
     112           0 :     } else if ((capacity - length) >= 5 || growArray()) {
     113           0 :         int32_t limit = length + 1;
     114           0 :         if(oldLength < LENGTH_IN_1TRAIL) {
     115           0 :             head |= oldLength << 6;
     116           0 :         } else if(oldLength <= 0x7fff) {
     117           0 :             head |= LENGTH_IN_1TRAIL << 6;
     118           0 :             array[limit++] = (uint16_t)(0x8000 | oldLength);
     119             :         } else {
     120           0 :             head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
     121           0 :             array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15));
     122           0 :             array[limit++] = (uint16_t)(0x8000 | oldLength);
     123             :         }
     124           0 :         if(newLength < LENGTH_IN_1TRAIL) {
     125           0 :             head |= newLength;
     126           0 :         } else if(newLength <= 0x7fff) {
     127           0 :             head |= LENGTH_IN_1TRAIL;
     128           0 :             array[limit++] = (uint16_t)(0x8000 | newLength);
     129             :         } else {
     130           0 :             head |= LENGTH_IN_2TRAIL + (newLength >> 30);
     131           0 :             array[limit++] = (uint16_t)(0x8000 | (newLength >> 15));
     132           0 :             array[limit++] = (uint16_t)(0x8000 | newLength);
     133             :         }
     134           0 :         array[length] = (uint16_t)head;
     135           0 :         length = limit;
     136             :     }
     137             : }
     138             : 
     139           0 : void Edits::append(int32_t r) {
     140           0 :     if(length < capacity || growArray()) {
     141           0 :         array[length++] = (uint16_t)r;
     142             :     }
     143           0 : }
     144             : 
     145           0 : UBool Edits::growArray() {
     146             :     int32_t newCapacity;
     147           0 :     if (array == stackArray) {
     148           0 :         newCapacity = 2000;
     149           0 :     } else if (capacity == INT32_MAX) {
     150             :         // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
     151             :         // with a result-string-buffer overflow.
     152           0 :         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     153           0 :         return FALSE;
     154           0 :     } else if (capacity >= (INT32_MAX / 2)) {
     155           0 :         newCapacity = INT32_MAX;
     156             :     } else {
     157           0 :         newCapacity = 2 * capacity;
     158             :     }
     159             :     // Grow by at least 5 units so that a maximal change record will fit.
     160           0 :     if ((newCapacity - capacity) < 5) {
     161           0 :         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     162           0 :         return FALSE;
     163             :     }
     164           0 :     uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
     165           0 :     if (newArray == NULL) {
     166           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     167           0 :         return FALSE;
     168             :     }
     169           0 :     uprv_memcpy(newArray, array, (size_t)length * 2);
     170           0 :     if (array != stackArray) {
     171           0 :         uprv_free(array);
     172             :     }
     173           0 :     array = newArray;
     174           0 :     capacity = newCapacity;
     175           0 :     return TRUE;
     176             : }
     177             : 
     178           0 : UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
     179           0 :     if (U_FAILURE(outErrorCode)) { return TRUE; }
     180           0 :     if (U_SUCCESS(errorCode)) { return FALSE; }
     181           0 :     outErrorCode = errorCode;
     182           0 :     return TRUE;
     183             : }
     184             : 
     185           0 : UBool Edits::hasChanges() const {
     186           0 :     if (delta != 0) {
     187           0 :         return TRUE;
     188             :     }
     189           0 :     for (int32_t i = 0; i < length; ++i) {
     190           0 :         if (array[i] > MAX_UNCHANGED) {
     191           0 :             return TRUE;
     192             :         }
     193             :     }
     194           0 :     return FALSE;
     195             : }
     196             : 
     197           0 : Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
     198             :         array(a), index(0), length(len), remaining(0),
     199             :         onlyChanges_(oc), coarse(crs),
     200             :         changed(FALSE), oldLength_(0), newLength_(0),
     201           0 :         srcIndex(0), replIndex(0), destIndex(0) {}
     202             : 
     203           0 : int32_t Edits::Iterator::readLength(int32_t head) {
     204           0 :     if (head < LENGTH_IN_1TRAIL) {
     205           0 :         return head;
     206           0 :     } else if (head < LENGTH_IN_2TRAIL) {
     207           0 :         U_ASSERT(index < length);
     208           0 :         U_ASSERT(array[index] >= 0x8000);
     209           0 :         return array[index++] & 0x7fff;
     210             :     } else {
     211           0 :         U_ASSERT((index + 2) <= length);
     212           0 :         U_ASSERT(array[index] >= 0x8000);
     213           0 :         U_ASSERT(array[index + 1] >= 0x8000);
     214           0 :         int32_t len = ((head & 1) << 30) |
     215           0 :                 ((int32_t)(array[index] & 0x7fff) << 15) |
     216           0 :                 (array[index + 1] & 0x7fff);
     217           0 :         index += 2;
     218           0 :         return len;
     219             :     }
     220             : }
     221             : 
     222           0 : void Edits::Iterator::updateIndexes() {
     223           0 :     srcIndex += oldLength_;
     224           0 :     if (changed) {
     225           0 :         replIndex += newLength_;
     226             :     }
     227           0 :     destIndex += newLength_;
     228           0 : }
     229             : 
     230           0 : UBool Edits::Iterator::noNext() {
     231             :     // No change beyond the string.
     232           0 :     changed = FALSE;
     233           0 :     oldLength_ = newLength_ = 0;
     234           0 :     return FALSE;
     235             : }
     236             : 
     237           0 : UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
     238           0 :     if (U_FAILURE(errorCode)) { return FALSE; }
     239             :     // We have an errorCode in case we need to start guarding against integer overflows.
     240             :     // It is also convenient for caller loops if we bail out when an error was set elsewhere.
     241           0 :     updateIndexes();
     242           0 :     if (remaining > 0) {
     243             :         // Fine-grained iterator: Continue a sequence of equal-length changes.
     244           0 :         --remaining;
     245           0 :         return TRUE;
     246             :     }
     247           0 :     if (index >= length) {
     248           0 :         return noNext();
     249             :     }
     250           0 :     int32_t u = array[index++];
     251           0 :     if (u <= MAX_UNCHANGED) {
     252             :         // Combine adjacent unchanged ranges.
     253           0 :         changed = FALSE;
     254           0 :         oldLength_ = u + 1;
     255           0 :         while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
     256           0 :             ++index;
     257           0 :             oldLength_ += u + 1;
     258             :         }
     259           0 :         newLength_ = oldLength_;
     260           0 :         if (onlyChanges) {
     261           0 :             updateIndexes();
     262           0 :             if (index >= length) {
     263           0 :                 return noNext();
     264             :             }
     265             :             // already fetched u > MAX_UNCHANGED at index
     266           0 :             ++index;
     267             :         } else {
     268           0 :             return TRUE;
     269             :         }
     270             :     }
     271           0 :     changed = TRUE;
     272           0 :     if (u <= MAX_SHORT_CHANGE) {
     273           0 :         if (coarse) {
     274           0 :             int32_t w = u >> 12;
     275           0 :             int32_t len = (u & 0xfff) + 1;
     276           0 :             oldLength_ = newLength_ = len * w;
     277             :         } else {
     278             :             // Split a sequence of equal-length changes that was compressed into one unit.
     279           0 :             oldLength_ = newLength_ = u >> 12;
     280           0 :             remaining = u & 0xfff;
     281           0 :             return TRUE;
     282             :         }
     283             :     } else {
     284           0 :         U_ASSERT(u <= 0x7fff);
     285           0 :         oldLength_ = readLength((u >> 6) & 0x3f);
     286           0 :         newLength_ = readLength(u & 0x3f);
     287           0 :         if (!coarse) {
     288           0 :             return TRUE;
     289             :         }
     290             :     }
     291             :     // Combine adjacent changes.
     292           0 :     while (index < length && (u = array[index]) > MAX_UNCHANGED) {
     293           0 :         ++index;
     294           0 :         if (u <= MAX_SHORT_CHANGE) {
     295           0 :             int32_t w = u >> 12;
     296           0 :             int32_t len = (u & 0xfff) + 1;
     297           0 :             len = len * w;
     298           0 :             oldLength_ += len;
     299           0 :             newLength_ += len;
     300             :         } else {
     301           0 :             U_ASSERT(u <= 0x7fff);
     302           0 :             int32_t oldLen = readLength((u >> 6) & 0x3f);
     303           0 :             int32_t newLen = readLength(u & 0x3f);
     304           0 :             oldLength_ += oldLen;
     305           0 :             newLength_ += newLen;
     306             :         }
     307             :     }
     308           0 :     return TRUE;
     309             : }
     310             : 
     311           0 : UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
     312           0 :     if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
     313           0 :     if (i < srcIndex) {
     314             :         // Reset the iterator to the start.
     315           0 :         index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
     316           0 :     } else if (i < (srcIndex + oldLength_)) {
     317             :         // The index is in the current span.
     318           0 :         return TRUE;
     319             :     }
     320           0 :     while (next(FALSE, errorCode)) {
     321           0 :         if (i < (srcIndex + oldLength_)) {
     322             :             // The index is in the current span.
     323           0 :             return TRUE;
     324             :         }
     325           0 :         if (remaining > 0) {
     326             :             // Is the index in one of the remaining compressed edits?
     327             :             // srcIndex is the start of the current span, before the remaining ones.
     328           0 :             int32_t len = (remaining + 1) * oldLength_;
     329           0 :             if (i < (srcIndex + len)) {
     330           0 :                 int32_t n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
     331           0 :                 len = n * oldLength_;
     332           0 :                 srcIndex += len;
     333           0 :                 replIndex += len;
     334           0 :                 destIndex += len;
     335           0 :                 remaining -= n;
     336           0 :                 return TRUE;
     337             :             }
     338             :             // Make next() skip all of these edits at once.
     339           0 :             oldLength_ = newLength_ = len;
     340           0 :             remaining = 0;
     341             :         }
     342             :     }
     343           0 :     return FALSE;
     344             : }
     345             : 
     346             : U_NAMESPACE_END

Generated by: LCOV version 1.13