LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationcompare.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 172 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 1 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 1996-2015, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationcompare.cpp
       9             : *
      10             : * created on: 2012feb14 with new and old collation code
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #include "unicode/utypes.h"
      15             : 
      16             : #if !UCONFIG_NO_COLLATION
      17             : 
      18             : #include "unicode/ucol.h"
      19             : #include "cmemory.h"
      20             : #include "collation.h"
      21             : #include "collationcompare.h"
      22             : #include "collationiterator.h"
      23             : #include "collationsettings.h"
      24             : #include "uassert.h"
      25             : 
      26             : U_NAMESPACE_BEGIN
      27             : // Compares two CE streams level by level: primary first, then secondary, case, tertiary and quaternary as enabled by settings.options.
      28             : UCollationResult
      29           0 : CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterator &right,
      30             :                                         const CollationSettings &settings,
      31             :                                         UErrorCode &errorCode) {
      32           0 :     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
      33             :     // variableTop stays 0 when no ALTERNATE_MASK bit is set, so the "< variableTop" tests below never match.
      34           0 :     int32_t options = settings.options;
      35             :     uint32_t variableTop;
      36           0 :     if((options & CollationSettings::ALTERNATE_MASK) == 0) {
      37           0 :         variableTop = 0;
      38             :     } else {
      39             :         // +1 so that we can use "<" and primary ignorables test out early.
      40           0 :         variableTop = settings.variableTop + 1;
      41             :     }
      42           0 :     UBool anyVariable = FALSE;
      43             :     // anyVariable is set when either side yields a variable CE; it is consulted before the quaternary pass.
      44             :     // Fetch CEs, compare primaries, store secondary & tertiary weights.
      45             :     for(;;) {
      46             :         // We fetch CEs until we get a non-ignorable primary or reach the end.
      47             :         uint32_t leftPrimary;
      48           0 :         do {
      49           0 :             int64_t ce = left.nextCE(errorCode);
      50           0 :             leftPrimary = (uint32_t)(ce >> 32);
      51           0 :             if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
      52             :                 // Variable CE, shift it to quaternary level.
      53             :                 // Ignore all following primary ignorables, and shift further variable CEs.
      54           0 :                 anyVariable = TRUE;
      55           0 :                 do {
      56             :                     // Store only the primary of the variable CE.
      57           0 :                     left.setCurrentCE(ce & INT64_C(0xffffffff00000000));
      58             :                     for(;;) {
      59           0 :                         ce = left.nextCE(errorCode);
      60           0 :                         leftPrimary = (uint32_t)(ce >> 32);
      61           0 :                         if(leftPrimary == 0) {
      62           0 :                             left.setCurrentCE(0);
      63             :                         } else {
      64           0 :                             break;
      65             :                         }
      66             :                     }
      67           0 :                 } while(leftPrimary < variableTop &&
      68             :                         leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
      69             :             }
      70           0 :         } while(leftPrimary == 0);
      71             :         // Same fetch-and-shift loop for the right iterator.
      72             :         uint32_t rightPrimary;
      73           0 :         do {
      74           0 :             int64_t ce = right.nextCE(errorCode);
      75           0 :             rightPrimary = (uint32_t)(ce >> 32);
      76           0 :             if(rightPrimary < variableTop && rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
      77             :                 // Variable CE, shift it to quaternary level.
      78             :                 // Ignore all following primary ignorables, and shift further variable CEs.
      79           0 :                 anyVariable = TRUE;
      80           0 :                 do {
      81             :                     // Store only the primary of the variable CE.
      82           0 :                     right.setCurrentCE(ce & INT64_C(0xffffffff00000000));
      83             :                     for(;;) {
      84           0 :                         ce = right.nextCE(errorCode);
      85           0 :                         rightPrimary = (uint32_t)(ce >> 32);
      86           0 :                         if(rightPrimary == 0) {
      87           0 :                             right.setCurrentCE(0);
      88             :                         } else {
      89           0 :                             break;
      90             :                         }
      91             :                     }
      92           0 :                 } while(rightPrimary < variableTop &&
      93             :                         rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
      94             :             }
      95           0 :         } while(rightPrimary == 0);
      96             :         // Both fetch loops exit only once the primary is non-zero.
      97           0 :         if(leftPrimary != rightPrimary) {
      98             :             // Return the primary difference, with script reordering.
      99           0 :             if(settings.hasReordering()) {
     100           0 :                 leftPrimary = settings.reorder(leftPrimary);
     101           0 :                 rightPrimary = settings.reorder(rightPrimary);
     102             :             }
     103           0 :             return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
     104             :         }
     105           0 :         if(leftPrimary == Collation::NO_CE_PRIMARY) { break; }
     106           0 :     }
     107           0 :     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
     108             :     // (Errors set by nextCE() during the primary pass are caught by the check above.)
     109             :     // Compare the buffered secondary & tertiary weights.
     110             :     // We might skip the secondary level but continue with the case level
     111             :     // which is turned on separately.
     112           0 :     if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
     113           0 :         if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
     114           0 :             int32_t leftIndex = 0;
     115           0 :             int32_t rightIndex = 0;
     116             :             for(;;) {
     117             :                 uint32_t leftSecondary;
     118           0 :                 do {
     119           0 :                     leftSecondary = ((uint32_t)left.getCE(leftIndex++)) >> 16;
     120           0 :                 } while(leftSecondary == 0);
     121             : 
     122             :                 uint32_t rightSecondary;
     123           0 :                 do {
     124           0 :                     rightSecondary = ((uint32_t)right.getCE(rightIndex++)) >> 16;
     125           0 :                 } while(rightSecondary == 0);
     126             : 
     127           0 :                 if(leftSecondary != rightSecondary) {
     128           0 :                     return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
     129             :                 }
     130           0 :                 if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; }
     131           0 :             }
     132             :         } else {
     133             :             // The backwards secondary level compares secondary weights backwards
     134             :             // within segments separated by the merge separator (U+FFFE, weight 02).
     135           0 :             int32_t leftStart = 0;
     136           0 :             int32_t rightStart = 0;
     137             :             for(;;) {
     138             :                 // Find the merge separator or the NO_CE terminator.
     139             :                 uint32_t p;
     140           0 :                 int32_t leftLimit = leftStart;
     141           0 :                 while((p = (uint32_t)(left.getCE(leftLimit) >> 32)) >
     142           0 :                             Collation::MERGE_SEPARATOR_PRIMARY ||
     143             :                         p == 0) {
     144           0 :                     ++leftLimit;
     145             :                 }
     146           0 :                 int32_t rightLimit = rightStart;
     147           0 :                 while((p = (uint32_t)(right.getCE(rightLimit) >> 32)) >
     148           0 :                             Collation::MERGE_SEPARATOR_PRIMARY ||
     149             :                         p == 0) {
     150           0 :                     ++rightLimit;
     151             :                 }
     152             : 
     153             :                 // Compare the segments.
     154           0 :                 int32_t leftIndex = leftLimit;
     155           0 :                 int32_t rightIndex = rightLimit;
     156             :                 for(;;) {
     157           0 :                     int32_t leftSecondary = 0;
     158           0 :                     while(leftSecondary == 0 && leftIndex > leftStart) {
     159           0 :                         leftSecondary = ((uint32_t)left.getCE(--leftIndex)) >> 16;
     160             :                     }
     161             : 
     162           0 :                     int32_t rightSecondary = 0;
     163           0 :                     while(rightSecondary == 0 && rightIndex > rightStart) {
     164           0 :                         rightSecondary = ((uint32_t)right.getCE(--rightIndex)) >> 16;
     165             :                     }
     166             : 
     167           0 :                     if(leftSecondary != rightSecondary) {
     168           0 :                         return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
     169             :                     }
     170           0 :                     if(leftSecondary == 0) { break; }
     171           0 :                 }
     172             : 
     173             :                 // Did we reach the end of either string?
     174             :                 // Both strings have the same number of merge separators,
     175             :                 // or else there would have been a primary-level difference.
     176           0 :                 U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
     177           0 :                 if(p == Collation::NO_CE_PRIMARY) { break; }
     178             :                 // Skip both merge separators and continue.
     179           0 :                 leftStart = leftLimit + 1;
     180           0 :                 rightStart = rightLimit + 1;
     181           0 :             }
     182             :         }
     183             :     }
     184             :     // Case level: runs whenever CASE_LEVEL is set in options, whether or not the secondary level was compared.
     185           0 :     if((options & CollationSettings::CASE_LEVEL) != 0) {
     186           0 :         int32_t strength = CollationSettings::getStrength(options);
     187           0 :         int32_t leftIndex = 0;
     188           0 :         int32_t rightIndex = 0;
     189             :         for(;;) {
     190             :             uint32_t leftCase, leftLower32, rightCase;
     191           0 :             if(strength == UCOL_PRIMARY) {
     192             :                 // Primary+caseLevel: Ignore case level weights of primary ignorables.
     193             :                 // Otherwise we would get a-umlaut > a
     194             :                 // which is not desirable for accent-insensitive sorting.
     195             :                 // Check for (lower 32 bits) == 0 as well because variable CEs are stored
     196             :                 // with only primary weights.
     197             :                 int64_t ce;
     198           0 :                 do {
     199           0 :                     ce = left.getCE(leftIndex++);
     200           0 :                     leftCase = (uint32_t)ce;
     201           0 :                 } while((uint32_t)(ce >> 32) == 0 || leftCase == 0);
     202           0 :                 leftLower32 = leftCase;
     203           0 :                 leftCase &= 0xc000;
     204             :                 // Right side: same skipping rule; only its case bits (0xc000) are kept.
     205           0 :                 do {
     206           0 :                     ce = right.getCE(rightIndex++);
     207           0 :                     rightCase = (uint32_t)ce;
     208           0 :                 } while((uint32_t)(ce >> 32) == 0 || rightCase == 0);
     209           0 :                 rightCase &= 0xc000;
     210             :             } else {
     211             :                 // Secondary+caseLevel: By analogy with the above,
     212             :                 // ignore case level weights of secondary ignorables.
     213             :                 //
     214             :                 // Note: A tertiary CE has uppercase case bits (0.0.ut)
     215             :                 // to keep tertiary+caseFirst well-formed.
     216             :                 //
     217             :                 // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
     218             :                 // Otherwise a tertiary CE's uppercase would be no greater than
     219             :                 // a primary/secondary CE's uppercase.
     220             :                 // (See UCA well-formedness condition 2.)
     221             :                 // We could construct a special case weight higher than uppercase,
     222             :                 // but it's simpler to always ignore case weights of secondary ignorables,
     223             :                 // turning 0.0.ut into 0.0.0.t.
     224             :                 // (See LDML Collation, Case Parameters.)
     225           0 :                 do {
     226           0 :                     leftCase = (uint32_t)left.getCE(leftIndex++);
     227           0 :                 } while(leftCase <= 0xffff);
     228           0 :                 leftLower32 = leftCase;
     229           0 :                 leftCase &= 0xc000;
     230             : 
     231           0 :                 do {
     232           0 :                     rightCase = (uint32_t)right.getCE(rightIndex++);
     233           0 :                 } while(rightCase <= 0xffff);
     234           0 :                 rightCase &= 0xc000;
     235             :             }
     236             : 
     237             :             // No need to handle NO_CE and MERGE_SEPARATOR specially:
     238             :             // There is one case weight for each previous-level weight,
     239             :             // so level length differences were handled there.
     240           0 :             if(leftCase != rightCase) {
     241           0 :                 if((options & CollationSettings::UPPER_FIRST) == 0) {
     242           0 :                     return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
     243             :                 } else {
     244           0 :                     return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
     245             :                 }
     246             :             }
     247           0 :             if((leftLower32 >> 16) == Collation::NO_CE_WEIGHT16) { break; }
     248           0 :         }
     249             :     }
     250           0 :     if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
     251             :     // Tertiary level: only reached when the strength is above UCOL_SECONDARY.
     252           0 :     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
     253             :     // anyQuaternaries ORs every lower-32 value read in this pass; its 0xc0 bits are tested before the quaternary pass.
     254           0 :     int32_t leftIndex = 0;
     255           0 :     int32_t rightIndex = 0;
     256           0 :     uint32_t anyQuaternaries = 0;
     257             :     for(;;) {
     258             :         uint32_t leftLower32, leftTertiary;
     259           0 :         do {
     260           0 :             leftLower32 = (uint32_t)left.getCE(leftIndex++);
     261           0 :             anyQuaternaries |= leftLower32;
     262           0 :             U_ASSERT((leftLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
     263             :                      (leftLower32 & 0xc0c0) == 0);
     264           0 :             leftTertiary = leftLower32 & tertiaryMask;
     265           0 :         } while(leftTertiary == 0);
     266             :         // Same scan for the right side.
     267             :         uint32_t rightLower32, rightTertiary;
     268           0 :         do {
     269           0 :             rightLower32 = (uint32_t)right.getCE(rightIndex++);
     270           0 :             anyQuaternaries |= rightLower32;
     271           0 :             U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
     272             :                      (rightLower32 & 0xc0c0) == 0);
     273           0 :             rightTertiary = rightLower32 & tertiaryMask;
     274           0 :         } while(rightTertiary == 0);
     275             : 
     276           0 :         if(leftTertiary != rightTertiary) {
     277           0 :             if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
     278             :                 // Pass through NO_CE and keep real tertiary weights larger than that.
     279             :                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
     280             :                 // to keep tertiary CEs well-formed.
     281             :                 // Their case+tertiary weights must be greater than those of
     282             :                 // primary and secondary CEs.
     283           0 :                 if(leftTertiary > Collation::NO_CE_WEIGHT16) {
     284           0 :                     if(leftLower32 > 0xffff) {
     285           0 :                         leftTertiary ^= 0xc000;
     286             :                     } else {
     287           0 :                         leftTertiary += 0x4000;
     288             :                     }
     289             :                 }
     290           0 :                 if(rightTertiary > Collation::NO_CE_WEIGHT16) {
     291           0 :                     if(rightLower32 > 0xffff) {
     292           0 :                         rightTertiary ^= 0xc000;
     293             :                     } else {
     294           0 :                         rightTertiary += 0x4000;
     295             :                     }
     296             :                 }
     297             :             }
     298           0 :             return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
     299             :         }
     300           0 :         if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; }
     301           0 :     }
     302           0 :     if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
     303             :     // Quaternary level: only reached when the strength is above UCOL_TERTIARY.
     304           0 :     if(!anyVariable && (anyQuaternaries & 0xc0) == 0) {
     305             :         // If there are no "variable" CEs and no non-zero quaternary weights,
     306             :         // then there are no quaternary differences.
     307           0 :         return UCOL_EQUAL;
     308             :     }
     309             :     // Restart both CE indexes for the quaternary pass.
     310           0 :     leftIndex = 0;
     311           0 :     rightIndex = 0;
     312             :     for(;;) {
     313             :         uint32_t leftQuaternary;
     314           0 :         do {
     315           0 :             int64_t ce = left.getCE(leftIndex++);
     316           0 :             leftQuaternary = (uint32_t)ce & 0xffff;
     317           0 :             if(leftQuaternary <= Collation::NO_CE_WEIGHT16) {
     318             :                 // Variable primary or completely ignorable or NO_CE.
     319           0 :                 leftQuaternary = (uint32_t)(ce >> 32);
     320             :             } else {
     321             :                 // Regular CE, not tertiary ignorable.
     322             :                 // Preserve the quaternary weight in bits 7..6.
     323           0 :                 leftQuaternary |= 0xffffff3f;
     324             :             }
     325           0 :         } while(leftQuaternary == 0);
     326             :         // Same derivation for the right side.
     327             :         uint32_t rightQuaternary;
     328           0 :         do {
     329           0 :             int64_t ce = right.getCE(rightIndex++);
     330           0 :             rightQuaternary = (uint32_t)ce & 0xffff;
     331           0 :             if(rightQuaternary <= Collation::NO_CE_WEIGHT16) {
     332             :                 // Variable primary or completely ignorable or NO_CE.
     333           0 :                 rightQuaternary = (uint32_t)(ce >> 32);
     334             :             } else {
     335             :                 // Regular CE, not tertiary ignorable.
     336             :                 // Preserve the quaternary weight in bits 7..6.
     337           0 :                 rightQuaternary |= 0xffffff3f;
     338             :             }
     339           0 :         } while(rightQuaternary == 0);
     340             : 
     341           0 :         if(leftQuaternary != rightQuaternary) {
     342             :             // Return the difference, with script reordering.
     343           0 :             if(settings.hasReordering()) {
     344           0 :                 leftQuaternary = settings.reorder(leftQuaternary);
     345           0 :                 rightQuaternary = settings.reorder(rightQuaternary);
     346             :             }
     347           0 :             return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
     348             :         }
     349           0 :         if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; }
     350           0 :     }
     351           0 :     return UCOL_EQUAL;
     352             : }
     353             : 
     354             : U_NAMESPACE_END
     355             : 
     356             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13