LCOV - code coverage report
Current view: top level - intl/icu/source/common/unicode - normalizer2.h (source / functions) Hit Total Coverage
Test: output.info Lines: 1 7 14.3 %
Date: 2017-07-14 16:53:18 Functions: 1 3 33.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2009-2013, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  normalizer2.h
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2009nov22
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #ifndef __NORMALIZER2_H__
      20             : #define __NORMALIZER2_H__
      21             : 
      22             : /**
      23             :  * \file
      24             :  * \brief C++ API: New API for Unicode Normalization.
      25             :  */
      26             : 
      27             : #include "unicode/utypes.h"
      28             : 
      29             : #if !UCONFIG_NO_NORMALIZATION
      30             : 
      31             : #include "unicode/uniset.h"
      32             : #include "unicode/unistr.h"
      33             : #include "unicode/unorm2.h"
      34             : 
      35             : U_NAMESPACE_BEGIN
      36             : 
      37             : /**
      38             :  * Unicode normalization functionality for standard Unicode normalization or
      39             :  * for using custom mapping tables.
      40             :  * All instances of this class are unmodifiable/immutable.
      41             :  * Instances returned by getInstance() are singletons that must not be deleted by the caller.
      42             :  * The Normalizer2 class is not intended for public subclassing.
      43             :  *
      44             :  * The primary functions are to produce a normalized string and to detect whether
      45             :  * a string is already normalized.
      46             :  * The most commonly used normalization forms are those defined in
      47             :  * http://www.unicode.org/unicode/reports/tr15/
      48             :  * However, this API supports additional normalization forms for specialized purposes.
      49             :  * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
      50             :  * and can be used in implementations of UTS #46.
      51             :  *
      52             :  * Not only are the standard compose and decompose modes supplied,
      53             :  * but additional modes are provided as documented in the UNormalization2Mode enum.
      54             :  *
      55             :  * Some of the functions in this class identify normalization boundaries.
      56             :  * At a normalization boundary, the portions of the string
      57             :  * before it and starting from it do not interact and can be handled independently.
      58             :  *
      59             :  * The spanQuickCheckYes() stops at a normalization boundary.
      60             :  * When the goal is a normalized string, then the text before the boundary
      61             :  * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
      62             :  *
      63             :  * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
      64             :  * a character is guaranteed to be at a normalization boundary,
      65             :  * regardless of context.
      66             :  * This is used for moving from one normalization boundary to the next
      67             :  * or preceding boundary, and for performing iterative normalization.
      68             :  *
      69             :  * Iterative normalization is useful when only a small portion of a
      70             :  * longer string needs to be processed.
      71             :  * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
      72             :  * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
      73             :  * (to process only the substring for which sort key bytes are computed).
      74             :  *
      75             :  * The set of normalization boundaries returned by these functions may not be
      76             :  * complete: There may be more boundaries that could be returned.
      77             :  * Different functions may return different boundaries.
      78             :  * @stable ICU 4.4
      79             :  */
      80          20 : class U_COMMON_API Normalizer2 : public UObject {
      81             : public:
      82             :     /**
      83             :      * Destructor.
      84             :      * @stable ICU 4.4
      85             :      */
      86             :     ~Normalizer2();
      87             : 
      88             :     /**
      89             :      * Returns a Normalizer2 instance for Unicode NFC normalization.
      90             :      * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
      91             :      * Returns an unmodifiable singleton instance. Do not delete it.
      92             :      * @param errorCode Standard ICU error code. Its input value must
      93             :      *                  pass the U_SUCCESS() test, or else the function returns
      94             :      *                  immediately. Check for U_FAILURE() on output or use with
      95             :      *                  function chaining. (See User Guide for details.)
      96             :      * @return the requested Normalizer2, if successful
      97             :      * @stable ICU 49
      98             :      */
      99             :     static const Normalizer2 *
     100             :     getNFCInstance(UErrorCode &errorCode);
     101             : 
     102             :     /**
     103             :      * Returns a Normalizer2 instance for Unicode NFD normalization.
     104             :      * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode); NFD shares the "nfc" data.
     105             :      * Returns an unmodifiable singleton instance. Do not delete it.
     106             :      * @param errorCode Standard ICU error code. Its input value must
     107             :      *                  pass the U_SUCCESS() test, or else the function returns
     108             :      *                  immediately. Check for U_FAILURE() on output or use with
     109             :      *                  function chaining. (See User Guide for details.)
     110             :      * @return the requested Normalizer2, if successful
     111             :      * @stable ICU 49
     112             :      */
     113             :     static const Normalizer2 *
     114             :     getNFDInstance(UErrorCode &errorCode);
     115             : 
     116             :     /**
     117             :      * Returns a Normalizer2 instance for Unicode NFKC normalization.
     118             :      * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
     119             :      * Returns an unmodifiable singleton instance. Do not delete it.
     120             :      * @param errorCode Standard ICU error code. Its input value must
     121             :      *                  pass the U_SUCCESS() test, or else the function returns
     122             :      *                  immediately. Check for U_FAILURE() on output or use with
     123             :      *                  function chaining. (See User Guide for details.)
     124             :      * @return the requested Normalizer2, if successful
     125             :      * @stable ICU 49
     126             :      */
     127             :     static const Normalizer2 *
     128             :     getNFKCInstance(UErrorCode &errorCode);
     129             : 
     130             :     /**
     131             :      * Returns a Normalizer2 instance for Unicode NFKD normalization.
     132             :      * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode); NFKD shares the "nfkc" data.
     133             :      * Returns an unmodifiable singleton instance. Do not delete it.
     134             :      * @param errorCode Standard ICU error code. Its input value must
     135             :      *                  pass the U_SUCCESS() test, or else the function returns
     136             :      *                  immediately. Check for U_FAILURE() on output or use with
     137             :      *                  function chaining. (See User Guide for details.)
     138             :      * @return the requested Normalizer2, if successful
     139             :      * @stable ICU 49
     140             :      */
     141             :     static const Normalizer2 *
     142             :     getNFKDInstance(UErrorCode &errorCode);
     143             : 
     144             :     /**
     145             :      * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
     146             :      * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
     147             :      * Returns an unmodifiable singleton instance. Do not delete it.
     148             :      * @param errorCode Standard ICU error code. Its input value must
     149             :      *                  pass the U_SUCCESS() test, or else the function returns
     150             :      *                  immediately. Check for U_FAILURE() on output or use with
     151             :      *                  function chaining. (See User Guide for details.)
     152             :      * @return the requested Normalizer2, if successful
     153             :      * @stable ICU 49
     154             :      */
     155             :     static const Normalizer2 *
     156             :     getNFKCCasefoldInstance(UErrorCode &errorCode);
     157             : 
     158             :     /**
     159             :      * Returns a Normalizer2 instance which uses the specified data file
     160             :      * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
     161             :      * and which composes or decomposes text according to the specified mode.
     162             :      * Returns an unmodifiable singleton instance. Do not delete it.
     163             :      *
     164             :      * Use packageName=NULL for data files that are part of ICU's own data.
     165             :      * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
     166             :      * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
     167             :      * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
     168             :      *
     169             :      * @param packageName NULL for ICU built-in data, otherwise application data package name
     170             :      * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
     171             :      * @param mode normalization mode (compose or decompose etc.)
     172             :      * @param errorCode Standard ICU error code. Its input value must
     173             :      *                  pass the U_SUCCESS() test, or else the function returns
     174             :      *                  immediately. Check for U_FAILURE() on output or use with
     175             :      *                  function chaining. (See User Guide for details.)
     176             :      * @return the requested Normalizer2, if successful
     177             :      * @stable ICU 4.4
     178             :      */
     179             :     static const Normalizer2 *
     180             :     getInstance(const char *packageName,
     181             :                 const char *name,
     182             :                 UNormalization2Mode mode,
     183             :                 UErrorCode &errorCode);
     184             : 
     185             :     /**
     186             :      * Returns the normalized form of the source string (convenience wrapper for normalize(src, dest, errorCode)).
     187             :      * @param src source string
     188             :      * @param errorCode Standard ICU error code. Its input value must
     189             :      *                  pass the U_SUCCESS() test, or else the function returns
     190             :      *                  immediately. Check for U_FAILURE() on output or use with
     191             :      *                  function chaining. (See User Guide for details.)
     192             :      * @return a new string holding the normalized form of src
     193             :      * @stable ICU 4.4
     194             :      */
     195             :     UnicodeString
     196           0 :     normalize(const UnicodeString &src, UErrorCode &errorCode) const {
     197           0 :         UnicodeString result;
     198           0 :         normalize(src, result, errorCode);  // delegates to the virtual three-argument overload
     199           0 :         return result;
     200             :     }
     201             :     /**
     202             :      * Writes the normalized form of the source string to the destination string
     203             :      * (replacing its contents) and returns the destination string.
     204             :      * The source and destination strings must be different objects.
     205             :      * @param src source string
     206             :      * @param dest destination string; its contents are replaced with normalized src
     207             :      * @param errorCode Standard ICU error code. Its input value must
     208             :      *                  pass the U_SUCCESS() test, or else the function returns
     209             :      *                  immediately. Check for U_FAILURE() on output or use with
     210             :      *                  function chaining. (See User Guide for details.)
     211             :      * @return dest
     212             :      * @stable ICU 4.4
     213             :      */
     214             :     virtual UnicodeString &
     215             :     normalize(const UnicodeString &src,
     216             :               UnicodeString &dest,
     217             :               UErrorCode &errorCode) const = 0;
     218             :     /**
     219             :      * Appends the normalized form of the second string to the first string
     220             :      * (merging them at the boundary) and returns the first string.
     221             :      * The result is normalized if the first string was normalized.
     222             :      * The first and second strings must be different objects.
     223             :      * @param first string, should be normalized
     224             :      * @param second string, will be normalized
     225             :      * @param errorCode Standard ICU error code. Its input value must
     226             :      *                  pass the U_SUCCESS() test, or else the function returns
     227             :      *                  immediately. Check for U_FAILURE() on output or use with
     228             :      *                  function chaining. (See User Guide for details.)
     229             :      * @return first
     230             :      * @stable ICU 4.4
     231             :      */
     232             :     virtual UnicodeString &
     233             :     normalizeSecondAndAppend(UnicodeString &first,
     234             :                              const UnicodeString &second,
     235             :                              UErrorCode &errorCode) const = 0;
     236             :     /**
     237             :      * Appends the second string to the first string
     238             :      * (merging them at the boundary) and returns the first string.
     239             :      * The result is normalized if both strings were normalized.
     240             :      * The first and second strings must be different objects.
     241             :      * @param first string, should be normalized
     242             :      * @param second string, should be normalized
     243             :      * @param errorCode Standard ICU error code. Its input value must
     244             :      *                  pass the U_SUCCESS() test, or else the function returns
     245             :      *                  immediately. Check for U_FAILURE() on output or use with
     246             :      *                  function chaining. (See User Guide for details.)
     247             :      * @return first
     248             :      * @stable ICU 4.4
     249             :      */
     250             :     virtual UnicodeString &
     251             :     append(UnicodeString &first,
     252             :            const UnicodeString &second,
     253             :            UErrorCode &errorCode) const = 0;
     254             : 
     255             :     /**
     256             :      * Gets the decomposition mapping of c.
     257             :      * Roughly equivalent to normalizing the String form of c
     258             :      * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster. Unlike normalize(),
     259             :      * this function returns FALSE and does not write a string
     260             :      * if c does not have a decomposition mapping in this instance's data.
     261             :      * This function is independent of the mode of the Normalizer2.
     262             :      * @param c code point
     263             :      * @param decomposition String object which will be set to c's
     264             :      *                      decomposition mapping, if there is one.
     265             :      * @return TRUE if c has a decomposition, otherwise FALSE
     266             :      * @stable ICU 4.6
     267             :      */
     268             :     virtual UBool
     269             :     getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
     270             : 
     271             :     /**
     272             :      * Gets the raw decomposition mapping of c.
     273             :      *
     274             :      * This is similar to the getDecomposition() method but returns the
     275             :      * raw decomposition mapping as specified in UnicodeData.txt or
     276             :      * (for custom data) in the mapping files processed by the gennorm2 tool.
     277             :      * By contrast, getDecomposition() returns the processed,
     278             :      * recursively-decomposed version of this mapping.
     279             :      *
     280             :      * When used on a standard NFKC Normalizer2 instance,
     281             :      * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
     282             :      *
     283             :      * When used on a standard NFC Normalizer2 instance,
     284             :      * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
     285             :      * in this case, the result contains either one or two code points (=1..4 char16_ts).
     286             :      *
     287             :      * This function is independent of the mode of the Normalizer2.
     288             :      * The default implementation returns FALSE.
     289             :      * @param c code point
     290             :      * @param decomposition String object which will be set to c's
     291             :      *                      raw decomposition mapping, if there is one.
     292             :      * @return TRUE if c has a decomposition, otherwise FALSE
     293             :      * @stable ICU 49
     294             :      */
     295             :     virtual UBool
     296             :     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
     297             : 
     298             :     /**
     299             :      * Performs pairwise composition of a & b and returns the composite if there is one.
     300             :      *
     301             :      * Returns a composite code point c only if c has a two-way mapping to a+b.
     302             :      * In standard Unicode normalization, this means that
     303             :      * c has a canonical decomposition to a+b
     304             :      * and c does not have the Full_Composition_Exclusion property.
     305             :      *
     306             :      * This function is independent of the mode of the Normalizer2.
     307             :      * The default implementation returns a negative value.
     308             :      * @param a A (normalization starter) code point.
     309             :      * @param b Another code point.
     310             :      * @return The non-negative composite code point if there is one; otherwise a negative value.
     311             :      * @stable ICU 49
     312             :      */
     313             :     virtual UChar32
     314             :     composePair(UChar32 a, UChar32 b) const;
     315             : 
     316             :     /**
     317             :      * Gets the combining class of c.
     318             :      * The default implementation returns 0
     319             :      * but all standard implementations return the Unicode Canonical_Combining_Class value.
     320             :      * @param c code point
     321             :      * @return c's combining class
     322             :      * @stable ICU 49
     323             :      */
     324             :     virtual uint8_t
     325             :     getCombiningClass(UChar32 c) const;
     326             : 
     327             :     /**
     328             :      * Tests if the string is normalized.
     329             :      * Internally, in cases where the quickCheck() method would return "maybe"
     330             :      * (which is only possible for the two COMPOSE modes) this method
     331             :      * resolves to "yes" or "no" to provide a definitive result,
     332             :      * at the cost of doing more work in those cases.
     333             :      * @param s input string
     334             :      * @param errorCode Standard ICU error code. Its input value must
     335             :      *                  pass the U_SUCCESS() test, or else the function returns
     336             :      *                  immediately. Check for U_FAILURE() on output or use with
     337             :      *                  function chaining. (See User Guide for details.)
     338             :      * @return TRUE if s is normalized
     339             :      * @stable ICU 4.4
     340             :      */
     341             :     virtual UBool
     342             :     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
     343             : 
     344             :     /**
     345             :      * Tests if the string is normalized.
     346             :      * For the two COMPOSE modes, the result could be "maybe" in cases that
     347             :      * would take a little more work to resolve definitively.
     348             :      * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
     349             :      * combination of quick check + normalization, to avoid
     350             :      * re-checking the "yes" prefix.
     351             :      * @param s input string
     352             :      * @param errorCode Standard ICU error code. Its input value must
     353             :      *                  pass the U_SUCCESS() test, or else the function returns
     354             :      *                  immediately. Check for U_FAILURE() on output or use with
     355             :      *                  function chaining. (See User Guide for details.)
     356             :      * @return UNormalizationCheckResult
     357             :      * @stable ICU 4.4
     358             :      */
     359             :     virtual UNormalizationCheckResult
     360             :     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
     361             : 
     362             :     /**
     363             :      * Returns the end of the normalized substring of the input string.
     364             :      * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
     365             :      * the substring <code>UnicodeString(s, 0, end)</code>
     366             :      * will pass the quick check with a "yes" result.
     367             :      *
     368             :      * The returned end index is usually one or more characters before the
     369             :      * "no" or "maybe" character: The end index is at a normalization boundary.
     370             :      * (See the class documentation for more about normalization boundaries.)
     371             :      *
     372             :      * When the goal is a normalized string and most input strings are expected
     373             :      * to be normalized already, then call this method,
     374             :      * and if it returns a prefix shorter than the input string,
     375             :      * copy that prefix and use normalizeSecondAndAppend() for the remainder.
     376             :      * @param s input string
     377             :      * @param errorCode Standard ICU error code. Its input value must
     378             :      *                  pass the U_SUCCESS() test, or else the function returns
     379             :      *                  immediately. Check for U_FAILURE() on output or use with
     380             :      *                  function chaining. (See User Guide for details.)
     381             :      * @return "yes" span end index
     382             :      * @stable ICU 4.4
     383             :      */
     384             :     virtual int32_t
     385             :     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
     386             : 
     387             :     /**
     388             :      * Tests if the character always has a normalization boundary before it,
     389             :      * regardless of context.
     390             :      * If true, then the character does not normalization-interact with
     391             :      * preceding characters.
     392             :      * In other words, a string containing this character can be normalized
     393             :      * by processing portions before this character and starting from this
     394             :      * character independently.
     395             :      * This is used for iterative normalization. See the class documentation for details.
     396             :      * @param c character to test
     397             :      * @return TRUE if c has a normalization boundary before it
     398             :      * @stable ICU 4.4
     399             :      */
     400             :     virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
     401             : 
     402             :     /**
     403             :      * Tests if the character always has a normalization boundary after it,
     404             :      * regardless of context.
     405             :      * If true, then the character does not normalization-interact with
     406             :      * following characters.
     407             :      * In other words, a string containing this character can be normalized
     408             :      * by processing portions up to this character and after this
     409             :      * character independently.
     410             :      * This is used for iterative normalization. See the class documentation for details.
     411             :      * Note that this operation may be significantly slower than hasBoundaryBefore().
     412             :      * @param c character to test
     413             :      * @return TRUE if c has a normalization boundary after it
     414             :      * @stable ICU 4.4
     415             :      */
     416             :     virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
     417             : 
     418             :     /**
     419             :      * Tests if the character is normalization-inert.
     420             :      * If true, then the character does not change, nor normalization-interact with
     421             :      * preceding or following characters.
     422             :      * In other words, a string containing this character can be normalized
     423             :      * by processing portions before this character and after this
     424             :      * character independently.
     425             :      * This is used for iterative normalization. See the class documentation for details.
     426             :      * Note that this operation may be significantly slower than hasBoundaryBefore().
     427             :      * @param c character to test
     428             :      * @return TRUE if c is normalization-inert
     429             :      * @stable ICU 4.4
     430             :      */
     431             :     virtual UBool isInert(UChar32 c) const = 0;
     432             : };
     433             : 
     434             : /**
     435             :  * Normalization filtered by a UnicodeSet.
     436             :  * Normalizes portions of the text contained in the filter set and leaves
     437             :  * portions not contained in the filter set unchanged.
     438             :  * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
     439             :  * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
     440             :  * This class implements all of (and only) the Normalizer2 API.
     441             :  * An instance of this class is unmodifiable/immutable but is constructed and
     442             :  * must be destructed by the owner.
     443             :  * @stable ICU 4.4
     444             :  */
     445             : class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
     446             : public:
     447             :     /**
     448             :      * Constructs a filtered normalizer wrapping any Normalizer2 instance
     449             :      * and a filter set.
     450             :      * Both are aliased and must not be modified or deleted while this object
     451             :      * is used.
     452             :      * The filter set should be frozen; otherwise the performance will suffer greatly.
     453             :      * @param n2 wrapped Normalizer2 instance
     454             :      * @param filterSet UnicodeSet which determines the characters to be normalized
     455             :      * @stable ICU 4.4
     456             :      */
     457           0 :     FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
     458           0 :             norm2(n2), set(filterSet) {}
     459             : 
     460             :     /**
     461             :      * Destructor.
     462             :      * @stable ICU 4.4
     463             :      */
     464             :     ~FilteredNormalizer2();
     465             : 
     466             :     /**
     467             :      * Writes the normalized form of the source string to the destination string
     468             :      * (replacing its contents) and returns the destination string.
     469             :      * The source and destination strings must be different objects.
     470             :      * @param src source string
     471             :      * @param dest destination string; its contents are replaced with normalized src
     472             :      * @param errorCode Standard ICU error code. Its input value must
     473             :      *                  pass the U_SUCCESS() test, or else the function returns
     474             :      *                  immediately. Check for U_FAILURE() on output or use with
     475             :      *                  function chaining. (See User Guide for details.)
     476             :      * @return dest
     477             :      * @stable ICU 4.4
     478             :      */
     479             :     virtual UnicodeString &
     480             :     normalize(const UnicodeString &src,
     481             :               UnicodeString &dest,
     482             :               UErrorCode &errorCode) const;
     483             :     /**
     484             :      * Appends the normalized form of the second string to the first string
     485             :      * (merging them at the boundary) and returns the first string.
     486             :      * The result is normalized if the first string was normalized.
     487             :      * The first and second strings must be different objects.
     488             :      * @param first string, should be normalized
     489             :      * @param second string, will be normalized
     490             :      * @param errorCode Standard ICU error code. Its input value must
     491             :      *                  pass the U_SUCCESS() test, or else the function returns
     492             :      *                  immediately. Check for U_FAILURE() on output or use with
     493             :      *                  function chaining. (See User Guide for details.)
     494             :      * @return first
     495             :      * @stable ICU 4.4
     496             :      */
     497             :     virtual UnicodeString &
     498             :     normalizeSecondAndAppend(UnicodeString &first,
     499             :                              const UnicodeString &second,
     500             :                              UErrorCode &errorCode) const;
     501             :     /**
     502             :      * Appends the second string to the first string
     503             :      * (merging them at the boundary) and returns the first string.
     504             :      * The result is normalized if both the strings were normalized.
     505             :      * The first and second strings must be different objects.
     506             :      * @param first string, should be normalized
     507             :      * @param second string, should be normalized
     508             :      * @param errorCode Standard ICU error code. Its input value must
     509             :      *                  pass the U_SUCCESS() test, or else the function returns
     510             :      *                  immediately. Check for U_FAILURE() on output or use with
     511             :      *                  function chaining. (See User Guide for details.)
     512             :      * @return first
     513             :      * @stable ICU 4.4
     514             :      */
     515             :     virtual UnicodeString &
     516             :     append(UnicodeString &first,
     517             :            const UnicodeString &second,
     518             :            UErrorCode &errorCode) const;
     519             : 
     520             :     /**
     521             :      * Gets the decomposition mapping of c.
     522             :      * For details see the base class documentation.
     523             :      *
     524             :      * This function is independent of the mode of the Normalizer2.
     525             :      * @param c code point
     526             :      * @param decomposition String object which will be set to c's
     527             :      *                      decomposition mapping, if there is one.
     528             :      * @return TRUE if c has a decomposition, otherwise FALSE
     529             :      * @stable ICU 4.6
     530             :      */
     531             :     virtual UBool
     532             :     getDecomposition(UChar32 c, UnicodeString &decomposition) const;
     533             : 
     534             :     /**
     535             :      * Gets the raw decomposition mapping of c.
     536             :      * For details see the base class documentation.
     537             :      *
     538             :      * This function is independent of the mode of the Normalizer2.
     539             :      * @param c code point
     540             :      * @param decomposition String object which will be set to c's
     541             :      *                      raw decomposition mapping, if there is one.
     542             :      * @return TRUE if c has a decomposition, otherwise FALSE
     543             :      * @stable ICU 49
     544             :      */
     545             :     virtual UBool
     546             :     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
     547             : 
     548             :     /**
     549             :      * Performs pairwise composition of a & b and returns the composite if there is one.
     550             :      * For details see the base class documentation.
     551             :      *
     552             :      * This function is independent of the mode of the Normalizer2.
     553             :      * @param a A (normalization starter) code point.
     554             :      * @param b Another code point.
     555             :      * @return The non-negative composite code point if there is one; otherwise a negative value.
     556             :      * @stable ICU 49
     557             :      */
     558             :     virtual UChar32
     559             :     composePair(UChar32 a, UChar32 b) const;
     560             : 
     561             :     /**
     562             :      * Gets the combining class of c.
     563             :      * The default implementation returns 0
     564             :      * but all standard implementations return the Unicode Canonical_Combining_Class value.
     565             :      * @param c code point
     566             :      * @return c's combining class
     567             :      * @stable ICU 49
     568             :      */
     569             :     virtual uint8_t
     570             :     getCombiningClass(UChar32 c) const;
     571             : 
     572             :     /**
     573             :      * Tests if the string is normalized.
     574             :      * For details see the Normalizer2 base class documentation.
     575             :      * @param s input string
     576             :      * @param errorCode Standard ICU error code. Its input value must
     577             :      *                  pass the U_SUCCESS() test, or else the function returns
     578             :      *                  immediately. Check for U_FAILURE() on output or use with
     579             :      *                  function chaining. (See User Guide for details.)
     580             :      * @return TRUE if s is normalized
     581             :      * @stable ICU 4.4
     582             :      */
     583             :     virtual UBool
     584             :     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
     585             :     /**
     586             :      * Tests if the string is normalized.
     587             :      * For details see the Normalizer2 base class documentation.
     588             :      * @param s input string
     589             :      * @param errorCode Standard ICU error code. Its input value must
     590             :      *                  pass the U_SUCCESS() test, or else the function returns
     591             :      *                  immediately. Check for U_FAILURE() on output or use with
     592             :      *                  function chaining. (See User Guide for details.)
     593             :      * @return UNormalizationCheckResult
     594             :      * @stable ICU 4.4
     595             :      */
     596             :     virtual UNormalizationCheckResult
     597             :     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
     598             :     /**
     599             :      * Returns the end of the normalized substring of the input string.
     600             :      * For details see the Normalizer2 base class documentation.
     601             :      * @param s input string
     602             :      * @param errorCode Standard ICU error code. Its input value must
     603             :      *                  pass the U_SUCCESS() test, or else the function returns
     604             :      *                  immediately. Check for U_FAILURE() on output or use with
     605             :      *                  function chaining. (See User Guide for details.)
     606             :      * @return "yes" span end index
     607             :      * @stable ICU 4.4
     608             :      */
     609             :     virtual int32_t
     610             :     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
     611             : 
     612             :     /**
     613             :      * Tests if the character always has a normalization boundary before it,
     614             :      * regardless of context.
     615             :      * For details see the Normalizer2 base class documentation.
     616             :      * @param c character to test
     617             :      * @return TRUE if c has a normalization boundary before it
     618             :      * @stable ICU 4.4
     619             :      */
     620             :     virtual UBool hasBoundaryBefore(UChar32 c) const;
     621             : 
     622             :     /**
     623             :      * Tests if the character always has a normalization boundary after it,
     624             :      * regardless of context.
     625             :      * For details see the Normalizer2 base class documentation.
     626             :      * @param c character to test
     627             :      * @return TRUE if c has a normalization boundary after it
     628             :      * @stable ICU 4.4
     629             :      */
     630             :     virtual UBool hasBoundaryAfter(UChar32 c) const;
     631             : 
     632             :     /**
     633             :      * Tests if the character is normalization-inert.
     634             :      * For details see the Normalizer2 base class documentation.
     635             :      * @param c character to test
     636             :      * @return TRUE if c is normalization-inert
     637             :      * @stable ICU 4.4
     638             :      */
     639             :     virtual UBool isInert(UChar32 c) const;
     640             : private:
     641             :     UnicodeString &
     642             :     normalize(const UnicodeString &src,
     643             :               UnicodeString &dest,
     644             :               USetSpanCondition spanCondition,  // NOTE(review): presumably the span condition applied to the filter set - confirm in the implementation
     645             :               UErrorCode &errorCode) const;
     646             : 
     647             :     UnicodeString &
     648             :     normalizeSecondAndAppend(UnicodeString &first,
     649             :                              const UnicodeString &second,
     650             :                              UBool doNormalize,  // NOTE(review): presumably TRUE for normalizeSecondAndAppend(), FALSE for append() - confirm in the implementation
     651             :                              UErrorCode &errorCode) const;
     652             : 
     653             :     const Normalizer2 &norm2;  // wrapped normalizer; aliased by reference from the constructor's n2 argument
     654             :     const UnicodeSet &set;  // filter set; aliased by reference from the constructor's filterSet argument
     655             : };
     656             : 
     657             : U_NAMESPACE_END
     658             : 
     659             : #endif  // !UCONFIG_NO_NORMALIZATION
     660             : #endif  // __NORMALIZER2_H__

Generated by: LCOV version 1.13