LCOV - code coverage report
Current view: top level - intl/icu/source/common/unicode - uniset.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 32 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 11 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ***************************************************************************
       5             : * Copyright (C) 1999-2016, International Business Machines Corporation
       6             : * and others. All Rights Reserved.
       7             : ***************************************************************************
       8             : *   Date        Name        Description
       9             : *   10/20/99    alan        Creation.
      10             : ***************************************************************************
      11             : */
      12             : 
      13             : #ifndef UNICODESET_H
      14             : #define UNICODESET_H
      15             : 
      16             : #include "unicode/unifilt.h"
      17             : #include "unicode/unistr.h"
      18             : #include "unicode/uset.h"
      19             : 
      20             : /**
      21             :  * \file
      22             :  * \brief C++ API: Unicode Set
      23             :  */
      24             : 
      25             : U_NAMESPACE_BEGIN
      26             : 
      27             : // Forward declarations (one @internal function, then incomplete class types).
      28             : void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */
      29             : 
      30             : class BMPSet;
      31             : class ParsePosition;
      32             : class RBBIRuleScanner;
      33             : class SymbolTable;
      34             : class UnicodeSetStringSpan;
      35             : class UVector;
      36             : class RuleCharacterIterator;
      37             : 
      38             : /**
      39             :  * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
      40             :  * represent <em>character classes</em> used in regular expressions.
      41             :  * A character class specifies a subset of Unicode code points.  Legal
      42             :  * code points are U+0000 to U+10FFFF, inclusive.
      43             :  *
      44             :  * <p>The UnicodeSet class is not designed to be subclassed.
      45             :  *
      46             :  * <p><code>UnicodeSet</code> supports two APIs. The first is the
      47             :  * <em>operand</em> API that allows the caller to modify the value of
      48             :  * a <code>UnicodeSet</code> object. It conforms to Java 2's
      49             :  * <code>java.util.Set</code> interface, although
      50             :  * <code>UnicodeSet</code> does not actually implement that
      51             :  * interface. All methods of <code>Set</code> are supported, with the
      52             :  * modification that they take a character range or single character
      53             :  * instead of an <code>Object</code>, and they take a
      54             :  * <code>UnicodeSet</code> instead of a <code>Collection</code>.  The
      55             :  * operand API may be thought of in terms of boolean logic: a boolean
      56             :  * OR is implemented by <code>add</code>, a boolean AND is implemented
      57             :  * by <code>retain</code>, a boolean XOR is implemented by
      58             :  * <code>complement</code> taking an argument, and a boolean NOT is
      59             :  * implemented by <code>complement</code> with no argument.  In terms
      60             :  * of traditional set theory function names, <code>add</code> is a
      61             :  * union, <code>retain</code> is an intersection, <code>remove</code>
      62             :  * is an asymmetric difference, and <code>complement</code> with no
      63             :  * argument is a set complement with respect to the superset range
      64             :  * <code>MIN_VALUE-MAX_VALUE</code>.
      65             :  *
      66             :  * <p>The second API is the
      67             :  * <code>applyPattern()</code>/<code>toPattern()</code> API from the
      68             :  * <code>java.text.Format</code>-derived classes.  Unlike the
      69             :  * methods that add characters, add categories, and control the logic
      70             :  * of the set, the method <code>applyPattern()</code> sets all
      71             :  * attributes of a <code>UnicodeSet</code> at once, based on a
      72             :  * string pattern.
      73             :  *
      74             :  * <p><b>Pattern syntax</b></p>
      75             :  *
      76             :  * Patterns are accepted by the constructors and the
      77             :  * <code>applyPattern()</code> methods and returned by the
      78             :  * <code>toPattern()</code> method.  These patterns follow a syntax
      79             :  * similar to that employed by version 8 regular expression character
      80             :  * classes.  Here are some simple examples:
      81             :  *
      82             :  * \htmlonly<blockquote>\endhtmlonly
      83             :  *   <table>
      84             :  *     <tr align="top">
      85             :  *       <td nowrap valign="top" align="left"><code>[]</code></td>
      86             :  *       <td valign="top">No characters</td>
      87             :  *     </tr><tr align="top">
      88             :  *       <td nowrap valign="top" align="left"><code>[a]</code></td>
      89             :  *       <td valign="top">The character 'a'</td>
      90             :  *     </tr><tr align="top">
      91             :  *       <td nowrap valign="top" align="left"><code>[ae]</code></td>
      92             :  *       <td valign="top">The characters 'a' and 'e'</td>
      93             :  *     </tr>
      94             :  *     <tr>
      95             :  *       <td nowrap valign="top" align="left"><code>[a-e]</code></td>
      96             :  *       <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
      97             :  *       point order</td>
      98             :  *     </tr>
      99             :  *     <tr>
     100             :  *       <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
     101             :  *       <td valign="top">The character U+4E01</td>
     102             :  *     </tr>
     103             :  *     <tr>
     104             :  *       <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
     105             :  *       <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
     106             :  *       &quot;ac&quot;</td>
     107             :  *     </tr>
     108             :  *     <tr>
     109             :  *       <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
     110             :  *       <td valign="top">All characters in the general category Uppercase Letter</td>
     111             :  *     </tr>
     112             :  *   </table>
     113             :  * \htmlonly</blockquote>\endhtmlonly
     114             :  *
     115             :  * Any character may be preceded by a backslash in order to remove any special
     116             :  * meaning.  White space characters, as defined by UCharacter.isWhitespace(), are
     117             :  * ignored, unless they are escaped.
     118             :  *
     119             :  * <p>Property patterns specify a set of characters having a certain
     120             :  * property as defined by the Unicode standard.  Both the POSIX-like
     121             :  * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized.  For a
     122             :  * complete list of supported property patterns, see the User's Guide
     123             :  * for UnicodeSet at
     124             :  * <a href="http://icu-project.org/userguide/unicodeSet.html">
     125             :  * http://icu-project.org/userguide/unicodeSet.html</a>.
     126             :  * Actual determination of property data is defined by the underlying
     127             :  * Unicode database as implemented by UCharacter.
     128             :  *
     129             :  * <p>Patterns specify individual characters, ranges of characters, and
     130             :  * Unicode property sets.  When elements are concatenated, they
     131             :  * specify their union.  To complement a set, place a '^' immediately
     132             :  * after the opening '['.  Property patterns are inverted by modifying
     133             :  * their delimiters; "[:^foo:]" and "\\P{foo}".  In any other location,
     134             :  * '^' has no special meaning.
     135             :  *
     136             :  * <p>Ranges are indicated by placing a '-' between two
     137             :  * characters, as in "a-z".  This specifies the range of all
     138             :  * characters from the left to the right, in Unicode order.  If the
     139             :  * left character is greater than or equal to the
     140             :  * right character it is a syntax error.  If a '-' occurs as the first
     141             :  * character after the opening '[' or '[^', or if it occurs as the
     142             :  * last character before the closing ']', then it is taken as a
     143             :  * literal.  Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
     144             :  * set of three characters, 'a', 'b', and '-'.
     145             :  *
     146             :  * <p>Sets may be intersected using the '&' operator or the asymmetric
     147             :  * set difference may be taken using the '-' operator, for example,
     148             :  * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
     149             :  * with values less than 4096.  Operators ('&' and '-') have equal
     150             :  * precedence and bind left-to-right.  Thus
     151             :  * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
     152             :  * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]".  This only really matters for
     153             :  * difference; intersection is commutative.
     154             :  *
     155             :  * <table>
     156             :  * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
     157             :  * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
     158             :  * through 'z' and all letters in between, in Unicode order
     159             :  * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
     160             :  * all characters but 'a' through 'z',
     161             :  * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
     162             :  * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
     163             :  * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
     164             :  * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
     165             :  * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
     166             :  * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
     167             :  * <td>The asymmetric difference of sets specified by <em>pat1</em> and
     168             :  * <em>pat2</em>
     169             :  * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
     170             :  * <td>The set of characters having the specified
     171             :  * Unicode property; in
     172             :  * this case, Unicode uppercase letters
     173             :  * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
     174             :  * <td>The set of characters <em>not</em> having the given
     175             :  * Unicode property
     176             :  * </table>
     177             :  *
     178             :  * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
     179             :  *
     180             :  * <p><b>Formal syntax</b></p>
     181             :  *
     182             :  * \htmlonly<blockquote>\endhtmlonly
     183             :  *   <table>
     184             :  *     <tr align="top">
     185             :  *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
     186             :  *       <td valign="top"><code>('[' '^'? item* ']') |
     187             :  *       property</code></td>
     188             :  *     </tr>
     189             :  *     <tr align="top">
     190             :  *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
     191             :  *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
     192             :  *       </code></td>
     193             :  *     </tr>
     194             :  *     <tr align="top">
     195             :  *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
     196             :  *       <td valign="top"><code>pattern | pattern-expr pattern |
     197             :  *       pattern-expr op pattern<br>
     198             :  *       </code></td>
     199             :  *     </tr>
     200             :  *     <tr align="top">
     201             :  *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
     202             :  *       <td valign="top"><code>'&amp;' | '-'<br>
     203             :  *       </code></td>
     204             :  *     </tr>
     205             :  *     <tr align="top">
     206             :  *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
     207             :  *       <td valign="top"><code>'[' | ']' | '-'<br>
     208             :  *       </code></td>
     209             :  *     </tr>
     210             :  *     <tr align="top">
     211             :  *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
     212             :  *       <td valign="top"><em>any character that is not</em><code> special<br>
     213             :  *       | ('\' </code><em>any character</em><code>)<br>
     214             :  *       | ('\\u' hex hex hex hex)<br>
     215             :  *       </code></td>
     216             :  *     </tr>
     217             :  *     <tr align="top">
     218             :  *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
     219             :  *       <td valign="top"><em>any character for which
     220             :  *       </em><code>Character.digit(c, 16)</code><em>
     221             :  *       returns a non-negative result</em></td>
     222             :  *     </tr>
     223             :  *     <tr>
     224             :  *       <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
     225             :  *       <td valign="top"><em>a Unicode property set pattern</em></td>
     226             :  *     </tr>
     227             :  *   </table>
     228             :  *   <br>
     229             :  *   <table border="1">
     230             :  *     <tr>
     231             :  *       <td>Legend: <table>
     232             :  *         <tr>
     233             :  *           <td nowrap valign="top"><code>a := b</code></td>
     234             :  *           <td width="20" valign="top">&nbsp; </td>
     235             :  *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
     236             :  *         </tr>
     237             :  *         <tr>
     238             :  *           <td nowrap valign="top"><code>a?</code></td>
     239             :  *           <td valign="top"></td>
     240             :  *           <td valign="top">zero or one instance of <code>a</code><br>
     241             :  *           </td>
     242             :  *         </tr>
     243             :  *         <tr>
     244             :  *           <td nowrap valign="top"><code>a*</code></td>
     245             :  *           <td valign="top"></td>
     246             :  *           <td valign="top">zero or more instances of <code>a</code><br>
     247             :  *           </td>
     248             :  *         </tr>
     249             :  *         <tr>
     250             :  *           <td nowrap valign="top"><code>a | b</code></td>
     251             :  *           <td valign="top"></td>
     252             :  *           <td valign="top">either <code>a</code> or <code>b</code><br>
     253             :  *           </td>
     254             :  *         </tr>
     255             :  *         <tr>
     256             :  *           <td nowrap valign="top"><code>'a'</code></td>
     257             :  *           <td valign="top"></td>
     258             :  *           <td valign="top">the literal string between the quotes </td>
     259             :  *         </tr>
     260             :  *       </table>
     261             :  *       </td>
     262             :  *     </tr>
     263             :  *   </table>
     264             :  * \htmlonly</blockquote>\endhtmlonly
     265             :  * 
     266             :  * <p>Note:
     267             :  *  - Most UnicodeSet methods do not take a UErrorCode parameter because
     268             :  *   there are usually very few opportunities for failure other than a shortage
     269             :  *   of memory, error codes in low-level C++ string methods would be inconvenient,
     270             :  *   and the error code as the last parameter (ICU convention) would prevent
     271             :  *   the use of default parameter values.
     272             :  *   Instead, such methods set the UnicodeSet into a "bogus" state
     273             :  *   (see isBogus()) if an error occurs.
     274             :  *
     275             :  * @author Alan Liu
     276             :  * @stable ICU 2.0
     277             :  */
     278             : class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter {
     279             : 
     280             :     int32_t len; // length of list used; 0 <= len <= capacity
     281             :     int32_t capacity; // capacity of list
     282             :     UChar32* list; // MUST be terminated with HIGH
     283             :     BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
     284             :     UChar32* buffer; // internal buffer, may be NULL
     285             :     int32_t bufferCapacity; // capacity of buffer
     286             :     int32_t patLen;
     287             : 
     288             :     /**
     289             :      * The pattern representation of this set.  This may not be the
     290             :      * most economical pattern.  It is the pattern supplied to
     291             :      * applyPattern(), with variables substituted and whitespace
     292             :      * removed.  For sets constructed without applyPattern(), or
     293             :      * modified using the non-pattern API, this string will be empty,
     294             :      * indicating that toPattern() must generate a pattern
     295             :      * representation from the inversion list.
     296             :      */
     297             :     char16_t *pat;
     298             :     UVector* strings; // maintained in sorted order
     299             :     UnicodeSetStringSpan *stringSpan;
     300             : 
     301             : private:
     302             :     enum { // constants
     303             :         kIsBogus = 1       // This set is bogus (i.e. not valid)
     304             :     };
     305             :     uint8_t fFlags;         // Bit flag (see constants above)
     306             : public:
     307             :     /**
     308             :      * Determine if this object contains a valid set.
     309             :      * A bogus set has no value. It is different from an empty set.
     310             :      * It can be used to indicate that no set value is available.
     311             :      *
     312             :      * @return TRUE if the set is bogus/invalid, FALSE otherwise
     313             :      * @see setToBogus()
     314             :      * @stable ICU 4.0
     315             :      */
     316             :     inline UBool isBogus(void) const;
     317             : 
     318             :     /**
     319             :      * Make this UnicodeSet object invalid.
     320             :      * The set will test TRUE with isBogus().
     321             :      *
     322             :      * A bogus set has no value. It is different from an empty set.
     323             :      * It can be used to indicate that no set value is available.
     324             :      *
     325             :      * This utility function is used throughout the UnicodeSet
     326             :      * implementation to indicate that a UnicodeSet operation failed,
     327             :      * and may be used in other functions,
     328             :      * especially but not exclusively when such functions do not
     329             :      * take a UErrorCode for simplicity.
     330             :      *
     331             :      * @see isBogus()
     332             :      * @stable ICU 4.0
     333             :      */
     334             :     void setToBogus();
     335             : 
     336             : public:
     337             : 
     338             :     enum {
     339             :         /**
     340             :          * Minimum value that can be stored in a UnicodeSet.
     341             :          * @stable ICU 2.4
     342             :          */
     343             :         MIN_VALUE = 0,
     344             : 
     345             :         /**
     346             :          * Maximum value that can be stored in a UnicodeSet.
     347             :          * @stable ICU 2.4
     348             :          */
     349             :         MAX_VALUE = 0x10ffff
     350             :     };
     351             : 
     352             :     //----------------------------------------------------------------
     353             :     // Constructors &c
     354             :     //----------------------------------------------------------------
     355             : 
     356             : public:
     357             : 
     358             :     /**
     359             :      * Constructs an empty set.
     360             :      * @stable ICU 2.0
     361             :      */
     362             :     UnicodeSet();
     363             : 
     364             :     /**
     365             :      * Constructs a set containing the given range. If <code>end <
     366             :      * start</code> then an empty set is created.
     367             :      *
     368             :      * @param start first character, inclusive, of range
     369             :      * @param end last character, inclusive, of range
     370             :      * @stable ICU 2.4
     371             :      */
     372             :     UnicodeSet(UChar32 start, UChar32 end);
     373             : 
     374             : #ifndef U_HIDE_INTERNAL_API
     375             :     /**
     376             :      * @internal
     377             :      */
     378             :     enum ESerialization {
     379             :       kSerialized  /* result of serialize() */
     380             :     };
     381             : 
     382             :     /**
     383             :      * Constructs a set from the output of serialize().
     384             :      *
     385             :      * @param buffer the 16 bit array
     386             :      * @param bufferLen the original length returned from serialize()
     387             :      * @param serialization the value 'kSerialized'
     388             :      * @param status error code
     389             :      *
     390             :      * @internal
     391             :      */
     392             :     UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
     393             :                ESerialization serialization, UErrorCode &status);
     394             : #endif  /* U_HIDE_INTERNAL_API */
     395             : 
     396             :     /**
     397             :      * Constructs a set from the given pattern.  See the class
     398             :      * description for the syntax of the pattern language.
     399             :      * @param pattern a string specifying what characters are in the set
     400             :      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     401             :      * contains a syntax error.
     402             :      * @stable ICU 2.0
     403             :      */
     404             :     UnicodeSet(const UnicodeString& pattern,
     405             :                UErrorCode& status);
     406             : 
     407             : #ifndef U_HIDE_INTERNAL_API
     408             :     /**
     409             :      * Constructs a set from the given pattern.  See the class
     410             :      * description for the syntax of the pattern language.
     411             :      * @param pattern a string specifying what characters are in the set
     412             :      * @param options bitmask for options to apply to the pattern.
     413             :      * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     414             :      * @param symbols a symbol table mapping variable names to values
     415             :      * and stand-in characters to UnicodeSets; may be NULL
     416             :      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     417             :      * contains a syntax error.
     418             :      * @internal
     419             :      */
     420             :     UnicodeSet(const UnicodeString& pattern,
     421             :                uint32_t options,
     422             :                const SymbolTable* symbols,
     423             :                UErrorCode& status);
     424             : #endif  /* U_HIDE_INTERNAL_API */
     425             : 
     426             :     /**
     427             :      * Constructs a set from the given pattern.  See the class description
     428             :      * for the syntax of the pattern language.
     429             :      * @param pattern a string specifying what characters are in the set
     430             :      * @param pos on input, the position in pattern at which to start parsing.
     431             :      * On output, the position after the last character parsed.
     432             :      * @param options bitmask for options to apply to the pattern.
     433             :      * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     434             :      * @param symbols a symbol table mapping variable names to values
     435             :      * and stand-in characters to UnicodeSets; may be NULL
     436             :      * @param status input-output error code
     437             :      * @stable ICU 2.8
     438             :      */
     439             :     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
     440             :                uint32_t options,
     441             :                const SymbolTable* symbols,
     442             :                UErrorCode& status);
     443             : 
     444             :     /**
     445             :      * Constructs a set that is identical to the given UnicodeSet.
     446             :      * @stable ICU 2.0
     447             :      */
     448             :     UnicodeSet(const UnicodeSet& o);
     449             : 
     450             :     /**
     451             :      * Destructs the set.
     452             :      * @stable ICU 2.0
     453             :      */
     454             :     virtual ~UnicodeSet();
     455             : 
     456             :     /**
     457             :      * Assigns this object to be a copy of another.
     458             :      * A frozen set will not be modified.
     459             :      * @stable ICU 2.0
     460             :      */
     461             :     UnicodeSet& operator=(const UnicodeSet& o);
     462             : 
     463             :     /**
     464             :      * Compares the specified object with this set for equality.  Returns
     465             :      * <tt>true</tt> if the two sets
     466             :      * have the same size, and every member of the specified set is
     467             :      * contained in this set (or equivalently, every member of this set is
     468             :      * contained in the specified set).
     469             :      *
     470             :      * @param o set to be compared for equality with this set.
     471             :      * @return <tt>true</tt> if the specified set is equal to this set.
     472             :      * @stable ICU 2.0
     473             :      */
     474             :     virtual UBool operator==(const UnicodeSet& o) const;
     475             : 
     476             :     /**
     477             :      * Compares the specified object with this set for inequality.  Returns
     478             :      * <tt>true</tt> if the specified set is not equal to this set.
     479             :      * @stable ICU 2.0
     480             :      */
     481             :     UBool operator!=(const UnicodeSet& o) const;
     482             : 
     483             :     /**
     484             :      * Returns a copy of this object.  All UnicodeFunctor objects have
     485             :      * to support cloning in order to allow classes using
     486             :      * UnicodeFunctors, such as Transliterator, to implement cloning.
     487             :      * If this set is frozen, then the clone will be frozen as well.
     488             :      * Use cloneAsThawed() for a mutable clone of a frozen set.
     489             :      * @see cloneAsThawed
     490             :      * @stable ICU 2.0
     491             :      */
     492             :     virtual UnicodeFunctor* clone() const;
     493             : 
     494             :     /**
     495             :      * Returns the hash code value for this set.
     496             :      *
     497             :      * @return the hash code value for this set.
     498             :      * @see Object#hashCode()
     499             :      * @stable ICU 2.0
     500             :      */
     501             :     virtual int32_t hashCode(void) const;
     502             : 
     503             :     /**
     504             :      * Get a UnicodeSet pointer from a USet
     505             :      *
     506             :      * @param uset a USet (the ICU plain C type for UnicodeSet)
     507             :      * @return the corresponding UnicodeSet pointer.
     508             :      *
     509             :      * @stable ICU 4.2
     510             :      */
     511             :     inline static UnicodeSet *fromUSet(USet *uset);
     512             : 
     513             :     /**
     514             :      * Get a UnicodeSet pointer from a const USet
     515             :      *
     516             :      * @param uset a const USet (the ICU plain C type for UnicodeSet)
     517             :      * @return the corresponding UnicodeSet pointer.
     518             :      *
     519             :      * @stable ICU 4.2
     520             :      */
     521             :     inline static const UnicodeSet *fromUSet(const USet *uset);
     522             :     
     523             :     /**
     524             :      * Produce a USet * pointer for this UnicodeSet.
     525             :      * USet is the plain C type for UnicodeSet
     526             :      *
     527             :      * @return a USet pointer for this UnicodeSet
     528             :      * @stable ICU 4.2
     529             :      */
     530             :     inline USet *toUSet();
     531             : 
     532             : 
     533             :     /**
     534             :      * Produce a const USet * pointer for this UnicodeSet.
     535             :      * USet is the plain C type for UnicodeSet
     536             :      *
     537             :      * @return a const USet pointer for this UnicodeSet
     538             :      * @stable ICU 4.2
     539             :      */
     540             :     inline const USet * toUSet() const;
     541             : 
     542             : 
     543             :     //----------------------------------------------------------------
     544             :     // Freezable API
     545             :     //----------------------------------------------------------------
     546             : 
     547             :     /**
     548             :      * Determines whether the set has been frozen (made immutable) or not.
     549             :      * See the ICU4J Freezable interface for details.
     550             :      * @return TRUE/FALSE for whether the set has been frozen
     551             :      * @see freeze
     552             :      * @see cloneAsThawed
     553             :      * @stable ICU 3.8
     554             :      */
     555             :     inline UBool isFrozen() const;
     556             : 
     557             :     /**
     558             :      * Freeze the set (make it immutable).
     559             :      * Once frozen, it cannot be unfrozen and is therefore thread-safe
     560             :      * until it is deleted.
     561             :      * See the ICU4J Freezable interface for details.
     562             :      * Freezing the set may also make some operations faster, for example
     563             :      * contains() and span().
     564             :      * A frozen set will not be modified. (It remains frozen.)
     565             :      * @return this set.
     566             :      * @see isFrozen
     567             :      * @see cloneAsThawed
     568             :      * @stable ICU 3.8
     569             :      */
     570             :     UnicodeFunctor *freeze();
     571             : 
     572             :     /**
     573             :      * Clone the set and make the clone mutable.
     574             :      * See the ICU4J Freezable interface for details.
     575             :      * @return the mutable clone
     576             :      * @see freeze
     577             :      * @see isFrozen
     578             :      * @stable ICU 3.8
     579             :      */
     580             :     UnicodeFunctor *cloneAsThawed() const;
     581             : 
     582             :     //----------------------------------------------------------------
     583             :     // Public API
     584             :     //----------------------------------------------------------------
     585             : 
     586             :     /**
     587             :      * Make this object represent the range <code>start - end</code>.
     588             :      * If <code>start > end</code> then this object is set to
     589             :      * an empty range.
     590             :      * A frozen set will not be modified.
     591             :      *
     592             :      * @param start first character in the set, inclusive
     593             :      * @param end last character in the set, inclusive
     594             :      * @stable ICU 2.4
     595             :      */
     596             :     UnicodeSet& set(UChar32 start, UChar32 end);
     597             : 
     598             :     /**
     599             :      * Return true if the given position, in the given pattern, appears
     600             :      * to be the start of a UnicodeSet pattern.
     601             :      * @stable ICU 2.4
     602             :      */
     603             :     static UBool resemblesPattern(const UnicodeString& pattern,
     604             :                                   int32_t pos);
     605             : 
     606             :     /**
     607             :      * Modifies this set to represent the set specified by the given
     608             :      * pattern, ignoring Unicode Pattern_White_Space characters.
     609             :      * See the class description for the syntax of the pattern language.
     610             :      * A frozen set will not be modified.
     611             :      * @param pattern a string specifying what characters are in the set
     612             :      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     613             :      * contains a syntax error.
     614             :      * <em> Empties the set passed before applying the pattern.</em>
     615             :      * @return a reference to this
     616             :      * @stable ICU 2.0
     617             :      */
     618             :     UnicodeSet& applyPattern(const UnicodeString& pattern,
     619             :                              UErrorCode& status);
     620             : 
     621             : #ifndef U_HIDE_INTERNAL_API
     622             :     /**
     623             :      * Modifies this set to represent the set specified by the given
     624             :      * pattern, optionally ignoring Unicode Pattern_White_Space characters.
     625             :      * See the class description for the syntax of the pattern language.
     626             :      * A frozen set will not be modified.
     627             :      * @param pattern a string specifying what characters are in the set
     628             :      * @param options bitmask for options to apply to the pattern.
     629             :      * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     630             :      * @param symbols a symbol table mapping variable names to
     631             :      * values and stand-ins to UnicodeSets; may be NULL
     632             :      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     633             :      * contains a syntax error.
     634             :      *<em> Empties the set passed before applying the pattern.</em>
     635             :      * @return a reference to this
     636             :      * @internal
     637             :      */
     638             :     UnicodeSet& applyPattern(const UnicodeString& pattern,
     639             :                              uint32_t options,
     640             :                              const SymbolTable* symbols,
     641             :                              UErrorCode& status);
     642             : #endif  /* U_HIDE_INTERNAL_API */
     643             : 
     644             :     /**
     645             :      * Parses the given pattern, starting at the given position.  The
     646             :      * character at pattern.charAt(pos.getIndex()) must be '[', or the
     647             :      * parse fails.  Parsing continues until the corresponding closing
     648             :      * ']'.  If a syntax error is encountered between the opening and
     649             :      * closing bracket, the parse fails.  Upon return from a successful
     650             :      * parse, the ParsePosition is updated to point to the character
     651             :      * following the closing ']', and this set represents the
     652             :      * parsed pattern (a reference to it is returned).  This method calls
     653             :      * itself recursively to parse embedded subpatterns.
     654             :      *<em> Empties the set passed before applying the pattern.</em>
     655             :      * A frozen set will not be modified.
     656             :      *
     657             :      * @param pattern the string containing the pattern to be parsed.
     658             :      * The portion of the string from pos.getIndex(), which must be a
     659             :      * '[', to the corresponding closing ']', is parsed.
     660             :      * @param pos upon entry, the position at which to begin parsing.
     661             :      * The character at pattern.charAt(pos.getIndex()) must be a '['.
     662             :      * Upon return from a successful parse, pos.getIndex() is either
     663             :      * the character after the closing ']' of the parsed pattern, or
     664             :      * pattern.length() if the closing ']' is the last character of
     665             :      * the pattern string.
     666             :      * @param options bitmask for options to apply to the pattern.
     667             :      * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
     668             :      * @param symbols a symbol table mapping variable names to
     669             :      * values and stand-ins to UnicodeSets; may be NULL
     670             :      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
     671             :      * contains a syntax error.
     672             :      * @return a reference to this
     673             :      * @stable ICU 2.8
     674             :      */
     675             :     UnicodeSet& applyPattern(const UnicodeString& pattern,
     676             :                              ParsePosition& pos,
     677             :                              uint32_t options,
     678             :                              const SymbolTable* symbols,
     679             :                              UErrorCode& status);
     680             : 
     681             :     /**
     682             :      * Returns a string representation of this set.  If the result of
     683             :      * calling this function is passed to a UnicodeSet constructor, it
     684             :      * will produce another set that is equal to this one.
     685             :      * A frozen set will not be modified.
     686             :      * @param result the string to receive the rules.  Previous
     687             :      * contents will be deleted.
     688             :      * @param escapeUnprintable if TRUE then convert unprintable
     689             :      * characters to their hex escape representations, \\uxxxx or
     690             :      * \\Uxxxxxxxx.  Unprintable characters are those other than
     691             :      * U+000A, U+0020..U+007E.
     692             :      * @stable ICU 2.0
     693             :      */
     694             :     virtual UnicodeString& toPattern(UnicodeString& result,
     695             :                              UBool escapeUnprintable = FALSE) const;
     696             : 
     697             :     /**
     698             :      * Modifies this set to contain those code points which have the given value
     699             :      * for the given binary or enumerated property, as returned by
     700             :      * u_getIntPropertyValue.  Prior contents of this set are lost.
     701             :      * A frozen set will not be modified.
     702             :      *
     703             :      * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
     704             :      * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
     705             :      * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
     706             :      *
     707             :      * @param value a value in the range u_getIntPropertyMinValue(prop)..
     708             :      * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
     709             :      * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
     710             :      * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
     711             :      * categories such as [:L:] to be represented.
     712             :      *
     713             :      * @param ec error code input/output parameter
     714             :      *
     715             :      * @return a reference to this set
     716             :      *
     717             :      * @stable ICU 2.4
     718             :      */
     719             :     UnicodeSet& applyIntPropertyValue(UProperty prop,
     720             :                                       int32_t value,
     721             :                                       UErrorCode& ec);
     722             : 
     723             :     /**
     724             :      * Modifies this set to contain those code points which have the
     725             :      * given value for the given property.  Prior contents of this
     726             :      * set are lost.
     727             :      * A frozen set will not be modified.
     728             :      *
     729             :      * @param prop a property alias, either short or long.  The name is matched
     730             :      * loosely.  See PropertyAliases.txt for names and a description of loose
     731             :      * matching.  If the value string is empty, then this string is interpreted
     732             :      * as either a General_Category value alias, a Script value alias, a binary
     733             :      * property alias, or a special ID.  Special IDs are matched loosely and
     734             :      * correspond to the following sets:
     735             :      *
     736             :      * "ANY" = [\\u0000-\\U0010FFFF],
     737             :      * "ASCII" = [\\u0000-\\u007F],
     738             :      * "Assigned" = [:^Cn:].
     739             :      *
     740             :      * @param value a value alias, either short or long.  The name is matched
     741             :      * loosely.  See PropertyValueAliases.txt for names and a description of
     742             :      * loose matching.  In addition to aliases listed, numeric values and
     743             :      * canonical combining classes may be expressed numerically, e.g., ("nv",
     744             :      * "0.5") or ("ccc", "220").  The value string may also be empty.
     745             :      *
     746             :      * @param ec error code input/output parameter
     747             :      *
     748             :      * @return a reference to this set
     749             :      *
     750             :      * @stable ICU 2.4
     751             :      */
     752             :     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
     753             :                                    const UnicodeString& value,
     754             :                                    UErrorCode& ec);
     755             : 
     756             :     /**
     757             :      * Returns the number of elements in this set (its cardinality).
     758             :      * Note that the elements of a set may include both individual
     759             :      * codepoints and strings.
     760             :      *
     761             :      * @return the number of elements in this set (its cardinality).
     762             :      * @stable ICU 2.0
     763             :      */
     764             :     virtual int32_t size(void) const;
     765             : 
     766             :     /**
     767             :      * Returns <tt>true</tt> if this set contains no elements.
     768             :      *
     769             :      * @return <tt>true</tt> if this set contains no elements.
     770             :      * @stable ICU 2.0
     771             :      */
     772             :     virtual UBool isEmpty(void) const;
     773             : 
     774             :     /**
     775             :      * Returns true if this set contains the given character.
     776             :      * This function works faster with a frozen set.
     777             :      * @param c character to be checked for containment
     778             :      * @return true if the test condition is met
     779             :      * @stable ICU 2.0
     780             :      */
     781             :     virtual UBool contains(UChar32 c) const;
     782             : 
     783             :     /**
     784             :      * Returns true if this set contains every character
     785             :      * of the given range.
     786             :      * @param start first character, inclusive, of the range
     787             :      * @param end last character, inclusive, of the range
     788             :      * @return true if the test condition is met
     789             :      * @stable ICU 2.0
     790             :      */
     791             :     virtual UBool contains(UChar32 start, UChar32 end) const;
     792             : 
     793             :     /**
     794             :      * Returns <tt>true</tt> if this set contains the given
     795             :      * multicharacter string.
     796             :      * @param s string to be checked for containment
     797             :      * @return <tt>true</tt> if this set contains the specified string
     798             :      * @stable ICU 2.4
     799             :      */
     800             :     UBool contains(const UnicodeString& s) const;
     801             : 
     802             :     /**
     803             :      * Returns true if this set contains all the characters and strings
     804             :      * of the given set.
     805             :      * @param c set to be checked for containment
     806             :      * @return true if the test condition is met
     807             :      * @stable ICU 2.4
     808             :      */
     809             :     virtual UBool containsAll(const UnicodeSet& c) const;
     810             : 
     811             :     /**
     812             :      * Returns true if this set contains all the characters
     813             :      * of the given string.
     814             :      * @param s string containing characters to be checked for containment
     815             :      * @return true if the test condition is met
     816             :      * @stable ICU 2.4
     817             :      */
     818             :     UBool containsAll(const UnicodeString& s) const;
     819             : 
     820             :     /**
     821             :      * Returns true if this set contains none of the characters
     822             :      * of the given range.
     823             :      * @param start first character, inclusive, of the range
     824             :      * @param end last character, inclusive, of the range
     825             :      * @return true if the test condition is met
     826             :      * @stable ICU 2.4
     827             :      */
     828             :     UBool containsNone(UChar32 start, UChar32 end) const;
     829             : 
     830             :     /**
     831             :      * Returns true if this set contains none of the characters and strings
     832             :      * of the given set.
     833             :      * @param c set to be checked for containment
     834             :      * @return true if the test condition is met
     835             :      * @stable ICU 2.4
     836             :      */
     837             :     UBool containsNone(const UnicodeSet& c) const;
     838             : 
     839             :     /**
     840             :      * Returns true if this set contains none of the characters
     841             :      * of the given string.
     842             :      * @param s string containing characters to be checked for containment
     843             :      * @return true if the test condition is met
     844             :      * @stable ICU 2.4
     845             :      */
     846             :     UBool containsNone(const UnicodeString& s) const;
     847             : 
     848             :     /**
     849             :      * Returns true if this set contains one or more of the characters
     850             :      * in the given range.
     851             :      * @param start first character, inclusive, of the range
     852             :      * @param end last character, inclusive, of the range
     853             :      * @return true if the condition is met
     854             :      * @stable ICU 2.4
     855             :      */
     856             :     inline UBool containsSome(UChar32 start, UChar32 end) const;
     857             : 
     858             :     /**
     859             :      * Returns true if this set contains one or more of the characters
     860             :      * and strings of the given set.
     861             :      * @param s The set to be checked for containment
     862             :      * @return true if the condition is met
     863             :      * @stable ICU 2.4
     864             :      */
     865             :     inline UBool containsSome(const UnicodeSet& s) const;
     866             : 
     867             :     /**
     868             :      * Returns true if this set contains one or more of the characters
     869             :      * of the given string.
     870             :      * @param s string containing characters to be checked for containment
     871             :      * @return true if the condition is met
     872             :      * @stable ICU 2.4
     873             :      */
     874             :     inline UBool containsSome(const UnicodeString& s) const;
     875             : 
     876             :     /**
     877             :      * Returns the length of the initial substring of the input string which
     878             :      * consists only of characters and strings that are contained in this set
     879             :      * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     880             :      * or only of characters and strings that are not contained
     881             :      * in this set (USET_SPAN_NOT_CONTAINED).
     882             :      * See USetSpanCondition for details.
     883             :      * Similar to the strspn() C library function.
     884             :      * Unpaired surrogates are treated according to contains() of their surrogate code points.
     885             :      * This function works faster with a frozen set and with a non-negative string length argument.
     886             :      * @param s start of the string
     887             :      * @param length of the string; can be -1 for NUL-terminated
     888             :      * @param spanCondition specifies the containment condition
     889             :      * @return the length of the initial substring according to the spanCondition;
     890             :      *         0 if the start of the string does not fit the spanCondition
     891             :      * @stable ICU 3.8
     892             :      * @see USetSpanCondition
     893             :      */
     894             :     int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
     895             : 
     896             :     /**
     897             :      * Returns the end of the substring of the input string according to the USetSpanCondition.
     898             :      * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
     899             :      * after pinning start to 0<=start<=s.length().
     900             :      * @param s the string
     901             :      * @param start the start index in the string for the span operation
     902             :      * @param spanCondition specifies the containment condition
     903             :      * @return the exclusive end of the substring according to the spanCondition;
     904             :      *         the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
     905             :      * @stable ICU 4.4
     906             :      * @see USetSpanCondition
     907             :      */
     908             :     inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
     909             : 
     910             :     /**
     911             :      * Returns the start of the trailing substring of the input string which
     912             :      * consists only of characters and strings that are contained in this set
     913             :      * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     914             :      * or only of characters and strings that are not contained
     915             :      * in this set (USET_SPAN_NOT_CONTAINED).
     916             :      * See USetSpanCondition for details.
     917             :      * Unpaired surrogates are treated according to contains() of their surrogate code points.
     918             :      * This function works faster with a frozen set and with a non-negative string length argument.
     919             :      * @param s start of the string
     920             :      * @param length of the string; can be -1 for NUL-terminated
     921             :      * @param spanCondition specifies the containment condition
     922             :      * @return the start of the trailing substring according to the spanCondition;
     923             :      *         the string length if the end of the string does not fit the spanCondition
     924             :      * @stable ICU 3.8
     925             :      * @see USetSpanCondition
     926             :      */
     927             :     int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
     928             : 
     929             :     /**
     930             :      * Returns the start of the substring of the input string according to the USetSpanCondition.
     931             :      * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
     932             :      * after pinning limit to 0<=limit<=s.length().
     933             :      * @param s the string
     934             :      * @param limit the exclusive-end index in the string for the span operation
     935             :      *              (use s.length() or INT32_MAX for spanning back from the end of the string)
     936             :      * @param spanCondition specifies the containment condition
     937             :      * @return the start of the substring according to the spanCondition;
     938             :      *         the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
     939             :      * @stable ICU 4.4
     940             :      * @see USetSpanCondition
     941             :      */
     942             :     inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
     943             : 
     944             :     /**
     945             :      * Returns the length of the initial substring of the input string which
     946             :      * consists only of characters and strings that are contained in this set
     947             :      * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     948             :      * or only of characters and strings that are not contained
     949             :      * in this set (USET_SPAN_NOT_CONTAINED).
     950             :      * See USetSpanCondition for details.
     951             :      * Similar to the strspn() C library function.
     952             :      * Malformed byte sequences are treated according to contains(0xfffd).
     953             :      * This function works faster with a frozen set and with a non-negative string length argument.
     954             :      * @param s start of the string (UTF-8)
     955             :      * @param length of the string; can be -1 for NUL-terminated
     956             :      * @param spanCondition specifies the containment condition
     957             :      * @return the length of the initial substring according to the spanCondition;
     958             :      *         0 if the start of the string does not fit the spanCondition
     959             :      * @stable ICU 3.8
     960             :      * @see USetSpanCondition
     961             :      */
     962             :     int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
     963             : 
     964             :     /**
     965             :      * Returns the start of the trailing substring of the input string which
     966             :      * consists only of characters and strings that are contained in this set
     967             :      * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
     968             :      * or only of characters and strings that are not contained
     969             :      * in this set (USET_SPAN_NOT_CONTAINED).
     970             :      * See USetSpanCondition for details.
     971             :      * Malformed byte sequences are treated according to contains(0xfffd).
     972             :      * This function works faster with a frozen set and with a non-negative string length argument.
     973             :      * @param s start of the string (UTF-8)
     974             :      * @param length of the string; can be -1 for NUL-terminated
     975             :      * @param spanCondition specifies the containment condition
     976             :      * @return the start of the trailing substring according to the spanCondition;
     977             :      *         the string length if the end of the string does not fit the spanCondition
     978             :      * @stable ICU 3.8
     979             :      * @see USetSpanCondition
     980             :      */
     981             :     int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
     982             : 
     983             :     /**
     984             :      * Implement UnicodeMatcher::matches()
     985             :      * @stable ICU 2.4
     986             :      */
     987             :     virtual UMatchDegree matches(const Replaceable& text,
     988             :                          int32_t& offset,
     989             :                          int32_t limit,
     990             :                          UBool incremental);
     991             : 
     992             : private:
     993             :     /**
     994             :      * Returns the longest match for s in text at the given position.
     995             :      * If limit > start then match forward from start+1 to limit
     996             :      * matching all characters except s.charAt(0).  If limit < start,
     997             :      * go backward starting from start-1 matching all characters
     998             :      * except s.charAt(s.length()-1).  This method assumes that the
     999             :      * first character, text.charAt(start), matches s, so it does not
    1000             :      * check it.
    1001             :      * @param text the text to match
    1002             :      * @param start the first character to match.  In the forward
    1003             :      * direction, text.charAt(start) is matched against s.charAt(0).
    1004             :      * In the reverse direction, it is matched against
    1005             :      * s.charAt(s.length()-1).
    1006             :      * @param limit the limit offset for matching, either last+1 in
    1007             :      * the forward direction, or last-1 in the reverse direction,
    1008             :      * where last is the index of the last character to match.
    1009             :      * @param s the string to match against text
    1010             :      * @return If part of s matches up to the limit, return |limit -
    1011             :      * start|.  If all of s matches before reaching the limit, return
    1012             :      * s.length().  If there is a mismatch between s and text, return
    1013             :      * 0.
    1014             :      */
    1015             :     static int32_t matchRest(const Replaceable& text,
    1016             :                              int32_t start, int32_t limit,
    1017             :                              const UnicodeString& s);
    1018             : 
    1019             :     /**
    1020             :      * Returns the smallest value i such that c < list[i].  Caller
    1021             :      * must ensure that c is a legal value or this method will enter
    1022             :      * an infinite loop.  This method performs a binary search.
    1023             :      * @param c a character in the range MIN_VALUE..MAX_VALUE
    1024             :      * inclusive
    1025             :      * @return the smallest integer i in the range 0..len-1,
    1026             :      * inclusive, such that c < list[i]
    1027             :      */
    1028             :     int32_t findCodePoint(UChar32 c) const;
    1029             : 
    1030             : public:
    1031             : 
    1032             :     /**
    1033             :      * Implementation of UnicodeMatcher API.  Union the set of all
    1034             :      * characters that may be matched by this object into the given
    1035             :      * set.
    1036             :      * @param toUnionTo the set into which to union the source characters
    1037             :      * @stable ICU 2.4
    1038             :      */
    1039             :     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
    1040             : 
    1041             :     /**
    1042             :      * Returns the index of the given character within this set, where
    1043             :      * the set is ordered by ascending code point.  If the character
    1044             :      * is not in this set, return -1.  The inverse of this method is
    1045             :      * <code>charAt()</code>.
    1046             :      * @return an index from 0..size()-1, or -1
    1047             :      * @stable ICU 2.4
    1048             :      */
    1049             :     int32_t indexOf(UChar32 c) const;
    1050             : 
    1051             :     /**
    1052             :      * Returns the character at the given index within this set, where
    1053             :      * the set is ordered by ascending code point.  If the index is
    1054             :      * out of range, return (UChar32)-1.  The inverse of this method is
    1055             :      * <code>indexOf()</code>.
    1056             :      * @param index an index from 0..size()-1
    1057             :      * @return the character at the given index, or (UChar32)-1.
    1058             :      * @stable ICU 2.4
    1059             :      */
    1060             :     UChar32 charAt(int32_t index) const;
    1061             : 
    1062             :     /**
    1063             :      * Adds the specified range to this set if it is not already
    1064             :      * present.  If this set already contains the specified range,
    1065             :      * the call leaves this set unchanged.  If <code>start > end</code>
    1066             :      * then an empty range is added, leaving the set unchanged.
    1067             :      * This is equivalent to a boolean logic OR, or a set UNION.
    1068             :      * A frozen set will not be modified.
    1069             :      *
    1070             :      * @param start first character, inclusive, of range to be added
    1071             :      * to this set.
    1072             :      * @param end last character, inclusive, of range to be added
    1073             :      * to this set.
    1074             :      * @stable ICU 2.0
    1075             :      */
    1076             :     virtual UnicodeSet& add(UChar32 start, UChar32 end);
    1077             : 
    1078             :     /**
    1079             :      * Adds the specified character to this set if it is not already
    1080             :      * present.  If this set already contains the specified character,
    1081             :      * the call leaves this set unchanged.
    1082             :      * A frozen set will not be modified.
    1083             :      * @stable ICU 2.0
    1084             :      */
    1085             :     UnicodeSet& add(UChar32 c);
    1086             : 
    1087             :     /**
    1088             :      * Adds the specified multicharacter to this set if it is not already
    1089             :      * present.  If this set already contains the multicharacter,
    1090             :      * the call leaves this set unchanged.
    1091             :      * Thus "ch" => {"ch"}
    1092             :      * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
    1093             :      * A frozen set will not be modified.
    1094             :      * @param s the source string
    1095             :      * @return this object, for chaining
    1096             :      * @stable ICU 2.4
    1097             :      */
    1098             :     UnicodeSet& add(const UnicodeString& s);
    1099             : 
    1100             :  private:
    1101             :     /**
    1102             :      * @return the code point if the string consists of a single code point;
    1103             :      * otherwise returns -1.
    1104             :      * @param s string to test
    1105             :      */
    1106             :     static int32_t getSingleCP(const UnicodeString& s);
    1107             : 
    1108             :     void _add(const UnicodeString& s);
    1109             : 
    1110             :  public:
    1111             :     /**
    1112             :      * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
    1113             :      * If this set already contains any particular character, it has no effect on that character.
    1114             :      * A frozen set will not be modified.
    1115             :      * @param s the source string
    1116             :      * @return this object, for chaining
    1117             :      * @stable ICU 2.4
    1118             :      */
    1119             :     UnicodeSet& addAll(const UnicodeString& s);
    1120             : 
    1121             :     /**
    1122             :      * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
    1123             :      * If this set already contains any particular character, it has no effect on that character.
    1124             :      * A frozen set will not be modified.
    1125             :      * @param s the source string
    1126             :      * @return this object, for chaining
    1127             :      * @stable ICU 2.4
    1128             :      */
    1129             :     UnicodeSet& retainAll(const UnicodeString& s);
    1130             : 
    1131             :     /**
    1132             :      * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
    1133             :      * Each character will be removed if it is in this set, or will be added if it is not.
    1134             :      * A frozen set will not be modified.
    1135             :      * @param s the source string
    1136             :      * @return this object, for chaining
    1137             :      * @stable ICU 2.4
    1138             :      */
    1139             :     UnicodeSet& complementAll(const UnicodeString& s);
    1140             : 
    1141             :     /**
    1142             :      * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
    1143             :      * If this set does not contain a particular character, that character is ignored.
    1144             :      * A frozen set will not be modified.
    1145             :      * @param s the source string
    1146             :      * @return this object, for chaining
    1147             :      * @stable ICU 2.4
    1148             :      */
    1149             :     UnicodeSet& removeAll(const UnicodeString& s);
    1150             : 
    1151             :     /**
    1152             :      * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
    1153             :      * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
    1154             :      * @param s the source string
    1155             :      * @return a newly created set containing the given string.
    1156             :      * The caller owns the return object and is responsible for deleting it.
    1157             :      * @stable ICU 2.4
    1158             :      */
    1159             :     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
    1160             : 
    1161             : 
    1162             :     /**
    1163             :      * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
    1164             :      * @param s the source string
    1165             :      * @return a newly created set containing the given characters.
    1166             :      * The caller owns the return object and is responsible for deleting it.
    1167             :      * @stable ICU 2.4
    1168             :      */
    1169             :     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
    1170             : 
    1171             :     /**
    1172             :      * Retain only the elements in this set that are contained in the
    1173             :      * specified range.  If <code>start > end</code> then an empty range is
    1174             :      * retained, leaving the set empty.  This is equivalent to
    1175             :      * a boolean logic AND, or a set INTERSECTION.
    1176             :      * A frozen set will not be modified.
    1177             :      *
    1178             :      * @param start first character, inclusive, of range to be retained
    1179             :      * in this set.
    1180             :      * @param end last character, inclusive, of range to be retained
    1181             :      * in this set.
    1182             :      * @stable ICU 2.0
    1183             :      */
    1184             :     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
    1185             : 
    1186             : 
    1187             :     /**
    1188             :      * Retain the specified character from this set if it is present.
    1189             :      * A frozen set will not be modified.
    1190             :      * @stable ICU 2.0
    1191             :      */
    1192             :     UnicodeSet& retain(UChar32 c);
    1193             : 
    1194             :     /**
    1195             :      * Removes the specified range from this set if it is present.
    1196             :      * The set will not contain the specified range once the call
    1197             :      * returns.  If <code>start > end</code> then an empty range is
    1198             :      * removed, leaving the set unchanged.
    1199             :      * A frozen set will not be modified.
    1200             :      *
    1201             :      * @param start first character, inclusive, of range to be removed
    1202             :      * from this set.
    1203             :      * @param end last character, inclusive, of range to be removed
    1204             :      * from this set.
    1205             :      * @stable ICU 2.0
    1206             :      */
    1207             :     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
    1208             : 
    1209             :     /**
    1210             :      * Removes the specified character from this set if it is present.
    1211             :      * The set will not contain the specified character once the call
    1212             :      * returns.
    1213             :      * A frozen set will not be modified.
    1214             :      * @stable ICU 2.0
    1215             :      */
    1216             :     UnicodeSet& remove(UChar32 c);
    1217             : 
    1218             :     /**
    1219             :      * Removes the specified string from this set if it is present.
    1220             :      * The set will not contain the specified string once the call
    1221             :      * returns.
    1222             :      * A frozen set will not be modified.
    1223             :      * @param s the source string
    1224             :      * @return this object, for chaining
    1225             :      * @stable ICU 2.4
    1226             :      */
    1227             :     UnicodeSet& remove(const UnicodeString& s);
    1228             : 
    1229             :     /**
    1230             :      * Inverts this set.  This operation modifies this set so that
    1231             :      * its value is its complement.  This is equivalent to
    1232             :      * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
    1233             :      * A frozen set will not be modified.
    1234             :      * @stable ICU 2.0
    1235             :      */
    1236             :     virtual UnicodeSet& complement(void);
    1237             : 
    1238             :     /**
    1239             :      * Complements the specified range in this set.  Any character in
    1240             :      * the range will be removed if it is in this set, or will be
    1241             :      * added if it is not in this set.  If <code>start > end</code>
    1242             :      * then an empty range is complemented, leaving the set unchanged.
    1243             :      * This is equivalent to a boolean logic XOR.
    1244             :      * A frozen set will not be modified.
    1245             :      *
    1246             :      * @param start first character, inclusive, of range to be complemented
    1247             :      * in this set.
    1248             :      * @param end last character, inclusive, of range to be complemented
    1249             :      * in this set.
    1250             :      * @stable ICU 2.0
    1251             :      */
    1252             :     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
    1253             : 
    1254             :     /**
    1255             :      * Complements the specified character in this set.  The character
    1256             :      * will be removed if it is in this set, or will be added if it is
    1257             :      * not in this set.
    1258             :      * A frozen set will not be modified.
    1259             :      * @stable ICU 2.0
    1260             :      */
    1261             :     UnicodeSet& complement(UChar32 c);
    1262             : 
    1263             :     /**
    1264             :      * Complement the specified string in this set.
    1265             :      * The string will be removed if it is in this set, or will be
    1266             :      * added if it is not in this set.
    1267             :      * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
    1268             :      * A frozen set will not be modified.
    1269             :      * @param s the string to complement
    1270             :      * @return this object, for chaining
    1271             :      * @stable ICU 2.4
    1272             :      */
    1273             :     UnicodeSet& complement(const UnicodeString& s);
    1274             : 
    1275             :     /**
    1276             :      * Adds all of the elements in the specified set to this set if
    1277             :      * they're not already present.  This operation effectively
    1278             :      * modifies this set so that its value is the <i>union</i> of the two
    1279             :      * sets.  The behavior of this operation is unspecified if the specified
    1280             :      * collection is modified while the operation is in progress.
    1281             :      * A frozen set will not be modified.
    1282             :      *
    1283             :      * @param c set whose elements are to be added to this set.
    1284             :      * @see #add(UChar32, UChar32)
    1285             :      * @stable ICU 2.0
    1286             :      */
    1287             :     virtual UnicodeSet& addAll(const UnicodeSet& c);
    1288             : 
    1289             :     /**
    1290             :      * Retains only the elements in this set that are contained in the
    1291             :      * specified set.  In other words, removes from this set all of
    1292             :      * its elements that are not contained in the specified set.  This
    1293             :      * operation effectively modifies this set so that its value is
    1294             :      * the <i>intersection</i> of the two sets.
    1295             :      * A frozen set will not be modified.
    1296             :      *
    1297             :      * @param c set that defines which elements this set will retain.
    1298             :      * @stable ICU 2.0
    1299             :      */
    1300             :     virtual UnicodeSet& retainAll(const UnicodeSet& c);
    1301             : 
    1302             :     /**
    1303             :      * Removes from this set all of its elements that are contained in the
    1304             :      * specified set.  This operation effectively modifies this
    1305             :      * set so that its value is the <i>asymmetric set difference</i> of
    1306             :      * the two sets.
    1307             :      * A frozen set will not be modified.
    1308             :      *
    1309             :      * @param c set that defines which elements will be removed from
    1310             :      *          this set.
    1311             :      * @stable ICU 2.0
    1312             :      */
    1313             :     virtual UnicodeSet& removeAll(const UnicodeSet& c);
    1314             : 
    1315             :     /**
    1316             :      * Complements in this set all elements contained in the specified
    1317             :      * set.  Any character in the other set will be removed if it is
    1318             :      * in this set, or will be added if it is not in this set.
    1319             :      * A frozen set will not be modified.
    1320             :      *
    1321             :      * @param c set that defines which elements will be xor'ed from
    1322             :      *          this set.
    1323             :      * @stable ICU 2.4
    1324             :      */
    1325             :     virtual UnicodeSet& complementAll(const UnicodeSet& c);
    1326             : 
    1327             :     /**
    1328             :      * Removes all of the elements from this set.  This set will be
    1329             :      * empty after this call returns.
    1330             :      * A frozen set will not be modified.
    1331             :      * @stable ICU 2.0
    1332             :      */
    1333             :     virtual UnicodeSet& clear(void);
    1334             : 
    1335             :     /**
    1336             :      * Close this set over the given attribute.  For the attribute
    1337             :      * USET_CASE, the result is to modify this set so that:
    1338             :      *
    1339             :      * 1. For each character or string 'a' in this set, all strings or
    1340             :      * characters 'b' such that foldCase(a) == foldCase(b) are added
    1341             :      * to this set.
    1342             :      *
    1343             :      * 2. For each string 'e' in the resulting set, if e !=
    1344             :      * foldCase(e), 'e' will be removed.
    1345             :      *
    1346             :      * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
    1347             :      *
    1348             :      * (Here foldCase(x) refers to the operation u_strFoldCase, and a
    1349             :      * == b denotes that the contents are the same, not pointer
    1350             :      * comparison.)
    1351             :      *
    1352             :      * A frozen set will not be modified.
    1353             :      *
    1354             :      * @param attribute bitmask for attributes to close over.
    1355             :      * Currently only the USET_CASE bit is supported.  Any undefined bits
    1356             :      * are ignored.
    1357             :      * @return a reference to this set.
    1358             :      * @stable ICU 4.2
    1359             :      */
    1360             :     UnicodeSet& closeOver(int32_t attribute);
    1361             : 
    1362             :     /**
    1363             :      * Remove all strings from this set.
    1364             :      *
    1365             :      * @return a reference to this set.
    1366             :      * @stable ICU 4.2
    1367             :      */
    1368             :     virtual UnicodeSet &removeAllStrings();
    1369             : 
    1370             :     /**
    1371             :      * Iteration method that returns the number of ranges contained in
    1372             :      * this set.
    1373             :      * @see #getRangeStart
    1374             :      * @see #getRangeEnd
    1375             :      * @stable ICU 2.4
    1376             :      */
    1377             :     virtual int32_t getRangeCount(void) const;
    1378             : 
    1379             :     /**
    1380             :      * Iteration method that returns the first character in the
    1381             :      * specified range of this set.
    1382             :      * @see #getRangeCount
    1383             :      * @see #getRangeEnd
    1384             :      * @stable ICU 2.4
    1385             :      */
    1386             :     virtual UChar32 getRangeStart(int32_t index) const;
    1387             : 
    1388             :     /**
    1389             :      * Iteration method that returns the last character in the
    1390             :      * specified range of this set.
    1391             :      * @see #getRangeCount
    1392             :      * @see #getRangeStart
    1393             :      * @stable ICU 2.4
    1394             :      */
    1395             :     virtual UChar32 getRangeEnd(int32_t index) const;
    1396             : 
    1397             :     /**
    1398             :      * Serializes this set into an array of 16-bit integers.  Serialization
    1399             :      * (currently) only records the characters in the set; multicharacter
    1400             :      * strings are ignored.
    1401             :      *
    1402             :      * The array has the following format (each line is one 16-bit
    1403             :      * integer):
    1404             :      *
    1405             :      *  length     = (n+2*m) | (m!=0?0x8000:0)
    1406             :      *  bmpLength  = n; present if m!=0
    1407             :      *  bmp[0]
    1408             :      *  bmp[1]
    1409             :      *  ...
    1410             :      *  bmp[n-1]
    1411             :      *  supp-high[0]
    1412             :      *  supp-low[0]
    1413             :      *  supp-high[1]
    1414             :      *  supp-low[1]
    1415             :      *  ...
    1416             :      *  supp-high[m-1]
    1417             :      *  supp-low[m-1]
    1418             :      *
    1419             :      * The array starts with a header.  After the header are n bmp
    1420             :      * code points, then m supplementary code points.  Either n or m
    1421             :      * or both may be zero.  n+2*m is always <= 0x7FFF.
    1422             :      *
    1423             :      * If there are no supplementary characters (if m==0) then the
    1424             :      * header is one 16-bit integer, 'length', with value n.
    1425             :      *
    1426             :      * If there are supplementary characters (if m!=0) then the header
    1427             :      * is two 16-bit integers.  The first, 'length', has value
    1428             :      * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
    1429             :      *
    1430             :      * After the header the code points are stored in ascending order.
    1431             :      * Supplementary code points are stored as most significant 16
    1432             :      * bits followed by least significant 16 bits.
    1433             :      *
    1434             :      * @param dest pointer to buffer of destCapacity 16-bit integers.
    1435             :      * May be NULL only if destCapacity is zero.
    1436             :      * @param destCapacity size of dest, or zero.  Must not be negative.
    1437             :      * @param ec error code.  Will be set to U_INDEX_OUTOFBOUNDS_ERROR
    1438             :      * if n+2*m > 0x7FFF.  Will be set to U_BUFFER_OVERFLOW_ERROR if
    1439             :      * n+2*m+(m!=0?2:1) > destCapacity.
    1440             :      * @return the total length of the serialized format, including
    1441             :      * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
    1442             :      * than U_BUFFER_OVERFLOW_ERROR.
    1443             :      * @stable ICU 2.4
    1444             :      */
    1445             :     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
    1446             : 
    1447             :     /**
    1448             :      * Reallocate this object's internal structures to take up the least
    1449             :      * possible space, without changing this object's value.
    1450             :      * A frozen set will not be modified.
    1451             :      * @stable ICU 2.4
    1452             :      */
    1453             :     virtual UnicodeSet& compact();
    1454             : 
    1455             :     /**
    1456             :      * Return the class ID for this class.  This is useful only for
    1457             :      * comparing to a return value from getDynamicClassID().  For example:
    1458             :      * <pre>
    1459             :      * .      Base* polymorphic_pointer = createPolymorphicObject();
    1460             :      * .      if (polymorphic_pointer->getDynamicClassID() ==
    1461             :      * .          Derived::getStaticClassID()) ...
    1462             :      * </pre>
    1463             :      * @return          The class ID for all objects of this class.
    1464             :      * @stable ICU 2.0
    1465             :      */
    1466             :     static UClassID U_EXPORT2 getStaticClassID(void);
    1467             : 
    1468             :     /**
    1469             :      * Implement UnicodeFunctor API.
    1470             :      *
    1471             :      * @return The class ID for this object. All objects of a given
    1472             :      * class have the same class ID.  Objects of other classes have
    1473             :      * different class IDs.
    1474             :      * @stable ICU 2.4
    1475             :      */
    1476             :     virtual UClassID getDynamicClassID(void) const;
    1477             : 
    1478             : private:
    1479             : 
    1480             :     // Private API for the USet API
    1481             : 
    1482             :     friend class USetAccess;
    1483             : 
    1484             :     int32_t getStringCount() const;
    1485             : 
    1486             :     const UnicodeString* getString(int32_t index) const;
    1487             : 
    1488             :     //----------------------------------------------------------------
    1489             :     // RuleBasedTransliterator support
    1490             :     //----------------------------------------------------------------
    1491             : 
    1492             : private:
    1493             : 
    1494             :     /**
    1495             :      * Returns <tt>true</tt> if this set contains any character whose low byte
    1496             :      * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
    1497             :      * indexing.
    1498             :      */
    1499             :     virtual UBool matchesIndexValue(uint8_t v) const;
    1500             : 
    1501             : private:
    1502             :     friend class RBBIRuleScanner;
    1503             : 
    1504             :     //----------------------------------------------------------------
    1505             :     // Implementation: Clone as thawed (see ICU4J Freezable)
    1506             :     //----------------------------------------------------------------
    1507             : 
    1508             :     UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
    1509             : 
    1510             :     //----------------------------------------------------------------
    1511             :     // Implementation: Pattern parsing
    1512             :     //----------------------------------------------------------------
    1513             : 
    1514             :     void applyPatternIgnoreSpace(const UnicodeString& pattern,
    1515             :                                  ParsePosition& pos,
    1516             :                                  const SymbolTable* symbols,
    1517             :                                  UErrorCode& status);
    1518             : 
    1519             :     void applyPattern(RuleCharacterIterator& chars,
    1520             :                       const SymbolTable* symbols,
    1521             :                       UnicodeString& rebuiltPat,
    1522             :                       uint32_t options,
    1523             :                       UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
    1524             :                       UErrorCode& ec);
    1525             : 
    1526             :     //----------------------------------------------------------------
    1527             :     // Implementation: Utility methods
    1528             :     //----------------------------------------------------------------
    1529             : 
    1530             :     void ensureCapacity(int32_t newLen, UErrorCode& ec);
    1531             : 
    1532             :     void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
    1533             : 
    1534             :     void swapBuffers(void);
    1535             : 
    1536             :     UBool allocateStrings(UErrorCode &status);
    1537             : 
    1538             :     UnicodeString& _toPattern(UnicodeString& result,
    1539             :                               UBool escapeUnprintable) const;
    1540             : 
    1541             :     UnicodeString& _generatePattern(UnicodeString& result,
    1542             :                                     UBool escapeUnprintable) const;
    1543             : 
    1544             :     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
    1545             : 
    1546             :     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
    1547             : 
    1548             :     //----------------------------------------------------------------
    1549             :     // Implementation: Fundamental operators
    1550             :     //----------------------------------------------------------------
    1551             : 
    1552             :     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
    1553             : 
    1554             :     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
    1555             : 
    1556             :     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
    1557             : 
    1558             :     /**
    1559             :      * Return true if the given position, in the given pattern, appears
    1560             :      * to be the start of a property set pattern: [:foo:], \\p{foo},
    1561             :      * \\P{foo}, or \\N{name}.
    1562             :      */
    1563             :     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
    1564             :                                           int32_t pos);
    1565             : 
    1566             :     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
    1567             :                                           int32_t iterOpts);
    1568             : 
    1569             :     /**
    1570             :      * Parse the given property pattern at the given parse position
    1571             :      * and set this UnicodeSet to the result.
    1572             :      *
    1573             :      * The original design document is out of date, but still useful.
    1574             :      * Ignore the property and value names:
    1575             :      * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html
    1576             :      *
    1577             :      * Recognized syntax:
    1578             :      *
    1579             :      * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
    1580             :      * \\p{foo} \\P{foo}  - white space not allowed within "\\p" or "\\P"
    1581             :      * \\N{name}         - white space not allowed within "\\N"
    1582             :      *
    1583             :      * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
    1584             :      * Case is ignored except in "\\p", "\\P", and "\\N".  In 'name', leading
    1585             :      * and trailing white space is deleted, and internal runs of white space
    1586             :      * are collapsed to a single space.
    1587             :      *
    1588             :      * We support binary properties, enumerated properties, and the
    1589             :      * following non-enumerated properties:
    1590             :      *
    1591             :      *  Numeric_Value
    1592             :      *  Name
    1593             :      *  Unicode_1_Name
    1594             :      *
    1595             :      * @param pattern the pattern string
    1596             :      * @param ppos on entry, the position at which to begin parsing.
    1597             :      * This should be one of the locations marked '^':
    1598             :      *
    1599             :      *   [:blah:]     \\p{blah}     \\P{blah}     \\N{name}
    1600             :      *   ^       %    ^       %    ^       %    ^       %
    1601             :      *
    1602             :      * On return, the position after the last character parsed, that is,
    1603             :      * one of the locations marked '%'.  If the parse fails, ppos is left
    1604             :      * unchanged.
    1605             :      * @param ec status
    1606             :      * @return a reference to this.
    1607             :      */
    1608             :     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
    1609             :                                      ParsePosition& ppos,
    1610             :                                      UErrorCode &ec);
    1611             : 
    1612             :     void applyPropertyPattern(RuleCharacterIterator& chars,
    1613             :                               UnicodeString& rebuiltPat,
    1614             :                               UErrorCode& ec);
    1615             : 
    1616             :     friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status);
    1617             :     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
    1618             : 
    1619             :     /**
    1620             :      * A filter that returns TRUE if the given code point should be
    1621             :      * included in the UnicodeSet being constructed.
    1622             :      */
    1623             :     typedef UBool (*Filter)(UChar32 codePoint, void* context);
    1624             : 
    1625             :     /**
    1626             :      * Given a filter, set this UnicodeSet to the code points
    1627             :      * accepted by that filter.  The filter MUST be
    1628             :      * property-conformant.  That is, if it returns value v for one
    1629             :      * code point, then it must return v for all affiliated code
    1630             :      * points, as defined by the inclusions list.  See
    1631             :      * getInclusions().
    1632             :      * src is a UPropertySource value.
    1633             :      */
    1634             :     void applyFilter(Filter filter,
    1635             :                      void* context,
    1636             :                      int32_t src,
    1637             :                      UErrorCode &status);
    1638             : 
    1639             :     /**
    1640             :      * Set the new pattern to cache.
    1641             :      */
    1642             :     void setPattern(const UnicodeString& newPat);
    1643             :     /**
    1644             :      * Release existing cached pattern.
    1645             :      */
    1646             :     void releasePattern();
    1647             : 
    1648             :     friend class UnicodeSetIterator;
    1649             : };
    1650             : 
    1651             : 
    1652             : 
    1653           0 : inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
    1654           0 :     return !operator==(o);
    1655             : }
    1656             : 
    1657           0 : inline UBool UnicodeSet::isFrozen() const {
    1658           0 :     return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
    1659             : }
    1660             : 
    1661           0 : inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
    1662           0 :     return !containsNone(start, end);
    1663             : }
    1664             : 
    1665           0 : inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
    1666           0 :     return !containsNone(s);
    1667             : }
    1668             : 
    1669             : inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
    1670             :     return !containsNone(s);
    1671             : }
    1672             : 
    1673           0 : inline UBool UnicodeSet::isBogus() const {
    1674           0 :     return (UBool)(fFlags & kIsBogus);
    1675             : }
    1676             : 
    1677           0 : inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
    1678           0 :     return reinterpret_cast<UnicodeSet *>(uset);
    1679             : }
    1680             : 
    1681           0 : inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
    1682           0 :     return reinterpret_cast<const UnicodeSet *>(uset);
    1683             : }
    1684             : 
    1685           0 : inline USet *UnicodeSet::toUSet() {
    1686           0 :     return reinterpret_cast<USet *>(this);
    1687             : }
    1688             : 
    1689           0 : inline const USet *UnicodeSet::toUSet() const {
    1690           0 :     return reinterpret_cast<const USet *>(this);
    1691             : }
    1692             : 
    1693           0 : inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
    1694           0 :     int32_t sLength=s.length();
    1695           0 :     if(start<0) {
    1696           0 :         start=0;
    1697           0 :     } else if(start>sLength) {
    1698           0 :         start=sLength;
    1699             :     }
    1700           0 :     return start+span(s.getBuffer()+start, sLength-start, spanCondition);
    1701             : }
    1702             : 
    1703           0 : inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
    1704           0 :     int32_t sLength=s.length();
    1705           0 :     if(limit<0) {
    1706           0 :         limit=0;
    1707           0 :     } else if(limit>sLength) {
    1708           0 :         limit=sLength;
    1709             :     }
    1710           0 :     return spanBack(s.getBuffer(), limit, spanCondition);
    1711             : }
    1712             : 
    1713             : U_NAMESPACE_END
    1714             : 
    1715             : #endif

Generated by: LCOV version 1.13