Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2012-2016, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * uitercollationiterator.h
9 : *
10 : * created on: 2012sep23 (from utf16collationiterator.h)
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #ifndef __UITERCOLLATIONITERATOR_H__
15 : #define __UITERCOLLATIONITERATOR_H__
16 :
17 : #include "unicode/utypes.h"
18 :
19 : #if !UCONFIG_NO_COLLATION
20 :
21 : #include "unicode/uiter.h"
22 : #include "cmemory.h"
23 : #include "collation.h"
24 : #include "collationdata.h"
25 : #include "collationiterator.h"
26 : #include "normalizer2impl.h"
27 :
28 : U_NAMESPACE_BEGIN
29 :
30 : /**
31 : * UCharIterator-based collation element and character iterator.
32 : * Handles normalized text inline, with length or NUL-terminated.
33 : * Unnormalized text is handled by a subclass.
34 : */
35 : class U_I18N_API UIterCollationIterator : public CollationIterator {
36 : public:
37 0 : UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
38 0 : : CollationIterator(d, numeric), iter(ui) {}
39 :
40 : virtual ~UIterCollationIterator();
41 :
42 : virtual void resetToOffset(int32_t newOffset);
43 :
44 : virtual int32_t getOffset() const;
45 :
46 : virtual UChar32 nextCodePoint(UErrorCode &errorCode);
47 :
48 : virtual UChar32 previousCodePoint(UErrorCode &errorCode);
49 :
50 : protected:
51 : virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
52 :
53 : virtual UChar handleGetTrailSurrogate();
54 :
55 : virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
56 :
57 : virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
58 :
59 : UCharIterator &iter;
60 : };
61 :
62 : /**
63 : * Incrementally checks the input text for FCD and normalizes where necessary.
64 : */
65 : class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
66 : public:
67 0 : FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
68 0 : : UIterCollationIterator(data, numeric, ui),
69 : state(ITER_CHECK_FWD), start(startIndex),
70 0 : nfcImpl(data->nfcImpl) {}
71 :
72 : virtual ~FCDUIterCollationIterator();
73 :
74 : virtual void resetToOffset(int32_t newOffset);
75 :
76 : virtual int32_t getOffset() const;
77 :
78 : virtual UChar32 nextCodePoint(UErrorCode &errorCode);
79 :
80 : virtual UChar32 previousCodePoint(UErrorCode &errorCode);
81 :
82 : protected:
83 : virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
84 :
85 : virtual UChar handleGetTrailSurrogate();
86 :
87 : virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
88 :
89 : virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
90 :
91 : private:
92 : /**
93 : * Switches to forward checking if possible.
94 : */
95 : void switchToForward();
96 :
97 : /**
98 : * Extends the FCD text segment forward or normalizes around pos.
99 : * @return TRUE if success
100 : */
101 : UBool nextSegment(UErrorCode &errorCode);
102 :
103 : /**
104 : * Switches to backward checking.
105 : */
106 : void switchToBackward();
107 :
108 : /**
109 : * Extends the FCD text segment backward or normalizes around pos.
110 : * @return TRUE if success
111 : */
112 : UBool previousSegment(UErrorCode &errorCode);
113 :
114 : UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
115 :
116 : enum State {
117 : /**
118 : * The input text [start..(iter index)[ passes the FCD check.
119 : * Moving forward checks incrementally.
120 : * pos & limit are undefined.
121 : */
122 : ITER_CHECK_FWD,
123 : /**
124 : * The input text [(iter index)..limit[ passes the FCD check.
125 : * Moving backward checks incrementally.
126 : * start & pos are undefined.
127 : */
128 : ITER_CHECK_BWD,
129 : /**
130 : * The input text [start..limit[ passes the FCD check.
131 : * pos tracks the current text index.
132 : */
133 : ITER_IN_FCD_SEGMENT,
134 : /**
135 : * The input text [start..limit[ failed the FCD check and was normalized.
136 : * pos tracks the current index in the normalized string.
137 : * The text iterator is at the limit index.
138 : */
139 : IN_NORM_ITER_AT_LIMIT,
140 : /**
141 : * The input text [start..limit[ failed the FCD check and was normalized.
142 : * pos tracks the current index in the normalized string.
143 : * The text iterator is at the start index.
144 : */
145 : IN_NORM_ITER_AT_START
146 : };
147 :
148 : State state;
149 :
150 : int32_t start;
151 : int32_t pos;
152 : int32_t limit;
153 :
154 : const Normalizer2Impl &nfcImpl;
155 : UnicodeString normalized;
156 : };
157 :
158 : U_NAMESPACE_END
159 :
160 : #endif // !UCONFIG_NO_COLLATION
161 : #endif // __UITERCOLLATIONITERATOR_H__
|