Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2010-2014, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * utf16collationiterator.h
9 : *
10 : * created on: 2010oct27
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #ifndef __UTF16COLLATIONITERATOR_H__
15 : #define __UTF16COLLATIONITERATOR_H__
16 :
17 : #include "unicode/utypes.h"
18 :
19 : #if !UCONFIG_NO_COLLATION
20 :
21 : #include "cmemory.h"
22 : #include "collation.h"
23 : #include "collationdata.h"
24 : #include "collationiterator.h"
25 : #include "normalizer2impl.h"
26 :
27 : U_NAMESPACE_BEGIN
28 :
29 : /**
30 : * UTF-16 collation element and character iterator.
31 : * Handles normalized UTF-16 text inline, either with an explicit limit pointer or NUL-terminated.
32 : * Unnormalized text is handled by a subclass.
33 : */
34 : class U_I18N_API UTF16CollationIterator : public CollationIterator {
35 : public: // Constructor below: d and numeric are forwarded to the CollationIterator base; s, p, lim initialize start, pos, limit.
36 0 : UTF16CollationIterator(const CollationData *d, UBool numeric,
37 : const UChar *s, const UChar *p, const UChar *lim)
38 0 : : CollationIterator(d, numeric),
39 0 : start(s), pos(p), limit(lim) {}
40 : // Only declared here. NOTE(review): presumably copies other's state and re-targets it at newText -- confirm in the .cpp.
41 : UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
42 :
43 : virtual ~UTF16CollationIterator();
44 :
45 : virtual UBool operator==(const CollationIterator &other) const;
46 :
47 : virtual void resetToOffset(int32_t newOffset);
48 :
49 : virtual int32_t getOffset() const;
50 : // Calls reset(), then points the iterator at new text: start and pos are both set to s, limit to lim.
51 0 : void setText(const UChar *s, const UChar *lim) {
52 0 : reset();
53 0 : start = pos = s;
54 0 : limit = lim;
55 0 : }
56 :
57 : virtual UChar32 nextCodePoint(UErrorCode &errorCode);
58 :
59 : virtual UChar32 previousCodePoint(UErrorCode &errorCode);
60 :
61 : protected:
62 : // Copy constructor only for subclasses which set the pointers: the base class is copied from other, but start, pos and limit are left NULL.
63 0 : UTF16CollationIterator(const UTF16CollationIterator &other)
64 0 : : CollationIterator(other),
65 0 : start(NULL), pos(NULL), limit(NULL) {}
66 :
67 : virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
68 :
69 : virtual UChar handleGetTrailSurrogate();
70 :
71 : virtual UBool foundNULTerminator();
72 :
73 : virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
74 :
75 : virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
76 :
77 : // UTF-16 string pointers (text start, iteration position, text limit).
78 : // limit can be NULL for NUL-terminated strings.
79 : const UChar *start, *pos, *limit;
80 : };
81 :
82 : /**
83 : * UTF-16 collation iterator that incrementally checks the input text for FCD and normalizes where necessary.
84 : */
85 : class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
86 : public: // Constructor below: starts in the forward-checking state (checkDir=1); data is dereferenced for nfcImpl, so it must not be NULL.
87 0 : FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
88 : const UChar *s, const UChar *p, const UChar *lim)
89 0 : : UTF16CollationIterator(data, numeric, s, p, lim),
90 : rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
91 0 : nfcImpl(data->nfcImpl),
92 0 : checkDir(1) {}
93 : // Only declared here. NOTE(review): presumably copies other's state and re-targets it at newText -- confirm in the .cpp.
94 : FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
95 :
96 : virtual ~FCDUTF16CollationIterator();
97 :
98 : virtual UBool operator==(const CollationIterator &other) const;
99 :
100 : virtual void resetToOffset(int32_t newOffset);
101 :
102 : virtual int32_t getOffset() const;
103 :
104 : virtual UChar32 nextCodePoint(UErrorCode &errorCode);
105 :
106 : virtual UChar32 previousCodePoint(UErrorCode &errorCode);
107 :
108 : protected:
109 : virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
110 :
111 : virtual UBool foundNULTerminator();
112 :
113 : virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
114 :
115 : virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
116 :
117 : private:
118 : /**
119 : * Switches to forward checking if possible.
120 : * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
121 : * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
122 : */
123 : void switchToForward();
124 :
125 : /**
126 : * Extend the FCD text segment forward or normalize around pos.
127 : * To be called when checkDir > 0 && pos != limit.
128 : * @return TRUE if success, checkDir == 0 and pos != limit
129 : */
130 : UBool nextSegment(UErrorCode &errorCode);
131 :
132 : /**
133 : * Switches to backward checking.
134 : * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
135 : * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
136 : */
137 : void switchToBackward();
138 :
139 : /**
140 : * Extend the FCD text segment backward or normalize around pos.
141 : * To be called when checkDir < 0 && pos != start.
142 : * @return TRUE if success, checkDir == 0 and pos != start
143 : */
144 : UBool previousSegment(UErrorCode &errorCode);
145 : // NOTE(review): undocumented here; presumably normalizes the text [from, to[ into `normalized` and returns success -- confirm in the .cpp.
146 : UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
147 :
148 : // Text pointers: The input text is [rawStart, rawLimit[
149 : // where rawLimit can be NULL for NUL-terminated text.
150 : // ([a, b[ and [a..b[ denote half-open ranges: a is included, b is excluded.)
151 : // checkDir > 0:
152 : //
153 : // The input text [segmentStart..pos[ passes the FCD check.
154 : // Moving forward checks incrementally.
155 : // segmentLimit is undefined. limit == rawLimit.
156 : //
157 : // checkDir < 0:
158 : // The input text [pos..segmentLimit[ passes the FCD check.
159 : // Moving backward checks incrementally.
160 : // segmentStart is undefined. start == rawStart.
161 : //
162 : // checkDir == 0:
163 : //
164 : // The input text [segmentStart..segmentLimit[ is being processed.
165 : // These pointers are at FCD boundaries.
166 : // Either this text segment already passes the FCD check
167 : // and segmentStart==start<=pos<=limit==segmentLimit,
168 : // or the current segment had to be normalized so that
169 : // [segmentStart..segmentLimit[ turned into the normalized string,
170 : // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
171 : const UChar *rawStart;
172 : const UChar *segmentStart;
173 : const UChar *segmentLimit;
174 : // rawLimit==NULL for a NUL-terminated string.
175 : const UChar *rawLimit;
176 :
177 : const Normalizer2Impl &nfcImpl; // Bound to data->nfcImpl by the constructor.
178 : UnicodeString normalized; // Normalized form of the current segment when checkDir == 0 and the segment had to be normalized (see above).
179 : // Direction of incremental FCD check (> 0 forward, < 0 backward, 0 within a processed segment). See comments before rawStart.
180 : int8_t checkDir;
181 : };
182 :
183 : U_NAMESPACE_END
184 :
185 : #endif // !UCONFIG_NO_COLLATION
186 : #endif // __UTF16COLLATIONITERATOR_H__
|