Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 :
4 : // edits.h
5 : // created: 2016dec30 Markus W. Scherer
6 :
7 : #ifndef __EDITS_H__
8 : #define __EDITS_H__
9 :
10 : #include "unicode/utypes.h"
11 : #include "unicode/uobject.h"
12 :
13 : /**
14 : * \file
15 : * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
16 : */
17 :
18 : U_NAMESPACE_BEGIN
19 :
20 : #ifndef U_HIDE_DRAFT_API
21 :
22 : /**
23 : * Records lengths of string edits but not replacement text.
24 : * Supports replacements, insertions, deletions in linear progression.
25 : * Does not support moving/reordering of text.
26 : *
27 : * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
28 : * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
29 : *
30 : * @draft ICU 59
31 : */
32 : class U_COMMON_API Edits U_FINAL : public UMemory {
33 : public:
34 : /**
35 : * Constructs an empty object.
36 : * @draft ICU 59
37 : */
38 0 : Edits() :
39 : array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
40 0 : errorCode(U_ZERO_ERROR) {}
41 : /**
42 : * Destructor.
43 : * @draft ICU 59
44 : */
45 : ~Edits();
46 :
47 : /**
48 : * Resets the data but may not release memory.
49 : * @draft ICU 59
50 : */
51 : void reset();
52 :
53 : /**
54 : * Adds a record for an unchanged segment of text.
55 : * Normally called from inside ICU string transformation functions, not user code.
56 : * @draft ICU 59
57 : */
58 : void addUnchanged(int32_t unchangedLength);
59 : /**
60 : * Adds a record for a text replacement/insertion/deletion.
61 : * Normally called from inside ICU string transformation functions, not user code.
62 : * @draft ICU 59
63 : */
64 : void addReplace(int32_t oldLength, int32_t newLength);
65 : /**
66 : * Sets the UErrorCode if an error occurred while recording edits.
67 : * Preserves older error codes in the outErrorCode.
68 : * Normally called from inside ICU string transformation functions, not user code.
69 : * @return TRUE if U_FAILURE(outErrorCode)
70 : * @draft ICU 59
71 : */
72 : UBool copyErrorTo(UErrorCode &outErrorCode);
73 :
74 : /**
75 : * How much longer is the new text compared with the old text?
76 : * @return new length minus old length
77 : * @draft ICU 59
78 : */
79 0 : int32_t lengthDelta() const { return delta; }
80 : /**
81 : * @return TRUE if there are any change edits
82 : * @draft ICU 59
83 : */
84 : UBool hasChanges() const;
85 :
86 : /**
87 : * Access to the list of edits.
88 : * @see getCoarseIterator
89 : * @see getFineIterator
90 : * @draft ICU 59
91 : */
92 : struct U_COMMON_API Iterator U_FINAL : public UMemory {
93 : /**
94 : * Copy constructor.
95 : * @draft ICU 59
96 : */
97 : Iterator(const Iterator &other) = default;
98 : /**
99 : * Assignment operator.
100 : * @draft ICU 59
101 : */
102 : Iterator &operator=(const Iterator &other) = default;
103 :
104 : /**
105 : * Advances to the next edit.
106 : * @return TRUE if there is another edit
107 : * @draft ICU 59
108 : */
109 0 : UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
110 :
111 : /**
112 : * Finds the edit that contains the source index.
113 : * The source index may be found in a non-change
114 : * even if normal iteration would skip non-changes.
115 : * Normal iteration can continue from a found edit.
116 : *
117 : * The iterator state before this search logically does not matter.
118 : * (It may affect the performance of the search.)
119 : *
120 : * The iterator state after this search is undefined
121 : * if the source index is out of bounds for the source string.
122 : *
123 : * @param i source index
124 : * @return TRUE if the edit for the source index was found
125 : * @draft ICU 59
126 : */
127 : UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
128 :
129 : /**
130 : * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
131 : * FALSE if oldLength units remain unchanged.
132 : * @draft ICU 59
133 : */
134 : UBool hasChange() const { return changed; }
135 : /**
136 : * @return the number of units in the original string which are replaced or remain unchanged.
137 : * @draft ICU 59
138 : */
139 0 : int32_t oldLength() const { return oldLength_; }
140 : /**
141 : * @return the number of units in the modified string, if hasChange() is TRUE.
142 : * Same as oldLength if hasChange() is FALSE.
143 : * @draft ICU 59
144 : */
145 0 : int32_t newLength() const { return newLength_; }
146 :
147 : /**
148 : * @return the current index into the source string
149 : * @draft ICU 59
150 : */
151 : int32_t sourceIndex() const { return srcIndex; }
152 : /**
153 : * @return the current index into the replacement-characters-only string,
154 : * not counting unchanged spans
155 : * @draft ICU 59
156 : */
157 0 : int32_t replacementIndex() const { return replIndex; }
158 : /**
159 : * @return the current index into the full destination string
160 : * @draft ICU 59
161 : */
162 0 : int32_t destinationIndex() const { return destIndex; }
163 :
164 : private:
165 : friend class Edits;
166 :
167 : Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
168 :
169 : int32_t readLength(int32_t head);
170 : void updateIndexes();
171 : UBool noNext();
172 : UBool next(UBool onlyChanges, UErrorCode &errorCode);
173 :
174 : const uint16_t *array;
175 : int32_t index, length;
176 : int32_t remaining;
177 : UBool onlyChanges_, coarse;
178 :
179 : UBool changed;
180 : int32_t oldLength_, newLength_;
181 : int32_t srcIndex, replIndex, destIndex;
182 : };
183 :
184 : /**
185 : * Returns an Iterator for coarse-grained changes for simple string updates.
186 : * Skips non-changes.
187 : * @return an Iterator that merges adjacent changes.
188 : * @draft ICU 59
189 : */
190 0 : Iterator getCoarseChangesIterator() const {
191 0 : return Iterator(array, length, TRUE, TRUE);
192 : }
193 :
194 : /**
195 : * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
196 : * @return an Iterator that merges adjacent changes.
197 : * @draft ICU 59
198 : */
199 : Iterator getCoarseIterator() const {
200 : return Iterator(array, length, FALSE, TRUE);
201 : }
202 :
203 : /**
204 : * Returns an Iterator for fine-grained changes for modifying styled text.
205 : * Skips non-changes.
206 : * @return an Iterator that separates adjacent changes.
207 : * @draft ICU 59
208 : */
209 : Iterator getFineChangesIterator() const {
210 : return Iterator(array, length, TRUE, FALSE);
211 : }
212 :
213 : /**
214 : * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
215 : * @return an Iterator that separates adjacent changes.
216 : * @draft ICU 59
217 : */
218 : Iterator getFineIterator() const {
219 : return Iterator(array, length, FALSE, FALSE);
220 : }
221 :
222 : private:
223 : Edits(const Edits &) = delete;
224 : Edits &operator=(const Edits &) = delete;
225 :
226 0 : void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
227 0 : int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
228 :
229 : void append(int32_t r);
230 : UBool growArray();
231 :
232 : static const int32_t STACK_CAPACITY = 100;
233 : uint16_t *array;
234 : int32_t capacity;
235 : int32_t length;
236 : int32_t delta;
237 : UErrorCode errorCode;
238 : uint16_t stackArray[STACK_CAPACITY];
239 : };
240 :
241 : #endif // U_HIDE_DRAFT_API
242 :
243 : U_NAMESPACE_END
244 :
245 : #endif // __EDITS_H__
|