Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2010-2011, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * file name: ucharstrieiterator.h
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * created on: 2010nov15
14 : * created by: Markus W. Scherer
15 : */
16 :
17 : #include "unicode/utypes.h"
18 : #include "unicode/ucharstrie.h"
19 : #include "unicode/unistr.h"
20 : #include "uvectr32.h"
21 :
22 : U_NAMESPACE_BEGIN
23 :
24 0 : UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
25 0 : UErrorCode &errorCode)
26 : : uchars_(trieUChars),
27 0 : pos_(uchars_), initialPos_(uchars_),
28 : remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
29 : skipValue_(FALSE),
30 0 : maxLength_(maxStringLength), value_(0), stack_(NULL) {
31 0 : if(U_FAILURE(errorCode)) {
32 0 : return;
33 : }
34 : // stack_ is a pointer so that it's easy to turn ucharstrie.h into
35 : // a public API header for which we would want it to depend only on
36 : // other public headers.
37 : // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
38 : // via the UnicodeString and UVector32 implementations, so this additional
39 : // cost is minimal.
40 0 : stack_=new UVector32(errorCode);
41 0 : if(stack_==NULL) {
42 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
43 : }
44 : }
45 :
46 0 : UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
47 0 : UErrorCode &errorCode)
48 0 : : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
49 0 : remainingMatchLength_(trie.remainingMatchLength_),
50 0 : initialRemainingMatchLength_(trie.remainingMatchLength_),
51 : skipValue_(FALSE),
52 0 : maxLength_(maxStringLength), value_(0), stack_(NULL) {
53 0 : if(U_FAILURE(errorCode)) {
54 0 : return;
55 : }
56 0 : stack_=new UVector32(errorCode);
57 0 : if(U_FAILURE(errorCode)) {
58 0 : return;
59 : }
60 0 : if(stack_==NULL) {
61 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
62 0 : return;
63 : }
64 0 : int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
65 0 : if(length>=0) {
66 : // Pending linear-match node, append remaining UChars to str_.
67 0 : ++length;
68 0 : if(maxLength_>0 && length>maxLength_) {
69 0 : length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
70 : }
71 0 : str_.append(pos_, length);
72 0 : pos_+=length;
73 0 : remainingMatchLength_-=length;
74 : }
75 : }
76 :
77 0 : UCharsTrie::Iterator::~Iterator() {
78 0 : delete stack_;
79 0 : }
80 :
81 : UCharsTrie::Iterator &
82 0 : UCharsTrie::Iterator::reset() {
83 0 : pos_=initialPos_;
84 0 : remainingMatchLength_=initialRemainingMatchLength_;
85 0 : skipValue_=FALSE;
86 0 : int32_t length=remainingMatchLength_+1; // Remaining match length.
87 0 : if(maxLength_>0 && length>maxLength_) {
88 0 : length=maxLength_;
89 : }
90 0 : str_.truncate(length);
91 0 : pos_+=length;
92 0 : remainingMatchLength_-=length;
93 0 : stack_->setSize(0);
94 0 : return *this;
95 : }
96 :
97 : UBool
98 0 : UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
99 :
100 : UBool
101 0 : UCharsTrie::Iterator::next(UErrorCode &errorCode) {
102 0 : if(U_FAILURE(errorCode)) {
103 0 : return FALSE;
104 : }
105 0 : const UChar *pos=pos_;
106 0 : if(pos==NULL) {
107 0 : if(stack_->isEmpty()) {
108 0 : return FALSE;
109 : }
110 : // Pop the state off the stack and continue with the next outbound edge of
111 : // the branch node.
112 0 : int32_t stackSize=stack_->size();
113 0 : int32_t length=stack_->elementAti(stackSize-1);
114 0 : pos=uchars_+stack_->elementAti(stackSize-2);
115 0 : stack_->setSize(stackSize-2);
116 0 : str_.truncate(length&0xffff);
117 0 : length=(int32_t)((uint32_t)length>>16);
118 0 : if(length>1) {
119 0 : pos=branchNext(pos, length, errorCode);
120 0 : if(pos==NULL) {
121 0 : return TRUE; // Reached a final value.
122 : }
123 : } else {
124 0 : str_.append(*pos++);
125 : }
126 : }
127 0 : if(remainingMatchLength_>=0) {
128 : // We only get here if we started in a pending linear-match node
129 : // with more than maxLength remaining units.
130 0 : return truncateAndStop();
131 : }
132 : for(;;) {
133 0 : int32_t node=*pos++;
134 0 : if(node>=kMinValueLead) {
135 0 : if(skipValue_) {
136 0 : pos=skipNodeValue(pos, node);
137 0 : node&=kNodeTypeMask;
138 0 : skipValue_=FALSE;
139 : } else {
140 : // Deliver value for the string so far.
141 0 : UBool isFinal=(UBool)(node>>15);
142 0 : if(isFinal) {
143 0 : value_=readValue(pos, node&0x7fff);
144 : } else {
145 0 : value_=readNodeValue(pos, node);
146 : }
147 0 : if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
148 0 : pos_=NULL;
149 : } else {
150 : // We cannot skip the value right here because it shares its
151 : // lead unit with a match node which we have to evaluate
152 : // next time.
153 : // Instead, keep pos_ on the node lead unit itself.
154 0 : pos_=pos-1;
155 0 : skipValue_=TRUE;
156 : }
157 0 : return TRUE;
158 : }
159 : }
160 0 : if(maxLength_>0 && str_.length()==maxLength_) {
161 0 : return truncateAndStop();
162 : }
163 0 : if(node<kMinLinearMatch) {
164 0 : if(node==0) {
165 0 : node=*pos++;
166 : }
167 0 : pos=branchNext(pos, node+1, errorCode);
168 0 : if(pos==NULL) {
169 0 : return TRUE; // Reached a final value.
170 : }
171 : } else {
172 : // Linear-match node, append length units to str_.
173 0 : int32_t length=node-kMinLinearMatch+1;
174 0 : if(maxLength_>0 && str_.length()+length>maxLength_) {
175 0 : str_.append(pos, maxLength_-str_.length());
176 0 : return truncateAndStop();
177 : }
178 0 : str_.append(pos, length);
179 0 : pos+=length;
180 : }
181 0 : }
182 : }
183 :
184 : // Branch node, needs to take the first outbound edge and push state for the rest.
185 : const UChar *
186 0 : UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
187 0 : while(length>kMaxBranchLinearSubNodeLength) {
188 0 : ++pos; // ignore the comparison unit
189 : // Push state for the greater-or-equal edge.
190 0 : stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
191 0 : stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
192 : // Follow the less-than edge.
193 0 : length>>=1;
194 0 : pos=jumpByDelta(pos);
195 : }
196 : // List of key-value pairs where values are either final values or jump deltas.
197 : // Read the first (key, value) pair.
198 0 : UChar trieUnit=*pos++;
199 0 : int32_t node=*pos++;
200 0 : UBool isFinal=(UBool)(node>>15);
201 0 : int32_t value=readValue(pos, node&=0x7fff);
202 0 : pos=skipValue(pos, node);
203 0 : stack_->addElement((int32_t)(pos-uchars_), errorCode);
204 0 : stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
205 0 : str_.append(trieUnit);
206 0 : if(isFinal) {
207 0 : pos_=NULL;
208 0 : value_=value;
209 0 : return NULL;
210 : } else {
211 0 : return pos+value;
212 : }
213 : }
214 :
215 : U_NAMESPACE_END
|