Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2010-2012, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * file name: bytestrieiterator.cpp
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * created on: 2010nov03
14 : * created by: Markus W. Scherer
15 : */
16 :
17 : #include "unicode/utypes.h"
18 : #include "unicode/bytestrie.h"
19 : #include "unicode/stringpiece.h"
20 : #include "charstr.h"
21 : #include "uvectr32.h"
22 :
23 : U_NAMESPACE_BEGIN
24 :
25 0 : BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
26 0 : UErrorCode &errorCode)
27 : : bytes_(static_cast<const uint8_t *>(trieBytes)),
28 0 : pos_(bytes_), initialPos_(bytes_),
29 : remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30 0 : str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
31 0 : if(U_FAILURE(errorCode)) {
32 0 : return;
33 : }
34 : // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35 : // a public API header for which we would want it to depend only on
36 : // other public headers.
37 : // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38 : // via the CharString and UVector32 implementations, so this additional
39 : // cost is minimal.
40 0 : str_=new CharString();
41 0 : stack_=new UVector32(errorCode);
42 0 : if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
43 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
44 : }
45 : }
46 :
47 0 : BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
48 0 : UErrorCode &errorCode)
49 0 : : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
50 0 : remainingMatchLength_(trie.remainingMatchLength_),
51 0 : initialRemainingMatchLength_(trie.remainingMatchLength_),
52 0 : str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
53 0 : if(U_FAILURE(errorCode)) {
54 0 : return;
55 : }
56 0 : str_=new CharString();
57 0 : stack_=new UVector32(errorCode);
58 0 : if(U_FAILURE(errorCode)) {
59 0 : return;
60 : }
61 0 : if(str_==NULL || stack_==NULL) {
62 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
63 0 : return;
64 : }
65 0 : int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
66 0 : if(length>=0) {
67 : // Pending linear-match node, append remaining bytes to str_.
68 0 : ++length;
69 0 : if(maxLength_>0 && length>maxLength_) {
70 0 : length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
71 : }
72 0 : str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
73 0 : pos_+=length;
74 0 : remainingMatchLength_-=length;
75 : }
76 : }
77 :
78 0 : BytesTrie::Iterator::~Iterator() {
79 0 : delete str_;
80 0 : delete stack_;
81 0 : }
82 :
83 : BytesTrie::Iterator &
84 0 : BytesTrie::Iterator::reset() {
85 0 : pos_=initialPos_;
86 0 : remainingMatchLength_=initialRemainingMatchLength_;
87 0 : int32_t length=remainingMatchLength_+1; // Remaining match length.
88 0 : if(maxLength_>0 && length>maxLength_) {
89 0 : length=maxLength_;
90 : }
91 0 : str_->truncate(length);
92 0 : pos_+=length;
93 0 : remainingMatchLength_-=length;
94 0 : stack_->setSize(0);
95 0 : return *this;
96 : }
97 :
98 : UBool
99 0 : BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
100 :
101 : UBool
102 0 : BytesTrie::Iterator::next(UErrorCode &errorCode) {
103 0 : if(U_FAILURE(errorCode)) {
104 0 : return FALSE;
105 : }
106 0 : const uint8_t *pos=pos_;
107 0 : if(pos==NULL) {
108 0 : if(stack_->isEmpty()) {
109 0 : return FALSE;
110 : }
111 : // Pop the state off the stack and continue with the next outbound edge of
112 : // the branch node.
113 0 : int32_t stackSize=stack_->size();
114 0 : int32_t length=stack_->elementAti(stackSize-1);
115 0 : pos=bytes_+stack_->elementAti(stackSize-2);
116 0 : stack_->setSize(stackSize-2);
117 0 : str_->truncate(length&0xffff);
118 0 : length=(int32_t)((uint32_t)length>>16);
119 0 : if(length>1) {
120 0 : pos=branchNext(pos, length, errorCode);
121 0 : if(pos==NULL) {
122 0 : return TRUE; // Reached a final value.
123 : }
124 : } else {
125 0 : str_->append((char)*pos++, errorCode);
126 : }
127 : }
128 0 : if(remainingMatchLength_>=0) {
129 : // We only get here if we started in a pending linear-match node
130 : // with more than maxLength remaining bytes.
131 0 : return truncateAndStop();
132 : }
133 : for(;;) {
134 0 : int32_t node=*pos++;
135 0 : if(node>=kMinValueLead) {
136 : // Deliver value for the byte sequence so far.
137 0 : UBool isFinal=(UBool)(node&kValueIsFinal);
138 0 : value_=readValue(pos, node>>1);
139 0 : if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
140 0 : pos_=NULL;
141 : } else {
142 0 : pos_=skipValue(pos, node);
143 : }
144 0 : return TRUE;
145 : }
146 0 : if(maxLength_>0 && str_->length()==maxLength_) {
147 0 : return truncateAndStop();
148 : }
149 0 : if(node<kMinLinearMatch) {
150 0 : if(node==0) {
151 0 : node=*pos++;
152 : }
153 0 : pos=branchNext(pos, node+1, errorCode);
154 0 : if(pos==NULL) {
155 0 : return TRUE; // Reached a final value.
156 : }
157 : } else {
158 : // Linear-match node, append length bytes to str_.
159 0 : int32_t length=node-kMinLinearMatch+1;
160 0 : if(maxLength_>0 && str_->length()+length>maxLength_) {
161 0 : str_->append(reinterpret_cast<const char *>(pos),
162 0 : maxLength_-str_->length(), errorCode);
163 0 : return truncateAndStop();
164 : }
165 0 : str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
166 0 : pos+=length;
167 : }
168 0 : }
169 : }
170 :
171 : StringPiece
172 0 : BytesTrie::Iterator::getString() const {
173 0 : return str_ == NULL ? StringPiece() : str_->toStringPiece();
174 : }
175 :
176 : UBool
177 0 : BytesTrie::Iterator::truncateAndStop() {
178 0 : pos_=NULL;
179 0 : value_=-1; // no real value for str
180 0 : return TRUE;
181 : }
182 :
183 : // Branch node, needs to take the first outbound edge and push state for the rest.
184 : const uint8_t *
185 0 : BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
186 0 : while(length>kMaxBranchLinearSubNodeLength) {
187 0 : ++pos; // ignore the comparison byte
188 : // Push state for the greater-or-equal edge.
189 0 : stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
190 0 : stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
191 : // Follow the less-than edge.
192 0 : length>>=1;
193 0 : pos=jumpByDelta(pos);
194 : }
195 : // List of key-value pairs where values are either final values or jump deltas.
196 : // Read the first (key, value) pair.
197 0 : uint8_t trieByte=*pos++;
198 0 : int32_t node=*pos++;
199 0 : UBool isFinal=(UBool)(node&kValueIsFinal);
200 0 : int32_t value=readValue(pos, node>>1);
201 0 : pos=skipValue(pos, node);
202 0 : stack_->addElement((int32_t)(pos-bytes_), errorCode);
203 0 : stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
204 0 : str_->append((char)trieByte, errorCode);
205 0 : if(isFinal) {
206 0 : pos_=NULL;
207 0 : value_=value;
208 0 : return NULL;
209 : } else {
210 0 : return pos+value;
211 : }
212 : }
213 :
214 : U_NAMESPACE_END
|