Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2010-2012, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * file name: stringtriebuilder.cpp
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * created on: 2010dec24
14 : * created by: Markus W. Scherer
15 : */
16 :
17 : #include "utypeinfo.h" // for 'typeid' to work
18 : #include "unicode/utypes.h"
19 : #include "unicode/stringtriebuilder.h"
20 : #include "uassert.h"
21 : #include "uhash.h"
22 :
23 : U_CDECL_BEGIN
24 :
25 : static int32_t U_CALLCONV
26 0 : hashStringTrieNode(const UHashTok key) {
27 0 : return icu::StringTrieBuilder::hashNode(key.pointer);
28 : }
29 :
30 : static UBool U_CALLCONV
31 0 : equalStringTrieNodes(const UHashTok key1, const UHashTok key2) {
32 0 : return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer);
33 : }
34 :
35 : U_CDECL_END
36 :
37 : U_NAMESPACE_BEGIN
38 :
39 0 : StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {}
40 :
41 0 : StringTrieBuilder::~StringTrieBuilder() {
42 0 : deleteCompactBuilder();
43 0 : }
44 :
45 : void
46 0 : StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
47 0 : if(U_FAILURE(errorCode)) {
48 0 : return;
49 : }
50 0 : nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL,
51 : sizeGuess, &errorCode);
52 0 : if(U_SUCCESS(errorCode)) {
53 0 : if(nodes==NULL) {
54 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
55 : } else {
56 0 : uhash_setKeyDeleter(nodes, uprv_deleteUObject);
57 : }
58 : }
59 : }
60 :
61 : void
62 0 : StringTrieBuilder::deleteCompactBuilder() {
63 0 : uhash_close(nodes);
64 0 : nodes=NULL;
65 0 : }
66 :
67 : void
68 0 : StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength,
69 : UErrorCode &errorCode) {
70 0 : if(buildOption==USTRINGTRIE_BUILD_FAST) {
71 0 : writeNode(0, elementsLength, 0);
72 : } else /* USTRINGTRIE_BUILD_SMALL */ {
73 0 : createCompactBuilder(2*elementsLength, errorCode);
74 0 : Node *root=makeNode(0, elementsLength, 0, errorCode);
75 0 : if(U_SUCCESS(errorCode)) {
76 0 : root->markRightEdgesFirst(-1);
77 0 : root->write(*this);
78 : }
79 0 : deleteCompactBuilder();
80 : }
81 0 : }
82 :
83 : // Requires start<limit,
84 : // and all strings of the [start..limit[ elements must be sorted and
85 : // have a common prefix of length unitIndex.
86 : int32_t
87 0 : StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
88 0 : UBool hasValue=FALSE;
89 0 : int32_t value=0;
90 : int32_t type;
91 0 : if(unitIndex==getElementStringLength(start)) {
92 : // An intermediate or final value.
93 0 : value=getElementValue(start++);
94 0 : if(start==limit) {
95 0 : return writeValueAndFinal(value, TRUE); // final-value node
96 : }
97 0 : hasValue=TRUE;
98 : }
99 : // Now all [start..limit[ strings are longer than unitIndex.
100 0 : int32_t minUnit=getElementUnit(start, unitIndex);
101 0 : int32_t maxUnit=getElementUnit(limit-1, unitIndex);
102 0 : if(minUnit==maxUnit) {
103 : // Linear-match node: All strings have the same character at unitIndex.
104 0 : int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
105 0 : writeNode(start, limit, lastUnitIndex);
106 : // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
107 0 : int32_t length=lastUnitIndex-unitIndex;
108 0 : int32_t maxLinearMatchLength=getMaxLinearMatchLength();
109 0 : while(length>maxLinearMatchLength) {
110 0 : lastUnitIndex-=maxLinearMatchLength;
111 0 : length-=maxLinearMatchLength;
112 0 : writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
113 0 : write(getMinLinearMatch()+maxLinearMatchLength-1);
114 : }
115 0 : writeElementUnits(start, unitIndex, length);
116 0 : type=getMinLinearMatch()+length-1;
117 : } else {
118 : // Branch node.
119 0 : int32_t length=countElementUnits(start, limit, unitIndex);
120 : // length>=2 because minUnit!=maxUnit.
121 0 : writeBranchSubNode(start, limit, unitIndex, length);
122 0 : if(--length<getMinLinearMatch()) {
123 0 : type=length;
124 : } else {
125 0 : write(length);
126 0 : type=0;
127 : }
128 : }
129 0 : return writeValueAndType(hasValue, value, type);
130 : }
131 :
132 : // start<limit && all strings longer than unitIndex &&
133 : // length different units at unitIndex
134 : int32_t
135 0 : StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
136 : UChar middleUnits[kMaxSplitBranchLevels];
137 : int32_t lessThan[kMaxSplitBranchLevels];
138 0 : int32_t ltLength=0;
139 0 : while(length>getMaxBranchLinearSubNodeLength()) {
140 : // Branch on the middle unit.
141 : // First, find the middle unit.
142 0 : int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
143 : // Encode the less-than branch first.
144 0 : middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
145 0 : lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
146 0 : ++ltLength;
147 : // Continue for the greater-or-equal branch.
148 0 : start=i;
149 0 : length=length-length/2;
150 : }
151 : // For each unit, find its elements array start and whether it has a final value.
152 : int32_t starts[kMaxBranchLinearSubNodeLength];
153 : UBool isFinal[kMaxBranchLinearSubNodeLength-1];
154 0 : int32_t unitNumber=0;
155 0 : do {
156 0 : int32_t i=starts[unitNumber]=start;
157 0 : UChar unit=getElementUnit(i++, unitIndex);
158 0 : i=indexOfElementWithNextUnit(i, unitIndex, unit);
159 0 : isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
160 0 : start=i;
161 0 : } while(++unitNumber<length-1);
162 : // unitNumber==length-1, and the maxUnit elements range is [start..limit[
163 0 : starts[unitNumber]=start;
164 :
165 : // Write the sub-nodes in reverse order: The jump lengths are deltas from
166 : // after their own positions, so if we wrote the minUnit sub-node first,
167 : // then its jump delta would be larger.
168 : // Instead we write the minUnit sub-node last, for a shorter delta.
169 : int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1];
170 0 : do {
171 0 : --unitNumber;
172 0 : if(!isFinal[unitNumber]) {
173 0 : jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
174 : }
175 0 : } while(unitNumber>0);
176 : // The maxUnit sub-node is written as the very last one because we do
177 : // not jump for it at all.
178 0 : unitNumber=length-1;
179 0 : writeNode(start, limit, unitIndex+1);
180 0 : int32_t offset=write(getElementUnit(start, unitIndex));
181 : // Write the rest of this node's unit-value pairs.
182 0 : while(--unitNumber>=0) {
183 0 : start=starts[unitNumber];
184 : int32_t value;
185 0 : if(isFinal[unitNumber]) {
186 : // Write the final value for the one string ending with this unit.
187 0 : value=getElementValue(start);
188 : } else {
189 : // Write the delta to the start position of the sub-node.
190 0 : value=offset-jumpTargets[unitNumber];
191 : }
192 0 : writeValueAndFinal(value, isFinal[unitNumber]);
193 0 : offset=write(getElementUnit(start, unitIndex));
194 : }
195 : // Write the split-branch nodes.
196 0 : while(ltLength>0) {
197 0 : --ltLength;
198 0 : writeDeltaTo(lessThan[ltLength]);
199 0 : offset=write(middleUnits[ltLength]);
200 : }
201 0 : return offset;
202 : }
203 :
204 : // Requires start<limit,
205 : // and all strings of the [start..limit[ elements must be sorted and
206 : // have a common prefix of length unitIndex.
207 : StringTrieBuilder::Node *
208 0 : StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
209 0 : if(U_FAILURE(errorCode)) {
210 0 : return NULL;
211 : }
212 0 : UBool hasValue=FALSE;
213 0 : int32_t value=0;
214 0 : if(unitIndex==getElementStringLength(start)) {
215 : // An intermediate or final value.
216 0 : value=getElementValue(start++);
217 0 : if(start==limit) {
218 0 : return registerFinalValue(value, errorCode);
219 : }
220 0 : hasValue=TRUE;
221 : }
222 : Node *node;
223 : // Now all [start..limit[ strings are longer than unitIndex.
224 0 : int32_t minUnit=getElementUnit(start, unitIndex);
225 0 : int32_t maxUnit=getElementUnit(limit-1, unitIndex);
226 0 : if(minUnit==maxUnit) {
227 : // Linear-match node: All strings have the same character at unitIndex.
228 0 : int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
229 0 : Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
230 : // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
231 0 : int32_t length=lastUnitIndex-unitIndex;
232 0 : int32_t maxLinearMatchLength=getMaxLinearMatchLength();
233 0 : while(length>maxLinearMatchLength) {
234 0 : lastUnitIndex-=maxLinearMatchLength;
235 0 : length-=maxLinearMatchLength;
236 0 : node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
237 0 : nextNode=registerNode(node, errorCode);
238 : }
239 0 : node=createLinearMatchNode(start, unitIndex, length, nextNode);
240 : } else {
241 : // Branch node.
242 0 : int32_t length=countElementUnits(start, limit, unitIndex);
243 : // length>=2 because minUnit!=maxUnit.
244 0 : Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
245 0 : node=new BranchHeadNode(length, subNode);
246 : }
247 0 : if(hasValue && node!=NULL) {
248 0 : if(matchNodesCanHaveValues()) {
249 0 : ((ValueNode *)node)->setValue(value);
250 : } else {
251 0 : node=new IntermediateValueNode(value, registerNode(node, errorCode));
252 : }
253 : }
254 0 : return registerNode(node, errorCode);
255 : }
256 :
257 : // start<limit && all strings longer than unitIndex &&
258 : // length different units at unitIndex
259 : StringTrieBuilder::Node *
260 0 : StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
261 : int32_t length, UErrorCode &errorCode) {
262 0 : if(U_FAILURE(errorCode)) {
263 0 : return NULL;
264 : }
265 : UChar middleUnits[kMaxSplitBranchLevels];
266 : Node *lessThan[kMaxSplitBranchLevels];
267 0 : int32_t ltLength=0;
268 0 : while(length>getMaxBranchLinearSubNodeLength()) {
269 : // Branch on the middle unit.
270 : // First, find the middle unit.
271 0 : int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2);
272 : // Create the less-than branch.
273 0 : middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
274 0 : lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
275 0 : ++ltLength;
276 : // Continue for the greater-or-equal branch.
277 0 : start=i;
278 0 : length=length-length/2;
279 : }
280 0 : if(U_FAILURE(errorCode)) {
281 0 : return NULL;
282 : }
283 0 : ListBranchNode *listNode=new ListBranchNode();
284 0 : if(listNode==NULL) {
285 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
286 0 : return NULL;
287 : }
288 : // For each unit, find its elements array start and whether it has a final value.
289 0 : int32_t unitNumber=0;
290 0 : do {
291 0 : int32_t i=start;
292 0 : UChar unit=getElementUnit(i++, unitIndex);
293 0 : i=indexOfElementWithNextUnit(i, unitIndex, unit);
294 0 : if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
295 0 : listNode->add(unit, getElementValue(start));
296 : } else {
297 0 : listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
298 : }
299 0 : start=i;
300 0 : } while(++unitNumber<length-1);
301 : // unitNumber==length-1, and the maxUnit elements range is [start..limit[
302 0 : UChar unit=getElementUnit(start, unitIndex);
303 0 : if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
304 0 : listNode->add(unit, getElementValue(start));
305 : } else {
306 0 : listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
307 : }
308 0 : Node *node=registerNode(listNode, errorCode);
309 : // Create the split-branch nodes.
310 0 : while(ltLength>0) {
311 0 : --ltLength;
312 : node=registerNode(
313 0 : new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
314 : }
315 0 : return node;
316 : }
317 :
318 : StringTrieBuilder::Node *
319 0 : StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
320 0 : if(U_FAILURE(errorCode)) {
321 0 : delete newNode;
322 0 : return NULL;
323 : }
324 0 : if(newNode==NULL) {
325 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
326 0 : return NULL;
327 : }
328 0 : const UHashElement *old=uhash_find(nodes, newNode);
329 0 : if(old!=NULL) {
330 0 : delete newNode;
331 0 : return (Node *)old->key.pointer;
332 : }
333 : // If uhash_puti() returns a non-zero value from an equivalent, previously
334 : // registered node, then uhash_find() failed to find that and we will leak newNode.
335 : #if U_DEBUG
336 : int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
337 : #endif
338 0 : uhash_puti(nodes, newNode, 1, &errorCode);
339 0 : U_ASSERT(oldValue==0);
340 0 : if(U_FAILURE(errorCode)) {
341 0 : delete newNode;
342 0 : return NULL;
343 : }
344 0 : return newNode;
345 : }
346 :
347 : StringTrieBuilder::Node *
348 0 : StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
349 0 : if(U_FAILURE(errorCode)) {
350 0 : return NULL;
351 : }
352 0 : FinalValueNode key(value);
353 0 : const UHashElement *old=uhash_find(nodes, &key);
354 0 : if(old!=NULL) {
355 0 : return (Node *)old->key.pointer;
356 : }
357 0 : Node *newNode=new FinalValueNode(value);
358 0 : if(newNode==NULL) {
359 0 : errorCode=U_MEMORY_ALLOCATION_ERROR;
360 0 : return NULL;
361 : }
362 : // If uhash_puti() returns a non-zero value from an equivalent, previously
363 : // registered node, then uhash_find() failed to find that and we will leak newNode.
364 : #if U_DEBUG
365 : int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue.
366 : #endif
367 0 : uhash_puti(nodes, newNode, 1, &errorCode);
368 0 : U_ASSERT(oldValue==0);
369 0 : if(U_FAILURE(errorCode)) {
370 0 : delete newNode;
371 0 : return NULL;
372 : }
373 0 : return newNode;
374 : }
375 :
376 : UBool
377 0 : StringTrieBuilder::hashNode(const void *node) {
378 0 : return ((const Node *)node)->hashCode();
379 : }
380 :
381 : UBool
382 0 : StringTrieBuilder::equalNodes(const void *left, const void *right) {
383 0 : return *(const Node *)left==*(const Node *)right;
384 : }
385 :
386 : UBool
387 0 : StringTrieBuilder::Node::operator==(const Node &other) const {
388 0 : return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
389 : }
390 :
391 : int32_t
392 0 : StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
393 0 : if(offset==0) {
394 0 : offset=edgeNumber;
395 : }
396 0 : return edgeNumber;
397 : }
398 :
399 : UBool
400 0 : StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
401 0 : if(this==&other) {
402 0 : return TRUE;
403 : }
404 0 : if(!Node::operator==(other)) {
405 0 : return FALSE;
406 : }
407 0 : const FinalValueNode &o=(const FinalValueNode &)other;
408 0 : return value==o.value;
409 : }
410 :
411 : void
412 0 : StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
413 0 : offset=builder.writeValueAndFinal(value, TRUE);
414 0 : }
415 :
416 : UBool
417 0 : StringTrieBuilder::ValueNode::operator==(const Node &other) const {
418 0 : if(this==&other) {
419 0 : return TRUE;
420 : }
421 0 : if(!Node::operator==(other)) {
422 0 : return FALSE;
423 : }
424 0 : const ValueNode &o=(const ValueNode &)other;
425 0 : return hasValue==o.hasValue && (!hasValue || value==o.value);
426 : }
427 :
428 : UBool
429 0 : StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
430 0 : if(this==&other) {
431 0 : return TRUE;
432 : }
433 0 : if(!ValueNode::operator==(other)) {
434 0 : return FALSE;
435 : }
436 0 : const IntermediateValueNode &o=(const IntermediateValueNode &)other;
437 0 : return next==o.next;
438 : }
439 :
440 : int32_t
441 0 : StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) {
442 0 : if(offset==0) {
443 0 : offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
444 : }
445 0 : return edgeNumber;
446 : }
447 :
448 : void
449 0 : StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
450 0 : next->write(builder);
451 0 : offset=builder.writeValueAndFinal(value, FALSE);
452 0 : }
453 :
454 : UBool
455 0 : StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
456 0 : if(this==&other) {
457 0 : return TRUE;
458 : }
459 0 : if(!ValueNode::operator==(other)) {
460 0 : return FALSE;
461 : }
462 0 : const LinearMatchNode &o=(const LinearMatchNode &)other;
463 0 : return length==o.length && next==o.next;
464 : }
465 :
466 : int32_t
467 0 : StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
468 0 : if(offset==0) {
469 0 : offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
470 : }
471 0 : return edgeNumber;
472 : }
473 :
474 : UBool
475 0 : StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
476 0 : if(this==&other) {
477 0 : return TRUE;
478 : }
479 0 : if(!Node::operator==(other)) {
480 0 : return FALSE;
481 : }
482 0 : const ListBranchNode &o=(const ListBranchNode &)other;
483 0 : for(int32_t i=0; i<length; ++i) {
484 0 : if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
485 0 : return FALSE;
486 : }
487 : }
488 0 : return TRUE;
489 : }
490 :
491 : int32_t
492 0 : StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
493 0 : if(offset==0) {
494 0 : firstEdgeNumber=edgeNumber;
495 0 : int32_t step=0;
496 0 : int32_t i=length;
497 0 : do {
498 0 : Node *edge=equal[--i];
499 0 : if(edge!=NULL) {
500 0 : edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
501 : }
502 : // For all but the rightmost edge, decrement the edge number.
503 0 : step=1;
504 0 : } while(i>0);
505 0 : offset=edgeNumber;
506 : }
507 0 : return edgeNumber;
508 : }
509 :
510 : void
511 0 : StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
512 : // Write the sub-nodes in reverse order: The jump lengths are deltas from
513 : // after their own positions, so if we wrote the minUnit sub-node first,
514 : // then its jump delta would be larger.
515 : // Instead we write the minUnit sub-node last, for a shorter delta.
516 0 : int32_t unitNumber=length-1;
517 0 : Node *rightEdge=equal[unitNumber];
518 0 : int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
519 0 : do {
520 0 : --unitNumber;
521 0 : if(equal[unitNumber]!=NULL) {
522 0 : equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
523 : }
524 0 : } while(unitNumber>0);
525 : // The maxUnit sub-node is written as the very last one because we do
526 : // not jump for it at all.
527 0 : unitNumber=length-1;
528 0 : if(rightEdge==NULL) {
529 0 : builder.writeValueAndFinal(values[unitNumber], TRUE);
530 : } else {
531 0 : rightEdge->write(builder);
532 : }
533 0 : offset=builder.write(units[unitNumber]);
534 : // Write the rest of this node's unit-value pairs.
535 0 : while(--unitNumber>=0) {
536 : int32_t value;
537 : UBool isFinal;
538 0 : if(equal[unitNumber]==NULL) {
539 : // Write the final value for the one string ending with this unit.
540 0 : value=values[unitNumber];
541 0 : isFinal=TRUE;
542 : } else {
543 : // Write the delta to the start position of the sub-node.
544 0 : U_ASSERT(equal[unitNumber]->getOffset()>0);
545 0 : value=offset-equal[unitNumber]->getOffset();
546 0 : isFinal=FALSE;
547 : }
548 0 : builder.writeValueAndFinal(value, isFinal);
549 0 : offset=builder.write(units[unitNumber]);
550 : }
551 0 : }
552 :
553 : UBool
554 0 : StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
555 0 : if(this==&other) {
556 0 : return TRUE;
557 : }
558 0 : if(!Node::operator==(other)) {
559 0 : return FALSE;
560 : }
561 0 : const SplitBranchNode &o=(const SplitBranchNode &)other;
562 0 : return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
563 : }
564 :
565 : int32_t
566 0 : StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
567 0 : if(offset==0) {
568 0 : firstEdgeNumber=edgeNumber;
569 0 : edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber);
570 0 : offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1);
571 : }
572 0 : return edgeNumber;
573 : }
574 :
575 : void
576 0 : StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
577 : // Encode the less-than branch first.
578 0 : lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
579 : // Encode the greater-or-equal branch last because we do not jump for it at all.
580 0 : greaterOrEqual->write(builder);
581 : // Write this node.
582 0 : U_ASSERT(lessThan->getOffset()>0);
583 0 : builder.writeDeltaTo(lessThan->getOffset()); // less-than
584 0 : offset=builder.write(unit);
585 0 : }
586 :
587 : UBool
588 0 : StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
589 0 : if(this==&other) {
590 0 : return TRUE;
591 : }
592 0 : if(!ValueNode::operator==(other)) {
593 0 : return FALSE;
594 : }
595 0 : const BranchHeadNode &o=(const BranchHeadNode &)other;
596 0 : return length==o.length && next==o.next;
597 : }
598 :
599 : int32_t
600 0 : StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
601 0 : if(offset==0) {
602 0 : offset=edgeNumber=next->markRightEdgesFirst(edgeNumber);
603 : }
604 0 : return edgeNumber;
605 : }
606 :
607 : void
608 0 : StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) {
609 0 : next->write(builder);
610 0 : if(length<=builder.getMinLinearMatch()) {
611 0 : offset=builder.writeValueAndType(hasValue, value, length-1);
612 : } else {
613 0 : builder.write(length-1);
614 0 : offset=builder.writeValueAndType(hasValue, value, 0);
615 : }
616 0 : }
617 :
618 : U_NAMESPACE_END
|