Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2013-2014, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * collationsets.h
9 : *
10 : * created on: 2013feb09
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #ifndef __COLLATIONSETS_H__
15 : #define __COLLATIONSETS_H__
16 :
17 : #include "unicode/utypes.h"
18 :
19 : #if !UCONFIG_NO_COLLATION
20 :
21 : #include "unicode/uniset.h"
22 : #include "collation.h"
23 :
24 : U_NAMESPACE_BEGIN
25 :
26 : struct CollationData;
27 :
28 : /**
29 : * Finds the set of characters and strings that sort differently in the tailoring
30 : * from the base data.
31 : *
32 : * Every mapping in the tailoring needs to be compared to the base,
33 : * because some mappings are copied for optimization, and
34 : * all contractions for a character are copied if any contractions for that character
35 : * are added, modified or removed.
36 : *
37 : * It might be simpler to re-parse the rule string, but:
38 : * - That would require duplicating some of the from-rules builder code.
39 : * - That would make the runtime code depend on the builder.
40 : * - That would only work if we have the rule string, and we allow users to
41 : * omit the rule string from data files.
42 : */
43 0 : class TailoredSet : public UMemory {
44 : public:
45 0 : TailoredSet(UnicodeSet *t)
46 0 : : data(NULL), baseData(NULL),
47 : tailored(t),
48 : suffix(NULL),
49 0 : errorCode(U_ZERO_ERROR) {}
50 :
51 : void forData(const CollationData *d, UErrorCode &errorCode);
52 :
53 : /**
54 : * @return U_SUCCESS(errorCode) in C++, void in Java
55 : * @internal only public for access by callback
56 : */
57 : UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
58 :
59 : private:
60 : void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
61 : void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
62 : void compareContractions(UChar32 c, const UChar *p, const UChar *q);
63 :
64 : void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
65 : void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
66 : void addContractions(UChar32 c, const UChar *p);
67 : void addSuffix(UChar32 c, const UnicodeString &sfx);
68 : void add(UChar32 c);
69 :
70 : /** Prefixes are reversed in the data structure. */
71 0 : void setPrefix(const UnicodeString &pfx) {
72 0 : unreversedPrefix = pfx;
73 0 : unreversedPrefix.reverse();
74 0 : }
75 0 : void resetPrefix() {
76 0 : unreversedPrefix.remove();
77 0 : }
78 :
79 : const CollationData *data;
80 : const CollationData *baseData;
81 : UnicodeSet *tailored;
82 : UnicodeString unreversedPrefix;
83 : const UnicodeString *suffix;
84 : UErrorCode errorCode;
85 : };
86 :
87 0 : class ContractionsAndExpansions : public UMemory {
88 : public:
89 0 : class CESink : public UMemory {
90 : public:
91 : virtual ~CESink();
92 : virtual void handleCE(int64_t ce) = 0;
93 : virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
94 : };
95 :
96 0 : ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
97 0 : : data(NULL),
98 : contractions(con), expansions(exp),
99 : sink(s),
100 : addPrefixes(prefixes),
101 : checkTailored(0),
102 : suffix(NULL),
103 0 : errorCode(U_ZERO_ERROR) {}
104 :
105 : void forData(const CollationData *d, UErrorCode &errorCode);
106 : void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
107 :
108 : // all following: @internal, only public for access by callback
109 :
110 : void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
111 :
112 : void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
113 : void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
114 :
115 : void addExpansions(UChar32 start, UChar32 end);
116 : void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
117 :
118 : /** Prefixes are reversed in the data structure. */
119 0 : void setPrefix(const UnicodeString &pfx) {
120 0 : unreversedPrefix = pfx;
121 0 : unreversedPrefix.reverse();
122 0 : }
123 0 : void resetPrefix() {
124 0 : unreversedPrefix.remove();
125 0 : }
126 :
127 : const CollationData *data;
128 : UnicodeSet *contractions;
129 : UnicodeSet *expansions;
130 : CESink *sink;
131 : UBool addPrefixes;
132 : int8_t checkTailored; // -1: collected tailored +1: exclude tailored
133 : UnicodeSet tailored;
134 : UnicodeSet ranges;
135 : UnicodeString unreversedPrefix;
136 : const UnicodeString *suffix;
137 : int64_t ces[Collation::MAX_EXPANSION_LENGTH];
138 : UErrorCode errorCode;
139 : };
140 :
141 : U_NAMESPACE_END
142 :
143 : #endif // !UCONFIG_NO_COLLATION
144 : #endif // __COLLATIONSETS_H__
|