Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2012-2014, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * collationkeys.h
9 : *
10 : * created on: 2012sep02
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #ifndef __COLLATIONKEYS_H__
15 : #define __COLLATIONKEYS_H__
16 :
17 : #include "unicode/utypes.h"
18 :
19 : #if !UCONFIG_NO_COLLATION
20 :
21 : #include "unicode/bytestream.h"
22 : #include "unicode/ucol.h"
23 : #include "charstr.h"
24 : #include "collation.h"
25 :
26 : U_NAMESPACE_BEGIN
27 :
// Forward declarations only: within this header CollationIterator and
// CollationSettings appear solely as reference parameters of
// CollationKeys::writeSortKeyUpToQuaternary(), and CollationDataReader
// solely in a friend declaration, so complete types are not required here.
28 : class CollationIterator;
29 : struct CollationDataReader;
30 : struct CollationSettings;
31 :
// Abstract ByteSink subclass that collects sort key bytes into a buffer
// handed to the constructor. It keeps counting appended bytes even when they
// no longer fit (see Overflowed()), and it can silently drop a configured
// number of leading bytes (see IgnoreBytes()). Subclasses supply the growth
// policy through the pure virtual AppendBeyondCapacity() and Resize().
32 : class SortKeyByteSink : public ByteSink {
33 : public:
// Starts with the buffer `dest` of `destCapacity` bytes, zero bytes appended
// and zero bytes to ignore.
34 0 : SortKeyByteSink(char *dest, int32_t destCapacity)
35 0 : : buffer_(dest), capacity_(destCapacity),
36 0 : appended_(0), ignore_(0) {}
37 : virtual ~SortKeyByteSink();
38 :
// Sets how many upcoming bytes are to be dropped instead of stored; the
// single-byte Append() below consumes this counter one byte at a time.
39 0 : void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
40 :
// Multi-byte append; defined out of line, so its handling of ignore_ and of
// overflow is not visible in this header.
41 : virtual void Append(const char *bytes, int32_t n);
// Appends the single byte (char)b.
//  - While ignore_ > 0 the byte is discarded and ignore_ is decremented;
//    appended_ is left untouched.
//  - Otherwise the byte is stored at buffer_[appended_] if there is room, or
//    if Resize(1, appended_) reports success. appended_ is incremented either
//    way, so after a failed Resize it exceeds capacity_ and Overflowed()
//    becomes true while nothing is written out of bounds.
42 0 : void Append(uint32_t b) {
43 0 : if (ignore_ > 0) {
44 0 : --ignore_;
45 : } else {
46 0 : if (appended_ < capacity_ || Resize(1, appended_)) {
47 0 : buffer_[appended_] = (char)b;
48 : }
49 0 : ++appended_;
50 : }
51 0 : }
// NOTE(review): presumably the override of ByteSink::GetAppendBuffer();
// defined out of line — confirm the contract against unicode/bytestream.h.
52 : virtual char *GetAppendBuffer(int32_t min_capacity,
53 : int32_t desired_capacity_hint,
54 : char *scratch, int32_t scratch_capacity,
55 : int32_t *result_capacity);
// Returns appended_. Because the single-byte Append() advances appended_ even
// for bytes that did not fit, this can be larger than the buffer capacity.
56 0 : int32_t NumberOfBytesAppended() const { return appended_; }
57 :
58 : /**
59 : * @return how many bytes can be appended (including ignored ones)
60 : * without reallocation
61 : */
62 0 : int32_t GetRemainingCapacity() const {
63 : // Either ignore_ or appended_ should be 0.
64 0 : return ignore_ + capacity_ - appended_;
65 : }
66 :
// True once more bytes have been counted in appended_ than capacity_ holds.
67 0 : UBool Overflowed() const { return appended_ > capacity_; }
68 : /** @return FALSE if memory allocation failed */
69 0 : UBool IsOk() const { return buffer_ != NULL; }
70 :
71 : protected:
// Subclass hook, pure virtual.
// NOTE(review): presumably invoked by the out-of-line Append(bytes, n) when
// the bytes do not fit; `length` looks like the number of bytes already
// written — confirm against the implementation.
72 : virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
// Subclass hook, pure virtual. The single-byte Append() calls
// Resize(1, appended_) when the buffer is full and, on a true result, writes
// to buffer_[appended_] — so a successful implementation has to leave
// buffer_/capacity_ able to take that byte.
// NOTE(review): `appendCapacity` appears to be the number of extra bytes
// needed and `length` the number of bytes to preserve — confirm.
73 : virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
74 :
// Records a failure state: clears buffer_ and capacity_, which makes IsOk()
// return false.
75 0 : void SetNotOk() {
76 0 : buffer_ = NULL;
77 0 : capacity_ = 0;
78 0 : }
79 :
// buffer_   : destination storage (NULL after SetNotOk()).
// capacity_ : number of bytes buffer_ is treated as able to hold.
// appended_ : number of non-ignored bytes counted so far; may exceed capacity_.
// ignore_   : number of upcoming bytes still to be dropped.
80 : char *buffer_;
81 : int32_t capacity_;
82 : int32_t appended_;
83 : int32_t ignore_;
84 :
85 : private:
// Copying is disabled: both members are declared private and, per their
// comments, never implemented.
86 : SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
87 : SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
88 : };
89 :
// Static-only holder for the sort key writer and for the byte-range constants
// that the comments below describe as used to compress common weights on each
// level. The private, unimplemented-looking constructor prevents instantiation.
90 : class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
91 : public:
// Hook passed to writeSortKeyUpToQuaternary(); it is asked before each level
// whether that level should be written. Subclasses may override
// needToWrite() to stop output early.
92 0 : class LevelCallback : public UMemory {
93 : public:
94 : virtual ~LevelCallback();
95 : /**
96 : * @param level The next level about to be written to the ByteSink.
97 : * @return TRUE if the level is to be written
98 : * (the base class implementation always returns TRUE)
99 : */
100 : virtual UBool needToWrite(Collation::Level level);
101 : };
102 :
103 : /**
104 : * Writes the sort key bytes for minLevel up to the iterator data's strength.
105 : * Optionally writes the case level.
106 : * Stops writing levels when callback.needToWrite(level) returns FALSE.
107 : * Separates levels with the LEVEL_SEPARATOR_BYTE
108 : * but does not write a TERMINATOR_BYTE.
109 : */
// Defined out of line. Output goes to `sink`; `callback` is consulted as
// described above.
// NOTE(review): the roles of `compressibleBytes`, `preflight` and `errorCode`
// are not documented in this header — presumably a per-lead-byte
// compressibility table, a "measure only" flag, and the usual ICU in/out
// error code respectively; confirm against the implementation.
110 : static void writeSortKeyUpToQuaternary(CollationIterator &iter,
111 : const UBool *compressibleBytes,
112 : const CollationSettings &settings,
113 : SortKeyByteSink &sink,
114 : Collation::Level minLevel, LevelCallback &callback,
115 : UBool preflight, UErrorCode &errorCode);
116 : private:
// Friendship gives CollationDataReader access to the private members below.
117 : friend struct CollationDataReader;
118 :
119 : CollationKeys(); // no instantiation
120 :
// Layout shared by the constant groups below: in every group that defines
// *_COMMON_MIDDLE, MIDDLE - LOW == HIGH - MIDDLE == MAX_COUNT - 1; the
// upperFirst case group has no MIDDLE and satisfies HIGH - LOW == MAX_COUNT - 1.
// NOTE(review): the hex ranges quoted in the comments (e.g. "05..25") imply
// Collation::COMMON_BYTE == 0x05 — confirm in collation.h.
121 : // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
122 : static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
123 : static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
124 : static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
125 : static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
126 :
127 : // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
128 : static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
129 : static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
130 : static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
131 : static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
132 :
133 : // Case level, upperFirst: Compress up to 13 common weights as 3..15.
134 : static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
135 : static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
136 : static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
137 :
138 : // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
139 : static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
140 : static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
141 : static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
142 : static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
143 :
144 : // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
145 : static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
146 : static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
147 : static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
148 : static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
149 :
150 : // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
151 : static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
152 : static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
153 : static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
154 : static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
155 :
156 : // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
157 : static const uint32_t QUAT_COMMON_LOW = 0x1c;
158 : static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
159 : static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
160 : static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
161 : // Primary weights shifted to quaternary level must be encoded with
162 : // a lead byte below the common-weight compression range.
163 : static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
164 : };
165 :
166 : U_NAMESPACE_END
167 :
168 : #endif // !UCONFIG_NO_COLLATION
169 : #endif // __COLLATIONKEYS_H__
|