Line data Source code
1 : /*
2 : * Copyright © 2012 Google, Inc.
3 : *
4 : * This is part of HarfBuzz, a text shaping library.
5 : *
6 : * Permission is hereby granted, without written agreement and without
7 : * license or royalty fees, to use, copy, modify, and distribute this
8 : * software and its documentation for any purpose, provided that the
9 : * above copyright notice and the following two paragraphs appear in
10 : * all copies of this software.
11 : *
12 : * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 : * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 : * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 : * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 : * DAMAGE.
17 : *
18 : * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 : * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 : * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 : * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 : * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 : *
24 : * Google Author(s): Behdad Esfahbod
25 : */
26 :
27 : #ifndef HB_SET_PRIVATE_HH
28 : #define HB_SET_PRIVATE_HH
29 :
30 : #include "hb-private.hh"
31 : #include "hb-object-private.hh"
32 :
33 :
34 : /*
35 : * The set digests here implement various "filters" that support
36 : * "approximate member query". Conceptually these are like Bloom
37 : * Filter and Quotient Filter, however, much smaller, faster, and
38 : * designed to fit the requirements of our uses for glyph coverage
39 : * queries.
40 : *
41 : * Our filters are highly accurate if the lookup covers a fairly local
42 : * set of glyphs, but fully flooded and ineffective if coverage is
43 : * all over the place.
44 : *
45 : * The frozen-set can be used instead of a digest, to trade more
46 : * memory for 100% accuracy, but in practice, that doesn't look like
47 : * an attractive trade-off.
48 : */
49 :
50 : template <typename mask_t, unsigned int shift>
51 : struct hb_set_digest_lowest_bits_t
52 : {
53 : ASSERT_POD ();
54 :
55 : static const unsigned int mask_bytes = sizeof (mask_t);
56 : static const unsigned int mask_bits = sizeof (mask_t) * 8;
57 : static const unsigned int num_bits = 0
58 : + (mask_bytes >= 1 ? 3 : 0)
59 : + (mask_bytes >= 2 ? 1 : 0)
60 : + (mask_bytes >= 4 ? 1 : 0)
61 : + (mask_bytes >= 8 ? 1 : 0)
62 : + (mask_bytes >= 16? 1 : 0)
63 : + 0;
64 :
65 : ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
66 : ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
67 :
68 306 : inline void init (void) {
69 306 : mask = 0;
70 306 : }
71 :
72 4620 : inline void add (hb_codepoint_t g) {
73 4620 : mask |= mask_for (g);
74 4620 : }
75 :
76 1284 : inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
77 1284 : if ((b >> shift) - (a >> shift) >= mask_bits - 1)
78 2 : mask = (mask_t) -1;
79 : else {
80 1282 : mask_t ma = mask_for (a);
81 1282 : mask_t mb = mask_for (b);
82 1282 : mask |= mb + (mb - ma) - (mb < ma);
83 : }
84 1284 : }
85 :
86 6753 : inline bool may_have (hb_codepoint_t g) const {
87 6753 : return !!(mask & mask_for (g));
88 : }
89 :
90 : private:
91 :
92 13937 : static inline mask_t mask_for (hb_codepoint_t g) {
93 13937 : return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
94 : }
95 : mask_t mask;
96 : };
97 :
98 : template <typename head_t, typename tail_t>
99 : struct hb_set_digest_combiner_t
100 : {
101 : ASSERT_POD ();
102 :
103 204 : inline void init (void) {
104 204 : head.init ();
105 204 : tail.init ();
106 204 : }
107 :
108 3080 : inline void add (hb_codepoint_t g) {
109 3080 : head.add (g);
110 3080 : tail.add (g);
111 3080 : }
112 :
113 856 : inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
114 856 : head.add_range (a, b);
115 856 : tail.add_range (a, b);
116 856 : }
117 :
118 5500 : inline bool may_have (hb_codepoint_t g) const {
119 5500 : return head.may_have (g) && tail.may_have (g);
120 : }
121 :
122 : private:
123 : head_t head;
124 : tail_t tail;
125 : };
126 :
127 :
/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 *
 * Three single-word digests are chained, keyed on the codepoint
 * shifted right by 4, 0 and 9 bits respectively.  A codepoint is
 * reported as possibly present only if all three digests agree.
 */
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;
144 :
145 :
146 :
147 : /*
148 : * hb_set_t
149 : */
150 :
151 :
152 : /* TODO Make this faster and memory efficient. */
153 :
154 : struct hb_set_t
155 : {
156 : friend struct hb_frozen_set_t;
157 :
158 : hb_object_header_t header;
159 : ASSERT_POD ();
160 : bool in_error;
161 :
162 96 : inline void init (void) {
163 96 : hb_object_init (this);
164 96 : clear ();
165 96 : }
166 126 : inline void fini (void) {
167 126 : }
168 136 : inline void clear (void) {
169 136 : if (unlikely (hb_object_is_inert (this)))
170 0 : return;
171 136 : in_error = false;
172 136 : memset (elts, 0, sizeof elts);
173 : }
174 0 : inline bool is_empty (void) const {
175 0 : for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
176 0 : if (elts[i])
177 0 : return false;
178 0 : return true;
179 : }
180 20547 : inline void add (hb_codepoint_t g)
181 : {
182 20547 : if (unlikely (in_error)) return;
183 16044 : if (unlikely (g == INVALID)) return;
184 16044 : if (unlikely (g > MAX_G)) return;
185 16044 : elt (g) |= mask (g);
186 : }
187 2006 : inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
188 : {
189 2006 : if (unlikely (in_error)) return;
190 : /* TODO Speedup */
191 6194 : for (unsigned int i = a; i < b + 1; i++)
192 4188 : add (i);
193 : }
194 0 : inline void del (hb_codepoint_t g)
195 : {
196 0 : if (unlikely (in_error)) return;
197 0 : if (unlikely (g > MAX_G)) return;
198 0 : elt (g) &= ~mask (g);
199 : }
200 0 : inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
201 : {
202 0 : if (unlikely (in_error)) return;
203 : /* TODO Speedup */
204 0 : for (unsigned int i = a; i < b + 1; i++)
205 0 : del (i);
206 : }
207 1179654 : inline bool has (hb_codepoint_t g) const
208 : {
209 1179654 : if (unlikely (g > MAX_G)) return false;
210 1179654 : return !!(elt (g) & mask (g));
211 : }
212 0 : inline bool intersects (hb_codepoint_t first,
213 : hb_codepoint_t last) const
214 : {
215 0 : if (unlikely (first > MAX_G)) return false;
216 0 : if (unlikely (last > MAX_G)) last = MAX_G;
217 0 : unsigned int end = last + 1;
218 0 : for (hb_codepoint_t i = first; i < end; i++)
219 0 : if (has (i))
220 0 : return true;
221 0 : return false;
222 : }
223 0 : inline bool is_equal (const hb_set_t *other) const
224 : {
225 0 : for (unsigned int i = 0; i < ELTS; i++)
226 0 : if (elts[i] != other->elts[i])
227 0 : return false;
228 0 : return true;
229 : }
230 0 : inline void set (const hb_set_t *other)
231 : {
232 0 : if (unlikely (in_error)) return;
233 0 : for (unsigned int i = 0; i < ELTS; i++)
234 0 : elts[i] = other->elts[i];
235 : }
236 0 : inline void union_ (const hb_set_t *other)
237 : {
238 0 : if (unlikely (in_error)) return;
239 0 : for (unsigned int i = 0; i < ELTS; i++)
240 0 : elts[i] |= other->elts[i];
241 : }
242 0 : inline void intersect (const hb_set_t *other)
243 : {
244 0 : if (unlikely (in_error)) return;
245 0 : for (unsigned int i = 0; i < ELTS; i++)
246 0 : elts[i] &= other->elts[i];
247 : }
248 0 : inline void subtract (const hb_set_t *other)
249 : {
250 0 : if (unlikely (in_error)) return;
251 0 : for (unsigned int i = 0; i < ELTS; i++)
252 0 : elts[i] &= ~other->elts[i];
253 : }
254 0 : inline void symmetric_difference (const hb_set_t *other)
255 : {
256 0 : if (unlikely (in_error)) return;
257 0 : for (unsigned int i = 0; i < ELTS; i++)
258 0 : elts[i] ^= other->elts[i];
259 : }
260 0 : inline void invert (void)
261 : {
262 0 : if (unlikely (in_error)) return;
263 0 : for (unsigned int i = 0; i < ELTS; i++)
264 0 : elts[i] = ~elts[i];
265 : }
266 116 : inline bool next (hb_codepoint_t *codepoint) const
267 : {
268 116 : if (unlikely (*codepoint == INVALID)) {
269 20 : hb_codepoint_t i = get_min ();
270 20 : if (i != INVALID) {
271 18 : *codepoint = i;
272 18 : return true;
273 : } else {
274 2 : *codepoint = INVALID;
275 2 : return false;
276 : }
277 : }
278 1179576 : for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
279 1179558 : if (has (i)) {
280 78 : *codepoint = i;
281 78 : return true;
282 : }
283 18 : *codepoint = INVALID;
284 18 : return false;
285 : }
286 0 : inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
287 : {
288 : hb_codepoint_t i;
289 :
290 0 : i = *last;
291 0 : if (!next (&i))
292 : {
293 0 : *last = *first = INVALID;
294 0 : return false;
295 : }
296 :
297 0 : *last = *first = i;
298 0 : while (next (&i) && i == *last + 1)
299 0 : (*last)++;
300 :
301 0 : return true;
302 : }
303 :
304 0 : inline unsigned int get_population (void) const
305 : {
306 0 : unsigned int count = 0;
307 0 : for (unsigned int i = 0; i < ELTS; i++)
308 0 : count += _hb_popcount32 (elts[i]);
309 0 : return count;
310 : }
311 20 : inline hb_codepoint_t get_min (void) const
312 : {
313 4116 : for (unsigned int i = 0; i < ELTS; i++)
314 4114 : if (elts[i])
315 90 : for (unsigned int j = 0; j < BITS; j++)
316 90 : if (elts[i] & (1u << j))
317 18 : return i * BITS + j;
318 2 : return INVALID;
319 : }
320 0 : inline hb_codepoint_t get_max (void) const
321 : {
322 0 : for (unsigned int i = ELTS; i; i--)
323 0 : if (elts[i - 1])
324 0 : for (unsigned int j = BITS; j; j--)
325 0 : if (elts[i - 1] & (1u << (j - 1)))
326 0 : return (i - 1) * BITS + (j - 1);
327 0 : return INVALID;
328 : }
329 :
330 : typedef uint32_t elt_t;
331 : static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
332 : static const unsigned int SHIFT = 5;
333 : static const unsigned int BITS = (1 << SHIFT);
334 : static const unsigned int MASK = BITS - 1;
335 : static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
336 : static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
337 :
338 16044 : elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
339 1179654 : elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
340 1195698 : elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
341 :
342 : elt_t elts[ELTS]; /* XXX 8kb */
343 :
344 : ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
345 : ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
346 : };
347 :
348 : struct hb_frozen_set_t
349 : {
350 : static const unsigned int SHIFT = hb_set_t::SHIFT;
351 : static const unsigned int BITS = hb_set_t::BITS;
352 : static const unsigned int MASK = hb_set_t::MASK;
353 : typedef hb_set_t::elt_t elt_t;
354 :
355 : inline void init (const hb_set_t &set)
356 : {
357 : start = count = 0;
358 : elts = NULL;
359 :
360 : unsigned int max = set.get_max ();
361 : if (max == set.INVALID)
362 : return;
363 : unsigned int min = set.get_min ();
364 : const elt_t &min_elt = set.elt (min);
365 :
366 : start = min & ~MASK;
367 : count = max - start + 1;
368 : unsigned int num_elts = (count + BITS - 1) / BITS;
369 : unsigned int elts_size = num_elts * sizeof (elt_t);
370 : elts = (elt_t *) malloc (elts_size);
371 : if (unlikely (!elts))
372 : {
373 : start = count = 0;
374 : return;
375 : }
376 : memcpy (elts, &min_elt, elts_size);
377 : }
378 :
379 : inline void fini (void)
380 : {
381 : if (elts)
382 : free (elts);
383 : }
384 :
385 : inline bool has (hb_codepoint_t g) const
386 : {
387 : /* hb_codepoint_t is unsigned. */
388 : g -= start;
389 : if (unlikely (g > count)) return false;
390 : return !!(elt (g) & mask (g));
391 : }
392 :
393 : elt_t const &elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
394 : elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
395 :
396 : private:
397 : hb_codepoint_t start, count;
398 : elt_t *elts;
399 : };
400 :
401 :
402 : #endif /* HB_SET_PRIVATE_HH */
|