Line data Source code
1 : /*
2 : * Copyright © 2011,2012,2014 Google, Inc.
3 : *
4 : * This is part of HarfBuzz, a text shaping library.
5 : *
6 : * Permission is hereby granted, without written agreement and without
7 : * license or royalty fees, to use, copy, modify, and distribute this
8 : * software and its documentation for any purpose, provided that the
9 : * above copyright notice and the following two paragraphs appear in
10 : * all copies of this software.
11 : *
12 : * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 : * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 : * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 : * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 : * DAMAGE.
17 : *
18 : * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 : * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 : * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 : * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 : * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 : *
24 : * Google Author(s): Behdad Esfahbod
25 : */
26 :
27 : #ifndef HB_UTF_PRIVATE_HH
28 : #define HB_UTF_PRIVATE_HH
29 :
30 : #include "hb-private.hh"
31 :
32 :
33 : struct hb_utf8_t
34 : {
35 : typedef uint8_t codepoint_t;
36 :
37 : static inline const uint8_t *
38 0 : next (const uint8_t *text,
39 : const uint8_t *end,
40 : hb_codepoint_t *unicode,
41 : hb_codepoint_t replacement)
42 : {
43 : /* Written to only accept well-formed sequences.
44 : * Based on ideas from ICU's U8_NEXT.
45 : * Generates one "replacement" for each ill-formed byte. */
46 :
47 0 : hb_codepoint_t c = *text++;
48 :
49 0 : if (c > 0x7Fu)
50 : {
51 0 : if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */
52 : {
53 : unsigned int t1;
54 0 : if (likely (text < end &&
55 : (t1 = text[0] - 0x80u) <= 0x3Fu))
56 : {
57 0 : c = ((c&0x1Fu)<<6) | t1;
58 0 : text++;
59 : }
60 : else
61 0 : goto error;
62 : }
63 0 : else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */
64 : {
65 : unsigned int t1, t2;
66 0 : if (likely (1 < end - text &&
67 : (t1 = text[0] - 0x80u) <= 0x3Fu &&
68 : (t2 = text[1] - 0x80u) <= 0x3Fu))
69 : {
70 0 : c = ((c&0xFu)<<12) | (t1<<6) | t2;
71 0 : if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu)))
72 0 : goto error;
73 0 : text += 2;
74 : }
75 : else
76 0 : goto error;
77 : }
78 0 : else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */
79 : {
80 : unsigned int t1, t2, t3;
81 0 : if (likely (2 < end - text &&
82 : (t1 = text[0] - 0x80u) <= 0x3Fu &&
83 : (t2 = text[1] - 0x80u) <= 0x3Fu &&
84 : (t3 = text[2] - 0x80u) <= 0x3Fu))
85 : {
86 0 : c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3;
87 0 : if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu)))
88 0 : goto error;
89 0 : text += 3;
90 : }
91 : else
92 0 : goto error;
93 : }
94 : else
95 0 : goto error;
96 : }
97 :
98 0 : *unicode = c;
99 0 : return text;
100 :
101 : error:
102 0 : *unicode = replacement;
103 0 : return text;
104 : }
105 :
106 : static inline const uint8_t *
107 0 : prev (const uint8_t *text,
108 : const uint8_t *start,
109 : hb_codepoint_t *unicode,
110 : hb_codepoint_t replacement)
111 : {
112 0 : const uint8_t *end = text--;
113 0 : while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
114 0 : text--;
115 :
116 0 : if (likely (next (text, end, unicode, replacement) == end))
117 0 : return text;
118 :
119 0 : *unicode = replacement;
120 0 : return end - 1;
121 : }
122 :
123 : static inline unsigned int
124 0 : strlen (const uint8_t *text)
125 : {
126 0 : return ::strlen ((const char *) text);
127 : }
128 : };
129 :
130 :
131 : struct hb_utf16_t
132 : {
133 : typedef uint16_t codepoint_t;
134 :
135 : static inline const uint16_t *
136 265 : next (const uint16_t *text,
137 : const uint16_t *end,
138 : hb_codepoint_t *unicode,
139 : hb_codepoint_t replacement)
140 : {
141 265 : hb_codepoint_t c = *text++;
142 :
143 265 : if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
144 : {
145 265 : *unicode = c;
146 265 : return text;
147 : }
148 :
149 0 : if (likely (c <= 0xDBFFu && text < end))
150 : {
151 : /* High-surrogate in c */
152 0 : hb_codepoint_t l = *text;
153 0 : if (likely (hb_in_range (l, 0xDC00u, 0xDFFFu)))
154 : {
155 : /* Low-surrogate in l */
156 0 : *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u);
157 0 : text++;
158 0 : return text;
159 : }
160 : }
161 :
162 : /* Lonely / out-of-order surrogate. */
163 0 : *unicode = replacement;
164 0 : return text;
165 : }
166 :
167 : static inline const uint16_t *
168 0 : prev (const uint16_t *text,
169 : const uint16_t *start,
170 : hb_codepoint_t *unicode,
171 : hb_codepoint_t replacement)
172 : {
173 0 : hb_codepoint_t c = *--text;
174 :
175 0 : if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu)))
176 : {
177 0 : *unicode = c;
178 0 : return text;
179 : }
180 :
181 0 : if (likely (c >= 0xDC00u && start < text))
182 : {
183 : /* Low-surrogate in c */
184 0 : hb_codepoint_t h = text[-1];
185 0 : if (likely (hb_in_range (h, 0xD800u, 0xDBFFu)))
186 : {
187 : /* High-surrogate in h */
188 0 : *unicode = (h << 10) + c - ((0xD800u << 10) - 0x10000u + 0xDC00u);
189 0 : text--;
190 0 : return text;
191 : }
192 : }
193 :
194 : /* Lonely / out-of-order surrogate. */
195 0 : *unicode = replacement;
196 0 : return text;
197 : }
198 :
199 :
200 : static inline unsigned int
201 0 : strlen (const uint16_t *text)
202 : {
203 0 : unsigned int l = 0;
204 0 : while (*text++) l++;
205 0 : return l;
206 : }
207 : };
208 :
209 :
210 : template <bool validate=true>
211 : struct hb_utf32_t
212 : {
213 : typedef uint32_t codepoint_t;
214 :
215 : static inline const uint32_t *
216 0 : next (const uint32_t *text,
217 : const uint32_t *end HB_UNUSED,
218 : hb_codepoint_t *unicode,
219 : hb_codepoint_t replacement)
220 : {
221 0 : hb_codepoint_t c = *unicode = *text++;
222 0 : if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu)))
223 0 : *unicode = replacement;
224 0 : return text;
225 : }
226 :
227 : static inline const uint32_t *
228 0 : prev (const uint32_t *text,
229 : const uint32_t *start HB_UNUSED,
230 : hb_codepoint_t *unicode,
231 : hb_codepoint_t replacement)
232 : {
233 0 : hb_codepoint_t c = *unicode = *--text;
234 0 : if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu)))
235 0 : *unicode = replacement;
236 0 : return text;
237 : }
238 :
239 : static inline unsigned int
240 0 : strlen (const uint32_t *text)
241 : {
242 0 : unsigned int l = 0;
243 0 : while (*text++) l++;
244 0 : return l;
245 : }
246 : };
247 :
248 :
249 : struct hb_latin1_t
250 : {
251 : typedef uint8_t codepoint_t;
252 :
253 : static inline const uint8_t *
254 0 : next (const uint8_t *text,
255 : const uint8_t *end HB_UNUSED,
256 : hb_codepoint_t *unicode,
257 : hb_codepoint_t replacement HB_UNUSED)
258 : {
259 0 : *unicode = *text++;
260 0 : return text;
261 : }
262 :
263 : static inline const uint8_t *
264 0 : prev (const uint8_t *text,
265 : const uint8_t *start HB_UNUSED,
266 : hb_codepoint_t *unicode,
267 : hb_codepoint_t replacement)
268 : {
269 0 : *unicode = *--text;
270 0 : return text;
271 : }
272 :
273 : static inline unsigned int
274 0 : strlen (const uint8_t *text)
275 : {
276 0 : unsigned int l = 0;
277 0 : while (*text++) l++;
278 0 : return l;
279 : }
280 : };
281 :
282 : #endif /* HB_UTF_PRIVATE_HH */
|