Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : ******************************************************************************
5 : *
6 : * Copyright (C) 2000-2016, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : ******************************************************************************
10 : * file name: ushape.cpp
11 : * encoding: UTF-8
12 : * tab size: 8 (not used)
13 : * indentation:4
14 : *
15 : * created on: 2000jun29
16 : * created by: Markus W. Scherer
17 : *
18 : * Arabic letter shaping implemented by Ayman Roshdy
19 : */
20 :
21 : #include "unicode/utypes.h"
22 : #include "unicode/uchar.h"
23 : #include "unicode/ustring.h"
24 : #include "unicode/ushape.h"
25 : #include "cmemory.h"
26 : #include "putilimp.h"
27 : #include "ustr_imp.h"
28 : #include "ubidi_props.h"
29 : #include "uassert.h"
30 :
31 : /*
32 : * This implementation is designed for 16-bit Unicode strings.
33 : * The main assumption is that the Arabic characters and their
34 : * presentation forms each fit into a single UChar.
35 : * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
36 : * characters.
37 : */
38 :
39 : /*
40 : * ### TODO in general for letter shaping:
41 : * - the letter shaping code is UTF-16-unaware; needs update
42 : * + especially invertBuffer()?!
43 : * - needs to handle the "Arabic Tail" that is used in some legacy codepages
44 : * as a glyph fragment of wide-glyph letters
45 : * + IBM Unicode conversion tables map it to U+200B (ZWSP)
46 : * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
47 : * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT
48 : */
49 :
50 : /* definitions for Arabic letter shaping ------------------------------------ */
51 :
/*
 * Flag values for the link tables. They are distinct powers of two, and the
 * literal addends 1, 2, 4, 8, 16, 32, 64 and 128 in the low byte of the
 * araLink entries match them.
 * NOTE(review): the tables spell the flags as bare numbers rather than with
 * these names; confirm the correspondence against the code that consumes
 * getLink() results.
 */
52 : #define IRRELEVANT 4
53 : #define LAMTYPE 16
54 : #define ALEFTYPE 32
55 : #define LINKR 1
56 : #define LINKL 2
57 : #define APRESENT 8
58 : #define SHADDA 64
59 : #define CSHADDA 128
60 : #define COMBINE (SHADDA+CSHADDA)
61 :
/*
 * Individual code units referenced by the shaping helpers.
 * LAMALEF_SPACE_SUB and TASHKEEL_SPACE_SUB are placeholder values that
 * handleGeneratedSpaces() removes or turns into SPACE_CHAR.
 * NEW_TAIL_CHAR and OLD_TAIL_CHAR are the two values accepted by isTailChar().
 */
62 : #define HAMZAFE_CHAR 0xfe80
63 : #define HAMZA06_CHAR 0x0621
64 : #define YEH_HAMZA_CHAR 0x0626
65 : #define YEH_HAMZAFE_CHAR 0xFE89
66 : #define LAMALEF_SPACE_SUB 0xFFFF
67 : #define TASHKEEL_SPACE_SUB 0xFFFE
68 : #define NEW_TAIL_CHAR 0xFE73
69 : #define OLD_TAIL_CHAR 0x200B
70 : #define LAM_CHAR 0x0644
71 : #define SPACE_CHAR 0x0020
72 : #define SHADDA_CHAR 0xFE7C
73 : #define TATWEEL_CHAR 0x0640
74 : #define SHADDA_TATWEEL_CHAR 0xFE7D
75 : #define SHADDA06_CHAR 0x0651
76 :
/* Mode selectors; handleGeneratedSpaces() initializes its shapingMode to SHAPE_MODE. */
77 : #define SHAPE_MODE 0
78 : #define DESHAPE_MODE 1
79 :
/*
 * Per-call settings passed by value to the space-handling helpers.
 * handleGeneratedSpaces() compares the masked LamAlef/Tashkeel option bits
 * against the four uShape*Begin/End members, and treats
 * spacesRelativeToTextBeginEnd as a 0/1 switch for the LAMALEF_AUTO case.
 * NOTE(review): tailChar is not read in this part of the file; presumably it
 * holds the tail code unit selected by the caller - confirm at the use site.
 */
80 : struct uShapeVariables {
81 : UChar tailChar;
82 : uint32_t uShapeLamalefBegin;
83 : uint32_t uShapeLamalefEnd;
84 : uint32_t uShapeTashkeelBegin;
85 : uint32_t uShapeTashkeelEnd;
86 : int spacesRelativeToTextBeginEnd;
87 : };
88 :
/*
 * Lookup used by isSeenTailFamilyChar(): entry [ch - 0xFEB1] for the code
 * units U+FEB1..U+FEBE. Four consecutive code units form one group; the
 * first two of each group (isolated and final, per the table name) are 1,
 * the other two are 0.
 */
static const uint8_t tailFamilyIsolatedFinal[] = {
    1, 1, 0, 0,     /* FEB1..FEB4 */
    1, 1, 0, 0,     /* FEB5..FEB8 */
    1, 1, 0, 0,     /* FEB9..FEBC */
    1, 1            /* FEBD..FEBE */
};
105 :
/*
 * Lookup for the code units U+FE70..U+FE7F, indexed by (ch - 0xFE70).
 * isTashkeelOnTatweelChar() returns the entry as is and
 * isIsolatedTashkeelChar() returns 1 minus the entry, so 1 stands for the
 * "on tatweel" (medial) form and 0 for everything else.
 */
static const uint8_t tashkeelMedial[] = {
    0, 1, 0, 0,     /* FE70..FE73 */
    0, 0, 0, 1,     /* FE74..FE77 */
    0, 1, 0, 1,     /* FE78..FE7B */
    0, 1, 0, 1      /* FE7C..FE7F */
};
124 :
125 : static const UChar yehHamzaToYeh[] =
126 : {
127 : /* isolated*/ 0xFEEF,
128 : /* final */ 0xFEF0
129 : };
130 :
/* The even values 0x0 through 0xE in ascending order. */
static const uint8_t IrrelevantPos[] = {
    0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E
};
135 :
136 :
137 : static const UChar convertLamAlef[] =
138 : {
139 : /*FEF5*/ 0x0622,
140 : /*FEF6*/ 0x0622,
141 : /*FEF7*/ 0x0623,
142 : /*FEF8*/ 0x0623,
143 : /*FEF9*/ 0x0625,
144 : /*FEFA*/ 0x0625,
145 : /*FEFB*/ 0x0627,
146 : /*FEFC*/ 0x0627
147 : };
148 :
/*
 * Link data for U+0622..U+06D3; getLink() returns araLink[ch - 0x0622].
 * Each entry is a sum of small flag values in the low byte plus 256 times a
 * second value in the high byte.
 * NOTE(review): the low-byte addends (1, 2, 4, 8, 16, 32, 64, 128) match the
 * LINKR/LINKL/IRRELEVANT/APRESENT/LAMTYPE/ALEFTYPE/SHADDA/CSHADDA macros, and
 * the high byte looks like an offset into a presentation-form block; confirm
 * both against the shaping code that consumes this table.
 */
149 : static const UChar araLink[178]=
150 : {
151 : 1 + 32 + 256 * 0x11,/*0x0622*/
152 : 1 + 32 + 256 * 0x13,/*0x0623*/
153 : 1 + 256 * 0x15,/*0x0624*/
154 : 1 + 32 + 256 * 0x17,/*0x0625*/
155 : 1 + 2 + 256 * 0x19,/*0x0626*/
156 : 1 + 32 + 256 * 0x1D,/*0x0627*/
157 : 1 + 2 + 256 * 0x1F,/*0x0628*/
158 : 1 + 256 * 0x23,/*0x0629*/
159 : 1 + 2 + 256 * 0x25,/*0x062A*/
160 : 1 + 2 + 256 * 0x29,/*0x062B*/
161 : 1 + 2 + 256 * 0x2D,/*0x062C*/
162 : 1 + 2 + 256 * 0x31,/*0x062D*/
163 : 1 + 2 + 256 * 0x35,/*0x062E*/
164 : 1 + 256 * 0x39,/*0x062F*/
165 : 1 + 256 * 0x3B,/*0x0630*/
166 : 1 + 256 * 0x3D,/*0x0631*/
167 : 1 + 256 * 0x3F,/*0x0632*/
168 : 1 + 2 + 256 * 0x41,/*0x0633*/
169 : 1 + 2 + 256 * 0x45,/*0x0634*/
170 : 1 + 2 + 256 * 0x49,/*0x0635*/
171 : 1 + 2 + 256 * 0x4D,/*0x0636*/
172 : 1 + 2 + 256 * 0x51,/*0x0637*/
173 : 1 + 2 + 256 * 0x55,/*0x0638*/
174 : 1 + 2 + 256 * 0x59,/*0x0639*/
175 : 1 + 2 + 256 * 0x5D,/*0x063A*/
176 : 0, 0, 0, 0, 0, /*0x063B-0x063F*/
177 : 1 + 2, /*0x0640*/
178 : 1 + 2 + 256 * 0x61,/*0x0641*/
179 : 1 + 2 + 256 * 0x65,/*0x0642*/
180 : 1 + 2 + 256 * 0x69,/*0x0643*/
181 : 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
182 : 1 + 2 + 256 * 0x71,/*0x0645*/
183 : 1 + 2 + 256 * 0x75,/*0x0646*/
184 : 1 + 2 + 256 * 0x79,/*0x0647*/
185 : 1 + 256 * 0x7D,/*0x0648*/
186 : 1 + 256 * 0x7F,/*0x0649*/
187 : 1 + 2 + 256 * 0x81,/*0x064A*/
188 : 4 + 256 * 1, /*0x064B*/
189 : 4 + 128 + 256 * 1, /*0x064C*/
190 : 4 + 128 + 256 * 1, /*0x064D*/
191 : 4 + 128 + 256 * 1, /*0x064E*/
192 : 4 + 128 + 256 * 1, /*0x064F*/
193 : 4 + 128 + 256 * 1, /*0x0650*/
194 : 4 + 64 + 256 * 3, /*0x0651*/
195 : 4 + 256 * 1, /*0x0652*/
196 : 4 + 256 * 7, /*0x0653*/
197 : 4 + 256 * 8, /*0x0654*/
198 : 4 + 256 * 8, /*0x0655*/
199 : 4 + 256 * 1, /*0x0656*/
200 : 0, 0, 0, 0, 0, /*0x0657-0x065B*/
201 : 1 + 256 * 0x85,/*0x065C*/
202 : 1 + 256 * 0x87,/*0x065D*/
203 : 1 + 256 * 0x89,/*0x065E*/
204 : 1 + 256 * 0x8B,/*0x065F*/
205 : 0, 0, 0, 0, 0, /*0x0660-0x0664*/
206 : 0, 0, 0, 0, 0, /*0x0665-0x0669*/
207 : 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
208 : 4 + 256 * 6, /*0x0670*/
209 : 1 + 8 + 256 * 0x00,/*0x0671*/
210 : 1 + 32, /*0x0672*/
211 : 1 + 32, /*0x0673*/
212 : 0, /*0x0674*/
213 : 1 + 32, /*0x0675*/
214 : 1, 1, /*0x0676-0x0677*/
215 : 1 + 2, /*0x0678*/
216 : 1 + 2 + 8 + 256 * 0x16,/*0x0679*/
217 : 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/
218 : 1 + 2 + 8 + 256 * 0x02,/*0x067B*/
219 : 1+2, 1+2, /*0x067C-0x067D*/
220 : 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
221 : 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/
222 : 1 + 8 + 256 * 0x38,/*0x0688*/
223 : 1, 1, 1, /*0x0689-0x068B*/
224 : 1 + 8 + 256 * 0x34,/*0x068C*/
225 : 1 + 8 + 256 * 0x32,/*0x068D*/
226 : 1 + 8 + 256 * 0x36,/*0x068E*/
227 : 1, 1, /*0x068F-0x0690*/
228 : 1 + 8 + 256 * 0x3C,/*0x0691*/
229 : 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/
230 : 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x069F*/
231 : 1+2, 1+2, 1+2, 1+2, /*0x06A0-0x06A3*/
232 : 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06A9*/
233 : 1+2, 1+2, 1+2, 1+2, /*0x06AA-0x06AD*/
234 : 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B3*/
235 : 1+2, 1+2, 1+2, 1+2, /*0x06B4-0x06B7*/
236 : 1+2, 1+2, /*0x06B8-0x06B9*/
237 : 1 + 8 + 256 * 0x4E,/*0x06BA*/
238 : 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/
239 : 1+2, 1+2, /*0x06BC-0x06BD*/
240 : 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/
241 : 1+2, /*0x06BF*/
242 : 1 + 8 + 256 * 0x54,/*0x06C0*/
243 : 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/
244 : 1, 1, 1, /*0x06C2-0x06C4*/
245 : 1 + 8 + 256 * 0x90,/*0x06C5*/
246 : 1 + 8 + 256 * 0x89,/*0x06C6*/
247 : 1 + 8 + 256 * 0x87,/*0x06C7*/
248 : 1 + 8 + 256 * 0x8B,/*0x06C8*/
249 : 1 + 8 + 256 * 0x92,/*0x06C9*/
250 : 1, /*0x06CA*/
251 : 1 + 8 + 256 * 0x8E,/*0x06CB*/
252 : 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/
253 : 1, /*0x06CD*/
254 : 1+2, 1+2, /*0x06CE-0x06CF*/
255 : 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/
256 : 1+2, /*0x06D1*/
257 : 1 + 8 + 256 * 0x5E,/*0x06D2*/
258 : 1 + 8 + 256 * 0x60 /*0x06D3*/
259 : };
260 :
/*
 * Link values for U+FB50..U+FC62; getLink() returns presALink[ch - 0xFB50].
 * Rows cover sixteen code units each (the row label is the upper three hex
 * digits, the column header the last digit); the final row stops at U+FC62.
 * The stored values are 0, 1, 2, 1+2 and 4.
 */
261 : static const uint8_t presALink[] = {
262 : /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
263 : /*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
264 : /*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265 : /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
266 : /*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
267 : /*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268 : /*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269 : /*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
270 : /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
271 : /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
272 : /*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
273 : /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2,
274 : /*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275 : /*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 : /*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
277 : /*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 : /*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
279 : /*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4,
280 : /*FC6*/ 4, 4, 4
281 : };
282 :
/*
 * Link values for U+FE70..U+FEFC; getLink() returns presBLink[ch - 0xFE70].
 * Rows cover sixteen code units each (row label = upper three hex digits,
 * column header = last digit). The table has entries through U+FEFF, but
 * getLink() only indexes up to U+FEFC.
 */
283 : static const uint8_t presBLink[]=
284 : {
285 : /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
286 : /*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
287 : /*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0,
288 : /*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
289 : /*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
290 : /*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
291 : /*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
292 : /*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
293 : /*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
294 : /*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0
295 : };
296 :
/*
 * Maps code units U+FB50..U+FBFF to a code unit in the U+06xx range, or to 0
 * where the table has no mapping. Rows cover sixteen code units each
 * (row label = upper three hex digits, column header = last digit).
 * NOTE(review): no lookup into this table is visible in this part of the
 * file; presumably it is indexed with (ch - 0xFB50) by the de-shaping code -
 * confirm at the use site.
 */
297 : static const UChar convertFBto06[] =
298 : {
299 : /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
300 : /*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A,
301 : /*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679, 0, 0, 0, 0, 0, 0,
302 : /*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0,
303 : /*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9,
304 : /*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA,
305 : /*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2,
306 : /*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307 : /*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
308 : /*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB,
309 : /*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0,
310 : /*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
311 : };
312 :
/*
 * Maps code units U+FE70..U+FEFC to a code unit in the U+06xx range.
 * Rows cover sixteen code units each (row label = upper three hex digits,
 * column header = last digit); the last row ends at U+FEFC.
 * The entries for U+FEF5..U+FEFC are 0x65C..0x65F, the same intermediate
 * values that changeLamAlef() returns.
 * NOTE(review): no lookup into this table is visible in this part of the
 * file; presumably it is indexed with (ch - 0xFE70) by the de-shaping code -
 * confirm at the use site.
 */
313 : static const UChar convertFEto06[] =
314 : {
315 : /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
316 : /*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
317 : /*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
318 : /*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
319 : /*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
320 : /*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
321 : /*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
322 : /*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
323 : /*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
324 : /*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
325 : };
326 :
/*
 * 4 x 4 x 4 lookup whose entries are all in the range 0..3.
 * NOTE(review): the indexing code is outside this part of the file;
 * presumably the indices are link values of neighbouring characters and the
 * result selects a presentation form - confirm at the use site.
 */
static const uint8_t shapeTable[4][4][4]=
{
    /* [0] */
    {
        {0,0,0,0},
        {0,0,0,0},
        {0,1,0,3},
        {0,1,0,1}
    },
    /* [1] */
    {
        {0,0,2,2},
        {0,0,1,2},
        {0,1,1,2},
        {0,1,1,3}
    },
    /* [2] */
    {
        {0,0,0,0},
        {0,0,0,0},
        {0,1,0,3},
        {0,1,0,3}
    },
    /* [3] */
    {
        {0,0,1,2},
        {0,0,1,2},
        {0,1,1,2},
        {0,1,1,3}
    }
};
334 :
335 : /*
336 : * This function shapes European digits to Arabic-Indic digits
337 : * in-place, writing over the input characters.
338 : * Since we know that we are only looking for BMP code points,
339 : * we can safely just work with code units (again, at least UTF-16).
340 : */
341 : static void
342 0 : _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
343 : UChar digitBase,
344 : UBool isLogical, UBool lastStrongWasAL) {
345 : const UBiDiProps *bdp;
346 : int32_t i;
347 : UChar c;
348 :
349 0 : bdp=ubidi_getSingleton();
350 0 : digitBase-=0x30;
351 :
352 : /* the iteration direction depends on the type of input */
353 0 : if(isLogical) {
354 0 : for(i=0; i<length; ++i) {
355 0 : c=s[i];
356 0 : switch(ubidi_getClass(bdp, c)) {
357 : case U_LEFT_TO_RIGHT: /* L */
358 : case U_RIGHT_TO_LEFT: /* R */
359 0 : lastStrongWasAL=FALSE;
360 0 : break;
361 : case U_RIGHT_TO_LEFT_ARABIC: /* AL */
362 0 : lastStrongWasAL=TRUE;
363 0 : break;
364 : case U_EUROPEAN_NUMBER: /* EN */
365 0 : if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
366 0 : s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
367 : }
368 0 : break;
369 : default :
370 0 : break;
371 : }
372 : }
373 : } else {
374 0 : for(i=length; i>0; /* pre-decrement in the body */) {
375 0 : c=s[--i];
376 0 : switch(ubidi_getClass(bdp, c)) {
377 : case U_LEFT_TO_RIGHT: /* L */
378 : case U_RIGHT_TO_LEFT: /* R */
379 0 : lastStrongWasAL=FALSE;
380 0 : break;
381 : case U_RIGHT_TO_LEFT_ARABIC: /* AL */
382 0 : lastStrongWasAL=TRUE;
383 0 : break;
384 : case U_EUROPEAN_NUMBER: /* EN */
385 0 : if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
386 0 : s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
387 : }
388 0 : break;
389 : default :
390 0 : break;
391 : }
392 : }
393 : }
394 0 : }
395 :
396 : /*
397 : *Name : invertBuffer
398 : *Function : This function inverts the buffer, it's used
399 : * in case the user specifies the buffer to be
400 : * U_SHAPE_TEXT_DIRECTION_LOGICAL
401 : */
402 : static void
403 0 : invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) {
404 : UChar temp;
405 0 : int32_t i=0,j=0;
406 0 : for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) {
407 0 : temp = buffer[i];
408 0 : buffer[i] = buffer[j];
409 0 : buffer[j] = temp;
410 : }
411 0 : }
412 :
413 : /*
414 : *Name : changeLamAlef
415 : *Function : Converts the Alef characters into an equivalent
416 : * LamAlef location in the 0x06xx Range, this is an
417 : * intermediate stage in the operation of the program
418 : * later it'll be converted into the 0xFExx LamAlefs
419 : * in the shaping function.
420 : */
421 : static inline UChar
422 0 : changeLamAlef(UChar ch) {
423 0 : switch(ch) {
424 : case 0x0622 :
425 0 : return 0x065C;
426 : case 0x0623 :
427 0 : return 0x065D;
428 : case 0x0625 :
429 0 : return 0x065E;
430 : case 0x0627 :
431 0 : return 0x065F;
432 : }
433 0 : return 0;
434 : }
435 :
436 : /*
437 : *Name : getLink
438 : *Function : Resolves the link between the characters as
439 : * Arabic characters have four forms :
440 : * Isolated, Initial, Middle and Final Form
441 : */
442 : static UChar
443 0 : getLink(UChar ch) {
444 0 : if(ch >= 0x0622 && ch <= 0x06D3) {
445 0 : return(araLink[ch-0x0622]);
446 0 : } else if(ch == 0x200D) {
447 0 : return(3);
448 0 : } else if(ch >= 0x206D && ch <= 0x206F) {
449 0 : return(4);
450 0 : }else if(ch >= 0xFB50 && ch <= 0xFC62) {
451 0 : return(presALink[ch-0xFB50]);
452 0 : } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
453 0 : return(presBLink[ch-0xFE70]);
454 : }else {
455 0 : return(0);
456 : }
457 : }
458 :
459 : /*
460 : *Name : countSpaces
461 : *Function : Counts the number of spaces
462 : * at each end of the logical buffer
463 : */
464 : static void
465 0 : countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) {
466 0 : int32_t i = 0;
467 0 : int32_t countl = 0,countr = 0;
468 0 : while((dest[i] == SPACE_CHAR) && (countl < size)) {
469 0 : countl++;
470 0 : i++;
471 : }
472 0 : if (countl < size) { /* the entire buffer is not all space */
473 0 : while(dest[size-1] == SPACE_CHAR) {
474 0 : countr++;
475 0 : size--;
476 : }
477 : }
478 0 : *spacesCountl = countl;
479 0 : *spacesCountr = countr;
480 0 : }
481 :
482 : /*
483 : *Name : isTashkeelChar
484 : *Function : Returns 1 for Tashkeel characters in 06 range else return 0
485 : */
486 : static inline int32_t
487 0 : isTashkeelChar(UChar ch) {
488 0 : return (int32_t)( ch>=0x064B && ch<= 0x0652 );
489 : }
490 :
491 : /*
492 : *Name : isTashkeelCharFE
493 : *Function : Returns 1 for Tashkeel characters in FE range else return 0
494 : */
495 : static inline int32_t
496 0 : isTashkeelCharFE(UChar ch) {
497 0 : return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F );
498 : }
499 :
500 : /*
501 : *Name : isAlefChar
502 : *Function : Returns 1 for Alef characters else return 0
503 : */
504 : static inline int32_t
505 0 : isAlefChar(UChar ch) {
506 0 : return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) );
507 : }
508 :
509 : /*
510 : *Name : isLamAlefChar
511 : *Function : Returns 1 for LamAlef characters else return 0
512 : */
513 : static inline int32_t
514 0 : isLamAlefChar(UChar ch) {
515 0 : return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) );
516 : }
517 :
518 : /*BIDI
519 : *Name : isTailChar
520 : *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0
521 : */
522 :
523 : static inline int32_t
524 0 : isTailChar(UChar ch) {
525 0 : if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){
526 0 : return 1;
527 : }else{
528 0 : return 0;
529 : }
530 : }
531 :
532 : /*BIDI
533 : *Name : isSeenTailFamilyChar
534 : *Function : returns 1 if the character is a seen family isolated character
535 : * in the FE range otherwise returns 0
536 : */
537 :
538 : static inline int32_t
539 0 : isSeenTailFamilyChar(UChar ch) {
540 0 : if(ch >= 0xfeb1 && ch < 0xfebf){
541 0 : return tailFamilyIsolatedFinal [ch - 0xFEB1];
542 : }else{
543 0 : return 0;
544 : }
545 : }
546 :
547 : /* Name : isSeenFamilyChar
548 : * Function : returns 1 if the character is a seen family character in the Unicode
549 : * 06 range otherwise returns 0
550 : */
551 :
552 : static inline int32_t
553 0 : isSeenFamilyChar(UChar ch){
554 0 : if(ch >= 0x633 && ch <= 0x636){
555 0 : return 1;
556 : }else {
557 0 : return 0;
558 : }
559 : }
560 :
561 : /*Start of BIDI*/
562 : /*
563 : *Name : isAlefMaksouraChar
564 : *Function : returns 1 if the character is a Alef Maksoura Final or isolated
565 : * otherwise returns 0
566 : */
567 : static inline int32_t
568 0 : isAlefMaksouraChar(UChar ch) {
569 0 : return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
570 : }
571 :
572 : /*
573 : * Name : isYehHamzaChar
574 : * Function : returns 1 if the character is a yehHamza isolated or yehhamza
575 : * final is found otherwise returns 0
576 : */
577 : static inline int32_t
578 0 : isYehHamzaChar(UChar ch) {
579 0 : if((ch==0xFE89)||(ch==0xFE8A)){
580 0 : return 1;
581 : }else{
582 0 : return 0;
583 : }
584 : }
585 :
586 : /*
587 : * Name: isTashkeelOnTatweelChar
588 : * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
589 : * Tashkeel on tatweel (FE range), it returns 1 else if the
590 : * Tashkeel with shadda on tatweel (FC range)return 2 otherwise
591 : * returns 0
592 : */
593 : static inline int32_t
594 0 : isTashkeelOnTatweelChar(UChar ch){
595 0 : if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR)
596 : {
597 0 : return tashkeelMedial [ch - 0xFE70];
598 0 : }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) {
599 0 : return 2;
600 : }else{
601 0 : return 0;
602 : }
603 : }
604 :
605 : /*
606 : * Name: isIsolatedTashkeelChar
607 : * Function: Checks if the Tashkeel Character is in the isolated form
608 : * (i.e. Unicode FE range) returns 1 else if the Tashkeel
609 : * with shadda is in the isolated form (i.e. Unicode FC range)
610 : * returns 2 otherwise returns 0
611 : */
612 : static inline int32_t
613 0 : isIsolatedTashkeelChar(UChar ch){
614 0 : if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){
615 0 : return (1 - tashkeelMedial [ch - 0xFE70]);
616 0 : }else if(ch >= 0xfc5e && ch <= 0xfc63){
617 0 : return 1;
618 : }else{
619 0 : return 0;
620 : }
621 : }
622 :
623 :
624 :
625 :
626 : /*
627 : *Name : calculateSize
628 : *Function : This function calculates the destSize to be used in preflighting
629 : * when the destSize is equal to 0
630 : * It is used also to calculate the new destsize in case the
631 : * destination buffer will be resized.
632 : */
633 :
634 : static int32_t
635 0 : calculateSize(const UChar *source, int32_t sourceLength,
636 : int32_t destSize,uint32_t options) {
637 0 : int32_t i = 0;
638 :
639 0 : int lamAlefOption = 0;
640 0 : int tashkeelOption = 0;
641 :
642 0 : destSize = sourceLength;
643 :
644 0 : if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE ||
645 0 : ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) &&
646 0 : ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){
647 0 : lamAlefOption = 1;
648 : }
649 0 : if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE &&
650 0 : ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){
651 0 : tashkeelOption = 1;
652 : }
653 :
654 0 : if(lamAlefOption || tashkeelOption){
655 0 : if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
656 0 : for(i=0;i<sourceLength;i++) {
657 0 : if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) {
658 0 : destSize--;
659 : }
660 : }
661 0 : }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) {
662 0 : for(i=0;i<sourceLength;i++) {
663 0 : if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) {
664 0 : destSize--;
665 : }
666 : }
667 : }
668 : }
669 :
670 0 : if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){
671 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){
672 0 : for(i=0;i<sourceLength;i++) {
673 0 : if(isLamAlefChar(source[i]))
674 0 : destSize++;
675 : }
676 : }
677 : }
678 :
679 0 : return destSize;
680 : }
681 :
682 : /*
683 : *Name : handleTashkeelWithTatweel
684 : *Function : Replaces Tashkeel as following:
685 : * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
686 : * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
687 : * it with Shadda on Tatweel.
688 : * Case 3: if the Tashkeel is isolated replace it with Space.
689 : *
690 : */
691 : static int32_t
692 0 : handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength,
693 : int32_t /*destSize*/, uint32_t /*options*/,
694 : UErrorCode * /*pErrorCode*/) {
695 : int i;
696 0 : for(i = 0; i < sourceLength; i++){
697 0 : if((isTashkeelOnTatweelChar(dest[i]) == 1)){
698 0 : dest[i] = TATWEEL_CHAR;
699 0 : }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){
700 0 : dest[i] = SHADDA_TATWEEL_CHAR;
701 0 : }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){
702 0 : dest[i] = SPACE_CHAR;
703 : }
704 : }
705 0 : return sourceLength;
706 : }
707 :
708 :
709 :
710 : /*
711 : *Name : handleGeneratedSpaces
712 : *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
713 : * and Tashkeel to space.
714 : * handleGeneratedSpaces function puts these generated spaces
715 : * according to the options the user specifies. LamAlef and Tashkeel
716 : * spaces can be replaced at begin, at end, at near or decrease the
717 : * buffer size.
718 : *
719 : * There is also Auto option for LamAlef and tashkeel, which will put
720 : * the spaces at end of the buffer (or end of text if the user used
721 : * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END).
722 : *
723 : * If the text type was visual_LTR and the option
724 : * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
725 : * option will place the space at the beginning of the buffer and
726 : * BEGIN will place the space at the end of the buffer.
727 : */
728 :
729 : static int32_t
730 0 : handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
731 : int32_t destSize,
732 : uint32_t options,
733 : UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) {
734 :
735 0 : int32_t i = 0, j = 0;
736 0 : int32_t count = 0;
737 0 : UChar *tempbuffer=NULL;
738 :
739 0 : int lamAlefOption = 0;
740 0 : int tashkeelOption = 0;
741 0 : int shapingMode = SHAPE_MODE;
742 :
743 0 : if (shapingMode == 0){
744 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){
745 0 : lamAlefOption = 1;
746 : }
747 0 : if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){
748 0 : tashkeelOption = 1;
749 : }
750 : }
751 :
752 0 : tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
753 : /* Test for NULL */
754 0 : if(tempbuffer == NULL) {
755 0 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
756 0 : return 0;
757 : }
758 :
759 :
760 0 : if (lamAlefOption || tashkeelOption){
761 0 : uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
762 :
763 0 : i = j = 0; count = 0;
764 0 : while(i < sourceLength) {
765 0 : if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
766 0 : (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
767 0 : j--;
768 0 : count++;
769 : } else {
770 0 : tempbuffer[j] = dest[i];
771 : }
772 0 : i++;
773 0 : j++;
774 : }
775 :
776 0 : while(count >= 0) {
777 0 : tempbuffer[i] = 0x0000;
778 0 : i--;
779 0 : count--;
780 : }
781 :
782 0 : u_memcpy(dest, tempbuffer, sourceLength);
783 0 : destSize = u_strlen(dest);
784 : }
785 :
786 0 : lamAlefOption = 0;
787 :
788 0 : if (shapingMode == 0){
789 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){
790 0 : lamAlefOption = 1;
791 : }
792 : }
793 :
794 0 : if (lamAlefOption){
795 : /* Lam+Alef is already shaped into LamAlef + FFFF */
796 0 : i = 0;
797 0 : while(i < sourceLength) {
798 0 : if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){
799 0 : dest[i] = SPACE_CHAR;
800 : }
801 0 : i++;
802 : }
803 0 : destSize = sourceLength;
804 : }
805 0 : lamAlefOption = 0;
806 0 : tashkeelOption = 0;
807 :
808 0 : if (shapingMode == 0) {
809 0 : if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) ||
810 0 : (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
811 0 : && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) {
812 0 : lamAlefOption = 1;
813 : }
814 0 : if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) {
815 0 : tashkeelOption = 1;
816 : }
817 : }
818 :
819 0 : if(lamAlefOption || tashkeelOption){
820 0 : uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
821 :
822 0 : i = j = sourceLength; count = 0;
823 :
824 0 : while(i >= 0) {
825 0 : if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
826 0 : (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
827 0 : j++;
828 0 : count++;
829 : }else {
830 0 : tempbuffer[j] = dest[i];
831 : }
832 0 : i--;
833 0 : j--;
834 : }
835 :
836 0 : for(i=0 ;i < count; i++){
837 0 : tempbuffer[i] = SPACE_CHAR;
838 : }
839 :
840 0 : u_memcpy(dest, tempbuffer, sourceLength);
841 0 : destSize = sourceLength;
842 : }
843 :
844 :
845 :
846 0 : lamAlefOption = 0;
847 0 : tashkeelOption = 0;
848 :
849 0 : if (shapingMode == 0) {
850 0 : if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) ||
851 0 : (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
852 0 : && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) {
853 0 : lamAlefOption = 1;
854 : }
855 0 : if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){
856 0 : tashkeelOption = 1;
857 : }
858 : }
859 :
860 0 : if(lamAlefOption || tashkeelOption){
861 0 : uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
862 :
863 0 : i = j = 0; count = 0;
864 0 : while(i < sourceLength) {
865 0 : if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
866 0 : (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
867 0 : j--;
868 0 : count++;
869 : }else {
870 0 : tempbuffer[j] = dest[i];
871 : }
872 0 : i++;
873 0 : j++;
874 : }
875 :
876 0 : while(count >= 0) {
877 0 : tempbuffer[i] = SPACE_CHAR;
878 0 : i--;
879 0 : count--;
880 : }
881 :
882 0 : u_memcpy(dest, tempbuffer, sourceLength);
883 0 : destSize = sourceLength;
884 : }
885 :
886 :
887 0 : if(tempbuffer){
888 0 : uprv_free(tempbuffer);
889 : }
890 :
891 0 : return destSize;
892 : }
893 :
894 : /*
895 : *Name :expandCompositCharAtBegin
896 : *Function :Expands the LamAlef character to Lam and Alef consuming the required
897 : * space from beginning of the buffer. If the text type was visual_LTR
898 : * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
899 : * the spaces will be located at end of buffer.
900 : * If there are no spaces to expand the LamAlef, an error
901 : * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
902 : */
903 :
904 : static int32_t
905 0 : expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
906 0 : int32_t i = 0,j = 0;
907 0 : int32_t countl = 0;
908 0 : UChar *tempbuffer=NULL;
909 :
910 0 : tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
911 :
912 : /* Test for NULL */
913 0 : if(tempbuffer == NULL) {
914 0 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
915 0 : return 0;
916 : }
917 :
918 0 : uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
919 :
920 0 : i = 0;
921 0 : while(dest[i] == SPACE_CHAR) {
922 0 : countl++;
923 0 : i++;
924 : }
925 :
926 0 : i = j = sourceLength-1;
927 :
928 0 : while(i >= 0 && j >= 0) {
929 0 : if( countl>0 && isLamAlefChar(dest[i])) {
930 0 : tempbuffer[j] = LAM_CHAR;
931 : /* to ensure the array index is within the range */
932 0 : U_ASSERT(dest[i] >= 0xFEF5u
933 : && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef));
934 0 : tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
935 0 : j--;
936 0 : countl--;
937 : }else {
938 0 : if( countl == 0 && isLamAlefChar(dest[i]) ) {
939 0 : *pErrorCode=U_NO_SPACE_AVAILABLE;
940 : }
941 0 : tempbuffer[j] = dest[i];
942 : }
943 0 : i--;
944 0 : j--;
945 : }
946 0 : u_memcpy(dest, tempbuffer, sourceLength);
947 :
948 0 : uprv_free(tempbuffer);
949 :
950 0 : destSize = sourceLength;
951 0 : return destSize;
952 : }
953 :
954 : /*
955 : *Name : expandCompositCharAtEnd
956 : *Function : Expands the LamAlef character to Lam and Alef consuming the
957 : * required space from end of the buffer. If the text type was
958 : * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
959 : * was used, the spaces will be consumed from begin of buffer. If
960 : * there are no spaces to expand the LamAlef, an error
961 : * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
962 : */
963 :
964 : static int32_t
965 0 : expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
966 0 : int32_t i = 0,j = 0;
967 :
968 0 : int32_t countr = 0;
969 0 : int32_t inpsize = sourceLength;
970 :
971 0 : UChar *tempbuffer=NULL;
972 0 : tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
973 :
974 : /* Test for NULL */
975 0 : if(tempbuffer == NULL) {
976 0 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
977 0 : return 0;
978 : }
979 :
980 0 : uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
981 :
982 0 : while(dest[inpsize-1] == SPACE_CHAR) {
983 0 : countr++;
984 0 : inpsize--;
985 : }
986 :
987 0 : i = sourceLength - countr - 1;
988 0 : j = sourceLength - 1;
989 :
990 0 : while(i >= 0 && j >= 0) {
991 0 : if( countr>0 && isLamAlefChar(dest[i]) ) {
992 0 : tempbuffer[j] = LAM_CHAR;
993 0 : tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
994 0 : j--;
995 0 : countr--;
996 : }else {
997 0 : if ((countr == 0) && isLamAlefChar(dest[i]) ) {
998 0 : *pErrorCode=U_NO_SPACE_AVAILABLE;
999 : }
1000 0 : tempbuffer[j] = dest[i];
1001 : }
1002 0 : i--;
1003 0 : j--;
1004 : }
1005 :
1006 0 : if(countr > 0) {
1007 0 : u_memmove(tempbuffer, tempbuffer+countr, sourceLength);
1008 0 : if(u_strlen(tempbuffer) < sourceLength) {
1009 0 : for(i=sourceLength-1;i>=sourceLength-countr;i--) {
1010 0 : tempbuffer[i] = SPACE_CHAR;
1011 : }
1012 : }
1013 : }
1014 0 : u_memcpy(dest, tempbuffer, sourceLength);
1015 :
1016 0 : uprv_free(tempbuffer);
1017 :
1018 0 : destSize = sourceLength;
1019 0 : return destSize;
1020 : }
1021 :
1022 : /*
1023 : *Name : expandCompositCharAtNear
1024 : *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
1025 : * into Yeh + Hamza, SeenFamily character into SeenFamily character
1026 : * + Tail, while consuming the space next to the character.
1027 : * If there are no spaces next to the character, an error
1028 : * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
1029 : */
1030 :
1031 : static int32_t
1032 0 : expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
1033 : int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
1034 0 : int32_t i = 0;
1035 :
1036 :
1037 : UChar lamalefChar, yehhamzaChar;
1038 :
1039 0 : for(i = 0 ;i<=sourceLength-1;i++) {
1040 0 : if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
1041 0 : if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
1042 0 : dest[i-1] = shapeVars.tailChar;
1043 : }else {
1044 0 : *pErrorCode=U_NO_SPACE_AVAILABLE;
1045 : }
1046 0 : }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) {
1047 0 : if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
1048 0 : yehhamzaChar = dest[i];
1049 0 : dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR];
1050 0 : dest[i-1] = HAMZAFE_CHAR;
1051 : }else {
1052 :
1053 0 : *pErrorCode=U_NO_SPACE_AVAILABLE;
1054 : }
1055 0 : }else if(lamAlefOption && isLamAlefChar(dest[i+1])) {
1056 0 : if(dest[i] == SPACE_CHAR){
1057 0 : lamalefChar = dest[i+1];
1058 0 : dest[i+1] = LAM_CHAR;
1059 0 : dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
1060 : }else {
1061 0 : *pErrorCode=U_NO_SPACE_AVAILABLE;
1062 : }
1063 : }
1064 : }
1065 0 : destSize = sourceLength;
1066 0 : return destSize;
1067 : }
1068 : /*
1069 : * Name : expandCompositChar
1070 : * Function : LamAlef, need special handling, since it expands from one
1071 : * character into two characters while shaping or deshaping.
1072 : * In order to expand it, near or far spaces according to the
1073 : * options user specifies. Also buffer size can be increased.
1074 : *
1075 : * For SeenFamily characters and YehHamza only the near option is
1076 : * supported, while for LamAlef we can take spaces from begin, end,
1077 : * near or even increase the buffer size.
1078 : * There is also the Auto option for LamAlef only, which will first
1079 : * search for a space at end, begin then near, respectively.
1080 : * If there are no spaces to expand these characters, an error will be set to
1081 : * U_NO_SPACE_AVAILABLE as defined in utypes.h
1082 : */
1083 :
1084 : static int32_t
1085 0 : expandCompositChar(UChar *dest, int32_t sourceLength,
1086 : int32_t destSize,uint32_t options,
1087 : UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {
1088 :
1089 0 : int32_t i = 0,j = 0;
1090 :
1091 0 : UChar *tempbuffer=NULL;
1092 0 : int yehHamzaOption = 0;
1093 0 : int seenTailOption = 0;
1094 0 : int lamAlefOption = 0;
1095 :
1096 0 : if (shapingMode == 1){
1097 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){
1098 :
1099 0 : if(shapeVars.spacesRelativeToTextBeginEnd == 0) {
1100 0 : destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
1101 :
1102 0 : if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
1103 0 : *pErrorCode = U_ZERO_ERROR;
1104 0 : destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
1105 : }
1106 : }else {
1107 0 : destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
1108 :
1109 0 : if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
1110 0 : *pErrorCode = U_ZERO_ERROR;
1111 0 : destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
1112 : }
1113 : }
1114 :
1115 0 : if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
1116 0 : *pErrorCode = U_ZERO_ERROR;
1117 : destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
1118 0 : seenTailOption, 1,shapeVars);
1119 : }
1120 : }
1121 : }
1122 :
1123 0 : if (shapingMode == 1){
1124 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){
1125 0 : destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
1126 : }
1127 : }
1128 :
1129 0 : if (shapingMode == 1){
1130 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){
1131 0 : destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
1132 : }
1133 : }
1134 :
1135 0 : if (shapingMode == 0){
1136 0 : if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){
1137 0 : yehHamzaOption = 1;
1138 : }
1139 0 : if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){
1140 0 : seenTailOption = 1;
1141 : }
1142 : }
1143 0 : if (shapingMode == 1) {
1144 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) {
1145 0 : lamAlefOption = 1;
1146 : }
1147 : }
1148 :
1149 :
1150 0 : if (yehHamzaOption || seenTailOption || lamAlefOption){
1151 : destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
1152 0 : seenTailOption,lamAlefOption,shapeVars);
1153 : }
1154 :
1155 :
1156 0 : if (shapingMode == 1){
1157 0 : if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){
1158 0 : destSize = calculateSize(dest,sourceLength,destSize,options);
1159 0 : tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR);
1160 :
1161 : /* Test for NULL */
1162 0 : if(tempbuffer == NULL) {
1163 0 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1164 0 : return 0;
1165 : }
1166 :
1167 0 : uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR);
1168 :
1169 0 : i = j = 0;
1170 0 : while(i < destSize && j < destSize) {
1171 0 : if(isLamAlefChar(dest[i]) ) {
1172 0 : tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ];
1173 0 : tempbuffer[j+1] = LAM_CHAR;
1174 0 : j++;
1175 : }else {
1176 0 : tempbuffer[j] = dest[i];
1177 : }
1178 0 : i++;
1179 0 : j++;
1180 : }
1181 :
1182 0 : u_memcpy(dest, tempbuffer, destSize);
1183 : }
1184 : }
1185 :
1186 0 : if(tempbuffer) {
1187 0 : uprv_free(tempbuffer);
1188 : }
1189 0 : return destSize;
1190 : }
1191 :
1192 : /*
1193 : *Name : shapeUnicode
1194 : *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
1195 : * arabic Unicode buffer in FExx Range
1196 : */
/*
 * Parameters:
 *   dest         in/out buffer holding sourceLength UChars; shaped in place
 *   sourceLength number of UChars to process (index sourceLength-1 is read
 *                unconditionally, so the caller must pass a length > 0)
 *   destSize     overwritten with sourceLength after the main loop; the
 *                incoming value is not read here
 *   options      U_SHAPE_* option bits; forwarded to the helpers
 *   pErrorCode   forwarded to handleGeneratedSpaces()/expandCompositChar()
 *   tashkeelFlag 1: a tashkeel between two linking neighbours gets Shape 1
 *                   (except U+064C/U+064D, or when it sits between Lam and Alef);
 *                2: every tashkeel other than SHADDA06_CHAR is replaced by
 *                   TASHKEEL_SPACE_SUB, SHADDA06_CHAR gets Shape 1;
 *                any other value: tashkeel always get Shape 0
 *   shapeVars    forwarded to the helpers
 *
 * Returns sourceLength, or the value returned by handleGeneratedSpaces() /
 * expandCompositChar() when one of them is called at the end.
 */
1197 : static int32_t
1198 0 : shapeUnicode(UChar *dest, int32_t sourceLength,
1199 : int32_t destSize,uint32_t options,
1200 : UErrorCode *pErrorCode,
1201 : int tashkeelFlag, struct uShapeVariables shapeVars) {
1202 :
1203 : int32_t i, iend;
1204 : int32_t step;
1205 : int32_t lastPos,Nx, Nw;
1206 : unsigned int Shape;
1207 0 : int32_t lamalef_found = 0;
1208 0 : int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0;
/* link values of the characters around the current one: prevLink and lastLink
 * belong to the two most recently visited characters without the IRRELEVANT
 * bit, nextLink to the next such character in scan direction */
1209 0 : UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
1210 : UChar wLamalef;
1211 :
1212 : /*
1213 : * Converts the input buffer from FExx Range into 06xx Range
1214 : * to make sure that all characters are in the 06xx range
1215 : * even the lamalef is converted to the special region in
1216 : * the 06xx range
1217 : */
/* this normalization is skipped unless the PRESERVE_PRESENTATION bits equal
 * U_SHAPE_PRESERVE_PRESENTATION_NOOP */
1218 0 : if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) {
1219 0 : for (i = 0; i < sourceLength; i++) {
1220 0 : UChar inputChar = dest[i];
1221 0 : if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
1222 0 : UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
/* a zero table entry leaves the FBxx character as it is */
1223 0 : if (c != 0)
1224 0 : dest[i] = c;
1225 0 : } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
1226 0 : dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
1227 : } else {
1228 0 : dest[i] = inputChar ;
1229 : }
1230 : }
1231 : }
1232 :
1233 :
1234 : /* sets the index to the end of the buffer, together with the step point to -1 */
1235 0 : i = sourceLength - 1;
1236 0 : iend = -1;
1237 0 : step = -1;
1238 :
1239 : /*
1240 : * This function resolves the link between the characters .
1241 : * Arabic characters have four forms :
1242 : * Isolated Form, Initial Form, Middle Form and Final Form
1243 : */
1244 0 : currLink = getLink(dest[i]);
1245 :
1246 0 : lastPos = i;
/* a negative Nx means "nextLink has not been looked up yet"; Nw is the scan
 * index used for that look-up */
1247 0 : Nx = -2, Nw = 0;
1248 :
1249 0 : while (i != iend) {
1250 : /* If high byte of currLink > 0 then more than one shape */
1251 0 : if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) {
1252 0 : Nw = i + step;
1253 0 : while (Nx < 0) { /* we need to know about next char */
1254 0 : if(Nw == iend) {
1255 0 : nextLink = 0;
/* NOTE(review): 3000 looks like an arbitrary non-negative "no next character"
 * marker; for buffers longer than 3000 UChars the `i == Nx` test at the end of
 * the loop can match it - confirm that this is harmless */
1256 0 : Nx = 3000;
1257 : } else {
1258 0 : nextLink = getLink(dest[Nw]);
1259 0 : if((nextLink & IRRELEVANT) == 0) {
1260 0 : Nx = Nw;
1261 : } else {
1262 0 : Nw = Nw + step;
1263 : }
1264 : }
1265 : }
1266 :
/* current character is of ALEFTYPE and the previously visited relevant one of
 * LAMTYPE: fold the pair into a single LamAlef stored at lastPos */
1267 0 : if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
1268 0 : lamalef_found = 1;
1269 0 : wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
1270 0 : if ( wLamalef != 0) {
1271 0 : dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */
1272 0 : dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */
1273 0 : i=lastPos; /* unicode private use area, this is done to make */
1274 : } /* sure that removeLamAlefSpaces() handles only the */
1275 0 : lastLink = prevLink; /* spaces generated during lamalef generation. */
1276 0 : currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */
1277 : } /* in removeLamAlefSpaces() */
1278 :
/* remember whether a Seen-family character or YEH_HAMZA_CHAR was seen either
 * directly after a space (dest[i-1]) or at index 0; this decides whether
 * expandCompositChar() is called after the loop */
1279 0 : if ((i > 0) && (dest[i-1] == SPACE_CHAR)){
1280 0 : if ( isSeenFamilyChar(dest[i])) {
1281 0 : seenfamFound = 1;
1282 0 : } else if (dest[i] == YEH_HAMZA_CHAR) {
1283 0 : yehhamzaFound = 1;
1284 : }
1285 : }
1286 0 : else if(i==0){
1287 0 : if ( isSeenFamilyChar(dest[i])){
1288 0 : seenfamFound = 1;
1289 0 : } else if (dest[i] == YEH_HAMZA_CHAR) {
1290 0 : yehhamzaFound = 1;
1291 : }
1292 : }
1293 :
1294 : /*
1295 : * get the proper shape according to link ability of neighbors
1296 : * and of character; depends on the order of the shapes
1297 : * (isolated, initial, middle, final) in the compatibility area
1298 : */
/* only the two low link bits (LINKR, LINKL) index the table */
1299 0 : Shape = shapeTable[nextLink & (LINKR + LINKL)]
1300 0 : [lastLink & (LINKR + LINKL)]
1301 0 : [currLink & (LINKR + LINKL)];
1302 :
/* LINKR-only characters keep just bit 0 of the table value; tashkeel get their
 * Shape from tashkeelFlag and the neighbours instead of from the table */
1303 0 : if ((currLink & (LINKR+LINKL)) == 1) {
1304 0 : Shape &= 1;
1305 0 : } else if(isTashkeelChar(dest[i])) {
1306 0 : if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
1307 0 : dest[i] != 0x064C && dest[i] != 0x064D )
1308 : {
1309 0 : Shape = 1;
1310 0 : if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
1311 0 : Shape = 0;
1312 : }
1313 0 : } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
1314 0 : Shape = 1;
1315 : } else {
1316 0 : Shape = 0;
1317 : }
1318 : }
/* (dest[i] ^ 0x0600) < 0x100 is true exactly for U+0600..U+06FF: only those
 * characters are rewritten to a presentation form below */
1319 0 : if ((dest[i] ^ 0x0600) < 0x100) {
1320 0 : if ( isTashkeelChar(dest[i]) ){
1321 0 : if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){
1322 0 : dest[i] = TASHKEEL_SPACE_SUB;
1323 0 : tashkeelFound = 1;
1324 : } else {
1325 : /* to ensure the array index is within the range */
1326 0 : U_ASSERT(dest[i] >= 0x064Bu
1327 : && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos));
1328 0 : dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
1329 : }
/* the high byte of the link value is the offset of the character's first
 * presentation form: relative to 0xFB50 when APRESENT is set, else to 0xFE70 */
1330 0 : }else if ((currLink & APRESENT) > 0) {
1331 0 : dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape);
1332 0 : }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) {
1333 0 : dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape);
1334 : }
1335 : }
1336 : }
1337 :
1338 : /* move one notch forward */
/* characters with the IRRELEVANT bit do not become the "last" character */
1339 0 : if ((currLink & IRRELEVANT) == 0) {
1340 0 : prevLink = lastLink;
1341 0 : lastLink = currLink;
1342 0 : lastPos = i;
1343 : }
1344 :
1345 0 : i = i + step;
/* reuse the link that the look-ahead already fetched when i reaches it, and
 * mark the look-ahead as stale again */
1346 0 : if (i == Nx) {
1347 0 : currLink = nextLink;
1348 0 : Nx = -2;
1349 0 : } else if(i != iend) {
1350 0 : currLink = getLink(dest[i]);
1351 : }
1352 : }
1353 0 : destSize = sourceLength;
/* placeholders (LAMALEF_SPACE_SUB / TASHKEEL_SPACE_SUB) were written above:
 * let handleGeneratedSpaces() deal with them according to options */
1354 0 : if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){
1355 0 : destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
1356 : }
1357 :
/* note that sourceLength, not the possibly changed destSize, is passed as the
 * text length here */
1358 0 : if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
1359 0 : destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars);
1360 : }
1361 0 : return destSize;
1362 : }
1363 :
1364 : /*
1365 : *Name : deShapeUnicode
1366 : *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
1367 : * arabic Unicode buffer in 06xx Range
1368 : */
1369 : static int32_t
1370 0 : deShapeUnicode(UChar *dest, int32_t sourceLength,
1371 : int32_t destSize,uint32_t options,
1372 : UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
1373 0 : int32_t i = 0;
1374 0 : int32_t lamalef_found = 0;
1375 0 : int32_t yehHamzaComposeEnabled = 0;
1376 0 : int32_t seenComposeEnabled = 0;
1377 :
1378 0 : yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
1379 0 : seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0;
1380 :
1381 : /*
1382 : *This for loop changes the buffer from the Unicode FE range to
1383 : *the Unicode 06 range
1384 : */
1385 :
1386 0 : for(i = 0; i < sourceLength; i++) {
1387 0 : UChar inputChar = dest[i];
1388 0 : if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
1389 0 : UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
1390 0 : if (c != 0)
1391 0 : dest[i] = c;
1392 0 : } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR))
1393 0 : && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) {
1394 0 : dest[i] = SPACE_CHAR;
1395 0 : dest[i+1] = YEH_HAMZA_CHAR;
1396 0 : } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1))
1397 0 : && (isSeenTailFamilyChar(dest[i+1])) ) {
1398 0 : dest[i] = SPACE_CHAR;
1399 0 : } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
1400 0 : dest[i] = convertFEto06 [ (inputChar - 0xFE70) ];
1401 : } else {
1402 0 : dest[i] = inputChar ;
1403 : }
1404 :
1405 0 : if( isLamAlefChar(dest[i]) )
1406 0 : lamalef_found = 1;
1407 : }
1408 :
1409 0 : destSize = sourceLength;
1410 0 : if (lamalef_found != 0){
1411 0 : destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars);
1412 : }
1413 0 : return destSize;
1414 : }
1415 :
1416 : /*
1417 : ****************************************
1418 : * u_shapeArabic
1419 : ****************************************
1420 : */
1421 :
1422 : U_CAPI int32_t U_EXPORT2
1423 0 : u_shapeArabic(const UChar *source, int32_t sourceLength,
1424 : UChar *dest, int32_t destCapacity,
1425 : uint32_t options,
1426 : UErrorCode *pErrorCode) {
1427 :
1428 : int32_t destLength;
1429 0 : struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0};
1430 :
1431 : /* usual error checking */
1432 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1433 0 : return 0;
1434 : }
1435 :
1436 : /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
1437 0 : if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 ||
1438 0 : (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
1439 0 : ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) ||
1440 0 : (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
1441 0 : ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) ||
1442 0 : (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED ||
1443 0 : (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED ||
1444 0 : ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE &&
1445 0 : (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) ||
1446 0 : ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL &&
1447 0 : (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)
1448 : )
1449 : {
1450 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1451 0 : return 0;
1452 : }
1453 : /* Validate lamalef options */
1454 0 : if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
1455 0 : !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) ||
1456 0 : ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) ||
1457 0 : ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )||
1458 0 : ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) ||
1459 0 : ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR)))
1460 : {
1461 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1462 0 : return 0;
1463 : }
1464 : /* Validate Tashkeel options */
1465 0 : if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
1466 0 : !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) ||
1467 0 : ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END )
1468 0 : ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )||
1469 0 : ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
1470 : {
1471 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1472 0 : return 0;
1473 : }
1474 : /* determine the source length */
1475 0 : if(sourceLength==-1) {
1476 0 : sourceLength=u_strlen(source);
1477 : }
1478 0 : if(sourceLength<=0) {
1479 0 : return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
1480 : }
1481 :
1482 : /* check that source and destination do not overlap */
1483 0 : if( dest!=NULL &&
1484 0 : ((source<=dest && dest<source+sourceLength) ||
1485 0 : (dest<=source && source<dest+destCapacity))) {
1486 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1487 0 : return 0;
1488 : }
1489 :
1490 : /* Does Options contain the new Seen Tail Unicode code point option */
1491 0 : if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){
1492 0 : shapeVars.tailChar = NEW_TAIL_CHAR;
1493 : }else {
1494 0 : shapeVars.tailChar = OLD_TAIL_CHAR;
1495 : }
1496 :
1497 0 : if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
1498 : UChar buffer[300];
1499 0 : UChar *tempbuffer, *tempsource = NULL;
1500 0 : int32_t outputSize, spacesCountl=0, spacesCountr=0;
1501 :
1502 0 : if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) {
1503 0 : int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL;
1504 : int32_t aggregate_tashkeel =
1505 0 : (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) ==
1506 0 : (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED);
1507 0 : int step=logical_order?1:-1;
1508 0 : int j=logical_order?-1:2*sourceLength;
1509 0 : int i=logical_order?-1:sourceLength;
1510 0 : int end=logical_order?sourceLength:-1;
1511 0 : int aggregation_possible = 1;
1512 0 : UChar prev = 0;
1513 0 : UChar prevLink, currLink = 0;
1514 0 : int newSourceLength = 0;
1515 0 : tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR);
1516 0 : if(tempsource == NULL) {
1517 0 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1518 0 : return 0;
1519 : }
1520 :
1521 0 : while ((i+=step) != end) {
1522 0 : prevLink = currLink;
1523 0 : currLink = getLink(source[i]);
1524 0 : if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) {
1525 0 : aggregation_possible = 0;
1526 0 : tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
1527 0 : currLink = getLink(tempsource[j]);
1528 : } else {
1529 0 : aggregation_possible = 1;
1530 0 : tempsource[j+=step] = source[i];
1531 0 : prev = source[i];
1532 0 : newSourceLength++;
1533 : }
1534 : }
1535 0 : source = tempsource+(logical_order?0:j);
1536 0 : sourceLength = newSourceLength;
1537 : }
1538 :
1539 : /* calculate destination size */
1540 : /* TODO: do we ever need to do this pure preflighting? */
1541 0 : if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) ||
1542 0 : ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) {
1543 0 : outputSize=calculateSize(source,sourceLength,destCapacity,options);
1544 : } else {
1545 0 : outputSize=sourceLength;
1546 : }
1547 :
1548 0 : if(outputSize>destCapacity) {
1549 0 : *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1550 0 : if (tempsource != NULL) uprv_free(tempsource);
1551 0 : return outputSize;
1552 : }
1553 :
1554 : /*
1555 : * need a temporary buffer of size max(outputSize, sourceLength)
1556 : * because at first we copy source->temp
1557 : */
1558 0 : if(sourceLength>outputSize) {
1559 0 : outputSize=sourceLength;
1560 : }
1561 :
1562 : /* Start of Arabic letter shaping part */
1563 0 : if(outputSize<=UPRV_LENGTHOF(buffer)) {
1564 0 : outputSize=UPRV_LENGTHOF(buffer);
1565 0 : tempbuffer=buffer;
1566 : } else {
1567 0 : tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR);
1568 :
1569 : /*Test for NULL*/
1570 0 : if(tempbuffer == NULL) {
1571 0 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1572 0 : if (tempsource != NULL) uprv_free(tempsource);
1573 0 : return 0;
1574 : }
1575 : }
1576 0 : u_memcpy(tempbuffer, source, sourceLength);
1577 0 : if (tempsource != NULL){
1578 0 : uprv_free(tempsource);
1579 : }
1580 :
1581 0 : if(sourceLength<outputSize) {
1582 0 : uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR);
1583 : }
1584 :
1585 0 : if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
1586 0 : countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
1587 0 : invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
1588 : }
1589 :
1590 0 : if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
1591 0 : if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) {
1592 0 : shapeVars.spacesRelativeToTextBeginEnd = 1;
1593 0 : shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
1594 0 : shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
1595 0 : shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
1596 0 : shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
1597 : }
1598 : }
1599 :
1600 0 : switch(options&U_SHAPE_LETTERS_MASK) {
1601 : case U_SHAPE_LETTERS_SHAPE :
1602 0 : if( (options&U_SHAPE_TASHKEEL_MASK)> 0
1603 0 : && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) {
1604 : /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
1605 0 : destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
1606 : }else {
1607 : /* default Call the shaping function with tashkeel flag == 1 */
1608 0 : destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);
1609 :
1610 : /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
1611 0 : if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){
1612 0 : destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode);
1613 : }
1614 : }
1615 0 : break;
1616 : case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
1617 : /* Call the shaping function with tashkeel flag == 0 */
1618 0 : destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
1619 0 : break;
1620 :
1621 : case U_SHAPE_LETTERS_UNSHAPE :
1622 : /* Call the deshaping function */
1623 0 : destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
1624 0 : break;
1625 : default :
1626 : /* will never occur because of validity checks above */
1627 0 : destLength = 0;
1628 0 : break;
1629 : }
1630 :
1631 : /*
1632 : * TODO: (markus 2002aug01)
1633 : * For as long as we always preflight the outputSize above
1634 : * we should U_ASSERT(outputSize==destLength)
1635 : * except for the adjustment above before the tempbuffer allocation
1636 : */
1637 :
1638 0 : if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
1639 0 : countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
1640 0 : invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
1641 : }
1642 0 : u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity));
1643 :
1644 0 : if(tempbuffer!=buffer) {
1645 0 : uprv_free(tempbuffer);
1646 : }
1647 :
1648 0 : if(destLength>destCapacity) {
1649 0 : *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1650 0 : return destLength;
1651 : }
1652 :
1653 : /* End of Arabic letter shaping part */
1654 : } else {
1655 : /*
1656 : * No letter shaping:
1657 : * just make sure the destination is large enough and copy the string.
1658 : */
1659 0 : if(destCapacity<sourceLength) {
1660 : /* this catches preflighting, too */
1661 0 : *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1662 0 : return sourceLength;
1663 : }
1664 0 : u_memcpy(dest, source, sourceLength);
1665 0 : destLength=sourceLength;
1666 : }
1667 :
1668 : /*
1669 : * Perform number shaping.
1670 : * With UTF-16 or UTF-32, the length of the string is constant.
1671 : * The easiest way to do this is to operate on the destination and
1672 : * "shape" the digits in-place.
1673 : */
1674 0 : if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
1675 : UChar digitBase;
1676 : int32_t i;
1677 :
1678 : /* select the requested digit group */
1679 0 : switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
1680 : case U_SHAPE_DIGIT_TYPE_AN:
1681 0 : digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
1682 0 : break;
1683 : case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
1684 0 : digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
1685 0 : break;
1686 : default:
1687 : /* will never occur because of validity checks above */
1688 0 : digitBase=0;
1689 0 : break;
1690 : }
1691 :
1692 : /* perform the requested operation */
1693 0 : switch(options&U_SHAPE_DIGITS_MASK) {
1694 : case U_SHAPE_DIGITS_EN2AN:
1695 : /* add (digitBase-'0') to each European (ASCII) digit code point */
1696 0 : digitBase-=0x30;
1697 0 : for(i=0; i<destLength; ++i) {
1698 0 : if(((uint32_t)dest[i]-0x30)<10) {
1699 0 : dest[i]+=digitBase;
1700 : }
1701 : }
1702 0 : break;
1703 : case U_SHAPE_DIGITS_AN2EN:
1704 : /* subtract (digitBase-'0') from each Arabic digit code point */
1705 0 : for(i=0; i<destLength; ++i) {
1706 0 : if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
1707 0 : dest[i]-=digitBase-0x30;
1708 : }
1709 : }
1710 0 : break;
1711 : case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
1712 0 : _shapeToArabicDigitsWithContext(dest, destLength,
1713 : digitBase,
1714 0 : (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
1715 0 : FALSE);
1716 0 : break;
1717 : case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
1718 0 : _shapeToArabicDigitsWithContext(dest, destLength,
1719 : digitBase,
1720 0 : (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
1721 0 : TRUE);
1722 0 : break;
1723 : default:
1724 : /* will never occur because of validity checks above */
1725 0 : break;
1726 : }
1727 : }
1728 :
1729 0 : return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
1730 : }
|