Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #ifndef nsBidiUtils_h__
7 : #define nsBidiUtils_h__
8 :
9 : #include "nsStringGlue.h"
10 :
11 : /**
12 : * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
13 : * section BIDIRECTIONAL PROPERTIES
14 : * for the detailed definition of the following categories
15 : *
16 : * The values here must match the equivalents in %bidicategorycode in
17 : * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
18 : * and must also match the values used by ICU's UCharDirection.
19 : */
20 :
enum nsCharType {
  eCharType_LeftToRight              = 0,   // L
  eCharType_RightToLeft              = 1,   // R
  eCharType_EuropeanNumber           = 2,   // EN
  eCharType_EuropeanNumberSeparator  = 3,   // ES
  eCharType_EuropeanNumberTerminator = 4,   // ET
  eCharType_ArabicNumber             = 5,   // AN
  eCharType_CommonNumberSeparator    = 6,   // CS
  eCharType_BlockSeparator           = 7,   // B
  eCharType_SegmentSeparator         = 8,   // S
  eCharType_WhiteSpaceNeutral        = 9,   // WS
  eCharType_OtherNeutral             = 10,  // ON
  eCharType_LeftToRightEmbedding     = 11,  // LRE
  eCharType_LeftToRightOverride      = 12,  // LRO
  eCharType_RightToLeftArabic        = 13,  // AL
  eCharType_RightToLeftEmbedding     = 14,  // RLE
  eCharType_RightToLeftOverride      = 15,  // RLO
  eCharType_PopDirectionalFormat     = 16,  // PDF
  eCharType_DirNonSpacingMark        = 17,  // NSM
  eCharType_BoundaryNeutral          = 18,  // BN
  eCharType_FirstStrongIsolate       = 19,  // FSI
  eCharType_LeftToRightIsolate       = 20,  // LRI
  eCharType_RightToLeftIsolate       = 21,  // RLI
  eCharType_PopDirectionalIsolate    = 22,  // PDI
  // Not a character type: one past the last real value, i.e. the number
  // of types above. Must stay last.
  eCharType_CharTypeCount
};

/**
 * Lets the bidirectional character type be spelled as plain "nsCharType",
 * without the "enum" keyword.
 */
typedef enum nsCharType nsCharType;
52 :
/**
 * Tell whether an embedding level or paragraph level set by the Unicode
 * Bidi Algorithm is right-to-left. (Even levels are left-to-right, odd
 * levels are right-to-left.)
 */
#define IS_LEVEL_RTL(level) (((level) & 1) != 0)
59 :
/**
 * Check whether two bidi levels have the same parity and thus the same
 * directionality.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * built from operators binding less tightly than '^' (for example
 * "a | b") are compared as a whole.
 */
#define IS_SAME_DIRECTION(level1, level2) \
  ((((level1) ^ (level2)) & 1) == 0)
65 :
/**
 * Convert from nsBidiLevel to nsBidiDirection: yields NSBIDI_RTL when
 * IS_LEVEL_RTL(level) holds and NSBIDI_LTR otherwise.
 */
#define DIRECTION_FROM_LEVEL(level) \
  (IS_LEVEL_RTL(level) ? NSBIDI_RTL : NSBIDI_LTR)
71 :
/**
 * Classification of bidirectional character types (nsCharType values)
 * by category.
 *
 * Note that both macros evaluate their argument more than once.
 */

/* True for the two right-to-left letter types. */
#define CHARTYPE_IS_RTL(val)                 \
  (((val) == eCharType_RightToLeft) ||       \
   ((val) == eCharType_RightToLeftArabic))

/*
 * True for the European number separator and terminator, and for every
 * type numerically greater than eCharType_ArabicNumber except
 * eCharType_RightToLeftArabic.
 */
#define CHARTYPE_IS_WEAK(val)                       \
  (((val) == eCharType_EuropeanNumberSeparator) ||  \
   ((val) == eCharType_EuropeanNumberTerminator) || \
   (((val) > eCharType_ArabicNumber) &&             \
    ((val) != eCharType_RightToLeftArabic)))
81 :
82 : /**
83 : * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them
84 : * @param aChar is the character
85 : * @param aPrevCharArabic is true if the previous character in the string is an Arabic char
86 : * @param aNumFlag specifies the conversion to perform:
87 : * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion
88 : * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669)
89 : * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039)
90 : * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
91 : * @return the converted Unichar
92 : */
93 : char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
94 :
95 : /**
96 : * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
97 : * @param aBuffer is the string
98 : * @param aSize is the size of aBuffer
99 : * @param aNumFlag specifies the conversion to perform:
100 : * IBMBIDI_NUMERAL_NOMINAL: don't do any conversion
101 : * IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669)
102 : * IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039)
103 : * IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
104 : */
105 : nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag);
106 :
/**
 * Code points of the Unicode bidi control characters.
 */
#define LRM_CHAR 0x200e
#define RLM_CHAR 0x200f

#define LRE_CHAR 0x202a
#define RLE_CHAR 0x202b
#define PDF_CHAR 0x202c
#define LRO_CHAR 0x202d
#define RLO_CHAR 0x202e

#define LRI_CHAR 0x2066
#define RLI_CHAR 0x2067
#define FSI_CHAR 0x2068
#define PDI_CHAR 0x2069

#define ALM_CHAR 0x061C

/**
 * Given a UTF-32 codepoint, return true if the codepoint is a Bidi control
 * character (LRM, RLM, ALM; LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
 * Return false otherwise.
 *
 * @param aChar the codepoint to test
 */
inline bool IsBidiControl(uint32_t aChar) {
  return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
          (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
          (aChar == ALM_CHAR) ||
          // LRM (0x200e) and RLM (0x200f) differ only in bit 0, so clear
          // just that bit and compare. The mask must keep all other 31
          // bits; otherwise values such as 0x0100200e would match too.
          (aChar & 0xfffffffeu) == LRM_CHAR);
}
134 :
135 : /**
136 : * Give a UTF-32 codepoint
137 : * Return true if the codepoint is a Bidi control character that may result
138 : * in RTL directionality and therefore needs to trigger bidi resolution;
139 : * return false otherwise.
140 : */
141 282 : inline bool IsBidiControlRTL(uint32_t aChar) {
142 282 : return aChar == RLM_CHAR ||
143 282 : aChar == RLE_CHAR ||
144 282 : aChar == RLO_CHAR ||
145 564 : aChar == RLI_CHAR ||
146 282 : aChar == ALM_CHAR;
147 : }
148 :
149 : /**
 * Given a 16-bit (UTF-16) text buffer and its length
151 : * @return true if the string contains right-to-left characters
152 : */
153 : bool HasRTLChars(const char16_t* aText, uint32_t aLength);
154 :
155 : /**
156 : * Convenience function to call the above on an nsAString.
157 : */
158 19 : inline bool HasRTLChars(const nsAString& aString) {
159 19 : return HasRTLChars(aString.BeginReading(), aString.Length());
160 : }
161 :
// Bidi preference names and values.
//
// These values are shared with the Preferences dialog: if a pref value is
// changed in the XUL file of the Prefs dialog, it must be changed here too.
//
// In the value lists below, the entries marked with '*' are the ones that
// IBMBIDI_DEFAULT_BIDI_OPTIONS combines.

// Preference names
#define IBMBIDI_TEXTDIRECTION_STR       "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR            "bidi.texttype"
#define IBMBIDI_NUMERAL_STR             "bidi.numeral"

// Text direction (bidi.direction)
#define IBMBIDI_TEXTDIRECTION_LTR       1  // directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL       2  // directionRTLBidi

// Text type (bidi.texttype)
#define IBMBIDI_TEXTTYPE_CHARSET        1  // charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL        2  // logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL         3  // visualtexttypeBidi

// Numeral style (bidi.numeral)
#define IBMBIDI_NUMERAL_NOMINAL         0  // nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR         1  // regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT    2  // hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC          3  // arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI           4  // hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT  5  // persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN         6  // persiannumeralBidi

// Packed default options: direction in bits 0-3, text type in bits 4-7,
// numeral style in bits 8-11.
#define IBMBIDI_DEFAULT_BIDI_OPTIONS      \
  (IBMBIDI_TEXTDIRECTION_LTR |            \
   (IBMBIDI_TEXTTYPE_CHARSET << 4) |      \
   (IBMBIDI_NUMERAL_NOMINAL << 8))
202 :
/*
 * Accessors for a packed bidi options word (bo):
 *   bits 0-3   DIRECTION
 *   bits 4-7   TEXTTYPE
 *   bits 8-11  NUMERAL
 */
#define GET_BIDI_OPTION_DIRECTION(bo) (((bo) >> 0) & 0x0000000F)
#define GET_BIDI_OPTION_TEXTTYPE(bo)  (((bo) >> 4) & 0x0000000F)
#define GET_BIDI_OPTION_NUMERAL(bo)   (((bo) >> 8) & 0x0000000F)

/*
 * The setters replace one 4-bit field of bo (which must be an lvalue) and
 * leave the other bits untouched. They are wrapped in do { } while (0) so
 * that "SET_...(bo, v);" is a single statement and can be used as the
 * unbraced body of an if/else.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir)                            \
  do {                                                                \
    (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0);         \
  } while (0)
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt)                              \
  do {                                                                \
    (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4);          \
  } while (0)
#define SET_BIDI_OPTION_NUMERAL(bo, num)                              \
  do {                                                                \
    (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8);         \
  } while (0)
210 :
/*
 * First and last code points of the decimal digit ranges used for numeral
 * conversion, and membership tests for each range.
 */
#define START_HINDI_DIGITS   0x0660
#define END_HINDI_DIGITS     0x0669
#define START_ARABIC_DIGITS  0x0030
#define END_ARABIC_DIGITS    0x0039
#define START_FARSI_DIGITS   0x06f0
#define END_FARSI_DIGITS     0x06f9

#define IS_HINDI_DIGIT(u) \
  (((u) >= START_HINDI_DIGITS) && ((u) <= END_HINDI_DIGITS))
#define IS_ARABIC_DIGIT(u) \
  (((u) >= START_ARABIC_DIGITS) && ((u) <= END_ARABIC_DIGITS))
#define IS_FARSI_DIGIT(u) \
  (((u) >= START_FARSI_DIGITS) && ((u) <= END_FARSI_DIGITS))
/**
 * Arabic numeric separator and numeric formatting characters:
 * U+0600;ARABIC NUMBER SIGN
 * U+0601;ARABIC SIGN SANAH
 * U+0602;ARABIC FOOTNOTE MARKER
 * U+0603;ARABIC SIGN SAFHA
 * U+066A;ARABIC PERCENT SIGN
 * U+066B;ARABIC DECIMAL SEPARATOR
 * U+066C;ARABIC THOUSANDS SEPARATOR
 * U+06DD;ARABIC END OF AYAH
 *
 * The lower bound of the first range is checked explicitly so that the
 * macro gives the right answer on its own: without it, every value below
 * U+0600 (including all of ASCII) would be classified as a separator.
 */
#define IS_ARABIC_SEPARATOR(u) ( ( (u) >= 0x0600 && (u) <= 0x0603 ) || \
                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
                                 ( (u) == 0x06DD ) )
235 :
/*
 * True if u lies in one of the listed combining-mark ranges: the first
 * five lines are in the Hebrew block (U+0591..U+05C4), the remaining five
 * in the Arabic block (U+064B..U+06ED).
 */
#define IS_BIDI_DIACRITIC(u) (               \
    ((u) >= 0x0591 && (u) <= 0x05A1) ||      \
    ((u) >= 0x05A3 && (u) <= 0x05B9) ||      \
    ((u) >= 0x05BB && (u) <= 0x05BD) ||      \
    ((u) == 0x05BF) ||                       \
    ((u) >= 0x05C1 && (u) <= 0x05C2) ||      \
    ((u) == 0x05C4) ||                       \
    ((u) >= 0x064B && (u) <= 0x0652) ||      \
    ((u) == 0x0670) ||                       \
    ((u) >= 0x06D7 && (u) <= 0x06E4) ||      \
    ((u) >= 0x06E7 && (u) <= 0x06E8) ||      \
    ((u) >= 0x06EA && (u) <= 0x06ED) )
243 :
/* U+0590..U+05FF, or U+FB1D..U+FB4F. */
#define IS_HEBREW_CHAR(c)                    \
  ((((c) >= 0x0590) && ((c) <= 0x05FF)) ||   \
   (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))

/* U+0600..U+06FF, U+0750..U+077F, or U+08A0..U+08FF. */
#define IS_ARABIC_CHAR(c)                    \
  ((((c) >= 0x0600) && ((c) <= 0x06ff)) ||   \
   (((c) >= 0x0750) && ((c) <= 0x077f)) ||   \
   (((c) >= 0x08a0) && ((c) <= 0x08FF)))

/* An Arabic character that is not a Hindi digit, Farsi digit or separator. */
#define IS_ARABIC_ALPHABETIC(c)              \
  (IS_ARABIC_CHAR(c) &&                      \
   !IS_HINDI_DIGIT(c) &&                     \
   !IS_FARSI_DIGIT(c) &&                     \
   !IS_ARABIC_SEPARATOR(c))
251 :
/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to
 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
 * http://www.unicode.org/roadmaps/
 */

#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
                                     ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
                                ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))

/**
 * Test a single UTF-16 code unit. A supplementary-plane character cannot
 * be decoded from one code unit, so the SMP RTL blocks are recognized by
 * their high surrogates instead:
 *   U+10800..U+10FFF  ->  0xD802, 0xD803
 *   U+1E800..U+1EFFF  ->  0xD83A, 0xD83B
 * This keeps the macro in agreement with IS_IN_SMP_RTL_BLOCK.
 */
#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
                              (IS_RTL_PRESENTATION_FORM(c)) || \
                              (c) == 0xD802 || (c) == 0xD803 || \
                              (c) == 0xD83A || (c) == 0xD83B)

/**
 * Test a full UTF-32 codepoint.
 */
#define UTF32_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
                               (IS_RTL_PRESENTATION_FORM(c)) || \
                               (IS_IN_SMP_RTL_BLOCK(c)))
272 : #endif /* nsBidiUtils_h__ */
|