LCOV - code coverage report
Current view: top level - intl/unicharutil/util - nsBidiUtils.h (source / functions) Hit Total Coverage
Test: output.info Lines: 11 13 84.6 %
Date: 2017-07-14 16:53:18 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #ifndef nsBidiUtils_h__
       7             : #define nsBidiUtils_h__
       8             : 
       9             : #include "nsStringGlue.h"
      10             : 
      11             :    /**
      12             :     *  Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
      13             :     *  section BIDIRECTIONAL PROPERTIES
      14             :     *  for the detailed definition of the following categories
      15             :     *
      16             :     *  The values here must match the equivalents in %bidicategorycode in
      17             :     *  mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
      18             :     *  and must also match the values used by ICU's UCharDirection.
      19             :     */
      20             : 
      21             : enum nsCharType   {
      22             :   eCharType_LeftToRight              = 0,  // Bidi_Class L
      23             :   eCharType_RightToLeft              = 1,  // R
      24             :   eCharType_EuropeanNumber           = 2,  // EN
      25             :   eCharType_EuropeanNumberSeparator  = 3,  // ES
      26             :   eCharType_EuropeanNumberTerminator = 4,  // ET
      27             :   eCharType_ArabicNumber             = 5,  // AN
      28             :   eCharType_CommonNumberSeparator    = 6,  // CS
      29             :   eCharType_BlockSeparator           = 7,  // B
      30             :   eCharType_SegmentSeparator         = 8,  // S
      31             :   eCharType_WhiteSpaceNeutral        = 9,  // WS
      32             :   eCharType_OtherNeutral             = 10, // ON
      33             :   eCharType_LeftToRightEmbedding     = 11, // LRE
      34             :   eCharType_LeftToRightOverride      = 12, // LRO
      35             :   eCharType_RightToLeftArabic        = 13, // AL
      36             :   eCharType_RightToLeftEmbedding     = 14, // RLE
      37             :   eCharType_RightToLeftOverride      = 15, // RLO
      38             :   eCharType_PopDirectionalFormat     = 16, // PDF
      39             :   eCharType_DirNonSpacingMark        = 17, // NSM
      40             :   eCharType_BoundaryNeutral          = 18, // BN
      41             :   eCharType_FirstStrongIsolate       = 19, // FSI
      42             :   eCharType_LeftToRightIsolate       = 20, // LRI
      43             :   eCharType_RightToLeftIsolate       = 21, // RLI
      44             :   eCharType_PopDirectionalIsolate    = 22, // PDI
      45             :   eCharType_CharTypeCount                  // implicit value 23: the number of categories listed above
      46             : };
      47             : 
      48             : /**
      49             :  * This specifies the language directional property of a character set.
      50             :  */
      51             : typedef enum nsCharType nsCharType;
      52             : 
      53             : /**
      54             :  * Find the direction of an embedding level or paragraph level set by
      55             :  * the Unicode Bidi Algorithm. (Even levels are left-to-right, odd
      56             :  * levels right-to-left.) True when bit 0 of `level` is set.
      57             :  */
      58             : #define IS_LEVEL_RTL(level) (((level) & 1) == 1)
      59             : 
      60             : /**
      61             :  * True when two bidi levels have the same parity, i.e. the same directionality.
      62             :  * Each argument is parenthesized so compound expressions expand correctly.
      63             :  */
      64             : #define IS_SAME_DIRECTION(level1, level2) ((((level1) ^ (level2)) & 1) == 0)
      65             : 
      66             : /**
      67             :  * Convert from nsBidiLevel to nsBidiDirection: NSBIDI_RTL when IS_LEVEL_RTL(level), otherwise NSBIDI_LTR.
      68             :  */
      69             : #define DIRECTION_FROM_LEVEL(level) ((IS_LEVEL_RTL(level)) \
      70             :    ? NSBIDI_RTL : NSBIDI_LTR) /* NSBIDI_RTL / NSBIDI_LTR are not defined in this header */
      71             : 
      72             : /**
      73             :  * Predicates grouping bidi character types by category; each evaluates `val` more than once.
      74             :  */
      75             : 
      76             : #define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || ( (val) == eCharType_RightToLeftArabic) )
      77             : 
      78             : #define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator)    \
      79             :                            || ( (val) == eCharType_EuropeanNumberTerminator) \
      80             :                            || ( ( (val) > eCharType_ArabicNumber) && ( (val) != eCharType_RightToLeftArabic) ) ) /* relies on the numeric order of nsCharType */
      81             : 
      82             :   /**
      83             :    * Inspects a Unichar, converting numbers to Arabic or Hindi forms and returning them
      84             :    * @param aChar is the character
      85             :    * @param aPrevCharArabic is true if the previous character in the string is an Arabic char
      86             :    * @param aNumFlag specifies the conversion to perform:
      87             :    *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
      88             :    *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
      89             :    *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
      90             :    *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
      91             :    * @return the converted Unichar
      92             :    */
      93             :   char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
      94             : 
      95             :   /**
      96             :    * Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
      97             :    * @param aBuffer is the string
      98             :    * @param aSize is the size of aBuffer
      99             :    * @param aNumFlag specifies the conversion to perform:
     100             :    *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
     101             :    *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms (Unicode 0660-0669)
     102             :    *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms (Unicode 0030-0039)
     103             :    *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
     104             :    */
     105             :   nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t  aNumFlag);
     106             : 
     107             :   /**
     108             :    * Given a UTF-32 codepoint,
     109             :    * return true if the codepoint is a Bidi control character (LRM, RLM, ALM;
     110             :    * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
     111             :    * Return false otherwise.
     112             :    */
     113             : #define LRM_CHAR 0x200e
     114             : #define RLM_CHAR 0x200f
     115             : 
     116             : #define LRE_CHAR 0x202a
     117             : #define RLE_CHAR 0x202b
     118             : #define PDF_CHAR 0x202c
     119             : #define LRO_CHAR 0x202d
     120             : #define RLO_CHAR 0x202e
     121             : 
     122             : #define LRI_CHAR 0x2066
     123             : #define RLI_CHAR 0x2067
     124             : #define FSI_CHAR 0x2068
     125             : #define PDI_CHAR 0x2069
     126             : 
     127             : #define ALM_CHAR 0x061C
     128           9 :    inline bool IsBidiControl(uint32_t aChar) { // true for LRE..RLO, LRI..PDI, ALM, LRM and RLM
     129           0 :      return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
     130           0 :              (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
     131          18 :              (aChar == ALM_CHAR) ||
     132          18 :              (aChar & 0xfffffffe) == LRM_CHAR); // LRM/RLM differ only in bit 0; mask keeps all upper bits of the 32-bit value
     133             :    }
     134             : 
     135             :   /**
     136             :    * Given a UTF-32 codepoint,
     137             :    * return true if the codepoint is a Bidi control character that may result
     138             :    * in RTL directionality and therefore needs to trigger bidi resolution
     139             :    * (RLM, RLE, RLO, RLI or ALM); return false otherwise.
     140             :    */
     141         282 :    inline bool IsBidiControlRTL(uint32_t aChar) {
     142         282 :      return aChar == RLM_CHAR ||
     143         282 :             aChar == RLE_CHAR ||
     144         282 :             aChar == RLO_CHAR ||
     145         564 :             aChar == RLI_CHAR ||
     146         282 :             aChar == ALM_CHAR;
     147             :    }
     148             : 
     149             :   /**
     150             :    * Given a 16-bit (UTF-16) text buffer and its length,
     151             :    * @return true if the string contains right-to-left characters
     152             :    */
     153             :    bool HasRTLChars(const char16_t* aText, uint32_t aLength);
     154             : 
     155             :   /**
     156             :    * Convenience overload: calls the buffer version with aString.BeginReading() and aString.Length().
     157             :    */
     158          19 :    inline bool HasRTLChars(const nsAString& aString) {
     159          19 :      return HasRTLChars(aString.BeginReading(), aString.Length());
     160             :    }
     161             : 
     162             : // These values are shared with the Preferences dialog
     163             : //  ------------------
     164             : //  If the pref values are changed
     165             : //  in the Preferences XUL file, the values
     166             : //  must be changed here too.
     167             : //  ------------------
     168             : //
     169             : #define IBMBIDI_TEXTDIRECTION_STR       "bidi.direction"
     170             : #define IBMBIDI_TEXTTYPE_STR            "bidi.texttype"
     171             : #define IBMBIDI_NUMERAL_STR             "bidi.numeral"
     172             : 
     173             : //  ------------------
     174             : //  Text Direction
     175             : //  ------------------
     176             : //  bidi.direction
     177             : #define IBMBIDI_TEXTDIRECTION_LTR     1 //  1 = directionLTRBidi *
     178             : #define IBMBIDI_TEXTDIRECTION_RTL     2 //  2 = directionRTLBidi
     179             : //  ------------------
     180             : //  Text Type
     181             : //  ------------------
     182             : //  bidi.texttype
     183             : #define IBMBIDI_TEXTTYPE_CHARSET      1 //  1 = charsettexttypeBidi *
     184             : #define IBMBIDI_TEXTTYPE_LOGICAL      2 //  2 = logicaltexttypeBidi
     185             : #define IBMBIDI_TEXTTYPE_VISUAL       3 //  3 = visualtexttypeBidi
     186             : //  ------------------
     187             : //  Numeral Style
     188             : //  ------------------
     189             : //  bidi.numeral
     190             : #define IBMBIDI_NUMERAL_NOMINAL       0 //  0 = nominalnumeralBidi *
     191             : #define IBMBIDI_NUMERAL_REGULAR       1 //  1 = regularcontextnumeralBidi
     192             : #define IBMBIDI_NUMERAL_HINDICONTEXT  2 //  2 = hindicontextnumeralBidi
     193             : #define IBMBIDI_NUMERAL_ARABIC        3 //  3 = arabicnumeralBidi
     194             : #define IBMBIDI_NUMERAL_HINDI         4 //  4 = hindinumeralBidi
     195             : #define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 // 5 = persiancontextnumeralBidi
     196             : #define IBMBIDI_NUMERAL_PERSIAN       6 //  6 = persiannumeralBidi
     197             : 
     198             : #define IBMBIDI_DEFAULT_BIDI_OPTIONS              \
     199             :         ((IBMBIDI_TEXTDIRECTION_LTR<<0)         | \
     200             :          (IBMBIDI_TEXTTYPE_CHARSET<<4)          | \
     201             :          (IBMBIDI_NUMERAL_NOMINAL<<8)) /* packed layout: bits 0-3 direction, bits 4-7 text type, bits 8-11 numeral */
     202             : 
     203             : #define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0) & 0x0000000F) /* bits 0-3: DIRECTION */
     204             : #define GET_BIDI_OPTION_TEXTTYPE(bo) (((bo)>>4) & 0x0000000F) /* bits 4-7: TEXTTYPE */
     205             : #define GET_BIDI_OPTION_NUMERAL(bo) (((bo)>>8) & 0x0000000F) /* bits 8-11: NUMERAL */
     206             : 
     207             : #define SET_BIDI_OPTION_DIRECTION(bo, dir) {(bo)=((bo) & 0xFFFFFFF0)|(((dir)& 0x0000000F)<<0);} /* expands to a brace block, not an expression; `bo` is evaluated twice */
     208             : #define SET_BIDI_OPTION_TEXTTYPE(bo, tt) {(bo)=((bo) & 0xFFFFFF0F)|(((tt)& 0x0000000F)<<4);} /* brace block; avoid `if (c) SET_...; else ...` */
     209             : #define SET_BIDI_OPTION_NUMERAL(bo, num) {(bo)=((bo) & 0xFFFFF0FF)|(((num)& 0x0000000F)<<8);} /* brace block; `bo` is evaluated twice */
     210             : 
     211             : /* Code point ranges of the three digit sets handled here; each IS_*_DIGIT macro evaluates `u` twice */
     212             : #define START_HINDI_DIGITS              0x0660 /* ARABIC-INDIC DIGIT ZERO */
     213             : #define END_HINDI_DIGITS                0x0669 /* ARABIC-INDIC DIGIT NINE */
     214             : #define START_ARABIC_DIGITS             0x0030 /* ASCII DIGIT ZERO */
     215             : #define END_ARABIC_DIGITS               0x0039 /* ASCII DIGIT NINE */
     216             : #define START_FARSI_DIGITS              0x06f0 /* EXTENDED ARABIC-INDIC DIGIT ZERO */
     217             : #define END_FARSI_DIGITS                0x06f9 /* EXTENDED ARABIC-INDIC DIGIT NINE */
     218             : #define IS_HINDI_DIGIT(u)   ( ( (u) >= START_HINDI_DIGITS )  && ( (u) <= END_HINDI_DIGITS ) )
     219             : #define IS_ARABIC_DIGIT(u)  ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )
     220             : #define IS_FARSI_DIGIT(u)  ( ( (u) >= START_FARSI_DIGITS ) && ( (u) <= END_FARSI_DIGITS ) )
     221             : /**
     222             :  * True for the Arabic numeric separator and numeric formatting characters (both bounds of every range are checked):
     223             :  *  U+0600;ARABIC NUMBER SIGN
     224             :  *  U+0601;ARABIC SIGN SANAH
     225             :  *  U+0602;ARABIC FOOTNOTE MARKER
     226             :  *  U+0603;ARABIC SIGN SAFHA
     227             :  *  U+066A;ARABIC PERCENT SIGN
     228             :  *  U+066B;ARABIC DECIMAL SEPARATOR
     229             :  *  U+066C;ARABIC THOUSANDS SEPARATOR
     230             :  *  U+06DD;ARABIC END OF AYAH
     231             :  */
     232             : #define IS_ARABIC_SEPARATOR(u) ( ( (u) >= 0x0600 && (u) <= 0x0603 ) || \
     233             :                                  ( (u) >= 0x066A && (u) <= 0x066C ) || \
     234             :                                  ( (u) == 0x06DD ) )
     235             : 
     236             : #define IS_BIDI_DIACRITIC(u) ( /* selected code points in U+0591..05C4 and U+064B..06ED */ \
     237             :   ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \
     238             :     || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \
     239             :     || ( (u) == 0x05C2) || ( (u) == 0x05C4) \
     240             :     || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \
     241             :     || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \
     242             :     || ( (u) >= 0x06EA && (u) <= 0x06ED) )
     243             : 
     244             : #define IS_HEBREW_CHAR(c) (((0x0590 <= (c)) && ((c) <= 0x05FF)) || (((c) >= 0xfb1d) && ((c) <= 0xfb4f))) /* U+0590..05FF or U+FB1D..FB4F */
     245             : #define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) &&   \
     246             :                             ( (c) <= 0x06ff ||                    \
     247             :                               ((c) >= 0x0750 && (c) <= 0x077f) || \
     248             :                               (c) >= 0x08a0 ) ) /* net effect: U+0600..06FF, U+0750..077F or U+08A0..08FF */
     249             : #define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
     250             :                                 !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c))) /* Arabic char that is neither a digit nor a separator */
     251             : 
     252             : /**
     253             :  * The codepoint ranges in the following macros are based on the blocks
     254             :  *  allocated, or planned to be allocated, to right-to-left characters in the
     255             :  *  BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
     256             :  *  according to http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt
     257             :  *  and http://www.unicode.org/roadmaps/. UCS2_CHAR_IS_BIDI takes a single UTF-16 code unit and
     258             :  *  flags the high surrogates of both SMP ranges: D802-D803 (U+10800..10FFF) and D83A-D83B (U+1E800..1EFFF).
     259             :  */
     260             : 
     261             : #define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
     262             : #define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
     263             :                                      ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
     264             : #define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
     265             :                                 ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
     266             : #define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
     267             :                               (IS_RTL_PRESENTATION_FORM(c)) || \
     268             :                               (c) == 0xD802 || (c) == 0xD803 || (c) == 0xD83A || (c) == 0xD83B)
     269             : #define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) || \
     270             :                                (IS_RTL_PRESENTATION_FORM(c)) || \
     271             :                                (IS_IN_SMP_RTL_BLOCK(c)))
     272             : #endif  /* nsBidiUtils_h__ */

Generated by: LCOV version 1.13