LCOV - code coverage report
Current view: top level - intl/icu/source/common - ushape.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 665 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 26 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             :  ******************************************************************************
       5             :  *
       6             :  *   Copyright (C) 2000-2016, International Business Machines
       7             :  *   Corporation and others.  All Rights Reserved.
       8             :  *
       9             :  ******************************************************************************
      10             :  *   file name:  ushape.cpp
      11             :  *   encoding:   UTF-8
      12             :  *   tab size:   8 (not used)
      13             :  *   indentation:4
      14             :  *
      15             :  *   created on: 2000jun29
      16             :  *   created by: Markus W. Scherer
      17             :  *
      18             :  *   Arabic letter shaping implemented by Ayman Roshdy
      19             :  */
      20             : 
      21             : #include "unicode/utypes.h"
      22             : #include "unicode/uchar.h"
      23             : #include "unicode/ustring.h"
      24             : #include "unicode/ushape.h"
      25             : #include "cmemory.h"
      26             : #include "putilimp.h"
      27             : #include "ustr_imp.h"
      28             : #include "ubidi_props.h"
      29             : #include "uassert.h"
      30             : 
      31             : /*
      32             :  * This implementation is designed for 16-bit Unicode strings.
      33             :  * The main assumption is that the Arabic characters and their
      34             :  * presentation forms each fit into a single UChar.
      35             :  * In UTF-8 they occupy 2 or 3 bytes each, which is more than ASCII
      36             :  * characters need.
      37             :  */
      38             : 
      39             : /*
      40             :  * ### TODO in general for letter shaping:
      41             :  * - the letter shaping code is UTF-16-unaware; needs update
      42             :  *   + especially invertBuffer()?!
      43             :  * - needs to handle the "Arabic Tail" that is used in some legacy codepages
      44             :  *   as a glyph fragment of wide-glyph letters
      45             :  *   + IBM Unicode conversion tables map it to U+200B (ZWSP)
      46             :  *   + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
      47             :  *   + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT
      48             :  */
      49             : 
      50             : /* definitions for Arabic letter shaping ------------------------------------ */
      51             : 
      52             : #define IRRELEVANT 4
      53             : #define LAMTYPE    16
      54             : #define ALEFTYPE   32
      55             : #define LINKR      1
      56             : #define LINKL      2
      57             : #define APRESENT   8
      58             : #define SHADDA     64
      59             : #define CSHADDA    128
      60             : #define COMBINE    (SHADDA+CSHADDA)
      61             : 
      62             : #define HAMZAFE_CHAR       0xfe80
      63             : #define HAMZA06_CHAR       0x0621
      64             : #define YEH_HAMZA_CHAR     0x0626
      65             : #define YEH_HAMZAFE_CHAR   0xFE89
      66             : #define LAMALEF_SPACE_SUB  0xFFFF
      67             : #define TASHKEEL_SPACE_SUB 0xFFFE
      68             : #define NEW_TAIL_CHAR      0xFE73
      69             : #define OLD_TAIL_CHAR      0x200B
      70             : #define LAM_CHAR           0x0644
      71             : #define SPACE_CHAR         0x0020
      72             : #define SHADDA_CHAR        0xFE7C
      73             : #define TATWEEL_CHAR       0x0640
      74             : #define SHADDA_TATWEEL_CHAR  0xFE7D
      75             : #define SHADDA06_CHAR      0x0651
      76             : 
      77             : #define SHAPE_MODE   0
      78             : #define DESHAPE_MODE 1
      79             : 
      80             : struct uShapeVariables {
      81             :      UChar tailChar;
      82             :      uint32_t uShapeLamalefBegin;
      83             :      uint32_t uShapeLamalefEnd;
      84             :      uint32_t uShapeTashkeelBegin;
      85             :      uint32_t uShapeTashkeelEnd;
      86             :      int spacesRelativeToTextBeginEnd;
      87             : };
      88             : 
      89             : static const uint8_t tailFamilyIsolatedFinal[] = {
      90             :     /* FEB1 */ 1,
      91             :     /* FEB2 */ 1,
      92             :     /* FEB3 */ 0,
      93             :     /* FEB4 */ 0,
      94             :     /* FEB5 */ 1,
      95             :     /* FEB6 */ 1,
      96             :     /* FEB7 */ 0,
      97             :     /* FEB8 */ 0,
      98             :     /* FEB9 */ 1,
      99             :     /* FEBA */ 1,
     100             :     /* FEBB */ 0,
     101             :     /* FEBC */ 0,
     102             :     /* FEBD */ 1,
     103             :     /* FEBE */ 1
     104             : };
     105             : 
     106             : static const uint8_t tashkeelMedial[] = {
     107             :     /* FE70 */ 0,
     108             :     /* FE71 */ 1,
     109             :     /* FE72 */ 0,
     110             :     /* FE73 */ 0,
     111             :     /* FE74 */ 0,
     112             :     /* FE75 */ 0,
     113             :     /* FE76 */ 0,
     114             :     /* FE77 */ 1,
     115             :     /* FE78 */ 0,
     116             :     /* FE79 */ 1,
     117             :     /* FE7A */ 0,
     118             :     /* FE7B */ 1,
     119             :     /* FE7C */ 0,
     120             :     /* FE7D */ 1,
     121             :     /* FE7E */ 0,
     122             :     /* FE7F */ 1
     123             : };
     124             : 
     125             : static const UChar yehHamzaToYeh[] =
     126             : {
     127             : /* isolated*/ 0xFEEF,
     128             : /* final   */ 0xFEF0
     129             : };
     130             : 
     131             : static const uint8_t IrrelevantPos[] = {
     132             :     0x0, 0x2, 0x4, 0x6,
     133             :     0x8, 0xA, 0xC, 0xE
     134             : };
     135             : 
     136             : 
     137             : static const UChar convertLamAlef[] =
     138             : {
     139             : /*FEF5*/    0x0622,
     140             : /*FEF6*/    0x0622,
     141             : /*FEF7*/    0x0623,
     142             : /*FEF8*/    0x0623,
     143             : /*FEF9*/    0x0625,
     144             : /*FEFA*/    0x0625,
     145             : /*FEFB*/    0x0627,
     146             : /*FEFC*/    0x0627
     147             : };
     148             : 
     149             : static const UChar araLink[178]=
     150             : {
     151             :   1           + 32 + 256 * 0x11,/*0x0622*/
     152             :   1           + 32 + 256 * 0x13,/*0x0623*/
     153             :   1                + 256 * 0x15,/*0x0624*/
     154             :   1           + 32 + 256 * 0x17,/*0x0625*/
     155             :   1 + 2            + 256 * 0x19,/*0x0626*/
     156             :   1           + 32 + 256 * 0x1D,/*0x0627*/
     157             :   1 + 2            + 256 * 0x1F,/*0x0628*/
     158             :   1                + 256 * 0x23,/*0x0629*/
     159             :   1 + 2            + 256 * 0x25,/*0x062A*/
     160             :   1 + 2            + 256 * 0x29,/*0x062B*/
     161             :   1 + 2            + 256 * 0x2D,/*0x062C*/
     162             :   1 + 2            + 256 * 0x31,/*0x062D*/
     163             :   1 + 2            + 256 * 0x35,/*0x062E*/
     164             :   1                + 256 * 0x39,/*0x062F*/
     165             :   1                + 256 * 0x3B,/*0x0630*/
     166             :   1                + 256 * 0x3D,/*0x0631*/
     167             :   1                + 256 * 0x3F,/*0x0632*/
     168             :   1 + 2            + 256 * 0x41,/*0x0633*/
     169             :   1 + 2            + 256 * 0x45,/*0x0634*/
     170             :   1 + 2            + 256 * 0x49,/*0x0635*/
     171             :   1 + 2            + 256 * 0x4D,/*0x0636*/
     172             :   1 + 2            + 256 * 0x51,/*0x0637*/
     173             :   1 + 2            + 256 * 0x55,/*0x0638*/
     174             :   1 + 2            + 256 * 0x59,/*0x0639*/
     175             :   1 + 2            + 256 * 0x5D,/*0x063A*/
     176             :   0, 0, 0, 0, 0,                /*0x063B-0x063F*/
     177             :   1 + 2,                        /*0x0640*/
     178             :   1 + 2            + 256 * 0x61,/*0x0641*/
     179             :   1 + 2            + 256 * 0x65,/*0x0642*/
     180             :   1 + 2            + 256 * 0x69,/*0x0643*/
     181             :   1 + 2       + 16 + 256 * 0x6D,/*0x0644*/
     182             :   1 + 2            + 256 * 0x71,/*0x0645*/
     183             :   1 + 2            + 256 * 0x75,/*0x0646*/
     184             :   1 + 2            + 256 * 0x79,/*0x0647*/
     185             :   1                + 256 * 0x7D,/*0x0648*/
     186             :   1                + 256 * 0x7F,/*0x0649*/
     187             :   1 + 2            + 256 * 0x81,/*0x064A*/
     188             :          4         + 256 * 1,   /*0x064B*/
     189             :          4 + 128   + 256 * 1,   /*0x064C*/
     190             :          4 + 128   + 256 * 1,   /*0x064D*/
     191             :          4 + 128   + 256 * 1,   /*0x064E*/
     192             :          4 + 128   + 256 * 1,   /*0x064F*/
     193             :          4 + 128   + 256 * 1,   /*0x0650*/
     194             :          4 + 64    + 256 * 3,   /*0x0651*/
     195             :          4         + 256 * 1,   /*0x0652*/
     196             :          4         + 256 * 7,   /*0x0653*/
     197             :          4         + 256 * 8,   /*0x0654*/
     198             :          4         + 256 * 8,   /*0x0655*/
     199             :          4         + 256 * 1,   /*0x0656*/
     200             :   0, 0, 0, 0, 0,                /*0x0657-0x065B*/
     201             :   1                + 256 * 0x85,/*0x065C*/
     202             :   1                + 256 * 0x87,/*0x065D*/
     203             :   1                + 256 * 0x89,/*0x065E*/
     204             :   1                + 256 * 0x8B,/*0x065F*/
     205             :   0, 0, 0, 0, 0,                /*0x0660-0x0664*/
     206             :   0, 0, 0, 0, 0,                /*0x0665-0x0669*/
     207             :   0, 0, 0, 0, 0, 0,             /*0x066A-0x066F*/
     208             :          4         + 256 * 6,   /*0x0670*/
     209             :   1        + 8     + 256 * 0x00,/*0x0671*/
     210             :   1            + 32,            /*0x0672*/
     211             :   1            + 32,            /*0x0673*/
     212             :   0,                            /*0x0674*/
     213             :   1            + 32,            /*0x0675*/
     214             :   1, 1,                         /*0x0676-0x0677*/
     215             :   1 + 2,                        /*0x0678*/
     216             :   1 + 2 + 8        + 256 * 0x16,/*0x0679*/
     217             :   1 + 2 + 8        + 256 * 0x0E,/*0x067A*/
     218             :   1 + 2 + 8        + 256 * 0x02,/*0x067B*/
     219             :   1+2, 1+2,                     /*0x67C-0x067D*/
     220             :   1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
     221             :   1+2, 1+2, 1+2+8+256 * 0x2A, 1+2,           /*0x0684-0x0687*/
     222             :   1     + 8        + 256 * 0x38,/*0x0688*/
     223             :   1, 1, 1,                      /*0x0689-0x068B*/
     224             :   1     + 8        + 256 * 0x34,/*0x068C*/
     225             :   1     + 8        + 256 * 0x32,/*0x068D*/
     226             :   1     + 8        + 256 * 0x36,/*0x068E*/
     227             :   1, 1,                         /*0x068F-0x0690*/
     228             :   1     + 8        + 256 * 0x3C,/*0x0691*/
     229             :   1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1,       /*0x0692-0x0699*/
     230             :   1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
     231             :   1+2, 1+2, 1+2, 1+2,           /*0x069A-0x06A3*/
     232             :   1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/
     233             :   1+2, 1+2, 1+2, 1+2,           /*0x06A4-0x06AD*/
     234             :   1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
     235             :   1+2, 1+2, 1+2, 1+2,           /*0x06AE-0x06B7*/
     236             :   1+2, 1+2,                     /*0x06B8-0x06B9*/
     237             :   1     + 8        + 256 * 0x4E,/*0x06BA*/
     238             :   1 + 2 + 8        + 256 * 0x50,/*0x06BB*/
     239             :   1+2, 1+2,                     /*0x06BC-0x06BD*/
     240             :   1 + 2 + 8        + 256 * 0x5A,/*0x06BE*/
     241             :   1+2,                          /*0x06BF*/
     242             :   1     + 8        + 256 * 0x54,/*0x06C0*/
     243             :   1 + 2 + 8        + 256 * 0x56,/*0x06C1*/
     244             :   1, 1, 1,                      /*0x06C2-0x06C4*/
     245             :   1     + 8        + 256 * 0x90,/*0x06C5*/
     246             :   1     + 8        + 256 * 0x89,/*0x06C6*/
     247             :   1     + 8        + 256 * 0x87,/*0x06C7*/
     248             :   1     + 8        + 256 * 0x8B,/*0x06C8*/
     249             :   1     + 8        + 256 * 0x92,/*0x06C9*/
     250             :   1,                            /*0x06CA*/
     251             :   1     + 8        + 256 * 0x8E,/*0x06CB*/
     252             :   1 + 2 + 8        + 256 * 0xAC,/*0x06CC*/
     253             :   1,                            /*0x06CD*/
     254             :   1+2, 1+2,                     /*0x06CE-0x06CF*/
     255             :   1 + 2 + 8        + 256 * 0x94,/*0x06D0*/
     256             :   1+2,                          /*0x06D1*/
     257             :   1     + 8        + 256 * 0x5E,/*0x06D2*/
     258             :   1     + 8        + 256 * 0x60 /*0x06D3*/
     259             : };
     260             : 
     261             : static const uint8_t presALink[] = {
     262             : /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
     263             : /*FB5*/    0,    1,    0,    0,    0,    0,    0,    1,    2,1 + 2,    0,    0,    0,    0,    0,    0,
     264             : /*FB6*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     265             : /*FB7*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    2,1 + 2,    0,    0,
     266             : /*FB8*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    1,
     267             : /*FB9*/    2,1 + 2,    0,    1,    2,1 + 2,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     268             : /*FBA*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     269             : /*FBB*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     270             : /*FBC*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     271             : /*FBD*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     272             : /*FBE*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     273             : /*FBF*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    2,1 + 2,
     274             : /*FC0*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     275             : /*FC1*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     276             : /*FC2*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     277             : /*FC3*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     278             : /*FC4*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     279             : /*FC5*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    4,    4,
     280             : /*FC6*/    4,    4,    4
     281             : };
     282             : 
     283             : static const uint8_t presBLink[]=
     284             : {
     285             : /***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
     286             : /*FE7*/1 + 2,1 + 2,1 + 2,    0,1 + 2,    0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
     287             : /*FE8*/    0,    0,    1,    0,    1,    0,    1,    0,    1,    0,    1,    2,1 + 2,    0,    1,    0,
     288             : /*FE9*/    1,    2,1 + 2,    0,    1,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
     289             : /*FEA*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    0,    1,    0,    1,    0,
     290             : /*FEB*/    1,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
     291             : /*FEC*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
     292             : /*FED*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
     293             : /*FEE*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    0,
     294             : /*FEF*/    1,    0,    1,    2,1 + 2,    0,    1,    0,    1,    0,    1,    0,    1,    0,    0,    0
     295             : };
     296             : 
     297             : static const UChar convertFBto06[] =
     298             : {
     299             : /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
     300             : /*FB5*/   0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E,     0,     0,     0,     0, 0x67A, 0x67A,
     301             : /*FB6*/   0x67A, 0x67A,     0,     0,     0,     0, 0x679, 0x679, 0x679, 0x679,     0,     0,     0,     0,     0,     0,
     302             : /*FB7*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0, 0x686, 0x686, 0x686, 0x686,     0,     0,
     303             : /*FB8*/       0,     0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9,
     304             : /*FB9*/   0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF,     0,     0,     0,     0,     0,     0,     0,     0, 0x6BA, 0x6BA,
     305             : /*FBA*/   0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2,
     306             : /*FBB*/   0x6D3, 0x6D3,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
     307             : /*FBC*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
     308             : /*FBD*/       0,     0,     0,     0,     0,     0,     0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8,     0, 0x6CB, 0x6CB,
     309             : /*FBE*/   0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0,     0,     0,     0,     0,     0,     0,     0,     0,
     310             : /*FBF*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
     311             : };
     312             : 
     313             : static const UChar convertFEto06[] =
     314             : {
     315             : /***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
     316             : /*FE7*/   0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
     317             : /*FE8*/   0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
     318             : /*FE9*/   0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
     319             : /*FEA*/   0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
     320             : /*FEB*/   0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
     321             : /*FEC*/   0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
     322             : /*FED*/   0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
     323             : /*FEE*/   0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
     324             : /*FEF*/   0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
     325             : };
     326             : 
     327             : static const uint8_t shapeTable[4][4][4]=
     328             : {
     329             :   { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
     330             :   { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
     331             :   { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
     332             :   { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
     333             : };
     334             : 
     335             : /*
     336             :  * This function shapes European digits to Arabic-Indic digits
     337             :  * in-place, writing over the input characters.
     338             :  * Since we know that we are only looking for BMP code points,
     339             :  * we can safely just work with code units (again, at least UTF-16).
     340             :  */
     341             : static void
     342           0 : _shapeToArabicDigitsWithContext(UChar *s, int32_t length,
     343             :                                 UChar digitBase,
     344             :                                 UBool isLogical, UBool lastStrongWasAL) {
     345             :     const UBiDiProps *bdp;
     346             :     int32_t i;
     347             :     UChar c;
     348             : 
     349           0 :     bdp=ubidi_getSingleton();
     350           0 :     digitBase-=0x30;
     351             : 
     352             :     /* the iteration direction depends on the type of input */
     353           0 :     if(isLogical) {
     354           0 :         for(i=0; i<length; ++i) {
     355           0 :             c=s[i];
     356           0 :             switch(ubidi_getClass(bdp, c)) {
     357             :             case U_LEFT_TO_RIGHT: /* L */
     358             :             case U_RIGHT_TO_LEFT: /* R */
     359           0 :                 lastStrongWasAL=FALSE;
     360           0 :                 break;
     361             :             case U_RIGHT_TO_LEFT_ARABIC: /* AL */
     362           0 :                 lastStrongWasAL=TRUE;
     363           0 :                 break;
     364             :             case U_EUROPEAN_NUMBER: /* EN */
     365           0 :                 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
     366           0 :                     s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
     367             :                 }
     368           0 :                 break;
     369             :             default :
     370           0 :                 break;
     371             :             }
     372             :         }
     373             :     } else {
     374           0 :         for(i=length; i>0; /* pre-decrement in the body */) {
     375           0 :             c=s[--i];
     376           0 :             switch(ubidi_getClass(bdp, c)) {
     377             :             case U_LEFT_TO_RIGHT: /* L */
     378             :             case U_RIGHT_TO_LEFT: /* R */
     379           0 :                 lastStrongWasAL=FALSE;
     380           0 :                 break;
     381             :             case U_RIGHT_TO_LEFT_ARABIC: /* AL */
     382           0 :                 lastStrongWasAL=TRUE;
     383           0 :                 break;
     384             :             case U_EUROPEAN_NUMBER: /* EN */
     385           0 :                 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
     386           0 :                     s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
     387             :                 }
     388           0 :                 break;
     389             :             default :
     390           0 :                 break;
     391             :             }
     392             :         }
     393             :     }
     394           0 : }
     395             : 
     396             : /*
     397             :  *Name     : invertBuffer
     398             :  *Function : This function inverts the buffer, it's used
     399             :  *           in case the user specifies the buffer to be
     400             :  *           U_SHAPE_TEXT_DIRECTION_LOGICAL
     401             :  */
     402             : static void
     403           0 : invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) {
     404             :     UChar temp;
     405           0 :     int32_t i=0,j=0;
     406           0 :     for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) {
     407           0 :         temp = buffer[i];
     408           0 :         buffer[i] = buffer[j];
     409           0 :         buffer[j] = temp;
     410             :     }
     411           0 : }
     412             : 
     413             : /*
     414             :  *Name     : changeLamAlef
     415             :  *Function : Converts the Alef characters into an equivalent
     416             :  *           LamAlef location in the 0x06xx Range, this is an
     417             :  *           intermediate stage in the operation of the program
     418             :  *           later it'll be converted into the 0xFExx LamAlefs
     419             :  *           in the shaping function.
     420             :  */
     421             : static inline UChar
     422           0 : changeLamAlef(UChar ch) {
     423           0 :     switch(ch) {
     424             :     case 0x0622 :
     425           0 :         return 0x065C;
     426             :     case 0x0623 :
     427           0 :         return 0x065D;
     428             :     case 0x0625 :
     429           0 :         return 0x065E;
     430             :     case 0x0627 :
     431           0 :         return 0x065F;
     432             :     }
     433           0 :     return 0;
     434             : }
     435             : 
     436             : /*
     437             :  *Name     : getLink
     438             :  *Function : Resolves the link between the characters as
     439             :  *           Arabic characters have four forms :
     440             :  *           Isolated, Initial, Middle and Final Form
     441             :  */
     442             : static UChar
     443           0 : getLink(UChar ch) {
     444           0 :     if(ch >= 0x0622 && ch <= 0x06D3) {
     445           0 :         return(araLink[ch-0x0622]);
     446           0 :     } else if(ch == 0x200D) {
     447           0 :         return(3);
     448           0 :     } else if(ch >= 0x206D && ch <= 0x206F) {
     449           0 :         return(4);
     450           0 :     }else if(ch >= 0xFB50 && ch <= 0xFC62) {
     451           0 :         return(presALink[ch-0xFB50]);
     452           0 :     } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
     453           0 :         return(presBLink[ch-0xFE70]);
     454             :     }else {
     455           0 :         return(0);
     456             :     }
     457             : }
     458             : 
     459             : /*
     460             :  *Name     : countSpaces
     461             :  *Function : Counts the number of spaces
     462             :  *           at each end of the logical buffer
     463             :  */
     464             : static void
     465           0 : countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) {
     466           0 :     int32_t i = 0;
     467           0 :     int32_t countl = 0,countr = 0;
     468           0 :     while((dest[i] == SPACE_CHAR) && (countl < size)) {
     469           0 :        countl++;
     470           0 :        i++;
     471             :     }
     472           0 :     if (countl < size) {  /* the entire buffer is not all space */
     473           0 :         while(dest[size-1] == SPACE_CHAR) {
     474           0 :             countr++;
     475           0 :             size--;
     476             :         }
     477             :     }
     478           0 :     *spacesCountl = countl;
     479           0 :     *spacesCountr = countr;
     480           0 : }
     481             : 
     482             : /*
     483             :  *Name     : isTashkeelChar
     484             :  *Function : Returns 1 for Tashkeel characters in 06 range else return 0
     485             :  */
     486             : static inline int32_t
     487           0 : isTashkeelChar(UChar ch) {
     488           0 :     return (int32_t)( ch>=0x064B && ch<= 0x0652 );
     489             : }
     490             : 
     491             : /*
     492             :  *Name     : isTashkeelCharFE
     493             :  *Function : Returns 1 for Tashkeel characters in FE range else return 0
     494             :  */
     495             : static inline int32_t
     496           0 : isTashkeelCharFE(UChar ch) {
     497           0 :     return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F );
     498             : }
     499             : 
     500             : /*
     501             :  *Name     : isAlefChar
     502             :  *Function : Returns 1 for Alef characters else return 0
     503             :  */
     504             : static inline int32_t
     505           0 : isAlefChar(UChar ch) {
     506           0 :     return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) );
     507             : }
     508             : 
     509             : /*
     510             :  *Name     : isLamAlefChar
     511             :  *Function : Returns 1 for LamAlef characters else return 0
     512             :  */
     513             : static inline int32_t
     514           0 : isLamAlefChar(UChar ch) {
     515           0 :     return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) );
     516             : }
     517             : 
     518             : /*BIDI
     519             :  *Name     : isTailChar
     520             :  *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0
     521             :  */
     522             : 
     523             : static inline int32_t
     524           0 : isTailChar(UChar ch) {
     525           0 :     if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){
     526           0 :             return 1;
     527             :     }else{
     528           0 :             return 0;
     529             :     }
     530             : }
     531             : 
     532             : /*BIDI
     533             :  *Name     : isSeenTailFamilyChar
     534             :  *Function : returns 1 if the character is a seen family isolated character
     535             :  *           in the FE range otherwise returns 0
     536             :  */
     537             : 
     538             : static inline int32_t
     539           0 : isSeenTailFamilyChar(UChar ch) {
     540           0 :     if(ch >= 0xfeb1 && ch < 0xfebf){
     541           0 :             return tailFamilyIsolatedFinal [ch - 0xFEB1];
     542             :     }else{
     543           0 :             return 0;
     544             :     }
     545             : }
     546             : 
     547             :  /* Name     : isSeenFamilyChar
     548             :   * Function : returns 1 if the character is a seen family character in the Unicode
     549             :   *            06 range otherwise returns 0
     550             :  */
     551             : 
     552             : static inline int32_t
     553           0 : isSeenFamilyChar(UChar  ch){
     554           0 :     if(ch >= 0x633 && ch <= 0x636){
     555           0 :         return 1;
     556             :     }else {
     557           0 :         return 0;
     558             :     }
     559             : }
     560             : 
     561             : /*Start of BIDI*/
     562             : /*
     563             :  *Name     : isAlefMaksouraChar
     564             :  *Function : returns 1 if the character is a Alef Maksoura Final or isolated
     565             :  *           otherwise returns 0
     566             :  */
     567             : static inline int32_t
     568           0 : isAlefMaksouraChar(UChar ch) {
     569           0 :     return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
     570             : }
     571             : 
     572             : /*
     573             :  * Name     : isYehHamzaChar
     574             :  * Function : returns 1 if the character is a yehHamza isolated or yehhamza
     575             :  *            final is found otherwise returns 0
     576             :  */
     577             : static inline int32_t
     578           0 : isYehHamzaChar(UChar ch) {
     579           0 :     if((ch==0xFE89)||(ch==0xFE8A)){
     580           0 :         return 1;
     581             :     }else{
     582           0 :         return 0;
     583             :     }
     584             : }
     585             : 
     586             :  /*
     587             :  * Name: isTashkeelOnTatweelChar
     588             :  * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
     589             :  *           Tashkeel on tatweel (FE range), it returns 1 else if the
     590             :  *           Tashkeel with shadda on tatweel (FC range)return 2 otherwise
     591             :  *           returns 0
     592             :  */
     593             : static inline int32_t
     594           0 : isTashkeelOnTatweelChar(UChar ch){
     595           0 :     if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR)
     596             :     {
     597           0 :         return tashkeelMedial [ch - 0xFE70];
     598           0 :     }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) {
     599           0 :         return 2;
     600             :     }else{
     601           0 :         return 0;
     602             :     }
     603             : }
     604             : 
     605             : /*
     606             :  * Name: isIsolatedTashkeelChar
     607             :  * Function: Checks if the Tashkeel Character is in the isolated form
     608             :  *           (i.e. Unicode FE range) returns 1 else if the Tashkeel
     609             :  *           with shadda is in the isolated form (i.e. Unicode FC range)
     610             :  *           returns 2 otherwise returns 0
     611             :  */
     612             : static inline int32_t
     613           0 : isIsolatedTashkeelChar(UChar ch){
     614           0 :     if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){
     615           0 :         return (1 - tashkeelMedial [ch - 0xFE70]);
     616           0 :     }else if(ch >= 0xfc5e && ch <= 0xfc63){
     617           0 :         return 1;
     618             :     }else{
     619           0 :         return 0;
     620             :     }
     621             : }
     622             : 
     623             : 
     624             : 
     625             : 
     626             : /*
     627             :  *Name     : calculateSize
     628             :  *Function : This function calculates the destSize to be used in preflighting
     629             :  *           when the destSize is equal to 0
     630             :  *           It is used also to calculate the new destsize in case the
     631             :  *           destination buffer will be resized.
     632             :  */
     633             : 
     634             : static int32_t
     635           0 : calculateSize(const UChar *source, int32_t sourceLength,
     636             : int32_t destSize,uint32_t options) {
     637           0 :     int32_t i = 0;
     638             : 
     639           0 :     int lamAlefOption = 0;
     640           0 :     int tashkeelOption = 0;
     641             : 
     642           0 :     destSize = sourceLength;
     643             : 
     644           0 :     if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE ||
     645           0 :         ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) &&
     646           0 :         ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){
     647           0 :             lamAlefOption = 1;
     648             :     }
     649           0 :     if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE &&
     650           0 :        ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){
     651           0 :             tashkeelOption = 1;
     652             :         }
     653             : 
     654           0 :     if(lamAlefOption || tashkeelOption){
     655           0 :         if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
     656           0 :             for(i=0;i<sourceLength;i++) {
     657           0 :                 if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) {
     658           0 :                         destSize--;
     659             :                     }
     660             :                 }
     661           0 :             }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) {
     662           0 :                 for(i=0;i<sourceLength;i++) {
     663           0 :                     if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) {
     664           0 :                         destSize--;
     665             :                     }
     666             :                 }
     667             :             }
     668             :         }
     669             : 
     670           0 :     if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){
     671           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){
     672           0 :             for(i=0;i<sourceLength;i++) {
     673           0 :                 if(isLamAlefChar(source[i]))
     674           0 :                 destSize++;
     675             :             }
     676             :         }
     677             :     }
     678             : 
     679           0 :     return destSize;
     680             : }
     681             : 
     682             : /*
     683             :  *Name     : handleTashkeelWithTatweel
     684             :  *Function : Replaces Tashkeel as following:
     685             :  *            Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
     686             :  *            Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
     687             :  *                   it with Shadda on Tatweel.
     688             :  *            Case 3: if the Tashkeel is isolated replace it with Space.
     689             :  *
     690             :  */
     691             : static int32_t
     692           0 : handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength,
     693             :              int32_t /*destSize*/, uint32_t /*options*/,
     694             :              UErrorCode * /*pErrorCode*/) {
     695             :                  int i;
     696           0 :                  for(i = 0; i < sourceLength; i++){
     697           0 :                      if((isTashkeelOnTatweelChar(dest[i]) == 1)){
     698           0 :                          dest[i] = TATWEEL_CHAR;
     699           0 :                     }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){
     700           0 :                          dest[i] = SHADDA_TATWEEL_CHAR;
     701           0 :                     }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){
     702           0 :                          dest[i] = SPACE_CHAR;
     703             :                     }
     704             :                  }
     705           0 :                  return sourceLength;
     706             : }
     707             : 
     708             : 
     709             : 
     710             : /*
     711             :  *Name     : handleGeneratedSpaces
     712             :  *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
     713             :  *           and Tashkeel to space.
     714             :  *           handleGeneratedSpaces function puts these generated spaces
     715             :  *           according to the options the user specifies. LamAlef and Tashkeel
     716             :  *           spaces can be replaced at begin, at end, at near or decrease the
     717             :  *           buffer size.
     718             :  *
     719             :  *           There is also Auto option for LamAlef and tashkeel, which will put
     720             :  *           the spaces at end of the buffer (or end of text if the user used
     721             :  *           the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END).
     722             :  *
     723             :  *           If the text type was visual_LTR and the option
     724             :  *           U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
     725             :  *           option will place the space at the beginning of the buffer and
     726             :  *           BEGIN will place the space at the end of the buffer.
     727             :  */
     728             : 
     729             : static int32_t
     730           0 : handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
     731             :                     int32_t destSize,
     732             :                     uint32_t options,
     733             :                     UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) {
     734             : 
     735           0 :     int32_t i = 0, j = 0;
     736           0 :     int32_t count = 0;
     737           0 :     UChar *tempbuffer=NULL;
     738             : 
     739           0 :     int lamAlefOption = 0;
     740           0 :     int tashkeelOption = 0;
     741           0 :     int shapingMode = SHAPE_MODE;
     742             : 
     743           0 :     if (shapingMode == 0){
     744           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){
     745           0 :             lamAlefOption = 1;
     746             :         }
     747           0 :         if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){
     748           0 :             tashkeelOption = 1;
     749             :         }
     750             :     }
     751             : 
     752           0 :     tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
     753             :     /* Test for NULL */
     754           0 :     if(tempbuffer == NULL) {
     755           0 :         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
     756           0 :         return 0;
     757             :     }
     758             : 
     759             : 
     760           0 :     if (lamAlefOption || tashkeelOption){
     761           0 :         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
     762             : 
     763           0 :         i = j = 0; count = 0;
     764           0 :         while(i < sourceLength) {
     765           0 :             if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
     766           0 :                (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
     767           0 :                 j--;
     768           0 :                 count++;
     769             :             } else {
     770           0 :                 tempbuffer[j] = dest[i];
     771             :             }
     772           0 :             i++;
     773           0 :             j++;
     774             :         }
     775             : 
     776           0 :         while(count >= 0) {
     777           0 :             tempbuffer[i] = 0x0000;
     778           0 :             i--;
     779           0 :             count--;
     780             :         }
     781             : 
     782           0 :         u_memcpy(dest, tempbuffer, sourceLength);
     783           0 :         destSize = u_strlen(dest);
     784             :     }
     785             : 
     786           0 :       lamAlefOption = 0;
     787             : 
     788           0 :     if (shapingMode == 0){
     789           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){
     790           0 :             lamAlefOption = 1;
     791             :         }
     792             :     }
     793             : 
     794           0 :     if (lamAlefOption){
     795             :         /* Lam+Alef is already shaped into LamAlef + FFFF */
     796           0 :         i = 0;
     797           0 :         while(i < sourceLength) {
     798           0 :             if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){
     799           0 :                 dest[i] = SPACE_CHAR;
     800             :             }
     801           0 :             i++;
     802             :         }
     803           0 :         destSize = sourceLength;
     804             :     }
     805           0 :     lamAlefOption = 0;
     806           0 :     tashkeelOption = 0;
     807             : 
     808           0 :     if (shapingMode == 0) {
     809           0 :         if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) ||
     810           0 :               (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
     811           0 :               && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) {
     812           0 :             lamAlefOption = 1;
     813             :         }
     814           0 :         if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) {
     815           0 :             tashkeelOption = 1;
     816             :         }
     817             :     }
     818             : 
     819           0 :     if(lamAlefOption || tashkeelOption){
     820           0 :         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
     821             :         
     822           0 :         i = j = sourceLength; count = 0;
     823             :         
     824           0 :         while(i >= 0) {
     825           0 :             if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
     826           0 :                  (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
     827           0 :                 j++;
     828           0 :                 count++;
     829             :             }else {
     830           0 :                 tempbuffer[j] = dest[i];
     831             :             }
     832           0 :             i--;
     833           0 :             j--;
     834             :         }
     835             : 
     836           0 :         for(i=0 ;i < count; i++){
     837           0 :                 tempbuffer[i] = SPACE_CHAR;
     838             :         }
     839             : 
     840           0 :         u_memcpy(dest, tempbuffer, sourceLength);
     841           0 :         destSize = sourceLength;
     842             :     }
     843             : 
     844             : 
     845             : 
     846           0 :     lamAlefOption = 0;
     847           0 :     tashkeelOption = 0;
     848             : 
     849           0 :     if (shapingMode == 0) {
     850           0 :         if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) ||
     851           0 :               (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
     852           0 :               && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) {
     853           0 :             lamAlefOption = 1;
     854             :         }
     855           0 :         if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){
     856           0 :             tashkeelOption = 1;
     857             :         }
     858             :     }
     859             : 
     860           0 :     if(lamAlefOption || tashkeelOption){
     861           0 :         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
     862             : 
     863           0 :         i = j = 0; count = 0;
     864           0 :         while(i < sourceLength) {
     865           0 :             if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
     866           0 :                  (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
     867           0 :                 j--;
     868           0 :                 count++;
     869             :             }else {
     870           0 :                 tempbuffer[j] = dest[i];
     871             :             }
     872           0 :             i++;
     873           0 :             j++;
     874             :         }
     875             : 
     876           0 :         while(count >= 0) {
     877           0 :             tempbuffer[i] = SPACE_CHAR;
     878           0 :             i--;
     879           0 :             count--;
     880             :         }
     881             : 
     882           0 :         u_memcpy(dest, tempbuffer, sourceLength);
     883           0 :         destSize = sourceLength;
     884             :     }
     885             : 
     886             : 
     887           0 :     if(tempbuffer){
     888           0 :         uprv_free(tempbuffer);
     889             :     }
     890             : 
     891           0 :     return destSize;
     892             : }
     893             : 
     894             : /*
     895             :  *Name     :expandCompositCharAtBegin
     896             :  *Function :Expands the LamAlef character to Lam and Alef consuming the required
     897             :  *         space from beginning of the buffer. If the text type was visual_LTR
     898             :  *         and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
     899             :  *         the spaces will be located at end of buffer.
     900             :  *         If there are no spaces to expand the LamAlef, an error
     901             :  *         will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
     902             :  */
     903             : 
     904             : static int32_t
     905           0 : expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
     906           0 :     int32_t      i = 0,j = 0;
     907           0 :     int32_t      countl = 0;
     908           0 :     UChar    *tempbuffer=NULL;
     909             : 
     910           0 :     tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
     911             : 
     912             :     /* Test for NULL */
     913           0 :     if(tempbuffer == NULL) {
     914           0 :         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
     915           0 :         return 0;
     916             :     }
     917             : 
     918           0 :         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
     919             : 
     920           0 :         i = 0;
     921           0 :         while(dest[i] == SPACE_CHAR) {
     922           0 :             countl++;
     923           0 :             i++;
     924             :         }
     925             : 
     926           0 :         i = j = sourceLength-1;
     927             : 
     928           0 :         while(i >= 0 && j >= 0) {
     929           0 :             if( countl>0 && isLamAlefChar(dest[i])) {
     930           0 :                 tempbuffer[j] = LAM_CHAR;
     931             :                 /* to ensure the array index is within the range */
     932           0 :                 U_ASSERT(dest[i] >= 0xFEF5u
     933             :                     && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef));
     934           0 :                 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
     935           0 :                 j--;
     936           0 :                 countl--;
     937             :             }else {
     938           0 :                  if( countl == 0 && isLamAlefChar(dest[i]) ) {
     939           0 :                      *pErrorCode=U_NO_SPACE_AVAILABLE;
     940             :                      }
     941           0 :                  tempbuffer[j] = dest[i];
     942             :             }
     943           0 :             i--;
     944           0 :             j--;
     945             :         }
     946           0 :         u_memcpy(dest, tempbuffer, sourceLength);
     947             : 
     948           0 :         uprv_free(tempbuffer);
     949             : 
     950           0 :         destSize = sourceLength;
     951           0 :         return destSize;
     952             : }
     953             : 
     954             : /*
     955             :  *Name     : expandCompositCharAtEnd
     956             :  *Function : Expands the LamAlef character to Lam and Alef consuming the
     957             :  *           required space from end of the buffer. If the text type was
     958             :  *           Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END
     959             :  *           was used, the spaces will be consumed from begin of buffer. If
     960             :  *           there are no spaces to expand the LamAlef, an error
     961             :  *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
     962             :  */
     963             : 
     964             : static int32_t
     965           0 : expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) {
     966           0 :     int32_t      i = 0,j = 0;
     967             : 
     968           0 :     int32_t      countr = 0;
     969           0 :     int32_t  inpsize = sourceLength;
     970             : 
     971           0 :     UChar    *tempbuffer=NULL;
     972           0 :     tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
     973             : 
     974             :     /* Test for NULL */
     975           0 :     if(tempbuffer == NULL) {
     976           0 :         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
     977           0 :          return 0;
     978             :     }
     979             : 
     980           0 :     uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
     981             : 
     982           0 :     while(dest[inpsize-1] == SPACE_CHAR) {
     983           0 :         countr++;
     984           0 :         inpsize--;
     985             :     }
     986             : 
     987           0 :     i = sourceLength - countr - 1;
     988           0 :     j = sourceLength - 1;
     989             : 
     990           0 :     while(i >= 0 && j >= 0) {
     991           0 :         if( countr>0 && isLamAlefChar(dest[i]) ) {
     992           0 :             tempbuffer[j] = LAM_CHAR;
     993           0 :             tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
     994           0 :             j--;
     995           0 :             countr--;
     996             :         }else {
     997           0 :             if ((countr == 0) && isLamAlefChar(dest[i]) ) {
     998           0 :                 *pErrorCode=U_NO_SPACE_AVAILABLE;
     999             :             }
    1000           0 :             tempbuffer[j] = dest[i];
    1001             :         }
    1002           0 :         i--;
    1003           0 :         j--;
    1004             :     }
    1005             : 
    1006           0 :     if(countr > 0) {
    1007           0 :         u_memmove(tempbuffer, tempbuffer+countr, sourceLength);
    1008           0 :         if(u_strlen(tempbuffer) < sourceLength) {
    1009           0 :             for(i=sourceLength-1;i>=sourceLength-countr;i--) {
    1010           0 :                 tempbuffer[i] = SPACE_CHAR;
    1011             :             }
    1012             :         }
    1013             :     }
    1014           0 :     u_memcpy(dest, tempbuffer, sourceLength);
    1015             : 
    1016           0 :     uprv_free(tempbuffer);
    1017             : 
    1018           0 :     destSize = sourceLength;
    1019           0 :     return destSize;
    1020             : }
    1021             : 
    1022             : /*
    1023             :  *Name     : expandCompositCharAtNear
    1024             :  *Function : Expands the LamAlef character into Lam + Alef, YehHamza character
    1025             :  *           into Yeh + Hamza, SeenFamily character into SeenFamily character
    1026             :  *           + Tail, while consuming the space next to the character.
    1027             :  *           If there are no spaces next to the character, an error
    1028             :  *           will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
    1029             :  */
    1030             : 
    1031             : static int32_t
    1032           0 : expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
    1033             :                          int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
    1034           0 :     int32_t      i = 0;
    1035             : 
    1036             : 
    1037             :     UChar    lamalefChar, yehhamzaChar;
    1038             : 
    1039           0 :     for(i = 0 ;i<=sourceLength-1;i++) {
    1040           0 :             if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
    1041           0 :                 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
    1042           0 :                     dest[i-1] = shapeVars.tailChar;
    1043             :                 }else {
    1044           0 :                     *pErrorCode=U_NO_SPACE_AVAILABLE;
    1045             :                 }
    1046           0 :             }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) {
    1047           0 :                 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
    1048           0 :                     yehhamzaChar = dest[i];
    1049           0 :                     dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR];
    1050           0 :                     dest[i-1] = HAMZAFE_CHAR;
    1051             :                 }else {
    1052             : 
    1053           0 :                     *pErrorCode=U_NO_SPACE_AVAILABLE;
    1054             :                 }
    1055           0 :             }else if(lamAlefOption && isLamAlefChar(dest[i+1])) {
    1056           0 :                 if(dest[i] == SPACE_CHAR){
    1057           0 :                     lamalefChar = dest[i+1];
    1058           0 :                     dest[i+1] = LAM_CHAR;
    1059           0 :                     dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
    1060             :                 }else {
    1061           0 :                     *pErrorCode=U_NO_SPACE_AVAILABLE;
    1062             :                 }
    1063             :             }
    1064             :        }
    1065           0 :        destSize = sourceLength;
    1066           0 :        return destSize;
    1067             : }
    1068             :  /*
    1069             :  * Name     : expandCompositChar
    1070             :  * Function : LamAlef, need special handling, since it expands from one
    1071             :  *            character into two characters while shaping or deshaping.
    1072             :  *            In order to expand it, near or far spaces according to the
    1073             :  *            options user specifies. Also buffer size can be increased.
    1074             :  *
    1075             :  *            For SeenFamily characters and YehHamza only the near option is
    1076             :  *            supported, while for LamAlef we can take spaces from begin, end,
    1077             :  *            near or even increase the buffer size.
    1078             :  *            There is also the Auto option for LamAlef only, which will first
    1079             :  *            search for a space at end, begin then near, respectively.
    1080             :  *            If there are no spaces to expand these characters, an error will be set to
    1081             :  *            U_NO_SPACE_AVAILABLE as defined in utypes.h
    1082             :  */
    1083             : 
    1084             : static int32_t
    1085           0 : expandCompositChar(UChar *dest, int32_t sourceLength,
    1086             :               int32_t destSize,uint32_t options,
    1087             :               UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {
    1088             : /*
                     :  * Dispatcher: looks at shapingMode and at the LamAlef / YehHamza / Seen bits
                     :  * of options, then forwards to expandCompositCharAtEnd(),
                     :  * expandCompositCharAtBegin() or expandCompositCharAtNear(), or performs the
                     :  * LamAlef RESIZE expansion inline through a temporary buffer.
                     :  *
                     :  * dest         - buffer that is rewritten in place.
                     :  * sourceLength - forwarded unchanged to the helpers and to calculateSize().
                     :  * destSize     - replaced by each helper's return value (or by calculateSize()
                     :  *                in the RESIZE case); the final value is returned.
                     :  * options      - U_SHAPE_* option bits.
                     :  * pErrorCode   - set to U_MEMORY_ALLOCATION_ERROR here if the RESIZE buffer
                     :  *                cannot be allocated; the code below also expects the helpers
                     :  *                to report U_NO_SPACE_AVAILABLE through it.
                     :  * shapingMode  - 1 selects the LamAlef handling, 0 the YehHamza/Seen handling.
                     :  *                Callers in this file pass SHAPE_MODE / DESHAPE_MODE
                     :  *                (presumably 0 and 1 respectively - TODO confirm).
                     :  * shapeVars    - passed by value; spacesRelativeToTextBeginEnd,
                     :  *                uShapeLamalefBegin and uShapeLamalefEnd are read directly here.
                     :  *
                     :  * Returns the resulting destSize, or 0 when the allocation fails.
                     :  */
    1089           0 :     int32_t      i = 0,j = 0;
    1090             : 
    1091           0 :     UChar    *tempbuffer=NULL;
    1092           0 :     int yehHamzaOption = 0;
    1093           0 :     int seenTailOption = 0;
    1094           0 :     int lamAlefOption = 0;
    1095             : 
    1096           0 :     if (shapingMode == 1){
    1097           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ /* AUTO: try up to three strategies in turn */
    1098             : 
    1099           0 :             if(shapeVars.spacesRelativeToTextBeginEnd == 0) { /* flag clear: try end first, then begin */
    1100           0 :                 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
    1101             : 
    1102           0 :                 if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
    1103           0 :                     *pErrorCode = U_ZERO_ERROR; /* clear the failure so the next strategy can run */
    1104           0 :                     destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
    1105             :                 }
    1106             :             }else { /* flag set: try begin first, then end */
    1107           0 :                 destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
    1108             : 
    1109           0 :                 if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
    1110           0 :                     *pErrorCode = U_ZERO_ERROR;
    1111           0 :                     destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
    1112             :                 }
    1113             :             }
    1114             : 
    1115           0 :             if(*pErrorCode == U_NO_SPACE_AVAILABLE) { /* both ends failed: last resort is "near" */
    1116           0 :                 *pErrorCode = U_ZERO_ERROR;
    1117             :                 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
    1118           0 :                                                 seenTailOption, 1,shapeVars); /* the two option flags are still 0 here; literal 1 is the lamAlef flag */
    1119             :             }
    1120             :         }
    1121             :     }
    1122             : 
    1123           0 :     if (shapingMode == 1){
    1124           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){ /* compared with shapeVars, not the constant: u_shapeArabic may swap Begin/End */
    1125           0 :             destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
    1126             :         }
    1127             :     }
    1128             : 
    1129           0 :     if (shapingMode == 1){
    1130           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){
    1131           0 :             destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
    1132             :         }
    1133             :     }
    1134             : 
    1135           0 :     if (shapingMode == 0){ /* mode 0: only the YehHamza and Seen "two cell near" options are examined */
    1136           0 :          if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){
    1137           0 :              yehHamzaOption = 1;
    1138             :          }
    1139           0 :          if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){
    1140           0 :             seenTailOption = 1;
    1141             :          }
    1142             :     }
    1143           0 :     if (shapingMode == 1) {
    1144           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) {
    1145           0 :             lamAlefOption = 1;
    1146             :         }
    1147             :     }
    1148             : 
    1149             : 
    1150           0 :     if (yehHamzaOption || seenTailOption || lamAlefOption){ /* one shared "near" call for whichever flags were set above */
    1151             :         destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
    1152           0 :                                             seenTailOption,lamAlefOption,shapeVars);
    1153             :     }
    1154             : 
    1155             : 
    1156           0 :     if (shapingMode == 1){
    1157           0 :         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ /* RESIZE: rebuild the text in a scratch buffer */
    1158           0 :             destSize = calculateSize(dest,sourceLength,destSize,options);
    1159           0 :             tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); /* +1 element keeps tempbuffer[j+1] below in bounds */
    1160             : 
    1161             :             /* Test for NULL */
    1162           0 :             if(tempbuffer == NULL) {
    1163           0 :                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    1164           0 :                 return 0;
    1165             :             }
    1166             : 
    1167           0 :             uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR);
    1168             : 
    1169           0 :             i = j = 0; /* i indexes dest (input), j indexes tempbuffer (output) */
    1170           0 :             while(i < destSize && j < destSize) {
    1171           0 :                 if(isLamAlefChar(dest[i]) ) { /* one LamAlef code unit becomes two output code units */
    1172           0 :                     tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ];
    1173           0 :                     tempbuffer[j+1] = LAM_CHAR;
    1174           0 :                     j++; /* extra advance for the second code unit just written */
    1175             :                 }else {
    1176           0 :                     tempbuffer[j] = dest[i];
    1177             :                 }
    1178           0 :                 i++;
    1179           0 :                 j++;
    1180             :             }
    1181             : 
    1182           0 :             u_memcpy(dest, tempbuffer, destSize); /* copy destSize UChars back over dest */
    1183             :         }
    1184             :     }
    1185             : 
    1186           0 :     if(tempbuffer) {
    1187           0 :         uprv_free(tempbuffer);
    1188             :     }
    1189           0 :     return destSize;
    1190             : }
    1191             : 
    1192             : /*
    1193             :  *Name     : shapeUnicode
    1194             :  *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
    1195             :  *           arabic Unicode buffer in FExx Range
                     :  *
                     :  *           dest is rewritten in place; the buffer is visited from index
                     :  *           sourceLength-1 down to index 0.
                     :  *           The incoming destSize value is not read: it is overwritten with
                     :  *           sourceLength after the main loop.
                     :  *           tashkeelFlag controls how tashkeel characters are shaped:
                     :  *             1 - a tashkeel (other than 0x064C / 0x064D) whose two neighbours
                     :  *                 both link gets Shape 1, unless it sits between a Lam and an
                     :  *                 Alef, in which case it gets Shape 0;
                     :  *             2 - SHADDA06_CHAR gets Shape 1, every other 06xx tashkeel is
                     :  *                 replaced by TASHKEEL_SPACE_SUB;
                     :  *             any other value - tashkeel gets Shape 0.
                     :  *Returns  : sourceLength, or the value returned by handleGeneratedSpaces() /
                     :  *           expandCompositChar() when those post-processing steps run.
    1196             :  */
    1197             : static int32_t
    1198           0 : shapeUnicode(UChar *dest, int32_t sourceLength,
    1199             :              int32_t destSize,uint32_t options,
    1200             :              UErrorCode *pErrorCode,
    1201             :              int tashkeelFlag, struct uShapeVariables shapeVars) {
    1202             : 
    1203             :     int32_t          i, iend;
    1204             :     int32_t          step;
    1205             :     int32_t          lastPos,Nx, Nw;
    1206             :     unsigned int     Shape;
    1207           0 :     int32_t          lamalef_found = 0;
    1208           0 :     int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound  = 0;
    1209           0 :     UChar            prevLink = 0, lastLink = 0, currLink, nextLink = 0;
    1210             :     UChar            wLamalef;
    1211             : 
    1212             :     /*
    1213             :      * Converts the input buffer from FExx Range into 06xx Range
    1214             :      * to make sure that all characters are in the 06xx range
    1215             :      * even the lamalef is converted to the special region in
    1216             :      * the 06xx range
    1217             :      */
    1218           0 :     if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK)  == U_SHAPE_PRESERVE_PRESENTATION_NOOP) {
    1219           0 :         for (i = 0; i < sourceLength; i++) {
    1220           0 :             UChar inputChar  = dest[i];
    1221           0 :             if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
    1222           0 :                 UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
    1223           0 :                 if (c != 0) /* a zero table entry leaves the character unchanged */
    1224           0 :                     dest[i] = c;
    1225           0 :             } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
    1226           0 :                 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
    1227             :             } else {
    1228           0 :                 dest[i] = inputChar ;
    1229             :             }
    1230             :         }
    1231             :     }
    1232             : 
    1233             : 
    1234             :     /* sets the index to the end of the buffer, together with the step point to -1 */
    1235           0 :     i = sourceLength - 1;
    1236           0 :     iend = -1;
    1237           0 :     step = -1;
    1238             : 
    1239             :     /*
    1240             :      * This function resolves the link between the characters .
    1241             :      * Arabic characters have four forms :
    1242             :      * Isolated Form, Initial Form, Middle Form and Final Form
    1243             :      */
    1244           0 :     currLink = getLink(dest[i]);
    1245             : 
    1246           0 :     lastPos = i;
    1247           0 :     Nx = -2, Nw = 0; /* Nx < 0 means the next non-IRRELEVANT character has not been located yet */
    1248             : 
    1249           0 :     while (i != iend) {
    1250             :         /* If high byte of currLink > 0 then more than one shape */
    1251           0 :         if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) {
    1252           0 :             Nw = i + step;
    1253           0 :             while (Nx < 0) {         /* we need to know about next char */
    1254           0 :                 if(Nw == iend) {
    1255           0 :                     nextLink = 0;
    1256           0 :                     Nx = 3000; /* non-negative sentinel ends the lookup; NOTE(review): 3000 is a real index once sourceLength > 3001 - confirm it cannot collide with i */
    1257             :                 } else {
    1258           0 :                     nextLink = getLink(dest[Nw]);
    1259           0 :                     if((nextLink & IRRELEVANT) == 0) {
    1260           0 :                         Nx = Nw;
    1261             :                     } else {
    1262           0 :                         Nw = Nw + step; /* skip IRRELEVANT characters while looking ahead */
    1263             :                     }
    1264             :                 }
    1265             :             }
    1266             : 
    1267           0 :             if ( ((currLink & ALEFTYPE) > 0)  &&  ((lastLink & LAMTYPE) > 0) ) { /* Alef here, Lam at lastPos: merge the pair */
    1268           0 :                 lamalef_found = 1;
    1269           0 :                 wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
    1270           0 :                 if ( wLamalef != 0) {
    1271           0 :                     dest[i] = LAMALEF_SPACE_SUB;            /* The default case is to drop the Alef and replace */
    1272           0 :                     dest[lastPos] =wLamalef;     /* it by LAMALEF_SPACE_SUB which is the last character in the  */
    1273           0 :                     i=lastPos;                   /* unicode private use area, this is done to make   */
    1274             :                 }                                /* sure that removeLamAlefSpaces() handles only the */
    1275           0 :                 lastLink = prevLink;             /* spaces generated during lamalef generation.      */
    1276           0 :                 currLink = getLink(wLamalef);    /* LAMALEF_SPACE_SUB is added here and is replaced by spaces   */
    1277             :             }                                    /* in removeLamAlefSpaces()                         */
    1278             : 
    1279           0 :             if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ /* note Seen-family / Yeh-Hamza characters that have a space at i-1 ... */
    1280           0 :                 if ( isSeenFamilyChar(dest[i])) {
    1281           0 :                     seenfamFound = 1;
    1282           0 :                 } else if (dest[i] == YEH_HAMZA_CHAR) {
    1283           0 :                     yehhamzaFound = 1;
    1284             :                 }
    1285             :             }
    1286           0 :             else if(i==0){ /* ... or that sit at index 0 */
    1287           0 :                 if ( isSeenFamilyChar(dest[i])){
    1288           0 :                     seenfamFound = 1;
    1289           0 :                 } else if (dest[i] == YEH_HAMZA_CHAR) {
    1290           0 :                     yehhamzaFound = 1;
    1291             :                 }
    1292             :             }
    1293             : 
    1294             :             /*
    1295             :              * get the proper shape according to link ability of neighbors
    1296             :              * and of character; depends on the order of the shapes
    1297             :              * (isolated, initial, middle, final) in the compatibility area
    1298             :              */
    1299           0 :             Shape = shapeTable[nextLink & (LINKR + LINKL)]
    1300           0 :                               [lastLink & (LINKR + LINKL)]
    1301           0 :                               [currLink & (LINKR + LINKL)];
    1302             : 
    1303           0 :             if ((currLink & (LINKR+LINKL)) == 1) {
    1304           0 :                 Shape &= 1; /* keep only the low bit of the table result */
    1305           0 :             } else if(isTashkeelChar(dest[i])) {
    1306           0 :                 if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
    1307           0 :                      dest[i] != 0x064C && dest[i] != 0x064D )
    1308             :                 {
    1309           0 :                     Shape = 1;
    1310           0 :                     if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
    1311           0 :                         Shape = 0;
    1312             :                     }
    1313           0 :                 } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
    1314           0 :                     Shape = 1;
    1315             :                 } else {
    1316           0 :                     Shape = 0;
    1317             :                 }
    1318             :             }
    1319           0 :             if ((dest[i] ^ 0x0600) < 0x100) { /* true exactly when dest[i] lies in 0x0600..0x06FF */
    1320           0 :                 if ( isTashkeelChar(dest[i]) ){
    1321           0 :                     if (tashkeelFlag == 2  && dest[i] != SHADDA06_CHAR){
    1322           0 :                         dest[i] = TASHKEEL_SPACE_SUB; /* placeholder; tashkeelFound triggers handleGeneratedSpaces() below */
    1323           0 :                         tashkeelFound  = 1;
    1324             :                     } else {
    1325             :                         /* to ensure the array index is within the range */
    1326           0 :                         U_ASSERT(dest[i] >= 0x064Bu
    1327             :                             && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos));
    1328           0 :                         dest[i] =  0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
    1329             :                     }
    1330           0 :                 }else if ((currLink & APRESENT) > 0) {
    1331           0 :                     dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); /* APRESENT links index from 0xFB50 */
    1332           0 :                 }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) {
    1333           0 :                     dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); /* other links with a non-zero high byte index from 0xFE70 */
    1334             :                 }
    1335             :             }
    1336             :         }
    1337             : 
    1338             :         /* move one notch forward */
    1339           0 :         if ((currLink & IRRELEVANT) == 0) { /* IRRELEVANT characters do not update the link history */
    1340           0 :             prevLink = lastLink;
    1341           0 :             lastLink = currLink;
    1342           0 :             lastPos = i;
    1343             :         }
    1344             : 
    1345           0 :         i = i + step;
    1346           0 :         if (i == Nx) { /* reached the looked-ahead character: reuse its link and force a new lookup */
    1347           0 :             currLink = nextLink;
    1348           0 :             Nx = -2;
    1349           0 :         } else if(i != iend) {
    1350           0 :             currLink = getLink(dest[i]);
    1351             :         }
    1352             :     }
    1353           0 :     destSize = sourceLength; /* the incoming destSize value is discarded here */
    1354           0 :     if ( (lamalef_found != 0 ) || (tashkeelFound  != 0) ){
    1355           0 :         destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
    1356             :     }
    1357             : 
    1358           0 :     if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
    1359           0 :         destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars);
    1360             :     }
    1361           0 :     return destSize;
    1362             : }
    1363             : 
    1364             : /*
    1365             :  *Name     : deShapeUnicode
    1366             :  *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
    1367             :  *           arabic Unicode buffer in 06xx Range
                     :  *
                     :  *           dest is rewritten in place over indices 0..sourceLength-1.
                     :  *           The incoming destSize value is not read: it is overwritten with
                     :  *           sourceLength after the conversion loop.
                     :  *           YehHamza and Seen-tail recomposition only happen when options
                     :  *           select U_SHAPE_YEHHAMZA_TWOCELL_NEAR / U_SHAPE_SEEN_TWOCELL_NEAR.
                     :  *Returns  : sourceLength, or the value returned by expandCompositChar()
                     :  *           (called with DESHAPE_MODE) when a LamAlef character was seen.
    1368             :  */
    1369             : static int32_t
    1370           0 : deShapeUnicode(UChar *dest, int32_t sourceLength,
    1371             :                int32_t destSize,uint32_t options,
    1372             :                UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
    1373           0 :     int32_t i = 0;
    1374           0 :     int32_t lamalef_found = 0;
    1375           0 :     int32_t yehHamzaComposeEnabled = 0;
    1376           0 :     int32_t seenComposeEnabled = 0;
    1377             : 
    1378           0 :     yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
    1379           0 :     seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0;
    1380             : 
    1381             :     /*
    1382             :      *This for loop changes the buffer from the Unicode FE range to
    1383             :      *the Unicode 06 range
    1384             :      */
    1385             : 
    1386           0 :     for(i = 0; i < sourceLength; i++) {
    1387           0 :         UChar  inputChar = dest[i];
    1388           0 :         if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
    1389           0 :             UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
    1390           0 :             if (c != 0) /* a zero table entry leaves the character unchanged */
    1391           0 :                 dest[i] = c;
    1392           0 :         } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR))
    1393           0 :                 && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { /* Hamza followed by Alef Maksoura: recompose the pair */
    1394           0 :                  dest[i] = SPACE_CHAR;
    1395           0 :                  dest[i+1] = YEH_HAMZA_CHAR;
    1396           0 :         } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1))
    1397           0 :                         && (isSeenTailFamilyChar(dest[i+1])) ) { /* tail followed by a Seen-tail-family character: the tail becomes a space */
    1398           0 :                 dest[i] = SPACE_CHAR;
    1399           0 :         } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
    1400           0 :                 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; /* range stops at 0xFEF4, presumably so the LamAlef forms (indexed from 0xFEF5 elsewhere in this file) survive for the check below - TODO confirm */
    1401             :         } else {
    1402           0 :             dest[i] = inputChar ;
    1403             :         }
    1404             : 
    1405           0 :         if( isLamAlefChar(dest[i]) ) /* tested on the value stored after the conversion above */
    1406           0 :             lamalef_found = 1;
    1407             :     }
    1408             : 
    1409           0 :    destSize = sourceLength; /* the incoming destSize value is discarded here */
    1410           0 :    if (lamalef_found != 0){
    1411           0 :           destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars);
    1412             :    }
    1413           0 :    return destSize;
    1414             : }
    1415             : 
    1416             : /*
    1417             :  ****************************************
    1418             :  * u_shapeArabic
    1419             :  ****************************************
    1420             :  */
    1421             : 
    1422             : U_CAPI int32_t U_EXPORT2
    1423           0 : u_shapeArabic(const UChar *source, int32_t sourceLength,
    1424             :               UChar *dest, int32_t destCapacity,
    1425             :               uint32_t options,
    1426             :               UErrorCode *pErrorCode) {
    1427             : 
    1428             :     int32_t destLength;
    1429           0 :     struct  uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0};
    1430             : 
    1431             :     /* usual error checking */
    1432           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1433           0 :         return 0;
    1434             :     }
    1435             : 
    1436             :     /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
    1437           0 :     if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 ||
    1438           0 :                 (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
    1439           0 :                  ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) ||
    1440           0 :                 (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
    1441           0 :                  ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) ||
    1442           0 :                 (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED ||
    1443           0 :                 (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED ||
    1444           0 :                 ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE  &&
    1445           0 :                 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) ||
    1446           0 :                 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL &&
    1447           0 :                 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)
    1448             :     )
    1449             :     {
    1450           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1451           0 :         return 0;
    1452             :     }
    1453             :     /* Validate  lamalef options */
    1454           0 :     if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
    1455           0 :               !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) ||
    1456           0 :                 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) ||
    1457           0 :                 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )||
    1458           0 :                  ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) ||
    1459           0 :                  ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR)))
    1460             :     {
    1461           0 :          *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1462           0 :         return 0;
    1463             :     }
    1464             :     /* Validate  Tashkeel options */
    1465           0 :     if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
    1466           0 :                    !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) ||
    1467           0 :                      ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END )
    1468           0 :                     ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )||
    1469           0 :                     ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
    1470             :     {
    1471           0 :          *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1472           0 :         return 0;
    1473             :     }
    1474             :     /* determine the source length */
    1475           0 :     if(sourceLength==-1) {
    1476           0 :         sourceLength=u_strlen(source);
    1477             :     }
    1478           0 :     if(sourceLength<=0) {
    1479           0 :         return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    1480             :     }
    1481             : 
    1482             :     /* check that source and destination do not overlap */
    1483           0 :     if( dest!=NULL &&
    1484           0 :         ((source<=dest && dest<source+sourceLength) ||
    1485           0 :          (dest<=source && source<dest+destCapacity))) {
    1486           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1487           0 :         return 0;
    1488             :     }
    1489             : 
    1490             :     /* Does Options contain the new Seen Tail Unicode code point option */
    1491           0 :     if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){
    1492           0 :         shapeVars.tailChar = NEW_TAIL_CHAR;
    1493             :     }else {
    1494           0 :         shapeVars.tailChar = OLD_TAIL_CHAR;
    1495             :     }
    1496             : 
    1497           0 :     if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
    1498             :         UChar buffer[300];
    1499           0 :         UChar *tempbuffer, *tempsource = NULL;
    1500           0 :         int32_t outputSize, spacesCountl=0, spacesCountr=0;
    1501             : 
    1502           0 :         if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) {
    1503           0 :             int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL;
    1504             :             int32_t aggregate_tashkeel =
    1505           0 :                         (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) ==
    1506           0 :                         (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED);
    1507           0 :             int step=logical_order?1:-1;
    1508           0 :             int j=logical_order?-1:2*sourceLength;
    1509           0 :             int i=logical_order?-1:sourceLength;
    1510           0 :             int end=logical_order?sourceLength:-1;
    1511           0 :             int aggregation_possible = 1;
    1512           0 :             UChar prev = 0;
    1513           0 :             UChar prevLink, currLink = 0;
    1514           0 :             int newSourceLength = 0;
    1515           0 :             tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR);
    1516           0 :             if(tempsource == NULL) {
    1517           0 :                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    1518           0 :                 return 0;
    1519             :             }
    1520             : 
    1521           0 :             while ((i+=step) != end) {
    1522           0 :                 prevLink = currLink;
    1523           0 :                 currLink = getLink(source[i]);
    1524           0 :                 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) {
    1525           0 :                     aggregation_possible = 0;
    1526           0 :                     tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
    1527           0 :                     currLink = getLink(tempsource[j]);
    1528             :                 } else {
    1529           0 :                     aggregation_possible = 1;
    1530           0 :                     tempsource[j+=step] = source[i];
    1531           0 :                     prev = source[i];
    1532           0 :                     newSourceLength++;
    1533             :                 }
    1534             :             }
    1535           0 :             source = tempsource+(logical_order?0:j);
    1536           0 :             sourceLength = newSourceLength;
    1537             :         }
    1538             : 
    1539             :         /* calculate destination size */
    1540             :         /* TODO: do we ever need to do this pure preflighting? */
    1541           0 :         if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) ||
    1542           0 :            ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) {
    1543           0 :             outputSize=calculateSize(source,sourceLength,destCapacity,options);
    1544             :         } else {
    1545           0 :             outputSize=sourceLength;
    1546             :         }
    1547             : 
    1548           0 :         if(outputSize>destCapacity) {
    1549           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1550           0 :                 if (tempsource != NULL) uprv_free(tempsource);
    1551           0 :             return outputSize;
    1552             :         }
    1553             : 
    1554             :         /*
    1555             :          * need a temporary buffer of size max(outputSize, sourceLength)
    1556             :          * because at first we copy source->temp
    1557             :          */
    1558           0 :         if(sourceLength>outputSize) {
    1559           0 :             outputSize=sourceLength;
    1560             :         }
    1561             : 
    1562             :         /* Start of Arabic letter shaping part */
    1563           0 :         if(outputSize<=UPRV_LENGTHOF(buffer)) {
    1564           0 :             outputSize=UPRV_LENGTHOF(buffer);
    1565           0 :             tempbuffer=buffer;
    1566             :         } else {
    1567           0 :             tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR);
    1568             : 
    1569             :             /*Test for NULL*/
    1570           0 :             if(tempbuffer == NULL) {
    1571           0 :                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    1572           0 :                 if (tempsource != NULL) uprv_free(tempsource);
    1573           0 :                 return 0;
    1574             :             }
    1575             :         }
    1576           0 :         u_memcpy(tempbuffer, source, sourceLength);
    1577           0 :         if (tempsource != NULL){
    1578           0 :             uprv_free(tempsource);
    1579             :         }
    1580             : 
    1581           0 :         if(sourceLength<outputSize) {
    1582           0 :             uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR);
    1583             :         }
    1584             : 
    1585           0 :         if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
    1586           0 :             countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
    1587           0 :             invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
    1588             :         }
    1589             : 
    1590           0 :         if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
    1591           0 :             if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) {
    1592           0 :                 shapeVars.spacesRelativeToTextBeginEnd = 1;
    1593           0 :                 shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
    1594           0 :                 shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
    1595           0 :                 shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
    1596           0 :                 shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
    1597             :             }
    1598             :         }
    1599             : 
    1600           0 :         switch(options&U_SHAPE_LETTERS_MASK) {
    1601             :         case U_SHAPE_LETTERS_SHAPE :
    1602           0 :              if( (options&U_SHAPE_TASHKEEL_MASK)> 0
    1603           0 :                  && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) {
    1604             :                 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
    1605           0 :                 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
    1606             :              }else {
    1607             :                 /* default Call the shaping function with tashkeel flag == 1 */
    1608           0 :                 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);
    1609             : 
    1610             :                 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
    1611           0 :                 if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){
    1612           0 :                   destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode);
    1613             :                 }
    1614             :             }
    1615           0 :             break;
    1616             :         case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
    1617             :             /* Call the shaping function with tashkeel flag == 0 */
    1618           0 :             destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
    1619           0 :             break;
    1620             : 
    1621             :         case U_SHAPE_LETTERS_UNSHAPE :
    1622             :             /* Call the deshaping function */
    1623           0 :             destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
    1624           0 :             break;
    1625             :         default :
    1626             :             /* will never occur because of validity checks above */
    1627           0 :             destLength = 0;
    1628           0 :             break;
    1629             :         }
    1630             : 
    1631             :         /*
    1632             :          * TODO: (markus 2002aug01)
    1633             :          * For as long as we always preflight the outputSize above
    1634             :          * we should U_ASSERT(outputSize==destLength)
    1635             :          * except for the adjustment above before the tempbuffer allocation
    1636             :          */
    1637             : 
    1638           0 :         if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
    1639           0 :             countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
    1640           0 :             invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
    1641             :         }
    1642           0 :         u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity));
    1643             : 
    1644           0 :         if(tempbuffer!=buffer) {
    1645           0 :             uprv_free(tempbuffer);
    1646             :         }
    1647             : 
    1648           0 :         if(destLength>destCapacity) {
    1649           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1650           0 :             return destLength;
    1651             :         }
    1652             : 
    1653             :         /* End of Arabic letter shaping part */
    1654             :     } else {
    1655             :         /*
    1656             :          * No letter shaping:
    1657             :          * just make sure the destination is large enough and copy the string.
    1658             :          */
    1659           0 :         if(destCapacity<sourceLength) {
    1660             :             /* this catches preflighting, too */
    1661           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1662           0 :             return sourceLength;
    1663             :         }
    1664           0 :         u_memcpy(dest, source, sourceLength);
    1665           0 :         destLength=sourceLength;
    1666             :     }
    1667             : 
    1668             :     /*
    1669             :      * Perform number shaping.
    1670             :      * With UTF-16 or UTF-32, the length of the string is constant.
    1671             :      * The easiest way to do this is to operate on the destination and
    1672             :      * "shape" the digits in-place.
    1673             :      */
    1674           0 :     if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
    1675             :         UChar digitBase;
    1676             :         int32_t i;
    1677             : 
    1678             :         /* select the requested digit group */
    1679           0 :         switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
    1680             :         case U_SHAPE_DIGIT_TYPE_AN:
    1681           0 :             digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
    1682           0 :             break;
    1683             :         case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
    1684           0 :             digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
    1685           0 :             break;
    1686             :         default:
    1687             :             /* will never occur because of validity checks above */
    1688           0 :             digitBase=0;
    1689           0 :             break;
    1690             :         }
    1691             : 
    1692             :         /* perform the requested operation */
    1693           0 :         switch(options&U_SHAPE_DIGITS_MASK) {
    1694             :         case U_SHAPE_DIGITS_EN2AN:
    1695             :             /* add (digitBase-'0') to each European (ASCII) digit code point */
    1696           0 :             digitBase-=0x30;
    1697           0 :             for(i=0; i<destLength; ++i) {
    1698           0 :                 if(((uint32_t)dest[i]-0x30)<10) {
    1699           0 :                     dest[i]+=digitBase;
    1700             :                 }
    1701             :             }
    1702           0 :             break;
    1703             :         case U_SHAPE_DIGITS_AN2EN:
    1704             :             /* subtract (digitBase-'0') from each Arabic digit code point */
    1705           0 :             for(i=0; i<destLength; ++i) {
    1706           0 :                 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
    1707           0 :                     dest[i]-=digitBase-0x30;
    1708             :                 }
    1709             :             }
    1710           0 :             break;
    1711             :         case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
    1712           0 :             _shapeToArabicDigitsWithContext(dest, destLength,
    1713             :                                             digitBase,
    1714           0 :                                             (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
    1715           0 :                                             FALSE);
    1716           0 :             break;
    1717             :         case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
    1718           0 :             _shapeToArabicDigitsWithContext(dest, destLength,
    1719             :                                             digitBase,
    1720           0 :                                             (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
    1721           0 :                                             TRUE);
    1722           0 :             break;
    1723             :         default:
    1724             :             /* will never occur because of validity checks above */
    1725           0 :             break;
    1726             :         }
    1727             :     }
    1728             : 
    1729           0 :     return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
    1730             : }

Generated by: LCOV version 1.13