LCOV - code coverage report
Current view: top level - gfx/harfbuzz/src - hb-ot-shape-complex-thai.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 0 88 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright © 2010,2012  Google, Inc.
       3             :  *
       4             :  *  This is part of HarfBuzz, a text shaping library.
       5             :  *
       6             :  * Permission is hereby granted, without written agreement and without
       7             :  * license or royalty fees, to use, copy, modify, and distribute this
       8             :  * software and its documentation for any purpose, provided that the
       9             :  * above copyright notice and the following two paragraphs appear in
      10             :  * all copies of this software.
      11             :  *
      12             :  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
      13             :  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
      14             :  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
      15             :  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
      16             :  * DAMAGE.
      17             :  *
      18             :  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
      19             :  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
      20             :  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
      21             :  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
      22             :  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
      23             :  *
      24             :  * Google Author(s): Behdad Esfahbod
      25             :  */
      26             : 
      27             : #include "hb-ot-shape-complex-private.hh"
      28             : 
      29             : 
      30             : /* Thai / Lao shaper */
      31             : 
      32             : 
      33             : /* PUA shaping */
      34             : 
      35             : 
      36             : enum thai_consonant_type_t
      37             : {
      38             :   NC,
      39             :   AC,
      40             :   RC,
      41             :   DC,
      42             :   NOT_CONSONANT,
      43             :   NUM_CONSONANT_TYPES = NOT_CONSONANT
      44             : };
      45             : 
      46             : static thai_consonant_type_t
      47           0 : get_consonant_type (hb_codepoint_t u)
      48             : {
      49           0 :   if (u == 0x0E1Bu || u == 0x0E1Du || u == 0x0E1Fu/* || u == 0x0E2Cu*/)
      50           0 :     return AC;
      51           0 :   if (u == 0x0E0Du || u == 0x0E10u)
      52           0 :     return RC;
      53           0 :   if (u == 0x0E0Eu || u == 0x0E0Fu)
      54           0 :     return DC;
      55           0 :   if (hb_in_range (u, 0x0E01u, 0x0E2Eu))
      56           0 :     return NC;
      57           0 :   return NOT_CONSONANT;
      58             : }
      59             : 
      60             : 
      61             : enum thai_mark_type_t
      62             : {
      63             :   AV,
      64             :   BV,
      65             :   T,
      66             :   NOT_MARK,
      67             :   NUM_MARK_TYPES = NOT_MARK
      68             : };
      69             : 
      70             : static thai_mark_type_t
      71           0 : get_mark_type (hb_codepoint_t u)
      72             : {
      73           0 :   if (u == 0x0E31u || hb_in_range (u, 0x0E34u, 0x0E37u) ||
      74           0 :       u == 0x0E47u || hb_in_range (u, 0x0E4Du, 0x0E4Eu))
      75           0 :     return AV;
      76           0 :   if (hb_in_range (u, 0x0E38u, 0x0E3Au))
      77           0 :     return BV;
      78           0 :   if (hb_in_range (u, 0x0E48u, 0x0E4Cu))
      79           0 :     return T;
      80           0 :   return NOT_MARK;
      81             : }
      82             : 
      83             : 
      84             : enum thai_action_t
      85             : {
      86             :   NOP,
      87             :   SD,  /* Shift combining-mark down */
      88             :   SL,  /* Shift combining-mark left */
      89             :   SDL, /* Shift combining-mark down-left */
      90             :   RD   /* Remove descender from base */
      91             : };
      92             : 
      93             : static hb_codepoint_t
      94           0 : thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font)
      95             : {
      96             :   struct thai_pua_mapping_t {
      97             :     hb_codepoint_t u;
      98             :     hb_codepoint_t win_pua;
      99             :     hb_codepoint_t mac_pua;
     100           0 :   } const *pua_mappings = NULL;
     101             :   static const thai_pua_mapping_t SD_mappings[] = {
     102             :     {0x0E48u, 0xF70Au, 0xF88Bu}, /* MAI EK */
     103             :     {0x0E49u, 0xF70Bu, 0xF88Eu}, /* MAI THO */
     104             :     {0x0E4Au, 0xF70Cu, 0xF891u}, /* MAI TRI */
     105             :     {0x0E4Bu, 0xF70Du, 0xF894u}, /* MAI CHATTAWA */
     106             :     {0x0E4Cu, 0xF70Eu, 0xF897u}, /* THANTHAKHAT */
     107             :     {0x0E38u, 0xF718u, 0xF89Bu}, /* SARA U */
     108             :     {0x0E39u, 0xF719u, 0xF89Cu}, /* SARA UU */
     109             :     {0x0E3Au, 0xF71Au, 0xF89Du}, /* PHINTHU */
     110             :     {0x0000u, 0x0000u, 0x0000u}
     111             :   };
     112             :   static const thai_pua_mapping_t SDL_mappings[] = {
     113             :     {0x0E48u, 0xF705u, 0xF88Cu}, /* MAI EK */
     114             :     {0x0E49u, 0xF706u, 0xF88Fu}, /* MAI THO */
     115             :     {0x0E4Au, 0xF707u, 0xF892u}, /* MAI TRI */
     116             :     {0x0E4Bu, 0xF708u, 0xF895u}, /* MAI CHATTAWA */
     117             :     {0x0E4Cu, 0xF709u, 0xF898u}, /* THANTHAKHAT */
     118             :     {0x0000u, 0x0000u, 0x0000u}
     119             :   };
     120             :   static const thai_pua_mapping_t SL_mappings[] = {
     121             :     {0x0E48u, 0xF713u, 0xF88Au}, /* MAI EK */
     122             :     {0x0E49u, 0xF714u, 0xF88Du}, /* MAI THO */
     123             :     {0x0E4Au, 0xF715u, 0xF890u}, /* MAI TRI */
     124             :     {0x0E4Bu, 0xF716u, 0xF893u}, /* MAI CHATTAWA */
     125             :     {0x0E4Cu, 0xF717u, 0xF896u}, /* THANTHAKHAT */
     126             :     {0x0E31u, 0xF710u, 0xF884u}, /* MAI HAN-AKAT */
     127             :     {0x0E34u, 0xF701u, 0xF885u}, /* SARA I */
     128             :     {0x0E35u, 0xF702u, 0xF886u}, /* SARA II */
     129             :     {0x0E36u, 0xF703u, 0xF887u}, /* SARA UE */
     130             :     {0x0E37u, 0xF704u, 0xF888u}, /* SARA UEE */
     131             :     {0x0E47u, 0xF712u, 0xF889u}, /* MAITAIKHU */
     132             :     {0x0E4Du, 0xF711u, 0xF899u}, /* NIKHAHIT */
     133             :     {0x0000u, 0x0000u, 0x0000u}
     134             :   };
     135             :   static const thai_pua_mapping_t RD_mappings[] = {
     136             :     {0x0E0Du, 0xF70Fu, 0xF89Au}, /* YO YING */
     137             :     {0x0E10u, 0xF700u, 0xF89Eu}, /* THO THAN */
     138             :     {0x0000u, 0x0000u, 0x0000u}
     139             :   };
     140             : 
     141           0 :   switch (action) {
     142           0 :     case NOP: return u;
     143           0 :     case SD:  pua_mappings = SD_mappings; break;
     144           0 :     case SDL: pua_mappings = SDL_mappings; break;
     145           0 :     case SL:  pua_mappings = SL_mappings; break;
     146           0 :     case RD:  pua_mappings = RD_mappings; break;
     147             :   }
     148           0 :   for (; pua_mappings->u; pua_mappings++)
     149           0 :     if (pua_mappings->u == u)
     150             :     {
     151             :       hb_codepoint_t glyph;
     152           0 :       if (hb_font_get_glyph (font, pua_mappings->win_pua, 0, &glyph))
     153           0 :         return pua_mappings->win_pua;
     154           0 :       if (hb_font_get_glyph (font, pua_mappings->mac_pua, 0, &glyph))
     155           0 :         return pua_mappings->mac_pua;
     156           0 :       break;
     157             :     }
     158           0 :   return u;
     159             : }
     160             : 
     161             : 
     162             : static enum thai_above_state_t
     163             : {     /* Cluster above looks like: */
     164             :   T0, /*  ⣤                      */
     165             :   T1, /*     ⣼                   */
     166             :   T2, /*        ⣾                */
     167             :   T3, /*           ⣿             */
     168             :   NUM_ABOVE_STATES
     169             : } thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
     170             : {
     171             :   T0, /* NC */
     172             :   T1, /* AC */
     173             :   T0, /* RC */
     174             :   T0, /* DC */
     175             :   T3, /* NOT_CONSONANT */
     176             : };
     177             : 
     178             : static const struct thai_above_state_machine_edge_t {
     179             :   thai_action_t action;
     180             :   thai_above_state_t next_state;
     181             : } thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] =
     182             : {        /*AV*/    /*BV*/    /*T*/
     183             : /*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}},
     184             : /*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}},
     185             : /*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}},
     186             : /*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}},
     187             : };
     188             : 
     189             : 
     190             : static enum thai_below_state_t
     191             : {
     192             :   B0, /* No descender */
     193             :   B1, /* Removable descender */
     194             :   B2, /* Strict descender */
     195             :   NUM_BELOW_STATES
     196             : } thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] =
     197             : {
     198             :   B0, /* NC */
     199             :   B0, /* AC */
     200             :   B1, /* RC */
     201             :   B2, /* DC */
     202             :   B2, /* NOT_CONSONANT */
     203             : };
     204             : 
     205             : static const struct thai_below_state_machine_edge_t {
     206             :   thai_action_t action;
     207             :   thai_below_state_t next_state;
     208             : } thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] =
     209             : {        /*AV*/    /*BV*/    /*T*/
     210             : /*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}},
     211             : /*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}},
     212             : /*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}},
     213             : };
     214             : 
     215             : 
     216             : static void
     217           0 : do_thai_pua_shaping (const hb_ot_shape_plan_t *plan HB_UNUSED,
     218             :                      hb_buffer_t              *buffer,
     219             :                      hb_font_t                *font)
     220             : {
     221           0 :   thai_above_state_t above_state = thai_above_start_state[NOT_CONSONANT];
     222           0 :   thai_below_state_t below_state = thai_below_start_state[NOT_CONSONANT];
     223           0 :   unsigned int base = 0;
     224             : 
     225           0 :   hb_glyph_info_t *info = buffer->info;
     226           0 :   unsigned int count = buffer->len;
     227           0 :   for (unsigned int i = 0; i < count; i++)
     228             :   {
     229           0 :     thai_mark_type_t mt = get_mark_type (info[i].codepoint);
     230             : 
     231           0 :     if (mt == NOT_MARK) {
     232           0 :       thai_consonant_type_t ct = get_consonant_type (info[i].codepoint);
     233           0 :       above_state = thai_above_start_state[ct];
     234           0 :       below_state = thai_below_start_state[ct];
     235           0 :       base = i;
     236           0 :       continue;
     237             :     }
     238             : 
     239           0 :     const thai_above_state_machine_edge_t &above_edge = thai_above_state_machine[above_state][mt];
     240           0 :     const thai_below_state_machine_edge_t &below_edge = thai_below_state_machine[below_state][mt];
     241           0 :     above_state = above_edge.next_state;
     242           0 :     below_state = below_edge.next_state;
     243             : 
     244             :     /* At least one of the above/below actions is NOP. */
     245           0 :     thai_action_t action = above_edge.action != NOP ? above_edge.action : below_edge.action;
     246             : 
     247           0 :     if (action == RD)
     248           0 :       info[base].codepoint = thai_pua_shape (info[base].codepoint, action, font);
     249             :     else
     250           0 :       info[i].codepoint = thai_pua_shape (info[i].codepoint, action, font);
     251             :   }
     252           0 : }
     253             : 
     254             : 
     255             : static void
     256           0 : preprocess_text_thai (const hb_ot_shape_plan_t *plan,
     257             :                       hb_buffer_t              *buffer,
     258             :                       hb_font_t                *font)
     259             : {
     260             :   /* This function implements the shaping logic documented here:
     261             :    *
     262             :    *   http://linux.thai.net/~thep/th-otf/shaping.html
     263             :    *
     264             :    * The first shaping rule listed there is needed even if the font has Thai
     265             :    * OpenType tables.  The rest do fallback positioning based on PUA codepoints.
     266             :    * We implement that only if there exist no Thai GSUB in the font.
     267             :    */
     268             : 
     269             :   /* The following is NOT specified in the MS OT Thai spec, however, it seems
     270             :    * to be what Uniscribe and other engines implement.  According to Eric Muller:
     271             :    *
     272             :    * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the
     273             :    * NIKHAHIT backwards over any tone mark (0E48-0E4B).
     274             :    *
     275             :    * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
     276             :    *
     277             :    * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not
     278             :    * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably
     279             :    * not what a user wanted, but the rendering is nevertheless nikhahit above
     280             :    * chattawa.
     281             :    *
     282             :    * Same for Lao.
     283             :    *
     284             :    * Note:
     285             :    *
     286             :    * Uniscribe also does some below-marks reordering.  Namely, it positions U+0E3A
     287             :    * after U+0E38 and U+0E39.  We do that by modifying the ccc for U+0E3A.
     288             :    * See unicode->modified_combining_class ().  Lao does NOT have a U+0E3A
     289             :    * equivalent.
     290             :    */
     291             : 
     292             : 
     293             :   /*
     294             :    * Here are the characters of significance:
     295             :    *
     296             :    *                    Thai    Lao
     297             :    * SARA AM:           U+0E33  U+0EB3
     298             :    * SARA AA:           U+0E32  U+0EB2
     299             :    * Nikhahit:          U+0E4D  U+0ECD
     300             :    *
     301             :    * Testing shows that Uniscribe reorder the following marks:
     302             :    * Thai:      <0E31,0E34..0E37,0E47..0E4E>
     303             :    * Lao:       <0EB1,0EB4..0EB7,0EC7..0ECE>
     304             :    *
     305             :    * Note how the Lao versions are the same as Thai + 0x80.
     306             :    */
     307             : 
     308             :   /* We only get one script at a time, so a script-agnostic implementation
     309             :    * is adequate here. */
     310             : #define IS_SARA_AM(x) (((x) & ~0x0080u) == 0x0E33u)
     311             : #define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0x0E33u + 0x0E4Du)
     312             : #define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
     313             : #define IS_TONE_MARK(x) (hb_in_ranges ((x) & ~0x0080u, 0x0E34u, 0x0E37u, 0x0E47u, 0x0E4Eu, 0x0E31u, 0x0E31u))
     314             : 
     315           0 :   buffer->clear_output ();
     316           0 :   unsigned int count = buffer->len;
     317           0 :   for (buffer->idx = 0; buffer->idx < count && !buffer->in_error;)
     318             :   {
     319           0 :     hb_codepoint_t u = buffer->cur().codepoint;
     320           0 :     if (likely (!IS_SARA_AM (u))) {
     321           0 :       buffer->next_glyph ();
     322           0 :       continue;
     323             :     }
     324             : 
     325             :     /* Is SARA AM. Decompose and reorder. */
     326           0 :     hb_codepoint_t decomposed[2] = {hb_codepoint_t (NIKHAHIT_FROM_SARA_AM (u)),
     327           0 :                                     hb_codepoint_t (SARA_AA_FROM_SARA_AM (u))};
     328           0 :     buffer->replace_glyphs (1, 2, decomposed);
     329           0 :     if (unlikely (buffer->in_error))
     330           0 :       return;
     331             : 
     332             :     /* Make Nikhahit be recognized as a ccc=0 mark when zeroing widths. */
     333           0 :     unsigned int end = buffer->out_len;
     334           0 :     _hb_glyph_info_set_general_category (&buffer->out_info[end - 2], HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK);
     335             : 
     336             :     /* Ok, let's see... */
     337           0 :     unsigned int start = end - 2;
     338           0 :     while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))
     339           0 :       start--;
     340             : 
     341           0 :     if (start + 2 < end)
     342             :     {
     343             :       /* Move Nikhahit (end-2) to the beginning */
     344           0 :       buffer->merge_out_clusters (start, end);
     345           0 :       hb_glyph_info_t t = buffer->out_info[end - 2];
     346           0 :       memmove (buffer->out_info + start + 1,
     347           0 :                buffer->out_info + start,
     348           0 :                sizeof (buffer->out_info[0]) * (end - start - 2));
     349           0 :       buffer->out_info[start] = t;
     350             :     }
     351             :     else
     352             :     {
     353             :       /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
     354             :        * previous cluster. */
     355           0 :       if (start && buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
     356           0 :         buffer->merge_out_clusters (start - 1, end);
     357             :     }
     358             :   }
     359           0 :   buffer->swap_buffers ();
     360             : 
     361             :   /* If font has Thai GSUB, we are done. */
     362           0 :   if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0])
     363           0 :     do_thai_pua_shaping (plan, buffer, font);
     364             : }
     365             : 
     366             : const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
     367             : {
     368             :   "thai",
     369             :   NULL, /* collect_features */
     370             :   NULL, /* override_features */
     371             :   NULL, /* data_create */
     372             :   NULL, /* data_destroy */
     373             :   preprocess_text_thai,
     374             :   NULL, /* postprocess_glyphs */
     375             :   HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
     376             :   NULL, /* decompose */
     377             :   NULL, /* compose */
     378             :   NULL, /* setup_masks */
     379             :   NULL, /* disable_otl */
     380             :   HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
     381             :   false,/* fallback_position */
     382             : };

Generated by: LCOV version 1.13