LCOV - code coverage report
Current view: top level - intl/unicharutil/util - GreekCasing.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 100 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 1 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "GreekCasing.h"
       7             : #include "nsUnicharUtils.h"
       8             : #include "nsUnicodeProperties.h"
       9             : 
      10             : // Custom uppercase mapping for Greek; see bug 307039 for details
      11             : #define GREEK_LOWER_ALPHA                      0x03B1
      12             : #define GREEK_LOWER_ALPHA_TONOS                0x03AC
      13             : #define GREEK_LOWER_ALPHA_OXIA                 0x1F71
      14             : #define GREEK_LOWER_EPSILON                    0x03B5
      15             : #define GREEK_LOWER_EPSILON_TONOS              0x03AD
      16             : #define GREEK_LOWER_EPSILON_OXIA               0x1F73
      17             : #define GREEK_LOWER_ETA                        0x03B7
      18             : #define GREEK_LOWER_ETA_TONOS                  0x03AE
      19             : #define GREEK_LOWER_ETA_OXIA                   0x1F75
      20             : #define GREEK_LOWER_IOTA                       0x03B9
      21             : #define GREEK_LOWER_IOTA_TONOS                 0x03AF
      22             : #define GREEK_LOWER_IOTA_OXIA                  0x1F77
      23             : #define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
      24             : #define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
      25             : #define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
      26             : #define GREEK_LOWER_OMICRON                    0x03BF
      27             : #define GREEK_LOWER_OMICRON_TONOS              0x03CC
      28             : #define GREEK_LOWER_OMICRON_OXIA               0x1F79
      29             : #define GREEK_LOWER_UPSILON                    0x03C5
      30             : #define GREEK_LOWER_UPSILON_TONOS              0x03CD
      31             : #define GREEK_LOWER_UPSILON_OXIA               0x1F7B
      32             : #define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
      33             : #define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
      34             : #define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
      35             : #define GREEK_LOWER_OMEGA                      0x03C9
      36             : #define GREEK_LOWER_OMEGA_TONOS                0x03CE
      37             : #define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
      38             : #define GREEK_UPPER_ALPHA                      0x0391
      39             : #define GREEK_UPPER_EPSILON                    0x0395
      40             : #define GREEK_UPPER_ETA                        0x0397
      41             : #define GREEK_UPPER_IOTA                       0x0399
      42             : #define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
      43             : #define GREEK_UPPER_OMICRON                    0x039F
      44             : #define GREEK_UPPER_UPSILON                    0x03A5
      45             : #define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
      46             : #define GREEK_UPPER_OMEGA                      0x03A9
      47             : #define GREEK_UPPER_ALPHA_TONOS                0x0386
      48             : #define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
      49             : #define GREEK_UPPER_EPSILON_TONOS              0x0388
      50             : #define GREEK_UPPER_EPSILON_OXIA               0x1FC9
      51             : #define GREEK_UPPER_ETA_TONOS                  0x0389
      52             : #define GREEK_UPPER_ETA_OXIA                   0x1FCB
      53             : #define GREEK_UPPER_IOTA_TONOS                 0x038A
      54             : #define GREEK_UPPER_IOTA_OXIA                  0x1FDB
      55             : #define GREEK_UPPER_OMICRON_TONOS              0x038C
      56             : #define GREEK_UPPER_OMICRON_OXIA               0x1FF9
      57             : #define GREEK_UPPER_UPSILON_TONOS              0x038E
      58             : #define GREEK_UPPER_UPSILON_OXIA               0x1FEB
      59             : #define GREEK_UPPER_OMEGA_TONOS                0x038F
      60             : #define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
      61             : #define COMBINING_ACUTE_ACCENT                 0x0301
      62             : #define COMBINING_DIAERESIS                    0x0308
      63             : #define COMBINING_ACUTE_TONE_MARK              0x0341
      64             : #define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344
      65             : 
      66             : namespace mozilla {
      67             : 
      68             : uint32_t
      69           0 : GreekCasing::UpperCase(uint32_t aCh, GreekCasing::State& aState,
      70             :                        bool& aMarkEtaPos, bool& aUpdateMarkedEta)
      71             : {
      72           0 :   aMarkEtaPos = false;
      73           0 :   aUpdateMarkedEta = false;
      74             : 
      75           0 :   uint8_t category = unicode::GetGeneralCategory(aCh);
      76             : 
      77           0 :   if (aState == kEtaAccMarked) { // previous call kept the tonos on an eta seen in kStart state
      78           0 :     switch (category) {
      79             :       case HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER:
      80             :       case HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER:
      81             :       case HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER:
      82             :       case HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER:
      83             :       case HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER:
      84             :       case HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK:
      85             :       case HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK:
      86             :       case HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK:
      87           0 :         aUpdateMarkedEta = true; // a letter or mark follows the marked eta; presumably the caller then drops its tonos (confirm in caller)
      88           0 :         break;
      89             :       default:
      90           0 :         break;
      91             :     }
      92           0 :     aState = kEtaAcc; // from here on, behave as if an accented eta had just been seen
      93             :   }
      94             : 
      95           0 :   switch (aCh) {
      96             :   case GREEK_UPPER_ALPHA:
      97             :   case GREEK_LOWER_ALPHA:
      98           0 :     aState = kAlpha;
      99           0 :     return GREEK_UPPER_ALPHA;
     100             : 
     101             :   case GREEK_UPPER_EPSILON:
     102             :   case GREEK_LOWER_EPSILON:
     103           0 :     aState = kEpsilon;
     104           0 :     return GREEK_UPPER_EPSILON;
     105             : 
     106             :   case GREEK_UPPER_ETA:
     107             :   case GREEK_LOWER_ETA:
     108           0 :     aState = kEta;
     109           0 :     return GREEK_UPPER_ETA;
     110             : 
     111             :   case GREEK_UPPER_IOTA:
     112           0 :     aState = kIota;
     113           0 :     return GREEK_UPPER_IOTA;
     114             : 
     115             :   case GREEK_UPPER_OMICRON:
     116             :   case GREEK_LOWER_OMICRON:
     117           0 :     aState = kOmicron;
     118           0 :     return GREEK_UPPER_OMICRON;
     119             : 
     120             :   case GREEK_UPPER_UPSILON:
     121           0 :     switch (aState) {
     122             :     case kOmicron:
     123           0 :       aState = kOmicronUpsilon;
     124           0 :       break;
     125             :     default:
     126           0 :       aState = kUpsilon;
     127           0 :       break;
     128             :     }
     129           0 :     return GREEK_UPPER_UPSILON;
     130             : 
     131             :   case GREEK_UPPER_OMEGA:
     132             :   case GREEK_LOWER_OMEGA:
     133           0 :     aState = kOmega;
     134           0 :     return GREEK_UPPER_OMEGA;
     135             : 
     136             :   // iota and upsilon may be the second vowel of a diphthong
     137             :   case GREEK_LOWER_IOTA:
     138           0 :     switch (aState) {
     139             :     case kAlphaAcc:
     140             :     case kEpsilonAcc:
     141             :     case kOmicronAcc:
     142             :     case kUpsilonAcc:
     143           0 :       aState = kInWord;
     144           0 :       return GREEK_UPPER_IOTA_DIALYTIKA;
     145             :     default:
     146           0 :       break;
     147             :     }
     148           0 :     aState = kIota;
     149           0 :     return GREEK_UPPER_IOTA;
     150             : 
     151             :   case GREEK_LOWER_UPSILON:
     152           0 :     switch (aState) {
     153             :     case kAlphaAcc:
     154             :     case kEpsilonAcc:
     155             :     case kEtaAcc:
     156             :     case kOmicronAcc:
     157           0 :       aState = kInWord;
     158           0 :       return GREEK_UPPER_UPSILON_DIALYTIKA;
     159             :     case kOmicron:
     160           0 :       aState = kOmicronUpsilon;
     161           0 :       break;
     162             :     default:
     163           0 :       aState = kUpsilon;
     164           0 :       break;
     165             :     }
     166           0 :     return GREEK_UPPER_UPSILON;
     167             : 
     168             :   case GREEK_UPPER_IOTA_DIALYTIKA:
     169             :   case GREEK_LOWER_IOTA_DIALYTIKA:
     170             :   case GREEK_UPPER_UPSILON_DIALYTIKA:
     171             :   case GREEK_LOWER_UPSILON_DIALYTIKA:
     172             :   case COMBINING_DIAERESIS:
     173           0 :     aState = kDiaeresis;
     174           0 :     return ToUpperCase(aCh);
     175             : 
     176             :   // remove accent if it follows a vowel or diaeresis,
     177             :   // and set appropriate state for diphthong detection
     178             :   case COMBINING_ACUTE_ACCENT:
     179             :   case COMBINING_ACUTE_TONE_MARK:
     180           0 :     switch (aState) {
     181             :     case kAlpha:
     182           0 :       aState = kAlphaAcc;
     183           0 :       return uint32_t(-1); // omit this char from result string
     184             :     case kEpsilon:
     185           0 :       aState = kEpsilonAcc;
     186           0 :       return uint32_t(-1);
     187             :     case kEta:
     188           0 :       aState = kEtaAcc;
     189           0 :       return uint32_t(-1);
     190             :     case kIota:
     191           0 :       aState = kIotaAcc;
     192           0 :       return uint32_t(-1);
     193             :     case kOmicron:
     194           0 :       aState = kOmicronAcc;
     195           0 :       return uint32_t(-1);
     196             :     case kUpsilon:
     197           0 :       aState = kUpsilonAcc;
     198           0 :       return uint32_t(-1);
     199             :     case kOmicronUpsilon:
     200           0 :       aState = kInWord; // this completed a diphthong
     201           0 :       return uint32_t(-1);
     202             :     case kOmega:
     203           0 :       aState = kOmegaAcc;
     204           0 :       return uint32_t(-1);
     205             :     case kDiaeresis:
     206           0 :       aState = kInWord;
     207           0 :       return uint32_t(-1);
     208             :     default:
     209           0 :       break;
     210             :     }
     211           0 :     break; // accent not after a tracked vowel/diaeresis: handled by the generic path below
     212             : 
     213             :   // combinations with diaeresis+accent just strip the accent,
     214             :   // and reset to the kInWord state (don't form diphthong with following vowel)
     215             :   case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
     216             :   case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
     217           0 :     aState = kInWord;
     218           0 :     return GREEK_UPPER_IOTA_DIALYTIKA;
     219             : 
     220             :   case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
     221             :   case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
     222           0 :     aState = kInWord;
     223           0 :     return GREEK_UPPER_UPSILON_DIALYTIKA;
     224             : 
     225             :   case COMBINING_GREEK_DIALYTIKA_TONOS:
     226           0 :     aState = kInWord;
     227           0 :     return COMBINING_DIAERESIS;
     228             : 
     229             :   // strip accents from vowels, and note the vowel seen so that we can detect
     230             :   // diphthongs where diaeresis needs to be added
     231             :   case GREEK_LOWER_ALPHA_TONOS:
     232             :   case GREEK_LOWER_ALPHA_OXIA:
     233             :   case GREEK_UPPER_ALPHA_TONOS:
     234             :   case GREEK_UPPER_ALPHA_OXIA:
     235           0 :     aState = kAlphaAcc;
     236           0 :     return GREEK_UPPER_ALPHA;
     237             : 
     238             :   case GREEK_LOWER_EPSILON_TONOS:
     239             :   case GREEK_LOWER_EPSILON_OXIA:
     240             :   case GREEK_UPPER_EPSILON_TONOS:
     241             :   case GREEK_UPPER_EPSILON_OXIA:
     242           0 :     aState = kEpsilonAcc;
     243           0 :     return GREEK_UPPER_EPSILON;
     244             : 
     245             :   case GREEK_LOWER_ETA_TONOS:
     246             :   case GREEK_UPPER_ETA_TONOS:
     247           0 :     if (aState == kStart) {
     248           0 :       aState = kEtaAccMarked;
     249           0 :       aMarkEtaPos = true; // mark in case we need to remove the tonos later
     250           0 :       return GREEK_UPPER_ETA_TONOS; // treat as disjunctive eta for now
     251             :     }
     252             :     // if not in initial state, fall through to strip the accent
     253             :     MOZ_FALLTHROUGH;
     254             : 
     255             :   case GREEK_LOWER_ETA_OXIA:
     256             :   case GREEK_UPPER_ETA_OXIA:
     257           0 :     aState = kEtaAcc;
     258           0 :     return GREEK_UPPER_ETA;
     259             : 
     260             :   case GREEK_LOWER_IOTA_TONOS:
     261             :   case GREEK_LOWER_IOTA_OXIA:
     262             :   case GREEK_UPPER_IOTA_TONOS:
     263             :   case GREEK_UPPER_IOTA_OXIA:
     264           0 :     aState = kIotaAcc;
     265           0 :     return GREEK_UPPER_IOTA;
     266             : 
     267             :   case GREEK_LOWER_OMICRON_TONOS:
     268             :   case GREEK_LOWER_OMICRON_OXIA:
     269             :   case GREEK_UPPER_OMICRON_TONOS:
     270             :   case GREEK_UPPER_OMICRON_OXIA:
     271           0 :     aState = kOmicronAcc;
     272           0 :     return GREEK_UPPER_OMICRON;
     273             : 
     274             :   case GREEK_LOWER_UPSILON_TONOS:
     275             :   case GREEK_LOWER_UPSILON_OXIA:
     276             :   case GREEK_UPPER_UPSILON_TONOS:
     277             :   case GREEK_UPPER_UPSILON_OXIA:
     278           0 :     switch (aState) {
     279             :     case kOmicron:
     280           0 :       aState = kInWord; // this completed a diphthong
     281           0 :       break;
     282             :     default:
     283           0 :       aState = kUpsilonAcc;
     284           0 :       break;
     285             :     }
     286           0 :     return GREEK_UPPER_UPSILON;
     287             : 
     288             :   case GREEK_LOWER_OMEGA_TONOS:
     289             :   case GREEK_LOWER_OMEGA_OXIA:
     290             :   case GREEK_UPPER_OMEGA_TONOS:
     291             :   case GREEK_UPPER_OMEGA_OXIA:
     292           0 :     aState = kOmegaAcc;
     293           0 :     return GREEK_UPPER_OMEGA;
     294             :   }
     295             : 
     296             :   // all other characters just reset the state to either kStart or kInWord,
     297             :   // and use standard mappings
     298           0 :   switch (category) {
     299             :     case HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER:
     300             :     case HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER:
     301             :     case HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER:
     302             :     case HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER:
     303             :     case HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER:
     304             :     case HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK:
     305             :     case HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK:
     306             :     case HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK:
     307           0 :       aState = kInWord; // letters and marks keep us inside a word
     308           0 :       break;
     309             :     default:
     310           0 :       aState = kStart; // any other category puts the machine back in its initial state
     311           0 :       break;
     312             :   }
     313             : 
     314           0 :   return ToUpperCase(aCh);
     315             : }
     316             : 
     317             : } // namespace mozilla

Generated by: LCOV version 1.13