LCOV - code coverage report
Current view: top level - gfx/thebes - nsUnicodeRange.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 1 18 5.6 %
Date: 2017-07-14 16:53:18 Functions: 2 3 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "nsUnicodeRange.h"
       7             : 
       8             : /**********************************************************************
       9             :  * Unicode subranges as defined in Unicode 3.0
      10             :  * x-western  -> latin
      11             :  *  0000 - 036f
      12             :  *  1e00 - 1eff
      13             :  *  2000 - 206f  (general punctuation)
      14             :  *  20a0 - 20cf  (currency symbols)
      15             :  *  2100 - 214f  (letterlike symbols)
      16             :  *  2150 - 218f  (Number Forms)
      17             :  * el         -> greek
      18             :  *  0370 - 03ff
      19             :  *  1f00 - 1fff
      20             :  * x-cyrillic -> cyrillic
      21             :  *  0400 - 04ff
      22             :  * he         -> hebrew
      23             :  *  0590 - 05ff
      24             :  * ar         -> arabic
      25             :  *  0600 - 06ff
      26             :  *  fb50 - fdff (arabic presentation forms)
      27             :  *  fe70 - feff (arabic presentation forms b)
      28             :  * th         -> thai
      29             :  *  0e00 - 0e7f
      30             :  * ko        -> korean
      31             :  *  ac00 - d7af  (hangul Syllables)
      32             :  *  1100 - 11ff    (jamo)
      33             :  *  3130 - 318f (hangul compatibility jamo)
      34             :  * ja
      35             :  *  3040 - 309f (hiragana)
      36             :  *  30a0 - 30ff (katakana)
      37             :  * zh-CN
      38             :  * zh-TW
      39             :  *
      40             :  * CJK
      41             :  *  3100 - 312f (bopomofo)
      42             :  *  31a0 - 31bf (bopomofo extended)
      43             :  *  3000 - 303f (CJK Symbols and Punctuation) 
      44             :  *  2e80 - 2eff (CJK radicals supplement)
      45             :  *  2f00 - 2fdf (Kangxi Radicals)
      46             :  *  2ff0 - 2fff (Ideographic Description Characters)
      47             :  *  3190 - 319f (kanbun)
      48             :  *  3200 - 32ff (Enclosed CJK letters and Months)
      49             :  *  3300 - 33ff (CJK compatibility)
      50             :  *  3400 - 4dbf (CJK Unified Ideographs Extension A)
      51             :  *  4e00 - 9faf (CJK Unified Ideographs)
      52             :  *  f900 - fa5f (CJK Compatibility Ideographs)
      53             :  *  fe30 - fe4f (CJK compatibility Forms)
      54             :  *  ff00 - ffef (halfwidth and fullwidth forms)
      55             :  *
      56             :  * Armenian
      57             :  *  0530 - 058f 
      58             :  * Syriac
      59             :  *  0700 - 074f
      60             :  * Thaana
      61             :  *  0780 - 07bf
      62             :  * Devanagari
      63             :  *  0900 - 097f
      64             :  * Bengali
      65             :  *  0980 - 09ff
      66             :  * Gurmukhi
      67             :  *  0a00 - 0a7f
      68             :  * Gujarati
      69             :  *  0a80 - 0aff
      70             :  * Oriya
      71             :  *  0b00 - 0b7f
      72             :  * Tamil
      73             :  *  0b80 - 0bff
      74             :  * Telugu
      75             :  *  0c00 - 0c7f
      76             :  * Kannada
      77             :  *  0c80 - 0cff
      78             :  * Malayalam
      79             :  *  0d00 - 0d7f
      80             :  * Sinhala
      81             :  *  0d80 - 0def
      82             :  * Lao
      83             :  *  0e80 - 0eff
      84             :  * Tibetan
      85             :  *  0f00 - 0fbf
      86             :  * Myanmar
      87             :  *  1000 - 109f
      88             :  * Georgian
      89             :  *  10a0 - 10ff
      90             :  * Ethiopic
      91             :  *  1200 - 137f
      92             :  * Cherokee
      93             :  *  13a0 - 13ff
      94             :  * Canadian Aboriginal Syllabics
      95             :  *  1400 - 167f
      96             :  * Ogham
      97             :  *  1680 - 169f
      98             :  * Runic 
      99             :  *  16a0 - 16ff
     100             :  * Khmer
     101             :  *  1780 - 17ff
     102             :  * Mongolian
     103             :  *  1800 - 18af
     104             :  * Misc - superscripts and subscripts
     105             :  *  2070 - 209f
     106             :  * Misc - Combining Diacritical Marks for Symbols
     107             :  *  20d0 - 20ff
     108             :  * Misc - Arrows
     109             :  *  2190 - 21ff
     110             :  * Misc - Mathematical Operators
     111             :  *  2200 - 22ff
     112             :  * Misc - Miscellaneous Technical
     113             :  *  2300 - 23ff
     114             :  * Misc - Control Pictures
     115             :  *  2400 - 243f
     116             :  * Misc - Optical character recognition
     117             :  *  2440 - 2450
     118             :  * Misc - Enclosed Alphanumerics
     119             :  *  2460 - 24ff
     120             :  * Misc - Box Drawing 
     121             :  *  2500 - 257f
     122             :  * Misc - Block Elements
     123             :  *  2580 - 259f
     124             :  * Misc - Geometric Shapes
     125             :  *  25a0 - 25ff
     126             :  * Misc - Miscellaneous Symbols
     127             :  *  2600 - 267f
     128             :  * Misc - Dingbats
     129             :  *  2700 - 27bf
     130             :  * Misc - Braille Patterns
     131             :  *  2800 - 28ff
     132             :  * Yi Syllables
     133             :  *  a000 - a48f
     134             :  * Yi radicals
     135             :  *  a490 - a4cf
     136             :  * Alphabetic Presentation Forms
     137             :  *  fb00 - fb4f
     138             :  * Misc - Combining half Marks
     139             :  *  fe20 - fe2f
     140             :  * Misc - small form variants
     141             :  *  fe50 - fe6f
     142             :  * Misc - Specials
     143             :  *  fff0 - ffff
     144             :  *********************************************************************/
     145             : 
     146             : // gUnicodeSubrangeTable: rows of 16 entries, each indexed by one hex digit of a BMP code point (see FindCharUnicodeRange).
     147             : // Row 0 is indexed by the top digit; an entry kRangeTableBase+N selects row N for the next digit, kRangeTertiaryTable selects gUnicodeTertiaryRangeTable, and any other entry is the final range.
     148             : #define NUM_OF_SUBTABLES      10   // 1 top-level row + 6 second-level rows + 3 third-level rows
     149             : #define SUBTABLE_SIZE         16   // one entry per hex digit value
     150             : 
     151             : static const uint8_t gUnicodeSubrangeTable[NUM_OF_SUBTABLES][SUBTABLE_SIZE] =
     152             : {
     153             :   { // row 0: table for X--- (indexed by the top hex digit)
     154             :     kRangeTableBase+1,  //u0xxx
     155             :     kRangeTableBase+2,  //u1xxx
     156             :     kRangeTableBase+3,  //u2xxx
     157             :     kRangeSetCJK,       //u3xxx
     158             :     kRangeSetCJK,       //u4xxx
     159             :     kRangeSetCJK,       //u5xxx
     160             :     kRangeSetCJK,       //u6xxx
     161             :     kRangeSetCJK,       //u7xxx
     162             :     kRangeSetCJK,       //u8xxx
     163             :     kRangeSetCJK,       //u9xxx
     164             :     kRangeTableBase+4,  //uaxxx
     165             :     kRangeKorean,       //ubxxx
     166             :     kRangeKorean,       //ucxxx
     167             :     kRangeTableBase+5,  //udxxx
     168             :     kRangePrivate,      //uexxx
     169             :     kRangeTableBase+6   //ufxxx
     170             :   },
     171             :   { // row 1: table for 0X-- (indexed by the second hex digit)
     172             :     kRangeSetLatin,          //u00xx
     173             :     kRangeSetLatin,          //u01xx
     174             :     kRangeSetLatin,          //u02xx
     175             :     kRangeGreek,             //u03xx     XXX 0300-036f is in fact kRangeCombiningDiacriticalMarks
     176             :     kRangeCyrillic,          //u04xx
     177             :     kRangeTableBase+7,       //u05xx, includes Cyrillic supplement, Hebrew, and Armenian
     178             :     kRangeArabic,            //u06xx
     179             :     kRangeTertiaryTable,     //u07xx
     180             :     kRangeUnassigned,        //u08xx
     181             :     kRangeTertiaryTable,     //u09xx
     182             :     kRangeTertiaryTable,     //u0axx
     183             :     kRangeTertiaryTable,     //u0bxx
     184             :     kRangeTertiaryTable,     //u0cxx
     185             :     kRangeTertiaryTable,     //u0dxx
     186             :     kRangeTertiaryTable,     //u0exx
     187             :     kRangeTibetan            //u0fxx
     188             :   },
     189             :   { // row 2: table for 1X--
     190             :     kRangeTertiaryTable,     //u10xx
     191             :     kRangeKorean,            //u11xx
     192             :     kRangeEthiopic,          //u12xx
     193             :     kRangeTertiaryTable,     //u13xx
     194             :     kRangeCanadian,          //u14xx
     195             :     kRangeCanadian,          //u15xx
     196             :     kRangeTertiaryTable,     //u16xx
     197             :     kRangeKhmer,             //u17xx
     198             :     kRangeMongolian,         //u18xx
     199             :     kRangeUnassigned,        //u19xx
     200             :     kRangeUnassigned,        //u1axx
     201             :     kRangeUnassigned,        //u1bxx
     202             :     kRangeUnassigned,        //u1cxx
     203             :     kRangeUnassigned,        //u1dxx
     204             :     kRangeSetLatin,          //u1exx
     205             :     kRangeGreek              //u1fxx
     206             :   },
     207             :   { // row 3: table for 2X--
     208             :     kRangeSetLatin,          //u20xx
     209             :     kRangeSetLatin,          //u21xx
     210             :     kRangeMathOperators,     //u22xx
     211             :     kRangeMiscTechnical,     //u23xx
     212             :     kRangeControlOpticalEnclose, //u24xx
     213             :     kRangeBoxBlockGeometrics, //u25xx
     214             :     kRangeMiscSymbols,       //u26xx
     215             :     kRangeDingbats,          //u27xx
     216             :     kRangeBraillePattern,    //u28xx
     217             :     kRangeUnassigned,        //u29xx
     218             :     kRangeUnassigned,        //u2axx
     219             :     kRangeUnassigned,        //u2bxx
     220             :     kRangeUnassigned,        //u2cxx
     221             :     kRangeUnassigned,        //u2dxx
     222             :     kRangeSetCJK,            //u2exx
     223             :     kRangeSetCJK             //u2fxx
     224             :   },
     225             :   {  // row 4: table for aX--
     226             :     kRangeYi,                //ua0xx
     227             :     kRangeYi,                //ua1xx
     228             :     kRangeYi,                //ua2xx
     229             :     kRangeYi,                //ua3xx
     230             :     kRangeYi,                //ua4xx
     231             :     kRangeUnassigned,        //ua5xx
     232             :     kRangeUnassigned,        //ua6xx
     233             :     kRangeUnassigned,        //ua7xx
     234             :     kRangeUnassigned,        //ua8xx
     235             :     kRangeUnassigned,        //ua9xx
     236             :     kRangeUnassigned,        //uaaxx
     237             :     kRangeUnassigned,        //uabxx
     238             :     kRangeKorean,            //uacxx
     239             :     kRangeKorean,            //uadxx
     240             :     kRangeKorean,            //uaexx
     241             :     kRangeKorean             //uafxx
     242             :   },
     243             :   {  // row 5: table for dX--
     244             :     kRangeKorean,            //ud0xx
     245             :     kRangeKorean,            //ud1xx
     246             :     kRangeKorean,            //ud2xx
     247             :     kRangeKorean,            //ud3xx
     248             :     kRangeKorean,            //ud4xx
     249             :     kRangeKorean,            //ud5xx
     250             :     kRangeKorean,            //ud6xx
     251             :     kRangeKorean,            //ud7xx
     252             :     kRangeSurrogate,         //ud8xx
     253             :     kRangeSurrogate,         //ud9xx
     254             :     kRangeSurrogate,         //udaxx
     255             :     kRangeSurrogate,         //udbxx
     256             :     kRangeSurrogate,         //udcxx
     257             :     kRangeSurrogate,         //uddxx
     258             :     kRangeSurrogate,         //udexx
     259             :     kRangeSurrogate          //udfxx
     260             :   },
     261             :   { // row 6: table for fX--
     262             :     kRangePrivate,           //uf0xx
     263             :     kRangePrivate,           //uf1xx
     264             :     kRangePrivate,           //uf2xx
     265             :     kRangePrivate,           //uf3xx
     266             :     kRangePrivate,           //uf4xx
     267             :     kRangePrivate,           //uf5xx
     268             :     kRangePrivate,           //uf6xx
     269             :     kRangePrivate,           //uf7xx
     270             :     kRangePrivate,           //uf8xx
     271             :     kRangeSetCJK,            //uf9xx
     272             :     kRangeSetCJK,            //ufaxx
     273             :     kRangeArabic,            //ufbxx, includes Alphabetic Presentation Forms (fb00 - fb4f)
     274             :     kRangeArabic,            //ufcxx
     275             :     kRangeArabic,            //ufdxx
     276             :     kRangeTableBase+8,       //ufexx
     277             :     kRangeTableBase+9        //uffxx, halfwidth and fullwidth forms, includes Specials
     278             :   },
     279             :   { // row 7: table for 0x0500 - 0x05ff (indexed by the third hex digit)
     280             :     kRangeCyrillic,          //u050x
     281             :     kRangeCyrillic,          //u051x
     282             :     kRangeCyrillic,          //u052x
     283             :     kRangeArmenian,          //u053x
     284             :     kRangeArmenian,          //u054x
     285             :     kRangeArmenian,          //u055x
     286             :     kRangeArmenian,          //u056x
     287             :     kRangeArmenian,          //u057x
     288             :     kRangeArmenian,          //u058x
     289             :     kRangeHebrew,            //u059x
     290             :     kRangeHebrew,            //u05ax
     291             :     kRangeHebrew,            //u05bx
     292             :     kRangeHebrew,            //u05cx
     293             :     kRangeHebrew,            //u05dx
     294             :     kRangeHebrew,            //u05ex
     295             :     kRangeHebrew             //u05fx
     296             :   },
     297             :   { // row 8: table for 0xfe00 - 0xfeff (indexed by the third hex digit)
     298             :     kRangeSetCJK,            //ufe0x
     299             :     kRangeSetCJK,            //ufe1x
     300             :     kRangeSetCJK,            //ufe2x
     301             :     kRangeSetCJK,            //ufe3x
     302             :     kRangeSetCJK,            //ufe4x
     303             :     kRangeSetCJK,            //ufe5x
     304             :     kRangeSetCJK,            //ufe6x
     305             :     kRangeArabic,            //ufe7x
     306             :     kRangeArabic,            //ufe8x
     307             :     kRangeArabic,            //ufe9x
     308             :     kRangeArabic,            //ufeax
     309             :     kRangeArabic,            //ufebx
     310             :     kRangeArabic,            //ufecx
     311             :     kRangeArabic,            //ufedx
     312             :     kRangeArabic,            //ufeex
     313             :     kRangeArabic             //ufefx
     314             :   },
     315             :   { // row 9: table for 0xff00 - 0xffff (indexed by the third hex digit)
     316             :     kRangeSetCJK,            //uff0x, fullwidth latin
     317             :     kRangeSetCJK,            //uff1x, fullwidth latin
     318             :     kRangeSetCJK,            //uff2x, fullwidth latin
     319             :     kRangeSetCJK,            //uff3x, fullwidth latin
     320             :     kRangeSetCJK,            //uff4x, fullwidth latin
     321             :     kRangeSetCJK,            //uff5x, fullwidth latin
     322             :     kRangeSetCJK,            //uff6x, halfwidth katakana
     323             :     kRangeSetCJK,            //uff7x, halfwidth katakana
     324             :     kRangeSetCJK,            //uff8x, halfwidth katakana
     325             :     kRangeSetCJK,            //uff9x, halfwidth katakana
     326             :     kRangeSetCJK,            //uffax, halfwidth hangul jamo
     327             :     kRangeSetCJK,            //uffbx, halfwidth hangul jamo
     328             :     kRangeSetCJK,            //uffcx, halfwidth hangul jamo
     329             :     kRangeSetCJK,            //uffdx, halfwidth hangul jamo
     330             :     kRangeSetCJK,            //uffex, fullwidth symbols
     331             :     kRangeSpecials,          //ufffx, Specials
     332             :   },
     333             : };
     334             : 
     335             : // Most scripts between U+0700 and U+16FF are assigned a chunk of 128 (0x80)
     336             : // code points, so the number of entries in the tertiary range table for
     337             : // that range is obtained by dividing (0x1700 - 0x0700) by 128.
     338             : // Exceptions: Ethiopic, Tibetan, Hangul Jamo and Canadian Aboriginal
     339             : // Syllabics take multiple chunks, and Ogham and Runic share a single chunk.
     340             : #define TERTIARY_TABLE_SIZE ((0x1700 - 0x0700) / 0x80)   // = 32 entries
     341             : 
     342             : static const uint8_t gUnicodeTertiaryRangeTable[TERTIARY_TABLE_SIZE] =
     343             : { //table for 0x0700 - 0x16ff, one entry per 128 code points; FindCharUnicodeRange indexes it with (ch - 0x0700) >> 7
     344             :     kRangeSyriac,            //u070x
     345             :     kRangeThaana,            //u078x
     346             :     kRangeUnassigned,        //u080x  placeholder (resolved in the secondary table)
     347             :     kRangeUnassigned,        //u088x  placeholder (resolved in the secondary table)
     348             :     kRangeDevanagari,        //u090x
     349             :     kRangeBengali,           //u098x
     350             :     kRangeGurmukhi,          //u0a0x
     351             :     kRangeGujarati,          //u0a8x
     352             :     kRangeOriya,             //u0b0x
     353             :     kRangeTamil,             //u0b8x
     354             :     kRangeTelugu,            //u0c0x
     355             :     kRangeKannada,           //u0c8x
     356             :     kRangeMalayalam,         //u0d0x
     357             :     kRangeSinhala,           //u0d8x
     358             :     kRangeThai,              //u0e0x
     359             :     kRangeLao,               //u0e8x
     360             :     kRangeTibetan,           //u0f0x  placeholder (resolved in the secondary table)
     361             :     kRangeTibetan,           //u0f8x  placeholder (resolved in the secondary table)
     362             :     kRangeMyanmar,           //u100x
     363             :     kRangeGeorgian,          //u108x
     364             :     kRangeKorean,            //u110x  placeholder (resolved in the secondary table)
     365             :     kRangeKorean,            //u118x  placeholder (resolved in the secondary table)
     366             :     kRangeEthiopic,          //u120x  placeholder (resolved in the secondary table)
     367             :     kRangeEthiopic,          //u128x  placeholder (resolved in the secondary table)
     368             :     kRangeEthiopic,          //u130x
     369             :     kRangeCherokee,          //u138x
     370             :     kRangeCanadian,          //u140x  placeholder (resolved in the secondary table)
     371             :     kRangeCanadian,          //u148x  placeholder (resolved in the secondary table)
     372             :     kRangeCanadian,          //u150x  placeholder (resolved in the secondary table)
     373             :     kRangeCanadian,          //u158x  placeholder (resolved in the secondary table)
     374             :     kRangeCanadian,          //u160x
     375             :     kRangeOghamRunic         //u168x  this contains two scripts, Ogham & Runic
     376             : };
     377             : 
     378             : // FindCharUnicodeRange maps the code point |ch| to one of the kRange* values.
     379             : // Code points above U+FFFF are classified by plane only: plane 1 gives
     380             : // kRangeSMP, plane 2 gives kRangeSetCJK, every other plane kRangeHigherPlanes.
     381             : // BMP code points are resolved one hex digit at a time through
     382             : // gUnicodeSubrangeTable (a third level exists for u05xx, ufexx and uffxx), or
     383             : // through gUnicodeTertiaryRangeTable in 128-code-point steps for U+0700 - U+16FF.
     384             : // Limitation: u03xx is not split further, so combining diacritical marks
     385             : // (0300 - 036f) are reported as kRangeGreek; extend the tables if that matters.
     386           0 : uint32_t FindCharUnicodeRange(uint32_t ch)
     387             : {
     388             :   uint32_t range;   // latest table entry: either a final range or a selector for the next table
     389             :   
     390             :   // aggregate ranges for non-BMP codepoints
     391           0 :   if (ch > 0xFFFF) {
     392           0 :     uint32_t p = (ch >> 16);   // plane number
     393           0 :     if (p == 1) {
     394           0 :         return kRangeSMP;
     395           0 :     } else if (p == 2) {
     396           0 :         return kRangeSetCJK;
     397             :     }
     398           0 :     return kRangeHigherPlanes;
     399             :   }
     400             : 
     401             :   // lookup explicit range for BMP codepoints
     402             :   // first level: row 0, indexed by the top hex digit
     403           0 :   range = gUnicodeSubrangeTable[0][ch >> 12];
     404             :   
     405             :   // entries below kRangeTableBase are final ranges
     406           0 :   if (range < kRangeTableBase)
     407             :     // the first-level entry already is the answer; no sub-table is needed
     408           0 :     return range;
     409             : 
     410             :   // otherwise the entry selects a second-level row, indexed by the second hex digit
     411           0 :   range = gUnicodeSubrangeTable[range - kRangeTableBase][(ch & 0x0f00) >> 8];
     412           0 :   if (range < kRangeTableBase)
     413           0 :     return range;
     414           0 :   if (range < kRangeTertiaryTable)   // selector for a third-level row, indexed by the third hex digit
     415           0 :     return gUnicodeSubrangeTable[range - kRangeTableBase][(ch & 0x00f0) >> 4];
     416             : 
     417             :   // Yet another table to look at : U+0700 - U+16FF : 128 code point blocks (only rows for that span hold kRangeTertiaryTable)
     418           0 :   return gUnicodeTertiaryRangeTable[(ch - 0x0700) >> 7];
     419           9 : }

Generated by: LCOV version 1.13