LCOV - code coverage report
Current view: top level - gfx/graphite2/src/inc - UtfCodec.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 51 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 46 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*  GRAPHITE2 LICENSING
       2             : 
       3             :     Copyright 2011, SIL International
       4             :     All rights reserved.
       5             : 
       6             :     This library is free software; you can redistribute it and/or modify
       7             :     it under the terms of the GNU Lesser General Public License as published
       8             :     by the Free Software Foundation; either version 2.1 of License, or
       9             :     (at your option) any later version.
      10             : 
      11             :     This program is distributed in the hope that it will be useful,
      12             :     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :     Lesser General Public License for more details.
      15             : 
      16             :     You should also have received a copy of the GNU Lesser General Public
      17             :     License along with this library in the file named "LICENSE".
      18             :     If not, write to the Free Software Foundation, 51 Franklin Street,
      19             :     Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
      20             :     internet at http://www.fsf.org/licenses/lgpl.html.
      21             : 
      22             : Alternatively, the contents of this file may be used under the terms of the
      23             : Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
      24             : License, as published by the Free Software Foundation, either version 2
      25             : of the License or (at your option) any later version.
      26             : */
      27             : #pragma once
      28             : 
      29             : #include <cstdlib>
      30             : #include "inc/Main.h"
      31             : 
      32             : namespace graphite2 {
      33             : 
      34             : typedef uint32  uchar_t;
      35             : 
      36             : template <int N>
      37             : struct _utf_codec
      38             : {
      39             :     typedef uchar_t codeunit_t;
      40             : 
      41             :     static void     put(codeunit_t * cp, const uchar_t , int8 & len) throw();
      42             :     static uchar_t  get(const codeunit_t * cp, int8 & len) throw();
      43             :     static bool     validate(const codeunit_t * s, const codeunit_t * e) throw();
      44             : };
      45             : 
      46             : 
      47             : template <>
      48             : struct _utf_codec<32>
      49             : {
      50             : private:
      51             :     static const uchar_t    limit = 0x110000;
      52             : public:
      53             :     typedef uint32  codeunit_t;
      54             : 
      55             :     inline
      56           0 :     static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
      57             :     {
      58           0 :         *cp = usv; l = 1;
      59           0 :     }
      60             : 
      61             :     inline
      62           0 :     static uchar_t get(const codeunit_t * cp, int8 & l) throw()
      63             :     {
      64           0 :         if (cp[0] < limit)  { l = 1;  return cp[0]; }
      65           0 :         else                { l = -1; return 0xFFFD; }
      66             :     }
      67             : 
      68             :     inline
      69             :     static bool validate(codeunit_t * s, codeunit_t * e) throw()
      70             :     {
      71             :         return e > s;
      72             :     }
      73             : };
      74             : 
      75             : 
      76             : template <>
      77             : struct _utf_codec<16>
      78             : {
      79             : private:
      80             :     static const int32  lead_offset      = 0xD800 - (0x10000 >> 10);
      81             :     static const int32  surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00;
      82             : public:
      83             :     typedef uint16  codeunit_t;
      84             : 
      85             :     inline
      86             :     static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
      87             :     {
      88             :         if (usv < 0x10000)  { l = 1; cp[0] = codeunit_t(usv); }
      89             :         else
      90             :         {
      91             :             cp[0] = codeunit_t(lead_offset + (usv >> 10));
      92             :             cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF));
      93             :             l = 2;
      94             :         }
      95             :     }
      96             : 
      97             :     inline
      98           0 :     static uchar_t get(const codeunit_t * cp, int8 & l) throw()
      99             :     {
     100           0 :         const uint32    uh = cp[0];
     101           0 :         l = 1;
     102             : 
     103           0 :         if (uh < 0xD800|| uh > 0xDFFF) { return uh; }
     104           0 :         const uint32 ul = cp[1];
     105           0 :         if (uh > 0xDBFF || ul < 0xDC00 || ul > 0xDFFF) { l = -1; return 0xFFFD; }
     106           0 :         ++l;
     107           0 :         return (uh<<10) + ul + surrogate_offset;
     108             :     }
     109             : 
     110             :     inline
     111           0 :     static bool validate(codeunit_t * s, codeunit_t * e) throw()
     112             :     {
     113           0 :         const ptrdiff_t n = e-s;
     114           0 :         if (n <= 0) return n == 0;
     115           0 :         const uint32 u = *(s+(n-1)); // Get the last codepoint
     116           0 :         return (u < 0xD800 || u > 0xDBFF);
     117             :     }
     118             : };
     119             : 
     120             : 
     121             : template <>
     122             : struct _utf_codec<8>
     123             : {
     124             : private:
     125             :     static const int8 sz_lut[16];
     126             :     static const byte mask_lut[5];
     127             :     static const uchar_t    limit = 0x110000;
     128             : 
     129             : public:
     130             :     typedef uint8   codeunit_t;
     131             : 
     132             :     inline
     133           0 :     static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
     134             :     {
     135           0 :         if (usv < 0x80)     {l = 1; cp[0] = usv; return; }
     136           0 :         if (usv < 0x0800)   {l = 2; cp[0] = 0xC0 + (usv >> 6);  cp[1] = 0x80 + (usv & 0x3F); return; }
     137           0 :         if (usv < 0x10000)  {l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F);  cp[2] = 0x80 + (usv & 0x3F); return; }
     138           0 :         else                {l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; }
     139             :     }
     140             : 
     141             :     inline
     142           0 :     static uchar_t get(const codeunit_t * cp, int8 & l) throw()
     143             :     {
     144           0 :         const int8 seq_sz = sz_lut[*cp >> 4];
     145           0 :         uchar_t u = *cp & mask_lut[seq_sz];
     146           0 :         l = 1;
     147           0 :         bool toolong = false;
     148             : 
     149           0 :         switch(seq_sz) {
     150           0 :             case 4:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong  = (u < 0x10); GR_FALLTHROUGH;
     151             :                 // no break
     152           0 :             case 3:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20); GR_FALLTHROUGH;
     153             :                 // no break
     154           0 :             case 2:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80); GR_FALLTHROUGH;
     155             :                 // no break
     156           0 :             case 1:     break;
     157           0 :             case 0:     l = -1; return 0xFFFD;
     158             :         }
     159             : 
     160           0 :         if (l != seq_sz || toolong  || u >= limit)
     161             :         {
     162           0 :             l = -l;
     163           0 :             return 0xFFFD;
     164             :         }
     165           0 :         return u;
     166             :     }
     167             : 
     168             :     inline
     169             :     static bool validate(codeunit_t * s, codeunit_t * e) throw()
     170             :     {
     171             :         const ptrdiff_t n = e-s;
     172             :         if (n <= 0) return n == 0;
     173             :         s += (n-1);
     174             :         if (*s < 0x80) return true;
     175             :         if (*s >= 0xC0) return false;
     176             :         if (n == 1) return true;
     177             :         if (*--s < 0x80) return true;
     178             :         if (*s >= 0xe0) return false;
     179             :         if (n == 2 || *s >= 0xC0) return true;
     180             :         if (*--s < 0x80) return true;
     181             :         if (*s >= 0xF0) return false;
     182             :         return true;
     183             :     }
     184             : 
     185             : };
     186             : 
     187             : 
     188             : template <typename C>
     189             : class _utf_iterator
     190             : {
     191             :     typedef _utf_codec<sizeof(C)*8> codec;
     192             : 
     193             :     C             * cp;
     194             :     mutable int8    sl;
     195             : 
     196             : public:
     197             :     typedef C           codeunit_type;
     198             :     typedef uchar_t     value_type;
     199             :     typedef uchar_t   * pointer;
     200             : 
     201             :     class reference
     202             :     {
     203             :         const _utf_iterator & _i;
     204             : 
     205           0 :         reference(const _utf_iterator & i): _i(i) {}
     206             :     public:
     207           0 :         operator value_type () const throw ()                   { return codec::get(_i.cp, _i.sl); }
     208           0 :         reference & operator = (const value_type usv) throw()   { codec::put(_i.cp, usv, _i.sl); return *this; }
     209             : 
     210             :         friend class _utf_iterator;
     211             :     };
     212             : 
     213             : 
     214           0 :     _utf_iterator(const void * us=0)    : cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { }
     215             : 
     216           0 :     _utf_iterator   & operator ++ ()    { cp += abs(sl); return *this; }
     217             :     _utf_iterator   operator ++ (int)   { _utf_iterator tmp(*this); operator++(); return tmp; }
     218             : 
     219           0 :     bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; }
     220           0 :     bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); }
     221             : 
     222           0 :     reference   operator * () const throw() { return *this; }
     223             :     pointer     operator ->() const throw() { return &operator *(); }
     224             : 
     225           0 :     operator codeunit_type * () const throw() { return cp; }
     226             : 
     227           0 :     bool error() const throw()  { return sl < 1; }
     228             : };
     229             : 
     230             : template <typename C>
     231             : struct utf
     232             : {
     233             :     typedef typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t;
     234             : 
     235             :     typedef _utf_iterator<C>        iterator;
     236             :     typedef _utf_iterator<const C>  const_iterator;
     237             : 
     238             :     inline
     239           0 :     static bool validate(codeunit_t * s, codeunit_t * e) throw() {
     240           0 :         return _utf_codec<sizeof(C)*8>::validate(s,e);
     241             :     }
     242             : };
     243             : 
     244             : 
     245             : typedef utf<uint32> utf32;
     246             : typedef utf<uint16> utf16;
     247             : typedef utf<uint8>  utf8;
     248             : 
     249             : } // namespace graphite2

Generated by: LCOV version 1.13