LCOV - code coverage report
Current view: top level - intl/lwbrk - nsSampleWordBreaker.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 3 65 4.6 %
Date: 2017-07-14 16:53:18 Functions: 4 10 40.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : 
       7             : #include "nsSampleWordBreaker.h"
       8             : 
       9           3 : nsSampleWordBreaker::nsSampleWordBreaker()
      10             : {
      11           3 : }
      12           0 : nsSampleWordBreaker::~nsSampleWordBreaker()
      13             : {
      14           0 : }
      15             : 
      16          24 : NS_IMPL_ISUPPORTS(nsSampleWordBreaker, nsIWordBreaker)
      17             : 
      18           0 : bool nsSampleWordBreaker::BreakInBetween(
      19             :   const char16_t* aText1 , uint32_t aTextLen1,
      20             :   const char16_t* aText2 , uint32_t aTextLen2)
      21             : {
      22           0 :   NS_PRECONDITION( nullptr != aText1, "null ptr");
      23           0 :   NS_PRECONDITION( nullptr != aText2, "null ptr");
      24             : 
      25           0 :   if(!aText1 || !aText2 || (0 == aTextLen1) || (0 == aTextLen2))
      26           0 :     return false;
      27             : 
      28           0 :   return GetClass(aText1[aTextLen1-1]) != GetClass(aText2[0]);
      29             : }
      30             : 
      31             : 
      32             : #define IS_ASCII(c)            (0 == ( 0xFF80 & (c)))
      33             : #define ASCII_IS_ALPHA(c)         ((( 'a' <= (c)) && ((c) <= 'z')) || (( 'A' <= (c)) && ((c) <= 'Z')))
      34             : #define ASCII_IS_DIGIT(c)         (( '0' <= (c)) && ((c) <= '9'))
      35             : #define ASCII_IS_SPACE(c)         (( ' ' == (c)) || ( '\t' == (c)) || ( '\r' == (c)) || ( '\n' == (c)))
      36             : #define IS_ALPHABETICAL_SCRIPT(c) ((c) < 0x2E80)
      37             : 
      38             : // We changed the beginning of IS_HAN from 0x4e00 to 0x3400 to reflect Unicode 3.0
      39             : #define IS_HAN(c)              (( 0x3400 <= (c)) && ((c) <= 0x9fff))||(( 0xf900 <= (c)) && ((c) <= 0xfaff))
      40             : #define IS_KATAKANA(c)         (( 0x30A0 <= (c)) && ((c) <= 0x30FF))
      41             : #define IS_HIRAGANA(c)         (( 0x3040 <= (c)) && ((c) <= 0x309F))
      42             : #define IS_HALFWIDTHKATAKANA(c)         (( 0xFF60 <= (c)) && ((c) <= 0xFF9F))
      43             : #define IS_THAI(c)         (0x0E00 == (0xFF80 & (c) )) // Look at the highest 9 bits
      44             : 
      45             : /* static */ nsWordBreakClass
      46           0 : nsIWordBreaker::GetClass(char16_t c)
      47             : {
      48             :   // begin of the hack
      49             : 
      50           0 :   if (IS_ALPHABETICAL_SCRIPT(c))  {
      51           0 :           if(IS_ASCII(c))  {
      52           0 :                   if(ASCII_IS_SPACE(c)) {
      53           0 :                           return kWbClassSpace;
      54           0 :                   } else if(ASCII_IS_ALPHA(c) || ASCII_IS_DIGIT(c)) {
      55           0 :                           return kWbClassAlphaLetter;
      56             :                   } else {
      57           0 :                           return kWbClassPunct;
      58             :                   }
      59           0 :           } else if(IS_THAI(c)) {
      60           0 :                   return kWbClassThaiLetter;
      61           0 :           } else if (c == 0x00A0/*NBSP*/) {
      62           0 :       return kWbClassSpace;
      63             :     } else {
      64           0 :                   return kWbClassAlphaLetter;
      65             :           }
      66             :   }  else {
      67           0 :           if(IS_HAN(c)) {
      68           0 :                   return kWbClassHanLetter;
      69           0 :           } else if(IS_KATAKANA(c))   {
      70           0 :                   return kWbClassKatakanaLetter;
      71           0 :           } else if(IS_HIRAGANA(c))   {
      72           0 :                   return kWbClassHiraganaLetter;
      73           0 :           } else if(IS_HALFWIDTHKATAKANA(c))  {
      74           0 :                   return kWbClassHWKatakanaLetter;
      75             :           } else  {
      76           0 :                   return kWbClassAlphaLetter;
      77             :           }
      78             :   }
      79             :   return static_cast<nsWordBreakClass>(0);
      80             : }
      81             : 
      82           0 : nsWordRange nsSampleWordBreaker::FindWord(
      83             :   const char16_t* aText , uint32_t aTextLen,
      84             :   uint32_t aOffset)
      85             : {
      86             :   nsWordRange range;
      87           0 :   NS_PRECONDITION( nullptr != aText, "null ptr");
      88           0 :   NS_PRECONDITION( 0 != aTextLen, "len = 0");
      89           0 :   NS_PRECONDITION( aOffset <= aTextLen, "aOffset > aTextLen");
      90             : 
      91           0 :   range.mBegin = aTextLen + 1;
      92           0 :   range.mEnd = aTextLen + 1;
      93             : 
      94           0 :   if(!aText || aOffset > aTextLen)
      95           0 :     return range;
      96             : 
      97           0 :   nsWordBreakClass c = GetClass(aText[aOffset]);
      98             :   uint32_t i;
      99             :   // Scan forward
     100           0 :   range.mEnd--;
     101           0 :   for(i = aOffset +1;i <= aTextLen; i++)
     102             :   {
     103           0 :      if( c != GetClass(aText[i]))
     104             :      {
     105           0 :        range.mEnd = i;
     106           0 :        break;
     107             :      }
     108             :   }
     109             : 
     110             :   // Scan backward
     111           0 :   range.mBegin = 0;
     112           0 :   for(i = aOffset ;i > 0; i--)
     113             :   {
     114           0 :      if( c != GetClass(aText[i-1]))
     115             :      {
     116           0 :        range.mBegin = i;
     117           0 :        break;
     118             :      }
     119             :   }
     120             :   if(kWbClassThaiLetter == c)
     121             :   {
     122             :         // need to call Thai word breaker from here
     123             :         // We should pass the whole Thai segment to the Thai word breaker to find a shorter answer
     124             :   }
     125           0 :   return range;
     126             : }
     127             : 
     128           0 : int32_t nsSampleWordBreaker::NextWord(
     129             :   const char16_t* aText, uint32_t aLen, uint32_t aPos)
     130             : {
     131             :   nsWordBreakClass c1, c2;
     132           0 :   uint32_t cur = aPos;
     133           0 :   if (cur == aLen)
     134           0 :     return NS_WORDBREAKER_NEED_MORE_TEXT;
     135           0 :   c1 = GetClass(aText[cur]);
     136             : 
     137           0 :   for(cur++; cur <aLen; cur++)
     138             :   {
     139           0 :      c2 = GetClass(aText[cur]);
     140           0 :      if(c2 != c1)
     141           0 :        break;
     142             :   }
     143             :   if(kWbClassThaiLetter == c1)
     144             :   {
     145             :         // need to call Thai word breaker from here
     146             :         // We should pass the whole Thai segment to the Thai word breaker to find a shorter answer
     147             :   }
     148           0 :   if (cur == aLen)
     149           0 :     return NS_WORDBREAKER_NEED_MORE_TEXT;
     150           0 :   return cur;
     151             : }

Generated by: LCOV version 1.13